diff options
author | Ronald S. Bultje <rbultje@google.com> | 2012-11-01 11:09:58 -0700 |
---|---|---|
committer | Ronald S. Bultje <rbultje@google.com> | 2012-11-01 16:31:22 -0700 |
commit | 4b2c2b9aa4a273a23d90ddb3bbf6dfb3482e0b8f (patch) | |
tree | 20eef975f1a8c28978d826a354092433b9093588 /vp8/encoder | |
parent | 6c280c2299f078a475dc87e7615fdf1a4998cd31 (diff) | |
download | libvpx-4b2c2b9aa4a273a23d90ddb3bbf6dfb3482e0b8f.tar libvpx-4b2c2b9aa4a273a23d90ddb3bbf6dfb3482e0b8f.tar.gz libvpx-4b2c2b9aa4a273a23d90ddb3bbf6dfb3482e0b8f.tar.bz2 libvpx-4b2c2b9aa4a273a23d90ddb3bbf6dfb3482e0b8f.zip |
Rename vp8/ codec directory to vp9/.
Change-Id: Ic084c475844b24092a433ab88138cf58af3abbe4
Diffstat (limited to 'vp8/encoder')
120 files changed, 0 insertions, 48607 deletions
diff --git a/vp8/encoder/arm/arm_csystemdependent.c b/vp8/encoder/arm/arm_csystemdependent.c deleted file mode 100644 index 1a93bc3e5..000000000 --- a/vp8/encoder/arm/arm_csystemdependent.c +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vpx_ports/config.h" -#include "vpx_ports/arm.h" -#include "vp8/encoder/variance.h" -#include "vp8/encoder/onyx_int.h" - -extern void (*vp9_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction); -extern void vp9_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction); -extern void vpxyv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction); - -void vp9_arch_arm_encoder_init(VP9_COMP *cpi) { -#if CONFIG_RUNTIME_CPU_DETECT - int flags = cpi->common.rtcd.flags; - -#if HAVE_ARMV5TE - if (flags & HAS_EDSP) { - } -#endif - -#if HAVE_ARMV6 - if (flags & HAS_MEDIA) { - cpi->rtcd.variance.sad16x16 = vp9_sad16x16_armv6; - /*cpi->rtcd.variance.sad16x8 = vp9_sad16x8_c; - cpi->rtcd.variance.sad8x16 = vp9_sad8x16_c; - cpi->rtcd.variance.sad8x8 = vp9_sad8x8_c; - cpi->rtcd.variance.sad4x4 = vp9_sad4x4_c;*/ - - /*cpi->rtcd.variance.var4x4 = vp9_variance4x4_c;*/ - cpi->rtcd.variance.var8x8 = vp9_variance8x8_armv6; - /*cpi->rtcd.variance.var8x16 = vp9_variance8x16_c; - cpi->rtcd.variance.var16x8 = vp9_variance16x8_c;*/ - cpi->rtcd.variance.var16x16 = vp9_variance16x16_armv6; - - /*cpi->rtcd.variance.subpixvar4x4 = vp9_sub_pixel_variance4x4_c;*/ - cpi->rtcd.variance.subpixvar8x8 = vp9_sub_pixel_variance8x8_armv6; - 
/*cpi->rtcd.variance.subpixvar8x16 = vp9_sub_pixel_variance8x16_c; - cpi->rtcd.variance.subpixvar16x8 = vp9_sub_pixel_variance16x8_c;*/ - cpi->rtcd.variance.subpixvar16x16 = vp9_sub_pixel_variance16x16_armv6; - cpi->rtcd.variance.halfpixvar16x16_h = vp9_variance_halfpixvar16x16_h_armv6; - cpi->rtcd.variance.halfpixvar16x16_v = vp9_variance_halfpixvar16x16_v_armv6; - cpi->rtcd.variance.halfpixvar16x16_hv = vp9_variance_halfpixvar16x16_hv_armv6; - - cpi->rtcd.variance.mse16x16 = vp9_mse16x16_armv6; - /*cpi->rtcd.variance.getmbss = vp9_get_mb_ss_c;*/ - - cpi->rtcd.fdct.short4x4 = vp9_short_fdct4x4_armv6; - cpi->rtcd.fdct.short8x4 = vp9_short_fdct8x4_armv6; - cpi->rtcd.fdct.fast4x4 = vp9_short_fdct4x4_armv6; - cpi->rtcd.fdct.fast8x4 = vp9_short_fdct8x4_armv6; - cpi->rtcd.fdct.walsh_short4x4 = vp9_short_walsh4x4_armv6; - - /*cpi->rtcd.encodemb.berr = vp9_block_error_c; - cpi->rtcd.encodemb.mberr = vp9_mbblock_error_c; - cpi->rtcd.encodemb.mbuverr = vp9_mbuverror_c;*/ - cpi->rtcd.encodemb.subb = vp9_subtract_b_armv6; - cpi->rtcd.encodemb.submby = vp9_subtract_mby_armv6; - cpi->rtcd.encodemb.submbuv = vp9_subtract_mbuv_armv6; - - /*cpi->rtcd.quantize.quantb = vp8_regular_quantize_b;*/ - cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_armv6; - } -#endif - -#if HAVE_ARMV7 - if (flags & HAS_NEON) { - cpi->rtcd.variance.sad16x16 = vp9_sad16x16_neon; - cpi->rtcd.variance.sad16x8 = vp9_sad16x8_neon; - cpi->rtcd.variance.sad8x16 = vp9_sad8x16_neon; - cpi->rtcd.variance.sad8x8 = vp9_sad8x8_neon; - cpi->rtcd.variance.sad4x4 = vp9_sad4x4_neon; - - /*cpi->rtcd.variance.var4x4 = vp9_variance4x4_c;*/ - cpi->rtcd.variance.var8x8 = vp9_variance8x8_neon; - cpi->rtcd.variance.var8x16 = vp9_variance8x16_neon; - cpi->rtcd.variance.var16x8 = vp9_variance16x8_neon; - cpi->rtcd.variance.var16x16 = vp9_variance16x16_neon; - - /*cpi->rtcd.variance.subpixvar4x4 = vp9_sub_pixel_variance4x4_c;*/ - cpi->rtcd.variance.subpixvar8x8 = vp9_sub_pixel_variance8x8_neon; - 
/*cpi->rtcd.variance.subpixvar8x16 = vp9_sub_pixel_variance8x16_c; - cpi->rtcd.variance.subpixvar16x8 = vp9_sub_pixel_variance16x8_c;*/ - cpi->rtcd.variance.subpixvar16x16 = vp9_sub_pixel_variance16x16_neon; - cpi->rtcd.variance.halfpixvar16x16_h = vp9_variance_halfpixvar16x16_h_neon; - cpi->rtcd.variance.halfpixvar16x16_v = vp9_variance_halfpixvar16x16_v_neon; - cpi->rtcd.variance.halfpixvar16x16_hv = vp9_variance_halfpixvar16x16_hv_neon; - - cpi->rtcd.variance.mse16x16 = vp9_mse16x16_neon; - /*cpi->rtcd.variance.getmbss = vp9_get_mb_ss_c;*/ - - cpi->rtcd.fdct.short4x4 = vp9_short_fdct4x4_neon; - cpi->rtcd.fdct.short8x4 = vp9_short_fdct8x4_neon; - cpi->rtcd.fdct.fast4x4 = vp9_short_fdct4x4_neon; - cpi->rtcd.fdct.fast8x4 = vp9_short_fdct8x4_neon; - cpi->rtcd.fdct.walsh_short4x4 = vp9_short_walsh4x4_neon; - - /*cpi->rtcd.encodemb.berr = vp9_block_error_c; - cpi->rtcd.encodemb.mberr = vp9_mbblock_error_c; - cpi->rtcd.encodemb.mbuverr = vp9_mbuverror_c;*/ - cpi->rtcd.encodemb.subb = vp9_subtract_b_neon; - cpi->rtcd.encodemb.submby = vp9_subtract_mby_neon; - cpi->rtcd.encodemb.submbuv = vp9_subtract_mbuv_neon; - - /*cpi->rtcd.quantize.quantb = vp8_regular_quantize_b; - cpi->rtcd.quantize.quantb_pair = vp8_regular_quantize_b_pair;*/ - cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_neon; - cpi->rtcd.quantize.fastquantb_pair = vp8_fast_quantize_b_pair_neon; - } -#endif - -#if HAVE_ARMV7 -#if CONFIG_RUNTIME_CPU_DETECT - if (flags & HAS_NEON) -#endif - { - vp9_yv12_copy_partial_frame_ptr = vpxyv12_copy_partial_frame_neon; - } -#endif -#endif -} diff --git a/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm b/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm deleted file mode 100644 index 180637e68..000000000 --- a/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm +++ /dev/null @@ -1,286 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
-; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_start_encode| - EXPORT |vp9_encode_bool| - EXPORT |vp8_stop_encode| - EXPORT |vp8_encode_value| - - INCLUDE asm_enc_offsets.asm - - ARM - REQUIRE8 - PRESERVE8 - - AREA |.text|, CODE, READONLY - -; r0 BOOL_CODER *br -; r1 unsigned char *source - -|vp8_start_encode| PROC - mov r12, #0 - mov r3, #255 - mvn r2, #23 - str r12, [r0, #vp9_writer_lowvalue] - str r3, [r0, #vp9_writer_range] - str r12, [r0, #vp9_writer_value] - str r2, [r0, #vp9_writer_count] - str r12, [r0, #vp9_writer_pos] - str r1, [r0, #vp9_writer_buffer] - bx lr - ENDP - -; r0 BOOL_CODER *br -; r1 int bit -; r2 int probability -|vp9_encode_bool| PROC - push {r4-r9, lr} - - mov r4, r2 - - ldr r2, [r0, #vp9_writer_lowvalue] - ldr r5, [r0, #vp9_writer_range] - ldr r3, [r0, #vp9_writer_count] - - sub r7, r5, #1 ; range-1 - - cmp r1, #0 - mul r6, r4, r7 ; ((range-1) * probability) - - mov r7, #1 - add r4, r7, r6, lsr #8 ; 1 + (((range-1) * probability) >> 8) - - addne r2, r2, r4 ; if (bit) lowvalue += split - subne r4, r5, r4 ; if (bit) range = range-split - - ; Counting the leading zeros is used to normalize range. - clz r6, r4 - sub r6, r6, #24 ; shift - - ; Flag is set on the sum of count. 
This flag is used later - ; to determine if count >= 0 - adds r3, r3, r6 ; count += shift - lsl r5, r4, r6 ; range <<= shift - bmi token_count_lt_zero ; if(count >= 0) - - sub r6, r6, r3 ; offset = shift - count - sub r4, r6, #1 ; offset-1 - lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) - bpl token_high_bit_not_set - - ldr r4, [r0, #vp9_writer_pos] ; x - sub r4, r4, #1 ; x = w->pos-1 - b token_zero_while_start -token_zero_while_loop - mov r9, #0 - strb r9, [r7, r4] ; w->buffer[x] =(unsigned char)0 - sub r4, r4, #1 ; x-- -token_zero_while_start - cmp r4, #0 - ldrge r7, [r0, #vp9_writer_buffer] - ldrb r1, [r7, r4] - cmpge r1, #0xff - beq token_zero_while_loop - - ldr r7, [r0, #vp9_writer_buffer] - ldrb r9, [r7, r4] ; w->buffer[x] - add r9, r9, #1 - strb r9, [r7, r4] ; w->buffer[x] + 1 -token_high_bit_not_set - rsb r4, r6, #24 ; 24-offset - ldr r9, [r0, #vp9_writer_buffer] - lsr r7, r2, r4 ; lowvalue >> (24-offset) - ldr r4, [r0, #vp9_writer_pos] ; w->pos - lsl r2, r2, r6 ; lowvalue <<= offset - mov r6, r3 ; shift = count - add r1, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff - str r1, [r0, #vp9_writer_pos] - sub r3, r3, #8 ; count -= 8 - strb r7, [r9, r4] ; w->buffer[w->pos++] - -token_count_lt_zero - lsl r2, r2, r6 ; lowvalue <<= shift - - str r2, [r0, #vp9_writer_lowvalue] - str r5, [r0, #vp9_writer_range] - str r3, [r0, #vp9_writer_count] - pop {r4-r9, pc} - ENDP - -; r0 BOOL_CODER *br -|vp8_stop_encode| PROC - push {r4-r10, lr} - - ldr r2, [r0, #vp9_writer_lowvalue] - ldr r5, [r0, #vp9_writer_range] - ldr r3, [r0, #vp9_writer_count] - - mov r10, #32 - -stop_encode_loop - sub r7, r5, #1 ; range-1 - - mov r4, r7, lsl #7 ; ((range-1) * 128) - - mov r7, #1 - add r4, r7, r4, lsr #8 ; 1 + (((range-1) * 128) >> 8) - - ; Counting the leading zeros is used to normalize range. - clz r6, r4 - sub r6, r6, #24 ; shift - - ; Flag is set on the sum of count. 
This flag is used later - ; to determine if count >= 0 - adds r3, r3, r6 ; count += shift - lsl r5, r4, r6 ; range <<= shift - bmi token_count_lt_zero_se ; if(count >= 0) - - sub r6, r6, r3 ; offset = shift - count - sub r4, r6, #1 ; offset-1 - lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) - bpl token_high_bit_not_set_se - - ldr r4, [r0, #vp9_writer_pos] ; x - sub r4, r4, #1 ; x = w->pos-1 - b token_zero_while_start_se -token_zero_while_loop_se - mov r9, #0 - strb r9, [r7, r4] ; w->buffer[x] =(unsigned char)0 - sub r4, r4, #1 ; x-- -token_zero_while_start_se - cmp r4, #0 - ldrge r7, [r0, #vp9_writer_buffer] - ldrb r1, [r7, r4] - cmpge r1, #0xff - beq token_zero_while_loop_se - - ldr r7, [r0, #vp9_writer_buffer] - ldrb r9, [r7, r4] ; w->buffer[x] - add r9, r9, #1 - strb r9, [r7, r4] ; w->buffer[x] + 1 -token_high_bit_not_set_se - rsb r4, r6, #24 ; 24-offset - ldr r9, [r0, #vp9_writer_buffer] - lsr r7, r2, r4 ; lowvalue >> (24-offset) - ldr r4, [r0, #vp9_writer_pos] ; w->pos - lsl r2, r2, r6 ; lowvalue <<= offset - mov r6, r3 ; shift = count - add r1, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff - str r1, [r0, #vp9_writer_pos] - sub r3, r3, #8 ; count -= 8 - strb r7, [r9, r4] ; w->buffer[w->pos++] - -token_count_lt_zero_se - lsl r2, r2, r6 ; lowvalue <<= shift - - subs r10, r10, #1 - bne stop_encode_loop - - str r2, [r0, #vp9_writer_lowvalue] - str r5, [r0, #vp9_writer_range] - str r3, [r0, #vp9_writer_count] - pop {r4-r10, pc} - - ENDP - -; r0 BOOL_CODER *br -; r1 int data -; r2 int bits -|vp8_encode_value| PROC - push {r4-r11, lr} - - mov r10, r2 - - ldr r2, [r0, #vp9_writer_lowvalue] - ldr r5, [r0, #vp9_writer_range] - ldr r3, [r0, #vp9_writer_count] - - rsb r4, r10, #32 ; 32-n - - ; v is kept in r1 during the token pack loop - lsl r1, r1, r4 ; r1 = v << 32 - n - -encode_value_loop - sub r7, r5, #1 ; range-1 - - ; Decisions are made based on the bit value shifted - ; off of v, so set a flag here based on this. 
- ; This value is refered to as "bb" - lsls r1, r1, #1 ; bit = v >> n - mov r4, r7, lsl #7 ; ((range-1) * 128) - - mov r7, #1 - add r4, r7, r4, lsr #8 ; 1 + (((range-1) * 128) >> 8) - - addcs r2, r2, r4 ; if (bit) lowvalue += split - subcs r4, r5, r4 ; if (bit) range = range-split - - ; Counting the leading zeros is used to normalize range. - clz r6, r4 - sub r6, r6, #24 ; shift - - ; Flag is set on the sum of count. This flag is used later - ; to determine if count >= 0 - adds r3, r3, r6 ; count += shift - lsl r5, r4, r6 ; range <<= shift - bmi token_count_lt_zero_ev ; if(count >= 0) - - sub r6, r6, r3 ; offset = shift - count - sub r4, r6, #1 ; offset-1 - lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) - bpl token_high_bit_not_set_ev - - ldr r4, [r0, #vp9_writer_pos] ; x - sub r4, r4, #1 ; x = w->pos-1 - b token_zero_while_start_ev -token_zero_while_loop_ev - mov r9, #0 - strb r9, [r7, r4] ; w->buffer[x] =(unsigned char)0 - sub r4, r4, #1 ; x-- -token_zero_while_start_ev - cmp r4, #0 - ldrge r7, [r0, #vp9_writer_buffer] - ldrb r11, [r7, r4] - cmpge r11, #0xff - beq token_zero_while_loop_ev - - ldr r7, [r0, #vp9_writer_buffer] - ldrb r9, [r7, r4] ; w->buffer[x] - add r9, r9, #1 - strb r9, [r7, r4] ; w->buffer[x] + 1 -token_high_bit_not_set_ev - rsb r4, r6, #24 ; 24-offset - ldr r9, [r0, #vp9_writer_buffer] - lsr r7, r2, r4 ; lowvalue >> (24-offset) - ldr r4, [r0, #vp9_writer_pos] ; w->pos - lsl r2, r2, r6 ; lowvalue <<= offset - mov r6, r3 ; shift = count - add r11, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff - str r11, [r0, #vp9_writer_pos] - sub r3, r3, #8 ; count -= 8 - strb r7, [r9, r4] ; w->buffer[w->pos++] - -token_count_lt_zero_ev - lsl r2, r2, r6 ; lowvalue <<= shift - - subs r10, r10, #1 - bne encode_value_loop - - str r2, [r0, #vp9_writer_lowvalue] - str r5, [r0, #vp9_writer_range] - str r3, [r0, #vp9_writer_count] - pop {r4-r11, pc} - ENDP - - END diff --git a/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm 
b/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm deleted file mode 100644 index bf299770b..000000000 --- a/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm +++ /dev/null @@ -1,291 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8cx_pack_tokens_armv5| - - INCLUDE asm_enc_offsets.asm - - ARM - REQUIRE8 - PRESERVE8 - - AREA |.text|, CODE, READONLY - -; r0 vp9_writer *w -; r1 const TOKENEXTRA *p -; r2 int xcount -; r3 vp8_coef_encodings -; s0 vp8_extra_bits -; s1 vp8_coef_tree -|vp8cx_pack_tokens_armv5| PROC - push {r4-r11, lr} - - ; Add size of xcount * sizeof (TOKENEXTRA) to get stop - ; sizeof (TOKENEXTRA) is 8 - sub sp, sp, #12 - add r2, r1, r2, lsl #3 ; stop = p + xcount*sizeof(TOKENEXTRA) - str r2, [sp, #0] - str r3, [sp, #8] ; save vp8_coef_encodings - ldr r2, [r0, #vp9_writer_lowvalue] - ldr r5, [r0, #vp9_writer_range] - ldr r3, [r0, #vp9_writer_count] - b check_p_lt_stop - -while_p_lt_stop - ldrb r6, [r1, #tokenextra_token] ; t - ldr r4, [sp, #8] ; vp8_coef_encodings - mov lr, #0 - add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t - ldr r9, [r1, #tokenextra_context_tree] ; pp - - ldrb r7, [r1, #tokenextra_skip_eob_node] - - ldr r6, [r4, #vp9_token_value] ; v - ldr r8, [r4, #vp9_token_len] ; n - - ; vp8 specific skip_eob_node - cmp r7, #0 - movne lr, #2 ; i = 2 - subne r8, r8, #1 ; --n - - rsb r4, r8, #32 ; 32-n - ldr r10, [sp, #52] ; vp8_coef_tree - - ; v is kept in r12 during the token pack loop - lsl r12, r6, r4 ; r12 = v << 32 - n - -; loop start -token_loop - ldrb r4, [r9, lr, asr #1] ; pp [i>>1] - sub r7, r5, #1 ; range-1 - - ; Decisions are made based on the bit value shifted - 
; off of v, so set a flag here based on this. - ; This value is refered to as "bb" - lsls r12, r12, #1 ; bb = v >> n - mul r6, r4, r7 ; ((range-1) * pp[i>>1])) - - ; bb can only be 0 or 1. So only execute this statement - ; if bb == 1, otherwise it will act like i + 0 - addcs lr, lr, #1 ; i + bb - - mov r7, #1 - ldrsb lr, [r10, lr] ; i = vp8_coef_tree[i+bb] - add r4, r7, r6, lsr #8 ; 1 + (((range-1) * pp[i>>1]) >> 8) - - addcs r2, r2, r4 ; if (bb) lowvalue += split - subcs r4, r5, r4 ; if (bb) range = range-split - - ; Counting the leading zeros is used to normalize range. - clz r6, r4 - sub r6, r6, #24 ; shift - - ; Flag is set on the sum of count. This flag is used later - ; to determine if count >= 0 - adds r3, r3, r6 ; count += shift - lsl r5, r4, r6 ; range <<= shift - bmi token_count_lt_zero ; if(count >= 0) - - sub r6, r6, r3 ; offset = shift - count - sub r4, r6, #1 ; offset-1 - lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) - bpl token_high_bit_not_set - - ldr r4, [r0, #vp9_writer_pos] ; x - sub r4, r4, #1 ; x = w->pos-1 - b token_zero_while_start -token_zero_while_loop - mov r10, #0 - strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 - sub r4, r4, #1 ; x-- -token_zero_while_start - cmp r4, #0 - ldrge r7, [r0, #vp9_writer_buffer] - ldrb r11, [r7, r4] - cmpge r11, #0xff - beq token_zero_while_loop - - ldr r7, [r0, #vp9_writer_buffer] - ldrb r10, [r7, r4] ; w->buffer[x] - add r10, r10, #1 - strb r10, [r7, r4] ; w->buffer[x] + 1 -token_high_bit_not_set - rsb r4, r6, #24 ; 24-offset - ldr r10, [r0, #vp9_writer_buffer] - lsr r7, r2, r4 ; lowvalue >> (24-offset) - ldr r4, [r0, #vp9_writer_pos] ; w->pos - lsl r2, r2, r6 ; lowvalue <<= offset - mov r6, r3 ; shift = count - add r11, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff - str r11, [r0, #vp9_writer_pos] - sub r3, r3, #8 ; count -= 8 - strb r7, [r10, r4] ; w->buffer[w->pos++] - - ; r10 is used earlier in the loop, but r10 is used as - ; temp variable here. 
So after r10 is used, reload - ; vp8_coef_tree_dcd into r10 - ldr r10, [sp, #52] ; vp8_coef_tree - -token_count_lt_zero - lsl r2, r2, r6 ; lowvalue <<= shift - - subs r8, r8, #1 ; --n - bne token_loop - - ldrb r6, [r1, #tokenextra_token] ; t - ldr r7, [sp, #48] ; vp8_extra_bits - ; Add t * sizeof (vp9_extra_bit_struct) to get the desired - ; element. Here vp9_extra_bit_struct == 16 - add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t - - ldr r4, [r12, #vp9_extra_bit_struct_base_val] - cmp r4, #0 - beq skip_extra_bits - -; if( b->base_val) - ldr r8, [r12, #vp9_extra_bit_struct_len] ; L - ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra - cmp r8, #0 ; if( L) - beq no_extra_bits - - ldr r9, [r12, #vp9_extra_bit_struct_prob] - asr r7, lr, #1 ; v=e>>1 - - ldr r10, [r12, #vp9_extra_bit_struct_tree] - str r10, [sp, #4] ; b->tree - - rsb r4, r8, #32 - lsl r12, r7, r4 - - mov lr, #0 ; i = 0 - -extra_bits_loop - ldrb r4, [r9, lr, asr #1] ; pp[i>>1] - sub r7, r5, #1 ; range-1 - lsls r12, r12, #1 ; v >> n - mul r6, r4, r7 ; (range-1) * pp[i>>1] - addcs lr, lr, #1 ; i + bb - - mov r7, #1 - ldrsb lr, [r10, lr] ; i = b->tree[i+bb] - add r4, r7, r6, lsr #8 ; split = 1 + (((range-1) * pp[i>>1]) >> 8) - - addcs r2, r2, r4 ; if (bb) lowvalue += split - subcs r4, r5, r4 ; if (bb) range = range-split - - clz r6, r4 - sub r6, r6, #24 - - adds r3, r3, r6 ; count += shift - lsl r5, r4, r6 ; range <<= shift - bmi extra_count_lt_zero ; if(count >= 0) - - sub r6, r6, r3 ; offset= shift - count - sub r4, r6, #1 ; offset-1 - lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) - bpl extra_high_bit_not_set - - ldr r4, [r0, #vp9_writer_pos] ; x - sub r4, r4, #1 ; x = w->pos - 1 - b extra_zero_while_start -extra_zero_while_loop - mov r10, #0 - strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 - sub r4, r4, #1 ; x-- -extra_zero_while_start - cmp r4, #0 - ldrge r7, [r0, #vp9_writer_buffer] - ldrb r11, [r7, r4] - cmpge r11, #0xff - beq extra_zero_while_loop - - ldr r7, [r0, #vp9_writer_buffer] 
- ldrb r10, [r7, r4] - add r10, r10, #1 - strb r10, [r7, r4] -extra_high_bit_not_set - rsb r4, r6, #24 ; 24-offset - ldr r10, [r0, #vp9_writer_buffer] - lsr r7, r2, r4 ; lowvalue >> (24-offset) - ldr r4, [r0, #vp9_writer_pos] - lsl r2, r2, r6 ; lowvalue <<= offset - mov r6, r3 ; shift = count - add r11, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff - str r11, [r0, #vp9_writer_pos] - sub r3, r3, #8 ; count -= 8 - strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset)) - ldr r10, [sp, #4] ; b->tree -extra_count_lt_zero - lsl r2, r2, r6 - - subs r8, r8, #1 ; --n - bne extra_bits_loop ; while (n) - -no_extra_bits - ldr lr, [r1, #4] ; e = p->Extra - add r4, r5, #1 ; range + 1 - tst lr, #1 - lsr r4, r4, #1 ; split = (range + 1) >> 1 - addne r2, r2, r4 ; lowvalue += split - subne r4, r5, r4 ; range = range-split - tst r2, #0x80000000 ; lowvalue & 0x80000000 - lsl r5, r4, #1 ; range <<= 1 - beq end_high_bit_not_set - - ldr r4, [r0, #vp9_writer_pos] - mov r7, #0 - sub r4, r4, #1 - b end_zero_while_start -end_zero_while_loop - strb r7, [r6, r4] - sub r4, r4, #1 ; x-- -end_zero_while_start - cmp r4, #0 - ldrge r6, [r0, #vp9_writer_buffer] - ldrb r12, [r6, r4] - cmpge r12, #0xff - beq end_zero_while_loop - - ldr r6, [r0, #vp9_writer_buffer] - ldrb r7, [r6, r4] - add r7, r7, #1 - strb r7, [r6, r4] -end_high_bit_not_set - adds r3, r3, #1 ; ++count - lsl r2, r2, #1 ; lowvalue <<= 1 - bne end_count_zero - - ldr r4, [r0, #vp9_writer_pos] - mvn r3, #7 - ldr r7, [r0, #vp9_writer_buffer] - lsr r6, r2, #24 ; lowvalue >> 24 - add r12, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff - str r12, [r0, #0x10] - strb r6, [r7, r4] -end_count_zero -skip_extra_bits - add r1, r1, #TOKENEXTRA_SZ ; ++p -check_p_lt_stop - ldr r4, [sp, #0] ; stop - cmp r1, r4 ; while( p < stop) - bcc while_p_lt_stop - - str r2, [r0, #vp9_writer_lowvalue] - str r5, [r0, #vp9_writer_range] - str r3, [r0, #vp9_writer_count] - add sp, sp, #12 - pop {r4-r11, pc} - ENDP - 
- END diff --git a/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm deleted file mode 100644 index a1c647d6c..000000000 --- a/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm +++ /dev/null @@ -1,327 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8cx_pack_mb_row_tokens_armv5| - - INCLUDE asm_enc_offsets.asm - - ARM - REQUIRE8 - PRESERVE8 - - AREA |.text|, CODE, READONLY - -; r0 VP8_COMP *cpi -; r1 vp9_writer *w -; r2 vp8_coef_encodings -; r3 vp8_extra_bits -; s0 vp8_coef_tree - -|vp8cx_pack_mb_row_tokens_armv5| PROC - push {r4-r11, lr} - sub sp, sp, #24 - - ; Compute address of cpi->common.mb_rows - ldr r4, _VP8_COMP_common_ - ldr r6, _VP8_COMMON_MBrows_ - add r4, r0, r4 - - ldr r5, [r4, r6] ; load up mb_rows - - str r2, [sp, #20] ; save vp8_coef_encodings - str r5, [sp, #12] ; save mb_rows - str r3, [sp, #8] ; save vp8_extra_bits - - ldr r4, _VP8_COMP_tplist_ - add r4, r0, r4 - ldr r7, [r4, #0] ; dereference cpi->tp_list - - mov r0, r1 ; keep same as other loops - - ldr r2, [r0, #vp9_writer_lowvalue] - ldr r5, [r0, #vp9_writer_range] - ldr r3, [r0, #vp9_writer_count] - -mb_row_loop - - ldr r1, [r7, #tokenlist_start] - ldr r9, [r7, #tokenlist_stop] - str r9, [sp, #0] ; save stop for later comparison - str r7, [sp, #16] ; tokenlist address for next time - - b check_p_lt_stop - - ; actuall work gets done here! 
- -while_p_lt_stop - ldrb r6, [r1, #tokenextra_token] ; t - ldr r4, [sp, #20] ; vp8_coef_encodings - mov lr, #0 - add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t - ldr r9, [r1, #tokenextra_context_tree] ; pp - - ldrb r7, [r1, #tokenextra_skip_eob_node] - - ldr r6, [r4, #vp9_token_value] ; v - ldr r8, [r4, #vp9_token_len] ; n - - ; vp8 specific skip_eob_node - cmp r7, #0 - movne lr, #2 ; i = 2 - subne r8, r8, #1 ; --n - - rsb r4, r8, #32 ; 32-n - ldr r10, [sp, #60] ; vp8_coef_tree - - ; v is kept in r12 during the token pack loop - lsl r12, r6, r4 ; r12 = v << 32 - n - -; loop start -token_loop - ldrb r4, [r9, lr, asr #1] ; pp [i>>1] - sub r7, r5, #1 ; range-1 - - ; Decisions are made based on the bit value shifted - ; off of v, so set a flag here based on this. - ; This value is refered to as "bb" - lsls r12, r12, #1 ; bb = v >> n - mul r6, r4, r7 ; ((range-1) * pp[i>>1])) - - ; bb can only be 0 or 1. So only execute this statement - ; if bb == 1, otherwise it will act like i + 0 - addcs lr, lr, #1 ; i + bb - - mov r7, #1 - ldrsb lr, [r10, lr] ; i = vp8_coef_tree[i+bb] - add r4, r7, r6, lsr #8 ; 1 + (((range-1) * pp[i>>1]) >> 8) - - addcs r2, r2, r4 ; if (bb) lowvalue += split - subcs r4, r5, r4 ; if (bb) range = range-split - - ; Counting the leading zeros is used to normalize range. - clz r6, r4 - sub r6, r6, #24 ; shift - - ; Flag is set on the sum of count. 
This flag is used later - ; to determine if count >= 0 - adds r3, r3, r6 ; count += shift - lsl r5, r4, r6 ; range <<= shift - bmi token_count_lt_zero ; if(count >= 0) - - sub r6, r6, r3 ; offset = shift - count - sub r4, r6, #1 ; offset-1 - lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) - bpl token_high_bit_not_set - - ldr r4, [r0, #vp9_writer_pos] ; x - sub r4, r4, #1 ; x = w->pos-1 - b token_zero_while_start -token_zero_while_loop - mov r10, #0 - strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 - sub r4, r4, #1 ; x-- -token_zero_while_start - cmp r4, #0 - ldrge r7, [r0, #vp9_writer_buffer] - ldrb r11, [r7, r4] - cmpge r11, #0xff - beq token_zero_while_loop - - ldr r7, [r0, #vp9_writer_buffer] - ldrb r10, [r7, r4] ; w->buffer[x] - add r10, r10, #1 - strb r10, [r7, r4] ; w->buffer[x] + 1 -token_high_bit_not_set - rsb r4, r6, #24 ; 24-offset - ldr r10, [r0, #vp9_writer_buffer] - lsr r7, r2, r4 ; lowvalue >> (24-offset) - ldr r4, [r0, #vp9_writer_pos] ; w->pos - lsl r2, r2, r6 ; lowvalue <<= offset - mov r6, r3 ; shift = count - add r11, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff - str r11, [r0, #vp9_writer_pos] - sub r3, r3, #8 ; count -= 8 - strb r7, [r10, r4] ; w->buffer[w->pos++] - - ; r10 is used earlier in the loop, but r10 is used as - ; temp variable here. So after r10 is used, reload - ; vp8_coef_tree_dcd into r10 - ldr r10, [sp, #60] ; vp8_coef_tree - -token_count_lt_zero - lsl r2, r2, r6 ; lowvalue <<= shift - - subs r8, r8, #1 ; --n - bne token_loop - - ldrb r6, [r1, #tokenextra_token] ; t - ldr r7, [sp, #8] ; vp8_extra_bits - ; Add t * sizeof (vp9_extra_bit_struct) to get the desired - ; element. 
Here vp9_extra_bit_struct == 16 - add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t - - ldr r4, [r12, #vp9_extra_bit_struct_base_val] - cmp r4, #0 - beq skip_extra_bits - -; if( b->base_val) - ldr r8, [r12, #vp9_extra_bit_struct_len] ; L - ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra - cmp r8, #0 ; if( L) - beq no_extra_bits - - ldr r9, [r12, #vp9_extra_bit_struct_prob] - asr r7, lr, #1 ; v=e>>1 - - ldr r10, [r12, #vp9_extra_bit_struct_tree] - str r10, [sp, #4] ; b->tree - - rsb r4, r8, #32 - lsl r12, r7, r4 - - mov lr, #0 ; i = 0 - -extra_bits_loop - ldrb r4, [r9, lr, asr #1] ; pp[i>>1] - sub r7, r5, #1 ; range-1 - lsls r12, r12, #1 ; v >> n - mul r6, r4, r7 ; (range-1) * pp[i>>1] - addcs lr, lr, #1 ; i + bb - - mov r7, #1 - ldrsb lr, [r10, lr] ; i = b->tree[i+bb] - add r4, r7, r6, lsr #8 ; split = 1 + (((range-1) * pp[i>>1]) >> 8) - - addcs r2, r2, r4 ; if (bb) lowvalue += split - subcs r4, r5, r4 ; if (bb) range = range-split - - clz r6, r4 - sub r6, r6, #24 - - adds r3, r3, r6 ; count += shift - lsl r5, r4, r6 ; range <<= shift - bmi extra_count_lt_zero ; if(count >= 0) - - sub r6, r6, r3 ; offset= shift - count - sub r4, r6, #1 ; offset-1 - lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) - bpl extra_high_bit_not_set - - ldr r4, [r0, #vp9_writer_pos] ; x - sub r4, r4, #1 ; x = w->pos - 1 - b extra_zero_while_start -extra_zero_while_loop - mov r10, #0 - strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 - sub r4, r4, #1 ; x-- -extra_zero_while_start - cmp r4, #0 - ldrge r7, [r0, #vp9_writer_buffer] - ldrb r11, [r7, r4] - cmpge r11, #0xff - beq extra_zero_while_loop - - ldr r7, [r0, #vp9_writer_buffer] - ldrb r10, [r7, r4] - add r10, r10, #1 - strb r10, [r7, r4] -extra_high_bit_not_set - rsb r4, r6, #24 ; 24-offset - ldr r10, [r0, #vp9_writer_buffer] - lsr r7, r2, r4 ; lowvalue >> (24-offset) - ldr r4, [r0, #vp9_writer_pos] - lsl r2, r2, r6 ; lowvalue <<= offset - mov r6, r3 ; shift = count - add r11, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; 
lowvalue &= 0xffffff - str r11, [r0, #vp9_writer_pos] - sub r3, r3, #8 ; count -= 8 - strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset)) - ldr r10, [sp, #4] ; b->tree -extra_count_lt_zero - lsl r2, r2, r6 - - subs r8, r8, #1 ; --n - bne extra_bits_loop ; while (n) - -no_extra_bits - ldr lr, [r1, #4] ; e = p->Extra - add r4, r5, #1 ; range + 1 - tst lr, #1 - lsr r4, r4, #1 ; split = (range + 1) >> 1 - addne r2, r2, r4 ; lowvalue += split - subne r4, r5, r4 ; range = range-split - tst r2, #0x80000000 ; lowvalue & 0x80000000 - lsl r5, r4, #1 ; range <<= 1 - beq end_high_bit_not_set - - ldr r4, [r0, #vp9_writer_pos] - mov r7, #0 - sub r4, r4, #1 - b end_zero_while_start -end_zero_while_loop - strb r7, [r6, r4] - sub r4, r4, #1 ; x-- -end_zero_while_start - cmp r4, #0 - ldrge r6, [r0, #vp9_writer_buffer] - ldrb r12, [r6, r4] - cmpge r12, #0xff - beq end_zero_while_loop - - ldr r6, [r0, #vp9_writer_buffer] - ldrb r7, [r6, r4] - add r7, r7, #1 - strb r7, [r6, r4] -end_high_bit_not_set - adds r3, r3, #1 ; ++count - lsl r2, r2, #1 ; lowvalue <<= 1 - bne end_count_zero - - ldr r4, [r0, #vp9_writer_pos] - mvn r3, #7 - ldr r7, [r0, #vp9_writer_buffer] - lsr r6, r2, #24 ; lowvalue >> 24 - add r12, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff - str r12, [r0, #0x10] - strb r6, [r7, r4] -end_count_zero -skip_extra_bits - add r1, r1, #TOKENEXTRA_SZ ; ++p -check_p_lt_stop - ldr r4, [sp, #0] ; stop - cmp r1, r4 ; while( p < stop) - bcc while_p_lt_stop - - ldr r6, [sp, #12] ; mb_rows - ldr r7, [sp, #16] ; tokenlist address - subs r6, r6, #1 - add r7, r7, #TOKENLIST_SZ ; next element in the array - str r6, [sp, #12] - bne mb_row_loop - - str r2, [r0, #vp9_writer_lowvalue] - str r5, [r0, #vp9_writer_range] - str r3, [r0, #vp9_writer_count] - add sp, sp, #24 - pop {r4-r11, pc} - ENDP - -_VP8_COMP_common_ - DCD vp8_comp_common -_VP8_COMMON_MBrows_ - DCD vp8_common_mb_rows -_VP8_COMP_tplist_ - DCD vp8_comp_tplist - - END diff --git 
a/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm deleted file mode 100644 index 86c2feb4a..000000000 --- a/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm +++ /dev/null @@ -1,465 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8cx_pack_tokens_into_partitions_armv5| - - INCLUDE asm_enc_offsets.asm - - ARM - REQUIRE8 - PRESERVE8 - - AREA |.text|, CODE, READONLY - -; r0 VP8_COMP *cpi -; r1 unsigned char *cx_data -; r2 int num_part -; r3 *size -; s0 vp8_coef_encodings -; s1 vp8_extra_bits, -; s2 const vp9_tree_index *, - -|vp8cx_pack_tokens_into_partitions_armv5| PROC - push {r4-r11, lr} - sub sp, sp, #44 - - ; Compute address of cpi->common.mb_rows - ldr r4, _VP8_COMP_common_ - ldr r6, _VP8_COMMON_MBrows_ - add r4, r0, r4 - - ldr r5, [r4, r6] ; load up mb_rows - - str r5, [sp, #36] ; save mb_rows - str r1, [sp, #24] ; save cx_data - str r2, [sp, #20] ; save num_part - str r3, [sp, #8] ; save *size - - ; *size = 3*(num_part -1 ); - sub r2, r2, #1 ; num_part - 1 - add r2, r2, r2, lsl #1 ; 3*(num_part - 1) - str r2, [r3] - - add r2, r2, r1 ; cx_data + *size - str r2, [sp, #40] ; ptr - - ldr r4, _VP8_COMP_tplist_ - add r4, r0, r4 - ldr r7, [r4, #0] ; dereference cpi->tp_list - str r7, [sp, #32] ; store start of cpi->tp_list - - ldr r11, _VP8_COMP_bc2_ ; load up vp9_writer out of cpi - add r0, r0, r11 - - mov r11, #0 - str r11, [sp, #28] ; i - -numparts_loop - ldr r10, [sp, #40] ; ptr - ldr r5, [sp, #36] ; move mb_rows to the counting section - sub r5, r5, r11 ; move start point with each partition - ; mb_rows 
starts at i - str r5, [sp, #12] - - ; Reset all of the VP8 Writer data for each partition that - ; is processed. - ; start_encode - mov r2, #0 ; vp9_writer_lowvalue - mov r5, #255 ; vp9_writer_range - mvn r3, #23 ; vp9_writer_count - - str r2, [r0, #vp9_writer_value] - str r2, [r0, #vp9_writer_pos] - str r10, [r0, #vp9_writer_buffer] - -mb_row_loop - - ldr r1, [r7, #tokenlist_start] - ldr r9, [r7, #tokenlist_stop] - str r9, [sp, #0] ; save stop for later comparison - str r7, [sp, #16] ; tokenlist address for next time - - b check_p_lt_stop - - ; actual work gets done here! - -while_p_lt_stop - ldrb r6, [r1, #tokenextra_token] ; t - ldr r4, [sp, #80] ; vp8_coef_encodings - mov lr, #0 - add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t - ldr r9, [r1, #tokenextra_context_tree] ; pp - - ldrb r7, [r1, #tokenextra_skip_eob_node] - - ldr r6, [r4, #vp9_token_value] ; v - ldr r8, [r4, #vp9_token_len] ; n - - ; vp8 specific skip_eob_node - cmp r7, #0 - movne lr, #2 ; i = 2 - subne r8, r8, #1 ; --n - - rsb r4, r8, #32 ; 32-n - ldr r10, [sp, #88] ; vp8_coef_tree - - ; v is kept in r12 during the token pack loop - lsl r12, r6, r4 ; r12 = v << 32 - n - -; loop start -token_loop - ldrb r4, [r9, lr, asr #1] ; pp [i>>1] - sub r7, r5, #1 ; range-1 - - ; Decisions are made based on the bit value shifted - ; off of v, so set a flag here based on this. - ; This value is refered to as "bb" - lsls r12, r12, #1 ; bb = v >> n - mul r6, r4, r7 ; ((range-1) * pp[i>>1])) - - ; bb can only be 0 or 1. So only execute this statement - ; if bb == 1, otherwise it will act like i + 0 - addcs lr, lr, #1 ; i + bb - - mov r7, #1 - ldrsb lr, [r10, lr] ; i = vp8_coef_tree[i+bb] - add r4, r7, r6, lsr #8 ; 1 + (((range-1) * pp[i>>1]) >> 8) - - addcs r2, r2, r4 ; if (bb) lowvalue += split - subcs r4, r5, r4 ; if (bb) range = range-split - - ; Counting the leading zeros is used to normalize range. - clz r6, r4 - sub r6, r6, #24 ; shift - - ; Flag is set on the sum of count. 
This flag is used later - ; to determine if count >= 0 - adds r3, r3, r6 ; count += shift - lsl r5, r4, r6 ; range <<= shift - bmi token_count_lt_zero ; if(count >= 0) - - sub r6, r6, r3 ; offset = shift - count - sub r4, r6, #1 ; offset-1 - lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) - bpl token_high_bit_not_set - - ldr r4, [r0, #vp9_writer_pos] ; x - sub r4, r4, #1 ; x = w->pos-1 - b token_zero_while_start -token_zero_while_loop - mov r10, #0 - strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 - sub r4, r4, #1 ; x-- -token_zero_while_start - cmp r4, #0 - ldrge r7, [r0, #vp9_writer_buffer] - ldrb r11, [r7, r4] - cmpge r11, #0xff - beq token_zero_while_loop - - ldr r7, [r0, #vp9_writer_buffer] - ldrb r10, [r7, r4] ; w->buffer[x] - add r10, r10, #1 - strb r10, [r7, r4] ; w->buffer[x] + 1 -token_high_bit_not_set - rsb r4, r6, #24 ; 24-offset - ldr r10, [r0, #vp9_writer_buffer] - lsr r7, r2, r4 ; lowvalue >> (24-offset) - ldr r4, [r0, #vp9_writer_pos] ; w->pos - lsl r2, r2, r6 ; lowvalue <<= offset - mov r6, r3 ; shift = count - add r11, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff - str r11, [r0, #vp9_writer_pos] - sub r3, r3, #8 ; count -= 8 - strb r7, [r10, r4] ; w->buffer[w->pos++] - - ; r10 is used earlier in the loop, but r10 is used as - ; temp variable here. So after r10 is used, reload - ; vp8_coef_tree_dcd into r10 - ldr r10, [sp, #88] ; vp8_coef_tree - -token_count_lt_zero - lsl r2, r2, r6 ; lowvalue <<= shift - - subs r8, r8, #1 ; --n - bne token_loop - - ldrb r6, [r1, #tokenextra_token] ; t - ldr r7, [sp, #84] ; vp8_extra_bits - ; Add t * sizeof (vp9_extra_bit_struct) to get the desired - ; element. 
Here vp9_extra_bit_struct == 16 - add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t - - ldr r4, [r12, #vp9_extra_bit_struct_base_val] - cmp r4, #0 - beq skip_extra_bits - -; if( b->base_val) - ldr r8, [r12, #vp9_extra_bit_struct_len] ; L - ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra - cmp r8, #0 ; if( L) - beq no_extra_bits - - ldr r9, [r12, #vp9_extra_bit_struct_prob] - asr r7, lr, #1 ; v=e>>1 - - ldr r10, [r12, #vp9_extra_bit_struct_tree] - str r10, [sp, #4] ; b->tree - - rsb r4, r8, #32 - lsl r12, r7, r4 - - mov lr, #0 ; i = 0 - -extra_bits_loop - ldrb r4, [r9, lr, asr #1] ; pp[i>>1] - sub r7, r5, #1 ; range-1 - lsls r12, r12, #1 ; v >> n - mul r6, r4, r7 ; (range-1) * pp[i>>1] - addcs lr, lr, #1 ; i + bb - - mov r7, #1 - ldrsb lr, [r10, lr] ; i = b->tree[i+bb] - add r4, r7, r6, lsr #8 ; split = 1 + (((range-1) * pp[i>>1]) >> 8) - - addcs r2, r2, r4 ; if (bb) lowvalue += split - subcs r4, r5, r4 ; if (bb) range = range-split - - clz r6, r4 - sub r6, r6, #24 - - adds r3, r3, r6 ; count += shift - lsl r5, r4, r6 ; range <<= shift - bmi extra_count_lt_zero ; if(count >= 0) - - sub r6, r6, r3 ; offset= shift - count - sub r4, r6, #1 ; offset-1 - lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) - bpl extra_high_bit_not_set - - ldr r4, [r0, #vp9_writer_pos] ; x - sub r4, r4, #1 ; x = w->pos - 1 - b extra_zero_while_start -extra_zero_while_loop - mov r10, #0 - strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 - sub r4, r4, #1 ; x-- -extra_zero_while_start - cmp r4, #0 - ldrge r7, [r0, #vp9_writer_buffer] - ldrb r11, [r7, r4] - cmpge r11, #0xff - beq extra_zero_while_loop - - ldr r7, [r0, #vp9_writer_buffer] - ldrb r10, [r7, r4] - add r10, r10, #1 - strb r10, [r7, r4] -extra_high_bit_not_set - rsb r4, r6, #24 ; 24-offset - ldr r10, [r0, #vp9_writer_buffer] - lsr r7, r2, r4 ; lowvalue >> (24-offset) - ldr r4, [r0, #vp9_writer_pos] - lsl r2, r2, r6 ; lowvalue <<= offset - mov r6, r3 ; shift = count - add r11, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; 
lowvalue &= 0xffffff - str r11, [r0, #vp9_writer_pos] - sub r3, r3, #8 ; count -= 8 - strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset)) - ldr r10, [sp, #4] ; b->tree -extra_count_lt_zero - lsl r2, r2, r6 - - subs r8, r8, #1 ; --n - bne extra_bits_loop ; while (n) - -no_extra_bits - ldr lr, [r1, #4] ; e = p->Extra - add r4, r5, #1 ; range + 1 - tst lr, #1 - lsr r4, r4, #1 ; split = (range + 1) >> 1 - addne r2, r2, r4 ; lowvalue += split - subne r4, r5, r4 ; range = range-split - tst r2, #0x80000000 ; lowvalue & 0x80000000 - lsl r5, r4, #1 ; range <<= 1 - beq end_high_bit_not_set - - ldr r4, [r0, #vp9_writer_pos] - mov r7, #0 - sub r4, r4, #1 - b end_zero_while_start -end_zero_while_loop - strb r7, [r6, r4] - sub r4, r4, #1 ; x-- -end_zero_while_start - cmp r4, #0 - ldrge r6, [r0, #vp9_writer_buffer] - ldrb r12, [r6, r4] - cmpge r12, #0xff - beq end_zero_while_loop - - ldr r6, [r0, #vp9_writer_buffer] - ldrb r7, [r6, r4] - add r7, r7, #1 - strb r7, [r6, r4] -end_high_bit_not_set - adds r3, r3, #1 ; ++count - lsl r2, r2, #1 ; lowvalue <<= 1 - bne end_count_zero - - ldr r4, [r0, #vp9_writer_pos] - mvn r3, #7 - ldr r7, [r0, #vp9_writer_buffer] - lsr r6, r2, #24 ; lowvalue >> 24 - add r12, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff - str r12, [r0, #0x10] - strb r6, [r7, r4] -end_count_zero -skip_extra_bits - add r1, r1, #TOKENEXTRA_SZ ; ++p -check_p_lt_stop - ldr r4, [sp, #0] ; stop - cmp r1, r4 ; while( p < stop) - bcc while_p_lt_stop - - ldr r10, [sp, #20] ; num_parts - mov r1, #TOKENLIST_SZ - mul r1, r10, r1 - - ldr r6, [sp, #12] ; mb_rows - ldr r7, [sp, #16] ; tokenlist address - subs r6, r6, r10 - add r7, r7, r1 ; next element in the array - str r6, [sp, #12] - bgt mb_row_loop - - mov r12, #32 - -stop_encode_loop - sub r7, r5, #1 ; range-1 - - mov r4, r7, lsl #7 ; ((range-1) * 128) - - mov r7, #1 - add r4, r7, r4, lsr #8 ; 1 + (((range-1) * 128) >> 8) - - ; Counting the leading zeros is used to normalize range. 
- clz r6, r4 - sub r6, r6, #24 ; shift - - ; Flag is set on the sum of count. This flag is used later - ; to determine if count >= 0 - adds r3, r3, r6 ; count += shift - lsl r5, r4, r6 ; range <<= shift - bmi token_count_lt_zero_se ; if(count >= 0) - - sub r6, r6, r3 ; offset = shift - count - sub r4, r6, #1 ; offset-1 - lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) - bpl token_high_bit_not_set_se - - ldr r4, [r0, #vp9_writer_pos] ; x - sub r4, r4, #1 ; x = w->pos-1 - b token_zero_while_start_se -token_zero_while_loop_se - mov r10, #0 - strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 - sub r4, r4, #1 ; x-- -token_zero_while_start_se - cmp r4, #0 - ldrge r7, [r0, #vp9_writer_buffer] - ldrb r11, [r7, r4] - cmpge r11, #0xff - beq token_zero_while_loop_se - - ldr r7, [r0, #vp9_writer_buffer] - ldrb r10, [r7, r4] ; w->buffer[x] - add r10, r10, #1 - strb r10, [r7, r4] ; w->buffer[x] + 1 -token_high_bit_not_set_se - rsb r4, r6, #24 ; 24-offset - ldr r10, [r0, #vp9_writer_buffer] - lsr r7, r2, r4 ; lowvalue >> (24-offset) - ldr r4, [r0, #vp9_writer_pos] ; w->pos - lsl r2, r2, r6 ; lowvalue <<= offset - mov r6, r3 ; shift = count - add r11, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff - str r11, [r0, #vp9_writer_pos] - sub r3, r3, #8 ; count -= 8 - strb r7, [r10, r4] ; w->buffer[w->pos++] - -token_count_lt_zero_se - lsl r2, r2, r6 ; lowvalue <<= shift - - subs r12, r12, #1 - bne stop_encode_loop - - ldr r10, [sp, #8] ; *size - ldr r11, [r10] - ldr r4, [r0, #vp9_writer_pos] ; w->pos - add r11, r11, r4 ; *size += w->pos - str r11, [r10] - - ldr r9, [sp, #20] ; num_parts - sub r9, r9, #1 - ldr r10, [sp, #28] ; i - cmp r10, r9 ; if(i<(num_part - 1)) - bge skip_write_partition - - ldr r12, [sp, #40] ; ptr - add r12, r12, r4 ; ptr += w->pos - str r12, [sp, #40] - - ldr r9, [sp, #24] ; cx_data - mov r8, r4, asr #8 - strb r4, [r9, #0] - strb r8, [r9, #1] - mov r4, r4, asr #16 - strb r4, [r9, #2] - - add r9, r9, #3 ; cx_data += 3 - str r9, [sp, 
#24] - -skip_write_partition - - ldr r11, [sp, #28] ; i - ldr r10, [sp, #20] ; num_parts - - add r11, r11, #1 ; i++ - str r11, [sp, #28] - - ldr r7, [sp, #32] ; cpi->tp_list[i] - mov r1, #TOKENLIST_SZ - add r7, r7, r1 ; next element in cpi->tp_list - str r7, [sp, #32] ; cpi->tp_list[i+1] - - cmp r10, r11 - bgt numparts_loop - - - add sp, sp, #44 - pop {r4-r11, pc} - ENDP - -_VP8_COMP_common_ - DCD vp8_comp_common -_VP8_COMMON_MBrows_ - DCD vp8_common_mb_rows -_VP8_COMP_tplist_ - DCD vp8_comp_tplist -_VP8_COMP_bc2_ - DCD vp8_comp_bc2 - - END diff --git a/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm b/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm deleted file mode 100644 index ae2f6030d..000000000 --- a/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm +++ /dev/null @@ -1,224 +0,0 @@ -; -; Copyright (c) 2011 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_fast_quantize_b_armv6| - - INCLUDE asm_enc_offsets.asm - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -; r0 BLOCK *b -; r1 BLOCKD *d -|vp8_fast_quantize_b_armv6| PROC - stmfd sp!, {r1, r4-r11, lr} - - ldr r3, [r0, #vp8_block_coeff] ; coeff - ldr r4, [r0, #vp8_block_quant_fast] ; quant_fast - ldr r5, [r0, #vp8_block_round] ; round - ldr r6, [r1, #vp8_blockd_qcoeff] ; qcoeff - ldr r7, [r1, #vp8_blockd_dqcoeff] ; dqcoeff - ldr r8, [r1, #vp8_blockd_dequant] ; dequant - - ldr r2, loop_count ; loop_count=0x1000000. 'lsls' instruction - ; is used to update the counter so that - ; it can be used to mark nonzero - ; quantized coefficient pairs. 
- - mov r1, #0 ; flags for quantized coeffs - - ; PART 1: quantization and dequantization loop -loop - ldr r9, [r3], #4 ; [z1 | z0] - ldr r10, [r5], #4 ; [r1 | r0] - ldr r11, [r4], #4 ; [q1 | q0] - - ssat16 lr, #1, r9 ; [sz1 | sz0] - eor r9, r9, lr ; [z1 ^ sz1 | z0 ^ sz0] - ssub16 r9, r9, lr ; x = (z ^ sz) - sz - sadd16 r9, r9, r10 ; [x1+r1 | x0+r0] - - ldr r12, [r3], #4 ; [z3 | z2] - - smulbb r0, r9, r11 ; [(x0+r0)*q0] - smultt r9, r9, r11 ; [(x1+r1)*q1] - - ldr r10, [r5], #4 ; [r3 | r2] - - ssat16 r11, #1, r12 ; [sz3 | sz2] - eor r12, r12, r11 ; [z3 ^ sz3 | z2 ^ sz2] - pkhtb r0, r9, r0, asr #16 ; [y1 | y0] - ldr r9, [r4], #4 ; [q3 | q2] - ssub16 r12, r12, r11 ; x = (z ^ sz) - sz - - sadd16 r12, r12, r10 ; [x3+r3 | x2+r2] - - eor r0, r0, lr ; [(y1 ^ sz1) | (y0 ^ sz0)] - - smulbb r10, r12, r9 ; [(x2+r2)*q2] - smultt r12, r12, r9 ; [(x3+r3)*q3] - - ssub16 r0, r0, lr ; x = (y ^ sz) - sz - - cmp r0, #0 ; check if zero - orrne r1, r1, r2, lsr #24 ; add flag for nonzero coeffs - - str r0, [r6], #4 ; *qcoeff++ = x - ldr r9, [r8], #4 ; [dq1 | dq0] - - pkhtb r10, r12, r10, asr #16 ; [y3 | y2] - eor r10, r10, r11 ; [(y3 ^ sz3) | (y2 ^ sz2)] - ssub16 r10, r10, r11 ; x = (y ^ sz) - sz - - cmp r10, #0 ; check if zero - orrne r1, r1, r2, lsr #23 ; add flag for nonzero coeffs - - str r10, [r6], #4 ; *qcoeff++ = x - ldr r11, [r8], #4 ; [dq3 | dq2] - - smulbb r12, r0, r9 ; [x0*dq0] - smultt r0, r0, r9 ; [x1*dq1] - - smulbb r9, r10, r11 ; [x2*dq2] - smultt r10, r10, r11 ; [x3*dq3] - - lsls r2, r2, #2 ; update loop counter - strh r12, [r7, #0] ; dqcoeff[0] = [x0*dq0] - strh r0, [r7, #2] ; dqcoeff[1] = [x1*dq1] - strh r9, [r7, #4] ; dqcoeff[2] = [x2*dq2] - strh r10, [r7, #6] ; dqcoeff[3] = [x3*dq3] - add r7, r7, #8 ; dqcoeff += 8 - bne loop - - ; PART 2: check position for eob... - mov lr, #0 ; init eob - cmp r1, #0 ; coeffs after quantization? 
- ldr r11, [sp, #0] ; restore BLOCKD pointer - beq end ; skip eob calculations if all zero - - ldr r0, [r11, #vp8_blockd_qcoeff] - - ; check shortcut for nonzero qcoeffs - tst r1, #0x80 - bne quant_coeff_15_14 - tst r1, #0x20 - bne quant_coeff_13_11 - tst r1, #0x8 - bne quant_coeff_12_7 - tst r1, #0x40 - bne quant_coeff_10_9 - tst r1, #0x10 - bne quant_coeff_8_3 - tst r1, #0x2 - bne quant_coeff_6_5 - tst r1, #0x4 - bne quant_coeff_4_2 - b quant_coeff_1_0 - -quant_coeff_15_14 - ldrh r2, [r0, #30] ; rc=15, i=15 - mov lr, #16 - cmp r2, #0 - bne end - - ldrh r3, [r0, #28] ; rc=14, i=14 - mov lr, #15 - cmp r3, #0 - bne end - -quant_coeff_13_11 - ldrh r2, [r0, #22] ; rc=11, i=13 - mov lr, #14 - cmp r2, #0 - bne end - -quant_coeff_12_7 - ldrh r3, [r0, #14] ; rc=7, i=12 - mov lr, #13 - cmp r3, #0 - bne end - - ldrh r2, [r0, #20] ; rc=10, i=11 - mov lr, #12 - cmp r2, #0 - bne end - -quant_coeff_10_9 - ldrh r3, [r0, #26] ; rc=13, i=10 - mov lr, #11 - cmp r3, #0 - bne end - - ldrh r2, [r0, #24] ; rc=12, i=9 - mov lr, #10 - cmp r2, #0 - bne end - -quant_coeff_8_3 - ldrh r3, [r0, #18] ; rc=9, i=8 - mov lr, #9 - cmp r3, #0 - bne end - - ldrh r2, [r0, #12] ; rc=6, i=7 - mov lr, #8 - cmp r2, #0 - bne end - -quant_coeff_6_5 - ldrh r3, [r0, #6] ; rc=3, i=6 - mov lr, #7 - cmp r3, #0 - bne end - - ldrh r2, [r0, #4] ; rc=2, i=5 - mov lr, #6 - cmp r2, #0 - bne end - -quant_coeff_4_2 - ldrh r3, [r0, #10] ; rc=5, i=4 - mov lr, #5 - cmp r3, #0 - bne end - - ldrh r2, [r0, #16] ; rc=8, i=3 - mov lr, #4 - cmp r2, #0 - bne end - - ldrh r3, [r0, #8] ; rc=4, i=2 - mov lr, #3 - cmp r3, #0 - bne end - -quant_coeff_1_0 - ldrh r2, [r0, #2] ; rc=1, i=1 - mov lr, #2 - cmp r2, #0 - bne end - - mov lr, #1 ; rc=0, i=0 - -end - str lr, [r11, #vp8_blockd_eob] - ldmfd sp!, {r1, r4-r11, pc} - - ENDP - -loop_count - DCD 0x1000000 - - END - diff --git a/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm b/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm deleted file mode 100644 index 8e7283667..000000000 --- 
a/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm +++ /dev/null @@ -1,138 +0,0 @@ -; -; Copyright (c) 2011 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_mse16x16_armv6| - - ARM - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -; r0 unsigned char *src_ptr -; r1 int source_stride -; r2 unsigned char *ref_ptr -; r3 int recon_stride -; stack unsigned int *sse -; -;note: Based on vp9_variance16x16_armv6. In this function, sum is never used. -; So, we can remove this part of calculation. - -|vp8_mse16x16_armv6| PROC - - push {r4-r9, lr} - - pld [r0, r1, lsl #0] - pld [r2, r3, lsl #0] - - mov r12, #16 ; set loop counter to 16 (=block height) - mov r4, #0 ; initialize sse = 0 - -loop - ; 1st 4 pixels - ldr r5, [r0, #0x0] ; load 4 src pixels - ldr r6, [r2, #0x0] ; load 4 ref pixels - - mov lr, #0 ; constant zero - - usub8 r8, r5, r6 ; calculate difference - pld [r0, r1, lsl #1] - sel r7, r8, lr ; select bytes with positive difference - usub8 r9, r6, r5 ; calculate difference with reversed operands - pld [r2, r3, lsl #1] - sel r8, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r5, r7, lr ; calculate sum of positive differences - usad8 r6, r8, lr ; calculate sum of negative differences - orr r8, r8, r7 ; differences of all 4 pixels - - ldr r5, [r0, #0x4] ; load 4 src pixels - - ; calculate sse - uxtb16 r6, r8 ; byte (two pixels) to halfwords - uxtb16 r7, r8, ror #8 ; another two pixels to halfwords - smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1) - - ; 2nd 4 pixels - ldr r6, [r2, #0x4] ; load 4 ref pixels - smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2) - - 
usub8 r8, r5, r6 ; calculate difference - sel r7, r8, lr ; select bytes with positive difference - usub8 r9, r6, r5 ; calculate difference with reversed operands - sel r8, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r5, r7, lr ; calculate sum of positive differences - usad8 r6, r8, lr ; calculate sum of negative differences - orr r8, r8, r7 ; differences of all 4 pixels - ldr r5, [r0, #0x8] ; load 4 src pixels - ; calculate sse - uxtb16 r6, r8 ; byte (two pixels) to halfwords - uxtb16 r7, r8, ror #8 ; another two pixels to halfwords - smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1) - - ; 3rd 4 pixels - ldr r6, [r2, #0x8] ; load 4 ref pixels - smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2) - - usub8 r8, r5, r6 ; calculate difference - sel r7, r8, lr ; select bytes with positive difference - usub8 r9, r6, r5 ; calculate difference with reversed operands - sel r8, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r5, r7, lr ; calculate sum of positive differences - usad8 r6, r8, lr ; calculate sum of negative differences - orr r8, r8, r7 ; differences of all 4 pixels - - ldr r5, [r0, #0xc] ; load 4 src pixels - - ; calculate sse - uxtb16 r6, r8 ; byte (two pixels) to halfwords - uxtb16 r7, r8, ror #8 ; another two pixels to halfwords - smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1) - - ; 4th 4 pixels - ldr r6, [r2, #0xc] ; load 4 ref pixels - smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2) - - usub8 r8, r5, r6 ; calculate difference - add r0, r0, r1 ; set src_ptr to next row - sel r7, r8, lr ; select bytes with positive difference - usub8 r9, r6, r5 ; calculate difference with reversed operands - add r2, r2, r3 ; set dst_ptr to next row - sel r8, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r5, r7, lr ; calculate sum of positive differences - usad8 r6, r8, lr ; calculate sum 
of negative differences - orr r8, r8, r7 ; differences of all 4 pixels - - subs r12, r12, #1 ; next row - - ; calculate sse - uxtb16 r6, r8 ; byte (two pixels) to halfwords - uxtb16 r7, r8, ror #8 ; another two pixels to halfwords - smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1) - smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2) - - bne loop - - ; return stuff - ldr r1, [sp, #28] ; get address of sse - mov r0, r4 ; return sse - str r4, [r1] ; store sse - - pop {r4-r9, pc} - - ENDP - - END diff --git a/vp8/encoder/arm/armv6/vp8_sad16x16_armv6.asm b/vp8/encoder/arm/armv6/vp8_sad16x16_armv6.asm deleted file mode 100644 index 1b4f5cf3b..000000000 --- a/vp8/encoder/arm/armv6/vp8_sad16x16_armv6.asm +++ /dev/null @@ -1,96 +0,0 @@ -; -; Copyright (c) 2011 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - - EXPORT |vp8_sad16x16_armv6| - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -; r0 const unsigned char *src_ptr -; r1 int src_stride -; r2 const unsigned char *ref_ptr -; r3 int ref_stride -; stack max_sad (not used) -|vp8_sad16x16_armv6| PROC - stmfd sp!, {r4-r12, lr} - - pld [r0, r1, lsl #0] - pld [r2, r3, lsl #0] - pld [r0, r1, lsl #1] - pld [r2, r3, lsl #1] - - mov r4, #0 ; sad = 0; - mov r5, #8 ; loop count - -loop - ; 1st row - ldr r6, [r0, #0x0] ; load 4 src pixels (1A) - ldr r8, [r2, #0x0] ; load 4 ref pixels (1A) - ldr r7, [r0, #0x4] ; load 4 src pixels (1A) - ldr r9, [r2, #0x4] ; load 4 ref pixels (1A) - ldr r10, [r0, #0x8] ; load 4 src pixels (1B) - ldr r11, [r0, #0xC] ; load 4 src pixels (1B) - - usada8 r4, r8, r6, r4 ; calculate sad for 4 pixels - usad8 r8, r7, r9 ; calculate sad for 4 pixels - - ldr r12, [r2, #0x8] ; load 4 ref pixels (1B) - ldr lr, [r2, #0xC] ; load 4 ref pixels (1B) - - add r0, r0, r1 ; set src pointer to next row - add r2, r2, r3 ; set dst pointer to next row - - pld [r0, r1, lsl #1] - pld [r2, r3, lsl #1] - - usada8 r4, r10, r12, r4 ; calculate sad for 4 pixels - usada8 r8, r11, lr, r8 ; calculate sad for 4 pixels - - ldr r6, [r0, #0x0] ; load 4 src pixels (2A) - ldr r7, [r0, #0x4] ; load 4 src pixels (2A) - add r4, r4, r8 ; add partial sad values - - ; 2nd row - ldr r8, [r2, #0x0] ; load 4 ref pixels (2A) - ldr r9, [r2, #0x4] ; load 4 ref pixels (2A) - ldr r10, [r0, #0x8] ; load 4 src pixels (2B) - ldr r11, [r0, #0xC] ; load 4 src pixels (2B) - - usada8 r4, r6, r8, r4 ; calculate sad for 4 pixels - usad8 r8, r7, r9 ; calculate sad for 4 pixels - - ldr r12, [r2, #0x8] ; load 4 ref pixels (2B) - ldr lr, [r2, #0xC] ; load 4 ref pixels (2B) - - add r0, r0, r1 ; set src pointer to next row - add r2, r2, r3 ; set dst pointer to next row - - usada8 r4, r10, r12, r4 ; calculate sad for 4 pixels - usada8 r8, r11, lr, r8 ; calculate sad for 4 pixels - - pld [r0, r1, lsl #1] - pld [r2, r3, lsl #1] - - 
subs r5, r5, #1 ; decrement loop counter - add r4, r4, r8 ; add partial sad values - - bne loop - - mov r0, r4 ; return sad - ldmfd sp!, {r4-r12, pc} - - ENDP - - END - diff --git a/vp8/encoder/arm/armv6/vp8_short_fdct4x4_armv6.asm b/vp8/encoder/arm/armv6/vp8_short_fdct4x4_armv6.asm deleted file mode 100644 index 8034c1db9..000000000 --- a/vp8/encoder/arm/armv6/vp8_short_fdct4x4_armv6.asm +++ /dev/null @@ -1,262 +0,0 @@ -; -; Copyright (c) 2011 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - EXPORT |vp8_short_fdct4x4_armv6| - - ARM - REQUIRE8 - PRESERVE8 - - AREA |.text|, CODE, READONLY -; void vp8_short_fdct4x4_c(short *input, short *output, int pitch) -|vp8_short_fdct4x4_armv6| PROC - - stmfd sp!, {r4 - r12, lr} - - ; PART 1 - - ; coeffs 0-3 - ldrd r4, r5, [r0] ; [i1 | i0] [i3 | i2] - - ldr r10, c7500 - ldr r11, c14500 - ldr r12, c0x22a453a0 ; [2217*4 | 5352*4] - ldr lr, c0x00080008 - ror r5, r5, #16 ; [i2 | i3] - - qadd16 r6, r4, r5 ; [i1+i2 | i0+i3] = [b1 | a1] without shift - qsub16 r7, r4, r5 ; [i1-i2 | i0-i3] = [c1 | d1] without shift - - add r0, r0, r2 ; update input pointer - - qadd16 r7, r7, r7 ; 2*[c1|d1] --> we can use smlad and smlsd - ; with 2217*4 and 5352*4 without losing the - ; sign bit (overflow) - - smuad r4, r6, lr ; o0 = (i1+i2)*8 + (i0+i3)*8 - smusd r5, r6, lr ; o2 = (i1+i2)*8 - (i0+i3)*8 - - smlad r6, r7, r12, r11 ; o1 = (c1 * 2217 + d1 * 5352 + 14500) - smlsdx r7, r7, r12, r10 ; o3 = (d1 * 2217 - c1 * 5352 + 7500) - - ldrd r8, r9, [r0] ; [i5 | i4] [i7 | i6] - - pkhbt r3, r4, r6, lsl #4 ; [o1 | o0], keep in register for PART 2 - pkhbt r6, r5, r7, lsl #4 ; [o3 | o2] - - str r6, [r1, #4] - - ; coeffs 4-7 - ror r9, 
r9, #16 ; [i6 | i7] - - qadd16 r6, r8, r9 ; [i5+i6 | i4+i7] = [b1 | a1] without shift - qsub16 r7, r8, r9 ; [i5-i6 | i4-i7] = [c1 | d1] without shift - - add r0, r0, r2 ; update input pointer - - qadd16 r7, r7, r7 ; 2x[c1|d1] --> we can use smlad and smlsd - ; with 2217*4 and 5352*4 without losing the - ; sign bit (overflow) - - smuad r9, r6, lr ; o4 = (i5+i6)*8 + (i4+i7)*8 - smusd r8, r6, lr ; o6 = (i5+i6)*8 - (i4+i7)*8 - - smlad r6, r7, r12, r11 ; o5 = (c1 * 2217 + d1 * 5352 + 14500) - smlsdx r7, r7, r12, r10 ; o7 = (d1 * 2217 - c1 * 5352 + 7500) - - ldrd r4, r5, [r0] ; [i9 | i8] [i11 | i10] - - pkhbt r9, r9, r6, lsl #4 ; [o5 | o4], keep in register for PART 2 - pkhbt r6, r8, r7, lsl #4 ; [o7 | o6] - - str r6, [r1, #12] - - ; coeffs 8-11 - ror r5, r5, #16 ; [i10 | i11] - - qadd16 r6, r4, r5 ; [i9+i10 | i8+i11]=[b1 | a1] without shift - qsub16 r7, r4, r5 ; [i9-i10 | i8-i11]=[c1 | d1] without shift - - add r0, r0, r2 ; update input pointer - - qadd16 r7, r7, r7 ; 2x[c1|d1] --> we can use smlad and smlsd - ; with 2217*4 and 5352*4 without losing the - ; sign bit (overflow) - - smuad r2, r6, lr ; o8 = (i9+i10)*8 + (i8+i11)*8 - smusd r8, r6, lr ; o10 = (i9+i10)*8 - (i8+i11)*8 - - smlad r6, r7, r12, r11 ; o9 = (c1 * 2217 + d1 * 5352 + 14500) - smlsdx r7, r7, r12, r10 ; o11 = (d1 * 2217 - c1 * 5352 + 7500) - - ldrd r4, r5, [r0] ; [i13 | i12] [i15 | i14] - - pkhbt r2, r2, r6, lsl #4 ; [o9 | o8], keep in register for PART 2 - pkhbt r6, r8, r7, lsl #4 ; [o11 | o10] - - str r6, [r1, #20] - - ; coeffs 12-15 - ror r5, r5, #16 ; [i14 | i15] - - qadd16 r6, r4, r5 ; [i13+i14 | i12+i15]=[b1|a1] without shift - qsub16 r7, r4, r5 ; [i13-i14 | i12-i15]=[c1|d1] without shift - - qadd16 r7, r7, r7 ; 2x[c1|d1] --> we can use smlad and smlsd - ; with 2217*4 and 5352*4 without losing the - ; sign bit (overflow) - - smuad r4, r6, lr ; o12 = (i13+i14)*8 + (i12+i15)*8 - smusd r5, r6, lr ; o14 = (i13+i14)*8 - (i12+i15)*8 - - smlad r6, r7, r12, r11 ; o13 = (c1 * 2217 + d1 * 5352 + 14500) - 
smlsdx r7, r7, r12, r10 ; o15 = (d1 * 2217 - c1 * 5352 + 7500) - - pkhbt r0, r4, r6, lsl #4 ; [o13 | o12], keep in register for PART 2 - pkhbt r6, r5, r7, lsl #4 ; [o15 | o14] - - str r6, [r1, #28] - - - ; PART 2 ------------------------------------------------- - ldr r11, c12000 - ldr r10, c51000 - ldr lr, c0x00070007 - - qadd16 r4, r3, r0 ; a1 = [i1+i13 | i0+i12] - qadd16 r5, r9, r2 ; b1 = [i5+i9 | i4+i8] - qsub16 r6, r9, r2 ; c1 = [i5-i9 | i4-i8] - qsub16 r7, r3, r0 ; d1 = [i1-i13 | i0-i12] - - qadd16 r4, r4, lr ; a1 + 7 - - add r0, r11, #0x10000 ; add (d!=0) - - qadd16 r2, r4, r5 ; a1 + b1 + 7 - qsub16 r3, r4, r5 ; a1 - b1 + 7 - - ldr r12, c0x08a914e8 ; [2217 | 5352] - - lsl r8, r2, #16 ; prepare bottom halfword for scaling - asr r2, r2, #4 ; scale top halfword - lsl r9, r3, #16 ; prepare bottom halfword for scaling - asr r3, r3, #4 ; scale top halfword - pkhtb r4, r2, r8, asr #20 ; pack and scale bottom halfword - pkhtb r5, r3, r9, asr #20 ; pack and scale bottom halfword - - smulbt r2, r6, r12 ; [ ------ | c1*2217] - str r4, [r1, #0] ; [ o1 | o0] - smultt r3, r6, r12 ; [c1*2217 | ------ ] - str r5, [r1, #16] ; [ o9 | o8] - - smlabb r8, r7, r12, r2 ; [ ------ | d1*5352] - smlatb r9, r7, r12, r3 ; [d1*5352 | ------ ] - - smulbb r2, r6, r12 ; [ ------ | c1*5352] - smultb r3, r6, r12 ; [c1*5352 | ------ ] - - lsls r6, r7, #16 ; d1 != 0 ? 
- addeq r8, r8, r11 ; c1_b*2217+d1_b*5352+12000 + (d==0) - addne r8, r8, r0 ; c1_b*2217+d1_b*5352+12000 + (d!=0) - asrs r6, r7, #16 - addeq r9, r9, r11 ; c1_t*2217+d1_t*5352+12000 + (d==0) - addne r9, r9, r0 ; c1_t*2217+d1_t*5352+12000 + (d!=0) - - smlabt r4, r7, r12, r10 ; [ ------ | d1*2217] + 51000 - smlatt r5, r7, r12, r10 ; [d1*2217 | ------ ] + 51000 - - pkhtb r9, r9, r8, asr #16 - - sub r4, r4, r2 - sub r5, r5, r3 - - ldr r3, [r1, #4] ; [i3 | i2] - - pkhtb r5, r5, r4, asr #16 ; [o13|o12] - - str r9, [r1, #8] ; [o5 | 04] - - ldr r9, [r1, #12] ; [i7 | i6] - ldr r8, [r1, #28] ; [i15|i14] - ldr r2, [r1, #20] ; [i11|i10] - str r5, [r1, #24] ; [o13|o12] - - qadd16 r4, r3, r8 ; a1 = [i3+i15 | i2+i14] - qadd16 r5, r9, r2 ; b1 = [i7+i11 | i6+i10] - - qadd16 r4, r4, lr ; a1 + 7 - - qsub16 r6, r9, r2 ; c1 = [i7-i11 | i6-i10] - qadd16 r2, r4, r5 ; a1 + b1 + 7 - qsub16 r7, r3, r8 ; d1 = [i3-i15 | i2-i14] - qsub16 r3, r4, r5 ; a1 - b1 + 7 - - lsl r8, r2, #16 ; prepare bottom halfword for scaling - asr r2, r2, #4 ; scale top halfword - lsl r9, r3, #16 ; prepare bottom halfword for scaling - asr r3, r3, #4 ; scale top halfword - pkhtb r4, r2, r8, asr #20 ; pack and scale bottom halfword - pkhtb r5, r3, r9, asr #20 ; pack and scale bottom halfword - - smulbt r2, r6, r12 ; [ ------ | c1*2217] - str r4, [r1, #4] ; [ o3 | o2] - smultt r3, r6, r12 ; [c1*2217 | ------ ] - str r5, [r1, #20] ; [ o11 | o10] - - smlabb r8, r7, r12, r2 ; [ ------ | d1*5352] - smlatb r9, r7, r12, r3 ; [d1*5352 | ------ ] - - smulbb r2, r6, r12 ; [ ------ | c1*5352] - smultb r3, r6, r12 ; [c1*5352 | ------ ] - - lsls r6, r7, #16 ; d1 != 0 ? 
- addeq r8, r8, r11 ; c1_b*2217+d1_b*5352+12000 + (d==0) - addne r8, r8, r0 ; c1_b*2217+d1_b*5352+12000 + (d!=0) - - asrs r6, r7, #16 - addeq r9, r9, r11 ; c1_t*2217+d1_t*5352+12000 + (d==0) - addne r9, r9, r0 ; c1_t*2217+d1_t*5352+12000 + (d!=0) - - smlabt r4, r7, r12, r10 ; [ ------ | d1*2217] + 51000 - smlatt r5, r7, r12, r10 ; [d1*2217 | ------ ] + 51000 - - pkhtb r9, r9, r8, asr #16 - - sub r4, r4, r2 - sub r5, r5, r3 - - str r9, [r1, #12] ; [o7 | o6] - pkhtb r5, r5, r4, asr #16 ; [o15|o14] - - str r5, [r1, #28] ; [o15|o14] - - ldmfd sp!, {r4 - r12, pc} - - ENDP - -; Used constants -c7500 - DCD 7500 -c14500 - DCD 14500 -c0x22a453a0 - DCD 0x22a453a0 -c0x00080008 - DCD 0x00080008 -c12000 - DCD 12000 -c51000 - DCD 51000 -c0x00070007 - DCD 0x00070007 -c0x08a914e8 - DCD 0x08a914e8 - - END diff --git a/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm b/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm deleted file mode 100644 index 0ca74387b..000000000 --- a/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm +++ /dev/null @@ -1,265 +0,0 @@ -; -; Copyright (c) 2011 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - - EXPORT |vp8_subtract_mby_armv6| - EXPORT |vp8_subtract_mbuv_armv6| - EXPORT |vp8_subtract_b_armv6| - - INCLUDE asm_enc_offsets.asm - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -; r0 BLOCK *be -; r1 BLOCKD *bd -; r2 int pitch -|vp8_subtract_b_armv6| PROC - - stmfd sp!, {r4-r9} - - ldr r4, [r0, #vp8_block_base_src] - ldr r5, [r0, #vp8_block_src] - ldr r6, [r0, #vp8_block_src_diff] - - ldr r3, [r4] - ldr r7, [r0, #vp8_block_src_stride] - add r3, r3, r5 ; src = *base_src + src - ldr r8, [r1, #vp8_blockd_predictor] - - mov r9, #4 ; loop count - -loop_block - - ldr r0, [r3], r7 ; src - ldr r1, [r8], r2 ; pred - - uxtb16 r4, r0 ; [s2 | s0] - uxtb16 r5, r1 ; [p2 | p0] - uxtb16 r0, r0, ror #8 ; [s3 | s1] - uxtb16 r1, r1, ror #8 ; [p3 | p1] - - usub16 r4, r4, r5 ; [d2 | d0] - usub16 r5, r0, r1 ; [d3 | d1] - - subs r9, r9, #1 ; decrement loop counter - - pkhbt r0, r4, r5, lsl #16 ; [d1 | d0] - pkhtb r1, r5, r4, asr #16 ; [d3 | d2] - - str r0, [r6, #0] ; diff - str r1, [r6, #4] ; diff - - add r6, r6, r2, lsl #1 ; update diff pointer - bne loop_block - - ldmfd sp!, {r4-r9} - mov pc, lr - - ENDP - - -; r0 short *diff -; r1 unsigned char *usrc -; r2 unsigned char *vsrc -; r3 unsigned char *pred -; stack int stride -|vp8_subtract_mbuv_armv6| PROC - - stmfd sp!, {r4-r12, lr} - - add r0, r0, #512 ; set *diff point to Cb - add r3, r3, #256 ; set *pred point to Cb - - mov r4, #8 ; loop count - ldr r5, [sp, #40] ; stride - - ; Subtract U block -loop_u - ldr r6, [r1] ; src (A) - ldr r7, [r3], #4 ; pred (A) - - uxtb16 r8, r6 ; [s2 | s0] (A) - uxtb16 r9, r7 ; [p2 | p0] (A) - uxtb16 r10, r6, ror #8 ; [s3 | s1] (A) - uxtb16 r11, r7, ror #8 ; [p3 | p1] (A) - - usub16 r6, r8, r9 ; [d2 | d0] (A) - usub16 r7, r10, r11 ; [d3 | d1] (A) - - ldr r10, [r1, #4] ; src (B) - ldr r11, [r3], #4 ; pred (B) - - pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A) - pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A) - - str r8, [r0], #4 ; diff (A) - uxtb16 r8, r10 ; [s2 | s0] (B) - str 
r9, [r0], #4 ; diff (A) - - uxtb16 r9, r11 ; [p2 | p0] (B) - uxtb16 r10, r10, ror #8 ; [s3 | s1] (B) - uxtb16 r11, r11, ror #8 ; [p3 | p1] (B) - - usub16 r6, r8, r9 ; [d2 | d0] (B) - usub16 r7, r10, r11 ; [d3 | d1] (B) - - add r1, r1, r5 ; update usrc pointer - - pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B) - pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B) - - str r8, [r0], #4 ; diff (B) - subs r4, r4, #1 ; update loop counter - str r9, [r0], #4 ; diff (B) - - bne loop_u - - mov r4, #8 ; loop count - - ; Subtract V block -loop_v - ldr r6, [r2] ; src (A) - ldr r7, [r3], #4 ; pred (A) - - uxtb16 r8, r6 ; [s2 | s0] (A) - uxtb16 r9, r7 ; [p2 | p0] (A) - uxtb16 r10, r6, ror #8 ; [s3 | s1] (A) - uxtb16 r11, r7, ror #8 ; [p3 | p1] (A) - - usub16 r6, r8, r9 ; [d2 | d0] (A) - usub16 r7, r10, r11 ; [d3 | d1] (A) - - ldr r10, [r2, #4] ; src (B) - ldr r11, [r3], #4 ; pred (B) - - pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A) - pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A) - - str r8, [r0], #4 ; diff (A) - uxtb16 r8, r10 ; [s2 | s0] (B) - str r9, [r0], #4 ; diff (A) - - uxtb16 r9, r11 ; [p2 | p0] (B) - uxtb16 r10, r10, ror #8 ; [s3 | s1] (B) - uxtb16 r11, r11, ror #8 ; [p3 | p1] (B) - - usub16 r6, r8, r9 ; [d2 | d0] (B) - usub16 r7, r10, r11 ; [d3 | d1] (B) - - add r2, r2, r5 ; update vsrc pointer - - pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B) - pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B) - - str r8, [r0], #4 ; diff (B) - subs r4, r4, #1 ; update loop counter - str r9, [r0], #4 ; diff (B) - - bne loop_v - - ldmfd sp!, {r4-r12, pc} - - ENDP - - -; r0 short *diff -; r1 unsigned char *src -; r2 unsigned char *pred -; r3 int stride -|vp8_subtract_mby_armv6| PROC - - stmfd sp!, {r4-r11} - - mov r4, #16 -loop - ldr r6, [r1] ; src (A) - ldr r7, [r2], #4 ; pred (A) - - uxtb16 r8, r6 ; [s2 | s0] (A) - uxtb16 r9, r7 ; [p2 | p0] (A) - uxtb16 r10, r6, ror #8 ; [s3 | s1] (A) - uxtb16 r11, r7, ror #8 ; [p3 | p1] (A) - - usub16 r6, r8, r9 ; [d2 | d0] (A) - usub16 r7, r10, r11 ; [d3 | d1] (A) - - ldr r10, [r1, 
#4] ; src (B) - ldr r11, [r2], #4 ; pred (B) - - pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A) - pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A) - - str r8, [r0], #4 ; diff (A) - uxtb16 r8, r10 ; [s2 | s0] (B) - str r9, [r0], #4 ; diff (A) - - uxtb16 r9, r11 ; [p2 | p0] (B) - uxtb16 r10, r10, ror #8 ; [s3 | s1] (B) - uxtb16 r11, r11, ror #8 ; [p3 | p1] (B) - - usub16 r6, r8, r9 ; [d2 | d0] (B) - usub16 r7, r10, r11 ; [d3 | d1] (B) - - ldr r10, [r1, #8] ; src (C) - ldr r11, [r2], #4 ; pred (C) - - pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B) - pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B) - - str r8, [r0], #4 ; diff (B) - uxtb16 r8, r10 ; [s2 | s0] (C) - str r9, [r0], #4 ; diff (B) - - uxtb16 r9, r11 ; [p2 | p0] (C) - uxtb16 r10, r10, ror #8 ; [s3 | s1] (C) - uxtb16 r11, r11, ror #8 ; [p3 | p1] (C) - - usub16 r6, r8, r9 ; [d2 | d0] (C) - usub16 r7, r10, r11 ; [d3 | d1] (C) - - ldr r10, [r1, #12] ; src (D) - ldr r11, [r2], #4 ; pred (D) - - pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (C) - pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (C) - - str r8, [r0], #4 ; diff (C) - uxtb16 r8, r10 ; [s2 | s0] (D) - str r9, [r0], #4 ; diff (C) - - uxtb16 r9, r11 ; [p2 | p0] (D) - uxtb16 r10, r10, ror #8 ; [s3 | s1] (D) - uxtb16 r11, r11, ror #8 ; [p3 | p1] (D) - - usub16 r6, r8, r9 ; [d2 | d0] (D) - usub16 r7, r10, r11 ; [d3 | d1] (D) - - add r1, r1, r3 ; update src pointer - - pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (D) - pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (D) - - str r8, [r0], #4 ; diff (D) - subs r4, r4, #1 ; update loop counter - str r9, [r0], #4 ; diff (D) - - bne loop - - ldmfd sp!, {r4-r11} - mov pc, lr - - ENDP - - END - diff --git a/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm b/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm deleted file mode 100644 index 110db3074..000000000 --- a/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm +++ /dev/null @@ -1,154 +0,0 @@ -; -; Copyright (c) 2011 The WebM project authors. All Rights Reserved. 
-; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp9_variance16x16_armv6| - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -; r0 unsigned char *src_ptr -; r1 int source_stride -; r2 unsigned char *ref_ptr -; r3 int recon_stride -; stack unsigned int *sse -|vp9_variance16x16_armv6| PROC - - stmfd sp!, {r4-r12, lr} - - pld [r0, r1, lsl #0] - pld [r2, r3, lsl #0] - - mov r8, #0 ; initialize sum = 0 - mov r11, #0 ; initialize sse = 0 - mov r12, #16 ; set loop counter to 16 (=block height) - -loop - ; 1st 4 pixels - ldr r4, [r0, #0] ; load 4 src pixels - ldr r5, [r2, #0] ; load 4 ref pixels - - mov lr, #0 ; constant zero - - usub8 r6, r4, r5 ; calculate difference - pld [r0, r1, lsl #1] - sel r7, r6, lr ; select bytes with positive difference - usub8 r9, r5, r4 ; calculate difference with reversed operands - pld [r2, r3, lsl #1] - sel r6, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 pixels - ; calculate total sum - adds r8, r8, r4 ; add positive differences to sum - subs r8, r8, r5 ; substract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r10, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - - ; 2nd 4 pixels - ldr r4, [r0, #4] ; load 4 src pixels - ldr r5, [r2, #4] ; load 4 ref pixels - smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2) - - usub8 r6, r4, r5 ; calculate difference - sel r7, r6, lr ; select bytes with 
positive difference - usub8 r9, r5, r4 ; calculate difference with reversed operands - sel r6, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 pixels - - ; calculate total sum - add r8, r8, r4 ; add positive differences to sum - sub r8, r8, r5 ; substract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r10, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - - ; 3rd 4 pixels - ldr r4, [r0, #8] ; load 4 src pixels - ldr r5, [r2, #8] ; load 4 ref pixels - smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2) - - usub8 r6, r4, r5 ; calculate difference - sel r7, r6, lr ; select bytes with positive difference - usub8 r9, r5, r4 ; calculate difference with reversed operands - sel r6, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 pixels - - ; calculate total sum - add r8, r8, r4 ; add positive differences to sum - sub r8, r8, r5 ; substract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r10, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - - ; 4th 4 pixels - ldr r4, [r0, #12] ; load 4 src pixels - ldr r5, [r2, #12] ; load 4 ref pixels - smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2) - - usub8 r6, r4, r5 ; calculate difference - add r0, r0, r1 ; set src_ptr to next row - sel r7, r6, lr ; select bytes with positive difference - usub8 r9, r5, r4 ; calculate difference with reversed operands - add r2, r2, r3 ; 
set dst_ptr to next row - sel r6, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 pixels - - ; calculate total sum - add r8, r8, r4 ; add positive differences to sum - sub r8, r8, r5 ; substract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r10, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2) - - - subs r12, r12, #1 - - bne loop - - ; return stuff - ldr r6, [sp, #40] ; get address of sse - mul r0, r8, r8 ; sum * sum - str r11, [r6] ; store sse - sub r0, r11, r0, asr #8 ; return (sse - ((sum * sum) >> 8)) - - ldmfd sp!, {r4-r12, pc} - - ENDP - - END - diff --git a/vp8/encoder/arm/armv6/vp8_variance8x8_armv6.asm b/vp8/encoder/arm/armv6/vp8_variance8x8_armv6.asm deleted file mode 100644 index 101f6838d..000000000 --- a/vp8/encoder/arm/armv6/vp8_variance8x8_armv6.asm +++ /dev/null @@ -1,101 +0,0 @@ -; -; Copyright (c) 2011 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - - EXPORT |vp9_variance8x8_armv6| - - ARM - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -; r0 unsigned char *src_ptr -; r1 int source_stride -; r2 unsigned char *ref_ptr -; r3 int recon_stride -; stack unsigned int *sse -|vp9_variance8x8_armv6| PROC - - push {r4-r10, lr} - - pld [r0, r1, lsl #0] - pld [r2, r3, lsl #0] - - mov r12, #8 ; set loop counter to 8 (=block height) - mov r4, #0 ; initialize sum = 0 - mov r5, #0 ; initialize sse = 0 - -loop - ; 1st 4 pixels - ldr r6, [r0, #0x0] ; load 4 src pixels - ldr r7, [r2, #0x0] ; load 4 ref pixels - - mov lr, #0 ; constant zero - - usub8 r8, r6, r7 ; calculate difference - pld [r0, r1, lsl #1] - sel r10, r8, lr ; select bytes with positive difference - usub8 r9, r7, r6 ; calculate difference with reversed operands - pld [r2, r3, lsl #1] - sel r8, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r6, r10, lr ; calculate sum of positive differences - usad8 r7, r8, lr ; calculate sum of negative differences - orr r8, r8, r10 ; differences of all 4 pixels - ; calculate total sum - add r4, r4, r6 ; add positive differences to sum - sub r4, r4, r7 ; substract negative differences from sum - - ; calculate sse - uxtb16 r7, r8 ; byte (two pixels) to halfwords - uxtb16 r10, r8, ror #8 ; another two pixels to halfwords - smlad r5, r7, r7, r5 ; dual signed multiply, add and accumulate (1) - - ; 2nd 4 pixels - ldr r6, [r0, #0x4] ; load 4 src pixels - ldr r7, [r2, #0x4] ; load 4 ref pixels - smlad r5, r10, r10, r5 ; dual signed multiply, add and accumulate (2) - - usub8 r8, r6, r7 ; calculate difference - add r0, r0, r1 ; set src_ptr to next row - sel r10, r8, lr ; select bytes with positive difference - usub8 r9, r7, r6 ; calculate difference with reversed operands - add r2, r2, r3 ; set dst_ptr to next row - sel r8, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r6, r10, lr ; calculate sum of positive differences - usad8 r7, r8, lr ; calculate sum of 
negative differences - orr r8, r8, r10 ; differences of all 4 pixels - - ; calculate total sum - add r4, r4, r6 ; add positive differences to sum - sub r4, r4, r7 ; substract negative differences from sum - - ; calculate sse - uxtb16 r7, r8 ; byte (two pixels) to halfwords - uxtb16 r10, r8, ror #8 ; another two pixels to halfwords - smlad r5, r7, r7, r5 ; dual signed multiply, add and accumulate (1) - subs r12, r12, #1 ; next row - smlad r5, r10, r10, r5 ; dual signed multiply, add and accumulate (2) - - bne loop - - ; return stuff - ldr r8, [sp, #32] ; get address of sse - mul r1, r4, r4 ; sum * sum - str r5, [r8] ; store sse - sub r0, r5, r1, ASR #6 ; return (sse - ((sum * sum) >> 6)) - - pop {r4-r10, pc} - - ENDP - - END diff --git a/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm b/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm deleted file mode 100644 index 7a8cafd3b..000000000 --- a/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm +++ /dev/null @@ -1,182 +0,0 @@ -; -; Copyright (c) 2011 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - - EXPORT |vp9_variance_halfpixvar16x16_h_armv6| - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -; r0 unsigned char *src_ptr -; r1 int source_stride -; r2 unsigned char *ref_ptr -; r3 int recon_stride -; stack unsigned int *sse -|vp9_variance_halfpixvar16x16_h_armv6| PROC - - stmfd sp!, {r4-r12, lr} - - pld [r0, r1, lsl #0] - pld [r2, r3, lsl #0] - - mov r8, #0 ; initialize sum = 0 - ldr r10, c80808080 - mov r11, #0 ; initialize sse = 0 - mov r12, #16 ; set loop counter to 16 (=block height) - mov lr, #0 ; constant zero -loop - ; 1st 4 pixels - ldr r4, [r0, #0] ; load 4 src pixels - ldr r6, [r0, #1] ; load 4 src pixels with 1 byte offset - ldr r5, [r2, #0] ; load 4 ref pixels - - ; bilinear interpolation - mvn r6, r6 - uhsub8 r4, r4, r6 - eor r4, r4, r10 - - usub8 r6, r4, r5 ; calculate difference - pld [r0, r1, lsl #1] - sel r7, r6, lr ; select bytes with positive difference - usub8 r6, r5, r4 ; calculate difference with reversed operands - pld [r2, r3, lsl #1] - sel r6, r6, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 pixels - ; calculate total sum - adds r8, r8, r4 ; add positive differences to sum - subs r8, r8, r5 ; substract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r7, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - - ; 2nd 4 pixels - ldr r4, [r0, #4] ; load 4 src pixels - ldr r6, [r0, #5] ; load 4 src pixels with 1 byte offset - ldr r5, [r2, #4] ; load 4 ref pixels - - ; bilinear interpolation - mvn r6, r6 - uhsub8 r4, r4, r6 - eor r4, r4, r10 - - smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) - - usub8 r6, r4, r5 ; calculate difference - sel r7, r6, lr ; select bytes with positive 
difference - usub8 r6, r5, r4 ; calculate difference with reversed operands - sel r6, r6, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 pixels - - ; calculate total sum - add r8, r8, r4 ; add positive differences to sum - sub r8, r8, r5 ; substract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r7, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - - ; 3rd 4 pixels - ldr r4, [r0, #8] ; load 4 src pixels - ldr r6, [r0, #9] ; load 4 src pixels with 1 byte offset - ldr r5, [r2, #8] ; load 4 ref pixels - - ; bilinear interpolation - mvn r6, r6 - uhsub8 r4, r4, r6 - eor r4, r4, r10 - - smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) - - usub8 r6, r4, r5 ; calculate difference - sel r7, r6, lr ; select bytes with positive difference - usub8 r6, r5, r4 ; calculate difference with reversed operands - sel r6, r6, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 pixels - - ; calculate total sum - add r8, r8, r4 ; add positive differences to sum - sub r8, r8, r5 ; substract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r7, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - - ; 4th 4 pixels - ldr r4, [r0, #12] ; load 4 src pixels - ldr r6, [r0, #13] ; load 4 src pixels with 1 byte offset - ldr r5, [r2, #12] ; load 4 ref pixels - - ; bilinear interpolation - mvn r6, r6 - uhsub8 r4, r4, r6 - eor r4, r4, r10 - - smlad r11, r7, r7, r11 ; dual 
signed multiply, add and accumulate (2) - - usub8 r6, r4, r5 ; calculate difference - add r0, r0, r1 ; set src_ptr to next row - sel r7, r6, lr ; select bytes with positive difference - usub8 r6, r5, r4 ; calculate difference with reversed operands - add r2, r2, r3 ; set dst_ptr to next row - sel r6, r6, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 pixels - - ; calculate total sum - add r8, r8, r4 ; add positive differences to sum - sub r8, r8, r5 ; substract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r7, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) - - subs r12, r12, #1 - - bne loop - - ; return stuff - ldr r6, [sp, #40] ; get address of sse - mul r0, r8, r8 ; sum * sum - str r11, [r6] ; store sse - sub r0, r11, r0, asr #8 ; return (sse - ((sum * sum) >> 8)) - - ldmfd sp!, {r4-r12, pc} - - ENDP - -c80808080 - DCD 0x80808080 - - END - diff --git a/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm b/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm deleted file mode 100644 index 6ad5e90bb..000000000 --- a/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm +++ /dev/null @@ -1,222 +0,0 @@ -; -; Copyright (c) 2011 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - - EXPORT |vp9_variance_halfpixvar16x16_hv_armv6| - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -; r0 unsigned char *src_ptr -; r1 int source_stride -; r2 unsigned char *ref_ptr -; r3 int recon_stride -; stack unsigned int *sse -|vp9_variance_halfpixvar16x16_hv_armv6| PROC - - stmfd sp!, {r4-r12, lr} - - pld [r0, r1, lsl #0] - pld [r2, r3, lsl #0] - - mov r8, #0 ; initialize sum = 0 - ldr r10, c80808080 - mov r11, #0 ; initialize sse = 0 - mov r12, #16 ; set loop counter to 16 (=block height) - mov lr, #0 ; constant zero -loop - add r9, r0, r1 ; pointer to pixels on the next row - ; 1st 4 pixels - ldr r4, [r0, #0] ; load source pixels a, row N - ldr r6, [r0, #1] ; load source pixels b, row N - ldr r5, [r9, #0] ; load source pixels c, row N+1 - ldr r7, [r9, #1] ; load source pixels d, row N+1 - - ; x = (a + b + 1) >> 1, interpolate pixels horizontally on row N - mvn r6, r6 - uhsub8 r4, r4, r6 - eor r4, r4, r10 - ; y = (c + d + 1) >> 1, interpolate pixels horizontally on row N+1 - mvn r7, r7 - uhsub8 r5, r5, r7 - eor r5, r5, r10 - ; z = (x + y + 1) >> 1, interpolate half pixel values vertically - mvn r5, r5 - uhsub8 r4, r4, r5 - ldr r5, [r2, #0] ; load 4 ref pixels - eor r4, r4, r10 - - usub8 r6, r4, r5 ; calculate difference - pld [r0, r1, lsl #1] - sel r7, r6, lr ; select bytes with positive difference - usub8 r6, r5, r4 ; calculate difference with reversed operands - pld [r2, r3, lsl #1] - sel r6, r6, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 pixels - ; calculate total sum - adds r8, r8, r4 ; add positive differences to sum - subs r8, r8, r5 ; substract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r7, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual 
signed multiply, add and accumulate (1) - - ; 2nd 4 pixels - ldr r4, [r0, #4] ; load source pixels a, row N - ldr r6, [r0, #5] ; load source pixels b, row N - ldr r5, [r9, #4] ; load source pixels c, row N+1 - - smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) - - ldr r7, [r9, #5] ; load source pixels d, row N+1 - - ; x = (a + b + 1) >> 1, interpolate pixels horizontally on row N - mvn r6, r6 - uhsub8 r4, r4, r6 - eor r4, r4, r10 - ; y = (c + d + 1) >> 1, interpolate pixels horizontally on row N+1 - mvn r7, r7 - uhsub8 r5, r5, r7 - eor r5, r5, r10 - ; z = (x + y + 1) >> 1, interpolate half pixel values vertically - mvn r5, r5 - uhsub8 r4, r4, r5 - ldr r5, [r2, #4] ; load 4 ref pixels - eor r4, r4, r10 - - usub8 r6, r4, r5 ; calculate difference - sel r7, r6, lr ; select bytes with positive difference - usub8 r6, r5, r4 ; calculate difference with reversed operands - sel r6, r6, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 pixels - - ; calculate total sum - add r8, r8, r4 ; add positive differences to sum - sub r8, r8, r5 ; substract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r7, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - - ; 3rd 4 pixels - ldr r4, [r0, #8] ; load source pixels a, row N - ldr r6, [r0, #9] ; load source pixels b, row N - ldr r5, [r9, #8] ; load source pixels c, row N+1 - - smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) - - ldr r7, [r9, #9] ; load source pixels d, row N+1 - - ; x = (a + b + 1) >> 1, interpolate pixels horizontally on row N - mvn r6, r6 - uhsub8 r4, r4, r6 - eor r4, r4, r10 - ; y = (c + d + 1) >> 1, interpolate pixels horizontally on row N+1 - mvn r7, r7 - uhsub8 r5, r5, 
r7 - eor r5, r5, r10 - ; z = (x + y + 1) >> 1, interpolate half pixel values vertically - mvn r5, r5 - uhsub8 r4, r4, r5 - ldr r5, [r2, #8] ; load 4 ref pixels - eor r4, r4, r10 - - usub8 r6, r4, r5 ; calculate difference - sel r7, r6, lr ; select bytes with positive difference - usub8 r6, r5, r4 ; calculate difference with reversed operands - sel r6, r6, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 pixels - - ; calculate total sum - add r8, r8, r4 ; add positive differences to sum - sub r8, r8, r5 ; substract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r7, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - - ; 4th 4 pixels - ldr r4, [r0, #12] ; load source pixels a, row N - ldr r6, [r0, #13] ; load source pixels b, row N - ldr r5, [r9, #12] ; load source pixels c, row N+1 - smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) - ldr r7, [r9, #13] ; load source pixels d, row N+1 - - ; x = (a + b + 1) >> 1, interpolate pixels horizontally on row N - mvn r6, r6 - uhsub8 r4, r4, r6 - eor r4, r4, r10 - ; y = (c + d + 1) >> 1, interpolate pixels horizontally on row N+1 - mvn r7, r7 - uhsub8 r5, r5, r7 - eor r5, r5, r10 - ; z = (x + y + 1) >> 1, interpolate half pixel values vertically - mvn r5, r5 - uhsub8 r4, r4, r5 - ldr r5, [r2, #12] ; load 4 ref pixels - eor r4, r4, r10 - - usub8 r6, r4, r5 ; calculate difference - add r0, r0, r1 ; set src_ptr to next row - sel r7, r6, lr ; select bytes with positive difference - usub8 r6, r5, r4 ; calculate difference with reversed operands - add r2, r2, r3 ; set dst_ptr to next row - sel r6, r6, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of 
positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 pixels - - ; calculate total sum - add r8, r8, r4 ; add positive differences to sum - sub r8, r8, r5 ; substract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r7, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - subs r12, r12, #1 - smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) - - bne loop - - ; return stuff - ldr r6, [sp, #40] ; get address of sse - mul r0, r8, r8 ; sum * sum - str r11, [r6] ; store sse - sub r0, r11, r0, asr #8 ; return (sse - ((sum * sum) >> 8)) - - ldmfd sp!, {r4-r12, pc} - - ENDP - -c80808080 - DCD 0x80808080 - - END diff --git a/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm b/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm deleted file mode 100644 index 0471d3d67..000000000 --- a/vp8/encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm +++ /dev/null @@ -1,184 +0,0 @@ -; -; Copyright (c) 2011 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - - EXPORT |vp9_variance_halfpixvar16x16_v_armv6| - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -; r0 unsigned char *src_ptr -; r1 int source_stride -; r2 unsigned char *ref_ptr -; r3 int recon_stride -; stack unsigned int *sse -|vp9_variance_halfpixvar16x16_v_armv6| PROC - - stmfd sp!, {r4-r12, lr} - - pld [r0, r1, lsl #0] - pld [r2, r3, lsl #0] - - mov r8, #0 ; initialize sum = 0 - ldr r10, c80808080 - mov r11, #0 ; initialize sse = 0 - mov r12, #16 ; set loop counter to 16 (=block height) - mov lr, #0 ; constant zero -loop - add r9, r0, r1 ; set src pointer to next row - ; 1st 4 pixels - ldr r4, [r0, #0] ; load 4 src pixels - ldr r6, [r9, #0] ; load 4 src pixels from next row - ldr r5, [r2, #0] ; load 4 ref pixels - - ; bilinear interpolation - mvn r6, r6 - uhsub8 r4, r4, r6 - eor r4, r4, r10 - - usub8 r6, r4, r5 ; calculate difference - pld [r0, r1, lsl #1] - sel r7, r6, lr ; select bytes with positive difference - usub8 r6, r5, r4 ; calculate difference with reversed operands - pld [r2, r3, lsl #1] - sel r6, r6, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 pixels - ; calculate total sum - adds r8, r8, r4 ; add positive differences to sum - subs r8, r8, r5 ; substract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r7, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - - ; 2nd 4 pixels - ldr r4, [r0, #4] ; load 4 src pixels - ldr r6, [r9, #4] ; load 4 src pixels from next row - ldr r5, [r2, #4] ; load 4 ref pixels - - ; bilinear interpolation - mvn r6, r6 - uhsub8 r4, r4, r6 - eor r4, r4, r10 - - smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) - - usub8 r6, r4, r5 ; calculate difference - sel r7, 
r6, lr ; select bytes with positive difference - usub8 r6, r5, r4 ; calculate difference with reversed operands - sel r6, r6, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 pixels - - ; calculate total sum - add r8, r8, r4 ; add positive differences to sum - sub r8, r8, r5 ; substract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r7, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - - ; 3rd 4 pixels - ldr r4, [r0, #8] ; load 4 src pixels - ldr r6, [r9, #8] ; load 4 src pixels from next row - ldr r5, [r2, #8] ; load 4 ref pixels - - ; bilinear interpolation - mvn r6, r6 - uhsub8 r4, r4, r6 - eor r4, r4, r10 - - smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) - - usub8 r6, r4, r5 ; calculate difference - sel r7, r6, lr ; select bytes with positive difference - usub8 r6, r5, r4 ; calculate difference with reversed operands - sel r6, r6, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 pixels - - ; calculate total sum - add r8, r8, r4 ; add positive differences to sum - sub r8, r8, r5 ; substract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r7, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - - ; 4th 4 pixels - ldr r4, [r0, #12] ; load 4 src pixels - ldr r6, [r9, #12] ; load 4 src pixels from next row - ldr r5, [r2, #12] ; load 4 ref pixels - - ; bilinear interpolation - mvn r6, r6 - uhsub8 r4, r4, r6 - eor r4, r4, r10 - - smlad 
r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) - - usub8 r6, r4, r5 ; calculate difference - add r0, r0, r1 ; set src_ptr to next row - sel r7, r6, lr ; select bytes with positive difference - usub8 r6, r5, r4 ; calculate difference with reversed operands - add r2, r2, r3 ; set dst_ptr to next row - sel r6, r6, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 pixels - - ; calculate total sum - add r8, r8, r4 ; add positive differences to sum - sub r8, r8, r5 ; substract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r7, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) - - - subs r12, r12, #1 - - bne loop - - ; return stuff - ldr r6, [sp, #40] ; get address of sse - mul r0, r8, r8 ; sum * sum - str r11, [r6] ; store sse - sub r0, r11, r0, asr #8 ; return (sse - ((sum * sum) >> 8)) - - ldmfd sp!, {r4-r12, pc} - - ENDP - -c80808080 - DCD 0x80808080 - - END - diff --git a/vp8/encoder/arm/armv6/walsh_v6.asm b/vp8/encoder/arm/armv6/walsh_v6.asm deleted file mode 100644 index 5eaf3f25a..000000000 --- a/vp8/encoder/arm/armv6/walsh_v6.asm +++ /dev/null @@ -1,212 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - EXPORT |vp8_short_walsh4x4_armv6| - - ARM - REQUIRE8 - PRESERVE8 - - AREA |.text|, CODE, READONLY ; name this block of code - -;short vp8_short_walsh4x4_armv6(short *input, short *output, int pitch) -; r0 short *input, -; r1 short *output, -; r2 int pitch -|vp8_short_walsh4x4_armv6| PROC - - stmdb sp!, {r4 - r11, lr} - - ldrd r4, r5, [r0], r2 - ldr lr, c00040004 - ldrd r6, r7, [r0], r2 - - ; 0-3 - qadd16 r3, r4, r5 ; [d1|a1] [1+3 | 0+2] - qsub16 r4, r4, r5 ; [c1|b1] [1-3 | 0-2] - - ldrd r8, r9, [r0], r2 - ; 4-7 - qadd16 r5, r6, r7 ; [d1|a1] [5+7 | 4+6] - qsub16 r6, r6, r7 ; [c1|b1] [5-7 | 4-6] - - ldrd r10, r11, [r0] - ; 8-11 - qadd16 r7, r8, r9 ; [d1|a1] [9+11 | 8+10] - qsub16 r8, r8, r9 ; [c1|b1] [9-11 | 8-10] - - ; 12-15 - qadd16 r9, r10, r11 ; [d1|a1] [13+15 | 12+14] - qsub16 r10, r10, r11 ; [c1|b1] [13-15 | 12-14] - - - lsls r2, r3, #16 - smuad r11, r3, lr ; A0 = a1<<2 + d1<<2 - addne r11, r11, #1 ; A0 += (a1!=0) - - lsls r2, r7, #16 - smuad r12, r7, lr ; C0 = a1<<2 + d1<<2 - addne r12, r12, #1 ; C0 += (a1!=0) - - add r0, r11, r12 ; a1_0 = A0 + C0 - sub r11, r11, r12 ; b1_0 = A0 - C0 - - lsls r2, r5, #16 - smuad r12, r5, lr ; B0 = a1<<2 + d1<<2 - addne r12, r12, #1 ; B0 += (a1!=0) - - lsls r2, r9, #16 - smuad r2, r9, lr ; D0 = a1<<2 + d1<<2 - addne r2, r2, #1 ; D0 += (a1!=0) - - add lr, r12, r2 ; d1_0 = B0 + D0 - sub r12, r12, r2 ; c1_0 = B0 - D0 - - ; op[0,4,8,12] - adds r2, r0, lr ; a2 = a1_0 + d1_0 - addmi r2, r2, #1 ; += a2 < 0 - add r2, r2, #3 ; += 3 - subs r0, r0, lr ; d2 = a1_0 - d1_0 - mov r2, r2, asr #3 ; >> 3 - strh r2, [r1] ; op[0] - - addmi r0, r0, #1 ; += a2 < 0 - add r0, r0, #3 ; += 3 - ldr lr, c00040004 - mov r0, r0, asr #3 ; >> 3 - strh r0, [r1, #24] ; op[12] - - adds r2, r11, r12 ; b2 = b1_0 + c1_0 - addmi r2, r2, #1 ; += a2 < 0 - add r2, r2, #3 ; += 3 - subs r0, r11, r12 ; c2 = b1_0 - c1_0 - mov r2, r2, asr #3 ; >> 3 - strh r2, [r1, #8] ; op[4] - - addmi r0, r0, #1 ; += a2 < 0 - add r0, r0, #3 ; += 3 - smusd r3, r3, lr ; A3 = a1<<2 - 
d1<<2 - smusd r7, r7, lr ; C3 = a1<<2 - d1<<2 - mov r0, r0, asr #3 ; >> 3 - strh r0, [r1, #16] ; op[8] - - - ; op[3,7,11,15] - add r0, r3, r7 ; a1_3 = A3 + C3 - sub r3, r3, r7 ; b1_3 = A3 - C3 - - smusd r5, r5, lr ; B3 = a1<<2 - d1<<2 - smusd r9, r9, lr ; D3 = a1<<2 - d1<<2 - add r7, r5, r9 ; d1_3 = B3 + D3 - sub r5, r5, r9 ; c1_3 = B3 - D3 - - adds r2, r0, r7 ; a2 = a1_3 + d1_3 - addmi r2, r2, #1 ; += a2 < 0 - add r2, r2, #3 ; += 3 - adds r9, r3, r5 ; b2 = b1_3 + c1_3 - mov r2, r2, asr #3 ; >> 3 - strh r2, [r1, #6] ; op[3] - - addmi r9, r9, #1 ; += a2 < 0 - add r9, r9, #3 ; += 3 - subs r2, r3, r5 ; c2 = b1_3 - c1_3 - mov r9, r9, asr #3 ; >> 3 - strh r9, [r1, #14] ; op[7] - - addmi r2, r2, #1 ; += a2 < 0 - add r2, r2, #3 ; += 3 - subs r9, r0, r7 ; d2 = a1_3 - d1_3 - mov r2, r2, asr #3 ; >> 3 - strh r2, [r1, #22] ; op[11] - - addmi r9, r9, #1 ; += a2 < 0 - add r9, r9, #3 ; += 3 - smuad r3, r4, lr ; A1 = b1<<2 + c1<<2 - smuad r5, r8, lr ; C1 = b1<<2 + c1<<2 - mov r9, r9, asr #3 ; >> 3 - strh r9, [r1, #30] ; op[15] - - ; op[1,5,9,13] - add r0, r3, r5 ; a1_1 = A1 + C1 - sub r3, r3, r5 ; b1_1 = A1 - C1 - - smuad r7, r6, lr ; B1 = b1<<2 + c1<<2 - smuad r9, r10, lr ; D1 = b1<<2 + c1<<2 - add r5, r7, r9 ; d1_1 = B1 + D1 - sub r7, r7, r9 ; c1_1 = B1 - D1 - - adds r2, r0, r5 ; a2 = a1_1 + d1_1 - addmi r2, r2, #1 ; += a2 < 0 - add r2, r2, #3 ; += 3 - adds r9, r3, r7 ; b2 = b1_1 + c1_1 - mov r2, r2, asr #3 ; >> 3 - strh r2, [r1, #2] ; op[1] - - addmi r9, r9, #1 ; += a2 < 0 - add r9, r9, #3 ; += 3 - subs r2, r3, r7 ; c2 = b1_1 - c1_1 - mov r9, r9, asr #3 ; >> 3 - strh r9, [r1, #10] ; op[5] - - addmi r2, r2, #1 ; += a2 < 0 - add r2, r2, #3 ; += 3 - subs r9, r0, r5 ; d2 = a1_1 - d1_1 - mov r2, r2, asr #3 ; >> 3 - strh r2, [r1, #18] ; op[9] - - addmi r9, r9, #1 ; += a2 < 0 - add r9, r9, #3 ; += 3 - smusd r4, r4, lr ; A2 = b1<<2 - c1<<2 - smusd r8, r8, lr ; C2 = b1<<2 - c1<<2 - mov r9, r9, asr #3 ; >> 3 - strh r9, [r1, #26] ; op[13] - - - ; op[2,6,10,14] - add r11, r4, r8 ; a1_2 = 
A2 + C2 - sub r12, r4, r8 ; b1_2 = A2 - C2 - - smusd r6, r6, lr ; B2 = b1<<2 - c1<<2 - smusd r10, r10, lr ; D2 = b1<<2 - c1<<2 - add r4, r6, r10 ; d1_2 = B2 + D2 - sub r8, r6, r10 ; c1_2 = B2 - D2 - - adds r2, r11, r4 ; a2 = a1_2 + d1_2 - addmi r2, r2, #1 ; += a2 < 0 - add r2, r2, #3 ; += 3 - adds r9, r12, r8 ; b2 = b1_2 + c1_2 - mov r2, r2, asr #3 ; >> 3 - strh r2, [r1, #4] ; op[2] - - addmi r9, r9, #1 ; += a2 < 0 - add r9, r9, #3 ; += 3 - subs r2, r12, r8 ; c2 = b1_2 - c1_2 - mov r9, r9, asr #3 ; >> 3 - strh r9, [r1, #12] ; op[6] - - addmi r2, r2, #1 ; += a2 < 0 - add r2, r2, #3 ; += 3 - subs r9, r11, r4 ; d2 = a1_2 - d1_2 - mov r2, r2, asr #3 ; >> 3 - strh r2, [r1, #20] ; op[10] - - addmi r9, r9, #1 ; += a2 < 0 - add r9, r9, #3 ; += 3 - mov r9, r9, asr #3 ; >> 3 - strh r9, [r1, #28] ; op[14] - - - ldmia sp!, {r4 - r11, pc} - ENDP ; |vp8_short_walsh4x4_armv6| - -c00040004 - DCD 0x00040004 - - END diff --git a/vp8/encoder/arm/boolhuff_arm.c b/vp8/encoder/arm/boolhuff_arm.c deleted file mode 100644 index bc6fb4700..000000000 --- a/vp8/encoder/arm/boolhuff_arm.c +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include "vp8/encoder/boolhuff.h" -#include "vp8/common/blockd.h" - -const unsigned int vp9_prob_cost[256] = { - 2047, 2047, 1791, 1641, 1535, 1452, 1385, 1328, 1279, 1235, 1196, 1161, 1129, 1099, 1072, 1046, - 1023, 1000, 979, 959, 940, 922, 905, 889, 873, 858, 843, 829, 816, 803, 790, 778, - 767, 755, 744, 733, 723, 713, 703, 693, 684, 675, 666, 657, 649, 641, 633, 625, - 617, 609, 602, 594, 587, 580, 573, 567, 560, 553, 547, 541, 534, 528, 522, 516, - 511, 505, 499, 494, 488, 483, 477, 472, 467, 462, 457, 452, 447, 442, 437, 433, - 428, 424, 419, 415, 410, 406, 401, 397, 393, 389, 385, 381, 377, 373, 369, 365, - 361, 357, 353, 349, 346, 342, 338, 335, 331, 328, 324, 321, 317, 314, 311, 307, - 304, 301, 297, 294, 291, 288, 285, 281, 278, 275, 272, 269, 266, 263, 260, 257, - 255, 252, 249, 246, 243, 240, 238, 235, 232, 229, 227, 224, 221, 219, 216, 214, - 211, 208, 206, 203, 201, 198, 196, 194, 191, 189, 186, 184, 181, 179, 177, 174, - 172, 170, 168, 165, 163, 161, 159, 156, 154, 152, 150, 148, 145, 143, 141, 139, - 137, 135, 133, 131, 129, 127, 125, 123, 121, 119, 117, 115, 113, 111, 109, 107, - 105, 103, 101, 99, 97, 95, 93, 92, 90, 88, 86, 84, 82, 81, 79, 77, - 75, 73, 72, 70, 68, 66, 65, 63, 61, 60, 58, 56, 55, 53, 51, 50, - 48, 46, 45, 43, 41, 40, 38, 37, 35, 33, 32, 30, 29, 27, 25, 24, - 22, 21, 19, 18, 16, 15, 13, 12, 10, 9, 7, 6, 4, 3, 1, 1 -}; - diff --git a/vp8/encoder/arm/dct_arm.c b/vp8/encoder/arm/dct_arm.c deleted file mode 100644 index 3fd04f383..000000000 --- a/vp8/encoder/arm/dct_arm.c +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (c) 2011 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "vpx_config.h" -#include "./vpx_rtcd.h" - -#if HAVE_ARMV6 - -void vp9_short_fdct8x4_armv6(short *input, short *output, int pitch) { - vp9_short_fdct4x4_armv6(input, output, pitch); - vp9_short_fdct4x4_armv6(input + 4, output + 16, pitch); -} - -#endif /* HAVE_ARMV6 */ diff --git a/vp8/encoder/arm/dct_arm.h b/vp8/encoder/arm/dct_arm.h deleted file mode 100644 index 83c446e7e..000000000 --- a/vp8/encoder/arm/dct_arm.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef DCT_ARM_H -#define DCT_ARM_H - -#if HAVE_ARMV6 -extern prototype_fdct(vp9_short_walsh4x4_armv6); -extern prototype_fdct(vp9_short_fdct4x4_armv6); -extern prototype_fdct(vp9_short_fdct8x4_armv6); - -#if !CONFIG_RUNTIME_CPU_DETECT -#undef vp8_fdct_walsh_short4x4 -#define vp8_fdct_walsh_short4x4 vp9_short_walsh4x4_armv6 - -#undef vp8_fdct_short4x4 -#define vp8_fdct_short4x4 vp9_short_fdct4x4_armv6 - -#undef vp8_fdct_short8x4 -#define vp8_fdct_short8x4 vp9_short_fdct8x4_armv6 - -#undef vp8_fdct_fast4x4 -#define vp8_fdct_fast4x4 vp9_short_fdct4x4_armv6 - -#undef vp8_fdct_fast8x4 -#define vp8_fdct_fast8x4 vp9_short_fdct8x4_armv6 -#endif - -#endif /* HAVE_ARMV6 */ - -#if HAVE_ARMV7 -extern prototype_fdct(vp9_short_fdct4x4_neon); -extern prototype_fdct(vp9_short_fdct8x4_neon); -extern prototype_fdct(vp8_fast_fdct4x4_neon); -extern prototype_fdct(vp8_fast_fdct8x4_neon); -extern prototype_fdct(vp9_short_walsh4x4_neon); - -#if !CONFIG_RUNTIME_CPU_DETECT -#undef vp8_fdct_short4x4 -#define vp8_fdct_short4x4 vp9_short_fdct4x4_neon - -#undef vp8_fdct_short8x4 -#define vp8_fdct_short8x4 
vp9_short_fdct8x4_neon - -#undef vp8_fdct_fast4x4 -#define vp8_fdct_fast4x4 vp9_short_fdct4x4_neon - -#undef vp8_fdct_fast8x4 -#define vp8_fdct_fast8x4 vp9_short_fdct8x4_neon - -#undef vp8_fdct_walsh_short4x4 -#define vp8_fdct_walsh_short4x4 vp9_short_walsh4x4_neon -#endif - -#endif - -#endif diff --git a/vp8/encoder/arm/encodemb_arm.h b/vp8/encoder/arm/encodemb_arm.h deleted file mode 100644 index 80bff79df..000000000 --- a/vp8/encoder/arm/encodemb_arm.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef ENCODEMB_ARM_H -#define ENCODEMB_ARM_H - -#if HAVE_ARMV6 -extern prototype_subb(vp9_subtract_b_armv6); -extern prototype_submby(vp9_subtract_mby_armv6); -extern prototype_submbuv(vp9_subtract_mbuv_armv6); - -#if !CONFIG_RUNTIME_CPU_DETECT -#undef vp8_encodemb_subb -#define vp8_encodemb_subb vp9_subtract_b_armv6 - -#undef vp8_encodemb_submby -#define vp8_encodemb_submby vp9_subtract_mby_armv6 - -#undef vp8_encodemb_submbuv -#define vp8_encodemb_submbuv vp9_subtract_mbuv_armv6 -#endif - -#endif /* HAVE_ARMV6 */ - -#if HAVE_ARMV7 -// extern prototype_berr(vp9_block_error_c); -// extern prototype_mberr(vp9_mbblock_error_c); -// extern prototype_mbuverr(vp9_mbuverror_c); - -extern prototype_subb(vp9_subtract_b_neon); -extern prototype_submby(vp9_subtract_mby_neon); -extern prototype_submbuv(vp9_subtract_mbuv_neon); - -// #undef vp8_encodemb_berr -// #define vp8_encodemb_berr vp9_block_error_c - -// #undef vp8_encodemb_mberr -// #define vp8_encodemb_mberr vp9_mbblock_error_c - -// #undef vp8_encodemb_mbuverr -// #define vp8_encodemb_mbuverr vp9_mbuverror_c - -#if 
!CONFIG_RUNTIME_CPU_DETECT -#undef vp8_encodemb_subb -#define vp8_encodemb_subb vp9_subtract_b_neon - -#undef vp8_encodemb_submby -#define vp8_encodemb_submby vp9_subtract_mby_neon - -#undef vp8_encodemb_submbuv -#define vp8_encodemb_submbuv vp9_subtract_mbuv_neon -#endif - -#endif - -#endif diff --git a/vp8/encoder/arm/neon/fastquantizeb_neon.asm b/vp8/encoder/arm/neon/fastquantizeb_neon.asm deleted file mode 100644 index dcf3c5090..000000000 --- a/vp8/encoder/arm/neon/fastquantizeb_neon.asm +++ /dev/null @@ -1,261 +0,0 @@ -; -; Copyright (c) 2011 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_fast_quantize_b_neon| - EXPORT |vp8_fast_quantize_b_pair_neon| - - INCLUDE asm_enc_offsets.asm - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=4 - -;vp8_fast_quantize_b_pair_neon(BLOCK *b1, BLOCK *b2, BLOCKD *d1, BLOCKD *d2); -|vp8_fast_quantize_b_pair_neon| PROC - - stmfd sp!, {r4-r9} - vstmdb sp!, {q4-q7} - - ldr r4, [r0, #vp8_block_coeff] - ldr r5, [r0, #vp8_block_quant_fast] - ldr r6, [r0, #vp8_block_round] - - vld1.16 {q0, q1}, [r4@128] ; load z - - ldr r7, [r2, #vp8_blockd_qcoeff] - - vabs.s16 q4, q0 ; calculate x = abs(z) - vabs.s16 q5, q1 - - ;right shift 15 to get sign, all 0 if it is positive, all 1 if it is negative - vshr.s16 q2, q0, #15 ; sz - vshr.s16 q3, q1, #15 - - vld1.s16 {q6, q7}, [r6@128] ; load round_ptr [0-15] - vld1.s16 {q8, q9}, [r5@128] ; load quant_ptr [0-15] - - ldr r4, [r1, #vp8_block_coeff] - - vadd.s16 q4, q6 ; x + Round - vadd.s16 q5, q7 - - vld1.16 {q0, q1}, [r4@128] ; load z2 - - vqdmulh.s16 q4, q8 ; y = ((Round+abs(z)) * Quant) >> 16 - vqdmulh.s16 q5, q9 - - vabs.s16 q10, q0 
; calculate x2 = abs(z_2) - vabs.s16 q11, q1 - vshr.s16 q12, q0, #15 ; sz2 - vshr.s16 q13, q1, #15 - - ;modify data to have its original sign - veor.s16 q4, q2 ; y^sz - veor.s16 q5, q3 - - vadd.s16 q10, q6 ; x2 + Round - vadd.s16 q11, q7 - - ldr r8, [r2, #vp8_blockd_dequant] - - vqdmulh.s16 q10, q8 ; y2 = ((Round+abs(z)) * Quant) >> 16 - vqdmulh.s16 q11, q9 - - vshr.s16 q4, #1 ; right shift 1 after vqdmulh - vshr.s16 q5, #1 - - vld1.s16 {q6, q7}, [r8@128] ;load dequant_ptr[i] - - vsub.s16 q4, q2 ; x1=(y^sz)-sz = (y^sz)-(-1) (2's complement) - vsub.s16 q5, q3 - - vshr.s16 q10, #1 ; right shift 1 after vqdmulh - vshr.s16 q11, #1 - - ldr r9, [r2, #vp8_blockd_dqcoeff] - - veor.s16 q10, q12 ; y2^sz2 - veor.s16 q11, q13 - - vst1.s16 {q4, q5}, [r7] ; store: qcoeff = x1 - - - vsub.s16 q10, q12 ; x2=(y^sz)-sz = (y^sz)-(-1) (2's complement) - vsub.s16 q11, q13 - - ldr r6, [r3, #vp8_blockd_qcoeff] - - vmul.s16 q2, q6, q4 ; x * Dequant - vmul.s16 q3, q7, q5 - - ldr r0, _inv_zig_zag_ ; load ptr of inverse zigzag table - - vceq.s16 q8, q8 ; set q8 to all 1 - - vst1.s16 {q10, q11}, [r6] ; store: qcoeff = x2 - - vmul.s16 q12, q6, q10 ; x2 * Dequant - vmul.s16 q13, q7, q11 - - vld1.16 {q6, q7}, [r0@128] ; load inverse scan order - - vtst.16 q14, q4, q8 ; now find eob - vtst.16 q15, q5, q8 ; non-zero element is set to all 1 - - vst1.s16 {q2, q3}, [r9] ; store dqcoeff = x * Dequant - - ldr r7, [r3, #vp8_blockd_dqcoeff] - - vand q0, q6, q14 ; get all valid numbers from scan array - vand q1, q7, q15 - - vst1.s16 {q12, q13}, [r7] ; store dqcoeff = x * Dequant - - vtst.16 q2, q10, q8 ; now find eob - vtst.16 q3, q11, q8 ; non-zero element is set to all 1 - - vmax.u16 q0, q0, q1 ; find maximum value in q0, q1 - - vand q10, q6, q2 ; get all valid numbers from scan array - vand q11, q7, q3 - vmax.u16 q10, q10, q11 ; find maximum value in q10, q11 - - vmax.u16 d0, d0, d1 - vmax.u16 d20, d20, d21 - vmovl.u16 q0, d0 - vmovl.u16 q10, d20 - - - vmax.u32 d0, d0, d1 - vmax.u32 d20, d20, d21 - 
vpmax.u32 d0, d0, d0 - vpmax.u32 d20, d20, d20 - - add r4, r2, #vp8_blockd_eob - add r5, r3, #vp8_blockd_eob - - vst1.32 {d0[0]}, [r4@32] - vst1.32 {d20[0]}, [r5@32] - - vldmia sp!, {q4-q7} - ldmfd sp!, {r4-r9} - bx lr - - ENDP - -;void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d) -|vp8_fast_quantize_b_neon| PROC - - stmfd sp!, {r4-r7} - - ldr r3, [r0, #vp8_block_coeff] - ldr r4, [r0, #vp8_block_quant_fast] - ldr r5, [r0, #vp8_block_round] - - vld1.16 {q0, q1}, [r3@128] ; load z - vorr.s16 q14, q0, q1 ; check if all zero (step 1) - ldr r6, [r1, #vp8_blockd_qcoeff] - ldr r7, [r1, #vp8_blockd_dqcoeff] - vorr.s16 d28, d28, d29 ; check if all zero (step 2) - - vabs.s16 q12, q0 ; calculate x = abs(z) - vabs.s16 q13, q1 - - ;right shift 15 to get sign, all 0 if it is positive, all 1 if it is negative - vshr.s16 q2, q0, #15 ; sz - vmov r2, r3, d28 ; check if all zero (step 3) - vshr.s16 q3, q1, #15 - - vld1.s16 {q14, q15}, [r5@128]; load round_ptr [0-15] - vld1.s16 {q8, q9}, [r4@128] ; load quant_ptr [0-15] - - vadd.s16 q12, q14 ; x + Round - vadd.s16 q13, q15 - - ldr r0, _inv_zig_zag_ ; load ptr of inverse zigzag table - - vqdmulh.s16 q12, q8 ; y = ((Round+abs(z)) * Quant) >> 16 - vqdmulh.s16 q13, q9 - - vld1.16 {q10, q11}, [r0@128]; load inverse scan order - - vceq.s16 q8, q8 ; set q8 to all 1 - - ldr r4, [r1, #vp8_blockd_dequant] - - vshr.s16 q12, #1 ; right shift 1 after vqdmulh - vshr.s16 q13, #1 - - orr r2, r2, r3 ; check if all zero (step 4) - cmp r2, #0 ; check if all zero (step 5) - beq zero_output ; check if all zero (step 6) - - ;modify data to have its original sign - veor.s16 q12, q2 ; y^sz - veor.s16 q13, q3 - - vsub.s16 q12, q2 ; x1=(y^sz)-sz = (y^sz)-(-1) (2's complement) - vsub.s16 q13, q3 - - vld1.s16 {q2, q3}, [r4@128] ; load dequant_ptr[i] - - vtst.16 q14, q12, q8 ; now find eob - vtst.16 q15, q13, q8 ; non-zero element is set to all 1 - - vst1.s16 {q12, q13}, [r6@128]; store: qcoeff = x1 - - vand q10, q10, q14 ; get all valid numbers from scan array - 
vand q11, q11, q15 - - - vmax.u16 q0, q10, q11 ; find maximum value in q0, q1 - vmax.u16 d0, d0, d1 - vmovl.u16 q0, d0 - - vmul.s16 q2, q12 ; x * Dequant - vmul.s16 q3, q13 - - vmax.u32 d0, d0, d1 - vpmax.u32 d0, d0, d0 - - vst1.s16 {q2, q3}, [r7@128] ; store dqcoeff = x * Dequant - - add r4, r1, #vp8_blockd_eob - vst1.32 {d0[0]}, [r4@32] - - ldmfd sp!, {r4-r7} - bx lr - -zero_output - str r2, [r1, #vp8_blockd_eob] - vst1.s16 {q0, q1}, [r6@128] ; qcoeff = 0 - vst1.s16 {q0, q1}, [r7@128] ; dqcoeff = 0 - - ldmfd sp!, {r4-r7} - bx lr - - ENDP - -; default inverse zigzag table is defined in vp8/common/entropy.c -_inv_zig_zag_ - DCD inv_zig_zag - - ALIGN 16 ; enable use of @128 bit aligned loads -inv_zig_zag - DCW 0x0001, 0x0002, 0x0006, 0x0007 - DCW 0x0003, 0x0005, 0x0008, 0x000d - DCW 0x0004, 0x0009, 0x000c, 0x000e - DCW 0x000a, 0x000b, 0x000f, 0x0010 - - END - diff --git a/vp8/encoder/arm/neon/picklpf_arm.c b/vp8/encoder/arm/neon/picklpf_arm.c deleted file mode 100644 index 44cb6a674..000000000 --- a/vp8/encoder/arm/neon/picklpf_arm.c +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include "vp8/common/onyxc_int.h" -#include "vp8/encoder/onyx_int.h" -#include "vp8/encoder/quantize.h" -#include "vpx_mem/vpx_mem.h" -#include "vpx_scale/yv12extend.h" -#include "vpx_scale/vpxscale.h" -#include "vp8/common/alloccommon.h" - -extern void vp8_memcpy_neon(unsigned char *dst_ptr, unsigned char *src_ptr, int sz); - - -void -vpxyv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction) { - unsigned char *src_y, *dst_y; - int yheight; - int ystride; - int border; - int yoffset; - int linestocopy; - - border = src_ybc->border; - yheight = src_ybc->y_height; - ystride = src_ybc->y_stride; - - linestocopy = (yheight >> (Fraction + 4)); - - if (linestocopy < 1) - linestocopy = 1; - - linestocopy <<= 4; - - yoffset = ystride * ((yheight >> 5) * 16 - 8); - src_y = src_ybc->y_buffer + yoffset; - dst_y = dst_ybc->y_buffer + yoffset; - - // vpx_memcpy (dst_y, src_y, ystride * (linestocopy +16)); - vp8_memcpy_neon((unsigned char *)dst_y, (unsigned char *)src_y, (int)(ystride * (linestocopy + 16))); -} diff --git a/vp8/encoder/arm/neon/sad16_neon.asm b/vp8/encoder/arm/neon/sad16_neon.asm deleted file mode 100644 index d7c590e15..000000000 --- a/vp8/encoder/arm/neon/sad16_neon.asm +++ /dev/null @@ -1,207 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - - EXPORT |vp8_sad16x16_neon| - EXPORT |vp8_sad16x8_neon| - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -; r0 unsigned char *src_ptr -; r1 int src_stride -; r2 unsigned char *ref_ptr -; r3 int ref_stride -|vp8_sad16x16_neon| PROC -;; - vld1.8 {q0}, [r0], r1 - vld1.8 {q4}, [r2], r3 - - vld1.8 {q1}, [r0], r1 - vld1.8 {q5}, [r2], r3 - - vabdl.u8 q12, d0, d8 - vabdl.u8 q13, d1, d9 - - vld1.8 {q2}, [r0], r1 - vld1.8 {q6}, [r2], r3 - - vabal.u8 q12, d2, d10 - vabal.u8 q13, d3, d11 - - vld1.8 {q3}, [r0], r1 - vld1.8 {q7}, [r2], r3 - - vabal.u8 q12, d4, d12 - vabal.u8 q13, d5, d13 - -;; - vld1.8 {q0}, [r0], r1 - vld1.8 {q4}, [r2], r3 - - vabal.u8 q12, d6, d14 - vabal.u8 q13, d7, d15 - - vld1.8 {q1}, [r0], r1 - vld1.8 {q5}, [r2], r3 - - vabal.u8 q12, d0, d8 - vabal.u8 q13, d1, d9 - - vld1.8 {q2}, [r0], r1 - vld1.8 {q6}, [r2], r3 - - vabal.u8 q12, d2, d10 - vabal.u8 q13, d3, d11 - - vld1.8 {q3}, [r0], r1 - vld1.8 {q7}, [r2], r3 - - vabal.u8 q12, d4, d12 - vabal.u8 q13, d5, d13 - -;; - vld1.8 {q0}, [r0], r1 - vld1.8 {q4}, [r2], r3 - - vabal.u8 q12, d6, d14 - vabal.u8 q13, d7, d15 - - vld1.8 {q1}, [r0], r1 - vld1.8 {q5}, [r2], r3 - - vabal.u8 q12, d0, d8 - vabal.u8 q13, d1, d9 - - vld1.8 {q2}, [r0], r1 - vld1.8 {q6}, [r2], r3 - - vabal.u8 q12, d2, d10 - vabal.u8 q13, d3, d11 - - vld1.8 {q3}, [r0], r1 - vld1.8 {q7}, [r2], r3 - - vabal.u8 q12, d4, d12 - vabal.u8 q13, d5, d13 - -;; - vld1.8 {q0}, [r0], r1 - vld1.8 {q4}, [r2], r3 - - vabal.u8 q12, d6, d14 - vabal.u8 q13, d7, d15 - - vld1.8 {q1}, [r0], r1 - vld1.8 {q5}, [r2], r3 - - vabal.u8 q12, d0, d8 - vabal.u8 q13, d1, d9 - - vld1.8 {q2}, [r0], r1 - vld1.8 {q6}, [r2], r3 - - vabal.u8 q12, d2, d10 - vabal.u8 q13, d3, d11 - - vld1.8 {q3}, [r0] - vld1.8 {q7}, [r2] - - vabal.u8 q12, d4, d12 - vabal.u8 q13, d5, d13 - - vabal.u8 q12, d6, d14 - vabal.u8 q13, d7, d15 - - vadd.u16 q0, q12, q13 - - vpaddl.u16 q1, q0 - vpaddl.u32 q0, q1 - - vadd.u32 d0, d0, d1 - - vmov.32 r0, d0[0] - - bx lr - - ENDP - 
-;============================== -;unsigned int vp8_sad16x8_c( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) -|vp8_sad16x8_neon| PROC - vld1.8 {q0}, [r0], r1 - vld1.8 {q4}, [r2], r3 - - vld1.8 {q1}, [r0], r1 - vld1.8 {q5}, [r2], r3 - - vabdl.u8 q12, d0, d8 - vabdl.u8 q13, d1, d9 - - vld1.8 {q2}, [r0], r1 - vld1.8 {q6}, [r2], r3 - - vabal.u8 q12, d2, d10 - vabal.u8 q13, d3, d11 - - vld1.8 {q3}, [r0], r1 - vld1.8 {q7}, [r2], r3 - - vabal.u8 q12, d4, d12 - vabal.u8 q13, d5, d13 - - vld1.8 {q0}, [r0], r1 - vld1.8 {q4}, [r2], r3 - - vabal.u8 q12, d6, d14 - vabal.u8 q13, d7, d15 - - vld1.8 {q1}, [r0], r1 - vld1.8 {q5}, [r2], r3 - - vabal.u8 q12, d0, d8 - vabal.u8 q13, d1, d9 - - vld1.8 {q2}, [r0], r1 - vld1.8 {q6}, [r2], r3 - - vabal.u8 q12, d2, d10 - vabal.u8 q13, d3, d11 - - vld1.8 {q3}, [r0], r1 - vld1.8 {q7}, [r2], r3 - - vabal.u8 q12, d4, d12 - vabal.u8 q13, d5, d13 - - vabal.u8 q12, d6, d14 - vabal.u8 q13, d7, d15 - - vadd.u16 q0, q12, q13 - - vpaddl.u16 q1, q0 - vpaddl.u32 q0, q1 - - vadd.u32 d0, d0, d1 - - vmov.32 r0, d0[0] - - bx lr - - ENDP - - END diff --git a/vp8/encoder/arm/neon/sad8_neon.asm b/vp8/encoder/arm/neon/sad8_neon.asm deleted file mode 100644 index 23ba6df93..000000000 --- a/vp8/encoder/arm/neon/sad8_neon.asm +++ /dev/null @@ -1,209 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - - EXPORT |vp8_sad8x8_neon| - EXPORT |vp8_sad8x16_neon| - EXPORT |vp8_sad4x4_neon| - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -; unsigned int vp8_sad8x8_c( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) - -|vp8_sad8x8_neon| PROC - vld1.8 {d0}, [r0], r1 - vld1.8 {d8}, [r2], r3 - - vld1.8 {d2}, [r0], r1 - vld1.8 {d10}, [r2], r3 - - vabdl.u8 q12, d0, d8 - - vld1.8 {d4}, [r0], r1 - vld1.8 {d12}, [r2], r3 - - vabal.u8 q12, d2, d10 - - vld1.8 {d6}, [r0], r1 - vld1.8 {d14}, [r2], r3 - - vabal.u8 q12, d4, d12 - - vld1.8 {d0}, [r0], r1 - vld1.8 {d8}, [r2], r3 - - vabal.u8 q12, d6, d14 - - vld1.8 {d2}, [r0], r1 - vld1.8 {d10}, [r2], r3 - - vabal.u8 q12, d0, d8 - - vld1.8 {d4}, [r0], r1 - vld1.8 {d12}, [r2], r3 - - vabal.u8 q12, d2, d10 - - vld1.8 {d6}, [r0], r1 - vld1.8 {d14}, [r2], r3 - - vabal.u8 q12, d4, d12 - vabal.u8 q12, d6, d14 - - vpaddl.u16 q1, q12 - vpaddl.u32 q0, q1 - vadd.u32 d0, d0, d1 - - vmov.32 r0, d0[0] - - bx lr - - ENDP - -;============================ -;unsigned int vp8_sad8x16_c( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) - -|vp8_sad8x16_neon| PROC - vld1.8 {d0}, [r0], r1 - vld1.8 {d8}, [r2], r3 - - vld1.8 {d2}, [r0], r1 - vld1.8 {d10}, [r2], r3 - - vabdl.u8 q12, d0, d8 - - vld1.8 {d4}, [r0], r1 - vld1.8 {d12}, [r2], r3 - - vabal.u8 q12, d2, d10 - - vld1.8 {d6}, [r0], r1 - vld1.8 {d14}, [r2], r3 - - vabal.u8 q12, d4, d12 - - vld1.8 {d0}, [r0], r1 - vld1.8 {d8}, [r2], r3 - - vabal.u8 q12, d6, d14 - - vld1.8 {d2}, [r0], r1 - vld1.8 {d10}, [r2], r3 - - vabal.u8 q12, d0, d8 - - vld1.8 {d4}, [r0], r1 - vld1.8 {d12}, [r2], r3 - - vabal.u8 q12, d2, d10 - - vld1.8 {d6}, [r0], r1 - vld1.8 {d14}, [r2], r3 - - vabal.u8 q12, d4, d12 - - vld1.8 {d0}, [r0], r1 - vld1.8 {d8}, [r2], r3 - - vabal.u8 q12, d6, d14 - - vld1.8 {d2}, [r0], r1 - vld1.8 {d10}, [r2], r3 - - vabal.u8 q12, d0, d8 - - vld1.8 {d4}, [r0], r1 - vld1.8 {d12}, [r2], r3 - - 
vabal.u8 q12, d2, d10 - - vld1.8 {d6}, [r0], r1 - vld1.8 {d14}, [r2], r3 - - vabal.u8 q12, d4, d12 - - vld1.8 {d0}, [r0], r1 - vld1.8 {d8}, [r2], r3 - - vabal.u8 q12, d6, d14 - - vld1.8 {d2}, [r0], r1 - vld1.8 {d10}, [r2], r3 - - vabal.u8 q12, d0, d8 - - vld1.8 {d4}, [r0], r1 - vld1.8 {d12}, [r2], r3 - - vabal.u8 q12, d2, d10 - - vld1.8 {d6}, [r0], r1 - vld1.8 {d14}, [r2], r3 - - vabal.u8 q12, d4, d12 - vabal.u8 q12, d6, d14 - - vpaddl.u16 q1, q12 - vpaddl.u32 q0, q1 - vadd.u32 d0, d0, d1 - - vmov.32 r0, d0[0] - - bx lr - - ENDP - -;=========================== -;unsigned int vp8_sad4x4_c( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) - -|vp8_sad4x4_neon| PROC - vld1.8 {d0}, [r0], r1 - vld1.8 {d8}, [r2], r3 - - vld1.8 {d2}, [r0], r1 - vld1.8 {d10}, [r2], r3 - - vabdl.u8 q12, d0, d8 - - vld1.8 {d4}, [r0], r1 - vld1.8 {d12}, [r2], r3 - - vabal.u8 q12, d2, d10 - - vld1.8 {d6}, [r0], r1 - vld1.8 {d14}, [r2], r3 - - vabal.u8 q12, d4, d12 - vabal.u8 q12, d6, d14 - - vpaddl.u16 d1, d24 - vpaddl.u32 d0, d1 - vmov.32 r0, d0[0] - - bx lr - - ENDP - - END diff --git a/vp8/encoder/arm/neon/shortfdct_neon.asm b/vp8/encoder/arm/neon/shortfdct_neon.asm deleted file mode 100644 index 09dd011ec..000000000 --- a/vp8/encoder/arm/neon/shortfdct_neon.asm +++ /dev/null @@ -1,221 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - - EXPORT |vp8_short_fdct4x4_neon| - EXPORT |vp8_short_fdct8x4_neon| - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=4 - - - ALIGN 16 ; enable use of @128 bit aligned loads -coeff - DCW 5352, 5352, 5352, 5352 - DCW 2217, 2217, 2217, 2217 - DCD 14500, 14500, 14500, 14500 - DCD 7500, 7500, 7500, 7500 - DCD 12000, 12000, 12000, 12000 - DCD 51000, 51000, 51000, 51000 - -;void vp8_short_fdct4x4_c(short *input, short *output, int pitch) -|vp8_short_fdct4x4_neon| PROC - - ; Part one - vld1.16 {d0}, [r0@64], r2 - adr r12, coeff - vld1.16 {d1}, [r0@64], r2 - vld1.16 {q8}, [r12@128]! ; d16=5352, d17=2217 - vld1.16 {d2}, [r0@64], r2 - vld1.32 {q9, q10}, [r12@128]! ; q9=14500, q10=7500 - vld1.16 {d3}, [r0@64], r2 - - ; transpose d0=ip[0], d1=ip[1], d2=ip[2], d3=ip[3] - vtrn.32 d0, d2 - vtrn.32 d1, d3 - vld1.32 {q11,q12}, [r12@128] ; q11=12000, q12=51000 - vtrn.16 d0, d1 - vtrn.16 d2, d3 - - vadd.s16 d4, d0, d3 ; a1 = ip[0] + ip[3] - vadd.s16 d5, d1, d2 ; b1 = ip[1] + ip[2] - vsub.s16 d6, d1, d2 ; c1 = ip[1] - ip[2] - vsub.s16 d7, d0, d3 ; d1 = ip[0] - ip[3] - - vshl.s16 q2, q2, #3 ; (a1, b1) << 3 - vshl.s16 q3, q3, #3 ; (c1, d1) << 3 - - vadd.s16 d0, d4, d5 ; op[0] = a1 + b1 - vsub.s16 d2, d4, d5 ; op[2] = a1 - b1 - - vmlal.s16 q9, d7, d16 ; d1*5352 + 14500 - vmlal.s16 q10, d7, d17 ; d1*2217 + 7500 - vmlal.s16 q9, d6, d17 ; c1*2217 + d1*5352 + 14500 - vmlsl.s16 q10, d6, d16 ; d1*2217 - c1*5352 + 7500 - - vshrn.s32 d1, q9, #12 ; op[1] = (c1*2217 + d1*5352 + 14500)>>12 - vshrn.s32 d3, q10, #12 ; op[3] = (d1*2217 - c1*5352 + 7500)>>12 - - - ; Part two - - ; transpose d0=ip[0], d1=ip[4], d2=ip[8], d3=ip[12] - vtrn.32 d0, d2 - vtrn.32 d1, d3 - vtrn.16 d0, d1 - vtrn.16 d2, d3 - - vmov.s16 d26, #7 - - vadd.s16 d4, d0, d3 ; a1 = ip[0] + ip[12] - vadd.s16 d5, d1, d2 ; b1 = ip[4] + ip[8] - vsub.s16 d6, d1, d2 ; c1 = ip[4] - ip[8] - vadd.s16 d4, d4, d26 ; a1 + 7 - vsub.s16 d7, d0, d3 ; d1 = ip[0] - ip[12] - - vadd.s16 d0, d4, d5 ; op[0] = a1 + b1 + 7 - 
vsub.s16 d2, d4, d5 ; op[8] = a1 - b1 + 7 - - vmlal.s16 q11, d7, d16 ; d1*5352 + 12000 - vmlal.s16 q12, d7, d17 ; d1*2217 + 51000 - - vceq.s16 d4, d7, #0 - - vshr.s16 d0, d0, #4 - vshr.s16 d2, d2, #4 - - vmlal.s16 q11, d6, d17 ; c1*2217 + d1*5352 + 12000 - vmlsl.s16 q12, d6, d16 ; d1*2217 - c1*5352 + 51000 - - vmvn.s16 d4, d4 - vshrn.s32 d1, q11, #16 ; op[4] = (c1*2217 + d1*5352 + 12000)>>16 - vsub.s16 d1, d1, d4 ; op[4] += (d1!=0) - vshrn.s32 d3, q12, #16 ; op[12]= (d1*2217 - c1*5352 + 51000)>>16 - - vst1.16 {q0, q1}, [r1@128] - - bx lr - - ENDP - -;void vp8_short_fdct8x4_c(short *input, short *output, int pitch) -|vp8_short_fdct8x4_neon| PROC - - ; Part one - - vld1.16 {q0}, [r0@128], r2 - adr r12, coeff - vld1.16 {q1}, [r0@128], r2 - vld1.16 {q8}, [r12@128]! ; d16=5352, d17=2217 - vld1.16 {q2}, [r0@128], r2 - vld1.32 {q9, q10}, [r12@128]! ; q9=14500, q10=7500 - vld1.16 {q3}, [r0@128], r2 - - ; transpose q0=ip[0], q1=ip[1], q2=ip[2], q3=ip[3] - vtrn.32 q0, q2 ; [A0|B0] - vtrn.32 q1, q3 ; [A1|B1] - vtrn.16 q0, q1 ; [A2|B2] - vtrn.16 q2, q3 ; [A3|B3] - - vadd.s16 q11, q0, q3 ; a1 = ip[0] + ip[3] - vadd.s16 q12, q1, q2 ; b1 = ip[1] + ip[2] - vsub.s16 q13, q1, q2 ; c1 = ip[1] - ip[2] - vsub.s16 q14, q0, q3 ; d1 = ip[0] - ip[3] - - vshl.s16 q11, q11, #3 ; a1 << 3 - vshl.s16 q12, q12, #3 ; b1 << 3 - vshl.s16 q13, q13, #3 ; c1 << 3 - vshl.s16 q14, q14, #3 ; d1 << 3 - - vadd.s16 q0, q11, q12 ; [A0 | B0] = a1 + b1 - vsub.s16 q2, q11, q12 ; [A2 | B2] = a1 - b1 - - vmov.s16 q11, q9 ; 14500 - vmov.s16 q12, q10 ; 7500 - - vmlal.s16 q9, d28, d16 ; A[1] = d1*5352 + 14500 - vmlal.s16 q10, d28, d17 ; A[3] = d1*2217 + 7500 - vmlal.s16 q11, d29, d16 ; B[1] = d1*5352 + 14500 - vmlal.s16 q12, d29, d17 ; B[3] = d1*2217 + 7500 - - vmlal.s16 q9, d26, d17 ; A[1] = c1*2217 + d1*5352 + 14500 - vmlsl.s16 q10, d26, d16 ; A[3] = d1*2217 - c1*5352 + 7500 - vmlal.s16 q11, d27, d17 ; B[1] = c1*2217 + d1*5352 + 14500 - vmlsl.s16 q12, d27, d16 ; B[3] = d1*2217 - c1*5352 + 7500 - - vshrn.s32 d2, 
q9, #12 ; A[1] = (c1*2217 + d1*5352 + 14500)>>12 - vshrn.s32 d6, q10, #12 ; A[3] = (d1*2217 - c1*5352 + 7500)>>12 - vshrn.s32 d3, q11, #12 ; B[1] = (c1*2217 + d1*5352 + 14500)>>12 - vshrn.s32 d7, q12, #12 ; B[3] = (d1*2217 - c1*5352 + 7500)>>12 - - - ; Part two - vld1.32 {q9,q10}, [r12@128] ; q9=12000, q10=51000 - - ; transpose q0=ip[0], q1=ip[4], q2=ip[8], q3=ip[12] - vtrn.32 q0, q2 ; q0=[A0 | B0] - vtrn.32 q1, q3 ; q1=[A4 | B4] - vtrn.16 q0, q1 ; q2=[A8 | B8] - vtrn.16 q2, q3 ; q3=[A12|B12] - - vmov.s16 q15, #7 - - vadd.s16 q11, q0, q3 ; a1 = ip[0] + ip[12] - vadd.s16 q12, q1, q2 ; b1 = ip[4] + ip[8] - vadd.s16 q11, q11, q15 ; a1 + 7 - vsub.s16 q13, q1, q2 ; c1 = ip[4] - ip[8] - vsub.s16 q14, q0, q3 ; d1 = ip[0] - ip[12] - - vadd.s16 q0, q11, q12 ; a1 + b1 + 7 - vsub.s16 q1, q11, q12 ; a1 - b1 + 7 - - vmov.s16 q11, q9 ; 12000 - vmov.s16 q12, q10 ; 51000 - - vshr.s16 d0, d0, #4 ; A[0] = (a1 + b1 + 7)>>4 - vshr.s16 d4, d1, #4 ; B[0] = (a1 + b1 + 7)>>4 - vshr.s16 d2, d2, #4 ; A[8] = (a1 + b1 + 7)>>4 - vshr.s16 d6, d3, #4 ; B[8] = (a1 + b1 + 7)>>4 - - - vmlal.s16 q9, d28, d16 ; A[4] = d1*5352 + 12000 - vmlal.s16 q10, d28, d17 ; A[12] = d1*2217 + 51000 - vmlal.s16 q11, d29, d16 ; B[4] = d1*5352 + 12000 - vmlal.s16 q12, d29, d17 ; B[12] = d1*2217 + 51000 - - vceq.s16 q14, q14, #0 - - vmlal.s16 q9, d26, d17 ; A[4] = c1*2217 + d1*5352 + 12000 - vmlsl.s16 q10, d26, d16 ; A[12] = d1*2217 - c1*5352 + 51000 - vmlal.s16 q11, d27, d17 ; B[4] = c1*2217 + d1*5352 + 12000 - vmlsl.s16 q12, d27, d16 ; B[12] = d1*2217 - c1*5352 + 51000 - - vmvn.s16 q14, q14 - - vshrn.s32 d1, q9, #16 ; A[4] = (c1*2217 + d1*5352 + 12000)>>16 - vshrn.s32 d3, q10, #16 ; A[12]= (d1*2217 - c1*5352 + 51000)>>16 - vsub.s16 d1, d1, d28 ; A[4] += (d1!=0) - - vshrn.s32 d5, q11, #16 ; B[4] = (c1*2217 + d1*5352 + 12000)>>16 - vshrn.s32 d7, q12, #16 ; B[12]= (d1*2217 - c1*5352 + 51000)>>16 - vsub.s16 d5, d5, d29 ; B[4] += (d1!=0) - - vst1.16 {q0, q1}, [r1@128]! ; block A - vst1.16 {q2, q3}, [r1@128]! 
; block B - - bx lr - - ENDP - - END - diff --git a/vp8/encoder/arm/neon/subtract_neon.asm b/vp8/encoder/arm/neon/subtract_neon.asm deleted file mode 100644 index 68c295062..000000000 --- a/vp8/encoder/arm/neon/subtract_neon.asm +++ /dev/null @@ -1,185 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - EXPORT |vp8_subtract_b_neon| - EXPORT |vp8_subtract_mby_neon| - EXPORT |vp8_subtract_mbuv_neon| - - INCLUDE asm_enc_offsets.asm - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -;void vp8_subtract_b_neon(BLOCK *be, BLOCKD *bd, int pitch) -|vp8_subtract_b_neon| PROC - - stmfd sp!, {r4-r7} - - ldr r3, [r0, #vp8_block_base_src] - ldr r4, [r0, #vp8_block_src] - ldr r5, [r0, #vp8_block_src_diff] - ldr r3, [r3] - ldr r6, [r0, #vp8_block_src_stride] - add r3, r3, r4 ; src = *base_src + src - ldr r7, [r1, #vp8_blockd_predictor] - - vld1.8 {d0}, [r3], r6 ;load src - vld1.8 {d1}, [r7], r2 ;load pred - vld1.8 {d2}, [r3], r6 - vld1.8 {d3}, [r7], r2 - vld1.8 {d4}, [r3], r6 - vld1.8 {d5}, [r7], r2 - vld1.8 {d6}, [r3], r6 - vld1.8 {d7}, [r7], r2 - - vsubl.u8 q10, d0, d1 - vsubl.u8 q11, d2, d3 - vsubl.u8 q12, d4, d5 - vsubl.u8 q13, d6, d7 - - mov r2, r2, lsl #1 - - vst1.16 {d20}, [r5], r2 ;store diff - vst1.16 {d22}, [r5], r2 - vst1.16 {d24}, [r5], r2 - vst1.16 {d26}, [r5], r2 - - ldmfd sp!, {r4-r7} - bx lr - - ENDP - - -;========================================== -;void vp8_subtract_mby_neon(short *diff, unsigned char *src, unsigned char *pred, int stride) -|vp8_subtract_mby_neon| PROC - mov r12, #4 - -subtract_mby_loop - vld1.8 {q0}, [r1], r3 ;load src - vld1.8 {q1}, [r2]! 
;load pred - vld1.8 {q2}, [r1], r3 - vld1.8 {q3}, [r2]! - vld1.8 {q4}, [r1], r3 - vld1.8 {q5}, [r2]! - vld1.8 {q6}, [r1], r3 - vld1.8 {q7}, [r2]! - - vsubl.u8 q8, d0, d2 - vsubl.u8 q9, d1, d3 - vsubl.u8 q10, d4, d6 - vsubl.u8 q11, d5, d7 - vsubl.u8 q12, d8, d10 - vsubl.u8 q13, d9, d11 - vsubl.u8 q14, d12, d14 - vsubl.u8 q15, d13, d15 - - vst1.16 {q8}, [r0]! ;store diff - vst1.16 {q9}, [r0]! - vst1.16 {q10}, [r0]! - vst1.16 {q11}, [r0]! - vst1.16 {q12}, [r0]! - vst1.16 {q13}, [r0]! - vst1.16 {q14}, [r0]! - vst1.16 {q15}, [r0]! - - subs r12, r12, #1 - bne subtract_mby_loop - - bx lr - ENDP - -;================================= -;void vp8_subtract_mbuv_neon(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride) -|vp8_subtract_mbuv_neon| PROC - ldr r12, [sp] - -;u - add r0, r0, #512 ; short *udiff = diff + 256; - add r3, r3, #256 ; unsigned char *upred = pred + 256; - - vld1.8 {d0}, [r1], r12 ;load src - vld1.8 {d1}, [r3]! ;load pred - vld1.8 {d2}, [r1], r12 - vld1.8 {d3}, [r3]! - vld1.8 {d4}, [r1], r12 - vld1.8 {d5}, [r3]! - vld1.8 {d6}, [r1], r12 - vld1.8 {d7}, [r3]! - vld1.8 {d8}, [r1], r12 - vld1.8 {d9}, [r3]! - vld1.8 {d10}, [r1], r12 - vld1.8 {d11}, [r3]! - vld1.8 {d12}, [r1], r12 - vld1.8 {d13}, [r3]! - vld1.8 {d14}, [r1], r12 - vld1.8 {d15}, [r3]! - - vsubl.u8 q8, d0, d1 - vsubl.u8 q9, d2, d3 - vsubl.u8 q10, d4, d5 - vsubl.u8 q11, d6, d7 - vsubl.u8 q12, d8, d9 - vsubl.u8 q13, d10, d11 - vsubl.u8 q14, d12, d13 - vsubl.u8 q15, d14, d15 - - vst1.16 {q8}, [r0]! ;store diff - vst1.16 {q9}, [r0]! - vst1.16 {q10}, [r0]! - vst1.16 {q11}, [r0]! - vst1.16 {q12}, [r0]! - vst1.16 {q13}, [r0]! - vst1.16 {q14}, [r0]! - vst1.16 {q15}, [r0]! - -;v - vld1.8 {d0}, [r2], r12 ;load src - vld1.8 {d1}, [r3]! ;load pred - vld1.8 {d2}, [r2], r12 - vld1.8 {d3}, [r3]! - vld1.8 {d4}, [r2], r12 - vld1.8 {d5}, [r3]! - vld1.8 {d6}, [r2], r12 - vld1.8 {d7}, [r3]! - vld1.8 {d8}, [r2], r12 - vld1.8 {d9}, [r3]! - vld1.8 {d10}, [r2], r12 - vld1.8 {d11}, [r3]! 
- vld1.8 {d12}, [r2], r12 - vld1.8 {d13}, [r3]! - vld1.8 {d14}, [r2], r12 - vld1.8 {d15}, [r3]! - - vsubl.u8 q8, d0, d1 - vsubl.u8 q9, d2, d3 - vsubl.u8 q10, d4, d5 - vsubl.u8 q11, d6, d7 - vsubl.u8 q12, d8, d9 - vsubl.u8 q13, d10, d11 - vsubl.u8 q14, d12, d13 - vsubl.u8 q15, d14, d15 - - vst1.16 {q8}, [r0]! ;store diff - vst1.16 {q9}, [r0]! - vst1.16 {q10}, [r0]! - vst1.16 {q11}, [r0]! - vst1.16 {q12}, [r0]! - vst1.16 {q13}, [r0]! - vst1.16 {q14}, [r0]! - vst1.16 {q15}, [r0]! - - bx lr - ENDP - - END diff --git a/vp8/encoder/arm/neon/variance_neon.asm b/vp8/encoder/arm/neon/variance_neon.asm deleted file mode 100644 index 901934c61..000000000 --- a/vp8/encoder/arm/neon/variance_neon.asm +++ /dev/null @@ -1,276 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp9_variance16x16_neon| - EXPORT |vp9_variance16x8_neon| - EXPORT |vp9_variance8x16_neon| - EXPORT |vp9_variance8x8_neon| - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -; r0 unsigned char *src_ptr -; r1 int source_stride -; r2 unsigned char *ref_ptr -; r3 int recon_stride -; stack unsigned int *sse -|vp9_variance16x16_neon| PROC - vmov.i8 q8, #0 ;q8 - sum - vmov.i8 q9, #0 ;q9, q10 - sse - vmov.i8 q10, #0 - - mov r12, #8 - -variance16x16_neon_loop - vld1.8 {q0}, [r0], r1 ;Load up source and reference - vld1.8 {q2}, [r2], r3 - vld1.8 {q1}, [r0], r1 - vld1.8 {q3}, [r2], r3 - - vsubl.u8 q11, d0, d4 ;calculate diff - vsubl.u8 q12, d1, d5 - vsubl.u8 q13, d2, d6 - vsubl.u8 q14, d3, d7 - - ;VPADAL adds adjacent pairs of elements of a vector, and accumulates - ;the results into the elements of the destination vector. 
The explanation - ;in ARM guide is wrong. - vpadal.s16 q8, q11 ;calculate sum - vmlal.s16 q9, d22, d22 ;calculate sse - vmlal.s16 q10, d23, d23 - - subs r12, r12, #1 - - vpadal.s16 q8, q12 - vmlal.s16 q9, d24, d24 - vmlal.s16 q10, d25, d25 - vpadal.s16 q8, q13 - vmlal.s16 q9, d26, d26 - vmlal.s16 q10, d27, d27 - vpadal.s16 q8, q14 - vmlal.s16 q9, d28, d28 - vmlal.s16 q10, d29, d29 - - bne variance16x16_neon_loop - - vadd.u32 q10, q9, q10 ;accumulate sse - vpaddl.s32 q0, q8 ;accumulate sum - - ldr r12, [sp] ;load *sse from stack - - vpaddl.u32 q1, q10 - vadd.s64 d0, d0, d1 - vadd.u64 d1, d2, d3 - - ;vmov.32 r0, d0[0] ;this instruction costs a lot - ;vmov.32 r1, d1[0] - ;mul r0, r0, r0 - ;str r1, [r12] - ;sub r0, r1, r0, asr #8 - - ;sum is in [-255x256, 255x256]. sumxsum is 32-bit. Shift to right should - ;have sign-bit exension, which is vshr.s. Have to use s32 to make it right. - vmull.s32 q5, d0, d0 - vst1.32 {d1[0]}, [r12] ;store sse - vshr.s32 d10, d10, #8 - vsub.s32 d0, d1, d10 - - vmov.32 r0, d0[0] ;return - bx lr - - ENDP - -;================================ -;unsigned int vp9_variance16x8_c( -; unsigned char *src_ptr, -; int source_stride, -; unsigned char *ref_ptr, -; int recon_stride, -; unsigned int *sse) -|vp9_variance16x8_neon| PROC - vmov.i8 q8, #0 ;q8 - sum - vmov.i8 q9, #0 ;q9, q10 - sse - vmov.i8 q10, #0 - - mov r12, #4 - -variance16x8_neon_loop - vld1.8 {q0}, [r0], r1 ;Load up source and reference - vld1.8 {q2}, [r2], r3 - vld1.8 {q1}, [r0], r1 - vld1.8 {q3}, [r2], r3 - - vsubl.u8 q11, d0, d4 ;calculate diff - vsubl.u8 q12, d1, d5 - vsubl.u8 q13, d2, d6 - vsubl.u8 q14, d3, d7 - - vpadal.s16 q8, q11 ;calculate sum - vmlal.s16 q9, d22, d22 ;calculate sse - vmlal.s16 q10, d23, d23 - - subs r12, r12, #1 - - vpadal.s16 q8, q12 - vmlal.s16 q9, d24, d24 - vmlal.s16 q10, d25, d25 - vpadal.s16 q8, q13 - vmlal.s16 q9, d26, d26 - vmlal.s16 q10, d27, d27 - vpadal.s16 q8, q14 - vmlal.s16 q9, d28, d28 - vmlal.s16 q10, d29, d29 - - bne variance16x8_neon_loop - - 
vadd.u32 q10, q9, q10 ;accumulate sse - vpaddl.s32 q0, q8 ;accumulate sum - - ldr r12, [sp] ;load *sse from stack - - vpaddl.u32 q1, q10 - vadd.s64 d0, d0, d1 - vadd.u64 d1, d2, d3 - - vmull.s32 q5, d0, d0 - vst1.32 {d1[0]}, [r12] ;store sse - vshr.s32 d10, d10, #7 - vsub.s32 d0, d1, d10 - - vmov.32 r0, d0[0] ;return - bx lr - - ENDP - -;================================= -;unsigned int vp9_variance8x16_c( -; unsigned char *src_ptr, -; int source_stride, -; unsigned char *ref_ptr, -; int recon_stride, -; unsigned int *sse) - -|vp9_variance8x16_neon| PROC - vmov.i8 q8, #0 ;q8 - sum - vmov.i8 q9, #0 ;q9, q10 - sse - vmov.i8 q10, #0 - - mov r12, #8 - -variance8x16_neon_loop - vld1.8 {d0}, [r0], r1 ;Load up source and reference - vld1.8 {d4}, [r2], r3 - vld1.8 {d2}, [r0], r1 - vld1.8 {d6}, [r2], r3 - - vsubl.u8 q11, d0, d4 ;calculate diff - vsubl.u8 q12, d2, d6 - - vpadal.s16 q8, q11 ;calculate sum - vmlal.s16 q9, d22, d22 ;calculate sse - vmlal.s16 q10, d23, d23 - - subs r12, r12, #1 - - vpadal.s16 q8, q12 - vmlal.s16 q9, d24, d24 - vmlal.s16 q10, d25, d25 - - bne variance8x16_neon_loop - - vadd.u32 q10, q9, q10 ;accumulate sse - vpaddl.s32 q0, q8 ;accumulate sum - - ldr r12, [sp] ;load *sse from stack - - vpaddl.u32 q1, q10 - vadd.s64 d0, d0, d1 - vadd.u64 d1, d2, d3 - - vmull.s32 q5, d0, d0 - vst1.32 {d1[0]}, [r12] ;store sse - vshr.s32 d10, d10, #7 - vsub.s32 d0, d1, d10 - - vmov.32 r0, d0[0] ;return - bx lr - - ENDP - -;================================== -; r0 unsigned char *src_ptr -; r1 int source_stride -; r2 unsigned char *ref_ptr -; r3 int recon_stride -; stack unsigned int *sse -|vp9_variance8x8_neon| PROC - vmov.i8 q8, #0 ;q8 - sum - vmov.i8 q9, #0 ;q9, q10 - sse - vmov.i8 q10, #0 - - mov r12, #2 - -variance8x8_neon_loop - vld1.8 {d0}, [r0], r1 ;Load up source and reference - vld1.8 {d4}, [r2], r3 - vld1.8 {d1}, [r0], r1 - vld1.8 {d5}, [r2], r3 - vld1.8 {d2}, [r0], r1 - vld1.8 {d6}, [r2], r3 - vld1.8 {d3}, [r0], r1 - vld1.8 {d7}, [r2], r3 - - vsubl.u8 q11, 
d0, d4 ;calculate diff - vsubl.u8 q12, d1, d5 - vsubl.u8 q13, d2, d6 - vsubl.u8 q14, d3, d7 - - vpadal.s16 q8, q11 ;calculate sum - vmlal.s16 q9, d22, d22 ;calculate sse - vmlal.s16 q10, d23, d23 - - subs r12, r12, #1 - - vpadal.s16 q8, q12 - vmlal.s16 q9, d24, d24 - vmlal.s16 q10, d25, d25 - vpadal.s16 q8, q13 - vmlal.s16 q9, d26, d26 - vmlal.s16 q10, d27, d27 - vpadal.s16 q8, q14 - vmlal.s16 q9, d28, d28 - vmlal.s16 q10, d29, d29 - - bne variance8x8_neon_loop - - vadd.u32 q10, q9, q10 ;accumulate sse - vpaddl.s32 q0, q8 ;accumulate sum - - ldr r12, [sp] ;load *sse from stack - - vpaddl.u32 q1, q10 - vadd.s64 d0, d0, d1 - vadd.u64 d1, d2, d3 - - vmull.s32 q5, d0, d0 - vst1.32 {d1[0]}, [r12] ;store sse - vshr.s32 d10, d10, #6 - vsub.s32 d0, d1, d10 - - vmov.32 r0, d0[0] ;return - bx lr - - ENDP - - END diff --git a/vp8/encoder/arm/neon/vp8_memcpy_neon.asm b/vp8/encoder/arm/neon/vp8_memcpy_neon.asm deleted file mode 100644 index b0450e523..000000000 --- a/vp8/encoder/arm/neon/vp8_memcpy_neon.asm +++ /dev/null @@ -1,68 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_memcpy_neon| - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -;========================================= -;void vp8_memcpy_neon(unsigned char *dst_ptr, unsigned char *src_ptr, int sz); -|vp8_memcpy_neon| PROC - ;pld [r1] ;preload pred data - ;pld [r1, #128] - ;pld [r1, #256] - ;pld [r1, #384] - - mov r12, r2, lsr #8 ;copy 256 bytes data at one time - -memcpy_neon_loop - vld1.8 {q0, q1}, [r1]! ;load src data - subs r12, r12, #1 - vld1.8 {q2, q3}, [r1]! - vst1.8 {q0, q1}, [r0]! 
;copy to dst_ptr - vld1.8 {q4, q5}, [r1]! - vst1.8 {q2, q3}, [r0]! - vld1.8 {q6, q7}, [r1]! - vst1.8 {q4, q5}, [r0]! - vld1.8 {q8, q9}, [r1]! - vst1.8 {q6, q7}, [r0]! - vld1.8 {q10, q11}, [r1]! - vst1.8 {q8, q9}, [r0]! - vld1.8 {q12, q13}, [r1]! - vst1.8 {q10, q11}, [r0]! - vld1.8 {q14, q15}, [r1]! - vst1.8 {q12, q13}, [r0]! - vst1.8 {q14, q15}, [r0]! - - ;pld [r1] ;preload pred data -- need to adjust for real device - ;pld [r1, #128] - ;pld [r1, #256] - ;pld [r1, #384] - - bne memcpy_neon_loop - - ands r3, r2, #0xff ;extra copy - beq done_copy_neon_loop - -extra_copy_neon_loop - vld1.8 {q0}, [r1]! ;load src data - subs r3, r3, #16 - vst1.8 {q0}, [r0]! - bne extra_copy_neon_loop - -done_copy_neon_loop - bx lr - ENDP - - END diff --git a/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm b/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm deleted file mode 100644 index 4d1512d40..000000000 --- a/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm +++ /dev/null @@ -1,116 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_mse16x16_neon| - EXPORT |vp8_get4x4sse_cs_neon| - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -;============================ -; r0 unsigned char *src_ptr -; r1 int source_stride -; r2 unsigned char *ref_ptr -; r3 int recon_stride -; stack unsigned int *sse -;note: in this function, sum is never used. So, we can remove this part of calculation -;from vp9_variance(). 
- -|vp8_mse16x16_neon| PROC - vmov.i8 q7, #0 ;q7, q8, q9, q10 - sse - vmov.i8 q8, #0 - vmov.i8 q9, #0 - vmov.i8 q10, #0 - - mov r12, #8 - -mse16x16_neon_loop - vld1.8 {q0}, [r0], r1 ;Load up source and reference - vld1.8 {q2}, [r2], r3 - vld1.8 {q1}, [r0], r1 - vld1.8 {q3}, [r2], r3 - - vsubl.u8 q11, d0, d4 - vsubl.u8 q12, d1, d5 - vsubl.u8 q13, d2, d6 - vsubl.u8 q14, d3, d7 - - vmlal.s16 q7, d22, d22 - vmlal.s16 q8, d23, d23 - - subs r12, r12, #1 - - vmlal.s16 q9, d24, d24 - vmlal.s16 q10, d25, d25 - vmlal.s16 q7, d26, d26 - vmlal.s16 q8, d27, d27 - vmlal.s16 q9, d28, d28 - vmlal.s16 q10, d29, d29 - - bne mse16x16_neon_loop - - vadd.u32 q7, q7, q8 - vadd.u32 q9, q9, q10 - - ldr r12, [sp] ;load *sse from stack - - vadd.u32 q10, q7, q9 - vpaddl.u32 q1, q10 - vadd.u64 d0, d2, d3 - - vst1.32 {d0[0]}, [r12] - vmov.32 r0, d0[0] - - bx lr - - ENDP - - -;============================= -; r0 unsigned char *src_ptr, -; r1 int source_stride, -; r2 unsigned char *ref_ptr, -; r3 int recon_stride -|vp8_get4x4sse_cs_neon| PROC - vld1.8 {d0}, [r0], r1 ;Load up source and reference - vld1.8 {d4}, [r2], r3 - vld1.8 {d1}, [r0], r1 - vld1.8 {d5}, [r2], r3 - vld1.8 {d2}, [r0], r1 - vld1.8 {d6}, [r2], r3 - vld1.8 {d3}, [r0], r1 - vld1.8 {d7}, [r2], r3 - - vsubl.u8 q11, d0, d4 - vsubl.u8 q12, d1, d5 - vsubl.u8 q13, d2, d6 - vsubl.u8 q14, d3, d7 - - vmull.s16 q7, d22, d22 - vmull.s16 q8, d24, d24 - vmull.s16 q9, d26, d26 - vmull.s16 q10, d28, d28 - - vadd.u32 q7, q7, q8 - vadd.u32 q9, q9, q10 - vadd.u32 q9, q7, q9 - - vpaddl.u32 q1, q9 - vadd.u64 d0, d2, d3 - - vmov.32 r0, d0[0] - bx lr - - ENDP - - END diff --git a/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm b/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm deleted file mode 100644 index 22266297a..000000000 --- a/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm +++ /dev/null @@ -1,103 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
-; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_short_walsh4x4_neon| - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -;void vp8_short_walsh4x4_neon(short *input, short *output, int pitch) -; r0 short *input, -; r1 short *output, -; r2 int pitch -|vp8_short_walsh4x4_neon| PROC - - vld1.16 {d0}, [r0@64], r2 ; load input - vld1.16 {d1}, [r0@64], r2 - vld1.16 {d2}, [r0@64], r2 - vld1.16 {d3}, [r0@64] - - ;First for-loop - ;transpose d0, d1, d2, d3. Then, d0=ip[0], d1=ip[1], d2=ip[2], d3=ip[3] - vtrn.32 d0, d2 - vtrn.32 d1, d3 - - vmov.s32 q15, #3 ; add 3 to all values - - vtrn.16 d0, d1 - vtrn.16 d2, d3 - - vadd.s16 d4, d0, d2 ; ip[0] + ip[2] - vadd.s16 d5, d1, d3 ; ip[1] + ip[3] - vsub.s16 d6, d1, d3 ; ip[1] - ip[3] - vsub.s16 d7, d0, d2 ; ip[0] - ip[2] - - vshl.s16 d4, d4, #2 ; a1 = (ip[0] + ip[2]) << 2 - vshl.s16 d5, d5, #2 ; d1 = (ip[1] + ip[3]) << 2 - vshl.s16 d6, d6, #2 ; c1 = (ip[1] - ip[3]) << 2 - vceq.s16 d16, d4, #0 ; a1 == 0 - vshl.s16 d7, d7, #2 ; b1 = (ip[0] - ip[2]) << 2 - - vadd.s16 d0, d4, d5 ; a1 + d1 - vmvn d16, d16 ; a1 != 0 - vsub.s16 d3, d4, d5 ; op[3] = a1 - d1 - vadd.s16 d1, d7, d6 ; op[1] = b1 + c1 - vsub.s16 d2, d7, d6 ; op[2] = b1 - c1 - vsub.s16 d0, d0, d16 ; op[0] = a1 + d1 + (a1 != 0) - - ;Second for-loop - ;transpose d0, d1, d2, d3, Then, d0=ip[0], d1=ip[4], d2=ip[8], d3=ip[12] - vtrn.32 d1, d3 - vtrn.32 d0, d2 - vtrn.16 d2, d3 - vtrn.16 d0, d1 - - vaddl.s16 q8, d0, d2 ; a1 = ip[0]+ip[8] - vaddl.s16 q9, d1, d3 ; d1 = ip[4]+ip[12] - vsubl.s16 q10, d1, d3 ; c1 = ip[4]-ip[12] - vsubl.s16 q11, d0, d2 ; b1 = ip[0]-ip[8] - - vadd.s32 q0, q8, q9 ; a2 = a1 + d1 - vadd.s32 q1, q11, q10 ; b2 = b1 + c1 - vsub.s32 q2, q11, q10 ; 
c2 = b1 - c1 - vsub.s32 q3, q8, q9 ; d2 = a1 - d1 - - vclt.s32 q8, q0, #0 - vclt.s32 q9, q1, #0 - vclt.s32 q10, q2, #0 - vclt.s32 q11, q3, #0 - - ; subtract -1 (or 0) - vsub.s32 q0, q0, q8 ; a2 += a2 < 0 - vsub.s32 q1, q1, q9 ; b2 += b2 < 0 - vsub.s32 q2, q2, q10 ; c2 += c2 < 0 - vsub.s32 q3, q3, q11 ; d2 += d2 < 0 - - vadd.s32 q8, q0, q15 ; a2 + 3 - vadd.s32 q9, q1, q15 ; b2 + 3 - vadd.s32 q10, q2, q15 ; c2 + 3 - vadd.s32 q11, q3, q15 ; d2 + 3 - - ; vrshrn? would add 1 << 3-1 = 2 - vshrn.s32 d0, q8, #3 - vshrn.s32 d1, q9, #3 - vshrn.s32 d2, q10, #3 - vshrn.s32 d3, q11, #3 - - vst1.16 {q0, q1}, [r1@128] - - bx lr - - ENDP - - END diff --git a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm deleted file mode 100644 index 8bb0734d1..000000000 --- a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm +++ /dev/null @@ -1,425 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp9_sub_pixel_variance16x16_neon_func| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -; r0 unsigned char *src_ptr, -; r1 int src_pixels_per_line, -; r2 int xoffset, -; r3 int yoffset, -; stack(r4) unsigned char *dst_ptr, -; stack(r5) int dst_pixels_per_line, -; stack(r6) unsigned int *sse -;note: most of the code is copied from bilinear_predict16x16_neon and vp9_variance16x16_neon. 
- -|vp9_sub_pixel_variance16x16_neon_func| PROC - push {r4-r6, lr} - - ldr r12, _BilinearTaps_coeff_ - ldr r4, [sp, #16] ;load *dst_ptr from stack - ldr r5, [sp, #20] ;load dst_pixels_per_line from stack - ldr r6, [sp, #24] ;load *sse from stack - - cmp r2, #0 ;skip first_pass filter if xoffset=0 - beq secondpass_bfilter16x16_only - - add r2, r12, r2, lsl #3 ;calculate filter location - - cmp r3, #0 ;skip second_pass filter if yoffset=0 - - vld1.s32 {d31}, [r2] ;load first_pass filter - - beq firstpass_bfilter16x16_only - - sub sp, sp, #272 ;reserve space on stack for temporary storage - vld1.u8 {d2, d3, d4}, [r0], r1 ;load src data - mov lr, sp - vld1.u8 {d5, d6, d7}, [r0], r1 - - mov r2, #3 ;loop counter - vld1.u8 {d8, d9, d10}, [r0], r1 - - vdup.8 d0, d31[0] ;first_pass filter (d0 d1) - vld1.u8 {d11, d12, d13}, [r0], r1 - - vdup.8 d1, d31[4] - -;First Pass: output_height lines x output_width columns (17x16) -vp8e_filt_blk2d_fp16x16_loop_neon - pld [r0] - pld [r0, r1] - pld [r0, r1, lsl #1] - - vmull.u8 q7, d2, d0 ;(src_ptr[0] * Filter[0]) - vmull.u8 q8, d3, d0 - vmull.u8 q9, d5, d0 - vmull.u8 q10, d6, d0 - vmull.u8 q11, d8, d0 - vmull.u8 q12, d9, d0 - vmull.u8 q13, d11, d0 - vmull.u8 q14, d12, d0 - - vext.8 d2, d2, d3, #1 ;construct src_ptr[1] - vext.8 d5, d5, d6, #1 - vext.8 d8, d8, d9, #1 - vext.8 d11, d11, d12, #1 - - vmlal.u8 q7, d2, d1 ;(src_ptr[0] * Filter[1]) - vmlal.u8 q9, d5, d1 - vmlal.u8 q11, d8, d1 - vmlal.u8 q13, d11, d1 - - vext.8 d3, d3, d4, #1 - vext.8 d6, d6, d7, #1 - vext.8 d9, d9, d10, #1 - vext.8 d12, d12, d13, #1 - - vmlal.u8 q8, d3, d1 ;(src_ptr[0] * Filter[1]) - vmlal.u8 q10, d6, d1 - vmlal.u8 q12, d9, d1 - vmlal.u8 q14, d12, d1 - - subs r2, r2, #1 - - vqrshrn.u16 d14, q7, #7 ;shift/round/saturate to u8 - vqrshrn.u16 d15, q8, #7 - vqrshrn.u16 d16, q9, #7 - vqrshrn.u16 d17, q10, #7 - vqrshrn.u16 d18, q11, #7 - vqrshrn.u16 d19, q12, #7 - vqrshrn.u16 d20, q13, #7 - - vld1.u8 {d2, d3, d4}, [r0], r1 ;load src data - vqrshrn.u16 d21, q14, #7 - 
vld1.u8 {d5, d6, d7}, [r0], r1 - - vst1.u8 {d14, d15, d16, d17}, [lr]! ;store result - vld1.u8 {d8, d9, d10}, [r0], r1 - vst1.u8 {d18, d19, d20, d21}, [lr]! - vld1.u8 {d11, d12, d13}, [r0], r1 - - bne vp8e_filt_blk2d_fp16x16_loop_neon - -;First-pass filtering for rest 5 lines - vld1.u8 {d14, d15, d16}, [r0], r1 - - vmull.u8 q9, d2, d0 ;(src_ptr[0] * Filter[0]) - vmull.u8 q10, d3, d0 - vmull.u8 q11, d5, d0 - vmull.u8 q12, d6, d0 - vmull.u8 q13, d8, d0 - vmull.u8 q14, d9, d0 - - vext.8 d2, d2, d3, #1 ;construct src_ptr[1] - vext.8 d5, d5, d6, #1 - vext.8 d8, d8, d9, #1 - - vmlal.u8 q9, d2, d1 ;(src_ptr[0] * Filter[1]) - vmlal.u8 q11, d5, d1 - vmlal.u8 q13, d8, d1 - - vext.8 d3, d3, d4, #1 - vext.8 d6, d6, d7, #1 - vext.8 d9, d9, d10, #1 - - vmlal.u8 q10, d3, d1 ;(src_ptr[0] * Filter[1]) - vmlal.u8 q12, d6, d1 - vmlal.u8 q14, d9, d1 - - vmull.u8 q1, d11, d0 - vmull.u8 q2, d12, d0 - vmull.u8 q3, d14, d0 - vmull.u8 q4, d15, d0 - - vext.8 d11, d11, d12, #1 ;construct src_ptr[1] - vext.8 d14, d14, d15, #1 - - vmlal.u8 q1, d11, d1 ;(src_ptr[0] * Filter[1]) - vmlal.u8 q3, d14, d1 - - vext.8 d12, d12, d13, #1 - vext.8 d15, d15, d16, #1 - - vmlal.u8 q2, d12, d1 ;(src_ptr[0] * Filter[1]) - vmlal.u8 q4, d15, d1 - - vqrshrn.u16 d10, q9, #7 ;shift/round/saturate to u8 - vqrshrn.u16 d11, q10, #7 - vqrshrn.u16 d12, q11, #7 - vqrshrn.u16 d13, q12, #7 - vqrshrn.u16 d14, q13, #7 - vqrshrn.u16 d15, q14, #7 - vqrshrn.u16 d16, q1, #7 - vqrshrn.u16 d17, q2, #7 - vqrshrn.u16 d18, q3, #7 - vqrshrn.u16 d19, q4, #7 - - vst1.u8 {d10, d11, d12, d13}, [lr]! ;store result - vst1.u8 {d14, d15, d16, d17}, [lr]! - vst1.u8 {d18, d19}, [lr]! - -;Second pass: 16x16 -;secondpass_filter - add r3, r12, r3, lsl #3 - sub lr, lr, #272 - - vld1.u32 {d31}, [r3] ;load second_pass filter - - sub sp, sp, #256 - mov r3, sp - - vld1.u8 {d22, d23}, [lr]! 
;load src data - - vdup.8 d0, d31[0] ;second_pass filter parameters (d0 d1) - vdup.8 d1, d31[4] - mov r12, #4 ;loop counter - -vp8e_filt_blk2d_sp16x16_loop_neon - vld1.u8 {d24, d25}, [lr]! - vmull.u8 q1, d22, d0 ;(src_ptr[0] * Filter[0]) - vld1.u8 {d26, d27}, [lr]! - vmull.u8 q2, d23, d0 - vld1.u8 {d28, d29}, [lr]! - vmull.u8 q3, d24, d0 - vld1.u8 {d30, d31}, [lr]! - - vmull.u8 q4, d25, d0 - vmull.u8 q5, d26, d0 - vmull.u8 q6, d27, d0 - vmull.u8 q7, d28, d0 - vmull.u8 q8, d29, d0 - - vmlal.u8 q1, d24, d1 ;(src_ptr[pixel_step] * Filter[1]) - vmlal.u8 q2, d25, d1 - vmlal.u8 q3, d26, d1 - vmlal.u8 q4, d27, d1 - vmlal.u8 q5, d28, d1 - vmlal.u8 q6, d29, d1 - vmlal.u8 q7, d30, d1 - vmlal.u8 q8, d31, d1 - - subs r12, r12, #1 - - vqrshrn.u16 d2, q1, #7 ;shift/round/saturate to u8 - vqrshrn.u16 d3, q2, #7 - vqrshrn.u16 d4, q3, #7 - vqrshrn.u16 d5, q4, #7 - vqrshrn.u16 d6, q5, #7 - vqrshrn.u16 d7, q6, #7 - vqrshrn.u16 d8, q7, #7 - vqrshrn.u16 d9, q8, #7 - - vst1.u8 {d2, d3}, [r3]! ;store result - vst1.u8 {d4, d5}, [r3]! - vst1.u8 {d6, d7}, [r3]! - vmov q11, q15 - vst1.u8 {d8, d9}, [r3]! 
- - bne vp8e_filt_blk2d_sp16x16_loop_neon - - b sub_pixel_variance16x16_neon - -;-------------------- -firstpass_bfilter16x16_only - mov r2, #4 ;loop counter - sub sp, sp, #528 ;reserve space on stack for temporary storage - vdup.8 d0, d31[0] ;first_pass filter (d0 d1) - vdup.8 d1, d31[4] - mov r3, sp - -;First Pass: output_height lines x output_width columns (16x16) -vp8e_filt_blk2d_fpo16x16_loop_neon - vld1.u8 {d2, d3, d4}, [r0], r1 ;load src data - vld1.u8 {d5, d6, d7}, [r0], r1 - vld1.u8 {d8, d9, d10}, [r0], r1 - vld1.u8 {d11, d12, d13}, [r0], r1 - - pld [r0] - pld [r0, r1] - pld [r0, r1, lsl #1] - - vmull.u8 q7, d2, d0 ;(src_ptr[0] * Filter[0]) - vmull.u8 q8, d3, d0 - vmull.u8 q9, d5, d0 - vmull.u8 q10, d6, d0 - vmull.u8 q11, d8, d0 - vmull.u8 q12, d9, d0 - vmull.u8 q13, d11, d0 - vmull.u8 q14, d12, d0 - - vext.8 d2, d2, d3, #1 ;construct src_ptr[1] - vext.8 d5, d5, d6, #1 - vext.8 d8, d8, d9, #1 - vext.8 d11, d11, d12, #1 - - vmlal.u8 q7, d2, d1 ;(src_ptr[0] * Filter[1]) - vmlal.u8 q9, d5, d1 - vmlal.u8 q11, d8, d1 - vmlal.u8 q13, d11, d1 - - vext.8 d3, d3, d4, #1 - vext.8 d6, d6, d7, #1 - vext.8 d9, d9, d10, #1 - vext.8 d12, d12, d13, #1 - - vmlal.u8 q8, d3, d1 ;(src_ptr[0] * Filter[1]) - vmlal.u8 q10, d6, d1 - vmlal.u8 q12, d9, d1 - vmlal.u8 q14, d12, d1 - - subs r2, r2, #1 - - vqrshrn.u16 d14, q7, #7 ;shift/round/saturate to u8 - vqrshrn.u16 d15, q8, #7 - vqrshrn.u16 d16, q9, #7 - vqrshrn.u16 d17, q10, #7 - vqrshrn.u16 d18, q11, #7 - vqrshrn.u16 d19, q12, #7 - vqrshrn.u16 d20, q13, #7 - vst1.u8 {d14, d15}, [r3]! ;store result - vqrshrn.u16 d21, q14, #7 - - vst1.u8 {d16, d17}, [r3]! - vst1.u8 {d18, d19}, [r3]! - vst1.u8 {d20, d21}, [r3]! 
- - bne vp8e_filt_blk2d_fpo16x16_loop_neon - - b sub_pixel_variance16x16_neon - -;--------------------- -secondpass_bfilter16x16_only -;Second pass: 16x16 -;secondpass_filter - sub sp, sp, #528 ;reserve space on stack for temporary storage - add r3, r12, r3, lsl #3 - mov r12, #4 ;loop counter - vld1.u32 {d31}, [r3] ;load second_pass filter - vld1.u8 {d22, d23}, [r0], r1 ;load src data - mov r3, sp - - vdup.8 d0, d31[0] ;second_pass filter parameters (d0 d1) - vdup.8 d1, d31[4] - -vp8e_filt_blk2d_spo16x16_loop_neon - vld1.u8 {d24, d25}, [r0], r1 - vmull.u8 q1, d22, d0 ;(src_ptr[0] * Filter[0]) - vld1.u8 {d26, d27}, [r0], r1 - vmull.u8 q2, d23, d0 - vld1.u8 {d28, d29}, [r0], r1 - vmull.u8 q3, d24, d0 - vld1.u8 {d30, d31}, [r0], r1 - - vmull.u8 q4, d25, d0 - vmull.u8 q5, d26, d0 - vmull.u8 q6, d27, d0 - vmull.u8 q7, d28, d0 - vmull.u8 q8, d29, d0 - - vmlal.u8 q1, d24, d1 ;(src_ptr[pixel_step] * Filter[1]) - vmlal.u8 q2, d25, d1 - vmlal.u8 q3, d26, d1 - vmlal.u8 q4, d27, d1 - vmlal.u8 q5, d28, d1 - vmlal.u8 q6, d29, d1 - vmlal.u8 q7, d30, d1 - vmlal.u8 q8, d31, d1 - - vqrshrn.u16 d2, q1, #7 ;shift/round/saturate to u8 - vqrshrn.u16 d3, q2, #7 - vqrshrn.u16 d4, q3, #7 - vqrshrn.u16 d5, q4, #7 - vqrshrn.u16 d6, q5, #7 - vqrshrn.u16 d7, q6, #7 - vqrshrn.u16 d8, q7, #7 - vqrshrn.u16 d9, q8, #7 - - vst1.u8 {d2, d3}, [r3]! ;store result - subs r12, r12, #1 - vst1.u8 {d4, d5}, [r3]! - vmov q11, q15 - vst1.u8 {d6, d7}, [r3]! - vst1.u8 {d8, d9}, [r3]! - - bne vp8e_filt_blk2d_spo16x16_loop_neon - - b sub_pixel_variance16x16_neon - -;---------------------------- -;variance16x16 -sub_pixel_variance16x16_neon - vmov.i8 q8, #0 ;q8 - sum - vmov.i8 q9, #0 ;q9, q10 - sse - vmov.i8 q10, #0 - - sub r3, r3, #256 - mov r12, #8 - -sub_pixel_variance16x16_neon_loop - vld1.8 {q0}, [r3]! ;Load up source and reference - vld1.8 {q2}, [r4], r5 - vld1.8 {q1}, [r3]! 
- vld1.8 {q3}, [r4], r5 - - vsubl.u8 q11, d0, d4 ;diff - vsubl.u8 q12, d1, d5 - vsubl.u8 q13, d2, d6 - vsubl.u8 q14, d3, d7 - - vpadal.s16 q8, q11 ;sum - vmlal.s16 q9, d22, d22 ;sse - vmlal.s16 q10, d23, d23 - - subs r12, r12, #1 - - vpadal.s16 q8, q12 - vmlal.s16 q9, d24, d24 - vmlal.s16 q10, d25, d25 - vpadal.s16 q8, q13 - vmlal.s16 q9, d26, d26 - vmlal.s16 q10, d27, d27 - vpadal.s16 q8, q14 - vmlal.s16 q9, d28, d28 - vmlal.s16 q10, d29, d29 - - bne sub_pixel_variance16x16_neon_loop - - vadd.u32 q10, q9, q10 ;accumulate sse - vpaddl.s32 q0, q8 ;accumulate sum - - vpaddl.u32 q1, q10 - vadd.s64 d0, d0, d1 - vadd.u64 d1, d2, d3 - - vmull.s32 q5, d0, d0 - vst1.32 {d1[0]}, [r6] ;store sse - vshr.s32 d10, d10, #8 - vsub.s32 d0, d1, d10 - - add sp, sp, #528 - vmov.32 r0, d0[0] ;return - - pop {r4-r6,pc} - - ENDP - -;----------------- - -_BilinearTaps_coeff_ - DCD bilinear_taps_coeff -bilinear_taps_coeff - DCD 128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112 - - END diff --git a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm deleted file mode 100644 index a3faf9a77..000000000 --- a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm +++ /dev/null @@ -1,572 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - - EXPORT |vp9_variance_halfpixvar16x16_h_neon| - EXPORT |vp9_variance_halfpixvar16x16_v_neon| - EXPORT |vp9_variance_halfpixvar16x16_hv_neon| - EXPORT |vp9_sub_pixel_variance16x16s_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -;================================================ -;unsigned int vp9_variance_halfpixvar16x16_h_neon -;( -; unsigned char *src_ptr, r0 -; int src_pixels_per_line, r1 -; unsigned char *dst_ptr, r2 -; int dst_pixels_per_line, r3 -; unsigned int *sse -;); -;================================================ -|vp9_variance_halfpixvar16x16_h_neon| PROC - push {lr} - - mov r12, #4 ;loop counter - ldr lr, [sp, #4] ;load *sse from stack - vmov.i8 q8, #0 ;q8 - sum - vmov.i8 q9, #0 ;q9, q10 - sse - vmov.i8 q10, #0 - -;First Pass: output_height lines x output_width columns (16x16) -vp8_filt_fpo16x16s_4_0_loop_neon - vld1.u8 {d0, d1, d2, d3}, [r0], r1 ;load src data - vld1.8 {q11}, [r2], r3 - vld1.u8 {d4, d5, d6, d7}, [r0], r1 - vld1.8 {q12}, [r2], r3 - vld1.u8 {d8, d9, d10, d11}, [r0], r1 - vld1.8 {q13}, [r2], r3 - vld1.u8 {d12, d13, d14, d15}, [r0], r1 - - ;pld [r0] - ;pld [r0, r1] - ;pld [r0, r1, lsl #1] - - vext.8 q1, q0, q1, #1 ;construct src_ptr[1] - vext.8 q3, q2, q3, #1 - vext.8 q5, q4, q5, #1 - vext.8 q7, q6, q7, #1 - - vrhadd.u8 q0, q0, q1 ;(src_ptr[0]+src_ptr[1])/round/shift right 1 - vld1.8 {q14}, [r2], r3 - vrhadd.u8 q1, q2, q3 - vrhadd.u8 q2, q4, q5 - vrhadd.u8 q3, q6, q7 - - vsubl.u8 q4, d0, d22 ;diff - vsubl.u8 q5, d1, d23 - vsubl.u8 q6, d2, d24 - vsubl.u8 q7, d3, d25 - vsubl.u8 q0, d4, d26 - vsubl.u8 q1, d5, d27 - vsubl.u8 q2, d6, d28 - vsubl.u8 q3, d7, d29 - - vpadal.s16 q8, q4 ;sum - vmlal.s16 q9, d8, d8 ;sse - vmlal.s16 q10, d9, d9 - - subs r12, r12, #1 - - vpadal.s16 q8, q5 - vmlal.s16 q9, d10, d10 - vmlal.s16 q10, d11, d11 - vpadal.s16 q8, q6 - vmlal.s16 q9, d12, d12 - vmlal.s16 q10, d13, d13 - vpadal.s16 q8, q7 - vmlal.s16 q9, d14, d14 - vmlal.s16 q10, d15, d15 - - vpadal.s16 q8, q0 ;sum - 
vmlal.s16 q9, d0, d0 ;sse - vmlal.s16 q10, d1, d1 - vpadal.s16 q8, q1 - vmlal.s16 q9, d2, d2 - vmlal.s16 q10, d3, d3 - vpadal.s16 q8, q2 - vmlal.s16 q9, d4, d4 - vmlal.s16 q10, d5, d5 - vpadal.s16 q8, q3 - vmlal.s16 q9, d6, d6 - vmlal.s16 q10, d7, d7 - - bne vp8_filt_fpo16x16s_4_0_loop_neon - - vadd.u32 q10, q9, q10 ;accumulate sse - vpaddl.s32 q0, q8 ;accumulate sum - - vpaddl.u32 q1, q10 - vadd.s64 d0, d0, d1 - vadd.u64 d1, d2, d3 - - vmull.s32 q5, d0, d0 - vst1.32 {d1[0]}, [lr] ;store sse - vshr.s32 d10, d10, #8 - vsub.s32 d0, d1, d10 - - vmov.32 r0, d0[0] ;return - pop {pc} - ENDP - -;================================================ -;unsigned int vp9_variance_halfpixvar16x16_v_neon -;( -; unsigned char *src_ptr, r0 -; int src_pixels_per_line, r1 -; unsigned char *dst_ptr, r2 -; int dst_pixels_per_line, r3 -; unsigned int *sse -;); -;================================================ -|vp9_variance_halfpixvar16x16_v_neon| PROC - push {lr} - - mov r12, #4 ;loop counter - - vld1.u8 {q0}, [r0], r1 ;load src data - ldr lr, [sp, #4] ;load *sse from stack - - vmov.i8 q8, #0 ;q8 - sum - vmov.i8 q9, #0 ;q9, q10 - sse - vmov.i8 q10, #0 - -vp8_filt_spo16x16s_0_4_loop_neon - vld1.u8 {q2}, [r0], r1 - vld1.8 {q1}, [r2], r3 - vld1.u8 {q4}, [r0], r1 - vld1.8 {q3}, [r2], r3 - vld1.u8 {q6}, [r0], r1 - vld1.8 {q5}, [r2], r3 - vld1.u8 {q15}, [r0], r1 - - vrhadd.u8 q0, q0, q2 - vld1.8 {q7}, [r2], r3 - vrhadd.u8 q2, q2, q4 - vrhadd.u8 q4, q4, q6 - vrhadd.u8 q6, q6, q15 - - vsubl.u8 q11, d0, d2 ;diff - vsubl.u8 q12, d1, d3 - vsubl.u8 q13, d4, d6 - vsubl.u8 q14, d5, d7 - vsubl.u8 q0, d8, d10 - vsubl.u8 q1, d9, d11 - vsubl.u8 q2, d12, d14 - vsubl.u8 q3, d13, d15 - - vpadal.s16 q8, q11 ;sum - vmlal.s16 q9, d22, d22 ;sse - vmlal.s16 q10, d23, d23 - - subs r12, r12, #1 - - vpadal.s16 q8, q12 - vmlal.s16 q9, d24, d24 - vmlal.s16 q10, d25, d25 - vpadal.s16 q8, q13 - vmlal.s16 q9, d26, d26 - vmlal.s16 q10, d27, d27 - vpadal.s16 q8, q14 - vmlal.s16 q9, d28, d28 - vmlal.s16 q10, d29, d29 - - 
vpadal.s16 q8, q0 ;sum - vmlal.s16 q9, d0, d0 ;sse - vmlal.s16 q10, d1, d1 - vpadal.s16 q8, q1 - vmlal.s16 q9, d2, d2 - vmlal.s16 q10, d3, d3 - vpadal.s16 q8, q2 - vmlal.s16 q9, d4, d4 - vmlal.s16 q10, d5, d5 - - vmov q0, q15 - - vpadal.s16 q8, q3 - vmlal.s16 q9, d6, d6 - vmlal.s16 q10, d7, d7 - - bne vp8_filt_spo16x16s_0_4_loop_neon - - vadd.u32 q10, q9, q10 ;accumulate sse - vpaddl.s32 q0, q8 ;accumulate sum - - vpaddl.u32 q1, q10 - vadd.s64 d0, d0, d1 - vadd.u64 d1, d2, d3 - - vmull.s32 q5, d0, d0 - vst1.32 {d1[0]}, [lr] ;store sse - vshr.s32 d10, d10, #8 - vsub.s32 d0, d1, d10 - - vmov.32 r0, d0[0] ;return - pop {pc} - ENDP - -;================================================ -;unsigned int vp9_variance_halfpixvar16x16_hv_neon -;( -; unsigned char *src_ptr, r0 -; int src_pixels_per_line, r1 -; unsigned char *dst_ptr, r2 -; int dst_pixels_per_line, r3 -; unsigned int *sse -;); -;================================================ -|vp9_variance_halfpixvar16x16_hv_neon| PROC - push {lr} - - vld1.u8 {d0, d1, d2, d3}, [r0], r1 ;load src data - - ldr lr, [sp, #4] ;load *sse from stack - vmov.i8 q13, #0 ;q8 - sum - vext.8 q1, q0, q1, #1 ;construct src_ptr[1] - - vmov.i8 q14, #0 ;q9, q10 - sse - vmov.i8 q15, #0 - - mov r12, #4 ;loop counter - vrhadd.u8 q0, q0, q1 ;(src_ptr[0]+src_ptr[1])/round/shift right 1 - -;First Pass: output_height lines x output_width columns (17x16) -vp8_filt16x16s_4_4_loop_neon - vld1.u8 {d4, d5, d6, d7}, [r0], r1 - vld1.u8 {d8, d9, d10, d11}, [r0], r1 - vld1.u8 {d12, d13, d14, d15}, [r0], r1 - vld1.u8 {d16, d17, d18, d19}, [r0], r1 - - ;pld [r0] - ;pld [r0, r1] - ;pld [r0, r1, lsl #1] - - vext.8 q3, q2, q3, #1 ;construct src_ptr[1] - vext.8 q5, q4, q5, #1 - vext.8 q7, q6, q7, #1 - vext.8 q9, q8, q9, #1 - - vrhadd.u8 q1, q2, q3 ;(src_ptr[0]+src_ptr[1])/round/shift right 1 - vrhadd.u8 q2, q4, q5 - vrhadd.u8 q3, q6, q7 - vrhadd.u8 q4, q8, q9 - - vld1.8 {q5}, [r2], r3 - vrhadd.u8 q0, q0, q1 - vld1.8 {q6}, [r2], r3 - vrhadd.u8 q1, q1, q2 - vld1.8 
{q7}, [r2], r3 - vrhadd.u8 q2, q2, q3 - vld1.8 {q8}, [r2], r3 - vrhadd.u8 q3, q3, q4 - - vsubl.u8 q9, d0, d10 ;diff - vsubl.u8 q10, d1, d11 - vsubl.u8 q11, d2, d12 - vsubl.u8 q12, d3, d13 - - vsubl.u8 q0, d4, d14 ;diff - vsubl.u8 q1, d5, d15 - vsubl.u8 q5, d6, d16 - vsubl.u8 q6, d7, d17 - - vpadal.s16 q13, q9 ;sum - vmlal.s16 q14, d18, d18 ;sse - vmlal.s16 q15, d19, d19 - - vpadal.s16 q13, q10 ;sum - vmlal.s16 q14, d20, d20 ;sse - vmlal.s16 q15, d21, d21 - - vpadal.s16 q13, q11 ;sum - vmlal.s16 q14, d22, d22 ;sse - vmlal.s16 q15, d23, d23 - - vpadal.s16 q13, q12 ;sum - vmlal.s16 q14, d24, d24 ;sse - vmlal.s16 q15, d25, d25 - - subs r12, r12, #1 - - vpadal.s16 q13, q0 ;sum - vmlal.s16 q14, d0, d0 ;sse - vmlal.s16 q15, d1, d1 - - vpadal.s16 q13, q1 ;sum - vmlal.s16 q14, d2, d2 ;sse - vmlal.s16 q15, d3, d3 - - vpadal.s16 q13, q5 ;sum - vmlal.s16 q14, d10, d10 ;sse - vmlal.s16 q15, d11, d11 - - vmov q0, q4 - - vpadal.s16 q13, q6 ;sum - vmlal.s16 q14, d12, d12 ;sse - vmlal.s16 q15, d13, d13 - - bne vp8_filt16x16s_4_4_loop_neon - - vadd.u32 q15, q14, q15 ;accumulate sse - vpaddl.s32 q0, q13 ;accumulate sum - - vpaddl.u32 q1, q15 - vadd.s64 d0, d0, d1 - vadd.u64 d1, d2, d3 - - vmull.s32 q5, d0, d0 - vst1.32 {d1[0]}, [lr] ;store sse - vshr.s32 d10, d10, #8 - vsub.s32 d0, d1, d10 - - vmov.32 r0, d0[0] ;return - pop {pc} - ENDP - -;============================== -; r0 unsigned char *src_ptr, -; r1 int src_pixels_per_line, -; r2 int xoffset, -; r3 int yoffset, -; stack unsigned char *dst_ptr, -; stack int dst_pixels_per_line, -; stack unsigned int *sse -;note: in vp8_find_best_half_pixel_step()(called when 8<Speed<15), and first call of vp8_find_best_sub_pixel_step() -;(called when speed<=8). xoffset/yoffset can only be 4 or 0, which means either by pass the filter, -;or filter coeff is {64, 64}. This simplified program only works in this situation. -;note: It happens that both xoffset and yoffset are zero. This can be handled in c code later. 
- -|vp9_sub_pixel_variance16x16s_neon| PROC - push {r4, lr} - - ldr r4, [sp, #8] ;load *dst_ptr from stack - ldr r12, [sp, #12] ;load dst_pixels_per_line from stack - ldr lr, [sp, #16] ;load *sse from stack - - cmp r2, #0 ;skip first_pass filter if xoffset=0 - beq secondpass_bfilter16x16s_only - - cmp r3, #0 ;skip second_pass filter if yoffset=0 - beq firstpass_bfilter16x16s_only - - vld1.u8 {d0, d1, d2, d3}, [r0], r1 ;load src data - sub sp, sp, #256 ;reserve space on stack for temporary storage - vext.8 q1, q0, q1, #1 ;construct src_ptr[1] - mov r3, sp - mov r2, #4 ;loop counter - vrhadd.u8 q0, q0, q1 ;(src_ptr[0]+src_ptr[1])/round/shift right 1 - -;First Pass: output_height lines x output_width columns (17x16) -vp8e_filt_blk2d_fp16x16s_loop_neon - vld1.u8 {d4, d5, d6, d7}, [r0], r1 - vld1.u8 {d8, d9, d10, d11}, [r0], r1 - vld1.u8 {d12, d13, d14, d15}, [r0], r1 - vld1.u8 {d16, d17, d18, d19}, [r0], r1 - - ;pld [r0] - ;pld [r0, r1] - ;pld [r0, r1, lsl #1] - - vext.8 q3, q2, q3, #1 ;construct src_ptr[1] - vext.8 q5, q4, q5, #1 - vext.8 q7, q6, q7, #1 - vext.8 q9, q8, q9, #1 - - vrhadd.u8 q1, q2, q3 ;(src_ptr[0]+src_ptr[1])/round/shift right 1 - vrhadd.u8 q2, q4, q5 - vrhadd.u8 q3, q6, q7 - vrhadd.u8 q4, q8, q9 - - vrhadd.u8 q0, q0, q1 - vrhadd.u8 q1, q1, q2 - vrhadd.u8 q2, q2, q3 - vrhadd.u8 q3, q3, q4 - - subs r2, r2, #1 - vst1.u8 {d0, d1 ,d2, d3}, [r3]! ;store result - vmov q0, q4 - vst1.u8 {d4, d5, d6, d7}, [r3]! 
- - bne vp8e_filt_blk2d_fp16x16s_loop_neon - - b sub_pixel_variance16x16s_neon - -;-------------------- -firstpass_bfilter16x16s_only - mov r2, #2 ;loop counter - sub sp, sp, #256 ;reserve space on stack for temporary storage - mov r3, sp - -;First Pass: output_height lines x output_width columns (16x16) -vp8e_filt_blk2d_fpo16x16s_loop_neon - vld1.u8 {d0, d1, d2, d3}, [r0], r1 ;load src data - vld1.u8 {d4, d5, d6, d7}, [r0], r1 - vld1.u8 {d8, d9, d10, d11}, [r0], r1 - vld1.u8 {d12, d13, d14, d15}, [r0], r1 - - ;pld [r0] - ;pld [r0, r1] - ;pld [r0, r1, lsl #1] - - vext.8 q1, q0, q1, #1 ;construct src_ptr[1] - vld1.u8 {d16, d17, d18, d19}, [r0], r1 - vext.8 q3, q2, q3, #1 - vld1.u8 {d20, d21, d22, d23}, [r0], r1 - vext.8 q5, q4, q5, #1 - vld1.u8 {d24, d25, d26, d27}, [r0], r1 - vext.8 q7, q6, q7, #1 - vld1.u8 {d28, d29, d30, d31}, [r0], r1 - vext.8 q9, q8, q9, #1 - vext.8 q11, q10, q11, #1 - vext.8 q13, q12, q13, #1 - vext.8 q15, q14, q15, #1 - - vrhadd.u8 q0, q0, q1 ;(src_ptr[0]+src_ptr[1])/round/shift right 1 - vrhadd.u8 q1, q2, q3 - vrhadd.u8 q2, q4, q5 - vrhadd.u8 q3, q6, q7 - vrhadd.u8 q4, q8, q9 - vrhadd.u8 q5, q10, q11 - vrhadd.u8 q6, q12, q13 - vrhadd.u8 q7, q14, q15 - - subs r2, r2, #1 - - vst1.u8 {d0, d1, d2, d3}, [r3]! ;store result - vst1.u8 {d4, d5, d6, d7}, [r3]! - vst1.u8 {d8, d9, d10, d11}, [r3]! - vst1.u8 {d12, d13, d14, d15}, [r3]! 
- - bne vp8e_filt_blk2d_fpo16x16s_loop_neon - - b sub_pixel_variance16x16s_neon - -;--------------------- -secondpass_bfilter16x16s_only - sub sp, sp, #256 ;reserve space on stack for temporary storage - - mov r2, #2 ;loop counter - vld1.u8 {d0, d1}, [r0], r1 ;load src data - mov r3, sp - -vp8e_filt_blk2d_spo16x16s_loop_neon - vld1.u8 {d2, d3}, [r0], r1 - vld1.u8 {d4, d5}, [r0], r1 - vld1.u8 {d6, d7}, [r0], r1 - vld1.u8 {d8, d9}, [r0], r1 - - vrhadd.u8 q0, q0, q1 - vld1.u8 {d10, d11}, [r0], r1 - vrhadd.u8 q1, q1, q2 - vld1.u8 {d12, d13}, [r0], r1 - vrhadd.u8 q2, q2, q3 - vld1.u8 {d14, d15}, [r0], r1 - vrhadd.u8 q3, q3, q4 - vld1.u8 {d16, d17}, [r0], r1 - vrhadd.u8 q4, q4, q5 - vrhadd.u8 q5, q5, q6 - vrhadd.u8 q6, q6, q7 - vrhadd.u8 q7, q7, q8 - - subs r2, r2, #1 - - vst1.u8 {d0, d1, d2, d3}, [r3]! ;store result - vmov q0, q8 - vst1.u8 {d4, d5, d6, d7}, [r3]! - vst1.u8 {d8, d9, d10, d11}, [r3]! ;store result - vst1.u8 {d12, d13, d14, d15}, [r3]! - - bne vp8e_filt_blk2d_spo16x16s_loop_neon - - b sub_pixel_variance16x16s_neon - -;---------------------------- -;variance16x16 -sub_pixel_variance16x16s_neon - vmov.i8 q8, #0 ;q8 - sum - vmov.i8 q9, #0 ;q9, q10 - sse - vmov.i8 q10, #0 - - sub r3, r3, #256 - mov r2, #4 - -sub_pixel_variance16x16s_neon_loop - vld1.8 {q0}, [r3]! ;Load up source and reference - vld1.8 {q1}, [r4], r12 - vld1.8 {q2}, [r3]! - vld1.8 {q3}, [r4], r12 - vld1.8 {q4}, [r3]! - vld1.8 {q5}, [r4], r12 - vld1.8 {q6}, [r3]! 
- vld1.8 {q7}, [r4], r12 - - vsubl.u8 q11, d0, d2 ;diff - vsubl.u8 q12, d1, d3 - vsubl.u8 q13, d4, d6 - vsubl.u8 q14, d5, d7 - vsubl.u8 q0, d8, d10 - vsubl.u8 q1, d9, d11 - vsubl.u8 q2, d12, d14 - vsubl.u8 q3, d13, d15 - - vpadal.s16 q8, q11 ;sum - vmlal.s16 q9, d22, d22 ;sse - vmlal.s16 q10, d23, d23 - - subs r2, r2, #1 - - vpadal.s16 q8, q12 - vmlal.s16 q9, d24, d24 - vmlal.s16 q10, d25, d25 - vpadal.s16 q8, q13 - vmlal.s16 q9, d26, d26 - vmlal.s16 q10, d27, d27 - vpadal.s16 q8, q14 - vmlal.s16 q9, d28, d28 - vmlal.s16 q10, d29, d29 - - vpadal.s16 q8, q0 ;sum - vmlal.s16 q9, d0, d0 ;sse - vmlal.s16 q10, d1, d1 - vpadal.s16 q8, q1 - vmlal.s16 q9, d2, d2 - vmlal.s16 q10, d3, d3 - vpadal.s16 q8, q2 - vmlal.s16 q9, d4, d4 - vmlal.s16 q10, d5, d5 - vpadal.s16 q8, q3 - vmlal.s16 q9, d6, d6 - vmlal.s16 q10, d7, d7 - - bne sub_pixel_variance16x16s_neon_loop - - vadd.u32 q10, q9, q10 ;accumulate sse - vpaddl.s32 q0, q8 ;accumulate sum - - vpaddl.u32 q1, q10 - vadd.s64 d0, d0, d1 - vadd.u64 d1, d2, d3 - - vmull.s32 q5, d0, d0 - vst1.32 {d1[0]}, [lr] ;store sse - vshr.s32 d10, d10, #8 - vsub.s32 d0, d1, d10 - - add sp, sp, #256 - vmov.32 r0, d0[0] ;return - - pop {r4, pc} - ENDP - - END diff --git a/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm b/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm deleted file mode 100644 index 29975f13e..000000000 --- a/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm +++ /dev/null @@ -1,224 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - - EXPORT |vp9_sub_pixel_variance8x8_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -; r0 unsigned char *src_ptr, -; r1 int src_pixels_per_line, -; r2 int xoffset, -; r3 int yoffset, -; stack(r4) unsigned char *dst_ptr, -; stack(r5) int dst_pixels_per_line, -; stack(r6) unsigned int *sse -;note: most of the code is copied from bilinear_predict8x8_neon and vp9_variance8x8_neon. - -|vp9_sub_pixel_variance8x8_neon| PROC - push {r4-r5, lr} - - ldr r12, _BilinearTaps_coeff_ - ldr r4, [sp, #12] ;load *dst_ptr from stack - ldr r5, [sp, #16] ;load dst_pixels_per_line from stack - ldr lr, [sp, #20] ;load *sse from stack - - cmp r2, #0 ;skip first_pass filter if xoffset=0 - beq skip_firstpass_filter - -;First pass: output_height lines x output_width columns (9x8) - add r2, r12, r2, lsl #3 ;calculate filter location - - vld1.u8 {q1}, [r0], r1 ;load src data - vld1.u32 {d31}, [r2] ;load first_pass filter - vld1.u8 {q2}, [r0], r1 - vdup.8 d0, d31[0] ;first_pass filter (d0 d1) - vld1.u8 {q3}, [r0], r1 - vdup.8 d1, d31[4] - vld1.u8 {q4}, [r0], r1 - - vmull.u8 q6, d2, d0 ;(src_ptr[0] * Filter[0]) - vmull.u8 q7, d4, d0 - vmull.u8 q8, d6, d0 - vmull.u8 q9, d8, d0 - - vext.8 d3, d2, d3, #1 ;construct src_ptr[-1] - vext.8 d5, d4, d5, #1 - vext.8 d7, d6, d7, #1 - vext.8 d9, d8, d9, #1 - - vmlal.u8 q6, d3, d1 ;(src_ptr[1] * Filter[1]) - vmlal.u8 q7, d5, d1 - vmlal.u8 q8, d7, d1 - vmlal.u8 q9, d9, d1 - - vld1.u8 {q1}, [r0], r1 ;load src data - vqrshrn.u16 d22, q6, #7 ;shift/round/saturate to u8 - vld1.u8 {q2}, [r0], r1 - vqrshrn.u16 d23, q7, #7 - vld1.u8 {q3}, [r0], r1 - vqrshrn.u16 d24, q8, #7 - vld1.u8 {q4}, [r0], r1 - vqrshrn.u16 d25, q9, #7 - - ;first_pass filtering on the rest 5-line data - vld1.u8 {q5}, [r0], r1 - - vmull.u8 q6, d2, d0 ;(src_ptr[0] * Filter[0]) - vmull.u8 q7, d4, d0 - vmull.u8 q8, d6, d0 - vmull.u8 q9, d8, d0 - vmull.u8 q10, d10, d0 - - vext.8 d3, d2, d3, #1 ;construct src_ptr[-1] - vext.8 d5, d4, d5, #1 - vext.8 d7, d6, d7, #1 
- vext.8 d9, d8, d9, #1 - vext.8 d11, d10, d11, #1 - - vmlal.u8 q6, d3, d1 ;(src_ptr[1] * Filter[1]) - vmlal.u8 q7, d5, d1 - vmlal.u8 q8, d7, d1 - vmlal.u8 q9, d9, d1 - vmlal.u8 q10, d11, d1 - - vqrshrn.u16 d26, q6, #7 ;shift/round/saturate to u8 - vqrshrn.u16 d27, q7, #7 - vqrshrn.u16 d28, q8, #7 - vqrshrn.u16 d29, q9, #7 - vqrshrn.u16 d30, q10, #7 - -;Second pass: 8x8 -secondpass_filter - cmp r3, #0 ;skip second_pass filter if yoffset=0 - ;skip_secondpass_filter - beq sub_pixel_variance8x8_neon - - add r3, r12, r3, lsl #3 - - vld1.u32 {d31}, [r3] ;load second_pass filter - - vdup.8 d0, d31[0] ;second_pass filter parameters (d0 d1) - vdup.8 d1, d31[4] - - vmull.u8 q1, d22, d0 ;(src_ptr[0] * Filter[0]) - vmull.u8 q2, d23, d0 - vmull.u8 q3, d24, d0 - vmull.u8 q4, d25, d0 - vmull.u8 q5, d26, d0 - vmull.u8 q6, d27, d0 - vmull.u8 q7, d28, d0 - vmull.u8 q8, d29, d0 - - vmlal.u8 q1, d23, d1 ;(src_ptr[pixel_step] * Filter[1]) - vmlal.u8 q2, d24, d1 - vmlal.u8 q3, d25, d1 - vmlal.u8 q4, d26, d1 - vmlal.u8 q5, d27, d1 - vmlal.u8 q6, d28, d1 - vmlal.u8 q7, d29, d1 - vmlal.u8 q8, d30, d1 - - vqrshrn.u16 d22, q1, #7 ;shift/round/saturate to u8 - vqrshrn.u16 d23, q2, #7 - vqrshrn.u16 d24, q3, #7 - vqrshrn.u16 d25, q4, #7 - vqrshrn.u16 d26, q5, #7 - vqrshrn.u16 d27, q6, #7 - vqrshrn.u16 d28, q7, #7 - vqrshrn.u16 d29, q8, #7 - - b sub_pixel_variance8x8_neon - -;-------------------- -skip_firstpass_filter - vld1.u8 {d22}, [r0], r1 ;load src data - vld1.u8 {d23}, [r0], r1 - vld1.u8 {d24}, [r0], r1 - vld1.u8 {d25}, [r0], r1 - vld1.u8 {d26}, [r0], r1 - vld1.u8 {d27}, [r0], r1 - vld1.u8 {d28}, [r0], r1 - vld1.u8 {d29}, [r0], r1 - vld1.u8 {d30}, [r0], r1 - - b secondpass_filter - -;---------------------- -;vp9_variance8x8_neon -sub_pixel_variance8x8_neon - vmov.i8 q8, #0 ;q8 - sum - vmov.i8 q9, #0 ;q9, q10 - sse - vmov.i8 q10, #0 - - mov r12, #2 - -sub_pixel_variance8x8_neon_loop - vld1.8 {d0}, [r4], r5 ;load dst data - subs r12, r12, #1 - vld1.8 {d1}, [r4], r5 - vld1.8 {d2}, [r4], r5 
- vsubl.u8 q4, d22, d0 ;calculate diff - vld1.8 {d3}, [r4], r5 - - vsubl.u8 q5, d23, d1 - vsubl.u8 q6, d24, d2 - - vpadal.s16 q8, q4 ;sum - vmlal.s16 q9, d8, d8 ;sse - vmlal.s16 q10, d9, d9 - - vsubl.u8 q7, d25, d3 - - vpadal.s16 q8, q5 - vmlal.s16 q9, d10, d10 - vmlal.s16 q10, d11, d11 - - vmov q11, q13 - - vpadal.s16 q8, q6 - vmlal.s16 q9, d12, d12 - vmlal.s16 q10, d13, d13 - - vmov q12, q14 - - vpadal.s16 q8, q7 - vmlal.s16 q9, d14, d14 - vmlal.s16 q10, d15, d15 - - bne sub_pixel_variance8x8_neon_loop - - vadd.u32 q10, q9, q10 ;accumulate sse - vpaddl.s32 q0, q8 ;accumulate sum - - vpaddl.u32 q1, q10 - vadd.s64 d0, d0, d1 - vadd.u64 d1, d2, d3 - - vmull.s32 q5, d0, d0 - vst1.32 {d1[0]}, [lr] ;store sse - vshr.s32 d10, d10, #6 - vsub.s32 d0, d1, d10 - - vmov.32 r0, d0[0] ;return - pop {r4-r5, pc} - - ENDP - -;----------------- - -_BilinearTaps_coeff_ - DCD bilinear_taps_coeff -bilinear_taps_coeff - DCD 128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112 - - END diff --git a/vp8/encoder/arm/quantize_arm.c b/vp8/encoder/arm/quantize_arm.c deleted file mode 100644 index b78c2534b..000000000 --- a/vp8/encoder/arm/quantize_arm.c +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include <math.h> -#include "vpx_mem/vpx_mem.h" - -#include "vp8/encoder/quantize.h" -#include "vp8/common/entropy.h" - - -#if HAVE_ARMV7 - -/* vp8_quantize_mbX functions here differs from corresponding ones in - * quantize.c only by using quantize_b_pair function pointer instead of - * the regular quantize_b function pointer */ -void vp8_quantize_mby_neon(MACROBLOCK *x) { - int i; - int has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED - && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV); - - for (i = 0; i < 16; i += 2) - x->quantize_b_pair(&x->block[i], &x->block[i + 1], - &x->e_mbd.block[i], &x->e_mbd.block[i + 1]); - - if (has_2nd_order) - x->quantize_b(&x->block[24], &x->e_mbd.block[24]); -} - -void vp8_quantize_mb_neon(MACROBLOCK *x) { - int i; - int has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED - && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV); - - for (i = 0; i < 24; i += 2) - x->quantize_b_pair(&x->block[i], &x->block[i + 1], - &x->e_mbd.block[i], &x->e_mbd.block[i + 1]); - - if (has_2nd_order) - x->quantize_b(&x->block[i], &x->e_mbd.block[i]); -} - - -void vp8_quantize_mbuv_neon(MACROBLOCK *x) { - int i; - - for (i = 16; i < 24; i += 2) - x->quantize_b_pair(&x->block[i], &x->block[i + 1], - &x->e_mbd.block[i], &x->e_mbd.block[i + 1]); -} - -#endif /* HAVE_ARMV7 */ diff --git a/vp8/encoder/arm/quantize_arm.h b/vp8/encoder/arm/quantize_arm.h deleted file mode 100644 index 7d2088d2d..000000000 --- a/vp8/encoder/arm/quantize_arm.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#ifndef QUANTIZE_ARM_H -#define QUANTIZE_ARM_H - -#if HAVE_ARMV6 - -extern prototype_quantize_block(vp8_fast_quantize_b_armv6); - -#if !CONFIG_RUNTIME_CPU_DETECT -#undef vp8_quantize_fastquantb -#define vp8_quantize_fastquantb vp8_fast_quantize_b_armv6 -#endif - -#endif /* HAVE_ARMV6 */ - - -#if HAVE_ARMV7 - -extern prototype_quantize_block(vp8_fast_quantize_b_neon); -extern prototype_quantize_block_pair(vp8_fast_quantize_b_pair_neon); - -#if !CONFIG_RUNTIME_CPU_DETECT -#undef vp8_quantize_fastquantb -#define vp8_quantize_fastquantb vp8_fast_quantize_b_neon - -#undef vp8_quantize_fastquantb_pair -#define vp8_quantize_fastquantb_pair vp8_fast_quantize_b_pair_neon - -#undef vp8_quantize_mb -#define vp8_quantize_mb vp8_quantize_mb_neon - -#undef vp8_quantize_mbuv -#define vp8_quantize_mbuv vp8_quantize_mbuv_neon - -#undef vp8_quantize_mby -#define vp8_quantize_mby vp8_quantize_mby_neon -#endif - -#endif /* HAVE_ARMV7 */ - -#endif - diff --git a/vp8/encoder/arm/variance_arm.c b/vp8/encoder/arm/variance_arm.c deleted file mode 100644 index 097c73e0b..000000000 --- a/vp8/encoder/arm/variance_arm.c +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "vpx_config.h" -#include "vp8/encoder/variance.h" -#include "vp8/common/filter.h" -#include "vp8/common/arm/bilinearfilter_arm.h" - -#define HALFNDX 8 - -#if HAVE_ARMV6 - -unsigned int vp9_sub_pixel_variance8x8_armv6 -( - const unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse -) { - unsigned short first_pass[10 * 8]; - unsigned char second_pass[8 * 8]; - const short *HFilter, *VFilter; - - HFilter = vp8_bilinear_filters[xoffset]; - VFilter = vp8_bilinear_filters[yoffset]; - - vp9_filter_block2d_bil_first_pass_armv6(src_ptr, first_pass, - src_pixels_per_line, - 9, 8, HFilter); - vp9_filter_block2d_bil_second_pass_armv6(first_pass, second_pass, - 8, 8, 8, VFilter); - - return vp9_variance8x8_armv6(second_pass, 8, dst_ptr, - dst_pixels_per_line, sse); -} - -unsigned int vp9_sub_pixel_variance16x16_armv6 -( - const unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse -) { - unsigned short first_pass[36 * 16]; - unsigned char second_pass[20 * 16]; - const short *HFilter, *VFilter; - unsigned int var; - - if (xoffset == HALFNDX && yoffset == 0) { - var = vp9_variance_halfpixvar16x16_h_armv6(src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, sse); - } else if (xoffset == 0 && yoffset == HALFNDX) { - var = vp9_variance_halfpixvar16x16_v_armv6(src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, sse); - } else if (xoffset == HALFNDX && yoffset == HALFNDX) { - var = vp9_variance_halfpixvar16x16_hv_armv6(src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, sse); - } else { - HFilter = vp8_bilinear_filters[xoffset]; - VFilter = vp8_bilinear_filters[yoffset]; - - vp9_filter_block2d_bil_first_pass_armv6(src_ptr, first_pass, - src_pixels_per_line, - 17, 16, HFilter); - vp9_filter_block2d_bil_second_pass_armv6(first_pass, 
second_pass, - 16, 16, 16, VFilter); - - var = vp9_variance16x16_armv6(second_pass, 16, dst_ptr, - dst_pixels_per_line, sse); - } - return var; -} - -#endif /* HAVE_ARMV6 */ - - -#if HAVE_ARMV7 - -unsigned int vp9_sub_pixel_variance16x16_neon -( - const unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse -) { - if (xoffset == HALFNDX && yoffset == 0) - return vp9_variance_halfpixvar16x16_h_neon(src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse); - else if (xoffset == 0 && yoffset == HALFNDX) - return vp9_variance_halfpixvar16x16_v_neon(src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse); - else if (xoffset == HALFNDX && yoffset == HALFNDX) - return vp9_variance_halfpixvar16x16_hv_neon(src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse); - else - return vp9_sub_pixel_variance16x16_neon_func(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse); -} - -#endif diff --git a/vp8/encoder/arm/variance_arm.h b/vp8/encoder/arm/variance_arm.h deleted file mode 100644 index c2c208a78..000000000 --- a/vp8/encoder/arm/variance_arm.h +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#ifndef VARIANCE_ARM_H -#define VARIANCE_ARM_H - -#if HAVE_ARMV6 - -extern prototype_sad(vp9_sad16x16_armv6); -extern prototype_variance(vp9_variance16x16_armv6); -extern prototype_variance(vp9_variance8x8_armv6); -extern prototype_subpixvariance(vp9_sub_pixel_variance16x16_armv6); -extern prototype_subpixvariance(vp9_sub_pixel_variance8x8_armv6); -extern prototype_variance(vp9_variance_halfpixvar16x16_h_armv6); -extern prototype_variance(vp9_variance_halfpixvar16x16_v_armv6); -extern prototype_variance(vp9_variance_halfpixvar16x16_hv_armv6); -extern prototype_variance(vp9_mse16x16_armv6); - -#if !CONFIG_RUNTIME_CPU_DETECT - -#undef vp9_variance_sad16x16 -#define vp9_variance_sad16x16 vp9_sad16x16_armv6 - -#undef vp9_variance_subpixvar16x16 -#define vp9_variance_subpixvar16x16 vp9_sub_pixel_variance16x16_armv6 - -#undef vp9_variance_subpixvar8x8 -#define vp9_variance_subpixvar8x8 vp9_sub_pixel_variance8x8_armv6 - -#undef vp9_variance_var16x16 -#define vp9_variance_var16x16 vp9_variance16x16_armv6 - -#undef vp9_variance_mse16x16 -#define vp9_variance_mse16x16 vp9_mse16x16_armv6 - -#undef vp9_variance_var8x8 -#define vp9_variance_var8x8 vp9_variance8x8_armv6 - -#undef vp9_variance_halfpixvar16x16_h -#define vp9_variance_halfpixvar16x16_h vp9_variance_halfpixvar16x16_h_armv6 - -#undef vp9_variance_halfpixvar16x16_v -#define vp9_variance_halfpixvar16x16_v vp9_variance_halfpixvar16x16_v_armv6 - -#undef vp9_variance_halfpixvar16x16_hv -#define vp9_variance_halfpixvar16x16_hv vp9_variance_halfpixvar16x16_hv_armv6 - -#endif /* !CONFIG_RUNTIME_CPU_DETECT */ - -#endif /* HAVE_ARMV6 */ - - -#if HAVE_ARMV7 -extern prototype_sad(vp9_sad4x4_neon); -extern prototype_sad(vp9_sad8x8_neon); -extern prototype_sad(vp9_sad8x16_neon); -extern prototype_sad(vp9_sad16x8_neon); -extern prototype_sad(vp9_sad16x16_neon); - -extern prototype_variance(vp9_variance8x8_neon); -extern prototype_variance(vp9_variance8x16_neon); -extern prototype_variance(vp9_variance16x8_neon); -extern 
prototype_variance(vp9_variance16x16_neon); - -extern prototype_subpixvariance(vp9_sub_pixel_variance8x8_neon); -extern prototype_subpixvariance(vp9_sub_pixel_variance16x16_neon); -extern prototype_subpixvariance(vp9_sub_pixel_variance16x16_neon_func); -extern prototype_variance(vp9_variance_halfpixvar16x16_h_neon); -extern prototype_variance(vp9_variance_halfpixvar16x16_v_neon); -extern prototype_variance(vp9_variance_halfpixvar16x16_hv_neon); - -extern prototype_variance(vp9_mse16x16_neon); - -#if !CONFIG_RUNTIME_CPU_DETECT -#undef vp9_variance_sad4x4 -#define vp9_variance_sad4x4 vp9_sad4x4_neon - -#undef vp9_variance_sad8x8 -#define vp9_variance_sad8x8 vp9_sad8x8_neon - -#undef vp9_variance_sad8x16 -#define vp9_variance_sad8x16 vp9_sad8x16_neon - -#undef vp9_variance_sad16x8 -#define vp9_variance_sad16x8 vp9_sad16x8_neon - -#undef vp9_variance_sad16x16 -#define vp9_variance_sad16x16 vp9_sad16x16_neon - -#undef vp9_variance_var8x8 -#define vp9_variance_var8x8 vp9_variance8x8_neon - -#undef vp9_variance_var8x16 -#define vp9_variance_var8x16 vp9_variance8x16_neon - -#undef vp9_variance_var16x8 -#define vp9_variance_var16x8 vp9_variance16x8_neon - -#undef vp9_variance_var16x16 -#define vp9_variance_var16x16 vp9_variance16x16_neon - -#undef vp9_variance_subpixvar8x8 -#define vp9_variance_subpixvar8x8 vp9_sub_pixel_variance8x8_neon - -#undef vp9_variance_subpixvar16x16 -#define vp9_variance_subpixvar16x16 vp9_sub_pixel_variance16x16_neon - -#undef vp9_variance_halfpixvar16x16_h -#define vp9_variance_halfpixvar16x16_h vp9_variance_halfpixvar16x16_h_neon - -#undef vp9_variance_halfpixvar16x16_v -#define vp9_variance_halfpixvar16x16_v vp9_variance_halfpixvar16x16_v_neon - -#undef vp9_variance_halfpixvar16x16_hv -#define vp9_variance_halfpixvar16x16_hv vp9_variance_halfpixvar16x16_hv_neon - -#undef vp9_variance_mse16x16 -#define vp9_variance_mse16x16 vp9_mse16x16_neon - -#endif - -#endif - -#endif diff --git a/vp8/encoder/asm_enc_offsets.c b/vp8/encoder/asm_enc_offsets.c 
deleted file mode 100644 index 345240fdc..000000000 --- a/vp8/encoder/asm_enc_offsets.c +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2011 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vpx_ports/asm_offsets.h" -#include "vpx_config.h" -#include "block.h" -#include "vp8/common/blockd.h" -#include "onyx_int.h" -#include "treewriter.h" -#include "tokenize.h" - -BEGIN - -/* regular quantize */ -DEFINE(vp9_block_coeff, offsetof(BLOCK, coeff)); -DEFINE(vp9_block_zbin, offsetof(BLOCK, zbin)); -DEFINE(vp9_block_round, offsetof(BLOCK, round)); -DEFINE(vp9_block_quant, offsetof(BLOCK, quant)); -DEFINE(vp9_block_quant_fast, offsetof(BLOCK, quant_fast)); -DEFINE(vp9_block_zbin_extra, offsetof(BLOCK, zbin_extra)); -DEFINE(vp9_block_zrun_zbin_boost, offsetof(BLOCK, zrun_zbin_boost)); -DEFINE(vp9_block_quant_shift, offsetof(BLOCK, quant_shift)); - -DEFINE(vp9_blockd_qcoeff, offsetof(BLOCKD, qcoeff)); -DEFINE(vp9_blockd_dequant, offsetof(BLOCKD, dequant)); -DEFINE(vp9_blockd_dqcoeff, offsetof(BLOCKD, dqcoeff)); -DEFINE(vp9_blockd_eob, offsetof(BLOCKD, eob)); - -/* subtract */ -DEFINE(vp9_block_base_src, offsetof(BLOCK, base_src)); -DEFINE(vp9_block_src, offsetof(BLOCK, src)); -DEFINE(vp9_block_src_diff, offsetof(BLOCK, src_diff)); -DEFINE(vp9_block_src_stride, offsetof(BLOCK, src_stride)); - -DEFINE(vp9_blockd_predictor, offsetof(BLOCKD, predictor)); - -/* pack tokens */ -DEFINE(vp9_writer_lowvalue, offsetof(vp9_writer, lowvalue)); -DEFINE(vp9_writer_range, offsetof(vp9_writer, range)); -DEFINE(vp9_writer_value, offsetof(vp9_writer, value)); -DEFINE(vp9_writer_count, offsetof(vp9_writer, count)); -DEFINE(vp9_writer_pos, 
offsetof(vp9_writer, pos)); -DEFINE(vp9_writer_buffer, offsetof(vp9_writer, buffer)); - -DEFINE(tokenextra_token, offsetof(TOKENEXTRA, Token)); -DEFINE(tokenextra_extra, offsetof(TOKENEXTRA, Extra)); -DEFINE(tokenextra_context_tree, offsetof(TOKENEXTRA, context_tree)); -DEFINE(tokenextra_skip_eob_node, offsetof(TOKENEXTRA, skip_eob_node)); -DEFINE(TOKENEXTRA_SZ, sizeof(TOKENEXTRA)); - -DEFINE(vp9_extra_bit_struct_sz, sizeof(vp9_extra_bit_struct)); - -DEFINE(vp9_token_value, offsetof(vp9_token, value)); -DEFINE(vp9_token_len, offsetof(vp9_token, Len)); - -DEFINE(vp9_extra_bit_struct_tree, offsetof(vp9_extra_bit_struct, tree)); -DEFINE(vp9_extra_bit_struct_prob, offsetof(vp9_extra_bit_struct, prob)); -DEFINE(vp9_extra_bit_struct_len, offsetof(vp9_extra_bit_struct, Len)); -DEFINE(vp9_extra_bit_struct_base_val, offsetof(vp9_extra_bit_struct, base_val)); - -DEFINE(vp9_comp_tplist, offsetof(VP9_COMP, tplist)); -DEFINE(vp9_comp_common, offsetof(VP9_COMP, common)); - -DEFINE(tokenlist_start, offsetof(TOKENLIST, start)); -DEFINE(tokenlist_stop, offsetof(TOKENLIST, stop)); -DEFINE(TOKENLIST_SZ, sizeof(TOKENLIST)); - -DEFINE(vp9_common_mb_rows, offsetof(VP9_COMMON, mb_rows)); - -END - -/* add asserts for any offset that is not supported by assembly code - * add asserts for any size that is not supported by assembly code - - * These are used in vp8cx_pack_tokens. They are hard coded so if their sizes - * change they will have to be adjusted. - */ - -#if HAVE_ARMV5TE -ct_assert(TOKENEXTRA_SZ, sizeof(TOKENEXTRA) == 8) -ct_assert(vp9_extra_bit_struct_sz, sizeof(vp9_extra_bit_struct) == 16) -#endif diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c deleted file mode 100644 index 3bfd5157b..000000000 --- a/vp8/encoder/bitstream.c +++ /dev/null @@ -1,2394 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vp8/common/header.h" -#include "encodemv.h" -#include "vp8/common/entropymode.h" -#include "vp8/common/findnearmv.h" -#include "mcomp.h" -#include "vp8/common/systemdependent.h" -#include <assert.h> -#include <stdio.h> -#include <limits.h> -#include "vp8/common/pragmas.h" -#include "vpx/vpx_encoder.h" -#include "vpx_mem/vpx_mem.h" -#include "bitstream.h" -#include "segmentation.h" - -#include "vp8/common/seg_common.h" -#include "vp8/common/pred_common.h" -#include "vp8/common/entropy.h" -#include "vp8/encoder/encodemv.h" -#include "vp8/common/entropymv.h" - -#if CONFIG_NEWBESTREFMV -#include "vp8/common/mvref_common.h" -#endif - -#if defined(SECTIONBITS_OUTPUT) -unsigned __int64 Sectionbits[500]; -#endif - -#ifdef ENTROPY_STATS -int intra_mode_stats [VP9_BINTRAMODES] [VP9_BINTRAMODES] [VP9_BINTRAMODES]; -unsigned int tree_update_hist [BLOCK_TYPES] - [COEF_BANDS] - [PREV_COEF_CONTEXTS] - [ENTROPY_NODES][2]; -unsigned int hybrid_tree_update_hist [BLOCK_TYPES] - [COEF_BANDS] - [PREV_COEF_CONTEXTS] - [ENTROPY_NODES][2]; -unsigned int tree_update_hist_8x8 [BLOCK_TYPES_8X8] - [COEF_BANDS] - [PREV_COEF_CONTEXTS] - [ENTROPY_NODES] [2]; -unsigned int hybrid_tree_update_hist_8x8 [BLOCK_TYPES_8X8] - [COEF_BANDS] - [PREV_COEF_CONTEXTS] - [ENTROPY_NODES] [2]; -unsigned int tree_update_hist_16x16 [BLOCK_TYPES_16X16] - [COEF_BANDS] - [PREV_COEF_CONTEXTS] - [ENTROPY_NODES] [2]; -unsigned int hybrid_tree_update_hist_16x16 [BLOCK_TYPES_16X16] - [COEF_BANDS] - [PREV_COEF_CONTEXTS] - [ENTROPY_NODES] [2]; - -extern unsigned int active_section; -#endif - -#ifdef MODE_STATS -int count_mb_seg[4] = { 0, 0, 0, 0 }; -#endif - -#define 
vp9_cost_upd ((int)(vp9_cost_one(upd) - vp9_cost_zero(upd)) >> 8) -#define vp9_cost_upd256 ((int)(vp9_cost_one(upd) - vp9_cost_zero(upd))) - -#define SEARCH_NEWP -static int update_bits[255]; - -static void compute_update_table() { - int i; - for (i = 0; i < 255; i++) - update_bits[i] = vp9_count_term_subexp(i, SUBEXP_PARAM, 255); -} - -static int split_index(int i, int n, int modulus) { - int max1 = (n - 1 - modulus / 2) / modulus + 1; - if (i % modulus == modulus / 2) i = i / modulus; - else i = max1 + i - (i + modulus - modulus / 2) / modulus; - return i; -} - -static int remap_prob(int v, int m) { - const int n = 256; - const int modulus = MODULUS_PARAM; - int i; - if ((m << 1) <= n) - i = vp9_recenter_nonneg(v, m) - 1; - else - i = vp9_recenter_nonneg(n - 1 - v, n - 1 - m) - 1; - - i = split_index(i, n - 1, modulus); - return i; -} - -static void write_prob_diff_update(vp9_writer *const bc, - vp9_prob newp, vp9_prob oldp) { - int delp = remap_prob(newp, oldp); - vp9_encode_term_subexp(bc, delp, SUBEXP_PARAM, 255); -} - -static int prob_diff_update_cost(vp9_prob newp, vp9_prob oldp) { - int delp = remap_prob(newp, oldp); - return update_bits[delp] * 256; -} - -static void update_mode( - vp9_writer *const bc, - int n, - vp9_token tok [/* n */], - vp9_tree tree, - vp9_prob Pnew [/* n-1 */], - vp9_prob Pcur [/* n-1 */], - unsigned int bct [/* n-1 */] [2], - const unsigned int num_events[/* n */] -) { - unsigned int new_b = 0, old_b = 0; - int i = 0; - - vp9_tree_probs_from_distribution( - n--, tok, tree, - Pnew, bct, num_events, - 256, 1 - ); - - do { - new_b += cost_branch(bct[i], Pnew[i]); - old_b += cost_branch(bct[i], Pcur[i]); - } while (++i < n); - - if (new_b + (n << 8) < old_b) { - int i = 0; - - vp9_write_bit(bc, 1); - - do { - const vp9_prob p = Pnew[i]; - - vp9_write_literal(bc, Pcur[i] = p ? 
p : 1, 8); - } while (++i < n); - } else - vp9_write_bit(bc, 0); -} - -static void update_mbintra_mode_probs(VP9_COMP* const cpi, - vp9_writer* const bc) { - VP9_COMMON *const cm = &cpi->common; - - { - vp9_prob Pnew [VP9_YMODES - 1]; - unsigned int bct [VP9_YMODES - 1] [2]; - - update_mode( - bc, VP9_YMODES, vp9_ymode_encodings, vp9_ymode_tree, - Pnew, cm->fc.ymode_prob, bct, (unsigned int *)cpi->ymode_count - ); - } -} - -static int get_prob(int num, int den) { - int p; - if (den <= 0) - return 128; - p = (num * 255 + (den >> 1)) / den; - if (p > 255) - return 255; - else if (p < 1) - return 1; - return p; -} - -static int get_binary_prob(int n0, int n1) { - return get_prob(n0, n0 + n1); -} - -void vp9_update_skip_probs(VP9_COMP *cpi) { - VP9_COMMON *const pc = &cpi->common; - int prob_skip_false[3] = {0, 0, 0}; - int k; - - for (k = 0; k < MBSKIP_CONTEXTS; ++k) { - pc->mbskip_pred_probs[k] = get_binary_prob(cpi->skip_false_count[k], - cpi->skip_true_count[k]); - } -} - -static void update_switchable_interp_probs(VP9_COMP *cpi, - vp9_writer* const bc) { - VP9_COMMON *const pc = &cpi->common; - unsigned int branch_ct[32][2]; - int i, j; - for (j = 0; j <= VP9_SWITCHABLE_FILTERS; ++j) { - vp9_tree_probs_from_distribution( - VP9_SWITCHABLE_FILTERS, - vp9_switchable_interp_encodings, vp9_switchable_interp_tree, - pc->fc.switchable_interp_prob[j], branch_ct, - cpi->switchable_interp_count[j], 256, 1); - for (i = 0; i < VP9_SWITCHABLE_FILTERS - 1; ++i) { - if (pc->fc.switchable_interp_prob[j][i] < 1) - pc->fc.switchable_interp_prob[j][i] = 1; - vp9_write_literal(bc, pc->fc.switchable_interp_prob[j][i], 8); - } - } -} - -// This function updates the reference frame prediction stats -static void update_refpred_stats(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; - int i; - int tot_count; - vp9_prob new_pred_probs[PREDICTION_PROBS]; - int old_cost, new_cost; - - // Set the prediction probability structures to defaults - if (cm->frame_type == KEY_FRAME) { - // Set 
the prediction probabilities to defaults - cm->ref_pred_probs[0] = 120; - cm->ref_pred_probs[1] = 80; - cm->ref_pred_probs[2] = 40; - - vpx_memset(cpi->ref_pred_probs_update, 0, - sizeof(cpi->ref_pred_probs_update)); - } else { - // From the prediction counts set the probabilities for each context - for (i = 0; i < PREDICTION_PROBS; i++) { - new_pred_probs[i] = get_binary_prob(cpi->ref_pred_count[i][0], - cpi->ref_pred_count[i][1]); - - // Decide whether or not to update the reference frame probs. - // Returned costs are in 1/256 bit units. - old_cost = - (cpi->ref_pred_count[i][0] * vp9_cost_zero(cm->ref_pred_probs[i])) + - (cpi->ref_pred_count[i][1] * vp9_cost_one(cm->ref_pred_probs[i])); - - new_cost = - (cpi->ref_pred_count[i][0] * vp9_cost_zero(new_pred_probs[i])) + - (cpi->ref_pred_count[i][1] * vp9_cost_one(new_pred_probs[i])); - - // Cost saving must be >= 8 bits (2048 in these units) - if ((old_cost - new_cost) >= 2048) { - cpi->ref_pred_probs_update[i] = 1; - cm->ref_pred_probs[i] = new_pred_probs[i]; - } else - cpi->ref_pred_probs_update[i] = 0; - - } - } -} - -static void update_mvcount(VP9_COMP *cpi, MACROBLOCK *x, - int_mv *best_ref_mv, int_mv *second_best_ref_mv) { - MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; - MV mv; - - if (mbmi->mode == SPLITMV) { - int i; - - for (i = 0; i < x->partition_info->count; i++) { - if (x->partition_info->bmi[i].mode == NEW4X4) { - if (x->e_mbd.allow_high_precision_mv) { - mv.row = (x->partition_info->bmi[i].mv.as_mv.row - - best_ref_mv->as_mv.row); - mv.col = (x->partition_info->bmi[i].mv.as_mv.col - - best_ref_mv->as_mv.col); - vp9_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount, 1); - if (x->e_mbd.mode_info_context->mbmi.second_ref_frame) { - mv.row = (x->partition_info->bmi[i].second_mv.as_mv.row - - second_best_ref_mv->as_mv.row); - mv.col = (x->partition_info->bmi[i].second_mv.as_mv.col - - second_best_ref_mv->as_mv.col); - vp9_increment_nmv(&mv, &second_best_ref_mv->as_mv, - &cpi->NMVcount, 
1); - } - } else { - mv.row = (x->partition_info->bmi[i].mv.as_mv.row - - best_ref_mv->as_mv.row); - mv.col = (x->partition_info->bmi[i].mv.as_mv.col - - best_ref_mv->as_mv.col); - vp9_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount, 0); - if (x->e_mbd.mode_info_context->mbmi.second_ref_frame) { - mv.row = (x->partition_info->bmi[i].second_mv.as_mv.row - - second_best_ref_mv->as_mv.row); - mv.col = (x->partition_info->bmi[i].second_mv.as_mv.col - - second_best_ref_mv->as_mv.col); - vp9_increment_nmv(&mv, &second_best_ref_mv->as_mv, - &cpi->NMVcount, 0); - } - } - } - } - } else if (mbmi->mode == NEWMV) { - if (x->e_mbd.allow_high_precision_mv) { - mv.row = (mbmi->mv[0].as_mv.row - best_ref_mv->as_mv.row); - mv.col = (mbmi->mv[0].as_mv.col - best_ref_mv->as_mv.col); - vp9_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount, 1); - if (mbmi->second_ref_frame) { - mv.row = (mbmi->mv[1].as_mv.row - second_best_ref_mv->as_mv.row); - mv.col = (mbmi->mv[1].as_mv.col - second_best_ref_mv->as_mv.col); - vp9_increment_nmv(&mv, &second_best_ref_mv->as_mv, &cpi->NMVcount, 1); - } - } else { - mv.row = (mbmi->mv[0].as_mv.row - best_ref_mv->as_mv.row); - mv.col = (mbmi->mv[0].as_mv.col - best_ref_mv->as_mv.col); - vp9_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount, 0); - if (mbmi->second_ref_frame) { - mv.row = (mbmi->mv[1].as_mv.row - second_best_ref_mv->as_mv.row); - mv.col = (mbmi->mv[1].as_mv.col - second_best_ref_mv->as_mv.col); - vp9_increment_nmv(&mv, &second_best_ref_mv->as_mv, &cpi->NMVcount, 0); - } - } - } -} - -static void write_ymode(vp9_writer *bc, int m, const vp9_prob *p) { - write_token(bc, vp9_ymode_tree, p, vp9_ymode_encodings + m); -} - -static void kfwrite_ymode(vp9_writer *bc, int m, const vp9_prob *p) { - write_token(bc, vp9_kf_ymode_tree, p, vp9_kf_ymode_encodings + m); -} - -#if CONFIG_SUPERBLOCKS -static void sb_kfwrite_ymode(vp9_writer *bc, int m, const vp9_prob *p) { - write_token(bc, vp9_uv_mode_tree, p, vp9_sb_kf_ymode_encodings + 
m); -} -#endif - -static void write_i8x8_mode(vp9_writer *bc, int m, const vp9_prob *p) { - write_token(bc, vp9_i8x8_mode_tree, p, vp9_i8x8_mode_encodings + m); -} - -static void write_uv_mode(vp9_writer *bc, int m, const vp9_prob *p) { - write_token(bc, vp9_uv_mode_tree, p, vp9_uv_mode_encodings + m); -} - - -static void write_bmode(vp9_writer *bc, int m, const vp9_prob *p) { - write_token(bc, vp9_bmode_tree, p, vp9_bmode_encodings + m); -} - -static void write_split(vp9_writer *bc, int x, const vp9_prob *p) { - write_token(bc, vp9_mbsplit_tree, p, vp9_mbsplit_encodings + x); -} - -static int prob_update_savings(const unsigned int *ct, - const vp9_prob oldp, const vp9_prob newp, - const vp9_prob upd) { - const int old_b = cost_branch256(ct, oldp); - const int new_b = cost_branch256(ct, newp); - const int update_b = 2048 + vp9_cost_upd256; - return (old_b - new_b - update_b); -} - -static int prob_diff_update_savings(const unsigned int *ct, - const vp9_prob oldp, const vp9_prob newp, - const vp9_prob upd) { - const int old_b = cost_branch256(ct, oldp); - const int new_b = cost_branch256(ct, newp); - const int update_b = (newp == oldp ? 0 : - prob_diff_update_cost(newp, oldp) + vp9_cost_upd256); - return (old_b - new_b - update_b); -} - -static int prob_diff_update_savings_search(const unsigned int *ct, - const vp9_prob oldp, vp9_prob *bestp, - const vp9_prob upd) { - const int old_b = cost_branch256(ct, oldp); - int new_b, update_b, savings, bestsavings, step; - vp9_prob newp, bestnewp; - - bestsavings = 0; - bestnewp = oldp; - - step = (*bestp > oldp ? 
-1 : 1); - for (newp = *bestp; newp != oldp; newp += step) { - new_b = cost_branch256(ct, newp); - update_b = prob_diff_update_cost(newp, oldp) + vp9_cost_upd256; - savings = old_b - new_b - update_b; - if (savings > bestsavings) { - bestsavings = savings; - bestnewp = newp; - } - } - *bestp = bestnewp; - return bestsavings; -} - -static void pack_mb_tokens(vp9_writer* const bc, - TOKENEXTRA **tp, - const TOKENEXTRA *const stop) { - unsigned int split; - unsigned int shift; - int count = bc->count; - unsigned int range = bc->range; - unsigned int lowvalue = bc->lowvalue; - TOKENEXTRA *p = *tp; - - while (p < stop) { - const int t = p->Token; - vp9_token *const a = vp9_coef_encodings + t; - const vp9_extra_bit_struct *const b = vp9_extra_bits + t; - int i = 0; - const unsigned char *pp = p->context_tree; - int v = a->value; - int n = a->Len; - - if (t == EOSB_TOKEN) - { - ++p; - break; - } - - /* skip one or two nodes */ - if (p->skip_eob_node) { - n -= p->skip_eob_node; - i = 2 * p->skip_eob_node; - } - - do { - const int bb = (v >> --n) & 1; - split = 1 + (((range - 1) * pp[i >> 1]) >> 8); - i = vp9_coef_tree[i + bb]; - - if (bb) { - lowvalue += split; - range = range - split; - } else { - range = split; - } - - shift = vp9_norm[range]; - range <<= shift; - count += shift; - - if (count >= 0) { - int offset = shift - count; - - if ((lowvalue << (offset - 1)) & 0x80000000) { - int x = bc->pos - 1; - - while (x >= 0 && bc->buffer[x] == 0xff) { - bc->buffer[x] = (unsigned char)0; - x--; - } - - bc->buffer[x] += 1; - } - - bc->buffer[bc->pos++] = (lowvalue >> (24 - offset)); - lowvalue <<= offset; - shift = count; - lowvalue &= 0xffffff; - count -= 8; - } - - lowvalue <<= shift; - } while (n); - - - if (b->base_val) { - const int e = p->Extra, L = b->Len; - - if (L) { - const unsigned char *pp = b->prob; - int v = e >> 1; - int n = L; /* number of bits in v, assumed nonzero */ - int i = 0; - - do { - const int bb = (v >> --n) & 1; - split = 1 + (((range - 1) * pp[i >> 
1]) >> 8); - i = b->tree[i + bb]; - - if (bb) { - lowvalue += split; - range = range - split; - } else { - range = split; - } - - shift = vp9_norm[range]; - range <<= shift; - count += shift; - - if (count >= 0) { - int offset = shift - count; - - if ((lowvalue << (offset - 1)) & 0x80000000) { - int x = bc->pos - 1; - - while (x >= 0 && bc->buffer[x] == 0xff) { - bc->buffer[x] = (unsigned char)0; - x--; - } - - bc->buffer[x] += 1; - } - - bc->buffer[bc->pos++] = (lowvalue >> (24 - offset)); - lowvalue <<= offset; - shift = count; - lowvalue &= 0xffffff; - count -= 8; - } - - lowvalue <<= shift; - } while (n); - } - - - { - - split = (range + 1) >> 1; - - if (e & 1) { - lowvalue += split; - range = range - split; - } else { - range = split; - } - - range <<= 1; - - if ((lowvalue & 0x80000000)) { - int x = bc->pos - 1; - - while (x >= 0 && bc->buffer[x] == 0xff) { - bc->buffer[x] = (unsigned char)0; - x--; - } - - bc->buffer[x] += 1; - - } - - lowvalue <<= 1; - - if (!++count) { - count = -8; - bc->buffer[bc->pos++] = (lowvalue >> 24); - lowvalue &= 0xffffff; - } - } - - } - ++p; - } - - bc->count = count; - bc->lowvalue = lowvalue; - bc->range = range; - *tp = p; -} - -static void write_partition_size(unsigned char *cx_data, int size) { - signed char csize; - - csize = size & 0xff; - *cx_data = csize; - csize = (size >> 8) & 0xff; - *(cx_data + 1) = csize; - csize = (size >> 16) & 0xff; - *(cx_data + 2) = csize; - -} - -static void write_mv_ref -( - vp9_writer *bc, MB_PREDICTION_MODE m, const vp9_prob *p -) { -#if CONFIG_DEBUG - assert(NEARESTMV <= m && m <= SPLITMV); -#endif - write_token(bc, vp9_mv_ref_tree, p, - vp9_mv_ref_encoding_array - NEARESTMV + m); -} - -#if CONFIG_SUPERBLOCKS -static void write_sb_mv_ref(vp9_writer *bc, MB_PREDICTION_MODE m, - const vp9_prob *p) { -#if CONFIG_DEBUG - assert(NEARESTMV <= m && m < SPLITMV); -#endif - write_token(bc, vp9_sb_mv_ref_tree, p, - vp9_sb_mv_ref_encoding_array - NEARESTMV + m); -} -#endif - -static void 
write_sub_mv_ref -( - vp9_writer *bc, B_PREDICTION_MODE m, const vp9_prob *p -) { -#if CONFIG_DEBUG - assert(LEFT4X4 <= m && m <= NEW4X4); -#endif - write_token(bc, vp9_sub_mv_ref_tree, p, - vp9_sub_mv_ref_encoding_array - LEFT4X4 + m); -} - -static void write_nmv(vp9_writer *bc, const MV *mv, const int_mv *ref, - const nmv_context *nmvc, int usehp) { - MV e; - e.row = mv->row - ref->as_mv.row; - e.col = mv->col - ref->as_mv.col; - - vp9_encode_nmv(bc, &e, &ref->as_mv, nmvc); - vp9_encode_nmv_fp(bc, &e, &ref->as_mv, nmvc, usehp); -} - -#if CONFIG_NEW_MVREF -static int vp9_cost_mv_ref_id(vp9_prob * ref_id_probs, int mv_ref_id) { - int cost; - - // Encode the index for the MV reference. - switch (mv_ref_id) { - case 0: - cost = vp9_cost_zero(ref_id_probs[0]); - break; - case 1: - cost = vp9_cost_one(ref_id_probs[0]); - cost += vp9_cost_zero(ref_id_probs[1]); - break; - case 2: - cost = vp9_cost_one(ref_id_probs[0]); - cost += vp9_cost_one(ref_id_probs[1]); - cost += vp9_cost_zero(ref_id_probs[2]); - break; - case 3: - cost = vp9_cost_one(ref_id_probs[0]); - cost += vp9_cost_one(ref_id_probs[1]); - cost += vp9_cost_one(ref_id_probs[2]); - break; - - // TRAP.. This should not happen - default: - assert(0); - break; - } - - return cost; -} - -static void vp9_write_mv_ref_id(vp9_writer *w, - vp9_prob * ref_id_probs, - int mv_ref_id) { - // Encode the index for the MV reference. - switch (mv_ref_id) { - case 0: - vp9_write(w, 0, ref_id_probs[0]); - break; - case 1: - vp9_write(w, 1, ref_id_probs[0]); - vp9_write(w, 0, ref_id_probs[1]); - break; - case 2: - vp9_write(w, 1, ref_id_probs[0]); - vp9_write(w, 1, ref_id_probs[1]); - vp9_write(w, 0, ref_id_probs[2]); - break; - case 3: - vp9_write(w, 1, ref_id_probs[0]); - vp9_write(w, 1, ref_id_probs[1]); - vp9_write(w, 1, ref_id_probs[2]); - break; - - // TRAP.. 
This should not happen - default: - assert(0); - break; - } -} - -// Estimate the cost of each coding the vector using each reference candidate -static unsigned int pick_best_mv_ref(MACROBLOCK *x, - MV_REFERENCE_FRAME ref_frame, - int_mv target_mv, - int_mv * mv_ref_list, - int_mv * best_ref) { - int i; - int best_index = 0; - int cost, cost2; - int zero_seen = (mv_ref_list[0].as_int) ? FALSE : TRUE; - MACROBLOCKD *xd = &x->e_mbd; - int max_mv = MV_MAX; - - cost = vp9_cost_mv_ref_id(xd->mb_mv_ref_id_probs[ref_frame], 0) + - vp9_mv_bit_cost(&target_mv, - &mv_ref_list[0], - XMVCOST, 96, - xd->allow_high_precision_mv); - - - // Use 4 for now : for (i = 1; i < MAX_MV_REFS; ++i ) { - for (i = 1; i < 4; ++i) { - // If we see a 0,0 reference vector for a second time we have reached - // the end of the list of valid candidate vectors. - if (!mv_ref_list[i].as_int) - if (zero_seen) - break; - else - zero_seen = TRUE; - - // Check for cases where the reference choice would give rise to an - // uncodable/out of range residual for row or col. - if ((abs(target_mv.as_mv.row - mv_ref_list[i].as_mv.row) > max_mv) || - (abs(target_mv.as_mv.col - mv_ref_list[i].as_mv.col) > max_mv)) { - continue; - } - - cost2 = vp9_cost_mv_ref_id(xd->mb_mv_ref_id_probs[ref_frame], i) + - vp9_mv_bit_cost(&target_mv, - &mv_ref_list[i], - XMVCOST, 96, - xd->allow_high_precision_mv); - - if (cost2 < cost) { - cost = cost2; - best_index = i; - } - } - - (*best_ref).as_int = mv_ref_list[best_index].as_int; - - return best_index; -} -#endif - -// This function writes the current macro block's segnment id to the bitstream -// It should only be called if a segment map update is indicated. -static void write_mb_segid(vp9_writer *bc, - const MB_MODE_INFO *mi, const MACROBLOCKD *xd) { - // Encode the MB segment id. 
- int seg_id = mi->segment_id; -#if CONFIG_SUPERBLOCKS - if (mi->encoded_as_sb) { - if (xd->mb_to_right_edge > 0) - seg_id = seg_id && xd->mode_info_context[1].mbmi.segment_id; - if (xd->mb_to_bottom_edge > 0) { - seg_id = seg_id && - xd->mode_info_context[xd->mode_info_stride].mbmi.segment_id; - if (xd->mb_to_right_edge > 0) - seg_id = seg_id && - xd->mode_info_context[xd->mode_info_stride + 1].mbmi.segment_id; - } - } -#endif - if (xd->segmentation_enabled && xd->update_mb_segmentation_map) { - switch (seg_id) { - case 0: - vp9_write(bc, 0, xd->mb_segment_tree_probs[0]); - vp9_write(bc, 0, xd->mb_segment_tree_probs[1]); - break; - case 1: - vp9_write(bc, 0, xd->mb_segment_tree_probs[0]); - vp9_write(bc, 1, xd->mb_segment_tree_probs[1]); - break; - case 2: - vp9_write(bc, 1, xd->mb_segment_tree_probs[0]); - vp9_write(bc, 0, xd->mb_segment_tree_probs[2]); - break; - case 3: - vp9_write(bc, 1, xd->mb_segment_tree_probs[0]); - vp9_write(bc, 1, xd->mb_segment_tree_probs[2]); - break; - - // TRAP.. This should not happen - default: - vp9_write(bc, 0, xd->mb_segment_tree_probs[0]); - vp9_write(bc, 0, xd->mb_segment_tree_probs[1]); - break; - } - } -} - -// This function encodes the reference frame -static void encode_ref_frame(vp9_writer *const bc, - VP9_COMMON *const cm, - MACROBLOCKD *xd, - int segment_id, - MV_REFERENCE_FRAME rf) { - int seg_ref_active; - int seg_ref_count = 0; - seg_ref_active = vp9_segfeature_active(xd, - segment_id, - SEG_LVL_REF_FRAME); - - if (seg_ref_active) { - seg_ref_count = vp9_check_segref(xd, segment_id, INTRA_FRAME) + - vp9_check_segref(xd, segment_id, LAST_FRAME) + - vp9_check_segref(xd, segment_id, GOLDEN_FRAME) + - vp9_check_segref(xd, segment_id, ALTREF_FRAME); - } - - // If segment level coding of this signal is disabled... 
- // or the segment allows multiple reference frame options - if (!seg_ref_active || (seg_ref_count > 1)) { - // Values used in prediction model coding - unsigned char prediction_flag; - vp9_prob pred_prob; - MV_REFERENCE_FRAME pred_rf; - - // Get the context probability the prediction flag - pred_prob = vp9_get_pred_prob(cm, xd, PRED_REF); - - // Get the predicted value. - pred_rf = vp9_get_pred_ref(cm, xd); - - // Did the chosen reference frame match its predicted value. - prediction_flag = - (xd->mode_info_context->mbmi.ref_frame == pred_rf); - - vp9_set_pred_flag(xd, PRED_REF, prediction_flag); - vp9_write(bc, prediction_flag, pred_prob); - - // If not predicted correctly then code value explicitly - if (!prediction_flag) { - vp9_prob mod_refprobs[PREDICTION_PROBS]; - - vpx_memcpy(mod_refprobs, - cm->mod_refprobs[pred_rf], sizeof(mod_refprobs)); - - // If segment coding enabled blank out options that cant occur by - // setting the branch probability to 0. - if (seg_ref_active) { - mod_refprobs[INTRA_FRAME] *= - vp9_check_segref(xd, segment_id, INTRA_FRAME); - mod_refprobs[LAST_FRAME] *= - vp9_check_segref(xd, segment_id, LAST_FRAME); - mod_refprobs[GOLDEN_FRAME] *= - (vp9_check_segref(xd, segment_id, GOLDEN_FRAME) * - vp9_check_segref(xd, segment_id, ALTREF_FRAME)); - } - - if (mod_refprobs[0]) { - vp9_write(bc, (rf != INTRA_FRAME), mod_refprobs[0]); - } - - // Inter coded - if (rf != INTRA_FRAME) { - if (mod_refprobs[1]) { - vp9_write(bc, (rf != LAST_FRAME), mod_refprobs[1]); - } - - if (rf != LAST_FRAME) { - if (mod_refprobs[2]) { - vp9_write(bc, (rf != GOLDEN_FRAME), mod_refprobs[2]); - } - } - } - } - } - - // if using the prediction mdoel we have nothing further to do because - // the reference frame is fully coded by the segment -} - -// Update the probabilities used to encode reference frame data -static void update_ref_probs(VP9_COMP *const cpi) { - VP9_COMMON *const cm = &cpi->common; - - const int *const rfct = cpi->count_mb_ref_frame_usage; - const 
int rf_intra = rfct[INTRA_FRAME]; - const int rf_inter = rfct[LAST_FRAME] + - rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]; - - cm->prob_intra_coded = get_binary_prob(rf_intra, rf_inter); - cm->prob_last_coded = get_prob(rfct[LAST_FRAME], rf_inter); - cm->prob_gf_coded = get_binary_prob(rfct[GOLDEN_FRAME], rfct[ALTREF_FRAME]); - - // Compute a modified set of probabilities to use when prediction of the - // reference frame fails - vp9_compute_mod_refprobs(cm); -} - -static void pack_inter_mode_mvs(VP9_COMP *const cpi, vp9_writer *const bc) { - int i; - VP9_COMMON *const pc = &cpi->common; - const nmv_context *nmvc = &pc->fc.nmvc; - MACROBLOCK *x = &cpi->mb; - MACROBLOCKD *xd = &cpi->mb.e_mbd; - MODE_INFO *m; - MODE_INFO *prev_m; - TOKENEXTRA *tok = cpi->tok; - TOKENEXTRA *tok_end = tok + cpi->tok_count; - - const int mis = pc->mode_info_stride; - int mb_row, mb_col; - int row, col; - - // Values used in prediction model coding - vp9_prob pred_prob; - unsigned char prediction_flag; - - int row_delta[4] = { 0, +1, 0, -1}; - int col_delta[4] = { +1, -1, +1, +1}; - - cpi->mb.partition_info = cpi->mb.pi; - - mb_row = 0; - for (row = 0; row < pc->mb_rows; row += 2) { - m = pc->mi + row * mis; - prev_m = pc->prev_mi + row * mis; - - mb_col = 0; - for (col = 0; col < pc->mb_cols; col += 2) { - int i; - - // Process the 4 MBs in the order: - // top-left, top-right, bottom-left, bottom-right -#if CONFIG_SUPERBLOCKS - vp9_write(bc, m->mbmi.encoded_as_sb, pc->sb_coded); -#endif - for (i = 0; i < 4; i++) { - MB_MODE_INFO *mi; - MV_REFERENCE_FRAME rf; - MB_PREDICTION_MODE mode; - int segment_id; - - int dy = row_delta[i]; - int dx = col_delta[i]; - int offset_extended = dy * mis + dx; - - if ((mb_row >= pc->mb_rows) || (mb_col >= pc->mb_cols)) { - // MB lies outside frame, move on - mb_row += dy; - mb_col += dx; - m += offset_extended; - prev_m += offset_extended; - cpi->mb.partition_info += offset_extended; - continue; - } - - mi = &m->mbmi; - rf = mi->ref_frame; - mode = mi->mode; 
- segment_id = mi->segment_id; - - // Distance of Mb to the various image edges. - // These specified to 8th pel as they are always compared to MV - // values that are in 1/8th pel units - xd->mb_to_left_edge = -((mb_col * 16) << 3); - xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3; - xd->mb_to_top_edge = -((mb_row * 16)) << 3; - xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3; - - // Make sure the MacroBlockD mode info pointer is set correctly - xd->mode_info_context = m; - xd->prev_mode_info_context = prev_m; - -#ifdef ENTROPY_STATS - active_section = 9; -#endif - if (cpi->mb.e_mbd.update_mb_segmentation_map) { - // Is temporal coding of the segment map enabled - if (pc->temporal_update) { - prediction_flag = vp9_get_pred_flag(xd, PRED_SEG_ID); - pred_prob = vp9_get_pred_prob(pc, xd, PRED_SEG_ID); - - // Code the segment id prediction flag for this mb - vp9_write(bc, prediction_flag, pred_prob); - - // If the mb segment id wasn't predicted code explicitly - if (!prediction_flag) - write_mb_segid(bc, mi, &cpi->mb.e_mbd); - } else { - // Normal unpredicted coding - write_mb_segid(bc, mi, &cpi->mb.e_mbd); - } - } - - if (pc->mb_no_coeff_skip && - (!vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) || - (vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) != 0))) { - int skip_coeff = mi->mb_skip_coeff; -#if CONFIG_SUPERBLOCKS - if (mi->encoded_as_sb) { - skip_coeff &= m[1].mbmi.mb_skip_coeff; - skip_coeff &= m[mis].mbmi.mb_skip_coeff; - skip_coeff &= m[mis + 1].mbmi.mb_skip_coeff; - } -#endif - vp9_write(bc, skip_coeff, - vp9_get_pred_prob(pc, xd, PRED_MBSKIP)); - } - - // Encode the reference frame. 
- encode_ref_frame(bc, pc, xd, segment_id, rf); - - if (rf == INTRA_FRAME) { -#ifdef ENTROPY_STATS - active_section = 6; -#endif - - // TODO(rbultje) write using SB tree structure - - if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE)) { - write_ymode(bc, mode, pc->fc.ymode_prob); - } - - if (mode == B_PRED) { - int j = 0; -#if CONFIG_COMP_INTRA_PRED - int uses_second = - m->bmi[0].as_mode.second != - (B_PREDICTION_MODE)(B_DC_PRED - 1); - vp9_write(bc, uses_second, 128); -#endif - do { -#if CONFIG_COMP_INTRA_PRED - B_PREDICTION_MODE mode2 = m->bmi[j].as_mode.second; -#endif - write_bmode(bc, m->bmi[j].as_mode.first, - pc->fc.bmode_prob); - /* - if (!cpi->dummy_packing) { - int p; - for (p = 0; p < VP9_BINTRAMODES - 1; ++p) - printf(" %d", pc->fc.bmode_prob[p]); - printf("\nbmode[%d][%d]: %d\n", pc->current_video_frame, j, m->bmi[j].as_mode.first); - } - */ -#if CONFIG_COMP_INTRA_PRED - if (uses_second) { - write_bmode(bc, mode2, pc->fc.bmode_prob); - } -#endif - } while (++j < 16); - } - if (mode == I8X8_PRED) { - write_i8x8_mode(bc, m->bmi[0].as_mode.first, - pc->fc.i8x8_mode_prob); - write_i8x8_mode(bc, m->bmi[2].as_mode.first, - pc->fc.i8x8_mode_prob); - write_i8x8_mode(bc, m->bmi[8].as_mode.first, - pc->fc.i8x8_mode_prob); - write_i8x8_mode(bc, m->bmi[10].as_mode.first, - pc->fc.i8x8_mode_prob); - } else { - write_uv_mode(bc, mi->uv_mode, - pc->fc.uv_mode_prob[mode]); - } - } else { - int_mv best_mv, best_second_mv; - int ct[4]; - - vp9_prob mv_ref_p [VP9_MVREFS - 1]; - - { - int_mv n1, n2; - - // Only used for context just now and soon to be deprecated. 
- vp9_find_near_mvs(xd, m, prev_m, &n1, &n2, &best_mv, ct, - rf, cpi->common.ref_frame_sign_bias); -#if CONFIG_NEWBESTREFMV - best_mv.as_int = mi->ref_mvs[rf][0].as_int; -#endif - - vp9_mv_ref_probs(&cpi->common, mv_ref_p, ct); - -#ifdef ENTROPY_STATS - accum_mv_refs(mode, ct); -#endif - } - -#ifdef ENTROPY_STATS - active_section = 3; -#endif - - // Is the segment coding of mode enabled - if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE)) { -#if CONFIG_SUPERBLOCKS - if (mi->encoded_as_sb) { - write_sb_mv_ref(bc, mode, mv_ref_p); - } else -#endif - { - write_mv_ref(bc, mode, mv_ref_p); - } - vp9_accum_mv_refs(&cpi->common, mode, ct); - } - -#if CONFIG_PRED_FILTER - // Is the prediction filter enabled - if (mode >= NEARESTMV && mode < SPLITMV) { - if (cpi->common.pred_filter_mode == 2) - vp9_write(bc, mi->pred_filter_enabled, - pc->prob_pred_filter_off); - else - assert(mi->pred_filter_enabled == - cpi->common.pred_filter_mode); - } -#endif - if (mode >= NEARESTMV && mode <= SPLITMV) - { - if (cpi->common.mcomp_filter_type == SWITCHABLE) { - write_token(bc, vp9_switchable_interp_tree, - vp9_get_pred_probs(&cpi->common, xd, - PRED_SWITCHABLE_INTERP), - vp9_switchable_interp_encodings + - vp9_switchable_interp_map[mi->interp_filter]); - } else { - assert (mi->interp_filter == - cpi->common.mcomp_filter_type); - } - } - if (mi->second_ref_frame && - (mode == NEWMV || mode == SPLITMV)) { - int_mv n1, n2; - - // Only used for context just now and soon to be deprecated. 
- vp9_find_near_mvs(xd, m, prev_m, - &n1, &n2, &best_second_mv, ct, - mi->second_ref_frame, - cpi->common.ref_frame_sign_bias); - -#if CONFIG_NEWBESTREFMV - best_second_mv.as_int = - mi->ref_mvs[mi->second_ref_frame][0].as_int; -#endif - } - - // does the feature use compound prediction or not - // (if not specified at the frame/segment level) - if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) { - vp9_write(bc, mi->second_ref_frame != INTRA_FRAME, - vp9_get_pred_prob(pc, xd, PRED_COMP)); - } - - { - switch (mode) { /* new, split require MVs */ - case NEWMV: -#ifdef ENTROPY_STATS - active_section = 5; -#endif - -#if CONFIG_NEW_MVREF - { - unsigned int best_index; - - // Choose the best mv reference - best_index = pick_best_mv_ref(x, rf, mi->mv[0], - mi->ref_mvs[rf], &best_mv); - - // Encode the index of the choice. - vp9_write_mv_ref_id(bc, - xd->mb_mv_ref_id_probs[rf], best_index); - - cpi->best_ref_index_counts[rf][best_index]++; - - } -#endif - - write_nmv(bc, &mi->mv[0].as_mv, &best_mv, - (const nmv_context*) nmvc, - xd->allow_high_precision_mv); - - if (mi->second_ref_frame) { -#if CONFIG_NEW_MVREF - unsigned int best_index; - MV_REFERENCE_FRAME sec_ref_frame = mi->second_ref_frame; - - best_index = - pick_best_mv_ref(x, sec_ref_frame, mi->mv[1], - mi->ref_mvs[sec_ref_frame], - &best_second_mv); - - // Encode the index of the choice. 
- vp9_write_mv_ref_id(bc, - xd->mb_mv_ref_id_probs[sec_ref_frame], - best_index); - - cpi->best_ref_index_counts[sec_ref_frame][best_index]++; -#endif - write_nmv(bc, &mi->mv[1].as_mv, &best_second_mv, - (const nmv_context*) nmvc, - xd->allow_high_precision_mv); - } - break; - case SPLITMV: { - int j = 0; - -#ifdef MODE_STATS - ++count_mb_seg [mi->partitioning]; -#endif - - write_split(bc, mi->partitioning, cpi->common.fc.mbsplit_prob); - cpi->mbsplit_count[mi->partitioning]++; - - do { - B_PREDICTION_MODE blockmode; - int_mv blockmv; - const int *const L = - vp9_mbsplits [mi->partitioning]; - int k = -1; /* first block in subset j */ - int mv_contz; - int_mv leftmv, abovemv; - - blockmode = cpi->mb.partition_info->bmi[j].mode; - blockmv = cpi->mb.partition_info->bmi[j].mv; -#if CONFIG_DEBUG - while (j != L[++k]) - if (k >= 16) - assert(0); -#else - while (j != L[++k]); -#endif - leftmv.as_int = left_block_mv(m, k); - abovemv.as_int = above_block_mv(m, k, mis); - mv_contz = vp9_mv_cont(&leftmv, &abovemv); - - write_sub_mv_ref(bc, blockmode, - cpi->common.fc.sub_mv_ref_prob [mv_contz]); - cpi->sub_mv_ref_count[mv_contz][blockmode - LEFT4X4]++; - if (blockmode == NEW4X4) { -#ifdef ENTROPY_STATS - active_section = 11; -#endif - write_nmv(bc, &blockmv.as_mv, &best_mv, - (const nmv_context*) nmvc, - xd->allow_high_precision_mv); - - if (mi->second_ref_frame) { - write_nmv(bc, - &cpi->mb.partition_info->bmi[j].second_mv.as_mv, - &best_second_mv, - (const nmv_context*) nmvc, - xd->allow_high_precision_mv); - } - } - } while (++j < cpi->mb.partition_info->count); - } - break; - default: - break; - } - } - - // Update the mvcounts used to tune mv probs but only if this is - // the real pack run. 
- if ( !cpi->dummy_packing ) { - update_mvcount(cpi, x, &best_mv, &best_second_mv); - } - } - - if ( -#if CONFIG_SUPERBLOCKS - !mi->encoded_as_sb && -#endif - ((rf == INTRA_FRAME && mode <= I8X8_PRED) || - (rf != INTRA_FRAME && !(mode == SPLITMV && - mi->partitioning == PARTITIONING_4X4))) && - pc->txfm_mode == TX_MODE_SELECT && - !((pc->mb_no_coeff_skip && mi->mb_skip_coeff) || - (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) && - vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0))) { - TX_SIZE sz = mi->txfm_size; - // FIXME(rbultje) code ternary symbol once all experiments are merged - vp9_write(bc, sz != TX_4X4, pc->prob_tx[0]); - if (sz != TX_4X4 && mode != I8X8_PRED && mode != SPLITMV) - vp9_write(bc, sz != TX_8X8, pc->prob_tx[1]); - } - -#ifdef ENTROPY_STATS - active_section = 1; -#endif - assert(tok < tok_end); - pack_mb_tokens(bc, &tok, tok_end); - -#if CONFIG_SUPERBLOCKS - if (m->mbmi.encoded_as_sb) { - assert(!i); - mb_col += 2; - m += 2; - cpi->mb.partition_info += 2; - prev_m += 2; - break; - } -#endif - - // Next MB - mb_row += dy; - mb_col += dx; - m += offset_extended; - prev_m += offset_extended; - cpi->mb.partition_info += offset_extended; -#if CONFIG_DEBUG - assert((prev_m - cpi->common.prev_mip) == (m - cpi->common.mip)); - assert((prev_m - cpi->common.prev_mi) == (m - cpi->common.mi)); -#endif - } - } - - // Next SB - mb_row += 2; - m += mis + (1 - (pc->mb_cols & 0x1)); - prev_m += mis + (1 - (pc->mb_cols & 0x1)); - cpi->mb.partition_info += mis + (1 - (pc->mb_cols & 0x1)); - } -} - - -static void write_mb_modes_kf(const VP9_COMMON *c, - const MACROBLOCKD *xd, - const MODE_INFO *m, - int mode_info_stride, - vp9_writer *const bc) { - const int mis = mode_info_stride; - int ym; - int segment_id; - - ym = m->mbmi.mode; - segment_id = m->mbmi.segment_id; - - if (xd->update_mb_segmentation_map) { - write_mb_segid(bc, &m->mbmi, xd); - } - - if (c->mb_no_coeff_skip && - (!vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) || - (vp9_get_segdata(xd, 
segment_id, SEG_LVL_EOB) != 0))) { - int skip_coeff = m->mbmi.mb_skip_coeff; -#if CONFIG_SUPERBLOCKS - if (m->mbmi.encoded_as_sb) { - skip_coeff &= m[1].mbmi.mb_skip_coeff; - skip_coeff &= m[mis].mbmi.mb_skip_coeff; - skip_coeff &= m[mis + 1].mbmi.mb_skip_coeff; - } -#endif - vp9_write(bc, skip_coeff, - vp9_get_pred_prob(c, xd, PRED_MBSKIP)); - } - -#if CONFIG_SUPERBLOCKS - if (m->mbmi.encoded_as_sb) { - sb_kfwrite_ymode(bc, ym, - c->sb_kf_ymode_prob[c->kf_ymode_probs_index]); - } else -#endif - { - kfwrite_ymode(bc, ym, - c->kf_ymode_prob[c->kf_ymode_probs_index]); - } - - if (ym == B_PRED) { - const int mis = c->mode_info_stride; - int i = 0; -#if CONFIG_COMP_INTRA_PRED - int uses_second = - m->bmi[0].as_mode.second != - (B_PREDICTION_MODE)(B_DC_PRED - 1); - vp9_write(bc, uses_second, 128); -#endif - do { - const B_PREDICTION_MODE A = above_block_mode(m, i, mis); - const B_PREDICTION_MODE L = left_block_mode(m, i); - const int bm = m->bmi[i].as_mode.first; -#if CONFIG_COMP_INTRA_PRED - const int bm2 = m->bmi[i].as_mode.second; -#endif - -#ifdef ENTROPY_STATS - ++intra_mode_stats [A] [L] [bm]; -#endif - - write_bmode(bc, bm, c->kf_bmode_prob [A] [L]); - // printf(" mode: %d\n", bm); -#if CONFIG_COMP_INTRA_PRED - if (uses_second) { - write_bmode(bc, bm2, c->kf_bmode_prob [A] [L]); - } -#endif - } while (++i < 16); - } - if (ym == I8X8_PRED) { - write_i8x8_mode(bc, m->bmi[0].as_mode.first, - c->fc.i8x8_mode_prob); - // printf(" mode: %d\n", m->bmi[0].as_mode.first); fflush(stdout); - write_i8x8_mode(bc, m->bmi[2].as_mode.first, - c->fc.i8x8_mode_prob); - // printf(" mode: %d\n", m->bmi[2].as_mode.first); fflush(stdout); - write_i8x8_mode(bc, m->bmi[8].as_mode.first, - c->fc.i8x8_mode_prob); - // printf(" mode: %d\n", m->bmi[8].as_mode.first); fflush(stdout); - write_i8x8_mode(bc, m->bmi[10].as_mode.first, - c->fc.i8x8_mode_prob); - // printf(" mode: %d\n", m->bmi[10].as_mode.first); fflush(stdout); - } else - write_uv_mode(bc, m->mbmi.uv_mode, 
c->kf_uv_mode_prob[ym]); - - if ( -#if CONFIG_SUPERBLOCKS - !m->mbmi.encoded_as_sb && -#endif - ym <= I8X8_PRED && c->txfm_mode == TX_MODE_SELECT && - !((c->mb_no_coeff_skip && m->mbmi.mb_skip_coeff) || - (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) && - vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0))) { - TX_SIZE sz = m->mbmi.txfm_size; - // FIXME(rbultje) code ternary symbol once all experiments are merged - vp9_write(bc, sz != TX_4X4, c->prob_tx[0]); - if (sz != TX_4X4 && ym <= TM_PRED) - vp9_write(bc, sz != TX_8X8, c->prob_tx[1]); - } -} - -static void write_kfmodes(VP9_COMP* const cpi, vp9_writer* const bc) { - VP9_COMMON *const c = &cpi->common; - const int mis = c->mode_info_stride; - MACROBLOCKD *xd = &cpi->mb.e_mbd; - MODE_INFO *m; - int i; - int row, col; - int mb_row, mb_col; - int row_delta[4] = { 0, +1, 0, -1}; - int col_delta[4] = { +1, -1, +1, +1}; - TOKENEXTRA *tok = cpi->tok; - TOKENEXTRA *tok_end = tok + cpi->tok_count; - - mb_row = 0; - for (row = 0; row < c->mb_rows; row += 2) { - m = c->mi + row * mis; - - mb_col = 0; - for (col = 0; col < c->mb_cols; col += 2) { -#if CONFIG_SUPERBLOCKS - vp9_write(bc, m->mbmi.encoded_as_sb, c->sb_coded); -#endif - // Process the 4 MBs in the order: - // top-left, top-right, bottom-left, bottom-right - for (i = 0; i < 4; i++) { - int dy = row_delta[i]; - int dx = col_delta[i]; - int offset_extended = dy * mis + dx; - - if ((mb_row >= c->mb_rows) || (mb_col >= c->mb_cols)) { - // MB lies outside frame, move on - mb_row += dy; - mb_col += dx; - m += offset_extended; - continue; - } - - // Make sure the MacroBlockD mode info pointer is set correctly - xd->mode_info_context = m; - - write_mb_modes_kf(c, xd, m, mis, bc); -#ifdef ENTROPY_STATS - active_section = 8; -#endif - assert(tok < tok_end); - pack_mb_tokens(bc, &tok, tok_end); - -#if CONFIG_SUPERBLOCKS - if (m->mbmi.encoded_as_sb) { - assert(!i); - mb_col += 2; - m += 2; - break; - } -#endif - // Next MB - mb_row += dy; - mb_col += dx; - m += 
offset_extended; - } - } - mb_row += 2; - } -} - - -/* This function is used for debugging probability trees. */ -static void print_prob_tree(vp9_prob - coef_probs[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]) { - /* print coef probability tree */ - int i, j, k, l; - FILE *f = fopen("enc_tree_probs.txt", "a"); - fprintf(f, "{\n"); - for (i = 0; i < BLOCK_TYPES; i++) { - fprintf(f, " {\n"); - for (j = 0; j < COEF_BANDS; j++) { - fprintf(f, " {\n"); - for (k = 0; k < PREV_COEF_CONTEXTS; k++) { - fprintf(f, " {"); - for (l = 0; l < ENTROPY_NODES; l++) { - fprintf(f, "%3u, ", - (unsigned int)(coef_probs [i][j][k][l])); - } - fprintf(f, " }\n"); - } - fprintf(f, " }\n"); - } - fprintf(f, " }\n"); - } - fprintf(f, "}\n"); - fclose(f); -} - -static void build_coeff_contexts(VP9_COMP *cpi) { - int i = 0, j, k; -#ifdef ENTROPY_STATS - int t = 0; -#endif - for (i = 0; i < BLOCK_TYPES; ++i) { - for (j = 0; j < COEF_BANDS; ++j) { - for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { - if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) - continue; - vp9_tree_probs_from_distribution( - MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree, - cpi->frame_coef_probs [i][j][k], - cpi->frame_branch_ct [i][j][k], - cpi->coef_counts [i][j][k], - 256, 1 - ); -#ifdef ENTROPY_STATS - if (!cpi->dummy_packing) - for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) - context_counters[i][j][k][t] += cpi->coef_counts[i][j][k][t]; -#endif - } - } - } - for (i = 0; i < BLOCK_TYPES; ++i) { - for (j = 0; j < COEF_BANDS; ++j) { - for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { - if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) - continue; - vp9_tree_probs_from_distribution( - MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree, - cpi->frame_hybrid_coef_probs [i][j][k], - cpi->frame_hybrid_branch_ct [i][j][k], - cpi->hybrid_coef_counts [i][j][k], - 256, 1 - ); -#ifdef ENTROPY_STATS - if (!cpi->dummy_packing) - for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) - hybrid_context_counters[i][j][k][t] += 
cpi->hybrid_coef_counts[i][j][k][t]; -#endif - } - } - } - - if (cpi->common.txfm_mode != ONLY_4X4) { - for (i = 0; i < BLOCK_TYPES_8X8; ++i) { - for (j = 0; j < COEF_BANDS; ++j) { - for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { - /* at every context */ - /* calc probs and branch cts for this frame only */ - // vp9_prob new_p [ENTROPY_NODES]; - // unsigned int branch_ct [ENTROPY_NODES] [2]; - if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) - continue; - vp9_tree_probs_from_distribution( - MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree, - cpi->frame_coef_probs_8x8 [i][j][k], - cpi->frame_branch_ct_8x8 [i][j][k], - cpi->coef_counts_8x8 [i][j][k], - 256, 1 - ); -#ifdef ENTROPY_STATS - if (!cpi->dummy_packing) - for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) - context_counters_8x8[i][j][k][t] += cpi->coef_counts_8x8[i][j][k][t]; -#endif - } - } - } - for (i = 0; i < BLOCK_TYPES_8X8; ++i) { - for (j = 0; j < COEF_BANDS; ++j) { - for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { - /* at every context */ - /* calc probs and branch cts for this frame only */ - // vp9_prob new_p [ENTROPY_NODES]; - // unsigned int branch_ct [ENTROPY_NODES] [2]; - if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) - continue; - vp9_tree_probs_from_distribution( - MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree, - cpi->frame_hybrid_coef_probs_8x8 [i][j][k], - cpi->frame_hybrid_branch_ct_8x8 [i][j][k], - cpi->hybrid_coef_counts_8x8 [i][j][k], - 256, 1 - ); -#ifdef ENTROPY_STATS - if (!cpi->dummy_packing) - for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) - hybrid_context_counters_8x8[i][j][k][t] += cpi->hybrid_coef_counts_8x8[i][j][k][t]; -#endif - } - } - } - } - - if (cpi->common.txfm_mode > ALLOW_8X8) { - for (i = 0; i < BLOCK_TYPES_16X16; ++i) { - for (j = 0; j < COEF_BANDS; ++j) { - for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { - if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) - continue; - vp9_tree_probs_from_distribution( - MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree, 
- cpi->frame_coef_probs_16x16[i][j][k], - cpi->frame_branch_ct_16x16[i][j][k], - cpi->coef_counts_16x16[i][j][k], 256, 1); -#ifdef ENTROPY_STATS - if (!cpi->dummy_packing) - for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) - context_counters_16x16[i][j][k][t] += cpi->coef_counts_16x16[i][j][k][t]; -#endif - } - } - } - } - for (i = 0; i < BLOCK_TYPES_16X16; ++i) { - for (j = 0; j < COEF_BANDS; ++j) { - for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { - if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) - continue; - vp9_tree_probs_from_distribution( - MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree, - cpi->frame_hybrid_coef_probs_16x16[i][j][k], - cpi->frame_hybrid_branch_ct_16x16[i][j][k], - cpi->hybrid_coef_counts_16x16[i][j][k], 256, 1); -#ifdef ENTROPY_STATS - if (!cpi->dummy_packing) - for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) - hybrid_context_counters_16x16[i][j][k][t] += cpi->hybrid_coef_counts_16x16[i][j][k][t]; -#endif - } - } - } -} - -static void update_coef_probs_common( - vp9_writer* const bc, - vp9_prob new_frame_coef_probs[BLOCK_TYPES][COEF_BANDS] - [PREV_COEF_CONTEXTS][ENTROPY_NODES], - vp9_prob old_frame_coef_probs[BLOCK_TYPES][COEF_BANDS] - [PREV_COEF_CONTEXTS][ENTROPY_NODES], - unsigned int frame_branch_ct[BLOCK_TYPES][COEF_BANDS] - [PREV_COEF_CONTEXTS][ENTROPY_NODES][2]) { - int i, j, k, t; - int update[2] = {0, 0}; - int savings; - // vp9_prob bestupd = find_coef_update_prob(cpi); - - /* dry run to see if there is any udpate at all needed */ - savings = 0; - for (i = 0; i < BLOCK_TYPES; ++i) { - for (j = !i; j < COEF_BANDS; ++j) { - int prev_coef_savings[ENTROPY_NODES] = {0}; - for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { - for (t = 0; t < ENTROPY_NODES; ++t) { - vp9_prob newp = new_frame_coef_probs[i][j][k][t]; - const vp9_prob oldp = old_frame_coef_probs[i][j][k][t]; - const vp9_prob upd = COEF_UPDATE_PROB; - int s = prev_coef_savings[t]; - int u = 0; - if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) - continue; -#if defined(SEARCH_NEWP) 
- s = prob_diff_update_savings_search( - frame_branch_ct[i][j][k][t], - oldp, &newp, upd); - if (s > 0 && newp != oldp) - u = 1; - if (u) - savings += s - (int)(vp9_cost_zero(upd)); - else - savings -= (int)(vp9_cost_zero(upd)); -#else - s = prob_update_savings( - frame_branch_ct[i][j][k][t], - oldp, newp, upd); - if (s > 0) - u = 1; - if (u) - savings += s; -#endif - - update[u]++; - } - } - } - } - - // printf("Update %d %d, savings %d\n", update[0], update[1], savings); - /* Is coef updated at all */ - if (update[1] == 0 || savings < 0) { - vp9_write_bit(bc, 0); - } else { - vp9_write_bit(bc, 1); - for (i = 0; i < BLOCK_TYPES; ++i) { - for (j = !i; j < COEF_BANDS; ++j) { - int prev_coef_savings[ENTROPY_NODES] = {0}; - for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { - // calc probs and branch cts for this frame only - for (t = 0; t < ENTROPY_NODES; ++t) { - vp9_prob newp = new_frame_coef_probs[i][j][k][t]; - vp9_prob *oldp = old_frame_coef_probs[i][j][k] + t; - const vp9_prob upd = COEF_UPDATE_PROB; - int s = prev_coef_savings[t]; - int u = 0; - if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) - continue; - -#if defined(SEARCH_NEWP) - s = prob_diff_update_savings_search( - frame_branch_ct[i][j][k][t], - *oldp, &newp, upd); - if (s > 0 && newp != *oldp) - u = 1; -#else - s = prob_update_savings( - frame_branch_ct[i][j][k][t], - *oldp, newp, upd); - if (s > 0) - u = 1; -#endif - vp9_write(bc, u, upd); -#ifdef ENTROPY_STATS - if (!cpi->dummy_packing) - ++ tree_update_hist [i][j][k][t] [u]; -#endif - if (u) { - /* send/use new probability */ - write_prob_diff_update(bc, newp, *oldp); - *oldp = newp; - } - } - } - } - } - } -} - -static void update_coef_probs(VP9_COMP* const cpi, vp9_writer* const bc) { - vp9_clear_system_state(); - - // Build the cofficient contexts based on counts collected in encode loop - build_coeff_contexts(cpi); - - update_coef_probs_common(bc, - cpi->frame_coef_probs, - cpi->common.fc.coef_probs, - cpi->frame_branch_ct); - - 
update_coef_probs_common(bc, - cpi->frame_hybrid_coef_probs, - cpi->common.fc.hybrid_coef_probs, - cpi->frame_hybrid_branch_ct); - - /* do not do this if not even allowed */ - if (cpi->common.txfm_mode != ONLY_4X4) { - update_coef_probs_common(bc, - cpi->frame_coef_probs_8x8, - cpi->common.fc.coef_probs_8x8, - cpi->frame_branch_ct_8x8); - - update_coef_probs_common(bc, - cpi->frame_hybrid_coef_probs_8x8, - cpi->common.fc.hybrid_coef_probs_8x8, - cpi->frame_hybrid_branch_ct_8x8); - } - - if (cpi->common.txfm_mode > ALLOW_8X8) { - update_coef_probs_common(bc, - cpi->frame_coef_probs_16x16, - cpi->common.fc.coef_probs_16x16, - cpi->frame_branch_ct_16x16); - update_coef_probs_common(bc, - cpi->frame_hybrid_coef_probs_16x16, - cpi->common.fc.hybrid_coef_probs_16x16, - cpi->frame_hybrid_branch_ct_16x16); - } -} - -#ifdef PACKET_TESTING -FILE *vpxlogc = 0; -#endif - -static void put_delta_q(vp9_writer *bc, int delta_q) { - if (delta_q != 0) { - vp9_write_bit(bc, 1); - vp9_write_literal(bc, abs(delta_q), 4); - - if (delta_q < 0) - vp9_write_bit(bc, 1); - else - vp9_write_bit(bc, 0); - } else - vp9_write_bit(bc, 0); -} - -static void decide_kf_ymode_entropy(VP9_COMP *cpi) { - - int mode_cost[MB_MODE_COUNT]; - int cost; - int bestcost = INT_MAX; - int bestindex = 0; - int i, j; - - for (i = 0; i < 8; i++) { - vp9_cost_tokens(mode_cost, cpi->common.kf_ymode_prob[i], vp9_kf_ymode_tree); - cost = 0; - for (j = 0; j < VP9_YMODES; j++) { - cost += mode_cost[j] * cpi->ymode_count[j]; - } -#if CONFIG_SUPERBLOCKS - vp9_cost_tokens(mode_cost, cpi->common.sb_kf_ymode_prob[i], - vp9_sb_ymode_tree); - for (j = 0; j < VP9_I32X32_MODES; j++) { - cost += mode_cost[j] * cpi->sb_ymode_count[j]; - } -#endif - if (cost < bestcost) { - bestindex = i; - bestcost = cost; - } - } - cpi->common.kf_ymode_probs_index = bestindex; - -} -static void segment_reference_frames(VP9_COMP *cpi) { - VP9_COMMON *oci = &cpi->common; - MODE_INFO *mi = oci->mi; - int ref[MAX_MB_SEGMENTS] = {0}; - int i, j; - int 
mb_index = 0; - MACROBLOCKD *const xd = &cpi->mb.e_mbd; - - for (i = 0; i < oci->mb_rows; i++) { - for (j = 0; j < oci->mb_cols; j++, mb_index++) { - ref[mi[mb_index].mbmi.segment_id] |= (1 << mi[mb_index].mbmi.ref_frame); - } - mb_index++; - } - for (i = 0; i < MAX_MB_SEGMENTS; i++) { - vp9_enable_segfeature(xd, i, SEG_LVL_REF_FRAME); - vp9_set_segdata(xd, i, SEG_LVL_REF_FRAME, ref[i]); - } -} - -void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, - unsigned long *size) { - int i, j; - VP9_HEADER oh; - VP9_COMMON *const pc = &cpi->common; - vp9_writer header_bc, residual_bc; - MACROBLOCKD *const xd = &cpi->mb.e_mbd; - int extra_bytes_packed = 0; - - unsigned char *cx_data = dest; - - oh.show_frame = (int) pc->show_frame; - oh.type = (int)pc->frame_type; - oh.version = pc->version; - oh.first_partition_length_in_bytes = 0; - - cx_data += 3; - -#if defined(SECTIONBITS_OUTPUT) - Sectionbits[active_section = 1] += sizeof(VP9_HEADER) * 8 * 256; -#endif - - compute_update_table(); - - /* vp9_kf_default_bmode_probs() is called in vp9_setup_key_frame() once - * for each K frame before encode frame. pc->kf_bmode_prob doesn't get - * changed anywhere else. No need to call it again here. --yw - * vp9_kf_default_bmode_probs( pc->kf_bmode_prob); - */ - - /* every keyframe send startcode, width, height, scale factor, clamp - * and color type. 
- */ - if (oh.type == KEY_FRAME) { - int v; - - // Start / synch code - cx_data[0] = 0x9D; - cx_data[1] = 0x01; - cx_data[2] = 0x2a; - - v = (pc->horiz_scale << 14) | pc->Width; - cx_data[3] = v; - cx_data[4] = v >> 8; - - v = (pc->vert_scale << 14) | pc->Height; - cx_data[5] = v; - cx_data[6] = v >> 8; - - extra_bytes_packed = 7; - cx_data += extra_bytes_packed; - - vp9_start_encode(&header_bc, cx_data); - - // signal clr type - vp9_write_bit(&header_bc, pc->clr_type); - vp9_write_bit(&header_bc, pc->clamp_type); - - } else { - vp9_start_encode(&header_bc, cx_data); - } - - // Signal whether or not Segmentation is enabled - vp9_write_bit(&header_bc, (xd->segmentation_enabled) ? 1 : 0); - - // Indicate which features are enabled - if (xd->segmentation_enabled) { - // Indicate whether or not the segmentation map is being updated. - vp9_write_bit(&header_bc, (xd->update_mb_segmentation_map) ? 1 : 0); - - // If it is, then indicate the method that will be used. - if (xd->update_mb_segmentation_map) { - // Select the coding strategy (temporal or spatial) - vp9_choose_segmap_coding_method(cpi); - // Send the tree probabilities used to decode unpredicted - // macro-block segments - for (i = 0; i < MB_FEATURE_TREE_PROBS; i++) { - int data = xd->mb_segment_tree_probs[i]; - - if (data != 255) { - vp9_write_bit(&header_bc, 1); - vp9_write_literal(&header_bc, data, 8); - } else { - vp9_write_bit(&header_bc, 0); - } - } - - // Write out the chosen coding method. - vp9_write_bit(&header_bc, (pc->temporal_update) ? 1 : 0); - if (pc->temporal_update) { - for (i = 0; i < PREDICTION_PROBS; i++) { - int data = pc->segment_pred_probs[i]; - - if (data != 255) { - vp9_write_bit(&header_bc, 1); - vp9_write_literal(&header_bc, data, 8); - } else { - vp9_write_bit(&header_bc, 0); - } - } - } - } - - vp9_write_bit(&header_bc, (xd->update_mb_segmentation_data) ? 
1 : 0); - - // segment_reference_frames(cpi); - - if (xd->update_mb_segmentation_data) { - signed char Data; - - vp9_write_bit(&header_bc, (xd->mb_segment_abs_delta) ? 1 : 0); - - // For each segments id... - for (i = 0; i < MAX_MB_SEGMENTS; i++) { - // For each segmentation codable feature... - for (j = 0; j < SEG_LVL_MAX; j++) { - Data = vp9_get_segdata(xd, i, j); - - // If the feature is enabled... - if (vp9_segfeature_active(xd, i, j)) { - vp9_write_bit(&header_bc, 1); - - // Is the segment data signed.. - if (vp9_is_segfeature_signed(j)) { - // Encode the relevant feature data - if (Data < 0) { - Data = - Data; - vp9_write_literal(&header_bc, Data, - vp9_seg_feature_data_bits(j)); - vp9_write_bit(&header_bc, 1); - } else { - vp9_write_literal(&header_bc, Data, - vp9_seg_feature_data_bits(j)); - vp9_write_bit(&header_bc, 0); - } - } - // Unsigned data element so no sign bit needed - else - vp9_write_literal(&header_bc, Data, - vp9_seg_feature_data_bits(j)); - } else - vp9_write_bit(&header_bc, 0); - } - } - } - } - - // Encode the common prediction model status flag probability updates for - // the reference frame - update_refpred_stats(cpi); - if (pc->frame_type != KEY_FRAME) { - for (i = 0; i < PREDICTION_PROBS; i++) { - if (cpi->ref_pred_probs_update[i]) { - vp9_write_bit(&header_bc, 1); - vp9_write_literal(&header_bc, pc->ref_pred_probs[i], 8); - } else { - vp9_write_bit(&header_bc, 0); - } - } - } - -#if CONFIG_SUPERBLOCKS - { - /* sb mode probability */ - const int sb_max = (((pc->mb_rows + 1) >> 1) * ((pc->mb_cols + 1) >> 1)); - - pc->sb_coded = get_prob(sb_max - cpi->sb_count, sb_max); - vp9_write_literal(&header_bc, pc->sb_coded, 8); - } -#endif - - { - if (pc->txfm_mode == TX_MODE_SELECT) { - pc->prob_tx[0] = get_prob(cpi->txfm_count[0] + cpi->txfm_count_8x8p[0], - cpi->txfm_count[0] + cpi->txfm_count[1] + cpi->txfm_count[2] + - cpi->txfm_count_8x8p[0] + cpi->txfm_count_8x8p[1]); - pc->prob_tx[1] = get_prob(cpi->txfm_count[1], cpi->txfm_count[1] + 
cpi->txfm_count[2]); - } else { - pc->prob_tx[0] = 128; - pc->prob_tx[1] = 128; - } - vp9_write_literal(&header_bc, pc->txfm_mode, 2); - if (pc->txfm_mode == TX_MODE_SELECT) { - vp9_write_literal(&header_bc, pc->prob_tx[0], 8); - vp9_write_literal(&header_bc, pc->prob_tx[1], 8); - } - } - - // Encode the loop filter level and type - vp9_write_bit(&header_bc, pc->filter_type); - vp9_write_literal(&header_bc, pc->filter_level, 6); - vp9_write_literal(&header_bc, pc->sharpness_level, 3); - - // Write out loop filter deltas applied at the MB level based on mode or ref frame (if they are enabled). - vp9_write_bit(&header_bc, (xd->mode_ref_lf_delta_enabled) ? 1 : 0); - - if (xd->mode_ref_lf_delta_enabled) { - // Do the deltas need to be updated - int send_update = xd->mode_ref_lf_delta_update; - - vp9_write_bit(&header_bc, send_update); - if (send_update) { - int Data; - - // Send update - for (i = 0; i < MAX_REF_LF_DELTAS; i++) { - Data = xd->ref_lf_deltas[i]; - - // Frame level data - if (xd->ref_lf_deltas[i] != xd->last_ref_lf_deltas[i]) { - xd->last_ref_lf_deltas[i] = xd->ref_lf_deltas[i]; - vp9_write_bit(&header_bc, 1); - - if (Data > 0) { - vp9_write_literal(&header_bc, (Data & 0x3F), 6); - vp9_write_bit(&header_bc, 0); // sign - } else { - Data = -Data; - vp9_write_literal(&header_bc, (Data & 0x3F), 6); - vp9_write_bit(&header_bc, 1); // sign - } - } else { - vp9_write_bit(&header_bc, 0); - } - } - - // Send update - for (i = 0; i < MAX_MODE_LF_DELTAS; i++) { - Data = xd->mode_lf_deltas[i]; - - if (xd->mode_lf_deltas[i] != xd->last_mode_lf_deltas[i]) { - xd->last_mode_lf_deltas[i] = xd->mode_lf_deltas[i]; - vp9_write_bit(&header_bc, 1); - - if (Data > 0) { - vp9_write_literal(&header_bc, (Data & 0x3F), 6); - vp9_write_bit(&header_bc, 0); // sign - } else { - Data = -Data; - vp9_write_literal(&header_bc, (Data & 0x3F), 6); - vp9_write_bit(&header_bc, 1); // sign - } - } else { - vp9_write_bit(&header_bc, 0); - } - } - } - } - - // signal here is multi token 
partition is enabled - // vp9_write_literal(&header_bc, pc->multi_token_partition, 2); - vp9_write_literal(&header_bc, 0, 2); - - // Frame Q baseline quantizer index - vp9_write_literal(&header_bc, pc->base_qindex, QINDEX_BITS); - - // Transmit Dc, Second order and Uv quantizer delta information - put_delta_q(&header_bc, pc->y1dc_delta_q); - put_delta_q(&header_bc, pc->y2dc_delta_q); - put_delta_q(&header_bc, pc->y2ac_delta_q); - put_delta_q(&header_bc, pc->uvdc_delta_q); - put_delta_q(&header_bc, pc->uvac_delta_q); - - // When there is a key frame all reference buffers are updated using the new key frame - if (pc->frame_type != KEY_FRAME) { - // Should the GF or ARF be updated using the transmitted frame or buffer - vp9_write_bit(&header_bc, pc->refresh_golden_frame); - vp9_write_bit(&header_bc, pc->refresh_alt_ref_frame); - - // For inter frames the current default behavior is that when - // cm->refresh_golden_frame is set we copy the old GF over to - // the ARF buffer. This is purely an encoder decision at present. - if (pc->refresh_golden_frame) - pc->copy_buffer_to_arf = 2; - - // If not being updated from current frame should either GF or ARF be updated from another buffer - if (!pc->refresh_golden_frame) - vp9_write_literal(&header_bc, pc->copy_buffer_to_gf, 2); - - if (!pc->refresh_alt_ref_frame) - vp9_write_literal(&header_bc, pc->copy_buffer_to_arf, 2); - - // Indicate reference frame sign bias for Golden and ARF frames (always 0 for last frame buffer) - vp9_write_bit(&header_bc, pc->ref_frame_sign_bias[GOLDEN_FRAME]); - vp9_write_bit(&header_bc, pc->ref_frame_sign_bias[ALTREF_FRAME]); - - // Signal whether to allow high MV precision - vp9_write_bit(&header_bc, (xd->allow_high_precision_mv) ? 
1 : 0); - if (pc->mcomp_filter_type == SWITCHABLE) { - /* Check to see if only one of the filters is actually used */ - int count[VP9_SWITCHABLE_FILTERS]; - int i, j, c = 0; - for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) { - count[i] = 0; - for (j = 0; j <= VP9_SWITCHABLE_FILTERS; ++j) { - count[i] += cpi->switchable_interp_count[j][i]; - } - c += (count[i] > 0); - } - if (c == 1) { - /* Only one filter is used. So set the filter at frame level */ - for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) { - if (count[i]) { - pc->mcomp_filter_type = vp9_switchable_interp[i]; - break; - } - } - } - } - // Signal the type of subpel filter to use - vp9_write_bit(&header_bc, (pc->mcomp_filter_type == SWITCHABLE)); - if (pc->mcomp_filter_type != SWITCHABLE) - vp9_write_literal(&header_bc, (pc->mcomp_filter_type), 2); - } - - vp9_write_bit(&header_bc, pc->refresh_entropy_probs); - - if (pc->frame_type != KEY_FRAME) - vp9_write_bit(&header_bc, pc->refresh_last_frame); - -#ifdef ENTROPY_STATS - if (pc->frame_type == INTER_FRAME) - active_section = 0; - else - active_section = 7; -#endif - - vp9_clear_system_state(); // __asm emms; - - vp9_copy(cpi->common.fc.pre_coef_probs, cpi->common.fc.coef_probs); - vp9_copy(cpi->common.fc.pre_hybrid_coef_probs, cpi->common.fc.hybrid_coef_probs); - vp9_copy(cpi->common.fc.pre_coef_probs_8x8, cpi->common.fc.coef_probs_8x8); - vp9_copy(cpi->common.fc.pre_hybrid_coef_probs_8x8, cpi->common.fc.hybrid_coef_probs_8x8); - vp9_copy(cpi->common.fc.pre_coef_probs_16x16, cpi->common.fc.coef_probs_16x16); - vp9_copy(cpi->common.fc.pre_hybrid_coef_probs_16x16, cpi->common.fc.hybrid_coef_probs_16x16); - vp9_copy(cpi->common.fc.pre_ymode_prob, cpi->common.fc.ymode_prob); - vp9_copy(cpi->common.fc.pre_uv_mode_prob, cpi->common.fc.uv_mode_prob); - vp9_copy(cpi->common.fc.pre_bmode_prob, cpi->common.fc.bmode_prob); - vp9_copy(cpi->common.fc.pre_sub_mv_ref_prob, cpi->common.fc.sub_mv_ref_prob); - vp9_copy(cpi->common.fc.pre_mbsplit_prob, 
cpi->common.fc.mbsplit_prob); - vp9_copy(cpi->common.fc.pre_i8x8_mode_prob, cpi->common.fc.i8x8_mode_prob); - cpi->common.fc.pre_nmvc = cpi->common.fc.nmvc; - vp9_zero(cpi->sub_mv_ref_count); - vp9_zero(cpi->mbsplit_count); - vp9_zero(cpi->common.fc.mv_ref_ct) - vp9_zero(cpi->common.fc.mv_ref_ct_a) - - update_coef_probs(cpi, &header_bc); - -#ifdef ENTROPY_STATS - active_section = 2; -#endif - - // Write out the mb_no_coeff_skip flag - vp9_write_bit(&header_bc, pc->mb_no_coeff_skip); - if (pc->mb_no_coeff_skip) { - int k; - - vp9_update_skip_probs(cpi); - for (k = 0; k < MBSKIP_CONTEXTS; ++k) - vp9_write_literal(&header_bc, pc->mbskip_pred_probs[k], 8); - } - - if (pc->frame_type == KEY_FRAME) { - if (!pc->kf_ymode_probs_update) { - vp9_write_literal(&header_bc, pc->kf_ymode_probs_index, 3); - } - } else { - // Update the probabilities used to encode reference frame data - update_ref_probs(cpi); - -#ifdef ENTROPY_STATS - active_section = 1; -#endif - -#if CONFIG_PRED_FILTER - // Write the prediction filter mode used for this frame - vp9_write_literal(&header_bc, pc->pred_filter_mode, 2); - - // Write prediction filter on/off probability if signaling at MB level - if (pc->pred_filter_mode == 2) - vp9_write_literal(&header_bc, pc->prob_pred_filter_off, 8); - -#endif - if (pc->mcomp_filter_type == SWITCHABLE) - update_switchable_interp_probs(cpi, &header_bc); - - vp9_write_literal(&header_bc, pc->prob_intra_coded, 8); - vp9_write_literal(&header_bc, pc->prob_last_coded, 8); - vp9_write_literal(&header_bc, pc->prob_gf_coded, 8); - - { - const int comp_pred_mode = cpi->common.comp_pred_mode; - const int use_compound_pred = (comp_pred_mode != SINGLE_PREDICTION_ONLY); - const int use_hybrid_pred = (comp_pred_mode == HYBRID_PREDICTION); - - vp9_write(&header_bc, use_compound_pred, 128); - if (use_compound_pred) { - vp9_write(&header_bc, use_hybrid_pred, 128); - if (use_hybrid_pred) { - for (i = 0; i < COMP_PRED_CONTEXTS; i++) { - pc->prob_comppred[i] = 
get_binary_prob(cpi->single_pred_count[i], - cpi->comp_pred_count[i]); - vp9_write_literal(&header_bc, pc->prob_comppred[i], 8); - } - } - } - } - - update_mbintra_mode_probs(cpi, &header_bc); - -#if CONFIG_NEW_MVREF - // Temp defaults probabilities for ecnoding the MV ref id signal - vpx_memset(xd->mb_mv_ref_id_probs, 192, sizeof(xd->mb_mv_ref_id_probs)); -#endif - - vp9_write_nmvprobs(cpi, xd->allow_high_precision_mv, &header_bc); - } - - vp9_stop_encode(&header_bc); - - oh.first_partition_length_in_bytes = header_bc.pos; - - /* update frame tag */ - { - int v = (oh.first_partition_length_in_bytes << 5) | - (oh.show_frame << 4) | - (oh.version << 1) | - oh.type; - - dest[0] = v; - dest[1] = v >> 8; - dest[2] = v >> 16; - } - - *size = VP9_HEADER_SIZE + extra_bytes_packed + header_bc.pos; - vp9_start_encode(&residual_bc, cx_data + header_bc.pos); - - if (pc->frame_type == KEY_FRAME) { - decide_kf_ymode_entropy(cpi); - write_kfmodes(cpi, &residual_bc); - } else { - pack_inter_mode_mvs(cpi, &residual_bc); - vp9_update_mode_context(&cpi->common); - } - - - vp9_stop_encode(&residual_bc); - - *size += residual_bc.pos; - -} - -#ifdef ENTROPY_STATS -void print_tree_update_probs() { - int i, j, k, l; - FILE *f = fopen("coefupdprob.h", "w"); - int Sum; - fprintf(f, "\n/* Update probabilities for token entropy tree. 
*/\n\n"); - - fprintf(f, "const vp9_prob\n" - "vp9_coef_update_probs[BLOCK_TYPES]\n" - " [COEF_BANDS]\n" - " [PREV_COEF_CONTEXTS]\n" - " [ENTROPY_NODES] = {\n"); - for (i = 0; i < BLOCK_TYPES; i++) { - fprintf(f, " { \n"); - for (j = 0; j < COEF_BANDS; j++) { - fprintf(f, " {\n"); - for (k = 0; k < PREV_COEF_CONTEXTS; k++) { - fprintf(f, " {"); - for (l = 0; l < ENTROPY_NODES; l++) { - fprintf(f, "%3ld, ", - get_binary_prob(tree_update_hist[i][j][k][l][0], - tree_update_hist[i][j][k][l][1])); - } - fprintf(f, "},\n"); - } - fprintf(f, " },\n"); - } - fprintf(f, " },\n"); - } - fprintf(f, "};\n"); - - fprintf(f, "const vp9_prob\n" - "vp9_coef_update_probs_8x8[BLOCK_TYPES_8X8]\n" - " [COEF_BANDS]\n" - " [PREV_COEF_CONTEXTS]\n" - " [ENTROPY_NODES] = {\n"); - for (i = 0; i < BLOCK_TYPES_8X8; i++) { - fprintf(f, " { \n"); - for (j = 0; j < COEF_BANDS; j++) { - fprintf(f, " {\n"); - for (k = 0; k < PREV_COEF_CONTEXTS; k++) { - fprintf(f, " {"); - for (l = 0; l < MAX_ENTROPY_TOKENS - 1; l++) { - fprintf(f, "%3ld, ", - get_binary_prob(tree_update_hist_8x8[i][j][k][l][0], - tree_update_hist_8x8[i][j][k][l][1])); - } - fprintf(f, "},\n"); - } - fprintf(f, " },\n"); - } - fprintf(f, " },\n"); - } - - fprintf(f, "const vp9_prob\n" - "vp9_coef_update_probs_16x16[BLOCK_TYPES_16X16]\n" - " [COEF_BANDS]\n" - " [PREV_COEF_CONTEXTS]\n" - " [ENTROPY_NODES] = {\n"); - for (i = 0; i < BLOCK_TYPES_16X16; i++) { - fprintf(f, " { \n"); - for (j = 0; j < COEF_BANDS; j++) { - fprintf(f, " {\n"); - for (k = 0; k < PREV_COEF_CONTEXTS; k++) { - fprintf(f, " {"); - for (l = 0; l < MAX_ENTROPY_TOKENS - 1; l++) { - fprintf(f, "%3ld, ", - get_binary_prob(tree_update_hist_16x16[i][j][k][l][0], - tree_update_hist_16x16[i][j][k][l][1])); - } - fprintf(f, "},\n"); - } - fprintf(f, " },\n"); - } - fprintf(f, " },\n"); - } - - fclose(f); - f = fopen("treeupdate.bin", "wb"); - fwrite(tree_update_hist, sizeof(tree_update_hist), 1, f); - fwrite(tree_update_hist_8x8, sizeof(tree_update_hist_8x8), 1, f); - 
fwrite(tree_update_hist_16x16, sizeof(tree_update_hist_16x16), 1, f); - fclose(f); -} -#endif diff --git a/vp8/encoder/bitstream.h b/vp8/encoder/bitstream.h deleted file mode 100644 index a0ac8b5db..000000000 --- a/vp8/encoder/bitstream.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef __INC_BITSTREAM_H -#define __INC_BITSTREAM_H - -void vp9_update_skip_probs(VP9_COMP *cpi); - -#endif diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h deleted file mode 100644 index a77017c4f..000000000 --- a/vp8/encoder/block.h +++ /dev/null @@ -1,184 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#ifndef __INC_BLOCK_H -#define __INC_BLOCK_H - -#include "vp8/common/onyx.h" -#include "vp8/common/entropymv.h" -#include "vp8/common/entropy.h" -#include "vpx_ports/mem.h" -#include "vp8/common/onyxc_int.h" - -// motion search site -typedef struct { - MV mv; - int offset; -} search_site; - -typedef struct block { - // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries - short *src_diff; - short *coeff; - - // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries - short *quant; - short *quant_fast; // fast quant deprecated for now - unsigned char *quant_shift; - short *zbin; - short *zbin_8x8; - short *zbin_16x16; - short *zrun_zbin_boost; - short *zrun_zbin_boost_8x8; - short *zrun_zbin_boost_16x16; - short *round; - - // Zbin Over Quant value - short zbin_extra; - - unsigned char **base_src; - unsigned char **base_second_src; - int src; - int src_stride; - - int eob_max_offset; - int eob_max_offset_8x8; - int eob_max_offset_16x16; -} BLOCK; - -typedef struct { - int count; - struct { - B_PREDICTION_MODE mode; - int_mv mv; - int_mv second_mv; - } bmi[16]; -} PARTITION_INFO; - -// Structure to hold snapshot of coding context during the mode picking process -// TODO Do we need all of these? 
-typedef struct { - MODE_INFO mic; - PARTITION_INFO partition_info; - int_mv best_ref_mv; - int_mv second_best_ref_mv; -#if CONFIG_NEWBESTREFMV || CONFIG_NEW_MVREF - int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REFS]; -#endif - int rate; - int distortion; - int64_t intra_error; - int best_mode_index; - int rddiv; - int rdmult; - int hybrid_pred_diff; - int comp_pred_diff; - int single_pred_diff; - int64_t txfm_rd_diff[NB_TXFM_MODES]; -} PICK_MODE_CONTEXT; - -typedef struct macroblock { - DECLARE_ALIGNED(16, short, src_diff[400]); // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y - DECLARE_ALIGNED(16, short, coeff[400]); // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y - DECLARE_ALIGNED(16, unsigned char, thismb[256]); // 16x16 Y - - unsigned char *thismb_ptr; - // 16 Y blocks, 4 U blocks, 4 V blocks, - // 1 DC 2nd order block each with 16 entries - BLOCK block[25]; - - YV12_BUFFER_CONFIG src; - - MACROBLOCKD e_mbd; - PARTITION_INFO *partition_info; /* work pointer */ - PARTITION_INFO *pi; /* Corresponds to upper left visible macroblock */ - PARTITION_INFO *pip; /* Base of allocated array */ - - search_site *ss; - int ss_count; - int searches_per_step; - - int errorperbit; - int sadperbit16; - int sadperbit4; - int rddiv; - int rdmult; - unsigned int *mb_activity_ptr; - int *mb_norm_activity_ptr; - signed int act_zbin_adj; - - int nmvjointcost[MV_JOINTS]; - int nmvcosts[2][MV_VALS]; - int *nmvcost[2]; - int nmvcosts_hp[2][MV_VALS]; - int *nmvcost_hp[2]; - - int nmvjointsadcost[MV_JOINTS]; - int nmvsadcosts[2][MV_VALS]; - int *nmvsadcost[2]; - int nmvsadcosts_hp[2][MV_VALS]; - int *nmvsadcost_hp[2]; - - int mbmode_cost[2][MB_MODE_COUNT]; - int intra_uv_mode_cost[2][MB_MODE_COUNT]; - int bmode_costs[VP9_BINTRAMODES][VP9_BINTRAMODES][VP9_BINTRAMODES]; - int i8x8_mode_costs[MB_MODE_COUNT]; - int inter_bmode_costs[B_MODE_COUNT]; - int switchable_interp_costs[VP9_SWITCHABLE_FILTERS + 1] - [VP9_SWITCHABLE_FILTERS]; - - // These define limits to motion vector components to prevent them - // from extending outside 
the UMV borders - int mv_col_min; - int mv_col_max; - int mv_row_min; - int mv_row_max; - - int skip; - - int encode_breakout; - - // char * gf_active_ptr; - signed char *gf_active_ptr; - - unsigned char *active_ptr; - - unsigned int token_costs[TX_SIZE_MAX][BLOCK_TYPES][COEF_BANDS] - [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS]; - unsigned int hybrid_token_costs[TX_SIZE_MAX][BLOCK_TYPES][COEF_BANDS] - [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS]; - - int optimize; - - // Structure to hold context for each of the 4 MBs within a SB: - // when encoded as 4 independent MBs: - PICK_MODE_CONTEXT mb_context[4]; -#if CONFIG_SUPERBLOCKS - // when 4 MBs share coding parameters: - PICK_MODE_CONTEXT sb_context[4]; -#endif - - void (*vp9_short_fdct4x4)(short *input, short *output, int pitch); - void (*vp9_short_fdct8x4)(short *input, short *output, int pitch); - void (*short_walsh4x4)(short *input, short *output, int pitch); - void (*quantize_b_4x4)(BLOCK *b, BLOCKD *d); - void (*quantize_b_4x4_pair)(BLOCK *b1, BLOCK *b2, BLOCKD *d0, BLOCKD *d1); - void (*vp9_short_fdct8x8)(short *input, short *output, int pitch); - void (*vp9_short_fdct16x16)(short *input, short *output, int pitch); - void (*short_fhaar2x2)(short *input, short *output, int pitch); - void (*quantize_b_16x16)(BLOCK *b, BLOCKD *d); - void (*quantize_b_8x8)(BLOCK *b, BLOCKD *d); - void (*quantize_b_2x2)(BLOCK *b, BLOCKD *d); - -} MACROBLOCK; - - -#endif diff --git a/vp8/encoder/boolhuff.c b/vp8/encoder/boolhuff.c deleted file mode 100644 index 0cb4b68cc..000000000 --- a/vp8/encoder/boolhuff.c +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. 
All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "boolhuff.h" - -#if defined(SECTIONBITS_OUTPUT) -unsigned __int64 Sectionbits[500]; - -#endif - -#ifdef ENTROPY_STATS -unsigned int active_section = 0; -#endif - -const unsigned int vp9_prob_cost[256] = { - 2047, 2047, 1791, 1641, 1535, 1452, 1385, 1328, 1279, 1235, 1196, 1161, 1129, 1099, 1072, 1046, - 1023, 1000, 979, 959, 940, 922, 905, 889, 873, 858, 843, 829, 816, 803, 790, 778, - 767, 755, 744, 733, 723, 713, 703, 693, 684, 675, 666, 657, 649, 641, 633, 625, - 617, 609, 602, 594, 587, 580, 573, 567, 560, 553, 547, 541, 534, 528, 522, 516, - 511, 505, 499, 494, 488, 483, 477, 472, 467, 462, 457, 452, 447, 442, 437, 433, - 428, 424, 419, 415, 410, 406, 401, 397, 393, 389, 385, 381, 377, 373, 369, 365, - 361, 357, 353, 349, 346, 342, 338, 335, 331, 328, 324, 321, 317, 314, 311, 307, - 304, 301, 297, 294, 291, 288, 285, 281, 278, 275, 272, 269, 266, 263, 260, 257, - 255, 252, 249, 246, 243, 240, 238, 235, 232, 229, 227, 224, 221, 219, 216, 214, - 211, 208, 206, 203, 201, 198, 196, 194, 191, 189, 186, 184, 181, 179, 177, 174, - 172, 170, 168, 165, 163, 161, 159, 156, 154, 152, 150, 148, 145, 143, 141, 139, - 137, 135, 133, 131, 129, 127, 125, 123, 121, 119, 117, 115, 113, 111, 109, 107, - 105, 103, 101, 99, 97, 95, 93, 92, 90, 88, 86, 84, 82, 81, 79, 77, - 75, 73, 72, 70, 68, 66, 65, 63, 61, 60, 58, 56, 55, 53, 51, 50, - 48, 46, 45, 43, 41, 40, 38, 37, 35, 33, 32, 30, 29, 27, 25, 24, - 22, 21, 19, 18, 16, 15, 13, 12, 10, 9, 7, 6, 4, 3, 1, 1 -}; - -void vp9_start_encode(BOOL_CODER *br, unsigned char *source) { - - br->lowvalue = 0; - br->range = 255; - br->value = 0; - br->count = -24; - br->buffer = source; - br->pos = 0; -} - -void vp9_stop_encode(BOOL_CODER *br) { - int i; - - for (i = 0; i < 32; i++) - encode_bool(br, 0, 128); -} - - -void vp9_encode_value(BOOL_CODER *br, int data, int bits) { - int bit; - - for (bit = bits - 1; bit >= 0; 
bit--) - encode_bool(br, (1 & (data >> bit)), 0x80); -} - -int vp9_recenter_nonneg(int v, int m) { - if (v > (m << 1)) return v; - else if (v >= m) return ((v - m) << 1); - else return ((m - v) << 1) - 1; -} - -static int get_unsigned_bits(unsigned num_values) { - int cat = 0; - if ((num_values--) <= 1) return 0; - while (num_values > 0) { - cat++; - num_values >>= 1; - } - return cat; -} - -void vp9_encode_uniform(BOOL_CODER *br, int v, int n) { - int l = get_unsigned_bits(n); - int m; - if (l == 0) return; - m = (1 << l) - n; - if (v < m) - vp9_encode_value(br, v, l - 1); - else { - vp9_encode_value(br, m + ((v - m) >> 1), l - 1); - vp9_encode_value(br, (v - m) & 1, 1); - } -} - -int vp9_count_uniform(int v, int n) { - int l = get_unsigned_bits(n); - int m; - if (l == 0) return 0; - m = (1 << l) - n; - if (v < m) - return l - 1; - else - return l; -} - -void vp9_encode_term_subexp(BOOL_CODER *br, int word, int k, int num_syms) { - int i = 0; - int mk = 0; - while (1) { - int b = (i ? k + i - 1 : k); - int a = (1 << b); - if (num_syms <= mk + 3 * a) { - vp9_encode_uniform(br, word - mk, num_syms - mk); - break; - } else { - int t = (word >= mk + a); - vp9_encode_value(br, t, 1); - if (t) { - i = i + 1; - mk += a; - } else { - vp9_encode_value(br, word - mk, b); - break; - } - } - } -} - -int vp9_count_term_subexp(int word, int k, int num_syms) { - int count = 0; - int i = 0; - int mk = 0; - while (1) { - int b = (i ? k + i - 1 : k); - int a = (1 << b); - if (num_syms <= mk + 3 * a) { - count += vp9_count_uniform(word - mk, num_syms - mk); - break; - } else { - int t = (word >= mk + a); - count++; - if (t) { - i = i + 1; - mk += a; - } else { - count += b; - break; - } - } - } - return count; -} diff --git a/vp8/encoder/boolhuff.h b/vp8/encoder/boolhuff.h deleted file mode 100644 index e5fd0fb94..000000000 --- a/vp8/encoder/boolhuff.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/**************************************************************************** -* -* Module Title : boolhuff.h -* -* Description : Bool Coder header file. -* -****************************************************************************/ -#ifndef __INC_BOOLHUFF_H -#define __INC_BOOLHUFF_H - -#include "vpx_ports/mem.h" - -typedef struct { - unsigned int lowvalue; - unsigned int range; - unsigned int value; - int count; - unsigned int pos; - unsigned char *buffer; - - // Variables used to track bit costs without outputing to the bitstream - unsigned int measure_cost; - unsigned long bit_counter; -} BOOL_CODER; - -extern void vp9_start_encode(BOOL_CODER *bc, unsigned char *buffer); - -extern void vp9_encode_value(BOOL_CODER *br, int data, int bits); -extern void vp9_stop_encode(BOOL_CODER *bc); -extern const unsigned int vp9_prob_cost[256]; - -extern void vp9_encode_uniform(BOOL_CODER *bc, int v, int n); -extern void vp9_encode_term_subexp(BOOL_CODER *bc, int v, int k, int n); -extern int vp9_count_uniform(int v, int n); -extern int vp9_count_term_subexp(int v, int k, int n); -extern int vp9_recenter_nonneg(int v, int m); - -DECLARE_ALIGNED(16, extern const unsigned char, vp9_norm[256]); - - -static void encode_bool(BOOL_CODER *br, int bit, int probability) { - unsigned int split; - int count = br->count; - unsigned int range = br->range; - unsigned int lowvalue = br->lowvalue; - register unsigned int shift; - -#ifdef ENTROPY_STATS -#if defined(SECTIONBITS_OUTPUT) - - if (bit) - Sectionbits[active_section] += vp9_prob_cost[255 - probability]; - else - Sectionbits[active_section] += vp9_prob_cost[probability]; - -#endif -#endif - - 
split = 1 + (((range - 1) * probability) >> 8); - - range = split; - - if (bit) { - lowvalue += split; - range = br->range - split; - } - - shift = vp9_norm[range]; - - range <<= shift; - count += shift; - - if (count >= 0) { - int offset = shift - count; - - if ((lowvalue << (offset - 1)) & 0x80000000) { - int x = br->pos - 1; - - while (x >= 0 && br->buffer[x] == 0xff) { - br->buffer[x] = (unsigned char)0; - x--; - } - - br->buffer[x] += 1; - } - - br->buffer[br->pos++] = (lowvalue >> (24 - offset)); - lowvalue <<= offset; - shift = count; - lowvalue &= 0xffffff; - count -= 8; - } - - lowvalue <<= shift; - br->count = count; - br->lowvalue = lowvalue; - br->range = range; -} - -#endif diff --git a/vp8/encoder/dct.c b/vp8/encoder/dct.c deleted file mode 100644 index badb135b3..000000000 --- a/vp8/encoder/dct.c +++ /dev/null @@ -1,1109 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include <assert.h> -#include <math.h> -#include "vpx_ports/config.h" -#include "vp8/common/idct.h" -#include "vp8/common/systemdependent.h" - -#include "vp8/common/blockd.h" - -// TODO: these transforms can be converted into integer forms to reduce -// the complexity -static const float dct_4[16] = { - 0.500000000000000, 0.500000000000000, 0.500000000000000, 0.500000000000000, - 0.653281482438188, 0.270598050073099, -0.270598050073099, -0.653281482438188, - 0.500000000000000, -0.500000000000000, -0.500000000000000, 0.500000000000000, - 0.270598050073099, -0.653281482438188, 0.653281482438188, -0.270598050073099 -}; - -static const float adst_4[16] = { - 0.228013428883779, 0.428525073124360, 0.577350269189626, 0.656538502008139, - 0.577350269189626, 0.577350269189626, 0.000000000000000, -0.577350269189626, - 0.656538502008139, -0.228013428883779, -0.577350269189626, 0.428525073124359, - 0.428525073124360, -0.656538502008139, 0.577350269189626, -0.228013428883779 -}; - -static const float dct_8[64] = { - 0.353553390593274, 0.353553390593274, 0.353553390593274, 0.353553390593274, - 0.353553390593274, 0.353553390593274, 0.353553390593274, 0.353553390593274, - 0.490392640201615, 0.415734806151273, 0.277785116509801, 0.097545161008064, - -0.097545161008064, -0.277785116509801, -0.415734806151273, -0.490392640201615, - 0.461939766255643, 0.191341716182545, -0.191341716182545, -0.461939766255643, - -0.461939766255643, -0.191341716182545, 0.191341716182545, 0.461939766255643, - 0.415734806151273, -0.097545161008064, -0.490392640201615, -0.277785116509801, - 0.277785116509801, 0.490392640201615, 0.097545161008064, -0.415734806151273, - 0.353553390593274, -0.353553390593274, -0.353553390593274, 0.353553390593274, - 0.353553390593274, -0.353553390593274, -0.353553390593274, 0.353553390593274, - 0.277785116509801, -0.490392640201615, 0.097545161008064, 0.415734806151273, - -0.415734806151273, -0.097545161008064, 0.490392640201615, -0.277785116509801, - 
0.191341716182545, -0.461939766255643, 0.461939766255643, -0.191341716182545, - -0.191341716182545, 0.461939766255643, -0.461939766255643, 0.191341716182545, - 0.097545161008064, -0.277785116509801, 0.415734806151273, -0.490392640201615, - 0.490392640201615, -0.415734806151273, 0.277785116509801, -0.097545161008064 -}; - -static const float adst_8[64] = { - 0.089131608307533, 0.175227946595735, 0.255357107325376, 0.326790388032145, - 0.387095214016349, 0.434217976756762, 0.466553967085785, 0.483002021635509, - 0.255357107325376, 0.434217976756762, 0.483002021635509, 0.387095214016349, - 0.175227946595735, -0.089131608307533, -0.326790388032145, -0.466553967085785, - 0.387095214016349, 0.466553967085785, 0.175227946595735, -0.255357107325376, - -0.483002021635509, -0.326790388032145, 0.089131608307533, 0.434217976756762, - 0.466553967085785, 0.255357107325376, -0.326790388032145, -0.434217976756762, - 0.089131608307533, 0.483002021635509, 0.175227946595735, -0.387095214016348, - 0.483002021635509, -0.089131608307533, -0.466553967085785, 0.175227946595735, - 0.434217976756762, -0.255357107325376, -0.387095214016348, 0.326790388032145, - 0.434217976756762, -0.387095214016348, -0.089131608307533, 0.466553967085786, - -0.326790388032145, -0.175227946595735, 0.483002021635509, -0.255357107325375, - 0.326790388032145, -0.483002021635509, 0.387095214016349, -0.089131608307534, - -0.255357107325377, 0.466553967085785, -0.434217976756762, 0.175227946595736, - 0.175227946595735, -0.326790388032145, 0.434217976756762, -0.483002021635509, - 0.466553967085785, -0.387095214016348, 0.255357107325376, -0.089131608307532 -}; - -/* Converted the transforms to integers. 
*/ -static const int16_t dct_i4[16] = { - 16384, 16384, 16384, 16384, - 21407, 8867, -8867, -21407, - 16384, -16384, -16384, 16384, - 8867, -21407, 21407, -8867 -}; - -static const int16_t adst_i4[16] = { - 7472, 14042, 18919, 21513, - 18919, 18919, 0, -18919, - 21513, -7472, -18919, 14042, - 14042, -21513, 18919, -7472 -}; - -static const int16_t dct_i8[64] = { - 11585, 11585, 11585, 11585, - 11585, 11585, 11585, 11585, - 16069, 13623, 9102, 3196, - -3196, -9102, -13623, -16069, - 15137, 6270, -6270, -15137, - -15137, -6270, 6270, 15137, - 13623, -3196, -16069, -9102, - 9102, 16069, 3196, -13623, - 11585, -11585, -11585, 11585, - 11585, -11585, -11585, 11585, - 9102, -16069, 3196, 13623, - -13623, -3196, 16069, -9102, - 6270, -15137, 15137, -6270, - -6270, 15137, -15137, 6270, - 3196, -9102, 13623, -16069, - 16069, -13623, 9102, -3196 -}; - -static const int16_t adst_i8[64] = { - 2921, 5742, 8368, 10708, - 12684, 14228, 15288, 15827, - 8368, 14228, 15827, 12684, - 5742, -2921, -10708, -15288, - 12684, 15288, 5742, -8368, - -15827, -10708, 2921, 14228, - 15288, 8368, -10708, -14228, - 2921, 15827, 5742, -12684, - 15827, -2921, -15288, 5742, - 14228, -8368, -12684, 10708, - 14228, -12684, -2921, 15288, - -10708, -5742, 15827, -8368, - 10708, -15827, 12684, -2921, - -8368, 15288, -14228, 5742, - 5742, -10708, 14228, -15827, - 15288, -12684, 8368, -2921 -}; - -static const float dct_16[256] = { - 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, - 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, - 0.351851, 0.338330, 0.311806, 0.273300, 0.224292, 0.166664, 0.102631, 0.034654, - -0.034654, -0.102631, -0.166664, -0.224292, -0.273300, -0.311806, -0.338330, -0.351851, - 0.346760, 0.293969, 0.196424, 0.068975, -0.068975, -0.196424, -0.293969, -0.346760, - -0.346760, -0.293969, -0.196424, -0.068975, 0.068975, 0.196424, 0.293969, 0.346760, - 0.338330, 0.224292, 0.034654, -0.166664, -0.311806, -0.351851, -0.273300, 
-0.102631, - 0.102631, 0.273300, 0.351851, 0.311806, 0.166664, -0.034654, -0.224292, -0.338330, - 0.326641, 0.135299, -0.135299, -0.326641, -0.326641, -0.135299, 0.135299, 0.326641, - 0.326641, 0.135299, -0.135299, -0.326641, -0.326641, -0.135299, 0.135299, 0.326641, - 0.311806, 0.034654, -0.273300, -0.338330, -0.102631, 0.224292, 0.351851, 0.166664, - -0.166664, -0.351851, -0.224292, 0.102631, 0.338330, 0.273300, -0.034654, -0.311806, - 0.293969, -0.068975, -0.346760, -0.196424, 0.196424, 0.346760, 0.068975, -0.293969, - -0.293969, 0.068975, 0.346760, 0.196424, -0.196424, -0.346760, -0.068975, 0.293969, - 0.273300, -0.166664, -0.338330, 0.034654, 0.351851, 0.102631, -0.311806, -0.224292, - 0.224292, 0.311806, -0.102631, -0.351851, -0.034654, 0.338330, 0.166664, -0.273300, - 0.250000, -0.250000, -0.250000, 0.250000, 0.250000, -0.250000, -0.250000, 0.250000, - 0.250000, -0.250000, -0.250000, 0.250000, 0.250000, -0.250000, -0.250000, 0.250000, - 0.224292, -0.311806, -0.102631, 0.351851, -0.034654, -0.338330, 0.166664, 0.273300, - -0.273300, -0.166664, 0.338330, 0.034654, -0.351851, 0.102631, 0.311806, -0.224292, - 0.196424, -0.346760, 0.068975, 0.293969, -0.293969, -0.068975, 0.346760, -0.196424, - -0.196424, 0.346760, -0.068975, -0.293969, 0.293969, 0.068975, -0.346760, 0.196424, - 0.166664, -0.351851, 0.224292, 0.102631, -0.338330, 0.273300, 0.034654, -0.311806, - 0.311806, -0.034654, -0.273300, 0.338330, -0.102631, -0.224292, 0.351851, -0.166664, - 0.135299, -0.326641, 0.326641, -0.135299, -0.135299, 0.326641, -0.326641, 0.135299, - 0.135299, -0.326641, 0.326641, -0.135299, -0.135299, 0.326641, -0.326641, 0.135299, - 0.102631, -0.273300, 0.351851, -0.311806, 0.166664, 0.034654, -0.224292, 0.338330, - -0.338330, 0.224292, -0.034654, -0.166664, 0.311806, -0.351851, 0.273300, -0.102631, - 0.068975, -0.196424, 0.293969, -0.346760, 0.346760, -0.293969, 0.196424, -0.068975, - -0.068975, 0.196424, -0.293969, 0.346760, -0.346760, 0.293969, -0.196424, 0.068975, - 0.034654, 
-0.102631, 0.166664, -0.224292, 0.273300, -0.311806, 0.338330, -0.351851, - 0.351851, -0.338330, 0.311806, -0.273300, 0.224292, -0.166664, 0.102631, -0.034654 -}; - -static const float adst_16[256] = { - 0.033094, 0.065889, 0.098087, 0.129396, 0.159534, 0.188227, 0.215215, 0.240255, - 0.263118, 0.283599, 0.301511, 0.316693, 0.329007, 0.338341, 0.344612, 0.347761, - 0.098087, 0.188227, 0.263118, 0.316693, 0.344612, 0.344612, 0.316693, 0.263118, - 0.188227, 0.098087, 0.000000, -0.098087, -0.188227, -0.263118, -0.316693, -0.344612, - 0.159534, 0.283599, 0.344612, 0.329007, 0.240255, 0.098087, -0.065889, -0.215215, - -0.316693, -0.347761, -0.301511, -0.188227, -0.033094, 0.129396, 0.263118, 0.338341, - 0.215215, 0.338341, 0.316693, 0.159534, -0.065889, -0.263118, -0.347761, -0.283599, - -0.098087, 0.129396, 0.301511, 0.344612, 0.240255, 0.033094, -0.188227, -0.329007, - 0.263118, 0.344612, 0.188227, -0.098087, -0.316693, -0.316693, -0.098087, 0.188227, - 0.344612, 0.263118, 0.000000, -0.263118, -0.344612, -0.188227, 0.098087, 0.316693, - 0.301511, 0.301511, 0.000000, -0.301511, -0.301511, -0.000000, 0.301511, 0.301511, - 0.000000, -0.301511, -0.301511, -0.000000, 0.301511, 0.301511, 0.000000, -0.301511, - 0.329007, 0.215215, -0.188227, -0.338341, -0.033094, 0.316693, 0.240255, -0.159534, - -0.344612, -0.065889, 0.301511, 0.263118, -0.129396, -0.347761, -0.098087, 0.283599, - 0.344612, 0.098087, -0.316693, -0.188227, 0.263118, 0.263118, -0.188227, -0.316693, - 0.098087, 0.344612, 0.000000, -0.344612, -0.098087, 0.316693, 0.188227, -0.263118, - 0.347761, -0.033094, -0.344612, 0.065889, 0.338341, -0.098087, -0.329007, 0.129396, - 0.316693, -0.159534, -0.301511, 0.188227, 0.283599, -0.215215, -0.263118, 0.240255, - 0.338341, -0.159534, -0.263118, 0.283599, 0.129396, -0.344612, 0.033094, 0.329007, - -0.188227, -0.240255, 0.301511, 0.098087, -0.347761, 0.065889, 0.316693, -0.215215, - 0.316693, -0.263118, -0.098087, 0.344612, -0.188227, -0.188227, 0.344612, -0.098087, - 
-0.263118, 0.316693, 0.000000, -0.316693, 0.263118, 0.098087, -0.344612, 0.188227, - 0.283599, -0.329007, 0.098087, 0.215215, -0.347761, 0.188227, 0.129396, -0.338341, - 0.263118, 0.033094, -0.301511, 0.316693, -0.065889, -0.240255, 0.344612, -0.159534, - 0.240255, -0.347761, 0.263118, -0.033094, -0.215215, 0.344612, -0.283599, 0.065889, - 0.188227, -0.338341, 0.301511, -0.098087, -0.159534, 0.329007, -0.316693, 0.129396, - 0.188227, -0.316693, 0.344612, -0.263118, 0.098087, 0.098087, -0.263118, 0.344612, - -0.316693, 0.188227, 0.000000, -0.188227, 0.316693, -0.344612, 0.263118, -0.098087, - 0.129396, -0.240255, 0.316693, -0.347761, 0.329007, -0.263118, 0.159534, -0.033094, - -0.098087, 0.215215, -0.301511, 0.344612, -0.338341, 0.283599, -0.188227, 0.065889, - 0.065889, -0.129396, 0.188227, -0.240255, 0.283599, -0.316693, 0.338341, -0.347761, - 0.344612, -0.329007, 0.301511, -0.263118, 0.215215, -0.159534, 0.098087, -0.033094 -}; - -/* Converted the transforms to integers. */ -static const int16_t dct_i16[256] = { - 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, - 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, - 11529, 11086, 10217, 8955, 7350, 5461, 3363, 1136, - -1136, -3363, -5461, -7350, -8955, -10217, -11086, -11529, - 11363, 9633, 6436, 2260, -2260, -6436, -9633, -11363, - -11363, -9633, -6436, -2260, 2260, 6436, 9633, 11363, - 11086, 7350, 1136, -5461, -10217, -11529, -8955, -3363, - 3363, 8955, 11529, 10217, 5461, -1136, -7350, -11086, - 10703, 4433, -4433, -10703, -10703, -4433, 4433, 10703, - 10703, 4433, -4433, -10703, -10703, -4433, 4433, 10703, - 10217, 1136, -8955, -11086, -3363, 7350, 11529, 5461, - -5461, -11529, -7350, 3363, 11086, 8955, -1136, -10217, - 9633, -2260, -11363, -6436, 6436, 11363, 2260, -9633, - -9633, 2260, 11363, 6436, -6436, -11363, -2260, 9633, - 8955, -5461, -11086, 1136, 11529, 3363, -10217, -7350, - 7350, 10217, -3363, -11529, -1136, 11086, 5461, -8955, - 8192, -8192, -8192, 8192, 8192, -8192, -8192, 8192, - 8192, -8192, 
-8192, 8192, 8192, -8192, -8192, 8192, - 7350, -10217, -3363, 11529, -1136, -11086, 5461, 8955, - -8955, -5461, 11086, 1136, -11529, 3363, 10217, -7350, - 6436, -11363, 2260, 9633, -9633, -2260, 11363, -6436, - -6436, 11363, -2260, -9633, 9633, 2260, -11363, 6436, - 5461, -11529, 7350, 3363, -11086, 8955, 1136, -10217, - 10217, -1136, -8955, 11086, -3363, -7350, 11529, -5461, - 4433, -10703, 10703, -4433, -4433, 10703, -10703, 4433, - 4433, -10703, 10703, -4433, -4433, 10703, -10703, 4433, - 3363, -8955, 11529, -10217, 5461, 1136, -7350, 11086, - -11086, 7350, -1136, -5461, 10217, -11529, 8955, -3363, - 2260, -6436, 9633, -11363, 11363, -9633, 6436, -2260, - -2260, 6436, -9633, 11363, -11363, 9633, -6436, 2260, - 1136, -3363, 5461, -7350, 8955, -10217, 11086, -11529, - 11529, -11086, 10217, -8955, 7350, -5461, 3363, -1136 -}; - -static const int16_t adst_i16[256] = { - 1084, 2159, 3214, 4240, 5228, 6168, 7052, 7873, - 8622, 9293, 9880, 10377, 10781, 11087, 11292, 11395, - 3214, 6168, 8622, 10377, 11292, 11292, 10377, 8622, - 6168, 3214, 0, -3214, -6168, -8622, -10377, -11292, - 5228, 9293, 11292, 10781, 7873, 3214, -2159, -7052, - -10377, -11395, -9880, -6168, -1084, 4240, 8622, 11087, - 7052, 11087, 10377, 5228, -2159, -8622, -11395, -9293, - -3214, 4240, 9880, 11292, 7873, 1084, -6168, -10781, - 8622, 11292, 6168, -3214, -10377, -10377, -3214, 6168, - 11292, 8622, 0, -8622, -11292, -6168, 3214, 10377, - 9880, 9880, 0, -9880, -9880, 0, 9880, 9880, - 0, -9880, -9880, 0, 9880, 9880, 0, -9880, - 10781, 7052, -6168, -11087, -1084, 10377, 7873, -5228, - -11292, -2159, 9880, 8622, -4240, -11395, -3214, 9293, - 11292, 3214, -10377, -6168, 8622, 8622, -6168, -10377, - 3214, 11292, 0, -11292, -3214, 10377, 6168, -8622, - 11395, -1084, -11292, 2159, 11087, -3214, -10781, 4240, - 10377, -5228, -9880, 6168, 9293, -7052, -8622, 7873, - 11087, -5228, -8622, 9293, 4240, -11292, 1084, 10781, - -6168, -7873, 9880, 3214, -11395, 2159, 10377, -7052, - 10377, -8622, -3214, 11292, 
-6168, -6168, 11292, -3214, - -8622, 10377, 0, -10377, 8622, 3214, -11292, 6168, - 9293, -10781, 3214, 7052, -11395, 6168, 4240, -11087, - 8622, 1084, -9880, 10377, -2159, -7873, 11292, -5228, - 7873, -11395, 8622, -1084, -7052, 11292, -9293, 2159, - 6168, -11087, 9880, -3214, -5228, 10781, -10377, 4240, - 6168, -10377, 11292, -8622, 3214, 3214, -8622, 11292, - -10377, 6168, 0, -6168, 10377, -11292, 8622, -3214, - 4240, -7873, 10377, -11395, 10781, -8622, 5228, -1084, - -3214, 7052, -9880, 11292, -11087, 9293, -6168, 2159, - 2159, -4240, 6168, -7873, 9293, -10377, 11087, -11395, - 11292, -10781, 9880, -8622, 7052, -5228, 3214, -1084 -}; - -static const int xC1S7 = 16069; -static const int xC2S6 = 15137; -static const int xC3S5 = 13623; -static const int xC4S4 = 11585; -static const int xC5S3 = 9102; -static const int xC6S2 = 6270; -static const int xC7S1 = 3196; - -#define SHIFT_BITS 14 -#define DOROUND(X) X += (1<<(SHIFT_BITS-1)); - -#define FINAL_SHIFT 3 -#define FINAL_ROUNDING (1<<(FINAL_SHIFT -1)) -#define IN_SHIFT (FINAL_SHIFT+1) - - -void vp9_short_fdct8x8_c(short *InputData, short *OutputData, int pitch) { - int loop; - int short_pitch = pitch >> 1; - int is07, is12, is34, is56; - int is0734, is1256; - int id07, id12, id34, id56; - int irot_input_x, irot_input_y; - int icommon_product1; // Re-used product (c4s4 * (s12 - s56)) - int icommon_product2; // Re-used product (c4s4 * (d12 + d56)) - int temp1, temp2; // intermediate variable for computation - - int InterData[64]; - int *ip = InterData; - short *op = OutputData; - - for (loop = 0; loop < 8; loop++) { - // Pre calculate some common sums and differences. 
- is07 = (InputData[0] + InputData[7]) << IN_SHIFT; - is12 = (InputData[1] + InputData[2]) << IN_SHIFT; - is34 = (InputData[3] + InputData[4]) << IN_SHIFT; - is56 = (InputData[5] + InputData[6]) << IN_SHIFT; - id07 = (InputData[0] - InputData[7]) << IN_SHIFT; - id12 = (InputData[1] - InputData[2]) << IN_SHIFT; - id34 = (InputData[3] - InputData[4]) << IN_SHIFT; - id56 = (InputData[5] - InputData[6]) << IN_SHIFT; - - is0734 = is07 + is34; - is1256 = is12 + is56; - - // Pre-Calculate some common product terms. - icommon_product1 = xC4S4 * (is12 - is56); - DOROUND(icommon_product1) - icommon_product1 >>= SHIFT_BITS; - - icommon_product2 = xC4S4 * (id12 + id56); - DOROUND(icommon_product2) - icommon_product2 >>= SHIFT_BITS; - - - ip[0] = (xC4S4 * (is0734 + is1256)); - DOROUND(ip[0]); - ip[0] >>= SHIFT_BITS; - - ip[4] = (xC4S4 * (is0734 - is1256)); - DOROUND(ip[4]); - ip[4] >>= SHIFT_BITS; - - // Define inputs to rotation for outputs 2 and 6 - irot_input_x = id12 - id56; - irot_input_y = is07 - is34; - - // Apply rotation for outputs 2 and 6. - temp1 = xC6S2 * irot_input_x; - DOROUND(temp1); - temp1 >>= SHIFT_BITS; - temp2 = xC2S6 * irot_input_y; - DOROUND(temp2); - temp2 >>= SHIFT_BITS; - ip[2] = temp1 + temp2; - - temp1 = xC6S2 * irot_input_y; - DOROUND(temp1); - temp1 >>= SHIFT_BITS; - temp2 = xC2S6 * irot_input_x; - DOROUND(temp2); - temp2 >>= SHIFT_BITS; - ip[6] = temp1 - temp2; - - // Define inputs to rotation for outputs 1 and 7 - irot_input_x = icommon_product1 + id07; - irot_input_y = -(id34 + icommon_product2); - - // Apply rotation for outputs 1 and 7. 
- temp1 = xC1S7 * irot_input_x; - DOROUND(temp1); - temp1 >>= SHIFT_BITS; - temp2 = xC7S1 * irot_input_y; - DOROUND(temp2); - temp2 >>= SHIFT_BITS; - ip[1] = temp1 - temp2; - - temp1 = xC7S1 * irot_input_x; - DOROUND(temp1); - temp1 >>= SHIFT_BITS; - temp2 = xC1S7 * irot_input_y; - DOROUND(temp2); - temp2 >>= SHIFT_BITS; - ip[7] = temp1 + temp2; - - // Define inputs to rotation for outputs 3 and 5 - irot_input_x = id07 - icommon_product1; - irot_input_y = id34 - icommon_product2; - - // Apply rotation for outputs 3 and 5. - temp1 = xC3S5 * irot_input_x; - DOROUND(temp1); - temp1 >>= SHIFT_BITS; - temp2 = xC5S3 * irot_input_y; - DOROUND(temp2); - temp2 >>= SHIFT_BITS; - ip[3] = temp1 - temp2; - - - temp1 = xC5S3 * irot_input_x; - DOROUND(temp1); - temp1 >>= SHIFT_BITS; - temp2 = xC3S5 * irot_input_y; - DOROUND(temp2); - temp2 >>= SHIFT_BITS; - ip[5] = temp1 + temp2; - - // Increment data pointer for next row - InputData += short_pitch; - ip += 8; - } - - // Performed DCT on rows, now transform the columns - ip = InterData; - for (loop = 0; loop < 8; loop++) { - // Pre calculate some common sums and differences. 
- is07 = ip[0 * 8] + ip[7 * 8]; - is12 = ip[1 * 8] + ip[2 * 8]; - is34 = ip[3 * 8] + ip[4 * 8]; - is56 = ip[5 * 8] + ip[6 * 8]; - - id07 = ip[0 * 8] - ip[7 * 8]; - id12 = ip[1 * 8] - ip[2 * 8]; - id34 = ip[3 * 8] - ip[4 * 8]; - id56 = ip[5 * 8] - ip[6 * 8]; - - is0734 = is07 + is34; - is1256 = is12 + is56; - - // Pre-Calculate some common product terms - icommon_product1 = xC4S4 * (is12 - is56); - icommon_product2 = xC4S4 * (id12 + id56); - DOROUND(icommon_product1) - DOROUND(icommon_product2) - icommon_product1 >>= SHIFT_BITS; - icommon_product2 >>= SHIFT_BITS; - - - temp1 = xC4S4 * (is0734 + is1256); - temp2 = xC4S4 * (is0734 - is1256); - DOROUND(temp1); - DOROUND(temp2); - temp1 >>= SHIFT_BITS; - - temp2 >>= SHIFT_BITS; - op[0 * 8] = (temp1 + FINAL_ROUNDING) >> FINAL_SHIFT; - op[4 * 8] = (temp2 + FINAL_ROUNDING) >> FINAL_SHIFT; - - // Define inputs to rotation for outputs 2 and 6 - irot_input_x = id12 - id56; - irot_input_y = is07 - is34; - - // Apply rotation for outputs 2 and 6. - temp1 = xC6S2 * irot_input_x; - DOROUND(temp1); - temp1 >>= SHIFT_BITS; - temp2 = xC2S6 * irot_input_y; - DOROUND(temp2); - temp2 >>= SHIFT_BITS; - op[2 * 8] = (temp1 + temp2 + FINAL_ROUNDING) >> FINAL_SHIFT; - - temp1 = xC6S2 * irot_input_y; - DOROUND(temp1); - temp1 >>= SHIFT_BITS; - temp2 = xC2S6 * irot_input_x; - DOROUND(temp2); - temp2 >>= SHIFT_BITS; - op[6 * 8] = (temp1 - temp2 + FINAL_ROUNDING) >> FINAL_SHIFT; - - // Define inputs to rotation for outputs 1 and 7 - irot_input_x = icommon_product1 + id07; - irot_input_y = -(id34 + icommon_product2); - - // Apply rotation for outputs 1 and 7. 
- temp1 = xC1S7 * irot_input_x; - DOROUND(temp1); - temp1 >>= SHIFT_BITS; - temp2 = xC7S1 * irot_input_y; - DOROUND(temp2); - temp2 >>= SHIFT_BITS; - op[1 * 8] = (temp1 - temp2 + FINAL_ROUNDING) >> FINAL_SHIFT; - - temp1 = xC7S1 * irot_input_x; - DOROUND(temp1); - temp1 >>= SHIFT_BITS; - temp2 = xC1S7 * irot_input_y; - DOROUND(temp2); - temp2 >>= SHIFT_BITS; - op[7 * 8] = (temp1 + temp2 + FINAL_ROUNDING) >> FINAL_SHIFT; - - // Define inputs to rotation for outputs 3 and 5 - irot_input_x = id07 - icommon_product1; - irot_input_y = id34 - icommon_product2; - - // Apply rotation for outputs 3 and 5. - temp1 = xC3S5 * irot_input_x; - DOROUND(temp1); - temp1 >>= SHIFT_BITS; - temp2 = xC5S3 * irot_input_y; - DOROUND(temp2); - temp2 >>= SHIFT_BITS; - op[3 * 8] = (temp1 - temp2 + FINAL_ROUNDING) >> FINAL_SHIFT; - - - temp1 = xC5S3 * irot_input_x; - DOROUND(temp1); - temp1 >>= SHIFT_BITS; - temp2 = xC3S5 * irot_input_y; - DOROUND(temp2); - temp2 >>= SHIFT_BITS; - op[5 * 8] = (temp1 + temp2 + FINAL_ROUNDING) >> FINAL_SHIFT; - - // Increment data pointer for next column. 
- ip++; - op++; - } -} - -void vp9_short_fhaar2x2_c(short *input, short *output, int pitch) { - /* [1 1; 1 -1] orthogonal transform */ - /* use position: 0,1, 4, 8 */ - int i; - short *ip1 = input; - short *op1 = output; - for (i = 0; i < 16; i++) { - op1[i] = 0; - } - - op1[0] = (ip1[0] + ip1[1] + ip1[4] + ip1[8] + 1) >> 1; - op1[1] = (ip1[0] - ip1[1] + ip1[4] - ip1[8]) >> 1; - op1[4] = (ip1[0] + ip1[1] - ip1[4] - ip1[8]) >> 1; - op1[8] = (ip1[0] - ip1[1] - ip1[4] + ip1[8]) >> 1; -} - -/* For test */ -#define TEST_INT 1 -#if TEST_INT -#define vp9_fht_int_c vp9_fht_c -#else -#define vp9_fht_float_c vp9_fht_c -#endif - -void vp9_fht_float_c(const int16_t *input, int pitch, int16_t *output, - TX_TYPE tx_type, int tx_dim) { - vp9_clear_system_state(); // Make it simd safe : __asm emms; - { - int i, j, k; - float bufa[256], bufb[256]; // buffers are for floating-point test purpose - // the implementation could be simplified in - // conjunction with integer transform - const int16_t *ip = input; - int16_t *op = output; - - float *pfa = &bufa[0]; - float *pfb = &bufb[0]; - - // pointers to vertical and horizontal transforms - const float *ptv, *pth; - - assert(tx_type != DCT_DCT); - // load and convert residual array into floating-point - for (j = 0; j < tx_dim; j++) { - for (i = 0; i < tx_dim; i++) { - pfa[i] = (float)ip[i]; - } - pfa += tx_dim; - ip += pitch / 2; - } - - // vertical transformation - pfa = &bufa[0]; - pfb = &bufb[0]; - - switch (tx_type) { - case ADST_ADST : - case ADST_DCT : - ptv = (tx_dim == 4) ? &adst_4[0] : - ((tx_dim == 8) ? &adst_8[0] : &adst_16[0]); - break; - - default : - ptv = (tx_dim == 4) ? &dct_4[0] : - ((tx_dim == 8) ? 
&dct_8[0] : &dct_16[0]); - break; - } - - for (j = 0; j < tx_dim; j++) { - for (i = 0; i < tx_dim; i++) { - pfb[i] = 0; - for (k = 0; k < tx_dim; k++) { - pfb[i] += ptv[k] * pfa[(k * tx_dim)]; - } - pfa += 1; - } - pfb += tx_dim; - ptv += tx_dim; - pfa = &bufa[0]; - } - - // horizontal transformation - pfa = &bufa[0]; - pfb = &bufb[0]; - - switch (tx_type) { - case ADST_ADST : - case DCT_ADST : - pth = (tx_dim == 4) ? &adst_4[0] : - ((tx_dim == 8) ? &adst_8[0] : &adst_16[0]); - break; - - default : - pth = (tx_dim == 4) ? &dct_4[0] : - ((tx_dim == 8) ? &dct_8[0] : &dct_16[0]); - break; - } - - for (j = 0; j < tx_dim; j++) { - for (i = 0; i < tx_dim; i++) { - pfa[i] = 0; - for (k = 0; k < tx_dim; k++) { - pfa[i] += pfb[k] * pth[k]; - } - pth += tx_dim; - } - - pfa += tx_dim; - pfb += tx_dim; - // pth -= tx_dim * tx_dim; - - switch (tx_type) { - case ADST_ADST : - case DCT_ADST : - pth = (tx_dim == 4) ? &adst_4[0] : - ((tx_dim == 8) ? &adst_8[0] : &adst_16[0]); - break; - - default : - pth = (tx_dim == 4) ? &dct_4[0] : - ((tx_dim == 8) ? &dct_8[0] : &dct_16[0]); - break; - } - } - - // convert to short integer format and load BLOCKD buffer - op = output; - pfa = &bufa[0]; - - for (j = 0; j < tx_dim; j++) { - for (i = 0; i < tx_dim; i++) { - op[i] = (pfa[i] > 0 ) ? (int16_t)( 8 * pfa[i] + 0.49) : - -(int16_t)(- 8 * pfa[i] + 0.49); - } - op += tx_dim; - pfa += tx_dim; - } - } - vp9_clear_system_state(); // Make it simd safe : __asm emms; -} - -/* Converted the transforms to integer form. */ -#define VERTICAL_SHIFT 11 -#define VERTICAL_ROUNDING ((1 << (VERTICAL_SHIFT - 1)) - 1) -#define HORIZONTAL_SHIFT 16 -#define HORIZONTAL_ROUNDING ((1 << (HORIZONTAL_SHIFT - 1)) - 1) -void vp9_fht_int_c(const int16_t *input, int pitch, int16_t *output, - TX_TYPE tx_type, int tx_dim) { - int i, j, k; - int16_t imbuf[256]; - - const int16_t *ip = input; - int16_t *op = output; - int16_t *im = &imbuf[0]; - - /* pointers to vertical and horizontal transforms. 
*/ - const int16_t *ptv = NULL, *pth = NULL; - - switch (tx_type) { - case ADST_ADST : - ptv = pth = (tx_dim == 4) ? &adst_i4[0] - : ((tx_dim == 8) ? &adst_i8[0] - : &adst_i16[0]); - break; - case ADST_DCT : - ptv = (tx_dim == 4) ? &adst_i4[0] - : ((tx_dim == 8) ? &adst_i8[0] : &adst_i16[0]); - pth = (tx_dim == 4) ? &dct_i4[0] - : ((tx_dim == 8) ? &dct_i8[0] : &dct_i16[0]); - break; - case DCT_ADST : - ptv = (tx_dim == 4) ? &dct_i4[0] - : ((tx_dim == 8) ? &dct_i8[0] : &dct_i16[0]); - pth = (tx_dim == 4) ? &adst_i4[0] - : ((tx_dim == 8) ? &adst_i8[0] : &adst_i16[0]); - break; - case DCT_DCT : - ptv = pth = (tx_dim == 4) ? &dct_i4[0] - : ((tx_dim == 8) ? &dct_i8[0] : &dct_i16[0]); - break; - default: - assert(0); - break; - } - - /* vertical transformation */ - for (j = 0; j < tx_dim; j++) { - for (i = 0; i < tx_dim; i++) { - int temp = 0; - - for (k = 0; k < tx_dim; k++) { - temp += ptv[k] * ip[(k * (pitch >> 1))]; - } - - im[i] = (int16_t)((temp + VERTICAL_ROUNDING) >> VERTICAL_SHIFT); - ip++; - } - im += tx_dim; // 16 - ptv += tx_dim; - ip = input; - } - - /* horizontal transformation */ - im = &imbuf[0]; - - for (j = 0; j < tx_dim; j++) { - const int16_t *pthc = pth; - - for (i = 0; i < tx_dim; i++) { - int temp = 0; - - for (k = 0; k < tx_dim; k++) { - temp += im[k] * pthc[k]; - } - - op[i] = (int16_t)((temp + HORIZONTAL_ROUNDING) >> HORIZONTAL_SHIFT); - pthc += tx_dim; - } - - im += tx_dim; // 16 - op += tx_dim; - } -} - -void vp9_short_fdct4x4_c(short *input, short *output, int pitch) { - int i; - int a1, b1, c1, d1; - short *ip = input; - short *op = output; - - for (i = 0; i < 4; i++) { - a1 = ((ip[0] + ip[3]) << 5); - b1 = ((ip[1] + ip[2]) << 5); - c1 = ((ip[1] - ip[2]) << 5); - d1 = ((ip[0] - ip[3]) << 5); - - op[0] = a1 + b1; - op[2] = a1 - b1; - - op[1] = (c1 * 2217 + d1 * 5352 + 14500) >> 12; - op[3] = (d1 * 2217 - c1 * 5352 + 7500) >> 12; - - ip += pitch / 2; - op += 4; - - } - ip = output; - op = output; - for (i = 0; i < 4; i++) { - a1 = ip[0] + 
ip[12]; - b1 = ip[4] + ip[8]; - c1 = ip[4] - ip[8]; - d1 = ip[0] - ip[12]; - - op[0] = (a1 + b1 + 7) >> 4; - op[8] = (a1 - b1 + 7) >> 4; - - op[4] = ((c1 * 2217 + d1 * 5352 + 12000) >> 16) + (d1 != 0); - op[12] = (d1 * 2217 - c1 * 5352 + 51000) >> 16; - - ip++; - op++; - } -} - -void vp9_short_fdct8x4_c(short *input, short *output, int pitch) -{ - vp9_short_fdct4x4_c(input, output, pitch); - vp9_short_fdct4x4_c(input + 4, output + 16, pitch); -} - -void vp9_short_walsh4x4_c(short *input, short *output, int pitch) { - int i; - int a1, b1, c1, d1; - short *ip = input; - short *op = output; - int pitch_short = pitch >> 1; - - for (i = 0; i < 4; i++) { - a1 = ip[0 * pitch_short] + ip[3 * pitch_short]; - b1 = ip[1 * pitch_short] + ip[2 * pitch_short]; - c1 = ip[1 * pitch_short] - ip[2 * pitch_short]; - d1 = ip[0 * pitch_short] - ip[3 * pitch_short]; - - op[0] = (a1 + b1 + 1) >> 1; - op[4] = (c1 + d1) >> 1; - op[8] = (a1 - b1) >> 1; - op[12] = (d1 - c1) >> 1; - - ip++; - op++; - } - ip = output; - op = output; - - for (i = 0; i < 4; i++) { - a1 = ip[0] + ip[3]; - b1 = ip[1] + ip[2]; - c1 = ip[1] - ip[2]; - d1 = ip[0] - ip[3]; - - op[0] = (a1 + b1 + 1) >> 1; - op[1] = (c1 + d1) >> 1; - op[2] = (a1 - b1) >> 1; - op[3] = (d1 - c1) >> 1; - - ip += 4; - op += 4; - } -} - -#if CONFIG_LOSSLESS -void vp9_short_walsh4x4_lossless_c(short *input, short *output, int pitch) { - int i; - int a1, b1, c1, d1; - short *ip = input; - short *op = output; - int pitch_short = pitch >> 1; - - for (i = 0; i < 4; i++) { - a1 = (ip[0 * pitch_short] + ip[3 * pitch_short]) >> Y2_WHT_UPSCALE_FACTOR; - b1 = (ip[1 * pitch_short] + ip[2 * pitch_short]) >> Y2_WHT_UPSCALE_FACTOR; - c1 = (ip[1 * pitch_short] - ip[2 * pitch_short]) >> Y2_WHT_UPSCALE_FACTOR; - d1 = (ip[0 * pitch_short] - ip[3 * pitch_short]) >> Y2_WHT_UPSCALE_FACTOR; - - op[0] = (a1 + b1 + 1) >> 1; - op[4] = (c1 + d1) >> 1; - op[8] = (a1 - b1) >> 1; - op[12] = (d1 - c1) >> 1; - - ip++; - op++; - } - ip = output; - op = output; - - for (i = 
0; i < 4; i++) { - a1 = ip[0] + ip[3]; - b1 = ip[1] + ip[2]; - c1 = ip[1] - ip[2]; - d1 = ip[0] - ip[3]; - - op[0] = ((a1 + b1 + 1) >> 1) << Y2_WHT_UPSCALE_FACTOR; - op[1] = ((c1 + d1) >> 1) << Y2_WHT_UPSCALE_FACTOR; - op[2] = ((a1 - b1) >> 1) << Y2_WHT_UPSCALE_FACTOR; - op[3] = ((d1 - c1) >> 1) << Y2_WHT_UPSCALE_FACTOR; - - ip += 4; - op += 4; - } -} - -void vp9_short_walsh4x4_x8_c(short *input, short *output, int pitch) { - int i; - int a1, b1, c1, d1; - short *ip = input; - short *op = output; - int pitch_short = pitch >> 1; - - for (i = 0; i < 4; i++) { - a1 = ip[0 * pitch_short] + ip[3 * pitch_short]; - b1 = ip[1 * pitch_short] + ip[2 * pitch_short]; - c1 = ip[1 * pitch_short] - ip[2 * pitch_short]; - d1 = ip[0 * pitch_short] - ip[3 * pitch_short]; - - op[0] = (a1 + b1 + 1) >> 1; - op[4] = (c1 + d1) >> 1; - op[8] = (a1 - b1) >> 1; - op[12] = (d1 - c1) >> 1; - - ip++; - op++; - } - ip = output; - op = output; - - for (i = 0; i < 4; i++) { - a1 = ip[0] + ip[3]; - b1 = ip[1] + ip[2]; - c1 = ip[1] - ip[2]; - d1 = ip[0] - ip[3]; - - op[0] = ((a1 + b1 + 1) >> 1) << WHT_UPSCALE_FACTOR; - op[1] = ((c1 + d1) >> 1) << WHT_UPSCALE_FACTOR; - op[2] = ((a1 - b1) >> 1) << WHT_UPSCALE_FACTOR; - op[3] = ((d1 - c1) >> 1) << WHT_UPSCALE_FACTOR; - - ip += 4; - op += 4; - } -} - -void vp9_short_walsh8x4_x8_c(short *input, short *output, int pitch) { - vp9_short_walsh4x4_x8_c(input, output, pitch); - vp9_short_walsh4x4_x8_c(input + 4, output + 16, pitch); -} -#endif - -static const double C1 = 0.995184726672197; -static const double C2 = 0.98078528040323; -static const double C3 = 0.956940335732209; -static const double C4 = 0.923879532511287; -static const double C5 = 0.881921264348355; -static const double C6 = 0.831469612302545; -static const double C7 = 0.773010453362737; -static const double C8 = 0.707106781186548; -static const double C9 = 0.634393284163646; -static const double C10 = 0.555570233019602; -static const double C11 = 0.471396736825998; -static const double C12 = 
0.38268343236509; -static const double C13 = 0.290284677254462; -static const double C14 = 0.195090322016128; -static const double C15 = 0.098017140329561; - -static void dct16x16_1d(double input[16], double output[16]) { - vp9_clear_system_state(); // Make it simd safe : __asm emms; - { - double step[16]; - double intermediate[16]; - double temp1, temp2; - - // step 1 - step[ 0] = input[0] + input[15]; - step[ 1] = input[1] + input[14]; - step[ 2] = input[2] + input[13]; - step[ 3] = input[3] + input[12]; - step[ 4] = input[4] + input[11]; - step[ 5] = input[5] + input[10]; - step[ 6] = input[6] + input[ 9]; - step[ 7] = input[7] + input[ 8]; - step[ 8] = input[7] - input[ 8]; - step[ 9] = input[6] - input[ 9]; - step[10] = input[5] - input[10]; - step[11] = input[4] - input[11]; - step[12] = input[3] - input[12]; - step[13] = input[2] - input[13]; - step[14] = input[1] - input[14]; - step[15] = input[0] - input[15]; - - // step 2 - output[0] = step[0] + step[7]; - output[1] = step[1] + step[6]; - output[2] = step[2] + step[5]; - output[3] = step[3] + step[4]; - output[4] = step[3] - step[4]; - output[5] = step[2] - step[5]; - output[6] = step[1] - step[6]; - output[7] = step[0] - step[7]; - - temp1 = step[ 8]*C7; - temp2 = step[15]*C9; - output[ 8] = temp1 + temp2; - - temp1 = step[ 9]*C11; - temp2 = step[14]*C5; - output[ 9] = temp1 - temp2; - - temp1 = step[10]*C3; - temp2 = step[13]*C13; - output[10] = temp1 + temp2; - - temp1 = step[11]*C15; - temp2 = step[12]*C1; - output[11] = temp1 - temp2; - - temp1 = step[11]*C1; - temp2 = step[12]*C15; - output[12] = temp2 + temp1; - - temp1 = step[10]*C13; - temp2 = step[13]*C3; - output[13] = temp2 - temp1; - - temp1 = step[ 9]*C5; - temp2 = step[14]*C11; - output[14] = temp2 + temp1; - - temp1 = step[ 8]*C9; - temp2 = step[15]*C7; - output[15] = temp2 - temp1; - - // step 3 - step[ 0] = output[0] + output[3]; - step[ 1] = output[1] + output[2]; - step[ 2] = output[1] - output[2]; - step[ 3] = output[0] - output[3]; - 
- temp1 = output[4]*C14; - temp2 = output[7]*C2; - step[ 4] = temp1 + temp2; - - temp1 = output[5]*C10; - temp2 = output[6]*C6; - step[ 5] = temp1 + temp2; - - temp1 = output[5]*C6; - temp2 = output[6]*C10; - step[ 6] = temp2 - temp1; - - temp1 = output[4]*C2; - temp2 = output[7]*C14; - step[ 7] = temp2 - temp1; - - step[ 8] = output[ 8] + output[11]; - step[ 9] = output[ 9] + output[10]; - step[10] = output[ 9] - output[10]; - step[11] = output[ 8] - output[11]; - - step[12] = output[12] + output[15]; - step[13] = output[13] + output[14]; - step[14] = output[13] - output[14]; - step[15] = output[12] - output[15]; - - // step 4 - output[ 0] = (step[ 0] + step[ 1]); - output[ 8] = (step[ 0] - step[ 1]); - - temp1 = step[2]*C12; - temp2 = step[3]*C4; - temp1 = temp1 + temp2; - output[ 4] = 2*(temp1*C8); - - temp1 = step[2]*C4; - temp2 = step[3]*C12; - temp1 = temp2 - temp1; - output[12] = 2*(temp1*C8); - - output[ 2] = 2*((step[4] + step[ 5])*C8); - output[14] = 2*((step[7] - step[ 6])*C8); - - temp1 = step[4] - step[5]; - temp2 = step[6] + step[7]; - output[ 6] = (temp1 + temp2); - output[10] = (temp1 - temp2); - - intermediate[8] = step[8] + step[14]; - intermediate[9] = step[9] + step[15]; - - temp1 = intermediate[8]*C12; - temp2 = intermediate[9]*C4; - temp1 = temp1 - temp2; - output[3] = 2*(temp1*C8); - - temp1 = intermediate[8]*C4; - temp2 = intermediate[9]*C12; - temp1 = temp2 + temp1; - output[13] = 2*(temp1*C8); - - output[ 9] = 2*((step[10] + step[11])*C8); - - intermediate[11] = step[10] - step[11]; - intermediate[12] = step[12] + step[13]; - intermediate[13] = step[12] - step[13]; - intermediate[14] = step[ 8] - step[14]; - intermediate[15] = step[ 9] - step[15]; - - output[15] = (intermediate[11] + intermediate[12]); - output[ 1] = -(intermediate[11] - intermediate[12]); - - output[ 7] = 2*(intermediate[13]*C8); - - temp1 = intermediate[14]*C12; - temp2 = intermediate[15]*C4; - temp1 = temp1 - temp2; - output[11] = -2*(temp1*C8); - - temp1 = 
intermediate[14]*C4; - temp2 = intermediate[15]*C12; - temp1 = temp2 + temp1; - output[ 5] = 2*(temp1*C8); - } - vp9_clear_system_state(); // Make it simd safe : __asm emms; -} - -void vp9_short_fdct16x16_c(short *input, short *out, int pitch) { - vp9_clear_system_state(); // Make it simd safe : __asm emms; - { - int shortpitch = pitch >> 1; - int i, j; - double output[256]; - // First transform columns - for (i = 0; i < 16; i++) { - double temp_in[16], temp_out[16]; - for (j = 0; j < 16; j++) - temp_in[j] = input[j*shortpitch + i]; - dct16x16_1d(temp_in, temp_out); - for (j = 0; j < 16; j++) - output[j*16 + i] = temp_out[j]; - } - // Then transform rows - for (i = 0; i < 16; ++i) { - double temp_in[16], temp_out[16]; - for (j = 0; j < 16; ++j) - temp_in[j] = output[j + i*16]; - dct16x16_1d(temp_in, temp_out); - for (j = 0; j < 16; ++j) - output[j + i*16] = temp_out[j]; - } - // Scale by some magic number - for (i = 0; i < 256; i++) - out[i] = (short)round(output[i]/2); - } - vp9_clear_system_state(); // Make it simd safe : __asm emms; -} diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c deleted file mode 100644 index 32777876f..000000000 --- a/vp8/encoder/encodeframe.c +++ /dev/null @@ -1,2342 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include "vpx_ports/config.h" -#include "encodemb.h" -#include "encodemv.h" -#include "vp8/common/common.h" -#include "onyx_int.h" -#include "vp8/common/extend.h" -#include "vp8/common/entropymode.h" -#include "vp8/common/quant_common.h" -#include "segmentation.h" -#include "vp8/common/setupintrarecon.h" -#include "vp8/common/reconintra4x4.h" -#include "encodeintra.h" -#include "vp8/common/reconinter.h" -#include "vp8/common/invtrans.h" -#include "rdopt.h" -#include "vp8/common/findnearmv.h" -#include "vp8/common/reconintra.h" -#include "vp8/common/seg_common.h" -#include "vpx_rtcd.h" -#include <stdio.h> -#include <math.h> -#include <limits.h> -#include "vp8/common/subpixel.h" -#include "vpx_ports/vpx_timer.h" -#include "vp8/common/pred_common.h" - -#define DBG_PRNT_SEGMAP 0 -#if CONFIG_NEWBESTREFMV -#include "vp8/common/mvref_common.h" -#endif - - -#if CONFIG_RUNTIME_CPU_DETECT -#define RTCD(x) &cpi->common.rtcd.x -#define IF_RTCD(x) (x) -#else -#define RTCD(x) NULL -#define IF_RTCD(x) NULL -#endif - -#ifdef ENC_DEBUG -int enc_debug = 0; -int mb_row_debug, mb_col_debug; -#endif - -extern void vp9_initialize_me_consts(VP9_COMP *cpi, int QIndex); - -extern void vp9_auto_select_speed(VP9_COMP *cpi); - -int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, - int recon_yoffset, int recon_uvoffset, - int *returnrate, int *returndistortion); - -extern void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x, - int recon_yoffset, - int recon_uvoffset, int *r, int *d); - -void vp9_build_block_offsets(MACROBLOCK *x); - -void vp9_setup_block_ptrs(MACROBLOCK *x); - -void vp9_encode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, - int recon_yoffset, int recon_uvoffset, - int output_enabled); - -void vp9_encode_inter_superblock(VP9_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, - int recon_yoffset, int recon_uvoffset, - int mb_col, int mb_row); - -void vp9_encode_intra_macro_block(VP9_COMP *cpi, MACROBLOCK *x, - TOKENEXTRA **t, int 
output_enabled); - -void vp9_encode_intra_super_block(VP9_COMP *cpi, MACROBLOCK *x, - TOKENEXTRA **t, int mb_col); - -static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x); - -#ifdef MODE_STATS -unsigned int inter_y_modes[MB_MODE_COUNT]; -unsigned int inter_uv_modes[VP9_UV_MODES]; -unsigned int inter_b_modes[B_MODE_COUNT]; -unsigned int y_modes[VP9_YMODES]; -unsigned int i8x8_modes[VP9_I8X8_MODES]; -unsigned int uv_modes[VP9_UV_MODES]; -unsigned int uv_modes_y[VP9_YMODES][VP9_UV_MODES]; -unsigned int b_modes[B_MODE_COUNT]; -#endif - - -/* activity_avg must be positive, or flat regions could get a zero weight - * (infinite lambda), which confounds analysis. - * This also avoids the need for divide by zero checks in - * vp9_activity_masking(). - */ -#define VP9_ACTIVITY_AVG_MIN (64) - -/* This is used as a reference when computing the source variance for the - * purposes of activity masking. - * Eventually this should be replaced by custom no-reference routines, - * which will be faster. - */ -static const unsigned char VP9_VAR_OFFS[16] = { - 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 -}; - - -// Original activity measure from Tim T's code. -static unsigned int tt_activity_measure(VP9_COMP *cpi, MACROBLOCK *x) { - unsigned int act; - unsigned int sse; - /* TODO: This could also be done over smaller areas (8x8), but that would - * require extensive changes elsewhere, as lambda is assumed to be fixed - * over an entire MB in most of the code. - * Another option is to compute four 8x8 variances, and pick a single - * lambda using a non-linear combination (e.g., the smallest, or second - * smallest, etc.). - */ - act = vp9_variance16x16(x->src.y_buffer, x->src.y_stride, VP9_VAR_OFFS, 0, - &sse); - act = act << 4; - - /* If the region is flat, lower the activity some more. */ - if (act < 8 << 12) - act = act < 5 << 12 ? act : 5 << 12; - - return act; -} - -// Stub for alternative experimental activity measures. 
-static unsigned int alt_activity_measure(VP9_COMP *cpi, - MACROBLOCK *x, int use_dc_pred) { - return vp9_encode_intra(cpi, x, use_dc_pred); -} - - -// Measure the activity of the current macroblock -// What we measure here is TBD so abstracted to this function -#define ALT_ACT_MEASURE 1 -static unsigned int mb_activity_measure(VP9_COMP *cpi, MACROBLOCK *x, - int mb_row, int mb_col) { - unsigned int mb_activity; - - if (ALT_ACT_MEASURE) { - int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row); - - // Or use and alternative. - mb_activity = alt_activity_measure(cpi, x, use_dc_pred); - } else { - // Original activity measure from Tim T's code. - mb_activity = tt_activity_measure(cpi, x); - } - - if (mb_activity < VP9_ACTIVITY_AVG_MIN) - mb_activity = VP9_ACTIVITY_AVG_MIN; - - return mb_activity; -} - -// Calculate an "average" mb activity value for the frame -#define ACT_MEDIAN 0 -static void calc_av_activity(VP9_COMP *cpi, int64_t activity_sum) { -#if ACT_MEDIAN - // Find median: Simple n^2 algorithm for experimentation - { - unsigned int median; - unsigned int i, j; - unsigned int *sortlist; - unsigned int tmp; - - // Create a list to sort to - CHECK_MEM_ERROR(sortlist, - vpx_calloc(sizeof(unsigned int), - cpi->common.MBs)); - - // Copy map to sort list - vpx_memcpy(sortlist, cpi->mb_activity_map, - sizeof(unsigned int) * cpi->common.MBs); - - - // Ripple each value down to its correct position - for (i = 1; i < cpi->common.MBs; i ++) { - for (j = i; j > 0; j --) { - if (sortlist[j] < sortlist[j - 1]) { - // Swap values - tmp = sortlist[j - 1]; - sortlist[j - 1] = sortlist[j]; - sortlist[j] = tmp; - } else - break; - } - } - - // Even number MBs so estimate median as mean of two either side. 
- median = (1 + sortlist[cpi->common.MBs >> 1] + - sortlist[(cpi->common.MBs >> 1) + 1]) >> 1; - - cpi->activity_avg = median; - - vpx_free(sortlist); - } -#else - // Simple mean for now - cpi->activity_avg = (unsigned int)(activity_sum / cpi->common.MBs); -#endif - - if (cpi->activity_avg < VP9_ACTIVITY_AVG_MIN) - cpi->activity_avg = VP9_ACTIVITY_AVG_MIN; - - // Experimental code: return fixed value normalized for several clips - if (ALT_ACT_MEASURE) - cpi->activity_avg = 100000; -} - -#define USE_ACT_INDEX 0 -#define OUTPUT_NORM_ACT_STATS 0 - -#if USE_ACT_INDEX -// Calculate and activity index for each mb -static void calc_activity_index(VP9_COMP *cpi, MACROBLOCK *x) { - VP9_COMMON *const cm = &cpi->common; - int mb_row, mb_col; - - int64_t act; - int64_t a; - int64_t b; - -#if OUTPUT_NORM_ACT_STATS - FILE *f = fopen("norm_act.stt", "a"); - fprintf(f, "\n%12d\n", cpi->activity_avg); -#endif - - // Reset pointers to start of activity map - x->mb_activity_ptr = cpi->mb_activity_map; - - // Calculate normalized mb activity number. - for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) { - // for each macroblock col in image - for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { - // Read activity from the map - act = *(x->mb_activity_ptr); - - // Calculate a normalized activity number - a = act + 4 * cpi->activity_avg; - b = 4 * act + cpi->activity_avg; - - if (b >= a) - *(x->activity_ptr) = (int)((b + (a >> 1)) / a) - 1; - else - *(x->activity_ptr) = 1 - (int)((a + (b >> 1)) / b); - -#if OUTPUT_NORM_ACT_STATS - fprintf(f, " %6d", *(x->mb_activity_ptr)); -#endif - // Increment activity map pointers - x->mb_activity_ptr++; - } - -#if OUTPUT_NORM_ACT_STATS - fprintf(f, "\n"); -#endif - - } - -#if OUTPUT_NORM_ACT_STATS - fclose(f); -#endif - -} -#endif - -// Loop through all MBs. 
Note activity of each, average activity and -// calculate a normalized activity for each -static void build_activity_map(VP9_COMP *cpi) { - MACROBLOCK *const x = &cpi->mb; - MACROBLOCKD *xd = &x->e_mbd; - VP9_COMMON *const cm = &cpi->common; - -#if ALT_ACT_MEASURE - YV12_BUFFER_CONFIG *new_yv12 = &cm->yv12_fb[cm->new_fb_idx]; - int recon_yoffset; - int recon_y_stride = new_yv12->y_stride; -#endif - - int mb_row, mb_col; - unsigned int mb_activity; - int64_t activity_sum = 0; - - // for each macroblock row in image - for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) { -#if ALT_ACT_MEASURE - // reset above block coeffs - xd->up_available = (mb_row != 0); - recon_yoffset = (mb_row * recon_y_stride * 16); -#endif - // for each macroblock col in image - for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { -#if ALT_ACT_MEASURE - xd->dst.y_buffer = new_yv12->y_buffer + recon_yoffset; - xd->left_available = (mb_col != 0); - recon_yoffset += 16; -#endif - // Copy current mb to a buffer - vp9_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16); - - // measure activity - mb_activity = mb_activity_measure(cpi, x, mb_row, mb_col); - - // Keep frame sum - activity_sum += mb_activity; - - // Store MB level activity details. 
- *x->mb_activity_ptr = mb_activity; - - // Increment activity map pointer - x->mb_activity_ptr++; - - // adjust to the next column of source macroblocks - x->src.y_buffer += 16; - } - - - // adjust to the next row of mbs - x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols; - -#if ALT_ACT_MEASURE - // extend the recon for intra prediction - vp9_extend_mb_row(new_yv12, xd->dst.y_buffer + 16, - xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); -#endif - - } - - // Calculate an "average" MB activity - calc_av_activity(cpi, activity_sum); - -#if USE_ACT_INDEX - // Calculate an activity index number of each mb - calc_activity_index(cpi, x); -#endif - -} - -// Macroblock activity masking -void vp9_activity_masking(VP9_COMP *cpi, MACROBLOCK *x) { -#if USE_ACT_INDEX - x->rdmult += *(x->mb_activity_ptr) * (x->rdmult >> 2); - x->errorperbit = x->rdmult * 100 / (110 * x->rddiv); - x->errorperbit += (x->errorperbit == 0); -#else - int64_t a; - int64_t b; - int64_t act = *(x->mb_activity_ptr); - - // Apply the masking to the RD multiplier. 
- a = act + (2 * cpi->activity_avg); - b = (2 * act) + cpi->activity_avg; - - x->rdmult = (unsigned int)(((int64_t)x->rdmult * b + (a >> 1)) / a); - x->errorperbit = x->rdmult * 100 / (110 * x->rddiv); - x->errorperbit += (x->errorperbit == 0); -#endif - - // Activity based Zbin adjustment - adjust_act_zbin(cpi, x); -} - -static void update_state(VP9_COMP *cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { - int i; - MACROBLOCKD *xd = &x->e_mbd; - MODE_INFO *mi = &ctx->mic; - MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; - int mb_mode = mi->mbmi.mode; - int mb_mode_index = ctx->best_mode_index; - -#if CONFIG_DEBUG - assert(mb_mode < MB_MODE_COUNT); - assert(mb_mode_index < MAX_MODES); - assert(mi->mbmi.ref_frame < MAX_REF_FRAMES); -#endif - - // Restore the coding context of the MB to that that was in place - // when the mode was picked for it - vpx_memcpy(xd->mode_info_context, mi, sizeof(MODE_INFO)); -#if CONFIG_SUPERBLOCKS - if (mi->mbmi.encoded_as_sb) { - const int mis = cpi->common.mode_info_stride; - if (xd->mb_to_right_edge > 0) - vpx_memcpy(xd->mode_info_context + 1, mi, sizeof(MODE_INFO)); - if (xd->mb_to_bottom_edge > 0) { - vpx_memcpy(xd->mode_info_context + mis, mi, sizeof(MODE_INFO)); - if (xd->mb_to_right_edge > 0) - vpx_memcpy(xd->mode_info_context + mis + 1, mi, sizeof(MODE_INFO)); - } - } -#endif - - if (mb_mode == B_PRED) { - for (i = 0; i < 16; i++) { - xd->block[i].bmi.as_mode = xd->mode_info_context->bmi[i].as_mode; - assert(xd->block[i].bmi.as_mode.first < MB_MODE_COUNT); - } - } else if (mb_mode == I8X8_PRED) { - for (i = 0; i < 16; i++) { - xd->block[i].bmi = xd->mode_info_context->bmi[i]; - } - } else if (mb_mode == SPLITMV) { - vpx_memcpy(x->partition_info, &ctx->partition_info, - sizeof(PARTITION_INFO)); - - mbmi->mv[0].as_int = x->partition_info->bmi[15].mv.as_int; - mbmi->mv[1].as_int = x->partition_info->bmi[15].second_mv.as_int; - } - - { - int segment_id = mbmi->segment_id; - if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) 
|| - vp9_get_segdata(xd, segment_id, SEG_LVL_EOB)) { - for (i = 0; i < NB_TXFM_MODES; i++) { - cpi->rd_tx_select_diff[i] += ctx->txfm_rd_diff[i]; - } - } - } - - if (cpi->common.frame_type == KEY_FRAME) { - // Restore the coding modes to that held in the coding context - // if (mb_mode == B_PRED) - // for (i = 0; i < 16; i++) - // { - // xd->block[i].bmi.as_mode = - // xd->mode_info_context->bmi[i].as_mode; - // assert(xd->mode_info_context->bmi[i].as_mode < MB_MODE_COUNT); - // } -#if CONFIG_INTERNAL_STATS - static const int kf_mode_index[] = { - THR_DC /*DC_PRED*/, - THR_V_PRED /*V_PRED*/, - THR_H_PRED /*H_PRED*/, - THR_D45_PRED /*D45_PRED*/, - THR_D135_PRED /*D135_PRED*/, - THR_D117_PRED /*D117_PRED*/, - THR_D153_PRED /*D153_PRED*/, - THR_D27_PRED /*D27_PRED*/, - THR_D63_PRED /*D63_PRED*/, - THR_TM /*TM_PRED*/, - THR_I8X8_PRED /*I8X8_PRED*/, - THR_B_PRED /*B_PRED*/, - }; - cpi->mode_chosen_counts[kf_mode_index[mb_mode]]++; -#endif - } else { - /* - // Reduce the activation RD thresholds for the best choice mode - if ((cpi->rd_baseline_thresh[mb_mode_index] > 0) && - (cpi->rd_baseline_thresh[mb_mode_index] < (INT_MAX >> 2))) - { - int best_adjustment = (cpi->rd_thresh_mult[mb_mode_index] >> 2); - - cpi->rd_thresh_mult[mb_mode_index] = - (cpi->rd_thresh_mult[mb_mode_index] - >= (MIN_THRESHMULT + best_adjustment)) ? 
- cpi->rd_thresh_mult[mb_mode_index] - best_adjustment : - MIN_THRESHMULT; - cpi->rd_threshes[mb_mode_index] = - (cpi->rd_baseline_thresh[mb_mode_index] >> 7) - * cpi->rd_thresh_mult[mb_mode_index]; - - } - */ - // Note how often each mode chosen as best - cpi->mode_chosen_counts[mb_mode_index]++; - - cpi->prediction_error += ctx->distortion; - cpi->intra_error += ctx->intra_error; - - cpi->rd_comp_pred_diff[0] += ctx->single_pred_diff; - cpi->rd_comp_pred_diff[1] += ctx->comp_pred_diff; - cpi->rd_comp_pred_diff[2] += ctx->hybrid_pred_diff; - } -} - -static void pick_mb_modes(VP9_COMP *cpi, - VP9_COMMON *cm, - int mb_row, - int mb_col, - MACROBLOCK *x, - MACROBLOCKD *xd, - TOKENEXTRA **tp, - int *totalrate, - int *totaldist) { - int i; - int map_index; - int recon_yoffset, recon_uvoffset; - int ref_fb_idx = cm->lst_fb_idx; - int dst_fb_idx = cm->new_fb_idx; - int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride; - int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride; - ENTROPY_CONTEXT_PLANES left_context[2]; - ENTROPY_CONTEXT_PLANES above_context[2]; - ENTROPY_CONTEXT_PLANES *initial_above_context_ptr = cm->above_context - + mb_col; - - // Offsets to move pointers from MB to MB within a SB in raster order - int row_delta[4] = { 0, +1, 0, -1}; - int col_delta[4] = { +1, -1, +1, +1}; - - /* Function should not modify L & A contexts; save and restore on exit */ - vpx_memcpy(left_context, - cm->left_context, - sizeof(left_context)); - vpx_memcpy(above_context, - initial_above_context_ptr, - sizeof(above_context)); - - /* Encode MBs in raster order within the SB */ - for (i = 0; i < 4; i++) { - int dy = row_delta[i]; - int dx = col_delta[i]; - int offset_unextended = dy * cm->mb_cols + dx; - int offset_extended = dy * xd->mode_info_stride + dx; - MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; - - // TODO Many of the index items here can be computed more efficiently! 
- - if ((mb_row >= cm->mb_rows) || (mb_col >= cm->mb_cols)) { - // MB lies outside frame, move on - mb_row += dy; - mb_col += dx; - - // Update pointers - x->src.y_buffer += 16 * (dx + dy * x->src.y_stride); - x->src.u_buffer += 8 * (dx + dy * x->src.uv_stride); - x->src.v_buffer += 8 * (dx + dy * x->src.uv_stride); - - x->gf_active_ptr += offset_unextended; - x->partition_info += offset_extended; - xd->mode_info_context += offset_extended; - xd->prev_mode_info_context += offset_extended; -#if CONFIG_DEBUG - assert((xd->prev_mode_info_context - cpi->common.prev_mip) == - (xd->mode_info_context - cpi->common.mip)); -#endif - continue; - } - - // Index of the MB in the SB 0..3 - xd->mb_index = i; - - map_index = (mb_row * cpi->common.mb_cols) + mb_col; - x->mb_activity_ptr = &cpi->mb_activity_map[map_index]; - - // set above context pointer - xd->above_context = cm->above_context + mb_col; - - // Restore the appropriate left context depending on which - // row in the SB the MB is situated - xd->left_context = cm->left_context + (i >> 1); - - // Set up distance of MB to edge of frame in 1/8th pel units - xd->mb_to_top_edge = -((mb_row * 16) << 3); - xd->mb_to_left_edge = -((mb_col * 16) << 3); - xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3; - xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3; - - // Set up limit values for MV components to prevent them from - // extending beyond the UMV borders assuming 16x16 block size - x->mv_row_min = -((mb_row * 16) + VP8BORDERINPIXELS - INTERP_EXTEND); - x->mv_col_min = -((mb_col * 16) + VP8BORDERINPIXELS - INTERP_EXTEND); - x->mv_row_max = ((cm->mb_rows - mb_row) * 16 + - (VP8BORDERINPIXELS - 16 - INTERP_EXTEND)); - x->mv_col_max = ((cm->mb_cols - mb_col) * 16 + - (VP8BORDERINPIXELS - 16 - INTERP_EXTEND)); - - xd->up_available = (mb_row != 0); - xd->left_available = (mb_col != 0); - - recon_yoffset = (mb_row * recon_y_stride * 16) + (mb_col * 16); - recon_uvoffset = (mb_row * recon_uv_stride * 8) + 
(mb_col * 8); - - xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset; - xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset; - xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset; - - // Copy current MB to a work buffer - vp9_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16); - - x->rddiv = cpi->RDDIV; - x->rdmult = cpi->RDMULT; - - if (cpi->oxcf.tuning == VP8_TUNE_SSIM) - vp9_activity_masking(cpi, x); - - // Is segmentation enabled - if (xd->segmentation_enabled) { - // Code to set segment id in xd->mbmi.segment_id - if (xd->update_mb_segmentation_map) - mbmi->segment_id = cpi->segmentation_map[map_index]; - else - mbmi->segment_id = cm->last_frame_seg_map[map_index]; - if (mbmi->segment_id > 3) - mbmi->segment_id = 0; - - vp9_mb_init_quantizer(cpi, x); - } else - // Set to Segment 0 by default - mbmi->segment_id = 0; - - x->active_ptr = cpi->active_map + map_index; - -#if CONFIG_SUPERBLOCKS - xd->mode_info_context->mbmi.encoded_as_sb = 0; -#endif - - cpi->update_context = 0; // TODO Do we need this now?? 
- - vp9_intra_prediction_down_copy(xd); - - // Find best coding mode & reconstruct the MB so it is available - // as a predictor for MBs that follow in the SB - if (cm->frame_type == KEY_FRAME) { - int r, d; - vp9_rd_pick_intra_mode(cpi, x, &r, &d); - *totalrate += r; - *totaldist += d; - - // Dummy encode, do not do the tokenization - vp9_encode_intra_macro_block(cpi, x, tp, 0); - // Note the encoder may have changed the segment_id - - // Save the coding context - vpx_memcpy(&x->mb_context[i].mic, xd->mode_info_context, - sizeof(MODE_INFO)); - } else { - int seg_id, r, d; - - if (xd->segmentation_enabled && cpi->seg0_cnt > 0 && - !vp9_segfeature_active(xd, 0, SEG_LVL_REF_FRAME) && - vp9_segfeature_active(xd, 1, SEG_LVL_REF_FRAME) && - vp9_check_segref(xd, 1, INTRA_FRAME) + - vp9_check_segref(xd, 1, LAST_FRAME) + - vp9_check_segref(xd, 1, GOLDEN_FRAME) + - vp9_check_segref(xd, 1, ALTREF_FRAME) == 1) { - cpi->seg0_progress = (cpi->seg0_idx << 16) / cpi->seg0_cnt; - } else { - cpi->seg0_progress = (((mb_col & ~1) * 2 + (mb_row & ~1) * cm->mb_cols + i) << 16) / cm->MBs; - } - - vp9_pick_mode_inter_macroblock(cpi, x, recon_yoffset, - recon_uvoffset, &r, &d); - *totalrate += r; - *totaldist += d; - - // Dummy encode, do not do the tokenization - vp9_encode_inter_macroblock(cpi, x, tp, - recon_yoffset, recon_uvoffset, 0); - - seg_id = mbmi->segment_id; - if (cpi->mb.e_mbd.segmentation_enabled && seg_id == 0) { - cpi->seg0_idx++; - } - if (!xd->segmentation_enabled || - !vp9_segfeature_active(xd, seg_id, SEG_LVL_REF_FRAME) || - vp9_check_segref(xd, seg_id, INTRA_FRAME) + - vp9_check_segref(xd, seg_id, LAST_FRAME) + - vp9_check_segref(xd, seg_id, GOLDEN_FRAME) + - vp9_check_segref(xd, seg_id, ALTREF_FRAME) > 1) { - // Get the prediction context and status - int pred_flag = vp9_get_pred_flag(xd, PRED_REF); - int pred_context = vp9_get_pred_context(cm, xd, PRED_REF); - - // Count prediction success - cpi->ref_pred_count[pred_context][pred_flag]++; - } - } - - // Next MB - 
mb_row += dy; - mb_col += dx; - - x->src.y_buffer += 16 * (dx + dy * x->src.y_stride); - x->src.u_buffer += 8 * (dx + dy * x->src.uv_stride); - x->src.v_buffer += 8 * (dx + dy * x->src.uv_stride); - - x->gf_active_ptr += offset_unextended; - x->partition_info += offset_extended; - xd->mode_info_context += offset_extended; - xd->prev_mode_info_context += offset_extended; - -#if CONFIG_DEBUG - assert((xd->prev_mode_info_context - cpi->common.prev_mip) == - (xd->mode_info_context - cpi->common.mip)); -#endif - } - - /* Restore L & A coding context to those in place on entry */ - vpx_memcpy(cm->left_context, - left_context, - sizeof(left_context)); - vpx_memcpy(initial_above_context_ptr, - above_context, - sizeof(above_context)); -} - -#if CONFIG_SUPERBLOCKS -static void pick_sb_modes (VP9_COMP *cpi, - VP9_COMMON *cm, - int mb_row, - int mb_col, - MACROBLOCK *x, - MACROBLOCKD *xd, - TOKENEXTRA **tp, - int *totalrate, - int *totaldist) -{ - int map_index; - int recon_yoffset, recon_uvoffset; - int ref_fb_idx = cm->lst_fb_idx; - int dst_fb_idx = cm->new_fb_idx; - int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride; - int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride; - ENTROPY_CONTEXT_PLANES left_context[2]; - ENTROPY_CONTEXT_PLANES above_context[2]; - ENTROPY_CONTEXT_PLANES *initial_above_context_ptr = cm->above_context - + mb_col; - - /* Function should not modify L & A contexts; save and restore on exit */ - vpx_memcpy (left_context, - cm->left_context, - sizeof(left_context)); - vpx_memcpy (above_context, - initial_above_context_ptr, - sizeof(above_context)); - - map_index = (mb_row * cpi->common.mb_cols) + mb_col; - x->mb_activity_ptr = &cpi->mb_activity_map[map_index]; - - /* set above context pointer */ - xd->above_context = cm->above_context + mb_col; - - /* Restore the appropriate left context depending on which - * row in the SB the MB is situated */ - xd->left_context = cm->left_context; - - // Set up distance of MB to edge of frame in 1/8th pel units 
- xd->mb_to_top_edge = -((mb_row * 16) << 3); - xd->mb_to_left_edge = -((mb_col * 16) << 3); - xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3; - xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3; - - /* Set up limit values for MV components to prevent them from - * extending beyond the UMV borders assuming 16x16 block size */ - x->mv_row_min = -((mb_row * 16) + VP8BORDERINPIXELS - INTERP_EXTEND); - x->mv_col_min = -((mb_col * 16) + VP8BORDERINPIXELS - INTERP_EXTEND); - x->mv_row_max = ((cm->mb_rows - mb_row) * 16 + - (VP8BORDERINPIXELS - 32 - INTERP_EXTEND)); - x->mv_col_max = ((cm->mb_cols - mb_col) * 16 + - (VP8BORDERINPIXELS - 32 - INTERP_EXTEND)); - - xd->up_available = (mb_row != 0); - xd->left_available = (mb_col != 0); - - recon_yoffset = (mb_row * recon_y_stride * 16) + (mb_col * 16); - recon_uvoffset = (mb_row * recon_uv_stride * 8) + (mb_col * 8); - - xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset; - xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset; - xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset; -#if 0 // FIXME - /* Copy current MB to a work buffer */ - vp9_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16); -#endif - x->rddiv = cpi->RDDIV; - x->rdmult = cpi->RDMULT; - if(cpi->oxcf.tuning == VP8_TUNE_SSIM) - vp9_activity_masking(cpi, x); - /* Is segmentation enabled */ - if (xd->segmentation_enabled) - { - /* Code to set segment id in xd->mbmi.segment_id */ - if (xd->update_mb_segmentation_map) - xd->mode_info_context->mbmi.segment_id = - cpi->segmentation_map[map_index] && - cpi->segmentation_map[map_index + 1] && - cpi->segmentation_map[map_index + cm->mb_cols] && - cpi->segmentation_map[map_index + cm->mb_cols + 1]; - else - xd->mode_info_context->mbmi.segment_id = - cm->last_frame_seg_map[map_index] && - cm->last_frame_seg_map[map_index + 1] && - cm->last_frame_seg_map[map_index + cm->mb_cols] && - cm->last_frame_seg_map[map_index + cm->mb_cols + 
1]; - if (xd->mode_info_context->mbmi.segment_id > 3) - xd->mode_info_context->mbmi.segment_id = 0; - - vp9_mb_init_quantizer(cpi, x); - } - else - /* Set to Segment 0 by default */ - xd->mode_info_context->mbmi.segment_id = 0; - - x->active_ptr = cpi->active_map + map_index; - - cpi->update_context = 0; // TODO Do we need this now?? - - /* Find best coding mode & reconstruct the MB so it is available - * as a predictor for MBs that follow in the SB */ - if (cm->frame_type == KEY_FRAME) - { - vp9_rd_pick_intra_mode_sb(cpi, x, - totalrate, - totaldist); - - /* Save the coding context */ - vpx_memcpy(&x->sb_context[0].mic, xd->mode_info_context, - sizeof(MODE_INFO)); - } else { - if (xd->segmentation_enabled && cpi->seg0_cnt > 0 && - !vp9_segfeature_active(xd, 0, SEG_LVL_REF_FRAME) && - vp9_segfeature_active(xd, 1, SEG_LVL_REF_FRAME) && - vp9_check_segref(xd, 1, INTRA_FRAME) + - vp9_check_segref(xd, 1, LAST_FRAME) + - vp9_check_segref(xd, 1, GOLDEN_FRAME) + - vp9_check_segref(xd, 1, ALTREF_FRAME) == 1) { - cpi->seg0_progress = (cpi->seg0_idx << 16) / cpi->seg0_cnt; - } else { - cpi->seg0_progress = - (((mb_col & ~1) * 2 + (mb_row & ~1) * cm->mb_cols) << 16) / cm->MBs; - } - - vp9_rd_pick_inter_mode_sb(cpi, x, - recon_yoffset, - recon_uvoffset, - totalrate, - totaldist); - } - - /* Restore L & A coding context to those in place on entry */ - vpx_memcpy (cm->left_context, - left_context, - sizeof(left_context)); - vpx_memcpy (initial_above_context_ptr, - above_context, - sizeof(above_context)); -} -#endif - -static void encode_sb(VP9_COMP *cpi, - VP9_COMMON *cm, - int mbrow, - int mbcol, - MACROBLOCK *x, - MACROBLOCKD *xd, - TOKENEXTRA **tp) { - int i; - int map_index; - int mb_row, mb_col; - int recon_yoffset, recon_uvoffset; - int ref_fb_idx = cm->lst_fb_idx; - int dst_fb_idx = cm->new_fb_idx; - int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride; - int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride; - int row_delta[4] = { 0, +1, 0, -1}; - int col_delta[4] = { 
+1, -1, +1, +1}; - - mb_row = mbrow; - mb_col = mbcol; - - /* Encode MBs in raster order within the SB */ - for (i = 0; i < 4; i++) { - int dy = row_delta[i]; - int dx = col_delta[i]; - int offset_extended = dy * xd->mode_info_stride + dx; - int offset_unextended = dy * cm->mb_cols + dx; - MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; - - if ((mb_row >= cm->mb_rows) || (mb_col >= cm->mb_cols)) { - // MB lies outside frame, move on - mb_row += dy; - mb_col += dx; - - x->src.y_buffer += 16 * (dx + dy * x->src.y_stride); - x->src.u_buffer += 8 * (dx + dy * x->src.uv_stride); - x->src.v_buffer += 8 * (dx + dy * x->src.uv_stride); - - x->gf_active_ptr += offset_unextended; - x->partition_info += offset_extended; - xd->mode_info_context += offset_extended; - xd->prev_mode_info_context += offset_extended; - -#if CONFIG_DEBUG - assert((xd->prev_mode_info_context - cpi->common.prev_mip) == - (xd->mode_info_context - cpi->common.mip)); -#endif - continue; - } - - xd->mb_index = i; - -#ifdef ENC_DEBUG - enc_debug = (cpi->common.current_video_frame == 0 && - mb_row == 0 && mb_col == 0); - mb_col_debug = mb_col; - mb_row_debug = mb_row; -#endif - - // Restore MB state to that when it was picked -#if CONFIG_SUPERBLOCKS - if (xd->mode_info_context->mbmi.encoded_as_sb) { - update_state(cpi, x, &x->sb_context[i]); - cpi->sb_count++; - } else -#endif - update_state(cpi, x, &x->mb_context[i]); - - map_index = (mb_row * cpi->common.mb_cols) + mb_col; - x->mb_activity_ptr = &cpi->mb_activity_map[map_index]; - - // reset above block coeffs - xd->above_context = cm->above_context + mb_col; - xd->left_context = cm->left_context + (i >> 1); - - // Set up distance of MB to edge of the frame in 1/8th pel units - xd->mb_to_top_edge = -((mb_row * 16) << 3); - xd->mb_to_left_edge = -((mb_col * 16) << 3); - xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3; - xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3; - -#if CONFIG_SUPERBLOCKS - if 
(xd->mode_info_context->mbmi.encoded_as_sb) { - // Set up limit values for MV components to prevent them from - // extending beyond the UMV borders assuming 32x32 block size - x->mv_row_min = -((mb_row * 16) + VP8BORDERINPIXELS - INTERP_EXTEND); - x->mv_col_min = -((mb_col * 16) + VP8BORDERINPIXELS - INTERP_EXTEND); - x->mv_row_max = ((cm->mb_rows - mb_row) * 16 + - (VP8BORDERINPIXELS - 32 - INTERP_EXTEND)); - x->mv_col_max = ((cm->mb_cols - mb_col) * 16 + - (VP8BORDERINPIXELS - 32 - INTERP_EXTEND)); - } else { -#endif - // Set up limit values for MV components to prevent them from - // extending beyond the UMV borders assuming 16x16 block size - x->mv_row_min = -((mb_row * 16) + VP8BORDERINPIXELS - INTERP_EXTEND); - x->mv_col_min = -((mb_col * 16) + VP8BORDERINPIXELS - INTERP_EXTEND); - x->mv_row_max = ((cm->mb_rows - mb_row) * 16 + - (VP8BORDERINPIXELS - 16 - INTERP_EXTEND)); - x->mv_col_max = ((cm->mb_cols - mb_col) * 16 + - (VP8BORDERINPIXELS - 16 - INTERP_EXTEND)); -#if CONFIG_SUPERBLOCKS - } -#endif - - xd->up_available = (mb_row != 0); - xd->left_available = (mb_col != 0); - - recon_yoffset = (mb_row * recon_y_stride * 16) + (mb_col * 16); - recon_uvoffset = (mb_row * recon_uv_stride * 8) + (mb_col * 8); - - xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset; - xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset; - xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset; - - // Copy current MB to a work buffer - vp9_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16); - - if (cpi->oxcf.tuning == VP8_TUNE_SSIM) - vp9_activity_masking(cpi, x); - - // Is segmentation enabled - if (xd->segmentation_enabled) { - vp9_mb_init_quantizer(cpi, x); - } - - x->active_ptr = cpi->active_map + map_index; - - cpi->update_context = 0; - -#if CONFIG_SUPERBLOCKS - if (!xd->mode_info_context->mbmi.encoded_as_sb) -#endif - vp9_intra_prediction_down_copy(xd); - - if (cm->frame_type == KEY_FRAME) { -#if CONFIG_SUPERBLOCKS - 
if (xd->mode_info_context->mbmi.encoded_as_sb) - vp9_encode_intra_super_block(cpi, x, tp, mb_col); - else -#endif - vp9_encode_intra_macro_block(cpi, x, tp, 1); - // Note the encoder may have changed the segment_id - -#ifdef MODE_STATS - y_modes[mbmi->mode]++; -#endif - } else { - unsigned char *segment_id; - int seg_ref_active; - - if (xd->mode_info_context->mbmi.ref_frame) { - unsigned char pred_context; - - pred_context = vp9_get_pred_context(cm, xd, PRED_COMP); - - if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) - cpi->single_pred_count[pred_context]++; - else - cpi->comp_pred_count[pred_context]++; - } - -#if CONFIG_SUPERBLOCKS - if (xd->mode_info_context->mbmi.encoded_as_sb) - vp9_encode_inter_superblock(cpi, x, tp, recon_yoffset, recon_uvoffset, - mb_col, mb_row); - else -#endif - vp9_encode_inter_macroblock(cpi, x, tp, - recon_yoffset, recon_uvoffset, 1); - // Note the encoder may have changed the segment_id - -#ifdef MODE_STATS - inter_y_modes[mbmi->mode]++; - - if (mbmi->mode == SPLITMV) { - int b; - - for (b = 0; b < x->partition_info->count; b++) { - inter_b_modes[x->partition_info->bmi[b].mode]++; - } - } - -#endif - - // If we have just a single reference frame coded for a segment then - // exclude from the reference frame counts used to work out - // probabilities. NOTE: At the moment we dont support custom trees - // for the reference frame coding for each segment but this is a - // possible future action. 
- segment_id = &mbmi->segment_id; - seg_ref_active = vp9_segfeature_active(xd, *segment_id, - SEG_LVL_REF_FRAME); - if (!seg_ref_active || - ((vp9_check_segref(xd, *segment_id, INTRA_FRAME) + - vp9_check_segref(xd, *segment_id, LAST_FRAME) + - vp9_check_segref(xd, *segment_id, GOLDEN_FRAME) + - vp9_check_segref(xd, *segment_id, ALTREF_FRAME)) > 1)) { - { - cpi->count_mb_ref_frame_usage[mbmi->ref_frame]++; - } - } - - // Count of last ref frame 0,0 usage - if ((mbmi->mode == ZEROMV) && (mbmi->ref_frame == LAST_FRAME)) - cpi->inter_zz_count++; - } - -#if CONFIG_SUPERBLOCKS - if (xd->mode_info_context->mbmi.encoded_as_sb) { - x->src.y_buffer += 32; - x->src.u_buffer += 16; - x->src.v_buffer += 16; - - x->gf_active_ptr += 2; - x->partition_info += 2; - xd->mode_info_context += 2; - xd->prev_mode_info_context += 2; - - (*tp)->Token = EOSB_TOKEN; - (*tp)++; - if (mb_row < cm->mb_rows) cpi->tplist[mb_row].stop = *tp; - break; - } -#endif - - // Next MB - mb_row += dy; - mb_col += dx; - - x->src.y_buffer += 16 * (dx + dy * x->src.y_stride); - x->src.u_buffer += 8 * (dx + dy * x->src.uv_stride); - x->src.v_buffer += 8 * (dx + dy * x->src.uv_stride); - - x->gf_active_ptr += offset_unextended; - x->partition_info += offset_extended; - xd->mode_info_context += offset_extended; - xd->prev_mode_info_context += offset_extended; - -#if CONFIG_DEBUG - assert((xd->prev_mode_info_context - cpi->common.prev_mip) == - (xd->mode_info_context - cpi->common.mip)); -#endif - (*tp)->Token = EOSB_TOKEN; - (*tp)++; - if (mb_row < cm->mb_rows) cpi->tplist[mb_row].stop = *tp; - } - - // debug output -#if DBG_PRNT_SEGMAP - { - FILE *statsfile; - statsfile = fopen("segmap2.stt", "a"); - fprintf(statsfile, "\n"); - fclose(statsfile); - } -#endif -} - -static -void encode_sb_row(VP9_COMP *cpi, - VP9_COMMON *cm, - int mb_row, - MACROBLOCK *x, - MACROBLOCKD *xd, - TOKENEXTRA **tp, - int *totalrate) { - int mb_col; - int mb_cols = cm->mb_cols; - - // Initialize the left context for the new SB row - 
vpx_memset(cm->left_context, 0, sizeof(cm->left_context)); - - // Code each SB in the row - for (mb_col = 0; mb_col < mb_cols; mb_col += 2) { - int mb_rate = 0, mb_dist = 0; -#if CONFIG_SUPERBLOCKS - int sb_rate = INT_MAX, sb_dist; -#endif - -#if CONFIG_DEBUG - MODE_INFO *mic = xd->mode_info_context; - PARTITION_INFO *pi = x->partition_info; - signed char *gfa = x->gf_active_ptr; - unsigned char *yb = x->src.y_buffer; - unsigned char *ub = x->src.u_buffer; - unsigned char *vb = x->src.v_buffer; -#endif - -#if CONFIG_SUPERBLOCKS - // Pick modes assuming the SB is coded as 4 independent MBs - xd->mode_info_context->mbmi.encoded_as_sb = 0; -#endif - pick_mb_modes(cpi, cm, mb_row, mb_col, x, xd, tp, &mb_rate, &mb_dist); -#if CONFIG_SUPERBLOCKS - mb_rate += vp9_cost_bit(cm->sb_coded, 0); -#endif - - x->src.y_buffer -= 32; - x->src.u_buffer -= 16; - x->src.v_buffer -= 16; - - x->gf_active_ptr -= 2; - x->partition_info -= 2; - xd->mode_info_context -= 2; - xd->prev_mode_info_context -= 2; - -#if CONFIG_DEBUG - assert(x->gf_active_ptr == gfa); - assert(x->partition_info == pi); - assert(xd->mode_info_context == mic); - assert(x->src.y_buffer == yb); - assert(x->src.u_buffer == ub); - assert(x->src.v_buffer == vb); -#endif - -#if CONFIG_SUPERBLOCKS - if (!((( mb_cols & 1) && mb_col == mb_cols - 1) || - ((cm->mb_rows & 1) && mb_row == cm->mb_rows - 1))) { - /* Pick a mode assuming that it applies to all 4 of the MBs in the SB */ - xd->mode_info_context->mbmi.encoded_as_sb = 1; - pick_sb_modes(cpi, cm, mb_row, mb_col, x, xd, tp, &sb_rate, &sb_dist); - sb_rate += vp9_cost_bit(cm->sb_coded, 1); - } - - /* Decide whether to encode as a SB or 4xMBs */ - if (sb_rate < INT_MAX && - RDCOST(x->rdmult, x->rddiv, sb_rate, sb_dist) < - RDCOST(x->rdmult, x->rddiv, mb_rate, mb_dist)) { - xd->mode_info_context->mbmi.encoded_as_sb = 1; - xd->mode_info_context[1].mbmi.encoded_as_sb = 1; - xd->mode_info_context[cm->mode_info_stride].mbmi.encoded_as_sb = 1; - xd->mode_info_context[1 + 
cm->mode_info_stride].mbmi.encoded_as_sb = 1; - *totalrate += sb_rate; - } else -#endif - { -#if CONFIG_SUPERBLOCKS - xd->mode_info_context->mbmi.encoded_as_sb = 0; - if (cm->mb_cols - 1 > mb_col) - xd->mode_info_context[1].mbmi.encoded_as_sb = 0; - if (cm->mb_rows - 1 > mb_row) { - xd->mode_info_context[cm->mode_info_stride].mbmi.encoded_as_sb = 0; - if (cm->mb_cols - 1 > mb_col) - xd->mode_info_context[1 + cm->mode_info_stride].mbmi.encoded_as_sb = 0; - } -#endif - *totalrate += mb_rate; - } - - /* Encode SB using best computed mode(s) */ - encode_sb(cpi, cm, mb_row, mb_col, x, xd, tp); - -#if CONFIG_DEBUG - assert(x->gf_active_ptr == gfa + 2); - assert(x->partition_info == pi + 2); - assert(xd->mode_info_context == mic + 2); - assert(x->src.y_buffer == yb + 32); - assert(x->src.u_buffer == ub + 16); - assert(x->src.v_buffer == vb + 16); -#endif - } - - // this is to account for the border - x->gf_active_ptr += mb_cols - (mb_cols & 0x1); - x->partition_info += xd->mode_info_stride + 1 - (mb_cols & 0x1); - xd->mode_info_context += xd->mode_info_stride + 1 - (mb_cols & 0x1); - xd->prev_mode_info_context += xd->mode_info_stride + 1 - (mb_cols & 0x1); - -#if CONFIG_DEBUG - assert((xd->prev_mode_info_context - cpi->common.prev_mip) == - (xd->mode_info_context - cpi->common.mip)); -#endif -} - -static void init_encode_frame_mb_context(VP9_COMP *cpi) { - MACROBLOCK *const x = &cpi->mb; - VP9_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &x->e_mbd; - - // GF active flags data structure - x->gf_active_ptr = (signed char *)cpi->gf_active_flags; - - // Activity map pointer - x->mb_activity_ptr = cpi->mb_activity_map; - - x->act_zbin_adj = 0; - cpi->seg0_idx = 0; - vpx_memset(cpi->ref_pred_count, 0, sizeof(cpi->ref_pred_count)); - - x->partition_info = x->pi; - - xd->mode_info_context = cm->mi; - xd->mode_info_stride = cm->mode_info_stride; - xd->prev_mode_info_context = cm->prev_mi; - - xd->frame_type = cm->frame_type; - - xd->frames_since_golden = 
cm->frames_since_golden; - xd->frames_till_alt_ref_frame = cm->frames_till_alt_ref_frame; - - // reset intra mode contexts - if (cm->frame_type == KEY_FRAME) - vp9_init_mbmode_probs(cm); - - // Copy data over into macro block data structures. - x->src = * cpi->Source; - xd->pre = cm->yv12_fb[cm->lst_fb_idx]; - xd->dst = cm->yv12_fb[cm->new_fb_idx]; - - // set up frame for intra coded blocks - vp9_setup_intra_recon(&cm->yv12_fb[cm->new_fb_idx]); - - vp9_build_block_offsets(x); - - vp9_setup_block_dptrs(&x->e_mbd); - - vp9_setup_block_ptrs(x); - - xd->mode_info_context->mbmi.mode = DC_PRED; - xd->mode_info_context->mbmi.uv_mode = DC_PRED; - - vp9_zero(cpi->count_mb_ref_frame_usage) - vp9_zero(cpi->bmode_count) - vp9_zero(cpi->ymode_count) - vp9_zero(cpi->i8x8_mode_count) - vp9_zero(cpi->y_uv_mode_count) - vp9_zero(cpi->sub_mv_ref_count) - vp9_zero(cpi->mbsplit_count) - vp9_zero(cpi->common.fc.mv_ref_ct) - vp9_zero(cpi->common.fc.mv_ref_ct_a) -#if CONFIG_SUPERBLOCKS - vp9_zero(cpi->sb_ymode_count) - cpi->sb_count = 0; -#endif - - vpx_memset(cm->above_context, 0, - sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols); - - xd->fullpixel_mask = 0xffffffff; - if (cm->full_pixel) - xd->fullpixel_mask = 0xfffffff8; -} - -static void encode_frame_internal(VP9_COMP *cpi) { - int mb_row; - MACROBLOCK *const x = &cpi->mb; - VP9_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &x->e_mbd; - - TOKENEXTRA *tp = cpi->tok; - int totalrate; - - //printf("encode_frame_internal\n"); - - // Compute a modified set of reference frame probabilities to use when - // prediction fails. These are based on the current general estimates for - // this frame which may be updated with each iteration of the recode loop. 
- vp9_compute_mod_refprobs(cm); - -#if CONFIG_NEW_MVREF - // temp stats reset - vp9_zero( cpi->best_ref_index_counts ); -#endif - -// debug output -#if DBG_PRNT_SEGMAP - { - FILE *statsfile; - statsfile = fopen("segmap2.stt", "a"); - fprintf(statsfile, "\n"); - fclose(statsfile); - } -#endif - - totalrate = 0; - - // Functions setup for all frame types so we can use MC in AltRef - vp9_setup_interp_filters(xd, cm->mcomp_filter_type, cm); - - // Reset frame count of inter 0,0 motion vector usage. - cpi->inter_zz_count = 0; - - cpi->prediction_error = 0; - cpi->intra_error = 0; - cpi->skip_true_count[0] = cpi->skip_true_count[1] = cpi->skip_true_count[2] = 0; - cpi->skip_false_count[0] = cpi->skip_false_count[1] = cpi->skip_false_count[2] = 0; - -#if CONFIG_PRED_FILTER - if (cm->current_video_frame == 0) { - // Initially assume that we'll signal the prediction filter - // state at the frame level and that it is off. - cpi->common.pred_filter_mode = 0; - cpi->common.prob_pred_filter_off = 128; - } - cpi->pred_filter_on_count = 0; - cpi->pred_filter_off_count = 0; -#endif - vp9_zero(cpi->switchable_interp_count); - - xd->mode_info_context = cm->mi; - xd->prev_mode_info_context = cm->prev_mi; - - vp9_zero(cpi->NMVcount); - vp9_zero(cpi->coef_counts); - vp9_zero(cpi->hybrid_coef_counts); - vp9_zero(cpi->coef_counts_8x8); - vp9_zero(cpi->hybrid_coef_counts_8x8); - vp9_zero(cpi->coef_counts_16x16); - vp9_zero(cpi->hybrid_coef_counts_16x16); - - vp9_frame_init_quantizer(cpi); - - vp9_initialize_rd_consts(cpi, cm->base_qindex + cm->y1dc_delta_q); - vp9_initialize_me_consts(cpi, cm->base_qindex); - - if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { - // Initialize encode frame context. - init_encode_frame_mb_context(cpi); - - // Build a frame level activity map - build_activity_map(cpi); - } - - // re-initencode frame context. 
- init_encode_frame_mb_context(cpi); - - vpx_memset(cpi->rd_comp_pred_diff, 0, sizeof(cpi->rd_comp_pred_diff)); - vpx_memset(cpi->single_pred_count, 0, sizeof(cpi->single_pred_count)); - vpx_memset(cpi->comp_pred_count, 0, sizeof(cpi->comp_pred_count)); - vpx_memset(cpi->txfm_count, 0, sizeof(cpi->txfm_count)); - vpx_memset(cpi->txfm_count_8x8p, 0, sizeof(cpi->txfm_count_8x8p)); - vpx_memset(cpi->rd_tx_select_diff, 0, sizeof(cpi->rd_tx_select_diff)); - { - struct vpx_usec_timer emr_timer; - vpx_usec_timer_start(&emr_timer); - - { - // For each row of SBs in the frame - for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 2) { - int offset = (cm->mb_cols + 1) & ~0x1; - - encode_sb_row(cpi, cm, mb_row, x, xd, &tp, &totalrate); - - // adjust to the next row of SBs - x->src.y_buffer += 32 * x->src.y_stride - 16 * offset; - x->src.u_buffer += 16 * x->src.uv_stride - 8 * offset; - x->src.v_buffer += 16 * x->src.uv_stride - 8 * offset; - } - - cpi->tok_count = tp - cpi->tok; - } - - vpx_usec_timer_mark(&emr_timer); - cpi->time_encode_mb_row += vpx_usec_timer_elapsed(&emr_timer); - - } - - // 256 rate units to the bit, - // projected_frame_size in units of BYTES - cpi->projected_frame_size = totalrate >> 8; - - -#if 0 - // Keep record of the total distortion this time around for future use - cpi->last_frame_distortion = cpi->frame_distortion; -#endif - -} - -static int check_dual_ref_flags(VP9_COMP *cpi) { - MACROBLOCKD *xd = &cpi->mb.e_mbd; - int ref_flags = cpi->ref_frame_flags; - - if (vp9_segfeature_active(xd, 1, SEG_LVL_REF_FRAME)) { - if ((ref_flags & (VP9_LAST_FLAG | VP9_GOLD_FLAG)) == (VP9_LAST_FLAG | VP9_GOLD_FLAG) && - vp9_check_segref(xd, 1, LAST_FRAME)) - return 1; - if ((ref_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) == (VP9_GOLD_FLAG | VP9_ALT_FLAG) && - vp9_check_segref(xd, 1, GOLDEN_FRAME)) - return 1; - if ((ref_flags & (VP9_ALT_FLAG | VP9_LAST_FLAG)) == (VP9_ALT_FLAG | VP9_LAST_FLAG) && - vp9_check_segref(xd, 1, ALTREF_FRAME)) - return 1; - return 0; - } else { 
- return (!!(ref_flags & VP9_GOLD_FLAG) + - !!(ref_flags & VP9_LAST_FLAG) + - !!(ref_flags & VP9_ALT_FLAG)) >= 2; - } -} - -static void reset_skip_txfm_size(VP9_COMP *cpi, TX_SIZE txfm_max) { - VP9_COMMON *cm = &cpi->common; - int mb_row, mb_col, mis = cm->mode_info_stride, segment_id; - MODE_INFO *mi, *mi_ptr = cm->mi; -#if CONFIG_SUPERBLOCKS - MODE_INFO *sb_mi_ptr = cm->mi, *sb_mi; - MB_MODE_INFO *sb_mbmi; -#endif - MB_MODE_INFO *mbmi; - MACROBLOCK *x = &cpi->mb; - MACROBLOCKD *xd = &x->e_mbd; - - for (mb_row = 0; mb_row < cm->mb_rows; mb_row++, mi_ptr += mis) { - mi = mi_ptr; -#if CONFIG_SUPERBLOCKS - sb_mi = sb_mi_ptr; -#endif - for (mb_col = 0; mb_col < cm->mb_cols; mb_col++, mi++) { - mbmi = &mi->mbmi; -#if CONFIG_SUPERBLOCKS - sb_mbmi = &sb_mi->mbmi; -#endif - if ( -#if CONFIG_SUPERBLOCKS - !sb_mbmi->encoded_as_sb && -#endif - mbmi->txfm_size > txfm_max) { - segment_id = mbmi->segment_id; - xd->mode_info_context = mi; - assert((vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) && - vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0) || - (cm->mb_no_coeff_skip && mbmi->mb_skip_coeff)); - mbmi->txfm_size = txfm_max; - } -#if CONFIG_SUPERBLOCKS - if (mb_col & 1) - sb_mi += 2; -#endif - } -#if CONFIG_SUPERBLOCKS - if (mb_row & 1) - sb_mi_ptr += 2 * mis; -#endif - } -} - -void vp9_encode_frame(VP9_COMP *cpi) { - if (cpi->sf.RD) { - int i, frame_type, pred_type; - TXFM_MODE txfm_type; - - /* - * This code does a single RD pass over the whole frame assuming - * either compound, single or hybrid prediction as per whatever has - * worked best for that type of frame in the past. - * It also predicts whether another coding mode would have worked - * better that this coding mode. If that is the case, it remembers - * that for subsequent frames. - * It does the same analysis for transform size selection also. 
- */ - if (cpi->common.frame_type == KEY_FRAME) - frame_type = 0; - else if (cpi->is_src_frame_alt_ref && cpi->common.refresh_golden_frame) - frame_type = 3; - else if (cpi->common.refresh_golden_frame || cpi->common.refresh_alt_ref_frame) - frame_type = 1; - else - frame_type = 2; - - /* prediction (compound, single or hybrid) mode selection */ - if (frame_type == 3) - pred_type = SINGLE_PREDICTION_ONLY; - else if (cpi->rd_prediction_type_threshes[frame_type][1] > - cpi->rd_prediction_type_threshes[frame_type][0] && - cpi->rd_prediction_type_threshes[frame_type][1] > - cpi->rd_prediction_type_threshes[frame_type][2] && - check_dual_ref_flags(cpi) && cpi->static_mb_pct == 100) - pred_type = COMP_PREDICTION_ONLY; - else if (cpi->rd_prediction_type_threshes[frame_type][0] > - cpi->rd_prediction_type_threshes[frame_type][2]) - pred_type = SINGLE_PREDICTION_ONLY; - else - pred_type = HYBRID_PREDICTION; - - /* transform size (4x4, 8x8, 16x16 or select-per-mb) selection */ -#if CONFIG_LOSSLESS - if (cpi->oxcf.lossless) { - txfm_type = ONLY_4X4; - } else -#endif - /* FIXME (rbultje) - * this is a hack (no really), basically to work around the complete - * nonsense coefficient cost prediction for keyframes. The probabilities - * are reset to defaults, and thus we basically have no idea how expensive - * a 4x4 vs. 8x8 will really be. The result is that any estimate at which - * of the two is better is utterly bogus. - * I'd like to eventually remove this hack, but in order to do that, we - * need to move the frame reset code from the frame encode init to the - * bitstream write code, or alternatively keep a backup of the previous - * keyframe's probabilities as an estimate of what the current keyframe's - * coefficient cost distributions may look like. 
*/ - if (frame_type == 0) { - txfm_type = ALLOW_16X16; - } else -#if 0 - /* FIXME (rbultje) - * this code is disabled for a similar reason as the code above; the - * problem is that each time we "revert" to 4x4 only (or even 8x8 only), - * the coefficient probabilities for 16x16 (and 8x8) start lagging behind, - * thus leading to them lagging further behind and not being chosen for - * subsequent frames either. This is essentially a local minimum problem - * that we can probably fix by estimating real costs more closely within - * a frame, perhaps by re-calculating costs on-the-fly as frame encoding - * progresses. */ - if (cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] > - cpi->rd_tx_select_threshes[frame_type][ONLY_4X4] && - cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] > - cpi->rd_tx_select_threshes[frame_type][ALLOW_16X16] && - cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] > - cpi->rd_tx_select_threshes[frame_type][ALLOW_8X8]) { - txfm_type = TX_MODE_SELECT; - } else if (cpi->rd_tx_select_threshes[frame_type][ONLY_4X4] > - cpi->rd_tx_select_threshes[frame_type][ALLOW_8X8] - && cpi->rd_tx_select_threshes[frame_type][ONLY_4X4] > - cpi->rd_tx_select_threshes[frame_type][ALLOW_16X16] - ) { - txfm_type = ONLY_4X4; - } else if (cpi->rd_tx_select_threshes[frame_type][ALLOW_16X16] >= - cpi->rd_tx_select_threshes[frame_type][ALLOW_8X8]) { - txfm_type = ALLOW_16X16; - } else - txfm_type = ALLOW_8X8; -#else - txfm_type = cpi->rd_tx_select_threshes[frame_type][ALLOW_16X16] >= - cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] ? 
- ALLOW_16X16 : TX_MODE_SELECT; -#endif - cpi->common.txfm_mode = txfm_type; - if (txfm_type != TX_MODE_SELECT) { - cpi->common.prob_tx[0] = 128; - cpi->common.prob_tx[1] = 128; - } - cpi->common.comp_pred_mode = pred_type; - encode_frame_internal(cpi); - - for (i = 0; i < NB_PREDICTION_TYPES; ++i) { - const int diff = cpi->rd_comp_pred_diff[i] / cpi->common.MBs; - cpi->rd_prediction_type_threshes[frame_type][i] += diff; - cpi->rd_prediction_type_threshes[frame_type][i] >>= 1; - } - - for (i = 0; i < NB_TXFM_MODES; ++i) { - int64_t pd = cpi->rd_tx_select_diff[i]; - int diff; - if (i == TX_MODE_SELECT) - pd -= RDCOST(cpi->mb.rdmult, cpi->mb.rddiv, 2048 * (TX_SIZE_MAX - 1), 0); - diff = pd / cpi->common.MBs; - cpi->rd_tx_select_threshes[frame_type][i] += diff; - cpi->rd_tx_select_threshes[frame_type][i] /= 2; - } - - if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) { - int single_count_zero = 0; - int comp_count_zero = 0; - - for (i = 0; i < COMP_PRED_CONTEXTS; i++) { - single_count_zero += cpi->single_pred_count[i]; - comp_count_zero += cpi->comp_pred_count[i]; - } - - if (comp_count_zero == 0) { - cpi->common.comp_pred_mode = SINGLE_PREDICTION_ONLY; - } else if (single_count_zero == 0) { - cpi->common.comp_pred_mode = COMP_PREDICTION_ONLY; - } - } - - if (cpi->common.txfm_mode == TX_MODE_SELECT) { - const int count4x4 = cpi->txfm_count[TX_4X4] + cpi->txfm_count_8x8p[TX_4X4]; - const int count8x8 = cpi->txfm_count[TX_8X8]; - const int count8x8_8x8p = cpi->txfm_count_8x8p[TX_8X8]; - const int count16x16 = cpi->txfm_count[TX_16X16]; - - if (count4x4 == 0 && count16x16 == 0) { - cpi->common.txfm_mode = ALLOW_8X8; - reset_skip_txfm_size(cpi, TX_8X8); - } else if (count8x8 == 0 && count16x16 == 0 && count8x8_8x8p == 0) { - cpi->common.txfm_mode = ONLY_4X4; - reset_skip_txfm_size(cpi, TX_4X4); - } else if (count8x8 == 0 && count4x4 == 0) { - cpi->common.txfm_mode = ALLOW_16X16; - } - } - } else { - encode_frame_internal(cpi); - } - -} - -void 
vp9_setup_block_ptrs(MACROBLOCK *x) { - int r, c; - int i; - - for (r = 0; r < 4; r++) { - for (c = 0; c < 4; c++) { - x->block[r * 4 + c].src_diff = x->src_diff + r * 4 * 16 + c * 4; - } - } - - for (r = 0; r < 2; r++) { - for (c = 0; c < 2; c++) { - x->block[16 + r * 2 + c].src_diff = x->src_diff + 256 + r * 4 * 8 + c * 4; - } - } - - - for (r = 0; r < 2; r++) { - for (c = 0; c < 2; c++) { - x->block[20 + r * 2 + c].src_diff = x->src_diff + 320 + r * 4 * 8 + c * 4; - } - } - - x->block[24].src_diff = x->src_diff + 384; - - - for (i = 0; i < 25; i++) { - x->block[i].coeff = x->coeff + i * 16; - } -} - -void vp9_build_block_offsets(MACROBLOCK *x) { - int block = 0; - int br, bc; - - vp9_build_block_doffsets(&x->e_mbd); - - // y blocks - x->thismb_ptr = &x->thismb[0]; - for (br = 0; br < 4; br++) { - for (bc = 0; bc < 4; bc++) { - BLOCK *this_block = &x->block[block]; - // this_block->base_src = &x->src.y_buffer; - // this_block->src_stride = x->src.y_stride; - // this_block->src = 4 * br * this_block->src_stride + 4 * bc; - this_block->base_src = &x->thismb_ptr; - this_block->src_stride = 16; - this_block->src = 4 * br * 16 + 4 * bc; - ++block; - } - } - - // u blocks - for (br = 0; br < 2; br++) { - for (bc = 0; bc < 2; bc++) { - BLOCK *this_block = &x->block[block]; - this_block->base_src = &x->src.u_buffer; - this_block->src_stride = x->src.uv_stride; - this_block->src = 4 * br * this_block->src_stride + 4 * bc; - ++block; - } - } - - // v blocks - for (br = 0; br < 2; br++) { - for (bc = 0; bc < 2; bc++) { - BLOCK *this_block = &x->block[block]; - this_block->base_src = &x->src.v_buffer; - this_block->src_stride = x->src.uv_stride; - this_block->src = 4 * br * this_block->src_stride + 4 * bc; - ++block; - } - } -} - -static void sum_intra_stats(VP9_COMP *cpi, MACROBLOCK *x) { - const MACROBLOCKD *xd = &x->e_mbd; - const MB_PREDICTION_MODE m = xd->mode_info_context->mbmi.mode; - const MB_PREDICTION_MODE uvm = xd->mode_info_context->mbmi.uv_mode; - -#ifdef 
MODE_STATS - const int is_key = cpi->common.frame_type == KEY_FRAME; - - ++ (is_key ? uv_modes : inter_uv_modes)[uvm]; - ++ uv_modes_y[m][uvm]; - - if (m == B_PRED) { - unsigned int *const bct = is_key ? b_modes : inter_b_modes; - - int b = 0; - - do { - ++ bct[xd->block[b].bmi.as_mode.first]; - } while (++b < 16); - } - - if (m == I8X8_PRED) { - i8x8_modes[xd->block[0].bmi.as_mode.first]++; - i8x8_modes[xd->block[2].bmi.as_mode.first]++; - i8x8_modes[xd->block[8].bmi.as_mode.first]++; - i8x8_modes[xd->block[10].bmi.as_mode.first]++; - } -#endif - -#if CONFIG_SUPERBLOCKS - if (xd->mode_info_context->mbmi.encoded_as_sb) { - ++cpi->sb_ymode_count[m]; - } else -#endif - ++cpi->ymode_count[m]; - if (m != I8X8_PRED) - ++cpi->y_uv_mode_count[m][uvm]; - else { - cpi->i8x8_mode_count[xd->block[0].bmi.as_mode.first]++; - cpi->i8x8_mode_count[xd->block[2].bmi.as_mode.first]++; - cpi->i8x8_mode_count[xd->block[8].bmi.as_mode.first]++; - cpi->i8x8_mode_count[xd->block[10].bmi.as_mode.first]++; - } - if (m == B_PRED) { - int b = 0; - do { - ++ cpi->bmode_count[xd->block[b].bmi.as_mode.first]; - } while (++b < 16); - } -} - -// Experimental stub function to create a per MB zbin adjustment based on -// some previously calculated measure of MB activity. -static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x) { -#if USE_ACT_INDEX - x->act_zbin_adj = *(x->mb_activity_ptr); -#else - int64_t a; - int64_t b; - int64_t act = *(x->mb_activity_ptr); - - // Apply the masking to the RD multiplier. 
- a = act + 4 * cpi->activity_avg; - b = 4 * act + cpi->activity_avg; - - if (act > cpi->activity_avg) - x->act_zbin_adj = (int)(((int64_t)b + (a >> 1)) / a) - 1; - else - x->act_zbin_adj = 1 - (int)(((int64_t)a + (b >> 1)) / b); -#endif -} - -#if CONFIG_SUPERBLOCKS -static void update_sb_skip_coeff_state(VP9_COMP *cpi, - MACROBLOCK *x, - ENTROPY_CONTEXT_PLANES ta[4], - ENTROPY_CONTEXT_PLANES tl[4], - TOKENEXTRA *t[4], - TOKENEXTRA **tp, - int skip[4]) -{ - TOKENEXTRA tokens[4][16 * 24]; - int n_tokens[4], n; - - // if there were no skips, we don't need to do anything - if (!skip[0] && !skip[1] && !skip[2] && !skip[3]) - return; - - // if we don't do coeff skipping for this frame, we don't - // need to do anything here - if (!cpi->common.mb_no_coeff_skip) - return; - - // if all 4 MBs skipped coeff coding, nothing to be done - if (skip[0] && skip[1] && skip[2] && skip[3]) - return; - - // so the situation now is that we want to skip coeffs - // for some MBs, but not all, and we didn't code EOB - // coefficients for them. However, the skip flag for this - // SB will be 0 overall, so we need to insert EOBs in the - // middle of the token tree. Do so here. 
- n_tokens[0] = t[1] - t[0]; - n_tokens[1] = t[2] - t[1]; - n_tokens[2] = t[3] - t[2]; - n_tokens[3] = *tp - t[3]; - if (n_tokens[0]) - memcpy(tokens[0], t[0], n_tokens[0] * sizeof(*t[0])); - if (n_tokens[1]) - memcpy(tokens[1], t[1], n_tokens[1] * sizeof(*t[0])); - if (n_tokens[2]) - memcpy(tokens[2], t[2], n_tokens[2] * sizeof(*t[0])); - if (n_tokens[3]) - memcpy(tokens[3], t[3], n_tokens[3] * sizeof(*t[0])); - - // reset pointer, stuff EOBs where necessary - *tp = t[0]; - for (n = 0; n < 4; n++) { - if (skip[n]) { - x->e_mbd.above_context = &ta[n]; - x->e_mbd.left_context = &tl[n]; - vp9_stuff_mb(cpi, &x->e_mbd, tp, 0); - } else { - if (n_tokens[n]) { - memcpy(*tp, tokens[n], sizeof(*t[0]) * n_tokens[n]); - } - (*tp) += n_tokens[n]; - } - } -} - -void vp9_encode_intra_super_block(VP9_COMP *cpi, - MACROBLOCK *x, - TOKENEXTRA **t, - int mb_col) { - const int output_enabled = 1; - int n; - MACROBLOCKD *xd = &x->e_mbd; - VP9_COMMON *cm = &cpi->common; - const uint8_t *src = x->src.y_buffer; - uint8_t *dst = xd->dst.y_buffer; - const uint8_t *usrc = x->src.u_buffer; - uint8_t *udst = xd->dst.u_buffer; - const uint8_t *vsrc = x->src.v_buffer; - uint8_t *vdst = xd->dst.v_buffer; - int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride; - int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride; - const VP9_ENCODER_RTCD *rtcd = IF_RTCD(&cpi->rtcd); - TOKENEXTRA *tp[4]; - int skip[4]; - MODE_INFO *mi = x->e_mbd.mode_info_context; - ENTROPY_CONTEXT_PLANES ta[4], tl[4]; - - if ((cpi->oxcf.tuning == VP8_TUNE_SSIM) && output_enabled) { - adjust_act_zbin(cpi, x); - vp9_update_zbin_extra(cpi, x); - } - - vp9_build_intra_predictors_sby_s(&x->e_mbd); - vp9_build_intra_predictors_sbuv_s(&x->e_mbd); - - assert(x->e_mbd.mode_info_context->mbmi.txfm_size == TX_8X8); - for (n = 0; n < 4; n++) { - int x_idx = n & 1, y_idx = n >> 1; - - xd->above_context = cm->above_context + mb_col + (n & 1); - xd->left_context = cm->left_context + (n >> 1); - - 
vp9_subtract_mby_s_c(x->src_diff, - src + x_idx * 16 + y_idx * 16 * src_y_stride, - src_y_stride, - dst + x_idx * 16 + y_idx * 16 * dst_y_stride, - dst_y_stride); - vp9_subtract_mbuv_s_c(x->src_diff, - usrc + x_idx * 8 + y_idx * 8 * src_uv_stride, - vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride, - src_uv_stride, - udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, - vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride, - dst_uv_stride); - vp9_transform_mb_8x8(x); - vp9_quantize_mb_8x8(x); - if (x->optimize) { - vp9_optimize_mby_8x8(x, rtcd); - vp9_optimize_mbuv_8x8(x, rtcd); - } - vp9_inverse_transform_mb_8x8(IF_RTCD(&rtcd->common->idct), &x->e_mbd); - vp9_recon_mby_s_c(&x->e_mbd, dst + x_idx * 16 + y_idx * 16 * dst_y_stride); - vp9_recon_mbuv_s_c(&x->e_mbd, - udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, - vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride); - - if (output_enabled) { - memcpy(&ta[n], xd->above_context, sizeof(ta[n])); - memcpy(&tl[n], xd->left_context, sizeof(tl[n])); - tp[n] = *t; - xd->mode_info_context = mi + x_idx + y_idx * cm->mode_info_stride; - vp9_tokenize_mb(cpi, &x->e_mbd, t, 0); - skip[n] = xd->mode_info_context->mbmi.mb_skip_coeff; - } - } - - if (output_enabled) { - // Tokenize - xd->mode_info_context = mi; - sum_intra_stats(cpi, x); - update_sb_skip_coeff_state(cpi, x, ta, tl, tp, t, skip); - } -} -#endif /* CONFIG_SUPERBLOCKS */ - -void vp9_encode_intra_macro_block(VP9_COMP *cpi, - MACROBLOCK *x, - TOKENEXTRA **t, - int output_enabled) { - MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; - if ((cpi->oxcf.tuning == VP8_TUNE_SSIM) && output_enabled) { - adjust_act_zbin(cpi, x); - vp9_update_zbin_extra(cpi, x); - } - if (mbmi->mode == I8X8_PRED) { - vp9_encode_intra8x8mby(IF_RTCD(&cpi->rtcd), x); - vp9_encode_intra8x8mbuv(IF_RTCD(&cpi->rtcd), x); - } else if (mbmi->mode == B_PRED) { - vp9_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x); - } else { - vp9_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x); - } - - if (mbmi->mode != I8X8_PRED) { - 
vp9_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x); - } - - if (output_enabled) { - int segment_id = mbmi->segment_id; - - // Tokenize - sum_intra_stats(cpi, x); - vp9_tokenize_mb(cpi, &x->e_mbd, t, 0); - - if (cpi->common.txfm_mode == TX_MODE_SELECT && - !((cpi->common.mb_no_coeff_skip && mbmi->mb_skip_coeff) || - (vp9_segfeature_active(&x->e_mbd, segment_id, SEG_LVL_EOB) && - vp9_get_segdata(&x->e_mbd, segment_id, SEG_LVL_EOB) == 0))) { - if (mbmi->mode != B_PRED && mbmi->mode != I8X8_PRED) { - cpi->txfm_count[mbmi->txfm_size]++; - } else if (mbmi->mode == I8X8_PRED) { - cpi->txfm_count_8x8p[mbmi->txfm_size]++; - } - } else if (cpi->common.txfm_mode >= ALLOW_16X16 && mbmi->mode <= TM_PRED) { - mbmi->txfm_size = TX_16X16; - } else - if (cpi->common.txfm_mode >= ALLOW_8X8 && mbmi->mode != B_PRED) { - mbmi->txfm_size = TX_8X8; - } else { - mbmi->txfm_size = TX_4X4; - } - } -#if CONFIG_NEWBESTREFMV - else - vp9_tokenize_mb(cpi, &x->e_mbd, t, 1); -#endif -} - -extern void vp9_fix_contexts(MACROBLOCKD *xd); - -void vp9_encode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x, - TOKENEXTRA **t, int recon_yoffset, - int recon_uvoffset, int output_enabled) { - VP9_COMMON *cm = &cpi->common; - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; - unsigned char *segment_id = &mbmi->segment_id; - int seg_ref_active; - unsigned char ref_pred_flag; - - x->skip = 0; -#if CONFIG_SUPERBLOCKS - assert(!xd->mode_info_context->mbmi.encoded_as_sb); -#endif - - vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); - if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { - // Adjust the zbin based on this MB rate. - adjust_act_zbin(cpi, x); - } - - { - // Experimental code. Special case for gf and arf zeromv modes. 
- // Increase zbin size to suppress noise - cpi->zbin_mode_boost = 0; - if (cpi->zbin_mode_boost_enabled) { - if (mbmi->ref_frame != INTRA_FRAME) { - if (mbmi->mode == ZEROMV) { - if (mbmi->ref_frame != LAST_FRAME) - cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; - else - cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST; - } else if (mbmi->mode == SPLITMV) - cpi->zbin_mode_boost = 0; - else - cpi->zbin_mode_boost = MV_ZBIN_BOOST; - } - } - - vp9_update_zbin_extra(cpi, x); - } - - seg_ref_active = vp9_segfeature_active(xd, *segment_id, SEG_LVL_REF_FRAME); - - // SET VARIOUS PREDICTION FLAGS - - // Did the chosen reference frame match its predicted value. - ref_pred_flag = ((mbmi->ref_frame == vp9_get_pred_ref(cm, xd))); - vp9_set_pred_flag(xd, PRED_REF, ref_pred_flag); - - if (mbmi->ref_frame == INTRA_FRAME) { - if (mbmi->mode == B_PRED) { - vp9_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x); - vp9_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x); - } else if (mbmi->mode == I8X8_PRED) { - vp9_encode_intra8x8mby(IF_RTCD(&cpi->rtcd), x); - vp9_encode_intra8x8mbuv(IF_RTCD(&cpi->rtcd), x); - } else { - vp9_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x); - vp9_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x); - } - - if (output_enabled) - sum_intra_stats(cpi, x); - } else { - int ref_fb_idx; - - if (mbmi->ref_frame == LAST_FRAME) - ref_fb_idx = cpi->common.lst_fb_idx; - else if (mbmi->ref_frame == GOLDEN_FRAME) - ref_fb_idx = cpi->common.gld_fb_idx; - else - ref_fb_idx = cpi->common.alt_fb_idx; - - xd->pre.y_buffer = cpi->common.yv12_fb[ref_fb_idx].y_buffer + recon_yoffset; - xd->pre.u_buffer = cpi->common.yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset; - xd->pre.v_buffer = cpi->common.yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset; - - if (mbmi->second_ref_frame) { - int second_ref_fb_idx; - - if (mbmi->second_ref_frame == LAST_FRAME) - second_ref_fb_idx = cpi->common.lst_fb_idx; - else if (mbmi->second_ref_frame == GOLDEN_FRAME) - second_ref_fb_idx = cpi->common.gld_fb_idx; - else - 
second_ref_fb_idx = cpi->common.alt_fb_idx; - - xd->second_pre.y_buffer = cpi->common.yv12_fb[second_ref_fb_idx].y_buffer + - recon_yoffset; - xd->second_pre.u_buffer = cpi->common.yv12_fb[second_ref_fb_idx].u_buffer + - recon_uvoffset; - xd->second_pre.v_buffer = cpi->common.yv12_fb[second_ref_fb_idx].v_buffer + - recon_uvoffset; - } - - if (!x->skip) { - vp9_encode_inter16x16(IF_RTCD(&cpi->rtcd), x); - - // Clear mb_skip_coeff if mb_no_coeff_skip is not set - if (!cpi->common.mb_no_coeff_skip) - mbmi->mb_skip_coeff = 0; - - } else { - vp9_build_1st_inter16x16_predictors_mb(xd, xd->dst.y_buffer, - xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.y_stride, - xd->dst.uv_stride); - } - } - - if (!x->skip) { -#ifdef ENC_DEBUG - if (enc_debug) { - int i; - printf("Segment=%d [%d, %d]: %d %d:\n", mbmi->segment_id, mb_col_debug, - mb_row_debug, xd->mb_to_left_edge, xd->mb_to_top_edge); - for (i = 0; i < 400; i++) { - printf("%3d ", xd->qcoeff[i]); - if (i % 16 == 15) printf("\n"); - } - printf("\n"); - printf("eobs = "); - for (i = 0; i < 25; i++) - printf("%d:%d ", i, xd->block[i].eob); - printf("\n"); - fflush(stdout); - } -#endif - - vp9_tokenize_mb(cpi, xd, t, !output_enabled); - -#ifdef ENC_DEBUG - if (enc_debug) { - printf("Tokenized\n"); - fflush(stdout); - } -#endif - } else { - int mb_skip_context = - cpi->common.mb_no_coeff_skip ? 
- (x->e_mbd.mode_info_context - 1)->mbmi.mb_skip_coeff + - (x->e_mbd.mode_info_context - cpi->common.mode_info_stride)->mbmi.mb_skip_coeff : - 0; - if (cpi->common.mb_no_coeff_skip) { - mbmi->mb_skip_coeff = 1; - if (output_enabled) - cpi->skip_true_count[mb_skip_context]++; - vp9_fix_contexts(xd); - } else { - vp9_stuff_mb(cpi, xd, t, !output_enabled); - mbmi->mb_skip_coeff = 0; - if (output_enabled) - cpi->skip_false_count[mb_skip_context]++; - } - } - - if (output_enabled) { - int segment_id = mbmi->segment_id; - if (cpi->common.txfm_mode == TX_MODE_SELECT && - !((cpi->common.mb_no_coeff_skip && mbmi->mb_skip_coeff) || - (vp9_segfeature_active(&x->e_mbd, segment_id, SEG_LVL_EOB) && - vp9_get_segdata(&x->e_mbd, segment_id, SEG_LVL_EOB) == 0))) { - if (mbmi->mode != B_PRED && mbmi->mode != I8X8_PRED && - mbmi->mode != SPLITMV) { - cpi->txfm_count[mbmi->txfm_size]++; - } else if (mbmi->mode == I8X8_PRED || - (mbmi->mode == SPLITMV && - mbmi->partitioning != PARTITIONING_4X4)) { - cpi->txfm_count_8x8p[mbmi->txfm_size]++; - } - } else if (mbmi->mode != B_PRED && mbmi->mode != I8X8_PRED && - mbmi->mode != SPLITMV && cpi->common.txfm_mode >= ALLOW_16X16) { - mbmi->txfm_size = TX_16X16; - } else if (mbmi->mode != B_PRED && - !(mbmi->mode == SPLITMV && - mbmi->partitioning == PARTITIONING_4X4) && - cpi->common.txfm_mode >= ALLOW_8X8) { - mbmi->txfm_size = TX_8X8; - } else { - mbmi->txfm_size = TX_4X4; - } - } -} - -#if CONFIG_SUPERBLOCKS -void vp9_encode_inter_superblock(VP9_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, - int recon_yoffset, int recon_uvoffset, - int mb_col, int mb_row) { - const int output_enabled = 1; - VP9_COMMON *cm = &cpi->common; - MACROBLOCKD *xd = &x->e_mbd; - const uint8_t *src = x->src.y_buffer; - uint8_t *dst = xd->dst.y_buffer; - const uint8_t *usrc = x->src.u_buffer; - uint8_t *udst = xd->dst.u_buffer; - const uint8_t *vsrc = x->src.v_buffer; - uint8_t *vdst = xd->dst.v_buffer; - int src_y_stride = x->src.y_stride, dst_y_stride = 
xd->dst.y_stride; - int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride; - const VP9_ENCODER_RTCD *rtcd = IF_RTCD(&cpi->rtcd); - unsigned int segment_id = xd->mode_info_context->mbmi.segment_id; - int seg_ref_active; - unsigned char ref_pred_flag; - int n; - TOKENEXTRA *tp[4]; - int skip[4]; - MODE_INFO *mi = x->e_mbd.mode_info_context; - ENTROPY_CONTEXT_PLANES ta[4], tl[4]; - - x->skip = 0; - - if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { - // Adjust the zbin based on this MB rate. - adjust_act_zbin(cpi, x); - } - - { - // Experimental code. Special case for gf and arf zeromv modes. - // Increase zbin size to suppress noise - cpi->zbin_mode_boost = 0; - if (cpi->zbin_mode_boost_enabled) { - if (xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME) { - if (xd->mode_info_context->mbmi.mode == ZEROMV) { - if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) - cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; - else - cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST; - } else if (xd->mode_info_context->mbmi.mode == SPLITMV) - cpi->zbin_mode_boost = 0; - else - cpi->zbin_mode_boost = MV_ZBIN_BOOST; - } - } - - vp9_update_zbin_extra(cpi, x); - } - - seg_ref_active = vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME); - - // SET VARIOUS PREDICTION FLAGS - - // Did the chosen reference frame match its predicted value. 
- ref_pred_flag = ((xd->mode_info_context->mbmi.ref_frame == - vp9_get_pred_ref(cm, xd))); - vp9_set_pred_flag(xd, PRED_REF, ref_pred_flag); - - if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { - vp9_build_intra_predictors_sby_s(&x->e_mbd); - vp9_build_intra_predictors_sbuv_s(&x->e_mbd); - } else { - int ref_fb_idx; - - if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME) - ref_fb_idx = cpi->common.lst_fb_idx; - else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME) - ref_fb_idx = cpi->common.gld_fb_idx; - else - ref_fb_idx = cpi->common.alt_fb_idx; - - xd->pre.y_buffer = cpi->common.yv12_fb[ref_fb_idx].y_buffer + recon_yoffset; - xd->pre.u_buffer = cpi->common.yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset; - xd->pre.v_buffer = cpi->common.yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset; - - if (xd->mode_info_context->mbmi.second_ref_frame) { - int second_ref_fb_idx; - - if (xd->mode_info_context->mbmi.second_ref_frame == LAST_FRAME) - second_ref_fb_idx = cpi->common.lst_fb_idx; - else if (xd->mode_info_context->mbmi.second_ref_frame == GOLDEN_FRAME) - second_ref_fb_idx = cpi->common.gld_fb_idx; - else - second_ref_fb_idx = cpi->common.alt_fb_idx; - - xd->second_pre.y_buffer = cpi->common.yv12_fb[second_ref_fb_idx].y_buffer + - recon_yoffset; - xd->second_pre.u_buffer = cpi->common.yv12_fb[second_ref_fb_idx].u_buffer + - recon_uvoffset; - xd->second_pre.v_buffer = cpi->common.yv12_fb[second_ref_fb_idx].v_buffer + - recon_uvoffset; - } - - vp9_build_inter32x32_predictors_sb(xd, xd->dst.y_buffer, - xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.y_stride, xd->dst.uv_stride); - } - - assert(x->e_mbd.mode_info_context->mbmi.txfm_size == TX_8X8); - for (n = 0; n < 4; n++) { - int x_idx = n & 1, y_idx = n >> 1; - - vp9_subtract_mby_s_c(x->src_diff, - src + x_idx * 16 + y_idx * 16 * src_y_stride, - src_y_stride, - dst + x_idx * 16 + y_idx * 16 * dst_y_stride, - dst_y_stride); - vp9_subtract_mbuv_s_c(x->src_diff, - usrc + x_idx * 8 + y_idx * 8 * 
src_uv_stride, - vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride, - src_uv_stride, - udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, - vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride, - dst_uv_stride); - vp9_transform_mb_8x8(x); - vp9_quantize_mb_8x8(x); - if (x->optimize) { - vp9_optimize_mby_8x8(x, rtcd); - vp9_optimize_mbuv_8x8(x, rtcd); - } - vp9_inverse_transform_mb_8x8(IF_RTCD(&rtcd->common->idct), &x->e_mbd); - vp9_recon_mby_s_c(&x->e_mbd, - dst + x_idx * 16 + y_idx * 16 * dst_y_stride); - vp9_recon_mbuv_s_c(&x->e_mbd, - udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, - vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride); - - if (!x->skip) { - if (output_enabled) { - xd->left_context = cm->left_context + (n >> 1); - xd->above_context = cm->above_context + mb_col + (n & 1); - memcpy(&ta[n], xd->above_context, sizeof(ta[n])); - memcpy(&tl[n], xd->left_context, sizeof(tl[n])); - tp[n] = *t; - xd->mode_info_context = mi + x_idx + y_idx * cm->mode_info_stride; - vp9_tokenize_mb(cpi, &x->e_mbd, t, 0); - skip[n] = xd->mode_info_context->mbmi.mb_skip_coeff; - } - } else { - int mb_skip_context = - cpi->common.mb_no_coeff_skip ? 
- (x->e_mbd.mode_info_context - 1)->mbmi.mb_skip_coeff + - (x->e_mbd.mode_info_context - cpi->common.mode_info_stride)->mbmi.mb_skip_coeff : - 0; - if (cpi->common.mb_no_coeff_skip) { - skip[n] = xd->mode_info_context->mbmi.mb_skip_coeff = 1; - xd->left_context = cm->left_context + (n >> 1); - xd->above_context = cm->above_context + mb_col + (n & 1); - memcpy(&ta[n], xd->above_context, sizeof(ta[n])); - memcpy(&tl[n], xd->left_context, sizeof(tl[n])); - tp[n] = *t; - cpi->skip_true_count[mb_skip_context]++; - vp9_fix_contexts(xd); - } else { - vp9_stuff_mb(cpi, xd, t, 0); - xd->mode_info_context->mbmi.mb_skip_coeff = 0; - cpi->skip_false_count[mb_skip_context]++; - } - } - } - - xd->mode_info_context = mi; - update_sb_skip_coeff_state(cpi, x, ta, tl, tp, t, skip); -} -#endif diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c deleted file mode 100644 index e89e96c44..000000000 --- a/vp8/encoder/encodeintra.c +++ /dev/null @@ -1,289 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "vpx_ports/config.h" -#include "vpx_rtcd.h" -#include "vp8/common/idct.h" -#include "quantize.h" -#include "vp8/common/reconintra.h" -#include "vp8/common/reconintra4x4.h" -#include "encodemb.h" -#include "vp8/common/invtrans.h" -#include "encodeintra.h" - -#if CONFIG_RUNTIME_CPU_DETECT -#define IF_RTCD(x) (x) -#else -#define IF_RTCD(x) NULL -#endif - -int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred) { - int i; - int intra_pred_var = 0; - MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; - (void) cpi; - - if (use_16x16_pred) { - mbmi->mode = DC_PRED; -#if CONFIG_COMP_INTRA_PRED - mbmi->second_mode = (MB_PREDICTION_MODE)(DC_PRED - 1); -#endif - mbmi->uv_mode = DC_PRED; - mbmi->ref_frame = INTRA_FRAME; - - vp9_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x); - } else { - for (i = 0; i < 16; i++) { - x->e_mbd.block[i].bmi.as_mode.first = B_DC_PRED; - vp9_encode_intra4x4block(IF_RTCD(&cpi->rtcd), x, i); - } - } - - intra_pred_var = vp9_get_mb_ss(x->src_diff); - - return intra_pred_var; -} - -void vp9_encode_intra4x4block(const VP9_ENCODER_RTCD *rtcd, - MACROBLOCK *x, int ib) { - BLOCKD *b = &x->e_mbd.block[ib]; - BLOCK *be = &x->block[ib]; - TX_TYPE tx_type; - -#if CONFIG_COMP_INTRA_PRED - if (b->bmi.as_mode.second == (B_PREDICTION_MODE)(B_DC_PRED - 1)) { -#endif - vp9_intra4x4_predict(b, b->bmi.as_mode.first, b->predictor); -#if CONFIG_COMP_INTRA_PRED - } else { - vp9_comp_intra4x4_predict(b, b->bmi.as_mode.first, b->bmi.as_mode.second, - b->predictor); - } -#endif - - vp9_subtract_b(be, b, 16); - - tx_type = get_tx_type(&x->e_mbd, b); - if (tx_type != DCT_DCT) { - vp9_fht(be->src_diff, 32, be->coeff, tx_type, 4); - vp9_ht_quantize_b_4x4(be, b, tx_type); - vp9_ihtllm_c(b->dqcoeff, b->diff, 32, tx_type, 4); - } else { - x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32); - x->quantize_b_4x4(be, b) ; - vp9_inverse_transform_b_4x4(IF_RTCD(&rtcd->common->idct), b, 32); - } - - vp9_recon_b(b->predictor, b->diff, *(b->base_dst) + 
b->dst, b->dst_stride); -} - -void vp9_encode_intra4x4mby(const VP9_ENCODER_RTCD *rtcd, MACROBLOCK *mb) { - int i; - - for (i = 0; i < 16; i++) - vp9_encode_intra4x4block(rtcd, mb, i); - return; -} - -void vp9_encode_intra16x16mby(const VP9_ENCODER_RTCD *rtcd, MACROBLOCK *x) { - MACROBLOCKD *xd = &x->e_mbd; - BLOCK *b = &x->block[0]; - TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; - TX_TYPE tx_type; - -#if CONFIG_COMP_INTRA_PRED - if (xd->mode_info_context->mbmi.second_mode == (MB_PREDICTION_MODE)(DC_PRED - 1)) -#endif - vp9_build_intra_predictors_mby(xd); -#if CONFIG_COMP_INTRA_PRED - else - vp9_build_comp_intra_predictors_mby(xd); -#endif - - vp9_subtract_mby(x->src_diff, *(b->base_src), xd->predictor, b->src_stride); - - if (tx_size == TX_16X16) { - BLOCKD *bd = &xd->block[0]; - tx_type = get_tx_type(xd, bd); - if (tx_type != DCT_DCT) { - vp9_fht(b->src_diff, 32, b->coeff, tx_type, 16); - vp9_quantize_mby_16x16(x); - if (x->optimize) - vp9_optimize_mby_16x16(x, rtcd); - vp9_ihtllm_c(bd->dqcoeff, bd->diff, 32, tx_type, 16); - } else { - vp9_transform_mby_16x16(x); - vp9_quantize_mby_16x16(x); - if (x->optimize) - vp9_optimize_mby_16x16(x, rtcd); - vp9_inverse_transform_mby_16x16(IF_RTCD(&rtcd->common->idct), xd); - } - } else if (tx_size == TX_8X8) { - vp9_transform_mby_8x8(x); - vp9_quantize_mby_8x8(x); - if (x->optimize) - vp9_optimize_mby_8x8(x, rtcd); - vp9_inverse_transform_mby_8x8(IF_RTCD(&rtcd->common->idct), xd); - } else { - vp9_transform_mby_4x4(x); - vp9_quantize_mby_4x4(x); - if (x->optimize) - vp9_optimize_mby_4x4(x, rtcd); - vp9_inverse_transform_mby_4x4(IF_RTCD(&rtcd->common->idct), xd); - } - - vp9_recon_mby(xd); -} - -void vp9_encode_intra16x16mbuv(const VP9_ENCODER_RTCD *rtcd, MACROBLOCK *x) { - MACROBLOCKD *xd = &x->e_mbd; - TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; - -#if CONFIG_COMP_INTRA_PRED - if (xd->mode_info_context->mbmi.second_uv_mode == (MB_PREDICTION_MODE)(DC_PRED - 1)) { -#endif - 
vp9_build_intra_predictors_mbuv(xd); -#if CONFIG_COMP_INTRA_PRED - } else { - vp9_build_comp_intra_predictors_mbuv(xd); - } -#endif - - vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer, - xd->predictor, x->src.uv_stride); - - if (tx_size == TX_4X4) { - vp9_transform_mbuv_4x4(x); - vp9_quantize_mbuv_4x4(x); - if (x->optimize) - vp9_optimize_mbuv_4x4(x, rtcd); - vp9_inverse_transform_mbuv_4x4(IF_RTCD(&rtcd->common->idct), xd); - } else /* 16x16 or 8x8 */ { - vp9_transform_mbuv_8x8(x); - vp9_quantize_mbuv_8x8(x); - if (x->optimize) - vp9_optimize_mbuv_8x8(x, rtcd); - vp9_inverse_transform_mbuv_8x8(IF_RTCD(&rtcd->common->idct), xd); - } - - vp9_recon_intra_mbuv(xd); -} - -void vp9_encode_intra8x8(const VP9_ENCODER_RTCD *rtcd, - MACROBLOCK *x, int ib) { - MACROBLOCKD *xd = &x->e_mbd; - BLOCKD *b = &xd->block[ib]; - BLOCK *be = &x->block[ib]; - const int iblock[4] = {0, 1, 4, 5}; - int i; - TX_TYPE tx_type; - -#if CONFIG_COMP_INTRA_PRED - if (b->bmi.as_mode.second == (MB_PREDICTION_MODE)(DC_PRED - 1)) { -#endif - vp9_intra8x8_predict(b, b->bmi.as_mode.first, b->predictor); -#if CONFIG_COMP_INTRA_PRED - } else { - vp9_comp_intra8x8_predict(b, b->bmi.as_mode.first, b->bmi.as_mode.second, - b->predictor); - } -#endif - - if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) { - int idx = (ib & 0x02) ? 
(ib + 2) : ib; - - // generate residual blocks - vp9_subtract_4b_c(be, b, 16); - - tx_type = get_tx_type(xd, xd->block + idx); - if (tx_type != DCT_DCT) { - vp9_fht(be->src_diff, 32, (x->block + idx)->coeff, - tx_type, 8); - x->quantize_b_8x8(x->block + idx, xd->block + idx); - vp9_ihtllm_c(xd->block[idx].dqcoeff, xd->block[ib].diff, 32, - tx_type, 8); - } else { - x->vp9_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32); - x->quantize_b_8x8(x->block + idx, xd->block + idx); - vp9_idct_idct8(xd->block[idx].dqcoeff, xd->block[ib].diff, 32); - } - } else { - for (i = 0; i < 4; i++) { - b = &xd->block[ib + iblock[i]]; - be = &x->block[ib + iblock[i]]; - vp9_subtract_b(be, b, 16); - x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32); - x->quantize_b_4x4(be, b); - vp9_inverse_transform_b_4x4(IF_RTCD(&rtcd->common->idct), b, 32); - } - } - - // reconstruct submacroblock - for (i = 0; i < 4; i++) { - b = &xd->block[ib + iblock[i]]; - vp9_recon_b_c(b->predictor, b->diff, *(b->base_dst) + b->dst, - b->dst_stride); - } -} - -void vp9_encode_intra8x8mby(const VP9_ENCODER_RTCD *rtcd, MACROBLOCK *x) { - int i, ib; - - for (i = 0; i < 4; i++) { - ib = vp9_i8x8_block[i]; - vp9_encode_intra8x8(rtcd, x, ib); - } -} - -void vp9_encode_intra_uv4x4(const VP9_ENCODER_RTCD *rtcd, - MACROBLOCK *x, int ib, - int mode, int second) { - BLOCKD *b = &x->e_mbd.block[ib]; - BLOCK *be = &x->block[ib]; - -#if CONFIG_COMP_INTRA_PRED - if (second == -1) { -#endif - vp9_intra_uv4x4_predict(b, mode, b->predictor); -#if CONFIG_COMP_INTRA_PRED - } else { - vp9_comp_intra_uv4x4_predict(b, mode, second, b->predictor); - } -#endif - - vp9_subtract_b(be, b, 8); - - x->vp9_short_fdct4x4(be->src_diff, be->coeff, 16); - x->quantize_b_4x4(be, b); - vp9_inverse_transform_b_4x4(IF_RTCD(&rtcd->common->idct), b, 16); - - vp9_recon_uv_b_c(b->predictor, b->diff, *(b->base_dst) + b->dst, - b->dst_stride); -} - -void vp9_encode_intra8x8mbuv(const VP9_ENCODER_RTCD *rtcd, MACROBLOCK *x) { - int i, ib, mode, 
second; - BLOCKD *b; - - for (i = 0; i < 4; i++) { - ib = vp9_i8x8_block[i]; - b = &x->e_mbd.block[ib]; - mode = b->bmi.as_mode.first; -#if CONFIG_COMP_INTRA_PRED - second = b->bmi.as_mode.second; -#else - second = -1; -#endif - /*u */ - vp9_encode_intra_uv4x4(rtcd, x, i + 16, mode, second); - /*v */ - vp9_encode_intra_uv4x4(rtcd, x, i + 20, mode, second); - } -} diff --git a/vp8/encoder/encodeintra.h b/vp8/encoder/encodeintra.h deleted file mode 100644 index 38b42b71e..000000000 --- a/vp8/encoder/encodeintra.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef __ENCODEINTRA_H_ -#define __ENCODEINTRA_H_ - -#include "onyx_int.h" - -int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred); -void vp9_encode_intra16x16mby(const VP9_ENCODER_RTCD *, MACROBLOCK *x); -void vp9_encode_intra16x16mbuv(const VP9_ENCODER_RTCD *, MACROBLOCK *x); -void vp9_encode_intra4x4mby(const VP9_ENCODER_RTCD *, MACROBLOCK *mb); -void vp9_encode_intra4x4block(const VP9_ENCODER_RTCD *rtcd, - MACROBLOCK *x, int ib); -void vp9_encode_intra8x8mby(const VP9_ENCODER_RTCD *rtcd, MACROBLOCK *x); -void vp9_encode_intra8x8mbuv(const VP9_ENCODER_RTCD *rtcd, MACROBLOCK *x); -void vp9_encode_intra8x8(const VP9_ENCODER_RTCD *rtcd, - MACROBLOCK *x, int ib); - -#endif // __ENCODEINTRA_H_ diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c deleted file mode 100644 index d6221daed..000000000 --- a/vp8/encoder/encodemb.c +++ /dev/null @@ -1,950 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vpx_ports/config.h" -#include "encodemb.h" -#include "vp8/common/reconinter.h" -#include "quantize.h" -#include "tokenize.h" -#include "vp8/common/invtrans.h" -#include "vp8/common/reconintra.h" -#include "vpx_mem/vpx_mem.h" -#include "rdopt.h" -#include "vp8/common/systemdependent.h" -#include "vpx_rtcd.h" - -#if CONFIG_RUNTIME_CPU_DETECT -#define IF_RTCD(x) (x) -#else -#define IF_RTCD(x) NULL -#endif - -void vp9_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch) { - unsigned char *src_ptr = (*(be->base_src) + be->src); - short *diff_ptr = be->src_diff; - unsigned char *pred_ptr = bd->predictor; - int src_stride = be->src_stride; - - int r, c; - - for (r = 0; r < 4; r++) { - for (c = 0; c < 4; c++) { - diff_ptr[c] = src_ptr[c] - pred_ptr[c]; - } - - diff_ptr += pitch; - pred_ptr += pitch; - src_ptr += src_stride; - } -} - -void vp9_subtract_4b_c(BLOCK *be, BLOCKD *bd, int pitch) { - unsigned char *src_ptr = (*(be->base_src) + be->src); - short *diff_ptr = be->src_diff; - unsigned char *pred_ptr = bd->predictor; - int src_stride = be->src_stride; - int r, c; - - for (r = 0; r < 8; r++) { - for (c = 0; c < 8; c++) { - diff_ptr[c] = src_ptr[c] - pred_ptr[c]; - } - diff_ptr += pitch; - pred_ptr += pitch; - src_ptr += src_stride; - } -} - -void vp9_subtract_mbuv_s_c(short *diff, const unsigned char *usrc, - const unsigned char *vsrc, int src_stride, - const unsigned char *upred, - const unsigned char *vpred, int dst_stride) { - short *udiff = diff + 256; - short *vdiff = diff + 320; - int r, c; - - for (r = 0; r < 8; r++) { - for (c = 0; c < 8; c++) { - udiff[c] = usrc[c] - upred[c]; - } - - udiff += 8; - upred += dst_stride; - 
usrc += src_stride; - } - - for (r = 0; r < 8; r++) { - for (c = 0; c < 8; c++) { - vdiff[c] = vsrc[c] - vpred[c]; - } - - vdiff += 8; - vpred += dst_stride; - vsrc += src_stride; - } -} - -void vp9_subtract_mbuv_c(short *diff, unsigned char *usrc, - unsigned char *vsrc, unsigned char *pred, int stride) { - unsigned char *upred = pred + 256; - unsigned char *vpred = pred + 320; - - vp9_subtract_mbuv_s_c(diff, usrc, vsrc, stride, upred, vpred, 8); -} - -void vp9_subtract_mby_s_c(short *diff, const unsigned char *src, int src_stride, - const unsigned char *pred, int dst_stride) { - int r, c; - - for (r = 0; r < 16; r++) { - for (c = 0; c < 16; c++) { - diff[c] = src[c] - pred[c]; - } - - diff += 16; - pred += dst_stride; - src += src_stride; - } -} - -void vp9_subtract_mby_c(short *diff, unsigned char *src, - unsigned char *pred, int stride) { - vp9_subtract_mby_s_c(diff, src, stride, pred, 16); -} - -static void subtract_mb(const VP9_ENCODER_RTCD *rtcd, MACROBLOCK *x) { - BLOCK *b = &x->block[0]; - - vp9_subtract_mby(x->src_diff, *(b->base_src), x->e_mbd.predictor, - b->src_stride); - vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer, - x->e_mbd.predictor, x->src.uv_stride); -} - -static void build_dcblock_4x4(MACROBLOCK *x) { - short *src_diff_ptr = &x->src_diff[384]; - int i; - - for (i = 0; i < 16; i++) { - src_diff_ptr[i] = x->coeff[i * 16]; - } -} - -void vp9_transform_mby_4x4(MACROBLOCK *x) { - int i; - - for (i = 0; i < 16; i += 2) { - x->vp9_short_fdct8x4(&x->block[i].src_diff[0], - &x->block[i].coeff[0], 32); - } - - if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) { - // build dc block from 16 y dc values - build_dcblock_4x4(x); - - // do 2nd order transform on the dc block - x->short_walsh4x4(&x->block[24].src_diff[0], - &x->block[24].coeff[0], 8); - } -} - -void vp9_transform_mbuv_4x4(MACROBLOCK *x) { - int i; - - for (i = 16; i < 24; i += 2) { - x->vp9_short_fdct8x4(&x->block[i].src_diff[0], - &x->block[i].coeff[0], 16); - } -} - 
-static void transform_mb_4x4(MACROBLOCK *x) { - vp9_transform_mby_4x4(x); - vp9_transform_mbuv_4x4(x); -} - -static void build_dcblock_8x8(MACROBLOCK *x) { - int16_t *src_diff_ptr = x->block[24].src_diff; - int i; - - for (i = 0; i < 16; i++) { - src_diff_ptr[i] = 0; - } - src_diff_ptr[0] = x->coeff[0 * 16]; - src_diff_ptr[1] = x->coeff[4 * 16]; - src_diff_ptr[4] = x->coeff[8 * 16]; - src_diff_ptr[8] = x->coeff[12 * 16]; -} - -void vp9_transform_mby_8x8(MACROBLOCK *x) { - int i; - - for (i = 0; i < 9; i += 8) { - x->vp9_short_fdct8x8(&x->block[i].src_diff[0], - &x->block[i].coeff[0], 32); - } - for (i = 2; i < 11; i += 8) { - x->vp9_short_fdct8x8(&x->block[i].src_diff[0], - &x->block[i + 2].coeff[0], 32); - } - - if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) { - // build dc block from 2x2 y dc values - build_dcblock_8x8(x); - - // do 2nd order transform on the dc block - x->short_fhaar2x2(&x->block[24].src_diff[0], - &x->block[24].coeff[0], 8); - } -} - -void vp9_transform_mbuv_8x8(MACROBLOCK *x) { - int i; - - for (i = 16; i < 24; i += 4) { - x->vp9_short_fdct8x8(&x->block[i].src_diff[0], - &x->block[i].coeff[0], 16); - } -} - -void vp9_transform_mb_8x8(MACROBLOCK *x) { - vp9_transform_mby_8x8(x); - vp9_transform_mbuv_8x8(x); -} - -void vp9_transform_mby_16x16(MACROBLOCK *x) { - vp9_clear_system_state(); - x->vp9_short_fdct16x16(&x->block[0].src_diff[0], - &x->block[0].coeff[0], 32); -} - -void vp9_transform_mb_16x16(MACROBLOCK *x) { - vp9_transform_mby_16x16(x); - vp9_transform_mbuv_8x8(x); -} - -#define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF ) -#define RDTRUNC_8x8(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF ) -typedef struct vp9_token_state vp9_token_state; - -struct vp9_token_state { - int rate; - int error; - int next; - signed char token; - short qc; -}; - -// TODO: experiments to find optimal multiple numbers -#define Y1_RD_MULT 4 -#define UV_RD_MULT 2 -#define Y2_RD_MULT 4 - -static const int plane_rd_mult[4] = { - Y1_RD_MULT, - Y2_RD_MULT, - 
UV_RD_MULT, - Y1_RD_MULT -}; - -#define UPDATE_RD_COST()\ -{\ - rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);\ - rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);\ - if (rd_cost0 == rd_cost1) {\ - rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);\ - rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);\ - }\ -} - -static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, - ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, - const VP9_ENCODER_RTCD *rtcd, int tx_size) { - BLOCK *b; - BLOCKD *d; - vp9_token_state tokens[65][2]; - uint64_t best_mask[2]; - const short *dequant_ptr; - const short *coeff_ptr; - short *qcoeff_ptr; - short *dqcoeff_ptr; - int eob; - int i0; - int rc; - int x; - int sz = 0; - int next; - int rdmult; - int rddiv; - int final_eob; - int64_t rd_cost0, rd_cost1; - int rate0, rate1; - int error0, error1; - int t0, t1; - int best; - int band; - int pt; - int err_mult = plane_rd_mult[type]; - int default_eob; - int const *scan, *bands; - - b = &mb->block[i]; - d = &mb->e_mbd.block[i]; - switch (tx_size) { - default: - case TX_4X4: - scan = vp9_default_zig_zag1d; - bands = vp9_coef_bands; - default_eob = 16; - // TODO: this isn't called (for intra4x4 modes), but will be left in - // since it could be used later - { - TX_TYPE tx_type = get_tx_type(&mb->e_mbd, d); - if (tx_type != DCT_DCT) { - switch (tx_type) { - case ADST_DCT: - scan = vp9_row_scan; - break; - - case DCT_ADST: - scan = vp9_col_scan; - break; - - default: - scan = vp9_default_zig_zag1d; - break; - } - } else { - scan = vp9_default_zig_zag1d; - } - } - break; - case TX_8X8: - scan = vp9_default_zig_zag1d_8x8; - bands = vp9_coef_bands_8x8; - default_eob = 64; - break; - } - - dequant_ptr = d->dequant; - coeff_ptr = b->coeff; - qcoeff_ptr = d->qcoeff; - dqcoeff_ptr = d->dqcoeff; - i0 = (type == PLANE_TYPE_Y_NO_DC); - eob = d->eob; - - /* Now set up a Viterbi trellis to evaluate alternative roundings. 
*/ - rdmult = mb->rdmult * err_mult; - if (mb->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) - rdmult = (rdmult * 9) >> 4; - rddiv = mb->rddiv; - best_mask[0] = best_mask[1] = 0; - /* Initialize the sentinel node of the trellis. */ - tokens[eob][0].rate = 0; - tokens[eob][0].error = 0; - tokens[eob][0].next = default_eob; - tokens[eob][0].token = DCT_EOB_TOKEN; - tokens[eob][0].qc = 0; - *(tokens[eob] + 1) = *(tokens[eob] + 0); - next = eob; - for (i = eob; i-- > i0;) { - int base_bits; - int d2; - int dx; - - rc = scan[i]; - x = qcoeff_ptr[rc]; - /* Only add a trellis state for non-zero coefficients. */ - if (x) { - int shortcut = 0; - error0 = tokens[next][0].error; - error1 = tokens[next][1].error; - /* Evaluate the first possibility for this state. */ - rate0 = tokens[next][0].rate; - rate1 = tokens[next][1].rate; - t0 = (vp9_dct_value_tokens_ptr + x)->Token; - /* Consider both possible successor states. */ - if (next < default_eob) { - band = bands[i + 1]; - pt = vp9_prev_token_class[t0]; - rate0 += - mb->token_costs[tx_size][type][band][pt][tokens[next][0].token]; - rate1 += - mb->token_costs[tx_size][type][band][pt][tokens[next][1].token]; - } - UPDATE_RD_COST(); - /* And pick the best. */ - best = rd_cost1 < rd_cost0; - base_bits = *(vp9_dct_value_cost_ptr + x); - dx = dqcoeff_ptr[rc] - coeff_ptr[rc]; - d2 = dx * dx; - tokens[i][0].rate = base_bits + (best ? rate1 : rate0); - tokens[i][0].error = d2 + (best ? error1 : error0); - tokens[i][0].next = next; - tokens[i][0].token = t0; - tokens[i][0].qc = x; - best_mask[0] |= best << i; - /* Evaluate the second possibility for this state. */ - rate0 = tokens[next][0].rate; - rate1 = tokens[next][1].rate; - - if ((abs(x)*dequant_ptr[rc != 0] > abs(coeff_ptr[rc])) && - (abs(x)*dequant_ptr[rc != 0] < abs(coeff_ptr[rc]) + dequant_ptr[rc != 0])) - shortcut = 1; - else - shortcut = 0; - - if (shortcut) { - sz = -(x < 0); - x -= 2 * sz + 1; - } - - /* Consider both possible successor states. 
*/ - if (!x) { - /* If we reduced this coefficient to zero, check to see if - * we need to move the EOB back here. - */ - t0 = tokens[next][0].token == DCT_EOB_TOKEN ? - DCT_EOB_TOKEN : ZERO_TOKEN; - t1 = tokens[next][1].token == DCT_EOB_TOKEN ? - DCT_EOB_TOKEN : ZERO_TOKEN; - } else { - t0 = t1 = (vp9_dct_value_tokens_ptr + x)->Token; - } - if (next < default_eob) { - band = bands[i + 1]; - if (t0 != DCT_EOB_TOKEN) { - pt = vp9_prev_token_class[t0]; - rate0 += mb->token_costs[tx_size][type][band][pt][ - tokens[next][0].token]; - } - if (t1 != DCT_EOB_TOKEN) { - pt = vp9_prev_token_class[t1]; - rate1 += mb->token_costs[tx_size][type][band][pt][ - tokens[next][1].token]; - } - } - - UPDATE_RD_COST(); - /* And pick the best. */ - best = rd_cost1 < rd_cost0; - base_bits = *(vp9_dct_value_cost_ptr + x); - - if (shortcut) { - dx -= (dequant_ptr[rc != 0] + sz) ^ sz; - d2 = dx * dx; - } - tokens[i][1].rate = base_bits + (best ? rate1 : rate0); - tokens[i][1].error = d2 + (best ? error1 : error0); - tokens[i][1].next = next; - tokens[i][1].token = best ? t1 : t0; - tokens[i][1].qc = x; - best_mask[1] |= best << i; - /* Finally, make this the new head of the trellis. */ - next = i; - } - /* There's no choice to make for a zero coefficient, so we don't - * add a new trellis node, but we do need to update the costs. - */ - else { - band = bands[i + 1]; - t0 = tokens[next][0].token; - t1 = tokens[next][1].token; - /* Update the cost of each path if we're past the EOB token. */ - if (t0 != DCT_EOB_TOKEN) { - tokens[next][0].rate += mb->token_costs[tx_size][type][band][0][t0]; - tokens[next][0].token = ZERO_TOKEN; - } - if (t1 != DCT_EOB_TOKEN) { - tokens[next][1].rate += mb->token_costs[tx_size][type][band][0][t1]; - tokens[next][1].token = ZERO_TOKEN; - } - /* Don't update next, because we didn't add a new node. */ - } - } - - /* Now pick the best path through the whole trellis. 
*/ - band = bands[i + 1]; - VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l); - rate0 = tokens[next][0].rate; - rate1 = tokens[next][1].rate; - error0 = tokens[next][0].error; - error1 = tokens[next][1].error; - t0 = tokens[next][0].token; - t1 = tokens[next][1].token; - rate0 += mb->token_costs[tx_size][type][band][pt][t0]; - rate1 += mb->token_costs[tx_size][type][band][pt][t1]; - UPDATE_RD_COST(); - best = rd_cost1 < rd_cost0; - final_eob = i0 - 1; - for (i = next; i < eob; i = next) { - x = tokens[i][best].qc; - if (x) - final_eob = i; - rc = scan[i]; - qcoeff_ptr[rc] = x; - dqcoeff_ptr[rc] = (x * dequant_ptr[rc != 0]); - - next = tokens[i][best].next; - best = (best_mask[best] >> i) & 1; - } - final_eob++; - - d->eob = final_eob; - *a = *l = (d->eob != !type); -} - -/************************************************************************** -our inverse hadamard transform effectively is weighted sum of all 16 inputs -with weight either 1 or -1. It has a last stage scaling of (sum+1)>>2. And -dc only idct is (dc+16)>>5. So if all the sums are between -65 and 63 the -output after inverse wht and idct will be all zero. A sum of absolute value -smaller than 65 guarantees all 16 different (+1/-1) weighted sums in wht -fall between -65 and +65. -**************************************************************************/ -#define SUM_2ND_COEFF_THRESH 65 - -static void check_reset_2nd_coeffs(MACROBLOCKD *xd, - ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) { - int sum = 0; - int i; - BLOCKD *bd = &xd->block[24]; - if (bd->dequant[0] >= SUM_2ND_COEFF_THRESH - && bd->dequant[1] >= SUM_2ND_COEFF_THRESH) - return; - - for (i = 0; i < bd->eob; i++) { - int coef = bd->dqcoeff[vp9_default_zig_zag1d[i]]; - sum += (coef >= 0) ? 
coef : -coef; - if (sum >= SUM_2ND_COEFF_THRESH) - return; - } - - if (sum < SUM_2ND_COEFF_THRESH) { - for (i = 0; i < bd->eob; i++) { - int rc = vp9_default_zig_zag1d[i]; - bd->qcoeff[rc] = 0; - bd->dqcoeff[rc] = 0; - } - bd->eob = 0; - *a = *l = (bd->eob != 0); - } -} - -#define SUM_2ND_COEFF_THRESH_8X8 32 -static void check_reset_8x8_2nd_coeffs(MACROBLOCKD *xd, - ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) { - int sum = 0; - BLOCKD *bd = &xd->block[24]; - int coef; - - coef = bd->dqcoeff[0]; - sum += (coef >= 0) ? coef : -coef; - coef = bd->dqcoeff[1]; - sum += (coef >= 0) ? coef : -coef; - coef = bd->dqcoeff[4]; - sum += (coef >= 0) ? coef : -coef; - coef = bd->dqcoeff[8]; - sum += (coef >= 0) ? coef : -coef; - - if (sum < SUM_2ND_COEFF_THRESH_8X8) { - bd->qcoeff[0] = 0; - bd->dqcoeff[0] = 0; - bd->qcoeff[1] = 0; - bd->dqcoeff[1] = 0; - bd->qcoeff[4] = 0; - bd->dqcoeff[4] = 0; - bd->qcoeff[8] = 0; - bd->dqcoeff[8] = 0; - bd->eob = 0; - *a = *l = (bd->eob != 0); - } -} - -void vp9_optimize_mby_4x4(MACROBLOCK *x, const VP9_ENCODER_RTCD *rtcd) { - int b; - PLANE_TYPE type; - int has_2nd_order; - ENTROPY_CONTEXT_PLANES t_above, t_left; - ENTROPY_CONTEXT *ta; - ENTROPY_CONTEXT *tl; - MB_PREDICTION_MODE mode = x->e_mbd.mode_info_context->mbmi.mode; - - if (!x->e_mbd.above_context || !x->e_mbd.left_context) - return; - - vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); - - ta = (ENTROPY_CONTEXT *)&t_above; - tl = (ENTROPY_CONTEXT *)&t_left; - - has_2nd_order = (mode != B_PRED && mode != I8X8_PRED && mode != SPLITMV); - type = has_2nd_order ? 
PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC; - - for (b = 0; b < 16; b++) { - optimize_b(x, b, type, - ta + vp9_block2above[b], tl + vp9_block2left[b], rtcd, TX_4X4); - } - - if (has_2nd_order) { - b = 24; - optimize_b(x, b, PLANE_TYPE_Y2, - ta + vp9_block2above[b], tl + vp9_block2left[b], rtcd, TX_4X4); - check_reset_2nd_coeffs(&x->e_mbd, - ta + vp9_block2above[b], tl + vp9_block2left[b]); - } -} - -void vp9_optimize_mbuv_4x4(MACROBLOCK *x, const VP9_ENCODER_RTCD *rtcd) { - int b; - ENTROPY_CONTEXT_PLANES t_above, t_left; - ENTROPY_CONTEXT *ta; - ENTROPY_CONTEXT *tl; - - if (!x->e_mbd.above_context || !x->e_mbd.left_context) - return; - - vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); - - ta = (ENTROPY_CONTEXT *)&t_above; - tl = (ENTROPY_CONTEXT *)&t_left; - - for (b = 16; b < 24; b++) { - optimize_b(x, b, PLANE_TYPE_UV, - ta + vp9_block2above[b], tl + vp9_block2left[b], rtcd, TX_4X4); - } -} - -static void optimize_mb_4x4(MACROBLOCK *x, const VP9_ENCODER_RTCD *rtcd) { - vp9_optimize_mby_4x4(x, rtcd); - vp9_optimize_mbuv_4x4(x, rtcd); -} - -void vp9_optimize_mby_8x8(MACROBLOCK *x, const VP9_ENCODER_RTCD *rtcd) { - int b; - PLANE_TYPE type; - ENTROPY_CONTEXT_PLANES t_above, t_left; - ENTROPY_CONTEXT *ta; - ENTROPY_CONTEXT *tl; - int has_2nd_order = x->e_mbd.mode_info_context->mbmi.mode != SPLITMV; - - if (!x->e_mbd.above_context || !x->e_mbd.left_context) - return; - - vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); - - ta = (ENTROPY_CONTEXT *)&t_above; - tl = (ENTROPY_CONTEXT *)&t_left; - type = has_2nd_order ? 
PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC; - for (b = 0; b < 16; b += 4) { - optimize_b(x, b, type, - ta + vp9_block2above_8x8[b], tl + vp9_block2left_8x8[b], - rtcd, TX_8X8); - ta[vp9_block2above_8x8[b] + 1] = ta[vp9_block2above_8x8[b]]; - tl[vp9_block2left_8x8[b] + 1] = tl[vp9_block2left_8x8[b]]; - } - - // 8x8 always have 2nd roder haar block - if (has_2nd_order) { - check_reset_8x8_2nd_coeffs(&x->e_mbd, - ta + vp9_block2above_8x8[24], - tl + vp9_block2left_8x8[24]); - } -} - -void vp9_optimize_mbuv_8x8(MACROBLOCK *x, const VP9_ENCODER_RTCD *rtcd) { - int b; - ENTROPY_CONTEXT_PLANES t_above, t_left; - ENTROPY_CONTEXT *ta; - ENTROPY_CONTEXT *tl; - - if (!x->e_mbd.above_context || !x->e_mbd.left_context) - return; - - vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); - - ta = (ENTROPY_CONTEXT *)&t_above; - tl = (ENTROPY_CONTEXT *)&t_left; - - for (b = 16; b < 24; b += 4) { - optimize_b(x, b, PLANE_TYPE_UV, - ta + vp9_block2above_8x8[b], tl + vp9_block2left_8x8[b], - rtcd, TX_8X8); - ta[vp9_block2above_8x8[b] + 1] = ta[vp9_block2above_8x8[b]]; - tl[vp9_block2left_8x8[b] + 1] = tl[vp9_block2left_8x8[b]]; - } -} - -static void optimize_mb_8x8(MACROBLOCK *x, const VP9_ENCODER_RTCD *rtcd) { - vp9_optimize_mby_8x8(x, rtcd); - vp9_optimize_mbuv_8x8(x, rtcd); -} - -static void optimize_b_16x16(MACROBLOCK *mb, int i, PLANE_TYPE type, - ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, - const VP9_ENCODER_RTCD *rtcd) { - BLOCK *b = &mb->block[i]; - BLOCKD *d = &mb->e_mbd.block[i]; - vp9_token_state tokens[257][2]; - unsigned best_index[257][2]; - const short *dequant_ptr = d->dequant, *coeff_ptr = b->coeff; - short *qcoeff_ptr = qcoeff_ptr = d->qcoeff; - short *dqcoeff_ptr = dqcoeff_ptr = d->dqcoeff; - int eob = d->eob, final_eob, sz = 0; - int rc, x, next; - int64_t rdmult, rddiv, rd_cost0, rd_cost1; - int rate0, rate1, error0, error1, t0, t1; - int best, band, pt; - int 
err_mult = plane_rd_mult[type]; - - /* Now set up a Viterbi trellis to evaluate alternative roundings. */ - rdmult = mb->rdmult * err_mult; - if (mb->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) - rdmult = (rdmult * 9)>>4; - rddiv = mb->rddiv; - memset(best_index, 0, sizeof(best_index)); - /* Initialize the sentinel node of the trellis. */ - tokens[eob][0].rate = 0; - tokens[eob][0].error = 0; - tokens[eob][0].next = 256; - tokens[eob][0].token = DCT_EOB_TOKEN; - tokens[eob][0].qc = 0; - *(tokens[eob] + 1) = *(tokens[eob] + 0); - next = eob; - for (i = eob; i-- > 0;) { - int base_bits, d2, dx; - - rc = vp9_default_zig_zag1d_16x16[i]; - x = qcoeff_ptr[rc]; - /* Only add a trellis state for non-zero coefficients. */ - if (x) { - int shortcut = 0; - error0 = tokens[next][0].error; - error1 = tokens[next][1].error; - /* Evaluate the first possibility for this state. */ - rate0 = tokens[next][0].rate; - rate1 = tokens[next][1].rate; - t0 = (vp9_dct_value_tokens_ptr + x)->Token; - /* Consider both possible successor states. */ - if (next < 256) { - band = vp9_coef_bands_16x16[i + 1]; - pt = vp9_prev_token_class[t0]; - rate0 += mb->token_costs[TX_16X16][type][band][pt][tokens[next][0].token]; - rate1 += mb->token_costs[TX_16X16][type][band][pt][tokens[next][1].token]; - } - UPDATE_RD_COST(); - /* And pick the best. */ - best = rd_cost1 < rd_cost0; - base_bits = *(vp9_dct_value_cost_ptr + x); - dx = dqcoeff_ptr[rc] - coeff_ptr[rc]; - d2 = dx*dx; - tokens[i][0].rate = base_bits + (best ? rate1 : rate0); - tokens[i][0].error = d2 + (best ? error1 : error0); - tokens[i][0].next = next; - tokens[i][0].token = t0; - tokens[i][0].qc = x; - best_index[i][0] = best; - /* Evaluate the second possibility for this state. 
*/ - rate0 = tokens[next][0].rate; - rate1 = tokens[next][1].rate; - - if((abs(x)*dequant_ptr[rc!=0]>abs(coeff_ptr[rc])) && - (abs(x)*dequant_ptr[rc!=0]<abs(coeff_ptr[rc])+dequant_ptr[rc!=0])) - shortcut = 1; - else - shortcut = 0; - - if (shortcut) { - sz = -(x < 0); - x -= 2*sz + 1; - } - - /* Consider both possible successor states. */ - if (!x) { - /* If we reduced this coefficient to zero, check to see if - * we need to move the EOB back here. - */ - t0 = tokens[next][0].token == DCT_EOB_TOKEN ? - DCT_EOB_TOKEN : ZERO_TOKEN; - t1 = tokens[next][1].token == DCT_EOB_TOKEN ? - DCT_EOB_TOKEN : ZERO_TOKEN; - } - else - t0=t1 = (vp9_dct_value_tokens_ptr + x)->Token; - if (next < 256) { - band = vp9_coef_bands_16x16[i + 1]; - if (t0 != DCT_EOB_TOKEN) { - pt = vp9_prev_token_class[t0]; - rate0 += mb->token_costs[TX_16X16][type][band][pt] - [tokens[next][0].token]; - } - if (t1!=DCT_EOB_TOKEN) { - pt = vp9_prev_token_class[t1]; - rate1 += mb->token_costs[TX_16X16][type][band][pt] - [tokens[next][1].token]; - } - } - UPDATE_RD_COST(); - /* And pick the best. */ - best = rd_cost1 < rd_cost0; - base_bits = *(vp9_dct_value_cost_ptr + x); - - if(shortcut) { - dx -= (dequant_ptr[rc!=0] + sz) ^ sz; - d2 = dx*dx; - } - tokens[i][1].rate = base_bits + (best ? rate1 : rate0); - tokens[i][1].error = d2 + (best ? error1 : error0); - tokens[i][1].next = next; - tokens[i][1].token = best ? t1 : t0; - tokens[i][1].qc = x; - best_index[i][1] = best; - /* Finally, make this the new head of the trellis. */ - next = i; - } - /* There's no choice to make for a zero coefficient, so we don't - * add a new trellis node, but we do need to update the costs. - */ - else { - band = vp9_coef_bands_16x16[i + 1]; - t0 = tokens[next][0].token; - t1 = tokens[next][1].token; - /* Update the cost of each path if we're past the EOB token. 
*/ - if (t0 != DCT_EOB_TOKEN) { - tokens[next][0].rate += mb->token_costs[TX_16X16][type][band][0][t0]; - tokens[next][0].token = ZERO_TOKEN; - } - if (t1 != DCT_EOB_TOKEN) { - tokens[next][1].rate += mb->token_costs[TX_16X16][type][band][0][t1]; - tokens[next][1].token = ZERO_TOKEN; - } - /* Don't update next, because we didn't add a new node. */ - } - } - - /* Now pick the best path through the whole trellis. */ - band = vp9_coef_bands_16x16[i + 1]; - VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l); - rate0 = tokens[next][0].rate; - rate1 = tokens[next][1].rate; - error0 = tokens[next][0].error; - error1 = tokens[next][1].error; - t0 = tokens[next][0].token; - t1 = tokens[next][1].token; - rate0 += mb->token_costs[TX_16X16][type][band][pt][t0]; - rate1 += mb->token_costs[TX_16X16][type][band][pt][t1]; - UPDATE_RD_COST(); - best = rd_cost1 < rd_cost0; - final_eob = -1; - - for (i = next; i < eob; i = next) { - x = tokens[i][best].qc; - if (x) - final_eob = i; - rc = vp9_default_zig_zag1d_16x16[i]; - qcoeff_ptr[rc] = x; - dqcoeff_ptr[rc] = (x * dequant_ptr[rc!=0]); - - next = tokens[i][best].next; - best = best_index[i][best]; - } - final_eob++; - - d->eob = final_eob; - *a = *l = (d->eob != !type); -} - -void vp9_optimize_mby_16x16(MACROBLOCK *x, const VP9_ENCODER_RTCD *rtcd) { - ENTROPY_CONTEXT_PLANES t_above, t_left; - ENTROPY_CONTEXT *ta, *tl; - - if (!x->e_mbd.above_context || !x->e_mbd.left_context) - return; - - vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); - - ta = (ENTROPY_CONTEXT *)&t_above; - tl = (ENTROPY_CONTEXT *)&t_left; - optimize_b_16x16(x, 0, PLANE_TYPE_Y_WITH_DC, ta, tl, rtcd); -} - -static void optimize_mb_16x16(MACROBLOCK *x, const VP9_ENCODER_RTCD *rtcd) { - vp9_optimize_mby_16x16(x, rtcd); - vp9_optimize_mbuv_8x8(x, rtcd); -} - -void vp9_encode_inter16x16(const VP9_ENCODER_RTCD *rtcd, MACROBLOCK *x) { - MACROBLOCKD *xd = &x->e_mbd; - TX_SIZE 
tx_size = xd->mode_info_context->mbmi.txfm_size; - - vp9_build_inter_predictors_mb(xd); - subtract_mb(rtcd, x); - - if (tx_size == TX_16X16) { - vp9_transform_mb_16x16(x); - vp9_quantize_mb_16x16(x); - if (x->optimize) - optimize_mb_16x16(x, rtcd); - vp9_inverse_transform_mb_16x16(IF_RTCD(&rtcd->common->idct), xd); - } else if (tx_size == TX_8X8) { - if (xd->mode_info_context->mbmi.mode == SPLITMV) { - assert(xd->mode_info_context->mbmi.partitioning != PARTITIONING_4X4); - vp9_transform_mby_8x8(x); - vp9_transform_mbuv_4x4(x); - vp9_quantize_mby_8x8(x); - vp9_quantize_mbuv_4x4(x); - if (x->optimize) { - vp9_optimize_mby_8x8(x, rtcd); - vp9_optimize_mbuv_4x4(x, rtcd); - } - vp9_inverse_transform_mby_8x8(IF_RTCD(&rtcd->common->idct), xd); - vp9_inverse_transform_mbuv_4x4(IF_RTCD(&rtcd->common->idct), xd); - } else { - vp9_transform_mb_8x8(x); - vp9_quantize_mb_8x8(x); - if (x->optimize) - optimize_mb_8x8(x, rtcd); - vp9_inverse_transform_mb_8x8(IF_RTCD(&rtcd->common->idct), xd); - } - } else { - transform_mb_4x4(x); - vp9_quantize_mb_4x4(x); - if (x->optimize) - optimize_mb_4x4(x, rtcd); - vp9_inverse_transform_mb_4x4(IF_RTCD(&rtcd->common->idct), xd); - } - - vp9_recon_mb(xd); -} - -/* this function is used by first pass only */ -void vp9_encode_inter16x16y(const VP9_ENCODER_RTCD *rtcd, MACROBLOCK *x) { - MACROBLOCKD *xd = &x->e_mbd; - BLOCK *b = &x->block[0]; - -#if CONFIG_PRED_FILTER - // Disable the prediction filter for firstpass - xd->mode_info_context->mbmi.pred_filter_enabled = 0; -#endif - - vp9_build_1st_inter16x16_predictors_mby(xd, xd->predictor, 16, 0); - - vp9_subtract_mby(x->src_diff, *(b->base_src), xd->predictor, b->src_stride); - - vp9_transform_mby_4x4(x); - vp9_quantize_mby_4x4(x); - vp9_inverse_transform_mby_4x4(IF_RTCD(&rtcd->common->idct), xd); - - vp9_recon_mby(xd); -} diff --git a/vp8/encoder/encodemb.h b/vp8/encoder/encodemb.h deleted file mode 100644 index 8a3d38f1d..000000000 --- a/vp8/encoder/encodemb.h +++ /dev/null @@ -1,70 +0,0 @@ -/* 
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef __INC_ENCODEMB_H -#define __INC_ENCODEMB_H - -#include "vpx_ports/config.h" -#include "block.h" - -typedef struct { - MB_PREDICTION_MODE mode; - MV_REFERENCE_FRAME ref_frame; - MV_REFERENCE_FRAME second_ref_frame; -#if CONFIG_PRED_FILTER - int pred_filter_flag; -#endif -} MODE_DEFINITION; - - -#if CONFIG_RUNTIME_CPU_DETECT -#define ENCODEMB_INVOKE(ctx,fn) (ctx)->fn -#else -#define ENCODEMB_INVOKE(ctx,fn) vp9_encodemb_##fn -#endif - - - -#include "onyx_int.h" -struct VP9_ENCODER_RTCD; -void vp9_encode_inter16x16(const struct VP9_ENCODER_RTCD *rtcd, MACROBLOCK *x); - -void vp9_transform_mbuv_4x4(MACROBLOCK *x); -void vp9_transform_mby_4x4(MACROBLOCK *x); - -void vp9_optimize_mby_4x4(MACROBLOCK *x, const struct VP9_ENCODER_RTCD *rtcd); -void vp9_optimize_mbuv_4x4(MACROBLOCK *x, const struct VP9_ENCODER_RTCD *rtcd); -void vp9_encode_inter16x16y(const struct VP9_ENCODER_RTCD *rtcd, MACROBLOCK *x); - -void vp9_transform_mb_8x8(MACROBLOCK *mb); -void vp9_transform_mby_8x8(MACROBLOCK *x); -void vp9_transform_mbuv_8x8(MACROBLOCK *x); -void vp9_build_dcblock_8x8(MACROBLOCK *b); -void vp9_optimize_mby_8x8(MACROBLOCK *x, const struct VP9_ENCODER_RTCD *rtcd); -void vp9_optimize_mbuv_8x8(MACROBLOCK *x, const struct VP9_ENCODER_RTCD *rtcd); - -void vp9_transform_mb_16x16(MACROBLOCK *mb); -void vp9_transform_mby_16x16(MACROBLOCK *x); -void vp9_optimize_mby_16x16(MACROBLOCK *x, const struct VP9_ENCODER_RTCD *rtcd); - -void vp9_subtract_4b_c(BLOCK *be, BLOCKD *bd, int pitch); - -#if CONFIG_SUPERBLOCKS -void vp9_subtract_mbuv_s_c(short *diff, const unsigned char *usrc, 
- const unsigned char *vsrc, int src_stride, - const unsigned char *upred, - const unsigned char *vpred, int dst_stride); -void vp9_subtract_mby_s_c(short *diff, const unsigned char *src, - int src_stride, const unsigned char *pred, - int dst_stride); -#endif - -#endif diff --git a/vp8/encoder/encodemv.c b/vp8/encoder/encodemv.c deleted file mode 100644 index 675fb0b59..000000000 --- a/vp8/encoder/encodemv.c +++ /dev/null @@ -1,547 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vp8/common/common.h" -#include "encodemv.h" -#include "vp8/common/entropymode.h" -#include "vp8/common/systemdependent.h" - -#include <math.h> - -#ifdef ENTROPY_STATS -extern unsigned int active_section; -#endif - -#ifdef NMV_STATS -nmv_context_counts tnmvcounts; -#endif - -static void encode_nmv_component(vp9_writer* const bc, - int v, - int r, - const nmv_component* const mvcomp) { - int s, z, c, o, d; - assert (v != 0); /* should not be zero */ - s = v < 0; - vp9_write(bc, s, mvcomp->sign); - z = (s ? 
-v : v) - 1; /* magnitude - 1 */ - - c = vp9_get_mv_class(z, &o); - - write_token(bc, vp9_mv_class_tree, mvcomp->classes, - vp9_mv_class_encodings + c); - - d = (o >> 3); /* int mv data */ - - if (c == MV_CLASS_0) { - write_token(bc, vp9_mv_class0_tree, mvcomp->class0, - vp9_mv_class0_encodings + d); - } else { - int i, b; - b = c + CLASS0_BITS - 1; /* number of bits */ - for (i = 0; i < b; ++i) - vp9_write(bc, ((d >> i) & 1), mvcomp->bits[i]); - } -} - -static void encode_nmv_component_fp(vp9_writer *bc, - int v, - int r, - const nmv_component* const mvcomp, - int usehp) { - int s, z, c, o, d, f, e; - assert (v != 0); /* should not be zero */ - s = v < 0; - z = (s ? -v : v) - 1; /* magnitude - 1 */ - - c = vp9_get_mv_class(z, &o); - - d = (o >> 3); /* int mv data */ - f = (o >> 1) & 3; /* fractional pel mv data */ - e = (o & 1); /* high precision mv data */ - - /* Code the fractional pel bits */ - if (c == MV_CLASS_0) { - write_token(bc, vp9_mv_fp_tree, mvcomp->class0_fp[d], - vp9_mv_fp_encodings + f); - } else { - write_token(bc, vp9_mv_fp_tree, mvcomp->fp, - vp9_mv_fp_encodings + f); - } - /* Code the high precision bit */ - if (usehp) { - if (c == MV_CLASS_0) { - vp9_write(bc, e, mvcomp->class0_hp); - } else { - vp9_write(bc, e, mvcomp->hp); - } - } -} - -static void build_nmv_component_cost_table(int *mvcost, - const nmv_component* const mvcomp, - int usehp) { - int i, v; - int sign_cost[2], class_cost[MV_CLASSES], class0_cost[CLASS0_SIZE]; - int bits_cost[MV_OFFSET_BITS][2]; - int class0_fp_cost[CLASS0_SIZE][4], fp_cost[4]; - int class0_hp_cost[2], hp_cost[2]; - - sign_cost[0] = vp9_cost_zero(mvcomp->sign); - sign_cost[1] = vp9_cost_one(mvcomp->sign); - vp9_cost_tokens(class_cost, mvcomp->classes, vp9_mv_class_tree); - vp9_cost_tokens(class0_cost, mvcomp->class0, vp9_mv_class0_tree); - for (i = 0; i < MV_OFFSET_BITS; ++i) { - bits_cost[i][0] = vp9_cost_zero(mvcomp->bits[i]); - bits_cost[i][1] = vp9_cost_one(mvcomp->bits[i]); - } - - for (i = 0; i < 
CLASS0_SIZE; ++i) - vp9_cost_tokens(class0_fp_cost[i], mvcomp->class0_fp[i], vp9_mv_fp_tree); - vp9_cost_tokens(fp_cost, mvcomp->fp, vp9_mv_fp_tree); - - if (usehp) { - class0_hp_cost[0] = vp9_cost_zero(mvcomp->class0_hp); - class0_hp_cost[1] = vp9_cost_one(mvcomp->class0_hp); - hp_cost[0] = vp9_cost_zero(mvcomp->hp); - hp_cost[1] = vp9_cost_one(mvcomp->hp); - } - mvcost[0] = 0; - for (v = 1; v <= MV_MAX; ++v) { - int z, c, o, d, e, f, cost = 0; - z = v - 1; - c = vp9_get_mv_class(z, &o); - cost += class_cost[c]; - d = (o >> 3); /* int mv data */ - f = (o >> 1) & 3; /* fractional pel mv data */ - e = (o & 1); /* high precision mv data */ - if (c == MV_CLASS_0) { - cost += class0_cost[d]; - } else { - int i, b; - b = c + CLASS0_BITS - 1; /* number of bits */ - for (i = 0; i < b; ++i) - cost += bits_cost[i][((d >> i) & 1)]; - } - if (c == MV_CLASS_0) { - cost += class0_fp_cost[d][f]; - } else { - cost += fp_cost[f]; - } - if (usehp) { - if (c == MV_CLASS_0) { - cost += class0_hp_cost[e]; - } else { - cost += hp_cost[e]; - } - } - mvcost[v] = cost + sign_cost[0]; - mvcost[-v] = cost + sign_cost[1]; - } -} - -static int update_nmv_savings(const unsigned int ct[2], - const vp9_prob cur_p, - const vp9_prob new_p, - const vp9_prob upd_p) { - -#ifdef LOW_PRECISION_MV_UPDATE - vp9_prob mod_p = new_p | 1; -#else - vp9_prob mod_p = new_p; -#endif - const int cur_b = cost_branch256(ct, cur_p); - const int mod_b = cost_branch256(ct, mod_p); - const int cost = 7 * 256 + -#ifndef LOW_PRECISION_MV_UPDATE - 256 + -#endif - (vp9_cost_one(upd_p) - vp9_cost_zero(upd_p)); - if (cur_b - mod_b - cost > 0) { - return cur_b - mod_b - cost; - } else { - return -vp9_cost_zero(upd_p); - } -} - -static int update_nmv( - vp9_writer *const bc, - const unsigned int ct[2], - vp9_prob *const cur_p, - const vp9_prob new_p, - const vp9_prob upd_p) { - -#ifdef LOW_PRECISION_MV_UPDATE - vp9_prob mod_p = new_p | 1; -#else - vp9_prob mod_p = new_p; -#endif - - const int cur_b = cost_branch256(ct, 
*cur_p); - const int mod_b = cost_branch256(ct, mod_p); - const int cost = 7 * 256 + -#ifndef LOW_PRECISION_MV_UPDATE - 256 + -#endif - (vp9_cost_one(upd_p) - vp9_cost_zero(upd_p)); - - if (cur_b - mod_b > cost) { - *cur_p = mod_p; - vp9_write(bc, 1, upd_p); -#ifdef LOW_PRECISION_MV_UPDATE - vp9_write_literal(bc, mod_p >> 1, 7); -#else - vp9_write_literal(bc, mod_p, 8); -#endif - return 1; - } else { - vp9_write(bc, 0, upd_p); - return 0; - } -} - -#ifdef NMV_STATS -void init_nmvstats() { - vp9_zero(tnmvcounts); -} - -void print_nmvstats() { - nmv_context prob; - unsigned int branch_ct_joint[MV_JOINTS - 1][2]; - unsigned int branch_ct_sign[2][2]; - unsigned int branch_ct_classes[2][MV_CLASSES - 1][2]; - unsigned int branch_ct_class0[2][CLASS0_SIZE - 1][2]; - unsigned int branch_ct_bits[2][MV_OFFSET_BITS][2]; - unsigned int branch_ct_class0_fp[2][CLASS0_SIZE][4 - 1][2]; - unsigned int branch_ct_fp[2][4 - 1][2]; - unsigned int branch_ct_class0_hp[2][2]; - unsigned int branch_ct_hp[2][2]; - int i, j, k; - vp9_counts_to_nmv_context(&tnmvcounts, &prob, 1, - branch_ct_joint, branch_ct_sign, branch_ct_classes, - branch_ct_class0, branch_ct_bits, - branch_ct_class0_fp, branch_ct_fp, - branch_ct_class0_hp, branch_ct_hp); - - printf("\nCounts =\n { "); - for (j = 0; j < MV_JOINTS; ++j) - printf("%d, ", tnmvcounts.joints[j]); - printf("},\n"); - for (i=0; i< 2; ++i) { - printf(" {\n"); - printf(" %d/%d,\n", tnmvcounts.comps[i].sign[0], - tnmvcounts.comps[i].sign[1]); - printf(" { "); - for (j = 0; j < MV_CLASSES; ++j) - printf("%d, ", tnmvcounts.comps[i].classes[j]); - printf("},\n"); - printf(" { "); - for (j = 0; j < CLASS0_SIZE; ++j) - printf("%d, ", tnmvcounts.comps[i].class0[j]); - printf("},\n"); - printf(" { "); - for (j = 0; j < MV_OFFSET_BITS; ++j) - printf("%d/%d, ", tnmvcounts.comps[i].bits[j][0], - tnmvcounts.comps[i].bits[j][1]); - printf("},\n"); - - printf(" {"); - for (j = 0; j < CLASS0_SIZE; ++j) { - printf("{"); - for (k = 0; k < 4; ++k) - printf("%d, ", 
tnmvcounts.comps[i].class0_fp[j][k]); - printf("}, "); - } - printf("},\n"); - - printf(" { "); - for (j = 0; j < 4; ++j) - printf("%d, ", tnmvcounts.comps[i].fp[j]); - printf("},\n"); - - printf(" %d/%d,\n", - tnmvcounts.comps[i].class0_hp[0], - tnmvcounts.comps[i].class0_hp[1]); - printf(" %d/%d,\n", - tnmvcounts.comps[i].hp[0], - tnmvcounts.comps[i].hp[1]); - printf(" },\n"); - } - - printf("\nProbs =\n { "); - for (j = 0; j < MV_JOINTS - 1; ++j) - printf("%d, ", prob.joints[j]); - printf("},\n"); - for (i=0; i< 2; ++i) { - printf(" {\n"); - printf(" %d,\n", prob.comps[i].sign); - printf(" { "); - for (j = 0; j < MV_CLASSES - 1; ++j) - printf("%d, ", prob.comps[i].classes[j]); - printf("},\n"); - printf(" { "); - for (j = 0; j < CLASS0_SIZE - 1; ++j) - printf("%d, ", prob.comps[i].class0[j]); - printf("},\n"); - printf(" { "); - for (j = 0; j < MV_OFFSET_BITS; ++j) - printf("%d, ", prob.comps[i].bits[j]); - printf("},\n"); - printf(" { "); - for (j = 0; j < CLASS0_SIZE; ++j) { - printf("{"); - for (k = 0; k < 3; ++k) - printf("%d, ", prob.comps[i].class0_fp[j][k]); - printf("}, "); - } - printf("},\n"); - printf(" { "); - for (j = 0; j < 3; ++j) - printf("%d, ", prob.comps[i].fp[j]); - printf("},\n"); - - printf(" %d,\n", prob.comps[i].class0_hp); - printf(" %d,\n", prob.comps[i].hp); - printf(" },\n"); - } -} - -static void add_nmvcount(nmv_context_counts* const dst, - const nmv_context_counts* const src) { - int i, j, k; - for (j = 0; j < MV_JOINTS; ++j) { - dst->joints[j] += src->joints[j]; - } - for (i = 0; i < 2; ++i) { - for (j = 0; j < MV_VALS; ++j) { - dst->comps[i].mvcount[j] += src->comps[i].mvcount[j]; - } - dst->comps[i].sign[0] += src->comps[i].sign[0]; - dst->comps[i].sign[1] += src->comps[i].sign[1]; - for (j = 0; j < MV_CLASSES; ++j) { - dst->comps[i].classes[j] += src->comps[i].classes[j]; - } - for (j = 0; j < CLASS0_SIZE; ++j) { - dst->comps[i].class0[j] += src->comps[i].class0[j]; - } - for (j = 0; j < MV_OFFSET_BITS; ++j) { - 
dst->comps[i].bits[j][0] += src->comps[i].bits[j][0]; - dst->comps[i].bits[j][1] += src->comps[i].bits[j][1]; - } - } - for (i = 0; i < 2; ++i) { - for (j = 0; j < CLASS0_SIZE; ++j) { - for (k = 0; k < 4; ++k) { - dst->comps[i].class0_fp[j][k] += src->comps[i].class0_fp[j][k]; - } - } - for (j = 0; j < 4; ++j) { - dst->comps[i].fp[j] += src->comps[i].fp[j]; - } - dst->comps[i].class0_hp[0] += src->comps[i].class0_hp[0]; - dst->comps[i].class0_hp[1] += src->comps[i].class0_hp[1]; - dst->comps[i].hp[0] += src->comps[i].hp[0]; - dst->comps[i].hp[1] += src->comps[i].hp[1]; - } -} -#endif - -void vp9_write_nmvprobs(VP9_COMP* const cpi, int usehp, vp9_writer* const bc) { - int i, j; - nmv_context prob; - unsigned int branch_ct_joint[MV_JOINTS - 1][2]; - unsigned int branch_ct_sign[2][2]; - unsigned int branch_ct_classes[2][MV_CLASSES - 1][2]; - unsigned int branch_ct_class0[2][CLASS0_SIZE - 1][2]; - unsigned int branch_ct_bits[2][MV_OFFSET_BITS][2]; - unsigned int branch_ct_class0_fp[2][CLASS0_SIZE][4 - 1][2]; - unsigned int branch_ct_fp[2][4 - 1][2]; - unsigned int branch_ct_class0_hp[2][2]; - unsigned int branch_ct_hp[2][2]; - int savings = 0; - -#ifdef NMV_STATS - if (!cpi->dummy_packing) - add_nmvcount(&tnmvcounts, &cpi->NMVcount); -#endif - vp9_counts_to_nmv_context(&cpi->NMVcount, &prob, usehp, - branch_ct_joint, branch_ct_sign, branch_ct_classes, - branch_ct_class0, branch_ct_bits, - branch_ct_class0_fp, branch_ct_fp, - branch_ct_class0_hp, branch_ct_hp); - /* write updates if they help */ -#ifdef MV_GROUP_UPDATE - for (j = 0; j < MV_JOINTS - 1; ++j) { - savings += update_nmv_savings(branch_ct_joint[j], - cpi->common.fc.nmvc.joints[j], - prob.joints[j], - VP9_NMV_UPDATE_PROB); - } - for (i = 0; i < 2; ++i) { - savings += update_nmv_savings(branch_ct_sign[i], - cpi->common.fc.nmvc.comps[i].sign, - prob.comps[i].sign, - VP9_NMV_UPDATE_PROB); - for (j = 0; j < MV_CLASSES - 1; ++j) { - savings += update_nmv_savings(branch_ct_classes[i][j], - 
cpi->common.fc.nmvc.comps[i].classes[j], - prob.comps[i].classes[j], - VP9_NMV_UPDATE_PROB); - } - for (j = 0; j < CLASS0_SIZE - 1; ++j) { - savings += update_nmv_savings(branch_ct_class0[i][j], - cpi->common.fc.nmvc.comps[i].class0[j], - prob.comps[i].class0[j], - VP9_NMV_UPDATE_PROB); - } - for (j = 0; j < MV_OFFSET_BITS; ++j) { - savings += update_nmv_savings(branch_ct_bits[i][j], - cpi->common.fc.nmvc.comps[i].bits[j], - prob.comps[i].bits[j], - VP9_NMV_UPDATE_PROB); - } - } - for (i = 0; i < 2; ++i) { - for (j = 0; j < CLASS0_SIZE; ++j) { - int k; - for (k = 0; k < 3; ++k) { - savings += update_nmv_savings(branch_ct_class0_fp[i][j][k], - cpi->common.fc.nmvc.comps[i].class0_fp[j][k], - prob.comps[i].class0_fp[j][k], - VP9_NMV_UPDATE_PROB); - } - } - for (j = 0; j < 3; ++j) { - savings += update_nmv_savings(branch_ct_fp[i][j], - cpi->common.fc.nmvc.comps[i].fp[j], - prob.comps[i].fp[j], - VP9_NMV_UPDATE_PROB); - } - } - if (usehp) { - for (i = 0; i < 2; ++i) { - savings += update_nmv_savings(branch_ct_class0_hp[i], - cpi->common.fc.nmvc.comps[i].class0_hp, - prob.comps[i].class0_hp, - VP9_NMV_UPDATE_PROB); - savings += update_nmv_savings(branch_ct_hp[i], - cpi->common.fc.nmvc.comps[i].hp, - prob.comps[i].hp, - VP9_NMV_UPDATE_PROB); - } - } - if (savings <= 0) { - vp9_write_bit(bc, 0); - return; - } - vp9_write_bit(bc, 1); -#endif - - for (j = 0; j < MV_JOINTS - 1; ++j) { - update_nmv(bc, branch_ct_joint[j], - &cpi->common.fc.nmvc.joints[j], - prob.joints[j], - VP9_NMV_UPDATE_PROB); - } - for (i = 0; i < 2; ++i) { - update_nmv(bc, branch_ct_sign[i], - &cpi->common.fc.nmvc.comps[i].sign, - prob.comps[i].sign, - VP9_NMV_UPDATE_PROB); - for (j = 0; j < MV_CLASSES - 1; ++j) { - update_nmv(bc, branch_ct_classes[i][j], - &cpi->common.fc.nmvc.comps[i].classes[j], - prob.comps[i].classes[j], - VP9_NMV_UPDATE_PROB); - } - for (j = 0; j < CLASS0_SIZE - 1; ++j) { - update_nmv(bc, branch_ct_class0[i][j], - &cpi->common.fc.nmvc.comps[i].class0[j], - prob.comps[i].class0[j], - 
VP9_NMV_UPDATE_PROB); - } - for (j = 0; j < MV_OFFSET_BITS; ++j) { - update_nmv(bc, branch_ct_bits[i][j], - &cpi->common.fc.nmvc.comps[i].bits[j], - prob.comps[i].bits[j], - VP9_NMV_UPDATE_PROB); - } - } - for (i = 0; i < 2; ++i) { - for (j = 0; j < CLASS0_SIZE; ++j) { - int k; - for (k = 0; k < 3; ++k) { - update_nmv(bc, branch_ct_class0_fp[i][j][k], - &cpi->common.fc.nmvc.comps[i].class0_fp[j][k], - prob.comps[i].class0_fp[j][k], - VP9_NMV_UPDATE_PROB); - } - } - for (j = 0; j < 3; ++j) { - update_nmv(bc, branch_ct_fp[i][j], - &cpi->common.fc.nmvc.comps[i].fp[j], - prob.comps[i].fp[j], - VP9_NMV_UPDATE_PROB); - } - } - if (usehp) { - for (i = 0; i < 2; ++i) { - update_nmv(bc, branch_ct_class0_hp[i], - &cpi->common.fc.nmvc.comps[i].class0_hp, - prob.comps[i].class0_hp, - VP9_NMV_UPDATE_PROB); - update_nmv(bc, branch_ct_hp[i], - &cpi->common.fc.nmvc.comps[i].hp, - prob.comps[i].hp, - VP9_NMV_UPDATE_PROB); - } - } -} - -void vp9_encode_nmv(vp9_writer* const bc, const MV* const mv, - const MV* const ref, const nmv_context* const mvctx) { - MV_JOINT_TYPE j = vp9_get_mv_joint(*mv); - write_token(bc, vp9_mv_joint_tree, mvctx->joints, - vp9_mv_joint_encodings + j); - if (j == MV_JOINT_HZVNZ || j == MV_JOINT_HNZVNZ) { - encode_nmv_component(bc, mv->row, ref->col, &mvctx->comps[0]); - } - if (j == MV_JOINT_HNZVZ || j == MV_JOINT_HNZVNZ) { - encode_nmv_component(bc, mv->col, ref->col, &mvctx->comps[1]); - } -} - -void vp9_encode_nmv_fp(vp9_writer* const bc, const MV* const mv, - const MV* const ref, const nmv_context* const mvctx, - int usehp) { - MV_JOINT_TYPE j = vp9_get_mv_joint(*mv); - usehp = usehp && vp9_use_nmv_hp(ref); - if (j == MV_JOINT_HZVNZ || j == MV_JOINT_HNZVNZ) { - encode_nmv_component_fp(bc, mv->row, ref->row, &mvctx->comps[0], usehp); - } - if (j == MV_JOINT_HNZVZ || j == MV_JOINT_HNZVNZ) { - encode_nmv_component_fp(bc, mv->col, ref->col, &mvctx->comps[1], usehp); - } -} - -void vp9_build_nmv_cost_table(int *mvjoint, - int *mvcost[2], - const nmv_context* 
const mvctx, - int usehp, - int mvc_flag_v, - int mvc_flag_h) { - vp9_clear_system_state(); - vp9_cost_tokens(mvjoint, mvctx->joints, vp9_mv_joint_tree); - if (mvc_flag_v) - build_nmv_component_cost_table(mvcost[0], &mvctx->comps[0], usehp); - if (mvc_flag_h) - build_nmv_component_cost_table(mvcost[1], &mvctx->comps[1], usehp); -} diff --git a/vp8/encoder/encodemv.h b/vp8/encoder/encodemv.h deleted file mode 100644 index f19613d0a..000000000 --- a/vp8/encoder/encodemv.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef __INC_ENCODEMV_H -#define __INC_ENCODEMV_H - -#include "onyx_int.h" - -void vp9_write_nmvprobs(VP9_COMP* const, int usehp, vp9_writer* const); -void vp9_encode_nmv(vp9_writer* const w, const MV* const mv, - const MV* const ref, const nmv_context* const mvctx); -void vp9_encode_nmv_fp(vp9_writer* const w, const MV* const mv, - const MV* const ref, const nmv_context *mvctx, - int usehp); -void vp9_build_nmv_cost_table(int *mvjoint, - int *mvcost[2], - const nmv_context *mvctx, - int usehp, - int mvc_flag_v, - int mvc_flag_h); - -#endif diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c deleted file mode 100644 index a7ae9189a..000000000 --- a/vp8/encoder/firstpass.c +++ /dev/null @@ -1,2533 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. 
All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "math.h" -#include "limits.h" -#include "block.h" -#include "onyx_int.h" -#include "variance.h" -#include "encodeintra.h" -#include "vp8/common/setupintrarecon.h" -#include "mcomp.h" -#include "firstpass.h" -#include "vpx_scale/vpxscale.h" -#include "encodemb.h" -#include "vp8/common/extend.h" -#include "vp8/common/systemdependent.h" -#include "vpx_scale/yv12extend.h" -#include "vpx_mem/vpx_mem.h" -#include "vp8/common/swapyv12buffer.h" -#include <stdio.h> -#include "rdopt.h" -#include "ratectrl.h" -#include "vp8/common/quant_common.h" -#include "vp8/common/entropymv.h" -#include "encodemv.h" - -#define OUTPUT_FPF 0 - -#if CONFIG_RUNTIME_CPU_DETECT -#define IF_RTCD(x) (x) -#else -#define IF_RTCD(x) NULL -#endif - -extern void vp9_build_block_offsets(MACROBLOCK *x); - -extern void vp9_setup_block_ptrs(MACROBLOCK *x); - -extern void vp9_frame_init_quantizer(VP9_COMP *cpi); - -extern void vp9_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, - int_mv *mv); - -extern void vp9_alloc_compressor_data(VP9_COMP *cpi); - -#define IIFACTOR 12.5 -#define IIKFACTOR1 12.5 -#define IIKFACTOR2 15.0 -#define RMAX 128.0 -#define GF_RMAX 96.0 -#define ERR_DIVISOR 150.0 - -#define KF_MB_INTRA_MIN 300 -#define GF_MB_INTRA_MIN 200 - -#define DOUBLE_DIVIDE_CHECK(X) ((X)<0?(X)-.000001:(X)+.000001) - -#define POW1 (double)cpi->oxcf.two_pass_vbrbias/100.0 -#define POW2 (double)cpi->oxcf.two_pass_vbrbias/100.0 - -static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame); - -static int select_cq_level(int qindex) { - int ret_val = QINDEX_RANGE - 1; - int i; - - double target_q = (vp9_convert_qindex_to_q(qindex) * 0.5847) + 1.0; - - for (i = 0; i < QINDEX_RANGE; i++) { - if (target_q <= vp9_convert_qindex_to_q(i)) { - ret_val = i; - break; - } - } - - return ret_val; -} - - -// Resets the first pass file to the given position using a relative seek from 
the current position -static void reset_fpf_position(VP9_COMP *cpi, FIRSTPASS_STATS *Position) { - cpi->twopass.stats_in = Position; -} - -static int lookup_next_frame_stats(VP9_COMP *cpi, FIRSTPASS_STATS *next_frame) { - if (cpi->twopass.stats_in >= cpi->twopass.stats_in_end) - return EOF; - - *next_frame = *cpi->twopass.stats_in; - return 1; -} - -// Read frame stats at an offset from the current position -static int read_frame_stats(VP9_COMP *cpi, - FIRSTPASS_STATS *frame_stats, - int offset) { - FIRSTPASS_STATS *fps_ptr = cpi->twopass.stats_in; - - // Check legality of offset - if (offset >= 0) { - if (&fps_ptr[offset] >= cpi->twopass.stats_in_end) - return EOF; - } else if (offset < 0) { - if (&fps_ptr[offset] < cpi->twopass.stats_in_start) - return EOF; - } - - *frame_stats = fps_ptr[offset]; - return 1; -} - -static int input_stats(VP9_COMP *cpi, FIRSTPASS_STATS *fps) { - if (cpi->twopass.stats_in >= cpi->twopass.stats_in_end) - return EOF; - - *fps = *cpi->twopass.stats_in; - cpi->twopass.stats_in = - (void *)((char *)cpi->twopass.stats_in + sizeof(FIRSTPASS_STATS)); - return 1; -} - -static void output_stats(const VP9_COMP *cpi, - struct vpx_codec_pkt_list *pktlist, - FIRSTPASS_STATS *stats) { - struct vpx_codec_cx_pkt pkt; - pkt.kind = VPX_CODEC_STATS_PKT; - pkt.data.twopass_stats.buf = stats; - pkt.data.twopass_stats.sz = sizeof(FIRSTPASS_STATS); - vpx_codec_pkt_list_add(pktlist, &pkt); - -// TEMP debug code -#if OUTPUT_FPF - - { - FILE *fpfile; - fpfile = fopen("firstpass.stt", "a"); - - fprintf(fpfile, "%12.0f %12.0f %12.0f %12.0f %12.0f %12.4f %12.4f" - "%12.4f %12.4f %12.4f %12.4f %12.4f %12.4f %12.4f" - "%12.0f %12.0f %12.4f %12.0f %12.0f %12.4f\n", - stats->frame, - stats->intra_error, - stats->coded_error, - stats->sr_coded_error, - stats->ssim_weighted_pred_err, - stats->pcnt_inter, - stats->pcnt_motion, - stats->pcnt_second_ref, - stats->pcnt_neutral, - stats->MVr, - stats->mvr_abs, - stats->MVc, - stats->mvc_abs, - stats->MVrv, - stats->MVcv, - 
stats->mv_in_out_count, - stats->new_mv_count, - stats->count, - stats->duration); - fclose(fpfile); - } -#endif -} - -static void zero_stats(FIRSTPASS_STATS *section) { - section->frame = 0.0; - section->intra_error = 0.0; - section->coded_error = 0.0; - section->sr_coded_error = 0.0; - section->ssim_weighted_pred_err = 0.0; - section->pcnt_inter = 0.0; - section->pcnt_motion = 0.0; - section->pcnt_second_ref = 0.0; - section->pcnt_neutral = 0.0; - section->MVr = 0.0; - section->mvr_abs = 0.0; - section->MVc = 0.0; - section->mvc_abs = 0.0; - section->MVrv = 0.0; - section->MVcv = 0.0; - section->mv_in_out_count = 0.0; - section->new_mv_count = 0.0; - section->count = 0.0; - section->duration = 1.0; -} - -static void accumulate_stats(FIRSTPASS_STATS *section, FIRSTPASS_STATS *frame) { - section->frame += frame->frame; - section->intra_error += frame->intra_error; - section->coded_error += frame->coded_error; - section->sr_coded_error += frame->sr_coded_error; - section->ssim_weighted_pred_err += frame->ssim_weighted_pred_err; - section->pcnt_inter += frame->pcnt_inter; - section->pcnt_motion += frame->pcnt_motion; - section->pcnt_second_ref += frame->pcnt_second_ref; - section->pcnt_neutral += frame->pcnt_neutral; - section->MVr += frame->MVr; - section->mvr_abs += frame->mvr_abs; - section->MVc += frame->MVc; - section->mvc_abs += frame->mvc_abs; - section->MVrv += frame->MVrv; - section->MVcv += frame->MVcv; - section->mv_in_out_count += frame->mv_in_out_count; - section->new_mv_count += frame->new_mv_count; - section->count += frame->count; - section->duration += frame->duration; -} - -static void subtract_stats(FIRSTPASS_STATS *section, FIRSTPASS_STATS *frame) { - section->frame -= frame->frame; - section->intra_error -= frame->intra_error; - section->coded_error -= frame->coded_error; - section->sr_coded_error -= frame->sr_coded_error; - section->ssim_weighted_pred_err -= frame->ssim_weighted_pred_err; - section->pcnt_inter -= frame->pcnt_inter; - 
section->pcnt_motion -= frame->pcnt_motion; - section->pcnt_second_ref -= frame->pcnt_second_ref; - section->pcnt_neutral -= frame->pcnt_neutral; - section->MVr -= frame->MVr; - section->mvr_abs -= frame->mvr_abs; - section->MVc -= frame->MVc; - section->mvc_abs -= frame->mvc_abs; - section->MVrv -= frame->MVrv; - section->MVcv -= frame->MVcv; - section->mv_in_out_count -= frame->mv_in_out_count; - section->new_mv_count -= frame->new_mv_count; - section->count -= frame->count; - section->duration -= frame->duration; -} - -static void avg_stats(FIRSTPASS_STATS *section) { - if (section->count < 1.0) - return; - - section->intra_error /= section->count; - section->coded_error /= section->count; - section->sr_coded_error /= section->count; - section->ssim_weighted_pred_err /= section->count; - section->pcnt_inter /= section->count; - section->pcnt_second_ref /= section->count; - section->pcnt_neutral /= section->count; - section->pcnt_motion /= section->count; - section->MVr /= section->count; - section->mvr_abs /= section->count; - section->MVc /= section->count; - section->mvc_abs /= section->count; - section->MVrv /= section->count; - section->MVcv /= section->count; - section->mv_in_out_count /= section->count; - section->duration /= section->count; -} - -// Calculate a modified Error used in distributing bits between easier and harder frames -static double calculate_modified_err(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { - double av_err = (cpi->twopass.total_stats->ssim_weighted_pred_err / - cpi->twopass.total_stats->count); - double this_err = this_frame->ssim_weighted_pred_err; - double modified_err; - - if (this_err > av_err) - modified_err = av_err * pow((this_err / DOUBLE_DIVIDE_CHECK(av_err)), POW1); - else - modified_err = av_err * pow((this_err / DOUBLE_DIVIDE_CHECK(av_err)), POW2); - - return modified_err; -} - -static const double weight_table[256] = { - 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, - 0.020000, 
0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, - 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, - 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, - 0.020000, 0.031250, 0.062500, 0.093750, 0.125000, 0.156250, 0.187500, 0.218750, - 0.250000, 0.281250, 0.312500, 0.343750, 0.375000, 0.406250, 0.437500, 0.468750, - 0.500000, 0.531250, 0.562500, 0.593750, 0.625000, 0.656250, 0.687500, 0.718750, - 0.750000, 0.781250, 0.812500, 0.843750, 0.875000, 0.906250, 0.937500, 0.968750, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 
1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000 -}; - -static double simple_weight(YV12_BUFFER_CONFIG *source) { - int i, j; - - unsigned char *src = source->y_buffer; - double sum_weights = 0.0; - - // Loop throught the Y plane raw examining levels and creating a weight for the image - i = source->y_height; - do { - j = source->y_width; - do { - sum_weights += weight_table[ *src]; - src++; - } while (--j); - src -= source->y_width; - src += source->y_stride; - } while (--i); - - sum_weights /= (source->y_height * source->y_width); - - return sum_weights; -} - - -// This function returns the current per frame maximum bitrate target -static int frame_max_bits(VP9_COMP *cpi) { - // Max allocation for a single frame based on the max section guidelines passed in and how many bits are left - int max_bits; - - // For VBR base this on the bits and frames left plus the two_pass_vbrmax_section rate passed in by the user - max_bits = (int)(((double)cpi->twopass.bits_left / (cpi->twopass.total_stats->count - (double)cpi->common.current_video_frame)) * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0)); - - // Trap case where we are out of bits - if (max_bits < 0) - max_bits = 0; - - return max_bits; -} - -void vp9_init_first_pass(VP9_COMP *cpi) { - zero_stats(cpi->twopass.total_stats); -} - -void vp9_end_first_pass(VP9_COMP *cpi) { - output_stats(cpi, cpi->output_pkt_list, cpi->twopass.total_stats); -} - -static void zz_motion_search(VP9_COMP *cpi, MACROBLOCK *x, YV12_BUFFER_CONFIG *recon_buffer, int 
*best_motion_err, int recon_yoffset) { - MACROBLOCKD *const xd = &x->e_mbd; - BLOCK *b = &x->block[0]; - BLOCKD *d = &x->e_mbd.block[0]; - - unsigned char *src_ptr = (*(b->base_src) + b->src); - int src_stride = b->src_stride; - unsigned char *ref_ptr; - int ref_stride = d->pre_stride; - - // Set up pointers for this macro block recon buffer - xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset; - - ref_ptr = (unsigned char *)(*(d->base_pre) + d->pre); - - vp9_mse16x16(src_ptr, src_stride, ref_ptr, ref_stride, - (unsigned int *)(best_motion_err)); -} - -static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x, - int_mv *ref_mv, MV *best_mv, - YV12_BUFFER_CONFIG *recon_buffer, - int *best_motion_err, int recon_yoffset) { - MACROBLOCKD *const xd = &x->e_mbd; - BLOCK *b = &x->block[0]; - BLOCKD *d = &x->e_mbd.block[0]; - int num00; - - int_mv tmp_mv; - int_mv ref_mv_full; - - int tmp_err; - int step_param = 3; - int further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; - int n; - vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16]; - int new_mv_mode_penalty = 256; - - // override the default variance function to use MSE - v_fn_ptr.vf = vp9_mse16x16; - - // Set up pointers for this macro block recon buffer - xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset; - - // Initial step/diamond search centred on best mv - tmp_mv.as_int = 0; - ref_mv_full.as_mv.col = ref_mv->as_mv.col >> 3; - ref_mv_full.as_mv.row = ref_mv->as_mv.row >> 3; - tmp_err = cpi->diamond_search_sad(x, b, d, &ref_mv_full, &tmp_mv, step_param, - x->sadperbit16, &num00, &v_fn_ptr, - XMVCOST, ref_mv); - if (tmp_err < INT_MAX - new_mv_mode_penalty) - tmp_err += new_mv_mode_penalty; - - if (tmp_err < *best_motion_err) { - *best_motion_err = tmp_err; - best_mv->row = tmp_mv.as_mv.row; - best_mv->col = tmp_mv.as_mv.col; - } - - // Further step/diamond searches as necessary - n = num00; - num00 = 0; - - while (n < further_steps) { - n++; - - if (num00) - num00--; - else { - tmp_err 
= cpi->diamond_search_sad(x, b, d, &ref_mv_full, &tmp_mv, - step_param + n, x->sadperbit16, - &num00, &v_fn_ptr, - XMVCOST, ref_mv); - if (tmp_err < INT_MAX - new_mv_mode_penalty) - tmp_err += new_mv_mode_penalty; - - if (tmp_err < *best_motion_err) { - *best_motion_err = tmp_err; - best_mv->row = tmp_mv.as_mv.row; - best_mv->col = tmp_mv.as_mv.col; - } - } - } -} - -void vp9_first_pass(VP9_COMP *cpi) { - int mb_row, mb_col; - MACROBLOCK *const x = &cpi->mb; - VP9_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &x->e_mbd; - - int recon_yoffset, recon_uvoffset; - YV12_BUFFER_CONFIG *lst_yv12 = &cm->yv12_fb[cm->lst_fb_idx]; - YV12_BUFFER_CONFIG *new_yv12 = &cm->yv12_fb[cm->new_fb_idx]; - YV12_BUFFER_CONFIG *gld_yv12 = &cm->yv12_fb[cm->gld_fb_idx]; - int recon_y_stride = lst_yv12->y_stride; - int recon_uv_stride = lst_yv12->uv_stride; - int64_t intra_error = 0; - int64_t coded_error = 0; - int64_t sr_coded_error = 0; - - int sum_mvr = 0, sum_mvc = 0; - int sum_mvr_abs = 0, sum_mvc_abs = 0; - int sum_mvrs = 0, sum_mvcs = 0; - int mvcount = 0; - int intercount = 0; - int second_ref_count = 0; - int intrapenalty = 256; - int neutral_count = 0; - int new_mv_count = 0; - int sum_in_vectors = 0; - uint32_t lastmv_as_int = 0; - - int_mv zero_ref_mv; - - zero_ref_mv.as_int = 0; - - vp9_clear_system_state(); // __asm emms; - - x->src = * cpi->Source; - xd->pre = *lst_yv12; - xd->dst = *new_yv12; - - x->partition_info = x->pi; - - xd->mode_info_context = cm->mi; - - vp9_build_block_offsets(x); - - vp9_setup_block_dptrs(&x->e_mbd); - - vp9_setup_block_ptrs(x); - - // set up frame new frame for intra coded blocks - vp9_setup_intra_recon(new_yv12); - vp9_frame_init_quantizer(cpi); - - // Initialise the MV cost table to the defaults - // if( cm->current_video_frame == 0) - // if ( 0 ) - { - int flag[2] = {1, 1}; - vp9_init_mv_probs(cm); - vp9_initialize_rd_consts(cpi, cm->base_qindex + cm->y1dc_delta_q); - } - - // for each macroblock row in image - for (mb_row = 0; 
mb_row < cm->mb_rows; mb_row++) { - int_mv best_ref_mv; - - best_ref_mv.as_int = 0; - - // reset above block coeffs - xd->up_available = (mb_row != 0); - recon_yoffset = (mb_row * recon_y_stride * 16); - recon_uvoffset = (mb_row * recon_uv_stride * 8); - - // Set up limit values for motion vectors to prevent them extending outside the UMV borders - x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16)); - x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16); - - - // for each macroblock col in image - for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { - int this_error; - int gf_motion_error = INT_MAX; - int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row); - - xd->dst.y_buffer = new_yv12->y_buffer + recon_yoffset; - xd->dst.u_buffer = new_yv12->u_buffer + recon_uvoffset; - xd->dst.v_buffer = new_yv12->v_buffer + recon_uvoffset; - xd->left_available = (mb_col != 0); - - // Copy current mb to a buffer - vp9_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16); - - // do intra 16x16 prediction - this_error = vp9_encode_intra(cpi, x, use_dc_pred); - - // "intrapenalty" below deals with situations where the intra and inter error scores are very low (eg a plain black frame) - // We do not have special cases in first pass for 0,0 and nearest etc so all inter modes carry an overhead cost estimate fot the mv. - // When the error score is very low this causes us to pick all or lots of INTRA modes and throw lots of key frames. - // This penalty adds a cost matching that of a 0,0 mv to the intra case. 
- this_error += intrapenalty; - - // Cumulative intra error total - intra_error += (int64_t)this_error; - - // Set up limit values for motion vectors to prevent them extending outside the UMV borders - x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16)); - x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + (VP8BORDERINPIXELS - 16); - - // Other than for the first frame do a motion search - if (cm->current_video_frame > 0) { - int tmp_err; - int motion_error = INT_MAX; - int_mv mv, tmp_mv; - - // Simple 0,0 motion with no mv overhead - zz_motion_search(cpi, x, lst_yv12, &motion_error, recon_yoffset); - mv.as_int = tmp_mv.as_int = 0; - - // Test last reference frame using the previous best mv as the - // starting point (best reference) for the search - first_pass_motion_search(cpi, x, &best_ref_mv, - &mv.as_mv, lst_yv12, - &motion_error, recon_yoffset); - - // If the current best reference mv is not centred on 0,0 then do a 0,0 based search as well - if (best_ref_mv.as_int) { - tmp_err = INT_MAX; - first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv.as_mv, - lst_yv12, &tmp_err, recon_yoffset); - - if (tmp_err < motion_error) { - motion_error = tmp_err; - mv.as_int = tmp_mv.as_int; - } - } - - // Experimental search in an older reference frame - if (cm->current_video_frame > 1) { - // Simple 0,0 motion with no mv overhead - zz_motion_search(cpi, x, gld_yv12, - &gf_motion_error, recon_yoffset); - - first_pass_motion_search(cpi, x, &zero_ref_mv, - &tmp_mv.as_mv, gld_yv12, - &gf_motion_error, recon_yoffset); - - if ((gf_motion_error < motion_error) && - (gf_motion_error < this_error)) { - second_ref_count++; - } - - // Reset to last frame as reference buffer - xd->pre.y_buffer = lst_yv12->y_buffer + recon_yoffset; - xd->pre.u_buffer = lst_yv12->u_buffer + recon_uvoffset; - xd->pre.v_buffer = lst_yv12->v_buffer + recon_uvoffset; - - // In accumulating a score for the older reference frame - // take the best of the motion predicted score and - // the intra 
coded error (just as will be done for) - // accumulation of "coded_error" for the last frame. - if (gf_motion_error < this_error) - sr_coded_error += gf_motion_error; - else - sr_coded_error += this_error; - } else - sr_coded_error += motion_error; - - /* Intra assumed best */ - best_ref_mv.as_int = 0; - - if (motion_error <= this_error) { - // Keep a count of cases where the inter and intra were - // very close and very low. This helps with scene cut - // detection for example in cropped clips with black bars - // at the sides or top and bottom. - if ((((this_error - intrapenalty) * 9) <= - (motion_error * 10)) && - (this_error < (2 * intrapenalty))) { - neutral_count++; - } - - mv.as_mv.row <<= 3; - mv.as_mv.col <<= 3; - this_error = motion_error; - vp9_set_mbmode_and_mvs(x, NEWMV, &mv); - xd->mode_info_context->mbmi.txfm_size = TX_4X4; - vp9_encode_inter16x16y(IF_RTCD(&cpi->rtcd), x); - sum_mvr += mv.as_mv.row; - sum_mvr_abs += abs(mv.as_mv.row); - sum_mvc += mv.as_mv.col; - sum_mvc_abs += abs(mv.as_mv.col); - sum_mvrs += mv.as_mv.row * mv.as_mv.row; - sum_mvcs += mv.as_mv.col * mv.as_mv.col; - intercount++; - - best_ref_mv.as_int = mv.as_int; - - // Was the vector non-zero - if (mv.as_int) { - mvcount++; - - // Was it different from the last non zero vector - if (mv.as_int != lastmv_as_int) - new_mv_count++; - lastmv_as_int = mv.as_int; - - // Does the Row vector point inwards or outwards - if (mb_row < cm->mb_rows / 2) { - if (mv.as_mv.row > 0) - sum_in_vectors--; - else if (mv.as_mv.row < 0) - sum_in_vectors++; - } else if (mb_row > cm->mb_rows / 2) { - if (mv.as_mv.row > 0) - sum_in_vectors++; - else if (mv.as_mv.row < 0) - sum_in_vectors--; - } - - // Does the Row vector point inwards or outwards - if (mb_col < cm->mb_cols / 2) { - if (mv.as_mv.col > 0) - sum_in_vectors--; - else if (mv.as_mv.col < 0) - sum_in_vectors++; - } else if (mb_col > cm->mb_cols / 2) { - if (mv.as_mv.col > 0) - sum_in_vectors++; - else if (mv.as_mv.col < 0) - sum_in_vectors--; - } 
- } - } - } else - sr_coded_error += (int64_t)this_error; - - coded_error += (int64_t)this_error; - - // adjust to the next column of macroblocks - x->src.y_buffer += 16; - x->src.u_buffer += 8; - x->src.v_buffer += 8; - - recon_yoffset += 16; - recon_uvoffset += 8; - } - - // adjust to the next row of mbs - x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols; - x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols; - x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols; - - // extend the recon for intra prediction - vp9_extend_mb_row(new_yv12, xd->dst.y_buffer + 16, - xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); - vp9_clear_system_state(); // __asm emms; - } - - vp9_clear_system_state(); // __asm emms; - { - double weight = 0.0; - - FIRSTPASS_STATS fps; - - fps.frame = cm->current_video_frame; - fps.intra_error = intra_error >> 8; - fps.coded_error = coded_error >> 8; - fps.sr_coded_error = sr_coded_error >> 8; - weight = simple_weight(cpi->Source); - - - if (weight < 0.1) - weight = 0.1; - - fps.ssim_weighted_pred_err = fps.coded_error * weight; - - fps.pcnt_inter = 0.0; - fps.pcnt_motion = 0.0; - fps.MVr = 0.0; - fps.mvr_abs = 0.0; - fps.MVc = 0.0; - fps.mvc_abs = 0.0; - fps.MVrv = 0.0; - fps.MVcv = 0.0; - fps.mv_in_out_count = 0.0; - fps.new_mv_count = 0.0; - fps.count = 1.0; - - fps.pcnt_inter = 1.0 * (double)intercount / cm->MBs; - fps.pcnt_second_ref = 1.0 * (double)second_ref_count / cm->MBs; - fps.pcnt_neutral = 1.0 * (double)neutral_count / cm->MBs; - - if (mvcount > 0) { - fps.MVr = (double)sum_mvr / (double)mvcount; - fps.mvr_abs = (double)sum_mvr_abs / (double)mvcount; - fps.MVc = (double)sum_mvc / (double)mvcount; - fps.mvc_abs = (double)sum_mvc_abs / (double)mvcount; - fps.MVrv = ((double)sum_mvrs - (fps.MVr * fps.MVr / (double)mvcount)) / (double)mvcount; - fps.MVcv = ((double)sum_mvcs - (fps.MVc * fps.MVc / (double)mvcount)) / (double)mvcount; - fps.mv_in_out_count = (double)sum_in_vectors / (double)(mvcount * 2); - fps.new_mv_count 
= new_mv_count; - - fps.pcnt_motion = 1.0 * (double)mvcount / cpi->common.MBs; - } - - // TODO: handle the case when duration is set to 0, or something less - // than the full time between subsequent cpi->source_time_stamp s . - fps.duration = cpi->source->ts_end - - cpi->source->ts_start; - - // don't want to do output stats with a stack variable! - memcpy(cpi->twopass.this_frame_stats, - &fps, - sizeof(FIRSTPASS_STATS)); - output_stats(cpi, cpi->output_pkt_list, cpi->twopass.this_frame_stats); - accumulate_stats(cpi->twopass.total_stats, &fps); - } - - // Copy the previous Last Frame back into gf and and arf buffers if - // the prediction is good enough... but also dont allow it to lag too far - if ((cpi->twopass.sr_update_lag > 3) || - ((cm->current_video_frame > 0) && - (cpi->twopass.this_frame_stats->pcnt_inter > 0.20) && - ((cpi->twopass.this_frame_stats->intra_error / - cpi->twopass.this_frame_stats->coded_error) > 2.0))) { - vp8_yv12_copy_frame_ptr(lst_yv12, gld_yv12); - cpi->twopass.sr_update_lag = 1; - } else - cpi->twopass.sr_update_lag++; - - // swap frame pointers so last frame refers to the frame we just compressed - vp9_swap_yv12_buffer(lst_yv12, new_yv12); - vp8_yv12_extend_frame_borders(lst_yv12); - - // Special case for the first frame. Copy into the GF buffer as a second reference. - if (cm->current_video_frame == 0) { - vp8_yv12_copy_frame_ptr(lst_yv12, gld_yv12); - } - - - // use this to see what the first pass reconstruction looks like - if (0) { - char filename[512]; - FILE *recon_file; - sprintf(filename, "enc%04d.yuv", (int) cm->current_video_frame); - - if (cm->current_video_frame == 0) - recon_file = fopen(filename, "wb"); - else - recon_file = fopen(filename, "ab"); - - if (fwrite(lst_yv12->buffer_alloc, lst_yv12->frame_size, 1, recon_file)); - fclose(recon_file); - } - - cm->current_video_frame++; - -} - -// Estimate a cost per mb attributable to overheads such as the coding of -// modes and motion vectors. 
-// Currently simplistic in its assumptions for testing. -// - - -static double bitcost(double prob) { - return -(log(prob) / log(2.0)); -} - -static long long estimate_modemvcost(VP9_COMP *cpi, - FIRSTPASS_STATS *fpstats) { - int mv_cost; - int mode_cost; - - double av_pct_inter = fpstats->pcnt_inter / fpstats->count; - double av_pct_motion = fpstats->pcnt_motion / fpstats->count; - double av_intra = (1.0 - av_pct_inter); - - double zz_cost; - double motion_cost; - double intra_cost; - - zz_cost = bitcost(av_pct_inter - av_pct_motion); - motion_cost = bitcost(av_pct_motion); - intra_cost = bitcost(av_intra); - - // Estimate of extra bits per mv overhead for mbs - // << 9 is the normalization to the (bits * 512) used in vp9_bits_per_mb - mv_cost = ((int)(fpstats->new_mv_count / fpstats->count) * 8) << 9; - - // Crude estimate of overhead cost from modes - // << 9 is the normalization to (bits * 512) used in vp9_bits_per_mb - mode_cost = - (int)((((av_pct_inter - av_pct_motion) * zz_cost) + - (av_pct_motion * motion_cost) + - (av_intra * intra_cost)) * cpi->common.MBs) << 9; - - // return mv_cost + mode_cost; - // TODO PGW Fix overhead costs for extended Q range - return 0; -} - -static double calc_correction_factor(double err_per_mb, - double err_divisor, - double pt_low, - double pt_high, - int Q) { - double power_term; - double error_term = err_per_mb / err_divisor; - double correction_factor; - - // Adjustment based on actual quantizer to power term. - power_term = (vp9_convert_qindex_to_q(Q) * 0.01) + pt_low; - power_term = (power_term > pt_high) ? pt_high : power_term; - - // Adjustments to error term - // TBD - - // Calculate correction factor - correction_factor = pow(error_term, power_term); - - // Clip range - correction_factor = - (correction_factor < 0.05) - ? 0.05 : (correction_factor > 2.0) ? 2.0 : correction_factor; - - return correction_factor; -} - -// Given a current maxQ value sets a range for future values. -// PGW TODO.. 
-// This code removes direct dependency on QIndex to determin the range -// (now uses the actual quantizer) but has not been tuned. -static void adjust_maxq_qrange(VP9_COMP *cpi) { - int i; - double q; - - // Set the max corresponding to cpi->avg_q * 2.0 - q = cpi->avg_q * 2.0; - cpi->twopass.maxq_max_limit = cpi->worst_quality; - for (i = cpi->best_quality; i <= cpi->worst_quality; i++) { - cpi->twopass.maxq_max_limit = i; - if (vp9_convert_qindex_to_q(i) >= q) - break; - } - - // Set the min corresponding to cpi->avg_q * 0.5 - q = cpi->avg_q * 0.5; - cpi->twopass.maxq_min_limit = cpi->best_quality; - for (i = cpi->worst_quality; i >= cpi->best_quality; i--) { - cpi->twopass.maxq_min_limit = i; - if (vp9_convert_qindex_to_q(i) <= q) - break; - } -} - -static int estimate_max_q(VP9_COMP *cpi, - FIRSTPASS_STATS *fpstats, - int section_target_bandwitdh, - int overhead_bits) { - int Q; - int num_mbs = cpi->common.MBs; - int target_norm_bits_per_mb; - - double section_err = (fpstats->coded_error / fpstats->count); - double sr_err_diff; - double sr_correction; - double err_per_mb = section_err / num_mbs; - double err_correction_factor; - double speed_correction = 1.0; - int overhead_bits_per_mb; - - if (section_target_bandwitdh <= 0) - return cpi->twopass.maxq_max_limit; // Highest value allowed - - target_norm_bits_per_mb = - (section_target_bandwitdh < (1 << 20)) - ? (512 * section_target_bandwitdh) / num_mbs - : 512 * (section_target_bandwitdh / num_mbs); - - // Look at the drop in prediction quality between the last frame - // and the GF buffer (which contained an older frame). 
- sr_err_diff = - (fpstats->sr_coded_error - fpstats->coded_error) / - (fpstats->count * cpi->common.MBs); - sr_correction = (sr_err_diff / 32.0); - sr_correction = pow(sr_correction, 0.25); - if (sr_correction < 0.75) - sr_correction = 0.75; - else if (sr_correction > 1.25) - sr_correction = 1.25; - - // Calculate a corrective factor based on a rolling ratio of bits spent - // vs target bits - if ((cpi->rolling_target_bits > 0) && - (cpi->active_worst_quality < cpi->worst_quality)) { - double rolling_ratio; - - rolling_ratio = (double)cpi->rolling_actual_bits / - (double)cpi->rolling_target_bits; - - if (rolling_ratio < 0.95) - cpi->twopass.est_max_qcorrection_factor -= 0.005; - else if (rolling_ratio > 1.05) - cpi->twopass.est_max_qcorrection_factor += 0.005; - - cpi->twopass.est_max_qcorrection_factor = - (cpi->twopass.est_max_qcorrection_factor < 0.1) - ? 0.1 - : (cpi->twopass.est_max_qcorrection_factor > 10.0) - ? 10.0 : cpi->twopass.est_max_qcorrection_factor; - } - - // Corrections for higher compression speed settings - // (reduced compression expected) - if (cpi->compressor_speed == 1) { - if (cpi->oxcf.cpu_used <= 5) - speed_correction = 1.04 + (cpi->oxcf.cpu_used * 0.04); - else - speed_correction = 1.25; - } - - // Estimate of overhead bits per mb - // Correction to overhead bits for min allowed Q. - // PGW TODO.. This code is broken for the extended Q range - // for now overhead set to 0. - overhead_bits_per_mb = overhead_bits / num_mbs; - overhead_bits_per_mb *= pow(0.98, (double)cpi->twopass.maxq_min_limit); - - // Try and pick a max Q that will be high enough to encode the - // content at the given rate. 
- for (Q = cpi->twopass.maxq_min_limit; Q < cpi->twopass.maxq_max_limit; Q++) { - int bits_per_mb_at_this_q; - - err_correction_factor = - calc_correction_factor(err_per_mb, ERR_DIVISOR, 0.4, 0.90, Q) * - sr_correction * speed_correction * - cpi->twopass.est_max_qcorrection_factor; - - if (err_correction_factor < 0.05) - err_correction_factor = 0.05; - else if (err_correction_factor > 5.0) - err_correction_factor = 5.0; - - bits_per_mb_at_this_q = - vp9_bits_per_mb(INTER_FRAME, Q) + overhead_bits_per_mb; - - bits_per_mb_at_this_q = (int)(.5 + err_correction_factor * - (double)bits_per_mb_at_this_q); - - // Mode and motion overhead - // As Q rises in real encode loop rd code will force overhead down - // We make a crude adjustment for this here as *.98 per Q step. - // PGW TODO.. This code is broken for the extended Q range - // for now overhead set to 0. - // overhead_bits_per_mb = (int)((double)overhead_bits_per_mb * 0.98); - - if (bits_per_mb_at_this_q <= target_norm_bits_per_mb) - break; - } - - // Restriction on active max q for constrained quality mode. - if ((cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) && - (Q < cpi->cq_target_quality)) { - Q = cpi->cq_target_quality; - } - - // Adjust maxq_min_limit and maxq_max_limit limits based on - // averaga q observed in clip for non kf/gf/arf frames - // Give average a chance to settle though. - // PGW TODO.. This code is broken for the extended Q range - if ((cpi->ni_frames > - ((unsigned int)cpi->twopass.total_stats->count >> 8)) && - (cpi->ni_frames > 150)) { - adjust_maxq_qrange(cpi); - } - - return Q; -} - -// For cq mode estimate a cq level that matches the observed -// complexity and data rate. 
-static int estimate_cq(VP9_COMP *cpi, - FIRSTPASS_STATS *fpstats, - int section_target_bandwitdh, - int overhead_bits) { - int Q; - int num_mbs = cpi->common.MBs; - int target_norm_bits_per_mb; - - double section_err = (fpstats->coded_error / fpstats->count); - double err_per_mb = section_err / num_mbs; - double err_correction_factor; - double sr_err_diff; - double sr_correction; - double speed_correction = 1.0; - double clip_iiratio; - double clip_iifactor; - int overhead_bits_per_mb; - - - target_norm_bits_per_mb = (section_target_bandwitdh < (1 << 20)) - ? (512 * section_target_bandwitdh) / num_mbs - : 512 * (section_target_bandwitdh / num_mbs); - - // Estimate of overhead bits per mb - overhead_bits_per_mb = overhead_bits / num_mbs; - - // Corrections for higher compression speed settings - // (reduced compression expected) - if (cpi->compressor_speed == 1) { - if (cpi->oxcf.cpu_used <= 5) - speed_correction = 1.04 + (cpi->oxcf.cpu_used * 0.04); - else - speed_correction = 1.25; - } - - // Look at the drop in prediction quality between the last frame - // and the GF buffer (which contained an older frame). - sr_err_diff = - (fpstats->sr_coded_error - fpstats->coded_error) / - (fpstats->count * cpi->common.MBs); - sr_correction = (sr_err_diff / 32.0); - sr_correction = pow(sr_correction, 0.25); - if (sr_correction < 0.75) - sr_correction = 0.75; - else if (sr_correction > 1.25) - sr_correction = 1.25; - - // II ratio correction factor for clip as a whole - clip_iiratio = cpi->twopass.total_stats->intra_error / - DOUBLE_DIVIDE_CHECK(cpi->twopass.total_stats->coded_error); - clip_iifactor = 1.0 - ((clip_iiratio - 10.0) * 0.025); - if (clip_iifactor < 0.80) - clip_iifactor = 0.80; - - // Try and pick a Q that can encode the content at the given rate. 
- for (Q = 0; Q < MAXQ; Q++) { - int bits_per_mb_at_this_q; - - // Error per MB based correction factor - err_correction_factor = - calc_correction_factor(err_per_mb, 100.0, 0.4, 0.90, Q) * - sr_correction * speed_correction * clip_iifactor; - - if (err_correction_factor < 0.05) - err_correction_factor = 0.05; - else if (err_correction_factor > 5.0) - err_correction_factor = 5.0; - - bits_per_mb_at_this_q = - vp9_bits_per_mb(INTER_FRAME, Q) + overhead_bits_per_mb; - - bits_per_mb_at_this_q = (int)(.5 + err_correction_factor * - (double)bits_per_mb_at_this_q); - - // Mode and motion overhead - // As Q rises in real encode loop rd code will force overhead down - // We make a crude adjustment for this here as *.98 per Q step. - // PGW TODO.. This code is broken for the extended Q range - // for now overhead set to 0. - overhead_bits_per_mb = (int)((double)overhead_bits_per_mb * 0.98); - - if (bits_per_mb_at_this_q <= target_norm_bits_per_mb) - break; - } - - // Clip value to range "best allowed to (worst allowed - 1)" - Q = select_cq_level(Q); - if (Q >= cpi->worst_quality) - Q = cpi->worst_quality - 1; - if (Q < cpi->best_quality) - Q = cpi->best_quality; - - return Q; -} - - -extern void vp9_new_frame_rate(VP9_COMP *cpi, double framerate); - -void vp9_init_second_pass(VP9_COMP *cpi) { - FIRSTPASS_STATS this_frame; - FIRSTPASS_STATS *start_pos; - - double lower_bounds_min_rate = FRAME_OVERHEAD_BITS * cpi->oxcf.frame_rate; - double two_pass_min_rate = (double)(cpi->oxcf.target_bandwidth - * cpi->oxcf.two_pass_vbrmin_section / 100); - - if (two_pass_min_rate < lower_bounds_min_rate) - two_pass_min_rate = lower_bounds_min_rate; - - zero_stats(cpi->twopass.total_stats); - zero_stats(cpi->twopass.total_left_stats); - - if (!cpi->twopass.stats_in_end) - return; - - *cpi->twopass.total_stats = *cpi->twopass.stats_in_end; - *cpi->twopass.total_left_stats = *cpi->twopass.total_stats; - - // each frame can have a different duration, as the frame rate in the source - // isn't 
guaranteed to be constant. The frame rate prior to the first frame - // encoded in the second pass is a guess. However the sum duration is not. - // Its calculated based on the actual durations of all frames from the first - // pass. - vp9_new_frame_rate(cpi, - 10000000.0 * cpi->twopass.total_stats->count / - cpi->twopass.total_stats->duration); - - cpi->output_frame_rate = cpi->oxcf.frame_rate; - cpi->twopass.bits_left = (int64_t)(cpi->twopass.total_stats->duration * - cpi->oxcf.target_bandwidth / 10000000.0); - cpi->twopass.bits_left -= (int64_t)(cpi->twopass.total_stats->duration * - two_pass_min_rate / 10000000.0); - - // Calculate a minimum intra value to be used in determining the IIratio - // scores used in the second pass. We have this minimum to make sure - // that clips that are static but "low complexity" in the intra domain - // are still boosted appropriately for KF/GF/ARF - cpi->twopass.kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs; - cpi->twopass.gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs; - - // This variable monitors how far behind the second ref update is lagging - cpi->twopass.sr_update_lag = 1; - - // Scan the first pass file and calculate an average Intra / Inter error score ratio for the sequence - { - double sum_iiratio = 0.0; - double IIRatio; - - start_pos = cpi->twopass.stats_in; // Note starting "file" position - - while (input_stats(cpi, &this_frame) != EOF) { - IIRatio = this_frame.intra_error / DOUBLE_DIVIDE_CHECK(this_frame.coded_error); - IIRatio = (IIRatio < 1.0) ? 1.0 : (IIRatio > 20.0) ? 
20.0 : IIRatio; - sum_iiratio += IIRatio; - } - - cpi->twopass.avg_iiratio = sum_iiratio / DOUBLE_DIVIDE_CHECK((double)cpi->twopass.total_stats->count); - - // Reset file position - reset_fpf_position(cpi, start_pos); - } - - // Scan the first pass file and calculate a modified total error based upon the bias/power function - // used to allocate bits - { - start_pos = cpi->twopass.stats_in; // Note starting "file" position - - cpi->twopass.modified_error_total = 0.0; - cpi->twopass.modified_error_used = 0.0; - - while (input_stats(cpi, &this_frame) != EOF) { - cpi->twopass.modified_error_total += calculate_modified_err(cpi, &this_frame); - } - cpi->twopass.modified_error_left = cpi->twopass.modified_error_total; - - reset_fpf_position(cpi, start_pos); // Reset file position - - } -} - -void vp9_end_second_pass(VP9_COMP *cpi) { -} - -// This function gives and estimate of how badly we believe -// the prediction quality is decaying from frame to frame. -static double get_prediction_decay_rate(VP9_COMP *cpi, - FIRSTPASS_STATS *next_frame) { - double prediction_decay_rate; - double second_ref_decay; - double mb_sr_err_diff; - - // Initial basis is the % mbs inter coded - prediction_decay_rate = next_frame->pcnt_inter; - - // Look at the observed drop in prediction quality between the last frame - // and the GF buffer (which contains an older frame). - mb_sr_err_diff = - (next_frame->sr_coded_error - next_frame->coded_error) / - (cpi->common.MBs); - second_ref_decay = 1.0 - (mb_sr_err_diff / 512.0); - second_ref_decay = pow(second_ref_decay, 0.5); - if (second_ref_decay < 0.85) - second_ref_decay = 0.85; - else if (second_ref_decay > 1.0) - second_ref_decay = 1.0; - - if (second_ref_decay < prediction_decay_rate) - prediction_decay_rate = second_ref_decay; - - return prediction_decay_rate; -} - -// Function to test for a condition where a complex transition is followed -// by a static section. For example in slide shows where there is a fade -// between slides. 
This is to help with more optimal kf and gf positioning. -static int detect_transition_to_still( - VP9_COMP *cpi, - int frame_interval, - int still_interval, - double loop_decay_rate, - double last_decay_rate) { - BOOL trans_to_still = FALSE; - - // Break clause to detect very still sections after motion - // For example a static image after a fade or other transition - // instead of a clean scene cut. - if ((frame_interval > MIN_GF_INTERVAL) && - (loop_decay_rate >= 0.999) && - (last_decay_rate < 0.9)) { - int j; - FIRSTPASS_STATS *position = cpi->twopass.stats_in; - FIRSTPASS_STATS tmp_next_frame; - double zz_inter; - - // Look ahead a few frames to see if static condition - // persists... - for (j = 0; j < still_interval; j++) { - if (EOF == input_stats(cpi, &tmp_next_frame)) - break; - - zz_inter = - (tmp_next_frame.pcnt_inter - tmp_next_frame.pcnt_motion); - if (zz_inter < 0.999) - break; - } - // Reset file position - reset_fpf_position(cpi, position); - - // Only if it does do we signal a transition to still - if (j == still_interval) - trans_to_still = TRUE; - } - - return trans_to_still; -} - -// This function detects a flash through the high relative pcnt_second_ref -// score in the frame following a flash frame. The offset passed in should -// reflect this -static BOOL detect_flash(VP9_COMP *cpi, int offset) { - FIRSTPASS_STATS next_frame; - - BOOL flash_detected = FALSE; - - // Read the frame data. - // The return is FALSE (no flash detected) if not a valid frame - if (read_frame_stats(cpi, &next_frame, offset) != EOF) { - // What we are looking for here is a situation where there is a - // brief break in prediction (such as a flash) but subsequent frames - // are reasonably well predicted by an earlier (pre flash) frame. - // The recovery after a flash is indicated by a high pcnt_second_ref - // comapred to pcnt_inter. 
- if ((next_frame.pcnt_second_ref > next_frame.pcnt_inter) && - (next_frame.pcnt_second_ref >= 0.5)) { - flash_detected = TRUE; - } - } - - return flash_detected; -} - -// Update the motion related elements to the GF arf boost calculation -static void accumulate_frame_motion_stats( - VP9_COMP *cpi, - FIRSTPASS_STATS *this_frame, - double *this_frame_mv_in_out, - double *mv_in_out_accumulator, - double *abs_mv_in_out_accumulator, - double *mv_ratio_accumulator) { - // double this_frame_mv_in_out; - double this_frame_mvr_ratio; - double this_frame_mvc_ratio; - double motion_pct; - - // Accumulate motion stats. - motion_pct = this_frame->pcnt_motion; - - // Accumulate Motion In/Out of frame stats - *this_frame_mv_in_out = this_frame->mv_in_out_count * motion_pct; - *mv_in_out_accumulator += this_frame->mv_in_out_count * motion_pct; - *abs_mv_in_out_accumulator += - fabs(this_frame->mv_in_out_count * motion_pct); - - // Accumulate a measure of how uniform (or conversely how random) - // the motion field is. (A ratio of absmv / mv) - if (motion_pct > 0.05) { - this_frame_mvr_ratio = fabs(this_frame->mvr_abs) / - DOUBLE_DIVIDE_CHECK(fabs(this_frame->MVr)); - - this_frame_mvc_ratio = fabs(this_frame->mvc_abs) / - DOUBLE_DIVIDE_CHECK(fabs(this_frame->MVc)); - - *mv_ratio_accumulator += - (this_frame_mvr_ratio < this_frame->mvr_abs) - ? (this_frame_mvr_ratio * motion_pct) - : this_frame->mvr_abs * motion_pct; - - *mv_ratio_accumulator += - (this_frame_mvc_ratio < this_frame->mvc_abs) - ? (this_frame_mvc_ratio * motion_pct) - : this_frame->mvc_abs * motion_pct; - - } -} - -// Calculate a baseline boost number for the current frame. 
-static double calc_frame_boost( - VP9_COMP *cpi, - FIRSTPASS_STATS *this_frame, - double this_frame_mv_in_out) { - double frame_boost; - - // Underlying boost factor is based on inter intra error ratio - if (this_frame->intra_error > cpi->twopass.gf_intra_err_min) - frame_boost = (IIFACTOR * this_frame->intra_error / - DOUBLE_DIVIDE_CHECK(this_frame->coded_error)); - else - frame_boost = (IIFACTOR * cpi->twopass.gf_intra_err_min / - DOUBLE_DIVIDE_CHECK(this_frame->coded_error)); - - // Increase boost for frames where new data coming into frame - // (eg zoom out). Slightly reduce boost if there is a net balance - // of motion out of the frame (zoom in). - // The range for this_frame_mv_in_out is -1.0 to +1.0 - if (this_frame_mv_in_out > 0.0) - frame_boost += frame_boost * (this_frame_mv_in_out * 2.0); - // In extreme case boost is halved - else - frame_boost += frame_boost * (this_frame_mv_in_out / 2.0); - - // Clip to maximum - if (frame_boost > GF_RMAX) - frame_boost = GF_RMAX; - - return frame_boost; -} - -static int calc_arf_boost( - VP9_COMP *cpi, - int offset, - int f_frames, - int b_frames, - int *f_boost, - int *b_boost) { - FIRSTPASS_STATS this_frame; - - int i; - double boost_score = 0.0; - double mv_ratio_accumulator = 0.0; - double decay_accumulator = 1.0; - double this_frame_mv_in_out = 0.0; - double mv_in_out_accumulator = 0.0; - double abs_mv_in_out_accumulator = 0.0; - int arf_boost; - BOOL flash_detected = FALSE; - - // Search forward from the proposed arf/next gf position - for (i = 0; i < f_frames; i++) { - if (read_frame_stats(cpi, &this_frame, (i + offset)) == EOF) - break; - - // Update the motion related elements to the boost calculation - accumulate_frame_motion_stats(cpi, &this_frame, - &this_frame_mv_in_out, &mv_in_out_accumulator, - &abs_mv_in_out_accumulator, &mv_ratio_accumulator); - - // We want to discount the the flash frame itself and the recovery - // frame that follows as both will have poor scores. 
- flash_detected = detect_flash(cpi, (i + offset)) || - detect_flash(cpi, (i + offset + 1)); - - // Cumulative effect of prediction quality decay - if (!flash_detected) { - decay_accumulator = - decay_accumulator * - get_prediction_decay_rate(cpi, &this_frame); - decay_accumulator = - decay_accumulator < 0.1 ? 0.1 : decay_accumulator; - } - - boost_score += (decay_accumulator * - calc_frame_boost(cpi, &this_frame, this_frame_mv_in_out)); - } - - *f_boost = boost_score; - - // Reset for backward looking loop - boost_score = 0.0; - mv_ratio_accumulator = 0.0; - decay_accumulator = 1.0; - this_frame_mv_in_out = 0.0; - mv_in_out_accumulator = 0.0; - abs_mv_in_out_accumulator = 0.0; - - // Search backward towards last gf position - for (i = -1; i >= -b_frames; i--) { - if (read_frame_stats(cpi, &this_frame, (i + offset)) == EOF) - break; - - // Update the motion related elements to the boost calculation - accumulate_frame_motion_stats(cpi, &this_frame, - &this_frame_mv_in_out, &mv_in_out_accumulator, - &abs_mv_in_out_accumulator, &mv_ratio_accumulator); - - // We want to discount the the flash frame itself and the recovery - // frame that follows as both will have poor scores. - flash_detected = detect_flash(cpi, (i + offset)) || - detect_flash(cpi, (i + offset + 1)); - - // Cumulative effect of prediction quality decay - if (!flash_detected) { - decay_accumulator = - decay_accumulator * - get_prediction_decay_rate(cpi, &this_frame); - decay_accumulator = - decay_accumulator < 0.1 ? 
0.1 : decay_accumulator; - } - - boost_score += (decay_accumulator * - calc_frame_boost(cpi, &this_frame, this_frame_mv_in_out)); - - } - *b_boost = boost_score; - - arf_boost = (*f_boost + *b_boost); - if (arf_boost < ((b_frames + f_frames) * 20)) - arf_boost = ((b_frames + f_frames) * 20); - - return arf_boost; -} - -static void configure_arnr_filter(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { - int half_gf_int; - int frames_after_arf; - int frames_bwd = cpi->oxcf.arnr_max_frames - 1; - int frames_fwd = cpi->oxcf.arnr_max_frames - 1; - - // Define the arnr filter width for this group of frames: - // We only filter frames that lie within a distance of half - // the GF interval from the ARF frame. We also have to trap - // cases where the filter extends beyond the end of clip. - // Note: this_frame->frame has been updated in the loop - // so it now points at the ARF frame. - half_gf_int = cpi->baseline_gf_interval >> 1; - frames_after_arf = cpi->twopass.total_stats->count - - this_frame->frame - 1; - - switch (cpi->oxcf.arnr_type) { - case 1: // Backward filter - frames_fwd = 0; - if (frames_bwd > half_gf_int) - frames_bwd = half_gf_int; - break; - - case 2: // Forward filter - if (frames_fwd > half_gf_int) - frames_fwd = half_gf_int; - if (frames_fwd > frames_after_arf) - frames_fwd = frames_after_arf; - frames_bwd = 0; - break; - - case 3: // Centered filter - default: - frames_fwd >>= 1; - if (frames_fwd > frames_after_arf) - frames_fwd = frames_after_arf; - if (frames_fwd > half_gf_int) - frames_fwd = half_gf_int; - - frames_bwd = frames_fwd; - - // For even length filter there is one more frame backward - // than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff. - if (frames_bwd < half_gf_int) - frames_bwd += (cpi->oxcf.arnr_max_frames + 1) & 0x1; - break; - } - - cpi->active_arnr_frames = frames_bwd + 1 + frames_fwd; -} - -// Analyse and define a gf/arf group . 
-static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { - FIRSTPASS_STATS next_frame; - FIRSTPASS_STATS *start_pos; - int i; - double boost_score = 0.0; - double old_boost_score = 0.0; - double gf_group_err = 0.0; - double gf_first_frame_err = 0.0; - double mod_frame_err = 0.0; - - double mv_ratio_accumulator = 0.0; - double decay_accumulator = 1.0; - double zero_motion_accumulator = 1.0; - - double loop_decay_rate = 1.00; // Starting decay rate - double last_loop_decay_rate = 1.00; - - double this_frame_mv_in_out = 0.0; - double mv_in_out_accumulator = 0.0; - double abs_mv_in_out_accumulator = 0.0; - - int max_bits = frame_max_bits(cpi); // Max for a single frame - - unsigned int allow_alt_ref = - cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames; - - int f_boost = 0; - int b_boost = 0; - BOOL flash_detected; - - cpi->twopass.gf_group_bits = 0; - - vp9_clear_system_state(); // __asm emms; - - start_pos = cpi->twopass.stats_in; - - vpx_memset(&next_frame, 0, sizeof(next_frame)); // assure clean - - // Load stats for the current frame. - mod_frame_err = calculate_modified_err(cpi, this_frame); - - // Note the error of the frame at the start of the group (this will be - // the GF frame error if we code a normal gf - gf_first_frame_err = mod_frame_err; - - // Special treatment if the current frame is a key frame (which is also - // a gf). 
If it is then its error score (and hence bit allocation) need - // to be subtracted out from the calculation for the GF group - if (cpi->common.frame_type == KEY_FRAME) - gf_group_err -= gf_first_frame_err; - - // Scan forward to try and work out how many frames the next gf group - // should contain and what level of boost is appropriate for the GF - // or ARF that will be coded with the group - i = 0; - - while (((i < cpi->twopass.static_scene_max_gf_interval) || - ((cpi->twopass.frames_to_key - i) < MIN_GF_INTERVAL)) && - (i < cpi->twopass.frames_to_key)) { - i++; // Increment the loop counter - - // Accumulate error score of frames in this gf group - mod_frame_err = calculate_modified_err(cpi, this_frame); - gf_group_err += mod_frame_err; - - if (EOF == input_stats(cpi, &next_frame)) - break; - - // Test for the case where there is a brief flash but the prediction - // quality back to an earlier frame is then restored. - flash_detected = detect_flash(cpi, 0); - - // Update the motion related elements to the boost calculation - accumulate_frame_motion_stats(cpi, &next_frame, - &this_frame_mv_in_out, &mv_in_out_accumulator, - &abs_mv_in_out_accumulator, &mv_ratio_accumulator); - - // Cumulative effect of prediction quality decay - if (!flash_detected) { - last_loop_decay_rate = loop_decay_rate; - loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame); - decay_accumulator = decay_accumulator * loop_decay_rate; - - // Monitor for static sections. - if ((next_frame.pcnt_inter - next_frame.pcnt_motion) < - zero_motion_accumulator) { - zero_motion_accumulator = - (next_frame.pcnt_inter - next_frame.pcnt_motion); - } - - // Break clause to detect very still sections after motion - // (for example a staic image after a fade or other transition). 
- if (detect_transition_to_still(cpi, i, 5, loop_decay_rate, - last_loop_decay_rate)) { - allow_alt_ref = FALSE; - break; - } - } - - // Calculate a boost number for this frame - boost_score += - (decay_accumulator * - calc_frame_boost(cpi, &next_frame, this_frame_mv_in_out)); - - // Break out conditions. - if ( - // Break at cpi->max_gf_interval unless almost totally static - (i >= cpi->max_gf_interval && (zero_motion_accumulator < 0.995)) || - ( - // Dont break out with a very short interval - (i > MIN_GF_INTERVAL) && - // Dont break out very close to a key frame - ((cpi->twopass.frames_to_key - i) >= MIN_GF_INTERVAL) && - ((boost_score > 125.0) || (next_frame.pcnt_inter < 0.75)) && - (!flash_detected) && - ((mv_ratio_accumulator > 100.0) || - (abs_mv_in_out_accumulator > 3.0) || - (mv_in_out_accumulator < -2.0) || - ((boost_score - old_boost_score) < 12.5)) - )) { - boost_score = old_boost_score; - break; - } - - vpx_memcpy(this_frame, &next_frame, sizeof(*this_frame)); - - old_boost_score = boost_score; - } - - // Dont allow a gf too near the next kf - if ((cpi->twopass.frames_to_key - i) < MIN_GF_INTERVAL) { - while (i < cpi->twopass.frames_to_key) { - i++; - - if (EOF == input_stats(cpi, this_frame)) - break; - - if (i < cpi->twopass.frames_to_key) { - mod_frame_err = calculate_modified_err(cpi, this_frame); - gf_group_err += mod_frame_err; - } - } - } - - // Set the interval till the next gf or arf. 
- cpi->baseline_gf_interval = i; - - // Should we use the alternate refernce frame - if (allow_alt_ref && - (i < cpi->oxcf.lag_in_frames) && - (i >= MIN_GF_INTERVAL) && - // dont use ARF very near next kf - (i <= (cpi->twopass.frames_to_key - MIN_GF_INTERVAL)) && - ((next_frame.pcnt_inter > 0.75) || - (next_frame.pcnt_second_ref > 0.5)) && - ((mv_in_out_accumulator / (double)i > -0.2) || - (mv_in_out_accumulator > -2.0)) && - (boost_score > 100)) { - // Alterrnative boost calculation for alt ref - cpi->gfu_boost = calc_arf_boost(cpi, 0, (i - 1), (i - 1), &f_boost, &b_boost); - cpi->source_alt_ref_pending = TRUE; - - configure_arnr_filter(cpi, this_frame); - } else { - cpi->gfu_boost = (int)boost_score; - cpi->source_alt_ref_pending = FALSE; - } - - // Now decide how many bits should be allocated to the GF group as a - // proportion of those remaining in the kf group. - // The final key frame group in the clip is treated as a special case - // where cpi->twopass.kf_group_bits is tied to cpi->twopass.bits_left. - // This is also important for short clips where there may only be one - // key frame. - if (cpi->twopass.frames_to_key >= (int)(cpi->twopass.total_stats->count - - cpi->common.current_video_frame)) { - cpi->twopass.kf_group_bits = - (cpi->twopass.bits_left > 0) ? cpi->twopass.bits_left : 0; - } - - // Calculate the bits to be allocated to the group as a whole - if ((cpi->twopass.kf_group_bits > 0) && - (cpi->twopass.kf_group_error_left > 0)) { - cpi->twopass.gf_group_bits = - (int)((double)cpi->twopass.kf_group_bits * - (gf_group_err / (double)cpi->twopass.kf_group_error_left)); - } else - cpi->twopass.gf_group_bits = 0; - - cpi->twopass.gf_group_bits = - (cpi->twopass.gf_group_bits < 0) - ? 0 - : (cpi->twopass.gf_group_bits > cpi->twopass.kf_group_bits) - ? 
cpi->twopass.kf_group_bits : cpi->twopass.gf_group_bits; - - // Clip cpi->twopass.gf_group_bits based on user supplied data rate - // variability limit (cpi->oxcf.two_pass_vbrmax_section) - if (cpi->twopass.gf_group_bits > max_bits * cpi->baseline_gf_interval) - cpi->twopass.gf_group_bits = max_bits * cpi->baseline_gf_interval; - - // Reset the file position - reset_fpf_position(cpi, start_pos); - - // Update the record of error used so far (only done once per gf group) - cpi->twopass.modified_error_used += gf_group_err; - - // Assign bits to the arf or gf. - for (i = 0; i <= (cpi->source_alt_ref_pending && cpi->common.frame_type != KEY_FRAME); i++) { - int boost; - int allocation_chunks; - int Q = (cpi->oxcf.fixed_q < 0) ? cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q; - int gf_bits; - - boost = (cpi->gfu_boost * vp9_gfboost_qadjust(Q)) / 100; - - // Set max and minimum boost and hence minimum allocation - if (boost > ((cpi->baseline_gf_interval + 1) * 200)) - boost = ((cpi->baseline_gf_interval + 1) * 200); - else if (boost < 125) - boost = 125; - - if (cpi->source_alt_ref_pending && i == 0) - allocation_chunks = - ((cpi->baseline_gf_interval + 1) * 100) + boost; - else - allocation_chunks = - (cpi->baseline_gf_interval * 100) + (boost - 100); - - // Prevent overflow - if (boost > 1028) { - int divisor = boost >> 10; - boost /= divisor; - allocation_chunks /= divisor; - } - - // Calculate the number of bits to be spent on the gf or arf based on - // the boost number - gf_bits = (int)((double)boost * - (cpi->twopass.gf_group_bits / - (double)allocation_chunks)); - - // If the frame that is to be boosted is simpler than the average for - // the gf/arf group then use an alternative calculation - // based on the error score of the frame itself - if (mod_frame_err < gf_group_err / (double)cpi->baseline_gf_interval) { - double alt_gf_grp_bits; - int alt_gf_bits; - - alt_gf_grp_bits = - (double)cpi->twopass.kf_group_bits * - (mod_frame_err * 
(double)cpi->baseline_gf_interval) / - DOUBLE_DIVIDE_CHECK((double)cpi->twopass.kf_group_error_left); - - alt_gf_bits = (int)((double)boost * (alt_gf_grp_bits / - (double)allocation_chunks)); - - if (gf_bits > alt_gf_bits) { - gf_bits = alt_gf_bits; - } - } - // Else if it is harder than other frames in the group make sure it at - // least receives an allocation in keeping with its relative error - // score, otherwise it may be worse off than an "un-boosted" frame - else { - int alt_gf_bits = - (int)((double)cpi->twopass.kf_group_bits * - mod_frame_err / - DOUBLE_DIVIDE_CHECK((double)cpi->twopass.kf_group_error_left)); - - if (alt_gf_bits > gf_bits) { - gf_bits = alt_gf_bits; - } - } - - // Dont allow a negative value for gf_bits - if (gf_bits < 0) - gf_bits = 0; - - gf_bits += cpi->min_frame_bandwidth; // Add in minimum for a frame - - if (i == 0) { - cpi->twopass.gf_bits = gf_bits; - } - if (i == 1 || (!cpi->source_alt_ref_pending && (cpi->common.frame_type != KEY_FRAME))) { - cpi->per_frame_bandwidth = gf_bits; // Per frame bit target for this frame - } - } - - { - // Adjust KF group bits and error remainin - cpi->twopass.kf_group_error_left -= gf_group_err; - cpi->twopass.kf_group_bits -= cpi->twopass.gf_group_bits; - - if (cpi->twopass.kf_group_bits < 0) - cpi->twopass.kf_group_bits = 0; - - // Note the error score left in the remaining frames of the group. 
- // For normal GFs we want to remove the error score for the first frame - // of the group (except in Key frame case where this has already - // happened) - if (!cpi->source_alt_ref_pending && cpi->common.frame_type != KEY_FRAME) - cpi->twopass.gf_group_error_left = gf_group_err - gf_first_frame_err; - else - cpi->twopass.gf_group_error_left = gf_group_err; - - cpi->twopass.gf_group_bits -= cpi->twopass.gf_bits - cpi->min_frame_bandwidth; - - if (cpi->twopass.gf_group_bits < 0) - cpi->twopass.gf_group_bits = 0; - - // This condition could fail if there are two kfs very close together - // despite (MIN_GF_INTERVAL) and would cause a divide by 0 in the - // calculation of cpi->twopass.alt_extra_bits. - if (cpi->baseline_gf_interval >= 3) { - int boost = (cpi->source_alt_ref_pending) - ? b_boost : cpi->gfu_boost; - - if (boost >= 150) { - int pct_extra; - - pct_extra = (boost - 100) / 50; - pct_extra = (pct_extra > 20) ? 20 : pct_extra; - - cpi->twopass.alt_extra_bits = - (cpi->twopass.gf_group_bits * pct_extra) / 100; - cpi->twopass.gf_group_bits -= cpi->twopass.alt_extra_bits; - cpi->twopass.alt_extra_bits /= - ((cpi->baseline_gf_interval - 1) >> 1); - } else - cpi->twopass.alt_extra_bits = 0; - } else - cpi->twopass.alt_extra_bits = 0; - } - - if (cpi->common.frame_type != KEY_FRAME) { - FIRSTPASS_STATS sectionstats; - - zero_stats(&sectionstats); - reset_fpf_position(cpi, start_pos); - - for (i = 0; i < cpi->baseline_gf_interval; i++) { - input_stats(cpi, &next_frame); - accumulate_stats(&sectionstats, &next_frame); - } - - avg_stats(&sectionstats); - - cpi->twopass.section_intra_rating = - sectionstats.intra_error / - DOUBLE_DIVIDE_CHECK(sectionstats.coded_error); - - reset_fpf_position(cpi, start_pos); - } -} - -// Allocate bits to a normal frame that is neither a gf an arf or a key frame. 
-static void assign_std_frame_bits(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { - int target_frame_size; // gf_group_error_left - - double modified_err; - double err_fraction; // What portion of the remaining GF group error is used by this frame - - int max_bits = frame_max_bits(cpi); // Max for a single frame - - // Calculate modified prediction error used in bit allocation - modified_err = calculate_modified_err(cpi, this_frame); - - if (cpi->twopass.gf_group_error_left > 0) - err_fraction = modified_err / cpi->twopass.gf_group_error_left; // What portion of the remaining GF group error is used by this frame - else - err_fraction = 0.0; - - target_frame_size = (int)((double)cpi->twopass.gf_group_bits * err_fraction); // How many of those bits available for allocation should we give it? - - // Clip to target size to 0 - max_bits (or cpi->twopass.gf_group_bits) at the top end. - if (target_frame_size < 0) - target_frame_size = 0; - else { - if (target_frame_size > max_bits) - target_frame_size = max_bits; - - if (target_frame_size > cpi->twopass.gf_group_bits) - target_frame_size = cpi->twopass.gf_group_bits; - } - - cpi->twopass.gf_group_error_left -= modified_err; // Adjust error remaining - cpi->twopass.gf_group_bits -= target_frame_size; // Adjust bits remaining - - if (cpi->twopass.gf_group_bits < 0) - cpi->twopass.gf_group_bits = 0; - - target_frame_size += cpi->min_frame_bandwidth; // Add in the minimum number of bits that is set aside for every frame. - - - cpi->per_frame_bandwidth = target_frame_size; // Per frame bit target for this frame -} - -// Make a damped adjustment to the active max q. 
-static int adjust_active_maxq(int old_maxqi, int new_maxqi) { - int i; - int ret_val = new_maxqi; - double old_q; - double new_q; - double target_q; - - old_q = vp9_convert_qindex_to_q(old_maxqi); - new_q = vp9_convert_qindex_to_q(new_maxqi); - - target_q = ((old_q * 7.0) + new_q) / 8.0; - - if (target_q > old_q) { - for (i = old_maxqi; i <= new_maxqi; i++) { - if (vp9_convert_qindex_to_q(i) >= target_q) { - ret_val = i; - break; - } - } - } else { - for (i = old_maxqi; i >= new_maxqi; i--) { - if (vp9_convert_qindex_to_q(i) <= target_q) { - ret_val = i; - break; - } - } - } - - return ret_val; -} - -void vp9_second_pass(VP9_COMP *cpi) { - int tmp_q; - int frames_left = (int)(cpi->twopass.total_stats->count - cpi->common.current_video_frame); - - FIRSTPASS_STATS this_frame; - FIRSTPASS_STATS this_frame_copy; - - double this_frame_error; - double this_frame_intra_error; - double this_frame_coded_error; - - FIRSTPASS_STATS *start_pos; - - int overhead_bits; - - if (!cpi->twopass.stats_in) { - return; - } - - vp9_clear_system_state(); - - vpx_memset(&this_frame, 0, sizeof(FIRSTPASS_STATS)); - - if (EOF == input_stats(cpi, &this_frame)) - return; - - this_frame_error = this_frame.ssim_weighted_pred_err; - this_frame_intra_error = this_frame.intra_error; - this_frame_coded_error = this_frame.coded_error; - - start_pos = cpi->twopass.stats_in; - - // keyframe and section processing ! - if (cpi->twopass.frames_to_key == 0) { - // Define next KF group and assign bits to it - vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); - find_next_key_frame(cpi, &this_frame_copy); - } - - // Is this a GF / ARF (Note that a KF is always also a GF) - if (cpi->frames_till_gf_update_due == 0) { - // Define next gf group and assign bits to it - vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); - define_gf_group(cpi, &this_frame_copy); - - // If we are going to code an altref frame at the end of the group and the current frame is not a key frame.... 
- // If the previous group used an arf this frame has already benefited from that arf boost and it should not be given extra bits - // If the previous group was NOT coded using arf we may want to apply some boost to this GF as well - if (cpi->source_alt_ref_pending && (cpi->common.frame_type != KEY_FRAME)) { - // Assign a standard frames worth of bits from those allocated to the GF group - int bak = cpi->per_frame_bandwidth; - vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); - assign_std_frame_bits(cpi, &this_frame_copy); - cpi->per_frame_bandwidth = bak; - } - } - - // Otherwise this is an ordinary frame - else { - // Assign bits from those allocated to the GF group - vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); - assign_std_frame_bits(cpi, &this_frame_copy); - } - - // Keep a globally available copy of this and the next frame's iiratio. - cpi->twopass.this_iiratio = this_frame_intra_error / - DOUBLE_DIVIDE_CHECK(this_frame_coded_error); - { - FIRSTPASS_STATS next_frame; - if (lookup_next_frame_stats(cpi, &next_frame) != EOF) { - cpi->twopass.next_iiratio = next_frame.intra_error / - DOUBLE_DIVIDE_CHECK(next_frame.coded_error); - } - } - - // Set nominal per second bandwidth for this frame - cpi->target_bandwidth = cpi->per_frame_bandwidth * cpi->output_frame_rate; - if (cpi->target_bandwidth < 0) - cpi->target_bandwidth = 0; - - - // Account for mv, mode and other overheads. - overhead_bits = estimate_modemvcost( - cpi, cpi->twopass.total_left_stats); - - // Special case code for first frame. - if (cpi->common.current_video_frame == 0) { - cpi->twopass.est_max_qcorrection_factor = 1.0; - - // Set a cq_level in constrained quality mode. 
- if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) { - int est_cq; - - est_cq = - estimate_cq(cpi, - cpi->twopass.total_left_stats, - (int)(cpi->twopass.bits_left / frames_left), - overhead_bits); - - cpi->cq_target_quality = cpi->oxcf.cq_level; - if (est_cq > cpi->cq_target_quality) - cpi->cq_target_quality = est_cq; - } - - // guess at maxq needed in 2nd pass - cpi->twopass.maxq_max_limit = cpi->worst_quality; - cpi->twopass.maxq_min_limit = cpi->best_quality; - - tmp_q = estimate_max_q( - cpi, - cpi->twopass.total_left_stats, - (int)(cpi->twopass.bits_left / frames_left), - overhead_bits); - - cpi->active_worst_quality = tmp_q; - cpi->ni_av_qi = tmp_q; - cpi->avg_q = vp9_convert_qindex_to_q(tmp_q); - - // Limit the maxq value returned subsequently. - // This increases the risk of overspend or underspend if the initial - // estimate for the clip is bad, but helps prevent excessive - // variation in Q, especially near the end of a clip - // where for example a small overspend may cause Q to crash - adjust_maxq_qrange(cpi); - } - - // The last few frames of a clip almost always have to few or too many - // bits and for the sake of over exact rate control we dont want to make - // radical adjustments to the allowed quantizer range just to use up a - // few surplus bits or get beneath the target rate. 
- else if ((cpi->common.current_video_frame < - (((unsigned int)cpi->twopass.total_stats->count * 255) >> 8)) && - ((cpi->common.current_video_frame + cpi->baseline_gf_interval) < - (unsigned int)cpi->twopass.total_stats->count)) { - if (frames_left < 1) - frames_left = 1; - - tmp_q = estimate_max_q( - cpi, - cpi->twopass.total_left_stats, - (int)(cpi->twopass.bits_left / frames_left), - overhead_bits); - - // Make a damped adjustment to active max Q - cpi->active_worst_quality = - adjust_active_maxq(cpi->active_worst_quality, tmp_q); - } - - cpi->twopass.frames_to_key--; - - // Update the total stats remaining sturcture - subtract_stats(cpi->twopass.total_left_stats, &this_frame); -} - - -static BOOL test_candidate_kf(VP9_COMP *cpi, FIRSTPASS_STATS *last_frame, FIRSTPASS_STATS *this_frame, FIRSTPASS_STATS *next_frame) { - BOOL is_viable_kf = FALSE; - - // Does the frame satisfy the primary criteria of a key frame - // If so, then examine how well it predicts subsequent frames - if ((this_frame->pcnt_second_ref < 0.10) && - (next_frame->pcnt_second_ref < 0.10) && - ((this_frame->pcnt_inter < 0.05) || - ( - ((this_frame->pcnt_inter - this_frame->pcnt_neutral) < .35) && - ((this_frame->intra_error / DOUBLE_DIVIDE_CHECK(this_frame->coded_error)) < 2.5) && - ((fabs(last_frame->coded_error - this_frame->coded_error) / DOUBLE_DIVIDE_CHECK(this_frame->coded_error) > .40) || - (fabs(last_frame->intra_error - this_frame->intra_error) / DOUBLE_DIVIDE_CHECK(this_frame->intra_error) > .40) || - ((next_frame->intra_error / DOUBLE_DIVIDE_CHECK(next_frame->coded_error)) > 3.5) - ) - ) - ) - ) { - int i; - FIRSTPASS_STATS *start_pos; - - FIRSTPASS_STATS local_next_frame; - - double boost_score = 0.0; - double old_boost_score = 0.0; - double decay_accumulator = 1.0; - double next_iiratio; - - vpx_memcpy(&local_next_frame, next_frame, sizeof(*next_frame)); - - // Note the starting file position so we can reset to it - start_pos = cpi->twopass.stats_in; - - // Examine how well the 
key frame predicts subsequent frames - for (i = 0; i < 16; i++) { - next_iiratio = (IIKFACTOR1 * local_next_frame.intra_error / DOUBLE_DIVIDE_CHECK(local_next_frame.coded_error)); - - if (next_iiratio > RMAX) - next_iiratio = RMAX; - - // Cumulative effect of decay in prediction quality - if (local_next_frame.pcnt_inter > 0.85) - decay_accumulator = decay_accumulator * local_next_frame.pcnt_inter; - else - decay_accumulator = decay_accumulator * ((0.85 + local_next_frame.pcnt_inter) / 2.0); - - // decay_accumulator = decay_accumulator * local_next_frame.pcnt_inter; - - // Keep a running total - boost_score += (decay_accumulator * next_iiratio); - - // Test various breakout clauses - if ((local_next_frame.pcnt_inter < 0.05) || - (next_iiratio < 1.5) || - (((local_next_frame.pcnt_inter - - local_next_frame.pcnt_neutral) < 0.20) && - (next_iiratio < 3.0)) || - ((boost_score - old_boost_score) < 3.0) || - (local_next_frame.intra_error < 200) - ) { - break; - } - - old_boost_score = boost_score; - - // Get the next frame details - if (EOF == input_stats(cpi, &local_next_frame)) - break; - } - - // If there is tolerable prediction for at least the next 3 frames then break out else discard this pottential key frame and move on - if (boost_score > 30.0 && (i > 3)) - is_viable_kf = TRUE; - else { - // Reset the file position - reset_fpf_position(cpi, start_pos); - - is_viable_kf = FALSE; - } - } - - return is_viable_kf; -} -static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { - int i, j; - FIRSTPASS_STATS last_frame; - FIRSTPASS_STATS first_frame; - FIRSTPASS_STATS next_frame; - FIRSTPASS_STATS *start_position; - - double decay_accumulator = 1.0; - double zero_motion_accumulator = 1.0; - double boost_score = 0; - double old_boost_score = 0.0; - double loop_decay_rate; - - double kf_mod_err = 0.0; - double kf_group_err = 0.0; - double kf_group_intra_err = 0.0; - double kf_group_coded_err = 0.0; - double recent_loop_decay[8] = {1.0, 1.0, 1.0, 1.0, 
1.0, 1.0, 1.0, 1.0}; - - vpx_memset(&next_frame, 0, sizeof(next_frame)); // assure clean - - vp9_clear_system_state(); // __asm emms; - start_position = cpi->twopass.stats_in; - - cpi->common.frame_type = KEY_FRAME; - - // is this a forced key frame by interval - cpi->this_key_frame_forced = cpi->next_key_frame_forced; - - // Clear the alt ref active flag as this can never be active on a key frame - cpi->source_alt_ref_active = FALSE; - - // Kf is always a gf so clear frames till next gf counter - cpi->frames_till_gf_update_due = 0; - - cpi->twopass.frames_to_key = 1; - - // Take a copy of the initial frame details - vpx_memcpy(&first_frame, this_frame, sizeof(*this_frame)); - - cpi->twopass.kf_group_bits = 0; // Total bits avaialable to kf group - cpi->twopass.kf_group_error_left = 0; // Group modified error score. - - kf_mod_err = calculate_modified_err(cpi, this_frame); - - // find the next keyframe - i = 0; - while (cpi->twopass.stats_in < cpi->twopass.stats_in_end) { - // Accumulate kf group error - kf_group_err += calculate_modified_err(cpi, this_frame); - - // These figures keep intra and coded error counts for all frames including key frames in the group. - // The effect of the key frame itself can be subtracted out using the first_frame data collected above - kf_group_intra_err += this_frame->intra_error; - kf_group_coded_err += this_frame->coded_error; - - // load a the next frame's stats - vpx_memcpy(&last_frame, this_frame, sizeof(*this_frame)); - input_stats(cpi, this_frame); - - // Provided that we are not at the end of the file... - if (cpi->oxcf.auto_key - && lookup_next_frame_stats(cpi, &next_frame) != EOF) { - // Normal scene cut check - if (test_candidate_kf(cpi, &last_frame, this_frame, &next_frame)) { - break; - } - - // How fast is prediction quality decaying - loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame); - - // We want to know something about the recent past... 
rather than - // as used elsewhere where we are concened with decay in prediction - // quality since the last GF or KF. - recent_loop_decay[i % 8] = loop_decay_rate; - decay_accumulator = 1.0; - for (j = 0; j < 8; j++) { - decay_accumulator = decay_accumulator * recent_loop_decay[j]; - } - - // Special check for transition or high motion followed by a - // to a static scene. - if (detect_transition_to_still(cpi, i, - (cpi->key_frame_frequency - i), - loop_decay_rate, - decay_accumulator)) { - break; - } - - - // Step on to the next frame - cpi->twopass.frames_to_key++; - - // If we don't have a real key frame within the next two - // forcekeyframeevery intervals then break out of the loop. - if (cpi->twopass.frames_to_key >= 2 * (int)cpi->key_frame_frequency) - break; - } else - cpi->twopass.frames_to_key++; - - i++; - } - - // If there is a max kf interval set by the user we must obey it. - // We already breakout of the loop above at 2x max. - // This code centers the extra kf if the actual natural - // interval is between 1x and 2x - if (cpi->oxcf.auto_key - && cpi->twopass.frames_to_key > (int)cpi->key_frame_frequency) { - FIRSTPASS_STATS *current_pos = cpi->twopass.stats_in; - FIRSTPASS_STATS tmp_frame; - - cpi->twopass.frames_to_key /= 2; - - // Copy first frame details - vpx_memcpy(&tmp_frame, &first_frame, sizeof(first_frame)); - - // Reset to the start of the group - reset_fpf_position(cpi, start_position); - - kf_group_err = 0; - kf_group_intra_err = 0; - kf_group_coded_err = 0; - - // Rescan to get the correct error data for the forced kf group - for (i = 0; i < cpi->twopass.frames_to_key; i++) { - // Accumulate kf group errors - kf_group_err += calculate_modified_err(cpi, &tmp_frame); - kf_group_intra_err += tmp_frame.intra_error; - kf_group_coded_err += tmp_frame.coded_error; - - // Load a the next frame's stats - input_stats(cpi, &tmp_frame); - } - - // Reset to the start of the group - reset_fpf_position(cpi, current_pos); - - 
cpi->next_key_frame_forced = TRUE; - } else - cpi->next_key_frame_forced = FALSE; - - // Special case for the last frame of the file - if (cpi->twopass.stats_in >= cpi->twopass.stats_in_end) { - // Accumulate kf group error - kf_group_err += calculate_modified_err(cpi, this_frame); - - // These figures keep intra and coded error counts for all frames including key frames in the group. - // The effect of the key frame itself can be subtracted out using the first_frame data collected above - kf_group_intra_err += this_frame->intra_error; - kf_group_coded_err += this_frame->coded_error; - } - - // Calculate the number of bits that should be assigned to the kf group. - if ((cpi->twopass.bits_left > 0) && (cpi->twopass.modified_error_left > 0.0)) { - // Max for a single normal frame (not key frame) - int max_bits = frame_max_bits(cpi); - - // Maximum bits for the kf group - int64_t max_grp_bits; - - // Default allocation based on bits left and relative - // complexity of the section - cpi->twopass.kf_group_bits = (int64_t)(cpi->twopass.bits_left * - (kf_group_err / - cpi->twopass.modified_error_left)); - - // Clip based on maximum per frame rate defined by the user. 
- max_grp_bits = (int64_t)max_bits * (int64_t)cpi->twopass.frames_to_key; - if (cpi->twopass.kf_group_bits > max_grp_bits) - cpi->twopass.kf_group_bits = max_grp_bits; - } else - cpi->twopass.kf_group_bits = 0; - - // Reset the first pass file position - reset_fpf_position(cpi, start_position); - - // determine how big to make this keyframe based on how well the subsequent frames use inter blocks - decay_accumulator = 1.0; - boost_score = 0.0; - loop_decay_rate = 1.00; // Starting decay rate - - for (i = 0; i < cpi->twopass.frames_to_key; i++) { - double r; - - if (EOF == input_stats(cpi, &next_frame)) - break; - - if (next_frame.intra_error > cpi->twopass.kf_intra_err_min) - r = (IIKFACTOR2 * next_frame.intra_error / - DOUBLE_DIVIDE_CHECK(next_frame.coded_error)); - else - r = (IIKFACTOR2 * cpi->twopass.kf_intra_err_min / - DOUBLE_DIVIDE_CHECK(next_frame.coded_error)); - - if (r > RMAX) - r = RMAX; - - // Monitor for static sections. - if ((next_frame.pcnt_inter - next_frame.pcnt_motion) < - zero_motion_accumulator) { - zero_motion_accumulator = - (next_frame.pcnt_inter - next_frame.pcnt_motion); - } - - // How fast is prediction quality decaying - if (!detect_flash(cpi, 0)) { - loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame); - decay_accumulator = decay_accumulator * loop_decay_rate; - decay_accumulator = decay_accumulator < 0.1 ? 
0.1 : decay_accumulator; - } - - boost_score += (decay_accumulator * r); - - if ((i > MIN_GF_INTERVAL) && - ((boost_score - old_boost_score) < 6.25)) { - break; - } - - old_boost_score = boost_score; - } - - { - FIRSTPASS_STATS sectionstats; - - zero_stats(&sectionstats); - reset_fpf_position(cpi, start_position); - - for (i = 0; i < cpi->twopass.frames_to_key; i++) { - input_stats(cpi, &next_frame); - accumulate_stats(&sectionstats, &next_frame); - } - - avg_stats(&sectionstats); - - cpi->twopass.section_intra_rating = - sectionstats.intra_error - / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error); - } - - // Reset the first pass file position - reset_fpf_position(cpi, start_position); - - // Work out how many bits to allocate for the key frame itself - if (1) { - int kf_boost = boost_score; - int allocation_chunks; - int alt_kf_bits; - - if (kf_boost < 300) { - kf_boost += (cpi->twopass.frames_to_key * 3); - if (kf_boost > 300) - kf_boost = 300; - } - - if (kf_boost < 250) // Min KF boost - kf_boost = 250; - - // Make a note of baseline boost and the zero motion - // accumulator value for use elsewhere. - cpi->kf_boost = kf_boost; - cpi->kf_zeromotion_pct = (int)(zero_motion_accumulator * 100.0); - - // We do three calculations for kf size. - // The first is based on the error score for the whole kf group. - // The second (optionally) on the key frame's own error if this is - // smaller than the average for the group. - // The final one ensures that the frame receives at least the - // allocation it would have received based on its own error score vs - // the error score remaining - // Special case if the sequence appears almost totally static - // In this case we want to spend almost all of the bits on the - // key frame. - // cpi->twopass.frames_to_key-1 because key frame itself is taken - // care of by kf_boost. 
- if (zero_motion_accumulator >= 0.99) { - allocation_chunks = - ((cpi->twopass.frames_to_key - 1) * 10) + kf_boost; - } else { - allocation_chunks = - ((cpi->twopass.frames_to_key - 1) * 100) + kf_boost; - } - - // Prevent overflow - if (kf_boost > 1028) { - int divisor = kf_boost >> 10; - kf_boost /= divisor; - allocation_chunks /= divisor; - } - - cpi->twopass.kf_group_bits = (cpi->twopass.kf_group_bits < 0) ? 0 : cpi->twopass.kf_group_bits; - - // Calculate the number of bits to be spent on the key frame - cpi->twopass.kf_bits = (int)((double)kf_boost * ((double)cpi->twopass.kf_group_bits / (double)allocation_chunks)); - - // If the key frame is actually easier than the average for the - // kf group (which does sometimes happen... eg a blank intro frame) - // Then use an alternate calculation based on the kf error score - // which should give a smaller key frame. - if (kf_mod_err < kf_group_err / cpi->twopass.frames_to_key) { - double alt_kf_grp_bits = - ((double)cpi->twopass.bits_left * - (kf_mod_err * (double)cpi->twopass.frames_to_key) / - DOUBLE_DIVIDE_CHECK(cpi->twopass.modified_error_left)); - - alt_kf_bits = (int)((double)kf_boost * - (alt_kf_grp_bits / (double)allocation_chunks)); - - if (cpi->twopass.kf_bits > alt_kf_bits) { - cpi->twopass.kf_bits = alt_kf_bits; - } - } - // Else if it is much harder than other frames in the group make sure - // it at least receives an allocation in keeping with its relative - // error score - else { - alt_kf_bits = - (int)((double)cpi->twopass.bits_left * - (kf_mod_err / - DOUBLE_DIVIDE_CHECK(cpi->twopass.modified_error_left))); - - if (alt_kf_bits > cpi->twopass.kf_bits) { - cpi->twopass.kf_bits = alt_kf_bits; - } - } - - cpi->twopass.kf_group_bits -= cpi->twopass.kf_bits; - cpi->twopass.kf_bits += cpi->min_frame_bandwidth; // Add in the minimum frame allowance - - cpi->per_frame_bandwidth = cpi->twopass.kf_bits; // Peer frame bit target for this frame - cpi->target_bandwidth = cpi->twopass.kf_bits * 
cpi->output_frame_rate; // Convert to a per second bitrate - } - - // Note the total error score of the kf group minus the key frame itself - cpi->twopass.kf_group_error_left = (int)(kf_group_err - kf_mod_err); - - // Adjust the count of total modified error left. - // The count of bits left is adjusted elsewhere based on real coded frame sizes - cpi->twopass.modified_error_left -= kf_group_err; -} diff --git a/vp8/encoder/firstpass.h b/vp8/encoder/firstpass.h deleted file mode 100644 index f90a857f2..000000000 --- a/vp8/encoder/firstpass.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#if !defined __INC_FIRSTPASS_H -#define __INC_FIRSTPASS_H - -extern void vp9_init_first_pass(VP9_COMP *cpi); -extern void vp9_first_pass(VP9_COMP *cpi); -extern void vp9_end_first_pass(VP9_COMP *cpi); - -extern void vp9_init_second_pass(VP9_COMP *cpi); -extern void vp9_second_pass(VP9_COMP *cpi); -extern void vp9_end_second_pass(VP9_COMP *cpi); - -#endif diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c deleted file mode 100644 index 03f1add3b..000000000 --- a/vp8/encoder/generic/csystemdependent.c +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include "vpx_ports/config.h" -#include "vp8/encoder/variance.h" -#include "vp8/encoder/onyx_int.h" - - -void vp9_arch_x86_encoder_init(VP9_COMP *cpi); -void vp9_arch_arm_encoder_init(VP9_COMP *cpi); - -void (*vp9_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, - YV12_BUFFER_CONFIG *dst_ybc, - int fraction); -extern void vp9_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, - YV12_BUFFER_CONFIG *dst_ybc, - int fraction); - -void vp9_cmachine_specific_config(VP9_COMP *cpi) { -#if CONFIG_RUNTIME_CPU_DETECT - cpi->rtcd.common = &cpi->common.rtcd; - - cpi->rtcd.search.full_search = vp9_full_search_sad; - cpi->rtcd.search.refining_search = vp9_refining_search_sad; - cpi->rtcd.search.diamond_search = vp9_diamond_search_sad; - cpi->rtcd.temporal.apply = vp9_temporal_filter_apply_c; -#endif - - vp9_yv12_copy_partial_frame_ptr = vp9_yv12_copy_partial_frame; - -#if ARCH_X86 || ARCH_X86_64 - vp9_arch_x86_encoder_init(cpi); -#endif - -#if ARCH_ARM - vp9_arch_arm_encoder_init(cpi); -#endif - - -} diff --git a/vp8/encoder/lookahead.c b/vp8/encoder/lookahead.c deleted file mode 100644 index ec21a8480..000000000 --- a/vp8/encoder/lookahead.c +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Copyright (c) 2011 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ -#include <assert.h> -#include <stdlib.h> -#include "vpx_config.h" -#include "lookahead.h" -#include "vp8/common/extend.h" - -#define MAX_LAG_BUFFERS 25 - -struct lookahead_ctx { - unsigned int max_sz; /* Absolute size of the queue */ - unsigned int sz; /* Number of buffers currently in the queue */ - unsigned int read_idx; /* Read index */ - unsigned int write_idx; /* Write index */ - struct lookahead_entry *buf; /* Buffer list */ -}; - - -/* Return the buffer at the given absolute index and increment the index */ -static struct lookahead_entry * -pop(struct lookahead_ctx *ctx, - unsigned int *idx) { - unsigned int index = *idx; - struct lookahead_entry *buf = ctx->buf + index; - - assert(index < ctx->max_sz); - if (++index >= ctx->max_sz) - index -= ctx->max_sz; - *idx = index; - return buf; -} - - -void -vp9_lookahead_destroy(struct lookahead_ctx *ctx) { - if (ctx) { - if (ctx->buf) { - int i; - - for (i = 0; i < ctx->max_sz; i++) - vp8_yv12_de_alloc_frame_buffer(&ctx->buf[i].img); - free(ctx->buf); - } - free(ctx); - } -} - - -struct lookahead_ctx * -vp9_lookahead_init(unsigned int width, - unsigned int height, - unsigned int depth) { - struct lookahead_ctx *ctx = NULL; - int i; - - /* Clamp the lookahead queue depth */ - if (depth < 1) - depth = 1; - else if (depth > MAX_LAG_BUFFERS) - depth = MAX_LAG_BUFFERS; - - /* Align the buffer dimensions */ - width = (width + 15) &~15; - height = (height + 15) &~15; - - /* Allocate the lookahead structures */ - ctx = calloc(1, sizeof(*ctx)); - if (ctx) { - ctx->max_sz = depth; - ctx->buf = calloc(depth, sizeof(*ctx->buf)); - if (!ctx->buf) - goto bail; - for (i = 0; i < depth; i++) - if (vp8_yv12_alloc_frame_buffer(&ctx->buf[i].img, - width, height, VP8BORDERINPIXELS)) - goto bail; - } - return ctx; -bail: - vp9_lookahead_destroy(ctx); - return NULL; -} - - -int -vp9_lookahead_push(struct lookahead_ctx *ctx, - YV12_BUFFER_CONFIG *src, - int64_t ts_start, - int64_t ts_end, - unsigned int flags, - unsigned char 
*active_map) { - struct lookahead_entry *buf; - int row, col, active_end; - int mb_rows = (src->y_height + 15) >> 4; - int mb_cols = (src->y_width + 15) >> 4; - - if (ctx->sz + 1 > ctx->max_sz) - return 1; - ctx->sz++; - buf = pop(ctx, &ctx->write_idx); - - // Only do this partial copy if the following conditions are all met: - // 1. Lookahead queue has has size of 1. - // 2. Active map is provided. - // 3. This is not a key frame, golden nor altref frame. - if (ctx->max_sz == 1 && active_map && !flags) { - for (row = 0; row < mb_rows; ++row) { - col = 0; - - while (1) { - // Find the first active macroblock in this row. - for (; col < mb_cols; ++col) { - if (active_map[col]) - break; - } - - // No more active macroblock in this row. - if (col == mb_cols) - break; - - // Find the end of active region in this row. - active_end = col; - - for (; active_end < mb_cols; ++active_end) { - if (!active_map[active_end]) - break; - } - - // Only copy this active region. - vp9_copy_and_extend_frame_with_rect(src, &buf->img, - row << 4, - col << 4, 16, - (active_end - col) << 4); - - // Start again from the end of this active region. 
- col = active_end; - } - - active_map += mb_cols; - } - } else { - vp9_copy_and_extend_frame(src, &buf->img); - } - buf->ts_start = ts_start; - buf->ts_end = ts_end; - buf->flags = flags; - return 0; -} - - -struct lookahead_entry * -vp9_lookahead_pop(struct lookahead_ctx *ctx, - int drain) { - struct lookahead_entry *buf = NULL; - - if (ctx->sz && (drain || ctx->sz == ctx->max_sz)) { - buf = pop(ctx, &ctx->read_idx); - ctx->sz--; - } - return buf; -} - - -struct lookahead_entry * -vp9_lookahead_peek(struct lookahead_ctx *ctx, - int index) { - struct lookahead_entry *buf = NULL; - - assert(index < ctx->max_sz); - if (index < ctx->sz) { - index += ctx->read_idx; - if (index >= ctx->max_sz) - index -= ctx->max_sz; - buf = ctx->buf + index; - } - return buf; -} - - -unsigned int -vp9_lookahead_depth(struct lookahead_ctx *ctx) { - return ctx->sz; -} diff --git a/vp8/encoder/lookahead.h b/vp8/encoder/lookahead.h deleted file mode 100644 index 44c31d66d..000000000 --- a/vp8/encoder/lookahead.h +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (c) 2011 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ -#ifndef LOOKAHEAD_H -#define LOOKAHEAD_H -#include "vpx_scale/yv12config.h" -#include "vpx/vpx_integer.h" - -struct lookahead_entry { - YV12_BUFFER_CONFIG img; - int64_t ts_start; - int64_t ts_end; - unsigned int flags; -}; - - -struct lookahead_ctx; - -/**\brief Initializes the lookahead stage - * - * The lookahead stage is a queue of frame buffers on which some analysis - * may be done when buffers are enqueued. 
- * - * - */ -struct lookahead_ctx *vp9_lookahead_init(unsigned int width, - unsigned int height, - unsigned int depth - ); - - -/**\brief Destroys the lookahead stage - * - */ -void vp9_lookahead_destroy(struct lookahead_ctx *ctx); - - -/**\brief Enqueue a source buffer - * - * This function will copy the source image into a new framebuffer with - * the expected stride/border. - * - * If active_map is non-NULL and there is only one frame in the queue, then copy - * only active macroblocks. - * - * \param[in] ctx Pointer to the lookahead context - * \param[in] src Pointer to the image to enqueue - * \param[in] ts_start Timestamp for the start of this frame - * \param[in] ts_end Timestamp for the end of this frame - * \param[in] flags Flags set on this frame - * \param[in] active_map Map that specifies which macroblock is active - */ -int -vp9_lookahead_push(struct lookahead_ctx *ctx, - YV12_BUFFER_CONFIG *src, - int64_t ts_start, - int64_t ts_end, - unsigned int flags, - unsigned char *active_map); - - -/**\brief Get the next source buffer to encode - * - * - * \param[in] ctx Pointer to the lookahead context - * \param[in] drain Flag indicating the buffer should be drained - * (return a buffer regardless of the current queue depth) - * - * \retval NULL, if drain set and queue is empty - * \retval NULL, if drain not set and queue not of the configured depth - * - */ -struct lookahead_entry * -vp9_lookahead_pop(struct lookahead_ctx *ctx, - int drain); - - -/**\brief Get a future source buffer to encode - * - * \param[in] ctx Pointer to the lookahead context - * \param[in] index Index of the frame to be returned, 0 == next frame - * - * \retval NULL, if no buffer exists at the specified index - * - */ -struct lookahead_entry * -vp9_lookahead_peek(struct lookahead_ctx *ctx, - int index); - - -/**\brief Get the number of frames currently in the lookahead queue - * - * \param[in] ctx Pointer to the lookahead context - */ -unsigned int -vp9_lookahead_depth(struct 
lookahead_ctx *ctx); - - -#endif diff --git a/vp8/encoder/mbgraph.c b/vp8/encoder/mbgraph.c deleted file mode 100644 index d2e609876..000000000 --- a/vp8/encoder/mbgraph.c +++ /dev/null @@ -1,480 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <limits.h> -#include <vp8/encoder/encodeintra.h> -#include <vp8/encoder/rdopt.h> -#include <vp8/common/setupintrarecon.h> -#include <vp8/common/blockd.h> -#include <vp8/common/reconinter.h> -#include <vp8/common/systemdependent.h> -#include <vpx_mem/vpx_mem.h> -#include <vp8/encoder/segmentation.h> - -static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, - int_mv *ref_mv, - int_mv *dst_mv) { - MACROBLOCK *const x = &cpi->mb; - MACROBLOCKD *const xd = &x->e_mbd; - BLOCK *b = &x->block[0]; - BLOCKD *d = &xd->block[0]; - vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16]; - unsigned int best_err; - int step_param, further_steps; - - int tmp_col_min = x->mv_col_min; - int tmp_col_max = x->mv_col_max; - int tmp_row_min = x->mv_row_min; - int tmp_row_max = x->mv_row_max; - int_mv ref_full; - - // Further step/diamond searches as necessary - if (cpi->Speed < 8) { - step_param = cpi->sf.first_step + ((cpi->Speed > 5) ? 
1 : 0); - further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; - } else { - step_param = cpi->sf.first_step + 2; - further_steps = 0; - } - - vp9_clamp_mv_min_max(x, ref_mv); - - ref_full.as_mv.col = ref_mv->as_mv.col >> 3; - ref_full.as_mv.row = ref_mv->as_mv.row >> 3; - - /*cpi->sf.search_method == HEX*/ - best_err = vp9_hex_search( - x, b, d, - &ref_full, dst_mv, - step_param, - x->errorperbit, - &v_fn_ptr, - NULLMVCOST, - NULLMVCOST, - ref_mv); - - // Try sub-pixel MC - // if (bestsme > error_thresh && bestsme < INT_MAX) - { - int distortion; - unsigned int sse; - best_err = cpi->find_fractional_mv_step( - x, b, d, - dst_mv, ref_mv, - x->errorperbit, &v_fn_ptr, - NULLMVCOST, - & distortion, &sse); - } - -#if CONFIG_PRED_FILTER - // Disable the prediction filter - xd->mode_info_context->mbmi.pred_filter_enabled = 0; -#endif - - vp9_set_mbmode_and_mvs(x, NEWMV, dst_mv); - vp9_build_1st_inter16x16_predictors_mby(xd, xd->predictor, 16, 0); - best_err = vp9_sad16x16(xd->dst.y_buffer, xd->dst.y_stride, - xd->predictor, 16, INT_MAX); - - /* restore UMV window */ - x->mv_col_min = tmp_col_min; - x->mv_col_max = tmp_col_max; - x->mv_row_min = tmp_row_min; - x->mv_row_max = tmp_row_max; - - return best_err; -} - -static int do_16x16_motion_search -( - VP9_COMP *cpi, - int_mv *ref_mv, - int_mv *dst_mv, - YV12_BUFFER_CONFIG *buf, - int buf_mb_y_offset, - YV12_BUFFER_CONFIG *ref, - int mb_y_offset -) { - MACROBLOCK *const x = &cpi->mb; - MACROBLOCKD *const xd = &x->e_mbd; - unsigned int err, tmp_err; - int_mv tmp_mv; - int n; - - for (n = 0; n < 16; n++) { - BLOCKD *d = &xd->block[n]; - BLOCK *b = &x->block[n]; - - b->base_src = &buf->y_buffer; - b->src_stride = buf->y_stride; - b->src = buf->y_stride * (n & 12) + (n & 3) * 4 + buf_mb_y_offset; - - d->base_pre = &ref->y_buffer; - d->pre_stride = ref->y_stride; - d->pre = ref->y_stride * (n & 12) + (n & 3) * 4 + mb_y_offset; - } - - // Try zero MV first - // FIXME should really use something like near/nearest MV 
and/or MV prediction - xd->pre.y_buffer = ref->y_buffer + mb_y_offset; - xd->pre.y_stride = ref->y_stride; - err = vp9_sad16x16(ref->y_buffer + mb_y_offset, ref->y_stride, - xd->dst.y_buffer, xd->dst.y_stride, INT_MAX); - dst_mv->as_int = 0; - - // Test last reference frame using the previous best mv as the - // starting point (best reference) for the search - tmp_err = do_16x16_motion_iteration(cpi, ref_mv, &tmp_mv); - if (tmp_err < err) { - err = tmp_err; - dst_mv->as_int = tmp_mv.as_int; - } - - // If the current best reference mv is not centred on 0,0 then do a 0,0 based search as well - if (ref_mv->as_int) { - int tmp_err; - int_mv zero_ref_mv, tmp_mv; - - zero_ref_mv.as_int = 0; - tmp_err = do_16x16_motion_iteration(cpi, &zero_ref_mv, &tmp_mv); - if (tmp_err < err) { - dst_mv->as_int = tmp_mv.as_int; - err = tmp_err; - } - } - - return err; -} - -static int do_16x16_zerozero_search -( - VP9_COMP *cpi, - int_mv *dst_mv, - YV12_BUFFER_CONFIG *buf, - int buf_mb_y_offset, - YV12_BUFFER_CONFIG *ref, - int mb_y_offset -) { - MACROBLOCK *const x = &cpi->mb; - MACROBLOCKD *const xd = &x->e_mbd; - unsigned int err; - int n; - - for (n = 0; n < 16; n++) { - BLOCKD *d = &xd->block[n]; - BLOCK *b = &x->block[n]; - - b->base_src = &buf->y_buffer; - b->src_stride = buf->y_stride; - b->src = buf->y_stride * (n & 12) + (n & 3) * 4 + buf_mb_y_offset; - - d->base_pre = &ref->y_buffer; - d->pre_stride = ref->y_stride; - d->pre = ref->y_stride * (n & 12) + (n & 3) * 4 + mb_y_offset; - } - - // Try zero MV first - // FIXME should really use something like near/nearest MV and/or MV prediction - xd->pre.y_buffer = ref->y_buffer + mb_y_offset; - xd->pre.y_stride = ref->y_stride; - // VARIANCE_INVOKE(&cpi->rtcd.variance, satd16x16) - err = vp9_sad16x16(ref->y_buffer + mb_y_offset, ref->y_stride, - xd->dst.y_buffer, xd->dst.y_stride, INT_MAX); - - dst_mv->as_int = 0; - - return err; -} -static int find_best_16x16_intra -( - VP9_COMP *cpi, - YV12_BUFFER_CONFIG *buf, - int mb_y_offset, 
- MB_PREDICTION_MODE *pbest_mode -) { - MACROBLOCK *const x = &cpi->mb; - MACROBLOCKD *const xd = &x->e_mbd; - MB_PREDICTION_MODE best_mode = -1, mode; - int best_err = INT_MAX; - - // calculate SATD for each intra prediction mode; - // we're intentionally not doing 4x4, we just want a rough estimate - for (mode = DC_PRED; mode <= TM_PRED; mode++) { - unsigned int err; - - xd->mode_info_context->mbmi.mode = mode; - vp9_build_intra_predictors_mby(xd); - err = vp9_sad16x16(xd->predictor, 16, buf->y_buffer + mb_y_offset, - buf->y_stride, best_err); - // find best - if (err < best_err) { - best_err = err; - best_mode = mode; - } - } - - if (pbest_mode) - *pbest_mode = best_mode; - - return best_err; -} - -static void update_mbgraph_mb_stats -( - VP9_COMP *cpi, - MBGRAPH_MB_STATS *stats, - YV12_BUFFER_CONFIG *buf, - int mb_y_offset, - YV12_BUFFER_CONFIG *golden_ref, - int_mv *prev_golden_ref_mv, - int gld_y_offset, - YV12_BUFFER_CONFIG *alt_ref, - int_mv *prev_alt_ref_mv, - int arf_y_offset -) { - MACROBLOCK *const x = &cpi->mb; - MACROBLOCKD *const xd = &x->e_mbd; - int intra_error; - - // FIXME in practice we're completely ignoring chroma here - xd->dst.y_buffer = buf->y_buffer + mb_y_offset; - - // do intra 16x16 prediction - intra_error = find_best_16x16_intra(cpi, buf, mb_y_offset, &stats->ref[INTRA_FRAME].m.mode); - if (intra_error <= 0) - intra_error = 1; - stats->ref[INTRA_FRAME].err = intra_error; - - // Golden frame MV search, if it exists and is different than last frame - if (golden_ref) { - int g_motion_error = do_16x16_motion_search(cpi, prev_golden_ref_mv, - &stats->ref[GOLDEN_FRAME].m.mv, - buf, mb_y_offset, - golden_ref, gld_y_offset); - stats->ref[GOLDEN_FRAME].err = g_motion_error; - } else { - stats->ref[GOLDEN_FRAME].err = INT_MAX; - stats->ref[GOLDEN_FRAME].m.mv.as_int = 0; - } - - // Alt-ref frame MV search, if it exists and is different than last/golden frame - if (alt_ref) { - // int a_motion_error = do_16x16_motion_search(cpi, prev_alt_ref_mv, 
- // &stats->ref[ALTREF_FRAME].m.mv, - // buf, mb_y_offset, - // alt_ref, arf_y_offset); - - int a_motion_error = - do_16x16_zerozero_search(cpi, - &stats->ref[ALTREF_FRAME].m.mv, - buf, mb_y_offset, - alt_ref, arf_y_offset); - - stats->ref[ALTREF_FRAME].err = a_motion_error; - } else { - stats->ref[ALTREF_FRAME].err = INT_MAX; - stats->ref[ALTREF_FRAME].m.mv.as_int = 0; - } -} - -static void update_mbgraph_frame_stats -( - VP9_COMP *cpi, - MBGRAPH_FRAME_STATS *stats, - YV12_BUFFER_CONFIG *buf, - YV12_BUFFER_CONFIG *golden_ref, - YV12_BUFFER_CONFIG *alt_ref -) { - MACROBLOCK *const x = &cpi->mb; - VP9_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &x->e_mbd; - int mb_col, mb_row, offset = 0; - int mb_y_offset = 0, arf_y_offset = 0, gld_y_offset = 0; - int_mv arf_top_mv, gld_top_mv; - MODE_INFO mi_local; - - // Set up limit values for motion vectors to prevent them extending outside the UMV borders - arf_top_mv.as_int = 0; - gld_top_mv.as_int = 0; - x->mv_row_min = -(VP8BORDERINPIXELS - 16 - INTERP_EXTEND); - x->mv_row_max = (cm->mb_rows - 1) * 16 + VP8BORDERINPIXELS - 16 - INTERP_EXTEND; - xd->up_available = 0; - xd->dst.y_stride = buf->y_stride; - xd->pre.y_stride = buf->y_stride; - xd->dst.uv_stride = buf->uv_stride; - xd->mode_info_context = &mi_local; - - for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) { - int_mv arf_left_mv, gld_left_mv; - int mb_y_in_offset = mb_y_offset; - int arf_y_in_offset = arf_y_offset; - int gld_y_in_offset = gld_y_offset; - - // Set up limit values for motion vectors to prevent them extending outside the UMV borders - arf_left_mv.as_int = arf_top_mv.as_int; - gld_left_mv.as_int = gld_top_mv.as_int; - x->mv_col_min = -(VP8BORDERINPIXELS - 16 - INTERP_EXTEND); - x->mv_col_max = (cm->mb_cols - 1) * 16 + VP8BORDERINPIXELS - 16 - INTERP_EXTEND; - xd->left_available = 0; - - for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { - MBGRAPH_MB_STATS *mb_stats = &stats->mb_stats[offset + mb_col]; - - update_mbgraph_mb_stats(cpi, 
mb_stats, buf, mb_y_in_offset, - golden_ref, &gld_left_mv, gld_y_in_offset, - alt_ref, &arf_left_mv, arf_y_in_offset); - arf_left_mv.as_int = mb_stats->ref[ALTREF_FRAME].m.mv.as_int; - gld_left_mv.as_int = mb_stats->ref[GOLDEN_FRAME].m.mv.as_int; - if (mb_col == 0) { - arf_top_mv.as_int = arf_left_mv.as_int; - gld_top_mv.as_int = gld_left_mv.as_int; - } - xd->left_available = 1; - mb_y_in_offset += 16; - gld_y_in_offset += 16; - arf_y_in_offset += 16; - x->mv_col_min -= 16; - x->mv_col_max -= 16; - } - xd->up_available = 1; - mb_y_offset += buf->y_stride * 16; - gld_y_offset += golden_ref->y_stride * 16; - if (alt_ref) - arf_y_offset += alt_ref->y_stride * 16; - x->mv_row_min -= 16; - x->mv_row_max -= 16; - offset += cm->mb_cols; - } -} - -// void separate_arf_mbs_byzz -static void separate_arf_mbs(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; - int mb_col, mb_row, offset, i; - int ncnt[4]; - int n_frames = cpi->mbgraph_n_frames; - - int *arf_not_zz; - - CHECK_MEM_ERROR(arf_not_zz, - vpx_calloc(cm->mb_rows * cm->mb_cols * sizeof(*arf_not_zz), 1)); - - vpx_memset(arf_not_zz, 0, sizeof(arf_not_zz)); - - // We are not interested in results beyond the alt ref itself. - if (n_frames > cpi->frames_till_gf_update_due) - n_frames = cpi->frames_till_gf_update_due; - - // defer cost to reference frames - for (i = n_frames - 1; i >= 0; i--) { - MBGRAPH_FRAME_STATS *frame_stats = &cpi->mbgraph_stats[i]; - - for (offset = 0, mb_row = 0; mb_row < cm->mb_rows; - offset += cm->mb_cols, mb_row++) { - for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { - MBGRAPH_MB_STATS *mb_stats = - &frame_stats->mb_stats[offset + mb_col]; - - int altref_err = mb_stats->ref[ALTREF_FRAME].err; - int intra_err = mb_stats->ref[INTRA_FRAME ].err; - int golden_err = mb_stats->ref[GOLDEN_FRAME].err; - - // Test for altref vs intra and gf and that its mv was 0,0. 
- if ((altref_err > 1000) || - (altref_err > intra_err) || - (altref_err > golden_err)) { - arf_not_zz[offset + mb_col]++; - } - } - } - } - - vpx_memset(ncnt, 0, sizeof(ncnt)); - for (offset = 0, mb_row = 0; mb_row < cm->mb_rows; - offset += cm->mb_cols, mb_row++) { - for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { - // If any of the blocks in the sequence failed then the MB - // goes in segment 0 - if (arf_not_zz[offset + mb_col]) { - ncnt[0]++; - cpi->segmentation_map[offset + mb_col] = 0; - } else { - ncnt[1]++; - cpi->segmentation_map[offset + mb_col] = 1; - } - } - } - - // Only bother with segmentation if over 10% of the MBs in static segment - // if ( ncnt[1] && (ncnt[0] / ncnt[1] < 10) ) - if (1) { - // Note % of blocks that are marked as static - if (cm->MBs) - cpi->static_mb_pct = (ncnt[1] * 100) / cm->MBs; - - // This error case should not be reachable as this function should - // never be called with the common data structure unititialized. - else - cpi->static_mb_pct = 0; - - cpi->seg0_cnt = ncnt[0]; - vp9_enable_segmentation((VP9_PTR) cpi); - } else { - cpi->static_mb_pct = 0; - vp9_disable_segmentation((VP9_PTR) cpi); - } - - // Free localy allocated storage - vpx_free(arf_not_zz); -} - -void vp9_update_mbgraph_stats -( - VP9_COMP *cpi -) { - VP9_COMMON *const cm = &cpi->common; - int i, n_frames = vp9_lookahead_depth(cpi->lookahead); - YV12_BUFFER_CONFIG *golden_ref = &cm->yv12_fb[cm->gld_fb_idx]; - - // we need to look ahead beyond where the ARF transitions into - // being a GF - so exit if we don't look ahead beyond that - if (n_frames <= cpi->frames_till_gf_update_due) - return; - if (n_frames > cpi->common.frames_till_alt_ref_frame) - n_frames = cpi->common.frames_till_alt_ref_frame; - if (n_frames > MAX_LAG_BUFFERS) - n_frames = MAX_LAG_BUFFERS; - - cpi->mbgraph_n_frames = n_frames; - for (i = 0; i < n_frames; i++) { - MBGRAPH_FRAME_STATS *frame_stats = &cpi->mbgraph_stats[i]; - vpx_memset(frame_stats->mb_stats, 0, - cm->mb_rows * 
cm->mb_cols * sizeof(*cpi->mbgraph_stats[i].mb_stats)); - } - - // do motion search to find contribution of each reference to data - // later on in this GF group - // FIXME really, the GF/last MC search should be done forward, and - // the ARF MC search backwards, to get optimal results for MV caching - for (i = 0; i < n_frames; i++) { - MBGRAPH_FRAME_STATS *frame_stats = &cpi->mbgraph_stats[i]; - struct lookahead_entry *q_cur = - vp9_lookahead_peek(cpi->lookahead, i); - - assert(q_cur != NULL); - - update_mbgraph_frame_stats(cpi, frame_stats, &q_cur->img, - golden_ref, cpi->Source); - } - - vp9_clear_system_state(); // __asm emms; - - separate_arf_mbs(cpi); -} diff --git a/vp8/encoder/mbgraph.h b/vp8/encoder/mbgraph.h deleted file mode 100644 index 516fe23d2..000000000 --- a/vp8/encoder/mbgraph.h +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef __INC_MBGRAPH_H__ -#define __INC_MBGRAPH_H__ 1 - -extern void vp9_update_mbgraph_stats(VP9_COMP *cpi); - -#endif /* __INC_MBGRAPH_H__ */ diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c deleted file mode 100644 index 110dbcb9d..000000000 --- a/vp8/encoder/mcomp.c +++ /dev/null @@ -1,2203 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include "vp8/encoder/onyx_int.h" -#include "mcomp.h" -#include "vpx_mem/vpx_mem.h" -#include "vpx_ports/config.h" -#include <stdio.h> -#include <limits.h> -#include <math.h> -#include "vp8/common/findnearmv.h" - -#ifdef ENTROPY_STATS -static int mv_ref_ct [31] [4] [2]; -static int mv_mode_cts [4] [2]; -#endif - -void vp9_clamp_mv_min_max(MACROBLOCK *x, int_mv *ref_mv) { - int col_min = (ref_mv->as_mv.col >> 3) - MAX_FULL_PEL_VAL + - ((ref_mv->as_mv.col & 7) ? 1 : 0); - int row_min = (ref_mv->as_mv.row >> 3) - MAX_FULL_PEL_VAL + - ((ref_mv->as_mv.row & 7) ? 1 : 0); - int col_max = (ref_mv->as_mv.col >> 3) + MAX_FULL_PEL_VAL; - int row_max = (ref_mv->as_mv.row >> 3) + MAX_FULL_PEL_VAL; - - /* Get intersection of UMV window and valid MV window to reduce # of checks in diamond search. */ - if (x->mv_col_min < col_min) - x->mv_col_min = col_min; - if (x->mv_col_max > col_max) - x->mv_col_max = col_max; - if (x->mv_row_min < row_min) - x->mv_row_min = row_min; - if (x->mv_row_max > row_max) - x->mv_row_max = row_max; -} - -int vp9_mv_bit_cost(int_mv *mv, int_mv *ref, DEC_MVCOSTS, - int Weight, int ishp) { - MV v; - v.row = (mv->as_mv.row - ref->as_mv.row); - v.col = (mv->as_mv.col - ref->as_mv.col); - return ((mvjcost[vp9_get_mv_joint(v)] + - mvcost[0][v.row] + mvcost[1][v.col]) * - Weight) >> 7; -} - -static int mv_err_cost(int_mv *mv, int_mv *ref, DEC_MVCOSTS, - int error_per_bit, int ishp) { - if (mvcost) { - MV v; - v.row = (mv->as_mv.row - ref->as_mv.row); - v.col = (mv->as_mv.col - ref->as_mv.col); - return ((mvjcost[vp9_get_mv_joint(v)] + - mvcost[0][v.row] + mvcost[1][v.col]) * - error_per_bit + 128) >> 8; - } - return 0; -} - -static int mvsad_err_cost(int_mv *mv, int_mv *ref, DEC_MVSADCOSTS, - int error_per_bit) { - - if (mvsadcost) { - MV v; - v.row = (mv->as_mv.row - ref->as_mv.row); - v.col = (mv->as_mv.col - ref->as_mv.col); - return ((mvjsadcost[vp9_get_mv_joint(v)] + - mvsadcost[0][v.row] + mvsadcost[1][v.col]) * - error_per_bit + 128) >> 8; - 
} - return 0; -} - -void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride) { - int Len; - int search_site_count = 0; - - - // Generate offsets for 4 search sites per step. - Len = MAX_FIRST_STEP; - x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = 0; - x->ss[search_site_count].offset = 0; - search_site_count++; - - while (Len > 0) { - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = -Len; - x->ss[search_site_count].offset = -Len * stride; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = Len; - x->ss[search_site_count].offset = Len * stride; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = -Len; - x->ss[search_site_count].mv.row = 0; - x->ss[search_site_count].offset = -Len; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = Len; - x->ss[search_site_count].mv.row = 0; - x->ss[search_site_count].offset = Len; - search_site_count++; - - // Contract. - Len /= 2; - } - - x->ss_count = search_site_count; - x->searches_per_step = 4; -} - -void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) { - int Len; - int search_site_count = 0; - - // Generate offsets for 8 search sites per step. - Len = MAX_FIRST_STEP; - x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = 0; - x->ss[search_site_count].offset = 0; - search_site_count++; - - while (Len > 0) { - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = -Len; - x->ss[search_site_count].offset = -Len * stride; - search_site_count++; - - // Compute offsets for search sites. 
- x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = Len; - x->ss[search_site_count].offset = Len * stride; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = -Len; - x->ss[search_site_count].mv.row = 0; - x->ss[search_site_count].offset = -Len; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = Len; - x->ss[search_site_count].mv.row = 0; - x->ss[search_site_count].offset = Len; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = -Len; - x->ss[search_site_count].mv.row = -Len; - x->ss[search_site_count].offset = -Len * stride - Len; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = Len; - x->ss[search_site_count].mv.row = -Len; - x->ss[search_site_count].offset = -Len * stride + Len; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = -Len; - x->ss[search_site_count].mv.row = Len; - x->ss[search_site_count].offset = Len * stride - Len; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = Len; - x->ss[search_site_count].mv.row = Len; - x->ss[search_site_count].offset = Len * stride + Len; - search_site_count++; - - // Contract. - Len /= 2; - } - - x->ss_count = search_site_count; - x->searches_per_step = 8; -} - -/* - * To avoid the penalty for crossing cache-line read, preload the reference - * area in a small buffer, which is aligned to make sure there won't be crossing - * cache-line read while reading from this buffer. This reduced the cpu - * cycles spent on reading ref data in sub-pixel filter functions. - * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x - * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we - * could reduce the area. 
- */ - -/* estimated cost of a motion vector (r,c) */ -#define MVC(r, c) \ - (mvcost ? \ - ((mvjcost[((r) != rr) * 2 + ((c) != rc)] + \ - mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \ - error_per_bit + 128) >> 8 : 0) - -#define SP(x) (((x) & 7) << 1) // convert motion vector component to offset - // for svf calc - -#define IFMVCV(r, c, s, e) \ - if (c >= minc && c <= maxc && r >= minr && r <= maxr) \ - s \ - else \ - e; - -/* pointer to predictor base of a motionvector */ -#define PRE(r, c) (y + (((r) >> 3) * y_stride + ((c) >> 3) -(offset))) - -/* returns subpixel variance error function */ -#define DIST(r, c) \ - vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, b->src_stride, &sse) - -/* checks if (r, c) has better score than previous best */ -#define CHECK_BETTER(v, r, c) \ - IFMVCV(r, c, { \ - thismse = (DIST(r, c)); \ - if ((v = MVC(r, c) + thismse) < besterr) { \ - besterr = v; \ - br = r; \ - bc = c; \ - *distortion = thismse; \ - *sse1 = sse; \ - } \ - }, \ - v = INT_MAX;) - -#define MIN(x,y) (((x)<(y))?(x):(y)) -#define MAX(x,y) (((x)>(y))?(x):(y)) - -int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - int_mv *bestmv, int_mv *ref_mv, - int error_per_bit, - const vp9_variance_fn_ptr_t *vfp, - DEC_MVCOSTS, - int *distortion, - unsigned int *sse1) { - unsigned char *z = (*(b->base_src) + b->src); - MACROBLOCKD *xd = &x->e_mbd; - - int rr, rc, br, bc, hstep; - int tr, tc; - unsigned int besterr = INT_MAX; - unsigned int left, right, up, down, diag; - unsigned int sse; - unsigned int whichdir; - unsigned int halfiters = 4; - unsigned int quarteriters = 4; - unsigned int eighthiters = 4; - int thismse; - int maxc, minc, maxr, minr; - int y_stride; - int offset; - int usehp = xd->allow_high_precision_mv; - -#if !CONFIG_SUPERBLOCKS && (ARCH_X86 || ARCH_X86_64) - unsigned char *y0 = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col; - unsigned char *y; - int buf_r1, buf_r2, buf_c1, buf_c2; - - // 
Clamping to avoid out-of-range data access - buf_r1 = ((bestmv->as_mv.row - INTERP_EXTEND) < x->mv_row_min) ? - (bestmv->as_mv.row - x->mv_row_min) : INTERP_EXTEND - 1; - buf_r2 = ((bestmv->as_mv.row + INTERP_EXTEND) > x->mv_row_max) ? - (x->mv_row_max - bestmv->as_mv.row) : INTERP_EXTEND - 1; - buf_c1 = ((bestmv->as_mv.col - INTERP_EXTEND) < x->mv_col_min) ? - (bestmv->as_mv.col - x->mv_col_min) : INTERP_EXTEND - 1; - buf_c2 = ((bestmv->as_mv.col + INTERP_EXTEND) > x->mv_col_max) ? - (x->mv_col_max - bestmv->as_mv.col) : INTERP_EXTEND - 1; - y_stride = 32; - - /* Copy to intermediate buffer before searching. */ - vfp->copymem(y0 - buf_c1 - d->pre_stride * buf_r1, d->pre_stride, xd->y_buf, y_stride, 16 + buf_r1 + buf_r2); - y = xd->y_buf + y_stride * buf_r1 + buf_c1; -#else - unsigned char *y = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col; - y_stride = d->pre_stride; -#endif - - rr = ref_mv->as_mv.row; - rc = ref_mv->as_mv.col; - br = bestmv->as_mv.row << 3; - bc = bestmv->as_mv.col << 3; - hstep = 4; - minc = MAX(x->mv_col_min << 3, (ref_mv->as_mv.col) - ((1 << MV_MAX_BITS) - 1)); - maxc = MIN(x->mv_col_max << 3, (ref_mv->as_mv.col) + ((1 << MV_MAX_BITS) - 1)); - minr = MAX(x->mv_row_min << 3, (ref_mv->as_mv.row) - ((1 << MV_MAX_BITS) - 1)); - maxr = MIN(x->mv_row_max << 3, (ref_mv->as_mv.row) + ((1 << MV_MAX_BITS) - 1)); - - tr = br; - tc = bc; - - - offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col; - - // central mv - bestmv->as_mv.row <<= 3; - bestmv->as_mv.col <<= 3; - - // calculate central point error - besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1); - *distortion = besterr; - besterr += mv_err_cost(bestmv, ref_mv, MVCOSTS, - error_per_bit, xd->allow_high_precision_mv); - - // TODO: Each subsequent iteration checks at least one point in - // common with the last iteration could be 2 ( if diag selected) - while (--halfiters) { - // 1/2 pel - CHECK_BETTER(left, tr, tc - hstep); - CHECK_BETTER(right, tr, 
tc + hstep); - CHECK_BETTER(up, tr - hstep, tc); - CHECK_BETTER(down, tr + hstep, tc); - - whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); - - switch (whichdir) { - case 0: - CHECK_BETTER(diag, tr - hstep, tc - hstep); - break; - case 1: - CHECK_BETTER(diag, tr - hstep, tc + hstep); - break; - case 2: - CHECK_BETTER(diag, tr + hstep, tc - hstep); - break; - case 3: - CHECK_BETTER(diag, tr + hstep, tc + hstep); - break; - } - - // no reason to check the same one again. - if (tr == br && tc == bc) - break; - - tr = br; - tc = bc; - } - - // TODO: Each subsequent iteration checks at least one point in common with - // the last iteration could be 2 ( if diag selected) 1/4 pel - hstep >>= 1; - while (--quarteriters) { - CHECK_BETTER(left, tr, tc - hstep); - CHECK_BETTER(right, tr, tc + hstep); - CHECK_BETTER(up, tr - hstep, tc); - CHECK_BETTER(down, tr + hstep, tc); - - whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); - - switch (whichdir) { - case 0: - CHECK_BETTER(diag, tr - hstep, tc - hstep); - break; - case 1: - CHECK_BETTER(diag, tr - hstep, tc + hstep); - break; - case 2: - CHECK_BETTER(diag, tr + hstep, tc - hstep); - break; - case 3: - CHECK_BETTER(diag, tr + hstep, tc + hstep); - break; - } - - // no reason to check the same one again. - if (tr == br && tc == bc) - break; - - tr = br; - tc = bc; - } - - if (xd->allow_high_precision_mv) { - usehp = vp9_use_nmv_hp(&ref_mv->as_mv); - } else { - usehp = 0; - } - - if (usehp) { - hstep >>= 1; - while (--eighthiters) { - CHECK_BETTER(left, tr, tc - hstep); - CHECK_BETTER(right, tr, tc + hstep); - CHECK_BETTER(up, tr - hstep, tc); - CHECK_BETTER(down, tr + hstep, tc); - - whichdir = (left < right ? 0 : 1) + (up < down ? 
0 : 2); - - switch (whichdir) { - case 0: - CHECK_BETTER(diag, tr - hstep, tc - hstep); - break; - case 1: - CHECK_BETTER(diag, tr - hstep, tc + hstep); - break; - case 2: - CHECK_BETTER(diag, tr + hstep, tc - hstep); - break; - case 3: - CHECK_BETTER(diag, tr + hstep, tc + hstep); - break; - } - - // no reason to check the same one again. - if (tr == br && tc == bc) - break; - - tr = br; - tc = bc; - } - } - bestmv->as_mv.row = br; - bestmv->as_mv.col = bc; - - if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) || - (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3))) - return INT_MAX; - - return besterr; -} -#undef MVC -#undef PRE -#undef DIST -#undef IFMVCV -#undef CHECK_BETTER -#undef MIN -#undef MAX - -int vp9_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - int_mv *bestmv, int_mv *ref_mv, - int error_per_bit, - const vp9_variance_fn_ptr_t *vfp, - DEC_MVCOSTS, int *distortion, - unsigned int *sse1) { - int bestmse = INT_MAX; - int_mv startmv; - int_mv this_mv; - int_mv orig_mv; - int yrow_movedback = 0, ycol_movedback = 0; - unsigned char *z = (*(b->base_src) + b->src); - int left, right, up, down, diag; - unsigned int sse; - int whichdir; - int thismse; - int y_stride; - MACROBLOCKD *xd = &x->e_mbd; - int usehp = xd->allow_high_precision_mv; - -#if !CONFIG_SUPERBLOCKS && (ARCH_X86 || ARCH_X86_64) - unsigned char *y0 = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col; - unsigned char *y; - - y_stride = 32; - /* Copy 18 rows x 32 cols area to intermediate buffer before searching. 
*/ - vfp->copymem(y0 - 1 - d->pre_stride, d->pre_stride, xd->y_buf, y_stride, 18); - y = xd->y_buf + y_stride + 1; -#else - unsigned char *y = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col; - y_stride = d->pre_stride; -#endif - - // central mv - bestmv->as_mv.row <<= 3; - bestmv->as_mv.col <<= 3; - startmv = *bestmv; - orig_mv = *bestmv; - - // calculate central point error - bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1); - *distortion = bestmse; - bestmse += mv_err_cost(bestmv, ref_mv, MVCOSTS, error_per_bit, - xd->allow_high_precision_mv); - - // go left then right and check error - this_mv.as_mv.row = startmv.as_mv.row; - this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4); - thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse); - left = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, - xd->allow_high_precision_mv); - - if (left < bestmse) { - *bestmv = this_mv; - bestmse = left; - *distortion = thismse; - *sse1 = sse; - } - - this_mv.as_mv.col += 8; - thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse); - right = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, - xd->allow_high_precision_mv); - - if (right < bestmse) { - *bestmv = this_mv; - bestmse = right; - *distortion = thismse; - *sse1 = sse; - } - - // go up then down and check error - this_mv.as_mv.col = startmv.as_mv.col; - this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4); - thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse); - up = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, - xd->allow_high_precision_mv); - - if (up < bestmse) { - *bestmv = this_mv; - bestmse = up; - *distortion = thismse; - *sse1 = sse; - } - - this_mv.as_mv.row += 8; - thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse); - down = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, - xd->allow_high_precision_mv); - - if (down < bestmse) { - *bestmv = this_mv; 
- bestmse = down; - *distortion = thismse; - *sse1 = sse; - } - - - // now check 1 more diagonal - whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); - // for(whichdir =0;whichdir<4;whichdir++) - // { - this_mv = startmv; - - switch (whichdir) { - case 0: - this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; - this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; - thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse); - break; - case 1: - this_mv.as_mv.col += 4; - this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; - thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse); - break; - case 2: - this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; - this_mv.as_mv.row += 4; - thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse); - break; - case 3: - default: - this_mv.as_mv.col += 4; - this_mv.as_mv.row += 4; - thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse); - break; - } - - diag = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, - xd->allow_high_precision_mv); - - if (diag < bestmse) { - *bestmv = this_mv; - bestmse = diag; - *distortion = thismse; - *sse1 = sse; - } - -// } - - - // time to check quarter pels. 
- if (bestmv->as_mv.row < startmv.as_mv.row) { - y -= y_stride; - yrow_movedback = 1; - } - - if (bestmv->as_mv.col < startmv.as_mv.col) { - y--; - ycol_movedback = 1; - } - - startmv = *bestmv; - - - - // go left then right and check error - this_mv.as_mv.row = startmv.as_mv.row; - - if (startmv.as_mv.col & 7) { - this_mv.as_mv.col = startmv.as_mv.col - 2; - thismse = vfp->svf(y, y_stride, - SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, b->src_stride, &sse); - } else { - this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; - thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, - b->src_stride, &sse); - } - - left = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, - xd->allow_high_precision_mv); - - if (left < bestmse) { - *bestmv = this_mv; - bestmse = left; - *distortion = thismse; - *sse1 = sse; - } - - this_mv.as_mv.col += 4; - thismse = vfp->svf(y, y_stride, - SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, b->src_stride, &sse); - right = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, - xd->allow_high_precision_mv); - - if (right < bestmse) { - *bestmv = this_mv; - bestmse = right; - *distortion = thismse; - *sse1 = sse; - } - - // go up then down and check error - this_mv.as_mv.col = startmv.as_mv.col; - - if (startmv.as_mv.row & 7) { - this_mv.as_mv.row = startmv.as_mv.row - 2; - thismse = vfp->svf(y, y_stride, - SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, b->src_stride, &sse); - } else { - this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6; - thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6), - z, b->src_stride, &sse); - } - - up = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, - xd->allow_high_precision_mv); - - if (up < bestmse) { - *bestmv = this_mv; - bestmse = up; - *distortion = thismse; - *sse1 = sse; - } - - this_mv.as_mv.row += 4; - thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, b->src_stride, &sse); - down = thismse + 
mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, - xd->allow_high_precision_mv); - - if (down < bestmse) { - *bestmv = this_mv; - bestmse = down; - *distortion = thismse; - *sse1 = sse; - } - - - // now check 1 more diagonal - whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); - -// for(whichdir=0;whichdir<4;whichdir++) -// { - this_mv = startmv; - - switch (whichdir) { - case 0: - - if (startmv.as_mv.row & 7) { - this_mv.as_mv.row -= 2; - - if (startmv.as_mv.col & 7) { - this_mv.as_mv.col -= 2; - thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse); - } else { - this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; - thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, b->src_stride, &sse);; - } - } else { - this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6; - - if (startmv.as_mv.col & 7) { - this_mv.as_mv.col -= 2; - thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6), z, b->src_stride, &sse); - } else { - this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; - thismse = vfp->svf(y - y_stride - 1, y_stride, SP(6), SP(6), z, b->src_stride, &sse); - } - } - - break; - case 1: - this_mv.as_mv.col += 2; - - if (startmv.as_mv.row & 7) { - this_mv.as_mv.row -= 2; - thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse); - } else { - this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6; - thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6), z, b->src_stride, &sse); - } - - break; - case 2: - this_mv.as_mv.row += 2; - - if (startmv.as_mv.col & 7) { - this_mv.as_mv.col -= 2; - thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, b->src_stride, &sse); - } else { - this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; - thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, - b->src_stride, &sse); - } - - break; - case 3: - this_mv.as_mv.col += 2; - this_mv.as_mv.row += 2; - thismse = 
vfp->svf(y, y_stride, - SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, b->src_stride, &sse); - break; - } - - diag = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, - xd->allow_high_precision_mv); - - if (diag < bestmse) { - *bestmv = this_mv; - bestmse = diag; - *distortion = thismse; - *sse1 = sse; - } - - if (x->e_mbd.allow_high_precision_mv) { - usehp = vp9_use_nmv_hp(&ref_mv->as_mv); - } else { - usehp = 0; - } - if (!usehp) - return bestmse; - - /* Now do 1/8th pixel */ - if (bestmv->as_mv.row < orig_mv.as_mv.row && !yrow_movedback) { - y -= y_stride; - yrow_movedback = 1; - } - - if (bestmv->as_mv.col < orig_mv.as_mv.col && !ycol_movedback) { - y--; - ycol_movedback = 1; - } - - startmv = *bestmv; - - // go left then right and check error - this_mv.as_mv.row = startmv.as_mv.row; - - if (startmv.as_mv.col & 7) { - this_mv.as_mv.col = startmv.as_mv.col - 1; - thismse = vfp->svf(y, y_stride, - SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, b->src_stride, &sse); - } else { - this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7; - thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), - z, b->src_stride, &sse); - } - - left = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, - xd->allow_high_precision_mv); - - if (left < bestmse) { - *bestmv = this_mv; - bestmse = left; - *distortion = thismse; - *sse1 = sse; - } - - this_mv.as_mv.col += 2; - thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, b->src_stride, &sse); - right = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, - xd->allow_high_precision_mv); - - if (right < bestmse) { - *bestmv = this_mv; - bestmse = right; - *distortion = thismse; - *sse1 = sse; - } - - // go up then down and check error - this_mv.as_mv.col = startmv.as_mv.col; - - if (startmv.as_mv.row & 7) { - this_mv.as_mv.row = startmv.as_mv.row - 1; - thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse); - } 
else { - this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7; - thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, b->src_stride, &sse); - } - - up = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, - xd->allow_high_precision_mv); - - if (up < bestmse) { - *bestmv = this_mv; - bestmse = up; - *distortion = thismse; - *sse1 = sse; - } - - this_mv.as_mv.row += 2; - thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse); - down = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, - xd->allow_high_precision_mv); - - if (down < bestmse) { - *bestmv = this_mv; - bestmse = down; - *distortion = thismse; - *sse1 = sse; - } - - // now check 1 more diagonal - whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); - -// for(whichdir=0;whichdir<4;whichdir++) -// { - this_mv = startmv; - - switch (whichdir) { - case 0: - - if (startmv.as_mv.row & 7) { - this_mv.as_mv.row -= 1; - - if (startmv.as_mv.col & 7) { - this_mv.as_mv.col -= 1; - thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse); - } else { - this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7; - thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), z, b->src_stride, &sse);; - } - } else { - this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7; - - if (startmv.as_mv.col & 7) { - this_mv.as_mv.col -= 1; - thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, b->src_stride, &sse); - } else { - this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7; - thismse = vfp->svf(y - y_stride - 1, y_stride, SP(7), SP(7), z, b->src_stride, &sse); - } - } - - break; - case 1: - this_mv.as_mv.col += 1; - - if (startmv.as_mv.row & 7) { - this_mv.as_mv.row -= 1; - thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse); - } else { - this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7; - thismse = vfp->svf(y - y_stride, y_stride, 
SP(this_mv.as_mv.col), SP(7), z, b->src_stride, &sse); - } - - break; - case 2: - this_mv.as_mv.row += 1; - - if (startmv.as_mv.col & 7) { - this_mv.as_mv.col -= 1; - thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse); - } else { - this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7; - thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), z, b->src_stride, &sse); - } - - break; - case 3: - this_mv.as_mv.col += 1; - this_mv.as_mv.row += 1; - thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse); - break; - } - - diag = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, - xd->allow_high_precision_mv); - - if (diag < bestmse) { - *bestmv = this_mv; - bestmse = diag; - *distortion = thismse; - *sse1 = sse; - } - - return bestmse; -} - -#undef SP - -int vp9_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - int_mv *bestmv, int_mv *ref_mv, - int error_per_bit, - const vp9_variance_fn_ptr_t *vfp, - DEC_MVCOSTS, - int *distortion, - unsigned int *sse1) { - int bestmse = INT_MAX; - int_mv startmv; - int_mv this_mv; - unsigned char *z = (*(b->base_src) + b->src); - int left, right, up, down, diag; - unsigned int sse; - int whichdir; - int thismse; - int y_stride; - MACROBLOCKD *xd = &x->e_mbd; - -#if !CONFIG_SUPERBLOCKS && (ARCH_X86 || ARCH_X86_64) - unsigned char *y0 = *(d->base_pre) + d->pre + - (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col; - unsigned char *y; - - y_stride = 32; - /* Copy 18 rows x 32 cols area to intermediate buffer before searching. 
*/ - vfp->copymem(y0 - 1 - d->pre_stride, d->pre_stride, xd->y_buf, y_stride, 18); - y = xd->y_buf + y_stride + 1; -#else - unsigned char *y = *(d->base_pre) + d->pre + - (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col; - y_stride = d->pre_stride; -#endif - - // central mv - bestmv->as_mv.row <<= 3; - bestmv->as_mv.col <<= 3; - startmv = *bestmv; - - // calculate central point error - bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1); - *distortion = bestmse; - bestmse += mv_err_cost(bestmv, ref_mv, MVCOSTS, error_per_bit, - xd->allow_high_precision_mv); - - // go left then right and check error - this_mv.as_mv.row = startmv.as_mv.row; - this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4); - thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse); - left = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, - xd->allow_high_precision_mv); - - if (left < bestmse) { - *bestmv = this_mv; - bestmse = left; - *distortion = thismse; - *sse1 = sse; - } - - this_mv.as_mv.col += 8; - thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse); - right = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, - xd->allow_high_precision_mv); - - if (right < bestmse) { - *bestmv = this_mv; - bestmse = right; - *distortion = thismse; - *sse1 = sse; - } - - // go up then down and check error - this_mv.as_mv.col = startmv.as_mv.col; - this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4); - thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse); - up = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, - xd->allow_high_precision_mv); - - if (up < bestmse) { - *bestmv = this_mv; - bestmse = up; - *distortion = thismse; - *sse1 = sse; - } - - this_mv.as_mv.row += 8; - thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse); - down = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, - xd->allow_high_precision_mv); - - if (down < bestmse) { - *bestmv = this_mv; - bestmse = down; 
- *distortion = thismse; - *sse1 = sse; - } - - // now check 1 more diagonal - - whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); - this_mv = startmv; - - switch (whichdir) { - case 0: - this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; - this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; - thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse); - break; - case 1: - this_mv.as_mv.col += 4; - this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; - thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse); - break; - case 2: - this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; - this_mv.as_mv.row += 4; - thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse); - break; - case 3: - default: - this_mv.as_mv.col += 4; - this_mv.as_mv.row += 4; - thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse); - break; - } - - diag = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, - xd->allow_high_precision_mv); - - if (diag < bestmse) { - *bestmv = this_mv; - bestmse = diag; - *distortion = thismse; - *sse1 = sse; - } - - return bestmse; -} - -#define CHECK_BOUNDS(range) \ - {\ - all_in = 1;\ - all_in &= ((br-range) >= x->mv_row_min);\ - all_in &= ((br+range) <= x->mv_row_max);\ - all_in &= ((bc-range) >= x->mv_col_min);\ - all_in &= ((bc+range) <= x->mv_col_max);\ - } - -#define CHECK_POINT \ - {\ - if (this_mv.as_mv.col < x->mv_col_min) continue;\ - if (this_mv.as_mv.col > x->mv_col_max) continue;\ - if (this_mv.as_mv.row < x->mv_row_min) continue;\ - if (this_mv.as_mv.row > x->mv_row_max) continue;\ - } - -#define CHECK_BETTER \ - {\ - if (thissad < bestsad)\ - {\ - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, MVSADCOSTS, sad_per_bit);\ - if (thissad < bestsad)\ - {\ - bestsad = thissad;\ - best_site = i;\ - }\ - }\ - } - -static const MV next_chkpts[6][3] = { - {{ -2, 0}, { -1, -2}, {1, -2}}, - {{ -1, -2}, {1, -2}, {2, 0}}, - {{1, -2}, {2, 0}, {1, 2}}, - {{2, 0}, {1, 2}, { -1, 2}}, - 
{{1, 2}, { -1, 2}, { -2, 0}}, - {{ -1, 2}, { -2, 0}, { -1, -2}} -}; - -int vp9_hex_search -( - MACROBLOCK *x, - BLOCK *b, - BLOCKD *d, - int_mv *ref_mv, - int_mv *best_mv, - int search_param, - int sad_per_bit, - const vp9_variance_fn_ptr_t *vfp, - DEC_MVSADCOSTS, - DEC_MVCOSTS, - int_mv *center_mv -) { - MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} }; - MV neighbors[4] = {{0, -1}, { -1, 0}, {1, 0}, {0, 1}}; - int i, j; - - unsigned char *what = (*(b->base_src) + b->src); - int what_stride = b->src_stride; - int in_what_stride = d->pre_stride; - int br, bc; - int_mv this_mv; - unsigned int bestsad = 0x7fffffff; - unsigned int thissad; - unsigned char *base_offset; - unsigned char *this_offset; - int k = -1; - int all_in; - int best_site = -1; - - int_mv fcenter_mv; - fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; - fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - - // adjust ref_mv to make sure it is within MV range - clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); - br = ref_mv->as_mv.row; - bc = ref_mv->as_mv.col; - - // Work out the start point for the search - base_offset = (unsigned char *)(*(d->base_pre) + d->pre); - this_offset = base_offset + (br * (d->pre_stride)) + bc; - this_mv.as_mv.row = br; - this_mv.as_mv.col = bc; - bestsad = vfp->sdf(what, what_stride, this_offset, - in_what_stride, 0x7fffffff) - + mvsad_err_cost(&this_mv, &fcenter_mv, MVSADCOSTS, sad_per_bit); - - // hex search - // j=0 - CHECK_BOUNDS(2) - - if (all_in) { - for (i = 0; i < 6; i++) { - this_mv.as_mv.row = br + hex[i].row; - this_mv.as_mv.col = bc + hex[i].col; - this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); - CHECK_BETTER - } - } else { - for (i = 0; i < 6; i++) { - this_mv.as_mv.row = br + hex[i].row; - this_mv.as_mv.col = bc + hex[i].col; - CHECK_POINT - this_offset = base_offset + (this_mv.as_mv.row * 
in_what_stride) + this_mv.as_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); - CHECK_BETTER - } - } - - if (best_site == -1) - goto cal_neighbors; - else { - br += hex[best_site].row; - bc += hex[best_site].col; - k = best_site; - } - - for (j = 1; j < 127; j++) { - best_site = -1; - CHECK_BOUNDS(2) - - if (all_in) { - for (i = 0; i < 3; i++) { - this_mv.as_mv.row = br + next_chkpts[k][i].row; - this_mv.as_mv.col = bc + next_chkpts[k][i].col; - this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); - CHECK_BETTER - } - } else { - for (i = 0; i < 3; i++) { - this_mv.as_mv.row = br + next_chkpts[k][i].row; - this_mv.as_mv.col = bc + next_chkpts[k][i].col; - CHECK_POINT - this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); - CHECK_BETTER - } - } - - if (best_site == -1) - break; - else { - br += next_chkpts[k][best_site].row; - bc += next_chkpts[k][best_site].col; - k += 5 + best_site; - if (k >= 12) k -= 12; - else if (k >= 6) k -= 6; - } - } - - // check 4 1-away neighbors -cal_neighbors: - for (j = 0; j < 32; j++) { - best_site = -1; - CHECK_BOUNDS(1) - - if (all_in) { - for (i = 0; i < 4; i++) { - this_mv.as_mv.row = br + neighbors[i].row; - this_mv.as_mv.col = bc + neighbors[i].col; - this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); - CHECK_BETTER - } - } else { - for (i = 0; i < 4; i++) { - this_mv.as_mv.row = br + neighbors[i].row; - this_mv.as_mv.col = bc + neighbors[i].col; - CHECK_POINT - this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); - CHECK_BETTER - } - } - - if 
(best_site == -1) - break; - else { - br += neighbors[best_site].row; - bc += neighbors[best_site].col; - } - } - - best_mv->as_mv.row = br; - best_mv->as_mv.col = bc; - - return bestsad; -} -#undef CHECK_BOUNDS -#undef CHECK_POINT -#undef CHECK_BETTER - -int vp9_diamond_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - int_mv *ref_mv, int_mv *best_mv, - int search_param, int sad_per_bit, int *num00, - vp9_variance_fn_ptr_t *fn_ptr, DEC_MVCOSTS, - int_mv *center_mv) { - int i, j, step; - - unsigned char *what = (*(b->base_src) + b->src); - int what_stride = b->src_stride; - unsigned char *in_what; - int in_what_stride = d->pre_stride; - unsigned char *best_address; - - int tot_steps; - int_mv this_mv; - - int bestsad = INT_MAX; - int best_site = 0; - int last_site = 0; - - int ref_row, ref_col; - int this_row_offset, this_col_offset; - search_site *ss; - - unsigned char *check_here; - int thissad; - MACROBLOCKD *xd = &x->e_mbd; - int_mv fcenter_mv; - - int *mvjsadcost = x->nmvjointsadcost; - int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; - - fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; - fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - - clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); - ref_row = ref_mv->as_mv.row; - ref_col = ref_mv->as_mv.col; - *num00 = 0; - best_mv->as_mv.row = ref_row; - best_mv->as_mv.col = ref_col; - - // Work out the start point for the search - in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col); - best_address = in_what; - - // Check the starting position - bestsad = fn_ptr->sdf(what, what_stride, in_what, - in_what_stride, 0x7fffffff) - + mvsad_err_cost(best_mv, &fcenter_mv, MVSADCOSTS, sad_per_bit); - - // search_param determines the length of the initial step and hence the number of iterations - // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. 
- ss = &x->ss[search_param * x->searches_per_step]; - tot_steps = (x->ss_count / x->searches_per_step) - search_param; - - i = 1; - - for (step = 0; step < tot_steps; step++) { - for (j = 0; j < x->searches_per_step; j++) { - // Trap illegal vectors - this_row_offset = best_mv->as_mv.row + ss[i].mv.row; - this_col_offset = best_mv->as_mv.col + ss[i].mv.col; - - if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) && - (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) - - { - check_here = ss[i].offset + best_address; - thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); - - if (thissad < bestsad) { - this_mv.as_mv.row = this_row_offset; - this_mv.as_mv.col = this_col_offset; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - MVSADCOSTS, sad_per_bit); - - if (thissad < bestsad) { - bestsad = thissad; - best_site = i; - } - } - } - - i++; - } - - if (best_site != last_site) { - best_mv->as_mv.row += ss[best_site].mv.row; - best_mv->as_mv.col += ss[best_site].mv.col; - best_address += ss[best_site].offset; - last_site = best_site; - } else if (best_address == in_what) - (*num00)++; - } - - this_mv.as_mv.row = best_mv->as_mv.row << 3; - this_mv.as_mv.col = best_mv->as_mv.col << 3; - - if (bestsad == INT_MAX) - return INT_MAX; - - return - fn_ptr->vf(what, what_stride, best_address, in_what_stride, - (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, MVCOSTS, x->errorperbit, - xd->allow_high_precision_mv); -} - -int vp9_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - int_mv *ref_mv, int_mv *best_mv, int search_param, - int sad_per_bit, int *num00, - vp9_variance_fn_ptr_t *fn_ptr, - DEC_MVCOSTS, int_mv *center_mv) { - int i, j, step; - - unsigned char *what = (*(b->base_src) + b->src); - int what_stride = b->src_stride; - unsigned char *in_what; - int in_what_stride = d->pre_stride; - unsigned char *best_address; - - int tot_steps; - int_mv this_mv; - - int bestsad = INT_MAX; 
- int best_site = 0; - int last_site = 0; - - int ref_row; - int ref_col; - int this_row_offset; - int this_col_offset; - search_site *ss; - - unsigned char *check_here; - unsigned int thissad; - MACROBLOCKD *xd = &x->e_mbd; - int_mv fcenter_mv; - - int *mvjsadcost = x->nmvjointsadcost; - int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; - - fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; - fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - - clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); - ref_row = ref_mv->as_mv.row; - ref_col = ref_mv->as_mv.col; - *num00 = 0; - best_mv->as_mv.row = ref_row; - best_mv->as_mv.col = ref_col; - - // Work out the start point for the search - in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col); - best_address = in_what; - - // Check the starting position - bestsad = fn_ptr->sdf(what, what_stride, - in_what, in_what_stride, 0x7fffffff) - + mvsad_err_cost(best_mv, &fcenter_mv, MVSADCOSTS, sad_per_bit); - - // search_param determines the length of the initial step and hence the number of iterations - // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. - ss = &x->ss[search_param * x->searches_per_step]; - tot_steps = (x->ss_count / x->searches_per_step) - search_param; - - i = 1; - - for (step = 0; step < tot_steps; step++) { - int all_in = 1, t; - - // To know if all neighbor points are within the bounds, 4 bounds checking are enough instead of - // checking 4 bounds for each points. 
- all_in &= ((best_mv->as_mv.row + ss[i].mv.row) > x->mv_row_min); - all_in &= ((best_mv->as_mv.row + ss[i + 1].mv.row) < x->mv_row_max); - all_in &= ((best_mv->as_mv.col + ss[i + 2].mv.col) > x->mv_col_min); - all_in &= ((best_mv->as_mv.col + ss[i + 3].mv.col) < x->mv_col_max); - - if (all_in) { - unsigned int sad_array[4]; - - for (j = 0; j < x->searches_per_step; j += 4) { - unsigned char *block_offset[4]; - - for (t = 0; t < 4; t++) - block_offset[t] = ss[i + t].offset + best_address; - - fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, - sad_array); - - for (t = 0; t < 4; t++, i++) { - if (sad_array[t] < bestsad) { - this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row; - this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col; - sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv, - MVSADCOSTS, sad_per_bit); - - if (sad_array[t] < bestsad) { - bestsad = sad_array[t]; - best_site = i; - } - } - } - } - } else { - for (j = 0; j < x->searches_per_step; j++) { - // Trap illegal vectors - this_row_offset = best_mv->as_mv.row + ss[i].mv.row; - this_col_offset = best_mv->as_mv.col + ss[i].mv.col; - - if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) && - (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) { - check_here = ss[i].offset + best_address; - thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); - - if (thissad < bestsad) { - this_mv.as_mv.row = this_row_offset; - this_mv.as_mv.col = this_col_offset; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - MVSADCOSTS, sad_per_bit); - - if (thissad < bestsad) { - bestsad = thissad; - best_site = i; - } - } - } - i++; - } - } - - if (best_site != last_site) { - best_mv->as_mv.row += ss[best_site].mv.row; - best_mv->as_mv.col += ss[best_site].mv.col; - best_address += ss[best_site].offset; - last_site = best_site; - } else if (best_address == in_what) - (*num00)++; - } - - this_mv.as_mv.row = best_mv->as_mv.row << 3; - 
this_mv.as_mv.col = best_mv->as_mv.col << 3; - - if (bestsad == INT_MAX) - return INT_MAX; - - return - fn_ptr->vf(what, what_stride, best_address, in_what_stride, - (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, MVCOSTS, x->errorperbit, - xd->allow_high_precision_mv); -} - -/* do_refine: If last step (1-away) of n-step search doesn't pick the center - point as the best match, we will do a final 1-away diamond - refining search */ -int vp9_full_pixel_diamond(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *b, - BLOCKD *d, int_mv *mvp_full, int step_param, - int sadpb, int further_steps, - int do_refine, vp9_variance_fn_ptr_t *fn_ptr, - int_mv *ref_mv, int_mv *dst_mv) { - int_mv temp_mv; - int thissme, n, num00; - int bestsme = cpi->diamond_search_sad(x, b, d, mvp_full, &temp_mv, - step_param, sadpb, &num00, - fn_ptr, XMVCOST, ref_mv); - dst_mv->as_int = temp_mv.as_int; - - n = num00; - num00 = 0; - - /* If there won't be more n-step search, check to see if refining search is needed. */ - if (n > further_steps) - do_refine = 0; - - while (n < further_steps) { - n++; - - if (num00) - num00--; - else { - thissme = cpi->diamond_search_sad(x, b, d, mvp_full, &temp_mv, - step_param + n, sadpb, &num00, - fn_ptr, XMVCOST, ref_mv); - - /* check to see if refining search is needed. 
*/ - if (num00 > (further_steps - n)) - do_refine = 0; - - if (thissme < bestsme) { - bestsme = thissme; - dst_mv->as_int = temp_mv.as_int; - } - } - } - - /* final 1-away diamond refining search */ - if (do_refine == 1) { - int search_range = 8; - int_mv best_mv; - best_mv.as_int = dst_mv->as_int; - thissme = cpi->refining_search_sad(x, b, d, &best_mv, sadpb, search_range, - fn_ptr, XMVCOST, ref_mv); - - if (thissme < bestsme) { - bestsme = thissme; - dst_mv->as_int = best_mv.as_int; - } - } - return bestsme; -} - -int vp9_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, - int sad_per_bit, int distance, - vp9_variance_fn_ptr_t *fn_ptr, DEC_MVCOSTS, - int_mv *center_mv) { - unsigned char *what = (*(b->base_src) + b->src); - int what_stride = b->src_stride; - unsigned char *in_what; - int in_what_stride = d->pre_stride; - int mv_stride = d->pre_stride; - unsigned char *bestaddress; - int_mv *best_mv = &d->bmi.as_mv.first; - int_mv this_mv; - int bestsad = INT_MAX; - int r, c; - - unsigned char *check_here; - int thissad; - MACROBLOCKD *xd = &x->e_mbd; - - int ref_row = ref_mv->as_mv.row; - int ref_col = ref_mv->as_mv.col; - - int row_min = ref_row - distance; - int row_max = ref_row + distance; - int col_min = ref_col - distance; - int col_max = ref_col + distance; - int_mv fcenter_mv; - - int *mvjsadcost = x->nmvjointsadcost; - int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; - - fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; - fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - - // Work out the mid point for the search - in_what = *(d->base_pre) + d->pre; - bestaddress = in_what + (ref_row * d->pre_stride) + ref_col; - - best_mv->as_mv.row = ref_row; - best_mv->as_mv.col = ref_col; - - // Baseline value at the centre - bestsad = fn_ptr->sdf(what, what_stride, bestaddress, - in_what_stride, 0x7fffffff) - + mvsad_err_cost(best_mv, &fcenter_mv, MVSADCOSTS, sad_per_bit); - - // Apply further limits to prevent us looking using vectors 
that stretch beyiond the UMV border - if (col_min < x->mv_col_min) - col_min = x->mv_col_min; - - if (col_max > x->mv_col_max) - col_max = x->mv_col_max; - - if (row_min < x->mv_row_min) - row_min = x->mv_row_min; - - if (row_max > x->mv_row_max) - row_max = x->mv_row_max; - - for (r = row_min; r < row_max; r++) { - this_mv.as_mv.row = r; - check_here = r * mv_stride + in_what + col_min; - - for (c = col_min; c < col_max; c++) { - thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); - - this_mv.as_mv.col = c; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - MVSADCOSTS, sad_per_bit); - - if (thissad < bestsad) { - bestsad = thissad; - best_mv->as_mv.row = r; - best_mv->as_mv.col = c; - bestaddress = check_here; - } - - check_here++; - } - } - - this_mv.as_mv.row = best_mv->as_mv.row << 3; - this_mv.as_mv.col = best_mv->as_mv.col << 3; - - if (bestsad < INT_MAX) - return - fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, - (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, MVCOSTS, x->errorperbit, - xd->allow_high_precision_mv); - else - return INT_MAX; -} - -int vp9_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, - int sad_per_bit, int distance, - vp9_variance_fn_ptr_t *fn_ptr, DEC_MVCOSTS, - int_mv *center_mv) { - unsigned char *what = (*(b->base_src) + b->src); - int what_stride = b->src_stride; - unsigned char *in_what; - int in_what_stride = d->pre_stride; - int mv_stride = d->pre_stride; - unsigned char *bestaddress; - int_mv *best_mv = &d->bmi.as_mv.first; - int_mv this_mv; - int bestsad = INT_MAX; - int r, c; - - unsigned char *check_here; - unsigned int thissad; - MACROBLOCKD *xd = &x->e_mbd; - - int ref_row = ref_mv->as_mv.row; - int ref_col = ref_mv->as_mv.col; - - int row_min = ref_row - distance; - int row_max = ref_row + distance; - int col_min = ref_col - distance; - int col_max = ref_col + distance; - - unsigned int sad_array[3]; - int_mv fcenter_mv; - - int *mvjsadcost = 
x->nmvjointsadcost; - int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; - - fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; - fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - - // Work out the mid point for the search - in_what = *(d->base_pre) + d->pre; - bestaddress = in_what + (ref_row * d->pre_stride) + ref_col; - - best_mv->as_mv.row = ref_row; - best_mv->as_mv.col = ref_col; - - // Baseline value at the centre - bestsad = fn_ptr->sdf(what, what_stride, - bestaddress, in_what_stride, 0x7fffffff) - + mvsad_err_cost(best_mv, &fcenter_mv, MVSADCOSTS, sad_per_bit); - - // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border - if (col_min < x->mv_col_min) - col_min = x->mv_col_min; - - if (col_max > x->mv_col_max) - col_max = x->mv_col_max; - - if (row_min < x->mv_row_min) - row_min = x->mv_row_min; - - if (row_max > x->mv_row_max) - row_max = x->mv_row_max; - - for (r = row_min; r < row_max; r++) { - this_mv.as_mv.row = r; - check_here = r * mv_stride + in_what + col_min; - c = col_min; - - while ((c + 2) < col_max) { - int i; - - fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array); - - for (i = 0; i < 3; i++) { - thissad = sad_array[i]; - - if (thissad < bestsad) { - this_mv.as_mv.col = c; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - MVSADCOSTS, sad_per_bit); - - if (thissad < bestsad) { - bestsad = thissad; - best_mv->as_mv.row = r; - best_mv->as_mv.col = c; - bestaddress = check_here; - } - } - - check_here++; - c++; - } - } - - while (c < col_max) { - thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); - - if (thissad < bestsad) { - this_mv.as_mv.col = c; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - MVSADCOSTS, sad_per_bit); - - if (thissad < bestsad) { - bestsad = thissad; - best_mv->as_mv.row = r; - best_mv->as_mv.col = c; - bestaddress = check_here; - } - } - - check_here++; - c++; - } - - } - - this_mv.as_mv.row = best_mv->as_mv.row << 3; - 
this_mv.as_mv.col = best_mv->as_mv.col << 3; - - if (bestsad < INT_MAX) - return - fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, - (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, MVCOSTS, x->errorperbit, - xd->allow_high_precision_mv); - else - return INT_MAX; -} - -int vp9_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, - int sad_per_bit, int distance, - vp9_variance_fn_ptr_t *fn_ptr, - DEC_MVCOSTS, - int_mv *center_mv) { - unsigned char *what = (*(b->base_src) + b->src); - int what_stride = b->src_stride; - unsigned char *in_what; - int in_what_stride = d->pre_stride; - int mv_stride = d->pre_stride; - unsigned char *bestaddress; - int_mv *best_mv = &d->bmi.as_mv.first; - int_mv this_mv; - int bestsad = INT_MAX; - int r, c; - - unsigned char *check_here; - unsigned int thissad; - MACROBLOCKD *xd = &x->e_mbd; - - int ref_row = ref_mv->as_mv.row; - int ref_col = ref_mv->as_mv.col; - - int row_min = ref_row - distance; - int row_max = ref_row + distance; - int col_min = ref_col - distance; - int col_max = ref_col + distance; - - DECLARE_ALIGNED_ARRAY(16, unsigned short, sad_array8, 8); - unsigned int sad_array[3]; - int_mv fcenter_mv; - - int *mvjsadcost = x->nmvjointsadcost; - int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; - - fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; - fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - - // Work out the mid point for the search - in_what = *(d->base_pre) + d->pre; - bestaddress = in_what + (ref_row * d->pre_stride) + ref_col; - - best_mv->as_mv.row = ref_row; - best_mv->as_mv.col = ref_col; - - // Baseline value at the centre - bestsad = fn_ptr->sdf(what, what_stride, - bestaddress, in_what_stride, 0x7fffffff) - + mvsad_err_cost(best_mv, &fcenter_mv, MVSADCOSTS, sad_per_bit); - - // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border - if (col_min < x->mv_col_min) - col_min = x->mv_col_min; - - if (col_max > 
x->mv_col_max) - col_max = x->mv_col_max; - - if (row_min < x->mv_row_min) - row_min = x->mv_row_min; - - if (row_max > x->mv_row_max) - row_max = x->mv_row_max; - - for (r = row_min; r < row_max; r++) { - this_mv.as_mv.row = r; - check_here = r * mv_stride + in_what + col_min; - c = col_min; - - while ((c + 7) < col_max) { - int i; - - fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8); - - for (i = 0; i < 8; i++) { - thissad = (unsigned int)sad_array8[i]; - - if (thissad < bestsad) { - this_mv.as_mv.col = c; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - MVSADCOSTS, sad_per_bit); - - if (thissad < bestsad) { - bestsad = thissad; - best_mv->as_mv.row = r; - best_mv->as_mv.col = c; - bestaddress = check_here; - } - } - - check_here++; - c++; - } - } - - while ((c + 2) < col_max) { - int i; - - fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array); - - for (i = 0; i < 3; i++) { - thissad = sad_array[i]; - - if (thissad < bestsad) { - this_mv.as_mv.col = c; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - MVSADCOSTS, sad_per_bit); - - if (thissad < bestsad) { - bestsad = thissad; - best_mv->as_mv.row = r; - best_mv->as_mv.col = c; - bestaddress = check_here; - } - } - - check_here++; - c++; - } - } - - while (c < col_max) { - thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); - - if (thissad < bestsad) { - this_mv.as_mv.col = c; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - MVSADCOSTS, sad_per_bit); - - if (thissad < bestsad) { - bestsad = thissad; - best_mv->as_mv.row = r; - best_mv->as_mv.col = c; - bestaddress = check_here; - } - } - - check_here++; - c++; - } - } - - this_mv.as_mv.row = best_mv->as_mv.row << 3; - this_mv.as_mv.col = best_mv->as_mv.col << 3; - - if (bestsad < INT_MAX) - return - fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, - (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, MVCOSTS, x->errorperbit, - xd->allow_high_precision_mv); - 
else - return INT_MAX; -} - -int vp9_refining_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, - int error_per_bit, int search_range, - vp9_variance_fn_ptr_t *fn_ptr, DEC_MVCOSTS, - int_mv *center_mv) { - MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; - int i, j; - short this_row_offset, this_col_offset; - - int what_stride = b->src_stride; - int in_what_stride = d->pre_stride; - unsigned char *what = (*(b->base_src) + b->src); - unsigned char *best_address = (unsigned char *)(*(d->base_pre) + d->pre + - (ref_mv->as_mv.row * (d->pre_stride)) + ref_mv->as_mv.col); - unsigned char *check_here; - unsigned int thissad; - int_mv this_mv; - unsigned int bestsad = INT_MAX; - MACROBLOCKD *xd = &x->e_mbd; - int_mv fcenter_mv; - - int *mvjsadcost = x->nmvjointsadcost; - int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; - - fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; - fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - - bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) + - mvsad_err_cost(ref_mv, &fcenter_mv, MVSADCOSTS, error_per_bit); - - for (i = 0; i < search_range; i++) { - int best_site = -1; - - for (j = 0; j < 4; j++) { - this_row_offset = ref_mv->as_mv.row + neighbors[j].row; - this_col_offset = ref_mv->as_mv.col + neighbors[j].col; - - if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) && - (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) { - check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col + best_address; - thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); - - if (thissad < bestsad) { - this_mv.as_mv.row = this_row_offset; - this_mv.as_mv.col = this_col_offset; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, MVSADCOSTS, error_per_bit); - - if (thissad < bestsad) { - bestsad = thissad; - best_site = j; - } - } - } - } - - if (best_site == -1) - break; - else { - ref_mv->as_mv.row += 
neighbors[best_site].row; - ref_mv->as_mv.col += neighbors[best_site].col; - best_address += (neighbors[best_site].row) * in_what_stride + neighbors[best_site].col; - } - } - - this_mv.as_mv.row = ref_mv->as_mv.row << 3; - this_mv.as_mv.col = ref_mv->as_mv.col << 3; - - if (bestsad < INT_MAX) - return - fn_ptr->vf(what, what_stride, best_address, in_what_stride, - (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, MVCOSTS, x->errorperbit, - xd->allow_high_precision_mv); - else - return INT_MAX; -} - -int vp9_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, - int_mv *ref_mv, int error_per_bit, - int search_range, vp9_variance_fn_ptr_t *fn_ptr, - DEC_MVCOSTS, int_mv *center_mv) { - MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; - int i, j; - short this_row_offset, this_col_offset; - - int what_stride = b->src_stride; - int in_what_stride = d->pre_stride; - unsigned char *what = (*(b->base_src) + b->src); - unsigned char *best_address = (unsigned char *)(*(d->base_pre) + d->pre + - (ref_mv->as_mv.row * (d->pre_stride)) + ref_mv->as_mv.col); - unsigned char *check_here; - unsigned int thissad; - int_mv this_mv; - unsigned int bestsad = INT_MAX; - MACROBLOCKD *xd = &x->e_mbd; - int_mv fcenter_mv; - - int *mvjsadcost = x->nmvjointsadcost; - int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; - - fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; - fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - - bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) + - mvsad_err_cost(ref_mv, &fcenter_mv, MVSADCOSTS, error_per_bit); - - for (i = 0; i < search_range; i++) { - int best_site = -1; - int all_in = 1; - - all_in &= ((ref_mv->as_mv.row - 1) > x->mv_row_min); - all_in &= ((ref_mv->as_mv.row + 1) < x->mv_row_max); - all_in &= ((ref_mv->as_mv.col - 1) > x->mv_col_min); - all_in &= ((ref_mv->as_mv.col + 1) < x->mv_col_max); - - if (all_in) { - unsigned int sad_array[4]; - unsigned char *block_offset[4]; - 
block_offset[0] = best_address - in_what_stride; - block_offset[1] = best_address - 1; - block_offset[2] = best_address + 1; - block_offset[3] = best_address + in_what_stride; - - fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array); - - for (j = 0; j < 4; j++) { - if (sad_array[j] < bestsad) { - this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row; - this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col; - sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv, MVSADCOSTS, error_per_bit); - - if (sad_array[j] < bestsad) { - bestsad = sad_array[j]; - best_site = j; - } - } - } - } else { - for (j = 0; j < 4; j++) { - this_row_offset = ref_mv->as_mv.row + neighbors[j].row; - this_col_offset = ref_mv->as_mv.col + neighbors[j].col; - - if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) && - (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) { - check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col + best_address; - thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); - - if (thissad < bestsad) { - this_mv.as_mv.row = this_row_offset; - this_mv.as_mv.col = this_col_offset; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, MVSADCOSTS, error_per_bit); - - if (thissad < bestsad) { - bestsad = thissad; - best_site = j; - } - } - } - } - } - - if (best_site == -1) - break; - else { - ref_mv->as_mv.row += neighbors[best_site].row; - ref_mv->as_mv.col += neighbors[best_site].col; - best_address += (neighbors[best_site].row) * in_what_stride + neighbors[best_site].col; - } - } - - this_mv.as_mv.row = ref_mv->as_mv.row << 3; - this_mv.as_mv.col = ref_mv->as_mv.col << 3; - - if (bestsad < INT_MAX) - return - fn_ptr->vf(what, what_stride, best_address, in_what_stride, - (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, MVCOSTS, x->errorperbit, - xd->allow_high_precision_mv); - else - return INT_MAX; -} - - - -#ifdef ENTROPY_STATS -void 
print_mode_context(void) { - FILE *f = fopen("modecont.c", "a"); - int i, j; - - fprintf(f, "#include \"entropy.h\"\n"); - fprintf(f, "const int vp9_mode_contexts[6][4] ="); - fprintf(f, "{\n"); - for (j = 0; j < 6; j++) { - fprintf(f, " {/* %d */ ", j); - fprintf(f, " "); - for (i = 0; i < 4; i++) { - int this_prob; - int count; - - // context probs - count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1]; - if (count) - this_prob = 256 * mv_ref_ct[j][i][0] / count; - else - this_prob = 128; - - if (this_prob == 0) - this_prob = 1; - fprintf(f, "%5d, ", this_prob); - } - fprintf(f, " },\n"); - } - - fprintf(f, "};\n"); - fclose(f); -} - -/* MV ref count ENTROPY_STATS stats code */ -void init_mv_ref_counts() { - vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct)); - vpx_memset(mv_mode_cts, 0, sizeof(mv_mode_cts)); -} - -void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4]) { - if (m == ZEROMV) { - ++mv_ref_ct [ct[0]] [0] [0]; - ++mv_mode_cts[0][0]; - } else { - ++mv_ref_ct [ct[0]] [0] [1]; - ++mv_mode_cts[0][1]; - - if (m == NEARESTMV) { - ++mv_ref_ct [ct[1]] [1] [0]; - ++mv_mode_cts[1][0]; - } else { - ++mv_ref_ct [ct[1]] [1] [1]; - ++mv_mode_cts[1][1]; - - if (m == NEARMV) { - ++mv_ref_ct [ct[2]] [2] [0]; - ++mv_mode_cts[2][0]; - } else { - ++mv_ref_ct [ct[2]] [2] [1]; - ++mv_mode_cts[2][1]; - - if (m == NEWMV) { - ++mv_ref_ct [ct[3]] [3] [0]; - ++mv_mode_cts[3][0]; - } else { - ++mv_ref_ct [ct[3]] [3] [1]; - ++mv_mode_cts[3][1]; - } - } - } - } -} - -#endif/* END MV ref count ENTROPY_STATS stats code */ diff --git a/vp8/encoder/mcomp.h b/vp8/encoder/mcomp.h deleted file mode 100644 index f754837e6..000000000 --- a/vp8/encoder/mcomp.h +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. 
All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef __INC_MCOMP_H -#define __INC_MCOMP_H - -#include "block.h" -#include "variance.h" - -#define MVCOSTS mvjcost, mvcost -#define MVSADCOSTS mvjsadcost, mvsadcost -#define DEC_MVCOSTS int *mvjcost, int *mvcost[2] -#define DEC_MVSADCOSTS int *mvjsadcost, int *mvsadcost[2] -#define NULLMVCOST NULL, NULL -#define XMVCOST x->nmvjointcost, (x->e_mbd.allow_high_precision_mv?x->nmvcost_hp:x->nmvcost) - -#ifdef ENTROPY_STATS -extern void init_mv_ref_counts(); -extern void accum_mv_refs(MB_PREDICTION_MODE, const int near_mv_ref_cts[4]); -#endif - - -#define MAX_MVSEARCH_STEPS 8 // The maximum number of steps in a step search given the largest allowed initial step -#define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS)) - 1) // Max full pel mv specified in 1 pel units -#define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS-1)) // Maximum size of the first step in full pel units - -extern void vp9_clamp_mv_min_max(MACROBLOCK *x, int_mv *ref_mv); -extern int vp9_mv_bit_cost(int_mv *mv, int_mv *ref, DEC_MVCOSTS, - int Weight, int ishp); -extern void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride); -extern void vp9_init3smotion_compensation(MACROBLOCK *x, int stride); -// Runs sequence of diamond searches in smaller steps for RD -struct VP9_COMP; -int vp9_full_pixel_diamond(struct VP9_COMP *cpi, MACROBLOCK *x, BLOCK *b, - BLOCKD *d, int_mv *mvp_full, int step_param, - int sadpb, int further_steps, int do_refine, - vp9_variance_fn_ptr_t *fn_ptr, - int_mv *ref_mv, int_mv *dst_mv); - -extern int vp9_hex_search -( - MACROBLOCK *x, - BLOCK *b, - BLOCKD *d, - int_mv *ref_mv, - int_mv *best_mv, - int search_param, - int error_per_bit, - const vp9_variance_fn_ptr_t *vf, - DEC_MVSADCOSTS, - DEC_MVCOSTS, - int_mv *center_mv -); - -typedef int (fractional_mv_step_fp) -(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *bestmv, int_mv *ref_mv, - int error_per_bit, const 
vp9_variance_fn_ptr_t *vfp, DEC_MVCOSTS, - int *distortion, unsigned int *sse); -extern fractional_mv_step_fp vp9_find_best_sub_pixel_step_iteratively; -extern fractional_mv_step_fp vp9_find_best_sub_pixel_step; -extern fractional_mv_step_fp vp9_find_best_half_pixel_step; - -#define prototype_full_search_sad(sym)\ - int (sym)\ - (\ - MACROBLOCK *x, \ - BLOCK *b, \ - BLOCKD *d, \ - int_mv *ref_mv, \ - int sad_per_bit, \ - int distance, \ - vp9_variance_fn_ptr_t *fn_ptr, \ - DEC_MVSADCOSTS, \ - int_mv *center_mv \ - ) - -#define prototype_refining_search_sad(sym)\ - int (sym)\ - (\ - MACROBLOCK *x, \ - BLOCK *b, \ - BLOCKD *d, \ - int_mv *ref_mv, \ - int sad_per_bit, \ - int distance, \ - vp9_variance_fn_ptr_t *fn_ptr, \ - DEC_MVSADCOSTS, \ - int_mv *center_mv \ - ) - -#define prototype_diamond_search_sad(sym)\ - int (sym)\ - (\ - MACROBLOCK *x, \ - BLOCK *b, \ - BLOCKD *d, \ - int_mv *ref_mv, \ - int_mv *best_mv, \ - int search_param, \ - int sad_per_bit, \ - int *num00, \ - vp9_variance_fn_ptr_t *fn_ptr, \ - DEC_MVSADCOSTS, \ - int_mv *center_mv \ - ) - -#if ARCH_X86 || ARCH_X86_64 -#include "x86/mcomp_x86.h" -#endif - -typedef prototype_full_search_sad(*vp9_full_search_fn_t); -extern prototype_full_search_sad(vp9_full_search_sad); -extern prototype_full_search_sad(vp9_full_search_sadx3); -extern prototype_full_search_sad(vp9_full_search_sadx8); - -typedef prototype_refining_search_sad(*vp9_refining_search_fn_t); -extern prototype_refining_search_sad(vp9_refining_search_sad); -extern prototype_refining_search_sad(vp9_refining_search_sadx4); - -typedef prototype_diamond_search_sad(*vp9_diamond_search_fn_t); -extern prototype_diamond_search_sad(vp9_diamond_search_sad); -extern prototype_diamond_search_sad(vp9_diamond_search_sadx4); - -#ifndef vp9_search_full_search -#define vp9_search_full_search vp9_full_search_sad -#endif -extern prototype_full_search_sad(vp9_search_full_search); - -#ifndef vp9_search_refining_search -#define vp9_search_refining_search 
vp9_refining_search_sad -#endif -extern prototype_refining_search_sad(vp9_search_refining_search); - -#ifndef vp9_search_diamond_search -#define vp9_search_diamond_search vp9_diamond_search_sad -#endif -extern prototype_diamond_search_sad(vp9_search_diamond_search); - -typedef struct { - prototype_full_search_sad(*full_search); - prototype_refining_search_sad(*refining_search); - prototype_diamond_search_sad(*diamond_search); -} vp9_search_rtcd_vtable_t; - -#if CONFIG_RUNTIME_CPU_DETECT -#define SEARCH_INVOKE(ctx,fn) (ctx)->fn -#else -#define SEARCH_INVOKE(ctx,fn) vp9_search_##fn -#endif - -#endif diff --git a/vp8/encoder/modecosts.c b/vp8/encoder/modecosts.c deleted file mode 100644 index 57d1ae671..000000000 --- a/vp8/encoder/modecosts.c +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include "vp8/common/blockd.h" -#include "onyx_int.h" -#include "treewriter.h" -#include "vp8/common/entropymode.h" - - -void vp9_init_mode_costs(VP9_COMP *c) { - VP9_COMMON *x = &c->common; - const vp9_tree_p T = vp9_bmode_tree; - int i, j; - - for (i = 0; i < VP9_BINTRAMODES; i++) { - for (j = 0; j < VP9_BINTRAMODES; j++) { - vp9_cost_tokens((int *)c->mb.bmode_costs[i][j], - x->kf_bmode_prob[i][j], T); - } - } - - vp9_cost_tokens((int *)c->mb.inter_bmode_costs, x->fc.bmode_prob, T); - vp9_cost_tokens((int *)c->mb.inter_bmode_costs, - x->fc.sub_mv_ref_prob[0], vp9_sub_mv_ref_tree); - - vp9_cost_tokens(c->mb.mbmode_cost[1], x->fc.ymode_prob, vp9_ymode_tree); - vp9_cost_tokens(c->mb.mbmode_cost[0], - x->kf_ymode_prob[c->common.kf_ymode_probs_index], - vp9_kf_ymode_tree); - vp9_cost_tokens(c->mb.intra_uv_mode_cost[1], - x->fc.uv_mode_prob[VP9_YMODES - 1], vp9_uv_mode_tree); - vp9_cost_tokens(c->mb.intra_uv_mode_cost[0], - x->kf_uv_mode_prob[VP9_YMODES - 1], vp9_uv_mode_tree); - vp9_cost_tokens(c->mb.i8x8_mode_costs, - x->fc.i8x8_mode_prob, vp9_i8x8_mode_tree); - - for (i = 0; i <= VP9_SWITCHABLE_FILTERS; ++i) - vp9_cost_tokens((int *)c->mb.switchable_interp_costs[i], - x->fc.switchable_interp_prob[i], - vp9_switchable_interp_tree); -} diff --git a/vp8/encoder/modecosts.h b/vp8/encoder/modecosts.h deleted file mode 100644 index c37604edc..000000000 --- a/vp8/encoder/modecosts.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#ifndef __INC_MODECOSTS_H -#define __INC_MODECOSTS_H - -void vp9_init_mode_costs(VP9_COMP *x); - -#endif diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c deleted file mode 100644 index 4fd6f84d4..000000000 --- a/vp8/encoder/onyx_if.c +++ /dev/null @@ -1,4486 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vpx_config.h" -#include "vp8/common/onyxc_int.h" -#include "onyx_int.h" -#include "vp8/common/systemdependent.h" -#include "quantize.h" -#include "vp8/common/alloccommon.h" -#include "mcomp.h" -#include "firstpass.h" -#include "psnr.h" -#include "vpx_scale/vpxscale.h" -#include "vp8/common/extend.h" -#include "ratectrl.h" -#include "vp8/common/quant_common.h" -#include "segmentation.h" -#include "vpx_scale/yv12extend.h" -#if CONFIG_POSTPROC -#include "vp8/common/postproc.h" -#endif -#include "vpx_mem/vpx_mem.h" -#include "vp8/common/swapyv12buffer.h" -#include "vpx_ports/vpx_timer.h" -#include "temporal_filter.h" - -#include "vp8/common/seg_common.h" -#include "mbgraph.h" -#include "vp8/common/pred_common.h" -#include "vp8/encoder/rdopt.h" -#include "bitstream.h" -#include "ratectrl.h" - -#if CONFIG_NEWBESTREFMV -#include "vp8/common/mvref_common.h" -#endif - -#if ARCH_ARM -#include "vpx_ports/arm.h" -#endif - -#include <math.h> -#include <stdio.h> -#include <limits.h> - -#if CONFIG_RUNTIME_CPU_DETECT -#define IF_RTCD(x) (x) -#define RTCD(x) &cpi->common.rtcd.x -#else -#define IF_RTCD(x) NULL -#define RTCD(x) NULL -#endif - -extern void vp9_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi); - -extern void vp9_set_alt_lf_level(VP9_COMP *cpi, int 
filt_val); - -extern void vp9_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi); - -extern void vp9_cmachine_specific_config(VP9_COMP *cpi); - -extern void vp9_deblock_frame(YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *post, - int filt_lvl, int low_var_thresh, int flag); - -extern void print_tree_update_probs(); - -#if HAVE_ARMV7 -extern void vp8_yv12_copy_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, - YV12_BUFFER_CONFIG *dst_ybc); - -extern void vp8_yv12_copy_src_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, - YV12_BUFFER_CONFIG *dst_ybc); -#endif - -int vp9_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest); - -extern void vp9_temporal_filter_prepare_c(VP9_COMP *cpi, int distance); - -static void set_default_lf_deltas(VP9_COMP *cpi); - -#define DEFAULT_INTERP_FILTER EIGHTTAP /* SWITCHABLE for better performance */ -#define SEARCH_BEST_FILTER 0 /* to search exhaustively for - best filter */ -#define RESET_FOREACH_FILTER 0 /* whether to reset the encoder state - before trying each new filter */ -#define SHARP_FILTER_QTHRESH 0 /* Q threshold for 8-tap sharp filter */ - -#define ALTREF_HIGH_PRECISION_MV 1 /* whether to use high precision mv - for altref computation */ -#define HIGH_PRECISION_MV_QTHRESH 200 /* Q threshold for use of high precision - mv. 
Choose a very high value for - now so that HIGH_PRECISION is always - chosen */ - -#if CONFIG_INTERNAL_STATS -#include "math.h" - -extern double vp9_calc_ssim(YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *dest, int lumamask, - double *weight); - - -extern double vp9_calc_ssimg(YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *dest, double *ssim_y, - double *ssim_u, double *ssim_v); - - -#endif - -// #define OUTPUT_YUV_REC - -#ifdef OUTPUT_YUV_SRC -FILE *yuv_file; -#endif -#ifdef OUTPUT_YUV_REC -FILE *yuv_rec_file; -#endif - -#if 0 -FILE *framepsnr; -FILE *kf_list; -FILE *keyfile; -#endif - -#if 0 -extern int skip_true_count; -extern int skip_false_count; -#endif - - -#ifdef ENTROPY_STATS -extern int intra_mode_stats[VP9_BINTRAMODES][VP9_BINTRAMODES][VP9_BINTRAMODES]; -#endif - -#ifdef NMV_STATS -extern void init_nmvstats(); -extern void print_nmvstats(); -#endif - -#ifdef SPEEDSTATS -unsigned int frames_at_speed[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; -#endif - -#if defined(SECTIONBITS_OUTPUT) -extern unsigned __int64 Sectionbits[500]; -#endif -#ifdef MODE_STATS -extern INT64 Sectionbits[500]; -extern unsigned int y_modes[VP9_YMODES]; -extern unsigned int i8x8_modes[VP9_I8X8_MODES]; -extern unsigned int uv_modes[VP9_UV_MODES]; -extern unsigned int uv_modes_y[VP9_YMODES][VP9_UV_MODES]; -extern unsigned int b_modes[B_MODE_COUNT]; -extern unsigned int inter_y_modes[MB_MODE_COUNT]; -extern unsigned int inter_uv_modes[VP9_UV_MODES]; -extern unsigned int inter_b_modes[B_MODE_COUNT]; -#endif - -extern void vp9_init_quantizer(VP9_COMP *cpi); - -static int base_skip_false_prob[QINDEX_RANGE][3]; - -// Tables relating active max Q to active min Q -static int kf_low_motion_minq[QINDEX_RANGE]; -static int kf_high_motion_minq[QINDEX_RANGE]; -static int gf_low_motion_minq[QINDEX_RANGE]; -static int gf_high_motion_minq[QINDEX_RANGE]; -static int inter_minq[QINDEX_RANGE]; - -// Functions to compute the active minq lookup table entries based on a -// formulaic 
approach to facilitate easier adjustment of the Q tables. -// The formulae were derived from computing a 3rd order polynomial best -// fit to the original data (after plotting real maxq vs minq (not q index)) -static int calculate_minq_index(double maxq, - double x3, double x2, double x, double c) { - int i; - double minqtarget; - double thisq; - - minqtarget = ((x3 * maxq * maxq * maxq) + - (x2 * maxq * maxq) + - (x * maxq) + - c); - - if (minqtarget > maxq) - minqtarget = maxq; - - for (i = 0; i < QINDEX_RANGE; i++) { - thisq = vp9_convert_qindex_to_q(i); - if (minqtarget <= vp9_convert_qindex_to_q(i)) - return i; - } - return QINDEX_RANGE - 1; -} - -static void init_minq_luts(void) { - int i; - double maxq; - - for (i = 0; i < QINDEX_RANGE; i++) { - maxq = vp9_convert_qindex_to_q(i); - - - kf_low_motion_minq[i] = calculate_minq_index(maxq, - 0.0000003, - -0.000015, - 0.074, - 0.0); - kf_high_motion_minq[i] = calculate_minq_index(maxq, - 0.0000004, - -0.000125, - 0.14, - 0.0); - gf_low_motion_minq[i] = calculate_minq_index(maxq, - 0.0000015, - -0.0009, - 0.33, - 0.0); - gf_high_motion_minq[i] = calculate_minq_index(maxq, - 0.0000021, - -0.00125, - 0.45, - 0.0); - inter_minq[i] = calculate_minq_index(maxq, - 0.00000271, - -0.00113, - 0.697, - 0.0); - - } -} - -static void init_base_skip_probs(void) { - int i; - double q; - int skip_prob, t; - - for (i = 0; i < QINDEX_RANGE; i++) { - q = vp9_convert_qindex_to_q(i); - - // Exponential decay caluclation of baseline skip prob with clamping - // Based on crude best fit of old table. 
- t = (int)(564.25 * pow(2.71828, (-0.012 * q))); - - skip_prob = t; - if (skip_prob < 1) - skip_prob = 1; - else if (skip_prob > 255) - skip_prob = 255; - base_skip_false_prob[i][1] = skip_prob; - - skip_prob = t * 0.75; - if (skip_prob < 1) - skip_prob = 1; - else if (skip_prob > 255) - skip_prob = 255; - base_skip_false_prob[i][2] = skip_prob; - - skip_prob = t * 1.25; - if (skip_prob < 1) - skip_prob = 1; - else if (skip_prob > 255) - skip_prob = 255; - base_skip_false_prob[i][0] = skip_prob; - } -} - -static void update_base_skip_probs(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; - - if (cm->frame_type != KEY_FRAME) { - vp9_update_skip_probs(cpi); - - if (cm->refresh_alt_ref_frame) { - int k; - for (k = 0; k < MBSKIP_CONTEXTS; ++k) - cpi->last_skip_false_probs[2][k] = cm->mbskip_pred_probs[k]; - cpi->last_skip_probs_q[2] = cm->base_qindex; - } else if (cpi->common.refresh_golden_frame) { - int k; - for (k = 0; k < MBSKIP_CONTEXTS; ++k) - cpi->last_skip_false_probs[1][k] = cm->mbskip_pred_probs[k]; - cpi->last_skip_probs_q[1] = cm->base_qindex; - } else { - int k; - for (k = 0; k < MBSKIP_CONTEXTS; ++k) - cpi->last_skip_false_probs[0][k] = cm->mbskip_pred_probs[k]; - cpi->last_skip_probs_q[0] = cm->base_qindex; - - // update the baseline table for the current q - for (k = 0; k < MBSKIP_CONTEXTS; ++k) - cpi->base_skip_false_prob[cm->base_qindex][k] = - cm->mbskip_pred_probs[k]; - } - } - -} - -void vp9_initialize_enc() { - static int init_done = 0; - - if (!init_done) { - vp8_scale_machine_specific_config(); - vp9_initialize_common(); - vp9_tokenize_initialize(); - vp9_init_quant_tables(); - vp9_init_me_luts(); - init_minq_luts(); - init_base_skip_probs(); - init_done = 1; - } -} -#ifdef PACKET_TESTING -extern FILE *vpxlogc; -#endif - -static void setup_features(VP9_COMP *cpi) { - MACROBLOCKD *xd = &cpi->mb.e_mbd; - - // Set up default state for MB feature flags - - xd->segmentation_enabled = 0; // Default segmentation disabled - - 
xd->update_mb_segmentation_map = 0; - xd->update_mb_segmentation_data = 0; - vpx_memset(xd->mb_segment_tree_probs, 255, sizeof(xd->mb_segment_tree_probs)); - - vp9_clearall_segfeatures(xd); - - xd->mode_ref_lf_delta_enabled = 0; - xd->mode_ref_lf_delta_update = 0; - vpx_memset(xd->ref_lf_deltas, 0, sizeof(xd->ref_lf_deltas)); - vpx_memset(xd->mode_lf_deltas, 0, sizeof(xd->mode_lf_deltas)); - vpx_memset(xd->last_ref_lf_deltas, 0, sizeof(xd->ref_lf_deltas)); - vpx_memset(xd->last_mode_lf_deltas, 0, sizeof(xd->mode_lf_deltas)); - - set_default_lf_deltas(cpi); - -} - - -static void dealloc_compressor_data(VP9_COMP *cpi) { - vpx_free(cpi->tplist); - cpi->tplist = NULL; - - // Delete last frame MV storage buffers - vpx_free(cpi->lfmv); - cpi->lfmv = 0; - - vpx_free(cpi->lf_ref_frame_sign_bias); - cpi->lf_ref_frame_sign_bias = 0; - - vpx_free(cpi->lf_ref_frame); - cpi->lf_ref_frame = 0; - - // Delete sementation map - vpx_free(cpi->segmentation_map); - cpi->segmentation_map = 0; - vpx_free(cpi->common.last_frame_seg_map); - cpi->common.last_frame_seg_map = 0; - vpx_free(cpi->coding_context.last_frame_seg_map_copy); - cpi->coding_context.last_frame_seg_map_copy = 0; - - vpx_free(cpi->active_map); - cpi->active_map = 0; - - vp9_de_alloc_frame_buffers(&cpi->common); - - vp8_yv12_de_alloc_frame_buffer(&cpi->last_frame_uf); - vp8_yv12_de_alloc_frame_buffer(&cpi->scaled_source); -#if VP9_TEMPORAL_ALT_REF - vp8_yv12_de_alloc_frame_buffer(&cpi->alt_ref_buffer); -#endif - vp9_lookahead_destroy(cpi->lookahead); - - vpx_free(cpi->tok); - cpi->tok = 0; - - // Structure used to monitor GF usage - vpx_free(cpi->gf_active_flags); - cpi->gf_active_flags = 0; - - // Activity mask based per mb zbin adjustments - vpx_free(cpi->mb_activity_map); - cpi->mb_activity_map = 0; - vpx_free(cpi->mb_norm_activity_map); - cpi->mb_norm_activity_map = 0; - - vpx_free(cpi->mb.pip); - cpi->mb.pip = 0; - - vpx_free(cpi->twopass.total_stats); - cpi->twopass.total_stats = 0; - - 
vpx_free(cpi->twopass.total_left_stats); - cpi->twopass.total_left_stats = 0; - - vpx_free(cpi->twopass.this_frame_stats); - cpi->twopass.this_frame_stats = 0; -} - -// Computes a q delta (in "q index" terms) to get from a starting q value -// to a target value -// target q value -static int compute_qdelta(VP9_COMP *cpi, double qstart, double qtarget) { - int i; - int start_index = cpi->worst_quality; - int target_index = cpi->worst_quality; - - // Convert the average q value to an index. - for (i = cpi->best_quality; i < cpi->worst_quality; i++) { - start_index = i; - if (vp9_convert_qindex_to_q(i) >= qstart) - break; - } - - // Convert the q target to an index - for (i = cpi->best_quality; i < cpi->worst_quality; i++) { - target_index = i; - if (vp9_convert_qindex_to_q(i) >= qtarget) - break; - } - - return target_index - start_index; -} - -static void init_seg_features(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; - MACROBLOCKD *xd = &cpi->mb.e_mbd; - - int high_q = (int)(cpi->avg_q > 48.0); - int qi_delta; - - // Disable and clear down for KF - if (cm->frame_type == KEY_FRAME) { - // Clear down the global segmentation map - vpx_memset(cpi->segmentation_map, 0, (cm->mb_rows * cm->mb_cols)); - xd->update_mb_segmentation_map = 0; - xd->update_mb_segmentation_data = 0; - cpi->static_mb_pct = 0; - - // Disable segmentation - vp9_disable_segmentation((VP9_PTR)cpi); - - // Clear down the segment features. - vp9_clearall_segfeatures(xd); - } - - // If this is an alt ref frame - else if (cm->refresh_alt_ref_frame) { - // Clear down the global segmentation map - vpx_memset(cpi->segmentation_map, 0, (cm->mb_rows * cm->mb_cols)); - xd->update_mb_segmentation_map = 0; - xd->update_mb_segmentation_data = 0; - cpi->static_mb_pct = 0; - - // Disable segmentation and individual segment features by default - vp9_disable_segmentation((VP9_PTR)cpi); - vp9_clearall_segfeatures(xd); - - // Scan frames from current to arf frame. 
- // This function re-enables segmentation if appropriate. - vp9_update_mbgraph_stats(cpi); - - // If segmentation was enabled set those features needed for the - // arf itself. - if (xd->segmentation_enabled) { - xd->update_mb_segmentation_map = 1; - xd->update_mb_segmentation_data = 1; - - qi_delta = compute_qdelta(cpi, cpi->avg_q, (cpi->avg_q * 0.875)); - vp9_set_segdata(xd, 1, SEG_LVL_ALT_Q, (qi_delta - 2)); - vp9_set_segdata(xd, 1, SEG_LVL_ALT_LF, -2); - - vp9_enable_segfeature(xd, 1, SEG_LVL_ALT_Q); - vp9_enable_segfeature(xd, 1, SEG_LVL_ALT_LF); - - // Where relevant assume segment data is delta data - xd->mb_segment_abs_delta = SEGMENT_DELTADATA; - - } - } - // All other frames if segmentation has been enabled - else if (xd->segmentation_enabled) { - // First normal frame in a valid gf or alt ref group - if (cpi->common.frames_since_golden == 0) { - // Set up segment features for normal frames in an af group - if (cpi->source_alt_ref_active) { - xd->update_mb_segmentation_map = 0; - xd->update_mb_segmentation_data = 1; - xd->mb_segment_abs_delta = SEGMENT_DELTADATA; - - qi_delta = compute_qdelta(cpi, cpi->avg_q, - (cpi->avg_q * 1.125)); - vp9_set_segdata(xd, 1, SEG_LVL_ALT_Q, (qi_delta + 2)); - vp9_set_segdata(xd, 1, SEG_LVL_ALT_Q, 0); - vp9_enable_segfeature(xd, 1, SEG_LVL_ALT_Q); - - vp9_set_segdata(xd, 1, SEG_LVL_ALT_LF, -2); - vp9_enable_segfeature(xd, 1, SEG_LVL_ALT_LF); - - // Segment coding disabled for compred testing - if (high_q || (cpi->static_mb_pct == 100)) { - // set_segref(xd, 1, LAST_FRAME); - vp9_set_segref(xd, 1, ALTREF_FRAME); - vp9_enable_segfeature(xd, 1, SEG_LVL_REF_FRAME); - - vp9_set_segdata(xd, 1, SEG_LVL_MODE, ZEROMV); - vp9_enable_segfeature(xd, 1, SEG_LVL_MODE); - - // EOB segment coding not fixed for 8x8 yet - vp9_set_segdata(xd, 1, SEG_LVL_EOB, 0); - vp9_enable_segfeature(xd, 1, SEG_LVL_EOB); - } - } - // Disable segmentation and clear down features if alt ref - // is not active for this group - else { - 
vp9_disable_segmentation((VP9_PTR)cpi); - - vpx_memset(cpi->segmentation_map, 0, - (cm->mb_rows * cm->mb_cols)); - - xd->update_mb_segmentation_map = 0; - xd->update_mb_segmentation_data = 0; - - vp9_clearall_segfeatures(xd); - } - } - - // Special case where we are coding over the top of a previous - // alt ref frame - // Segment coding disabled for compred testing - else if (cpi->is_src_frame_alt_ref) { - // Enable mode and ref frame features for segment 0 as well - vp9_enable_segfeature(xd, 0, SEG_LVL_REF_FRAME); - vp9_enable_segfeature(xd, 0, SEG_LVL_MODE); - vp9_enable_segfeature(xd, 1, SEG_LVL_REF_FRAME); - vp9_enable_segfeature(xd, 1, SEG_LVL_MODE); - - // All mbs should use ALTREF_FRAME, ZEROMV exclusively - vp9_clear_segref(xd, 0); - vp9_set_segref(xd, 0, ALTREF_FRAME); - vp9_clear_segref(xd, 1); - vp9_set_segref(xd, 1, ALTREF_FRAME); - vp9_set_segdata(xd, 0, SEG_LVL_MODE, ZEROMV); - vp9_set_segdata(xd, 1, SEG_LVL_MODE, ZEROMV); - - // Skip all MBs if high Q - if (high_q) { - vp9_enable_segfeature(xd, 0, SEG_LVL_EOB); - vp9_set_segdata(xd, 0, SEG_LVL_EOB, 0); - vp9_enable_segfeature(xd, 1, SEG_LVL_EOB); - vp9_set_segdata(xd, 1, SEG_LVL_EOB, 0); - } - // Enable data udpate - xd->update_mb_segmentation_data = 1; - } - // All other frames. - else { - // No updates.. leave things as they are. - xd->update_mb_segmentation_map = 0; - xd->update_mb_segmentation_data = 0; - } - } -} - -// DEBUG: Print out the segment id of each MB in the current frame. 
-static void print_seg_map(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; - int row, col; - int map_index = 0; - FILE *statsfile; - - statsfile = fopen("segmap.stt", "a"); - - fprintf(statsfile, "%10d\n", - cm->current_video_frame); - - for (row = 0; row < cpi->common.mb_rows; row++) { - for (col = 0; col < cpi->common.mb_cols; col++) { - fprintf(statsfile, "%10d", - cpi->segmentation_map[map_index]); - map_index++; - } - fprintf(statsfile, "\n"); - } - fprintf(statsfile, "\n"); - - fclose(statsfile); -} - -static void update_reference_segmentation_map(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; - int row, col, sb_rows = (cm->mb_rows + 1) >> 1, sb_cols = (cm->mb_cols + 1) >> 1; - MODE_INFO *mi = cm->mi; - uint8_t *segmap = cpi->segmentation_map; - uint8_t *segcache = cm->last_frame_seg_map; - - for (row = 0; row < sb_rows; row++) { - for (col = 0; col < sb_cols; col++) { - MODE_INFO *miptr = mi + col * 2; - uint8_t *cache = segcache + col * 2; -#if CONFIG_SUPERBLOCKS - if (miptr->mbmi.encoded_as_sb) { - cache[0] = miptr->mbmi.segment_id; - if (!(cm->mb_cols & 1) || col < sb_cols - 1) - cache[1] = miptr->mbmi.segment_id; - if (!(cm->mb_rows & 1) || row < sb_rows - 1) { - cache[cm->mb_cols] = miptr->mbmi.segment_id; - if (!(cm->mb_cols & 1) || col < sb_cols - 1) - cache[cm->mb_cols + 1] = miptr->mbmi.segment_id; - } - } else -#endif - { - cache[0] = miptr[0].mbmi.segment_id; - if (!(cm->mb_cols & 1) || col < sb_cols - 1) - cache[1] = miptr[1].mbmi.segment_id; - if (!(cm->mb_rows & 1) || row < sb_rows - 1) { - cache[cm->mb_cols] = miptr[cm->mode_info_stride].mbmi.segment_id; - if (!(cm->mb_cols & 1) || col < sb_cols - 1) - cache[1] = miptr[1].mbmi.segment_id; - cache[cm->mb_cols + 1] = miptr[cm->mode_info_stride + 1].mbmi.segment_id; - } - } - } - segmap += 2 * cm->mb_cols; - segcache += 2 * cm->mb_cols; - mi += 2 * cm->mode_info_stride; - } -} - -static void set_default_lf_deltas(VP9_COMP *cpi) { - cpi->mb.e_mbd.mode_ref_lf_delta_enabled = 1; - 
cpi->mb.e_mbd.mode_ref_lf_delta_update = 1; - - vpx_memset(cpi->mb.e_mbd.ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas)); - vpx_memset(cpi->mb.e_mbd.mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas)); - - // Test of ref frame deltas - cpi->mb.e_mbd.ref_lf_deltas[INTRA_FRAME] = 2; - cpi->mb.e_mbd.ref_lf_deltas[LAST_FRAME] = 0; - cpi->mb.e_mbd.ref_lf_deltas[GOLDEN_FRAME] = -2; - cpi->mb.e_mbd.ref_lf_deltas[ALTREF_FRAME] = -2; - - cpi->mb.e_mbd.mode_lf_deltas[0] = 4; // BPRED - cpi->mb.e_mbd.mode_lf_deltas[1] = -2; // Zero - cpi->mb.e_mbd.mode_lf_deltas[2] = 2; // New mv - cpi->mb.e_mbd.mode_lf_deltas[3] = 4; // Split mv -} - -void vp9_set_speed_features(VP9_COMP *cpi) { - SPEED_FEATURES *sf = &cpi->sf; - int Mode = cpi->compressor_speed; - int Speed = cpi->Speed; - int i; - VP9_COMMON *cm = &cpi->common; - - // Only modes 0 and 1 supported for now in experimental code basae - if (Mode > 1) - Mode = 1; - - // Initialise default mode frequency sampling variables - for (i = 0; i < MAX_MODES; i ++) { - cpi->mode_check_freq[i] = 0; - cpi->mode_test_hit_counts[i] = 0; - cpi->mode_chosen_counts[i] = 0; - } - - // best quality defaults - sf->RD = 1; - sf->search_method = NSTEP; - sf->improved_dct = 1; - sf->auto_filter = 1; - sf->recode_loop = 1; - sf->quarter_pixel_search = 1; - sf->half_pixel_search = 1; - sf->iterative_sub_pixel = 1; -#if CONFIG_LOSSLESS - sf->optimize_coefficients = 0; -#else - sf->optimize_coefficients = 1; -#endif - sf->no_skip_block4x4_search = 1; - - sf->first_step = 0; - sf->max_step_search_steps = MAX_MVSEARCH_STEPS; - sf->improved_mv_pred = 1; - - // default thresholds to 0 - for (i = 0; i < MAX_MODES; i++) - sf->thresh_mult[i] = 0; - - switch (Mode) { - case 0: // best quality mode -#if CONFIG_PRED_FILTER - sf->thresh_mult[THR_ZEROMV ] = 0; - sf->thresh_mult[THR_ZEROMV_FILT ] = 0; - sf->thresh_mult[THR_ZEROG ] = 0; - sf->thresh_mult[THR_ZEROG_FILT ] = 0; - sf->thresh_mult[THR_ZEROA ] = 0; - sf->thresh_mult[THR_ZEROA_FILT ] = 0; - 
sf->thresh_mult[THR_NEARESTMV ] = 0; - sf->thresh_mult[THR_NEARESTMV_FILT] = 0; - sf->thresh_mult[THR_NEARESTG ] = 0; - sf->thresh_mult[THR_NEARESTG_FILT ] = 0; - sf->thresh_mult[THR_NEARESTA ] = 0; - sf->thresh_mult[THR_NEARESTA_FILT ] = 0; - sf->thresh_mult[THR_NEARMV ] = 0; - sf->thresh_mult[THR_NEARMV_FILT ] = 0; - sf->thresh_mult[THR_NEARG ] = 0; - sf->thresh_mult[THR_NEARG_FILT ] = 0; - sf->thresh_mult[THR_NEARA ] = 0; - sf->thresh_mult[THR_NEARA_FILT ] = 0; - - sf->thresh_mult[THR_DC ] = 0; - - sf->thresh_mult[THR_V_PRED ] = 1000; - sf->thresh_mult[THR_H_PRED ] = 1000; - sf->thresh_mult[THR_D45_PRED ] = 1000; - sf->thresh_mult[THR_D135_PRED] = 1000; - sf->thresh_mult[THR_D117_PRED] = 1000; - sf->thresh_mult[THR_D153_PRED] = 1000; - sf->thresh_mult[THR_D27_PRED ] = 1000; - sf->thresh_mult[THR_D63_PRED ] = 1000; - sf->thresh_mult[THR_B_PRED ] = 2000; - sf->thresh_mult[THR_I8X8_PRED] = 2000; - sf->thresh_mult[THR_TM ] = 1000; - - sf->thresh_mult[THR_NEWMV ] = 1000; - sf->thresh_mult[THR_NEWG ] = 1000; - sf->thresh_mult[THR_NEWA ] = 1000; - sf->thresh_mult[THR_NEWMV_FILT ] = 1000; - sf->thresh_mult[THR_NEWG_FILT ] = 1000; - sf->thresh_mult[THR_NEWA_FILT ] = 1000; -#else - sf->thresh_mult[THR_ZEROMV ] = 0; - sf->thresh_mult[THR_ZEROG ] = 0; - sf->thresh_mult[THR_ZEROA ] = 0; - sf->thresh_mult[THR_NEARESTMV] = 0; - sf->thresh_mult[THR_NEARESTG ] = 0; - sf->thresh_mult[THR_NEARESTA ] = 0; - sf->thresh_mult[THR_NEARMV ] = 0; - sf->thresh_mult[THR_NEARG ] = 0; - sf->thresh_mult[THR_NEARA ] = 0; - - sf->thresh_mult[THR_DC ] = 0; - - sf->thresh_mult[THR_V_PRED ] = 1000; - sf->thresh_mult[THR_H_PRED ] = 1000; - sf->thresh_mult[THR_D45_PRED ] = 1000; - sf->thresh_mult[THR_D135_PRED] = 1000; - sf->thresh_mult[THR_D117_PRED] = 1000; - sf->thresh_mult[THR_D153_PRED] = 1000; - sf->thresh_mult[THR_D27_PRED ] = 1000; - sf->thresh_mult[THR_D63_PRED ] = 1000; - sf->thresh_mult[THR_B_PRED ] = 2000; - sf->thresh_mult[THR_I8X8_PRED] = 2000; - sf->thresh_mult[THR_TM ] = 1000; - - 
sf->thresh_mult[THR_NEWMV ] = 1000; - sf->thresh_mult[THR_NEWG ] = 1000; - sf->thresh_mult[THR_NEWA ] = 1000; -#endif - sf->thresh_mult[THR_SPLITMV ] = 2500; - sf->thresh_mult[THR_SPLITG ] = 5000; - sf->thresh_mult[THR_SPLITA ] = 5000; - - sf->thresh_mult[THR_COMP_ZEROLG ] = 0; - sf->thresh_mult[THR_COMP_NEARESTLG] = 0; - sf->thresh_mult[THR_COMP_NEARLG ] = 0; - sf->thresh_mult[THR_COMP_ZEROLA ] = 0; - sf->thresh_mult[THR_COMP_NEARESTLA] = 0; - sf->thresh_mult[THR_COMP_NEARLA ] = 0; - sf->thresh_mult[THR_COMP_ZEROGA ] = 0; - sf->thresh_mult[THR_COMP_NEARESTGA] = 0; - sf->thresh_mult[THR_COMP_NEARGA ] = 0; - - sf->thresh_mult[THR_COMP_NEWLG ] = 1000; - sf->thresh_mult[THR_COMP_NEWLA ] = 1000; - sf->thresh_mult[THR_COMP_NEWGA ] = 1000; - - sf->thresh_mult[THR_COMP_SPLITLA ] = 2500; - sf->thresh_mult[THR_COMP_SPLITGA ] = 5000; - sf->thresh_mult[THR_COMP_SPLITLG ] = 5000; - - sf->first_step = 0; - sf->max_step_search_steps = MAX_MVSEARCH_STEPS; - sf->search_best_filter = SEARCH_BEST_FILTER; - break; - case 1: -#if CONFIG_PRED_FILTER - sf->thresh_mult[THR_NEARESTMV] = 0; - sf->thresh_mult[THR_NEARESTMV_FILT] = 0; - sf->thresh_mult[THR_ZEROMV ] = 0; - sf->thresh_mult[THR_ZEROMV_FILT ] = 0; - sf->thresh_mult[THR_DC ] = 0; - sf->thresh_mult[THR_NEARMV ] = 0; - sf->thresh_mult[THR_NEARMV_FILT ] = 0; - sf->thresh_mult[THR_V_PRED ] = 1000; - sf->thresh_mult[THR_H_PRED ] = 1000; - sf->thresh_mult[THR_D45_PRED ] = 1000; - sf->thresh_mult[THR_D135_PRED] = 1000; - sf->thresh_mult[THR_D117_PRED] = 1000; - sf->thresh_mult[THR_D153_PRED] = 1000; - sf->thresh_mult[THR_D27_PRED ] = 1000; - sf->thresh_mult[THR_D63_PRED ] = 1000; - sf->thresh_mult[THR_B_PRED ] = 2500; - sf->thresh_mult[THR_I8X8_PRED] = 2500; - sf->thresh_mult[THR_TM ] = 1000; - - sf->thresh_mult[THR_NEARESTG ] = 1000; - sf->thresh_mult[THR_NEARESTG_FILT ] = 1000; - sf->thresh_mult[THR_NEARESTA ] = 1000; - sf->thresh_mult[THR_NEARESTA_FILT ] = 1000; - - sf->thresh_mult[THR_ZEROG ] = 1000; - sf->thresh_mult[THR_ZEROA ] = 
1000; - sf->thresh_mult[THR_NEARG ] = 1000; - sf->thresh_mult[THR_NEARA ] = 1000; - sf->thresh_mult[THR_ZEROG_FILT ] = 1000; - sf->thresh_mult[THR_ZEROA_FILT ] = 1000; - sf->thresh_mult[THR_NEARG_FILT ] = 1000; - sf->thresh_mult[THR_NEARA_FILT ] = 1000; - - sf->thresh_mult[THR_ZEROMV ] = 0; - sf->thresh_mult[THR_ZEROG ] = 0; - sf->thresh_mult[THR_ZEROA ] = 0; - sf->thresh_mult[THR_NEARESTMV] = 0; - sf->thresh_mult[THR_NEARESTG ] = 0; - sf->thresh_mult[THR_NEARESTA ] = 0; - sf->thresh_mult[THR_NEARMV ] = 0; - sf->thresh_mult[THR_NEARG ] = 0; - sf->thresh_mult[THR_NEARA ] = 0; - sf->thresh_mult[THR_ZEROMV_FILT ] = 0; - sf->thresh_mult[THR_ZEROG_FILT ] = 0; - sf->thresh_mult[THR_ZEROA_FILT ] = 0; - sf->thresh_mult[THR_NEARESTMV_FILT] = 0; - sf->thresh_mult[THR_NEARESTG_FILT ] = 0; - sf->thresh_mult[THR_NEARESTA_FILT ] = 0; - sf->thresh_mult[THR_NEARMV_FILT ] = 0; - sf->thresh_mult[THR_NEARG_FILT ] = 0; - sf->thresh_mult[THR_NEARA_FILT ] = 0; - - sf->thresh_mult[THR_NEWMV ] = 1000; - sf->thresh_mult[THR_NEWG ] = 1000; - sf->thresh_mult[THR_NEWA ] = 1000; - sf->thresh_mult[THR_NEWMV_FILT ] = 1000; - sf->thresh_mult[THR_NEWG_FILT ] = 1000; - sf->thresh_mult[THR_NEWA_FILT ] = 1000; -#else - sf->thresh_mult[THR_NEARESTMV] = 0; - sf->thresh_mult[THR_ZEROMV ] = 0; - sf->thresh_mult[THR_DC ] = 0; - sf->thresh_mult[THR_NEARMV ] = 0; - sf->thresh_mult[THR_V_PRED ] = 1000; - sf->thresh_mult[THR_H_PRED ] = 1000; - sf->thresh_mult[THR_D45_PRED ] = 1000; - sf->thresh_mult[THR_D135_PRED] = 1000; - sf->thresh_mult[THR_D117_PRED] = 1000; - sf->thresh_mult[THR_D153_PRED] = 1000; - sf->thresh_mult[THR_D27_PRED ] = 1000; - sf->thresh_mult[THR_D63_PRED ] = 1000; - sf->thresh_mult[THR_B_PRED ] = 2500; - sf->thresh_mult[THR_I8X8_PRED] = 2500; - sf->thresh_mult[THR_TM ] = 1000; - - sf->thresh_mult[THR_NEARESTG ] = 1000; - sf->thresh_mult[THR_NEARESTA ] = 1000; - - sf->thresh_mult[THR_ZEROG ] = 1000; - sf->thresh_mult[THR_ZEROA ] = 1000; - sf->thresh_mult[THR_NEARG ] = 1000; - 
sf->thresh_mult[THR_NEARA ] = 1000; - - sf->thresh_mult[THR_ZEROMV ] = 0; - sf->thresh_mult[THR_ZEROG ] = 0; - sf->thresh_mult[THR_ZEROA ] = 0; - sf->thresh_mult[THR_NEARESTMV] = 0; - sf->thresh_mult[THR_NEARESTG ] = 0; - sf->thresh_mult[THR_NEARESTA ] = 0; - sf->thresh_mult[THR_NEARMV ] = 0; - sf->thresh_mult[THR_NEARG ] = 0; - sf->thresh_mult[THR_NEARA ] = 0; - - sf->thresh_mult[THR_NEWMV ] = 1000; - sf->thresh_mult[THR_NEWG ] = 1000; - sf->thresh_mult[THR_NEWA ] = 1000; -#endif - sf->thresh_mult[THR_SPLITMV ] = 1700; - sf->thresh_mult[THR_SPLITG ] = 4500; - sf->thresh_mult[THR_SPLITA ] = 4500; - - sf->thresh_mult[THR_COMP_ZEROLG ] = 0; - sf->thresh_mult[THR_COMP_NEARESTLG] = 0; - sf->thresh_mult[THR_COMP_NEARLG ] = 0; - sf->thresh_mult[THR_COMP_ZEROLA ] = 0; - sf->thresh_mult[THR_COMP_NEARESTLA] = 0; - sf->thresh_mult[THR_COMP_NEARLA ] = 0; - sf->thresh_mult[THR_COMP_ZEROGA ] = 0; - sf->thresh_mult[THR_COMP_NEARESTGA] = 0; - sf->thresh_mult[THR_COMP_NEARGA ] = 0; - - sf->thresh_mult[THR_COMP_NEWLG ] = 1000; - sf->thresh_mult[THR_COMP_NEWLA ] = 1000; - sf->thresh_mult[THR_COMP_NEWGA ] = 1000; - - sf->thresh_mult[THR_COMP_SPLITLA ] = 1700; - sf->thresh_mult[THR_COMP_SPLITGA ] = 4500; - sf->thresh_mult[THR_COMP_SPLITLG ] = 4500; - - if (Speed > 0) { - /* Disable coefficient optimization above speed 0 */ - sf->optimize_coefficients = 0; - sf->no_skip_block4x4_search = 0; - - sf->first_step = 1; - - cpi->mode_check_freq[THR_SPLITG] = 2; - cpi->mode_check_freq[THR_SPLITA] = 2; - cpi->mode_check_freq[THR_SPLITMV] = 0; - - cpi->mode_check_freq[THR_COMP_SPLITGA] = 2; - cpi->mode_check_freq[THR_COMP_SPLITLG] = 2; - cpi->mode_check_freq[THR_COMP_SPLITLA] = 0; - } - - if (Speed > 1) { - cpi->mode_check_freq[THR_SPLITG] = 4; - cpi->mode_check_freq[THR_SPLITA] = 4; - cpi->mode_check_freq[THR_SPLITMV] = 2; - - cpi->mode_check_freq[THR_COMP_SPLITGA] = 4; - cpi->mode_check_freq[THR_COMP_SPLITLG] = 4; - cpi->mode_check_freq[THR_COMP_SPLITLA] = 2; - - sf->thresh_mult[THR_TM ] = 
1500; - sf->thresh_mult[THR_V_PRED ] = 1500; - sf->thresh_mult[THR_H_PRED ] = 1500; - sf->thresh_mult[THR_D45_PRED ] = 1500; - sf->thresh_mult[THR_D135_PRED] = 1500; - sf->thresh_mult[THR_D117_PRED] = 1500; - sf->thresh_mult[THR_D153_PRED] = 1500; - sf->thresh_mult[THR_D27_PRED ] = 1500; - sf->thresh_mult[THR_D63_PRED ] = 1500; - sf->thresh_mult[THR_B_PRED ] = 5000; - sf->thresh_mult[THR_I8X8_PRED] = 5000; - - if (cpi->ref_frame_flags & VP9_LAST_FLAG) { - sf->thresh_mult[THR_NEWMV ] = 2000; -#if CONFIG_PRED_FILTER - sf->thresh_mult[THR_NEWMV_FILT ] = 2000; -#endif - sf->thresh_mult[THR_SPLITMV ] = 10000; - sf->thresh_mult[THR_COMP_SPLITLG ] = 20000; - } - - if (cpi->ref_frame_flags & VP9_GOLD_FLAG) { - sf->thresh_mult[THR_NEARESTG ] = 1500; - sf->thresh_mult[THR_ZEROG ] = 1500; - sf->thresh_mult[THR_NEARG ] = 1500; - sf->thresh_mult[THR_NEWG ] = 2000; -#if CONFIG_PRED_FILTER - sf->thresh_mult[THR_NEARESTG_FILT ] = 1500; - sf->thresh_mult[THR_ZEROG_FILT ] = 1500; - sf->thresh_mult[THR_NEARG_FILT ] = 1500; - sf->thresh_mult[THR_NEWG_FILT ] = 2000; -#endif - sf->thresh_mult[THR_SPLITG ] = 20000; - sf->thresh_mult[THR_COMP_SPLITGA ] = 20000; - } - - if (cpi->ref_frame_flags & VP9_ALT_FLAG) { - sf->thresh_mult[THR_NEARESTA ] = 1500; - sf->thresh_mult[THR_ZEROA ] = 1500; - sf->thresh_mult[THR_NEARA ] = 1500; - sf->thresh_mult[THR_NEWA ] = 2000; -#if CONFIG_PRED_FILTER - sf->thresh_mult[THR_NEARESTA_FILT ] = 1500; - sf->thresh_mult[THR_ZEROA_FILT ] = 1500; - sf->thresh_mult[THR_NEARA_FILT ] = 1500; - sf->thresh_mult[THR_NEWA_FILT ] = 2000; -#endif - sf->thresh_mult[THR_SPLITA ] = 20000; - sf->thresh_mult[THR_COMP_SPLITLA ] = 10000; - } - - sf->thresh_mult[THR_COMP_ZEROLG ] = 1500; - sf->thresh_mult[THR_COMP_NEARESTLG] = 1500; - sf->thresh_mult[THR_COMP_NEARLG ] = 1500; - sf->thresh_mult[THR_COMP_ZEROLA ] = 1500; - sf->thresh_mult[THR_COMP_NEARESTLA] = 1500; - sf->thresh_mult[THR_COMP_NEARLA ] = 1500; - sf->thresh_mult[THR_COMP_ZEROGA ] = 1500; - 
sf->thresh_mult[THR_COMP_NEARESTGA] = 1500; - sf->thresh_mult[THR_COMP_NEARGA ] = 1500; - - sf->thresh_mult[THR_COMP_NEWLG ] = 2000; - sf->thresh_mult[THR_COMP_NEWLA ] = 2000; - sf->thresh_mult[THR_COMP_NEWGA ] = 2000; - } - - if (Speed > 2) { - cpi->mode_check_freq[THR_SPLITG] = 15; - cpi->mode_check_freq[THR_SPLITA] = 15; - cpi->mode_check_freq[THR_SPLITMV] = 7; - - cpi->mode_check_freq[THR_COMP_SPLITGA] = 15; - cpi->mode_check_freq[THR_COMP_SPLITLG] = 15; - cpi->mode_check_freq[THR_COMP_SPLITLA] = 7; - - sf->thresh_mult[THR_TM ] = 2000; - sf->thresh_mult[THR_V_PRED ] = 2000; - sf->thresh_mult[THR_H_PRED ] = 2000; - sf->thresh_mult[THR_D45_PRED ] = 2000; - sf->thresh_mult[THR_D135_PRED] = 2000; - sf->thresh_mult[THR_D117_PRED] = 2000; - sf->thresh_mult[THR_D153_PRED] = 2000; - sf->thresh_mult[THR_D27_PRED ] = 2000; - sf->thresh_mult[THR_D63_PRED ] = 2000; - sf->thresh_mult[THR_B_PRED ] = 7500; - sf->thresh_mult[THR_I8X8_PRED] = 7500; - - if (cpi->ref_frame_flags & VP9_LAST_FLAG) { - sf->thresh_mult[THR_NEWMV ] = 2000; -#if CONFIG_PRED_FILTER - sf->thresh_mult[THR_NEWMV_FILT ] = 2000; -#endif - sf->thresh_mult[THR_SPLITMV ] = 25000; - sf->thresh_mult[THR_COMP_SPLITLG ] = 50000; - } - - if (cpi->ref_frame_flags & VP9_GOLD_FLAG) { - sf->thresh_mult[THR_NEARESTG ] = 2000; - sf->thresh_mult[THR_ZEROG ] = 2000; - sf->thresh_mult[THR_NEARG ] = 2000; - sf->thresh_mult[THR_NEWG ] = 2500; -#if CONFIG_PRED_FILTER - sf->thresh_mult[THR_NEARESTG_FILT ] = 2000; - sf->thresh_mult[THR_ZEROG_FILT ] = 2000; - sf->thresh_mult[THR_NEARG_FILT ] = 2000; - sf->thresh_mult[THR_NEWG_FILT ] = 2500; -#endif - sf->thresh_mult[THR_SPLITG ] = 50000; - sf->thresh_mult[THR_COMP_SPLITGA ] = 50000; - } - - if (cpi->ref_frame_flags & VP9_ALT_FLAG) { - sf->thresh_mult[THR_NEARESTA ] = 2000; - sf->thresh_mult[THR_ZEROA ] = 2000; - sf->thresh_mult[THR_NEARA ] = 2000; - sf->thresh_mult[THR_NEWA ] = 2500; -#if CONFIG_PRED_FILTER - sf->thresh_mult[THR_NEARESTA_FILT ] = 2000; - 
sf->thresh_mult[THR_ZEROA_FILT ] = 2000; - sf->thresh_mult[THR_NEARA_FILT ] = 2000; - sf->thresh_mult[THR_NEWA_FILT ] = 2500; -#endif - sf->thresh_mult[THR_SPLITA ] = 50000; - sf->thresh_mult[THR_COMP_SPLITLA ] = 25000; - } - - sf->thresh_mult[THR_COMP_ZEROLG ] = 2000; - sf->thresh_mult[THR_COMP_NEARESTLG] = 2000; - sf->thresh_mult[THR_COMP_NEARLG ] = 2000; - sf->thresh_mult[THR_COMP_ZEROLA ] = 2000; - sf->thresh_mult[THR_COMP_NEARESTLA] = 2000; - sf->thresh_mult[THR_COMP_NEARLA ] = 2000; - sf->thresh_mult[THR_COMP_ZEROGA ] = 2000; - sf->thresh_mult[THR_COMP_NEARESTGA] = 2000; - sf->thresh_mult[THR_COMP_NEARGA ] = 2000; - - sf->thresh_mult[THR_COMP_NEWLG ] = 2500; - sf->thresh_mult[THR_COMP_NEWLA ] = 2500; - sf->thresh_mult[THR_COMP_NEWGA ] = 2500; - - sf->improved_dct = 0; - - // Only do recode loop on key frames, golden frames and - // alt ref frames - sf->recode_loop = 2; - - } - - break; - - }; /* switch */ - - /* disable frame modes if flags not set */ - if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) { - sf->thresh_mult[THR_NEWMV ] = INT_MAX; - sf->thresh_mult[THR_NEARESTMV] = INT_MAX; - sf->thresh_mult[THR_ZEROMV ] = INT_MAX; - sf->thresh_mult[THR_NEARMV ] = INT_MAX; -#if CONFIG_PRED_FILTER - sf->thresh_mult[THR_NEWMV_FILT ] = INT_MAX; - sf->thresh_mult[THR_NEARESTMV_FILT] = INT_MAX; - sf->thresh_mult[THR_ZEROMV_FILT ] = INT_MAX; - sf->thresh_mult[THR_NEARMV_FILT ] = INT_MAX; -#endif - sf->thresh_mult[THR_SPLITMV ] = INT_MAX; - } - - if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) { - sf->thresh_mult[THR_NEARESTG ] = INT_MAX; - sf->thresh_mult[THR_ZEROG ] = INT_MAX; - sf->thresh_mult[THR_NEARG ] = INT_MAX; - sf->thresh_mult[THR_NEWG ] = INT_MAX; -#if CONFIG_PRED_FILTER - sf->thresh_mult[THR_NEARESTG_FILT ] = INT_MAX; - sf->thresh_mult[THR_ZEROG_FILT ] = INT_MAX; - sf->thresh_mult[THR_NEARG_FILT ] = INT_MAX; - sf->thresh_mult[THR_NEWG_FILT ] = INT_MAX; -#endif - sf->thresh_mult[THR_SPLITG ] = INT_MAX; - } - - if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) { - 
sf->thresh_mult[THR_NEARESTA ] = INT_MAX; - sf->thresh_mult[THR_ZEROA ] = INT_MAX; - sf->thresh_mult[THR_NEARA ] = INT_MAX; - sf->thresh_mult[THR_NEWA ] = INT_MAX; -#if CONFIG_PRED_FILTER - sf->thresh_mult[THR_NEARESTA_FILT ] = INT_MAX; - sf->thresh_mult[THR_ZEROA_FILT ] = INT_MAX; - sf->thresh_mult[THR_NEARA_FILT ] = INT_MAX; - sf->thresh_mult[THR_NEWA_FILT ] = INT_MAX; -#endif - sf->thresh_mult[THR_SPLITA ] = INT_MAX; - } - - if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_GOLD_FLAG)) != (VP9_LAST_FLAG | VP9_GOLD_FLAG)) { - sf->thresh_mult[THR_COMP_ZEROLG ] = INT_MAX; - sf->thresh_mult[THR_COMP_NEARESTLG] = INT_MAX; - sf->thresh_mult[THR_COMP_NEARLG ] = INT_MAX; - sf->thresh_mult[THR_COMP_NEWLG ] = INT_MAX; - sf->thresh_mult[THR_COMP_SPLITLG ] = INT_MAX; - } - - if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) != (VP9_LAST_FLAG | VP9_ALT_FLAG)) { - sf->thresh_mult[THR_COMP_ZEROLA ] = INT_MAX; - sf->thresh_mult[THR_COMP_NEARESTLA] = INT_MAX; - sf->thresh_mult[THR_COMP_NEARLA ] = INT_MAX; - sf->thresh_mult[THR_COMP_NEWLA ] = INT_MAX; - sf->thresh_mult[THR_COMP_SPLITLA ] = INT_MAX; - } - - if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) != (VP9_GOLD_FLAG | VP9_ALT_FLAG)) { - sf->thresh_mult[THR_COMP_ZEROGA ] = INT_MAX; - sf->thresh_mult[THR_COMP_NEARESTGA] = INT_MAX; - sf->thresh_mult[THR_COMP_NEARGA ] = INT_MAX; - sf->thresh_mult[THR_COMP_NEWGA ] = INT_MAX; - sf->thresh_mult[THR_COMP_SPLITGA ] = INT_MAX; - } - - // Slow quant, dct and trellis not worthwhile for first pass - // so make sure they are always turned off. 
- if (cpi->pass == 1) { - sf->optimize_coefficients = 0; - sf->improved_dct = 0; - } - - if (cpi->sf.search_method == NSTEP) { - vp9_init3smotion_compensation(&cpi->mb, - cm->yv12_fb[cm->lst_fb_idx].y_stride); - } else if (cpi->sf.search_method == DIAMOND) { - vp9_init_dsmotion_compensation(&cpi->mb, - cm->yv12_fb[cm->lst_fb_idx].y_stride); - } - - cpi->mb.vp9_short_fdct16x16 = vp9_short_fdct16x16; - cpi->mb.vp9_short_fdct8x8 = vp9_short_fdct8x8; - cpi->mb.vp9_short_fdct8x4 = vp9_short_fdct8x4; - cpi->mb.vp9_short_fdct4x4 = vp9_short_fdct4x4; - cpi->mb.short_walsh4x4 = vp9_short_walsh4x4; - cpi->mb.short_fhaar2x2 = vp9_short_fhaar2x2; - -#if CONFIG_LOSSLESS - if (cpi->oxcf.lossless) { - cpi->mb.vp9_short_fdct8x4 = vp9_short_walsh8x4_x8; - cpi->mb.vp9_short_fdct4x4 = vp9_short_walsh4x4_x8; - cpi->mb.short_walsh4x4 = vp9_short_walsh4x4; - cpi->mb.short_fhaar2x2 = vp9_short_fhaar2x2; - cpi->mb.short_walsh4x4 = vp9_short_walsh4x4_lossless; - } -#endif - - - - cpi->mb.quantize_b_4x4 = vp9_regular_quantize_b_4x4; - cpi->mb.quantize_b_4x4_pair = vp9_regular_quantize_b_4x4_pair; - cpi->mb.quantize_b_8x8 = vp9_regular_quantize_b_8x8; - cpi->mb.quantize_b_16x16 = vp9_regular_quantize_b_16x16; - cpi->mb.quantize_b_2x2 = vp9_regular_quantize_b_2x2; - - vp9_init_quantizer(cpi); - -#if CONFIG_RUNTIME_CPU_DETECT - cpi->mb.e_mbd.rtcd = &cpi->common.rtcd; -#endif - - if (cpi->sf.iterative_sub_pixel == 1) { - cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_step_iteratively; - } else if (cpi->sf.quarter_pixel_search) { - cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_step; - } else if (cpi->sf.half_pixel_search) { - cpi->find_fractional_mv_step = vp9_find_best_half_pixel_step; - } - - if (cpi->sf.optimize_coefficients == 1 && cpi->pass != 1) - cpi->mb.optimize = 1; - else - cpi->mb.optimize = 0; - -#ifdef SPEEDSTATS - frames_at_speed[cpi->Speed]++; -#endif -} -static void alloc_raw_frame_buffers(VP9_COMP *cpi) { - int width = (cpi->oxcf.Width + 15) & ~15; - int height 
= (cpi->oxcf.Height + 15) & ~15; - - cpi->lookahead = vp9_lookahead_init(cpi->oxcf.Width, cpi->oxcf.Height, - cpi->oxcf.lag_in_frames); - if (!cpi->lookahead) - vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, - "Failed to allocate lag buffers"); - -#if VP9_TEMPORAL_ALT_REF - - if (vp8_yv12_alloc_frame_buffer(&cpi->alt_ref_buffer, - width, height, VP8BORDERINPIXELS)) - vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, - "Failed to allocate altref buffer"); - -#endif -} - -static int alloc_partition_data(VP9_COMP *cpi) { - vpx_free(cpi->mb.pip); - - cpi->mb.pip = vpx_calloc((cpi->common.mb_cols + 1) * - (cpi->common.mb_rows + 1), - sizeof(PARTITION_INFO)); - if (!cpi->mb.pip) - return 1; - - cpi->mb.pi = cpi->mb.pip + cpi->common.mode_info_stride + 1; - - return 0; -} - -void vp9_alloc_compressor_data(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; - - int width = cm->Width; - int height = cm->Height; - - if (vp9_alloc_frame_buffers(cm, width, height)) - vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, - "Failed to allocate frame buffers"); - - if (alloc_partition_data(cpi)) - vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, - "Failed to allocate partition data"); - - - if ((width & 0xf) != 0) - width += 16 - (width & 0xf); - - if ((height & 0xf) != 0) - height += 16 - (height & 0xf); - - - if (vp8_yv12_alloc_frame_buffer(&cpi->last_frame_uf, - width, height, VP8BORDERINPIXELS)) - vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, - "Failed to allocate last frame buffer"); - - if (vp8_yv12_alloc_frame_buffer(&cpi->scaled_source, - width, height, VP8BORDERINPIXELS)) - vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, - "Failed to allocate scaled source buffer"); - - - vpx_free(cpi->tok); - - { - unsigned int tokens = cm->mb_rows * cm->mb_cols * 24 * 16; - - CHECK_MEM_ERROR(cpi->tok, vpx_calloc(tokens, sizeof(*cpi->tok))); - } - - // Data used for real time vc mode to see if gf needs refreshing - 
cpi->inter_zz_count = 0; - cpi->gf_bad_count = 0; - cpi->gf_update_recommended = 0; - - - // Structures used to minitor GF usage - vpx_free(cpi->gf_active_flags); - CHECK_MEM_ERROR(cpi->gf_active_flags, - vpx_calloc(1, cm->mb_rows * cm->mb_cols)); - cpi->gf_active_count = cm->mb_rows * cm->mb_cols; - - vpx_free(cpi->mb_activity_map); - CHECK_MEM_ERROR(cpi->mb_activity_map, - vpx_calloc(sizeof(unsigned int), - cm->mb_rows * cm->mb_cols)); - - vpx_free(cpi->mb_norm_activity_map); - CHECK_MEM_ERROR(cpi->mb_norm_activity_map, - vpx_calloc(sizeof(unsigned int), - cm->mb_rows * cm->mb_cols)); - - vpx_free(cpi->twopass.total_stats); - - cpi->twopass.total_stats = vpx_calloc(1, sizeof(FIRSTPASS_STATS)); - - vpx_free(cpi->twopass.total_left_stats); - cpi->twopass.total_left_stats = vpx_calloc(1, sizeof(FIRSTPASS_STATS)); - - vpx_free(cpi->twopass.this_frame_stats); - - cpi->twopass.this_frame_stats = vpx_calloc(1, sizeof(FIRSTPASS_STATS)); - - if (!cpi->twopass.total_stats || - !cpi->twopass.total_left_stats || - !cpi->twopass.this_frame_stats) - vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, - "Failed to allocate firstpass stats"); - - vpx_free(cpi->tplist); - - CHECK_MEM_ERROR(cpi->tplist, - vpx_malloc(sizeof(TOKENLIST) * (cpi->common.mb_rows))); -} - - -// TODO perhaps change number of steps expose to outside world when setting -// max and min limits. Also this will likely want refining for the extended Q -// range. -// -// Table that converts 0-63 Q range values passed in outside to the Qindex -// range used internally. 
-static const int q_trans[] = { - 0, 4, 8, 12, 16, 20, 24, 28, - 32, 36, 40, 44, 48, 52, 56, 60, - 64, 68, 72, 76, 80, 84, 88, 92, - 96, 100, 104, 108, 112, 116, 120, 124, - 128, 132, 136, 140, 144, 148, 152, 156, - 160, 164, 168, 172, 176, 180, 184, 188, - 192, 196, 200, 204, 208, 212, 216, 220, - 224, 228, 232, 236, 240, 244, 249, 255, -}; - -int vp9_reverse_trans(int x) { - int i; - - for (i = 0; i < 64; i++) - if (q_trans[i] >= x) - return i; - - return 63; -}; -void vp9_new_frame_rate(VP9_COMP *cpi, double framerate) { - if (framerate < .1) - framerate = 30; - - cpi->oxcf.frame_rate = framerate; - cpi->output_frame_rate = cpi->oxcf.frame_rate; - cpi->per_frame_bandwidth = (int)(cpi->oxcf.target_bandwidth / cpi->output_frame_rate); - cpi->av_per_frame_bandwidth = (int)(cpi->oxcf.target_bandwidth / cpi->output_frame_rate); - cpi->min_frame_bandwidth = (int)(cpi->av_per_frame_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100); - - if (cpi->min_frame_bandwidth < FRAME_OVERHEAD_BITS) - cpi->min_frame_bandwidth = FRAME_OVERHEAD_BITS; - - // Set Maximum gf/arf interval - cpi->max_gf_interval = ((int)(cpi->output_frame_rate / 2.0) + 2); - - if (cpi->max_gf_interval < 12) - cpi->max_gf_interval = 12; - - // Extended interval for genuinely static scenes - cpi->twopass.static_scene_max_gf_interval = cpi->key_frame_frequency >> 1; - - // Special conditions when altr ref frame enabled in lagged compress mode - if (cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames) { - if (cpi->max_gf_interval > cpi->oxcf.lag_in_frames - 1) - cpi->max_gf_interval = cpi->oxcf.lag_in_frames - 1; - - if (cpi->twopass.static_scene_max_gf_interval > cpi->oxcf.lag_in_frames - 1) - cpi->twopass.static_scene_max_gf_interval = cpi->oxcf.lag_in_frames - 1; - } - - if (cpi->max_gf_interval > cpi->twopass.static_scene_max_gf_interval) - cpi->max_gf_interval = cpi->twopass.static_scene_max_gf_interval; -} - - -static int -rescale(int val, int num, int denom) { - int64_t llnum = num; - int64_t llden 
= denom; - int64_t llval = val; - - return llval * llnum / llden; -} - - -static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { - VP9_COMP *cpi = (VP9_COMP *)(ptr); - VP9_COMMON *cm = &cpi->common; - - cpi->oxcf = *oxcf; - - cpi->goldfreq = 7; - - cm->version = oxcf->Version; - vp9_setup_version(cm); - - // change includes all joint functionality - vp9_change_config(ptr, oxcf); - - // Initialize active best and worst q and average q values. - cpi->active_worst_quality = cpi->oxcf.worst_allowed_q; - cpi->active_best_quality = cpi->oxcf.best_allowed_q; - cpi->avg_frame_qindex = cpi->oxcf.worst_allowed_q; - - // Initialise the starting buffer levels - cpi->buffer_level = cpi->oxcf.starting_buffer_level; - cpi->bits_off_target = cpi->oxcf.starting_buffer_level; - - cpi->rolling_target_bits = cpi->av_per_frame_bandwidth; - cpi->rolling_actual_bits = cpi->av_per_frame_bandwidth; - cpi->long_rolling_target_bits = cpi->av_per_frame_bandwidth; - cpi->long_rolling_actual_bits = cpi->av_per_frame_bandwidth; - - cpi->total_actual_bits = 0; - cpi->total_target_vs_actual = 0; - - cpi->static_mb_pct = 0; - -#if VP9_TEMPORAL_ALT_REF - { - int i; - - cpi->fixed_divide[0] = 0; - - for (i = 1; i < 512; i++) - cpi->fixed_divide[i] = 0x80000 / i; - } -#endif -} - - -void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { - VP9_COMP *cpi = (VP9_COMP *)(ptr); - VP9_COMMON *cm = &cpi->common; - - if (!cpi) - return; - - if (!oxcf) - return; - - if (cm->version != oxcf->Version) { - cm->version = oxcf->Version; - vp9_setup_version(cm); - } - - cpi->oxcf = *oxcf; - - switch (cpi->oxcf.Mode) { - // Real time and one pass deprecated in test code base - case MODE_FIRSTPASS: - cpi->pass = 1; - cpi->compressor_speed = 1; - break; - - case MODE_SECONDPASS: - cpi->pass = 2; - cpi->compressor_speed = 1; - - if (cpi->oxcf.cpu_used < -5) { - cpi->oxcf.cpu_used = -5; - } - - if (cpi->oxcf.cpu_used > 5) - cpi->oxcf.cpu_used = 5; - - break; - - case MODE_SECONDPASS_BEST: - cpi->pass = 2; - 
cpi->compressor_speed = 0; - break; - } - - cpi->oxcf.worst_allowed_q = q_trans[oxcf->worst_allowed_q]; - cpi->oxcf.best_allowed_q = q_trans[oxcf->best_allowed_q]; - cpi->oxcf.cq_level = q_trans[cpi->oxcf.cq_level]; - -#if CONFIG_LOSSLESS - cpi->oxcf.lossless = oxcf->lossless; - if (cpi->oxcf.lossless) { - cpi->common.rtcd.idct.idct1 = vp9_short_inv_walsh4x4_1_x8_c; - cpi->common.rtcd.idct.idct16 = vp9_short_inv_walsh4x4_x8_c; - cpi->common.rtcd.idct.idct1_scalar_add = vp9_dc_only_inv_walsh_add_c; - cpi->common.rtcd.idct.iwalsh1 = vp9_short_inv_walsh4x4_1_c; - cpi->common.rtcd.idct.iwalsh16 = vp9_short_inv_walsh4x4_lossless_c; - } -#endif - - cpi->baseline_gf_interval = DEFAULT_GF_INTERVAL; - - cpi->ref_frame_flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG; - - // cpi->use_golden_frame_only = 0; - // cpi->use_last_frame_only = 0; - cm->refresh_golden_frame = 0; - cm->refresh_last_frame = 1; - cm->refresh_entropy_probs = 1; - - setup_features(cpi); - cpi->mb.e_mbd.allow_high_precision_mv = 0; // Default mv precision adaptation - - { - int i; - - for (i = 0; i < MAX_MB_SEGMENTS; i++) - cpi->segment_encode_breakout[i] = cpi->oxcf.encode_breakout; - } - - // At the moment the first order values may not be > MAXQ - if (cpi->oxcf.fixed_q > MAXQ) - cpi->oxcf.fixed_q = MAXQ; - - // local file playback mode == really big buffer - if (cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK) { - cpi->oxcf.starting_buffer_level = 60000; - cpi->oxcf.optimal_buffer_level = 60000; - cpi->oxcf.maximum_buffer_size = 240000; - } - - // Convert target bandwidth from Kbit/s to Bit/s - cpi->oxcf.target_bandwidth *= 1000; - - cpi->oxcf.starting_buffer_level = - rescale(cpi->oxcf.starting_buffer_level, - cpi->oxcf.target_bandwidth, 1000); - - // Set or reset optimal and maximum buffer levels. 
- if (cpi->oxcf.optimal_buffer_level == 0) - cpi->oxcf.optimal_buffer_level = cpi->oxcf.target_bandwidth / 8; - else - cpi->oxcf.optimal_buffer_level = - rescale(cpi->oxcf.optimal_buffer_level, - cpi->oxcf.target_bandwidth, 1000); - - if (cpi->oxcf.maximum_buffer_size == 0) - cpi->oxcf.maximum_buffer_size = cpi->oxcf.target_bandwidth / 8; - else - cpi->oxcf.maximum_buffer_size = - rescale(cpi->oxcf.maximum_buffer_size, - cpi->oxcf.target_bandwidth, 1000); - - // Set up frame rate and related parameters rate control values. - vp9_new_frame_rate(cpi, cpi->oxcf.frame_rate); - - // Set absolute upper and lower quality limits - cpi->worst_quality = cpi->oxcf.worst_allowed_q; - cpi->best_quality = cpi->oxcf.best_allowed_q; - - // active values should only be modified if out of new range - if (cpi->active_worst_quality > cpi->oxcf.worst_allowed_q) { - cpi->active_worst_quality = cpi->oxcf.worst_allowed_q; - } - // less likely - else if (cpi->active_worst_quality < cpi->oxcf.best_allowed_q) { - cpi->active_worst_quality = cpi->oxcf.best_allowed_q; - } - if (cpi->active_best_quality < cpi->oxcf.best_allowed_q) { - cpi->active_best_quality = cpi->oxcf.best_allowed_q; - } - // less likely - else if (cpi->active_best_quality > cpi->oxcf.worst_allowed_q) { - cpi->active_best_quality = cpi->oxcf.worst_allowed_q; - } - - cpi->buffered_mode = (cpi->oxcf.optimal_buffer_level > 0) ? 
TRUE : FALSE; - - cpi->cq_target_quality = cpi->oxcf.cq_level; - - if (!cm->use_bilinear_mc_filter) - cm->mcomp_filter_type = DEFAULT_INTERP_FILTER; - else - cm->mcomp_filter_type = BILINEAR; - - cpi->target_bandwidth = cpi->oxcf.target_bandwidth; - - cm->Width = cpi->oxcf.Width; - cm->Height = cpi->oxcf.Height; - - cm->horiz_scale = cpi->horiz_scale; - cm->vert_scale = cpi->vert_scale; - - // VP8 sharpness level mapping 0-7 (vs 0-10 in general VPx dialogs) - if (cpi->oxcf.Sharpness > 7) - cpi->oxcf.Sharpness = 7; - - cm->sharpness_level = cpi->oxcf.Sharpness; - - if (cm->horiz_scale != NORMAL || cm->vert_scale != NORMAL) { - int UNINITIALIZED_IS_SAFE(hr), UNINITIALIZED_IS_SAFE(hs); - int UNINITIALIZED_IS_SAFE(vr), UNINITIALIZED_IS_SAFE(vs); - - Scale2Ratio(cm->horiz_scale, &hr, &hs); - Scale2Ratio(cm->vert_scale, &vr, &vs); - - // always go to the next whole number - cm->Width = (hs - 1 + cpi->oxcf.Width * hr) / hs; - cm->Height = (vs - 1 + cpi->oxcf.Height * vr) / vs; - } - - if (((cm->Width + 15) & 0xfffffff0) != - cm->yv12_fb[cm->lst_fb_idx].y_width || - ((cm->Height + 15) & 0xfffffff0) != - cm->yv12_fb[cm->lst_fb_idx].y_height || - cm->yv12_fb[cm->lst_fb_idx].y_width == 0) { - alloc_raw_frame_buffers(cpi); - vp9_alloc_compressor_data(cpi); - } - - if (cpi->oxcf.fixed_q >= 0) { - cpi->last_q[0] = cpi->oxcf.fixed_q; - cpi->last_q[1] = cpi->oxcf.fixed_q; - cpi->last_boosted_qindex = cpi->oxcf.fixed_q; - } - - cpi->Speed = cpi->oxcf.cpu_used; - - // force to allowlag to 0 if lag_in_frames is 0; - if (cpi->oxcf.lag_in_frames == 0) { - cpi->oxcf.allow_lag = 0; - } - // Limit on lag buffers as these are not currently dynamically allocated - else if (cpi->oxcf.lag_in_frames > MAX_LAG_BUFFERS) - cpi->oxcf.lag_in_frames = MAX_LAG_BUFFERS; - - // YX Temp - cpi->alt_ref_source = NULL; - cpi->is_src_frame_alt_ref = 0; - -#if 0 - // Experimental RD Code - cpi->frame_distortion = 0; - cpi->last_frame_distortion = 0; -#endif - -} - -#define M_LOG2_E 0.693147180559945309417 
-#define log2f(x) (log (x) / (float) M_LOG2_E) - -static void cal_nmvjointsadcost(int *mvjointsadcost) { - mvjointsadcost[0] = 600; - mvjointsadcost[1] = 300; - mvjointsadcost[2] = 300; - mvjointsadcost[0] = 300; -} - -static void cal_nmvsadcosts(int *mvsadcost[2]) { - int i = 1; - - mvsadcost [0] [0] = 0; - mvsadcost [1] [0] = 0; - - do { - double z = 256 * (2 * (log2f(8 * i) + .6)); - mvsadcost [0][i] = (int) z; - mvsadcost [1][i] = (int) z; - mvsadcost [0][-i] = (int) z; - mvsadcost [1][-i] = (int) z; - } while (++i <= MV_MAX); -} - -static void cal_nmvsadcosts_hp(int *mvsadcost[2]) { - int i = 1; - - mvsadcost [0] [0] = 0; - mvsadcost [1] [0] = 0; - - do { - double z = 256 * (2 * (log2f(8 * i) + .6)); - mvsadcost [0][i] = (int) z; - mvsadcost [1][i] = (int) z; - mvsadcost [0][-i] = (int) z; - mvsadcost [1][-i] = (int) z; - } while (++i <= MV_MAX); -} - -VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { - int i; - volatile union { - VP9_COMP *cpi; - VP9_PTR ptr; - } ctx; - - VP9_COMP *cpi; - VP9_COMMON *cm; - - cpi = ctx.cpi = vpx_memalign(32, sizeof(VP9_COMP)); - // Check that the CPI instance is valid - if (!cpi) - return 0; - - cm = &cpi->common; - - vpx_memset(cpi, 0, sizeof(VP9_COMP)); - - if (setjmp(cm->error.jmp)) { - VP9_PTR ptr = ctx.ptr; - - ctx.cpi->common.error.setjmp = 0; - vp9_remove_compressor(&ptr); - return 0; - } - - cpi->common.error.setjmp = 1; - - CHECK_MEM_ERROR(cpi->mb.ss, vpx_calloc(sizeof(search_site), (MAX_MVSEARCH_STEPS * 8) + 1)); - - vp9_create_common(&cpi->common); - vp9_cmachine_specific_config(cpi); - - init_config((VP9_PTR)cpi, oxcf); - - memcpy(cpi->base_skip_false_prob, base_skip_false_prob, sizeof(base_skip_false_prob)); - cpi->common.current_video_frame = 0; - cpi->kf_overspend_bits = 0; - cpi->kf_bitrate_adjustment = 0; - cpi->frames_till_gf_update_due = 0; - cpi->gf_overspend_bits = 0; - cpi->non_gf_bitrate_adjustment = 0; - cm->prob_last_coded = 128; - cm->prob_gf_coded = 128; - cm->prob_intra_coded = 63; -#if 
CONFIG_SUPERBLOCKS - cm->sb_coded = 200; -#endif - for (i = 0; i < COMP_PRED_CONTEXTS; i++) - cm->prob_comppred[i] = 128; - for (i = 0; i < TX_SIZE_MAX - 1; i++) - cm->prob_tx[i] = 128; - - // Prime the recent reference frame useage counters. - // Hereafter they will be maintained as a sort of moving average - cpi->recent_ref_frame_usage[INTRA_FRAME] = 1; - cpi->recent_ref_frame_usage[LAST_FRAME] = 1; - cpi->recent_ref_frame_usage[GOLDEN_FRAME] = 1; - cpi->recent_ref_frame_usage[ALTREF_FRAME] = 1; - - // Set reference frame sign bias for ALTREF frame to 1 (for now) - cpi->common.ref_frame_sign_bias[ALTREF_FRAME] = 1; - - cpi->baseline_gf_interval = DEFAULT_GF_INTERVAL; - - cpi->gold_is_last = 0; - cpi->alt_is_last = 0; - cpi->gold_is_alt = 0; - - // allocate memory for storing last frame's MVs for MV prediction. - CHECK_MEM_ERROR(cpi->lfmv, vpx_calloc((cpi->common.mb_rows + 2) * (cpi->common.mb_cols + 2), sizeof(int_mv))); - CHECK_MEM_ERROR(cpi->lf_ref_frame_sign_bias, vpx_calloc((cpi->common.mb_rows + 2) * (cpi->common.mb_cols + 2), sizeof(int))); - CHECK_MEM_ERROR(cpi->lf_ref_frame, vpx_calloc((cpi->common.mb_rows + 2) * (cpi->common.mb_cols + 2), sizeof(int))); - - // Create the encoder segmentation map and set all entries to 0 - CHECK_MEM_ERROR(cpi->segmentation_map, vpx_calloc((cpi->common.mb_rows * cpi->common.mb_cols), 1)); - - // And a copy in common for temporal coding - CHECK_MEM_ERROR(cm->last_frame_seg_map, - vpx_calloc((cpi->common.mb_rows * cpi->common.mb_cols), 1)); - - // And a place holder structure is the coding context - // for use if we want to save and restore it - CHECK_MEM_ERROR(cpi->coding_context.last_frame_seg_map_copy, - vpx_calloc((cpi->common.mb_rows * cpi->common.mb_cols), 1)); - - CHECK_MEM_ERROR(cpi->active_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1)); - vpx_memset(cpi->active_map, 1, (cpi->common.mb_rows * cpi->common.mb_cols)); - cpi->active_map_enabled = 0; - - for (i = 0; i < (sizeof(cpi->mbgraph_stats) / - 
sizeof(cpi->mbgraph_stats[0])); i++) { - CHECK_MEM_ERROR(cpi->mbgraph_stats[i].mb_stats, - vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols * - sizeof(*cpi->mbgraph_stats[i].mb_stats), - 1)); - } - -#ifdef ENTROPY_STATS - if (cpi->pass != 1) - init_context_counters(); -#endif -#ifdef MODE_STATS - vp9_zero(y_modes); - vp9_zero(i8x8_modes); - vp9_zero(uv_modes); - vp9_zero(uv_modes_y); - vp9_zero(b_modes); - vp9_zero(inter_y_modes); - vp9_zero(inter_uv_modes); - vp9_zero(inter_b_modes); -#endif -#ifdef NMV_STATS - init_nmvstats(); -#endif - - /*Initialize the feed-forward activity masking.*/ - cpi->activity_avg = 90 << 12; - - cpi->frames_since_key = 8; // Give a sensible default for the first frame. - cpi->key_frame_frequency = cpi->oxcf.key_freq; - cpi->this_key_frame_forced = FALSE; - cpi->next_key_frame_forced = FALSE; - - cpi->source_alt_ref_pending = FALSE; - cpi->source_alt_ref_active = FALSE; - cpi->common.refresh_alt_ref_frame = 0; - - cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS; -#if CONFIG_INTERNAL_STATS - cpi->b_calculate_ssimg = 0; - - cpi->count = 0; - cpi->bytes = 0; - - if (cpi->b_calculate_psnr) { - cpi->total_sq_error = 0.0; - cpi->total_sq_error2 = 0.0; - cpi->total_y = 0.0; - cpi->total_u = 0.0; - cpi->total_v = 0.0; - cpi->total = 0.0; - cpi->totalp_y = 0.0; - cpi->totalp_u = 0.0; - cpi->totalp_v = 0.0; - cpi->totalp = 0.0; - cpi->tot_recode_hits = 0; - cpi->summed_quality = 0; - cpi->summed_weights = 0; - } - - if (cpi->b_calculate_ssimg) { - cpi->total_ssimg_y = 0; - cpi->total_ssimg_u = 0; - cpi->total_ssimg_v = 0; - cpi->total_ssimg_all = 0; - } - -#endif - -#ifndef LLONG_MAX -#define LLONG_MAX 9223372036854775807LL -#endif - cpi->first_time_stamp_ever = LLONG_MAX; - - cpi->frames_till_gf_update_due = 0; - cpi->key_frame_count = 1; - - cpi->ni_av_qi = cpi->oxcf.worst_allowed_q; - cpi->ni_tot_qi = 0; - cpi->ni_frames = 0; - cpi->tot_q = 0.0; - cpi->avg_q = vp9_convert_qindex_to_q(cpi->oxcf.worst_allowed_q); - cpi->total_byte_count = 0; - 
- cpi->rate_correction_factor = 1.0; - cpi->key_frame_rate_correction_factor = 1.0; - cpi->gf_rate_correction_factor = 1.0; - cpi->twopass.est_max_qcorrection_factor = 1.0; - - cal_nmvjointsadcost(cpi->mb.nmvjointsadcost); - cpi->mb.nmvcost[0] = &cpi->mb.nmvcosts[0][MV_MAX]; - cpi->mb.nmvcost[1] = &cpi->mb.nmvcosts[1][MV_MAX]; - cpi->mb.nmvsadcost[0] = &cpi->mb.nmvsadcosts[0][MV_MAX]; - cpi->mb.nmvsadcost[1] = &cpi->mb.nmvsadcosts[1][MV_MAX]; - cal_nmvsadcosts(cpi->mb.nmvsadcost); - - cpi->mb.nmvcost_hp[0] = &cpi->mb.nmvcosts_hp[0][MV_MAX]; - cpi->mb.nmvcost_hp[1] = &cpi->mb.nmvcosts_hp[1][MV_MAX]; - cpi->mb.nmvsadcost_hp[0] = &cpi->mb.nmvsadcosts_hp[0][MV_MAX]; - cpi->mb.nmvsadcost_hp[1] = &cpi->mb.nmvsadcosts_hp[1][MV_MAX]; - cal_nmvsadcosts_hp(cpi->mb.nmvsadcost_hp); - - for (i = 0; i < KEY_FRAME_CONTEXT; i++) { - cpi->prior_key_frame_distance[i] = (int)cpi->output_frame_rate; - } - -#ifdef OUTPUT_YUV_SRC - yuv_file = fopen("bd.yuv", "ab"); -#endif -#ifdef OUTPUT_YUV_REC - yuv_rec_file = fopen("rec.yuv", "wb"); -#endif - -#if 0 - framepsnr = fopen("framepsnr.stt", "a"); - kf_list = fopen("kf_list.stt", "w"); -#endif - - cpi->output_pkt_list = oxcf->output_pkt_list; - - if (cpi->pass == 1) { - vp9_init_first_pass(cpi); - } else if (cpi->pass == 2) { - size_t packet_sz = sizeof(FIRSTPASS_STATS); - int packets = oxcf->two_pass_stats_in.sz / packet_sz; - - cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf; - cpi->twopass.stats_in = cpi->twopass.stats_in_start; - cpi->twopass.stats_in_end = (void *)((char *)cpi->twopass.stats_in - + (packets - 1) * packet_sz); - vp9_init_second_pass(cpi); - } - - vp9_set_speed_features(cpi); - - // Set starting values of RD threshold multipliers (128 = *1) - for (i = 0; i < MAX_MODES; i++) { - cpi->rd_thresh_mult[i] = 128; - } - -#ifdef ENTROPY_STATS - init_mv_ref_counts(); -#endif - -#define BFP(BT, SDF, VF, SVF, SVFHH, SVFHV, SVFHHV, SDX3F, SDX8F, SDX4DF) \ - cpi->fn_ptr[BT].sdf = SDF; \ - cpi->fn_ptr[BT].vf = VF; \ - 
cpi->fn_ptr[BT].svf = SVF; \ - cpi->fn_ptr[BT].svf_halfpix_h = SVFHH; \ - cpi->fn_ptr[BT].svf_halfpix_v = SVFHV; \ - cpi->fn_ptr[BT].svf_halfpix_hv = SVFHHV; \ - cpi->fn_ptr[BT].sdx3f = SDX3F; \ - cpi->fn_ptr[BT].sdx8f = SDX8F; \ - cpi->fn_ptr[BT].sdx4df = SDX4DF; - - -#if CONFIG_SUPERBLOCKS - BFP(BLOCK_32X32, vp9_sad32x32, vp9_variance32x32, vp9_sub_pixel_variance32x32, - vp9_variance_halfpixvar32x32_h, vp9_variance_halfpixvar32x32_v, - vp9_variance_halfpixvar32x32_hv, vp9_sad32x32x3, vp9_sad32x32x8, - vp9_sad32x32x4d) -#endif - - BFP(BLOCK_16X16, vp9_sad16x16, vp9_variance16x16, vp9_sub_pixel_variance16x16, - vp9_variance_halfpixvar16x16_h, vp9_variance_halfpixvar16x16_v, - vp9_variance_halfpixvar16x16_hv, vp9_sad16x16x3, vp9_sad16x16x8, - vp9_sad16x16x4d) - - BFP(BLOCK_16X8, vp9_sad16x8, vp9_variance16x8, vp9_sub_pixel_variance16x8, - NULL, NULL, NULL, vp9_sad16x8x3, vp9_sad16x8x8, vp9_sad16x8x4d) - - BFP(BLOCK_8X16, vp9_sad8x16, vp9_variance8x16, vp9_sub_pixel_variance8x16, - NULL, NULL, NULL, vp9_sad8x16x3, vp9_sad8x16x8, vp9_sad8x16x4d) - - BFP(BLOCK_8X8, vp9_sad8x8, vp9_variance8x8, vp9_sub_pixel_variance8x8, - NULL, NULL, NULL, vp9_sad8x8x3, vp9_sad8x8x8, vp9_sad8x8x4d) - - BFP(BLOCK_4X4, vp9_sad4x4, vp9_variance4x4, vp9_sub_pixel_variance4x4, - NULL, NULL, NULL, vp9_sad4x4x3, vp9_sad4x4x8, vp9_sad4x4x4d) - -#if ARCH_X86 || ARCH_X86_64 - cpi->fn_ptr[BLOCK_16X16].copymem = vp9_copy32xn; - cpi->fn_ptr[BLOCK_16X8].copymem = vp9_copy32xn; - cpi->fn_ptr[BLOCK_8X16].copymem = vp9_copy32xn; - cpi->fn_ptr[BLOCK_8X8].copymem = vp9_copy32xn; - cpi->fn_ptr[BLOCK_4X4].copymem = vp9_copy32xn; -#endif - - cpi->full_search_sad = SEARCH_INVOKE(&cpi->rtcd.search, full_search); - cpi->diamond_search_sad = SEARCH_INVOKE(&cpi->rtcd.search, diamond_search); - cpi->refining_search_sad = SEARCH_INVOKE(&cpi->rtcd.search, refining_search); - - // make sure frame 1 is okay - cpi->error_bins[0] = cpi->common.MBs; - - /* vp9_init_quantizer() is first called here. 
Add check in - * vp9_frame_init_quantizer() so that vp9_init_quantizer is only - * called later when needed. This will avoid unnecessary calls of - * vp9_init_quantizer() for every frame. - */ - vp9_init_quantizer(cpi); - - vp9_loop_filter_init(cm); - - cpi->common.error.setjmp = 0; - - vp9_zero(cpi->y_uv_mode_count) - - return (VP9_PTR) cpi; -} - -void vp9_remove_compressor(VP9_PTR *ptr) { - VP9_COMP *cpi = (VP9_COMP *)(*ptr); - int i; - - if (!cpi) - return; - - if (cpi && (cpi->common.current_video_frame > 0)) { - if (cpi->pass == 2) { - vp9_end_second_pass(cpi); - } - -#ifdef ENTROPY_STATS - if (cpi->pass != 1) { - print_context_counters(); - print_tree_update_probs(); - print_mode_context(); - } -#endif -#ifdef NMV_STATS - if (cpi->pass != 1) - print_nmvstats(); -#endif - -#if CONFIG_INTERNAL_STATS - - vp9_clear_system_state(); - - // printf("\n8x8-4x4:%d-%d\n", cpi->t8x8_count, cpi->t4x4_count); - if (cpi->pass != 1) { - FILE *f = fopen("opsnr.stt", "a"); - double time_encoded = (cpi->last_end_time_stamp_seen - - cpi->first_time_stamp_ever) / 10000000.000; - double total_encode_time = (cpi->time_receive_data + cpi->time_compress_data) / 1000.000; - double dr = (double)cpi->bytes * (double) 8 / (double)1000 / time_encoded; -#if defined(MODE_STATS) - print_mode_contexts(&cpi->common); -#endif - if (cpi->b_calculate_psnr) { - YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx]; - double samples = 3.0 / 2 * cpi->count * lst_yv12->y_width * lst_yv12->y_height; - double total_psnr = vp9_mse2psnr(samples, 255.0, cpi->total_sq_error); - double total_psnr2 = vp9_mse2psnr(samples, 255.0, cpi->total_sq_error2); - double total_ssim = 100 * pow(cpi->summed_quality / cpi->summed_weights, 8.0); - - fprintf(f, "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\tGLPsnrP\tVPXSSIM\t Time(ms)\n"); - fprintf(f, "%7.2f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t%8.0f\n", - dr, cpi->total / cpi->count, total_psnr, cpi->totalp / cpi->count, total_psnr2, total_ssim, - 
total_encode_time); -// fprintf(f, "%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t%8.0f %10ld\n", -// dr, cpi->total / cpi->count, total_psnr, cpi->totalp / cpi->count, total_psnr2, total_ssim, -// total_encode_time, cpi->tot_recode_hits); - } - - if (cpi->b_calculate_ssimg) { - fprintf(f, "BitRate\tSSIM_Y\tSSIM_U\tSSIM_V\tSSIM_A\t Time(ms)\n"); - fprintf(f, "%7.2f\t%6.4f\t%6.4f\t%6.4f\t%6.4f\t%8.0f\n", dr, - cpi->total_ssimg_y / cpi->count, cpi->total_ssimg_u / cpi->count, - cpi->total_ssimg_v / cpi->count, cpi->total_ssimg_all / cpi->count, total_encode_time); -// fprintf(f, "%7.3f\t%6.4f\t%6.4f\t%6.4f\t%6.4f\t%8.0f %10ld\n", dr, -// cpi->total_ssimg_y / cpi->count, cpi->total_ssimg_u / cpi->count, -// cpi->total_ssimg_v / cpi->count, cpi->total_ssimg_all / cpi->count, total_encode_time, cpi->tot_recode_hits); - } - - fclose(f); - } - -#endif - - -#ifdef MODE_STATS - { - extern int count_mb_seg[4]; - char modes_stats_file[250]; - FILE *f; - double dr = (double)cpi->oxcf.frame_rate * (double)cpi->bytes * (double)8 / (double)cpi->count / (double)1000; - sprintf(modes_stats_file, "modes_q%03d.stt", cpi->common.base_qindex); - f = fopen(modes_stats_file, "w"); - fprintf(f, "intra_mode in Intra Frames:\n"); - { - int i; - fprintf(f, "Y: "); - for (i = 0; i < VP9_YMODES; i++) fprintf(f, " %8d,", y_modes[i]); - fprintf(f, "\n"); - } - { - int i; - fprintf(f, "I8: "); - for (i = 0; i < VP9_I8X8_MODES; i++) fprintf(f, " %8d,", i8x8_modes[i]); - fprintf(f, "\n"); - } - { - int i; - fprintf(f, "UV: "); - for (i = 0; i < VP9_UV_MODES; i++) fprintf(f, " %8d,", uv_modes[i]); - fprintf(f, "\n"); - } - { - int i, j; - fprintf(f, "KeyFrame Y-UV:\n"); - for (i = 0; i < VP9_YMODES; i++) { - fprintf(f, "%2d:", i); - for (j = 0; j < VP9_UV_MODES; j++) fprintf(f, "%8d, ", uv_modes_y[i][j]); - fprintf(f, "\n"); - } - } - { - int i, j; - fprintf(f, "Inter Y-UV:\n"); - for (i = 0; i < VP9_YMODES; i++) { - fprintf(f, "%2d:", i); - for (j = 0; j < VP9_UV_MODES; j++) fprintf(f, "%8d, ", 
cpi->y_uv_mode_count[i][j]); - fprintf(f, "\n"); - } - } - { - int i; - - fprintf(f, "B: "); - for (i = 0; i < VP9_BINTRAMODES; i++) - fprintf(f, "%8d, ", b_modes[i]); - - fprintf(f, "\n"); - - } - - fprintf(f, "Modes in Inter Frames:\n"); - { - int i; - fprintf(f, "Y: "); - for (i = 0; i < MB_MODE_COUNT; i++) fprintf(f, " %8d,", inter_y_modes[i]); - fprintf(f, "\n"); - } - { - int i; - fprintf(f, "UV: "); - for (i = 0; i < VP9_UV_MODES; i++) fprintf(f, " %8d,", inter_uv_modes[i]); - fprintf(f, "\n"); - } - { - int i; - fprintf(f, "B: "); - for (i = 0; i < B_MODE_COUNT; i++) fprintf(f, "%8d, ", inter_b_modes[i]); - fprintf(f, "\n"); - } - fprintf(f, "P:%8d, %8d, %8d, %8d\n", count_mb_seg[0], count_mb_seg[1], count_mb_seg[2], count_mb_seg[3]); - fprintf(f, "PB:%8d, %8d, %8d, %8d\n", inter_b_modes[LEFT4X4], inter_b_modes[ABOVE4X4], inter_b_modes[ZERO4X4], inter_b_modes[NEW4X4]); - fclose(f); - } -#endif - -#ifdef ENTROPY_STATS - { - int i, j, k; - FILE *fmode = fopen("modecontext.c", "w"); - - fprintf(fmode, "\n#include \"entropymode.h\"\n\n"); - fprintf(fmode, "const unsigned int vp9_kf_default_bmode_counts "); - fprintf(fmode, "[VP9_BINTRAMODES] [VP9_BINTRAMODES] [VP9_BINTRAMODES] =\n{\n"); - - for (i = 0; i < 10; i++) { - - fprintf(fmode, " { // Above Mode : %d\n", i); - - for (j = 0; j < 10; j++) { - - fprintf(fmode, " {"); - - for (k = 0; k < VP9_BINTRAMODES; k++) { - if (!intra_mode_stats[i][j][k]) - fprintf(fmode, " %5d, ", 1); - else - fprintf(fmode, " %5d, ", intra_mode_stats[i][j][k]); - } - - fprintf(fmode, "}, // left_mode %d\n", j); - - } - - fprintf(fmode, " },\n"); - - } - - fprintf(fmode, "};\n"); - fclose(fmode); - } -#endif - - -#if defined(SECTIONBITS_OUTPUT) - - if (0) { - int i; - FILE *f = fopen("tokenbits.stt", "a"); - - for (i = 0; i < 28; i++) - fprintf(f, "%8d", (int)(Sectionbits[i] / 256)); - - fprintf(f, "\n"); - fclose(f); - } - -#endif - -#if 0 - { - printf("\n_pick_loop_filter_level:%d\n", cpi->time_pick_lpf / 1000); - printf("\n_frames 
recive_data encod_mb_row compress_frame Total\n"); - printf("%6d %10ld %10ld %10ld %10ld\n", cpi->common.current_video_frame, cpi->time_receive_data / 1000, cpi->time_encode_mb_row / 1000, cpi->time_compress_data / 1000, (cpi->time_receive_data + cpi->time_compress_data) / 1000); - } -#endif - - } - - dealloc_compressor_data(cpi); - vpx_free(cpi->mb.ss); - vpx_free(cpi->tok); - - for (i = 0; i < sizeof(cpi->mbgraph_stats) / sizeof(cpi->mbgraph_stats[0]); i++) { - vpx_free(cpi->mbgraph_stats[i].mb_stats); - } - - vp9_remove_common(&cpi->common); - vpx_free(cpi); - *ptr = 0; - -#ifdef OUTPUT_YUV_SRC - fclose(yuv_file); -#endif -#ifdef OUTPUT_YUV_REC - fclose(yuv_rec_file); -#endif - -#if 0 - - if (keyfile) - fclose(keyfile); - - if (framepsnr) - fclose(framepsnr); - - if (kf_list) - fclose(kf_list); - -#endif - -} - - -static uint64_t calc_plane_error(unsigned char *orig, int orig_stride, - unsigned char *recon, int recon_stride, - unsigned int cols, unsigned int rows) { - unsigned int row, col; - uint64_t total_sse = 0; - int diff; - - for (row = 0; row + 16 <= rows; row += 16) { - for (col = 0; col + 16 <= cols; col += 16) { - unsigned int sse; - - vp9_mse16x16(orig + col, orig_stride, recon + col, recon_stride, &sse); - total_sse += sse; - } - - /* Handle odd-sized width */ - if (col < cols) { - unsigned int border_row, border_col; - unsigned char *border_orig = orig; - unsigned char *border_recon = recon; - - for (border_row = 0; border_row < 16; border_row++) { - for (border_col = col; border_col < cols; border_col++) { - diff = border_orig[border_col] - border_recon[border_col]; - total_sse += diff * diff; - } - - border_orig += orig_stride; - border_recon += recon_stride; - } - } - - orig += orig_stride * 16; - recon += recon_stride * 16; - } - - /* Handle odd-sized height */ - for (; row < rows; row++) { - for (col = 0; col < cols; col++) { - diff = orig[col] - recon[col]; - total_sse += diff * diff; - } - - orig += orig_stride; - recon += recon_stride; - } - 
- return total_sse; -} - - -static void generate_psnr_packet(VP9_COMP *cpi) { - YV12_BUFFER_CONFIG *orig = cpi->Source; - YV12_BUFFER_CONFIG *recon = cpi->common.frame_to_show; - struct vpx_codec_cx_pkt pkt; - uint64_t sse; - int i; - unsigned int width = cpi->common.Width; - unsigned int height = cpi->common.Height; - - pkt.kind = VPX_CODEC_PSNR_PKT; - sse = calc_plane_error(orig->y_buffer, orig->y_stride, - recon->y_buffer, recon->y_stride, - width, height); - pkt.data.psnr.sse[0] = sse; - pkt.data.psnr.sse[1] = sse; - pkt.data.psnr.samples[0] = width * height; - pkt.data.psnr.samples[1] = width * height; - - width = (width + 1) / 2; - height = (height + 1) / 2; - - sse = calc_plane_error(orig->u_buffer, orig->uv_stride, - recon->u_buffer, recon->uv_stride, - width, height); - pkt.data.psnr.sse[0] += sse; - pkt.data.psnr.sse[2] = sse; - pkt.data.psnr.samples[0] += width * height; - pkt.data.psnr.samples[2] = width * height; - - sse = calc_plane_error(orig->v_buffer, orig->uv_stride, - recon->v_buffer, recon->uv_stride, - width, height); - pkt.data.psnr.sse[0] += sse; - pkt.data.psnr.sse[3] = sse; - pkt.data.psnr.samples[0] += width * height; - pkt.data.psnr.samples[3] = width * height; - - for (i = 0; i < 4; i++) - pkt.data.psnr.psnr[i] = vp9_mse2psnr(pkt.data.psnr.samples[i], 255.0, - pkt.data.psnr.sse[i]); - - vpx_codec_pkt_list_add(cpi->output_pkt_list, &pkt); -} - - -int vp9_use_as_reference(VP9_PTR ptr, int ref_frame_flags) { - VP9_COMP *cpi = (VP9_COMP *)(ptr); - - if (ref_frame_flags > 7) - return -1; - - cpi->ref_frame_flags = ref_frame_flags; - return 0; -} -int vp9_update_reference(VP9_PTR ptr, int ref_frame_flags) { - VP9_COMP *cpi = (VP9_COMP *)(ptr); - - if (ref_frame_flags > 7) - return -1; - - cpi->common.refresh_golden_frame = 0; - cpi->common.refresh_alt_ref_frame = 0; - cpi->common.refresh_last_frame = 0; - - if (ref_frame_flags & VP9_LAST_FLAG) - cpi->common.refresh_last_frame = 1; - - if (ref_frame_flags & VP9_GOLD_FLAG) - 
cpi->common.refresh_golden_frame = 1; - - if (ref_frame_flags & VP9_ALT_FLAG) - cpi->common.refresh_alt_ref_frame = 1; - - return 0; -} - -int vp9_get_reference_enc(VP9_PTR ptr, VP9_REFFRAME ref_frame_flag, - YV12_BUFFER_CONFIG *sd) { - VP9_COMP *cpi = (VP9_COMP *)(ptr); - VP9_COMMON *cm = &cpi->common; - int ref_fb_idx; - - if (ref_frame_flag == VP9_LAST_FLAG) - ref_fb_idx = cm->lst_fb_idx; - else if (ref_frame_flag == VP9_GOLD_FLAG) - ref_fb_idx = cm->gld_fb_idx; - else if (ref_frame_flag == VP9_ALT_FLAG) - ref_fb_idx = cm->alt_fb_idx; - else - return -1; - - vp8_yv12_copy_frame_ptr(&cm->yv12_fb[ref_fb_idx], sd); - - return 0; -} - -int vp9_set_reference_enc(VP9_PTR ptr, VP9_REFFRAME ref_frame_flag, - YV12_BUFFER_CONFIG *sd) { - VP9_COMP *cpi = (VP9_COMP *)(ptr); - VP9_COMMON *cm = &cpi->common; - - int ref_fb_idx; - - if (ref_frame_flag == VP9_LAST_FLAG) - ref_fb_idx = cm->lst_fb_idx; - else if (ref_frame_flag == VP9_GOLD_FLAG) - ref_fb_idx = cm->gld_fb_idx; - else if (ref_frame_flag == VP9_ALT_FLAG) - ref_fb_idx = cm->alt_fb_idx; - else - return -1; - - vp8_yv12_copy_frame_ptr(sd, &cm->yv12_fb[ref_fb_idx]); - - return 0; -} -int vp9_update_entropy(VP9_PTR comp, int update) { - VP9_COMP *cpi = (VP9_COMP *) comp; - VP9_COMMON *cm = &cpi->common; - cm->refresh_entropy_probs = update; - - return 0; -} - - -#ifdef OUTPUT_YUV_SRC -void vp9_write_yuv_frame(YV12_BUFFER_CONFIG *s) { - unsigned char *src = s->y_buffer; - int h = s->y_height; - - do { - fwrite(src, s->y_width, 1, yuv_file); - src += s->y_stride; - } while (--h); - - src = s->u_buffer; - h = s->uv_height; - - do { - fwrite(src, s->uv_width, 1, yuv_file); - src += s->uv_stride; - } while (--h); - - src = s->v_buffer; - h = s->uv_height; - - do { - fwrite(src, s->uv_width, 1, yuv_file); - src += s->uv_stride; - } while (--h); -} -#endif - -#ifdef OUTPUT_YUV_REC -void vp9_write_yuv_rec_frame(VP9_COMMON *cm) { - YV12_BUFFER_CONFIG *s = cm->frame_to_show; - unsigned char *src = s->y_buffer; - int h = 
cm->Height; - - do { - fwrite(src, s->y_width, 1, yuv_rec_file); - src += s->y_stride; - } while (--h); - - src = s->u_buffer; - h = (cm->Height + 1) / 2; - - do { - fwrite(src, s->uv_width, 1, yuv_rec_file); - src += s->uv_stride; - } while (--h); - - src = s->v_buffer; - h = (cm->Height + 1) / 2; - - do { - fwrite(src, s->uv_width, 1, yuv_rec_file); - src += s->uv_stride; - } while (--h); -} -#endif - -static void update_alt_ref_frame_stats(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; - - // Update data structure that monitors level of reference to last GF - vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols)); - cpi->gf_active_count = cm->mb_rows * cm->mb_cols; - - // this frame refreshes means next frames don't unless specified by user - cpi->common.frames_since_golden = 0; - - // Clear the alternate reference update pending flag. - cpi->source_alt_ref_pending = FALSE; - - // Set the alternate refernce frame active flag - cpi->source_alt_ref_active = TRUE; - - -} -static void update_golden_frame_stats(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; - - // Update the Golden frame usage counts. 
- if (cm->refresh_golden_frame) { - // Update data structure that monitors level of reference to last GF - vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols)); - cpi->gf_active_count = cm->mb_rows * cm->mb_cols; - - // this frame refreshes means next frames don't unless specified by user - cm->refresh_golden_frame = 0; - cpi->common.frames_since_golden = 0; - - // if ( cm->frame_type == KEY_FRAME ) - // { - cpi->recent_ref_frame_usage[INTRA_FRAME] = 1; - cpi->recent_ref_frame_usage[LAST_FRAME] = 1; - cpi->recent_ref_frame_usage[GOLDEN_FRAME] = 1; - cpi->recent_ref_frame_usage[ALTREF_FRAME] = 1; - // } - // else - // { - // // Carry a potrtion of count over to begining of next gf sequence - // cpi->recent_ref_frame_usage[INTRA_FRAME] >>= 5; - // cpi->recent_ref_frame_usage[LAST_FRAME] >>= 5; - // cpi->recent_ref_frame_usage[GOLDEN_FRAME] >>= 5; - // cpi->recent_ref_frame_usage[ALTREF_FRAME] >>= 5; - // } - - // ******** Fixed Q test code only ************ - // If we are going to use the ALT reference for the next group of frames set a flag to say so. 
- if (cpi->oxcf.fixed_q >= 0 && - cpi->oxcf.play_alternate && !cpi->common.refresh_alt_ref_frame) { - cpi->source_alt_ref_pending = TRUE; - cpi->frames_till_gf_update_due = cpi->baseline_gf_interval; - } - - if (!cpi->source_alt_ref_pending) - cpi->source_alt_ref_active = FALSE; - - // Decrement count down till next gf - if (cpi->frames_till_gf_update_due > 0) - cpi->frames_till_gf_update_due--; - - } else if (!cpi->common.refresh_alt_ref_frame) { - // Decrement count down till next gf - if (cpi->frames_till_gf_update_due > 0) - cpi->frames_till_gf_update_due--; - - if (cpi->common.frames_till_alt_ref_frame) - cpi->common.frames_till_alt_ref_frame--; - - cpi->common.frames_since_golden++; - - if (cpi->common.frames_since_golden > 1) { - cpi->recent_ref_frame_usage[INTRA_FRAME] += cpi->count_mb_ref_frame_usage[INTRA_FRAME]; - cpi->recent_ref_frame_usage[LAST_FRAME] += cpi->count_mb_ref_frame_usage[LAST_FRAME]; - cpi->recent_ref_frame_usage[GOLDEN_FRAME] += cpi->count_mb_ref_frame_usage[GOLDEN_FRAME]; - cpi->recent_ref_frame_usage[ALTREF_FRAME] += cpi->count_mb_ref_frame_usage[ALTREF_FRAME]; - } - } -} - -static int find_fp_qindex() { - int i; - - for (i = 0; i < QINDEX_RANGE; i++) { - if (vp9_convert_qindex_to_q(i) >= 30.0) { - break; - } - } - - if (i == QINDEX_RANGE) - i--; - - return i; -} - -static void Pass1Encode(VP9_COMP *cpi, unsigned long *size, unsigned char *dest, unsigned int *frame_flags) { - (void) size; - (void) dest; - (void) frame_flags; - - - vp9_set_quantizer(cpi, find_fp_qindex()); - vp9_first_pass(cpi); -} - -#define WRITE_RECON_BUFFER 0 -#if WRITE_RECON_BUFFER -void write_cx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) { - - // write the frame - FILE *yframe; - int i; - char filename[255]; - - sprintf(filename, "cx\\y%04d.raw", this_frame); - yframe = fopen(filename, "wb"); - - for (i = 0; i < frame->y_height; i++) - fwrite(frame->y_buffer + i * frame->y_stride, - frame->y_width, 1, yframe); - - fclose(yframe); - sprintf(filename, 
"cx\\u%04d.raw", this_frame); - yframe = fopen(filename, "wb"); - - for (i = 0; i < frame->uv_height; i++) - fwrite(frame->u_buffer + i * frame->uv_stride, - frame->uv_width, 1, yframe); - - fclose(yframe); - sprintf(filename, "cx\\v%04d.raw", this_frame); - yframe = fopen(filename, "wb"); - - for (i = 0; i < frame->uv_height; i++) - fwrite(frame->v_buffer + i * frame->uv_stride, - frame->uv_width, 1, yframe); - - fclose(yframe); -} -#endif - -static double compute_edge_pixel_proportion(YV12_BUFFER_CONFIG *frame) { -#define EDGE_THRESH 128 - int i, j; - int num_edge_pels = 0; - int num_pels = (frame->y_height - 2) * (frame->y_width - 2); - unsigned char *prev = frame->y_buffer + 1; - unsigned char *curr = frame->y_buffer + 1 + frame->y_stride; - unsigned char *next = frame->y_buffer + 1 + 2 * frame->y_stride; - for (i = 1; i < frame->y_height - 1; i++) { - for (j = 1; j < frame->y_width - 1; j++) { - /* Sobel hor and ver gradients */ - int v = 2 * (curr[1] - curr[-1]) + (prev[1] - prev[-1]) + (next[1] - next[-1]); - int h = 2 * (prev[0] - next[0]) + (prev[1] - next[1]) + (prev[-1] - next[-1]); - h = (h < 0 ? -h : h); - v = (v < 0 ? -v : v); - if (h > EDGE_THRESH || v > EDGE_THRESH) num_edge_pels++; - curr++; - prev++; - next++; - } - curr += frame->y_stride - frame->y_width + 2; - prev += frame->y_stride - frame->y_width + 2; - next += frame->y_stride - frame->y_width + 2; - } - return (double)num_edge_pels / (double)num_pels; -} - -// Function to test for conditions that indicate we should loop -// back and recode a frame. -static BOOL recode_loop_test(VP9_COMP *cpi, - int high_limit, int low_limit, - int q, int maxq, int minq) { - BOOL force_recode = FALSE; - VP9_COMMON *cm = &cpi->common; - - // Is frame recode allowed at all - // Yes if either recode mode 1 is selected or mode two is selcted - // and the frame is a key frame. 
golden frame or alt_ref_frame - if ((cpi->sf.recode_loop == 1) || - ((cpi->sf.recode_loop == 2) && - ((cm->frame_type == KEY_FRAME) || - cm->refresh_golden_frame || - cm->refresh_alt_ref_frame))) { - // General over and under shoot tests - if (((cpi->projected_frame_size > high_limit) && (q < maxq)) || - ((cpi->projected_frame_size < low_limit) && (q > minq))) { - force_recode = TRUE; - } - // Special Constrained quality tests - else if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) { - // Undershoot and below auto cq level - if ((q > cpi->cq_target_quality) && - (cpi->projected_frame_size < - ((cpi->this_frame_target * 7) >> 3))) { - force_recode = TRUE; - } - // Severe undershoot and between auto and user cq level - else if ((q > cpi->oxcf.cq_level) && - (cpi->projected_frame_size < cpi->min_frame_bandwidth) && - (cpi->active_best_quality > cpi->oxcf.cq_level)) { - force_recode = TRUE; - cpi->active_best_quality = cpi->oxcf.cq_level; - } - } - } - - return force_recode; -} - -static void update_reference_frames(VP9_COMMON *cm) { - YV12_BUFFER_CONFIG *yv12_fb = cm->yv12_fb; - - // At this point the new frame has been encoded. - // If any buffer copy / swapping is signaled it should be done here. 
- - if (cm->frame_type == KEY_FRAME) { - yv12_fb[cm->new_fb_idx].flags |= VP9_GOLD_FLAG | VP9_ALT_FLAG; - - yv12_fb[cm->gld_fb_idx].flags &= ~VP9_GOLD_FLAG; - yv12_fb[cm->alt_fb_idx].flags &= ~VP9_ALT_FLAG; - - cm->alt_fb_idx = cm->gld_fb_idx = cm->new_fb_idx; - } else { /* For non key frames */ - if (cm->refresh_alt_ref_frame) { - assert(!cm->copy_buffer_to_arf); - - cm->yv12_fb[cm->new_fb_idx].flags |= VP9_ALT_FLAG; - cm->yv12_fb[cm->alt_fb_idx].flags &= ~VP9_ALT_FLAG; - cm->alt_fb_idx = cm->new_fb_idx; - } else if (cm->copy_buffer_to_arf) { - assert(!(cm->copy_buffer_to_arf & ~0x3)); - - if (cm->copy_buffer_to_arf == 1) { - if (cm->alt_fb_idx != cm->lst_fb_idx) { - yv12_fb[cm->lst_fb_idx].flags |= VP9_ALT_FLAG; - yv12_fb[cm->alt_fb_idx].flags &= ~VP9_ALT_FLAG; - cm->alt_fb_idx = cm->lst_fb_idx; - } - } else { /* if (cm->copy_buffer_to_arf == 2) */ - if (cm->alt_fb_idx != cm->gld_fb_idx) { - yv12_fb[cm->gld_fb_idx].flags |= VP9_ALT_FLAG; - yv12_fb[cm->alt_fb_idx].flags &= ~VP9_ALT_FLAG; - cm->alt_fb_idx = cm->gld_fb_idx; - } - } - } - - if (cm->refresh_golden_frame) { - assert(!cm->copy_buffer_to_gf); - - cm->yv12_fb[cm->new_fb_idx].flags |= VP9_GOLD_FLAG; - cm->yv12_fb[cm->gld_fb_idx].flags &= ~VP9_GOLD_FLAG; - cm->gld_fb_idx = cm->new_fb_idx; - } else if (cm->copy_buffer_to_gf) { - assert(!(cm->copy_buffer_to_arf & ~0x3)); - - if (cm->copy_buffer_to_gf == 1) { - if (cm->gld_fb_idx != cm->lst_fb_idx) { - yv12_fb[cm->lst_fb_idx].flags |= VP9_GOLD_FLAG; - yv12_fb[cm->gld_fb_idx].flags &= ~VP9_GOLD_FLAG; - cm->gld_fb_idx = cm->lst_fb_idx; - } - } else { /* if (cm->copy_buffer_to_gf == 2) */ - if (cm->alt_fb_idx != cm->gld_fb_idx) { - yv12_fb[cm->alt_fb_idx].flags |= VP9_GOLD_FLAG; - yv12_fb[cm->gld_fb_idx].flags &= ~VP9_GOLD_FLAG; - cm->gld_fb_idx = cm->alt_fb_idx; - } - } - } - } - - if (cm->refresh_last_frame) { - cm->yv12_fb[cm->new_fb_idx].flags |= VP9_LAST_FLAG; - cm->yv12_fb[cm->lst_fb_idx].flags &= ~VP9_LAST_FLAG; - cm->lst_fb_idx = cm->new_fb_idx; - } -} - 
-static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) { - if (cm->no_lpf) { - cm->filter_level = 0; - } -#if CONFIG_LOSSLESS - else if (cpi->oxcf.lossless) { - cm->filter_level = 0; - } -#endif - else { - struct vpx_usec_timer timer; - - vp9_clear_system_state(); - - vpx_usec_timer_start(&timer); - if (cpi->sf.auto_filter == 0) - vp9_pick_filter_level_fast(cpi->Source, cpi); - else - vp9_pick_filter_level(cpi->Source, cpi); - - vpx_usec_timer_mark(&timer); - cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer); - } - - if (cm->filter_level > 0) { - vp9_set_alt_lf_level(cpi, cm->filter_level); - vp9_loop_filter_frame(cm, &cpi->mb.e_mbd); - } - - vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show); - -} - -#if CONFIG_PRED_FILTER -void select_pred_filter_mode(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; - - int prob_pred_filter_off = cm->prob_pred_filter_off; - - // Force filter on/off if probability is extreme - if (prob_pred_filter_off >= 255 * 0.95) - cm->pred_filter_mode = 0; // Off at the frame level - else if (prob_pred_filter_off <= 255 * 0.05) - cm->pred_filter_mode = 1; // On at the frame level - else - cm->pred_filter_mode = 2; // Selectable at the MB level -} - -void update_pred_filt_prob(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; - int prob_pred_filter_off; - - // Based on the selection in the previous frame determine what mode - // to use for the current frame and work out the signaling probability - if (cpi->pred_filter_on_count + cpi->pred_filter_off_count) { - prob_pred_filter_off = cpi->pred_filter_off_count * 256 / - (cpi->pred_filter_on_count + cpi->pred_filter_off_count); - - if (prob_pred_filter_off < 1) - prob_pred_filter_off = 1; - - if (prob_pred_filter_off > 255) - prob_pred_filter_off = 255; - - cm->prob_pred_filter_off = prob_pred_filter_off; - } else - cm->prob_pred_filter_off = 128; - /* - { - FILE *fp = fopen("filt_use.txt", "a"); - fprintf (fp, "%d %d prob=%d\n", cpi->pred_filter_off_count, - 
cpi->pred_filter_on_count, cm->prob_pred_filter_off); - fclose(fp); - } - */ -} -#endif - -static void encode_frame_to_data_rate -( - VP9_COMP *cpi, - unsigned long *size, - unsigned char *dest, - unsigned int *frame_flags -) { - VP9_COMMON *cm = &cpi->common; - MACROBLOCKD *xd = &cpi->mb.e_mbd; - - int Q; - int frame_over_shoot_limit; - int frame_under_shoot_limit; - - int Loop = FALSE; - int loop_count; - int this_q; - int last_zbin_oq; - - int q_low; - int q_high; - int zbin_oq_high; - int zbin_oq_low = 0; - - int top_index; - int bottom_index; - int active_worst_qchanged = FALSE; - - int overshoot_seen = FALSE; - int undershoot_seen = FALSE; - - int loop_size_estimate = 0; - - SPEED_FEATURES *sf = &cpi->sf; -#if RESET_FOREACH_FILTER - int q_low0; - int q_high0; - int zbin_oq_high0; - int zbin_oq_low0 = 0; - int Q0; - int last_zbin_oq0; - int active_best_quality0; - int active_worst_quality0; - double rate_correction_factor0; - double gf_rate_correction_factor0; -#endif - - /* list of filters to search over */ - int mcomp_filters_to_search[] = { - EIGHTTAP, EIGHTTAP_SHARP, SIXTAP, SWITCHABLE - }; - int mcomp_filters = sizeof(mcomp_filters_to_search) / - sizeof(*mcomp_filters_to_search); - int mcomp_filter_index = 0; - INT64 mcomp_filter_cost[4]; - - // Clear down mmx registers to allow floating point in what follows - vp9_clear_system_state(); - - - // For an alt ref frame in 2 pass we skip the call to the second - // pass function that sets the target bandwidth so must set it here - if (cpi->common.refresh_alt_ref_frame) { - cpi->per_frame_bandwidth = cpi->twopass.gf_bits; // Per frame bit target for the alt ref frame - cpi->target_bandwidth = cpi->twopass.gf_bits * cpi->output_frame_rate; // per second target bitrate - } - - // Default turn off buffer to buffer copying - cm->copy_buffer_to_gf = 0; - cm->copy_buffer_to_arf = 0; - - // Clear zbin over-quant value and mode boost values. 
- cpi->zbin_over_quant = 0; - cpi->zbin_mode_boost = 0; - - // Enable or disable mode based tweaking of the zbin - // For 2 Pass Only used where GF/ARF prediction quality - // is above a threshold - cpi->zbin_mode_boost = 0; -#if CONFIG_LOSSLESS - cpi->zbin_mode_boost_enabled = FALSE; -#else - cpi->zbin_mode_boost_enabled = TRUE; -#endif - if (cpi->gfu_boost <= 400) { - cpi->zbin_mode_boost_enabled = FALSE; - } - - // Current default encoder behaviour for the altref sign bias - if (cpi->source_alt_ref_active) - cpi->common.ref_frame_sign_bias[ALTREF_FRAME] = 1; - else - cpi->common.ref_frame_sign_bias[ALTREF_FRAME] = 0; - - // Check to see if a key frame is signalled - // For two pass with auto key frame enabled cm->frame_type may already be set, but not for one pass. - if ((cm->current_video_frame == 0) || - (cm->frame_flags & FRAMEFLAGS_KEY) || - (cpi->oxcf.auto_key && (cpi->frames_since_key % cpi->key_frame_frequency == 0))) { - // Key frame from VFW/auto-keyframe/first frame - cm->frame_type = KEY_FRAME; - } - - // Set default state for segment based loop filter update flags - xd->mode_ref_lf_delta_update = 0; - - // Set various flags etc to special state if it is a key frame - if (cm->frame_type == KEY_FRAME) { - int i; - - // Reset the loop filter deltas and segmentation map - setup_features(cpi); - - // If segmentation is enabled force a map update for key frames - if (xd->segmentation_enabled) { - xd->update_mb_segmentation_map = 1; - xd->update_mb_segmentation_data = 1; - } - - // The alternate reference frame cannot be active for a key frame - cpi->source_alt_ref_active = FALSE; - - // Reset the RD threshold multipliers to default of * 1 (128) - for (i = 0; i < MAX_MODES; i++) { - cpi->rd_thresh_mult[i] = 128; - } - } - - // Test code for new segment features - init_seg_features(cpi); - - // Decide how big to make the frame - vp9_pick_frame_size(cpi); - - vp9_clear_system_state(); - - // Set an active best quality and if necessary active worst quality - Q 
= cpi->active_worst_quality; - - if (cm->frame_type == KEY_FRAME) { - int high = 2000; - int low = 400; - - if (cpi->kf_boost > high) - cpi->active_best_quality = kf_low_motion_minq[Q]; - else if (cpi->kf_boost < low) - cpi->active_best_quality = kf_high_motion_minq[Q]; - else { - int gap = high - low; - int offset = high - cpi->kf_boost; - int qdiff = kf_high_motion_minq[Q] - kf_low_motion_minq[Q]; - int adjustment = ((offset * qdiff) + (gap >> 1)) / gap; - - cpi->active_best_quality = kf_low_motion_minq[Q] + adjustment; - } - - // Make an adjustment based on the %s static - // The main impact of this is at lower Q to prevent overly large key - // frames unless a lot of the image is static. - if (cpi->kf_zeromotion_pct < 64) - cpi->active_best_quality += 4 - (cpi->kf_zeromotion_pct >> 4); - - // Special case for key frames forced because we have reached - // the maximum key frame interval. Here force the Q to a range - // based on the ambient Q to reduce the risk of popping - if (cpi->this_key_frame_forced) { - int delta_qindex; - int qindex = cpi->last_boosted_qindex; - - delta_qindex = compute_qdelta(cpi, qindex, - (qindex * 0.75)); - - cpi->active_best_quality = qindex + delta_qindex; - if (cpi->active_best_quality < cpi->best_quality) - cpi->active_best_quality = cpi->best_quality; - } - } - - else if (cm->refresh_golden_frame || cpi->common.refresh_alt_ref_frame) { - int high = 2000; - int low = 400; - - // Use the lower of cpi->active_worst_quality and recent - // average Q as basis for GF/ARF Q limit unless last frame was - // a key frame. 
- if ((cpi->frames_since_key > 1) && - (cpi->avg_frame_qindex < cpi->active_worst_quality)) { - Q = cpi->avg_frame_qindex; - } - - // For constrained quality dont allow Q less than the cq level - if ((cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) && - (Q < cpi->cq_target_quality)) { - Q = cpi->cq_target_quality; - } - - if (cpi->gfu_boost > high) - cpi->active_best_quality = gf_low_motion_minq[Q]; - else if (cpi->gfu_boost < low) - cpi->active_best_quality = gf_high_motion_minq[Q]; - else { - int gap = high - low; - int offset = high - cpi->gfu_boost; - int qdiff = gf_high_motion_minq[Q] - gf_low_motion_minq[Q]; - int adjustment = ((offset * qdiff) + (gap >> 1)) / gap; - - cpi->active_best_quality = gf_low_motion_minq[Q] + adjustment; - } - - // Constrained quality use slightly lower active best. - if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) { - cpi->active_best_quality = - cpi->active_best_quality * 15 / 16; - } - } else { - cpi->active_best_quality = inter_minq[Q]; - - // For the constant/constrained quality mode we dont want - // q to fall below the cq level. - if ((cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) && - (cpi->active_best_quality < cpi->cq_target_quality)) { - // If we are strongly undershooting the target rate in the last - // frames then use the user passed in cq value not the auto - // cq value. 
- if (cpi->rolling_actual_bits < cpi->min_frame_bandwidth) - cpi->active_best_quality = cpi->oxcf.cq_level; - else - cpi->active_best_quality = cpi->cq_target_quality; - } - } - - // Clip the active best and worst quality values to limits - if (cpi->active_worst_quality > cpi->worst_quality) - cpi->active_worst_quality = cpi->worst_quality; - - if (cpi->active_best_quality < cpi->best_quality) - cpi->active_best_quality = cpi->best_quality; - - if (cpi->active_best_quality > cpi->worst_quality) - cpi->active_best_quality = cpi->worst_quality; - - if (cpi->active_worst_quality < cpi->active_best_quality) - cpi->active_worst_quality = cpi->active_best_quality; - - // Specuial case code to try and match quality with forced key frames - if ((cm->frame_type == KEY_FRAME) && cpi->this_key_frame_forced) { - Q = cpi->last_boosted_qindex; - } else { - // Determine initial Q to try - Q = vp9_regulate_q(cpi, cpi->this_frame_target); - } - last_zbin_oq = cpi->zbin_over_quant; - - // Set highest allowed value for Zbin over quant - if (cm->frame_type == KEY_FRAME) - zbin_oq_high = 0; // ZBIN_OQ_MAX/16 - else if (cm->refresh_alt_ref_frame || (cm->refresh_golden_frame && !cpi->source_alt_ref_active)) - zbin_oq_high = 16; - else - zbin_oq_high = ZBIN_OQ_MAX; - - vp9_compute_frame_size_bounds(cpi, &frame_under_shoot_limit, - &frame_over_shoot_limit); - - // Limit Q range for the adaptive loop. 
- bottom_index = cpi->active_best_quality; - top_index = cpi->active_worst_quality; - q_low = cpi->active_best_quality; - q_high = cpi->active_worst_quality; - - loop_count = 0; - - if (cm->frame_type != KEY_FRAME) { - /* TODO: Decide this more intelligently */ - if (sf->search_best_filter) { - cm->mcomp_filter_type = mcomp_filters_to_search[0]; - mcomp_filter_index = 0; - } else { - cm->mcomp_filter_type = DEFAULT_INTERP_FILTER; - } - /* TODO: Decide this more intelligently */ - xd->allow_high_precision_mv = (Q < HIGH_PRECISION_MV_QTHRESH); - } - -#if CONFIG_POSTPROC - - if (cpi->oxcf.noise_sensitivity > 0) { - unsigned char *src; - int l = 0; - - switch (cpi->oxcf.noise_sensitivity) { - case 1: - l = 20; - break; - case 2: - l = 40; - break; - case 3: - l = 60; - break; - case 4: - - case 5: - l = 100; - break; - case 6: - l = 150; - break; - } - - - if (cm->frame_type == KEY_FRAME) { - vp9_de_noise(cpi->Source, cpi->Source, l, 1, 0, RTCD(postproc)); - } else { - vp9_de_noise(cpi->Source, cpi->Source, l, 1, 0, RTCD(postproc)); - - src = cpi->Source->y_buffer; - - if (cpi->Source->y_stride < 0) { - src += cpi->Source->y_stride * (cpi->Source->y_height - 1); - } - } - } - -#endif - -#ifdef OUTPUT_YUV_SRC - vp9_write_yuv_frame(cpi->Source); -#endif - -#if RESET_FOREACH_FILTER - if (sf->search_best_filter) { - q_low0 = q_low; - q_high0 = q_high; - Q0 = Q; - zbin_oq_low0 = zbin_oq_low; - zbin_oq_high0 = zbin_oq_high; - last_zbin_oq0 = last_zbin_oq; - rate_correction_factor0 = cpi->rate_correction_factor; - gf_rate_correction_factor0 = cpi->gf_rate_correction_factor; - active_best_quality0 = cpi->active_best_quality; - active_worst_quality0 = cpi->active_worst_quality; - } -#endif - do { - vp9_clear_system_state(); // __asm emms; - - vp9_set_quantizer(cpi, Q); - this_q = Q; - - if (loop_count == 0) { - - // setup skip prob for costing in mode/mv decision - if (cpi->common.mb_no_coeff_skip) { - int k; - for (k = 0; k < MBSKIP_CONTEXTS; k++) - cm->mbskip_pred_probs[k] = 
cpi->base_skip_false_prob[Q][k]; - - if (cm->frame_type != KEY_FRAME) { - if (cpi->common.refresh_alt_ref_frame) { - for (k = 0; k < MBSKIP_CONTEXTS; k++) { - if (cpi->last_skip_false_probs[2][k] != 0) - cm->mbskip_pred_probs[k] = cpi->last_skip_false_probs[2][k]; - } - } else if (cpi->common.refresh_golden_frame) { - for (k = 0; k < MBSKIP_CONTEXTS; k++) { - if (cpi->last_skip_false_probs[1][k] != 0) - cm->mbskip_pred_probs[k] = cpi->last_skip_false_probs[1][k]; - } - } else { - int k; - for (k = 0; k < MBSKIP_CONTEXTS; k++) { - if (cpi->last_skip_false_probs[0][k] != 0) - cm->mbskip_pred_probs[k] = cpi->last_skip_false_probs[0][k]; - } - } - - // as this is for cost estimate, let's make sure it does not - // get extreme either way - { - int k; - for (k = 0; k < MBSKIP_CONTEXTS; ++k) { - if (cm->mbskip_pred_probs[k] < 5) - cm->mbskip_pred_probs[k] = 5; - - if (cm->mbskip_pred_probs[k] > 250) - cm->mbskip_pred_probs[k] = 250; - - if (cpi->is_src_frame_alt_ref) - cm->mbskip_pred_probs[k] = 1; - } - } - } - } - - // Set up entropy depending on frame type. - if (cm->frame_type == KEY_FRAME) - vp9_setup_key_frame(cpi); - else - vp9_setup_inter_frame(cpi); - } - - // transform / motion compensation build reconstruction frame - - vp9_encode_frame(cpi); - - // Update the skip mb flag probabilities based on the distribution - // seen in the last encoder iteration. - update_base_skip_probs(cpi); - - vp9_clear_system_state(); // __asm emms; - -#if CONFIG_PRED_FILTER - // Update prediction filter on/off probability based on - // selection made for the current frame - if (cm->frame_type != KEY_FRAME) - update_pred_filt_prob(cpi); -#endif - - // Dummy pack of the bitstream using up to date stats to get an - // accurate estimate of output frame size to determine if we need - // to recode. 
- vp9_save_coding_context(cpi); - cpi->dummy_packing = 1; - vp9_pack_bitstream(cpi, dest, size); - cpi->projected_frame_size = (*size) << 3; - vp9_restore_coding_context(cpi); - - if (frame_over_shoot_limit == 0) - frame_over_shoot_limit = 1; - active_worst_qchanged = FALSE; - - // Special case handling for forced key frames - if ((cm->frame_type == KEY_FRAME) && cpi->this_key_frame_forced) { - int last_q = Q; - int kf_err = vp9_calc_ss_err(cpi->Source, - &cm->yv12_fb[cm->new_fb_idx]); - - int high_err_target = cpi->ambient_err; - int low_err_target = (cpi->ambient_err >> 1); - - // Prevent possible divide by zero error below for perfect KF - kf_err += (!kf_err); - - // The key frame is not good enough or we can afford - // to make it better without undue risk of popping. - if (((kf_err > high_err_target) && - (cpi->projected_frame_size <= frame_over_shoot_limit)) || - ((kf_err > low_err_target) && - (cpi->projected_frame_size <= frame_under_shoot_limit))) { - // Lower q_high - q_high = (Q > q_low) ? (Q - 1) : q_low; - - // Adjust Q - Q = (Q * high_err_target) / kf_err; - if (Q < ((q_high + q_low) >> 1)) - Q = (q_high + q_low) >> 1; - } - // The key frame is much better than the previous frame - else if ((kf_err < low_err_target) && - (cpi->projected_frame_size >= frame_under_shoot_limit)) { - // Raise q_low - q_low = (Q < q_high) ? (Q + 1) : q_high; - - // Adjust Q - Q = (Q * low_err_target) / kf_err; - if (Q > ((q_high + q_low + 1) >> 1)) - Q = (q_high + q_low + 1) >> 1; - } - - // Clamp Q to upper and lower limits: - if (Q > q_high) - Q = q_high; - else if (Q < q_low) - Q = q_low; - - Loop = ((Q != last_q)) ? TRUE : FALSE; - } - - // Is the projected frame size out of range and are we allowed to attempt to recode. - else if (recode_loop_test(cpi, - frame_over_shoot_limit, frame_under_shoot_limit, - Q, top_index, bottom_index)) { - int last_q = Q; - int Retries = 0; - - // Frame size out of permitted range: - // Update correction factor & compute new Q to try... 
- - // Frame is too large - if (cpi->projected_frame_size > cpi->this_frame_target) { - q_low = (Q < q_high) ? (Q + 1) : q_high; // Raise Qlow as to at least the current value - - if (cpi->zbin_over_quant > 0) // If we are using over quant do the same for zbin_oq_low - zbin_oq_low = (cpi->zbin_over_quant < zbin_oq_high) ? (cpi->zbin_over_quant + 1) : zbin_oq_high; - - if (undershoot_seen || (loop_count > 1)) { - // Update rate_correction_factor unless cpi->active_worst_quality has changed. - if (!active_worst_qchanged) - vp9_update_rate_correction_factors(cpi, 1); - - Q = (q_high + q_low + 1) / 2; - - // Adjust cpi->zbin_over_quant (only allowed when Q is max) - if (Q < MAXQ) - cpi->zbin_over_quant = 0; - else { - zbin_oq_low = (cpi->zbin_over_quant < zbin_oq_high) ? (cpi->zbin_over_quant + 1) : zbin_oq_high; - cpi->zbin_over_quant = (zbin_oq_high + zbin_oq_low) / 2; - } - } else { - // Update rate_correction_factor unless cpi->active_worst_quality has changed. - if (!active_worst_qchanged) - vp9_update_rate_correction_factors(cpi, 0); - - Q = vp9_regulate_q(cpi, cpi->this_frame_target); - - while (((Q < q_low) || (cpi->zbin_over_quant < zbin_oq_low)) && (Retries < 10)) { - vp9_update_rate_correction_factors(cpi, 0); - Q = vp9_regulate_q(cpi, cpi->this_frame_target); - Retries++; - } - } - - overshoot_seen = TRUE; - } - // Frame is too small - else { - if (cpi->zbin_over_quant == 0) - q_high = (Q > q_low) ? (Q - 1) : q_low; // Lower q_high if not using over quant - else // else lower zbin_oq_high - zbin_oq_high = (cpi->zbin_over_quant > zbin_oq_low) ? (cpi->zbin_over_quant - 1) : zbin_oq_low; - - if (overshoot_seen || (loop_count > 1)) { - // Update rate_correction_factor unless cpi->active_worst_quality has changed. 
- if (!active_worst_qchanged) - vp9_update_rate_correction_factors(cpi, 1); - - Q = (q_high + q_low) / 2; - - // Adjust cpi->zbin_over_quant (only allowed when Q is max) - if (Q < MAXQ) - cpi->zbin_over_quant = 0; - else - cpi->zbin_over_quant = (zbin_oq_high + zbin_oq_low) / 2; - } else { - // Update rate_correction_factor unless cpi->active_worst_quality has changed. - if (!active_worst_qchanged) - vp9_update_rate_correction_factors(cpi, 0); - - Q = vp9_regulate_q(cpi, cpi->this_frame_target); - - // Special case reset for qlow for constrained quality. - // This should only trigger where there is very substantial - // undershoot on a frame and the auto cq level is above - // the user passsed in value. - if ((cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) && - (Q < q_low)) { - q_low = Q; - } - - while (((Q > q_high) || (cpi->zbin_over_quant > zbin_oq_high)) && (Retries < 10)) { - vp9_update_rate_correction_factors(cpi, 0); - Q = vp9_regulate_q(cpi, cpi->this_frame_target); - Retries++; - } - } - - undershoot_seen = TRUE; - } - - // Clamp Q to upper and lower limits: - if (Q > q_high) - Q = q_high; - else if (Q < q_low) - Q = q_low; - - // Clamp cpi->zbin_over_quant - cpi->zbin_over_quant = (cpi->zbin_over_quant < zbin_oq_low) ? - zbin_oq_low : (cpi->zbin_over_quant > zbin_oq_high) ? - zbin_oq_high : cpi->zbin_over_quant; - - // Loop = ((Q != last_q) || (last_zbin_oq != cpi->zbin_over_quant)) ? TRUE : FALSE; - Loop = ((Q != last_q)) ? 
TRUE : FALSE; - last_zbin_oq = cpi->zbin_over_quant; - } else - Loop = FALSE; - - if (cpi->is_src_frame_alt_ref) - Loop = FALSE; - - if (cm->frame_type != KEY_FRAME && - !sf->search_best_filter && - cm->mcomp_filter_type == SWITCHABLE) { - int interp_factor = Q / 3; /* denominator is 256 */ - int count[VP9_SWITCHABLE_FILTERS]; - int tot_count = 0, c = 0, thr; - int i, j; - for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) { - count[i] = 0; - for (j = 0; j <= VP9_SWITCHABLE_FILTERS; ++j) { - count[i] += cpi->switchable_interp_count[j][i]; - } - tot_count += count[i]; - } - - thr = ((tot_count * interp_factor + 128) >> 8); - for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) { - c += (count[i] >= thr); - } - if (c == 1) { - /* Mostly one filter is used. So set the filter at frame level */ - for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) { - if (count[i]) { - cm->mcomp_filter_type = vp9_switchable_interp[i]; - Loop = TRUE; /* Make sure to loop since the filter changed */ - break; - } - } - } - } - - if (Loop == FALSE && cm->frame_type != KEY_FRAME && sf->search_best_filter) { - if (mcomp_filter_index < mcomp_filters) { - INT64 err = vp9_calc_ss_err(cpi->Source, - &cm->yv12_fb[cm->new_fb_idx]); - INT64 rate = cpi->projected_frame_size << 8; - mcomp_filter_cost[mcomp_filter_index] = - (RDCOST(cpi->RDMULT, cpi->RDDIV, rate, err)); - mcomp_filter_index++; - if (mcomp_filter_index < mcomp_filters) { - cm->mcomp_filter_type = mcomp_filters_to_search[mcomp_filter_index]; - loop_count = -1; - Loop = TRUE; - } else { - int f; - INT64 best_cost = mcomp_filter_cost[0]; - int mcomp_best_filter = mcomp_filters_to_search[0]; - for (f = 1; f < mcomp_filters; f++) { - if (mcomp_filter_cost[f] < best_cost) { - mcomp_best_filter = mcomp_filters_to_search[f]; - best_cost = mcomp_filter_cost[f]; - } - } - if (mcomp_best_filter != mcomp_filters_to_search[mcomp_filters - 1]) { - loop_count = -1; - Loop = TRUE; - cm->mcomp_filter_type = mcomp_best_filter; - } - /* - printf(" best filter = %d, ( ", 
mcomp_best_filter); - for (f=0;f<mcomp_filters; f++) printf("%d ", mcomp_filter_cost[f]); - printf(")\n"); - */ - } -#if RESET_FOREACH_FILTER - if (Loop == TRUE) { - overshoot_seen = FALSE; - undershoot_seen = FALSE; - zbin_oq_low = zbin_oq_low0; - zbin_oq_high = zbin_oq_high0; - q_low = q_low0; - q_high = q_high0; - Q = Q0; - cpi->zbin_over_quant = last_zbin_oq = last_zbin_oq0; - cpi->rate_correction_factor = rate_correction_factor0; - cpi->gf_rate_correction_factor = gf_rate_correction_factor0; - cpi->active_best_quality = active_best_quality0; - cpi->active_worst_quality = active_worst_quality0; - } -#endif - } - } - - if (Loop == TRUE) { - loop_count++; -#if CONFIG_INTERNAL_STATS - cpi->tot_recode_hits++; -#endif - } - } while (Loop == TRUE); - - // Special case code to reduce pulsing when key frames are forced at a - // fixed interval. Note the reconstruction error if it is the frame before - // the force key frame - if (cpi->next_key_frame_forced && (cpi->twopass.frames_to_key == 0)) { - cpi->ambient_err = vp9_calc_ss_err(cpi->Source, - &cm->yv12_fb[cm->new_fb_idx]); - } - - // This frame's MVs are saved and will be used in next frame's MV - // prediction. Last frame has one more line(add to bottom) and one - // more column(add to right) than cm->mip. The edge elements are - // initialized to 0. - if (cm->show_frame) { // do not save for altref frame - int mb_row; - int mb_col; - MODE_INFO *tmp = cm->mip; - - if (cm->frame_type != KEY_FRAME) { - for (mb_row = 0; mb_row < cm->mb_rows + 1; mb_row ++) { - for (mb_col = 0; mb_col < cm->mb_cols + 1; mb_col ++) { - if (tmp->mbmi.ref_frame != INTRA_FRAME) - cpi->lfmv[mb_col + mb_row * (cm->mode_info_stride + 1)].as_int = tmp->mbmi.mv[0].as_int; - - cpi->lf_ref_frame_sign_bias[mb_col + mb_row * (cm->mode_info_stride + 1)] = cm->ref_frame_sign_bias[tmp->mbmi.ref_frame]; - cpi->lf_ref_frame[mb_col + mb_row * (cm->mode_info_stride + 1)] = tmp->mbmi.ref_frame; - tmp++; - } - } - } - } - - // Update the GF useage maps. 
- // This is done after completing the compression of a frame when all modes - // etc. are finalized but before loop filter - vp9_update_gf_useage_maps(cpi, cm, &cpi->mb); - - if (cm->frame_type == KEY_FRAME) - cm->refresh_last_frame = 1; - -#if 0 - { - FILE *f = fopen("gfactive.stt", "a"); - fprintf(f, "%8d %8d %8d %8d %8d\n", cm->current_video_frame, (100 * cpi->gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols), cpi->this_iiratio, cpi->next_iiratio, cm->refresh_golden_frame); - fclose(f); - } -#endif - - cm->frame_to_show = &cm->yv12_fb[cm->new_fb_idx]; - -#if WRITE_RECON_BUFFER - if (cm->show_frame) - write_cx_frame_to_file(cm->frame_to_show, - cm->current_video_frame); - else - write_cx_frame_to_file(cm->frame_to_show, - cm->current_video_frame + 1000); -#endif - - // Pick the loop filter level for the frame. - loopfilter_frame(cpi, cm); - - // build the bitstream - cpi->dummy_packing = 0; - vp9_pack_bitstream(cpi, dest, size); - - if (cpi->mb.e_mbd.update_mb_segmentation_map) { - update_reference_segmentation_map(cpi); - } - -#if CONFIG_PRED_FILTER - // Select the prediction filtering mode to use for the - // next frame based on the current frame selections - if (cm->frame_type != KEY_FRAME) - select_pred_filter_mode(cpi); -#endif - - update_reference_frames(cm); - vp9_copy(cpi->common.fc.coef_counts, cpi->coef_counts); - vp9_copy(cpi->common.fc.hybrid_coef_counts, cpi->hybrid_coef_counts); - vp9_copy(cpi->common.fc.coef_counts_8x8, cpi->coef_counts_8x8); - vp9_copy(cpi->common.fc.hybrid_coef_counts_8x8, cpi->hybrid_coef_counts_8x8); - vp9_copy(cpi->common.fc.coef_counts_16x16, cpi->coef_counts_16x16); - vp9_copy(cpi->common.fc.hybrid_coef_counts_16x16, - cpi->hybrid_coef_counts_16x16); - vp9_adapt_coef_probs(&cpi->common); - if (cpi->common.frame_type != KEY_FRAME) { - vp9_copy(cpi->common.fc.ymode_counts, cpi->ymode_count); - vp9_copy(cpi->common.fc.uv_mode_counts, cpi->y_uv_mode_count); - vp9_copy(cpi->common.fc.bmode_counts, cpi->bmode_count); 
- vp9_copy(cpi->common.fc.i8x8_mode_counts, cpi->i8x8_mode_count); - vp9_copy(cpi->common.fc.sub_mv_ref_counts, cpi->sub_mv_ref_count); - vp9_copy(cpi->common.fc.mbsplit_counts, cpi->mbsplit_count); - vp9_adapt_mode_probs(&cpi->common); - - cpi->common.fc.NMVcount = cpi->NMVcount; - vp9_adapt_nmv_probs(&cpi->common, cpi->mb.e_mbd.allow_high_precision_mv); - vp9_update_mode_context(&cpi->common); - } - - /* Move storing frame_type out of the above loop since it is also - * needed in motion search besides loopfilter */ - cm->last_frame_type = cm->frame_type; - - // Keep a copy of the size estimate used in the loop - loop_size_estimate = cpi->projected_frame_size; - - // Update rate control heuristics - cpi->total_byte_count += (*size); - cpi->projected_frame_size = (*size) << 3; - - if (!active_worst_qchanged) - vp9_update_rate_correction_factors(cpi, 2); - - cpi->last_q[cm->frame_type] = cm->base_qindex; - - // Keep record of last boosted (KF/KF/ARF) Q value. - // If the current frame is coded at a lower Q then we also update it. - // If all mbs in this group are skipped only update if the Q value is - // better than that already stored. - // This is used to help set quality in forced key frames to reduce popping - if ((cm->base_qindex < cpi->last_boosted_qindex) || - ((cpi->static_mb_pct < 100) && - ((cm->frame_type == KEY_FRAME) || - cm->refresh_alt_ref_frame || - (cm->refresh_golden_frame && !cpi->is_src_frame_alt_ref)))) { - cpi->last_boosted_qindex = cm->base_qindex; - } - - if (cm->frame_type == KEY_FRAME) { - vp9_adjust_key_frame_context(cpi); - } - - // Keep a record of ambient average Q. 
- if (cm->frame_type != KEY_FRAME) - cpi->avg_frame_qindex = (2 + 3 * cpi->avg_frame_qindex + cm->base_qindex) >> 2; - - // Keep a record from which we can calculate the average Q excluding GF updates and key frames - if ((cm->frame_type != KEY_FRAME) && !cm->refresh_golden_frame && !cm->refresh_alt_ref_frame) { - cpi->ni_frames++; - cpi->tot_q += vp9_convert_qindex_to_q(Q); - cpi->avg_q = cpi->tot_q / (double)cpi->ni_frames; - - // Calculate the average Q for normal inter frames (not key or GFU - // frames). - cpi->ni_tot_qi += Q; - cpi->ni_av_qi = (cpi->ni_tot_qi / cpi->ni_frames); - } - - // Update the buffer level variable. - // Non-viewable frames are a special case and are treated as pure overhead. - if (!cm->show_frame) - cpi->bits_off_target -= cpi->projected_frame_size; - else - cpi->bits_off_target += cpi->av_per_frame_bandwidth - cpi->projected_frame_size; - - // Clip the buffer level at the maximum buffer size - if (cpi->bits_off_target > cpi->oxcf.maximum_buffer_size) - cpi->bits_off_target = cpi->oxcf.maximum_buffer_size; - - // Rolling monitors of whether we are over or underspending used to help regulate min and Max Q in two pass. 
- cpi->rolling_target_bits = ((cpi->rolling_target_bits * 3) + cpi->this_frame_target + 2) / 4; - cpi->rolling_actual_bits = ((cpi->rolling_actual_bits * 3) + cpi->projected_frame_size + 2) / 4; - cpi->long_rolling_target_bits = ((cpi->long_rolling_target_bits * 31) + cpi->this_frame_target + 16) / 32; - cpi->long_rolling_actual_bits = ((cpi->long_rolling_actual_bits * 31) + cpi->projected_frame_size + 16) / 32; - - // Actual bits spent - cpi->total_actual_bits += cpi->projected_frame_size; - - // Debug stats - cpi->total_target_vs_actual += (cpi->this_frame_target - cpi->projected_frame_size); - - cpi->buffer_level = cpi->bits_off_target; - - // Update bits left to the kf and gf groups to account for overshoot or undershoot on these frames - if (cm->frame_type == KEY_FRAME) { - cpi->twopass.kf_group_bits += cpi->this_frame_target - cpi->projected_frame_size; - - if (cpi->twopass.kf_group_bits < 0) - cpi->twopass.kf_group_bits = 0; - } else if (cm->refresh_golden_frame || cm->refresh_alt_ref_frame) { - cpi->twopass.gf_group_bits += cpi->this_frame_target - cpi->projected_frame_size; - - if (cpi->twopass.gf_group_bits < 0) - cpi->twopass.gf_group_bits = 0; - } - - // Update the skip mb flag probabilities based on the distribution seen - // in this frame. 
- update_base_skip_probs(cpi); - -#if 0 //CONFIG_NEW_MVREF && CONFIG_INTERNAL_STATS - { - FILE *f = fopen("mv_ref_dist.stt", "a"); - unsigned int i; - for (i = 0; i < MAX_MV_REFS; ++i) { - fprintf(f, "%10d", cpi->best_ref_index_counts[0][i]); - } - fprintf(f, "\n" ); - - fclose(f); - } -#endif - -#if 0// 1 && CONFIG_INTERNAL_STATS - { - FILE *f = fopen("tmp.stt", "a"); - int recon_err; - - vp9_clear_system_state(); // __asm emms; - - recon_err = vp9_calc_ss_err(cpi->Source, - &cm->yv12_fb[cm->new_fb_idx]); - - if (cpi->twopass.total_left_stats->coded_error != 0.0) - fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d" - "%7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f" - "%6d %5d %5d %5d %8d %8.2f %10d %10.3f" - "%10.3f %8d %10d %10d %10d\n", - cpi->common.current_video_frame, cpi->this_frame_target, - cpi->projected_frame_size, loop_size_estimate, - (cpi->projected_frame_size - cpi->this_frame_target), - (int)cpi->total_target_vs_actual, - (cpi->oxcf.starting_buffer_level - cpi->bits_off_target), - (int)cpi->total_actual_bits, - vp9_convert_qindex_to_q(cm->base_qindex), - (double)vp9_dc_quant(cm->base_qindex, 0) / 4.0, - vp9_convert_qindex_to_q(cpi->active_best_quality), - vp9_convert_qindex_to_q(cpi->active_worst_quality), - cpi->avg_q, - vp9_convert_qindex_to_q(cpi->ni_av_qi), - vp9_convert_qindex_to_q(cpi->cq_target_quality), - cpi->zbin_over_quant, - // cpi->avg_frame_qindex, cpi->zbin_over_quant, - cm->refresh_golden_frame, cm->refresh_alt_ref_frame, - cm->frame_type, cpi->gfu_boost, - cpi->twopass.est_max_qcorrection_factor, - (int)cpi->twopass.bits_left, - cpi->twopass.total_left_stats->coded_error, - (double)cpi->twopass.bits_left / - cpi->twopass.total_left_stats->coded_error, - cpi->tot_recode_hits, recon_err, cpi->kf_boost, - cpi->kf_zeromotion_pct); - else - fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d" - "%7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f" - "%6d %5d %5d %5d %8d %8.2f %10d %10.3f" - "%8d %10d %10d %10d\n", - cpi->common.current_video_frame, - 
cpi->this_frame_target, cpi->projected_frame_size, - loop_size_estimate, - (cpi->projected_frame_size - cpi->this_frame_target), - (int)cpi->total_target_vs_actual, - (cpi->oxcf.starting_buffer_level - cpi->bits_off_target), - (int)cpi->total_actual_bits, - vp9_convert_qindex_to_q(cm->base_qindex), - (double)vp9_dc_quant(cm->base_qindex, 0) / 4.0, - vp9_convert_qindex_to_q(cpi->active_best_quality), - vp9_convert_qindex_to_q(cpi->active_worst_quality), - cpi->avg_q, - vp9_convert_qindex_to_q(cpi->ni_av_qi), - vp9_convert_qindex_to_q(cpi->cq_target_quality), - cpi->zbin_over_quant, - // cpi->avg_frame_qindex, cpi->zbin_over_quant, - cm->refresh_golden_frame, cm->refresh_alt_ref_frame, - cm->frame_type, cpi->gfu_boost, - cpi->twopass.est_max_qcorrection_factor, - (int)cpi->twopass.bits_left, - cpi->twopass.total_left_stats->coded_error, - cpi->tot_recode_hits, recon_err, cpi->kf_boost, - cpi->kf_zeromotion_pct); - - fclose(f); - - if (0) { - FILE *fmodes = fopen("Modes.stt", "a"); - int i; - - fprintf(fmodes, "%6d:%1d:%1d:%1d ", - cpi->common.current_video_frame, - cm->frame_type, cm->refresh_golden_frame, - cm->refresh_alt_ref_frame); - - for (i = 0; i < MAX_MODES; i++) - fprintf(fmodes, "%5d ", cpi->mode_chosen_counts[i]); - - fprintf(fmodes, "\n"); - - fclose(fmodes); - } - } - -#endif - -#if 0 - // Debug stats for segment feature experiments. 
- print_seg_map(cpi); -#endif - - // If this was a kf or Gf note the Q - if ((cm->frame_type == KEY_FRAME) || cm->refresh_golden_frame || cm->refresh_alt_ref_frame) - cm->last_kf_gf_q = cm->base_qindex; - - if (cm->refresh_golden_frame == 1) - cm->frame_flags = cm->frame_flags | FRAMEFLAGS_GOLDEN; - else - cm->frame_flags = cm->frame_flags&~FRAMEFLAGS_GOLDEN; - - if (cm->refresh_alt_ref_frame == 1) - cm->frame_flags = cm->frame_flags | FRAMEFLAGS_ALTREF; - else - cm->frame_flags = cm->frame_flags&~FRAMEFLAGS_ALTREF; - - - if (cm->refresh_last_frame & cm->refresh_golden_frame) // both refreshed - cpi->gold_is_last = 1; - else if (cm->refresh_last_frame ^ cm->refresh_golden_frame) // 1 refreshed but not the other - cpi->gold_is_last = 0; - - if (cm->refresh_last_frame & cm->refresh_alt_ref_frame) // both refreshed - cpi->alt_is_last = 1; - else if (cm->refresh_last_frame ^ cm->refresh_alt_ref_frame) // 1 refreshed but not the other - cpi->alt_is_last = 0; - - if (cm->refresh_alt_ref_frame & cm->refresh_golden_frame) // both refreshed - cpi->gold_is_alt = 1; - else if (cm->refresh_alt_ref_frame ^ cm->refresh_golden_frame) // 1 refreshed but not the other - cpi->gold_is_alt = 0; - - cpi->ref_frame_flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG; - - if (cpi->gold_is_last) - cpi->ref_frame_flags &= ~VP9_GOLD_FLAG; - - if (cpi->alt_is_last) - cpi->ref_frame_flags &= ~VP9_ALT_FLAG; - - if (cpi->gold_is_alt) - cpi->ref_frame_flags &= ~VP9_ALT_FLAG; - - if (cpi->oxcf.play_alternate && cm->refresh_alt_ref_frame && (cm->frame_type != KEY_FRAME)) - // Update the alternate reference frame stats as appropriate. - update_alt_ref_frame_stats(cpi); - else - // Update the Golden frame stats as appropriate. - update_golden_frame_stats(cpi); - - if (cm->frame_type == KEY_FRAME) { - // Tell the caller that the frame was coded as a key frame - *frame_flags = cm->frame_flags | FRAMEFLAGS_KEY; - - // As this frame is a key frame the next defaults to an inter frame. 
- cm->frame_type = INTER_FRAME; - } else { - *frame_flags = cm->frame_flags&~FRAMEFLAGS_KEY; - } - - // Clear the one shot update flags for segmentation map and mode/ref loop filter deltas. - xd->update_mb_segmentation_map = 0; - xd->update_mb_segmentation_data = 0; - xd->mode_ref_lf_delta_update = 0; - - - // Dont increment frame counters if this was an altref buffer update not a real frame - if (cm->show_frame) { - cm->current_video_frame++; - cpi->frames_since_key++; - } - - // reset to normal state now that we are done. - - - -#if 0 - { - char filename[512]; - FILE *recon_file; - sprintf(filename, "enc%04d.yuv", (int) cm->current_video_frame); - recon_file = fopen(filename, "wb"); - fwrite(cm->yv12_fb[cm->lst_fb_idx].buffer_alloc, - cm->yv12_fb[cm->lst_fb_idx].frame_size, 1, recon_file); - fclose(recon_file); - } -#endif -#ifdef OUTPUT_YUV_REC - vp9_write_yuv_rec_frame(cm); -#endif - - if (cm->show_frame) { - vpx_memcpy(cm->prev_mip, cm->mip, - (cm->mb_cols + 1) * (cm->mb_rows + 1)* sizeof(MODE_INFO)); - } else { - vpx_memset(cm->prev_mip, 0, - (cm->mb_cols + 1) * (cm->mb_rows + 1)* sizeof(MODE_INFO)); - } -} - -static void Pass2Encode(VP9_COMP *cpi, unsigned long *size, - unsigned char *dest, unsigned int *frame_flags) { - - if (!cpi->common.refresh_alt_ref_frame) - vp9_second_pass(cpi); - - encode_frame_to_data_rate(cpi, size, dest, frame_flags); - cpi->twopass.bits_left -= 8 * *size; - - if (!cpi->common.refresh_alt_ref_frame) { - double lower_bounds_min_rate = FRAME_OVERHEAD_BITS * cpi->oxcf.frame_rate; - double two_pass_min_rate = (double)(cpi->oxcf.target_bandwidth - * cpi->oxcf.two_pass_vbrmin_section / 100); - - if (two_pass_min_rate < lower_bounds_min_rate) - two_pass_min_rate = lower_bounds_min_rate; - - cpi->twopass.bits_left += (int64_t)(two_pass_min_rate / cpi->oxcf.frame_rate); - } -} - -// For ARM NEON, d8-d15 are callee-saved registers, and need to be saved by us. 
-#if HAVE_ARMV7 -extern void vp9_push_neon(int64_t *store); -extern void vp9_pop_neon(int64_t *store); -#endif - - -int vp9_receive_raw_frame(VP9_PTR ptr, unsigned int frame_flags, - YV12_BUFFER_CONFIG *sd, int64_t time_stamp, - int64_t end_time) { -#if HAVE_ARMV7 - int64_t store_reg[8]; -#endif - VP9_COMP *cpi = (VP9_COMP *) ptr; - VP9_COMMON *cm = &cpi->common; - struct vpx_usec_timer timer; - int res = 0; - -#if HAVE_ARMV7 -#if CONFIG_RUNTIME_CPU_DETECT - if (cm->rtcd.flags & HAS_NEON) -#endif - { - vp9_push_neon(store_reg); - } -#endif - - vpx_usec_timer_start(&timer); - if (vp9_lookahead_push(cpi->lookahead, sd, time_stamp, end_time, frame_flags, - cpi->active_map_enabled ? cpi->active_map : NULL)) - res = -1; - cm->clr_type = sd->clrtype; - vpx_usec_timer_mark(&timer); - cpi->time_receive_data += vpx_usec_timer_elapsed(&timer); - -#if HAVE_ARMV7 -#if CONFIG_RUNTIME_CPU_DETECT - if (cm->rtcd.flags & HAS_NEON) -#endif - { - vp9_pop_neon(store_reg); - } -#endif - - return res; -} - - -static int frame_is_reference(const VP9_COMP *cpi) { - const VP9_COMMON *cm = &cpi->common; - const MACROBLOCKD *xd = &cpi->mb.e_mbd; - - return cm->frame_type == KEY_FRAME || cm->refresh_last_frame - || cm->refresh_golden_frame || cm->refresh_alt_ref_frame - || cm->copy_buffer_to_gf || cm->copy_buffer_to_arf - || cm->refresh_entropy_probs - || xd->mode_ref_lf_delta_update - || xd->update_mb_segmentation_map || xd->update_mb_segmentation_data; -} - - -int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, - unsigned long *size, unsigned char *dest, - int64_t *time_stamp, int64_t *time_end, int flush) { -#if HAVE_ARMV7 - int64_t store_reg[8]; -#endif - VP9_COMP *cpi = (VP9_COMP *) ptr; - VP9_COMMON *cm = &cpi->common; - struct vpx_usec_timer cmptimer; - YV12_BUFFER_CONFIG *force_src_buffer = NULL; - - if (!cpi) - return -1; - -#if HAVE_ARMV7 -#if CONFIG_RUNTIME_CPU_DETECT - if (cm->rtcd.flags & HAS_NEON) -#endif - { - vp9_push_neon(store_reg); - } -#endif - - 
vpx_usec_timer_start(&cmptimer); - - cpi->source = NULL; - - cpi->mb.e_mbd.allow_high_precision_mv = ALTREF_HIGH_PRECISION_MV; - // Should we code an alternate reference frame - if (cpi->oxcf.play_alternate && - cpi->source_alt_ref_pending) { - if ((cpi->source = vp9_lookahead_peek(cpi->lookahead, - cpi->frames_till_gf_update_due))) { - cpi->alt_ref_source = cpi->source; - if (cpi->oxcf.arnr_max_frames > 0) { - vp9_temporal_filter_prepare_c(cpi, - cpi->frames_till_gf_update_due); - force_src_buffer = &cpi->alt_ref_buffer; - } - cm->frames_till_alt_ref_frame = cpi->frames_till_gf_update_due; - cm->refresh_alt_ref_frame = 1; - cm->refresh_golden_frame = 0; - cm->refresh_last_frame = 0; - cm->show_frame = 0; - cpi->source_alt_ref_pending = FALSE; // Clear Pending altf Ref flag. - cpi->is_src_frame_alt_ref = 0; - } - } - - if (!cpi->source) { - if ((cpi->source = vp9_lookahead_pop(cpi->lookahead, flush))) { - cm->show_frame = 1; - - cpi->is_src_frame_alt_ref = cpi->alt_ref_source - && (cpi->source == cpi->alt_ref_source); - - if (cpi->is_src_frame_alt_ref) - cpi->alt_ref_source = NULL; - } - } - - if (cpi->source) { - cpi->un_scaled_source = - cpi->Source = force_src_buffer ? 
force_src_buffer : &cpi->source->img; - *time_stamp = cpi->source->ts_start; - *time_end = cpi->source->ts_end; - *frame_flags = cpi->source->flags; - } else { - *size = 0; - if (flush && cpi->pass == 1 && !cpi->twopass.first_pass_done) { - vp9_end_first_pass(cpi); /* get last stats packet */ - cpi->twopass.first_pass_done = 1; - } - -#if HAVE_ARMV7 -#if CONFIG_RUNTIME_CPU_DETECT - if (cm->rtcd.flags & HAS_NEON) -#endif - { - vp9_pop_neon(store_reg); - } -#endif - return -1; - } - - if (cpi->source->ts_start < cpi->first_time_stamp_ever) { - cpi->first_time_stamp_ever = cpi->source->ts_start; - cpi->last_end_time_stamp_seen = cpi->source->ts_start; - } - - // adjust frame rates based on timestamps given - if (!cm->refresh_alt_ref_frame) { - int64_t this_duration; - int step = 0; - - if (cpi->source->ts_start == cpi->first_time_stamp_ever) { - this_duration = cpi->source->ts_end - cpi->source->ts_start; - step = 1; - } else { - int64_t last_duration; - - this_duration = cpi->source->ts_end - cpi->last_end_time_stamp_seen; - last_duration = cpi->last_end_time_stamp_seen - - cpi->last_time_stamp_seen; - // do a step update if the duration changes by 10% - if (last_duration) - step = ((this_duration - last_duration) * 10 / last_duration); - } - - if (this_duration) { - if (step) - vp9_new_frame_rate(cpi, 10000000.0 / this_duration); - else { - double avg_duration, interval; - - /* Average this frame's rate into the last second's average - * frame rate. If we haven't seen 1 second yet, then average - * over the whole interval seen. 
- */ - interval = cpi->source->ts_end - cpi->first_time_stamp_ever; - if (interval > 10000000.0) - interval = 10000000; - - avg_duration = 10000000.0 / cpi->oxcf.frame_rate; - avg_duration *= (interval - avg_duration + this_duration); - avg_duration /= interval; - - vp9_new_frame_rate(cpi, 10000000.0 / avg_duration); - } - } - - cpi->last_time_stamp_seen = cpi->source->ts_start; - cpi->last_end_time_stamp_seen = cpi->source->ts_end; - } - - // start with a 0 size frame - *size = 0; - - // Clear down mmx registers - vp9_clear_system_state(); // __asm emms; - - cm->frame_type = INTER_FRAME; - cm->frame_flags = *frame_flags; - -#if 0 - - if (cm->refresh_alt_ref_frame) { - // cm->refresh_golden_frame = 1; - cm->refresh_golden_frame = 0; - cm->refresh_last_frame = 0; - } else { - cm->refresh_golden_frame = 0; - cm->refresh_last_frame = 1; - } - -#endif - /* find a free buffer for the new frame */ - { - int i = 0; - for (; i < NUM_YV12_BUFFERS; i++) { - if (!cm->yv12_fb[i].flags) { - cm->new_fb_idx = i; - break; - } - } - - assert(i < NUM_YV12_BUFFERS); - } - if (cpi->pass == 1) { - Pass1Encode(cpi, size, dest, frame_flags); - } else if (cpi->pass == 2) { - Pass2Encode(cpi, size, dest, frame_flags); - } else { - encode_frame_to_data_rate(cpi, size, dest, frame_flags); - } - - if (cm->refresh_entropy_probs) { - if (cm->refresh_alt_ref_frame) - vpx_memcpy(&cm->lfc_a, &cm->fc, sizeof(cm->fc)); - else - vpx_memcpy(&cm->lfc, &cm->fc, sizeof(cm->fc)); - } - - // if its a dropped frame honor the requests on subsequent frames - if (*size > 0) { - cpi->droppable = !frame_is_reference(cpi); - - // return to normal state - cm->refresh_entropy_probs = 1; - cm->refresh_alt_ref_frame = 0; - cm->refresh_golden_frame = 0; - cm->refresh_last_frame = 1; - cm->frame_type = INTER_FRAME; - - } - - vpx_usec_timer_mark(&cmptimer); - cpi->time_compress_data += vpx_usec_timer_elapsed(&cmptimer); - - if (cpi->b_calculate_psnr && cpi->pass != 1 && cm->show_frame) { - generate_psnr_packet(cpi); - } 
- -#if CONFIG_INTERNAL_STATS - - if (cpi->pass != 1) { - cpi->bytes += *size; - - if (cm->show_frame) { - - cpi->count++; - - if (cpi->b_calculate_psnr) { - double ye, ue, ve; - double frame_psnr; - YV12_BUFFER_CONFIG *orig = cpi->Source; - YV12_BUFFER_CONFIG *recon = cpi->common.frame_to_show; - YV12_BUFFER_CONFIG *pp = &cm->post_proc_buffer; - int y_samples = orig->y_height * orig->y_width; - int uv_samples = orig->uv_height * orig->uv_width; - int t_samples = y_samples + 2 * uv_samples; - int64_t sq_error; - - ye = calc_plane_error(orig->y_buffer, orig->y_stride, - recon->y_buffer, recon->y_stride, orig->y_width, - orig->y_height); - - ue = calc_plane_error(orig->u_buffer, orig->uv_stride, - recon->u_buffer, recon->uv_stride, orig->uv_width, - orig->uv_height); - - ve = calc_plane_error(orig->v_buffer, orig->uv_stride, - recon->v_buffer, recon->uv_stride, orig->uv_width, - orig->uv_height); - - sq_error = ye + ue + ve; - - frame_psnr = vp9_mse2psnr(t_samples, 255.0, sq_error); - - cpi->total_y += vp9_mse2psnr(y_samples, 255.0, ye); - cpi->total_u += vp9_mse2psnr(uv_samples, 255.0, ue); - cpi->total_v += vp9_mse2psnr(uv_samples, 255.0, ve); - cpi->total_sq_error += sq_error; - cpi->total += frame_psnr; - { - double frame_psnr2, frame_ssim2 = 0; - double weight = 0; -#if CONFIG_POSTPROC - vp9_deblock(cm->frame_to_show, &cm->post_proc_buffer, cm->filter_level * 10 / 6, 1, 0, IF_RTCD(&cm->rtcd.postproc)); -#endif - vp9_clear_system_state(); - - ye = calc_plane_error(orig->y_buffer, orig->y_stride, - pp->y_buffer, pp->y_stride, orig->y_width, - orig->y_height); - - ue = calc_plane_error(orig->u_buffer, orig->uv_stride, - pp->u_buffer, pp->uv_stride, orig->uv_width, - orig->uv_height); - - ve = calc_plane_error(orig->v_buffer, orig->uv_stride, - pp->v_buffer, pp->uv_stride, orig->uv_width, - orig->uv_height); - - sq_error = ye + ue + ve; - - frame_psnr2 = vp9_mse2psnr(t_samples, 255.0, sq_error); - - cpi->totalp_y += vp9_mse2psnr(y_samples, 255.0, ye); - cpi->totalp_u 
+= vp9_mse2psnr(uv_samples, 255.0, ue); - cpi->totalp_v += vp9_mse2psnr(uv_samples, 255.0, ve); - cpi->total_sq_error2 += sq_error; - cpi->totalp += frame_psnr2; - - frame_ssim2 = vp9_calc_ssim(cpi->Source, - &cm->post_proc_buffer, 1, &weight); - - cpi->summed_quality += frame_ssim2 * weight; - cpi->summed_weights += weight; -#if 0 - { - FILE *f = fopen("q_used.stt", "a"); - fprintf(f, "%5d : Y%f7.3:U%f7.3:V%f7.3:F%f7.3:S%7.3f\n", - cpi->common.current_video_frame, y2, u2, v2, - frame_psnr2, frame_ssim2); - fclose(f); - } -#endif - } - } - - if (cpi->b_calculate_ssimg) { - double y, u, v, frame_all; - frame_all = vp9_calc_ssimg(cpi->Source, cm->frame_to_show, - &y, &u, &v); - cpi->total_ssimg_y += y; - cpi->total_ssimg_u += u; - cpi->total_ssimg_v += v; - cpi->total_ssimg_all += frame_all; - } - - } - } - -#endif - -#if HAVE_ARMV7 -#if CONFIG_RUNTIME_CPU_DETECT - if (cm->rtcd.flags & HAS_NEON) -#endif - { - vp9_pop_neon(store_reg); - } -#endif - - return 0; -} - -int vp9_get_preview_raw_frame(VP9_PTR comp, YV12_BUFFER_CONFIG *dest, - vp9_ppflags_t *flags) { - VP9_COMP *cpi = (VP9_COMP *) comp; - - if (cpi->common.refresh_alt_ref_frame) - return -1; - else { - int ret; -#if CONFIG_POSTPROC - ret = vp9_post_proc_frame(&cpi->common, dest, flags); -#else - - if (cpi->common.frame_to_show) { - *dest = *cpi->common.frame_to_show; - dest->y_width = cpi->common.Width; - dest->y_height = cpi->common.Height; - dest->uv_height = cpi->common.Height / 2; - ret = 0; - } else { - ret = -1; - } - -#endif // !CONFIG_POSTPROC - vp9_clear_system_state(); - return ret; - } -} - -int vp9_set_roimap(VP9_PTR comp, unsigned char *map, unsigned int rows, - unsigned int cols, int delta_q[4], int delta_lf[4], - unsigned int threshold[4]) { - VP9_COMP *cpi = (VP9_COMP *) comp; - signed char feature_data[SEG_LVL_MAX][MAX_MB_SEGMENTS]; - MACROBLOCKD *xd = &cpi->mb.e_mbd; - int i; - - if (cpi->common.mb_rows != rows || cpi->common.mb_cols != cols) - return -1; - - if (!map) { - 
vp9_disable_segmentation((VP9_PTR)cpi); - return 0; - } - - // Set the segmentation Map - vp9_set_segmentation_map((VP9_PTR)cpi, map); - - // Activate segmentation. - vp9_enable_segmentation((VP9_PTR)cpi); - - // Set up the quant segment data - feature_data[SEG_LVL_ALT_Q][0] = delta_q[0]; - feature_data[SEG_LVL_ALT_Q][1] = delta_q[1]; - feature_data[SEG_LVL_ALT_Q][2] = delta_q[2]; - feature_data[SEG_LVL_ALT_Q][3] = delta_q[3]; - - // Set up the loop segment data s - feature_data[SEG_LVL_ALT_LF][0] = delta_lf[0]; - feature_data[SEG_LVL_ALT_LF][1] = delta_lf[1]; - feature_data[SEG_LVL_ALT_LF][2] = delta_lf[2]; - feature_data[SEG_LVL_ALT_LF][3] = delta_lf[3]; - - cpi->segment_encode_breakout[0] = threshold[0]; - cpi->segment_encode_breakout[1] = threshold[1]; - cpi->segment_encode_breakout[2] = threshold[2]; - cpi->segment_encode_breakout[3] = threshold[3]; - - // Enable the loop and quant changes in the feature mask - for (i = 0; i < 4; i++) { - if (delta_q[i]) - vp9_enable_segfeature(xd, i, SEG_LVL_ALT_Q); - else - vp9_disable_segfeature(xd, i, SEG_LVL_ALT_Q); - - if (delta_lf[i]) - vp9_enable_segfeature(xd, i, SEG_LVL_ALT_LF); - else - vp9_disable_segfeature(xd, i, SEG_LVL_ALT_LF); - } - - // Initialise the feature data structure - // SEGMENT_DELTADATA 0, SEGMENT_ABSDATA 1 - vp9_set_segment_data((VP9_PTR)cpi, &feature_data[0][0], SEGMENT_DELTADATA); - - return 0; -} - -int vp9_set_active_map(VP9_PTR comp, unsigned char *map, - unsigned int rows, unsigned int cols) { - VP9_COMP *cpi = (VP9_COMP *) comp; - - if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols) { - if (map) { - vpx_memcpy(cpi->active_map, map, rows * cols); - cpi->active_map_enabled = 1; - } else - cpi->active_map_enabled = 0; - - return 0; - } else { - // cpi->active_map_enabled = 0; - return -1; - } -} - -int vp9_set_internal_size(VP9_PTR comp, - VPX_SCALING horiz_mode, VPX_SCALING vert_mode) { - VP9_COMP *cpi = (VP9_COMP *) comp; - - if (horiz_mode <= ONETWO) - cpi->common.horiz_scale = 
horiz_mode; - else - return -1; - - if (vert_mode <= ONETWO) - cpi->common.vert_scale = vert_mode; - else - return -1; - - return 0; -} - - - -int vp9_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest) { - int i, j; - int Total = 0; - - unsigned char *src = source->y_buffer; - unsigned char *dst = dest->y_buffer; - - // Loop through the Y plane raw and reconstruction data summing (square differences) - for (i = 0; i < source->y_height; i += 16) { - for (j = 0; j < source->y_width; j += 16) { - unsigned int sse; - Total += vp9_mse16x16(src + j, source->y_stride, dst + j, dest->y_stride, - &sse); - } - - src += 16 * source->y_stride; - dst += 16 * dest->y_stride; - } - - return Total; -} - - -int vp9_get_quantizer(VP9_PTR c) { - VP9_COMP *cpi = (VP9_COMP *) c; - return cpi->common.base_qindex; -} diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h deleted file mode 100644 index 42cb97232..000000000 --- a/vp8/encoder/onyx_int.h +++ /dev/null @@ -1,788 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#ifndef __INC_ONYX_INT_H -#define __INC_ONYX_INT_H - -#include <stdio.h> -#include "vpx_ports/config.h" -#include "vp8/common/onyx.h" -#include "treewriter.h" -#include "tokenize.h" -#include "vp8/common/onyxc_int.h" -#include "variance.h" -#include "encodemb.h" -#include "quantize.h" -#include "vp8/common/entropy.h" -#include "vp8/common/entropymode.h" -#include "vpx_ports/mem.h" -#include "vpx/internal/vpx_codec_internal.h" -#include "mcomp.h" -#include "temporal_filter.h" -#include "vp8/common/findnearmv.h" -#include "lookahead.h" - -// #define SPEEDSTATS 1 -#define MIN_GF_INTERVAL 4 -#define DEFAULT_GF_INTERVAL 7 - -#define KEY_FRAME_CONTEXT 5 - -#define MAX_LAG_BUFFERS 25 - -#define AF_THRESH 25 -#define AF_THRESH2 100 -#define ARF_DECAY_THRESH 12 - -#if CONFIG_PRED_FILTER -#define MAX_MODES 54 -#else // CONFIG_PRED_FILTER -#define MAX_MODES 42 -#endif // CONFIG_PRED_FILTER - -#define MIN_THRESHMULT 32 -#define MAX_THRESHMULT 512 - -#define GF_ZEROMV_ZBIN_BOOST 12 -#define LF_ZEROMV_ZBIN_BOOST 6 -#define MV_ZBIN_BOOST 4 -#define ZBIN_OQ_MAX 192 - -#define VP9_TEMPORAL_ALT_REF 1 - -typedef struct { - nmv_context nmvc; - int nmvjointcost[MV_JOINTS]; - int nmvcosts[2][MV_VALS]; - int nmvcosts_hp[2][MV_VALS]; - -#ifdef MODE_STATS - // Stats - int y_modes[VP9_YMODES]; - int uv_modes[VP9_UV_MODES]; - int i8x8_modes[VP9_I8X8_MODES]; - int b_modes[B_MODE_COUNT]; - int inter_y_modes[MB_MODE_COUNT]; - int inter_uv_modes[VP9_UV_MODES]; - int inter_b_modes[B_MODE_COUNT]; -#endif - - vp9_prob segment_pred_probs[PREDICTION_PROBS]; - unsigned char ref_pred_probs_update[PREDICTION_PROBS]; - vp9_prob ref_pred_probs[PREDICTION_PROBS]; - vp9_prob prob_comppred[COMP_PRED_CONTEXTS]; - - unsigned char *last_frame_seg_map_copy; - - // 0 = Intra, Last, GF, ARF - signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS]; - // 0 = BPRED, ZERO_MV, MV, SPLIT - signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS]; - - vp9_prob coef_probs[BLOCK_TYPES] - 
[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; - vp9_prob hybrid_coef_probs[BLOCK_TYPES] - [COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; - - vp9_prob coef_probs_8x8[BLOCK_TYPES_8X8] - [COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; - vp9_prob hybrid_coef_probs_8x8[BLOCK_TYPES_8X8] - [COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; - - vp9_prob coef_probs_16x16[BLOCK_TYPES_16X16] - [COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; - vp9_prob hybrid_coef_probs_16x16[BLOCK_TYPES_16X16] - [COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; - - vp9_prob ymode_prob [VP9_YMODES - 1]; /* interframe intra mode probs */ - vp9_prob uv_mode_prob [VP9_YMODES][VP9_UV_MODES - 1]; - vp9_prob bmode_prob [VP9_BINTRAMODES - 1]; - vp9_prob i8x8_mode_prob [VP9_I8X8_MODES - 1]; - vp9_prob sub_mv_ref_prob [SUBMVREF_COUNT][VP9_SUBMVREFS - 1]; - vp9_prob mbsplit_prob [VP9_NUMMBSPLITS - 1]; - - vp9_prob switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1] - [VP9_SWITCHABLE_FILTERS - 1]; - - int mv_ref_ct[6][4][2]; - int mode_context[6][4]; - int mv_ref_ct_a[6][4][2]; - int mode_context_a[6][4]; - -} CODING_CONTEXT; - -typedef struct { - double frame; - double intra_error; - double coded_error; - double sr_coded_error; - double ssim_weighted_pred_err; - double pcnt_inter; - double pcnt_motion; - double pcnt_second_ref; - double pcnt_neutral; - double MVr; - double mvr_abs; - double MVc; - double mvc_abs; - double MVrv; - double MVcv; - double mv_in_out_count; - double new_mv_count; - double duration; - double count; -} -FIRSTPASS_STATS; - -typedef struct { - int frames_so_far; - double frame_intra_error; - double frame_coded_error; - double frame_pcnt_inter; - double frame_pcnt_motion; - double frame_mvr; - double frame_mvr_abs; - double frame_mvc; - double frame_mvc_abs; - -} ONEPASS_FRAMESTATS; - -typedef struct { - struct { - int err; - union { - int_mv mv; - MB_PREDICTION_MODE mode; - } m; - } ref[MAX_REF_FRAMES]; -} MBGRAPH_MB_STATS; - -typedef struct { - MBGRAPH_MB_STATS *mb_stats; -} 
MBGRAPH_FRAME_STATS; - -#if CONFIG_PRED_FILTER -typedef enum { - THR_ZEROMV, - THR_ZEROMV_FILT, - THR_DC, - - THR_NEARESTMV, - THR_NEARESTMV_FILT, - THR_NEARMV, - THR_NEARMV_FILT, - - THR_ZEROG, - THR_ZEROG_FILT, - THR_NEARESTG, - THR_NEARESTG_FILT, - - THR_ZEROA, - THR_ZEROA_FILT, - THR_NEARESTA, - THR_NEARESTA_FILT, - - THR_NEARG, - THR_NEARG_FILT, - THR_NEARA, - THR_NEARA_FILT, - - THR_V_PRED, - THR_H_PRED, - THR_D45_PRED, - THR_D135_PRED, - THR_D117_PRED, - THR_D153_PRED, - THR_D27_PRED, - THR_D63_PRED, - THR_TM, - - THR_NEWMV, - THR_NEWMV_FILT, - THR_NEWG, - THR_NEWG_FILT, - THR_NEWA, - THR_NEWA_FILT, - - THR_SPLITMV, - THR_SPLITG, - THR_SPLITA, - - THR_B_PRED, - THR_I8X8_PRED, - - THR_COMP_ZEROLG, - THR_COMP_NEARESTLG, - THR_COMP_NEARLG, - - THR_COMP_ZEROLA, - THR_COMP_NEARESTLA, - THR_COMP_NEARLA, - - THR_COMP_ZEROGA, - THR_COMP_NEARESTGA, - THR_COMP_NEARGA, - - THR_COMP_NEWLG, - THR_COMP_NEWLA, - THR_COMP_NEWGA, - - THR_COMP_SPLITLG, - THR_COMP_SPLITLA, - THR_COMP_SPLITGA, -} -THR_MODES; -#else -typedef enum { - THR_ZEROMV, - THR_DC, - - THR_NEARESTMV, - THR_NEARMV, - - THR_ZEROG, - THR_NEARESTG, - - THR_ZEROA, - THR_NEARESTA, - - THR_NEARG, - THR_NEARA, - - THR_V_PRED, - THR_H_PRED, - THR_D45_PRED, - THR_D135_PRED, - THR_D117_PRED, - THR_D153_PRED, - THR_D27_PRED, - THR_D63_PRED, - THR_TM, - - THR_NEWMV, - THR_NEWG, - THR_NEWA, - - THR_SPLITMV, - THR_SPLITG, - THR_SPLITA, - - THR_B_PRED, - THR_I8X8_PRED, - - THR_COMP_ZEROLG, - THR_COMP_NEARESTLG, - THR_COMP_NEARLG, - - THR_COMP_ZEROLA, - THR_COMP_NEARESTLA, - THR_COMP_NEARLA, - - THR_COMP_ZEROGA, - THR_COMP_NEARESTGA, - THR_COMP_NEARGA, - - THR_COMP_NEWLG, - THR_COMP_NEWLA, - THR_COMP_NEWGA, - - THR_COMP_SPLITLG, - THR_COMP_SPLITLA, - THR_COMP_SPLITGA -} -THR_MODES; -#endif - -typedef enum { - DIAMOND = 0, - NSTEP = 1, - HEX = 2 -} SEARCH_METHODS; - -typedef struct { - int RD; - SEARCH_METHODS search_method; - int improved_dct; - int auto_filter; - int recode_loop; - int iterative_sub_pixel; - int 
half_pixel_search; - int quarter_pixel_search; - int thresh_mult[MAX_MODES]; - int max_step_search_steps; - int first_step; - int optimize_coefficients; - int no_skip_block4x4_search; - int improved_mv_pred; - int search_best_filter; - -} SPEED_FEATURES; - -typedef struct { - MACROBLOCK mb; - int totalrate; -} MB_ROW_COMP; - -typedef struct { - TOKENEXTRA *start; - TOKENEXTRA *stop; -} TOKENLIST; - -typedef struct { - int ithread; - void *ptr1; - void *ptr2; -} ENCODETHREAD_DATA; -typedef struct { - int ithread; - void *ptr1; -} LPFTHREAD_DATA; - - -typedef struct VP9_ENCODER_RTCD { - VP9_COMMON_RTCD *common; - vp9_search_rtcd_vtable_t search; - vp9_temporal_rtcd_vtable_t temporal; -} VP9_ENCODER_RTCD; - -enum BlockSize { - BLOCK_16X8 = PARTITIONING_16X8, - BLOCK_8X16 = PARTITIONING_8X16, - BLOCK_8X8 = PARTITIONING_8X8, - BLOCK_4X4 = PARTITIONING_4X4, - BLOCK_16X16, - BLOCK_MAX_SEGMENTS, - BLOCK_32X32 = BLOCK_MAX_SEGMENTS, - BLOCK_MAX_SB_SEGMENTS, -}; - -typedef struct VP9_COMP { - - DECLARE_ALIGNED(16, short, Y1quant[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, unsigned char, Y1quant_shift[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, short, Y1zbin[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, short, Y1round[QINDEX_RANGE][16]); - - DECLARE_ALIGNED(16, short, Y2quant[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, unsigned char, Y2quant_shift[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, short, Y2zbin[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, short, Y2round[QINDEX_RANGE][16]); - - DECLARE_ALIGNED(16, short, UVquant[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, unsigned char, UVquant_shift[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, short, UVzbin[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, short, UVround[QINDEX_RANGE][16]); - - DECLARE_ALIGNED(16, short, zrun_zbin_boost_y1[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, short, zrun_zbin_boost_y2[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, short, zrun_zbin_boost_uv[QINDEX_RANGE][16]); - - DECLARE_ALIGNED(64, short, Y1zbin_8x8[QINDEX_RANGE][64]); - 
DECLARE_ALIGNED(64, short, Y2zbin_8x8[QINDEX_RANGE][64]); - DECLARE_ALIGNED(64, short, UVzbin_8x8[QINDEX_RANGE][64]); - DECLARE_ALIGNED(64, short, zrun_zbin_boost_y1_8x8[QINDEX_RANGE][64]); - DECLARE_ALIGNED(64, short, zrun_zbin_boost_y2_8x8[QINDEX_RANGE][64]); - DECLARE_ALIGNED(64, short, zrun_zbin_boost_uv_8x8[QINDEX_RANGE][64]); - - DECLARE_ALIGNED(16, short, Y1zbin_16x16[QINDEX_RANGE][256]); - DECLARE_ALIGNED(16, short, Y2zbin_16x16[QINDEX_RANGE][256]); - DECLARE_ALIGNED(16, short, UVzbin_16x16[QINDEX_RANGE][256]); - DECLARE_ALIGNED(16, short, zrun_zbin_boost_y1_16x16[QINDEX_RANGE][256]); - DECLARE_ALIGNED(16, short, zrun_zbin_boost_y2_16x16[QINDEX_RANGE][256]); - DECLARE_ALIGNED(16, short, zrun_zbin_boost_uv_16x16[QINDEX_RANGE][256]); - - MACROBLOCK mb; - VP9_COMMON common; - VP9_CONFIG oxcf; - - struct lookahead_ctx *lookahead; - struct lookahead_entry *source; - struct lookahead_entry *alt_ref_source; - - YV12_BUFFER_CONFIG *Source; - YV12_BUFFER_CONFIG *un_scaled_source; - YV12_BUFFER_CONFIG scaled_source; - - int source_alt_ref_pending; // frame in src_buffers has been identified to be encoded as an alt ref - int source_alt_ref_active; // an alt ref frame has been encoded and is usable - - int is_src_frame_alt_ref; // source of frame to encode is an exact copy of an alt ref frame - - int gold_is_last; // golden frame same as last frame ( short circuit gold searches) - int alt_is_last; // Alt reference frame same as last ( short circuit altref search) - int gold_is_alt; // don't do both alt and gold search ( just do gold). 
- - // int refresh_alt_ref_frame; - YV12_BUFFER_CONFIG last_frame_uf; - - TOKENEXTRA *tok; - unsigned int tok_count; - - - unsigned int frames_since_key; - unsigned int key_frame_frequency; - unsigned int this_key_frame_forced; - unsigned int next_key_frame_forced; - - // Ambient reconstruction err target for force key frames - int ambient_err; - - unsigned int mode_check_freq[MAX_MODES]; - unsigned int mode_test_hit_counts[MAX_MODES]; - unsigned int mode_chosen_counts[MAX_MODES]; - - int rd_thresh_mult[MAX_MODES]; - int rd_baseline_thresh[MAX_MODES]; - int rd_threshes[MAX_MODES]; - int64_t rd_comp_pred_diff[NB_PREDICTION_TYPES]; - int rd_prediction_type_threshes[4][NB_PREDICTION_TYPES]; - int comp_pred_count[COMP_PRED_CONTEXTS]; - int single_pred_count[COMP_PRED_CONTEXTS]; - // FIXME contextualize - int txfm_count[TX_SIZE_MAX]; - int txfm_count_8x8p[TX_SIZE_MAX - 1]; - int64_t rd_tx_select_diff[NB_TXFM_MODES]; - int rd_tx_select_threshes[4][NB_TXFM_MODES]; - - int RDMULT; - int RDDIV; - - CODING_CONTEXT coding_context; - - // Rate targetting variables - int64_t prediction_error; - int64_t last_prediction_error; - int64_t intra_error; - int64_t last_intra_error; - - int this_frame_target; - int projected_frame_size; - int last_q[2]; // Separate values for Intra/Inter - int last_boosted_qindex; // Last boosted GF/KF/ARF q - - double rate_correction_factor; - double key_frame_rate_correction_factor; - double gf_rate_correction_factor; - - int frames_till_gf_update_due; // Count down till next GF - int current_gf_interval; // GF interval chosen when we coded the last GF - - int gf_overspend_bits; // Total bits overspent becasue of GF boost (cumulative) - - int non_gf_bitrate_adjustment; // Used in the few frames following a GF to recover the extra bits spent in that GF - - int kf_overspend_bits; // Extra bits spent on key frames that need to be recovered on inter frames - int kf_bitrate_adjustment; // Current number of bit s to try and recover on each inter frame. 
- int max_gf_interval; - int baseline_gf_interval; - int active_arnr_frames; // <= cpi->oxcf.arnr_max_frames - - int64_t key_frame_count; - int prior_key_frame_distance[KEY_FRAME_CONTEXT]; - int per_frame_bandwidth; // Current section per frame bandwidth target - int av_per_frame_bandwidth; // Average frame size target for clip - int min_frame_bandwidth; // Minimum allocation that should be used for any frame - int inter_frame_target; - double output_frame_rate; - int64_t last_time_stamp_seen; - int64_t last_end_time_stamp_seen; - int64_t first_time_stamp_ever; - - int ni_av_qi; - int ni_tot_qi; - int ni_frames; - int avg_frame_qindex; - double tot_q; - double avg_q; - - int zbin_over_quant; - int zbin_mode_boost; - int zbin_mode_boost_enabled; - - int64_t total_byte_count; - - int buffered_mode; - - int buffer_level; - int bits_off_target; - - int rolling_target_bits; - int rolling_actual_bits; - - int long_rolling_target_bits; - int long_rolling_actual_bits; - - int64_t total_actual_bits; - int total_target_vs_actual; // debug stats - - int worst_quality; - int active_worst_quality; - int best_quality; - int active_best_quality; - - int cq_target_quality; - -#if CONFIG_SUPERBLOCKS - int sb_count; - int sb_ymode_count [VP9_I32X32_MODES]; -#endif - int ymode_count [VP9_YMODES]; /* intra MB type cts this frame */ - int bmode_count [VP9_BINTRAMODES]; - int i8x8_mode_count [VP9_I8X8_MODES]; - int sub_mv_ref_count [SUBMVREF_COUNT][VP9_SUBMVREFS]; - int mbsplit_count [VP9_NUMMBSPLITS]; - // int uv_mode_count[VP9_UV_MODES]; /* intra MB type cts this frame */ - int y_uv_mode_count[VP9_YMODES][VP9_UV_MODES]; - - nmv_context_counts NMVcount; - - unsigned int coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */ - vp9_prob frame_coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; - unsigned int frame_branch_ct [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2]; - unsigned int 
hybrid_coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */ - vp9_prob frame_hybrid_coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; - unsigned int frame_hybrid_branch_ct [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2]; - - unsigned int coef_counts_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */ - vp9_prob frame_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; - unsigned int frame_branch_ct_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2]; - unsigned int hybrid_coef_counts_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */ - vp9_prob frame_hybrid_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; - unsigned int frame_hybrid_branch_ct_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2]; - - unsigned int coef_counts_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */ - vp9_prob frame_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; - unsigned int frame_branch_ct_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2]; - unsigned int hybrid_coef_counts_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */ - vp9_prob frame_hybrid_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; - unsigned int frame_hybrid_branch_ct_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2]; - - int gfu_boost; - int last_boost; - int kf_boost; - int kf_zeromotion_pct; - - int target_bandwidth; - struct vpx_codec_pkt_list *output_pkt_list; - -#if 0 - // Experimental code for lagged and one pass - ONEPASS_FRAMESTATS one_pass_frame_stats[MAX_LAG_BUFFERS]; - int 
one_pass_frame_index; -#endif - MBGRAPH_FRAME_STATS mbgraph_stats[MAX_LAG_BUFFERS]; - int mbgraph_n_frames; // number of frames filled in the above - int static_mb_pct; // % forced skip mbs by segmentation - int seg0_progress, seg0_idx, seg0_cnt; - int ref_pred_count[3][2]; - - int decimation_factor; - int decimation_count; - - // for real time encoding - int avg_encode_time; // microsecond - int avg_pick_mode_time; // microsecond - int Speed; - unsigned int cpu_freq; // Mhz - int compressor_speed; - - int interquantizer; - int goldfreq; - int auto_worst_q; - int cpu_used; - int horiz_scale; - int vert_scale; - int pass; - - vp9_prob last_skip_false_probs[3][MBSKIP_CONTEXTS]; - int last_skip_probs_q[3]; - - int recent_ref_frame_usage[MAX_REF_FRAMES]; - int count_mb_ref_frame_usage[MAX_REF_FRAMES]; - int ref_frame_flags; - - unsigned char ref_pred_probs_update[PREDICTION_PROBS]; - - SPEED_FEATURES sf; - int error_bins[1024]; - - // Data used for real time conferencing mode to help determine if it would be good to update the gf - int inter_zz_count; - int gf_bad_count; - int gf_update_recommended; - int skip_true_count[3]; - int skip_false_count[3]; - - unsigned char *segmentation_map; - - // segment threashold for encode breakout - int segment_encode_breakout[MAX_MB_SEGMENTS]; - - unsigned char *active_map; - unsigned int active_map_enabled; - - TOKENLIST *tplist; - - fractional_mv_step_fp *find_fractional_mv_step; - vp9_full_search_fn_t full_search_sad; - vp9_refining_search_fn_t refining_search_sad; - vp9_diamond_search_fn_t diamond_search_sad; - vp9_variance_fn_ptr_t fn_ptr[BLOCK_MAX_SB_SEGMENTS]; - uint64_t time_receive_data; - uint64_t time_compress_data; - uint64_t time_pick_lpf; - uint64_t time_encode_mb_row; - - int base_skip_false_prob[QINDEX_RANGE][3]; - - struct twopass_rc { - unsigned int section_intra_rating; - unsigned int next_iiratio; - unsigned int this_iiratio; - FIRSTPASS_STATS *total_stats; - FIRSTPASS_STATS *this_frame_stats; - FIRSTPASS_STATS 
*stats_in, *stats_in_end, *stats_in_start; - FIRSTPASS_STATS *total_left_stats; - int first_pass_done; - int64_t bits_left; - int64_t clip_bits_total; - double avg_iiratio; - double modified_error_total; - double modified_error_used; - double modified_error_left; - double kf_intra_err_min; - double gf_intra_err_min; - int frames_to_key; - int maxq_max_limit; - int maxq_min_limit; - int static_scene_max_gf_interval; - int kf_bits; - int gf_group_error_left; // Remaining error from uncoded frames in a gf group. Two pass use only - - // Projected total bits available for a key frame group of frames - int64_t kf_group_bits; - - // Error score of frames still to be coded in kf group - int64_t kf_group_error_left; - - int gf_group_bits; // Projected Bits available for a group of frames including 1 GF or ARF - int gf_bits; // Bits for the golden frame or ARF - 2 pass only - int alt_extra_bits; - - int sr_update_lag; - double est_max_qcorrection_factor; - } twopass; - -#if CONFIG_RUNTIME_CPU_DETECT - VP9_ENCODER_RTCD rtcd; -#endif -#if VP9_TEMPORAL_ALT_REF - YV12_BUFFER_CONFIG alt_ref_buffer; - YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS]; - int fixed_divide[512]; -#endif - -#if CONFIG_INTERNAL_STATS - int count; - double total_y; - double total_u; - double total_v; - double total; - double total_sq_error; - double totalp_y; - double totalp_u; - double totalp_v; - double totalp; - double total_sq_error2; - int bytes; - double summed_quality; - double summed_weights; - unsigned int tot_recode_hits; - - - double total_ssimg_y; - double total_ssimg_u; - double total_ssimg_v; - double total_ssimg_all; - - int b_calculate_ssimg; -#endif - int b_calculate_psnr; - - // Per MB activity measurement - unsigned int activity_avg; - unsigned int *mb_activity_map; - int *mb_norm_activity_map; - - // Record of which MBs still refer to last golden frame either - // directly or through 0,0 - unsigned char *gf_active_flags; - int gf_active_count; - - int output_partition; - - // Store last 
frame's MV info for next frame MV prediction - int_mv *lfmv; - int *lf_ref_frame_sign_bias; - int *lf_ref_frame; - - /* force next frame to intra when kf_auto says so */ - int force_next_frame_intra; - - int droppable; - - // TODO Do we still need this?? - int update_context; - - int dummy_packing; /* flag to indicate if packing is dummy */ - -#if CONFIG_PRED_FILTER - int pred_filter_on_count; - int pred_filter_off_count; -#endif - unsigned int switchable_interp_count[VP9_SWITCHABLE_FILTERS + 1] - [VP9_SWITCHABLE_FILTERS]; - -#if CONFIG_NEW_MVREF - unsigned int best_ref_index_counts[MAX_REF_FRAMES][MAX_MV_REFS]; -#endif - -} VP9_COMP; - -void vp9_encode_frame(VP9_COMP *cpi); - -void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, - unsigned long *size); - -void vp9_activity_masking(VP9_COMP *cpi, MACROBLOCK *x); - -void vp9_tokenize_mb(VP9_COMP *, MACROBLOCKD *, TOKENEXTRA **, int dry_run); -void vp9_stuff_mb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run); - -void vp9_set_speed_features(VP9_COMP *cpi); - -#if CONFIG_DEBUG -#define CHECK_MEM_ERROR(lval,expr) do {\ - lval = (expr); \ - if(!lval) \ - vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,\ - "Failed to allocate "#lval" at %s:%d", \ - __FILE__,__LINE__);\ - } while(0) -#else -#define CHECK_MEM_ERROR(lval,expr) do {\ - lval = (expr); \ - if(!lval) \ - vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,\ - "Failed to allocate "#lval);\ - } while(0) -#endif -#endif // __INC_ONYX_INT_H diff --git a/vp8/encoder/picklpf.c b/vp8/encoder/picklpf.c deleted file mode 100644 index 147a20543..000000000 --- a/vp8/encoder/picklpf.c +++ /dev/null @@ -1,420 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. 
All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vp8/common/onyxc_int.h" -#include "onyx_int.h" -#include "quantize.h" -#include "vpx_mem/vpx_mem.h" -#include "vpx_scale/yv12extend.h" -#include "vpx_scale/vpxscale.h" -#include "vp8/common/alloccommon.h" -#include "vp8/common/loopfilter.h" -#if ARCH_ARM -#include "vpx_ports/arm.h" -#endif - -extern int vp9_calc_ss_err(YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *dest); -#if HAVE_ARMV7 -extern void vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc); -#endif - -#if CONFIG_RUNTIME_CPU_DETECT -#define IF_RTCD(x) (x) -#else -#define IF_RTCD(x) NULL -#endif - -extern void(*vp9_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, - YV12_BUFFER_CONFIG *dst_ybc, - int fraction); - -void vp9_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, - YV12_BUFFER_CONFIG *dst_ybc, int Fraction) { - unsigned char *src_y, *dst_y; - int yheight; - int ystride; - int border; - int yoffset; - int linestocopy; - - border = src_ybc->border; - yheight = src_ybc->y_height; - ystride = src_ybc->y_stride; - - linestocopy = (yheight >> (Fraction + 4)); - - if (linestocopy < 1) - linestocopy = 1; - - linestocopy <<= 4; - - yoffset = ystride * ((yheight >> 5) * 16 - 8); - src_y = src_ybc->y_buffer + yoffset; - dst_y = dst_ybc->y_buffer + yoffset; - - vpx_memcpy(dst_y, src_y, ystride * (linestocopy + 16)); -} - -static int calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *dest, int Fraction) { - int i, j; - int Total = 0; - int srcoffset, dstoffset; - unsigned char *src = source->y_buffer; - unsigned char *dst = dest->y_buffer; - - int linestocopy = (source->y_height >> (Fraction + 4)); - - if (linestocopy < 1) - linestocopy = 1; - - linestocopy <<= 4; - - - srcoffset = source->y_stride * (dest->y_height >> 5) * 16; - dstoffset = dest->y_stride * (dest->y_height >> 5) * 16; - - src += 
srcoffset; - dst += dstoffset; - - // Loop through the Y plane raw and reconstruction data summing (square differences) - for (i = 0; i < linestocopy; i += 16) { - for (j = 0; j < source->y_width; j += 16) { - unsigned int sse; - Total += vp9_mse16x16(src + j, source->y_stride, dst + j, dest->y_stride, - &sse); - } - - src += 16 * source->y_stride; - dst += 16 * dest->y_stride; - } - - return Total; -} - -// Enforce a minimum filter level based upon baseline Q -static int get_min_filter_level(VP9_COMP *cpi, int base_qindex) { - int min_filter_level; - /*int q = (int) vp9_convert_qindex_to_q(base_qindex); - - if (cpi->source_alt_ref_active && cpi->common.refresh_golden_frame && !cpi->common.refresh_alt_ref_frame) - min_filter_level = 0; - else - { - if (q <= 10) - min_filter_level = 0; - else if (q <= 64) - min_filter_level = 1; - else - min_filter_level = (q >> 6); - } - */ - min_filter_level = 0; - - return min_filter_level; -} - -// Enforce a maximum filter level based upon baseline Q -static int get_max_filter_level(VP9_COMP *cpi, int base_qindex) { - // PGW August 2006: Highest filter values almost always a bad idea - - // jbb chg: 20100118 - not so any more with this overquant stuff allow high values - // with lots of intra coming in. 
- int max_filter_level = MAX_LOOP_FILTER;// * 3 / 4; - (void)base_qindex; - - if (cpi->twopass.section_intra_rating > 8) - max_filter_level = MAX_LOOP_FILTER * 3 / 4; - - return max_filter_level; -} - -void vp9_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; - - int best_err = 0; - int filt_err = 0; - int min_filter_level = get_min_filter_level(cpi, cm->base_qindex); - int max_filter_level = get_max_filter_level(cpi, cm->base_qindex); - int filt_val; - int best_filt_val = cm->filter_level; - - // Make a copy of the unfiltered / processed recon buffer - vp9_yv12_copy_partial_frame_ptr(cm->frame_to_show, &cpi->last_frame_uf, 3); - - if (cm->frame_type == KEY_FRAME) - cm->sharpness_level = 0; - else - cm->sharpness_level = cpi->oxcf.Sharpness; - - if (cm->sharpness_level != cm->last_sharpness_level) { - vp9_loop_filter_update_sharpness(&cm->lf_info, cm->sharpness_level); - cm->last_sharpness_level = cm->sharpness_level; - } - - // Start the search at the previous frame filter level unless it is now out of range. - if (cm->filter_level < min_filter_level) - cm->filter_level = min_filter_level; - else if (cm->filter_level > max_filter_level) - cm->filter_level = max_filter_level; - - filt_val = cm->filter_level; - best_filt_val = filt_val; - - // Get the err using the previous frame's filter value. - vp9_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val); - - best_err = calc_partial_ssl_err(sd, cm->frame_to_show, 3); - - // Re-instate the unfiltered frame - vp9_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, cm->frame_to_show, 3); - - filt_val -= (1 + ((filt_val > 10) ? 
1 : 0)); - - // Search lower filter levels - while (filt_val >= min_filter_level) { - // Apply the loop filter - vp9_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val); - - // Get the err for filtered frame - filt_err = calc_partial_ssl_err(sd, cm->frame_to_show, 3); - - // Re-instate the unfiltered frame - vp9_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, cm->frame_to_show, 3); - - - // Update the best case record or exit loop. - if (filt_err < best_err) { - best_err = filt_err; - best_filt_val = filt_val; - } else - break; - - // Adjust filter level - filt_val -= (1 + ((filt_val > 10) ? 1 : 0)); - } - - // Search up (note that we have already done filt_val = cm->filter_level) - filt_val = cm->filter_level + (1 + ((filt_val > 10) ? 1 : 0)); - - if (best_filt_val == cm->filter_level) { - // Resist raising filter level for very small gains - best_err -= (best_err >> 10); - - while (filt_val < max_filter_level) { - // Apply the loop filter - vp9_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val); - - // Get the err for filtered frame - filt_err = calc_partial_ssl_err(sd, cm->frame_to_show, 3); - - // Re-instate the unfiltered frame - vp9_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, - cm->frame_to_show, 3); - - // Update the best case record or exit loop. - if (filt_err < best_err) { - // Do not raise filter level if improvement is < 1 part in 4096 - best_err = filt_err - (filt_err >> 10); - - best_filt_val = filt_val; - } else - break; - - // Adjust filter level - filt_val += (1 + ((filt_val > 10) ? 
1 : 0)); - } - } - - cm->filter_level = best_filt_val; - - if (cm->filter_level < min_filter_level) - cm->filter_level = min_filter_level; - - if (cm->filter_level > max_filter_level) - cm->filter_level = max_filter_level; -} - -// Stub function for now Alt LF not used -void vp9_set_alt_lf_level(VP9_COMP *cpi, int filt_val) { -} - -void vp9_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; - - int best_err = 0; - int filt_err = 0; - int min_filter_level = get_min_filter_level(cpi, cm->base_qindex); - int max_filter_level = get_max_filter_level(cpi, cm->base_qindex); - - int filter_step; - int filt_high = 0; - int filt_mid = cm->filter_level; // Start search at previous frame filter level - int filt_low = 0; - int filt_best; - int filt_direction = 0; - - int Bias = 0; // Bias against raising loop filter and in favour of lowering it - - // Make a copy of the unfiltered / processed recon buffer -#if HAVE_ARMV7 -#if CONFIG_RUNTIME_CPU_DETECT - if (cm->rtcd.flags & HAS_NEON) -#endif - { - vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(cm->frame_to_show, &cpi->last_frame_uf); - } -#if CONFIG_RUNTIME_CPU_DETECT - else -#endif -#endif -#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT - { - vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cpi->last_frame_uf); - } -#endif - - if (cm->frame_type == KEY_FRAME) - cm->sharpness_level = 0; - else - cm->sharpness_level = cpi->oxcf.Sharpness; - - // Start the search at the previous frame filter level unless it is now out of range. - filt_mid = cm->filter_level; - - if (filt_mid < min_filter_level) - filt_mid = min_filter_level; - else if (filt_mid > max_filter_level) - filt_mid = max_filter_level; - - // Define the initial step size - filter_step = (filt_mid < 16) ? 
4 : filt_mid / 4; - - // Get baseline error score - vp9_set_alt_lf_level(cpi, filt_mid); - vp9_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_mid); - - best_err = vp9_calc_ss_err(sd, cm->frame_to_show); - filt_best = filt_mid; - - // Re-instate the unfiltered frame -#if HAVE_ARMV7 -#if CONFIG_RUNTIME_CPU_DETECT - if (cm->rtcd.flags & HAS_NEON) -#endif - { - vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show); - } -#if CONFIG_RUNTIME_CPU_DETECT - else -#endif -#endif -#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT - { - vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show); - } -#endif - - while (filter_step > 0) { - Bias = (best_err >> (15 - (filt_mid / 8))) * filter_step; // PGW change 12/12/06 for small images - - // jbb chg: 20100118 - in sections with lots of new material coming in don't bias as much to a low filter value - if (cpi->twopass.section_intra_rating < 20) - Bias = Bias * cpi->twopass.section_intra_rating / 20; - - // yx, bias less for large block size - if (cpi->common.txfm_mode != ONLY_4X4) - Bias >>= 1; - - filt_high = ((filt_mid + filter_step) > max_filter_level) ? max_filter_level : (filt_mid + filter_step); - filt_low = ((filt_mid - filter_step) < min_filter_level) ? 
min_filter_level : (filt_mid - filter_step); - - if ((filt_direction <= 0) && (filt_low != filt_mid)) { - // Get Low filter error score - vp9_set_alt_lf_level(cpi, filt_low); - vp9_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_low); - - filt_err = vp9_calc_ss_err(sd, cm->frame_to_show); - - // Re-instate the unfiltered frame -#if HAVE_ARMV7 -#if CONFIG_RUNTIME_CPU_DETECT - if (cm->rtcd.flags & HAS_NEON) -#endif - { - vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show); - } -#if CONFIG_RUNTIME_CPU_DETECT - else -#endif -#endif -#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT - { - vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show); - } -#endif - - // If value is close to the best so far then bias towards a lower loop filter value. - if ((filt_err - Bias) < best_err) { - // Was it actually better than the previous best? - if (filt_err < best_err) - best_err = filt_err; - - filt_best = filt_low; - } - } - - // Now look at filt_high - if ((filt_direction >= 0) && (filt_high != filt_mid)) { - vp9_set_alt_lf_level(cpi, filt_high); - vp9_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_high); - - filt_err = vp9_calc_ss_err(sd, cm->frame_to_show); - - // Re-instate the unfiltered frame -#if HAVE_ARMV7 -#if CONFIG_RUNTIME_CPU_DETECT - if (cm->rtcd.flags & HAS_NEON) -#endif - { - vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show); - } -#if CONFIG_RUNTIME_CPU_DETECT - else -#endif -#endif -#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT - { - vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show); - } -#endif - - // Was it better than the previous best? 
- if (filt_err < (best_err - Bias)) { - best_err = filt_err; - filt_best = filt_high; - } - } - - // Half the step distance if the best filter value was the same as last time - if (filt_best == filt_mid) { - filter_step = filter_step / 2; - filt_direction = 0; - } else { - filt_direction = (filt_best < filt_mid) ? -1 : 1; - filt_mid = filt_best; - } - } - - cm->filter_level = filt_best; -} - diff --git a/vp8/encoder/ppc/csystemdependent.c b/vp8/encoder/ppc/csystemdependent.c deleted file mode 100644 index edecb2033..000000000 --- a/vp8/encoder/ppc/csystemdependent.c +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include "vp8/encoder/variance.h" -#include "vp8/encoder/onyx_int.h" - -SADFunction *vp9_sad16x16; -SADFunction *vp9_sad16x8; -SADFunction *vp9_sad8x16; -SADFunction *vp9_sad8x8; -SADFunction *vp9_sad4x4; - -variance_function *vp9_variance4x4; -variance_function *vp9_variance8x8; -variance_function *vp9_variance8x16; -variance_function *vp9_variance16x8; -variance_function *vp9_variance16x16; - -variance_function *vp9_mse16x16; - -sub_pixel_variance_function *vp9_sub_pixel_variance4x4; -sub_pixel_variance_function *vp9_sub_pixel_variance8x8; -sub_pixel_variance_function *vp9_sub_pixel_variance8x16; -sub_pixel_variance_function *vp9_sub_pixel_variance16x8; -sub_pixel_variance_function *vp9_sub_pixel_variance16x16; - -int (*vp9_block_error)(short *coeff, short *dqcoeff); -int (*vp9_mbblock_error)(MACROBLOCK *mb, int dc); - -int (*vp9_mbuverror)(MACROBLOCK *mb); -unsigned int (*vp9_get_mb_ss)(short *); -void (*vp9_short_fdct4x4)(short *input, short *output, int pitch); -void (*vp9_short_fdct8x4)(short *input, short *output, int pitch); -void (*vp8_fast_fdct4x4)(short *input, short *output, int pitch); -void (*vp8_fast_fdct8x4)(short *input, short *output, int pitch); -void (*short_walsh4x4)(short *input, short *output, int pitch); - -void (*vp9_subtract_b)(BLOCK *be, BLOCKD *bd, int pitch); -void (*vp9_subtract_mby)(short *diff, unsigned char *src, unsigned char *pred, int stride); -void (*vp9_subtract_mbuv)(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride); -void (*vp8_fast_quantize_b)(BLOCK *b, BLOCKD *d); - -// c imports -extern int block_error_c(short *coeff, short *dqcoeff); -extern int vp9_mbblock_error_c(MACROBLOCK *mb, int dc); - -extern int vp9_mbuverror_c(MACROBLOCK *mb); -extern unsigned int vp8_get8x8var_c(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum); -extern void short_fdct4x4_c(short *input, short *output, int pitch); -extern void 
short_fdct8x4_c(short *input, short *output, int pitch); -extern void vp9_short_walsh4x4_c(short *input, short *output, int pitch); - -extern void vp9_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch); -extern void subtract_mby_c(short *diff, unsigned char *src, unsigned char *pred, int stride); -extern void subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride); -extern void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d); - -extern SADFunction sad16x16_c; -extern SADFunction sad16x8_c; -extern SADFunction sad8x16_c; -extern SADFunction sad8x8_c; -extern SADFunction sad4x4_c; - -extern variance_function variance16x16_c; -extern variance_function variance8x16_c; -extern variance_function variance16x8_c; -extern variance_function variance8x8_c; -extern variance_function variance4x4_c; -extern variance_function mse16x16_c; - -extern sub_pixel_variance_function sub_pixel_variance4x4_c; -extern sub_pixel_variance_function sub_pixel_variance8x8_c; -extern sub_pixel_variance_function sub_pixel_variance8x16_c; -extern sub_pixel_variance_function sub_pixel_variance16x8_c; -extern sub_pixel_variance_function sub_pixel_variance16x16_c; - -extern unsigned int vp9_get_mb_ss_c(short *); - -// ppc -extern int vp9_block_error_ppc(short *coeff, short *dqcoeff); - -extern void vp9_short_fdct4x4_ppc(short *input, short *output, int pitch); -extern void vp9_short_fdct8x4_ppc(short *input, short *output, int pitch); - -extern void vp9_subtract_mby_ppc(short *diff, unsigned char *src, unsigned char *pred, int stride); -extern void vp9_subtract_mbuv_ppc(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride); - -extern SADFunction vp9_sad16x16_ppc; -extern SADFunction vp9_sad16x8_ppc; -extern SADFunction vp9_sad8x16_ppc; -extern SADFunction vp9_sad8x8_ppc; -extern SADFunction vp9_sad4x4_ppc; - -extern variance_function vp9_variance16x16_ppc; -extern variance_function vp9_variance8x16_ppc; -extern variance_function 
vp9_variance16x8_ppc; -extern variance_function vp9_variance8x8_ppc; -extern variance_function vp9_variance4x4_ppc; -extern variance_function vp9_mse16x16_ppc; - -extern sub_pixel_variance_function vp9_sub_pixel_variance4x4_ppc; -extern sub_pixel_variance_function vp9_sub_pixel_variance8x8_ppc; -extern sub_pixel_variance_function vp9_sub_pixel_variance8x16_ppc; -extern sub_pixel_variance_function vp9_sub_pixel_variance16x8_ppc; -extern sub_pixel_variance_function vp9_sub_pixel_variance16x16_ppc; - -extern unsigned int vp8_get8x8var_ppc(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum); -extern unsigned int vp8_get16x16var_ppc(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum); - -void vp9_cmachine_specific_config(void) { - // Pure C: - vp9_mbuverror = vp9_mbuverror_c; - vp8_fast_quantize_b = vp8_fast_quantize_b_c; - vp9_short_fdct4x4 = vp9_short_fdct4x4_ppc; - vp9_short_fdct8x4 = vp9_short_fdct8x4_ppc; - vp8_fast_fdct4x4 = vp9_short_fdct4x4_ppc; - vp8_fast_fdct8x4 = vp9_short_fdct8x4_ppc; - short_walsh4x4 = vp9_short_walsh4x4_c; - - vp9_variance4x4 = vp9_variance4x4_ppc; - vp9_variance8x8 = vp9_variance8x8_ppc; - vp9_variance8x16 = vp9_variance8x16_ppc; - vp9_variance16x8 = vp9_variance16x8_ppc; - vp9_variance16x16 = vp9_variance16x16_ppc; - vp9_mse16x16 = vp9_mse16x16_ppc; - - vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_ppc; - vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_ppc; - vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_ppc; - vp9_sub_pixel_variance16x8 = vp9_sub_pixel_variance16x8_ppc; - vp9_sub_pixel_variance16x16 = vp9_sub_pixel_variance16x16_ppc; - - vp9_get_mb_ss = vp9_get_mb_ss_c; - - vp9_sad16x16 = vp9_sad16x16_ppc; - vp9_sad16x8 = vp9_sad16x8_ppc; - vp9_sad8x16 = vp9_sad8x16_ppc; - vp9_sad8x8 = vp9_sad8x8_ppc; - vp9_sad4x4 = vp9_sad4x4_ppc; - - vp9_block_error = vp9_block_error_ppc; - 
vp9_mbblock_error = vp9_mbblock_error_c; - - vp9_subtract_b = vp9_subtract_b_c; - vp9_subtract_mby = vp9_subtract_mby_ppc; - vp9_subtract_mbuv = vp9_subtract_mbuv_ppc; -} diff --git a/vp8/encoder/ppc/encodemb_altivec.asm b/vp8/encoder/ppc/encodemb_altivec.asm deleted file mode 100644 index 6e0099ddc..000000000 --- a/vp8/encoder/ppc/encodemb_altivec.asm +++ /dev/null @@ -1,153 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - .globl vp8_subtract_mbuv_ppc - .globl vp8_subtract_mby_ppc - -;# r3 short *diff -;# r4 unsigned char *usrc -;# r5 unsigned char *vsrc -;# r6 unsigned char *pred -;# r7 int stride -vp8_subtract_mbuv_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xf000 - mtspr 256, r12 ;# set VRSAVE - - li r9, 256 - add r3, r3, r9 - add r3, r3, r9 - add r6, r6, r9 - - li r10, 16 - li r9, 4 - mtctr r9 - - vspltisw v0, 0 - -mbu_loop: - lvsl v5, 0, r4 ;# permutate value for alignment - lvx v1, 0, r4 ;# src - lvx v2, 0, r6 ;# pred - - add r4, r4, r7 - addi r6, r6, 16 - - vperm v1, v1, v0, v5 - - vmrghb v3, v0, v1 ;# unpack high src to short - vmrghb v4, v0, v2 ;# unpack high pred to short - - lvsl v5, 0, r4 ;# permutate value for alignment - lvx v1, 0, r4 ;# src - - add r4, r4, r7 - - vsubshs v3, v3, v4 - - stvx v3, 0, r3 ;# store out diff - - vperm v1, v1, v0, v5 - - vmrghb v3, v0, v1 ;# unpack high src to short - vmrglb v4, v0, v2 ;# unpack high pred to short - - vsubshs v3, v3, v4 - - stvx v3, r10, r3 ;# store out diff - - addi r3, r3, 32 - - bdnz mbu_loop - - mtctr r9 - -mbv_loop: - lvsl v5, 0, r5 ;# permutate value for alignment - lvx v1, 0, r5 ;# src - lvx v2, 0, r6 ;# pred - - add r5, r5, 
r7 - addi r6, r6, 16 - - vperm v1, v1, v0, v5 - - vmrghb v3, v0, v1 ;# unpack high src to short - vmrghb v4, v0, v2 ;# unpack high pred to short - - lvsl v5, 0, r5 ;# permutate value for alignment - lvx v1, 0, r5 ;# src - - add r5, r5, r7 - - vsubshs v3, v3, v4 - - stvx v3, 0, r3 ;# store out diff - - vperm v1, v1, v0, v5 - - vmrghb v3, v0, v1 ;# unpack high src to short - vmrglb v4, v0, v2 ;# unpack high pred to short - - vsubshs v3, v3, v4 - - stvx v3, r10, r3 ;# store out diff - - addi r3, r3, 32 - - bdnz mbv_loop - - mtspr 256, r11 ;# reset old VRSAVE - - blr - -;# r3 short *diff -;# r4 unsigned char *src -;# r5 unsigned char *pred -;# r6 int stride -vp8_subtract_mby_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xf800 - mtspr 256, r12 ;# set VRSAVE - - li r10, 16 - mtctr r10 - - vspltisw v0, 0 - -mby_loop: - lvx v1, 0, r4 ;# src - lvx v2, 0, r5 ;# pred - - add r4, r4, r6 - addi r5, r5, 16 - - vmrghb v3, v0, v1 ;# unpack high src to short - vmrghb v4, v0, v2 ;# unpack high pred to short - - vsubshs v3, v3, v4 - - stvx v3, 0, r3 ;# store out diff - - vmrglb v3, v0, v1 ;# unpack low src to short - vmrglb v4, v0, v2 ;# unpack low pred to short - - vsubshs v3, v3, v4 - - stvx v3, r10, r3 ;# store out diff - - addi r3, r3, 32 - - bdnz mby_loop - - mtspr 256, r11 ;# reset old VRSAVE - - blr diff --git a/vp8/encoder/ppc/fdct_altivec.asm b/vp8/encoder/ppc/fdct_altivec.asm deleted file mode 100644 index 935d0cb09..000000000 --- a/vp8/encoder/ppc/fdct_altivec.asm +++ /dev/null @@ -1,205 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - - .globl vp8_short_fdct4x4_ppc - .globl vp8_short_fdct8x4_ppc - -.macro load_c V, LABEL, OFF, R0, R1 - lis \R0, \LABEL@ha - la \R1, \LABEL@l(\R0) - lvx \V, \OFF, \R1 -.endm - -;# Forward and inverse DCTs are nearly identical; only differences are -;# in normalization (fwd is twice unitary, inv is half unitary) -;# and that they are of course transposes of each other. -;# -;# The following three accomplish most of implementation and -;# are used only by ppc_idct.c and ppc_fdct.c. -.macro prologue - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xfffc - mtspr 256, r12 ;# set VRSAVE - - stwu r1,-32(r1) ;# create space on the stack - - li r6, 16 - - load_c v0, dct_tab, 0, r9, r10 - lvx v1, r6, r10 - addi r10, r10, 32 - lvx v2, 0, r10 - lvx v3, r6, r10 - - load_c v4, ppc_dctperm_tab, 0, r9, r10 - load_c v5, ppc_dctperm_tab, r6, r9, r10 - - load_c v6, round_tab, 0, r10, r9 -.endm - -.macro epilogue - addi r1, r1, 32 ;# recover stack - - mtspr 256, r11 ;# reset old VRSAVE -.endm - -;# Do horiz xf on two rows of coeffs v8 = a0 a1 a2 a3 b0 b1 b2 b3. -;# a/A are the even rows 0,2 b/B are the odd rows 1,3 -;# For fwd transform, indices are horizontal positions, then frequencies. -;# For inverse transform, frequencies then positions. -;# The two resulting A0..A3 B0..B3 are later combined -;# and vertically transformed. - -.macro two_rows_horiz Dst - vperm v9, v8, v8, v4 ;# v9 = a2 a3 a0 a1 b2 b3 b0 b1 - - vmsumshm v10, v0, v8, v6 - vmsumshm v10, v1, v9, v10 - vsraw v10, v10, v7 ;# v10 = A0 A1 B0 B1 - - vmsumshm v11, v2, v8, v6 - vmsumshm v11, v3, v9, v11 - vsraw v11, v11, v7 ;# v11 = A2 A3 B2 B3 - - vpkuwum v10, v10, v11 ;# v10 = A0 A1 B0 B1 A2 A3 B2 B3 - vperm \Dst, v10, v10, v5 ;# Dest = A0 B0 A1 B1 A2 B2 A3 B3 -.endm - -;# Vertical xf on two rows. DCT values in comments are for inverse transform; -;# forward transform uses transpose. 
- -.macro two_rows_vert Ceven, Codd - vspltw v8, \Ceven, 0 ;# v8 = c00 c10 or c02 c12 four times - vspltw v9, \Codd, 0 ;# v9 = c20 c30 or c22 c32 "" - vmsumshm v8, v8, v12, v6 - vmsumshm v8, v9, v13, v8 - vsraw v10, v8, v7 - - vspltw v8, \Codd, 1 ;# v8 = c01 c11 or c03 c13 - vspltw v9, \Ceven, 1 ;# v9 = c21 c31 or c23 c33 - vmsumshm v8, v8, v12, v6 - vmsumshm v8, v9, v13, v8 - vsraw v8, v8, v7 - - vpkuwum v8, v10, v8 ;# v8 = rows 0,1 or 2,3 -.endm - -.macro two_rows_h Dest - stw r0, 0(r8) - lwz r0, 4(r3) - stw r0, 4(r8) - lwzux r0, r3,r5 - stw r0, 8(r8) - lwz r0, 4(r3) - stw r0, 12(r8) - lvx v8, 0,r8 - two_rows_horiz \Dest -.endm - - .align 2 -;# r3 short *input -;# r4 short *output -;# r5 int pitch -vp8_short_fdct4x4_ppc: - - prologue - - vspltisw v7, 14 ;# == 14, fits in 5 signed bits - addi r8, r1, 0 - - - lwz r0, 0(r3) - two_rows_h v12 ;# v12 = H00 H10 H01 H11 H02 H12 H03 H13 - - lwzux r0, r3, r5 - two_rows_h v13 ;# v13 = H20 H30 H21 H31 H22 H32 H23 H33 - - lvx v6, r6, r9 ;# v6 = Vround - vspltisw v7, -16 ;# == 16 == -16, only low 5 bits matter - - two_rows_vert v0, v1 - stvx v8, 0, r4 - two_rows_vert v2, v3 - stvx v8, r6, r4 - - epilogue - - blr - - .align 2 -;# r3 short *input -;# r4 short *output -;# r5 int pitch -vp8_short_fdct8x4_ppc: - prologue - - vspltisw v7, 14 ;# == 14, fits in 5 signed bits - addi r8, r1, 0 - addi r10, r3, 0 - - lwz r0, 0(r3) - two_rows_h v12 ;# v12 = H00 H10 H01 H11 H02 H12 H03 H13 - - lwzux r0, r3, r5 - two_rows_h v13 ;# v13 = H20 H30 H21 H31 H22 H32 H23 H33 - - lvx v6, r6, r9 ;# v6 = Vround - vspltisw v7, -16 ;# == 16 == -16, only low 5 bits matter - - two_rows_vert v0, v1 - stvx v8, 0, r4 - two_rows_vert v2, v3 - stvx v8, r6, r4 - - ;# Next block - addi r3, r10, 8 - addi r4, r4, 32 - lvx v6, 0, r9 ;# v6 = Hround - - vspltisw v7, 14 ;# == 14, fits in 5 signed bits - addi r8, r1, 0 - - lwz r0, 0(r3) - two_rows_h v12 ;# v12 = H00 H10 H01 H11 H02 H12 H03 H13 - - lwzux r0, r3, r5 - two_rows_h v13 ;# v13 = H20 H30 H21 H31 H22 H32 H23 
H33 - - lvx v6, r6, r9 ;# v6 = Vround - vspltisw v7, -16 ;# == 16 == -16, only low 5 bits matter - - two_rows_vert v0, v1 - stvx v8, 0, r4 - two_rows_vert v2, v3 - stvx v8, r6, r4 - - epilogue - - blr - - .data - .align 4 -ppc_dctperm_tab: - .byte 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 - .byte 0,1,4,5, 2,3,6,7, 8,9,12,13, 10,11,14,15 - - .align 4 -dct_tab: - .short 23170, 23170,-12540,-30274, 23170, 23170,-12540,-30274 - .short 23170, 23170, 30274, 12540, 23170, 23170, 30274, 12540 - - .short 23170,-23170, 30274,-12540, 23170,-23170, 30274,-12540 - .short -23170, 23170, 12540,-30274,-23170, 23170, 12540,-30274 - - .align 4 -round_tab: - .long (1 << (14-1)), (1 << (14-1)), (1 << (14-1)), (1 << (14-1)) - .long (1 << (16-1)), (1 << (16-1)), (1 << (16-1)), (1 << (16-1)) diff --git a/vp8/encoder/ppc/rdopt_altivec.asm b/vp8/encoder/ppc/rdopt_altivec.asm deleted file mode 100644 index ba4823009..000000000 --- a/vp8/encoder/ppc/rdopt_altivec.asm +++ /dev/null @@ -1,51 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - - .globl vp8_block_error_ppc - - .align 2 -;# r3 short *Coeff -;# r4 short *dqcoeff -vp8_block_error_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xf800 - mtspr 256, r12 ;# set VRSAVE - - stwu r1,-32(r1) ;# create space on the stack - - stw r5, 12(r1) ;# tranfer dc to vector register - - lvx v0, 0, r3 ;# Coeff - lvx v1, 0, r4 ;# dqcoeff - - li r10, 16 - - vspltisw v3, 0 - - vsubshs v0, v0, v1 - - vmsumshm v2, v0, v0, v3 ;# multiply differences - - lvx v0, r10, r3 ;# Coeff - lvx v1, r10, r4 ;# dqcoeff - - vsubshs v0, v0, v1 - - vmsumshm v1, v0, v0, v2 ;# multiply differences - vsumsws v1, v1, v3 ;# sum up - - stvx v1, 0, r1 - lwz r3, 12(r1) ;# return value - - addi r1, r1, 32 ;# recover stack - mtspr 256, r11 ;# reset old VRSAVE - - blr diff --git a/vp8/encoder/ppc/sad_altivec.asm b/vp8/encoder/ppc/sad_altivec.asm deleted file mode 100644 index e5f26380f..000000000 --- a/vp8/encoder/ppc/sad_altivec.asm +++ /dev/null @@ -1,277 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - - .globl vp8_sad16x16_ppc - .globl vp8_sad16x8_ppc - .globl vp8_sad8x16_ppc - .globl vp8_sad8x8_ppc - .globl vp8_sad4x4_ppc - -.macro load_aligned_16 V R O - lvsl v3, 0, \R ;# permutate value for alignment - - lvx v1, 0, \R - lvx v2, \O, \R - - vperm \V, v1, v2, v3 -.endm - -.macro prologue - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffc0 - mtspr 256, r12 ;# set VRSAVE - - stwu r1, -32(r1) ;# create space on the stack - - li r10, 16 ;# load offset and loop counter - - vspltisw v8, 0 ;# zero out total to start -.endm - -.macro epilogue - addi r1, r1, 32 ;# recover stack - - mtspr 256, r11 ;# reset old VRSAVE -.endm - -.macro SAD_16 - ;# v6 = abs (v4 - v5) - vsububs v6, v4, v5 - vsububs v7, v5, v4 - vor v6, v6, v7 - - ;# v8 += abs (v4 - v5) - vsum4ubs v8, v6, v8 -.endm - -.macro sad_16_loop loop_label - lvsl v3, 0, r5 ;# only needs to be done once per block - - ;# preload a line of data before getting into the loop - lvx v4, 0, r3 - lvx v1, 0, r5 - lvx v2, r10, r5 - - add r5, r5, r6 - add r3, r3, r4 - - vperm v5, v1, v2, v3 - - .align 4 -\loop_label: - ;# compute difference on first row - vsububs v6, v4, v5 - vsububs v7, v5, v4 - - ;# load up next set of data - lvx v9, 0, r3 - lvx v1, 0, r5 - lvx v2, r10, r5 - - ;# perform abs() of difference - vor v6, v6, v7 - add r3, r3, r4 - - ;# add to the running tally - vsum4ubs v8, v6, v8 - - ;# now onto the next line - vperm v5, v1, v2, v3 - add r5, r5, r6 - lvx v4, 0, r3 - - ;# compute difference on second row - vsububs v6, v9, v5 - lvx v1, 0, r5 - vsububs v7, v5, v9 - lvx v2, r10, r5 - vor v6, v6, v7 - add r3, r3, r4 - vsum4ubs v8, v6, v8 - vperm v5, v1, v2, v3 - add r5, r5, r6 - - bdnz \loop_label - - vspltisw v7, 0 - - vsumsws v8, v8, v7 - - stvx v8, 0, r1 - lwz r3, 12(r1) -.endm - -.macro sad_8_loop loop_label - .align 4 -\loop_label: - ;# only one of the inputs should need to be aligned. 
- load_aligned_16 v4, r3, r10 - load_aligned_16 v5, r5, r10 - - ;# move onto the next line - add r3, r3, r4 - add r5, r5, r6 - - ;# only one of the inputs should need to be aligned. - load_aligned_16 v6, r3, r10 - load_aligned_16 v7, r5, r10 - - ;# move onto the next line - add r3, r3, r4 - add r5, r5, r6 - - vmrghb v4, v4, v6 - vmrghb v5, v5, v7 - - SAD_16 - - bdnz \loop_label - - vspltisw v7, 0 - - vsumsws v8, v8, v7 - - stvx v8, 0, r1 - lwz r3, 12(r1) -.endm - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int src_stride -;# r5 unsigned char *ref_ptr -;# r6 int ref_stride -;# -;# r3 return value -vp8_sad16x16_ppc: - - prologue - - li r9, 8 - mtctr r9 - - sad_16_loop sad16x16_loop - - epilogue - - blr - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int src_stride -;# r5 unsigned char *ref_ptr -;# r6 int ref_stride -;# -;# r3 return value -vp8_sad16x8_ppc: - - prologue - - li r9, 4 - mtctr r9 - - sad_16_loop sad16x8_loop - - epilogue - - blr - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int src_stride -;# r5 unsigned char *ref_ptr -;# r6 int ref_stride -;# -;# r3 return value -vp8_sad8x16_ppc: - - prologue - - li r9, 8 - mtctr r9 - - sad_8_loop sad8x16_loop - - epilogue - - blr - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int src_stride -;# r5 unsigned char *ref_ptr -;# r6 int ref_stride -;# -;# r3 return value -vp8_sad8x8_ppc: - - prologue - - li r9, 4 - mtctr r9 - - sad_8_loop sad8x8_loop - - epilogue - - blr - -.macro transfer_4x4 I P - lwz r0, 0(\I) - add \I, \I, \P - - lwz r7, 0(\I) - add \I, \I, \P - - lwz r8, 0(\I) - add \I, \I, \P - - lwz r9, 0(\I) - - stw r0, 0(r1) - stw r7, 4(r1) - stw r8, 8(r1) - stw r9, 12(r1) -.endm - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int src_stride -;# r5 unsigned char *ref_ptr -;# r6 int ref_stride -;# -;# r3 return value -vp8_sad4x4_ppc: - - prologue - - transfer_4x4 r3, r4 - lvx v4, 0, r1 - - transfer_4x4 r5, r6 - lvx v5, 0, r1 - - vspltisw v8, 0 ;# zero out total to start - - ;# v6 = abs (v4 - v5) - 
vsububs v6, v4, v5 - vsububs v7, v5, v4 - vor v6, v6, v7 - - ;# v8 += abs (v4 - v5) - vsum4ubs v7, v6, v8 - vsumsws v7, v7, v8 - - stvx v7, 0, r1 - lwz r3, 12(r1) - - epilogue - - blr diff --git a/vp8/encoder/ppc/variance_altivec.asm b/vp8/encoder/ppc/variance_altivec.asm deleted file mode 100644 index ad2664143..000000000 --- a/vp8/encoder/ppc/variance_altivec.asm +++ /dev/null @@ -1,375 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - .globl vp8_get8x8var_ppc - .globl vp8_get16x16var_ppc - .globl vp8_mse16x16_ppc - .globl vp9_variance16x16_ppc - .globl vp9_variance16x8_ppc - .globl vp9_variance8x16_ppc - .globl vp9_variance8x8_ppc - .globl vp9_variance4x4_ppc - -.macro load_aligned_16 V R O - lvsl v3, 0, \R ;# permutate value for alignment - - lvx v1, 0, \R - lvx v2, \O, \R - - vperm \V, v1, v2, v3 -.endm - -.macro prologue - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffc0 - mtspr 256, r12 ;# set VRSAVE - - stwu r1, -32(r1) ;# create space on the stack - - li r10, 16 ;# load offset and loop counter - - vspltisw v7, 0 ;# zero for merging - vspltisw v8, 0 ;# zero out total to start - vspltisw v9, 0 ;# zero out total for dif^2 -.endm - -.macro epilogue - addi r1, r1, 32 ;# recover stack - - mtspr 256, r11 ;# reset old VRSAVE -.endm - -.macro compute_sum_sse - ;# Compute sum first. Unpack to so signed subract - ;# can be used. Only have a half word signed - ;# subract. Do high, then low. - vmrghb v2, v7, v4 - vmrghb v3, v7, v5 - vsubshs v2, v2, v3 - vsum4shs v8, v2, v8 - - vmrglb v2, v7, v4 - vmrglb v3, v7, v5 - vsubshs v2, v2, v3 - vsum4shs v8, v2, v8 - - ;# Now compute sse. 
- vsububs v2, v4, v5 - vsububs v3, v5, v4 - vor v2, v2, v3 - - vmsumubm v9, v2, v2, v9 -.endm - -.macro variance_16 DS loop_label store_sum -\loop_label: - ;# only one of the inputs should need to be aligned. - load_aligned_16 v4, r3, r10 - load_aligned_16 v5, r5, r10 - - ;# move onto the next line - add r3, r3, r4 - add r5, r5, r6 - - compute_sum_sse - - bdnz \loop_label - - vsumsws v8, v8, v7 - vsumsws v9, v9, v7 - - stvx v8, 0, r1 - lwz r3, 12(r1) - - stvx v9, 0, r1 - lwz r4, 12(r1) - -.if \store_sum - stw r3, 0(r8) ;# sum -.endif - stw r4, 0(r7) ;# sse - - mullw r3, r3, r3 ;# sum*sum - srawi r3, r3, \DS ;# (sum*sum) >> DS - subf r3, r3, r4 ;# sse - ((sum*sum) >> DS) -.endm - -.macro variance_8 DS loop_label store_sum -\loop_label: - ;# only one of the inputs should need to be aligned. - load_aligned_16 v4, r3, r10 - load_aligned_16 v5, r5, r10 - - ;# move onto the next line - add r3, r3, r4 - add r5, r5, r6 - - ;# only one of the inputs should need to be aligned. - load_aligned_16 v6, r3, r10 - load_aligned_16 v0, r5, r10 - - ;# move onto the next line - add r3, r3, r4 - add r5, r5, r6 - - vmrghb v4, v4, v6 - vmrghb v5, v5, v0 - - compute_sum_sse - - bdnz \loop_label - - vsumsws v8, v8, v7 - vsumsws v9, v9, v7 - - stvx v8, 0, r1 - lwz r3, 12(r1) - - stvx v9, 0, r1 - lwz r4, 12(r1) - -.if \store_sum - stw r3, 0(r8) ;# sum -.endif - stw r4, 0(r7) ;# sse - - mullw r3, r3, r3 ;# sum*sum - srawi r3, r3, \DS ;# (sum*sum) >> 8 - subf r3, r3, r4 ;# sse - ((sum*sum) >> 8) -.endm - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int source_stride -;# r5 unsigned char *ref_ptr -;# r6 int recon_stride -;# r7 unsigned int *SSE -;# r8 int *Sum -;# -;# r3 return value -vp8_get8x8var_ppc: - - prologue - - li r9, 4 - mtctr r9 - - variance_8 6, get8x8var_loop, 1 - - epilogue - - blr - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int source_stride -;# r5 unsigned char *ref_ptr -;# r6 int recon_stride -;# r7 unsigned int *SSE -;# r8 int *Sum -;# -;# r3 return value 
-vp8_get16x16var_ppc: - - prologue - - mtctr r10 - - variance_16 8, get16x16var_loop, 1 - - epilogue - - blr - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int source_stride -;# r5 unsigned char *ref_ptr -;# r6 int recon_stride -;# r7 unsigned int *sse -;# -;# r 3 return value -vp8_mse16x16_ppc: - prologue - - mtctr r10 - -mse16x16_loop: - ;# only one of the inputs should need to be aligned. - load_aligned_16 v4, r3, r10 - load_aligned_16 v5, r5, r10 - - ;# move onto the next line - add r3, r3, r4 - add r5, r5, r6 - - ;# Now compute sse. - vsububs v2, v4, v5 - vsububs v3, v5, v4 - vor v2, v2, v3 - - vmsumubm v9, v2, v2, v9 - - bdnz mse16x16_loop - - vsumsws v9, v9, v7 - - stvx v9, 0, r1 - lwz r3, 12(r1) - - stvx v9, 0, r1 - lwz r3, 12(r1) - - stw r3, 0(r7) ;# sse - - epilogue - - blr - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int source_stride -;# r5 unsigned char *ref_ptr -;# r6 int recon_stride -;# r7 unsigned int *sse -;# -;# r3 return value -vp9_variance16x16_ppc: - - prologue - - mtctr r10 - - variance_16 8, variance16x16_loop, 0 - - epilogue - - blr - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int source_stride -;# r5 unsigned char *ref_ptr -;# r6 int recon_stride -;# r7 unsigned int *sse -;# -;# r3 return value -vp9_variance16x8_ppc: - - prologue - - li r9, 8 - mtctr r9 - - variance_16 7, variance16x8_loop, 0 - - epilogue - - blr - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int source_stride -;# r5 unsigned char *ref_ptr -;# r6 int recon_stride -;# r7 unsigned int *sse -;# -;# r3 return value -vp9_variance8x16_ppc: - - prologue - - li r9, 8 - mtctr r9 - - variance_8 7, variance8x16_loop, 0 - - epilogue - - blr - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int source_stride -;# r5 unsigned char *ref_ptr -;# r6 int recon_stride -;# r7 unsigned int *sse -;# -;# r3 return value -vp9_variance8x8_ppc: - - prologue - - li r9, 4 - mtctr r9 - - variance_8 6, variance8x8_loop, 0 - - epilogue - - blr - -.macro transfer_4x4 I P - lwz r0, 0(\I) - 
add \I, \I, \P - - lwz r10,0(\I) - add \I, \I, \P - - lwz r8, 0(\I) - add \I, \I, \P - - lwz r9, 0(\I) - - stw r0, 0(r1) - stw r10, 4(r1) - stw r8, 8(r1) - stw r9, 12(r1) -.endm - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int source_stride -;# r5 unsigned char *ref_ptr -;# r6 int recon_stride -;# r7 unsigned int *sse -;# -;# r3 return value -vp9_variance4x4_ppc: - - prologue - - transfer_4x4 r3, r4 - lvx v4, 0, r1 - - transfer_4x4 r5, r6 - lvx v5, 0, r1 - - compute_sum_sse - - vsumsws v8, v8, v7 - vsumsws v9, v9, v7 - - stvx v8, 0, r1 - lwz r3, 12(r1) - - stvx v9, 0, r1 - lwz r4, 12(r1) - - stw r4, 0(r7) ;# sse - - mullw r3, r3, r3 ;# sum*sum - srawi r3, r3, 4 ;# (sum*sum) >> 4 - subf r3, r3, r4 ;# sse - ((sum*sum) >> 4) - - epilogue - - blr diff --git a/vp8/encoder/ppc/variance_subpixel_altivec.asm b/vp8/encoder/ppc/variance_subpixel_altivec.asm deleted file mode 100644 index 26cc76f73..000000000 --- a/vp8/encoder/ppc/variance_subpixel_altivec.asm +++ /dev/null @@ -1,865 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - .globl vp9_sub_pixel_variance4x4_ppc - .globl vp9_sub_pixel_variance8x8_ppc - .globl vp9_sub_pixel_variance8x16_ppc - .globl vp9_sub_pixel_variance16x8_ppc - .globl vp9_sub_pixel_variance16x16_ppc - -.macro load_c V, LABEL, OFF, R0, R1 - lis \R0, \LABEL@ha - la \R1, \LABEL@l(\R0) - lvx \V, \OFF, \R1 -.endm - -.macro load_vfilter V0, V1 - load_c \V0, vfilter_b, r6, r12, r10 - - addi r6, r6, 16 - lvx \V1, r6, r10 -.endm - -.macro HProlog jump_label - ;# load up horizontal filter - slwi. r5, r5, 4 ;# index into horizontal filter array - - ;# index to the next set of vectors in the row. 
- li r10, 16 - - ;# downshift by 7 ( divide by 128 ) at the end - vspltish v19, 7 - - ;# If there isn't any filtering to be done for the horizontal, then - ;# just skip to the second pass. - beq \jump_label - - load_c v20, hfilter_b, r5, r12, r0 - - ;# setup constants - ;# v14 permutation value for alignment - load_c v28, b_hperm_b, 0, r12, r0 - - ;# index to the next set of vectors in the row. - li r12, 32 - - ;# rounding added in on the multiply - vspltisw v21, 8 - vspltisw v18, 3 - vslw v18, v21, v18 ;# 0x00000040000000400000004000000040 - - slwi. r6, r6, 5 ;# index into vertical filter array -.endm - -;# Filters a horizontal line -;# expects: -;# r3 src_ptr -;# r4 pitch -;# r10 16 -;# r12 32 -;# v17 perm intput -;# v18 rounding -;# v19 shift -;# v20 filter taps -;# v21 tmp -;# v22 tmp -;# v23 tmp -;# v24 tmp -;# v25 tmp -;# v26 tmp -;# v27 tmp -;# v28 perm output -;# - -.macro hfilter_8 V, hp, lp, increment_counter - lvsl v17, 0, r3 ;# permutate value for alignment - - ;# input to filter is 9 bytes wide, output is 8 bytes. 
- lvx v21, 0, r3 - lvx v22, r10, r3 - -.if \increment_counter - add r3, r3, r4 -.endif - vperm v21, v21, v22, v17 - - vperm v24, v21, v21, \hp ;# v20 = 0123 1234 2345 3456 - vperm v25, v21, v21, \lp ;# v21 = 4567 5678 6789 789A - - vmsummbm v24, v20, v24, v18 - vmsummbm v25, v20, v25, v18 - - vpkswus v24, v24, v25 ;# v24 = 0 4 8 C 1 5 9 D (16-bit) - - vsrh v24, v24, v19 ;# divide v0, v1 by 128 - - vpkuhus \V, v24, v24 ;# \V = scrambled 8-bit result -.endm - -.macro vfilter_16 P0 P1 - vmuleub v22, \P0, v20 ;# 64 + 4 positive taps - vadduhm v22, v18, v22 - vmuloub v23, \P0, v20 - vadduhm v23, v18, v23 - - vmuleub v24, \P1, v21 - vadduhm v22, v22, v24 ;# Re = evens, saturation unnecessary - vmuloub v25, \P1, v21 - vadduhm v23, v23, v25 ;# Ro = odds - - vsrh v22, v22, v19 ;# divide by 128 - vsrh v23, v23, v19 ;# v16 v17 = evens, odds - vmrghh \P0, v22, v23 ;# v18 v19 = 16-bit result in order - vmrglh v23, v22, v23 - vpkuhus \P0, \P0, v23 ;# P0 = 8-bit result -.endm - -.macro compute_sum_sse src, ref, sum, sse, t1, t2, z0 - ;# Compute sum first. Unpack to so signed subract - ;# can be used. Only have a half word signed - ;# subract. Do high, then low. - vmrghb \t1, \z0, \src - vmrghb \t2, \z0, \ref - vsubshs \t1, \t1, \t2 - vsum4shs \sum, \t1, \sum - - vmrglb \t1, \z0, \src - vmrglb \t2, \z0, \ref - vsubshs \t1, \t1, \t2 - vsum4shs \sum, \t1, \sum - - ;# Now compute sse. 
- vsububs \t1, \src, \ref - vsububs \t2, \ref, \src - vor \t1, \t1, \t2 - - vmsumubm \sse, \t1, \t1, \sse -.endm - -.macro variance_final sum, sse, z0, DS - vsumsws \sum, \sum, \z0 - vsumsws \sse, \sse, \z0 - - stvx \sum, 0, r1 - lwz r3, 12(r1) - - stvx \sse, 0, r1 - lwz r4, 12(r1) - - stw r4, 0(r9) ;# sse - - mullw r3, r3, r3 ;# sum*sum - srawi r3, r3, \DS ;# (sum*sum) >> 8 - subf r3, r3, r4 ;# sse - ((sum*sum) >> 8) -.endm - -.macro compute_sum_sse_16 V, increment_counter - load_and_align_16 v16, r7, r8, \increment_counter - compute_sum_sse \V, v16, v18, v19, v20, v21, v23 -.endm - -.macro load_and_align_16 V, R, P, increment_counter - lvsl v17, 0, \R ;# permutate value for alignment - - ;# input to filter is 21 bytes wide, output is 16 bytes. - ;# input will can span three vectors if not aligned correctly. - lvx v21, 0, \R - lvx v22, r10, \R - -.if \increment_counter - add \R, \R, \P -.endif - - vperm \V, v21, v22, v17 -.endm - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int src_pixels_per_line -;# r5 int xoffset -;# r6 int yoffset -;# r7 unsigned char *dst_ptr -;# r8 int dst_pixels_per_line -;# r9 unsigned int *sse -;# -;# r3 return value -vp9_sub_pixel_variance4x4_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xf830 - ori r12, r12, 0xfff8 - mtspr 256, r12 ;# set VRSAVE - - stwu r1,-32(r1) ;# create space on the stack - - HProlog second_pass_4x4_pre_copy_b - - ;# Load up permutation constants - load_c v10, b_0123_b, 0, r12, r0 - load_c v11, b_4567_b, 0, r12, r0 - - hfilter_8 v0, v10, v11, 1 - hfilter_8 v1, v10, v11, 1 - hfilter_8 v2, v10, v11, 1 - hfilter_8 v3, v10, v11, 1 - - ;# Finished filtering main horizontal block. If there is no - ;# vertical filtering, jump to storing the data. Otherwise - ;# load up and filter the additional line that is needed - ;# for the vertical filter. 
- beq compute_sum_sse_4x4_b - - hfilter_8 v4, v10, v11, 0 - - b second_pass_4x4_b - -second_pass_4x4_pre_copy_b: - slwi r6, r6, 5 ;# index into vertical filter array - - load_and_align_16 v0, r3, r4, 1 - load_and_align_16 v1, r3, r4, 1 - load_and_align_16 v2, r3, r4, 1 - load_and_align_16 v3, r3, r4, 1 - load_and_align_16 v4, r3, r4, 0 - -second_pass_4x4_b: - vspltish v20, 8 - vspltish v18, 3 - vslh v18, v20, v18 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040 - - load_vfilter v20, v21 - - vfilter_16 v0, v1 - vfilter_16 v1, v2 - vfilter_16 v2, v3 - vfilter_16 v3, v4 - -compute_sum_sse_4x4_b: - vspltish v18, 0 ;# sum - vspltish v19, 0 ;# sse - vspltish v23, 0 ;# unpack - li r10, 16 - - load_and_align_16 v4, r7, r8, 1 - load_and_align_16 v5, r7, r8, 1 - load_and_align_16 v6, r7, r8, 1 - load_and_align_16 v7, r7, r8, 1 - - vmrghb v0, v0, v1 - vmrghb v1, v2, v3 - - vmrghb v2, v4, v5 - vmrghb v3, v6, v7 - - load_c v10, b_hilo_b, 0, r12, r0 - - vperm v0, v0, v1, v10 - vperm v1, v2, v3, v10 - - compute_sum_sse v0, v1, v18, v19, v20, v21, v23 - - variance_final v18, v19, v23, 4 - - addi r1, r1, 32 ;# recover stack - mtspr 256, r11 ;# reset old VRSAVE - - blr - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int src_pixels_per_line -;# r5 int xoffset -;# r6 int yoffset -;# r7 unsigned char *dst_ptr -;# r8 int dst_pixels_per_line -;# r9 unsigned int *sse -;# -;# r3 return value -vp9_sub_pixel_variance8x8_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xfff0 - ori r12, r12, 0xffff - mtspr 256, r12 ;# set VRSAVE - - stwu r1,-32(r1) ;# create space on the stack - - HProlog second_pass_8x8_pre_copy_b - - ;# Load up permutation constants - load_c v10, b_0123_b, 0, r12, r0 - load_c v11, b_4567_b, 0, r12, r0 - - hfilter_8 v0, v10, v11, 1 - hfilter_8 v1, v10, v11, 1 - hfilter_8 v2, v10, v11, 1 - hfilter_8 v3, v10, v11, 1 - hfilter_8 v4, v10, v11, 1 - hfilter_8 v5, v10, v11, 1 - hfilter_8 v6, v10, v11, 1 - hfilter_8 v7, v10, v11, 1 - - ;# Finished filtering main horizontal 
block. If there is no - ;# vertical filtering, jump to storing the data. Otherwise - ;# load up and filter the additional line that is needed - ;# for the vertical filter. - beq compute_sum_sse_8x8_b - - hfilter_8 v8, v10, v11, 0 - - b second_pass_8x8_b - -second_pass_8x8_pre_copy_b: - slwi. r6, r6, 5 ;# index into vertical filter array - - load_and_align_16 v0, r3, r4, 1 - load_and_align_16 v1, r3, r4, 1 - load_and_align_16 v2, r3, r4, 1 - load_and_align_16 v3, r3, r4, 1 - load_and_align_16 v4, r3, r4, 1 - load_and_align_16 v5, r3, r4, 1 - load_and_align_16 v6, r3, r4, 1 - load_and_align_16 v7, r3, r4, 1 - load_and_align_16 v8, r3, r4, 0 - - beq compute_sum_sse_8x8_b - -second_pass_8x8_b: - vspltish v20, 8 - vspltish v18, 3 - vslh v18, v20, v18 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040 - - load_vfilter v20, v21 - - vfilter_16 v0, v1 - vfilter_16 v1, v2 - vfilter_16 v2, v3 - vfilter_16 v3, v4 - vfilter_16 v4, v5 - vfilter_16 v5, v6 - vfilter_16 v6, v7 - vfilter_16 v7, v8 - -compute_sum_sse_8x8_b: - vspltish v18, 0 ;# sum - vspltish v19, 0 ;# sse - vspltish v23, 0 ;# unpack - li r10, 16 - - vmrghb v0, v0, v1 - vmrghb v1, v2, v3 - vmrghb v2, v4, v5 - vmrghb v3, v6, v7 - - load_and_align_16 v4, r7, r8, 1 - load_and_align_16 v5, r7, r8, 1 - load_and_align_16 v6, r7, r8, 1 - load_and_align_16 v7, r7, r8, 1 - load_and_align_16 v8, r7, r8, 1 - load_and_align_16 v9, r7, r8, 1 - load_and_align_16 v10, r7, r8, 1 - load_and_align_16 v11, r7, r8, 0 - - vmrghb v4, v4, v5 - vmrghb v5, v6, v7 - vmrghb v6, v8, v9 - vmrghb v7, v10, v11 - - compute_sum_sse v0, v4, v18, v19, v20, v21, v23 - compute_sum_sse v1, v5, v18, v19, v20, v21, v23 - compute_sum_sse v2, v6, v18, v19, v20, v21, v23 - compute_sum_sse v3, v7, v18, v19, v20, v21, v23 - - variance_final v18, v19, v23, 6 - - addi r1, r1, 32 ;# recover stack - mtspr 256, r11 ;# reset old VRSAVE - blr - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int src_pixels_per_line -;# r5 int xoffset -;# r6 int yoffset -;# r7 unsigned char 
*dst_ptr -;# r8 int dst_pixels_per_line -;# r9 unsigned int *sse -;# -;# r3 return value -vp9_sub_pixel_variance8x16_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffff - ori r12, r12, 0xfffc - mtspr 256, r12 ;# set VRSAVE - - stwu r1,-32(r1) ;# create space on the stack - - HProlog second_pass_8x16_pre_copy_b - - ;# Load up permutation constants - load_c v29, b_0123_b, 0, r12, r0 - load_c v30, b_4567_b, 0, r12, r0 - - hfilter_8 v0, v29, v30, 1 - hfilter_8 v1, v29, v30, 1 - hfilter_8 v2, v29, v30, 1 - hfilter_8 v3, v29, v30, 1 - hfilter_8 v4, v29, v30, 1 - hfilter_8 v5, v29, v30, 1 - hfilter_8 v6, v29, v30, 1 - hfilter_8 v7, v29, v30, 1 - hfilter_8 v8, v29, v30, 1 - hfilter_8 v9, v29, v30, 1 - hfilter_8 v10, v29, v30, 1 - hfilter_8 v11, v29, v30, 1 - hfilter_8 v12, v29, v30, 1 - hfilter_8 v13, v29, v30, 1 - hfilter_8 v14, v29, v30, 1 - hfilter_8 v15, v29, v30, 1 - - ;# Finished filtering main horizontal block. If there is no - ;# vertical filtering, jump to storing the data. Otherwise - ;# load up and filter the additional line that is needed - ;# for the vertical filter. - beq compute_sum_sse_8x16_b - - hfilter_8 v16, v29, v30, 0 - - b second_pass_8x16_b - -second_pass_8x16_pre_copy_b: - slwi. 
r6, r6, 5 ;# index into vertical filter array - - load_and_align_16 v0, r3, r4, 1 - load_and_align_16 v1, r3, r4, 1 - load_and_align_16 v2, r3, r4, 1 - load_and_align_16 v3, r3, r4, 1 - load_and_align_16 v4, r3, r4, 1 - load_and_align_16 v5, r3, r4, 1 - load_and_align_16 v6, r3, r4, 1 - load_and_align_16 v7, r3, r4, 1 - load_and_align_16 v8, r3, r4, 1 - load_and_align_16 v9, r3, r4, 1 - load_and_align_16 v10, r3, r4, 1 - load_and_align_16 v11, r3, r4, 1 - load_and_align_16 v12, r3, r4, 1 - load_and_align_16 v13, r3, r4, 1 - load_and_align_16 v14, r3, r4, 1 - load_and_align_16 v15, r3, r4, 1 - load_and_align_16 v16, r3, r4, 0 - - beq compute_sum_sse_8x16_b - -second_pass_8x16_b: - vspltish v20, 8 - vspltish v18, 3 - vslh v18, v20, v18 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040 - - load_vfilter v20, v21 - - vfilter_16 v0, v1 - vfilter_16 v1, v2 - vfilter_16 v2, v3 - vfilter_16 v3, v4 - vfilter_16 v4, v5 - vfilter_16 v5, v6 - vfilter_16 v6, v7 - vfilter_16 v7, v8 - vfilter_16 v8, v9 - vfilter_16 v9, v10 - vfilter_16 v10, v11 - vfilter_16 v11, v12 - vfilter_16 v12, v13 - vfilter_16 v13, v14 - vfilter_16 v14, v15 - vfilter_16 v15, v16 - -compute_sum_sse_8x16_b: - vspltish v18, 0 ;# sum - vspltish v19, 0 ;# sse - vspltish v23, 0 ;# unpack - li r10, 16 - - vmrghb v0, v0, v1 - vmrghb v1, v2, v3 - vmrghb v2, v4, v5 - vmrghb v3, v6, v7 - vmrghb v4, v8, v9 - vmrghb v5, v10, v11 - vmrghb v6, v12, v13 - vmrghb v7, v14, v15 - - load_and_align_16 v8, r7, r8, 1 - load_and_align_16 v9, r7, r8, 1 - load_and_align_16 v10, r7, r8, 1 - load_and_align_16 v11, r7, r8, 1 - load_and_align_16 v12, r7, r8, 1 - load_and_align_16 v13, r7, r8, 1 - load_and_align_16 v14, r7, r8, 1 - load_and_align_16 v15, r7, r8, 1 - - vmrghb v8, v8, v9 - vmrghb v9, v10, v11 - vmrghb v10, v12, v13 - vmrghb v11, v14, v15 - - compute_sum_sse v0, v8, v18, v19, v20, v21, v23 - compute_sum_sse v1, v9, v18, v19, v20, v21, v23 - compute_sum_sse v2, v10, v18, v19, v20, v21, v23 - compute_sum_sse v3, v11, v18, v19, 
v20, v21, v23 - - load_and_align_16 v8, r7, r8, 1 - load_and_align_16 v9, r7, r8, 1 - load_and_align_16 v10, r7, r8, 1 - load_and_align_16 v11, r7, r8, 1 - load_and_align_16 v12, r7, r8, 1 - load_and_align_16 v13, r7, r8, 1 - load_and_align_16 v14, r7, r8, 1 - load_and_align_16 v15, r7, r8, 0 - - vmrghb v8, v8, v9 - vmrghb v9, v10, v11 - vmrghb v10, v12, v13 - vmrghb v11, v14, v15 - - compute_sum_sse v4, v8, v18, v19, v20, v21, v23 - compute_sum_sse v5, v9, v18, v19, v20, v21, v23 - compute_sum_sse v6, v10, v18, v19, v20, v21, v23 - compute_sum_sse v7, v11, v18, v19, v20, v21, v23 - - variance_final v18, v19, v23, 7 - - addi r1, r1, 32 ;# recover stack - mtspr 256, r11 ;# reset old VRSAVE - blr - -;# Filters a horizontal line -;# expects: -;# r3 src_ptr -;# r4 pitch -;# r10 16 -;# r12 32 -;# v17 perm intput -;# v18 rounding -;# v19 shift -;# v20 filter taps -;# v21 tmp -;# v22 tmp -;# v23 tmp -;# v24 tmp -;# v25 tmp -;# v26 tmp -;# v27 tmp -;# v28 perm output -;# -.macro hfilter_16 V, increment_counter - - lvsl v17, 0, r3 ;# permutate value for alignment - - ;# input to filter is 21 bytes wide, output is 16 bytes. - ;# input will can span three vectors if not aligned correctly. 
- lvx v21, 0, r3 - lvx v22, r10, r3 - lvx v23, r12, r3 - -.if \increment_counter - add r3, r3, r4 -.endif - vperm v21, v21, v22, v17 - vperm v22, v22, v23, v17 ;# v8 v9 = 21 input pixels left-justified - - ;# set 0 - vmsummbm v24, v20, v21, v18 ;# taps times elements - - ;# set 1 - vsldoi v23, v21, v22, 1 - vmsummbm v25, v20, v23, v18 - - ;# set 2 - vsldoi v23, v21, v22, 2 - vmsummbm v26, v20, v23, v18 - - ;# set 3 - vsldoi v23, v21, v22, 3 - vmsummbm v27, v20, v23, v18 - - vpkswus v24, v24, v25 ;# v24 = 0 4 8 C 1 5 9 D (16-bit) - vpkswus v25, v26, v27 ;# v25 = 2 6 A E 3 7 B F - - vsrh v24, v24, v19 ;# divide v0, v1 by 128 - vsrh v25, v25, v19 - - vpkuhus \V, v24, v25 ;# \V = scrambled 8-bit result - vperm \V, \V, v0, v28 ;# \V = correctly-ordered result -.endm - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int src_pixels_per_line -;# r5 int xoffset -;# r6 int yoffset -;# r7 unsigned char *dst_ptr -;# r8 int dst_pixels_per_line -;# r9 unsigned int *sse -;# -;# r3 return value -vp9_sub_pixel_variance16x8_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffff - ori r12, r12, 0xfff8 - mtspr 256, r12 ;# set VRSAVE - - stwu r1, -32(r1) ;# create space on the stack - - HProlog second_pass_16x8_pre_copy_b - - hfilter_16 v0, 1 - hfilter_16 v1, 1 - hfilter_16 v2, 1 - hfilter_16 v3, 1 - hfilter_16 v4, 1 - hfilter_16 v5, 1 - hfilter_16 v6, 1 - hfilter_16 v7, 1 - - ;# Finished filtering main horizontal block. If there is no - ;# vertical filtering, jump to storing the data. Otherwise - ;# load up and filter the additional line that is needed - ;# for the vertical filter. - beq compute_sum_sse_16x8_b - - hfilter_16 v8, 0 - - b second_pass_16x8_b - -second_pass_16x8_pre_copy_b: - slwi. 
r6, r6, 5 ;# index into vertical filter array - - load_and_align_16 v0, r3, r4, 1 - load_and_align_16 v1, r3, r4, 1 - load_and_align_16 v2, r3, r4, 1 - load_and_align_16 v3, r3, r4, 1 - load_and_align_16 v4, r3, r4, 1 - load_and_align_16 v5, r3, r4, 1 - load_and_align_16 v6, r3, r4, 1 - load_and_align_16 v7, r3, r4, 1 - load_and_align_16 v8, r3, r4, 1 - - beq compute_sum_sse_16x8_b - -second_pass_16x8_b: - vspltish v20, 8 - vspltish v18, 3 - vslh v18, v20, v18 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040 - - load_vfilter v20, v21 - - vfilter_16 v0, v1 - vfilter_16 v1, v2 - vfilter_16 v2, v3 - vfilter_16 v3, v4 - vfilter_16 v4, v5 - vfilter_16 v5, v6 - vfilter_16 v6, v7 - vfilter_16 v7, v8 - -compute_sum_sse_16x8_b: - vspltish v18, 0 ;# sum - vspltish v19, 0 ;# sse - vspltish v23, 0 ;# unpack - li r10, 16 - - compute_sum_sse_16 v0, 1 - compute_sum_sse_16 v1, 1 - compute_sum_sse_16 v2, 1 - compute_sum_sse_16 v3, 1 - compute_sum_sse_16 v4, 1 - compute_sum_sse_16 v5, 1 - compute_sum_sse_16 v6, 1 - compute_sum_sse_16 v7, 0 - - variance_final v18, v19, v23, 7 - - addi r1, r1, 32 ;# recover stack - - mtspr 256, r11 ;# reset old VRSAVE - - blr - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int src_pixels_per_line -;# r5 int xoffset -;# r6 int yoffset -;# r7 unsigned char *dst_ptr -;# r8 int dst_pixels_per_line -;# r9 unsigned int *sse -;# -;# r3 return value -vp9_sub_pixel_variance16x16_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffff - ori r12, r12, 0xfff8 - mtspr 256, r12 ;# set VRSAVE - - stwu r1, -32(r1) ;# create space on the stack - - HProlog second_pass_16x16_pre_copy_b - - hfilter_16 v0, 1 - hfilter_16 v1, 1 - hfilter_16 v2, 1 - hfilter_16 v3, 1 - hfilter_16 v4, 1 - hfilter_16 v5, 1 - hfilter_16 v6, 1 - hfilter_16 v7, 1 - hfilter_16 v8, 1 - hfilter_16 v9, 1 - hfilter_16 v10, 1 - hfilter_16 v11, 1 - hfilter_16 v12, 1 - hfilter_16 v13, 1 - hfilter_16 v14, 1 - hfilter_16 v15, 1 - - ;# Finished filtering main horizontal block. 
If there is no - ;# vertical filtering, jump to storing the data. Otherwise - ;# load up and filter the additional line that is needed - ;# for the vertical filter. - beq compute_sum_sse_16x16_b - - hfilter_16 v16, 0 - - b second_pass_16x16_b - -second_pass_16x16_pre_copy_b: - slwi. r6, r6, 5 ;# index into vertical filter array - - load_and_align_16 v0, r3, r4, 1 - load_and_align_16 v1, r3, r4, 1 - load_and_align_16 v2, r3, r4, 1 - load_and_align_16 v3, r3, r4, 1 - load_and_align_16 v4, r3, r4, 1 - load_and_align_16 v5, r3, r4, 1 - load_and_align_16 v6, r3, r4, 1 - load_and_align_16 v7, r3, r4, 1 - load_and_align_16 v8, r3, r4, 1 - load_and_align_16 v9, r3, r4, 1 - load_and_align_16 v10, r3, r4, 1 - load_and_align_16 v11, r3, r4, 1 - load_and_align_16 v12, r3, r4, 1 - load_and_align_16 v13, r3, r4, 1 - load_and_align_16 v14, r3, r4, 1 - load_and_align_16 v15, r3, r4, 1 - load_and_align_16 v16, r3, r4, 0 - - beq compute_sum_sse_16x16_b - -second_pass_16x16_b: - vspltish v20, 8 - vspltish v18, 3 - vslh v18, v20, v18 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040 - - load_vfilter v20, v21 - - vfilter_16 v0, v1 - vfilter_16 v1, v2 - vfilter_16 v2, v3 - vfilter_16 v3, v4 - vfilter_16 v4, v5 - vfilter_16 v5, v6 - vfilter_16 v6, v7 - vfilter_16 v7, v8 - vfilter_16 v8, v9 - vfilter_16 v9, v10 - vfilter_16 v10, v11 - vfilter_16 v11, v12 - vfilter_16 v12, v13 - vfilter_16 v13, v14 - vfilter_16 v14, v15 - vfilter_16 v15, v16 - -compute_sum_sse_16x16_b: - vspltish v18, 0 ;# sum - vspltish v19, 0 ;# sse - vspltish v23, 0 ;# unpack - li r10, 16 - - compute_sum_sse_16 v0, 1 - compute_sum_sse_16 v1, 1 - compute_sum_sse_16 v2, 1 - compute_sum_sse_16 v3, 1 - compute_sum_sse_16 v4, 1 - compute_sum_sse_16 v5, 1 - compute_sum_sse_16 v6, 1 - compute_sum_sse_16 v7, 1 - compute_sum_sse_16 v8, 1 - compute_sum_sse_16 v9, 1 - compute_sum_sse_16 v10, 1 - compute_sum_sse_16 v11, 1 - compute_sum_sse_16 v12, 1 - compute_sum_sse_16 v13, 1 - compute_sum_sse_16 v14, 1 - compute_sum_sse_16 v15, 0 - - 
variance_final v18, v19, v23, 8 - - addi r1, r1, 32 ;# recover stack - - mtspr 256, r11 ;# reset old VRSAVE - - blr - - .data - - .align 4 -hfilter_b: - .byte 128, 0, 0, 0,128, 0, 0, 0,128, 0, 0, 0,128, 0, 0, 0 - .byte 112, 16, 0, 0,112, 16, 0, 0,112, 16, 0, 0,112, 16, 0, 0 - .byte 96, 32, 0, 0, 96, 32, 0, 0, 96, 32, 0, 0, 96, 32, 0, 0 - .byte 80, 48, 0, 0, 80, 48, 0, 0, 80, 48, 0, 0, 80, 48, 0, 0 - .byte 64, 64, 0, 0, 64, 64, 0, 0, 64, 64, 0, 0, 64, 64, 0, 0 - .byte 48, 80, 0, 0, 48, 80, 0, 0, 48, 80, 0, 0, 48, 80, 0, 0 - .byte 32, 96, 0, 0, 32, 96, 0, 0, 32, 96, 0, 0, 32, 96, 0, 0 - .byte 16,112, 0, 0, 16,112, 0, 0, 16,112, 0, 0, 16,112, 0, 0 - - .align 4 -vfilter_b: - .byte 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128 - .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112 - .byte 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16 - .byte 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96 - .byte 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32 - .byte 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80 - .byte 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48 - .byte 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 - .byte 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 - .byte 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48 - .byte 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80 - .byte 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32 - .byte 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96 - .byte 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16 - .byte 112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112 - - .align 4 -b_hperm_b: - .byte 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 - - .align 4 -b_0123_b: - .byte 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6 - - .align 4 -b_4567_b: - .byte 
4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10 - -b_hilo_b: - .byte 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 diff --git a/vp8/encoder/psnr.c b/vp8/encoder/psnr.c deleted file mode 100644 index 04850518f..000000000 --- a/vp8/encoder/psnr.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vpx_scale/yv12config.h" -#include "math.h" -#include "vp8/common/systemdependent.h" /* for vp9_clear_system_state() */ - -#define MAX_PSNR 100 - -double vp9_mse2psnr(double Samples, double Peak, double Mse) { - double psnr; - - if ((double)Mse > 0.0) - psnr = 10.0 * log10(Peak * Peak * Samples / Mse); - else - psnr = MAX_PSNR; // Limit to prevent / 0 - - if (psnr > MAX_PSNR) - psnr = MAX_PSNR; - - return psnr; -} diff --git a/vp8/encoder/psnr.h b/vp8/encoder/psnr.h deleted file mode 100644 index c25bea750..000000000 --- a/vp8/encoder/psnr.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#ifndef __INC_PSNR_H -#define __INC_PSNR_H - -extern double vp9_mse2psnr(double Samples, double Peak, double Mse); - -#endif diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c deleted file mode 100644 index 65b533d2c..000000000 --- a/vp8/encoder/quantize.c +++ /dev/null @@ -1,716 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <math.h> -#include "vpx_mem/vpx_mem.h" - -#include "onyx_int.h" -#include "quantize.h" -#include "vp8/common/quant_common.h" - -#include "vp8/common/seg_common.h" - -#ifdef ENC_DEBUG -extern int enc_debug; -#endif - -void vp9_ht_quantize_b_4x4(BLOCK *b, BLOCKD *d, TX_TYPE tx_type) { - int i, rc, eob; - int zbin; - int x, y, z, sz; - short *zbin_boost_ptr = b->zrun_zbin_boost; - short *coeff_ptr = b->coeff; - short *zbin_ptr = b->zbin; - short *round_ptr = b->round; - short *quant_ptr = b->quant; - unsigned char *quant_shift_ptr = b->quant_shift; - short *qcoeff_ptr = d->qcoeff; - short *dqcoeff_ptr = d->dqcoeff; - short *dequant_ptr = d->dequant; - short zbin_oq_value = b->zbin_extra; - - int const *pt_scan ; - - switch (tx_type) { - case ADST_DCT : - pt_scan = vp9_row_scan; - break; - - case DCT_ADST : - pt_scan = vp9_col_scan; - break; - - default : - pt_scan = vp9_default_zig_zag1d; - break; - } - - vpx_memset(qcoeff_ptr, 0, 32); - vpx_memset(dqcoeff_ptr, 0, 32); - - eob = -1; - - for (i = 0; i < b->eob_max_offset; i++) { - rc = pt_scan[i]; - z = coeff_ptr[rc]; - - zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value; - zbin_boost_ptr ++; - - sz = (z >> 31); // sign of z - x = (z ^ sz) - sz; // x = abs(z) - - if (x >= zbin) { - x += 
round_ptr[rc]; - y = (((x * quant_ptr[rc]) >> 16) + x) - >> quant_shift_ptr[rc]; // quantize (x) - x = (y ^ sz) - sz; // get the sign back - qcoeff_ptr[rc] = x; // write to destination - dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value - - if (y) { - eob = i; // last nonzero coeffs - zbin_boost_ptr = b->zrun_zbin_boost; // reset zero runlength - } - } - } - - d->eob = eob + 1; -} - -void vp9_regular_quantize_b_4x4(BLOCK *b, BLOCKD *d) { - int i, rc, eob; - int zbin; - int x, y, z, sz; - short *zbin_boost_ptr = b->zrun_zbin_boost; - short *coeff_ptr = b->coeff; - short *zbin_ptr = b->zbin; - short *round_ptr = b->round; - short *quant_ptr = b->quant; - unsigned char *quant_shift_ptr = b->quant_shift; - short *qcoeff_ptr = d->qcoeff; - short *dqcoeff_ptr = d->dqcoeff; - short *dequant_ptr = d->dequant; - short zbin_oq_value = b->zbin_extra; - - vpx_memset(qcoeff_ptr, 0, 32); - vpx_memset(dqcoeff_ptr, 0, 32); - - eob = -1; - - for (i = 0; i < b->eob_max_offset; i++) { - rc = vp9_default_zig_zag1d[i]; - z = coeff_ptr[rc]; - - zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value; - zbin_boost_ptr ++; - - sz = (z >> 31); // sign of z - x = (z ^ sz) - sz; // x = abs(z) - - if (x >= zbin) { - x += round_ptr[rc]; - - y = (((x * quant_ptr[rc]) >> 16) + x) - >> quant_shift_ptr[rc]; // quantize (x) - x = (y ^ sz) - sz; // get the sign back - qcoeff_ptr[rc] = x; // write to destination - dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value - - if (y) { - eob = i; // last nonzero coeffs - zbin_boost_ptr = b->zrun_zbin_boost; // reset zero runlength - } - } - } - - d->eob = eob + 1; -} - -void vp9_quantize_mby_4x4_c(MACROBLOCK *x) { - int i; - int has_2nd_order = x->e_mbd.mode_info_context->mbmi.mode != SPLITMV; - - for (i = 0; i < 16; i++) - x->quantize_b_4x4(&x->block[i], &x->e_mbd.block[i]); - - if (has_2nd_order) - x->quantize_b_4x4(&x->block[24], &x->e_mbd.block[24]); -} - -void vp9_quantize_mbuv_4x4_c(MACROBLOCK *x) { - int i; - - for (i = 16; i < 24; 
i++) - x->quantize_b_4x4(&x->block[i], &x->e_mbd.block[i]); -} - -void vp9_quantize_mb_4x4_c(MACROBLOCK *x) { - vp9_quantize_mby_4x4_c(x); - vp9_quantize_mbuv_4x4_c(x); -} - -void vp9_regular_quantize_b_2x2(BLOCK *b, BLOCKD *d) { - int i, rc, eob; - int zbin; - int x, y, z, sz; - short *zbin_boost_ptr = b->zrun_zbin_boost; - int zbin_zrun_index = 0; - short *coeff_ptr = b->coeff; - short *zbin_ptr = b->zbin; - short *round_ptr = b->round; - short *quant_ptr = b->quant; - unsigned char *quant_shift_ptr = b->quant_shift; - short *qcoeff_ptr = d->qcoeff; - short *dqcoeff_ptr = d->dqcoeff; - short *dequant_ptr = d->dequant; - short zbin_oq_value = b->zbin_extra; - // double q2nd = 4; - vpx_memset(qcoeff_ptr, 0, 32); - vpx_memset(dqcoeff_ptr, 0, 32); - - eob = -1; - - for (i = 0; i < b->eob_max_offset_8x8; i++) { - rc = vp9_default_zig_zag1d[i]; - z = coeff_ptr[rc]; - - zbin_boost_ptr = &b->zrun_zbin_boost[zbin_zrun_index]; - zbin_zrun_index += 4; - zbin = (zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value); - - sz = (z >> 31); // sign of z - x = (z ^ sz) - sz; // x = abs(z) - - if (x >= zbin) { - x += (round_ptr[rc]); - y = ((int)((int)(x * quant_ptr[rc]) >> 16) + x) - >> quant_shift_ptr[rc]; // quantize (x) - x = (y ^ sz) - sz; // get the sign back - qcoeff_ptr[rc] = x; // write to destination - dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value - - if (y) { - eob = i; // last nonzero coeffs - zbin_zrun_index = 0; - } - } - } - - d->eob = eob + 1; -} - -void vp9_regular_quantize_b_8x8(BLOCK *b, BLOCKD *d) { - int i, rc, eob; - int zbin; - int x, y, z, sz; - short *zbin_boost_ptr = b->zrun_zbin_boost_8x8; - short *coeff_ptr = b->coeff; - short *zbin_ptr = b->zbin_8x8; - short *round_ptr = b->round; - short *quant_ptr = b->quant; - unsigned char *quant_shift_ptr = b->quant_shift; - short *qcoeff_ptr = d->qcoeff; - short *dqcoeff_ptr = d->dqcoeff; - short *dequant_ptr = d->dequant; - short zbin_oq_value = b->zbin_extra; - - vpx_memset(qcoeff_ptr, 0, 64 * 
sizeof(short)); - vpx_memset(dqcoeff_ptr, 0, 64 * sizeof(short)); - - eob = -1; - - for (i = 0; i < b->eob_max_offset_8x8; i++) { - rc = vp9_default_zig_zag1d_8x8[i]; - z = coeff_ptr[rc]; - - zbin = (zbin_ptr[rc != 0] + *zbin_boost_ptr + zbin_oq_value); - zbin_boost_ptr++; - - sz = (z >> 31); // sign of z - x = (z ^ sz) - sz; // x = abs(z) - - if (x >= zbin) { - x += (round_ptr[rc != 0]); - y = ((int)(((int)(x * quant_ptr[rc != 0]) >> 16) + x)) - >> quant_shift_ptr[rc != 0]; // quantize (x) - x = (y ^ sz) - sz; // get the sign back - qcoeff_ptr[rc] = x; // write to destination - dqcoeff_ptr[rc] = x * dequant_ptr[rc != 0]; // dequantized value - - if (y) { - eob = i; // last nonzero coeffs - zbin_boost_ptr = b->zrun_zbin_boost_8x8; - } - } - } - - d->eob = eob + 1; -} - -void vp9_quantize_mby_8x8(MACROBLOCK *x) { - int i; - int has_2nd_order = x->e_mbd.mode_info_context->mbmi.mode != SPLITMV; - - for (i = 0; i < 16; i ++) { - x->e_mbd.block[i].eob = 0; - } - x->e_mbd.block[24].eob = 0; - for (i = 0; i < 16; i += 4) - x->quantize_b_8x8(&x->block[i], &x->e_mbd.block[i]); - - if (has_2nd_order) - x->quantize_b_2x2(&x->block[24], &x->e_mbd.block[24]); -} - -void vp9_quantize_mbuv_8x8(MACROBLOCK *x) { - int i; - - for (i = 16; i < 24; i ++) - x->e_mbd.block[i].eob = 0; - for (i = 16; i < 24; i += 4) - x->quantize_b_8x8(&x->block[i], &x->e_mbd.block[i]); -} - -void vp9_quantize_mb_8x8(MACROBLOCK *x) { - vp9_quantize_mby_8x8(x); - vp9_quantize_mbuv_8x8(x); -} - -void vp9_quantize_mby_16x16(MACROBLOCK *x) { - int i; - - for (i = 0; i < 16; i++) - x->e_mbd.block[i].eob = 0; - x->e_mbd.block[24].eob = 0; - x->quantize_b_16x16(&x->block[0], &x->e_mbd.block[0]); -} - -void vp9_quantize_mb_16x16(MACROBLOCK *x) { - vp9_quantize_mby_16x16(x); - vp9_quantize_mbuv_8x8(x); -} - -void vp9_regular_quantize_b_16x16(BLOCK *b, BLOCKD *d) { - int i, rc, eob; - int zbin; - int x, y, z, sz; - short *zbin_boost_ptr = b->zrun_zbin_boost_16x16; - short *coeff_ptr = b->coeff; - short *zbin_ptr = 
b->zbin_16x16; - short *round_ptr = b->round; - short *quant_ptr = b->quant; - unsigned char *quant_shift_ptr = b->quant_shift; - short *qcoeff_ptr = d->qcoeff; - short *dqcoeff_ptr = d->dqcoeff; - short *dequant_ptr = d->dequant; - short zbin_oq_value = b->zbin_extra; - - vpx_memset(qcoeff_ptr, 0, 256*sizeof(short)); - vpx_memset(dqcoeff_ptr, 0, 256*sizeof(short)); - - eob = -1; - for (i = 0; i < b->eob_max_offset_16x16; i++) { - rc = vp9_default_zig_zag1d_16x16[i]; - z = coeff_ptr[rc]; - - zbin = (zbin_ptr[rc!=0] + *zbin_boost_ptr + zbin_oq_value); - zbin_boost_ptr ++; - - sz = (z >> 31); // sign of z - x = (z ^ sz) - sz; // x = abs(z) - - if (x >= zbin) { - x += (round_ptr[rc!=0]); - y = ((int)(((int)(x * quant_ptr[rc!=0]) >> 16) + x)) - >> quant_shift_ptr[rc!=0]; // quantize (x) - x = (y ^ sz) - sz; // get the sign back - qcoeff_ptr[rc] = x; // write to destination - dqcoeff_ptr[rc] = x * dequant_ptr[rc!=0]; // dequantized value - - if (y) { - eob = i; // last nonzero coeffs - zbin_boost_ptr = b->zrun_zbin_boost_16x16; - } - } - } - - d->eob = eob + 1; -} - -/* quantize_b_pair function pointer in MACROBLOCK structure is set to one of - * these two C functions if corresponding optimized routine is not available. - * NEON optimized version implements currently the fast quantization for pair - * of blocks. 
*/ -void vp9_regular_quantize_b_4x4_pair(BLOCK *b1, BLOCK *b2, - BLOCKD *d1, BLOCKD *d2) { - vp9_regular_quantize_b_4x4(b1, d1); - vp9_regular_quantize_b_4x4(b2, d2); -} - -static void invert_quant(short *quant, - unsigned char *shift, short d) { - unsigned t; - int l; - t = d; - for (l = 0; t > 1; l++) - t >>= 1; - t = 1 + (1 << (16 + l)) / d; - *quant = (short)(t - (1 << 16)); - *shift = l; -} - -void vp9_init_quantizer(VP9_COMP *cpi) { - int i; - int quant_val; - int Q; - static const int zbin_boost[16] = { 0, 0, 8, 10, 12, 14, 16, 20, - 24, 28, 32, 36, 40, 44, 44, 44 - }; - - static const int zbin_boost_8x8[64] = { 0, 0, 0, 8, 8, 8, 10, 12, - 14, 16, 18, 20, 22, 24, 26, 28, - 30, 32, 34, 36, 38, 40, 42, 44, - 46, 48, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 48, 48 - }; - static const int zbin_boost_16x16[256] = { - 0, 0, 0, 8, 8, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, - 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, - }; - 
int qrounding_factor = 48; - - - for (Q = 0; Q < QINDEX_RANGE; Q++) { - int qzbin_factor = (vp9_dc_quant(Q, 0) < 148) ? 84 : 80; - -#if CONFIG_LOSSLESS - if (cpi->oxcf.lossless) { - if (Q == 0) { - qzbin_factor = 64; - qrounding_factor = 64; - } - } -#endif - - // dc values - quant_val = vp9_dc_quant(Q, cpi->common.y1dc_delta_q); - invert_quant(cpi->Y1quant[Q] + 0, - cpi->Y1quant_shift[Q] + 0, quant_val); - cpi->Y1zbin[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7; - cpi->Y1zbin_8x8[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7; - cpi->Y1zbin_16x16[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7; - cpi->Y1round[Q][0] = (qrounding_factor * quant_val) >> 7; - cpi->common.Y1dequant[Q][0] = quant_val; - cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7; - cpi->zrun_zbin_boost_y1_8x8[Q][0] = - ((quant_val * zbin_boost_8x8[0]) + 64) >> 7; - cpi->zrun_zbin_boost_y1_16x16[Q][0] = ((quant_val * zbin_boost_16x16[0]) + 64) >> 7; - - - quant_val = vp9_dc2quant(Q, cpi->common.y2dc_delta_q); - invert_quant(cpi->Y2quant[Q] + 0, - cpi->Y2quant_shift[Q] + 0, quant_val); - cpi->Y2zbin[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7; - cpi->Y2zbin_8x8[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7; - cpi->Y2zbin_16x16[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7; - cpi->Y2round[Q][0] = (qrounding_factor * quant_val) >> 7; - cpi->common.Y2dequant[Q][0] = quant_val; - cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7; - cpi->zrun_zbin_boost_y2_8x8[Q][0] = - ((quant_val * zbin_boost_8x8[0]) + 64) >> 7; - cpi->zrun_zbin_boost_y2_16x16[Q][0] = ((quant_val * zbin_boost_16x16[0]) + 64) >> 7; - - quant_val = vp9_dc_uv_quant(Q, cpi->common.uvdc_delta_q); - invert_quant(cpi->UVquant[Q] + 0, - cpi->UVquant_shift[Q] + 0, quant_val); - cpi->UVzbin[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7; - cpi->UVzbin_8x8[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7; - cpi->UVzbin_16x16[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7; - cpi->UVround[Q][0] = 
(qrounding_factor * quant_val) >> 7; - cpi->common.UVdequant[Q][0] = quant_val; - cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7; - cpi->zrun_zbin_boost_uv_8x8[Q][0] = - ((quant_val * zbin_boost_8x8[0]) + 64) >> 7; - cpi->zrun_zbin_boost_uv_16x16[Q][0] = ((quant_val * zbin_boost_16x16[0]) + 64) >> 7; - - // all the 4x4 ac values =; - for (i = 1; i < 16; i++) { - int rc = vp9_default_zig_zag1d[i]; - - quant_val = vp9_ac_yquant(Q); - invert_quant(cpi->Y1quant[Q] + rc, - cpi->Y1quant_shift[Q] + rc, quant_val); - cpi->Y1zbin[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7; - cpi->Y1round[Q][rc] = (qrounding_factor * quant_val) >> 7; - cpi->common.Y1dequant[Q][rc] = quant_val; - cpi->zrun_zbin_boost_y1[Q][i] = - ((quant_val * zbin_boost[i]) + 64) >> 7; - - quant_val = vp9_ac2quant(Q, cpi->common.y2ac_delta_q); - invert_quant(cpi->Y2quant[Q] + rc, - cpi->Y2quant_shift[Q] + rc, quant_val); - cpi->Y2zbin[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7; - cpi->Y2round[Q][rc] = (qrounding_factor * quant_val) >> 7; - cpi->common.Y2dequant[Q][rc] = quant_val; - cpi->zrun_zbin_boost_y2[Q][i] = - ((quant_val * zbin_boost[i]) + 64) >> 7; - - quant_val = vp9_ac_uv_quant(Q, cpi->common.uvac_delta_q); - invert_quant(cpi->UVquant[Q] + rc, - cpi->UVquant_shift[Q] + rc, quant_val); - cpi->UVzbin[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7; - cpi->UVround[Q][rc] = (qrounding_factor * quant_val) >> 7; - cpi->common.UVdequant[Q][rc] = quant_val; - cpi->zrun_zbin_boost_uv[Q][i] = - ((quant_val * zbin_boost[i]) + 64) >> 7; - } - - // 8x8 structures... 
only zbin seperated out for now - // This needs cleaning up for 8x8 especially if we are to add - // support for non flat Q matices - for (i = 1; i < 64; i++) { - int rc = vp9_default_zig_zag1d_8x8[i]; - - quant_val = vp9_ac_yquant(Q); - cpi->Y1zbin_8x8[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7; - cpi->zrun_zbin_boost_y1_8x8[Q][i] = - ((quant_val * zbin_boost_8x8[i]) + 64) >> 7; - - quant_val = vp9_ac2quant(Q, cpi->common.y2ac_delta_q); - cpi->Y2zbin_8x8[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7; - cpi->zrun_zbin_boost_y2_8x8[Q][i] = - ((quant_val * zbin_boost_8x8[i]) + 64) >> 7; - - quant_val = vp9_ac_uv_quant(Q, cpi->common.uvac_delta_q); - cpi->UVzbin_8x8[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7; - cpi->zrun_zbin_boost_uv_8x8[Q][i] = - ((quant_val * zbin_boost_8x8[i]) + 64) >> 7; - } - - // 16x16 structures. Same comment above applies. - for (i = 1; i < 256; i++) { - int rc = vp9_default_zig_zag1d_16x16[i]; - - quant_val = vp9_ac_yquant(Q); - cpi->Y1zbin_16x16[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7; - cpi->zrun_zbin_boost_y1_16x16[Q][i] = ((quant_val * zbin_boost_16x16[i]) + 64) >> 7; - - quant_val = vp9_ac2quant(Q, cpi->common.y2ac_delta_q); - cpi->Y2zbin_16x16[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7; - cpi->zrun_zbin_boost_y2_16x16[Q][i] = ((quant_val * zbin_boost_16x16[i]) + 64) >> 7; - - quant_val = vp9_ac_uv_quant(Q, cpi->common.uvac_delta_q); - cpi->UVzbin_16x16[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7; - cpi->zrun_zbin_boost_uv_16x16[Q][i] = ((quant_val * zbin_boost_16x16[i]) + 64) >> 7; - } - } -} - -void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) { - int i; - int QIndex; - MACROBLOCKD *xd = &x->e_mbd; - int zbin_extra; - int segment_id = xd->mode_info_context->mbmi.segment_id; - - // Select the baseline MB Q index allowing for any segment level change. 
- if (vp9_segfeature_active(xd, segment_id, SEG_LVL_ALT_Q)) { - // Abs Value - if (xd->mb_segment_abs_delta == SEGMENT_ABSDATA) - QIndex = vp9_get_segdata(xd, segment_id, SEG_LVL_ALT_Q); - - // Delta Value - else { - QIndex = cpi->common.base_qindex + - vp9_get_segdata(xd, segment_id, SEG_LVL_ALT_Q); - - // Clamp to valid range - QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0; - } - } else - QIndex = cpi->common.base_qindex; - - // Y - zbin_extra = (cpi->common.Y1dequant[QIndex][1] * - (cpi->zbin_over_quant + - cpi->zbin_mode_boost + - x->act_zbin_adj)) >> 7; - - for (i = 0; i < 16; i++) { - x->block[i].quant = cpi->Y1quant[QIndex]; - x->block[i].quant_shift = cpi->Y1quant_shift[QIndex]; - x->block[i].zbin = cpi->Y1zbin[QIndex]; - x->block[i].zbin_8x8 = cpi->Y1zbin_8x8[QIndex]; - x->block[i].zbin_16x16 = cpi->Y1zbin_16x16[QIndex]; - x->block[i].round = cpi->Y1round[QIndex]; - x->e_mbd.block[i].dequant = cpi->common.Y1dequant[QIndex]; - x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_y1[QIndex]; - x->block[i].zrun_zbin_boost_8x8 = cpi->zrun_zbin_boost_y1_8x8[QIndex]; - x->block[i].zrun_zbin_boost_16x16 = cpi->zrun_zbin_boost_y1_16x16[QIndex]; - x->block[i].zbin_extra = (short)zbin_extra; - - // Segment max eob offset feature. 
- if (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB)) { - x->block[i].eob_max_offset = - vp9_get_segdata(xd, segment_id, SEG_LVL_EOB); - x->block[i].eob_max_offset_8x8 = - vp9_get_segdata(xd, segment_id, SEG_LVL_EOB); - x->block[i].eob_max_offset_16x16 = - vp9_get_segdata(xd, segment_id, SEG_LVL_EOB); - } else { - x->block[i].eob_max_offset = 16; - x->block[i].eob_max_offset_8x8 = 64; - x->block[i].eob_max_offset_16x16 = 256; - } - } - - // UV - zbin_extra = (cpi->common.UVdequant[QIndex][1] * - (cpi->zbin_over_quant + - cpi->zbin_mode_boost + - x->act_zbin_adj)) >> 7; - - for (i = 16; i < 24; i++) { - x->block[i].quant = cpi->UVquant[QIndex]; - x->block[i].quant_shift = cpi->UVquant_shift[QIndex]; - x->block[i].zbin = cpi->UVzbin[QIndex]; - x->block[i].zbin_8x8 = cpi->UVzbin_8x8[QIndex]; - x->block[i].zbin_16x16 = cpi->UVzbin_16x16[QIndex]; - x->block[i].round = cpi->UVround[QIndex]; - x->e_mbd.block[i].dequant = cpi->common.UVdequant[QIndex]; - x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_uv[QIndex]; - x->block[i].zrun_zbin_boost_8x8 = cpi->zrun_zbin_boost_uv_8x8[QIndex]; - x->block[i].zrun_zbin_boost_16x16 = cpi->zrun_zbin_boost_uv_16x16[QIndex]; - - x->block[i].zbin_extra = (short)zbin_extra; - - // Segment max eob offset feature. 
- if (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB)) { - x->block[i].eob_max_offset = - vp9_get_segdata(xd, segment_id, SEG_LVL_EOB); - x->block[i].eob_max_offset_8x8 = - vp9_get_segdata(xd, segment_id, SEG_LVL_EOB); - } else { - x->block[i].eob_max_offset = 16; - x->block[i].eob_max_offset_8x8 = 64; - } - } - - // Y2 - zbin_extra = (cpi->common.Y2dequant[QIndex][1] * - ((cpi->zbin_over_quant / 2) + - cpi->zbin_mode_boost + - x->act_zbin_adj)) >> 7; - - x->block[24].quant = cpi->Y2quant[QIndex]; - x->block[24].quant_shift = cpi->Y2quant_shift[QIndex]; - x->block[24].zbin = cpi->Y2zbin[QIndex]; - x->block[24].zbin_8x8 = cpi->Y2zbin_8x8[QIndex]; - x->block[24].zbin_16x16 = cpi->Y2zbin_16x16[QIndex]; - x->block[24].round = cpi->Y2round[QIndex]; - x->e_mbd.block[24].dequant = cpi->common.Y2dequant[QIndex]; - x->block[24].zrun_zbin_boost = cpi->zrun_zbin_boost_y2[QIndex]; - x->block[24].zrun_zbin_boost_8x8 = cpi->zrun_zbin_boost_y2_8x8[QIndex]; - x->block[24].zrun_zbin_boost_16x16 = cpi->zrun_zbin_boost_y2_16x16[QIndex]; - x->block[24].zbin_extra = (short)zbin_extra; - - // TBD perhaps not use for Y2 - // Segment max eob offset feature. 
- if (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB)) { - x->block[24].eob_max_offset = - vp9_get_segdata(xd, segment_id, SEG_LVL_EOB); - x->block[24].eob_max_offset_8x8 = - vp9_get_segdata(xd, segment_id, SEG_LVL_EOB); - } else { - x->block[24].eob_max_offset = 16; - x->block[24].eob_max_offset_8x8 = 4; - } - - /* save this macroblock QIndex for vp9_update_zbin_extra() */ - x->e_mbd.q_index = QIndex; -} - -void vp9_update_zbin_extra(VP9_COMP *cpi, MACROBLOCK *x) { - int i; - int QIndex = x->e_mbd.q_index; - int zbin_extra; - - // Y - zbin_extra = (cpi->common.Y1dequant[QIndex][1] * - (cpi->zbin_over_quant + - cpi->zbin_mode_boost + - x->act_zbin_adj)) >> 7; - for (i = 0; i < 16; i++) { - x->block[i].zbin_extra = (short)zbin_extra; - } - - // UV - zbin_extra = (cpi->common.UVdequant[QIndex][1] * - (cpi->zbin_over_quant + - cpi->zbin_mode_boost + - x->act_zbin_adj)) >> 7; - - for (i = 16; i < 24; i++) { - x->block[i].zbin_extra = (short)zbin_extra; - } - - // Y2 - zbin_extra = (cpi->common.Y2dequant[QIndex][1] * - ((cpi->zbin_over_quant / 2) + - cpi->zbin_mode_boost + - x->act_zbin_adj)) >> 7; - - x->block[24].zbin_extra = (short)zbin_extra; -} - -void vp9_frame_init_quantizer(VP9_COMP *cpi) { - // Clear Zbin mode boost for default case - cpi->zbin_mode_boost = 0; - - // MB level quantizer setup - vp9_mb_init_quantizer(cpi, &cpi->mb); -} - -void vp9_set_quantizer(struct VP9_COMP *cpi, int Q) { - VP9_COMMON *cm = &cpi->common; - - cm->base_qindex = Q; - - // if any of the delta_q values are changing update flag will - // have to be set. - cm->y1dc_delta_q = 0; - cm->y2ac_delta_q = 0; - cm->uvdc_delta_q = 0; - cm->uvac_delta_q = 0; - cm->y2dc_delta_q = 0; - - // quantizer has to be reinitialized if any delta_q changes. - // As there are not any here for now this is inactive code. 
- // if(update) - // vp9_init_quantizer(cpi); -} diff --git a/vp8/encoder/quantize.h b/vp8/encoder/quantize.h deleted file mode 100644 index b7a77613f..000000000 --- a/vp8/encoder/quantize.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef __INC_QUANTIZE_H -#define __INC_QUANTIZE_H - -#include "block.h" - -#define prototype_quantize_block(sym) \ - void (sym)(BLOCK *b,BLOCKD *d) - -#define prototype_quantize_block_pair(sym) \ - void (sym)(BLOCK *b1, BLOCK *b2, BLOCKD *d1, BLOCKD *d2) - -#define prototype_quantize_mb(sym) \ - void (sym)(MACROBLOCK *x) - -#if ARCH_X86 || ARCH_X86_64 -#include "x86/quantize_x86.h" -#endif - -#if ARCH_ARM -#include "arm/quantize_arm.h" -#endif - -#define prototype_quantize_block_type(sym) \ - void (sym)(BLOCK *b, BLOCKD *d, TX_TYPE type) -extern prototype_quantize_block_type(vp9_ht_quantize_b_4x4); - -#ifndef vp9_quantize_quantb_4x4 -#define vp9_quantize_quantb_4x4 vp9_regular_quantize_b_4x4 -#endif -extern prototype_quantize_block(vp9_quantize_quantb_4x4); - -#ifndef vp9_quantize_quantb_4x4_pair -#define vp9_quantize_quantb_4x4_pair vp9_regular_quantize_b_4x4_pair -#endif -extern prototype_quantize_block_pair(vp9_quantize_quantb_4x4_pair); - -#ifndef vp9_quantize_quantb_8x8 -#define vp9_quantize_quantb_8x8 vp9_regular_quantize_b_8x8 -#endif -extern prototype_quantize_block(vp9_quantize_quantb_8x8); - -#ifndef vp9_quantize_quantb_16x16 -#define vp9_quantize_quantb_16x16 vp9_regular_quantize_b_16x16 -#endif -extern prototype_quantize_block(vp9_quantize_quantb_16x16); - -#ifndef vp9_quantize_quantb_2x2 -#define vp9_quantize_quantb_2x2 
vp9_regular_quantize_b_2x2 -#endif -extern prototype_quantize_block(vp9_quantize_quantb_2x2); - -#ifndef vp9_quantize_mb_4x4 -#define vp9_quantize_mb_4x4 vp9_quantize_mb_4x4_c -#endif -extern prototype_quantize_mb(vp9_quantize_mb_4x4); -void vp9_quantize_mb_8x8(MACROBLOCK *x); - -#ifndef vp9_quantize_mbuv_4x4 -#define vp9_quantize_mbuv_4x4 vp9_quantize_mbuv_4x4_c -#endif -extern prototype_quantize_mb(vp9_quantize_mbuv_4x4); - -#ifndef vp9_quantize_mby_4x4 -#define vp9_quantize_mby_4x4 vp9_quantize_mby_4x4_c -#endif -extern prototype_quantize_mb(vp9_quantize_mby_4x4); - -extern prototype_quantize_mb(vp9_quantize_mby_8x8); -extern prototype_quantize_mb(vp9_quantize_mbuv_8x8); - -void vp9_quantize_mb_16x16(MACROBLOCK *x); -extern prototype_quantize_block(vp9_quantize_quantb_16x16); -extern prototype_quantize_mb(vp9_quantize_mby_16x16); - -struct VP9_COMP; - -extern void vp9_set_quantizer(struct VP9_COMP *cpi, int Q); - -extern void vp9_frame_init_quantizer(struct VP9_COMP *cpi); - -extern void vp9_update_zbin_extra(struct VP9_COMP *cpi, MACROBLOCK *x); - -extern void vp9_mb_init_quantizer(struct VP9_COMP *cpi, MACROBLOCK *x); - -extern void vp9_init_quantizer(struct VP9_COMP *cpi); - -#endif diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c deleted file mode 100644 index 1ce5e0eb8..000000000 --- a/vp8/encoder/ratectrl.c +++ /dev/null @@ -1,698 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include <stdlib.h> -#include <stdio.h> -#include <string.h> -#include <limits.h> -#include <assert.h> - -#include "math.h" -#include "vp8/common/alloccommon.h" -#include "vp8/common/common.h" -#include "ratectrl.h" -#include "vp8/common/entropymode.h" -#include "vpx_mem/vpx_mem.h" -#include "vp8/common/systemdependent.h" -#include "encodemv.h" -#include "vp8/common/quant_common.h" - -#define MIN_BPB_FACTOR 0.005 -#define MAX_BPB_FACTOR 50 - -#ifdef MODE_STATS -extern unsigned int y_modes[VP9_YMODES]; -extern unsigned int uv_modes[VP9_UV_MODES]; -extern unsigned int b_modes[B_MODE_COUNT]; - -extern unsigned int inter_y_modes[MB_MODE_COUNT]; -extern unsigned int inter_uv_modes[VP9_UV_MODES]; -extern unsigned int inter_b_modes[B_MODE_COUNT]; -#endif - -// Bits Per MB at different Q (Multiplied by 512) -#define BPER_MB_NORMBITS 9 - -// % adjustment to target kf size based on seperation from previous frame -static const int kf_boost_seperation_adjustment[16] = { - 30, 40, 50, 55, 60, 65, 70, 75, - 80, 85, 90, 95, 100, 100, 100, 100, -}; - -static const int gf_adjust_table[101] = { - 100, - 115, 130, 145, 160, 175, 190, 200, 210, 220, 230, - 240, 260, 270, 280, 290, 300, 310, 320, 330, 340, - 350, 360, 370, 380, 390, 400, 400, 400, 400, 400, - 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, - 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, - 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, - 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, - 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, - 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, - 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, -}; - -static const int gf_intra_usage_adjustment[20] = { - 125, 120, 115, 110, 105, 100, 95, 85, 80, 75, - 70, 65, 60, 55, 50, 50, 50, 50, 50, 50, -}; - -static const int gf_interval_table[101] = { - 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 9, 
9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, -}; - -static const unsigned int prior_key_frame_weight[KEY_FRAME_CONTEXT] = { 1, 2, 3, 4, 5 }; - -// These functions use formulaic calculations to make playing with the -// quantizer tables easier. If necessary they can be replaced by lookup -// tables if and when things settle down in the experimental bitstream -double vp9_convert_qindex_to_q(int qindex) { - // Convert the index to a real Q value (scaled down to match old Q values) - return (double)vp9_ac_yquant(qindex) / 4.0; -} - -int vp9_gfboost_qadjust(int qindex) { - int retval; - double q; - - q = vp9_convert_qindex_to_q(qindex); - retval = (int)((0.00000828 * q * q * q) + - (-0.0055 * q * q) + - (1.32 * q) + 79.3); - return retval; -} - -static int kfboost_qadjust(int qindex) { - int retval; - double q; - - q = vp9_convert_qindex_to_q(qindex); - retval = (int)((0.00000973 * q * q * q) + - (-0.00613 * q * q) + - (1.316 * q) + 121.2); - return retval; -} - -int vp9_bits_per_mb(FRAME_TYPE frame_type, int qindex) { - if (frame_type == KEY_FRAME) - return (int)(4500000 / vp9_convert_qindex_to_q(qindex)); - else - return (int)(2850000 / vp9_convert_qindex_to_q(qindex)); -} - - -void vp9_save_coding_context(VP9_COMP *cpi) { - CODING_CONTEXT *const cc = &cpi->coding_context; - VP9_COMMON *cm = &cpi->common; - MACROBLOCKD *xd = &cpi->mb.e_mbd; - - // Stores a snapshot of key state variables which can subsequently be - // restored with a call to vp9_restore_coding_context. These functions are - // intended for use in a re-code loop in vp9_compress_frame where the - // quantizer value is adjusted between loop iterations. 
- - cc->nmvc = cm->fc.nmvc; - vp9_copy(cc->nmvjointcost, cpi->mb.nmvjointcost); - vp9_copy(cc->nmvcosts, cpi->mb.nmvcosts); - vp9_copy(cc->nmvcosts_hp, cpi->mb.nmvcosts_hp); - - vp9_copy(cc->mv_ref_ct, cm->fc.mv_ref_ct); - vp9_copy(cc->mode_context, cm->fc.mode_context); - vp9_copy(cc->mv_ref_ct_a, cm->fc.mv_ref_ct_a); - vp9_copy(cc->mode_context_a, cm->fc.mode_context_a); - - vp9_copy(cc->ymode_prob, cm->fc.ymode_prob); - vp9_copy(cc->bmode_prob, cm->fc.bmode_prob); - vp9_copy(cc->uv_mode_prob, cm->fc.uv_mode_prob); - vp9_copy(cc->i8x8_mode_prob, cm->fc.i8x8_mode_prob); - vp9_copy(cc->sub_mv_ref_prob, cm->fc.sub_mv_ref_prob); - vp9_copy(cc->mbsplit_prob, cm->fc.mbsplit_prob); - - // Stats -#ifdef MODE_STATS - vp9_copy(cc->y_modes, y_modes); - vp9_copy(cc->uv_modes, uv_modes); - vp9_copy(cc->b_modes, b_modes); - vp9_copy(cc->inter_y_modes, inter_y_modes); - vp9_copy(cc->inter_uv_modes, inter_uv_modes); - vp9_copy(cc->inter_b_modes, inter_b_modes); -#endif - - vp9_copy(cc->segment_pred_probs, cm->segment_pred_probs); - vp9_copy(cc->ref_pred_probs_update, cpi->ref_pred_probs_update); - vp9_copy(cc->ref_pred_probs, cm->ref_pred_probs); - vp9_copy(cc->prob_comppred, cm->prob_comppred); - - vpx_memcpy(cpi->coding_context.last_frame_seg_map_copy, - cm->last_frame_seg_map, (cm->mb_rows * cm->mb_cols)); - - vp9_copy(cc->last_ref_lf_deltas, xd->last_ref_lf_deltas); - vp9_copy(cc->last_mode_lf_deltas, xd->last_mode_lf_deltas); - - vp9_copy(cc->coef_probs, cm->fc.coef_probs); - vp9_copy(cc->hybrid_coef_probs, cm->fc.hybrid_coef_probs); - vp9_copy(cc->coef_probs_8x8, cm->fc.coef_probs_8x8); - vp9_copy(cc->hybrid_coef_probs_8x8, cm->fc.hybrid_coef_probs_8x8); - vp9_copy(cc->coef_probs_16x16, cm->fc.coef_probs_16x16); - vp9_copy(cc->hybrid_coef_probs_16x16, cm->fc.hybrid_coef_probs_16x16); - vp9_copy(cc->switchable_interp_prob, cm->fc.switchable_interp_prob); -} - -void vp9_restore_coding_context(VP9_COMP *cpi) { - CODING_CONTEXT *const cc = &cpi->coding_context; - VP9_COMMON 
*cm = &cpi->common; - MACROBLOCKD *xd = &cpi->mb.e_mbd; - - // Restore key state variables to the snapshot state stored in the - // previous call to vp9_save_coding_context. - - cm->fc.nmvc = cc->nmvc; - vp9_copy(cpi->mb.nmvjointcost, cc->nmvjointcost); - vp9_copy(cpi->mb.nmvcosts, cc->nmvcosts); - vp9_copy(cpi->mb.nmvcosts_hp, cc->nmvcosts_hp); - - vp9_copy(cm->fc.mv_ref_ct, cc->mv_ref_ct); - vp9_copy(cm->fc.mode_context, cc->mode_context); - vp9_copy(cm->fc.mv_ref_ct_a, cc->mv_ref_ct_a); - vp9_copy(cm->fc.mode_context_a, cc->mode_context_a); - - vp9_copy(cm->fc.ymode_prob, cc->ymode_prob); - vp9_copy(cm->fc.bmode_prob, cc->bmode_prob); - vp9_copy(cm->fc.i8x8_mode_prob, cc->i8x8_mode_prob); - vp9_copy(cm->fc.uv_mode_prob, cc->uv_mode_prob); - vp9_copy(cm->fc.sub_mv_ref_prob, cc->sub_mv_ref_prob); - vp9_copy(cm->fc.mbsplit_prob, cc->mbsplit_prob); - - // Stats -#ifdef MODE_STATS - vp9_copy(y_modes, cc->y_modes); - vp9_copy(uv_modes, cc->uv_modes); - vp9_copy(b_modes, cc->b_modes); - vp9_copy(inter_y_modes, cc->inter_y_modes); - vp9_copy(inter_uv_modes, cc->inter_uv_modes); - vp9_copy(inter_b_modes, cc->inter_b_modes); -#endif - - vp9_copy(cm->segment_pred_probs, cc->segment_pred_probs); - vp9_copy(cpi->ref_pred_probs_update, cc->ref_pred_probs_update); - vp9_copy(cm->ref_pred_probs, cc->ref_pred_probs); - vp9_copy(cm->prob_comppred, cc->prob_comppred); - - vpx_memcpy(cm->last_frame_seg_map, - cpi->coding_context.last_frame_seg_map_copy, - (cm->mb_rows * cm->mb_cols)); - - vp9_copy(xd->last_ref_lf_deltas, cc->last_ref_lf_deltas); - vp9_copy(xd->last_mode_lf_deltas, cc->last_mode_lf_deltas); - - vp9_copy(cm->fc.coef_probs, cc->coef_probs); - vp9_copy(cm->fc.hybrid_coef_probs, cc->hybrid_coef_probs); - vp9_copy(cm->fc.coef_probs_8x8, cc->coef_probs_8x8); - vp9_copy(cm->fc.hybrid_coef_probs_8x8, cc->hybrid_coef_probs_8x8); - vp9_copy(cm->fc.coef_probs_16x16, cc->coef_probs_16x16); - vp9_copy(cm->fc.hybrid_coef_probs_16x16, cc->hybrid_coef_probs_16x16); - 
vp9_copy(cm->fc.switchable_interp_prob, cc->switchable_interp_prob); -} - - -void vp9_setup_key_frame(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; - // Setup for Key frame: - vp9_default_coef_probs(& cpi->common); - vp9_kf_default_bmode_probs(cpi->common.kf_bmode_prob); - vp9_init_mbmode_probs(& cpi->common); - vp9_default_bmode_probs(cm->fc.bmode_prob); - - vp9_init_mv_probs(& cpi->common); - - // cpi->common.filter_level = 0; // Reset every key frame. - cpi->common.filter_level = cpi->common.base_qindex * 3 / 8; - - // interval before next GF - cpi->frames_till_gf_update_due = cpi->baseline_gf_interval; - - cpi->common.refresh_golden_frame = TRUE; - cpi->common.refresh_alt_ref_frame = TRUE; - - vp9_init_mode_contexts(&cpi->common); - vpx_memcpy(&cpi->common.lfc, &cpi->common.fc, sizeof(cpi->common.fc)); - vpx_memcpy(&cpi->common.lfc_a, &cpi->common.fc, sizeof(cpi->common.fc)); - - vpx_memset(cm->prev_mip, 0, - (cm->mb_cols + 1) * (cm->mb_rows + 1)* sizeof(MODE_INFO)); - vpx_memset(cm->mip, 0, - (cm->mb_cols + 1) * (cm->mb_rows + 1)* sizeof(MODE_INFO)); - - vp9_update_mode_info_border(cm, cm->mip); - vp9_update_mode_info_in_image(cm, cm->mi); -} - -void vp9_setup_inter_frame(VP9_COMP *cpi) { - if (cpi->common.refresh_alt_ref_frame) { - vpx_memcpy(&cpi->common.fc, - &cpi->common.lfc_a, - sizeof(cpi->common.fc)); - vpx_memcpy(cpi->common.fc.vp8_mode_contexts, - cpi->common.fc.mode_context_a, - sizeof(cpi->common.fc.vp8_mode_contexts)); - } else { - vpx_memcpy(&cpi->common.fc, - &cpi->common.lfc, - sizeof(cpi->common.fc)); - vpx_memcpy(cpi->common.fc.vp8_mode_contexts, - cpi->common.fc.mode_context, - sizeof(cpi->common.fc.vp8_mode_contexts)); - } -} - - -static int estimate_bits_at_q(int frame_kind, int Q, int MBs, - double correction_factor) { - int Bpm = (int)(.5 + correction_factor * vp9_bits_per_mb(frame_kind, Q)); - - /* Attempt to retain reasonable accuracy without overflow. 
The cutoff is - * chosen such that the maximum product of Bpm and MBs fits 31 bits. The - * largest Bpm takes 20 bits. - */ - if (MBs > (1 << 11)) - return (Bpm >> BPER_MB_NORMBITS) * MBs; - else - return (Bpm * MBs) >> BPER_MB_NORMBITS; -} - - -static void calc_iframe_target_size(VP9_COMP *cpi) { - // boost defaults to half second - int target; - - // Clear down mmx registers to allow floating point in what follows - vp9_clear_system_state(); // __asm emms; - - // New Two pass RC - target = cpi->per_frame_bandwidth; - - if (cpi->oxcf.rc_max_intra_bitrate_pct) { - unsigned int max_rate = cpi->per_frame_bandwidth - * cpi->oxcf.rc_max_intra_bitrate_pct / 100; - - if (target > max_rate) - target = max_rate; - } - - cpi->this_frame_target = target; - -} - - -// Do the best we can to define the parameteres for the next GF based -// on what information we have available. -// -// In this experimental code only two pass is supported -// so we just use the interval determined in the two pass code. -static void calc_gf_params(VP9_COMP *cpi) { - // Set the gf interval - cpi->frames_till_gf_update_due = cpi->baseline_gf_interval; -} - - -static void calc_pframe_target_size(VP9_COMP *cpi) { - int min_frame_target; - - min_frame_target = 0; - - min_frame_target = cpi->min_frame_bandwidth; - - if (min_frame_target < (cpi->av_per_frame_bandwidth >> 5)) - min_frame_target = cpi->av_per_frame_bandwidth >> 5; - - - // Special alt reference frame case - if (cpi->common.refresh_alt_ref_frame) { - // Per frame bit target for the alt ref frame - cpi->per_frame_bandwidth = cpi->twopass.gf_bits; - cpi->this_frame_target = cpi->per_frame_bandwidth; - } - - // Normal frames (gf,and inter) - else { - cpi->this_frame_target = cpi->per_frame_bandwidth; - } - - // Sanity check that the total sum of adjustments is not above the maximum allowed - // That is that having allowed for KF and GF penalties we have not pushed the - // current interframe target to low. 
If the adjustment we apply here is not capable of recovering - // all the extra bits we have spent in the KF or GF then the remainder will have to be recovered over - // a longer time span via other buffer / rate control mechanisms. - if (cpi->this_frame_target < min_frame_target) - cpi->this_frame_target = min_frame_target; - - if (!cpi->common.refresh_alt_ref_frame) - // Note the baseline target data rate for this inter frame. - cpi->inter_frame_target = cpi->this_frame_target; - - // Adjust target frame size for Golden Frames: - if (cpi->frames_till_gf_update_due == 0) { - // int Boost = 0; - int Q = (cpi->oxcf.fixed_q < 0) ? cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q; - - cpi->common.refresh_golden_frame = TRUE; - - calc_gf_params(cpi); - - // If we are using alternate ref instead of gf then do not apply the boost - // It will instead be applied to the altref update - // Jims modified boost - if (!cpi->source_alt_ref_active) { - if (cpi->oxcf.fixed_q < 0) { - // The spend on the GF is defined in the two pass code - // for two pass encodes - cpi->this_frame_target = cpi->per_frame_bandwidth; - } else - cpi->this_frame_target = - (estimate_bits_at_q(1, Q, cpi->common.MBs, 1.0) - * cpi->last_boost) / 100; - - } - // If there is an active ARF at this location use the minimum - // bits on this frame even if it is a contructed arf. - // The active maximum quantizer insures that an appropriate - // number of bits will be spent if needed for contstructed ARFs. 
- else { - cpi->this_frame_target = 0; - } - - cpi->current_gf_interval = cpi->frames_till_gf_update_due; - } -} - - -void vp9_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) { - int Q = cpi->common.base_qindex; - int correction_factor = 100; - double rate_correction_factor; - double adjustment_limit; - - int projected_size_based_on_q = 0; - - // Clear down mmx registers to allow floating point in what follows - vp9_clear_system_state(); // __asm emms; - - if (cpi->common.frame_type == KEY_FRAME) { - rate_correction_factor = cpi->key_frame_rate_correction_factor; - } else { - if (cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame) - rate_correction_factor = cpi->gf_rate_correction_factor; - else - rate_correction_factor = cpi->rate_correction_factor; - } - - // Work out how big we would have expected the frame to be at this Q given the current correction factor. - // Stay in double to avoid int overflow when values are large - projected_size_based_on_q = - (int)(((.5 + rate_correction_factor * - vp9_bits_per_mb(cpi->common.frame_type, Q)) * - cpi->common.MBs) / (1 << BPER_MB_NORMBITS)); - - // Make some allowance for cpi->zbin_over_quant - if (cpi->zbin_over_quant > 0) { - int Z = cpi->zbin_over_quant; - double Factor = 0.99; - double factor_adjustment = 0.01 / 256.0; // (double)ZBIN_OQ_MAX; - - while (Z > 0) { - Z--; - projected_size_based_on_q = - (int)(Factor * projected_size_based_on_q); - Factor += factor_adjustment; - - if (Factor >= 0.999) - Factor = 0.999; - } - } - - // Work out a size correction factor. 
- // if ( cpi->this_frame_target > 0 ) - // correction_factor = (100 * cpi->projected_frame_size) / cpi->this_frame_target; - if (projected_size_based_on_q > 0) - correction_factor = (100 * cpi->projected_frame_size) / projected_size_based_on_q; - - // More heavily damped adjustment used if we have been oscillating either side of target - switch (damp_var) { - case 0: - adjustment_limit = 0.75; - break; - case 1: - adjustment_limit = 0.375; - break; - case 2: - default: - adjustment_limit = 0.25; - break; - } - - // if ( (correction_factor > 102) && (Q < cpi->active_worst_quality) ) - if (correction_factor > 102) { - // We are not already at the worst allowable quality - correction_factor = (int)(100.5 + ((correction_factor - 100) * adjustment_limit)); - rate_correction_factor = ((rate_correction_factor * correction_factor) / 100); - - // Keep rate_correction_factor within limits - if (rate_correction_factor > MAX_BPB_FACTOR) - rate_correction_factor = MAX_BPB_FACTOR; - } - // else if ( (correction_factor < 99) && (Q > cpi->active_best_quality) ) - else if (correction_factor < 99) { - // We are not already at the best allowable quality - correction_factor = (int)(100.5 - ((100 - correction_factor) * adjustment_limit)); - rate_correction_factor = ((rate_correction_factor * correction_factor) / 100); - - // Keep rate_correction_factor within limits - if (rate_correction_factor < MIN_BPB_FACTOR) - rate_correction_factor = MIN_BPB_FACTOR; - } - - if (cpi->common.frame_type == KEY_FRAME) - cpi->key_frame_rate_correction_factor = rate_correction_factor; - else { - if (cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame) - cpi->gf_rate_correction_factor = rate_correction_factor; - else - cpi->rate_correction_factor = rate_correction_factor; - } -} - - -int vp9_regulate_q(VP9_COMP *cpi, int target_bits_per_frame) { - int Q = cpi->active_worst_quality; - - int i; - int last_error = INT_MAX; - int target_bits_per_mb; - int bits_per_mb_at_this_q; - double 
correction_factor; - - // Reset Zbin OQ value - cpi->zbin_over_quant = 0; - - // Select the appropriate correction factor based upon type of frame. - if (cpi->common.frame_type == KEY_FRAME) - correction_factor = cpi->key_frame_rate_correction_factor; - else { - if (cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame) - correction_factor = cpi->gf_rate_correction_factor; - else - correction_factor = cpi->rate_correction_factor; - } - - // Calculate required scaling factor based on target frame size and size of frame produced using previous Q - if (target_bits_per_frame >= (INT_MAX >> BPER_MB_NORMBITS)) - target_bits_per_mb = (target_bits_per_frame / cpi->common.MBs) << BPER_MB_NORMBITS; // Case where we would overflow int - else - target_bits_per_mb = (target_bits_per_frame << BPER_MB_NORMBITS) / cpi->common.MBs; - - i = cpi->active_best_quality; - - do { - bits_per_mb_at_this_q = - (int)(.5 + correction_factor * - vp9_bits_per_mb(cpi->common.frame_type, i)); - - if (bits_per_mb_at_this_q <= target_bits_per_mb) { - if ((target_bits_per_mb - bits_per_mb_at_this_q) <= last_error) - Q = i; - else - Q = i - 1; - - break; - } else - last_error = bits_per_mb_at_this_q - target_bits_per_mb; - } while (++i <= cpi->active_worst_quality); - - - // If we are at MAXQ then enable Q over-run which seeks to claw back additional bits through things like - // the RD multiplier and zero bin size. - if (Q >= MAXQ) { - int zbin_oqmax; - - double Factor = 0.99; - double factor_adjustment = 0.01 / 256.0; // (double)ZBIN_OQ_MAX; - - if (cpi->common.frame_type == KEY_FRAME) - zbin_oqmax = 0; // ZBIN_OQ_MAX/16 - else if (cpi->common.refresh_alt_ref_frame || (cpi->common.refresh_golden_frame && !cpi->source_alt_ref_active)) - zbin_oqmax = 16; - else - zbin_oqmax = ZBIN_OQ_MAX; - - // Each incrment in the zbin is assumed to have a fixed effect on bitrate. This is not of course true. - // The effect will be highly clip dependent and may well have sudden steps. 
- // The idea here is to acheive higher effective quantizers than the normal maximum by expanding the zero - // bin and hence decreasing the number of low magnitude non zero coefficients. - while (cpi->zbin_over_quant < zbin_oqmax) { - cpi->zbin_over_quant++; - - if (cpi->zbin_over_quant > zbin_oqmax) - cpi->zbin_over_quant = zbin_oqmax; - - // Adjust bits_per_mb_at_this_q estimate - bits_per_mb_at_this_q = (int)(Factor * bits_per_mb_at_this_q); - Factor += factor_adjustment; - - if (Factor >= 0.999) - Factor = 0.999; - - if (bits_per_mb_at_this_q <= target_bits_per_mb) // Break out if we get down to the target rate - break; - } - - } - - return Q; -} - - -static int estimate_keyframe_frequency(VP9_COMP *cpi) { - int i; - - // Average key frame frequency - int av_key_frame_frequency = 0; - - /* First key frame at start of sequence is a special case. We have no - * frequency data. - */ - if (cpi->key_frame_count == 1) { - /* Assume a default of 1 kf every 2 seconds, or the max kf interval, - * whichever is smaller. - */ - int key_freq = cpi->oxcf.key_freq > 0 ? cpi->oxcf.key_freq : 1; - av_key_frame_frequency = (int)cpi->output_frame_rate * 2; - - if (cpi->oxcf.auto_key && av_key_frame_frequency > key_freq) - av_key_frame_frequency = cpi->oxcf.key_freq; - - cpi->prior_key_frame_distance[KEY_FRAME_CONTEXT - 1] - = av_key_frame_frequency; - } else { - unsigned int total_weight = 0; - int last_kf_interval = - (cpi->frames_since_key > 0) ? 
cpi->frames_since_key : 1; - - /* reset keyframe context and calculate weighted average of last - * KEY_FRAME_CONTEXT keyframes - */ - for (i = 0; i < KEY_FRAME_CONTEXT; i++) { - if (i < KEY_FRAME_CONTEXT - 1) - cpi->prior_key_frame_distance[i] - = cpi->prior_key_frame_distance[i + 1]; - else - cpi->prior_key_frame_distance[i] = last_kf_interval; - - av_key_frame_frequency += prior_key_frame_weight[i] - * cpi->prior_key_frame_distance[i]; - total_weight += prior_key_frame_weight[i]; - } - - av_key_frame_frequency /= total_weight; - - } - return av_key_frame_frequency; -} - - -void vp9_adjust_key_frame_context(VP9_COMP *cpi) { - // Clear down mmx registers to allow floating point in what follows - vp9_clear_system_state(); - - cpi->frames_since_key = 0; - cpi->key_frame_count++; -} - - -void vp9_compute_frame_size_bounds(VP9_COMP *cpi, int *frame_under_shoot_limit, - int *frame_over_shoot_limit) { - // Set-up bounds on acceptable frame size: - if (cpi->oxcf.fixed_q >= 0) { - // Fixed Q scenario: frame size never outranges target (there is no target!) 
- *frame_under_shoot_limit = 0; - *frame_over_shoot_limit = INT_MAX; - } else { - if (cpi->common.frame_type == KEY_FRAME) { - *frame_over_shoot_limit = cpi->this_frame_target * 9 / 8; - *frame_under_shoot_limit = cpi->this_frame_target * 7 / 8; - } else { - if (cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame) { - *frame_over_shoot_limit = cpi->this_frame_target * 9 / 8; - *frame_under_shoot_limit = cpi->this_frame_target * 7 / 8; - } else { - // Stron overshoot limit for constrained quality - if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) { - *frame_over_shoot_limit = cpi->this_frame_target * 11 / 8; - *frame_under_shoot_limit = cpi->this_frame_target * 2 / 8; - } else { - *frame_over_shoot_limit = cpi->this_frame_target * 11 / 8; - *frame_under_shoot_limit = cpi->this_frame_target * 5 / 8; - } - } - } - - // For very small rate targets where the fractional adjustment - // (eg * 7/8) may be tiny make sure there is at least a minimum - // range. - *frame_over_shoot_limit += 200; - *frame_under_shoot_limit -= 200; - if (*frame_under_shoot_limit < 0) - *frame_under_shoot_limit = 0; - } -} - - -// return of 0 means drop frame -int vp9_pick_frame_size(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; - - if (cm->frame_type == KEY_FRAME) - calc_iframe_target_size(cpi); - else - calc_pframe_target_size(cpi); - - return 1; -} diff --git a/vp8/encoder/ratectrl.h b/vp8/encoder/ratectrl.h deleted file mode 100644 index f5c751ea0..000000000 --- a/vp8/encoder/ratectrl.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#if !defined __INC_RATECTRL_H - -#include "onyx_int.h" - -#define FRAME_OVERHEAD_BITS 200 - -extern void vp9_save_coding_context(VP9_COMP *cpi); -extern void vp9_restore_coding_context(VP9_COMP *cpi); - -extern void vp9_setup_key_frame(VP9_COMP *cpi); -extern void vp9_update_rate_correction_factors(VP9_COMP *cpi, int damp_var); -extern int vp9_regulate_q(VP9_COMP *cpi, int target_bits_per_frame); -extern void vp9_adjust_key_frame_context(VP9_COMP *cpi); -extern void vp9_compute_frame_size_bounds(VP9_COMP *cpi, - int *frame_under_shoot_limit, - int *frame_over_shoot_limit); - -// return of 0 means drop frame -extern int vp9_pick_frame_size(VP9_COMP *cpi); - -extern double vp9_convert_qindex_to_q(int qindex); -extern int vp9_gfboost_qadjust(int qindex); -extern int vp9_bits_per_mb(FRAME_TYPE frame_type, int qindex); -void vp9_setup_inter_frame(VP9_COMP *cpi); - -#endif diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c deleted file mode 100644 index a30f9ce2a..000000000 --- a/vp8/encoder/rdopt.c +++ /dev/null @@ -1,4854 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include <stdio.h> -#include <math.h> -#include <limits.h> -#include <assert.h> -#include "vp8/common/pragmas.h" - -#include "tokenize.h" -#include "treewriter.h" -#include "onyx_int.h" -#include "modecosts.h" -#include "encodeintra.h" -#include "vp8/common/entropymode.h" -#include "vp8/common/reconinter.h" -#include "vp8/common/reconintra.h" -#include "vp8/common/reconintra4x4.h" -#include "vp8/common/findnearmv.h" -#include "vp8/common/quant_common.h" -#include "encodemb.h" -#include "quantize.h" -#include "vp8/common/idct.h" -#include "variance.h" -#include "mcomp.h" -#include "rdopt.h" -#include "ratectrl.h" -#include "vpx_mem/vpx_mem.h" -#include "vp8/common/systemdependent.h" -#include "vp8/encoder/encodemv.h" - -#include "vp8/common/seg_common.h" -#include "vp8/common/pred_common.h" -#include "vp8/common/entropy.h" -#include "vpx_rtcd.h" -#if CONFIG_NEWBESTREFMV -#include "vp8/common/mvref_common.h" -#endif - -#if CONFIG_RUNTIME_CPU_DETECT -#define IF_RTCD(x) (x) -#else -#define IF_RTCD(x) NULL -#endif - -extern void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x); -extern void vp9_update_zbin_extra(VP9_COMP *cpi, MACROBLOCK *x); - -#define MAXF(a,b) (((a) > (b)) ? 
(a) : (b)) - -#define INVALID_MV 0x80008000 - -/* Factor to weigh the rate for switchable interp filters */ -#define SWITCHABLE_INTERP_RATE_FACTOR 1 - -static const int auto_speed_thresh[17] = { - 1000, - 200, - 150, - 130, - 150, - 125, - 120, - 115, - 115, - 115, - 115, - 115, - 115, - 115, - 115, - 115, - 105 -}; - -#if CONFIG_PRED_FILTER -const MODE_DEFINITION vp9_mode_order[MAX_MODES] = { - {ZEROMV, LAST_FRAME, 0, 0}, - {ZEROMV, LAST_FRAME, 0, 1}, - {DC_PRED, INTRA_FRAME, 0, 0}, - - {NEARESTMV, LAST_FRAME, 0, 0}, - {NEARESTMV, LAST_FRAME, 0, 1}, - {NEARMV, LAST_FRAME, 0, 0}, - {NEARMV, LAST_FRAME, 0, 1}, - - {ZEROMV, GOLDEN_FRAME, 0, 0}, - {ZEROMV, GOLDEN_FRAME, 0, 1}, - {NEARESTMV, GOLDEN_FRAME, 0, 0}, - {NEARESTMV, GOLDEN_FRAME, 0, 1}, - - {ZEROMV, ALTREF_FRAME, 0, 0}, - {ZEROMV, ALTREF_FRAME, 0, 1}, - {NEARESTMV, ALTREF_FRAME, 0, 0}, - {NEARESTMV, ALTREF_FRAME, 0, 1}, - - {NEARMV, GOLDEN_FRAME, 0, 0}, - {NEARMV, GOLDEN_FRAME, 0, 1}, - {NEARMV, ALTREF_FRAME, 0, 0}, - {NEARMV, ALTREF_FRAME, 0, 1}, - - {V_PRED, INTRA_FRAME, 0, 0}, - {H_PRED, INTRA_FRAME, 0, 0}, - {D45_PRED, INTRA_FRAME, 0, 0}, - {D135_PRED, INTRA_FRAME, 0, 0}, - {D117_PRED, INTRA_FRAME, 0, 0}, - {D153_PRED, INTRA_FRAME, 0, 0}, - {D27_PRED, INTRA_FRAME, 0, 0}, - {D63_PRED, INTRA_FRAME, 0, 0}, - - {TM_PRED, INTRA_FRAME, 0, 0}, - - {NEWMV, LAST_FRAME, 0, 0}, - {NEWMV, LAST_FRAME, 0, 1}, - {NEWMV, GOLDEN_FRAME, 0, 0}, - {NEWMV, GOLDEN_FRAME, 0, 1}, - {NEWMV, ALTREF_FRAME, 0, 0}, - {NEWMV, ALTREF_FRAME, 0, 1}, - - {SPLITMV, LAST_FRAME, 0, 0}, - {SPLITMV, GOLDEN_FRAME, 0, 0}, - {SPLITMV, ALTREF_FRAME, 0, 0}, - - {B_PRED, INTRA_FRAME, 0, 0}, - {I8X8_PRED, INTRA_FRAME, 0, 0}, - - /* compound prediction modes */ - {ZEROMV, LAST_FRAME, GOLDEN_FRAME, 0}, - {NEARESTMV, LAST_FRAME, GOLDEN_FRAME, 0}, - {NEARMV, LAST_FRAME, GOLDEN_FRAME, 0}, - - {ZEROMV, ALTREF_FRAME, LAST_FRAME, 0}, - {NEARESTMV, ALTREF_FRAME, LAST_FRAME, 0}, - {NEARMV, ALTREF_FRAME, LAST_FRAME, 0}, - - {ZEROMV, GOLDEN_FRAME, ALTREF_FRAME, 
0}, - {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME, 0}, - {NEARMV, GOLDEN_FRAME, ALTREF_FRAME, 0}, - - {NEWMV, LAST_FRAME, GOLDEN_FRAME, 0}, - {NEWMV, ALTREF_FRAME, LAST_FRAME, 0}, - {NEWMV, GOLDEN_FRAME, ALTREF_FRAME, 0}, - - {SPLITMV, LAST_FRAME, GOLDEN_FRAME, 0}, - {SPLITMV, ALTREF_FRAME, LAST_FRAME, 0}, - {SPLITMV, GOLDEN_FRAME, ALTREF_FRAME, 0} -}; -#else -const MODE_DEFINITION vp9_mode_order[MAX_MODES] = { - {ZEROMV, LAST_FRAME, 0}, - {DC_PRED, INTRA_FRAME, 0}, - - {NEARESTMV, LAST_FRAME, 0}, - {NEARMV, LAST_FRAME, 0}, - - {ZEROMV, GOLDEN_FRAME, 0}, - {NEARESTMV, GOLDEN_FRAME, 0}, - - {ZEROMV, ALTREF_FRAME, 0}, - {NEARESTMV, ALTREF_FRAME, 0}, - - {NEARMV, GOLDEN_FRAME, 0}, - {NEARMV, ALTREF_FRAME, 0}, - - {V_PRED, INTRA_FRAME, 0}, - {H_PRED, INTRA_FRAME, 0}, - {D45_PRED, INTRA_FRAME, 0}, - {D135_PRED, INTRA_FRAME, 0}, - {D117_PRED, INTRA_FRAME, 0}, - {D153_PRED, INTRA_FRAME, 0}, - {D27_PRED, INTRA_FRAME, 0}, - {D63_PRED, INTRA_FRAME, 0}, - - {TM_PRED, INTRA_FRAME, 0}, - - {NEWMV, LAST_FRAME, 0}, - {NEWMV, GOLDEN_FRAME, 0}, - {NEWMV, ALTREF_FRAME, 0}, - - {SPLITMV, LAST_FRAME, 0}, - {SPLITMV, GOLDEN_FRAME, 0}, - {SPLITMV, ALTREF_FRAME, 0}, - - {B_PRED, INTRA_FRAME, 0}, - {I8X8_PRED, INTRA_FRAME, 0}, - - /* compound prediction modes */ - {ZEROMV, LAST_FRAME, GOLDEN_FRAME}, - {NEARESTMV, LAST_FRAME, GOLDEN_FRAME}, - {NEARMV, LAST_FRAME, GOLDEN_FRAME}, - - {ZEROMV, ALTREF_FRAME, LAST_FRAME}, - {NEARESTMV, ALTREF_FRAME, LAST_FRAME}, - {NEARMV, ALTREF_FRAME, LAST_FRAME}, - - {ZEROMV, GOLDEN_FRAME, ALTREF_FRAME}, - {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME}, - {NEARMV, GOLDEN_FRAME, ALTREF_FRAME}, - - {NEWMV, LAST_FRAME, GOLDEN_FRAME}, - {NEWMV, ALTREF_FRAME, LAST_FRAME }, - {NEWMV, GOLDEN_FRAME, ALTREF_FRAME}, - - {SPLITMV, LAST_FRAME, GOLDEN_FRAME}, - {SPLITMV, ALTREF_FRAME, LAST_FRAME }, - {SPLITMV, GOLDEN_FRAME, ALTREF_FRAME} -}; -#endif - -static void fill_token_costs( - unsigned int (*c)[COEF_BANDS][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS], - const 
vp9_prob(*p)[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES], - int block_type_counts) { - int i, j, k; - - for (i = 0; i < block_type_counts; i++) - for (j = 0; j < COEF_BANDS; j++) - for (k = 0; k < PREV_COEF_CONTEXTS; k++) { - if (k == 0 && ((j > 0 && i > 0) || (j > 1 && i == 0))) - vp9_cost_tokens_skip((int *)(c[i][j][k]), - p[i][j][k], - vp9_coef_tree); - else - vp9_cost_tokens((int *)(c[i][j][k]), - p[i][j][k], - vp9_coef_tree); - } -} - - -static int rd_iifactor[32] = { 4, 4, 3, 2, 1, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, }; - -// 3* dc_qlookup[Q]*dc_qlookup[Q]; - -/* values are now correlated to quantizer */ -static int sad_per_bit16lut[QINDEX_RANGE]; -static int sad_per_bit4lut[QINDEX_RANGE]; - -void vp9_init_me_luts() { - int i; - - // Initialize the sad lut tables using a formulaic calculation for now - // This is to make it easier to resolve the impact of experimental changes - // to the quantizer tables. - for (i = 0; i < QINDEX_RANGE; i++) { - sad_per_bit16lut[i] = - (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107); - sad_per_bit4lut[i] = (int)((0.063 * vp9_convert_qindex_to_q(i)) + 2.742); - } -} - -static int compute_rd_mult(int qindex) { - int q; - - q = vp9_dc_quant(qindex, 0); - return (11 * q * q) >> 6; -} - -void vp9_initialize_me_consts(VP9_COMP *cpi, int QIndex) { - cpi->mb.sadperbit16 = sad_per_bit16lut[QIndex]; - cpi->mb.sadperbit4 = sad_per_bit4lut[QIndex]; -} - - -void vp9_initialize_rd_consts(VP9_COMP *cpi, int QIndex) { - int q, i; - - vp9_clear_system_state(); // __asm emms; - - // Further tests required to see if optimum is different - // for key frames, golden frames and arf frames. - // if (cpi->common.refresh_golden_frame || - // cpi->common.refresh_alt_ref_frame) - QIndex = (QIndex < 0) ? 0 : ((QIndex > MAXQ) ? 
MAXQ : QIndex); - - cpi->RDMULT = compute_rd_mult(QIndex); - - // Extend rate multiplier along side quantizer zbin increases - if (cpi->zbin_over_quant > 0) { - double oq_factor; - - // Experimental code using the same basic equation as used for Q above - // The units of cpi->zbin_over_quant are 1/128 of Q bin size - oq_factor = 1.0 + ((double)0.0015625 * cpi->zbin_over_quant); - cpi->RDMULT = (int)((double)cpi->RDMULT * oq_factor * oq_factor); - } - - if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) { - if (cpi->twopass.next_iiratio > 31) - cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4; - else - cpi->RDMULT += - (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4; - } - - if (cpi->RDMULT < 7) - cpi->RDMULT = 7; - - cpi->mb.errorperbit = (cpi->RDMULT / 110); - cpi->mb.errorperbit += (cpi->mb.errorperbit == 0); - - vp9_set_speed_features(cpi); - - q = (int)pow(vp9_dc_quant(QIndex, 0) >> 2, 1.25); - q = q << 2; - cpi->RDMULT = cpi->RDMULT << 4; - - if (q < 8) - q = 8; - - if (cpi->RDMULT > 1000) { - cpi->RDDIV = 1; - cpi->RDMULT /= 100; - - for (i = 0; i < MAX_MODES; i++) { - if (cpi->sf.thresh_mult[i] < INT_MAX) { - cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100; - } else { - cpi->rd_threshes[i] = INT_MAX; - } - - cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i]; - } - } else { - cpi->RDDIV = 100; - - for (i = 0; i < MAX_MODES; i++) { - if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) { - cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q; - } else { - cpi->rd_threshes[i] = INT_MAX; - } - - cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i]; - } - } - - fill_token_costs( - cpi->mb.token_costs[TX_4X4], - (const vp9_prob( *)[8][PREV_COEF_CONTEXTS][11]) cpi->common.fc.coef_probs, - BLOCK_TYPES); - fill_token_costs( - cpi->mb.hybrid_token_costs[TX_4X4], - (const vp9_prob( *)[8][PREV_COEF_CONTEXTS][11]) - cpi->common.fc.hybrid_coef_probs, - BLOCK_TYPES); - - fill_token_costs( - cpi->mb.token_costs[TX_8X8], - (const vp9_prob( 
*)[8][PREV_COEF_CONTEXTS][11]) cpi->common.fc.coef_probs_8x8, - BLOCK_TYPES_8X8); - fill_token_costs( - cpi->mb.hybrid_token_costs[TX_8X8], - (const vp9_prob( *)[8][PREV_COEF_CONTEXTS][11]) - cpi->common.fc.hybrid_coef_probs_8x8, - BLOCK_TYPES_8X8); - - fill_token_costs( - cpi->mb.token_costs[TX_16X16], - (const vp9_prob(*)[8][PREV_COEF_CONTEXTS][11]) cpi->common.fc.coef_probs_16x16, - BLOCK_TYPES_16X16); - fill_token_costs( - cpi->mb.hybrid_token_costs[TX_16X16], - (const vp9_prob(*)[8][PREV_COEF_CONTEXTS][11]) - cpi->common.fc.hybrid_coef_probs_16x16, - BLOCK_TYPES_16X16); - - /*rough estimate for costing*/ - cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4; - vp9_init_mode_costs(cpi); - - if (cpi->common.frame_type != KEY_FRAME) - { - vp9_build_nmv_cost_table( - cpi->mb.nmvjointcost, - cpi->mb.e_mbd.allow_high_precision_mv ? - cpi->mb.nmvcost_hp : cpi->mb.nmvcost, - &cpi->common.fc.nmvc, - cpi->mb.e_mbd.allow_high_precision_mv, 1, 1); - } -} - -void vp9_auto_select_speed(VP9_COMP *cpi) { - int milliseconds_for_compress = (int)(1000000 / cpi->oxcf.frame_rate); - - milliseconds_for_compress = milliseconds_for_compress * (16 - cpi->oxcf.cpu_used) / 16; - - /* - // this is done during parameter valid check - if( cpi->oxcf.cpu_used > 16) - cpi->oxcf.cpu_used = 16; - if( cpi->oxcf.cpu_used < -16) - cpi->oxcf.cpu_used = -16; - */ - - if (cpi->avg_pick_mode_time < milliseconds_for_compress && - (cpi->avg_encode_time - cpi->avg_pick_mode_time) < - milliseconds_for_compress) { - if (cpi->avg_pick_mode_time == 0) { - cpi->Speed = 4; - } else { - if (milliseconds_for_compress * 100 < cpi->avg_encode_time * 95) { - cpi->Speed += 2; - cpi->avg_pick_mode_time = 0; - cpi->avg_encode_time = 0; - - if (cpi->Speed > 16) { - cpi->Speed = 16; - } - } - - if (milliseconds_for_compress * 100 > - cpi->avg_encode_time * auto_speed_thresh[cpi->Speed]) { - cpi->Speed -= 1; - cpi->avg_pick_mode_time = 0; - cpi->avg_encode_time = 0; - - // In real-time mode, cpi->speed is in 
[4, 16]. - if (cpi->Speed < 4) { // if ( cpi->Speed < 0 ) - cpi->Speed = 4; // cpi->Speed = 0; - } - } - } - } else { - cpi->Speed += 4; - - if (cpi->Speed > 16) - cpi->Speed = 16; - - - cpi->avg_pick_mode_time = 0; - cpi->avg_encode_time = 0; - } -} - -int vp9_block_error_c(short *coeff, short *dqcoeff, int block_size) { - int i, error = 0; - - for (i = 0; i < block_size; i++) { - int this_diff = coeff[i] - dqcoeff[i]; - error += this_diff * this_diff; - } - - return error; -} - -int vp9_mbblock_error_c(MACROBLOCK *mb, int dc) { - BLOCK *be; - BLOCKD *bd; - int i, j; - int berror, error = 0; - - for (i = 0; i < 16; i++) { - be = &mb->block[i]; - bd = &mb->e_mbd.block[i]; - - berror = 0; - - for (j = dc; j < 16; j++) { - int this_diff = be->coeff[j] - bd->dqcoeff[j]; - berror += this_diff * this_diff; - } - - error += berror; - } - - return error; -} - -int vp9_mbuverror_c(MACROBLOCK *mb) { - BLOCK *be; - BLOCKD *bd; - - int i, error = 0; - - for (i = 16; i < 24; i++) { - be = &mb->block[i]; - bd = &mb->e_mbd.block[i]; - - error += vp9_block_error_c(be->coeff, bd->dqcoeff, 16); - } - - return error; -} - -int vp9_uvsse(MACROBLOCK *x) { - unsigned char *uptr, *vptr; - unsigned char *upred_ptr = (*(x->block[16].base_src) + x->block[16].src); - unsigned char *vpred_ptr = (*(x->block[20].base_src) + x->block[20].src); - int uv_stride = x->block[16].src_stride; - - unsigned int sse1 = 0; - unsigned int sse2 = 0; - int mv_row = x->e_mbd.mode_info_context->mbmi.mv[0].as_mv.row; - int mv_col = x->e_mbd.mode_info_context->mbmi.mv[0].as_mv.col; - int offset; - int pre_stride = x->e_mbd.block[16].pre_stride; - - if (mv_row < 0) - mv_row -= 1; - else - mv_row += 1; - - if (mv_col < 0) - mv_col -= 1; - else - mv_col += 1; - - mv_row /= 2; - mv_col /= 2; - - offset = (mv_row >> 3) * pre_stride + (mv_col >> 3); - uptr = x->e_mbd.pre.u_buffer + offset; - vptr = x->e_mbd.pre.v_buffer + offset; - - if ((mv_row | mv_col) & 7) { - vp9_sub_pixel_variance8x8(uptr, pre_stride, (mv_col & 
7) << 1, - (mv_row & 7) << 1, upred_ptr, uv_stride, &sse2); - vp9_sub_pixel_variance8x8(vptr, pre_stride, (mv_col & 7) << 1, - (mv_row & 7) << 1, vpred_ptr, uv_stride, &sse1); - sse2 += sse1; - } else { - vp9_variance8x8(uptr, pre_stride, upred_ptr, uv_stride, &sse2); - vp9_variance8x8(vptr, pre_stride, vpred_ptr, uv_stride, &sse1); - sse2 += sse1; - } - return sse2; - -} - -static int cost_coeffs_2x2(MACROBLOCK *mb, - BLOCKD *b, PLANE_TYPE type, - ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) { - int c = (type == PLANE_TYPE_Y_NO_DC); /* start at coef 0, unless Y with Y2 */ - int eob = b->eob; - int pt; /* surrounding block/prev coef predictor */ - int cost = 0; - short *qcoeff_ptr = b->qcoeff; - - VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l); - assert(eob <= 4); - - for (; c < eob; c++) { - int v = qcoeff_ptr[vp9_default_zig_zag1d[c]]; - int t = vp9_dct_value_tokens_ptr[v].Token; - cost += mb->token_costs[TX_8X8][type][vp9_coef_bands[c]][pt][t]; - cost += vp9_dct_value_cost_ptr[v]; - pt = vp9_prev_token_class[t]; - } - - if (c < 4) - cost += mb->token_costs[TX_8X8][type][vp9_coef_bands[c]] - [pt] [DCT_EOB_TOKEN]; - - pt = (c != !type); // is eob first coefficient; - *a = *l = pt; - return cost; -} - -static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type, - ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, - int tx_size) { - const int eob = b->eob; - int c = (type == PLANE_TYPE_Y_NO_DC); /* start at coef 0, unless Y with Y2 */ - int cost = 0, default_eob, seg_eob; - int pt; /* surrounding block/prev coef predictor */ - int const *scan, *band; - short *qcoeff_ptr = b->qcoeff; - MACROBLOCKD *xd = &mb->e_mbd; - MB_MODE_INFO *mbmi = &mb->e_mbd.mode_info_context->mbmi; - TX_TYPE tx_type = DCT_DCT; - int segment_id = mbmi->segment_id; - - switch (tx_size) { - case TX_4X4: - scan = vp9_default_zig_zag1d; - band = vp9_coef_bands; - default_eob = 16; - if (type == PLANE_TYPE_Y_WITH_DC) { - tx_type = get_tx_type_4x4(xd, b); - if (tx_type != DCT_DCT) { - switch (tx_type) { - case 
ADST_DCT: - scan = vp9_row_scan; - break; - - case DCT_ADST: - scan = vp9_col_scan; - break; - - default: - scan = vp9_default_zig_zag1d; - break; - } - } - } - - break; - case TX_8X8: - scan = vp9_default_zig_zag1d_8x8; - band = vp9_coef_bands_8x8; - default_eob = 64; - if (type == PLANE_TYPE_Y_WITH_DC) { - BLOCKD *bb; - int ib = (b - xd->block); - if (ib < 16) { - ib = (ib & 8) + ((ib & 4) >> 1); - bb = xd->block + ib; - tx_type = get_tx_type_8x8(xd, bb); - } - } - break; - case TX_16X16: - scan = vp9_default_zig_zag1d_16x16; - band = vp9_coef_bands_16x16; - default_eob = 256; - if (type == PLANE_TYPE_Y_WITH_DC) { - tx_type = get_tx_type_16x16(xd, b); - } - break; - default: - break; - } - if (vp9_segfeature_active(&mb->e_mbd, segment_id, SEG_LVL_EOB)) - seg_eob = vp9_get_segdata(&mb->e_mbd, segment_id, SEG_LVL_EOB); - else - seg_eob = default_eob; - - VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l); - - if (tx_type != DCT_DCT) { - for (; c < eob; c++) { - int v = qcoeff_ptr[scan[c]]; - int t = vp9_dct_value_tokens_ptr[v].Token; - cost += mb->hybrid_token_costs[tx_size][type][band[c]][pt][t]; - cost += vp9_dct_value_cost_ptr[v]; - pt = vp9_prev_token_class[t]; - } - if (c < seg_eob) - cost += mb->hybrid_token_costs[tx_size][type][band[c]] - [pt][DCT_EOB_TOKEN]; - } else { - for (; c < eob; c++) { - int v = qcoeff_ptr[scan[c]]; - int t = vp9_dct_value_tokens_ptr[v].Token; - cost += mb->token_costs[tx_size][type][band[c]][pt][t]; - cost += vp9_dct_value_cost_ptr[v]; - pt = vp9_prev_token_class[t]; - } - if (c < seg_eob) - cost += mb->token_costs[tx_size][type][band[c]] - [pt][DCT_EOB_TOKEN]; - } - - pt = (c != !type); // is eob first coefficient; - *a = *l = pt; - return cost; -} - -static int rdcost_mby_4x4(MACROBLOCK *mb) { - int cost = 0; - int b; - MACROBLOCKD *xd = &mb->e_mbd; - ENTROPY_CONTEXT_PLANES t_above, t_left; - ENTROPY_CONTEXT *ta; - ENTROPY_CONTEXT *tl; - - vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, 
xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES)); - - ta = (ENTROPY_CONTEXT *)&t_above; - tl = (ENTROPY_CONTEXT *)&t_left; - - for (b = 0; b < 16; b++) - cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_Y_NO_DC, - ta + vp9_block2above[b], tl + vp9_block2left[b], - TX_4X4); - - cost += cost_coeffs(mb, xd->block + 24, PLANE_TYPE_Y2, - ta + vp9_block2above[24], tl + vp9_block2left[24], - TX_4X4); - - return cost; -} - -static void macro_block_yrd_4x4(MACROBLOCK *mb, - int *Rate, - int *Distortion, - const VP9_ENCODER_RTCD *rtcd, - int *skippable) { - int b; - MACROBLOCKD *const xd = &mb->e_mbd; - BLOCK *const mb_y2 = mb->block + 24; - BLOCKD *const x_y2 = xd->block + 24; - short *Y2DCPtr = mb_y2->src_diff; - BLOCK *beptr; - int d; - - vp9_subtract_mby(mb->src_diff, *(mb->block[0].base_src), xd->predictor, - mb->block[0].src_stride); - - // Fdct and building the 2nd order block - for (beptr = mb->block; beptr < mb->block + 16; beptr += 2) { - mb->vp9_short_fdct8x4(beptr->src_diff, beptr->coeff, 32); - *Y2DCPtr++ = beptr->coeff[0]; - *Y2DCPtr++ = beptr->coeff[16]; - } - - // 2nd order fdct - mb->short_walsh4x4(mb_y2->src_diff, mb_y2->coeff, 8); - - // Quantization - for (b = 0; b < 16; b++) { - mb->quantize_b_4x4(&mb->block[b], &xd->block[b]); - } - - // DC predication and Quantization of 2nd Order block - mb->quantize_b_4x4(mb_y2, x_y2); - - // Distortion - d = vp9_mbblock_error(mb, 1); - - d += vp9_block_error(mb_y2->coeff, x_y2->dqcoeff, 16); - - *Distortion = (d >> 2); - // rate - *Rate = rdcost_mby_4x4(mb); - *skippable = vp9_mby_is_skippable_4x4(&mb->e_mbd, 1); -} - -static int rdcost_mby_8x8(MACROBLOCK *mb, int backup) { - int cost = 0; - int b; - MACROBLOCKD *xd = &mb->e_mbd; - ENTROPY_CONTEXT_PLANES t_above, t_left; - ENTROPY_CONTEXT *ta; - ENTROPY_CONTEXT *tl; - - if (backup) { - vpx_memcpy(&t_above,xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES)); - - ta = (ENTROPY_CONTEXT 
*)&t_above; - tl = (ENTROPY_CONTEXT *)&t_left; - } else { - ta = (ENTROPY_CONTEXT *)mb->e_mbd.above_context; - tl = (ENTROPY_CONTEXT *)mb->e_mbd.left_context; - } - - for (b = 0; b < 16; b += 4) - cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_Y_NO_DC, - ta + vp9_block2above_8x8[b], tl + vp9_block2left_8x8[b], - TX_8X8); - - cost += cost_coeffs_2x2(mb, xd->block + 24, PLANE_TYPE_Y2, - ta + vp9_block2above[24], tl + vp9_block2left[24]); - return cost; -} - -static void macro_block_yrd_8x8(MACROBLOCK *mb, - int *Rate, - int *Distortion, - const VP9_ENCODER_RTCD *rtcd, - int *skippable) { - MACROBLOCKD *const xd = &mb->e_mbd; - BLOCK *const mb_y2 = mb->block + 24; - BLOCKD *const x_y2 = xd->block + 24; - int d; - - vp9_subtract_mby(mb->src_diff, *(mb->block[0].base_src), xd->predictor, - mb->block[0].src_stride); - - vp9_transform_mby_8x8(mb); - vp9_quantize_mby_8x8(mb); - - /* remove 1st order dc to properly combine 1st/2nd order distortion */ - mb->coeff[0] = 0; - mb->coeff[64] = 0; - mb->coeff[128] = 0; - mb->coeff[192] = 0; - xd->dqcoeff[0] = 0; - xd->dqcoeff[64] = 0; - xd->dqcoeff[128] = 0; - xd->dqcoeff[192] = 0; - - d = vp9_mbblock_error(mb, 0); - d += vp9_block_error(mb_y2->coeff, x_y2->dqcoeff, 16); - - *Distortion = (d >> 2); - // rate - *Rate = rdcost_mby_8x8(mb, 1); - *skippable = vp9_mby_is_skippable_8x8(&mb->e_mbd, 1); -} - -static int rdcost_mby_16x16(MACROBLOCK *mb) { - int cost; - MACROBLOCKD *xd = &mb->e_mbd; - ENTROPY_CONTEXT_PLANES t_above, t_left; - ENTROPY_CONTEXT *ta, *tl; - - vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES)); - - ta = (ENTROPY_CONTEXT *)&t_above; - tl = (ENTROPY_CONTEXT *)&t_left; - - cost = cost_coeffs(mb, xd->block, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_16X16); - return cost; -} - -static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion, - const VP9_ENCODER_RTCD *rtcd, int *skippable) { - int d; - MACROBLOCKD *xd 
= &mb->e_mbd; - BLOCKD *b = &mb->e_mbd.block[0]; - BLOCK *be = &mb->block[0]; - TX_TYPE tx_type; - - vp9_subtract_mby(mb->src_diff, *(mb->block[0].base_src), mb->e_mbd.predictor, - mb->block[0].src_stride); - - tx_type = get_tx_type_16x16(xd, b); - if (tx_type != DCT_DCT) { - vp9_fht(be->src_diff, 32, be->coeff, tx_type, 16); - } else - vp9_transform_mby_16x16(mb); - - vp9_quantize_mby_16x16(mb); - // TODO(jingning) is it possible to quickly determine whether to force - // trailing coefficients to be zero, instead of running trellis - // optimization in the rate-distortion optimization loop? - if (mb->e_mbd.mode_info_context->mbmi.mode < I8X8_PRED) - vp9_optimize_mby_16x16(mb, rtcd); - - d = vp9_mbblock_error(mb, 0); - - *Distortion = (d >> 2); - // rate - *Rate = rdcost_mby_16x16(mb); - *skippable = vp9_mby_is_skippable_16x16(&mb->e_mbd); -} - -static void macro_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, - int *distortion, int *skippable, - int64_t txfm_cache[NB_TXFM_MODES]) { - VP9_COMMON *cm = &cpi->common; - MB_MODE_INFO *mbmi = &x->e_mbd.mode_info_context->mbmi; - - MACROBLOCKD *xd = &x->e_mbd; - int can_skip = cm->mb_no_coeff_skip; - vp9_prob skip_prob = can_skip ? 
vp9_get_pred_prob(cm, xd, PRED_MBSKIP) : 128; - int s0, s1; - int r4x4, r4x4s, r8x8, r8x8s, d4x4, d8x8, s4x4, s8x8; - int64_t rd4x4, rd8x8, rd4x4s, rd8x8s; - int d16x16, r16x16, r16x16s, s16x16; - int64_t rd16x16, rd16x16s; - - // FIXME don't do sub x3 - if (skip_prob == 0) - skip_prob = 1; - s0 = vp9_cost_bit(skip_prob, 0); - s1 = vp9_cost_bit(skip_prob, 1); - macro_block_yrd_16x16(x, &r16x16, &d16x16, IF_RTCD(&cpi->rtcd), &s16x16); - if (can_skip) { - if (s16x16) { - rd16x16 = RDCOST(x->rdmult, x->rddiv, s1, d16x16); - } else { - rd16x16 = RDCOST(x->rdmult, x->rddiv, r16x16 + s0, d16x16); - } - } else { - rd16x16 = RDCOST(x->rdmult, x->rddiv, r16x16, d16x16); - } - r16x16s = r16x16 + vp9_cost_one(cm->prob_tx[0]) + vp9_cost_one(cm->prob_tx[1]); - if (can_skip) { - if (s16x16) { - rd16x16s = RDCOST(x->rdmult, x->rddiv, s1, d16x16); - } else { - rd16x16s = RDCOST(x->rdmult, x->rddiv, r16x16s + s0, d16x16); - } - } else { - rd16x16s = RDCOST(x->rdmult, x->rddiv, r16x16s, d16x16); - } - macro_block_yrd_8x8(x, &r8x8, &d8x8, IF_RTCD(&cpi->rtcd), &s8x8); - if (can_skip) { - if (s8x8) { - rd8x8 = RDCOST(x->rdmult, x->rddiv, s1, d8x8); - } else { - rd8x8 = RDCOST(x->rdmult, x->rddiv, r8x8 + s0, d8x8); - } - } else { - rd8x8 = RDCOST(x->rdmult, x->rddiv, r8x8, d8x8); - } - r8x8s = r8x8 + vp9_cost_one(cm->prob_tx[0]); - r8x8s += vp9_cost_zero(cm->prob_tx[1]); - if (can_skip) { - if (s8x8) { - rd8x8s = RDCOST(x->rdmult, x->rddiv, s1, d8x8); - } else { - rd8x8s = RDCOST(x->rdmult, x->rddiv, r8x8s + s0, d8x8); - } - } else { - rd8x8s = RDCOST(x->rdmult, x->rddiv, r8x8s, d8x8); - } - macro_block_yrd_4x4(x, &r4x4, &d4x4, IF_RTCD(&cpi->rtcd), &s4x4); - if (can_skip) { - if (s4x4) { - rd4x4 = RDCOST(x->rdmult, x->rddiv, s1, d4x4); - } else { - rd4x4 = RDCOST(x->rdmult, x->rddiv, r4x4 + s0, d4x4); - } - } else { - rd4x4 = RDCOST(x->rdmult, x->rddiv, r4x4, d4x4); - } - r4x4s = r4x4 + vp9_cost_zero(cm->prob_tx[0]); - if (can_skip) { - if (s4x4) { - rd4x4s = RDCOST(x->rdmult, x->rddiv, 
s1, d4x4); - } else { - rd4x4s = RDCOST(x->rdmult, x->rddiv, r4x4s + s0, d4x4); - } - } else { - rd4x4s = RDCOST(x->rdmult, x->rddiv, r4x4s, d4x4); - } - - if ( cpi->common.txfm_mode == ALLOW_16X16 || - (cpi->common.txfm_mode == TX_MODE_SELECT && - rd16x16s < rd8x8s && rd16x16s < rd4x4s)) { - mbmi->txfm_size = TX_16X16; - *skippable = s16x16; - *distortion = d16x16; - *rate = (cpi->common.txfm_mode == ALLOW_16X16) ? r16x16 : r16x16s; - } else - if ( cpi->common.txfm_mode == ALLOW_8X8 || - (cpi->common.txfm_mode == TX_MODE_SELECT && rd8x8s < rd4x4s)) { - mbmi->txfm_size = TX_8X8; - *skippable = s8x8; - *distortion = d8x8; - *rate = (cpi->common.txfm_mode == ALLOW_8X8) ? r8x8 : r8x8s; - } else { - assert(cpi->common.txfm_mode == ONLY_4X4 || - (cpi->common.txfm_mode == TX_MODE_SELECT && rd4x4s <= rd8x8s)); - mbmi->txfm_size = TX_4X4; - *skippable = s4x4; - *distortion = d4x4; - *rate = (cpi->common.txfm_mode == ONLY_4X4) ? r4x4 : r4x4s; - } - - txfm_cache[ONLY_4X4] = rd4x4; - txfm_cache[ALLOW_8X8] = rd8x8; - txfm_cache[ALLOW_16X16] = rd16x16; - if (rd16x16s < rd8x8s && rd16x16s < rd4x4s) - txfm_cache[TX_MODE_SELECT] = rd16x16s; - else - txfm_cache[TX_MODE_SELECT] = rd4x4s < rd8x8s ? 
rd4x4s : rd8x8s; - -} - -static void copy_predictor(unsigned char *dst, const unsigned char *predictor) { - const unsigned int *p = (const unsigned int *)predictor; - unsigned int *d = (unsigned int *)dst; - d[0] = p[0]; - d[4] = p[4]; - d[8] = p[8]; - d[12] = p[12]; -} - -#if CONFIG_SUPERBLOCKS -static void super_block_yrd_8x8(MACROBLOCK *x, - int *rate, - int *distortion, - const VP9_ENCODER_RTCD *rtcd, int *skip) -{ - MACROBLOCKD *const xd = &x->e_mbd; - BLOCK *const by2 = x->block + 24; - BLOCKD *const bdy2 = xd->block + 24; - int d = 0, r = 0, n; - const uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer; - int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride; - ENTROPY_CONTEXT_PLANES *ta = xd->above_context; - ENTROPY_CONTEXT_PLANES *tl = xd->left_context; - ENTROPY_CONTEXT_PLANES t_above[2]; - ENTROPY_CONTEXT_PLANES t_left[2]; - int skippable = 1; - - vpx_memcpy(t_above, xd->above_context, sizeof(t_above)); - vpx_memcpy(t_left, xd->left_context, sizeof(t_left)); - - for (n = 0; n < 4; n++) { - int x_idx = n & 1, y_idx = n >> 1; - - vp9_subtract_mby_s_c(x->src_diff, - src + x_idx * 16 + y_idx * 16 * src_y_stride, - src_y_stride, - dst + x_idx * 16 + y_idx * 16 * dst_y_stride, - dst_y_stride); - vp9_transform_mby_8x8(x); - vp9_quantize_mby_8x8(x); - - /* remove 1st order dc to properly combine 1st/2nd order distortion */ - x->coeff[ 0] = 0; - x->coeff[ 64] = 0; - x->coeff[128] = 0; - x->coeff[192] = 0; - xd->dqcoeff[ 0] = 0; - xd->dqcoeff[ 64] = 0; - xd->dqcoeff[128] = 0; - xd->dqcoeff[192] = 0; - - d += vp9_mbblock_error(x, 0); - d += vp9_block_error(by2->coeff, bdy2->dqcoeff, 16); - xd->above_context = ta + x_idx; - xd->left_context = tl + y_idx; - r += rdcost_mby_8x8(x, 0); - skippable = skippable && vp9_mby_is_skippable_8x8(xd, 1); - } - - *distortion = (d >> 2); - *rate = r; - if (skip) *skip = skippable; - xd->above_context = ta; - xd->left_context = tl; - vpx_memcpy(xd->above_context, &t_above, sizeof(t_above)); - 
vpx_memcpy(xd->left_context, &t_left, sizeof(t_left)); -} -#endif - -static void copy_predictor_8x8(unsigned char *dst, const unsigned char *predictor) { - const unsigned int *p = (const unsigned int *)predictor; - unsigned int *d = (unsigned int *)dst; - d[0] = p[0]; - d[1] = p[1]; - d[4] = p[4]; - d[5] = p[5]; - d[8] = p[8]; - d[9] = p[9]; - d[12] = p[12]; - d[13] = p[13]; - d[16] = p[16]; - d[17] = p[17]; - d[20] = p[20]; - d[21] = p[21]; - d[24] = p[24]; - d[25] = p[25]; - d[28] = p[28]; - d[29] = p[29]; -} - -static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be, - BLOCKD *b, B_PREDICTION_MODE *best_mode, -#if CONFIG_COMP_INTRA_PRED - B_PREDICTION_MODE *best_second_mode, - int allow_comp, -#endif - int *bmode_costs, - ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, - int *bestrate, int *bestratey, - int *bestdistortion) { - B_PREDICTION_MODE mode; - MACROBLOCKD *xd = &x->e_mbd; - -#if CONFIG_COMP_INTRA_PRED - B_PREDICTION_MODE mode2; -#endif - int64_t best_rd = INT64_MAX; - int rate = 0; - int distortion; - - ENTROPY_CONTEXT ta = *a, tempa = *a; - ENTROPY_CONTEXT tl = *l, templ = *l; - TX_TYPE tx_type = DCT_DCT; - TX_TYPE best_tx_type = DCT_DCT; - /* - * The predictor buffer is a 2d buffer with a stride of 16. Create - * a temp buffer that meets the stride requirements, but we are only - * interested in the left 4x4 block - * */ - DECLARE_ALIGNED_ARRAY(16, unsigned char, best_predictor, 16 * 4); - DECLARE_ALIGNED_ARRAY(16, short, best_dqcoeff, 16); - - for (mode = B_DC_PRED; mode <= B_HU_PRED; mode++) { -#if CONFIG_COMP_INTRA_PRED - for (mode2 = (allow_comp ? 0 : (B_DC_PRED - 1)); - mode2 != (allow_comp ? 
(mode + 1) : 0); mode2++) { -#endif - int64_t this_rd; - int ratey; - - b->bmi.as_mode.first = mode; - rate = bmode_costs[mode]; - -#if CONFIG_COMP_INTRA_PRED - if (mode2 == (B_PREDICTION_MODE)(B_DC_PRED - 1)) { -#endif - vp9_intra4x4_predict(b, mode, b->predictor); -#if CONFIG_COMP_INTRA_PRED - } else { - vp9_comp_intra4x4_predict(b, mode, mode2, b->predictor); - rate += bmode_costs[mode2]; - } -#endif - vp9_subtract_b(be, b, 16); - - b->bmi.as_mode.first = mode; - tx_type = get_tx_type_4x4(xd, b); - if (tx_type != DCT_DCT) { - vp9_fht(be->src_diff, 32, be->coeff, tx_type, 4); - vp9_ht_quantize_b_4x4(be, b, tx_type); - } else { - x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32); - x->quantize_b_4x4(be, b); - } - - tempa = ta; - templ = tl; - - ratey = cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, &tempa, &templ, TX_4X4); - rate += ratey; - distortion = vp9_block_error(be->coeff, b->dqcoeff, 16) >> 2; - - this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); - - if (this_rd < best_rd) { - *bestrate = rate; - *bestratey = ratey; - *bestdistortion = distortion; - best_rd = this_rd; - *best_mode = mode; - best_tx_type = tx_type; - -#if CONFIG_COMP_INTRA_PRED - *best_second_mode = mode2; -#endif - *a = tempa; - *l = templ; - copy_predictor(best_predictor, b->predictor); - vpx_memcpy(best_dqcoeff, b->dqcoeff, 32); - } -#if CONFIG_COMP_INTRA_PRED - } -#endif - } - b->bmi.as_mode.first = (B_PREDICTION_MODE)(*best_mode); -#if CONFIG_COMP_INTRA_PRED - b->bmi.as_mode.second = (B_PREDICTION_MODE)(*best_second_mode); -#endif - - // inverse transform - if (best_tx_type != DCT_DCT) - vp9_ihtllm_c(best_dqcoeff, b->diff, 32, best_tx_type, 4); - else - IDCT_INVOKE(IF_RTCD(&cpi->rtcd.common->idct), idct16)( - best_dqcoeff, b->diff, 32); - - vp9_recon_b(best_predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - - return best_rd; -} - -static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, int *Rate, - int *rate_y, int *Distortion, int64_t best_rd, -#if 
CONFIG_COMP_INTRA_PRED - int allow_comp, -#endif - int update_contexts) { - int i; - MACROBLOCKD *const xd = &mb->e_mbd; - int cost = mb->mbmode_cost [xd->frame_type] [B_PRED]; - int distortion = 0; - int tot_rate_y = 0; - int64_t total_rd = 0; - ENTROPY_CONTEXT_PLANES t_above, t_left; - ENTROPY_CONTEXT *ta, *tl; - int *bmode_costs; - - if (update_contexts) { - ta = (ENTROPY_CONTEXT *)xd->above_context; - tl = (ENTROPY_CONTEXT *)xd->left_context; - } else { - vpx_memcpy(&t_above, xd->above_context, - sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, xd->left_context, - sizeof(ENTROPY_CONTEXT_PLANES)); - - ta = (ENTROPY_CONTEXT *)&t_above; - tl = (ENTROPY_CONTEXT *)&t_left; - } - - xd->mode_info_context->mbmi.mode = B_PRED; - bmode_costs = mb->inter_bmode_costs; - - for (i = 0; i < 16; i++) { - MODE_INFO *const mic = xd->mode_info_context; - const int mis = xd->mode_info_stride; - B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode); -#if CONFIG_COMP_INTRA_PRED - B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_second_mode); -#endif - int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry), UNINITIALIZED_IS_SAFE(d); - - if (xd->frame_type == KEY_FRAME) { - const B_PREDICTION_MODE A = above_block_mode(mic, i, mis); - const B_PREDICTION_MODE L = left_block_mode(mic, i); - - bmode_costs = mb->bmode_costs[A][L]; - } - - total_rd += rd_pick_intra4x4block( - cpi, mb, mb->block + i, xd->block + i, &best_mode, -#if CONFIG_COMP_INTRA_PRED - & best_second_mode, allow_comp, -#endif - bmode_costs, ta + vp9_block2above[i], - tl + vp9_block2left[i], &r, &ry, &d); - - cost += r; - distortion += d; - tot_rate_y += ry; - - mic->bmi[i].as_mode.first = best_mode; -#if CONFIG_COMP_INTRA_PRED - mic->bmi[i].as_mode.second = best_second_mode; -#endif - - if (total_rd >= best_rd) - break; - } - - if (total_rd >= best_rd) - return INT64_MAX; - -#if CONFIG_COMP_INTRA_PRED - cost += vp9_cost_bit(128, allow_comp); -#endif - *Rate = cost; - *rate_y += tot_rate_y; - *Distortion = distortion; - - 
return RDCOST(mb->rdmult, mb->rddiv, cost, distortion); -} - -#if CONFIG_SUPERBLOCKS -static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, - MACROBLOCK *x, - int *rate, - int *rate_tokenonly, - int *distortion, - int *skippable) { - MB_PREDICTION_MODE mode; - MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); - int this_rate, this_rate_tokenonly; - int this_distortion, s; - int64_t best_rd = INT64_MAX, this_rd; - - /* Y Search for 32x32 intra prediction mode */ - for (mode = DC_PRED; mode <= TM_PRED; mode++) { - x->e_mbd.mode_info_context->mbmi.mode = mode; - vp9_build_intra_predictors_sby_s(&x->e_mbd); - - super_block_yrd_8x8(x, &this_rate_tokenonly, - &this_distortion, IF_RTCD(&cpi->rtcd), &s); - this_rate = this_rate_tokenonly + - x->mbmode_cost[x->e_mbd.frame_type] - [x->e_mbd.mode_info_context->mbmi.mode]; - this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); - - if (this_rd < best_rd) { - mode_selected = mode; - best_rd = this_rd; - *rate = this_rate; - *rate_tokenonly = this_rate_tokenonly; - *distortion = this_distortion; - *skippable = s; - } - } - - x->e_mbd.mode_info_context->mbmi.mode = mode_selected; - - return best_rd; -} -#endif - -static int64_t rd_pick_intra16x16mby_mode(VP9_COMP *cpi, - MACROBLOCK *x, - int *Rate, - int *rate_y, - int *Distortion, - int *skippable, - int64_t txfm_cache[NB_TXFM_MODES]) { - MB_PREDICTION_MODE mode; - TX_SIZE txfm_size; - MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); -#if CONFIG_COMP_INTRA_PRED - MB_PREDICTION_MODE mode2; - MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode2_selected); -#endif - MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; - int rate, ratey; - int distortion, skip; - int64_t best_rd = INT64_MAX; - int64_t this_rd; - MACROBLOCKD *xd = &x->e_mbd; - - int i; - for (i = 0; i < NB_TXFM_MODES; i++) - txfm_cache[i] = INT64_MAX; - - // Y Search for 16x16 intra prediction mode - for (mode = DC_PRED; mode <= TM_PRED; mode++) { - int64_t local_txfm_cache[NB_TXFM_MODES]; - 
- mbmi->mode = mode; - -#if CONFIG_COMP_INTRA_PRED - for (mode2 = DC_PRED - 1; mode2 != TM_PRED + 1; mode2++) { - mbmi->second_mode = mode2; - if (mode2 == (MB_PREDICTION_MODE)(DC_PRED - 1)) { -#endif - vp9_build_intra_predictors_mby(&x->e_mbd); -#if CONFIG_COMP_INTRA_PRED - } else { - continue; // i.e. disable for now - vp9_build_comp_intra_predictors_mby(&x->e_mbd); - } -#endif - - macro_block_yrd(cpi, x, &ratey, &distortion, &skip, local_txfm_cache); - - // FIXME add compoundmode cost - // FIXME add rate for mode2 - rate = ratey + x->mbmode_cost[x->e_mbd.frame_type][mbmi->mode]; - - this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); - - - if (this_rd < best_rd) { - mode_selected = mode; - txfm_size = mbmi->txfm_size; -#if CONFIG_COMP_INTRA_PRED - mode2_selected = mode2; -#endif - best_rd = this_rd; - *Rate = rate; - *rate_y = ratey; - *Distortion = distortion; - *skippable = skip; - } - - for (i = 0; i < NB_TXFM_MODES; i++) { - int64_t adj_rd = this_rd + local_txfm_cache[i] - - local_txfm_cache[cpi->common.txfm_mode]; - if (adj_rd < txfm_cache[i]) { - txfm_cache[i] = adj_rd; - } - } - -#if CONFIG_COMP_INTRA_PRED - } -#endif - } - - mbmi->txfm_size = txfm_size; - mbmi->mode = mode_selected; - -#if CONFIG_COMP_INTRA_PRED - mbmi->second_mode = mode2_selected; -#endif - return best_rd; -} - - -static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, - B_PREDICTION_MODE *best_mode, -#if CONFIG_COMP_INTRA_PRED - B_PREDICTION_MODE *best_second_mode, -#endif - int *mode_costs, - ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, - int *bestrate, int *bestratey, - int *bestdistortion) { - MB_PREDICTION_MODE mode; -#if CONFIG_COMP_INTRA_PRED - MB_PREDICTION_MODE mode2; -#endif - MACROBLOCKD *xd = &x->e_mbd; - int64_t best_rd = INT64_MAX; - int distortion, rate = 0; - BLOCK *be = x->block + ib; - BLOCKD *b = xd->block + ib; - ENTROPY_CONTEXT ta0, ta1, besta0 = 0, besta1 = 0; - ENTROPY_CONTEXT tl0, tl1, bestl0 = 0, bestl1 = 0; - - /* - * The predictor buffer is 
a 2d buffer with a stride of 16. Create - * a temp buffer that meets the stride requirements, but we are only - * interested in the left 8x8 block - * */ - DECLARE_ALIGNED_ARRAY(16, unsigned char, best_predictor, 16 * 8); - DECLARE_ALIGNED_ARRAY(16, short, best_dqcoeff, 16 * 4); - - // perform transformation of dimension 8x8 - // note the input and output index mapping - int idx = (ib & 0x02) ? (ib + 2) : ib; - - for (mode = DC_PRED; mode <= TM_PRED; mode++) { -#if CONFIG_COMP_INTRA_PRED - for (mode2 = DC_PRED - 1; mode2 != TM_PRED + 1; mode2++) { -#endif - int64_t this_rd; - int rate_t; - - // FIXME rate for compound mode and second intrapred mode - rate = mode_costs[mode]; - b->bmi.as_mode.first = mode; - -#if CONFIG_COMP_INTRA_PRED - if (mode2 == (MB_PREDICTION_MODE)(DC_PRED - 1)) { -#endif - vp9_intra8x8_predict(b, mode, b->predictor); -#if CONFIG_COMP_INTRA_PRED - } else { - continue; // i.e. disable for now - vp9_comp_intra8x8_predict(b, mode, mode2, b->predictor); - } -#endif - - vp9_subtract_4b_c(be, b, 16); - - if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) { - TX_TYPE tx_type = get_tx_type_8x8(xd, b); - if (tx_type != DCT_DCT) - vp9_fht(be->src_diff, 32, (x->block + idx)->coeff, tx_type, 8); - else - x->vp9_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32); - x->quantize_b_8x8(x->block + idx, xd->block + idx); - - // compute quantization mse of 8x8 block - distortion = vp9_block_error_c((x->block + idx)->coeff, - (xd->block + idx)->dqcoeff, 64); - ta0 = a[vp9_block2above_8x8[idx]]; - tl0 = l[vp9_block2left_8x8[idx]]; - - rate_t = cost_coeffs(x, xd->block + idx, PLANE_TYPE_Y_WITH_DC, - &ta0, &tl0, TX_8X8); - - rate += rate_t; - ta1 = ta0; - tl1 = tl0; - } else { - x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32); - x->vp9_short_fdct8x4((be + 4)->src_diff, (be + 4)->coeff, 32); - - x->quantize_b_4x4_pair(x->block + ib, x->block + ib + 1, - xd->block + ib, xd->block + ib + 1); - x->quantize_b_4x4_pair(x->block + ib + 4, x->block + ib + 5, - 
xd->block + ib + 4, xd->block + ib + 5); - - distortion = vp9_block_error_c((x->block + ib)->coeff, - (xd->block + ib)->dqcoeff, 16); - distortion += vp9_block_error_c((x->block + ib + 1)->coeff, - (xd->block + ib + 1)->dqcoeff, 16); - distortion += vp9_block_error_c((x->block + ib + 4)->coeff, - (xd->block + ib + 4)->dqcoeff, 16); - distortion += vp9_block_error_c((x->block + ib + 5)->coeff, - (xd->block + ib + 5)->dqcoeff, 16); - - ta0 = a[vp9_block2above[ib]]; - ta1 = a[vp9_block2above[ib + 1]]; - tl0 = l[vp9_block2left[ib]]; - tl1 = l[vp9_block2left[ib + 4]]; - rate_t = cost_coeffs(x, xd->block + ib, PLANE_TYPE_Y_WITH_DC, - &ta0, &tl0, TX_4X4); - rate_t += cost_coeffs(x, xd->block + ib + 1, PLANE_TYPE_Y_WITH_DC, - &ta1, &tl0, TX_4X4); - rate_t += cost_coeffs(x, xd->block + ib + 4, PLANE_TYPE_Y_WITH_DC, - &ta0, &tl1, TX_4X4); - rate_t += cost_coeffs(x, xd->block + ib + 5, PLANE_TYPE_Y_WITH_DC, - &ta1, &tl1, TX_4X4); - rate += rate_t; - } - - distortion >>= 2; - this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); - if (this_rd < best_rd) { - *bestrate = rate; - *bestratey = rate_t; - *bestdistortion = distortion; - besta0 = ta0; - besta1 = ta1; - bestl0 = tl0; - bestl1 = tl1; - best_rd = this_rd; - *best_mode = mode; -#if CONFIG_COMP_INTRA_PRED - *best_second_mode = mode2; -#endif - copy_predictor_8x8(best_predictor, b->predictor); - vpx_memcpy(best_dqcoeff, b->dqcoeff, 64); - vpx_memcpy(best_dqcoeff + 32, b->dqcoeff + 64, 64); -#if CONFIG_COMP_INTRA_PRED - } -#endif - } - } - b->bmi.as_mode.first = (*best_mode); -#if CONFIG_COMP_INTRA_PRED - b->bmi.as_mode.second = (*best_second_mode); -#endif - vp9_encode_intra8x8(IF_RTCD(&cpi->rtcd), x, ib); - - if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) { - a[vp9_block2above_8x8[idx]] = besta0; - a[vp9_block2above_8x8[idx] + 1] = besta1; - l[vp9_block2left_8x8[idx]] = bestl0; - l[vp9_block2left_8x8[idx] + 1] = bestl1; - } else { - a[vp9_block2above[ib]] = besta0; - a[vp9_block2above[ib + 1]] = besta1; - 
l[vp9_block2left[ib]] = bestl0; - l[vp9_block2left[ib + 4]] = bestl1; - } - - return best_rd; -} - -static int64_t rd_pick_intra8x8mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, - int *Rate, int *rate_y, - int *Distortion, int64_t best_rd) { - MACROBLOCKD *const xd = &mb->e_mbd; - int i, ib; - int cost = mb->mbmode_cost [xd->frame_type] [I8X8_PRED]; - int distortion = 0; - int tot_rate_y = 0; - long long total_rd = 0; - ENTROPY_CONTEXT_PLANES t_above, t_left; - ENTROPY_CONTEXT *ta, *tl; - int *i8x8mode_costs; - - vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES)); - - ta = (ENTROPY_CONTEXT *)&t_above; - tl = (ENTROPY_CONTEXT *)&t_left; - - xd->mode_info_context->mbmi.mode = I8X8_PRED; - i8x8mode_costs = mb->i8x8_mode_costs; - - for (i = 0; i < 4; i++) { - MODE_INFO *const mic = xd->mode_info_context; - B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode); -#if CONFIG_COMP_INTRA_PRED - B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_second_mode); -#endif - int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry), UNINITIALIZED_IS_SAFE(d); - - ib = vp9_i8x8_block[i]; - total_rd += rd_pick_intra8x8block( - cpi, mb, ib, &best_mode, -#if CONFIG_COMP_INTRA_PRED - & best_second_mode, -#endif - i8x8mode_costs, ta, tl, &r, &ry, &d); - cost += r; - distortion += d; - tot_rate_y += ry; - mic->bmi[ib].as_mode.first = best_mode; -#if CONFIG_COMP_INTRA_PRED - mic->bmi[ib].as_mode.second = best_second_mode; -#endif - } - *Rate = cost; - *rate_y += tot_rate_y; - *Distortion = distortion; - return RDCOST(mb->rdmult, mb->rddiv, cost, distortion); -} - -static int rd_cost_mbuv(MACROBLOCK *mb) { - int b; - int cost = 0; - MACROBLOCKD *xd = &mb->e_mbd; - ENTROPY_CONTEXT_PLANES t_above, t_left; - ENTROPY_CONTEXT *ta, *tl; - - vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES)); - - ta = (ENTROPY_CONTEXT *)&t_above; 
- tl = (ENTROPY_CONTEXT *)&t_left; - - for (b = 16; b < 24; b++) - cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_UV, - ta + vp9_block2above[b], tl + vp9_block2left[b], - TX_4X4); - - return cost; -} - - -static int64_t rd_inter16x16_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate, - int *distortion, int fullpixel, int *skip) { - vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer, - x->e_mbd.predictor, x->src.uv_stride); - - vp9_transform_mbuv_4x4(x); - vp9_quantize_mbuv_4x4(x); - - *rate = rd_cost_mbuv(x); - *distortion = vp9_mbuverror(x) / 4; - *skip = vp9_mbuv_is_skippable_4x4(&x->e_mbd); - - return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); -} - -static int rd_cost_mbuv_8x8(MACROBLOCK *mb, int backup) { - int b; - int cost = 0; - MACROBLOCKD *xd = &mb->e_mbd; - ENTROPY_CONTEXT_PLANES t_above, t_left; - ENTROPY_CONTEXT *ta, *tl; - - if (backup) { - vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES)); - - ta = (ENTROPY_CONTEXT *)&t_above; - tl = (ENTROPY_CONTEXT *)&t_left; - } else { - ta = (ENTROPY_CONTEXT *)mb->e_mbd.above_context; - tl = (ENTROPY_CONTEXT *)mb->e_mbd.left_context; - } - - for (b = 16; b < 24; b += 4) - cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_UV, - ta + vp9_block2above_8x8[b], - tl + vp9_block2left_8x8[b], TX_8X8); - - return cost; -} - -#if CONFIG_SUPERBLOCKS -static int64_t rd_inter32x32_uv_8x8(VP9_COMP *cpi, MACROBLOCK *x, int *rate, - int *distortion, int fullpixel, int *skip) { - MACROBLOCKD *xd = &x->e_mbd; - int n, r = 0, d = 0; - const uint8_t *usrc = x->src.u_buffer, *udst = xd->dst.u_buffer; - const uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer; - int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride; - int skippable = 1; - ENTROPY_CONTEXT_PLANES t_above[2], t_left[2]; - ENTROPY_CONTEXT_PLANES *ta = xd->above_context; - ENTROPY_CONTEXT_PLANES *tl = xd->left_context; - - memcpy(t_above, 
xd->above_context, sizeof(t_above)); - memcpy(t_left, xd->left_context, sizeof(t_left)); - - for (n = 0; n < 4; n++) { - int x_idx = n & 1, y_idx = n >> 1; - - vp9_subtract_mbuv_s_c(x->src_diff, - usrc + x_idx * 8 + y_idx * 8 * src_uv_stride, - vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride, - src_uv_stride, - udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, - vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride, - dst_uv_stride); - - vp9_transform_mbuv_8x8(x); - vp9_quantize_mbuv_8x8(x); - - xd->above_context = ta + x_idx; - xd->left_context = tl + y_idx; - r += rd_cost_mbuv_8x8(x, 0); - d += vp9_mbuverror(x) / 4; - skippable = skippable && vp9_mbuv_is_skippable_8x8(xd); - } - - *rate = r; - *distortion = d; - if (skip) *skip = skippable; - xd->left_context = tl; - xd->above_context = ta; - memcpy(xd->above_context, t_above, sizeof(t_above)); - memcpy(xd->left_context, t_left, sizeof(t_left)); - - return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); -} -#endif - -static int64_t rd_inter16x16_uv_8x8(VP9_COMP *cpi, MACROBLOCK *x, int *rate, - int *distortion, int fullpixel, int *skip) { - vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer, - x->e_mbd.predictor, x->src.uv_stride); - - vp9_transform_mbuv_8x8(x); - vp9_quantize_mbuv_8x8(x); - - *rate = rd_cost_mbuv_8x8(x, 1); - *distortion = vp9_mbuverror(x) / 4; - *skip = vp9_mbuv_is_skippable_8x8(&x->e_mbd); - - return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); -} - - -static int64_t rd_inter4x4_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate, - int *distortion, int *skippable, int fullpixel) { - vp9_build_inter4x4_predictors_mbuv(&x->e_mbd); - vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer, - x->e_mbd.predictor, x->src.uv_stride); - - vp9_transform_mbuv_4x4(x); - vp9_quantize_mbuv_4x4(x); - - *rate = rd_cost_mbuv(x); - *distortion = vp9_mbuverror(x) / 4; - *skippable = vp9_mbuv_is_skippable_4x4(&x->e_mbd); - - return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); -} - -static void 
rd_pick_intra_mbuv_mode(VP9_COMP *cpi, - MACROBLOCK *x, - int *rate, - int *rate_tokenonly, - int *distortion, - int *skippable) { - MB_PREDICTION_MODE mode; - MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); -#if CONFIG_COMP_INTRA_PRED - MB_PREDICTION_MODE mode2; - MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode2_selected); -#endif - MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; - int64_t best_rd = INT64_MAX; - int UNINITIALIZED_IS_SAFE(d), UNINITIALIZED_IS_SAFE(r); - int rate_to, UNINITIALIZED_IS_SAFE(skip); - - for (mode = DC_PRED; mode <= TM_PRED; mode++) { -#if CONFIG_COMP_INTRA_PRED - for (mode2 = DC_PRED - 1; mode2 != TM_PRED + 1; mode2++) { -#endif - int rate; - int distortion; - int64_t this_rd; - - mbmi->uv_mode = mode; -#if CONFIG_COMP_INTRA_PRED - mbmi->second_uv_mode = mode2; - if (mode2 == (MB_PREDICTION_MODE)(DC_PRED - 1)) { -#endif - vp9_build_intra_predictors_mbuv(&x->e_mbd); -#if CONFIG_COMP_INTRA_PRED - } else { - continue; - vp9_build_comp_intra_predictors_mbuv(&x->e_mbd); - } -#endif - - vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer, - x->e_mbd.predictor, x->src.uv_stride); - vp9_transform_mbuv_4x4(x); - vp9_quantize_mbuv_4x4(x); - - rate_to = rd_cost_mbuv(x); - rate = rate_to - + x->intra_uv_mode_cost[x->e_mbd.frame_type][mbmi->uv_mode]; - - distortion = vp9_mbuverror(x) / 4; - - this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); - - if (this_rd < best_rd) { - skip = vp9_mbuv_is_skippable_4x4(xd); - best_rd = this_rd; - d = distortion; - r = rate; - *rate_tokenonly = rate_to; - mode_selected = mode; -#if CONFIG_COMP_INTRA_PRED - mode2_selected = mode2; - } -#endif - } - } - - *rate = r; - *distortion = d; - *skippable = skip; - - mbmi->uv_mode = mode_selected; -#if CONFIG_COMP_INTRA_PRED - mbmi->second_uv_mode = mode2_selected; -#endif -} - -static void rd_pick_intra_mbuv_mode_8x8(VP9_COMP *cpi, - MACROBLOCK *x, - int *rate, - int *rate_tokenonly, - int *distortion, - int 
*skippable) { - MACROBLOCKD *xd = &x->e_mbd; - MB_PREDICTION_MODE mode; - MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); - MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; - int64_t best_rd = INT64_MAX; - int UNINITIALIZED_IS_SAFE(d), UNINITIALIZED_IS_SAFE(r); - int rate_to, UNINITIALIZED_IS_SAFE(skip); - - for (mode = DC_PRED; mode <= TM_PRED; mode++) { - int rate; - int distortion; - int64_t this_rd; - - mbmi->uv_mode = mode; - vp9_build_intra_predictors_mbuv(&x->e_mbd); - vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer, - x->e_mbd.predictor, x->src.uv_stride); - vp9_transform_mbuv_8x8(x); - - vp9_quantize_mbuv_8x8(x); - - rate_to = rd_cost_mbuv_8x8(x, 1); - rate = rate_to + x->intra_uv_mode_cost[x->e_mbd.frame_type][mbmi->uv_mode]; - - distortion = vp9_mbuverror(x) / 4; - this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); - - if (this_rd < best_rd) { - skip = vp9_mbuv_is_skippable_8x8(xd); - best_rd = this_rd; - d = distortion; - r = rate; - *rate_tokenonly = rate_to; - mode_selected = mode; - } - } - *rate = r; - *distortion = d; - *skippable = skip; - mbmi->uv_mode = mode_selected; -} - -#if CONFIG_SUPERBLOCKS -static void super_block_uvrd_8x8(MACROBLOCK *x, - int *rate, - int *distortion, - const VP9_ENCODER_RTCD *rtcd, - int *skippable) { - MACROBLOCKD *const xd = &x->e_mbd; - int d = 0, r = 0, n, s = 1; - const uint8_t *usrc = x->src.u_buffer, *udst = xd->dst.u_buffer; - const uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer; - int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride; - ENTROPY_CONTEXT_PLANES t_above[2], t_left[2]; - ENTROPY_CONTEXT_PLANES *ta = xd->above_context; - ENTROPY_CONTEXT_PLANES *tl = xd->left_context; - - memcpy(t_above, xd->above_context, sizeof(t_above)); - memcpy(t_left, xd->left_context, sizeof(t_left)); - - for (n = 0; n < 4; n++) { - int x_idx = n & 1, y_idx = n >> 1; - - vp9_subtract_mbuv_s_c(x->src_diff, - usrc + x_idx * 8 + y_idx * 8 * src_uv_stride, - vsrc 
+ x_idx * 8 + y_idx * 8 * src_uv_stride, - src_uv_stride, - udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, - vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride, - dst_uv_stride); - vp9_transform_mbuv_8x8(x); - vp9_quantize_mbuv_8x8(x); - s &= vp9_mbuv_is_skippable_8x8(xd); - - d += vp9_mbuverror(x) >> 2; - xd->above_context = ta + x_idx; - xd->left_context = tl + y_idx; - r += rd_cost_mbuv_8x8(x, 0); - } - - xd->above_context = ta; - xd->left_context = tl; - *distortion = d; - *rate = r; - *skippable = s; - - xd->left_context = tl; - xd->above_context = ta; - memcpy(xd->above_context, t_above, sizeof(t_above)); - memcpy(xd->left_context, t_left, sizeof(t_left)); -} - -static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, - MACROBLOCK *x, - int *rate, - int *rate_tokenonly, - int *distortion, - int *skippable) { - MB_PREDICTION_MODE mode; - MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); - int64_t best_rd = INT64_MAX, this_rd; - int this_rate_tokenonly, this_rate; - int this_distortion, s; - - for (mode = DC_PRED; mode <= TM_PRED; mode++) { - x->e_mbd.mode_info_context->mbmi.uv_mode = mode; - vp9_build_intra_predictors_sbuv_s(&x->e_mbd); - - super_block_uvrd_8x8(x, &this_rate_tokenonly, - &this_distortion, IF_RTCD(&cpi->rtcd), &s); - this_rate = this_rate_tokenonly + - x->mbmode_cost[x->e_mbd.frame_type] - [x->e_mbd.mode_info_context->mbmi.mode]; - this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); - - if (this_rd < best_rd) { - mode_selected = mode; - best_rd = this_rd; - *rate = this_rate; - *rate_tokenonly = this_rate_tokenonly; - *distortion = this_distortion; - *skippable = s; - } - } - - x->e_mbd.mode_info_context->mbmi.uv_mode = mode_selected; - - return best_rd; -} -#endif - -int vp9_cost_mv_ref(VP9_COMP *cpi, - MB_PREDICTION_MODE m, - const int near_mv_ref_ct[4]) { - MACROBLOCKD *xd = &cpi->mb.e_mbd; - int segment_id = xd->mode_info_context->mbmi.segment_id; - - // If the mode coding is done entirely at the segment level - // we should not 
account for it at the per mb level in rd code. - // Note that if the segment level coding is expanded from single mode - // to multiple mode masks as per reference frame coding we will need - // to do something different here. - if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE)) { - VP9_COMMON *pc = &cpi->common; - - vp9_prob p [VP9_MVREFS - 1]; - assert(NEARESTMV <= m && m <= SPLITMV); - vp9_mv_ref_probs(pc, p, near_mv_ref_ct); - return cost_token(vp9_mv_ref_tree, p, - vp9_mv_ref_encoding_array - NEARESTMV + m); - } else - return 0; -} - -void vp9_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) { - x->e_mbd.mode_info_context->mbmi.mode = mb; - x->e_mbd.mode_info_context->mbmi.mv[0].as_int = mv->as_int; -} - -static int labels2mode( - MACROBLOCK *x, - int const *labelings, int which_label, - B_PREDICTION_MODE this_mode, - int_mv *this_mv, int_mv *this_second_mv, - int_mv seg_mvs[MAX_REF_FRAMES - 1], - int_mv *best_ref_mv, - int_mv *second_best_ref_mv, - DEC_MVCOSTS) { - MACROBLOCKD *const xd = &x->e_mbd; - MODE_INFO *const mic = xd->mode_info_context; - MB_MODE_INFO * mbmi = &mic->mbmi; - const int mis = xd->mode_info_stride; - - int i, cost = 0, thismvcost = 0; - - /* We have to be careful retrieving previously-encoded motion vectors. - Ones from this macroblock have to be pulled from the BLOCKD array - as they have not yet made it to the bmi array in our MB_MODE_INFO. 
*/ - for (i = 0; i < 16; ++i) { - BLOCKD *const d = xd->block + i; - const int row = i >> 2, col = i & 3; - - B_PREDICTION_MODE m; - - if (labelings[i] != which_label) - continue; - - if (col && labelings[i] == labelings[i - 1]) - m = LEFT4X4; - else if (row && labelings[i] == labelings[i - 4]) - m = ABOVE4X4; - else { - // the only time we should do costing for new motion vector or mode - // is when we are on a new label (jbb May 08, 2007) - switch (m = this_mode) { - case NEW4X4 : - if (mbmi->second_ref_frame) { - this_mv->as_int = seg_mvs[mbmi->ref_frame - 1].as_int; - this_second_mv->as_int = - seg_mvs[mbmi->second_ref_frame - 1].as_int; - } - - thismvcost = vp9_mv_bit_cost(this_mv, best_ref_mv, MVCOSTS, - 102, xd->allow_high_precision_mv); - if (mbmi->second_ref_frame) { - thismvcost += vp9_mv_bit_cost(this_second_mv, second_best_ref_mv, - MVCOSTS, 102, - xd->allow_high_precision_mv); - } - break; - case LEFT4X4: - this_mv->as_int = col ? d[-1].bmi.as_mv.first.as_int : left_block_mv(mic, i); - if (mbmi->second_ref_frame) - this_second_mv->as_int = col ? d[-1].bmi.as_mv.second.as_int : left_block_second_mv(mic, i); - break; - case ABOVE4X4: - this_mv->as_int = row ? d[-4].bmi.as_mv.first.as_int : above_block_mv(mic, i, mis); - if (mbmi->second_ref_frame) - this_second_mv->as_int = row ? d[-4].bmi.as_mv.second.as_int : above_block_second_mv(mic, i, mis); - break; - case ZERO4X4: - this_mv->as_int = 0; - if (mbmi->second_ref_frame) - this_second_mv->as_int = 0; - break; - default: - break; - } - - if (m == ABOVE4X4) { // replace above with left if same - int_mv left_mv, left_second_mv; - - left_second_mv.as_int = 0; - left_mv.as_int = col ? d[-1].bmi.as_mv.first.as_int : - left_block_mv(mic, i); - if (mbmi->second_ref_frame) - left_second_mv.as_int = col ? 
d[-1].bmi.as_mv.second.as_int : - left_block_second_mv(mic, i); - - if (left_mv.as_int == this_mv->as_int && - (!mbmi->second_ref_frame || - left_second_mv.as_int == this_second_mv->as_int)) - m = LEFT4X4; - } - - cost = x->inter_bmode_costs[ m]; - } - - d->bmi.as_mv.first.as_int = this_mv->as_int; - if (mbmi->second_ref_frame) - d->bmi.as_mv.second.as_int = this_second_mv->as_int; - - x->partition_info->bmi[i].mode = m; - x->partition_info->bmi[i].mv.as_int = this_mv->as_int; - if (mbmi->second_ref_frame) - x->partition_info->bmi[i].second_mv.as_int = this_second_mv->as_int; - } - - cost += thismvcost; - return cost; -} - -static int64_t encode_inter_mb_segment(MACROBLOCK *x, - int const *labels, - int which_label, - int *labelyrate, - int *distortion, - ENTROPY_CONTEXT *ta, - ENTROPY_CONTEXT *tl, - const VP9_ENCODER_RTCD *rtcd) { - int i; - MACROBLOCKD *xd = &x->e_mbd; - - *labelyrate = 0; - *distortion = 0; - for (i = 0; i < 16; i++) { - if (labels[i] == which_label) { - BLOCKD *bd = &x->e_mbd.block[i]; - BLOCK *be = &x->block[i]; - int thisdistortion; - - vp9_build_inter_predictors_b(bd, 16, xd->subpixel_predict); - if (xd->mode_info_context->mbmi.second_ref_frame) - vp9_build_2nd_inter_predictors_b(bd, 16, xd->subpixel_predict_avg); - vp9_subtract_b(be, bd, 16); - x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32); - x->quantize_b_4x4(be, bd); - thisdistortion = vp9_block_error(be->coeff, bd->dqcoeff, 16); - *distortion += thisdistortion; - *labelyrate += cost_coeffs(x, bd, PLANE_TYPE_Y_WITH_DC, - ta + vp9_block2above[i], - tl + vp9_block2left[i], TX_4X4); - } - } - *distortion >>= 2; - return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion); -} - -static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x, - int const *labels, - int which_label, - int *labelyrate, - int *distortion, - int64_t *otherrd, - ENTROPY_CONTEXT *ta, - ENTROPY_CONTEXT *tl, - const VP9_ENCODER_RTCD *rtcd) { - int i, j; - MACROBLOCKD *xd = &x->e_mbd; - const int iblock[4] = { 0, 1, 4, 
5 }; - int othercost = 0, otherdist = 0; - ENTROPY_CONTEXT_PLANES tac, tlc; - ENTROPY_CONTEXT *tacp = (ENTROPY_CONTEXT *) &tac, - *tlcp = (ENTROPY_CONTEXT *) &tlc; - - if (otherrd) { - memcpy(&tac, ta, sizeof(ENTROPY_CONTEXT_PLANES)); - memcpy(&tlc, tl, sizeof(ENTROPY_CONTEXT_PLANES)); - } - - *distortion = 0; - *labelyrate = 0; - for (i = 0; i < 4; i++) { - int ib = vp9_i8x8_block[i]; - - if (labels[ib] == which_label) { - int idx = (ib & 8) + ((ib & 2) << 1); - BLOCKD *bd = &xd->block[ib], *bd2 = &xd->block[idx]; - BLOCK *be = &x->block[ib], *be2 = &x->block[idx]; - int thisdistortion; - - vp9_build_inter_predictors4b(xd, bd, 16); - if (xd->mode_info_context->mbmi.second_ref_frame) - vp9_build_2nd_inter_predictors4b(xd, bd, 16); - vp9_subtract_4b_c(be, bd, 16); - - if (xd->mode_info_context->mbmi.txfm_size == TX_4X4) { - if (otherrd) { - x->vp9_short_fdct8x8(be->src_diff, be2->coeff, 32); - x->quantize_b_8x8(be2, bd2); - thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64); - otherdist += thisdistortion; - othercost += cost_coeffs(x, bd2, PLANE_TYPE_Y_WITH_DC, - tacp + vp9_block2above_8x8[idx], - tlcp + vp9_block2left_8x8[idx], TX_8X8); - } - for (j = 0; j < 4; j += 2) { - bd = &xd->block[ib + iblock[j]]; - be = &x->block[ib + iblock[j]]; - x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32); - x->quantize_b_4x4_pair(be, be + 1, bd, bd + 1); - thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32); - *distortion += thisdistortion; - *labelyrate += cost_coeffs(x, bd, PLANE_TYPE_Y_WITH_DC, - ta + vp9_block2above[ib + iblock[j]], - tl + vp9_block2left[ib + iblock[j]], - TX_4X4); - *labelyrate += cost_coeffs(x, bd + 1, PLANE_TYPE_Y_WITH_DC, - ta + vp9_block2above[ib + iblock[j] + 1], - tl + vp9_block2left[ib + iblock[j]], - TX_4X4); - } - } else /* 8x8 */ { - if (otherrd) { - for (j = 0; j < 4; j += 2) { - BLOCKD *bd3 = &xd->block[ib + iblock[j]]; - BLOCK *be3 = &x->block[ib + iblock[j]]; - x->vp9_short_fdct8x4(be3->src_diff, be3->coeff, 32); - 
x->quantize_b_4x4_pair(be3, be3 + 1, bd3, bd3 + 1); - thisdistortion = vp9_block_error_c(be3->coeff, bd3->dqcoeff, 32); - otherdist += thisdistortion; - othercost += cost_coeffs(x, bd3, PLANE_TYPE_Y_WITH_DC, - tacp + vp9_block2above[ib + iblock[j]], - tlcp + vp9_block2left[ib + iblock[j]], - TX_4X4); - othercost += cost_coeffs(x, bd3 + 1, PLANE_TYPE_Y_WITH_DC, - tacp + vp9_block2above[ib + iblock[j] + 1], - tlcp + vp9_block2left[ib + iblock[j]], - TX_4X4); - } - } - x->vp9_short_fdct8x8(be->src_diff, be2->coeff, 32); - x->quantize_b_8x8(be2, bd2); - thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64); - *distortion += thisdistortion; - *labelyrate += cost_coeffs(x, bd2, PLANE_TYPE_Y_WITH_DC, - ta + vp9_block2above_8x8[idx], - tl + vp9_block2left_8x8[idx], TX_8X8); - } - } - } - *distortion >>= 2; - if (otherrd) { - otherdist >>= 2; - *otherrd = RDCOST(x->rdmult, x->rddiv, othercost, otherdist); - } - return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion); -} - -static const unsigned int segmentation_to_sseshift[4] = {3, 3, 2, 0}; - - -typedef struct { - int_mv *ref_mv, *second_ref_mv; - int_mv mvp; - - int64_t segment_rd; - SPLITMV_PARTITIONING_TYPE segment_num; - TX_SIZE txfm_size; - int r; - int d; - int segment_yrate; - B_PREDICTION_MODE modes[16]; - int_mv mvs[16], second_mvs[16]; - int eobs[16]; - - int mvthresh; - int *mdcounts; - - int_mv sv_mvp[4]; // save 4 mvp from 8x8 - int sv_istep[2]; // save 2 initial step_param for 16x8/8x16 - -} BEST_SEG_INFO; - -static __inline -int mv_check_bounds(MACROBLOCK *x, int_mv *mv) { - int r = 0; - r |= (mv->as_mv.row >> 3) < x->mv_row_min; - r |= (mv->as_mv.row >> 3) > x->mv_row_max; - r |= (mv->as_mv.col >> 3) < x->mv_col_min; - r |= (mv->as_mv.col >> 3) > x->mv_col_max; - return r; -} - -static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, - BEST_SEG_INFO *bsi, - SPLITMV_PARTITIONING_TYPE segmentation, - TX_SIZE tx_size, int64_t *otherrds, - int64_t *rds, int *completed, - /* 16 = 
n_blocks */ - int_mv seg_mvs[16 /* n_blocks */] - [MAX_REF_FRAMES - 1]) { - int i, j; - int const *labels; - int br = 0, bd = 0; - B_PREDICTION_MODE this_mode; - MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; - - int label_count; - int64_t this_segment_rd = 0, other_segment_rd; - int label_mv_thresh; - int rate = 0; - int sbr = 0, sbd = 0; - int segmentyrate = 0; - int best_eobs[16] = { 0 }; - - vp9_variance_fn_ptr_t *v_fn_ptr; - - ENTROPY_CONTEXT_PLANES t_above, t_left; - ENTROPY_CONTEXT *ta, *tl; - ENTROPY_CONTEXT_PLANES t_above_b, t_left_b; - ENTROPY_CONTEXT *ta_b, *tl_b; - - vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); - - ta = (ENTROPY_CONTEXT *)&t_above; - tl = (ENTROPY_CONTEXT *)&t_left; - ta_b = (ENTROPY_CONTEXT *)&t_above_b; - tl_b = (ENTROPY_CONTEXT *)&t_left_b; - - v_fn_ptr = &cpi->fn_ptr[segmentation]; - labels = vp9_mbsplits[segmentation]; - label_count = vp9_mbsplit_count[segmentation]; - - // 64 makes this threshold really big effectively - // making it so that we very rarely check mvs on - // segments. 
setting this to 1 would make mv thresh - // roughly equal to what it is for macroblocks - label_mv_thresh = 1 * bsi->mvthresh / label_count; - - // Segmentation method overheads - rate = cost_token(vp9_mbsplit_tree, vp9_mbsplit_probs, - vp9_mbsplit_encodings + segmentation); - rate += vp9_cost_mv_ref(cpi, SPLITMV, bsi->mdcounts); - this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0); - br += rate; - other_segment_rd = this_segment_rd; - - mbmi->txfm_size = tx_size; - for (i = 0; i < label_count && this_segment_rd < bsi->segment_rd; i++) { - int_mv mode_mv[B_MODE_COUNT], second_mode_mv[B_MODE_COUNT]; - int64_t best_label_rd = INT64_MAX, best_other_rd = INT64_MAX; - B_PREDICTION_MODE mode_selected = ZERO4X4; - int bestlabelyrate = 0; - - // search for the best motion vector on this segment - for (this_mode = LEFT4X4; this_mode <= NEW4X4; this_mode ++) { - int64_t this_rd, other_rd; - int distortion; - int labelyrate; - ENTROPY_CONTEXT_PLANES t_above_s, t_left_s; - ENTROPY_CONTEXT *ta_s; - ENTROPY_CONTEXT *tl_s; - - vpx_memcpy(&t_above_s, &t_above, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left_s, &t_left, sizeof(ENTROPY_CONTEXT_PLANES)); - - ta_s = (ENTROPY_CONTEXT *)&t_above_s; - tl_s = (ENTROPY_CONTEXT *)&t_left_s; - - // motion search for newmv (single predictor case only) - if (!mbmi->second_ref_frame && this_mode == NEW4X4) { - int sseshift, n; - int step_param = 0; - int further_steps; - int thissme, bestsme = INT_MAX; - BLOCK *c; - BLOCKD *e; - - /* Is the best so far sufficiently good that we cant justify doing - * and new motion search. */ - if (best_label_rd < label_mv_thresh) - break; - - if (cpi->compressor_speed) { - if (segmentation == PARTITIONING_8X16 || - segmentation == PARTITIONING_16X8) { - bsi->mvp.as_int = bsi->sv_mvp[i].as_int; - if (i == 1 && segmentation == PARTITIONING_16X8) - bsi->mvp.as_int = bsi->sv_mvp[2].as_int; - - step_param = bsi->sv_istep[i]; - } - - // use previous block's result as next block's MV predictor. 
- if (segmentation == PARTITIONING_4X4 && i > 0) { - bsi->mvp.as_int = x->e_mbd.block[i - 1].bmi.as_mv.first.as_int; - if (i == 4 || i == 8 || i == 12) - bsi->mvp.as_int = x->e_mbd.block[i - 4].bmi.as_mv.first.as_int; - step_param = 2; - } - } - - further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; - - { - int sadpb = x->sadperbit4; - int_mv mvp_full; - - mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3; - mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3; - - // find first label - n = vp9_mbsplit_offset[segmentation][i]; - - c = &x->block[n]; - e = &x->e_mbd.block[n]; - - bestsme = vp9_full_pixel_diamond(cpi, x, c, e, &mvp_full, step_param, - sadpb, further_steps, 0, v_fn_ptr, - bsi->ref_mv, &mode_mv[NEW4X4]); - - sseshift = segmentation_to_sseshift[segmentation]; - - // Should we do a full search (best quality only) - if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) { - /* Check if mvp_full is within the range. */ - clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, - x->mv_row_min, x->mv_row_max); - - thissme = cpi->full_search_sad(x, c, e, &mvp_full, - sadpb, 16, v_fn_ptr, - XMVCOST, bsi->ref_mv); - - if (thissme < bestsme) { - bestsme = thissme; - mode_mv[NEW4X4].as_int = e->bmi.as_mv.first.as_int; - } else { - /* The full search result is actually worse so re-instate the - * previous best vector */ - e->bmi.as_mv.first.as_int = mode_mv[NEW4X4].as_int; - } - } - } - - if (bestsme < INT_MAX) { - int distortion; - unsigned int sse; - cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], - bsi->ref_mv, x->errorperbit, v_fn_ptr, - XMVCOST, &distortion, &sse); - - // safe motion search result for use in compound prediction - seg_mvs[i][mbmi->ref_frame - 1].as_int = mode_mv[NEW4X4].as_int; - } - } /* NEW4X4 */ - else if (mbmi->second_ref_frame && this_mode == NEW4X4) { - /* motion search not completed? 
Then skip newmv for this block with - * comppred */ - if (seg_mvs[i][mbmi->second_ref_frame - 1].as_int == INVALID_MV || - seg_mvs[i][mbmi->ref_frame - 1].as_int == INVALID_MV) { - continue; - } - } - - rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode], - &second_mode_mv[this_mode], seg_mvs[i], - bsi->ref_mv, bsi->second_ref_mv, XMVCOST); - - // Trap vectors that reach beyond the UMV borders - if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) || - ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) || - ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) || - ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) { - continue; - } - if (mbmi->second_ref_frame && - mv_check_bounds(x, &second_mode_mv[this_mode])) - continue; - - if (segmentation == PARTITIONING_4X4) { - this_rd = encode_inter_mb_segment(x, labels, i, &labelyrate, - &distortion, - ta_s, tl_s, IF_RTCD(&cpi->rtcd)); - other_rd = this_rd; - } else { - this_rd = encode_inter_mb_segment_8x8(x, labels, i, &labelyrate, - &distortion, &other_rd, - ta_s, tl_s, IF_RTCD(&cpi->rtcd)); - } - this_rd += RDCOST(x->rdmult, x->rddiv, rate, 0); - rate += labelyrate; - - if (this_rd < best_label_rd) { - sbr = rate; - sbd = distortion; - bestlabelyrate = labelyrate; - mode_selected = this_mode; - best_label_rd = this_rd; - if (x->e_mbd.mode_info_context->mbmi.txfm_size == TX_4X4) { - for (j = 0; j < 16; j++) - if (labels[j] == i) - best_eobs[j] = x->e_mbd.block[j].eob; - } else { - for (j = 0; j < 4; j++) { - int ib = vp9_i8x8_block[j], idx = j * 4; - - if (labels[ib] == i) - best_eobs[idx] = x->e_mbd.block[idx].eob; - } - } - if (other_rd < best_other_rd) - best_other_rd = other_rd; - - vpx_memcpy(ta_b, ta_s, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(tl_b, tl_s, sizeof(ENTROPY_CONTEXT_PLANES)); - - } - } /*for each 4x4 mode*/ - - vpx_memcpy(ta, ta_b, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(tl, tl_b, sizeof(ENTROPY_CONTEXT_PLANES)); - - labels2mode(x, labels, i, mode_selected, 
&mode_mv[mode_selected], - &second_mode_mv[mode_selected], seg_mvs[i], - bsi->ref_mv, bsi->second_ref_mv, XMVCOST); - - br += sbr; - bd += sbd; - segmentyrate += bestlabelyrate; - this_segment_rd += best_label_rd; - other_segment_rd += best_other_rd; - if (rds) - rds[i] = this_segment_rd; - if (otherrds) - otherrds[i] = other_segment_rd; - } /* for each label */ - - if (this_segment_rd < bsi->segment_rd) { - bsi->r = br; - bsi->d = bd; - bsi->segment_yrate = segmentyrate; - bsi->segment_rd = this_segment_rd; - bsi->segment_num = segmentation; - bsi->txfm_size = mbmi->txfm_size; - - // store everything needed to come back to this!! - for (i = 0; i < 16; i++) { - BLOCKD *bd = &x->e_mbd.block[i]; - - bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv; - if (mbmi->second_ref_frame) - bsi->second_mvs[i].as_mv = x->partition_info->bmi[i].second_mv.as_mv; - bsi->modes[i] = x->partition_info->bmi[i].mode; - bsi->eobs[i] = best_eobs[i]; - } - } - - if (completed) { - *completed = i; - } -} - -static void rd_check_segment(VP9_COMP *cpi, MACROBLOCK *x, - BEST_SEG_INFO *bsi, - unsigned int segmentation, - /* 16 = n_blocks */ - int_mv seg_mvs[16][MAX_REF_FRAMES - 1], - int64_t txfm_cache[NB_TXFM_MODES]) { - int i, n, c = vp9_mbsplit_count[segmentation]; - - if (segmentation == PARTITIONING_4X4) { - int64_t rd[16]; - - rd_check_segment_txsize(cpi, x, bsi, segmentation, TX_4X4, NULL, - rd, &n, seg_mvs); - if (n == c) { - for (i = 0; i < NB_TXFM_MODES; i++) { - if (rd[c - 1] < txfm_cache[i]) - txfm_cache[i] = rd[c - 1]; - } - } - } else { - int64_t diff, base_rd; - int cost4x4 = vp9_cost_bit(cpi->common.prob_tx[0], 0); - int cost8x8 = vp9_cost_bit(cpi->common.prob_tx[0], 1); - - if (cpi->common.txfm_mode == TX_MODE_SELECT) { - int64_t rd4x4[4], rd8x8[4]; - int n4x4, n8x8, nmin; - BEST_SEG_INFO bsi4x4, bsi8x8; - - /* factor in cost of cost4x4/8x8 in decision */ - vpx_memcpy(&bsi4x4, bsi, sizeof(*bsi)); - vpx_memcpy(&bsi8x8, bsi, sizeof(*bsi)); - rd_check_segment_txsize(cpi, x, 
&bsi4x4, segmentation, - TX_4X4, NULL, rd4x4, &n4x4, seg_mvs); - rd_check_segment_txsize(cpi, x, &bsi8x8, segmentation, - TX_8X8, NULL, rd8x8, &n8x8, seg_mvs); - if (bsi4x4.segment_num == segmentation) { - bsi4x4.segment_rd += RDCOST(x->rdmult, x->rddiv, cost4x4, 0); - if (bsi4x4.segment_rd < bsi->segment_rd) - vpx_memcpy(bsi, &bsi4x4, sizeof(*bsi)); - } - if (bsi8x8.segment_num == segmentation) { - bsi8x8.segment_rd += RDCOST(x->rdmult, x->rddiv, cost8x8, 0); - if (bsi8x8.segment_rd < bsi->segment_rd) - vpx_memcpy(bsi, &bsi8x8, sizeof(*bsi)); - } - n = n4x4 > n8x8 ? n4x4 : n8x8; - if (n == c) { - nmin = n4x4 < n8x8 ? n4x4 : n8x8; - diff = rd8x8[nmin - 1] - rd4x4[nmin - 1]; - if (n == n4x4) { - base_rd = rd4x4[c - 1]; - } else { - base_rd = rd8x8[c - 1] - diff; - } - } - } else { - int64_t rd[4], otherrd[4]; - - if (cpi->common.txfm_mode == ONLY_4X4) { - rd_check_segment_txsize(cpi, x, bsi, segmentation, TX_4X4, otherrd, - rd, &n, seg_mvs); - if (n == c) { - base_rd = rd[c - 1]; - diff = otherrd[c - 1] - rd[c - 1]; - } - } else /* use 8x8 transform */ { - rd_check_segment_txsize(cpi, x, bsi, segmentation, TX_8X8, otherrd, - rd, &n, seg_mvs); - if (n == c) { - diff = rd[c - 1] - otherrd[c - 1]; - base_rd = otherrd[c - 1]; - } - } - } - - if (n == c) { - if (base_rd < txfm_cache[ONLY_4X4]) { - txfm_cache[ONLY_4X4] = base_rd; - } - if (base_rd + diff < txfm_cache[1]) { - txfm_cache[ALLOW_8X8] = txfm_cache[ALLOW_16X16] = base_rd + diff; - } - if (diff < 0) { - base_rd += diff + RDCOST(x->rdmult, x->rddiv, cost8x8, 0); - } else { - base_rd += RDCOST(x->rdmult, x->rddiv, cost4x4, 0); - } - if (base_rd < txfm_cache[TX_MODE_SELECT]) { - txfm_cache[TX_MODE_SELECT] = base_rd; - } - } - } -} - -static __inline void cal_step_param(int sr, int *sp) { - int step = 0; - - if (sr > MAX_FIRST_STEP) sr = MAX_FIRST_STEP; - else if (sr < 1) sr = 1; - - while (sr >>= 1) - step++; - - *sp = MAX_MVSEARCH_STEPS - 1 - step; -} - -static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, 
MACROBLOCK *x, - int_mv *best_ref_mv, - int_mv *second_best_ref_mv, - int64_t best_rd, - int *mdcounts, - int *returntotrate, - int *returnyrate, - int *returndistortion, - int *skippable, int mvthresh, - int_mv seg_mvs[NB_PARTITIONINGS] - [16 /* n_blocks */] - [MAX_REF_FRAMES - 1], - int64_t txfm_cache[NB_TXFM_MODES]) { - int i; - BEST_SEG_INFO bsi; - MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; - - vpx_memset(&bsi, 0, sizeof(bsi)); - for (i = 0; i < NB_TXFM_MODES; i++) - txfm_cache[i] = INT64_MAX; - - bsi.segment_rd = best_rd; - bsi.ref_mv = best_ref_mv; - bsi.second_ref_mv = second_best_ref_mv; - bsi.mvp.as_int = best_ref_mv->as_int; - bsi.mvthresh = mvthresh; - bsi.mdcounts = mdcounts; - bsi.txfm_size = TX_4X4; - - for (i = 0; i < 16; i++) - bsi.modes[i] = ZERO4X4; - - if (cpi->compressor_speed == 0) { - /* for now, we will keep the original segmentation order - when in best quality mode */ - rd_check_segment(cpi, x, &bsi, PARTITIONING_16X8, - seg_mvs[PARTITIONING_16X8], txfm_cache); - rd_check_segment(cpi, x, &bsi, PARTITIONING_8X16, - seg_mvs[PARTITIONING_8X16], txfm_cache); - rd_check_segment(cpi, x, &bsi, PARTITIONING_8X8, - seg_mvs[PARTITIONING_8X8], txfm_cache); - rd_check_segment(cpi, x, &bsi, PARTITIONING_4X4, - seg_mvs[PARTITIONING_4X4], txfm_cache); - } else { - int sr; - - rd_check_segment(cpi, x, &bsi, PARTITIONING_8X8, - seg_mvs[PARTITIONING_8X8], txfm_cache); - - if (bsi.segment_rd < best_rd) { - int tmp_col_min = x->mv_col_min; - int tmp_col_max = x->mv_col_max; - int tmp_row_min = x->mv_row_min; - int tmp_row_max = x->mv_row_max; - - vp9_clamp_mv_min_max(x, best_ref_mv); - - /* Get 8x8 result */ - bsi.sv_mvp[0].as_int = bsi.mvs[0].as_int; - bsi.sv_mvp[1].as_int = bsi.mvs[2].as_int; - bsi.sv_mvp[2].as_int = bsi.mvs[8].as_int; - bsi.sv_mvp[3].as_int = bsi.mvs[10].as_int; - - /* Use 8x8 result as 16x8/8x16's predictor MV. Adjust search range - * according to the closeness of 2 MV. 
*/ - /* block 8X16 */ - sr = MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[2].as_mv.row)) >> 3, - (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[2].as_mv.col)) >> 3); - cal_step_param(sr, &bsi.sv_istep[0]); - - sr = MAXF((abs(bsi.sv_mvp[1].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3, - (abs(bsi.sv_mvp[1].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3); - cal_step_param(sr, &bsi.sv_istep[1]); - - rd_check_segment(cpi, x, &bsi, PARTITIONING_8X16, - seg_mvs[PARTITIONING_8X16], txfm_cache); - - /* block 16X8 */ - sr = MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[1].as_mv.row)) >> 3, - (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[1].as_mv.col)) >> 3); - cal_step_param(sr, &bsi.sv_istep[0]); - - sr = MAXF((abs(bsi.sv_mvp[2].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3, - (abs(bsi.sv_mvp[2].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3); - cal_step_param(sr, &bsi.sv_istep[1]); - - rd_check_segment(cpi, x, &bsi, PARTITIONING_16X8, - seg_mvs[PARTITIONING_16X8], txfm_cache); - - /* If 8x8 is better than 16x8/8x16, then do 4x4 search */ - /* Not skip 4x4 if speed=0 (good quality) */ - if (cpi->sf.no_skip_block4x4_search || - bsi.segment_num == PARTITIONING_8X8) { - /* || (sv_segment_rd8x8-bsi.segment_rd) < sv_segment_rd8x8>>5) */ - bsi.mvp.as_int = bsi.sv_mvp[0].as_int; - rd_check_segment(cpi, x, &bsi, PARTITIONING_4X4, - seg_mvs[PARTITIONING_4X4], txfm_cache); - } - - /* restore UMV window */ - x->mv_col_min = tmp_col_min; - x->mv_col_max = tmp_col_max; - x->mv_row_min = tmp_row_min; - x->mv_row_max = tmp_row_max; - } - } - - /* set it to the best */ - for (i = 0; i < 16; i++) { - BLOCKD *bd = &x->e_mbd.block[i]; - - bd->bmi.as_mv.first.as_int = bsi.mvs[i].as_int; - if (mbmi->second_ref_frame) - bd->bmi.as_mv.second.as_int = bsi.second_mvs[i].as_int; - bd->eob = bsi.eobs[i]; - } - - *returntotrate = bsi.r; - *returndistortion = bsi.d; - *returnyrate = bsi.segment_yrate; - *skippable = bsi.txfm_size == TX_4X4 ? 
- vp9_mby_is_skippable_4x4(&x->e_mbd, 0) : - vp9_mby_is_skippable_8x8(&x->e_mbd, 0); - - /* save partitions */ - mbmi->txfm_size = bsi.txfm_size; - mbmi->partitioning = bsi.segment_num; - x->partition_info->count = vp9_mbsplit_count[bsi.segment_num]; - - for (i = 0; i < x->partition_info->count; i++) { - int j; - - j = vp9_mbsplit_offset[bsi.segment_num][i]; - - x->partition_info->bmi[i].mode = bsi.modes[j]; - x->partition_info->bmi[i].mv.as_mv = bsi.mvs[j].as_mv; - if (mbmi->second_ref_frame) - x->partition_info->bmi[i].second_mv.as_mv = bsi.second_mvs[j].as_mv; - } - /* - * used to set mbmi->mv.as_int - */ - x->partition_info->bmi[15].mv.as_int = bsi.mvs[15].as_int; - if (mbmi->second_ref_frame) - x->partition_info->bmi[15].second_mv.as_int = bsi.second_mvs[15].as_int; - - return bsi.segment_rd; -} - -/* Order arr in increasing order, original position stored in idx */ -static void insertsortmv(int arr[], int len) { - int i, j, k; - - for (i = 1; i <= len - 1; i++) { - for (j = 0; j < i; j++) { - if (arr[j] > arr[i]) { - int temp; - - temp = arr[i]; - - for (k = i; k > j; k--) - arr[k] = arr[k - 1]; - - arr[j] = temp; - } - } - } -} - -static void insertsortsad(int arr[], int idx[], int len) { - int i, j, k; - - for (i = 1; i <= len - 1; i++) { - for (j = 0; j < i; j++) { - if (arr[j] > arr[i]) { - int temp, tempi; - - temp = arr[i]; - tempi = idx[i]; - - for (k = i; k > j; k--) { - arr[k] = arr[k - 1]; - idx[k] = idx[k - 1]; - } - - arr[j] = temp; - idx[j] = tempi; - } - } - } -} - -// The improved MV prediction -void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCKD *xd, const MODE_INFO *here, - int_mv *mvp, int refframe, int *ref_frame_sign_bias, - int *sr, int near_sadidx[]) { - const MODE_INFO *above = here - xd->mode_info_stride; - const MODE_INFO *left = here - 1; - const MODE_INFO *aboveleft = above - 1; - int_mv near_mvs[8]; - int near_ref[8]; - int_mv mv; - int vcnt = 0; - int find = 0; - int mb_offset; - - int mvx[8]; - int mvy[8]; - int i; - - mv.as_int = 0; - - 
if (here->mbmi.ref_frame != INTRA_FRAME) { - near_mvs[0].as_int = near_mvs[1].as_int = near_mvs[2].as_int = near_mvs[3].as_int = near_mvs[4].as_int = near_mvs[5].as_int = near_mvs[6].as_int = near_mvs[7].as_int = 0; - near_ref[0] = near_ref[1] = near_ref[2] = near_ref[3] = near_ref[4] = near_ref[5] = near_ref[6] = near_ref[7] = 0; - - // read in 3 nearby block's MVs from current frame as prediction candidates. - if (above->mbmi.ref_frame != INTRA_FRAME) { - near_mvs[vcnt].as_int = above->mbmi.mv[0].as_int; - mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias); - near_ref[vcnt] = above->mbmi.ref_frame; - } - vcnt++; - if (left->mbmi.ref_frame != INTRA_FRAME) { - near_mvs[vcnt].as_int = left->mbmi.mv[0].as_int; - mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias); - near_ref[vcnt] = left->mbmi.ref_frame; - } - vcnt++; - if (aboveleft->mbmi.ref_frame != INTRA_FRAME) { - near_mvs[vcnt].as_int = aboveleft->mbmi.mv[0].as_int; - mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias); - near_ref[vcnt] = aboveleft->mbmi.ref_frame; - } - vcnt++; - - // read in 5 nearby block's MVs from last frame. 
- if (cpi->common.last_frame_type != KEY_FRAME) { - mb_offset = (-xd->mb_to_top_edge / 128 + 1) * (xd->mode_info_stride + 1) + (-xd->mb_to_left_edge / 128 + 1); - - // current in last frame - if (cpi->lf_ref_frame[mb_offset] != INTRA_FRAME) { - near_mvs[vcnt].as_int = cpi->lfmv[mb_offset].as_int; - mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset], refframe, &near_mvs[vcnt], ref_frame_sign_bias); - near_ref[vcnt] = cpi->lf_ref_frame[mb_offset]; - } - vcnt++; - - // above in last frame - if (cpi->lf_ref_frame[mb_offset - xd->mode_info_stride - 1] != INTRA_FRAME) { - near_mvs[vcnt].as_int = cpi->lfmv[mb_offset - xd->mode_info_stride - 1].as_int; - mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset - xd->mode_info_stride - 1], refframe, &near_mvs[vcnt], ref_frame_sign_bias); - near_ref[vcnt] = cpi->lf_ref_frame[mb_offset - xd->mode_info_stride - 1]; - } - vcnt++; - - // left in last frame - if (cpi->lf_ref_frame[mb_offset - 1] != INTRA_FRAME) { - near_mvs[vcnt].as_int = cpi->lfmv[mb_offset - 1].as_int; - mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset - 1], refframe, &near_mvs[vcnt], ref_frame_sign_bias); - near_ref[vcnt] = cpi->lf_ref_frame[mb_offset - 1]; - } - vcnt++; - - // right in last frame - if (cpi->lf_ref_frame[mb_offset + 1] != INTRA_FRAME) { - near_mvs[vcnt].as_int = cpi->lfmv[mb_offset + 1].as_int; - mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset + 1], refframe, &near_mvs[vcnt], ref_frame_sign_bias); - near_ref[vcnt] = cpi->lf_ref_frame[mb_offset + 1]; - } - vcnt++; - - // below in last frame - if (cpi->lf_ref_frame[mb_offset + xd->mode_info_stride + 1] != INTRA_FRAME) { - near_mvs[vcnt].as_int = cpi->lfmv[mb_offset + xd->mode_info_stride + 1].as_int; - mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset + xd->mode_info_stride + 1], refframe, &near_mvs[vcnt], ref_frame_sign_bias); - near_ref[vcnt] = cpi->lf_ref_frame[mb_offset + xd->mode_info_stride + 1]; - } - vcnt++; - } - - for (i = 0; i < vcnt; i++) { - if (near_ref[near_sadidx[i]] != INTRA_FRAME) { - if 
(here->mbmi.ref_frame == near_ref[near_sadidx[i]]) { - mv.as_int = near_mvs[near_sadidx[i]].as_int; - find = 1; - if (i < 3) - *sr = 3; - else - *sr = 2; - break; - } - } - } - - if (!find) { - for (i = 0; i < vcnt; i++) { - mvx[i] = near_mvs[i].as_mv.row; - mvy[i] = near_mvs[i].as_mv.col; - } - - insertsortmv(mvx, vcnt); - insertsortmv(mvy, vcnt); - mv.as_mv.row = mvx[vcnt / 2]; - mv.as_mv.col = mvy[vcnt / 2]; - - find = 1; - // sr is set to 0 to allow calling function to decide the search range. - *sr = 0; - } - } - - /* Set up return values */ - mvp->as_int = mv.as_int; - clamp_mv2(mvp, xd); -} - -static void cal_sad(VP9_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, - int recon_yoffset, int near_sadidx[], - enum BlockSize block_size) { - /* 0-cf above, 1-cf left, 2-cf aboveleft, 3-lf current, 4-lf above, - * 5-lf left, 6-lf right, 7-lf below */ - int near_sad[8] = {0}; - BLOCK *b = &x->block[0]; - unsigned char *src_y_ptr = *(b->base_src); - const unsigned char *dst_y_ptr = xd->dst.y_buffer; - const int bs = (block_size == BLOCK_16X16) ? 16 : 32; - const int dst_y_str = xd->dst.y_stride; - - // calculate sad for current frame 3 nearby MBs. - if (xd->mb_to_top_edge == 0 && xd->mb_to_left_edge == 0) { - near_sad[0] = near_sad[1] = near_sad[2] = INT_MAX; - } else if (xd->mb_to_top_edge == 0) { - // only has left MB for sad calculation. - near_sad[0] = near_sad[2] = INT_MAX; - near_sad[1] = cpi->fn_ptr[block_size].sdf(src_y_ptr, b->src_stride, - dst_y_ptr - bs, - dst_y_str, 0x7fffffff); - } else if (xd->mb_to_left_edge == 0) { - // only has left MB for sad calculation. 
- near_sad[1] = near_sad[2] = INT_MAX; - near_sad[0] = cpi->fn_ptr[block_size].sdf(src_y_ptr, b->src_stride, - dst_y_ptr - dst_y_str * bs, - dst_y_str, 0x7fffffff); - } else { - near_sad[0] = cpi->fn_ptr[block_size].sdf(src_y_ptr, b->src_stride, - dst_y_ptr - dst_y_str * bs, - dst_y_str, 0x7fffffff); - near_sad[1] = cpi->fn_ptr[block_size].sdf(src_y_ptr, b->src_stride, - dst_y_ptr - bs, - dst_y_str, 0x7fffffff); - near_sad[2] = cpi->fn_ptr[block_size].sdf(src_y_ptr, b->src_stride, - dst_y_ptr - dst_y_str * bs - bs, - dst_y_str, 0x7fffffff); - } - - if (cpi->common.last_frame_type != KEY_FRAME) { - // calculate sad for last frame 5 nearby MBs. - unsigned char *pre_y_buffer = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_buffer + recon_yoffset; - const int pre_y_str = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_stride; - - if (xd->mb_to_top_edge == 0) near_sad[4] = INT_MAX; - if (xd->mb_to_left_edge == 0) near_sad[5] = INT_MAX; - if (xd->mb_to_right_edge == 0) near_sad[6] = INT_MAX; - if (xd->mb_to_bottom_edge == 0) near_sad[7] = INT_MAX; - - near_sad[3] = cpi->fn_ptr[block_size].sdf(src_y_ptr, b->src_stride, - pre_y_buffer, - pre_y_str, 0x7fffffff); - if (near_sad[4] != INT_MAX) - near_sad[4] = cpi->fn_ptr[block_size].sdf(src_y_ptr, b->src_stride, - pre_y_buffer - pre_y_str * bs, - pre_y_str, 0x7fffffff); - if (near_sad[5] != INT_MAX) - near_sad[5] = cpi->fn_ptr[block_size].sdf(src_y_ptr, b->src_stride, - pre_y_buffer - bs, - pre_y_str, 0x7fffffff); - if (near_sad[6] != INT_MAX) - near_sad[6] = cpi->fn_ptr[block_size].sdf(src_y_ptr, b->src_stride, - pre_y_buffer + bs, - pre_y_str, 0x7fffffff); - if (near_sad[7] != INT_MAX) - near_sad[7] = cpi->fn_ptr[block_size].sdf(src_y_ptr, b->src_stride, - pre_y_buffer + pre_y_str * bs, - pre_y_str, 0x7fffffff); - } - - if (cpi->common.last_frame_type != KEY_FRAME) { - insertsortsad(near_sad, near_sadidx, 8); - } else { - insertsortsad(near_sad, near_sadidx, 3); - } -} - -static void set_i8x8_block_modes(MACROBLOCK *x, int 
modes[2][4]) { - int i; - MACROBLOCKD *xd = &x->e_mbd; - for (i = 0; i < 4; i++) { - int ib = vp9_i8x8_block[i]; - xd->mode_info_context->bmi[ib + 0].as_mode.first = modes[0][i]; - xd->mode_info_context->bmi[ib + 1].as_mode.first = modes[0][i]; - xd->mode_info_context->bmi[ib + 4].as_mode.first = modes[0][i]; - xd->mode_info_context->bmi[ib + 5].as_mode.first = modes[0][i]; -#if CONFIG_COMP_INTRA_PRED - xd->mode_info_context->bmi[ib + 0].as_mode.second = modes[1][i]; - xd->mode_info_context->bmi[ib + 1].as_mode.second = modes[1][i]; - xd->mode_info_context->bmi[ib + 4].as_mode.second = modes[1][i]; - xd->mode_info_context->bmi[ib + 5].as_mode.second = modes[1][i]; -#endif - // printf("%d,%d,%d,%d %d,%d,%d,%d\n", - // modes[0][0], modes[0][1], modes[0][2], modes[0][3], - // modes[1][0], modes[1][1], modes[1][2], modes[1][3]); - } - - for (i = 0; i < 16; i++) { - xd->block[i].bmi = xd->mode_info_context->bmi[i]; - } -} - -extern void vp9_calc_ref_probs(int *count, vp9_prob *probs); -static void estimate_curframe_refprobs(VP9_COMP *cpi, vp9_prob mod_refprobs[3], int pred_ref) { - int norm_cnt[MAX_REF_FRAMES]; - const int *const rfct = cpi->count_mb_ref_frame_usage; - int intra_count = rfct[INTRA_FRAME]; - int last_count = rfct[LAST_FRAME]; - int gf_count = rfct[GOLDEN_FRAME]; - int arf_count = rfct[ALTREF_FRAME]; - - // Work out modified reference frame probabilities to use where prediction - // of the reference frame fails - if (pred_ref == INTRA_FRAME) { - norm_cnt[0] = 0; - norm_cnt[1] = last_count; - norm_cnt[2] = gf_count; - norm_cnt[3] = arf_count; - vp9_calc_ref_probs(norm_cnt, mod_refprobs); - mod_refprobs[0] = 0; // This branch implicit - } else if (pred_ref == LAST_FRAME) { - norm_cnt[0] = intra_count; - norm_cnt[1] = 0; - norm_cnt[2] = gf_count; - norm_cnt[3] = arf_count; - vp9_calc_ref_probs(norm_cnt, mod_refprobs); - mod_refprobs[1] = 0; // This branch implicit - } else if (pred_ref == GOLDEN_FRAME) { - norm_cnt[0] = intra_count; - norm_cnt[1] = 
last_count; - norm_cnt[2] = 0; - norm_cnt[3] = arf_count; - vp9_calc_ref_probs(norm_cnt, mod_refprobs); - mod_refprobs[2] = 0; // This branch implicit - } else { - norm_cnt[0] = intra_count; - norm_cnt[1] = last_count; - norm_cnt[2] = gf_count; - norm_cnt[3] = 0; - vp9_calc_ref_probs(norm_cnt, mod_refprobs); - mod_refprobs[2] = 0; // This branch implicit - } -} - -static __inline unsigned weighted_cost(vp9_prob *tab0, vp9_prob *tab1, int idx, int val, int weight) { - unsigned cost0 = tab0[idx] ? vp9_cost_bit(tab0[idx], val) : 0; - unsigned cost1 = tab1[idx] ? vp9_cost_bit(tab1[idx], val) : 0; - // weight is 16-bit fixed point, so this basically calculates: - // 0.5 + weight * cost1 + (1.0 - weight) * cost0 - return (0x8000 + weight * cost1 + (0x10000 - weight) * cost0) >> 16; -} - -static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id, unsigned int *ref_costs) { - VP9_COMMON *cm = &cpi->common; - MACROBLOCKD *xd = &cpi->mb.e_mbd; - vp9_prob *mod_refprobs; - - unsigned int cost; - int pred_ref; - int pred_flag; - int pred_ctx; - int i; - int tot_count; - - vp9_prob pred_prob, new_pred_prob; - int seg_ref_active; - int seg_ref_count = 0; - seg_ref_active = vp9_segfeature_active(xd, - segment_id, - SEG_LVL_REF_FRAME); - - if (seg_ref_active) { - seg_ref_count = vp9_check_segref(xd, segment_id, INTRA_FRAME) + - vp9_check_segref(xd, segment_id, LAST_FRAME) + - vp9_check_segref(xd, segment_id, GOLDEN_FRAME) + - vp9_check_segref(xd, segment_id, ALTREF_FRAME); - } - - // Get the predicted reference for this mb - pred_ref = vp9_get_pred_ref(cm, xd); - - // Get the context probability for the prediction flag (based on last frame) - pred_prob = vp9_get_pred_prob(cm, xd, PRED_REF); - - // Predict probability for current frame based on stats so far - pred_ctx = vp9_get_pred_context(cm, xd, PRED_REF); - tot_count = cpi->ref_pred_count[pred_ctx][0] + cpi->ref_pred_count[pred_ctx][1]; - if (tot_count) { - new_pred_prob = - (cpi->ref_pred_count[pred_ctx][0] * 255 + 
(tot_count >> 1)) / tot_count; - new_pred_prob += !new_pred_prob; - } else - new_pred_prob = 128; - - // Get the set of probabilities to use if prediction fails - mod_refprobs = cm->mod_refprobs[pred_ref]; - - // For each possible selected reference frame work out a cost. - for (i = 0; i < MAX_REF_FRAMES; i++) { - if (seg_ref_active && seg_ref_count == 1) { - cost = 0; - } else { - pred_flag = (i == pred_ref); - - // Get the prediction for the current mb - cost = weighted_cost(&pred_prob, &new_pred_prob, 0, - pred_flag, cpi->seg0_progress); - if (cost > 1024) cost = 768; // i.e. account for 4 bits max. - - // for incorrectly predicted cases - if (! pred_flag) { - vp9_prob curframe_mod_refprobs[3]; - - if (cpi->seg0_progress) { - estimate_curframe_refprobs(cpi, curframe_mod_refprobs, pred_ref); - } else { - vpx_memset(curframe_mod_refprobs, 0, sizeof(curframe_mod_refprobs)); - } - - cost += weighted_cost(mod_refprobs, curframe_mod_refprobs, 0, - (i != INTRA_FRAME), cpi->seg0_progress); - if (i != INTRA_FRAME) { - cost += weighted_cost(mod_refprobs, curframe_mod_refprobs, 1, - (i != LAST_FRAME), cpi->seg0_progress); - if (i != LAST_FRAME) { - cost += weighted_cost(mod_refprobs, curframe_mod_refprobs, 2, - (i != GOLDEN_FRAME), cpi->seg0_progress); - } - } - } - } - - ref_costs[i] = cost; - } -} - -static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, - int mode_index, - PARTITION_INFO *partition, - int_mv *ref_mv, - int_mv *second_ref_mv, - int single_pred_diff, - int comp_pred_diff, - int hybrid_pred_diff, - int64_t txfm_size_diff[NB_TXFM_MODES]) { - MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; - - // Take a snapshot of the coding context so it can be - // restored if we decide to encode this way - ctx->best_mode_index = mode_index; - vpx_memcpy(&ctx->mic, xd->mode_info_context, - sizeof(MODE_INFO)); - if (partition) - vpx_memcpy(&ctx->partition_info, partition, - sizeof(PARTITION_INFO)); - 
ctx->best_ref_mv.as_int = ref_mv->as_int; - ctx->second_best_ref_mv.as_int = second_ref_mv->as_int; - - // ctx[mb_index].rddiv = x->rddiv; - // ctx[mb_index].rdmult = x->rdmult; - - ctx->single_pred_diff = single_pred_diff; - ctx->comp_pred_diff = comp_pred_diff; - ctx->hybrid_pred_diff = hybrid_pred_diff; - - if (txfm_size_diff) { - memcpy(ctx->txfm_rd_diff, txfm_size_diff, sizeof(ctx->txfm_rd_diff)); - } else { - memset(ctx->txfm_rd_diff, 0, sizeof(ctx->txfm_rd_diff)); - } -} - -static void inter_mode_cost(VP9_COMP *cpi, MACROBLOCK *x, int this_mode, - int *rate2, int *distortion2, int *rate_y, - int *distortion, int* rate_uv, int *distortion_uv, - int *skippable, int64_t txfm_cache[NB_TXFM_MODES]) { - int y_skippable, uv_skippable; - - // Y cost and distortion - macro_block_yrd(cpi, x, rate_y, distortion, &y_skippable, txfm_cache); - - *rate2 += *rate_y; - *distortion2 += *distortion; - - // UV cost and distortion - if (x->e_mbd.mode_info_context->mbmi.txfm_size != TX_4X4) - rd_inter16x16_uv_8x8(cpi, x, rate_uv, distortion_uv, - cpi->common.full_pixel, &uv_skippable); - else - rd_inter16x16_uv(cpi, x, rate_uv, distortion_uv, cpi->common.full_pixel, - &uv_skippable); - *rate2 += *rate_uv; - *distortion2 += *distortion_uv; - *skippable = y_skippable && uv_skippable; -} - -#define MIN(x,y) (((x)<(y))?(x):(y)) -#define MAX(x,y) (((x)>(y))?(x):(y)) -static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, - int idx, int frame_type, - int recon_yoffset, int recon_uvoffset, - int_mv frame_nearest_mv[4], - int_mv frame_near_mv[4], - int_mv frame_best_ref_mv[4], - int frame_mdcounts[4][4], - unsigned char *y_buffer[4], - unsigned char *u_buffer[4], - unsigned char *v_buffer[4]) { - YV12_BUFFER_CONFIG *yv12 = &cpi->common.yv12_fb[idx]; - MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; - - - vp9_find_near_mvs(xd, xd->mode_info_context, - xd->prev_mode_info_context, - &frame_nearest_mv[frame_type], &frame_near_mv[frame_type], - 
&frame_best_ref_mv[frame_type], frame_mdcounts[frame_type], - frame_type, cpi->common.ref_frame_sign_bias); - - y_buffer[frame_type] = yv12->y_buffer + recon_yoffset; - u_buffer[frame_type] = yv12->u_buffer + recon_uvoffset; - v_buffer[frame_type] = yv12->v_buffer + recon_uvoffset; - -#if CONFIG_NEWBESTREFMV - vp9_find_mv_refs(xd, xd->mode_info_context, - xd->prev_mode_info_context, - frame_type, - mbmi->ref_mvs[frame_type], - cpi->common.ref_frame_sign_bias); - - vp9_find_best_ref_mvs(xd, y_buffer[frame_type], - yv12->y_stride, - mbmi->ref_mvs[frame_type], - &frame_best_ref_mv[frame_type], - &frame_nearest_mv[frame_type], - &frame_near_mv[frame_type]); -#endif -} - -static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, - enum BlockSize block_size, - int *saddone, int near_sadidx[], - int mdcounts[4], int64_t txfm_cache[], - int *rate2, int *distortion, int *skippable, - int *compmode_cost, - int *rate_y, int *distortion_y, - int *rate_uv, int *distortion_uv, - int *mode_excluded, int *disable_skip, - int recon_yoffset, int mode_index, - int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], - int_mv frame_best_ref_mv[4]) { - VP9_COMMON *cm = &cpi->common; - MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; - BLOCK *b = &x->block[0]; - BLOCKD *d = &xd->block[0]; - const int is_comp_pred = (mbmi->second_ref_frame != 0); - const int num_refs = is_comp_pred ? 
2 : 1; - const int this_mode = mbmi->mode; - int i; - int refs[2] = { mbmi->ref_frame, mbmi->second_ref_frame }; - int_mv cur_mv[2]; - int_mv mvp; - int64_t this_rd = 0; - - switch (this_mode) { - case NEWMV: - if (is_comp_pred) { - if (frame_mv[NEWMV][refs[0]].as_int == INVALID_MV || - frame_mv[NEWMV][refs[1]].as_int == INVALID_MV) - return INT64_MAX; - *rate2 += vp9_mv_bit_cost(&frame_mv[NEWMV][refs[0]], - &frame_best_ref_mv[refs[0]], - XMVCOST, 96, - x->e_mbd.allow_high_precision_mv); - *rate2 += vp9_mv_bit_cost(&frame_mv[NEWMV][refs[1]], - &frame_best_ref_mv[refs[1]], - XMVCOST, 96, - x->e_mbd.allow_high_precision_mv); - } else { - int bestsme = INT_MAX; - int further_steps, step_param = cpi->sf.first_step; - int sadpb = x->sadperbit16; - int_mv mvp_full, tmp_mv; - // search range got from mv_pred(). It uses step_param levels. (0-7) - int sr = 0; - - int tmp_col_min = x->mv_col_min; - int tmp_col_max = x->mv_col_max; - int tmp_row_min = x->mv_row_min; - int tmp_row_max = x->mv_row_max; - - vp9_clamp_mv_min_max(x, &frame_best_ref_mv[refs[0]]); - - if (!*saddone) { - cal_sad(cpi, xd, x, recon_yoffset, &near_sadidx[0], block_size); - *saddone = 1; - } - - vp9_mv_pred(cpi, &x->e_mbd, x->e_mbd.mode_info_context, &mvp, - mbmi->ref_frame, cpi->common.ref_frame_sign_bias, - &sr, &near_sadidx[0]); - - mvp_full.as_mv.col = mvp.as_mv.col >> 3; - mvp_full.as_mv.row = mvp.as_mv.row >> 3; - - // adjust search range according to sr from mv prediction - step_param = MAX(step_param, sr); - - // Further step/diamond searches as necessary - further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; - - bestsme = vp9_full_pixel_diamond(cpi, x, b, d, &mvp_full, step_param, - sadpb, further_steps, 1, - &cpi->fn_ptr[block_size], - &frame_best_ref_mv[refs[0]], &tmp_mv); - - x->mv_col_min = tmp_col_min; - x->mv_col_max = tmp_col_max; - x->mv_row_min = tmp_row_min; - x->mv_row_max = tmp_row_max; - - if (bestsme < INT_MAX) { - int dis; /* TODO: use dis in distortion calculation 
later. */ - unsigned int sse; - cpi->find_fractional_mv_step(x, b, d, &tmp_mv, - &frame_best_ref_mv[refs[0]], - x->errorperbit, - &cpi->fn_ptr[block_size], - XMVCOST, &dis, &sse); - } - d->bmi.as_mv.first.as_int = tmp_mv.as_int; - frame_mv[NEWMV][refs[0]].as_int = d->bmi.as_mv.first.as_int; - - // Add the new motion vector cost to our rolling cost variable - *rate2 += vp9_mv_bit_cost(&tmp_mv, &frame_best_ref_mv[refs[0]], - XMVCOST, 96, xd->allow_high_precision_mv); - } - break; - case NEARESTMV: - case NEARMV: - // Do not bother proceeding if the vector (from newmv, nearest or - // near) is 0,0 as this should then be coded using the zeromv mode. - for (i = 0; i < num_refs; ++i) - if (frame_mv[this_mode][refs[i]].as_int == 0) - return INT64_MAX; - case ZEROMV: - default: - break; - } - for (i = 0; i < num_refs; ++i) { - cur_mv[i] = frame_mv[this_mode][refs[i]]; - // Clip "next_nearest" so that it does not extend to far out of image - clamp_mv2(&cur_mv[i], xd); - if (mv_check_bounds(x, &cur_mv[i])) - return INT64_MAX; - mbmi->mv[i].as_int = cur_mv[i].as_int; - } - -#if CONFIG_PRED_FILTER - // Filtered prediction: - mbmi->pred_filter_enabled = vp9_mode_order[mode_index].pred_filter_flag; - *rate2 += vp9_cost_bit(cpi->common.prob_pred_filter_off, - mbmi->pred_filter_enabled); -#endif - if (cpi->common.mcomp_filter_type == SWITCHABLE) { - const int c = vp9_get_pred_context(cm, xd, PRED_SWITCHABLE_INTERP); - const int m = vp9_switchable_interp_map[mbmi->interp_filter]; - *rate2 += SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m]; - } - - /* We don't include the cost of the second reference here, because there - * are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other - * words if you present them in that order, the second one is always known - * if the first is known */ - *compmode_cost = vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_COMP), - is_comp_pred); - *rate2 += vp9_cost_mv_ref(cpi, this_mode, mdcounts); - - if (block_size == 
BLOCK_16X16) { - vp9_build_1st_inter16x16_predictors_mby(xd, xd->predictor, 16, 0); - if (is_comp_pred) - vp9_build_2nd_inter16x16_predictors_mby(xd, xd->predictor, 16); - } else { -#if CONFIG_SUPERBLOCKS - vp9_build_inter32x32_predictors_sb(xd, - xd->dst.y_buffer, - xd->dst.u_buffer, - xd->dst.v_buffer, - xd->dst.y_stride, - xd->dst.uv_stride); -#endif - } - - if (cpi->active_map_enabled && x->active_ptr[0] == 0) - x->skip = 1; - else if (x->encode_breakout) { - unsigned int sse, var; - int threshold = (xd->block[0].dequant[1] - * xd->block[0].dequant[1] >> 4); - - if (threshold < x->encode_breakout) - threshold = x->encode_breakout; - - if (block_size == BLOCK_16X16) { - var = vp9_variance16x16(*(b->base_src), b->src_stride, - xd->predictor, 16, &sse); - } else { -#if CONFIG_SUPERBLOCKS - var = vp9_variance32x32(*(b->base_src), b->src_stride, - xd->dst.y_buffer, xd->dst.y_stride, &sse); -#endif - } - - if (sse < threshold) { - unsigned int q2dc = xd->block[24].dequant[0]; - /* If there is no codeable 2nd order dc - or a very small uniform pixel change change */ - if ((sse - var < q2dc * q2dc >> 4) || - (sse / 2 > var && sse - var < 64)) { - // Check u and v to make sure skip is ok - int sse2; - - if (block_size == BLOCK_16X16) { - sse2 = vp9_uvsse(x); - } else { - unsigned int sse2u, sse2v; - var = vp9_variance16x16(x->src.u_buffer, x->src.uv_stride, - xd->dst.u_buffer, xd->dst.uv_stride, &sse2u); - var = vp9_variance16x16(x->src.v_buffer, x->src.uv_stride, - xd->dst.v_buffer, xd->dst.uv_stride, &sse2v); - sse2 = sse2u + sse2v; - } - - if (sse2 * 2 < threshold) { - x->skip = 1; - *distortion = sse + sse2; - *rate2 = 500; - - /* for best_yrd calculation */ - *rate_uv = 0; - *distortion_uv = sse2; - - *disable_skip = 1; - this_rd = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion); - } - } - } - } - - if (!x->skip) { - if (block_size == BLOCK_16X16) { - vp9_build_1st_inter16x16_predictors_mbuv(xd, &xd->predictor[256], - &xd->predictor[320], 8); - if (is_comp_pred) - 
vp9_build_2nd_inter16x16_predictors_mbuv(xd, &xd->predictor[256], - &xd->predictor[320], 8); - inter_mode_cost(cpi, x, this_mode, rate2, distortion, - rate_y, distortion_y, rate_uv, distortion_uv, - skippable, txfm_cache); - } else { -#if CONFIG_SUPERBLOCKS - int skippable_y, skippable_uv; - - // Y cost and distortion - FIXME support other transform sizes - super_block_yrd_8x8(x, rate_y, distortion_y, - IF_RTCD(&cpi->rtcd), &skippable_y); - *rate2 += *rate_y; - *distortion += *distortion_y; - - rd_inter32x32_uv_8x8(cpi, x, rate_uv, distortion_uv, - cm->full_pixel, &skippable_uv); - - *rate2 += *rate_uv; - *distortion += *distortion_uv; - *skippable = skippable_y && skippable_uv; -#endif - } - } - if (is_comp_pred) { - *mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY); - } else { - *mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY); - } - - return this_rd; // if 0, this will be re-calculated by caller -} - -void vp9_rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, - int recon_yoffset, int recon_uvoffset, - int *returnrate, int *returndistortion, - int64_t *returnintra) { - VP9_COMMON *cm = &cpi->common; - MACROBLOCKD *xd = &x->e_mbd; - union b_mode_info best_bmodes[16]; - MB_MODE_INFO best_mbmode; - PARTITION_INFO best_partition; - int_mv best_ref_mv, second_best_ref_mv; - MB_PREDICTION_MODE this_mode; - MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; - int i, best_mode_index = 0; - int mode8x8[2][4]; - unsigned char segment_id = mbmi->segment_id; - - int mode_index; - int mdcounts[4]; - int rate, distortion; - int rate2, distortion2; - int64_t best_txfm_rd[NB_TXFM_MODES]; - int64_t best_txfm_diff[NB_TXFM_MODES]; - int64_t best_pred_diff[NB_PREDICTION_TYPES]; - int64_t best_pred_rd[NB_PREDICTION_TYPES]; - int64_t best_rd = INT64_MAX, best_intra_rd = INT64_MAX; -#if CONFIG_PRED_FILTER - int64_t best_overall_rd = INT64_MAX; -#endif - int uv_intra_rate, uv_intra_distortion, uv_intra_rate_tokenonly; - int uv_intra_skippable = 
0; - int uv_intra_rate_8x8 = 0, uv_intra_distortion_8x8 = 0, uv_intra_rate_tokenonly_8x8 = 0; - int uv_intra_skippable_8x8 = 0; - int rate_y, UNINITIALIZED_IS_SAFE(rate_uv); - int distortion_uv = INT_MAX; - int64_t best_yrd = INT64_MAX; -#if CONFIG_PRED_FILTER - int best_filter_state; -#endif - int switchable_filter_index = 0; - - MB_PREDICTION_MODE uv_intra_mode; - MB_PREDICTION_MODE uv_intra_mode_8x8 = 0; - - int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7}; - int saddone = 0; - - int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; - int_mv frame_best_ref_mv[4]; - int frame_mdcounts[4][4]; - unsigned char *y_buffer[4], *u_buffer[4], *v_buffer[4]; - - unsigned int ref_costs[MAX_REF_FRAMES]; - int_mv seg_mvs[NB_PARTITIONINGS][16 /* n_blocks */][MAX_REF_FRAMES - 1]; - - vpx_memset(mode8x8, 0, sizeof(mode8x8)); - vpx_memset(&frame_mv, 0, sizeof(frame_mv)); - vpx_memset(&best_mbmode, 0, sizeof(best_mbmode)); - vpx_memset(&best_bmodes, 0, sizeof(best_bmodes)); - vpx_memset(&x->mb_context[xd->mb_index], 0, sizeof(PICK_MODE_CONTEXT)); - - for (i = 0; i < MAX_REF_FRAMES; i++) - frame_mv[NEWMV][i].as_int = INVALID_MV; - for (i = 0; i < NB_PREDICTION_TYPES; ++i) - best_pred_rd[i] = INT64_MAX; - for (i = 0; i < NB_TXFM_MODES; i++) - best_txfm_rd[i] = INT64_MAX; - - for (i = 0; i < NB_PARTITIONINGS; i++) { - int j, k; - - for (j = 0; j < 16; j++) - for (k = 0; k < MAX_REF_FRAMES - 1; k++) - seg_mvs[i][j][k].as_int = INVALID_MV; - } - - if (cpi->ref_frame_flags & VP9_LAST_FLAG) { - setup_buffer_inter(cpi, x, cpi->common.lst_fb_idx, LAST_FRAME, - recon_yoffset, recon_uvoffset, frame_mv[NEARESTMV], - frame_mv[NEARMV], frame_best_ref_mv, - frame_mdcounts, y_buffer, u_buffer, v_buffer); - } - - if (cpi->ref_frame_flags & VP9_GOLD_FLAG) { - setup_buffer_inter(cpi, x, cpi->common.gld_fb_idx, GOLDEN_FRAME, - recon_yoffset, recon_uvoffset, frame_mv[NEARESTMV], - frame_mv[NEARMV], frame_best_ref_mv, - frame_mdcounts, y_buffer, u_buffer, v_buffer); - } - - if (cpi->ref_frame_flags & 
VP9_ALT_FLAG) { - setup_buffer_inter(cpi, x, cpi->common.alt_fb_idx, ALTREF_FRAME, - recon_yoffset, recon_uvoffset, frame_mv[NEARESTMV], - frame_mv[NEARMV], frame_best_ref_mv, - frame_mdcounts, y_buffer, u_buffer, v_buffer); - } - - *returnintra = INT64_MAX; - - x->skip = 0; - - mbmi->ref_frame = INTRA_FRAME; - - /* Initialize zbin mode boost for uv costing */ - cpi->zbin_mode_boost = 0; - vp9_update_zbin_extra(cpi, x); - - rd_pick_intra_mbuv_mode(cpi, x, &uv_intra_rate, - &uv_intra_rate_tokenonly, &uv_intra_distortion, - &uv_intra_skippable); - uv_intra_mode = mbmi->uv_mode; - - /* rough estimate for now */ - if (cpi->common.txfm_mode != ONLY_4X4) { - rd_pick_intra_mbuv_mode_8x8(cpi, x, &uv_intra_rate_8x8, - &uv_intra_rate_tokenonly_8x8, - &uv_intra_distortion_8x8, - &uv_intra_skippable_8x8); - uv_intra_mode_8x8 = mbmi->uv_mode; - } - - // Get estimates of reference frame costs for each reference frame - // that depend on the current prediction etc. - estimate_ref_frame_costs(cpi, segment_id, ref_costs); - - for (mode_index = 0; mode_index < MAX_MODES; - mode_index += (!switchable_filter_index)) { - int64_t this_rd = INT64_MAX; - int disable_skip = 0, skippable = 0; - int other_cost = 0; - int compmode_cost = 0; - int mode_excluded = 0; - int64_t txfm_cache[NB_TXFM_MODES] = { 0 }; - - // These variables hold are rolling total cost and distortion for this mode - rate2 = 0; - distortion2 = 0; - rate_y = 0; - rate_uv = 0; - - this_mode = vp9_mode_order[mode_index].mode; - mbmi->mode = this_mode; - mbmi->uv_mode = DC_PRED; - mbmi->ref_frame = vp9_mode_order[mode_index].ref_frame; - mbmi->second_ref_frame = vp9_mode_order[mode_index].second_ref_frame; -#if CONFIG_PRED_FILTER - mbmi->pred_filter_enabled = 0; -#endif - if (cpi->common.mcomp_filter_type == SWITCHABLE && - this_mode >= NEARESTMV && this_mode <= SPLITMV) { - mbmi->interp_filter = - vp9_switchable_interp[switchable_filter_index++]; - if (switchable_filter_index == VP9_SWITCHABLE_FILTERS) - 
switchable_filter_index = 0; - } else { - mbmi->interp_filter = cpi->common.mcomp_filter_type; - } - vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); - - // Test best rd so far against threshold for trying this mode. - if (best_rd <= cpi->rd_threshes[mode_index]) - continue; - - // current coding mode under rate-distortion optimization test loop -#if CONFIG_COMP_INTRA_PRED - mbmi->second_mode = (MB_PREDICTION_MODE)(DC_PRED - 1); - mbmi->second_uv_mode = (MB_PREDICTION_MODE)(DC_PRED - 1); -#endif - - // If the segment reference frame feature is enabled.... - // then do nothing if the current ref frame is not allowed.. - if (vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) && - !vp9_check_segref(xd, segment_id, mbmi->ref_frame)) { - continue; - // If the segment mode feature is enabled.... - // then do nothing if the current mode is not allowed.. - } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE) && - (this_mode != - vp9_get_segdata(xd, segment_id, SEG_LVL_MODE))) { - continue; - // Disable this drop out case if either the mode or ref frame - // segment level feature is enabled for this segment. This is to - // prevent the possibility that the we end up unable to pick any mode. 
- } else if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) && - !vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE)) { - // Only consider ZEROMV/ALTREF_FRAME for alt ref frame, - // unless ARNR filtering is enabled in which case we want - // an unfiltered alternative - if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) { - if (this_mode != ZEROMV || - mbmi->ref_frame != ALTREF_FRAME) { - continue; - } - } - } - - /* everything but intra */ - if (mbmi->ref_frame) { - int ref = mbmi->ref_frame; - - xd->pre.y_buffer = y_buffer[ref]; - xd->pre.u_buffer = u_buffer[ref]; - xd->pre.v_buffer = v_buffer[ref]; - best_ref_mv = frame_best_ref_mv[ref]; - vpx_memcpy(mdcounts, frame_mdcounts[ref], sizeof(mdcounts)); - } - - if (mbmi->second_ref_frame) { - int ref = mbmi->second_ref_frame; - - xd->second_pre.y_buffer = y_buffer[ref]; - xd->second_pre.u_buffer = u_buffer[ref]; - xd->second_pre.v_buffer = v_buffer[ref]; - second_best_ref_mv = frame_best_ref_mv[ref]; - } - - // Experimental code. Special case for gf and arf zeromv modes. 
- // Increase zbin size to suppress noise - if (cpi->zbin_mode_boost_enabled) { - if (vp9_mode_order[mode_index].ref_frame == INTRA_FRAME) - cpi->zbin_mode_boost = 0; - else { - if (vp9_mode_order[mode_index].mode == ZEROMV) { - if (vp9_mode_order[mode_index].ref_frame != LAST_FRAME) - cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; - else - cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST; - } else if (vp9_mode_order[mode_index].mode == SPLITMV) - cpi->zbin_mode_boost = 0; - else - cpi->zbin_mode_boost = MV_ZBIN_BOOST; - } - - vp9_update_zbin_extra(cpi, x); - } - - // Intra - if (!mbmi->ref_frame) { - switch (this_mode) { - default: - case DC_PRED: - case V_PRED: - case H_PRED: - case TM_PRED: - case D45_PRED: - case D135_PRED: - case D117_PRED: - case D153_PRED: - case D27_PRED: - case D63_PRED: - mbmi->ref_frame = INTRA_FRAME; - // FIXME compound intra prediction - vp9_build_intra_predictors_mby(&x->e_mbd); - macro_block_yrd(cpi, x, &rate_y, &distortion, &skippable, txfm_cache); - rate2 += rate_y; - distortion2 += distortion; - rate2 += x->mbmode_cost[xd->frame_type][mbmi->mode]; - if (mbmi->txfm_size != TX_4X4) { - rate2 += uv_intra_rate_8x8; - rate_uv = uv_intra_rate_tokenonly_8x8; - distortion2 += uv_intra_distortion_8x8; - distortion_uv = uv_intra_distortion_8x8; - skippable = skippable && uv_intra_skippable_8x8; - } else { - rate2 += uv_intra_rate; - rate_uv = uv_intra_rate_tokenonly; - distortion2 += uv_intra_distortion; - distortion_uv = uv_intra_distortion; - skippable = skippable && uv_intra_skippable; - } - break; - case B_PRED: { - int64_t tmp_rd; - - // Note the rate value returned here includes the cost of coding - // the BPRED mode : x->mbmode_cost[xd->frame_type][BPRED]; - mbmi->txfm_size = TX_4X4; - tmp_rd = rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, &distortion, best_yrd, -#if CONFIG_COMP_INTRA_PRED - 0, -#endif - 0); - rate2 += rate; - distortion2 += distortion; - - if (tmp_rd < best_yrd) { - rate2 += uv_intra_rate; - rate_uv = 
uv_intra_rate_tokenonly; - distortion2 += uv_intra_distortion; - distortion_uv = uv_intra_distortion; - } else { - this_rd = INT64_MAX; - disable_skip = 1; - } - } - break; - case I8X8_PRED: { - int cost0 = vp9_cost_bit(cm->prob_tx[0], 0); - int cost1 = vp9_cost_bit(cm->prob_tx[0], 1); - int64_t tmp_rd_4x4s, tmp_rd_8x8s; - int64_t tmp_rd_4x4, tmp_rd_8x8, tmp_rd; - int r4x4, tok4x4, d4x4, r8x8, tok8x8, d8x8; - mbmi->txfm_size = TX_4X4; - tmp_rd_4x4 = rd_pick_intra8x8mby_modes(cpi, x, &r4x4, &tok4x4, - &d4x4, best_yrd); - mode8x8[0][0] = xd->mode_info_context->bmi[0].as_mode.first; - mode8x8[0][1] = xd->mode_info_context->bmi[2].as_mode.first; - mode8x8[0][2] = xd->mode_info_context->bmi[8].as_mode.first; - mode8x8[0][3] = xd->mode_info_context->bmi[10].as_mode.first; -#if CONFIG_COMP_INTRA_PRED - mode8x8[1][0] = xd->mode_info_context->bmi[0].as_mode.second; - mode8x8[1][1] = xd->mode_info_context->bmi[2].as_mode.second; - mode8x8[1][2] = xd->mode_info_context->bmi[8].as_mode.second; - mode8x8[1][3] = xd->mode_info_context->bmi[10].as_mode.second; -#endif - mbmi->txfm_size = TX_8X8; - tmp_rd_8x8 = rd_pick_intra8x8mby_modes(cpi, x, &r8x8, &tok8x8, - &d8x8, best_yrd); - txfm_cache[ONLY_4X4] = tmp_rd_4x4; - txfm_cache[ALLOW_8X8] = tmp_rd_8x8; - txfm_cache[ALLOW_16X16] = tmp_rd_8x8; - tmp_rd_4x4s = tmp_rd_4x4 + RDCOST(x->rdmult, x->rddiv, cost0, 0); - tmp_rd_8x8s = tmp_rd_8x8 + RDCOST(x->rdmult, x->rddiv, cost1, 0); - txfm_cache[TX_MODE_SELECT] = tmp_rd_4x4s < tmp_rd_8x8s ? 
tmp_rd_4x4s : tmp_rd_8x8s; - if (cm->txfm_mode == TX_MODE_SELECT) { - if (tmp_rd_4x4s < tmp_rd_8x8s) { - rate = r4x4 + cost0; - rate_y = tok4x4 + cost0; - distortion = d4x4; - mbmi->txfm_size = TX_4X4; - tmp_rd = tmp_rd_4x4s; - } else { - rate = r8x8 + cost1; - rate_y = tok8x8 + cost1; - distortion = d8x8; - mbmi->txfm_size = TX_8X8; - tmp_rd = tmp_rd_8x8s; - - mode8x8[0][0] = xd->mode_info_context->bmi[0].as_mode.first; - mode8x8[0][1] = xd->mode_info_context->bmi[2].as_mode.first; - mode8x8[0][2] = xd->mode_info_context->bmi[8].as_mode.first; - mode8x8[0][3] = xd->mode_info_context->bmi[10].as_mode.first; -#if CONFIG_COMP_INTRA_PRED - mode8x8[1][0] = xd->mode_info_context->bmi[0].as_mode.second; - mode8x8[1][1] = xd->mode_info_context->bmi[2].as_mode.second; - mode8x8[1][2] = xd->mode_info_context->bmi[8].as_mode.second; - mode8x8[1][3] = xd->mode_info_context->bmi[10].as_mode.second; -#endif - } - } else if (cm->txfm_mode == ONLY_4X4) { - rate = r4x4; - rate_y = tok4x4; - distortion = d4x4; - mbmi->txfm_size = TX_4X4; - tmp_rd = tmp_rd_4x4; - } else { - rate = r8x8; - rate_y = tok8x8; - distortion = d8x8; - mbmi->txfm_size = TX_8X8; - tmp_rd = tmp_rd_8x8; - - mode8x8[0][0] = xd->mode_info_context->bmi[0].as_mode.first; - mode8x8[0][1] = xd->mode_info_context->bmi[2].as_mode.first; - mode8x8[0][2] = xd->mode_info_context->bmi[8].as_mode.first; - mode8x8[0][3] = xd->mode_info_context->bmi[10].as_mode.first; -#if CONFIG_COMP_INTRA_PRED - mode8x8[1][0] = xd->mode_info_context->bmi[0].as_mode.second; - mode8x8[1][1] = xd->mode_info_context->bmi[2].as_mode.second; - mode8x8[1][2] = xd->mode_info_context->bmi[8].as_mode.second; - mode8x8[1][3] = xd->mode_info_context->bmi[10].as_mode.second; -#endif - } - - rate2 += rate; - distortion2 += distortion; - - /* TODO: uv rate maybe over-estimated here since there is UV intra - mode coded in I8X8_PRED prediction */ - if (tmp_rd < best_yrd) { - rate2 += uv_intra_rate; - rate_uv = uv_intra_rate_tokenonly; - distortion2 += 
uv_intra_distortion; - distortion_uv = uv_intra_distortion; - } else { - this_rd = INT64_MAX; - disable_skip = 1; - } - } - break; - } - } - // Split MV. The code is very different from the other inter modes so - // special case it. - else if (this_mode == SPLITMV) { - const int is_comp_pred = mbmi->second_ref_frame != 0; - int64_t tmp_rd, this_rd_thresh; - int_mv *second_ref = is_comp_pred ? &second_best_ref_mv : NULL; - - this_rd_thresh = - (mbmi->ref_frame == LAST_FRAME) ? - cpi->rd_threshes[THR_NEWMV] : cpi->rd_threshes[THR_NEWA]; - this_rd_thresh = - (mbmi->ref_frame == GOLDEN_FRAME) ? - cpi->rd_threshes[THR_NEWG] : this_rd_thresh; - - tmp_rd = rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv, - second_ref, best_yrd, mdcounts, - &rate, &rate_y, &distortion, - &skippable, - this_rd_thresh, seg_mvs, - txfm_cache); - rate2 += rate; - distortion2 += distortion; - - if (cpi->common.mcomp_filter_type == SWITCHABLE) - rate2 += SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs - [vp9_get_pred_context(&cpi->common, xd, PRED_SWITCHABLE_INTERP)] - [vp9_switchable_interp_map[mbmi->interp_filter]]; - // If even the 'Y' rd value of split is higher than best so far - // then dont bother looking at UV - if (tmp_rd < best_yrd) { - int uv_skippable; - - rd_inter4x4_uv(cpi, x, &rate_uv, &distortion_uv, &uv_skippable, - cpi->common.full_pixel); - rate2 += rate_uv; - distortion2 += distortion_uv; - skippable = skippable && uv_skippable; - } else { - this_rd = INT64_MAX; - disable_skip = 1; - } - - if (is_comp_pred) - mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY; - else - mode_excluded = cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY; - - compmode_cost = - vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_COMP), is_comp_pred); - mbmi->mode = this_mode; - } - else { - this_rd = handle_inter_mode(cpi, x, BLOCK_16X16, - &saddone, near_sadidx, mdcounts, txfm_cache, - &rate2, &distortion2, &skippable, - &compmode_cost, &rate_y, &distortion, - &rate_uv, 
&distortion_uv, - &mode_excluded, &disable_skip, recon_yoffset, - mode_index, frame_mv, frame_best_ref_mv); - if (this_rd == INT64_MAX) - continue; - } - - if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) - rate2 += compmode_cost; - - // Estimate the reference frame signaling cost and add it - // to the rolling cost variable. - rate2 += ref_costs[mbmi->ref_frame]; - - if (!disable_skip) { - // Test for the condition where skip block will be activated - // because there are no non zero coefficients and make any - // necessary adjustment for rate. Ignore if skip is coded at - // segment level as the cost wont have been added in. - if (cpi->common.mb_no_coeff_skip) { - int mb_skip_allowed; - - // Is Mb level skip allowed for this mb. - mb_skip_allowed = - !vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) || - vp9_get_segdata(xd, segment_id, SEG_LVL_EOB); - - if (skippable) { - mbmi->mb_skip_coeff = 1; - - // Back out the coefficient coding costs - rate2 -= (rate_y + rate_uv); - // for best_yrd calculation - rate_uv = 0; - - if (mb_skip_allowed) { - int prob_skip_cost; - - // Cost the skip mb case - vp9_prob skip_prob = - vp9_get_pred_prob(cm, &x->e_mbd, PRED_MBSKIP); - - if (skip_prob) { - prob_skip_cost = vp9_cost_bit(skip_prob, 1); - rate2 += prob_skip_cost; - other_cost += prob_skip_cost; - } - } - } - // Add in the cost of the no skip flag. - else { - mbmi->mb_skip_coeff = 0; - if (mb_skip_allowed) { - int prob_skip_cost = vp9_cost_bit( - vp9_get_pred_prob(cm, &x->e_mbd, PRED_MBSKIP), 0); - rate2 += prob_skip_cost; - other_cost += prob_skip_cost; - } - } - } - - // Calculate the final RD estimate for this mode. 
- this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); - } - - // Keep record of best intra distortion - if ((mbmi->ref_frame == INTRA_FRAME) && - (this_rd < best_intra_rd)) { - best_intra_rd = this_rd; - *returnintra = distortion2; - } - - if (!disable_skip && mbmi->ref_frame == INTRA_FRAME) - for (i = 0; i < NB_PREDICTION_TYPES; ++i) - best_pred_rd[i] = MIN(best_pred_rd[i], this_rd); - -#if CONFIG_PRED_FILTER - // Keep track of the best mode irrespective of prediction filter state - if (this_rd < best_overall_rd) { - best_overall_rd = this_rd; - best_filter_state = mbmi->pred_filter_enabled; - } - - // Ignore modes where the prediction filter state doesn't - // match the state signaled at the frame level - if ((cm->pred_filter_mode == 2) || - (cm->pred_filter_mode == - mbmi->pred_filter_enabled)) { -#endif - // Did this mode help.. i.e. is it the new best mode - if (this_rd < best_rd || x->skip) { - if (!mode_excluded) { - // Note index of best mode so far - best_mode_index = mode_index; - - if (this_mode <= B_PRED) { - if (mbmi->txfm_size != TX_4X4 - && this_mode != B_PRED - && this_mode != I8X8_PRED) - mbmi->uv_mode = uv_intra_mode_8x8; - else - mbmi->uv_mode = uv_intra_mode; - /* required for left and above block mv */ - mbmi->mv[0].as_int = 0; - } - - other_cost += ref_costs[mbmi->ref_frame]; - - /* Calculate the final y RD estimate for this mode */ - best_yrd = RDCOST(x->rdmult, x->rddiv, (rate2 - rate_uv - other_cost), - (distortion2 - distortion_uv)); - - *returnrate = rate2; - *returndistortion = distortion2; - best_rd = this_rd; - vpx_memcpy(&best_mbmode, mbmi, sizeof(MB_MODE_INFO)); - vpx_memcpy(&best_partition, x->partition_info, sizeof(PARTITION_INFO)); - - if ((this_mode == B_PRED) - || (this_mode == I8X8_PRED) - || (this_mode == SPLITMV)) - for (i = 0; i < 16; i++) { - best_bmodes[i] = xd->block[i].bmi; - } - } - - // Testing this mode gave rise to an improvement in best error score. 
- // Lower threshold a bit for next time - cpi->rd_thresh_mult[mode_index] = - (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? - cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT; - cpi->rd_threshes[mode_index] = - (cpi->rd_baseline_thresh[mode_index] >> 7) * - cpi->rd_thresh_mult[mode_index]; - } - // If the mode did not help improve the best error case then raise the - // threshold for testing that mode next time around. - else { - cpi->rd_thresh_mult[mode_index] += 4; - - if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT) - cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT; - - cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index]; - } - - /* keep record of best compound/single-only prediction */ - if (!disable_skip && - mbmi->ref_frame != INTRA_FRAME) { - int64_t single_rd, hybrid_rd; - int single_rate, hybrid_rate; - - if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) { - single_rate = rate2 - compmode_cost; - hybrid_rate = rate2; - } else { - single_rate = rate2; - hybrid_rate = rate2 + compmode_cost; - } - - single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2); - hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2); - - if (mbmi->second_ref_frame == INTRA_FRAME && - single_rd < best_pred_rd[SINGLE_PREDICTION_ONLY]) { - best_pred_rd[SINGLE_PREDICTION_ONLY] = single_rd; - } else if (mbmi->second_ref_frame != INTRA_FRAME && - single_rd < best_pred_rd[COMP_PREDICTION_ONLY]) { - best_pred_rd[COMP_PREDICTION_ONLY] = single_rd; - } - if (hybrid_rd < best_pred_rd[HYBRID_PREDICTION]) - best_pred_rd[HYBRID_PREDICTION] = hybrid_rd; - } - - /* keep record of best txfm size */ - if (!mode_excluded && this_rd != INT64_MAX) { - for (i = 0; i < NB_TXFM_MODES; i++) { - int64_t adj_rd; - if (this_mode != B_PRED) { - adj_rd = this_rd + txfm_cache[i] - txfm_cache[cm->txfm_mode]; - } else { - adj_rd = this_rd; - } - if (adj_rd < best_txfm_rd[i]) - best_txfm_rd[i] = adj_rd; - } - } 
-#if CONFIG_PRED_FILTER - } -#endif - - if (x->skip && !mode_excluded) - break; - } - -#if CONFIG_PRED_FILTER - // Update counts for prediction filter usage - if (best_filter_state != 0) - ++cpi->pred_filter_on_count; - else - ++cpi->pred_filter_off_count; -#endif - if (cpi->common.mcomp_filter_type == SWITCHABLE && - best_mbmode.mode >= NEARESTMV && - best_mbmode.mode <= SPLITMV) { - ++cpi->switchable_interp_count - [vp9_get_pred_context(&cpi->common, xd, PRED_SWITCHABLE_INTERP)] - [vp9_switchable_interp_map[best_mbmode.interp_filter]]; - } - - // Reduce the activation RD thresholds for the best choice mode - if ((cpi->rd_baseline_thresh[best_mode_index] > 0) && - (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) { - int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 2); - - cpi->rd_thresh_mult[best_mode_index] = - (cpi->rd_thresh_mult[best_mode_index] >= - (MIN_THRESHMULT + best_adjustment)) ? - cpi->rd_thresh_mult[best_mode_index] - best_adjustment : MIN_THRESHMULT; - cpi->rd_threshes[best_mode_index] = - (cpi->rd_baseline_thresh[best_mode_index] >> 7) * - cpi->rd_thresh_mult[best_mode_index]; - } - - // This code force Altref,0,0 and skip for the frame that overlays a - // an alrtef unless Altref is filtered. However, this is unsafe if - // segment level coding of ref frame or mode is enabled for this - // segment. - if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) && - !vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE) && - cpi->is_src_frame_alt_ref && - (cpi->oxcf.arnr_max_frames == 0) && - (best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME)) { - mbmi->mode = ZEROMV; - if (cm->txfm_mode != TX_MODE_SELECT) - mbmi->txfm_size = cm->txfm_mode; - else - mbmi->txfm_size = TX_16X16; - mbmi->ref_frame = ALTREF_FRAME; - mbmi->mv[0].as_int = 0; - mbmi->uv_mode = DC_PRED; - mbmi->mb_skip_coeff = - (cpi->common.mb_no_coeff_skip) ? 
1 : 0; - mbmi->partitioning = 0; - - vpx_memset(best_pred_diff, 0, sizeof(best_pred_diff)); - vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff)); - goto end; - } - - // macroblock modes - vpx_memcpy(mbmi, &best_mbmode, sizeof(MB_MODE_INFO)); - if (best_mbmode.mode == B_PRED) { - for (i = 0; i < 16; i++) { - xd->mode_info_context->bmi[i].as_mode = best_bmodes[i].as_mode; - xd->block[i].bmi.as_mode = xd->mode_info_context->bmi[i].as_mode; - } - } - - if (best_mbmode.mode == I8X8_PRED) - set_i8x8_block_modes(x, mode8x8); - - if (best_mbmode.mode == SPLITMV) { - for (i = 0; i < 16; i++) - xd->mode_info_context->bmi[i].as_mv.first.as_int = best_bmodes[i].as_mv.first.as_int; - if (mbmi->second_ref_frame) - for (i = 0; i < 16; i++) - xd->mode_info_context->bmi[i].as_mv.second.as_int = best_bmodes[i].as_mv.second.as_int; - - vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO)); - - mbmi->mv[0].as_int = x->partition_info->bmi[15].mv.as_int; - mbmi->mv[1].as_int = x->partition_info->bmi[15].second_mv.as_int; - } - - for (i = 0; i < NB_PREDICTION_TYPES; ++i) { - if (best_pred_rd[i] == INT64_MAX) - best_pred_diff[i] = INT_MIN; - else - best_pred_diff[i] = best_rd - best_pred_rd[i]; - } - - if (!x->skip) { - for (i = 0; i < NB_TXFM_MODES; i++) { - if (best_txfm_rd[i] == INT64_MAX) - best_txfm_diff[i] = INT_MIN; - else - best_txfm_diff[i] = best_rd - best_txfm_rd[i]; - } - } else { - vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff)); - } - -end: - store_coding_context(x, &x->mb_context[xd->mb_index], best_mode_index, &best_partition, - &frame_best_ref_mv[xd->mode_info_context->mbmi.ref_frame], - &frame_best_ref_mv[xd->mode_info_context->mbmi.second_ref_frame], - best_pred_diff[0], best_pred_diff[1], best_pred_diff[2], - best_txfm_diff); -} - -#if CONFIG_SUPERBLOCKS -void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, - int *returnrate, - int *returndist) { - VP9_COMMON *cm = &cpi->common; - MACROBLOCKD *xd = &x->e_mbd; - int rate_y, rate_uv; - 
int rate_y_tokenonly, rate_uv_tokenonly; - int error_y, error_uv; - int dist_y, dist_uv; - int y_skip, uv_skip; - - xd->mode_info_context->mbmi.txfm_size = TX_8X8; - - error_uv = rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, - &dist_uv, &uv_skip); - error_y = rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, - &dist_y, &y_skip); - - if (cpi->common.mb_no_coeff_skip && y_skip && uv_skip) { - *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly + - vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 1); - *returndist = dist_y + (dist_uv >> 2); - } else { - *returnrate = rate_y + rate_uv; - if (cpi->common.mb_no_coeff_skip) - *returnrate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0); - *returndist = dist_y + (dist_uv >> 2); - } -} -#endif - -void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, - int *returnrate, int *returndist) { - VP9_COMMON *cm = &cpi->common; - MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; - int64_t error4x4, error16x16; -#if CONFIG_COMP_INTRA_PRED - int64_t error4x4d; - int rate4x4d, dist4x4d; -#endif - int rate4x4, rate16x16 = 0, rateuv, rateuv8x8; - int dist4x4, dist16x16, distuv, distuv8x8; - int rate; - int rate4x4_tokenonly = 0; - int rate16x16_tokenonly = 0; - int rateuv_tokenonly = 0, rateuv8x8_tokenonly = 0; - int64_t error8x8; - int rate8x8_tokenonly=0; - int rate8x8, dist8x8; - int mode16x16; - int mode8x8[2][4]; - int dist; - int modeuv, modeuv8x8, uv_intra_skippable, uv_intra_skippable_8x8; - int y_intra16x16_skippable; - int64_t txfm_cache[NB_TXFM_MODES]; - TX_SIZE txfm_size_16x16; - int i; - - mbmi->ref_frame = INTRA_FRAME; - rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv, - &uv_intra_skippable); - modeuv = mbmi->uv_mode; - if (cpi->common.txfm_mode != ONLY_4X4) { - rd_pick_intra_mbuv_mode_8x8(cpi, x, &rateuv8x8, &rateuv8x8_tokenonly, - &distuv8x8, &uv_intra_skippable_8x8); - modeuv8x8 = mbmi->uv_mode; - } else 
{ - uv_intra_skippable_8x8 = uv_intra_skippable; - rateuv8x8 = rateuv; - distuv8x8 = distuv; - rateuv8x8_tokenonly = rateuv_tokenonly; - modeuv8x8 = modeuv; - } - - // current macroblock under rate-distortion optimization test loop - error16x16 = rd_pick_intra16x16mby_mode(cpi, x, &rate16x16, - &rate16x16_tokenonly, &dist16x16, - &y_intra16x16_skippable, txfm_cache); - mode16x16 = mbmi->mode; - txfm_size_16x16 = mbmi->txfm_size; - - // FIXME(rbultje) support transform-size selection - mbmi->txfm_size = (cm->txfm_mode == ONLY_4X4) ? TX_4X4 : TX_8X8; - error8x8 = rd_pick_intra8x8mby_modes(cpi, x, &rate8x8, &rate8x8_tokenonly, - &dist8x8, error16x16); - mode8x8[0][0]= xd->mode_info_context->bmi[0].as_mode.first; - mode8x8[0][1]= xd->mode_info_context->bmi[2].as_mode.first; - mode8x8[0][2]= xd->mode_info_context->bmi[8].as_mode.first; - mode8x8[0][3]= xd->mode_info_context->bmi[10].as_mode.first; -#if CONFIG_COMP_INTRA_PRED - mode8x8[1][0] = xd->mode_info_context->bmi[0].as_mode.second; - mode8x8[1][1] = xd->mode_info_context->bmi[2].as_mode.second; - mode8x8[1][2] = xd->mode_info_context->bmi[8].as_mode.second; - mode8x8[1][3] = xd->mode_info_context->bmi[10].as_mode.second; -#endif - - error4x4 = rd_pick_intra4x4mby_modes(cpi, x, - &rate4x4, &rate4x4_tokenonly, - &dist4x4, error16x16, -#if CONFIG_COMP_INTRA_PRED - 0, -#endif - 0); -#if CONFIG_COMP_INTRA_PRED - error4x4d = rd_pick_intra4x4mby_modes(cpi, x, - &rate4x4d, &rate4x4_tokenonly, - &dist4x4d, error16x16, 1, 0); -#endif - - mbmi->mb_skip_coeff = 0; - if (cpi->common.mb_no_coeff_skip && - y_intra16x16_skippable && uv_intra_skippable_8x8) { - mbmi->mb_skip_coeff = 1; - mbmi->mode = mode16x16; - mbmi->uv_mode = modeuv; - rate = rateuv8x8 + rate16x16 - rateuv8x8_tokenonly - rate16x16_tokenonly + - vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 1); - dist = dist16x16 + (distuv8x8 >> 2); - mbmi->txfm_size = txfm_size_16x16; - memset(x->mb_context[xd->mb_index].txfm_rd_diff, 0, - 
sizeof(x->mb_context[xd->mb_index].txfm_rd_diff)); - } else if (error8x8 > error16x16) { - if (error4x4 < error16x16) { - rate = rateuv; -#if CONFIG_COMP_INTRA_PRED - rate += (error4x4d < error4x4) ? rate4x4d : rate4x4; - if (error4x4d >= error4x4) // FIXME save original modes etc. - error4x4 = rd_pick_intra4x4mby_modes(cpi, x, &rate4x4, - &rate4x4_tokenonly, - &dist4x4, error16x16, 0, - cpi->update_context); -#else - rate += rate4x4; -#endif - mbmi->mode = B_PRED; - mbmi->txfm_size = TX_4X4; - dist = dist4x4 + (distuv >> 2); - memset(x->mb_context[xd->mb_index].txfm_rd_diff, 0, - sizeof(x->mb_context[xd->mb_index].txfm_rd_diff)); - } else { - mbmi->txfm_size = txfm_size_16x16; - mbmi->mode = mode16x16; - rate = rate16x16 + rateuv8x8; - dist = dist16x16 + (distuv8x8 >> 2); - for (i = 0; i < NB_TXFM_MODES; i++) { - x->mb_context[xd->mb_index].txfm_rd_diff[i] = error16x16 - txfm_cache[i]; - } - } - if (cpi->common.mb_no_coeff_skip) - rate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0); - } else { - if (error4x4 < error8x8) { - rate = rateuv; -#if CONFIG_COMP_INTRA_PRED - rate += (error4x4d < error4x4) ? rate4x4d : rate4x4; - if (error4x4d >= error4x4) // FIXME save original modes etc. - error4x4 = rd_pick_intra4x4mby_modes(cpi, x, &rate4x4, - &rate4x4_tokenonly, - &dist4x4, error16x16, 0, - cpi->update_context); -#else - rate += rate4x4; -#endif - mbmi->mode = B_PRED; - mbmi->txfm_size = TX_4X4; - dist = dist4x4 + (distuv >> 2); - memset(x->mb_context[xd->mb_index].txfm_rd_diff, 0, - sizeof(x->mb_context[xd->mb_index].txfm_rd_diff)); - } else { - // FIXME(rbultje) support transform-size selection - mbmi->mode = I8X8_PRED; - mbmi->txfm_size = (cm->txfm_mode == ONLY_4X4) ? 
TX_4X4 : TX_8X8; - set_i8x8_block_modes(x, mode8x8); - rate = rate8x8 + rateuv; - dist = dist8x8 + (distuv >> 2); - memset(x->mb_context[xd->mb_index].txfm_rd_diff, 0, - sizeof(x->mb_context[xd->mb_index].txfm_rd_diff)); - } - if (cpi->common.mb_no_coeff_skip) - rate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0); - } - - *returnrate = rate; - *returndist = dist; -} - -#if CONFIG_SUPERBLOCKS -int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, - int recon_yoffset, int recon_uvoffset, - int *returnrate, int *returndistortion) { - VP9_COMMON *cm = &cpi->common; - MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; - MB_PREDICTION_MODE this_mode; - MV_REFERENCE_FRAME ref_frame; - unsigned char segment_id = xd->mode_info_context->mbmi.segment_id; - int comp_pred; - int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; - int_mv frame_best_ref_mv[4]; - int frame_mdcounts[4][4]; - unsigned char *y_buffer[4]; - unsigned char *u_buffer[4]; - unsigned char *v_buffer[4]; - static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, - VP9_ALT_FLAG }; - int idx_list[4] = { 0, cpi->common.lst_fb_idx, cpi->common.gld_fb_idx, - cpi->common.alt_fb_idx }; - int mdcounts[4]; - int near_sadidx[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; - int saddone = 0; - int64_t best_rd = INT64_MAX; - int64_t best_comp_rd = INT64_MAX; - int64_t best_single_rd = INT64_MAX; - int64_t best_hybrid_rd = INT64_MAX; - int64_t best_yrd = INT64_MAX; - MB_MODE_INFO best_mbmode; - int mode_index, best_mode_index; - unsigned int ref_costs[MAX_REF_FRAMES]; - - x->skip = 0; - xd->mode_info_context->mbmi.segment_id = segment_id; - estimate_ref_frame_costs(cpi, segment_id, ref_costs); - vpx_memset(&best_mbmode, 0, sizeof(best_mbmode)); - - for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { - if (cpi->ref_frame_flags & flag_list[ref_frame]) { - setup_buffer_inter(cpi, x, idx_list[ref_frame], ref_frame, - recon_yoffset, recon_uvoffset, 
frame_mv[NEARESTMV], - frame_mv[NEARMV], frame_best_ref_mv, - frame_mdcounts, y_buffer, u_buffer, v_buffer); - } - frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; - frame_mv[ZEROMV][ref_frame].as_int = 0; - } - - for (mode_index = 0; mode_index < MAX_MODES; mode_index++) { - int mode_excluded; - int64_t this_rd = INT64_MAX; - int disable_skip = 0; - int other_cost = 0; - int compmode_cost = 0; - int rate2 = 0, rate_y = 0, rate_uv = 0; - int distortion2 = 0, distortion_y = 0, distortion_uv = 0; - int skippable; - int64_t txfm_cache[NB_TXFM_MODES]; - - // Test best rd so far against threshold for trying this mode. - if (best_rd <= cpi->rd_threshes[mode_index]) { - continue; - } - - this_mode = vp9_mode_order[mode_index].mode; - ref_frame = vp9_mode_order[mode_index].ref_frame; - mbmi->ref_frame = ref_frame; - comp_pred = vp9_mode_order[mode_index].second_ref_frame != INTRA_FRAME; - mbmi->mode = this_mode; - mbmi->uv_mode = DC_PRED; -#if CONFIG_COMP_INTRA_PRED - mbmi->second_mode = (MB_PREDICTION_MODE)(DC_PRED - 1); - mbmi->second_uv_mode = (MB_PREDICTION_MODE)(DC_PRED - 1); -#endif - - if (!(cpi->ref_frame_flags & flag_list[ref_frame])) - continue; - - // not yet supported or not superblocky - // TODO(rbultje): support intra coding - if (ref_frame == INTRA_FRAME || this_mode == SPLITMV) - continue; - - if (comp_pred) { - int second_ref; - - if (ref_frame == ALTREF_FRAME) { - second_ref = LAST_FRAME; - } else { - second_ref = ref_frame + 1; - } - if (!(cpi->ref_frame_flags & flag_list[second_ref])) - continue; - mbmi->second_ref_frame = second_ref; - - xd->second_pre.y_buffer = y_buffer[second_ref]; - xd->second_pre.u_buffer = u_buffer[second_ref]; - xd->second_pre.v_buffer = v_buffer[second_ref]; - mode_excluded = cm->comp_pred_mode == SINGLE_PREDICTION_ONLY; - } else { - mbmi->second_ref_frame = INTRA_FRAME; - mode_excluded = cm->comp_pred_mode == COMP_PREDICTION_ONLY; - } - - xd->pre.y_buffer = y_buffer[ref_frame]; - xd->pre.u_buffer = u_buffer[ref_frame]; - 
xd->pre.v_buffer = v_buffer[ref_frame]; - vpx_memcpy(mdcounts, frame_mdcounts[ref_frame], sizeof(mdcounts)); - - // If the segment reference frame feature is enabled.... - // then do nothing if the current ref frame is not allowed.. - if (vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) && - !vp9_check_segref(xd, segment_id, ref_frame)) { - continue; - // If the segment mode feature is enabled.... - // then do nothing if the current mode is not allowed.. - } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE) && - (this_mode != vp9_get_segdata(xd, segment_id, SEG_LVL_MODE))) { - continue; - // Disable this drop out case if either the mode or ref frame - // segment level feature is enabled for this segment. This is to - // prevent the possibility that we end up unable to pick any mode. - } else if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) && - !vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE)) { - // Only consider ZEROMV/ALTREF_FRAME for alt ref frame, - // unless ARNR filtering is enabled in which case we want - // an unfiltered alternative - if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) { - if (this_mode != ZEROMV || ref_frame != ALTREF_FRAME) { - continue; - } - } - } - - this_rd = handle_inter_mode(cpi, x, BLOCK_32X32, - &saddone, near_sadidx, mdcounts, txfm_cache, - &rate2, &distortion2, &skippable, - &compmode_cost, &rate_y, &distortion_y, - &rate_uv, &distortion_uv, - &mode_excluded, &disable_skip, recon_yoffset, - mode_index, frame_mv, frame_best_ref_mv); - if (this_rd == INT64_MAX) - continue; - - if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) { - rate2 += compmode_cost; - } - - // Estimate the reference frame signaling cost and add it - // to the rolling cost variable. 
- rate2 += ref_costs[xd->mode_info_context->mbmi.ref_frame]; - - if (!disable_skip) { - // Test for the condition where skip block will be activated - // because there are no non zero coefficients and make any - // necessary adjustment for rate. Ignore if skip is coded at - // segment level as the cost wont have been added in. - if (cpi->common.mb_no_coeff_skip) { - int mb_skip_allowed; - - // Is Mb level skip allowed for this mb. - mb_skip_allowed = - !vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) || - vp9_get_segdata(xd, segment_id, SEG_LVL_EOB); - - if (skippable) { - // Back out the coefficient coding costs - rate2 -= (rate_y + rate_uv); - // for best_yrd calculation - rate_uv = 0; - - if (mb_skip_allowed) { - int prob_skip_cost; - - // Cost the skip mb case - vp9_prob skip_prob = - vp9_get_pred_prob(cm, xd, PRED_MBSKIP); - - if (skip_prob) { - prob_skip_cost = vp9_cost_bit(skip_prob, 1); - rate2 += prob_skip_cost; - other_cost += prob_skip_cost; - } - } - } - // Add in the cost of the no skip flag. - else if (mb_skip_allowed) { - int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob(cm, xd, - PRED_MBSKIP), 0); - rate2 += prob_skip_cost; - other_cost += prob_skip_cost; - } - } - - // Calculate the final RD estimate for this mode. - this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); - } - -#if 0 - // Keep record of best intra distortion - if ((xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) && - (this_rd < best_intra_rd)) { - best_intra_rd = this_rd; - *returnintra = distortion2; - } -#endif - - if (!disable_skip && xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { - if (this_rd < best_comp_rd) - best_comp_rd = this_rd; - if (this_rd < best_single_rd) - best_single_rd = this_rd; - if (this_rd < best_hybrid_rd) - best_hybrid_rd = this_rd; - } - - // Did this mode help.. i.e. 
is it the new best mode - if (this_rd < best_rd || x->skip) { - if (!mode_excluded) { - // Note index of best mode so far - best_mode_index = mode_index; - -#if 0 - if (this_mode <= B_PRED) { - xd->mode_info_context->mbmi.uv_mode = uv_intra_mode_8x8; - /* required for left and above block mv */ - xd->mode_info_context->mbmi.mv.as_int = 0; - } -#endif - - other_cost += ref_costs[xd->mode_info_context->mbmi.ref_frame]; - - /* Calculate the final y RD estimate for this mode */ - best_yrd = RDCOST(x->rdmult, x->rddiv, (rate2 - rate_uv - other_cost), - (distortion2 - distortion_uv)); - - *returnrate = rate2; - *returndistortion = distortion2; - best_rd = this_rd; - vpx_memcpy(&best_mbmode, mbmi, sizeof(MB_MODE_INFO)); - } -#if 0 - // Testing this mode gave rise to an improvement in best error score. Lower threshold a bit for next time - cpi->rd_thresh_mult[mode_index] = (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT; - cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index]; -#endif - } - // If the mode did not help improve the best error case then raise the threshold for testing that mode next time around. 
- else { -#if 0 - cpi->rd_thresh_mult[mode_index] += 4; - - if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT) - cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT; - - cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index]; -#endif - } - - /* keep record of best compound/single-only prediction */ - if (!disable_skip && mbmi->ref_frame != INTRA_FRAME) { - int single_rd, hybrid_rd, single_rate, hybrid_rate; - - if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) { - single_rate = rate2 - compmode_cost; - hybrid_rate = rate2; - } else { - single_rate = rate2; - hybrid_rate = rate2 + compmode_cost; - } - - single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2); - hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2); - - if (mbmi->second_ref_frame == INTRA_FRAME && single_rd < best_single_rd) { - best_single_rd = single_rd; - } else if (mbmi->second_ref_frame != INTRA_FRAME && - single_rd < best_comp_rd) { - best_comp_rd = single_rd; - } - if (hybrid_rd < best_hybrid_rd) { - best_hybrid_rd = hybrid_rd; - } - } - - if (x->skip && !mode_excluded) - break; - } - - // TODO(rbultje) integrate with RD thresholding -#if 0 - // Reduce the activation RD thresholds for the best choice mode - if ((cpi->rd_baseline_thresh[best_mode_index] > 0) && - (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) { - int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 2); - - cpi->rd_thresh_mult[best_mode_index] = - (cpi->rd_thresh_mult[best_mode_index] >= (MIN_THRESHMULT + best_adjustment)) ? - cpi->rd_thresh_mult[best_mode_index] - best_adjustment : MIN_THRESHMULT; - cpi->rd_threshes[best_mode_index] = - (cpi->rd_baseline_thresh[best_mode_index] >> 7) * cpi->rd_thresh_mult[best_mode_index]; - } -#endif - - // This code forces Altref,0,0 and skip for the frame that overlays a - // an alrtef unless Altref is filtered. 
However, this is unsafe if - // segment level coding of ref frame or mode is enabled for this - // segment. - if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) && - !vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE) && - cpi->is_src_frame_alt_ref && - (cpi->oxcf.arnr_max_frames == 0) && - (best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME)) { - mbmi->mode = ZEROMV; - mbmi->ref_frame = ALTREF_FRAME; - mbmi->second_ref_frame = 0; - mbmi->mv[0].as_int = 0; - mbmi->uv_mode = DC_PRED; - mbmi->mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 1 : 0; - mbmi->partitioning = 0; - mbmi->txfm_size = TX_8X8; - - if (best_rd != INT64_MAX) - store_coding_context(x, &x->sb_context[0], best_mode_index, NULL, - &frame_best_ref_mv[mbmi->ref_frame], - &frame_best_ref_mv[mbmi->second_ref_frame], - 0, 0, 0, NULL); - return best_rd; - } - - // macroblock modes - vpx_memcpy(mbmi, &best_mbmode, sizeof(MB_MODE_INFO)); - mbmi->txfm_size = TX_8X8; - - if (best_rd != INT64_MAX) - store_coding_context(x, &x->sb_context[0], best_mode_index, NULL, - &frame_best_ref_mv[mbmi->ref_frame], - &frame_best_ref_mv[mbmi->second_ref_frame], - (best_single_rd == INT64_MAX) ? INT_MIN : - (best_rd - best_single_rd), - (best_comp_rd == INT64_MAX) ? INT_MIN : - (best_rd - best_comp_rd), - (best_hybrid_rd == INT64_MAX) ? 
INT_MIN : - (best_rd - best_hybrid_rd), - NULL); - - return best_rd; -} -#endif - -void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x, - int recon_yoffset, - int recon_uvoffset, - int *totalrate, int *totaldist) { - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; - int rate, distortion; - int64_t intra_error = 0; - unsigned char *segment_id = &mbmi->segment_id; - - if (xd->segmentation_enabled) - x->encode_breakout = cpi->segment_encode_breakout[*segment_id]; - else - x->encode_breakout = cpi->oxcf.encode_breakout; - - // if (cpi->sf.RD) - // For now this codebase is limited to a single rd encode path - { - int zbin_mode_boost_enabled = cpi->zbin_mode_boost_enabled; - - vp9_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, - &distortion, &intra_error); - - /* restore cpi->zbin_mode_boost_enabled */ - cpi->zbin_mode_boost_enabled = zbin_mode_boost_enabled; - } - // else - // The non rd encode path has been deleted from this code base - // to simplify development - // vp9_pick_inter_mode - - // Store metrics so they can be added in to totals if this mode is picked - x->mb_context[xd->mb_index].distortion = distortion; - x->mb_context[xd->mb_index].intra_error = intra_error; - - *totalrate = rate; - *totaldist = distortion; -} diff --git a/vp8/encoder/rdopt.h b/vp8/encoder/rdopt.h deleted file mode 100644 index ce7c8ca6c..000000000 --- a/vp8/encoder/rdopt.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#ifndef __INC_RDOPT_H -#define __INC_RDOPT_H - -#define RDCOST(RM,DM,R,D) ( ((128+((int64_t)R)*(RM)) >> 8) + ((int64_t)DM)*(D) ) -#define RDCOST_8x8(RM,DM,R,D) ( ((128+((int64_t)R)*(RM)) >> 8) + ((int64_t)DM)*(D) ) - -extern void vp9_initialize_rd_consts(VP9_COMP *cpi, int Qvalue); - -extern void vp9_rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, - int recon_yoffset, int recon_uvoffset, - int *returnrate, int *returndistortion, - int64_t *returnintra); - -extern void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, - int *r, int *d); - -extern void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, - int *r, int *d); - -extern void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCKD *xd, - const MODE_INFO *here, int_mv *mvp, - int refframe, int *ref_frame_sign_bias, - int *sr, int near_sadidx[]); - -extern void vp9_init_me_luts(); - -extern void vp9_set_mbmode_and_mvs(MACROBLOCK *x, - MB_PREDICTION_MODE mb, int_mv *mv); - -#endif diff --git a/vp8/encoder/sad_c.c b/vp8/encoder/sad_c.c deleted file mode 100644 index 332514141..000000000 --- a/vp8/encoder/sad_c.c +++ /dev/null @@ -1,480 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include <stdlib.h> -#include "vp8/common/sadmxn.h" -#include "vpx_ports/config.h" -#include "vpx/vpx_integer.h" - -unsigned int vp9_sad32x32_c(const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - int max_sad) { - return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 32, 32); -} - -unsigned int vp9_sad16x16_c(const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - int max_sad) { - return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 16, 16); -} - -unsigned int vp9_sad8x8_c(const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - int max_sad) { - return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 8, 8); -} - - -unsigned int vp9_sad16x8_c(const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - int max_sad) { - return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 16, 8); -} - -unsigned int vp9_sad8x16_c(const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - int max_sad) { - return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 8, 16); -} - - -unsigned int vp9_sad4x4_c(const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - int max_sad) { - return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 4, 4); -} - -void vp9_sad32x32x3_c(const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - unsigned int *sad_array - ) { - sad_array[0] = vp9_sad32x32_c(src_ptr, src_stride, - ref_ptr, ref_stride, 0x7fffffff); - sad_array[1] = vp9_sad32x32_c(src_ptr, src_stride, - ref_ptr + 1, ref_stride, 0x7fffffff); - sad_array[2] = vp9_sad32x32_c(src_ptr, src_stride, - ref_ptr + 2, ref_stride, 0x7fffffff); -} - -void vp9_sad32x32x8_c(const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - 
unsigned short *sad_array - ) { - sad_array[0] = (unsigned short)vp9_sad32x32_c(src_ptr, src_stride, - ref_ptr, ref_stride, - 0x7fffffff); - sad_array[1] = (unsigned short)vp9_sad32x32_c(src_ptr, src_stride, - ref_ptr + 1, ref_stride, - 0x7fffffff); - sad_array[2] = (unsigned short)vp9_sad32x32_c(src_ptr, src_stride, - ref_ptr + 2, ref_stride, - 0x7fffffff); - sad_array[3] = (unsigned short)vp9_sad32x32_c(src_ptr, src_stride, - ref_ptr + 3, ref_stride, - 0x7fffffff); - sad_array[4] = (unsigned short)vp9_sad32x32_c(src_ptr, src_stride, - ref_ptr + 4, ref_stride, - 0x7fffffff); - sad_array[5] = (unsigned short)vp9_sad32x32_c(src_ptr, src_stride, - ref_ptr + 5, ref_stride, - 0x7fffffff); - sad_array[6] = (unsigned short)vp9_sad32x32_c(src_ptr, src_stride, - ref_ptr + 6, ref_stride, - 0x7fffffff); - sad_array[7] = (unsigned short)vp9_sad32x32_c(src_ptr, src_stride, - ref_ptr + 7, ref_stride, - 0x7fffffff); -} - -void vp9_sad16x16x3_c(const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - unsigned int *sad_array) { - sad_array[0] = vp9_sad16x16_c(src_ptr, src_stride, - ref_ptr, ref_stride, 0x7fffffff); - sad_array[1] = vp9_sad16x16_c(src_ptr, src_stride, - ref_ptr + 1, ref_stride, 0x7fffffff); - sad_array[2] = vp9_sad16x16_c(src_ptr, src_stride, - ref_ptr + 2, ref_stride, 0x7fffffff); -} - -void vp9_sad16x16x8_c(const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - unsigned short *sad_array) { - sad_array[0] = (unsigned short)vp9_sad16x16_c(src_ptr, src_stride, - ref_ptr, ref_stride, - 0x7fffffff); - sad_array[1] = (unsigned short)vp9_sad16x16_c(src_ptr, src_stride, - ref_ptr + 1, ref_stride, - 0x7fffffff); - sad_array[2] = (unsigned short)vp9_sad16x16_c(src_ptr, src_stride, - ref_ptr + 2, ref_stride, - 0x7fffffff); - sad_array[3] = (unsigned short)vp9_sad16x16_c(src_ptr, src_stride, - ref_ptr + 3, ref_stride, - 0x7fffffff); - sad_array[4] = (unsigned short)vp9_sad16x16_c(src_ptr, 
src_stride, - ref_ptr + 4, ref_stride, - 0x7fffffff); - sad_array[5] = (unsigned short)vp9_sad16x16_c(src_ptr, src_stride, - ref_ptr + 5, ref_stride, - 0x7fffffff); - sad_array[6] = (unsigned short)vp9_sad16x16_c(src_ptr, src_stride, - ref_ptr + 6, ref_stride, - 0x7fffffff); - sad_array[7] = (unsigned short)vp9_sad16x16_c(src_ptr, src_stride, - ref_ptr + 7, ref_stride, - 0x7fffffff); -} - -void vp9_sad16x8x3_c(const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - unsigned int *sad_array) { - sad_array[0] = vp9_sad16x8_c(src_ptr, src_stride, - ref_ptr, ref_stride, 0x7fffffff); - sad_array[1] = vp9_sad16x8_c(src_ptr, src_stride, - ref_ptr + 1, ref_stride, 0x7fffffff); - sad_array[2] = vp9_sad16x8_c(src_ptr, src_stride, - ref_ptr + 2, ref_stride, 0x7fffffff); -} - -void vp9_sad16x8x8_c(const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - unsigned short *sad_array) { - sad_array[0] = (unsigned short)vp9_sad16x8_c(src_ptr, src_stride, - ref_ptr, ref_stride, - 0x7fffffff); - sad_array[1] = (unsigned short)vp9_sad16x8_c(src_ptr, src_stride, - ref_ptr + 1, ref_stride, - 0x7fffffff); - sad_array[2] = (unsigned short)vp9_sad16x8_c(src_ptr, src_stride, - ref_ptr + 2, ref_stride, - 0x7fffffff); - sad_array[3] = (unsigned short)vp9_sad16x8_c(src_ptr, src_stride, - ref_ptr + 3, ref_stride, - 0x7fffffff); - sad_array[4] = (unsigned short)vp9_sad16x8_c(src_ptr, src_stride, - ref_ptr + 4, ref_stride, - 0x7fffffff); - sad_array[5] = (unsigned short)vp9_sad16x8_c(src_ptr, src_stride, - ref_ptr + 5, ref_stride, - 0x7fffffff); - sad_array[6] = (unsigned short)vp9_sad16x8_c(src_ptr, src_stride, - ref_ptr + 6, ref_stride, - 0x7fffffff); - sad_array[7] = (unsigned short)vp9_sad16x8_c(src_ptr, src_stride, - ref_ptr + 7, ref_stride, - 0x7fffffff); -} - -void vp9_sad8x8x3_c(const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - unsigned int *sad_array) { - 
sad_array[0] = vp9_sad8x8_c(src_ptr, src_stride, - ref_ptr, ref_stride, 0x7fffffff); - sad_array[1] = vp9_sad8x8_c(src_ptr, src_stride, - ref_ptr + 1, ref_stride, 0x7fffffff); - sad_array[2] = vp9_sad8x8_c(src_ptr, src_stride, - ref_ptr + 2, ref_stride, 0x7fffffff); -} - -void vp9_sad8x8x8_c(const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - unsigned short *sad_array) { - sad_array[0] = (unsigned short)vp9_sad8x8_c(src_ptr, src_stride, - ref_ptr, ref_stride, - 0x7fffffff); - sad_array[1] = (unsigned short)vp9_sad8x8_c(src_ptr, src_stride, - ref_ptr + 1, ref_stride, - 0x7fffffff); - sad_array[2] = (unsigned short)vp9_sad8x8_c(src_ptr, src_stride, - ref_ptr + 2, ref_stride, - 0x7fffffff); - sad_array[3] = (unsigned short)vp9_sad8x8_c(src_ptr, src_stride, - ref_ptr + 3, ref_stride, - 0x7fffffff); - sad_array[4] = (unsigned short)vp9_sad8x8_c(src_ptr, src_stride, - ref_ptr + 4, ref_stride, - 0x7fffffff); - sad_array[5] = (unsigned short)vp9_sad8x8_c(src_ptr, src_stride, - ref_ptr + 5, ref_stride, - 0x7fffffff); - sad_array[6] = (unsigned short)vp9_sad8x8_c(src_ptr, src_stride, - ref_ptr + 6, ref_stride, - 0x7fffffff); - sad_array[7] = (unsigned short)vp9_sad8x8_c(src_ptr, src_stride, - ref_ptr + 7, ref_stride, - 0x7fffffff); -} - -void vp9_sad8x16x3_c(const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - unsigned int *sad_array) { - sad_array[0] = vp9_sad8x16_c(src_ptr, src_stride, - ref_ptr, ref_stride, 0x7fffffff); - sad_array[1] = vp9_sad8x16_c(src_ptr, src_stride, - ref_ptr + 1, ref_stride, 0x7fffffff); - sad_array[2] = vp9_sad8x16_c(src_ptr, src_stride, - ref_ptr + 2, ref_stride, 0x7fffffff); -} - -void vp9_sad8x16x8_c(const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - unsigned short *sad_array) { - sad_array[0] = (unsigned short)vp9_sad8x16_c(src_ptr, src_stride, - ref_ptr, ref_stride, - 0x7fffffff); - sad_array[1] = 
(unsigned short)vp9_sad8x16_c(src_ptr, src_stride, - ref_ptr + 1, ref_stride, - 0x7fffffff); - sad_array[2] = (unsigned short)vp9_sad8x16_c(src_ptr, src_stride, - ref_ptr + 2, ref_stride, - 0x7fffffff); - sad_array[3] = (unsigned short)vp9_sad8x16_c(src_ptr, src_stride, - ref_ptr + 3, ref_stride, - 0x7fffffff); - sad_array[4] = (unsigned short)vp9_sad8x16_c(src_ptr, src_stride, - ref_ptr + 4, ref_stride, - 0x7fffffff); - sad_array[5] = (unsigned short)vp9_sad8x16_c(src_ptr, src_stride, - ref_ptr + 5, ref_stride, - 0x7fffffff); - sad_array[6] = (unsigned short)vp9_sad8x16_c(src_ptr, src_stride, - ref_ptr + 6, ref_stride, - 0x7fffffff); - sad_array[7] = (unsigned short)vp9_sad8x16_c(src_ptr, src_stride, - ref_ptr + 7, ref_stride, - 0x7fffffff); -} - -void vp9_sad4x4x3_c(const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - unsigned int *sad_array) { - sad_array[0] = vp9_sad4x4_c(src_ptr, src_stride, - ref_ptr, ref_stride, 0x7fffffff); - sad_array[1] = vp9_sad4x4_c(src_ptr, src_stride, - ref_ptr + 1, ref_stride, 0x7fffffff); - sad_array[2] = vp9_sad4x4_c(src_ptr, src_stride, - ref_ptr + 2, ref_stride, 0x7fffffff); -} - -void vp9_sad4x4x8_c(const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - unsigned short *sad_array) { - sad_array[0] = (unsigned short)vp9_sad4x4_c(src_ptr, src_stride, - ref_ptr, ref_stride, - 0x7fffffff); - sad_array[1] = (unsigned short)vp9_sad4x4_c(src_ptr, src_stride, - ref_ptr + 1, ref_stride, - 0x7fffffff); - sad_array[2] = (unsigned short)vp9_sad4x4_c(src_ptr, src_stride, - ref_ptr + 2, ref_stride, - 0x7fffffff); - sad_array[3] = (unsigned short)vp9_sad4x4_c(src_ptr, src_stride, - ref_ptr + 3, ref_stride, - 0x7fffffff); - sad_array[4] = (unsigned short)vp9_sad4x4_c(src_ptr, src_stride, - ref_ptr + 4, ref_stride, - 0x7fffffff); - sad_array[5] = (unsigned short)vp9_sad4x4_c(src_ptr, src_stride, - ref_ptr + 5, ref_stride, - 0x7fffffff); - sad_array[6] = 
(unsigned short)vp9_sad4x4_c(src_ptr, src_stride, - ref_ptr + 6, ref_stride, - 0x7fffffff); - sad_array[7] = (unsigned short)vp9_sad4x4_c(src_ptr, src_stride, - ref_ptr + 7, ref_stride, - 0x7fffffff); -} - -void vp9_sad32x32x4d_c(const unsigned char *src_ptr, - int src_stride, - unsigned char *ref_ptr[], - int ref_stride, - unsigned int *sad_array - ) { - sad_array[0] = vp9_sad32x32_c(src_ptr, src_stride, - ref_ptr[0], ref_stride, 0x7fffffff); - sad_array[1] = vp9_sad32x32_c(src_ptr, src_stride, - ref_ptr[1], ref_stride, 0x7fffffff); - sad_array[2] = vp9_sad32x32_c(src_ptr, src_stride, - ref_ptr[2], ref_stride, 0x7fffffff); - sad_array[3] = vp9_sad32x32_c(src_ptr, src_stride, - ref_ptr[3], ref_stride, 0x7fffffff); -} - -void vp9_sad16x16x4d_c(const unsigned char *src_ptr, - int src_stride, - unsigned char *ref_ptr[], - int ref_stride, - unsigned int *sad_array) { - sad_array[0] = vp9_sad16x16_c(src_ptr, src_stride, - ref_ptr[0], ref_stride, 0x7fffffff); - sad_array[1] = vp9_sad16x16_c(src_ptr, src_stride, - ref_ptr[1], ref_stride, 0x7fffffff); - sad_array[2] = vp9_sad16x16_c(src_ptr, src_stride, - ref_ptr[2], ref_stride, 0x7fffffff); - sad_array[3] = vp9_sad16x16_c(src_ptr, src_stride, - ref_ptr[3], ref_stride, 0x7fffffff); -} - -void vp9_sad16x8x4d_c(const unsigned char *src_ptr, - int src_stride, - unsigned char *ref_ptr[], - int ref_stride, - unsigned int *sad_array) { - sad_array[0] = vp9_sad16x8_c(src_ptr, src_stride, - ref_ptr[0], ref_stride, 0x7fffffff); - sad_array[1] = vp9_sad16x8_c(src_ptr, src_stride, - ref_ptr[1], ref_stride, 0x7fffffff); - sad_array[2] = vp9_sad16x8_c(src_ptr, src_stride, - ref_ptr[2], ref_stride, 0x7fffffff); - sad_array[3] = vp9_sad16x8_c(src_ptr, src_stride, - ref_ptr[3], ref_stride, 0x7fffffff); -} - -void vp9_sad8x8x4d_c(const unsigned char *src_ptr, - int src_stride, - unsigned char *ref_ptr[], - int ref_stride, - unsigned int *sad_array) { - sad_array[0] = vp9_sad8x8_c(src_ptr, src_stride, - ref_ptr[0], ref_stride, 0x7fffffff); 
- sad_array[1] = vp9_sad8x8_c(src_ptr, src_stride, - ref_ptr[1], ref_stride, 0x7fffffff); - sad_array[2] = vp9_sad8x8_c(src_ptr, src_stride, - ref_ptr[2], ref_stride, 0x7fffffff); - sad_array[3] = vp9_sad8x8_c(src_ptr, src_stride, - ref_ptr[3], ref_stride, 0x7fffffff); -} - -void vp9_sad8x16x4d_c(const unsigned char *src_ptr, - int src_stride, - unsigned char *ref_ptr[], - int ref_stride, - unsigned int *sad_array) { - sad_array[0] = vp9_sad8x16_c(src_ptr, src_stride, - ref_ptr[0], ref_stride, 0x7fffffff); - sad_array[1] = vp9_sad8x16_c(src_ptr, src_stride, - ref_ptr[1], ref_stride, 0x7fffffff); - sad_array[2] = vp9_sad8x16_c(src_ptr, src_stride, - ref_ptr[2], ref_stride, 0x7fffffff); - sad_array[3] = vp9_sad8x16_c(src_ptr, src_stride, - ref_ptr[3], ref_stride, 0x7fffffff); -} - -void vp9_sad4x4x4d_c(const unsigned char *src_ptr, - int src_stride, - unsigned char *ref_ptr[], - int ref_stride, - unsigned int *sad_array) { - sad_array[0] = vp9_sad4x4_c(src_ptr, src_stride, - ref_ptr[0], ref_stride, 0x7fffffff); - sad_array[1] = vp9_sad4x4_c(src_ptr, src_stride, - ref_ptr[1], ref_stride, 0x7fffffff); - sad_array[2] = vp9_sad4x4_c(src_ptr, src_stride, - ref_ptr[2], ref_stride, 0x7fffffff); - sad_array[3] = vp9_sad4x4_c(src_ptr, src_stride, - ref_ptr[3], ref_stride, 0x7fffffff); -} - -/* Copy 2 macroblocks to a buffer */ -void vp9_copy32xn_c(unsigned char *src_ptr, - int src_stride, - unsigned char *dst_ptr, - int dst_stride, - int height) { - int r; - - for (r = 0; r < height; r++) { -#if !(CONFIG_FAST_UNALIGNED) - dst_ptr[0] = src_ptr[0]; - dst_ptr[1] = src_ptr[1]; - dst_ptr[2] = src_ptr[2]; - dst_ptr[3] = src_ptr[3]; - dst_ptr[4] = src_ptr[4]; - dst_ptr[5] = src_ptr[5]; - dst_ptr[6] = src_ptr[6]; - dst_ptr[7] = src_ptr[7]; - dst_ptr[8] = src_ptr[8]; - dst_ptr[9] = src_ptr[9]; - dst_ptr[10] = src_ptr[10]; - dst_ptr[11] = src_ptr[11]; - dst_ptr[12] = src_ptr[12]; - dst_ptr[13] = src_ptr[13]; - dst_ptr[14] = src_ptr[14]; - dst_ptr[15] = src_ptr[15]; - dst_ptr[16] = 
src_ptr[16]; - dst_ptr[17] = src_ptr[17]; - dst_ptr[18] = src_ptr[18]; - dst_ptr[19] = src_ptr[19]; - dst_ptr[20] = src_ptr[20]; - dst_ptr[21] = src_ptr[21]; - dst_ptr[22] = src_ptr[22]; - dst_ptr[23] = src_ptr[23]; - dst_ptr[24] = src_ptr[24]; - dst_ptr[25] = src_ptr[25]; - dst_ptr[26] = src_ptr[26]; - dst_ptr[27] = src_ptr[27]; - dst_ptr[28] = src_ptr[28]; - dst_ptr[29] = src_ptr[29]; - dst_ptr[30] = src_ptr[30]; - dst_ptr[31] = src_ptr[31]; -#else - ((uint32_t *)dst_ptr)[0] = ((uint32_t *)src_ptr)[0]; - ((uint32_t *)dst_ptr)[1] = ((uint32_t *)src_ptr)[1]; - ((uint32_t *)dst_ptr)[2] = ((uint32_t *)src_ptr)[2]; - ((uint32_t *)dst_ptr)[3] = ((uint32_t *)src_ptr)[3]; - ((uint32_t *)dst_ptr)[4] = ((uint32_t *)src_ptr)[4]; - ((uint32_t *)dst_ptr)[5] = ((uint32_t *)src_ptr)[5]; - ((uint32_t *)dst_ptr)[6] = ((uint32_t *)src_ptr)[6]; - ((uint32_t *)dst_ptr)[7] = ((uint32_t *)src_ptr)[7]; -#endif - src_ptr += src_stride; - dst_ptr += dst_stride; - - } -} diff --git a/vp8/encoder/satd_c.c b/vp8/encoder/satd_c.c deleted file mode 100644 index 102aa73e5..000000000 --- a/vp8/encoder/satd_c.c +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include <stdlib.h> -#include "vpx_ports/mem.h" -#include "./vpx_rtcd.h" -unsigned int vp9_satd16x16_c(const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - unsigned int *psatd) { - int r, c, i; - unsigned int satd = 0; - DECLARE_ALIGNED(16, short, diff_in[256]); - DECLARE_ALIGNED(16, short, diff_out[16]); - short *in; - - for (r = 0; r < 16; r++) { - for (c = 0; c < 16; c++) { - diff_in[r * 16 + c] = src_ptr[c] - ref_ptr[c]; - } - src_ptr += src_stride; - ref_ptr += ref_stride; - } - - in = diff_in; - for (r = 0; r < 16; r += 4) { - for (c = 0; c < 16; c += 4) { - vp9_short_walsh4x4_c(in + c, diff_out, 32); - for (i = 0; i < 16; i++) - satd += abs(diff_out[i]); - } - in += 64; - } - - if (psatd) - *psatd = satd; - - return satd; -} diff --git a/vp8/encoder/segmentation.c b/vp8/encoder/segmentation.c deleted file mode 100644 index c68925ee6..000000000 --- a/vp8/encoder/segmentation.c +++ /dev/null @@ -1,327 +0,0 @@ -/* - * Copyright (c) 2012 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include "limits.h" -#include "vpx_mem/vpx_mem.h" -#include "segmentation.h" -#include "vp8/common/pred_common.h" - -void vp9_update_gf_useage_maps(VP9_COMP *cpi, VP9_COMMON *cm, MACROBLOCK *x) { - int mb_row, mb_col; - - MODE_INFO *this_mb_mode_info = cm->mi; - - x->gf_active_ptr = (signed char *)cpi->gf_active_flags; - - if ((cm->frame_type == KEY_FRAME) || (cm->refresh_golden_frame)) { - // Reset Gf useage monitors - vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols)); - cpi->gf_active_count = cm->mb_rows * cm->mb_cols; - } else { - // for each macroblock row in image - for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) { - // for each macroblock col in image - for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { - - // If using golden then set GF active flag if not already set. - // If using last frame 0,0 mode then leave flag as it is - // else if using non 0,0 motion or intra modes then clear - // flag if it is currently set - if ((this_mb_mode_info->mbmi.ref_frame == GOLDEN_FRAME) || - (this_mb_mode_info->mbmi.ref_frame == ALTREF_FRAME)) { - if (*(x->gf_active_ptr) == 0) { - *(x->gf_active_ptr) = 1; - cpi->gf_active_count++; - } - } else if ((this_mb_mode_info->mbmi.mode != ZEROMV) && - *(x->gf_active_ptr)) { - *(x->gf_active_ptr) = 0; - cpi->gf_active_count--; - } - - x->gf_active_ptr++; // Step onto next entry - this_mb_mode_info++; // skip to next mb - - } - - // this is to account for the border - this_mb_mode_info++; - } - } -} - -void vp9_enable_segmentation(VP9_PTR ptr) { - VP9_COMP *cpi = (VP9_COMP *)(ptr); - - // Set the appropriate feature bit - cpi->mb.e_mbd.segmentation_enabled = 1; - cpi->mb.e_mbd.update_mb_segmentation_map = 1; - cpi->mb.e_mbd.update_mb_segmentation_data = 1; -} - -void vp9_disable_segmentation(VP9_PTR ptr) { - VP9_COMP *cpi = (VP9_COMP *)(ptr); - - // Clear the appropriate feature bit - cpi->mb.e_mbd.segmentation_enabled = 0; -} - -void vp9_set_segmentation_map(VP9_PTR ptr, - unsigned char *segmentation_map) 
{ - VP9_COMP *cpi = (VP9_COMP *)(ptr); - - // Copy in the new segmentation map - vpx_memcpy(cpi->segmentation_map, segmentation_map, - (cpi->common.mb_rows * cpi->common.mb_cols)); - - // Signal that the map should be updated. - cpi->mb.e_mbd.update_mb_segmentation_map = 1; - cpi->mb.e_mbd.update_mb_segmentation_data = 1; -} - -void vp9_set_segment_data(VP9_PTR ptr, - signed char *feature_data, - unsigned char abs_delta) { - VP9_COMP *cpi = (VP9_COMP *)(ptr); - - cpi->mb.e_mbd.mb_segment_abs_delta = abs_delta; - - vpx_memcpy(cpi->mb.e_mbd.segment_feature_data, feature_data, - sizeof(cpi->mb.e_mbd.segment_feature_data)); - - // TBD ?? Set the feature mask - // vpx_memcpy(cpi->mb.e_mbd.segment_feature_mask, 0, - // sizeof(cpi->mb.e_mbd.segment_feature_mask)); -} - -// Based on set of segment counts calculate a probability tree -static void calc_segtree_probs(MACROBLOCKD *xd, - int *segcounts, - vp9_prob *segment_tree_probs) { - int count1, count2; - int tot_count; - int i; - - // Blank the strtucture to start with - vpx_memset(segment_tree_probs, 0, - MB_FEATURE_TREE_PROBS * sizeof(*segment_tree_probs)); - - // Total count for all segments - count1 = segcounts[0] + segcounts[1]; - count2 = segcounts[2] + segcounts[3]; - tot_count = count1 + count2; - - // Work out probabilities of each segment - if (tot_count) - segment_tree_probs[0] = (count1 * 255) / tot_count; - if (count1 > 0) - segment_tree_probs[1] = (segcounts[0] * 255) / count1; - if (count2 > 0) - segment_tree_probs[2] = (segcounts[2] * 255) / count2; - - // Clamp probabilities to minimum allowed value - for (i = 0; i < MB_FEATURE_TREE_PROBS; i++) { - if (segment_tree_probs[i] == 0) - segment_tree_probs[i] = 1; - } -} - -// Based on set of segment counts and probabilities calculate a cost estimate -static int cost_segmap(MACROBLOCKD *xd, - int *segcounts, - vp9_prob *probs) { - int cost; - int count1, count2; - - // Cost the top node of the tree - count1 = segcounts[0] + segcounts[1]; - count2 = segcounts[2] 
+ segcounts[3]; - cost = count1 * vp9_cost_zero(probs[0]) + - count2 * vp9_cost_one(probs[0]); - - // Now add the cost of each individual segment branch - if (count1 > 0) - cost += segcounts[0] * vp9_cost_zero(probs[1]) + - segcounts[1] * vp9_cost_one(probs[1]); - - if (count2 > 0) - cost += segcounts[2] * vp9_cost_zero(probs[2]) + - segcounts[3] * vp9_cost_one(probs[2]); - - return cost; - -} - -void vp9_choose_segmap_coding_method(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &cpi->mb.e_mbd; - - const int mis = cm->mode_info_stride; - int i; - int tot_count; - int no_pred_cost; - int t_pred_cost = INT_MAX; - int pred_context; - - int mb_row, mb_col; - int segmap_index = 0; - unsigned char segment_id; - - int temporal_predictor_count[PREDICTION_PROBS][2]; - int no_pred_segcounts[MAX_MB_SEGMENTS]; - int t_unpred_seg_counts[MAX_MB_SEGMENTS]; - - vp9_prob no_pred_tree[MB_FEATURE_TREE_PROBS]; - vp9_prob t_pred_tree[MB_FEATURE_TREE_PROBS]; - vp9_prob t_nopred_prob[PREDICTION_PROBS]; - - // Set default state for the segment tree probabilities and the - // temporal coding probabilities - vpx_memset(xd->mb_segment_tree_probs, 255, - sizeof(xd->mb_segment_tree_probs)); - vpx_memset(cm->segment_pred_probs, 255, - sizeof(cm->segment_pred_probs)); - - vpx_memset(no_pred_segcounts, 0, sizeof(no_pred_segcounts)); - vpx_memset(t_unpred_seg_counts, 0, sizeof(t_unpred_seg_counts)); - vpx_memset(temporal_predictor_count, 0, sizeof(temporal_predictor_count)); - - // First of all generate stats regarding how well the last segment map - // predicts this one - - // Initialize macroblock decoder mode info context for the first mb - // in the frame - xd->mode_info_context = cm->mi; - - for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 2) { - for (mb_col = 0; mb_col < cm->mb_cols; mb_col += 2) { - for (i = 0; i < 4; i++) { - static const int dx[4] = { +1, -1, +1, +1 }; - static const int dy[4] = { 0, +1, 0, -1 }; - int x_idx = i & 1, y_idx = i >> 1; - - 
if (mb_col + x_idx >= cm->mb_cols || - mb_row + y_idx >= cm->mb_rows) { - goto end; - } - - xd->mb_to_top_edge = -((mb_row * 16) << 3); - xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3; - xd->mb_to_left_edge = -((mb_col * 16) << 3); - xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_row) * 16) << 3; - - segmap_index = (mb_row + y_idx) * cm->mb_cols + mb_col + x_idx; - segment_id = xd->mode_info_context->mbmi.segment_id; -#if CONFIG_SUPERBLOCKS - if (xd->mode_info_context->mbmi.encoded_as_sb) { - if (mb_col + 1 < cm->mb_cols) - segment_id = segment_id && - xd->mode_info_context[1].mbmi.segment_id; - if (mb_row + 1 < cm->mb_rows) { - segment_id = segment_id && - xd->mode_info_context[mis].mbmi.segment_id; - if (mb_col + 1 < cm->mb_cols) - segment_id = segment_id && - xd->mode_info_context[mis + 1].mbmi.segment_id; - } - } -#endif - - // Count the number of hits on each segment with no prediction - no_pred_segcounts[segment_id]++; - - // Temporal prediction not allowed on key frames - if (cm->frame_type != KEY_FRAME) { - // Test to see if the segment id matches the predicted value. 
- int seg_predicted = - (segment_id == vp9_get_pred_mb_segid(cm, xd, segmap_index)); - - // Get the segment id prediction context - pred_context = - vp9_get_pred_context(cm, xd, PRED_SEG_ID); - - // Store the prediction status for this mb and update counts - // as appropriate - vp9_set_pred_flag(xd, PRED_SEG_ID, seg_predicted); - temporal_predictor_count[pred_context][seg_predicted]++; - - if (!seg_predicted) - // Update the "unpredicted" segment count - t_unpred_seg_counts[segment_id]++; - } - -#if CONFIG_SUPERBLOCKS - if (xd->mode_info_context->mbmi.encoded_as_sb) { - assert(!i); - xd->mode_info_context += 2; - break; - } -#endif - end: - xd->mode_info_context += dx[i] + dy[i] * cm->mode_info_stride; - } - } - - // this is to account for the border in mode_info_context - xd->mode_info_context -= mb_col; - xd->mode_info_context += cm->mode_info_stride * 2; - } - - // Work out probability tree for coding segments without prediction - // and the cost. - calc_segtree_probs(xd, no_pred_segcounts, no_pred_tree); - no_pred_cost = cost_segmap(xd, no_pred_segcounts, no_pred_tree); - - // Key frames cannot use temporal prediction - if (cm->frame_type != KEY_FRAME) { - // Work out probability tree for coding those segments not - // predicted using the temporal method and the cost. 
- calc_segtree_probs(xd, t_unpred_seg_counts, t_pred_tree); - t_pred_cost = cost_segmap(xd, t_unpred_seg_counts, t_pred_tree); - - // Add in the cost of the signalling for each prediction context - for (i = 0; i < PREDICTION_PROBS; i++) { - tot_count = temporal_predictor_count[i][0] + - temporal_predictor_count[i][1]; - - // Work out the context probabilities for the segment - // prediction flag - if (tot_count) { - t_nopred_prob[i] = (temporal_predictor_count[i][0] * 255) / - tot_count; - - // Clamp to minimum allowed value - if (t_nopred_prob[i] < 1) - t_nopred_prob[i] = 1; - } else - t_nopred_prob[i] = 1; - - // Add in the predictor signaling cost - t_pred_cost += (temporal_predictor_count[i][0] * - vp9_cost_zero(t_nopred_prob[i])) + - (temporal_predictor_count[i][1] * - vp9_cost_one(t_nopred_prob[i])); - } - } - - // Now choose which coding method to use. - if (t_pred_cost < no_pred_cost) { - cm->temporal_update = 1; - vpx_memcpy(xd->mb_segment_tree_probs, - t_pred_tree, sizeof(t_pred_tree)); - vpx_memcpy(&cm->segment_pred_probs, - t_nopred_prob, sizeof(t_nopred_prob)); - } else { - cm->temporal_update = 0; - vpx_memcpy(xd->mb_segment_tree_probs, - no_pred_tree, sizeof(no_pred_tree)); - } -} diff --git a/vp8/encoder/segmentation.h b/vp8/encoder/segmentation.h deleted file mode 100644 index 7719da38a..000000000 --- a/vp8/encoder/segmentation.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include "string.h" -#include "vp8/common/blockd.h" -#include "onyx_int.h" - -#ifndef __INC_SEGMENTATION_H__ -#define __INC_SEGMENTATION_H__ 1 - -extern void vp9_update_gf_useage_maps(VP9_COMP *cpi, VP9_COMMON *cm, - MACROBLOCK *x); - -extern void vp9_enable_segmentation(VP9_PTR ptr); -extern void vp9_disable_segmentation(VP9_PTR ptr); - -// Valid values for a segment are 0 to 3 -// Segmentation map is arrange as [Rows][Columns] -extern void vp9_set_segmentation_map(VP9_PTR ptr, - unsigned char *segmentation_map); - -// The values given for each segment can be either deltas (from the default -// value chosen for the frame) or absolute values. -// -// Valid range for abs values is (0-127 for MB_LVL_ALT_Q), (0-63 for -// SEGMENT_ALT_LF) -// Valid range for delta values are (+/-127 for MB_LVL_ALT_Q), (+/-63 for -// SEGMENT_ALT_LF) -// -// abs_delta = SEGMENT_DELTADATA (deltas) abs_delta = SEGMENT_ABSDATA (use -// the absolute values given). -// -extern void vp9_set_segment_data(VP9_PTR ptr, signed char *feature_data, - unsigned char abs_delta); - -extern void vp9_choose_segmap_coding_method(VP9_COMP *cpi); - -#endif /* __INC_SEGMENTATION_H__ */ diff --git a/vp8/encoder/ssim.c b/vp8/encoder/ssim.c deleted file mode 100644 index 464dfd341..000000000 --- a/vp8/encoder/ssim.c +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include "onyx_int.h" - -void vp9_ssim_parms_16x16_c(unsigned char *s, int sp, unsigned char *r, - int rp, unsigned long *sum_s, unsigned long *sum_r, - unsigned long *sum_sq_s, unsigned long *sum_sq_r, - unsigned long *sum_sxr) { - int i, j; - for (i = 0; i < 16; i++, s += sp, r += rp) { - for (j = 0; j < 16; j++) { - *sum_s += s[j]; - *sum_r += r[j]; - *sum_sq_s += s[j] * s[j]; - *sum_sq_r += r[j] * r[j]; - *sum_sxr += s[j] * r[j]; - } - } -} -void vp9_ssim_parms_8x8_c(unsigned char *s, int sp, unsigned char *r, int rp, - unsigned long *sum_s, unsigned long *sum_r, - unsigned long *sum_sq_s, unsigned long *sum_sq_r, - unsigned long *sum_sxr) { - int i, j; - for (i = 0; i < 8; i++, s += sp, r += rp) { - for (j = 0; j < 8; j++) { - *sum_s += s[j]; - *sum_r += r[j]; - *sum_sq_s += s[j] * s[j]; - *sum_sq_r += r[j] * r[j]; - *sum_sxr += s[j] * r[j]; - } - } -} - -const static int64_t cc1 = 26634; // (64^2*(.01*255)^2 -const static int64_t cc2 = 239708; // (64^2*(.03*255)^2 - -static double similarity(unsigned long sum_s, unsigned long sum_r, - unsigned long sum_sq_s, unsigned long sum_sq_r, - unsigned long sum_sxr, int count) { - int64_t ssim_n, ssim_d; - int64_t c1, c2; - - // scale the constants by number of pixels - c1 = (cc1 * count * count) >> 12; - c2 = (cc2 * count * count) >> 12; - - ssim_n = (2 * sum_s * sum_r + c1) * ((int64_t) 2 * count * sum_sxr - - (int64_t) 2 * sum_s * sum_r + c2); - - ssim_d = (sum_s * sum_s + sum_r * sum_r + c1) * - ((int64_t)count * sum_sq_s - (int64_t)sum_s * sum_s + - (int64_t)count * sum_sq_r - (int64_t) sum_r * sum_r + c2); - - return ssim_n * 1.0 / ssim_d; -} - -static double ssim_16x16(unsigned char *s, int sp, unsigned char *r, int rp) { - unsigned long sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0; - vp9_ssim_parms_16x16(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, - &sum_sxr); - return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 256); -} -static double ssim_8x8(unsigned char *s, int 
sp, unsigned char *r, int rp) { - unsigned long sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0; - vp9_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, - &sum_sxr); - return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 64); -} - -// We are using a 8x8 moving window with starting location of each 8x8 window -// on the 4x4 pixel grid. Such arrangement allows the windows to overlap -// block boundaries to penalize blocking artifacts. -double vp9_ssim2(unsigned char *img1, unsigned char *img2, int stride_img1, - int stride_img2, int width, int height) { - int i, j; - int samples = 0; - double ssim_total = 0; - - // sample point start with each 4x4 location - for (i = 0; i < height - 8; i += 4, img1 += stride_img1 * 4, img2 += stride_img2 * 4) { - for (j = 0; j < width - 8; j += 4) { - double v = ssim_8x8(img1 + j, stride_img1, img2 + j, stride_img2); - ssim_total += v; - samples++; - } - } - ssim_total /= samples; - return ssim_total; -} -double vp9_calc_ssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, - int lumamask, double *weight) { - double a, b, c; - double ssimv; - - a = vp9_ssim2(source->y_buffer, dest->y_buffer, - source->y_stride, dest->y_stride, source->y_width, - source->y_height); - - b = vp9_ssim2(source->u_buffer, dest->u_buffer, - source->uv_stride, dest->uv_stride, source->uv_width, - source->uv_height); - - c = vp9_ssim2(source->v_buffer, dest->v_buffer, - source->uv_stride, dest->uv_stride, source->uv_width, - source->uv_height); - - ssimv = a * .8 + .1 * (b + c); - - *weight = 1; - - return ssimv; -} - -double vp9_calc_ssimg(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, - double *ssim_y, double *ssim_u, double *ssim_v) { - double ssim_all = 0; - double a, b, c; - - a = vp9_ssim2(source->y_buffer, dest->y_buffer, - source->y_stride, dest->y_stride, source->y_width, - source->y_height); - - b = vp9_ssim2(source->u_buffer, dest->u_buffer, - source->uv_stride, dest->uv_stride, source->uv_width, - 
source->uv_height); - - c = vp9_ssim2(source->v_buffer, dest->v_buffer, - source->uv_stride, dest->uv_stride, source->uv_width, - source->uv_height); - *ssim_y = a; - *ssim_u = b; - *ssim_v = c; - ssim_all = (a * 4 + b + c) / 6; - - return ssim_all; -} diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c deleted file mode 100644 index 7e532ea6c..000000000 --- a/vp8/encoder/temporal_filter.c +++ /dev/null @@ -1,516 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vp8/common/onyxc_int.h" -#include "onyx_int.h" -#include "vp8/common/systemdependent.h" -#include "quantize.h" -#include "vp8/common/alloccommon.h" -#include "mcomp.h" -#include "firstpass.h" -#include "psnr.h" -#include "vpx_scale/vpxscale.h" -#include "vp8/common/extend.h" -#include "ratectrl.h" -#include "vp8/common/quant_common.h" -#include "segmentation.h" -#include "vpx_scale/yv12extend.h" -#include "vpx_mem/vpx_mem.h" -#include "vp8/common/swapyv12buffer.h" -#include "vpx_ports/vpx_timer.h" - -#include <math.h> -#include <limits.h> - -#define ALT_REF_MC_ENABLED 1 // dis/enable MC in AltRef filtering -#define ALT_REF_SUBPEL_ENABLED 1 // dis/enable subpel in MC AltRef filtering - -#if VP9_TEMPORAL_ALT_REF - - -static void temporal_filter_predictors_mb_c -( - MACROBLOCKD *xd, - unsigned char *y_mb_ptr, - unsigned char *u_mb_ptr, - unsigned char *v_mb_ptr, - int stride, - int mv_row, - int mv_col, - unsigned char *pred -) { - int offset; - unsigned char *yptr, *uptr, *vptr; - int omv_row, omv_col; - - // Y - yptr = y_mb_ptr + (mv_row >> 3) * stride + (mv_col >> 3); - - if ((mv_row | mv_col) & 7) { - 
xd->subpixel_predict16x16(yptr, stride, - (mv_col & 7) << 1, (mv_row & 7) << 1, &pred[0], 16); - } else { - vp9_copy_mem16x16(yptr, stride, &pred[0], 16); - } - - // U & V - omv_row = mv_row; - omv_col = mv_col; - mv_row >>= 1; - mv_col >>= 1; - stride = (stride + 1) >> 1; - offset = (mv_row >> 3) * stride + (mv_col >> 3); - uptr = u_mb_ptr + offset; - vptr = v_mb_ptr + offset; - - if ((omv_row | omv_col) & 15) { - xd->subpixel_predict8x8(uptr, stride, - (omv_col & 15), (omv_row & 15), &pred[256], 8); - xd->subpixel_predict8x8(vptr, stride, - (omv_col & 15), (omv_row & 15), &pred[320], 8); - } - else { - vp9_copy_mem8x8(uptr, stride, &pred[256], 8); - vp9_copy_mem8x8(vptr, stride, &pred[320], 8); - } -} -void vp9_temporal_filter_apply_c -( - unsigned char *frame1, - unsigned int stride, - unsigned char *frame2, - unsigned int block_size, - int strength, - int filter_weight, - unsigned int *accumulator, - unsigned short *count -) { - unsigned int i, j, k; - int modifier; - int byte = 0; - - for (i = 0, k = 0; i < block_size; i++) { - for (j = 0; j < block_size; j++, k++) { - - int src_byte = frame1[byte]; - int pixel_value = *frame2++; - - modifier = src_byte - pixel_value; - // This is an integer approximation of: - // float coeff = (3.0 * modifer * modifier) / pow(2, strength); - // modifier = (int)roundf(coeff > 16 ? 
0 : 16-coeff); - modifier *= modifier; - modifier *= 3; - modifier += 1 << (strength - 1); - modifier >>= strength; - - if (modifier > 16) - modifier = 16; - - modifier = 16 - modifier; - modifier *= filter_weight; - - count[k] += modifier; - accumulator[k] += modifier * pixel_value; - - byte++; - } - - byte += stride - block_size; - } -} - -#if ALT_REF_MC_ENABLED - -static int temporal_filter_find_matching_mb_c -( - VP9_COMP *cpi, - YV12_BUFFER_CONFIG *arf_frame, - YV12_BUFFER_CONFIG *frame_ptr, - int mb_offset, - int error_thresh -) { - MACROBLOCK *x = &cpi->mb; - int step_param; - int further_steps; - int sadpb = x->sadperbit16; - int bestsme = INT_MAX; - - BLOCK *b = &x->block[0]; - BLOCKD *d = &x->e_mbd.block[0]; - int_mv best_ref_mv1; - int_mv best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */ - - // Save input state - unsigned char **base_src = b->base_src; - int src = b->src; - int src_stride = b->src_stride; - unsigned char **base_pre = d->base_pre; - int pre = d->pre; - int pre_stride = d->pre_stride; - - best_ref_mv1.as_int = 0; - best_ref_mv1_full.as_mv.col = best_ref_mv1.as_mv.col >> 3; - best_ref_mv1_full.as_mv.row = best_ref_mv1.as_mv.row >> 3; - - // Setup frame pointers - b->base_src = &arf_frame->y_buffer; - b->src_stride = arf_frame->y_stride; - b->src = mb_offset; - - d->base_pre = &frame_ptr->y_buffer; - d->pre_stride = frame_ptr->y_stride; - d->pre = mb_offset; - - // Further step/diamond searches as necessary - if (cpi->Speed < 8) { - step_param = cpi->sf.first_step + - ((cpi->Speed > 5) ? 
1 : 0); - further_steps = - (cpi->sf.max_step_search_steps - 1) - step_param; - } else { - step_param = cpi->sf.first_step + 2; - further_steps = 0; - } - - /*cpi->sf.search_method == HEX*/ - // TODO Check that the 16x16 vf & sdf are selected here - // Ignore mv costing by sending NULL pointer instead of cost arrays - bestsme = vp9_hex_search(x, b, d, &best_ref_mv1_full, &d->bmi.as_mv.first, - step_param, sadpb, &cpi->fn_ptr[BLOCK_16X16], - NULLMVCOST, NULLMVCOST, - &best_ref_mv1); - -#if ALT_REF_SUBPEL_ENABLED - // Try sub-pixel MC? - // if (bestsme > error_thresh && bestsme < INT_MAX) - { - int distortion; - unsigned int sse; - // Ignore mv costing by sending NULL pointer instead of cost array - bestsme = cpi->find_fractional_mv_step(x, b, d, &d->bmi.as_mv.first, - &best_ref_mv1, - x->errorperbit, - &cpi->fn_ptr[BLOCK_16X16], - NULLMVCOST, - &distortion, &sse); - } -#endif - - // Save input state - b->base_src = base_src; - b->src = src; - b->src_stride = src_stride; - d->base_pre = base_pre; - d->pre = pre; - d->pre_stride = pre_stride; - - return bestsme; -} -#endif - -static void temporal_filter_iterate_c -( - VP9_COMP *cpi, - int frame_count, - int alt_ref_index, - int strength -) { - int byte; - int frame; - int mb_col, mb_row; - unsigned int filter_weight; - int mb_cols = cpi->common.mb_cols; - int mb_rows = cpi->common.mb_rows; - int mb_y_offset = 0; - int mb_uv_offset = 0; - DECLARE_ALIGNED_ARRAY(16, unsigned int, accumulator, 16 * 16 + 8 * 8 + 8 * 8); - DECLARE_ALIGNED_ARRAY(16, unsigned short, count, 16 * 16 + 8 * 8 + 8 * 8); - MACROBLOCKD *mbd = &cpi->mb.e_mbd; - YV12_BUFFER_CONFIG *f = cpi->frames[alt_ref_index]; - unsigned char *dst1, *dst2; - DECLARE_ALIGNED_ARRAY(16, unsigned char, predictor, 16 * 16 + 8 * 8 + 8 * 8); - - // Save input state - unsigned char *y_buffer = mbd->pre.y_buffer; - unsigned char *u_buffer = mbd->pre.u_buffer; - unsigned char *v_buffer = mbd->pre.v_buffer; - - for (mb_row = 0; mb_row < mb_rows; mb_row++) { -#if 
ALT_REF_MC_ENABLED - // Source frames are extended to 16 pixels. This is different than - // L/A/G reference frames that have a border of 32 (VP8BORDERINPIXELS) - // A 6/8 tap filter is used for motion search. This requires 2 pixels - // before and 3 pixels after. So the largest Y mv on a border would - // then be 16 - INTERP_EXTEND. The UV blocks are half the size of the Y and - // therefore only extended by 8. The largest mv that a UV block - // can support is 8 - INTERP_EXTEND. A UV mv is half of a Y mv. - // (16 - INTERP_EXTEND) >> 1 which is greater than 8 - INTERP_EXTEND. - // To keep the mv in play for both Y and UV planes the max that it - // can be on a border is therefore 16 - (2*INTERP_EXTEND+1). - cpi->mb.mv_row_min = -((mb_row * 16) + (17 - 2 * INTERP_EXTEND)); - cpi->mb.mv_row_max = ((cpi->common.mb_rows - 1 - mb_row) * 16) - + (17 - 2 * INTERP_EXTEND); -#endif - - for (mb_col = 0; mb_col < mb_cols; mb_col++) { - int i, j, k; - int stride; - - vpx_memset(accumulator, 0, 384 * sizeof(unsigned int)); - vpx_memset(count, 0, 384 * sizeof(unsigned short)); - -#if ALT_REF_MC_ENABLED - cpi->mb.mv_col_min = -((mb_col * 16) + (17 - 2 * INTERP_EXTEND)); - cpi->mb.mv_col_max = ((cpi->common.mb_cols - 1 - mb_col) * 16) - + (17 - 2 * INTERP_EXTEND); -#endif - - for (frame = 0; frame < frame_count; frame++) { - if (cpi->frames[frame] == NULL) - continue; - - mbd->block[0].bmi.as_mv.first.as_mv.row = 0; - mbd->block[0].bmi.as_mv.first.as_mv.col = 0; - - if (frame == alt_ref_index) { - filter_weight = 2; - } else { - int err = 0; -#if ALT_REF_MC_ENABLED -#define THRESH_LOW 10000 -#define THRESH_HIGH 20000 - - // Find best match in this frame by MC - err = temporal_filter_find_matching_mb_c - (cpi, - cpi->frames[alt_ref_index], - cpi->frames[frame], - mb_y_offset, - THRESH_LOW); -#endif - // Assign higher weight to matching MB if it's error - // score is lower. If not applying MC default behavior - // is to weight all MBs equal. - filter_weight = err < THRESH_LOW - ? 
2 : err < THRESH_HIGH ? 1 : 0; - } - - if (filter_weight != 0) { - // Construct the predictors - temporal_filter_predictors_mb_c - (mbd, - cpi->frames[frame]->y_buffer + mb_y_offset, - cpi->frames[frame]->u_buffer + mb_uv_offset, - cpi->frames[frame]->v_buffer + mb_uv_offset, - cpi->frames[frame]->y_stride, - mbd->block[0].bmi.as_mv.first.as_mv.row, - mbd->block[0].bmi.as_mv.first.as_mv.col, - predictor); - - // Apply the filter (YUV) - TEMPORAL_INVOKE(&cpi->rtcd.temporal, apply) - (f->y_buffer + mb_y_offset, - f->y_stride, - predictor, - 16, - strength, - filter_weight, - accumulator, - count); - - TEMPORAL_INVOKE(&cpi->rtcd.temporal, apply) - (f->u_buffer + mb_uv_offset, - f->uv_stride, - predictor + 256, - 8, - strength, - filter_weight, - accumulator + 256, - count + 256); - - TEMPORAL_INVOKE(&cpi->rtcd.temporal, apply) - (f->v_buffer + mb_uv_offset, - f->uv_stride, - predictor + 320, - 8, - strength, - filter_weight, - accumulator + 320, - count + 320); - } - } - - // Normalize filter output to produce AltRef frame - dst1 = cpi->alt_ref_buffer.y_buffer; - stride = cpi->alt_ref_buffer.y_stride; - byte = mb_y_offset; - for (i = 0, k = 0; i < 16; i++) { - for (j = 0; j < 16; j++, k++) { - unsigned int pval = accumulator[k] + (count[k] >> 1); - pval *= cpi->fixed_divide[count[k]]; - pval >>= 19; - - dst1[byte] = (unsigned char)pval; - - // move to next pixel - byte++; - } - - byte += stride - 16; - } - - dst1 = cpi->alt_ref_buffer.u_buffer; - dst2 = cpi->alt_ref_buffer.v_buffer; - stride = cpi->alt_ref_buffer.uv_stride; - byte = mb_uv_offset; - for (i = 0, k = 256; i < 8; i++) { - for (j = 0; j < 8; j++, k++) { - int m = k + 64; - - // U - unsigned int pval = accumulator[k] + (count[k] >> 1); - pval *= cpi->fixed_divide[count[k]]; - pval >>= 19; - dst1[byte] = (unsigned char)pval; - - // V - pval = accumulator[m] + (count[m] >> 1); - pval *= cpi->fixed_divide[count[m]]; - pval >>= 19; - dst2[byte] = (unsigned char)pval; - - // move to next pixel - byte++; - } - - 
byte += stride - 8; - } - - mb_y_offset += 16; - mb_uv_offset += 8; - } - - mb_y_offset += 16 * (f->y_stride - mb_cols); - mb_uv_offset += 8 * (f->uv_stride - mb_cols); - } - - // Restore input state - mbd->pre.y_buffer = y_buffer; - mbd->pre.u_buffer = u_buffer; - mbd->pre.v_buffer = v_buffer; -} - -void vp9_temporal_filter_prepare_c -( - VP9_COMP *cpi, - int distance -) { - int frame = 0; - - int num_frames_backward = 0; - int num_frames_forward = 0; - int frames_to_blur_backward = 0; - int frames_to_blur_forward = 0; - int frames_to_blur = 0; - int start_frame = 0; - - int strength = cpi->oxcf.arnr_strength; - - int blur_type = cpi->oxcf.arnr_type; - - int max_frames = cpi->active_arnr_frames; - - num_frames_backward = distance; - num_frames_forward = vp9_lookahead_depth(cpi->lookahead) - - (num_frames_backward + 1); - - switch (blur_type) { - case 1: - ///////////////////////////////////////// - // Backward Blur - - frames_to_blur_backward = num_frames_backward; - - if (frames_to_blur_backward >= max_frames) - frames_to_blur_backward = max_frames - 1; - - frames_to_blur = frames_to_blur_backward + 1; - break; - - case 2: - ///////////////////////////////////////// - // Forward Blur - - frames_to_blur_forward = num_frames_forward; - - if (frames_to_blur_forward >= max_frames) - frames_to_blur_forward = max_frames - 1; - - frames_to_blur = frames_to_blur_forward + 1; - break; - - case 3: - default: - ///////////////////////////////////////// - // Center Blur - frames_to_blur_forward = num_frames_forward; - frames_to_blur_backward = num_frames_backward; - - if (frames_to_blur_forward > frames_to_blur_backward) - frames_to_blur_forward = frames_to_blur_backward; - - if (frames_to_blur_backward > frames_to_blur_forward) - frames_to_blur_backward = frames_to_blur_forward; - - // When max_frames is even we have 1 more frame backward than forward - if (frames_to_blur_forward > (max_frames - 1) / 2) - frames_to_blur_forward = ((max_frames - 1) / 2); - - if 
(frames_to_blur_backward > (max_frames / 2)) - frames_to_blur_backward = (max_frames / 2); - - frames_to_blur = frames_to_blur_backward + frames_to_blur_forward + 1; - break; - } - - start_frame = distance + frames_to_blur_forward; - -#ifdef DEBUGFWG - // DEBUG FWG - printf("max:%d FBCK:%d FFWD:%d ftb:%d ftbbck:%d ftbfwd:%d sei:%d lasei:%d start:%d" -, max_frames -, num_frames_backward -, num_frames_forward -, frames_to_blur -, frames_to_blur_backward -, frames_to_blur_forward -, cpi->source_encode_index -, cpi->last_alt_ref_sei -, start_frame); -#endif - - // Setup frame pointers, NULL indicates frame not included in filter - vpx_memset(cpi->frames, 0, max_frames * sizeof(YV12_BUFFER_CONFIG *)); - for (frame = 0; frame < frames_to_blur; frame++) { - int which_buffer = start_frame - frame; - struct lookahead_entry *buf = vp9_lookahead_peek(cpi->lookahead, - which_buffer); - cpi->frames[frames_to_blur - 1 - frame] = &buf->img; - } - - temporal_filter_iterate_c( - cpi, - frames_to_blur, - frames_to_blur_backward, - strength); -} -#endif diff --git a/vp8/encoder/temporal_filter.h b/vp8/encoder/temporal_filter.h deleted file mode 100644 index b396abfe8..000000000 --- a/vp8/encoder/temporal_filter.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#ifndef __INC_TEMPORAL_FILTER_H -#define __INC_TEMPORAL_FILTER_H - -#define prototype_apply(sym)\ - void (sym) \ - ( \ - unsigned char *frame1, \ - unsigned int stride, \ - unsigned char *frame2, \ - unsigned int block_size, \ - int strength, \ - int filter_weight, \ - unsigned int *accumulator, \ - unsigned short *count \ - ) - -#if ARCH_X86 || ARCH_X86_64 -#include "x86/temporal_filter_x86.h" -#endif - -#ifndef vp9_temporal_filter_apply -#define vp9_temporal_filter_apply vp9_temporal_filter_apply_c -#endif -extern prototype_apply(vp9_temporal_filter_apply); - -typedef struct { - prototype_apply(*apply); -} vp9_temporal_rtcd_vtable_t; - -#if CONFIG_RUNTIME_CPU_DETECT -#define TEMPORAL_INVOKE(ctx,fn) (ctx)->fn -#else -#define TEMPORAL_INVOKE(ctx,fn) vp9_temporal_filter_##fn -#endif - -#endif // __INC_TEMPORAL_FILTER_H diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c deleted file mode 100644 index 5c5b6730a..000000000 --- a/vp8/encoder/tokenize.c +++ /dev/null @@ -1,868 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include <math.h> -#include <stdio.h> -#include <string.h> -#include <assert.h> -#include "onyx_int.h" -#include "tokenize.h" -#include "vpx_mem/vpx_mem.h" - -#include "vp8/common/pred_common.h" -#include "vp8/common/seg_common.h" -#include "vp8/common/entropy.h" - -/* Global event counters used for accumulating statistics across several - compressions, then generating context.c = initial stats. 
*/ - -#ifdef ENTROPY_STATS -INT64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; -INT64 hybrid_context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; - -INT64 context_counters_8x8[BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; -INT64 hybrid_context_counters_8x8[BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; - -INT64 context_counters_16x16[BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; -INT64 hybrid_context_counters_16x16[BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; - -extern unsigned int tree_update_hist[BLOCK_TYPES][COEF_BANDS] - [PREV_COEF_CONTEXTS][ENTROPY_NODES][2]; -extern unsigned int hybrid_tree_update_hist[BLOCK_TYPES][COEF_BANDS] - [PREV_COEF_CONTEXTS][ENTROPY_NODES][2]; -extern unsigned int tree_update_hist_8x8[BLOCK_TYPES_8X8][COEF_BANDS] - [PREV_COEF_CONTEXTS][ENTROPY_NODES] [2]; -extern unsigned int hybrid_tree_update_hist_8x8[BLOCK_TYPES_8X8][COEF_BANDS] - [PREV_COEF_CONTEXTS][ENTROPY_NODES] [2]; -extern unsigned int tree_update_hist_16x16[BLOCK_TYPES_16X16][COEF_BANDS] - [PREV_COEF_CONTEXTS][ENTROPY_NODES] [2]; -extern unsigned int hybrid_tree_update_hist_16x16[BLOCK_TYPES_16X16][COEF_BANDS] - [PREV_COEF_CONTEXTS][ENTROPY_NODES] [2]; -#endif /* ENTROPY_STATS */ - -void vp9_stuff_mb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run); -void vp9_fix_contexts(MACROBLOCKD *xd); - -static TOKENVALUE dct_value_tokens[DCT_MAX_VALUE * 2]; -const TOKENVALUE *vp9_dct_value_tokens_ptr; -static int dct_value_cost[DCT_MAX_VALUE * 2]; -const int *vp9_dct_value_cost_ptr; - -static void fill_value_tokens() { - - TOKENVALUE *const t = dct_value_tokens + DCT_MAX_VALUE; - vp9_extra_bit_struct *const e = vp9_extra_bits; - - int i = -DCT_MAX_VALUE; - int sign = 1; - - do { - if (!i) - sign = 0; - - { - const int a = sign ? 
-i : i; - int eb = sign; - - if (a > 4) { - int j = 4; - - while (++j < 11 && e[j].base_val <= a) {} - - t[i].Token = --j; - eb |= (a - e[j].base_val) << 1; - } else - t[i].Token = a; - - t[i].Extra = eb; - } - - // initialize the cost for extra bits for all possible coefficient value. - { - int cost = 0; - vp9_extra_bit_struct *p = vp9_extra_bits + t[i].Token; - - if (p->base_val) { - const int extra = t[i].Extra; - const int Length = p->Len; - - if (Length) - cost += treed_cost(p->tree, p->prob, extra >> 1, Length); - - cost += vp9_cost_bit(vp9_prob_half, extra & 1); /* sign */ - dct_value_cost[i + DCT_MAX_VALUE] = cost; - } - - } - - } while (++i < DCT_MAX_VALUE); - - vp9_dct_value_tokens_ptr = dct_value_tokens + DCT_MAX_VALUE; - vp9_dct_value_cost_ptr = dct_value_cost + DCT_MAX_VALUE; -} - -static void tokenize_b(VP9_COMP *cpi, - MACROBLOCKD *xd, - const BLOCKD * const b, - TOKENEXTRA **tp, - PLANE_TYPE type, - ENTROPY_CONTEXT *a, - ENTROPY_CONTEXT *l, - TX_SIZE tx_size, - int dry_run) { - int pt; /* near block/prev token context index */ - int c = (type == PLANE_TYPE_Y_NO_DC) ? 1 : 0; - const int eob = b->eob; /* one beyond last nonzero coeff */ - TOKENEXTRA *t = *tp; /* store tokens starting here */ - const short *qcoeff_ptr = b->qcoeff; - int seg_eob; - int segment_id = xd->mode_info_context->mbmi.segment_id; - const int *bands, *scan; - unsigned int (*counts)[COEF_BANDS][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS]; - vp9_prob (*probs)[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; - const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? 
- get_tx_type(xd, b) : DCT_DCT; - - VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l); - switch (tx_size) { - default: - case TX_4X4: - seg_eob = 16; - bands = vp9_coef_bands; - scan = vp9_default_zig_zag1d; - if (tx_type != DCT_DCT) { - counts = cpi->hybrid_coef_counts; - probs = cpi->common.fc.hybrid_coef_probs; - if (tx_type == ADST_DCT) { - scan = vp9_row_scan; - } else if (tx_type == DCT_ADST) { - scan = vp9_col_scan; - } - } else { - counts = cpi->coef_counts; - probs = cpi->common.fc.coef_probs; - } - break; - case TX_8X8: - if (type == PLANE_TYPE_Y2) { - seg_eob = 4; - bands = vp9_coef_bands; - scan = vp9_default_zig_zag1d; - } else { - seg_eob = 64; - bands = vp9_coef_bands_8x8; - scan = vp9_default_zig_zag1d_8x8; - } - if (tx_type != DCT_DCT) { - counts = cpi->hybrid_coef_counts_8x8; - probs = cpi->common.fc.hybrid_coef_probs_8x8; - } else { - counts = cpi->coef_counts_8x8; - probs = cpi->common.fc.coef_probs_8x8; - } - break; - case TX_16X16: - seg_eob = 256; - bands = vp9_coef_bands_16x16; - scan = vp9_default_zig_zag1d_16x16; - if (tx_type != DCT_DCT) { - counts = cpi->hybrid_coef_counts_16x16; - probs = cpi->common.fc.hybrid_coef_probs_16x16; - } else { - counts = cpi->coef_counts_16x16; - probs = cpi->common.fc.coef_probs_16x16; - } - break; - } - - if (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB)) - seg_eob = vp9_get_segdata(xd, segment_id, SEG_LVL_EOB); - - do { - const int band = bands[c]; - int token; - - if (c < eob) { - const int rc = scan[c]; - const int v = qcoeff_ptr[rc]; - - assert(-DCT_MAX_VALUE <= v && v < DCT_MAX_VALUE); - - t->Extra = vp9_dct_value_tokens_ptr[v].Extra; - token = vp9_dct_value_tokens_ptr[v].Token; - } else { - token = DCT_EOB_TOKEN; - } - - t->Token = token; - t->context_tree = probs[type][band][pt]; - t->skip_eob_node = (pt == 0) && ((band > 0 && type != PLANE_TYPE_Y_NO_DC) || - (band > 1 && type == PLANE_TYPE_Y_NO_DC)); - assert(vp9_coef_encodings[t->Token].Len - t->skip_eob_node > 0); - if (!dry_run) { - 
++counts[type][band][pt][token]; - } - pt = vp9_prev_token_class[token]; - ++t; - } while (c < eob && ++c < seg_eob); - - *tp = t; - *a = *l = (c != !type); /* 0 <-> all coeff data is zero */ -} - -int vp9_mby_is_skippable_4x4(MACROBLOCKD *xd, int has_y2_block) { - int skip = 1; - int i = 0; - - if (has_y2_block) { - for (i = 0; i < 16; i++) - skip &= (xd->block[i].eob < 2); - skip &= (!xd->block[24].eob); - } else { - for (i = 0; i < 16; i++) - skip &= (!xd->block[i].eob); - } - return skip; -} - -int vp9_mbuv_is_skippable_4x4(MACROBLOCKD *xd) { - int skip = 1; - int i; - - for (i = 16; i < 24; i++) - skip &= (!xd->block[i].eob); - return skip; -} - -static int mb_is_skippable_4x4(MACROBLOCKD *xd, int has_y2_block) { - return (vp9_mby_is_skippable_4x4(xd, has_y2_block) & - vp9_mbuv_is_skippable_4x4(xd)); -} - -int vp9_mby_is_skippable_8x8(MACROBLOCKD *xd, int has_y2_block) { - int skip = 1; - int i = 0; - - if (has_y2_block) { - for (i = 0; i < 16; i += 4) - skip &= (xd->block[i].eob < 2); - skip &= (!xd->block[24].eob); - } else { - for (i = 0; i < 16; i += 4) - skip &= (!xd->block[i].eob); - } - return skip; -} - -int vp9_mbuv_is_skippable_8x8(MACROBLOCKD *xd) { - return (!xd->block[16].eob) & (!xd->block[20].eob); -} - -static int mb_is_skippable_8x8(MACROBLOCKD *xd, int has_y2_block) { - return (vp9_mby_is_skippable_8x8(xd, has_y2_block) & - vp9_mbuv_is_skippable_8x8(xd)); -} - -static int mb_is_skippable_8x8_4x4uv(MACROBLOCKD *xd, int has_y2_block) { - return (vp9_mby_is_skippable_8x8(xd, has_y2_block) & - vp9_mbuv_is_skippable_4x4(xd)); -} - -int vp9_mby_is_skippable_16x16(MACROBLOCKD *xd) { - int skip = 1; - skip &= !xd->block[0].eob; - return skip; -} - -static int mb_is_skippable_16x16(MACROBLOCKD *xd) { - return (vp9_mby_is_skippable_16x16(xd) & vp9_mbuv_is_skippable_8x8(xd)); -} - -void vp9_tokenize_mb(VP9_COMP *cpi, - MACROBLOCKD *xd, - TOKENEXTRA **t, - int dry_run) { - PLANE_TYPE plane_type; - int has_y2_block; - int b; - int tx_size = 
xd->mode_info_context->mbmi.txfm_size; - int mb_skip_context = vp9_get_pred_context(&cpi->common, xd, PRED_MBSKIP); - TOKENEXTRA *t_backup = *t; - ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *) xd->above_context; - ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *) xd->left_context; - - // If the MB is going to be skipped because of a segment level flag - // exclude this from the skip count stats used to calculate the - // transmitted skip probability; - int skip_inc; - int segment_id = xd->mode_info_context->mbmi.segment_id; - - if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) || - (vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) != 0)) { - skip_inc = 1; - } else - skip_inc = 0; - - has_y2_block = (tx_size != TX_16X16 - && xd->mode_info_context->mbmi.mode != B_PRED - && xd->mode_info_context->mbmi.mode != I8X8_PRED - && xd->mode_info_context->mbmi.mode != SPLITMV); - - switch (tx_size) { - case TX_16X16: - xd->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable_16x16(xd); - break; - case TX_8X8: - if (xd->mode_info_context->mbmi.mode == I8X8_PRED || - xd->mode_info_context->mbmi.mode == SPLITMV) - xd->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable_8x8_4x4uv(xd, 0); - else - xd->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable_8x8(xd, has_y2_block); - break; - - default: - xd->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable_4x4(xd, has_y2_block); - break; - } - - if (xd->mode_info_context->mbmi.mb_skip_coeff) { - if (!dry_run) - cpi->skip_true_count[mb_skip_context] += skip_inc; - if (!cpi->common.mb_no_coeff_skip) { - vp9_stuff_mb(cpi, xd, t, dry_run); - } else { - vp9_fix_contexts(xd); - } - if (dry_run) - *t = t_backup; - return; - } - - if (!dry_run) - cpi->skip_false_count[mb_skip_context] += skip_inc; - - if (has_y2_block) { - if (tx_size == TX_8X8) { - tokenize_b(cpi, xd, xd->block + 24, t, PLANE_TYPE_Y2, - A + vp9_block2above_8x8[24], L + vp9_block2left_8x8[24], - TX_8X8, dry_run); - } else { - tokenize_b(cpi, xd, xd->block + 24, t, 
PLANE_TYPE_Y2, - A + vp9_block2above[24], L + vp9_block2left[24], - TX_4X4, dry_run); - } - - plane_type = PLANE_TYPE_Y_NO_DC; - } else - plane_type = PLANE_TYPE_Y_WITH_DC; - - if (tx_size == TX_16X16) { - tokenize_b(cpi, xd, xd->block, t, PLANE_TYPE_Y_WITH_DC, - A, L, TX_16X16, dry_run); - A[1] = A[2] = A[3] = A[0]; - L[1] = L[2] = L[3] = L[0]; - - for (b = 16; b < 24; b += 4) { - tokenize_b(cpi, xd, xd->block + b, t, PLANE_TYPE_UV, - A + vp9_block2above_8x8[b], L + vp9_block2left_8x8[b], - TX_8X8, dry_run); - A[vp9_block2above_8x8[b] + 1] = A[vp9_block2above_8x8[b]]; - L[vp9_block2left_8x8[b] + 1] = L[vp9_block2left_8x8[b]]; - } - vpx_memset(&A[8], 0, sizeof(A[8])); - vpx_memset(&L[8], 0, sizeof(L[8])); - } else if (tx_size == TX_8X8) { - for (b = 0; b < 16; b += 4) { - tokenize_b(cpi, xd, xd->block + b, t, plane_type, - A + vp9_block2above_8x8[b], L + vp9_block2left_8x8[b], - TX_8X8, dry_run); - A[vp9_block2above_8x8[b] + 1] = A[vp9_block2above_8x8[b]]; - L[vp9_block2left_8x8[b] + 1] = L[vp9_block2left_8x8[b]]; - } - if (xd->mode_info_context->mbmi.mode == I8X8_PRED || - xd->mode_info_context->mbmi.mode == SPLITMV) { - for (b = 16; b < 24; b++) { - tokenize_b(cpi, xd, xd->block + b, t, PLANE_TYPE_UV, - A + vp9_block2above[b], L + vp9_block2left[b], - TX_4X4, dry_run); - } - } else { - for (b = 16; b < 24; b += 4) { - tokenize_b(cpi, xd, xd->block + b, t, PLANE_TYPE_UV, - A + vp9_block2above_8x8[b], L + vp9_block2left_8x8[b], - TX_8X8, dry_run); - A[vp9_block2above_8x8[b] + 1] = A[vp9_block2above_8x8[b]]; - L[vp9_block2left_8x8[b] + 1] = L[vp9_block2left_8x8[b]]; - } - } - } else { - for (b = 0; b < 16; b++) { - tokenize_b(cpi, xd, xd->block + b, t, plane_type, - A + vp9_block2above[b], L + vp9_block2left[b], - TX_4X4, dry_run); - } - - for (b = 16; b < 24; b++) { - tokenize_b(cpi, xd, xd->block + b, t, PLANE_TYPE_UV, - A + vp9_block2above[b], L + vp9_block2left[b], - TX_4X4, dry_run); - } - } - if (dry_run) - *t = t_backup; -} - - -#ifdef ENTROPY_STATS -void 
init_context_counters(void) { - FILE *f = fopen("context.bin", "rb"); - if (!f) { - vpx_memset(context_counters, 0, sizeof(context_counters)); - vpx_memset(context_counters_8x8, 0, sizeof(context_counters_8x8)); - vpx_memset(context_counters_16x16, 0, sizeof(context_counters_16x16)); - } else { - fread(context_counters, sizeof(context_counters), 1, f); - fread(context_counters_8x8, sizeof(context_counters_8x8), 1, f); - fread(context_counters_16x16, sizeof(context_counters_16x16), 1, f); - fclose(f); - } - - f = fopen("treeupdate.bin", "rb"); - if (!f) { - vpx_memset(tree_update_hist, 0, sizeof(tree_update_hist)); - vpx_memset(tree_update_hist_8x8, 0, sizeof(tree_update_hist_8x8)); - vpx_memset(tree_update_hist_16x16, 0, sizeof(tree_update_hist_16x16)); - } else { - fread(tree_update_hist, sizeof(tree_update_hist), 1, f); - fread(tree_update_hist_8x8, sizeof(tree_update_hist_8x8), 1, f); - fread(tree_update_hist_16x16, sizeof(tree_update_hist_16x16), 1, f); - fclose(f); - } -} - -void print_context_counters() { - int type, band, pt, t; - FILE *f = fopen("context.c", "w"); - - fprintf(f, "#include \"entropy.h\"\n"); - fprintf(f, "\n/* *** GENERATED FILE: DO NOT EDIT *** */\n\n"); - fprintf(f, "static const unsigned int\n" - "vp9_default_coef_counts[BLOCK_TYPES]\n" - " [COEF_BANDS]\n" - " [PREV_COEF_CONTEXTS]\n" - " [MAX_ENTROPY_TOKENS]={\n"); - -# define Comma( X) (X? 
",":"") - type = 0; - do { - fprintf(f, "%s\n { /* block Type %d */", Comma(type), type); - band = 0; - do { - fprintf(f, "%s\n { /* Coeff Band %d */", Comma(band), band); - pt = 0; - do { - fprintf(f, "%s\n {", Comma(pt)); - - t = 0; - do { - const INT64 x = context_counters [type] [band] [pt] [t]; - const int y = (int) x; - assert(x == (INT64) y); /* no overflow handling yet */ - fprintf(f, "%s %d", Comma(t), y); - } while (++t < MAX_ENTROPY_TOKENS); - fprintf(f, "}"); - } while (++pt < PREV_COEF_CONTEXTS); - fprintf(f, "\n }"); - } while (++band < COEF_BANDS); - fprintf(f, "\n }"); - } while (++type < BLOCK_TYPES); - fprintf(f, "\n};\n"); - - fprintf(f, "static const unsigned int\nvp9_default_coef_counts_8x8" - "[BLOCK_TYPES_8X8] [COEF_BANDS]" - "[PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS] = {"); - type = 0; - do { - fprintf(f, "%s\n { /* block Type %d */", Comma(type), type); - band = 0; - do { - fprintf(f, "%s\n { /* Coeff Band %d */", Comma(band), band); - pt = 0; - do { - fprintf(f, "%s\n {", Comma(pt)); - t = 0; - do { - const INT64 x = context_counters_8x8 [type] [band] [pt] [t]; - const int y = (int) x; - - assert(x == (INT64) y); /* no overflow handling yet */ - fprintf(f, "%s %d", Comma(t), y); - - } while (++t < MAX_ENTROPY_TOKENS); - - fprintf(f, "}"); - } while (++pt < PREV_COEF_CONTEXTS); - - fprintf(f, "\n }"); - - } while (++band < COEF_BANDS); - - fprintf(f, "\n }"); - } while (++type < BLOCK_TYPES_8X8); - fprintf(f, "\n};\n"); - - fprintf(f, "static const unsigned int\nvp9_default_coef_counts_16x16" - "[BLOCK_TYPES_16X16] [COEF_BANDS]" - "[PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS] = {"); - type = 0; - do { - fprintf(f, "%s\n { /* block Type %d */", Comma(type), type); - band = 0; - do { - fprintf(f, "%s\n { /* Coeff Band %d */", Comma(band), band); - pt = 0; - do { - fprintf(f, "%s\n {", Comma(pt)); - t = 0; - do { - const INT64 x = context_counters_16x16 [type] [band] [pt] [t]; - const int y = (int) x; - - assert(x == (INT64) y); /* no overflow 
handling yet */ - fprintf(f, "%s %d", Comma(t), y); - - } while (++t < MAX_ENTROPY_TOKENS); - - fprintf(f, "}"); - } while (++pt < PREV_COEF_CONTEXTS); - - fprintf(f, "\n }"); - - } while (++band < COEF_BANDS); - - fprintf(f, "\n }"); - } while (++type < BLOCK_TYPES_16X16); - fprintf(f, "\n};\n"); - - fprintf(f, "static const vp9_prob\n" - "vp9_default_coef_probs[BLOCK_TYPES] [COEF_BANDS] \n" - "[PREV_COEF_CONTEXTS] [ENTROPY_NODES] = {"); - type = 0; - do { - fprintf(f, "%s\n { /* block Type %d */", Comma(type), type); - band = 0; - do { - fprintf(f, "%s\n { /* Coeff Band %d */", Comma(band), band); - pt = 0; - do { - unsigned int branch_ct [ENTROPY_NODES] [2]; - unsigned int coef_counts[MAX_ENTROPY_TOKENS]; - vp9_prob coef_probs[ENTROPY_NODES]; - for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) - coef_counts[t] = context_counters [type] [band] [pt] [t]; - vp9_tree_probs_from_distribution( - MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree, - coef_probs, branch_ct, coef_counts, 256, 1); - fprintf(f, "%s\n {", Comma(pt)); - - t = 0; - do { - fprintf(f, "%s %d", Comma(t), coef_probs[t]); - - } while (++t < ENTROPY_NODES); - - fprintf(f, "}"); - } while (++pt < PREV_COEF_CONTEXTS); - fprintf(f, "\n }"); - } while (++band < COEF_BANDS); - fprintf(f, "\n }"); - } while (++type < BLOCK_TYPES); - fprintf(f, "\n};\n"); - - fprintf(f, "static const vp9_prob\n" - "vp9_default_coef_probs_8x8[BLOCK_TYPES_8X8] [COEF_BANDS]\n" - "[PREV_COEF_CONTEXTS] [ENTROPY_NODES] = {"); - type = 0; - do { - fprintf(f, "%s\n { /* block Type %d */", Comma(type), type); - band = 0; - do { - fprintf(f, "%s\n { /* Coeff Band %d */", Comma(band), band); - pt = 0; - do { - unsigned int branch_ct [ENTROPY_NODES] [2]; - unsigned int coef_counts[MAX_ENTROPY_TOKENS]; - vp9_prob coef_probs[ENTROPY_NODES]; - for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) - coef_counts[t] = context_counters_8x8[type] [band] [pt] [t]; - vp9_tree_probs_from_distribution( - MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree, - 
coef_probs, branch_ct, coef_counts, 256, 1); - fprintf(f, "%s\n {", Comma(pt)); - - t = 0; - do { - fprintf(f, "%s %d", Comma(t), coef_probs[t]); - } while (++t < ENTROPY_NODES); - fprintf(f, "}"); - } while (++pt < PREV_COEF_CONTEXTS); - fprintf(f, "\n }"); - } while (++band < COEF_BANDS); - fprintf(f, "\n }"); - } while (++type < BLOCK_TYPES_8X8); - fprintf(f, "\n};\n"); - - fprintf(f, "static const vp9_prob\n" - "vp9_default_coef_probs_16x16[BLOCK_TYPES_16X16] [COEF_BANDS]\n" - "[PREV_COEF_CONTEXTS] [ENTROPY_NODES] = {"); - type = 0; - do { - fprintf(f, "%s\n { /* block Type %d */", Comma(type), type); - band = 0; - do { - fprintf(f, "%s\n { /* Coeff Band %d */", Comma(band), band); - pt = 0; - do { - unsigned int branch_ct [ENTROPY_NODES] [2]; - unsigned int coef_counts[MAX_ENTROPY_TOKENS]; - vp9_prob coef_probs[ENTROPY_NODES]; - for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) - coef_counts[t] = context_counters_16x16[type] [band] [pt] [t]; - vp9_tree_probs_from_distribution( - MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree, - coef_probs, branch_ct, coef_counts, 256, 1); - fprintf(f, "%s\n {", Comma(pt)); - - t = 0; - do { - fprintf(f, "%s %d", Comma(t), coef_probs[t]); - } while (++t < ENTROPY_NODES); - fprintf(f, "}"); - } while (++pt < PREV_COEF_CONTEXTS); - fprintf(f, "\n }"); - } while (++band < COEF_BANDS); - fprintf(f, "\n }"); - } while (++type < BLOCK_TYPES_16X16); - fprintf(f, "\n};\n"); - - fclose(f); - - f = fopen("context.bin", "wb"); - fwrite(context_counters, sizeof(context_counters), 1, f); - fwrite(context_counters_8x8, sizeof(context_counters_8x8), 1, f); - fwrite(context_counters_16x16, sizeof(context_counters_16x16), 1, f); - fclose(f); -} -#endif - -void vp9_tokenize_initialize() { - fill_value_tokens(); -} - -static __inline void stuff_b(VP9_COMP *cpi, - MACROBLOCKD *xd, - const BLOCKD * const b, - TOKENEXTRA **tp, - PLANE_TYPE type, - ENTROPY_CONTEXT *a, - ENTROPY_CONTEXT *l, - TX_SIZE tx_size, - int dry_run) { - const int *bands; - 
unsigned int (*counts)[COEF_BANDS][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS]; - vp9_prob (*probs)[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; - int pt, band; - TOKENEXTRA *t = *tp; - const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? - get_tx_type(xd, b) : DCT_DCT; - VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l); - - switch (tx_size) { - default: - case TX_4X4: - bands = vp9_coef_bands; - if (tx_type != DCT_DCT) { - counts = cpi->hybrid_coef_counts; - probs = cpi->common.fc.hybrid_coef_probs; - } else { - counts = cpi->coef_counts; - probs = cpi->common.fc.coef_probs; - } - break; - case TX_8X8: - bands = vp9_coef_bands_8x8; - if (tx_type != DCT_DCT) { - counts = cpi->hybrid_coef_counts_8x8; - probs = cpi->common.fc.hybrid_coef_probs_8x8; - } else { - counts = cpi->coef_counts_8x8; - probs = cpi->common.fc.coef_probs_8x8; - } - break; - case TX_16X16: - bands = vp9_coef_bands_16x16; - if (tx_type != DCT_DCT) { - counts = cpi->hybrid_coef_counts_16x16; - probs = cpi->common.fc.hybrid_coef_probs_16x16; - } else { - counts = cpi->coef_counts_16x16; - probs = cpi->common.fc.coef_probs_16x16; - } - break; - } - band = bands[(type == PLANE_TYPE_Y_NO_DC) ? 
1 : 0]; - t->Token = DCT_EOB_TOKEN; - t->context_tree = probs[type][band][pt]; - t->skip_eob_node = 0; - ++t; - *tp = t; - *a = *l = 0; - if (!dry_run) { - ++counts[type][band][pt][DCT_EOB_TOKEN]; - } -} - -static void stuff_mb_8x8(VP9_COMP *cpi, MACROBLOCKD *xd, - TOKENEXTRA **t, int dry_run) { - ENTROPY_CONTEXT *A = (ENTROPY_CONTEXT *)xd->above_context; - ENTROPY_CONTEXT *L = (ENTROPY_CONTEXT *)xd->left_context; - PLANE_TYPE plane_type; - int b; - const int has_y2_block = (xd->mode_info_context->mbmi.mode != B_PRED && - xd->mode_info_context->mbmi.mode != I8X8_PRED && - xd->mode_info_context->mbmi.mode != SPLITMV); - - if (has_y2_block) { - stuff_b(cpi, xd, xd->block + 24, t, PLANE_TYPE_Y2, - A + vp9_block2above_8x8[24], L + vp9_block2left_8x8[24], - TX_8X8, dry_run); - plane_type = PLANE_TYPE_Y_NO_DC; - } else { - plane_type = PLANE_TYPE_Y_WITH_DC; - } - - for (b = 0; b < 16; b += 4) { - stuff_b(cpi, xd, xd->block + b, t, plane_type, A + vp9_block2above_8x8[b], - L + vp9_block2left_8x8[b], TX_8X8, dry_run); - A[vp9_block2above_8x8[b] + 1] = A[vp9_block2above_8x8[b]]; - L[vp9_block2left_8x8[b] + 1] = L[vp9_block2left_8x8[b]]; - } - - for (b = 16; b < 24; b += 4) { - stuff_b(cpi, xd, xd->block + b, t, PLANE_TYPE_UV, - A + vp9_block2above_8x8[b], L + vp9_block2left_8x8[b], - TX_8X8, dry_run); - A[vp9_block2above_8x8[b] + 1] = A[vp9_block2above_8x8[b]]; - L[vp9_block2left_8x8[b] + 1] = L[vp9_block2left_8x8[b]]; - } -} - -static void stuff_mb_16x16(VP9_COMP *cpi, MACROBLOCKD *xd, - TOKENEXTRA **t, int dry_run) { - ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)xd->above_context; - ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)xd->left_context; - int b; - - stuff_b(cpi, xd, xd->block, t, PLANE_TYPE_Y_WITH_DC, A, L, TX_16X16, dry_run); - A[1] = A[2] = A[3] = A[0]; - L[1] = L[2] = L[3] = L[0]; - for (b = 16; b < 24; b += 4) { - stuff_b(cpi, xd, xd->block + b, t, PLANE_TYPE_UV, A + vp9_block2above[b], - L + vp9_block2above_8x8[b], TX_8X8, dry_run); - A[vp9_block2above_8x8[b] + 1] = 
A[vp9_block2above_8x8[b]]; - L[vp9_block2left_8x8[b] + 1] = L[vp9_block2left_8x8[b]]; - } - vpx_memset(&A[8], 0, sizeof(A[8])); - vpx_memset(&L[8], 0, sizeof(L[8])); -} - -static void stuff_mb_4x4(VP9_COMP *cpi, MACROBLOCKD *xd, - TOKENEXTRA **t, int dry_run) { - ENTROPY_CONTEXT *A = (ENTROPY_CONTEXT *)xd->above_context; - ENTROPY_CONTEXT *L = (ENTROPY_CONTEXT *)xd->left_context; - int b; - PLANE_TYPE plane_type; - const int has_y2_block = (xd->mode_info_context->mbmi.mode != B_PRED && - xd->mode_info_context->mbmi.mode != I8X8_PRED && - xd->mode_info_context->mbmi.mode != SPLITMV); - - if (has_y2_block) { - stuff_b(cpi, xd, xd->block + 24, t, PLANE_TYPE_Y2, A + vp9_block2above[24], - L + vp9_block2left[24], TX_4X4, dry_run); - plane_type = PLANE_TYPE_Y_NO_DC; - } else { - plane_type = PLANE_TYPE_Y_WITH_DC; - } - - for (b = 0; b < 16; b++) - stuff_b(cpi, xd, xd->block + b, t, plane_type, A + vp9_block2above[b], - L + vp9_block2left[b], TX_4X4, dry_run); - - for (b = 16; b < 24; b++) - stuff_b(cpi, xd, xd->block + b, t, PLANE_TYPE_UV, A + vp9_block2above[b], - L + vp9_block2left[b], TX_4X4, dry_run); -} - -static void stuff_mb_8x8_4x4uv(VP9_COMP *cpi, MACROBLOCKD *xd, - TOKENEXTRA **t, int dry_run) { - ENTROPY_CONTEXT *A = (ENTROPY_CONTEXT *)xd->above_context; - ENTROPY_CONTEXT *L = (ENTROPY_CONTEXT *)xd->left_context; - int b; - - for (b = 0; b < 16; b += 4) { - stuff_b(cpi, xd, xd->block + b, t, PLANE_TYPE_Y_WITH_DC, - A + vp9_block2above_8x8[b], L + vp9_block2left_8x8[b], - TX_8X8, dry_run); - A[vp9_block2above_8x8[b] + 1] = A[vp9_block2above_8x8[b]]; - L[vp9_block2left_8x8[b] + 1] = L[vp9_block2left_8x8[b]]; - } - - for (b = 16; b < 24; b++) - stuff_b(cpi, xd, xd->block + b, t, PLANE_TYPE_UV, A + vp9_block2above[b], - L + vp9_block2left[b], TX_4X4, dry_run); -} - -void vp9_stuff_mb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) { - TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; - TOKENEXTRA * const t_backup = *t; - - if (tx_size == 
TX_16X16) { - stuff_mb_16x16(cpi, xd, t, dry_run); - } else if (tx_size == TX_8X8) { - if (xd->mode_info_context->mbmi.mode == I8X8_PRED || - xd->mode_info_context->mbmi.mode == SPLITMV) { - stuff_mb_8x8_4x4uv(cpi, xd, t, dry_run); - } else { - stuff_mb_8x8(cpi, xd, t, dry_run); - } - } else { - stuff_mb_4x4(cpi, xd, t, dry_run); - } - - if (dry_run) { - *t = t_backup; - } -} - -void vp9_fix_contexts(MACROBLOCKD *xd) { - /* Clear entropy contexts for Y2 blocks */ - if ((xd->mode_info_context->mbmi.mode != B_PRED - && xd->mode_info_context->mbmi.mode != I8X8_PRED - && xd->mode_info_context->mbmi.mode != SPLITMV) - || xd->mode_info_context->mbmi.txfm_size == TX_16X16 - ) { - vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); - } else { - vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) - 1); - vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) - 1); - } -} diff --git a/vp8/encoder/tokenize.h b/vp8/encoder/tokenize.h deleted file mode 100644 index 4cca36e40..000000000 --- a/vp8/encoder/tokenize.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#ifndef tokenize_h -#define tokenize_h - -#include "vp8/common/entropy.h" -#include "block.h" - -void vp9_tokenize_initialize(); - -typedef struct { - short Token; - short Extra; -} TOKENVALUE; - -typedef struct { - const vp9_prob *context_tree; - short Extra; - unsigned char Token; - unsigned char skip_eob_node; -} TOKENEXTRA; - -int rd_cost_mby(MACROBLOCKD *); - -extern int vp9_mby_is_skippable_4x4(MACROBLOCKD *xd, int has_y2_block); -extern int vp9_mbuv_is_skippable_4x4(MACROBLOCKD *xd); -extern int vp9_mby_is_skippable_8x8(MACROBLOCKD *xd, int has_y2_block); -extern int vp9_mbuv_is_skippable_8x8(MACROBLOCKD *xd); -extern int vp9_mby_is_skippable_16x16(MACROBLOCKD *xd); - -#ifdef ENTROPY_STATS -void init_context_counters(); -void print_context_counters(); - -extern INT64 context_counters[BLOCK_TYPES][COEF_BANDS] - [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS]; -extern INT64 context_counters_8x8[BLOCK_TYPES_8X8][COEF_BANDS] - [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS]; -extern INT64 context_counters_16x16[BLOCK_TYPES_16X16][COEF_BANDS] - [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS]; -#endif - -extern const int *vp9_dct_value_cost_ptr; -/* TODO: The Token field should be broken out into a separate char array to - * improve cache locality, since it's needed for costing when the rest of the - * fields are not. - */ -extern const TOKENVALUE *vp9_dct_value_tokens_ptr; - -#endif /* tokenize_h */ diff --git a/vp8/encoder/treewriter.c b/vp8/encoder/treewriter.c deleted file mode 100644 index 2fb984b49..000000000 --- a/vp8/encoder/treewriter.c +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. 
All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "treewriter.h" - -static void cost( - int *const C, - vp9_tree T, - const vp9_prob *const P, - int i, - int c -) { - const vp9_prob p = P [i >> 1]; - - do { - const vp9_tree_index j = T[i]; - const int d = c + vp9_cost_bit(p, i & 1); - - if (j <= 0) - C[-j] = d; - else - cost(C, T, P, j, d); - } while (++i & 1); -} -void vp9_cost_tokens(int *c, const vp9_prob *p, vp9_tree t) { - cost(c, t, p, 0, 0); -} - -void vp9_cost_tokens_skip(int *c, const vp9_prob *p, vp9_tree t) { - cost(c, t, p, 2, 0); -} diff --git a/vp8/encoder/treewriter.h b/vp8/encoder/treewriter.h deleted file mode 100644 index e2a0ee2d7..000000000 --- a/vp8/encoder/treewriter.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef __INC_TREEWRITER_H -#define __INC_TREEWRITER_H - -/* Trees map alphabets into huffman-like codes suitable for an arithmetic - bit coder. Timothy S Murphy 11 October 2004 */ - -#include "vp8/common/treecoder.h" - -#include "boolhuff.h" /* for now */ - -typedef BOOL_CODER vp9_writer; - -#define vp9_write encode_bool -#define vp9_write_literal vp9_encode_value -#define vp9_write_bit(W, V) vp9_write(W, V, vp9_prob_half) - -/* Approximate length of an encoded bool in 256ths of a bit at given prob */ - -#define vp9_cost_zero(x) (vp9_prob_cost[x]) -#define vp9_cost_one(x) vp9_cost_zero(vp9_complement(x)) - -#define vp9_cost_bit(x, b) vp9_cost_zero((b) ? 
vp9_complement(x) : (x)) - -/* VP8BC version is scaled by 2^20 rather than 2^8; see bool_coder.h */ - - -/* Both of these return bits, not scaled bits. */ - -static __inline unsigned int cost_branch(const unsigned int ct[2], - vp9_prob p) { - /* Imitate existing calculation */ - return ((ct[0] * vp9_cost_zero(p)) - + (ct[1] * vp9_cost_one(p))) >> 8; -} - -static __inline unsigned int cost_branch256(const unsigned int ct[2], - vp9_prob p) { - /* Imitate existing calculation */ - return ((ct[0] * vp9_cost_zero(p)) - + (ct[1] * vp9_cost_one(p))); -} - -/* Small functions to write explicit values and tokens, as well as - estimate their lengths. */ - -static __inline void treed_write(vp9_writer *const w, - vp9_tree t, - const vp9_prob *const p, - int v, - /* number of bits in v, assumed nonzero */ - int n) { - vp9_tree_index i = 0; - - do { - const int b = (v >> --n) & 1; - vp9_write(w, b, p[i >> 1]); - i = t[i + b]; - } while (n); -} - -static __inline void write_token(vp9_writer *const w, - vp9_tree t, - const vp9_prob *const p, - vp9_token *const x) { - treed_write(w, t, p, x->value, x->Len); -} - -static __inline int treed_cost(vp9_tree t, - const vp9_prob *const p, - int v, - /* number of bits in v, assumed nonzero */ - int n) { - int c = 0; - vp9_tree_index i = 0; - - do { - const int b = (v >> --n) & 1; - c += vp9_cost_bit(p[i >> 1], b); - i = t[i + b]; - } while (n); - - return c; -} - -static __inline int cost_token(vp9_tree t, - const vp9_prob *const p, - vp9_token *const x) { - return treed_cost(t, p, x->value, x->Len); -} - -/* Fill array of costs for all possible token values. */ - -void vp9_cost_tokens(int *Costs, const vp9_prob *, vp9_tree); - -void vp9_cost_tokens_skip(int *c, const vp9_prob *p, vp9_tree t); - -#endif diff --git a/vp8/encoder/variance.h b/vp8/encoder/variance.h deleted file mode 100644 index 6afbfb7a9..000000000 --- a/vp8/encoder/variance.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. 
All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VARIANCE_H -#define VARIANCE_H - -typedef unsigned int(*vp9_sad_fn_t)(const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int ref_stride, - unsigned int max_sad); - -typedef void (*vp9_copy32xn_fn_t)(const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int ref_stride, - int n); - -typedef void (*vp9_sad_multi_fn_t)(const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int ref_stride, - unsigned int *sad_array); - -typedef void (*vp9_sad_multi1_fn_t)(const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int ref_stride, - unsigned short *sad_array); - -typedef void (*vp9_sad_multi_d_fn_t)(const unsigned char *src_ptr, - int source_stride, - const unsigned char * const ref_ptr[], - int ref_stride, unsigned int *sad_array); - -typedef unsigned int (*vp9_variance_fn_t)(const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int ref_stride, - unsigned int *sse); - -typedef unsigned int (*vp9_subpixvariance_fn_t)(const unsigned char *src_ptr, - int source_stride, - int xoffset, - int yoffset, - const unsigned char *ref_ptr, - int Refstride, - unsigned int *sse); - -typedef void (*vp9_ssimpf_fn_t)(unsigned char *s, int sp, unsigned char *r, - int rp, unsigned long *sum_s, - unsigned long *sum_r, unsigned long *sum_sq_s, - unsigned long *sum_sq_r, - unsigned long *sum_sxr); - -typedef unsigned int (*vp9_getmbss_fn_t)(const short *); - -typedef unsigned int (*vp9_get16x16prederror_fn_t)(const unsigned char *src_ptr, - int source_stride, - const unsigned 
char *ref_ptr, - int ref_stride); - -typedef struct variance_vtable { - vp9_sad_fn_t sdf; - vp9_variance_fn_t vf; - vp9_subpixvariance_fn_t svf; - vp9_variance_fn_t svf_halfpix_h; - vp9_variance_fn_t svf_halfpix_v; - vp9_variance_fn_t svf_halfpix_hv; - vp9_sad_multi_fn_t sdx3f; - vp9_sad_multi1_fn_t sdx8f; - vp9_sad_multi_d_fn_t sdx4df; - vp9_copy32xn_fn_t copymem; -} vp9_variance_fn_ptr_t; - -#endif diff --git a/vp8/encoder/variance_c.c b/vp8/encoder/variance_c.c deleted file mode 100644 index 4dc554dbf..000000000 --- a/vp8/encoder/variance_c.c +++ /dev/null @@ -1,540 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "variance.h" -#include "vp8/common/filter.h" - - -unsigned int vp9_get_mb_ss_c(const short *src_ptr) { - unsigned int i, sum = 0; - - for (i = 0; i < 256; i++) { - sum += (src_ptr[i] * src_ptr[i]); - } - - return sum; -} - - -static void variance(const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - int w, - int h, - unsigned int *sse, - int *sum) { - int i, j; - int diff; - - *sum = 0; - *sse = 0; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - diff = src_ptr[j] - ref_ptr[j]; - *sum += diff; - *sse += diff * diff; - } - - src_ptr += source_stride; - ref_ptr += recon_stride; - } -} - -#if CONFIG_SUPERBLOCKS -unsigned int vp9_variance32x32_c(const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - unsigned int var; - int avg; - - variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32, &var, &avg); - *sse = var; - return (var - ((avg * 
avg) >> 10)); -} -#endif - -unsigned int vp9_variance16x16_c(const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - unsigned int var; - int avg; - - variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg); - *sse = var; - return (var - ((avg * avg) >> 8)); -} - -unsigned int vp9_variance8x16_c(const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - unsigned int var; - int avg; - - variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg); - *sse = var; - return (var - ((avg * avg) >> 7)); -} - -unsigned int vp9_variance16x8_c(const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - unsigned int var; - int avg; - - variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg); - *sse = var; - return (var - ((avg * avg) >> 7)); -} - - -unsigned int vp9_variance8x8_c(const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - unsigned int var; - int avg; - - variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg); - *sse = var; - return (var - ((avg * avg) >> 6)); -} - -unsigned int vp9_variance4x4_c(const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - unsigned int var; - int avg; - - variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &var, &avg); - *sse = var; - return (var - ((avg * avg) >> 4)); -} - - -unsigned int vp9_mse16x16_c(const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - unsigned int var; - int avg; - - variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg); - *sse = var; - return var; -} - - 
-/**************************************************************************** - * - * ROUTINE : filter_block2d_bil_first_pass - * - * INPUTS : UINT8 *src_ptr : Pointer to source block. - * UINT32 src_pixels_per_line : Stride of input block. - * UINT32 pixel_step : Offset between filter input samples (see notes). - * UINT32 output_height : Input block height. - * UINT32 output_width : Input block width. - * INT32 *vp9_filter : Array of 2 bi-linear filter taps. - * - * OUTPUTS : INT32 *output_ptr : Pointer to filtered block. - * - * RETURNS : void - * - * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in - * either horizontal or vertical direction to produce the - * filtered output block. Used to implement first-pass - * of 2-D separable filter. - * - * SPECIAL NOTES : Produces INT32 output to retain precision for next pass. - * Two filter taps should sum to VP9_FILTER_WEIGHT. - * pixel_step defines whether the filter is applied - * horizontally (pixel_step=1) or vertically (pixel_step=stride). - * It defines the offset required to move from one input - * to the next. - * - ****************************************************************************/ -static void var_filter_block2d_bil_first_pass(const unsigned char *src_ptr, - unsigned short *output_ptr, - unsigned int src_pixels_per_line, - int pixel_step, - unsigned int output_height, - unsigned int output_width, - const short *vp9_filter) { - unsigned int i, j; - - for (i = 0; i < output_height; i++) { - for (j = 0; j < output_width; j++) { - // Apply bilinear filter - output_ptr[j] = (((int)src_ptr[0] * vp9_filter[0]) + - ((int)src_ptr[pixel_step] * vp9_filter[1]) + - (VP9_FILTER_WEIGHT / 2)) >> VP9_FILTER_SHIFT; - src_ptr++; - } - - // Next row... 
- src_ptr += src_pixels_per_line - output_width; - output_ptr += output_width; - } -} - -/**************************************************************************** - * - * ROUTINE : filter_block2d_bil_second_pass - * - * INPUTS : INT32 *src_ptr : Pointer to source block. - * UINT32 src_pixels_per_line : Stride of input block. - * UINT32 pixel_step : Offset between filter input samples (see notes). - * UINT32 output_height : Input block height. - * UINT32 output_width : Input block width. - * INT32 *vp9_filter : Array of 2 bi-linear filter taps. - * - * OUTPUTS : UINT16 *output_ptr : Pointer to filtered block. - * - * RETURNS : void - * - * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in - * either horizontal or vertical direction to produce the - * filtered output block. Used to implement second-pass - * of 2-D separable filter. - * - * SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass. - * Two filter taps should sum to VP9_FILTER_WEIGHT. - * pixel_step defines whether the filter is applied - * horizontally (pixel_step=1) or vertically (pixel_step=stride). - * It defines the offset required to move from one input - * to the next. - * - ****************************************************************************/ -static void var_filter_block2d_bil_second_pass(const unsigned short *src_ptr, - unsigned char *output_ptr, - unsigned int src_pixels_per_line, - unsigned int pixel_step, - unsigned int output_height, - unsigned int output_width, - const short *vp9_filter) { - unsigned int i, j; - int Temp; - - for (i = 0; i < output_height; i++) { - for (j = 0; j < output_width; j++) { - // Apply filter - Temp = ((int)src_ptr[0] * vp9_filter[0]) + - ((int)src_ptr[pixel_step] * vp9_filter[1]) + - (VP9_FILTER_WEIGHT / 2); - output_ptr[j] = (unsigned int)(Temp >> VP9_FILTER_SHIFT); - src_ptr++; - } - - // Next row... 
- src_ptr += src_pixels_per_line - output_width; - output_ptr += output_width; - } -} - - -unsigned int vp9_sub_pixel_variance4x4_c(const unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse) { - unsigned char temp2[20 * 16]; - const short *HFilter, *VFilter; - unsigned short FData3[5 * 4]; // Temp data bufffer used in filtering - - HFilter = vp9_bilinear_filters[xoffset]; - VFilter = vp9_bilinear_filters[yoffset]; - - // First filter 1d Horizontal - var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 5, 4, HFilter); - - // Now filter Verticaly - var_filter_block2d_bil_second_pass(FData3, temp2, 4, 4, 4, 4, VFilter); - - return vp9_variance4x4_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse); -} - - -unsigned int vp9_sub_pixel_variance8x8_c(const unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse) { - unsigned short FData3[9 * 8]; // Temp data bufffer used in filtering - unsigned char temp2[20 * 16]; - const short *HFilter, *VFilter; - - HFilter = vp9_bilinear_filters[xoffset]; - VFilter = vp9_bilinear_filters[yoffset]; - - var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 8, HFilter); - var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 8, 8, VFilter); - - return vp9_variance8x8_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse); -} - -unsigned int vp9_sub_pixel_variance16x16_c(const unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse) { - unsigned short FData3[17 * 16]; // Temp data bufffer used in filtering - unsigned char temp2[20 * 16]; - const short *HFilter, *VFilter; - - HFilter = vp9_bilinear_filters[xoffset]; - VFilter = vp9_bilinear_filters[yoffset]; - - 
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 16, HFilter); - var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 16, 16, VFilter); - - return vp9_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse); -} - -#if CONFIG_SUPERBLOCKS -unsigned int vp9_sub_pixel_variance32x32_c(const unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse) { - unsigned short FData3[33 * 32]; // Temp data bufffer used in filtering - unsigned char temp2[36 * 32]; - const short *HFilter, *VFilter; - - HFilter = vp9_bilinear_filters[xoffset]; - VFilter = vp9_bilinear_filters[yoffset]; - - var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 33, 32, HFilter); - var_filter_block2d_bil_second_pass(FData3, temp2, 32, 32, 32, 32, VFilter); - - return vp9_variance32x32_c(temp2, 32, dst_ptr, dst_pixels_per_line, sse); -} -#endif - -unsigned int vp9_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 0, - ref_ptr, recon_stride, sse); -} - -#if CONFIG_SUPERBLOCKS -unsigned int vp9_variance_halfpixvar32x32_h_c(const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 0, - ref_ptr, recon_stride, sse); -} -#endif - - -unsigned int vp9_variance_halfpixvar16x16_v_c(const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 8, - ref_ptr, recon_stride, sse); -} - -#if CONFIG_SUPERBLOCKS -unsigned int vp9_variance_halfpixvar32x32_v_c(const unsigned char *src_ptr, - int source_stride, - const unsigned char 
*ref_ptr, - int recon_stride, - unsigned int *sse) { - return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 0, 8, - ref_ptr, recon_stride, sse); -} -#endif - -unsigned int vp9_variance_halfpixvar16x16_hv_c(const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 8, - ref_ptr, recon_stride, sse); -} - -#if CONFIG_SUPERBLOCKS -unsigned int vp9_variance_halfpixvar32x32_hv_c(const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 8, - ref_ptr, recon_stride, sse); -} -#endif - -unsigned int vp9_sub_pixel_mse16x16_c(const unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse) { - vp9_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line, - xoffset, yoffset, dst_ptr, - dst_pixels_per_line, sse); - return *sse; -} - -#if CONFIG_SUPERBLOCKS -unsigned int vp9_sub_pixel_mse32x32_c(const unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse) { - vp9_sub_pixel_variance32x32_c(src_ptr, src_pixels_per_line, - xoffset, yoffset, dst_ptr, - dst_pixels_per_line, sse); - return *sse; -} -#endif - -unsigned int vp9_sub_pixel_variance16x8_c(const unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse) { - unsigned short FData3[16 * 9]; // Temp data bufffer used in filtering - unsigned char temp2[20 * 16]; - const short *HFilter, *VFilter; - - HFilter = vp9_bilinear_filters[xoffset]; - VFilter = vp9_bilinear_filters[yoffset]; - - var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 
1, 9, 16, HFilter); - var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 8, 16, VFilter); - - return vp9_variance16x8_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse); -} - -unsigned int vp9_sub_pixel_variance8x16_c(const unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse) { - unsigned short FData3[9 * 16]; // Temp data bufffer used in filtering - unsigned char temp2[20 * 16]; - const short *HFilter, *VFilter; - - HFilter = vp9_bilinear_filters[xoffset]; - VFilter = vp9_bilinear_filters[yoffset]; - - var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, - 1, 17, 8, HFilter); - var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 16, 8, VFilter); - - return vp9_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse); -} - -#if CONFIG_NEWBESTREFMV -unsigned int vp9_variance2x16_c(const unsigned char *src_ptr, - const int source_stride, - const unsigned char *ref_ptr, - const int recon_stride, - unsigned int *sse) { - unsigned int var; - int avg; - - variance(src_ptr, source_stride, ref_ptr, recon_stride, 2, 16, &var, &avg); - *sse = var; - return (var - ((avg * avg) >> 5)); -} - -unsigned int vp9_variance16x2_c(const unsigned char *src_ptr, - const int source_stride, - const unsigned char *ref_ptr, - const int recon_stride, - unsigned int *sse) { - unsigned int var; - int avg; - - variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 2, &var, &avg); - *sse = var; - return (var - ((avg * avg) >> 5)); -} - -unsigned int vp9_sub_pixel_variance16x2_c(const unsigned char *src_ptr, - const int src_pixels_per_line, - const int xoffset, - const int yoffset, - const unsigned char *dst_ptr, - const int dst_pixels_per_line, - unsigned int *sse) { - unsigned short FData3[16 * 3]; // Temp data bufffer used in filtering - unsigned char temp2[20 * 16]; - const short *HFilter, *VFilter; - - HFilter = vp9_bilinear_filters[xoffset]; - 
VFilter = vp9_bilinear_filters[yoffset]; - - var_filter_block2d_bil_first_pass(src_ptr, FData3, - src_pixels_per_line, 1, 3, 16, HFilter); - var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 2, 16, VFilter); - - return vp9_variance16x2_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse); -} - -unsigned int vp9_sub_pixel_variance2x16_c(const unsigned char *src_ptr, - const int src_pixels_per_line, - const int xoffset, - const int yoffset, - const unsigned char *dst_ptr, - const int dst_pixels_per_line, - unsigned int *sse) { - unsigned short FData3[2 * 17]; // Temp data bufffer used in filtering - unsigned char temp2[2 * 16]; - const short *HFilter, *VFilter; - - HFilter = vp9_bilinear_filters[xoffset]; - VFilter = vp9_bilinear_filters[yoffset]; - - var_filter_block2d_bil_first_pass(src_ptr, FData3, - src_pixels_per_line, 1, 17, 2, HFilter); - var_filter_block2d_bil_second_pass(FData3, temp2, 2, 2, 16, 2, VFilter); - - return vp9_variance2x16_c(temp2, 2, dst_ptr, dst_pixels_per_line, sse); -} -#endif diff --git a/vp8/encoder/x86/dct_mmx.asm b/vp8/encoder/x86/dct_mmx.asm deleted file mode 100644 index 3045466f2..000000000 --- a/vp8/encoder/x86/dct_mmx.asm +++ /dev/null @@ -1,241 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - -%include "vpx_ports/x86_abi_support.asm" - -;void vp9_short_fdct4x4_mmx(short *input, short *output, int pitch) -global sym(vp9_short_fdct4x4_mmx) -sym(vp9_short_fdct4x4_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 3 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ; input - mov rdi, arg(1) ; output - - movsxd rax, dword ptr arg(2) ;pitch - - lea rcx, [rsi + rax*2] - ; read the input data - movq mm0, [rsi] - movq mm1, [rsi + rax] - - movq mm2, [rcx] - movq mm4, [rcx + rax] - - ; transpose for the first stage - movq mm3, mm0 ; 00 01 02 03 - movq mm5, mm2 ; 20 21 22 23 - - punpcklwd mm0, mm1 ; 00 10 01 11 - punpckhwd mm3, mm1 ; 02 12 03 13 - - punpcklwd mm2, mm4 ; 20 30 21 31 - punpckhwd mm5, mm4 ; 22 32 23 33 - - movq mm1, mm0 ; 00 10 01 11 - punpckldq mm0, mm2 ; 00 10 20 30 - - punpckhdq mm1, mm2 ; 01 11 21 31 - - movq mm2, mm3 ; 02 12 03 13 - punpckldq mm2, mm5 ; 02 12 22 32 - - punpckhdq mm3, mm5 ; 03 13 23 33 - - ; mm0 0 - ; mm1 1 - ; mm2 2 - ; mm3 3 - - ; first stage - movq mm5, mm0 - movq mm4, mm1 - - paddw mm0, mm3 ; a1 = 0 + 3 - paddw mm1, mm2 ; b1 = 1 + 2 - - psubw mm4, mm2 ; c1 = 1 - 2 - psubw mm5, mm3 ; d1 = 0 - 3 - - psllw mm5, 3 - psllw mm4, 3 - - psllw mm0, 3 - psllw mm1, 3 - - ; output 0 and 2 - movq mm2, mm0 ; a1 - - paddw mm0, mm1 ; op[0] = a1 + b1 - psubw mm2, mm1 ; op[2] = a1 - b1 - - ; output 1 and 3 - ; interleave c1, d1 - movq mm1, mm5 ; d1 - punpcklwd mm1, mm4 ; c1 d1 - punpckhwd mm5, mm4 ; c1 d1 - - movq mm3, mm1 - movq mm4, mm5 - - pmaddwd mm1, MMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 - pmaddwd mm4, MMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 - - pmaddwd mm3, MMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 - pmaddwd mm5, MMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 - - paddd mm1, MMWORD PTR[GLOBAL(_14500)] - paddd mm4, MMWORD PTR[GLOBAL(_14500)] - paddd mm3, MMWORD PTR[GLOBAL(_7500)] - paddd mm5, MMWORD PTR[GLOBAL(_7500)] - - psrad mm1, 12 ; (c1 * 2217 + d1 * 5352 + 
14500)>>12 - psrad mm4, 12 ; (c1 * 2217 + d1 * 5352 + 14500)>>12 - psrad mm3, 12 ; (d1 * 2217 - c1 * 5352 + 7500)>>12 - psrad mm5, 12 ; (d1 * 2217 - c1 * 5352 + 7500)>>12 - - packssdw mm1, mm4 ; op[1] - packssdw mm3, mm5 ; op[3] - - ; done with vertical - ; transpose for the second stage - movq mm4, mm0 ; 00 10 20 30 - movq mm5, mm2 ; 02 12 22 32 - - punpcklwd mm0, mm1 ; 00 01 10 11 - punpckhwd mm4, mm1 ; 20 21 30 31 - - punpcklwd mm2, mm3 ; 02 03 12 13 - punpckhwd mm5, mm3 ; 22 23 32 33 - - movq mm1, mm0 ; 00 01 10 11 - punpckldq mm0, mm2 ; 00 01 02 03 - - punpckhdq mm1, mm2 ; 01 22 12 13 - - movq mm2, mm4 ; 20 31 30 31 - punpckldq mm2, mm5 ; 20 21 22 23 - - punpckhdq mm4, mm5 ; 30 31 32 33 - - ; mm0 0 - ; mm1 1 - ; mm2 2 - ; mm3 4 - - movq mm5, mm0 - movq mm3, mm1 - - paddw mm0, mm4 ; a1 = 0 + 3 - paddw mm1, mm2 ; b1 = 1 + 2 - - psubw mm3, mm2 ; c1 = 1 - 2 - psubw mm5, mm4 ; d1 = 0 - 3 - - pxor mm6, mm6 ; zero out for compare - - pcmpeqw mm6, mm5 ; d1 != 0 - - pandn mm6, MMWORD PTR[GLOBAL(_cmp_mask)] ; clear upper, - ; and keep bit 0 of lower - - ; output 0 and 2 - movq mm2, mm0 ; a1 - - paddw mm0, mm1 ; a1 + b1 - psubw mm2, mm1 ; a1 - b1 - - paddw mm0, MMWORD PTR[GLOBAL(_7w)] - paddw mm2, MMWORD PTR[GLOBAL(_7w)] - - psraw mm0, 4 ; op[0] = (a1 + b1 + 7)>>4 - psraw mm2, 4 ; op[8] = (a1 - b1 + 7)>>4 - - movq MMWORD PTR[rdi + 0 ], mm0 - movq MMWORD PTR[rdi + 16], mm2 - - ; output 1 and 3 - ; interleave c1, d1 - movq mm1, mm5 ; d1 - punpcklwd mm1, mm3 ; c1 d1 - punpckhwd mm5, mm3 ; c1 d1 - - movq mm3, mm1 - movq mm4, mm5 - - pmaddwd mm1, MMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 - pmaddwd mm4, MMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 - - pmaddwd mm3, MMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 - pmaddwd mm5, MMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 - - paddd mm1, MMWORD PTR[GLOBAL(_12000)] - paddd mm4, MMWORD PTR[GLOBAL(_12000)] - paddd mm3, MMWORD PTR[GLOBAL(_51000)] - paddd mm5, MMWORD PTR[GLOBAL(_51000)] - - psrad mm1, 
16 ; (c1 * 2217 + d1 * 5352 + 14500)>>16 - psrad mm4, 16 ; (c1 * 2217 + d1 * 5352 + 14500)>>16 - psrad mm3, 16 ; (d1 * 2217 - c1 * 5352 + 7500)>>16 - psrad mm5, 16 ; (d1 * 2217 - c1 * 5352 + 7500)>>16 - - packssdw mm1, mm4 ; op[4] - packssdw mm3, mm5 ; op[12] - - paddw mm1, mm6 ; op[4] += (d1!=0) - - movq MMWORD PTR[rdi + 8 ], mm1 - movq MMWORD PTR[rdi + 24], mm3 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - -SECTION_RODATA -align 8 -_5352_2217: - dw 5352 - dw 2217 - dw 5352 - dw 2217 -align 8 -_2217_neg5352: - dw 2217 - dw -5352 - dw 2217 - dw -5352 -align 8 -_cmp_mask: - times 4 dw 1 -align 8 -_7w: - times 4 dw 7 -align 8 -_14500: - times 2 dd 14500 -align 8 -_7500: - times 2 dd 7500 -align 8 -_12000: - times 2 dd 12000 -align 8 -_51000: - times 2 dd 51000 diff --git a/vp8/encoder/x86/dct_sse2.asm b/vp8/encoder/x86/dct_sse2.asm deleted file mode 100644 index 2821fbe35..000000000 --- a/vp8/encoder/x86/dct_sse2.asm +++ /dev/null @@ -1,432 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - -%include "vpx_ports/x86_abi_support.asm" - -%macro STACK_FRAME_CREATE 0 -%if ABI_IS_32BIT - %define input rsi - %define output rdi - %define pitch rax - push rbp - mov rbp, rsp - GET_GOT rbx - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) - mov rdi, arg(1) - - movsxd rax, dword ptr arg(2) - lea rcx, [rsi + rax*2] -%else - %ifidn __OUTPUT_FORMAT__,x64 - %define input rcx - %define output rdx - %define pitch r8 - SAVE_XMM 7, u - %else - %define input rdi - %define output rsi - %define pitch rdx - %endif -%endif -%endmacro - -%macro STACK_FRAME_DESTROY 0 - %define input - %define output - %define pitch - -%if ABI_IS_32BIT - pop rdi - pop rsi - RESTORE_GOT - pop rbp -%else - %ifidn __OUTPUT_FORMAT__,x64 - RESTORE_XMM - %endif -%endif - ret -%endmacro - -;void vp9_short_fdct4x4_sse2(short *input, short *output, int pitch) -global sym(vp9_short_fdct4x4_sse2) -sym(vp9_short_fdct4x4_sse2): - - STACK_FRAME_CREATE - - movq xmm0, MMWORD PTR[input ] ;03 02 01 00 - movq xmm2, MMWORD PTR[input+ pitch] ;13 12 11 10 - lea input, [input+2*pitch] - movq xmm1, MMWORD PTR[input ] ;23 22 21 20 - movq xmm3, MMWORD PTR[input+ pitch] ;33 32 31 30 - - punpcklqdq xmm0, xmm2 ;13 12 11 10 03 02 01 00 - punpcklqdq xmm1, xmm3 ;33 32 31 30 23 22 21 20 - - movdqa xmm2, xmm0 - punpckldq xmm0, xmm1 ;23 22 03 02 21 20 01 00 - punpckhdq xmm2, xmm1 ;33 32 13 12 31 30 11 10 - movdqa xmm1, xmm0 - punpckldq xmm0, xmm2 ;31 21 30 20 11 10 01 00 - pshufhw xmm1, xmm1, 0b1h ;22 23 02 03 xx xx xx xx - pshufhw xmm2, xmm2, 0b1h ;32 33 12 13 xx xx xx xx - - punpckhdq xmm1, xmm2 ;32 33 22 23 12 13 02 03 - movdqa xmm3, xmm0 - paddw xmm0, xmm1 ;b1 a1 b1 a1 b1 a1 b1 a1 - psubw xmm3, xmm1 ;c1 d1 c1 d1 c1 d1 c1 d1 - psllw xmm0, 3 ;b1 <<= 3 a1 <<= 3 - psllw xmm3, 3 ;c1 <<= 3 d1 <<= 3 - - movdqa xmm1, xmm0 - pmaddwd xmm0, XMMWORD PTR[GLOBAL(_mult_add)] ;a1 + b1 - pmaddwd xmm1, XMMWORD PTR[GLOBAL(_mult_sub)] ;a1 - b1 - movdqa xmm4, xmm3 - pmaddwd xmm3, XMMWORD PTR[GLOBAL(_5352_2217)] ;c1*2217 + d1*5352 - 
pmaddwd xmm4, XMMWORD PTR[GLOBAL(_2217_neg5352)];d1*2217 - c1*5352 - - paddd xmm3, XMMWORD PTR[GLOBAL(_14500)] - paddd xmm4, XMMWORD PTR[GLOBAL(_7500)] - psrad xmm3, 12 ;(c1 * 2217 + d1 * 5352 + 14500)>>12 - psrad xmm4, 12 ;(d1 * 2217 - c1 * 5352 + 7500)>>12 - - packssdw xmm0, xmm1 ;op[2] op[0] - packssdw xmm3, xmm4 ;op[3] op[1] - ; 23 22 21 20 03 02 01 00 - ; - ; 33 32 31 30 13 12 11 10 - ; - movdqa xmm2, xmm0 - punpcklqdq xmm0, xmm3 ;13 12 11 10 03 02 01 00 - punpckhqdq xmm2, xmm3 ;23 22 21 20 33 32 31 30 - - movdqa xmm3, xmm0 - punpcklwd xmm0, xmm2 ;32 30 22 20 12 10 02 00 - punpckhwd xmm3, xmm2 ;33 31 23 21 13 11 03 01 - movdqa xmm2, xmm0 - punpcklwd xmm0, xmm3 ;13 12 11 10 03 02 01 00 - punpckhwd xmm2, xmm3 ;33 32 31 30 23 22 21 20 - - movdqa xmm5, XMMWORD PTR[GLOBAL(_7)] - pshufd xmm2, xmm2, 04eh - movdqa xmm3, xmm0 - paddw xmm0, xmm2 ;b1 b1 b1 b1 a1 a1 a1 a1 - psubw xmm3, xmm2 ;c1 c1 c1 c1 d1 d1 d1 d1 - - pshufd xmm0, xmm0, 0d8h ;b1 b1 a1 a1 b1 b1 a1 a1 - movdqa xmm2, xmm3 ;save d1 for compare - pshufd xmm3, xmm3, 0d8h ;c1 c1 d1 d1 c1 c1 d1 d1 - pshuflw xmm0, xmm0, 0d8h ;b1 b1 a1 a1 b1 a1 b1 a1 - pshuflw xmm3, xmm3, 0d8h ;c1 c1 d1 d1 c1 d1 c1 d1 - pshufhw xmm0, xmm0, 0d8h ;b1 a1 b1 a1 b1 a1 b1 a1 - pshufhw xmm3, xmm3, 0d8h ;c1 d1 c1 d1 c1 d1 c1 d1 - movdqa xmm1, xmm0 - pmaddwd xmm0, XMMWORD PTR[GLOBAL(_mult_add)] ;a1 + b1 - pmaddwd xmm1, XMMWORD PTR[GLOBAL(_mult_sub)] ;a1 - b1 - - pxor xmm4, xmm4 ;zero out for compare - paddd xmm0, xmm5 - paddd xmm1, xmm5 - pcmpeqw xmm2, xmm4 - psrad xmm0, 4 ;(a1 + b1 + 7)>>4 - psrad xmm1, 4 ;(a1 - b1 + 7)>>4 - pandn xmm2, XMMWORD PTR[GLOBAL(_cmp_mask)] ;clear upper, - ;and keep bit 0 of lower - - movdqa xmm4, xmm3 - pmaddwd xmm3, XMMWORD PTR[GLOBAL(_5352_2217)] ;c1*2217 + d1*5352 - pmaddwd xmm4, XMMWORD PTR[GLOBAL(_2217_neg5352)] ;d1*2217 - c1*5352 - paddd xmm3, XMMWORD PTR[GLOBAL(_12000)] - paddd xmm4, XMMWORD PTR[GLOBAL(_51000)] - packssdw xmm0, xmm1 ;op[8] op[0] - psrad xmm3, 16 ;(c1 * 2217 + d1 * 5352 + 12000)>>16 - 
psrad xmm4, 16 ;(d1 * 2217 - c1 * 5352 + 51000)>>16 - - packssdw xmm3, xmm4 ;op[12] op[4] - movdqa xmm1, xmm0 - paddw xmm3, xmm2 ;op[4] += (d1!=0) - punpcklqdq xmm0, xmm3 ;op[4] op[0] - punpckhqdq xmm1, xmm3 ;op[12] op[8] - - movdqa XMMWORD PTR[output + 0], xmm0 - movdqa XMMWORD PTR[output + 16], xmm1 - - STACK_FRAME_DESTROY - -;void vp9_short_fdct8x4_sse2(short *input, short *output, int pitch) -global sym(vp9_short_fdct8x4_sse2) -sym(vp9_short_fdct8x4_sse2): - - STACK_FRAME_CREATE - - ; read the input data - movdqa xmm0, [input ] - movdqa xmm2, [input+ pitch] - lea input, [input+2*pitch] - movdqa xmm4, [input ] - movdqa xmm3, [input+ pitch] - - ; transpose for the first stage - movdqa xmm1, xmm0 ; 00 01 02 03 04 05 06 07 - movdqa xmm5, xmm4 ; 20 21 22 23 24 25 26 27 - - punpcklwd xmm0, xmm2 ; 00 10 01 11 02 12 03 13 - punpckhwd xmm1, xmm2 ; 04 14 05 15 06 16 07 17 - - punpcklwd xmm4, xmm3 ; 20 30 21 31 22 32 23 33 - punpckhwd xmm5, xmm3 ; 24 34 25 35 26 36 27 37 - - movdqa xmm2, xmm0 ; 00 10 01 11 02 12 03 13 - punpckldq xmm0, xmm4 ; 00 10 20 30 01 11 21 31 - - punpckhdq xmm2, xmm4 ; 02 12 22 32 03 13 23 33 - - movdqa xmm4, xmm1 ; 04 14 05 15 06 16 07 17 - punpckldq xmm4, xmm5 ; 04 14 24 34 05 15 25 35 - - punpckhdq xmm1, xmm5 ; 06 16 26 36 07 17 27 37 - movdqa xmm3, xmm2 ; 02 12 22 32 03 13 23 33 - - punpckhqdq xmm3, xmm1 ; 03 13 23 33 07 17 27 37 - punpcklqdq xmm2, xmm1 ; 02 12 22 32 06 16 26 36 - - movdqa xmm1, xmm0 ; 00 10 20 30 01 11 21 31 - punpcklqdq xmm0, xmm4 ; 00 10 20 30 04 14 24 34 - - punpckhqdq xmm1, xmm4 ; 01 11 21 32 05 15 25 35 - - ; xmm0 0 - ; xmm1 1 - ; xmm2 2 - ; xmm3 3 - - ; first stage - movdqa xmm5, xmm0 - movdqa xmm4, xmm1 - - paddw xmm0, xmm3 ; a1 = 0 + 3 - paddw xmm1, xmm2 ; b1 = 1 + 2 - - psubw xmm4, xmm2 ; c1 = 1 - 2 - psubw xmm5, xmm3 ; d1 = 0 - 3 - - psllw xmm5, 3 - psllw xmm4, 3 - - psllw xmm0, 3 - psllw xmm1, 3 - - ; output 0 and 2 - movdqa xmm2, xmm0 ; a1 - - paddw xmm0, xmm1 ; op[0] = a1 + b1 - psubw xmm2, xmm1 ; op[2] = a1 - b1 
- - ; output 1 and 3 - ; interleave c1, d1 - movdqa xmm1, xmm5 ; d1 - punpcklwd xmm1, xmm4 ; c1 d1 - punpckhwd xmm5, xmm4 ; c1 d1 - - movdqa xmm3, xmm1 - movdqa xmm4, xmm5 - - pmaddwd xmm1, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 - pmaddwd xmm4, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 - - pmaddwd xmm3, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 - pmaddwd xmm5, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 - - paddd xmm1, XMMWORD PTR[GLOBAL(_14500)] - paddd xmm4, XMMWORD PTR[GLOBAL(_14500)] - paddd xmm3, XMMWORD PTR[GLOBAL(_7500)] - paddd xmm5, XMMWORD PTR[GLOBAL(_7500)] - - psrad xmm1, 12 ; (c1 * 2217 + d1 * 5352 + 14500)>>12 - psrad xmm4, 12 ; (c1 * 2217 + d1 * 5352 + 14500)>>12 - psrad xmm3, 12 ; (d1 * 2217 - c1 * 5352 + 7500)>>12 - psrad xmm5, 12 ; (d1 * 2217 - c1 * 5352 + 7500)>>12 - - packssdw xmm1, xmm4 ; op[1] - packssdw xmm3, xmm5 ; op[3] - - ; done with vertical - ; transpose for the second stage - movdqa xmm4, xmm0 ; 00 10 20 30 04 14 24 34 - movdqa xmm5, xmm2 ; 02 12 22 32 06 16 26 36 - - punpcklwd xmm0, xmm1 ; 00 01 10 11 20 21 30 31 - punpckhwd xmm4, xmm1 ; 04 05 14 15 24 25 34 35 - - punpcklwd xmm2, xmm3 ; 02 03 12 13 22 23 32 33 - punpckhwd xmm5, xmm3 ; 06 07 16 17 26 27 36 37 - - movdqa xmm1, xmm0 ; 00 01 10 11 20 21 30 31 - punpckldq xmm0, xmm2 ; 00 01 02 03 10 11 12 13 - - punpckhdq xmm1, xmm2 ; 20 21 22 23 30 31 32 33 - - movdqa xmm2, xmm4 ; 04 05 14 15 24 25 34 35 - punpckldq xmm2, xmm5 ; 04 05 06 07 14 15 16 17 - - punpckhdq xmm4, xmm5 ; 24 25 26 27 34 35 36 37 - movdqa xmm3, xmm1 ; 20 21 22 23 30 31 32 33 - - punpckhqdq xmm3, xmm4 ; 30 31 32 33 34 35 36 37 - punpcklqdq xmm1, xmm4 ; 20 21 22 23 24 25 26 27 - - movdqa xmm4, xmm0 ; 00 01 02 03 10 11 12 13 - punpcklqdq xmm0, xmm2 ; 00 01 02 03 04 05 06 07 - - punpckhqdq xmm4, xmm2 ; 10 11 12 13 14 15 16 17 - - ; xmm0 0 - ; xmm1 4 - ; xmm2 1 - ; xmm3 3 - - movdqa xmm5, xmm0 - movdqa xmm2, xmm1 - - paddw xmm0, xmm3 ; a1 = 0 + 3 - paddw xmm1, xmm4 ; b1 = 
1 + 2 - - psubw xmm4, xmm2 ; c1 = 1 - 2 - psubw xmm5, xmm3 ; d1 = 0 - 3 - - pxor xmm6, xmm6 ; zero out for compare - - pcmpeqw xmm6, xmm5 ; d1 != 0 - - pandn xmm6, XMMWORD PTR[GLOBAL(_cmp_mask8x4)] ; clear upper, - ; and keep bit 0 of lower - - ; output 0 and 2 - movdqa xmm2, xmm0 ; a1 - - paddw xmm0, xmm1 ; a1 + b1 - psubw xmm2, xmm1 ; a1 - b1 - - paddw xmm0, XMMWORD PTR[GLOBAL(_7w)] - paddw xmm2, XMMWORD PTR[GLOBAL(_7w)] - - psraw xmm0, 4 ; op[0] = (a1 + b1 + 7)>>4 - psraw xmm2, 4 ; op[8] = (a1 - b1 + 7)>>4 - - ; output 1 and 3 - ; interleave c1, d1 - movdqa xmm1, xmm5 ; d1 - punpcklwd xmm1, xmm4 ; c1 d1 - punpckhwd xmm5, xmm4 ; c1 d1 - - movdqa xmm3, xmm1 - movdqa xmm4, xmm5 - - pmaddwd xmm1, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 - pmaddwd xmm4, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 - - pmaddwd xmm3, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 - pmaddwd xmm5, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 - - paddd xmm1, XMMWORD PTR[GLOBAL(_12000)] - paddd xmm4, XMMWORD PTR[GLOBAL(_12000)] - paddd xmm3, XMMWORD PTR[GLOBAL(_51000)] - paddd xmm5, XMMWORD PTR[GLOBAL(_51000)] - - psrad xmm1, 16 ; (c1 * 2217 + d1 * 5352 + 14500)>>16 - psrad xmm4, 16 ; (c1 * 2217 + d1 * 5352 + 14500)>>16 - psrad xmm3, 16 ; (d1 * 2217 - c1 * 5352 + 7500)>>16 - psrad xmm5, 16 ; (d1 * 2217 - c1 * 5352 + 7500)>>16 - - packssdw xmm1, xmm4 ; op[4] - packssdw xmm3, xmm5 ; op[12] - - paddw xmm1, xmm6 ; op[4] += (d1!=0) - - movdqa xmm4, xmm0 - movdqa xmm5, xmm2 - - punpcklqdq xmm0, xmm1 - punpckhqdq xmm4, xmm1 - - punpcklqdq xmm2, xmm3 - punpckhqdq xmm5, xmm3 - - movdqa XMMWORD PTR[output + 0 ], xmm0 - movdqa XMMWORD PTR[output + 16], xmm2 - movdqa XMMWORD PTR[output + 32], xmm4 - movdqa XMMWORD PTR[output + 48], xmm5 - - STACK_FRAME_DESTROY - -SECTION_RODATA -align 16 -_5352_2217: - dw 5352 - dw 2217 - dw 5352 - dw 2217 - dw 5352 - dw 2217 - dw 5352 - dw 2217 -align 16 -_2217_neg5352: - dw 2217 - dw -5352 - dw 2217 - dw -5352 - dw 2217 - dw 
-5352 - dw 2217 - dw -5352 -align 16 -_mult_add: - times 8 dw 1 -align 16 -_cmp_mask: - times 4 dw 1 - times 4 dw 0 -align 16 -_cmp_mask8x4: - times 8 dw 1 -align 16 -_mult_sub: - dw 1 - dw -1 - dw 1 - dw -1 - dw 1 - dw -1 - dw 1 - dw -1 -align 16 -_7: - times 4 dd 7 -align 16 -_7w: - times 8 dw 7 -align 16 -_14500: - times 4 dd 14500 -align 16 -_7500: - times 4 dd 7500 -align 16 -_12000: - times 4 dd 12000 -align 16 -_51000: - times 4 dd 51000 diff --git a/vp8/encoder/x86/encodeopt.asm b/vp8/encoder/x86/encodeopt.asm deleted file mode 100644 index 9e4cd1102..000000000 --- a/vp8/encoder/x86/encodeopt.asm +++ /dev/null @@ -1,386 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - -%include "vpx_ports/x86_abi_support.asm" - -;int vp9_block_error_xmm(short *coeff_ptr, short *dcoef_ptr) -global sym(vp9_block_error_xmm) -sym(vp9_block_error_xmm): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 2 - push rsi - push rdi - ; end prologue - - mov rsi, arg(0) ;coeff_ptr - mov rdi, arg(1) ;dcoef_ptr - - movdqa xmm0, [rsi] - movdqa xmm1, [rdi] - - movdqa xmm2, [rsi+16] - movdqa xmm3, [rdi+16] - - psubw xmm0, xmm1 - psubw xmm2, xmm3 - - pmaddwd xmm0, xmm0 - pmaddwd xmm2, xmm2 - - paddd xmm0, xmm2 - - pxor xmm5, xmm5 - movdqa xmm1, xmm0 - - punpckldq xmm0, xmm5 - punpckhdq xmm1, xmm5 - - paddd xmm0, xmm1 - movdqa xmm1, xmm0 - - psrldq xmm0, 8 - paddd xmm0, xmm1 - - movq rax, xmm0 - - pop rdi - pop rsi - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret - -;int vp9_block_error_mmx(short *coeff_ptr, short *dcoef_ptr) -global sym(vp9_block_error_mmx) -sym(vp9_block_error_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 2 - push rsi - push rdi - ; end prolog - - - mov rsi, arg(0) ;coeff_ptr - pxor mm7, mm7 - - mov rdi, arg(1) ;dcoef_ptr - movq mm3, [rsi] - - movq mm4, [rdi] - movq mm5, [rsi+8] - - movq mm6, [rdi+8] - pxor mm1, mm1 ; from movd mm1, dc ; dc =0 - - movq mm2, mm7 - psubw mm5, mm6 - - por mm1, mm2 - pmaddwd mm5, mm5 - - pcmpeqw mm1, mm7 - psubw mm3, mm4 - - pand mm1, mm3 - pmaddwd mm1, mm1 - - paddd mm1, mm5 - movq mm3, [rsi+16] - - movq mm4, [rdi+16] - movq mm5, [rsi+24] - - movq mm6, [rdi+24] - psubw mm5, mm6 - - pmaddwd mm5, mm5 - psubw mm3, mm4 - - pmaddwd mm3, mm3 - paddd mm3, mm5 - - paddd mm1, mm3 - movq mm0, mm1 - - psrlq mm1, 32 - paddd mm0, mm1 - - movq rax, mm0 - - pop rdi - pop rsi - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret - - -;int vp9_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc); -global sym(vp9_mbblock_error_mmx_impl) -sym(vp9_mbblock_error_mmx_impl): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 3 - push rsi - push rdi - ; end prolog - - - mov rsi, arg(0) ;coeff_ptr - pxor mm7, mm7 - - 
mov rdi, arg(1) ;dcoef_ptr - pxor mm2, mm2 - - movd mm1, dword ptr arg(2) ;dc - por mm1, mm2 - - pcmpeqw mm1, mm7 - mov rcx, 16 - -.mberror_loop_mmx: - movq mm3, [rsi] - movq mm4, [rdi] - - movq mm5, [rsi+8] - movq mm6, [rdi+8] - - - psubw mm5, mm6 - pmaddwd mm5, mm5 - - psubw mm3, mm4 - pand mm3, mm1 - - pmaddwd mm3, mm3 - paddd mm2, mm5 - - paddd mm2, mm3 - movq mm3, [rsi+16] - - movq mm4, [rdi+16] - movq mm5, [rsi+24] - - movq mm6, [rdi+24] - psubw mm5, mm6 - - pmaddwd mm5, mm5 - psubw mm3, mm4 - - pmaddwd mm3, mm3 - paddd mm2, mm5 - - paddd mm2, mm3 - add rsi, 32 - - add rdi, 32 - sub rcx, 1 - - jnz .mberror_loop_mmx - - movq mm0, mm2 - psrlq mm2, 32 - - paddd mm0, mm2 - movq rax, mm0 - - pop rdi - pop rsi - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret - - -;int vp9_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc); -global sym(vp9_mbblock_error_xmm_impl) -sym(vp9_mbblock_error_xmm_impl): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 3 - SAVE_XMM 6 - push rsi - push rdi - ; end prolog - - - mov rsi, arg(0) ;coeff_ptr - pxor xmm6, xmm6 - - mov rdi, arg(1) ;dcoef_ptr - pxor xmm4, xmm4 - - movd xmm5, dword ptr arg(2) ;dc - por xmm5, xmm4 - - pcmpeqw xmm5, xmm6 - mov rcx, 16 - -.mberror_loop: - movdqa xmm0, [rsi] - movdqa xmm1, [rdi] - - movdqa xmm2, [rsi+16] - movdqa xmm3, [rdi+16] - - - psubw xmm2, xmm3 - pmaddwd xmm2, xmm2 - - psubw xmm0, xmm1 - pand xmm0, xmm5 - - pmaddwd xmm0, xmm0 - add rsi, 32 - - add rdi, 32 - - sub rcx, 1 - paddd xmm4, xmm2 - - paddd xmm4, xmm0 - jnz .mberror_loop - - movdqa xmm0, xmm4 - punpckldq xmm0, xmm6 - - punpckhdq xmm4, xmm6 - paddd xmm0, xmm4 - - movdqa xmm1, xmm0 - psrldq xmm0, 8 - - paddd xmm0, xmm1 - movq rax, xmm0 - - pop rdi - pop rsi - ; begin epilog - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -;int vp9_mbuverror_mmx_impl(short *s_ptr, short *d_ptr); -global sym(vp9_mbuverror_mmx_impl) -sym(vp9_mbuverror_mmx_impl): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 2 - push rsi - push rdi - ; end 
prolog - - - mov rsi, arg(0) ;s_ptr - mov rdi, arg(1) ;d_ptr - - mov rcx, 16 - pxor mm7, mm7 - -.mbuverror_loop_mmx: - - movq mm1, [rsi] - movq mm2, [rdi] - - psubw mm1, mm2 - pmaddwd mm1, mm1 - - - movq mm3, [rsi+8] - movq mm4, [rdi+8] - - psubw mm3, mm4 - pmaddwd mm3, mm3 - - - paddd mm7, mm1 - paddd mm7, mm3 - - - add rsi, 16 - add rdi, 16 - - dec rcx - jnz .mbuverror_loop_mmx - - movq mm0, mm7 - psrlq mm7, 32 - - paddd mm0, mm7 - movq rax, mm0 - - pop rdi - pop rsi - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret - - -;int vp9_mbuverror_xmm_impl(short *s_ptr, short *d_ptr); -global sym(vp9_mbuverror_xmm_impl) -sym(vp9_mbuverror_xmm_impl): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 2 - push rsi - push rdi - ; end prolog - - - mov rsi, arg(0) ;s_ptr - mov rdi, arg(1) ;d_ptr - - mov rcx, 16 - pxor xmm3, xmm3 - -.mbuverror_loop: - - movdqa xmm1, [rsi] - movdqa xmm2, [rdi] - - psubw xmm1, xmm2 - pmaddwd xmm1, xmm1 - - paddd xmm3, xmm1 - - add rsi, 16 - add rdi, 16 - - dec rcx - jnz .mbuverror_loop - - pxor xmm0, xmm0 - movdqa xmm1, xmm3 - - movdqa xmm2, xmm1 - punpckldq xmm1, xmm0 - - punpckhdq xmm2, xmm0 - paddd xmm1, xmm2 - - movdqa xmm2, xmm1 - - psrldq xmm1, 8 - paddd xmm1, xmm2 - - movq rax, xmm1 - - pop rdi - pop rsi - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret diff --git a/vp8/encoder/x86/fwalsh_sse2.asm b/vp8/encoder/x86/fwalsh_sse2.asm deleted file mode 100644 index c6b18c1a1..000000000 --- a/vp8/encoder/x86/fwalsh_sse2.asm +++ /dev/null @@ -1,164 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - -%include "vpx_ports/x86_abi_support.asm" - -;void vp9_short_walsh4x4_sse2(short *input, short *output, int pitch) -global sym(vp9_short_walsh4x4_sse2) -sym(vp9_short_walsh4x4_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 3 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ; input - mov rdi, arg(1) ; output - movsxd rdx, dword ptr arg(2) ; pitch - - ; first for loop - movq xmm0, MMWORD PTR [rsi] ; load input - movq xmm1, MMWORD PTR [rsi + rdx] - lea rsi, [rsi + rdx*2] - movq xmm2, MMWORD PTR [rsi] - movq xmm3, MMWORD PTR [rsi + rdx] - - punpcklwd xmm0, xmm1 - punpcklwd xmm2, xmm3 - - movdqa xmm1, xmm0 - punpckldq xmm0, xmm2 ; ip[1] ip[0] - punpckhdq xmm1, xmm2 ; ip[3] ip[2] - - movdqa xmm2, xmm0 - paddw xmm0, xmm1 - psubw xmm2, xmm1 - - psllw xmm0, 2 ; d1 a1 - psllw xmm2, 2 ; c1 b1 - - movdqa xmm1, xmm0 - punpcklqdq xmm0, xmm2 ; b1 a1 - punpckhqdq xmm1, xmm2 ; c1 d1 - - pxor xmm6, xmm6 - movq xmm6, xmm0 - pxor xmm7, xmm7 - pcmpeqw xmm7, xmm6 - paddw xmm7, [GLOBAL(c1)] - - movdqa xmm2, xmm0 - paddw xmm0, xmm1 ; b1+c1 a1+d1 - psubw xmm2, xmm1 ; b1-c1 a1-d1 - paddw xmm0, xmm7 ; b1+c1 a1+d1+(a1!=0) - - ; second for loop - ; input: 13 9 5 1 12 8 4 0 (xmm0) - ; 14 10 6 2 15 11 7 3 (xmm2) - ; after shuffle: - ; 13 5 9 1 12 4 8 0 (xmm0) - ; 14 6 10 2 15 7 11 3 (xmm1) - pshuflw xmm3, xmm0, 0xd8 - pshufhw xmm0, xmm3, 0xd8 - pshuflw xmm3, xmm2, 0xd8 - pshufhw xmm1, xmm3, 0xd8 - - movdqa xmm2, xmm0 - pmaddwd xmm0, [GLOBAL(c1)] ; d11 a11 d10 a10 - pmaddwd xmm2, [GLOBAL(cn1)] ; c11 b11 c10 b10 - movdqa xmm3, xmm1 - pmaddwd xmm1, [GLOBAL(c1)] ; d12 a12 d13 a13 - pmaddwd xmm3, [GLOBAL(cn1)] ; c12 b12 c13 b13 - - pshufd xmm4, xmm0, 0xd8 ; d11 d10 a11 a10 - pshufd xmm5, xmm2, 0xd8 ; c11 c10 b11 b10 - pshufd xmm6, xmm1, 0x72 ; d13 d12 a13 a12 - pshufd xmm7, xmm3, 0x72 ; c13 c12 b13 b12 - - movdqa xmm0, xmm4 - punpcklqdq xmm0, xmm5 ; b11 b10 a11 a10 - punpckhqdq xmm4, xmm5 ; c11 c10 d11 d10 - movdqa xmm1, xmm6 - punpcklqdq xmm1, xmm7 
; b13 b12 a13 a12 - punpckhqdq xmm6, xmm7 ; c13 c12 d13 d12 - - movdqa xmm2, xmm0 - paddd xmm0, xmm4 ; b21 b20 a21 a20 - psubd xmm2, xmm4 ; c21 c20 d21 d20 - movdqa xmm3, xmm1 - paddd xmm1, xmm6 ; b23 b22 a23 a22 - psubd xmm3, xmm6 ; c23 c22 d23 d22 - - pxor xmm4, xmm4 - movdqa xmm5, xmm4 - pcmpgtd xmm4, xmm0 - pcmpgtd xmm5, xmm2 - pand xmm4, [GLOBAL(cd1)] - pand xmm5, [GLOBAL(cd1)] - - pxor xmm6, xmm6 - movdqa xmm7, xmm6 - pcmpgtd xmm6, xmm1 - pcmpgtd xmm7, xmm3 - pand xmm6, [GLOBAL(cd1)] - pand xmm7, [GLOBAL(cd1)] - - paddd xmm0, xmm4 - paddd xmm2, xmm5 - paddd xmm0, [GLOBAL(cd3)] - paddd xmm2, [GLOBAL(cd3)] - paddd xmm1, xmm6 - paddd xmm3, xmm7 - paddd xmm1, [GLOBAL(cd3)] - paddd xmm3, [GLOBAL(cd3)] - - psrad xmm0, 3 - psrad xmm1, 3 - psrad xmm2, 3 - psrad xmm3, 3 - movdqa xmm4, xmm0 - punpcklqdq xmm0, xmm1 ; a23 a22 a21 a20 - punpckhqdq xmm4, xmm1 ; b23 b22 b21 b20 - movdqa xmm5, xmm2 - punpckhqdq xmm2, xmm3 ; c23 c22 c21 c20 - punpcklqdq xmm5, xmm3 ; d23 d22 d21 d20 - - packssdw xmm0, xmm4 ; b23 b22 b21 b20 a23 a22 a21 a20 - packssdw xmm2, xmm5 ; d23 d22 d21 d20 c23 c22 c21 c20 - - movdqa XMMWORD PTR [rdi], xmm0 - movdqa XMMWORD PTR [rdi + 16], xmm2 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -SECTION_RODATA -align 16 -c1: - dw 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001 -align 16 -cn1: - dw 0x0001, 0xffff, 0x0001, 0xffff, 0x0001, 0xffff, 0x0001, 0xffff -align 16 -cd1: - dd 0x00000001, 0x00000001, 0x00000001, 0x00000001 -align 16 -cd3: - dd 0x00000003, 0x00000003, 0x00000003, 0x00000003 diff --git a/vp8/encoder/x86/mcomp_x86.h b/vp8/encoder/x86/mcomp_x86.h deleted file mode 100644 index cde954550..000000000 --- a/vp8/encoder/x86/mcomp_x86.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. 
An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef MCOMP_X86_H -#define MCOMP_X86_H - -#if HAVE_SSE3 -#if !CONFIG_RUNTIME_CPU_DETECT - -#undef vp9_search_full_search -#define vp9_search_full_search vp9_full_search_sadx3 - -#undef vp9_search_refining_search -#define vp9_search_refining_search vp9_refining_search_sadx4 - -#undef vp9_search_diamond_search -#define vp9_search_diamond_search vp9_diamond_search_sadx4 - -#endif -#endif - -#if HAVE_SSE4_1 -#if !CONFIG_RUNTIME_CPU_DETECT - -#undef vp9_search_full_search -#define vp9_search_full_search vp9_full_search_sadx8 - -#endif -#endif - -#endif - diff --git a/vp8/encoder/x86/quantize_mmx.asm b/vp8/encoder/x86/quantize_mmx.asm deleted file mode 100644 index 050119a31..000000000 --- a/vp8/encoder/x86/quantize_mmx.asm +++ /dev/null @@ -1,286 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - -%include "vpx_ports/x86_abi_support.asm" - -;int vp9_fast_quantize_b_impl_mmx(short *coeff_ptr, short *zbin_ptr, -; short *qcoeff_ptr,short *dequant_ptr, -; short *scan_mask, short *round_ptr, -; short *quant_ptr, short *dqcoeff_ptr); -global sym(vp9_fast_quantize_b_impl_mmx) -sym(vp9_fast_quantize_b_impl_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 8 - push rsi - push rdi - ; end prolog - - - mov rsi, arg(0) ;coeff_ptr - movq mm0, [rsi] - - mov rax, arg(1) ;zbin_ptr - movq mm1, [rax] - - movq mm3, mm0 - psraw mm0, 15 - - pxor mm3, mm0 - psubw mm3, mm0 ; abs - - movq mm2, mm3 - pcmpgtw mm1, mm2 - - pandn mm1, mm2 - movq mm3, mm1 - - mov rdx, arg(6) ;quant_ptr - movq mm1, [rdx] - - mov rcx, arg(5) ;round_ptr - movq mm2, [rcx] - - paddw mm3, mm2 - pmulhuw mm3, mm1 - - pxor mm3, mm0 - psubw mm3, mm0 ;gain the sign back - - mov rdi, arg(2) ;qcoeff_ptr - movq mm0, mm3 - - movq [rdi], mm3 - - mov rax, arg(3) ;dequant_ptr - movq mm2, [rax] - - pmullw mm3, mm2 - mov rax, arg(7) ;dqcoeff_ptr - - movq [rax], mm3 - - ; next 8 - movq mm4, [rsi+8] - - mov rax, arg(1) ;zbin_ptr - movq mm5, [rax+8] - - movq mm7, mm4 - psraw mm4, 15 - - pxor mm7, mm4 - psubw mm7, mm4 ; abs - - movq mm6, mm7 - pcmpgtw mm5, mm6 - - pandn mm5, mm6 - movq mm7, mm5 - - movq mm5, [rdx+8] - movq mm6, [rcx+8] - - paddw mm7, mm6 - pmulhuw mm7, mm5 - - pxor mm7, mm4 - psubw mm7, mm4;gain the sign back - - mov rdi, arg(2) ;qcoeff_ptr - - movq mm1, mm7 - movq [rdi+8], mm7 - - mov rax, arg(3) ;dequant_ptr - movq mm6, [rax+8] - - pmullw mm7, mm6 - mov rax, arg(7) ;dqcoeff_ptr - - movq [rax+8], mm7 - - - ; next 8 - movq mm4, [rsi+16] - - mov rax, arg(1) ;zbin_ptr - movq mm5, [rax+16] - - movq mm7, mm4 - psraw mm4, 15 - - pxor mm7, mm4 - psubw mm7, mm4 ; abs - - movq mm6, mm7 - pcmpgtw mm5, mm6 - - pandn mm5, mm6 - movq mm7, mm5 - - movq mm5, [rdx+16] - movq mm6, [rcx+16] - - paddw mm7, mm6 - pmulhuw mm7, mm5 - - pxor mm7, mm4 - psubw mm7, mm4;gain the sign back - - mov rdi, arg(2) ;qcoeff_ptr - 
- movq mm1, mm7 - movq [rdi+16], mm7 - - mov rax, arg(3) ;dequant_ptr - movq mm6, [rax+16] - - pmullw mm7, mm6 - mov rax, arg(7) ;dqcoeff_ptr - - movq [rax+16], mm7 - - - ; next 8 - movq mm4, [rsi+24] - - mov rax, arg(1) ;zbin_ptr - movq mm5, [rax+24] - - movq mm7, mm4 - psraw mm4, 15 - - pxor mm7, mm4 - psubw mm7, mm4 ; abs - - movq mm6, mm7 - pcmpgtw mm5, mm6 - - pandn mm5, mm6 - movq mm7, mm5 - - movq mm5, [rdx+24] - movq mm6, [rcx+24] - - paddw mm7, mm6 - pmulhuw mm7, mm5 - - pxor mm7, mm4 - psubw mm7, mm4;gain the sign back - - mov rdi, arg(2) ;qcoeff_ptr - - movq mm1, mm7 - movq [rdi+24], mm7 - - mov rax, arg(3) ;dequant_ptr - movq mm6, [rax+24] - - pmullw mm7, mm6 - mov rax, arg(7) ;dqcoeff_ptr - - movq [rax+24], mm7 - - - - mov rdi, arg(4) ;scan_mask - mov rsi, arg(2) ;qcoeff_ptr - - pxor mm5, mm5 - pxor mm7, mm7 - - movq mm0, [rsi] - movq mm1, [rsi+8] - - movq mm2, [rdi] - movq mm3, [rdi+8]; - - pcmpeqw mm0, mm7 - pcmpeqw mm1, mm7 - - pcmpeqw mm6, mm6 - pxor mm0, mm6 - - pxor mm1, mm6 - psrlw mm0, 15 - - psrlw mm1, 15 - pmaddwd mm0, mm2 - - pmaddwd mm1, mm3 - movq mm5, mm0 - - paddd mm5, mm1 - - movq mm0, [rsi+16] - movq mm1, [rsi+24] - - movq mm2, [rdi+16] - movq mm3, [rdi+24]; - - pcmpeqw mm0, mm7 - pcmpeqw mm1, mm7 - - pcmpeqw mm6, mm6 - pxor mm0, mm6 - - pxor mm1, mm6 - psrlw mm0, 15 - - psrlw mm1, 15 - pmaddwd mm0, mm2 - - pmaddwd mm1, mm3 - paddd mm5, mm0 - - paddd mm5, mm1 - movq mm0, mm5 - - psrlq mm5, 32 - paddd mm0, mm5 - - ; eob adjustment begins here - movq rcx, mm0 - and rcx, 0xffff - - xor rdx, rdx - sub rdx, rcx ; rdx=-rcx - - bsr rax, rcx - inc rax - - sar rdx, 31 - and rax, rdx - ; Substitute the sse assembly for the old mmx mixed assembly/C. 
The - ; following is kept as reference - ; movq rcx, mm0 - ; bsr rax, rcx - ; - ; mov eob, rax - ; mov eee, rcx - ; - ;if(eee==0) - ;{ - ; eob=-1; - ;} - ;else if(eee<0) - ;{ - ; eob=15; - ;} - ;d->eob = eob+1; - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret diff --git a/vp8/encoder/x86/quantize_sse2.asm b/vp8/encoder/x86/quantize_sse2.asm deleted file mode 100644 index 153060e74..000000000 --- a/vp8/encoder/x86/quantize_sse2.asm +++ /dev/null @@ -1,380 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" -%include "asm_enc_offsets.asm" - - -; void vp9_regular_quantize_b_sse2 | arg -; (BLOCK *b, | 0 -; BLOCKD *d) | 1 - -global sym(vp9_regular_quantize_b_sse2) -sym(vp9_regular_quantize_b_sse2): - push rbp - mov rbp, rsp - SAVE_XMM 7 - GET_GOT rbx - -%if ABI_IS_32BIT - push rdi - push rsi -%else - %ifidn __OUTPUT_FORMAT__,x64 - push rdi - push rsi - %endif -%endif - - ALIGN_STACK 16, rax - %define zrun_zbin_boost 0 ; 8 - %define abs_minus_zbin 8 ; 32 - %define temp_qcoeff 40 ; 32 - %define qcoeff 72 ; 32 - %define stack_size 104 - sub rsp, stack_size - ; end prolog - -%if ABI_IS_32BIT - mov rdi, arg(0) ; BLOCK *b - mov rsi, arg(1) ; BLOCKD *d -%else - %ifidn __OUTPUT_FORMAT__,x64 - mov rdi, rcx ; BLOCK *b - mov rsi, rdx ; BLOCKD *d - %else - ;mov rdi, rdi ; BLOCK *b - ;mov rsi, rsi ; BLOCKD *d - %endif -%endif - - mov rdx, [rdi + vp9_block_coeff] ; coeff_ptr - mov rcx, [rdi + vp9_block_zbin] ; zbin_ptr - movd xmm7, [rdi + vp9_block_zbin_extra] ; zbin_oq_value - - ; z - movdqa xmm0, [rdx] - movdqa xmm4, [rdx + 16] - mov rdx, [rdi + vp9_block_round] ; round_ptr - - pshuflw xmm7, xmm7, 0 - punpcklwd xmm7, xmm7 ; 
duplicated zbin_oq_value - - movdqa xmm1, xmm0 - movdqa xmm5, xmm4 - - ; sz - psraw xmm0, 15 - psraw xmm4, 15 - - ; (z ^ sz) - pxor xmm1, xmm0 - pxor xmm5, xmm4 - - ; x = abs(z) - psubw xmm1, xmm0 - psubw xmm5, xmm4 - - movdqa xmm2, [rcx] - movdqa xmm3, [rcx + 16] - mov rcx, [rdi + vp9_block_quant] ; quant_ptr - - ; *zbin_ptr + zbin_oq_value - paddw xmm2, xmm7 - paddw xmm3, xmm7 - - ; x - (*zbin_ptr + zbin_oq_value) - psubw xmm1, xmm2 - psubw xmm5, xmm3 - movdqa [rsp + abs_minus_zbin], xmm1 - movdqa [rsp + abs_minus_zbin + 16], xmm5 - - ; add (zbin_ptr + zbin_oq_value) back - paddw xmm1, xmm2 - paddw xmm5, xmm3 - - movdqa xmm2, [rdx] - movdqa xmm6, [rdx + 16] - - movdqa xmm3, [rcx] - movdqa xmm7, [rcx + 16] - - ; x + round - paddw xmm1, xmm2 - paddw xmm5, xmm6 - - ; y = x * quant_ptr >> 16 - pmulhw xmm3, xmm1 - pmulhw xmm7, xmm5 - - ; y += x - paddw xmm1, xmm3 - paddw xmm5, xmm7 - - movdqa [rsp + temp_qcoeff], xmm1 - movdqa [rsp + temp_qcoeff + 16], xmm5 - - pxor xmm6, xmm6 - ; zero qcoeff - movdqa [rsp + qcoeff], xmm6 - movdqa [rsp + qcoeff + 16], xmm6 - - mov rdx, [rdi + vp9_block_zrun_zbin_boost] ; zbin_boost_ptr - mov rax, [rdi + vp9_block_quant_shift] ; quant_shift_ptr - mov [rsp + zrun_zbin_boost], rdx - -%macro ZIGZAG_LOOP 1 - ; x - movsx ecx, WORD PTR[rsp + abs_minus_zbin + %1 * 2] - - ; if (x >= zbin) - sub cx, WORD PTR[rdx] ; x - zbin - lea rdx, [rdx + 2] ; zbin_boost_ptr++ - jl .rq_zigzag_loop_%1 ; x < zbin - - movsx edi, WORD PTR[rsp + temp_qcoeff + %1 * 2] - - ; downshift by quant_shift[rc] - movsx cx, BYTE PTR[rax + %1] ; quant_shift_ptr[rc] - sar edi, cl ; also sets Z bit - je .rq_zigzag_loop_%1 ; !y - mov WORD PTR[rsp + qcoeff + %1 * 2], di ;qcoeff_ptr[rc] = temp_qcoeff[rc] - mov rdx, [rsp + zrun_zbin_boost] ; reset to b->zrun_zbin_boost -.rq_zigzag_loop_%1: -%endmacro -; in vp9_default_zig_zag1d order: see vp8/common/entropy.c -ZIGZAG_LOOP 0 -ZIGZAG_LOOP 1 -ZIGZAG_LOOP 4 -ZIGZAG_LOOP 8 -ZIGZAG_LOOP 5 -ZIGZAG_LOOP 2 -ZIGZAG_LOOP 3 -ZIGZAG_LOOP 6 
-ZIGZAG_LOOP 9 -ZIGZAG_LOOP 12 -ZIGZAG_LOOP 13 -ZIGZAG_LOOP 10 -ZIGZAG_LOOP 7 -ZIGZAG_LOOP 11 -ZIGZAG_LOOP 14 -ZIGZAG_LOOP 15 - - movdqa xmm2, [rsp + qcoeff] - movdqa xmm3, [rsp + qcoeff + 16] - - mov rcx, [rsi + vp9_blockd_dequant] ; dequant_ptr - mov rdi, [rsi + vp9_blockd_dqcoeff] ; dqcoeff_ptr - - ; y ^ sz - pxor xmm2, xmm0 - pxor xmm3, xmm4 - ; x = (y ^ sz) - sz - psubw xmm2, xmm0 - psubw xmm3, xmm4 - - ; dequant - movdqa xmm0, [rcx] - movdqa xmm1, [rcx + 16] - - mov rcx, [rsi + vp9_blockd_qcoeff] ; qcoeff_ptr - - pmullw xmm0, xmm2 - pmullw xmm1, xmm3 - - movdqa [rcx], xmm2 ; store qcoeff - movdqa [rcx + 16], xmm3 - movdqa [rdi], xmm0 ; store dqcoeff - movdqa [rdi + 16], xmm1 - - ; select the last value (in zig_zag order) for EOB - pcmpeqw xmm2, xmm6 - pcmpeqw xmm3, xmm6 - ; ! - pcmpeqw xmm6, xmm6 - pxor xmm2, xmm6 - pxor xmm3, xmm6 - ; mask inv_zig_zag - pand xmm2, [GLOBAL(inv_zig_zag)] - pand xmm3, [GLOBAL(inv_zig_zag + 16)] - ; select the max value - pmaxsw xmm2, xmm3 - pshufd xmm3, xmm2, 00001110b - pmaxsw xmm2, xmm3 - pshuflw xmm3, xmm2, 00001110b - pmaxsw xmm2, xmm3 - pshuflw xmm3, xmm2, 00000001b - pmaxsw xmm2, xmm3 - movd eax, xmm2 - and eax, 0xff - mov [rsi + vp9_blockd_eob], eax - - ; begin epilog - add rsp, stack_size - pop rsp -%if ABI_IS_32BIT - pop rsi - pop rdi -%else - %ifidn __OUTPUT_FORMAT__,x64 - pop rsi - pop rdi - %endif -%endif - RESTORE_GOT - RESTORE_XMM - pop rbp - ret - -; void vp9_fast_quantize_b_sse2 | arg -; (BLOCK *b, | 0 -; BLOCKD *d) | 1 - -global sym(vp9_fast_quantize_b_sse2) -sym(vp9_fast_quantize_b_sse2): - push rbp - mov rbp, rsp - GET_GOT rbx - -%if ABI_IS_32BIT - push rdi - push rsi -%else - %ifidn __OUTPUT_FORMAT__,x64 - push rdi - push rsi - %else - ; these registers are used for passing arguments - %endif -%endif - - ; end prolog - -%if ABI_IS_32BIT - mov rdi, arg(0) ; BLOCK *b - mov rsi, arg(1) ; BLOCKD *d -%else - %ifidn __OUTPUT_FORMAT__,x64 - mov rdi, rcx ; BLOCK *b - mov rsi, rdx ; BLOCKD *d - %else - ;mov rdi, rdi 
; BLOCK *b - ;mov rsi, rsi ; BLOCKD *d - %endif -%endif - - mov rax, [rdi + vp9_block_coeff] - mov rcx, [rdi + vp9_block_round] - mov rdx, [rdi + vp9_block_quant_fast] - - ; z = coeff - movdqa xmm0, [rax] - movdqa xmm4, [rax + 16] - - ; dup z so we can save sz - movdqa xmm1, xmm0 - movdqa xmm5, xmm4 - - ; sz = z >> 15 - psraw xmm0, 15 - psraw xmm4, 15 - - ; x = abs(z) = (z ^ sz) - sz - pxor xmm1, xmm0 - pxor xmm5, xmm4 - psubw xmm1, xmm0 - psubw xmm5, xmm4 - - ; x += round - paddw xmm1, [rcx] - paddw xmm5, [rcx + 16] - - mov rax, [rsi + vp9_blockd_qcoeff] - mov rcx, [rsi + vp9_blockd_dequant] - mov rdi, [rsi + vp9_blockd_dqcoeff] - - ; y = x * quant >> 16 - pmulhw xmm1, [rdx] - pmulhw xmm5, [rdx + 16] - - ; x = (y ^ sz) - sz - pxor xmm1, xmm0 - pxor xmm5, xmm4 - psubw xmm1, xmm0 - psubw xmm5, xmm4 - - ; qcoeff = x - movdqa [rax], xmm1 - movdqa [rax + 16], xmm5 - - ; x * dequant - movdqa xmm2, xmm1 - movdqa xmm3, xmm5 - pmullw xmm2, [rcx] - pmullw xmm3, [rcx + 16] - - ; dqcoeff = x * dequant - movdqa [rdi], xmm2 - movdqa [rdi + 16], xmm3 - - pxor xmm4, xmm4 ;clear all bits - pcmpeqw xmm1, xmm4 - pcmpeqw xmm5, xmm4 - - pcmpeqw xmm4, xmm4 ;set all bits - pxor xmm1, xmm4 - pxor xmm5, xmm4 - - pand xmm1, [GLOBAL(inv_zig_zag)] - pand xmm5, [GLOBAL(inv_zig_zag + 16)] - - pmaxsw xmm1, xmm5 - - ; now down to 8 - pshufd xmm5, xmm1, 00001110b - - pmaxsw xmm1, xmm5 - - ; only 4 left - pshuflw xmm5, xmm1, 00001110b - - pmaxsw xmm1, xmm5 - - ; okay, just 2! 
- pshuflw xmm5, xmm1, 00000001b - - pmaxsw xmm1, xmm5 - - movd eax, xmm1 - and eax, 0xff - mov [rsi + vp9_blockd_eob], eax - - ; begin epilog -%if ABI_IS_32BIT - pop rsi - pop rdi -%else - %ifidn __OUTPUT_FORMAT__,x64 - pop rsi - pop rdi - %endif -%endif - - RESTORE_GOT - pop rbp - ret - -SECTION_RODATA -align 16 -inv_zig_zag: - dw 0x0001, 0x0002, 0x0006, 0x0007 - dw 0x0003, 0x0005, 0x0008, 0x000d - dw 0x0004, 0x0009, 0x000c, 0x000e - dw 0x000a, 0x000b, 0x000f, 0x0010 diff --git a/vp8/encoder/x86/quantize_sse4.asm b/vp8/encoder/x86/quantize_sse4.asm deleted file mode 100644 index 8ce1b7cff..000000000 --- a/vp8/encoder/x86/quantize_sse4.asm +++ /dev/null @@ -1,254 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. 
-; - - -%include "vpx_ports/x86_abi_support.asm" -%include "asm_enc_offsets.asm" - - -; void vp9_regular_quantize_b_sse4 | arg -; (BLOCK *b, | 0 -; BLOCKD *d) | 1 - -global sym(vp9_regular_quantize_b_sse4) -sym(vp9_regular_quantize_b_sse4): - -%if ABI_IS_32BIT - push rbp - mov rbp, rsp - GET_GOT rbx - push rdi - push rsi - - ALIGN_STACK 16, rax - %define qcoeff 0 ; 32 - %define stack_size 32 - sub rsp, stack_size -%else - %ifidn __OUTPUT_FORMAT__,x64 - SAVE_XMM 8, u - push rdi - push rsi - %endif -%endif - ; end prolog - -%if ABI_IS_32BIT - mov rdi, arg(0) ; BLOCK *b - mov rsi, arg(1) ; BLOCKD *d -%else - %ifidn __OUTPUT_FORMAT__,x64 - mov rdi, rcx ; BLOCK *b - mov rsi, rdx ; BLOCKD *d - %else - ;mov rdi, rdi ; BLOCK *b - ;mov rsi, rsi ; BLOCKD *d - %endif -%endif - - mov rax, [rdi + vp9_block_coeff] - mov rcx, [rdi + vp9_block_zbin] - mov rdx, [rdi + vp9_block_round] - movd xmm7, [rdi + vp9_block_zbin_extra] - - ; z - movdqa xmm0, [rax] - movdqa xmm1, [rax + 16] - - ; duplicate zbin_oq_value - pshuflw xmm7, xmm7, 0 - punpcklwd xmm7, xmm7 - - movdqa xmm2, xmm0 - movdqa xmm3, xmm1 - - ; sz - psraw xmm0, 15 - psraw xmm1, 15 - - ; (z ^ sz) - pxor xmm2, xmm0 - pxor xmm3, xmm1 - - ; x = abs(z) - psubw xmm2, xmm0 - psubw xmm3, xmm1 - - ; zbin - movdqa xmm4, [rcx] - movdqa xmm5, [rcx + 16] - - ; *zbin_ptr + zbin_oq_value - paddw xmm4, xmm7 - paddw xmm5, xmm7 - - movdqa xmm6, xmm2 - movdqa xmm7, xmm3 - - ; x - (*zbin_ptr + zbin_oq_value) - psubw xmm6, xmm4 - psubw xmm7, xmm5 - - ; round - movdqa xmm4, [rdx] - movdqa xmm5, [rdx + 16] - - mov rax, [rdi + vp9_block_quant_shift] - mov rcx, [rdi + vp9_block_quant] - mov rdx, [rdi + vp9_block_zrun_zbin_boost] - - ; x + round - paddw xmm2, xmm4 - paddw xmm3, xmm5 - - ; quant - movdqa xmm4, [rcx] - movdqa xmm5, [rcx + 16] - - ; y = x * quant_ptr >> 16 - pmulhw xmm4, xmm2 - pmulhw xmm5, xmm3 - - ; y += x - paddw xmm2, xmm4 - paddw xmm3, xmm5 - - pxor xmm4, xmm4 -%if ABI_IS_32BIT - movdqa [rsp + qcoeff], xmm4 - movdqa [rsp + qcoeff 
+ 16], xmm4 -%else - pxor xmm8, xmm8 -%endif - - ; quant_shift - movdqa xmm5, [rax] - - ; zrun_zbin_boost - mov rax, rdx - -%macro ZIGZAG_LOOP 5 - ; x - pextrw ecx, %4, %2 - - ; if (x >= zbin) - sub cx, WORD PTR[rdx] ; x - zbin - lea rdx, [rdx + 2] ; zbin_boost_ptr++ - jl .rq_zigzag_loop_%1 ; x < zbin - - pextrw edi, %3, %2 ; y - - ; downshift by quant_shift[rc] - pextrb ecx, xmm5, %1 ; quant_shift[rc] - sar edi, cl ; also sets Z bit - je .rq_zigzag_loop_%1 ; !y -%if ABI_IS_32BIT - mov WORD PTR[rsp + qcoeff + %1 *2], di -%else - pinsrw %5, edi, %2 ; qcoeff[rc] -%endif - mov rdx, rax ; reset to b->zrun_zbin_boost -.rq_zigzag_loop_%1: -%endmacro -; in vp9_default_zig_zag1d order: see vp8/common/entropy.c -ZIGZAG_LOOP 0, 0, xmm2, xmm6, xmm4 -ZIGZAG_LOOP 1, 1, xmm2, xmm6, xmm4 -ZIGZAG_LOOP 4, 4, xmm2, xmm6, xmm4 -ZIGZAG_LOOP 8, 0, xmm3, xmm7, xmm8 -ZIGZAG_LOOP 5, 5, xmm2, xmm6, xmm4 -ZIGZAG_LOOP 2, 2, xmm2, xmm6, xmm4 -ZIGZAG_LOOP 3, 3, xmm2, xmm6, xmm4 -ZIGZAG_LOOP 6, 6, xmm2, xmm6, xmm4 -ZIGZAG_LOOP 9, 1, xmm3, xmm7, xmm8 -ZIGZAG_LOOP 12, 4, xmm3, xmm7, xmm8 -ZIGZAG_LOOP 13, 5, xmm3, xmm7, xmm8 -ZIGZAG_LOOP 10, 2, xmm3, xmm7, xmm8 -ZIGZAG_LOOP 7, 7, xmm2, xmm6, xmm4 -ZIGZAG_LOOP 11, 3, xmm3, xmm7, xmm8 -ZIGZAG_LOOP 14, 6, xmm3, xmm7, xmm8 -ZIGZAG_LOOP 15, 7, xmm3, xmm7, xmm8 - - mov rcx, [rsi + vp9_blockd_dequant] - mov rdi, [rsi + vp9_blockd_dqcoeff] - -%if ABI_IS_32BIT - movdqa xmm4, [rsp + qcoeff] - movdqa xmm5, [rsp + qcoeff + 16] -%else - %define xmm5 xmm8 -%endif - - ; y ^ sz - pxor xmm4, xmm0 - pxor xmm5, xmm1 - ; x = (y ^ sz) - sz - psubw xmm4, xmm0 - psubw xmm5, xmm1 - - ; dequant - movdqa xmm0, [rcx] - movdqa xmm1, [rcx + 16] - - mov rcx, [rsi + vp9_blockd_qcoeff] - - pmullw xmm0, xmm4 - pmullw xmm1, xmm5 - - ; store qcoeff - movdqa [rcx], xmm4 - movdqa [rcx + 16], xmm5 - - ; store dqcoeff - movdqa [rdi], xmm0 - movdqa [rdi + 16], xmm1 - - ; select the last value (in zig_zag order) for EOB - pxor xmm6, xmm6 - pcmpeqw xmm4, xmm6 - pcmpeqw xmm5, xmm6 - - 
packsswb xmm4, xmm5 - pshufb xmm4, [GLOBAL(zig_zag1d)] - pmovmskb edx, xmm4 - xor rdi, rdi - mov eax, -1 - xor dx, ax - bsr eax, edx - sub edi, edx - sar edi, 31 - add eax, 1 - and eax, edi - - mov [rsi + vp9_blockd_eob], eax - - ; begin epilog -%if ABI_IS_32BIT - add rsp, stack_size - pop rsp - - pop rsi - pop rdi - RESTORE_GOT - pop rbp -%else - %undef xmm5 - %ifidn __OUTPUT_FORMAT__,x64 - pop rsi - pop rdi - RESTORE_XMM - %endif -%endif - - ret - -SECTION_RODATA -align 16 -; vp8/common/entropy.c: vp9_default_zig_zag1d -zig_zag1d: - db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 diff --git a/vp8/encoder/x86/quantize_ssse3.asm b/vp8/encoder/x86/quantize_ssse3.asm deleted file mode 100644 index 14a9912d2..000000000 --- a/vp8/encoder/x86/quantize_ssse3.asm +++ /dev/null @@ -1,138 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. 
-; - - -%include "vpx_ports/x86_abi_support.asm" -%include "asm_enc_offsets.asm" - - -; void vp9_fast_quantize_b_ssse3 | arg -; (BLOCK *b, | 0 -; BLOCKD *d) | 1 -; - -global sym(vp9_fast_quantize_b_ssse3) -sym(vp9_fast_quantize_b_ssse3): - push rbp - mov rbp, rsp - GET_GOT rbx - -%if ABI_IS_32BIT - push rdi - push rsi -%else - %ifidn __OUTPUT_FORMAT__,x64 - push rdi - push rsi - %endif -%endif - ; end prolog - -%if ABI_IS_32BIT - mov rdi, arg(0) ; BLOCK *b - mov rsi, arg(1) ; BLOCKD *d -%else - %ifidn __OUTPUT_FORMAT__,x64 - mov rdi, rcx ; BLOCK *b - mov rsi, rdx ; BLOCKD *d - %else - ;mov rdi, rdi ; BLOCK *b - ;mov rsi, rsi ; BLOCKD *d - %endif -%endif - - mov rax, [rdi + vp9_block_coeff] - mov rcx, [rdi + vp9_block_round] - mov rdx, [rdi + vp9_block_quant_fast] - - ; coeff - movdqa xmm0, [rax] - movdqa xmm4, [rax + 16] - - ; round - movdqa xmm2, [rcx] - movdqa xmm3, [rcx + 16] - - movdqa xmm1, xmm0 - movdqa xmm5, xmm4 - - ; sz = z >> 15 - psraw xmm0, 15 - psraw xmm4, 15 - - pabsw xmm1, xmm1 - pabsw xmm5, xmm5 - - paddw xmm1, xmm2 - paddw xmm5, xmm3 - - ; quant_fast - pmulhw xmm1, [rdx] - pmulhw xmm5, [rdx + 16] - - mov rax, [rsi + vp9_blockd_qcoeff] - mov rdi, [rsi + vp9_blockd_dequant] - mov rcx, [rsi + vp9_blockd_dqcoeff] - - pxor xmm1, xmm0 - pxor xmm5, xmm4 - psubw xmm1, xmm0 - psubw xmm5, xmm4 - - movdqa [rax], xmm1 - movdqa [rax + 16], xmm5 - - movdqa xmm2, [rdi] - movdqa xmm3, [rdi + 16] - - pxor xmm4, xmm4 - pmullw xmm2, xmm1 - pmullw xmm3, xmm5 - - pcmpeqw xmm1, xmm4 ;non zero mask - pcmpeqw xmm5, xmm4 ;non zero mask - packsswb xmm1, xmm5 - pshufb xmm1, [GLOBAL(zz_shuf)] - - pmovmskb edx, xmm1 - - xor rdi, rdi - mov eax, -1 - xor dx, ax ;flip the bits for bsr - bsr eax, edx - - movdqa [rcx], xmm2 ;store dqcoeff - movdqa [rcx + 16], xmm3 ;store dqcoeff - - sub edi, edx ;check for all zeros in bit mask - sar edi, 31 ;0 or -1 - add eax, 1 - and eax, edi ;if the bit mask was all zero, - ;then eob = 0 - mov [rsi + vp9_blockd_eob], eax - - ; begin epilog -%if 
ABI_IS_32BIT - pop rsi - pop rdi -%else - %ifidn __OUTPUT_FORMAT__,x64 - pop rsi - pop rdi - %endif -%endif - - RESTORE_GOT - pop rbp - ret - -SECTION_RODATA -align 16 -zz_shuf: - db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 diff --git a/vp8/encoder/x86/quantize_x86.h b/vp8/encoder/x86/quantize_x86.h deleted file mode 100644 index 6946e7e29..000000000 --- a/vp8/encoder/x86/quantize_x86.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. - */ - -#ifndef QUANTIZE_X86_H -#define QUANTIZE_X86_H - - -/* Note: - * - * This platform is commonly built for runtime CPU detection. If you modify - * any of the function mappings present in this file, be sure to also update - * them in the function pointer initialization code - */ -#if HAVE_MMX - -#endif /* HAVE_MMX */ - - -#if HAVE_SSE2 -extern prototype_quantize_block(vp9_regular_quantize_b_sse2); -#if !CONFIG_RUNTIME_CPU_DETECT - -#undef vp9_quantize_quantb -#define vp9_quantize_quantb vp9_regular_quantize_b_sse2 -#endif /* !CONFIG_RUNTIME_CPU_DETECT */ - -#endif /* HAVE_SSE2 */ - - -#if HAVE_SSE4_1 -extern prototype_quantize_block(vp9_regular_quantize_b_sse4); - -#if !CONFIG_RUNTIME_CPU_DETECT - -#undef vp9_quantize_quantb -#define vp9_quantize_quantb vp9_regular_quantize_b_sse4 - -#endif /* !CONFIG_RUNTIME_CPU_DETECT */ - -#endif /* HAVE_SSE4_1 */ - -#endif /* QUANTIZE_X86_H */ diff --git a/vp8/encoder/x86/sad_mmx.asm b/vp8/encoder/x86/sad_mmx.asm deleted file mode 100644 index 827c58cbb..000000000 --- a/vp8/encoder/x86/sad_mmx.asm +++ /dev/null @@ -1,427 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
-; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - -global sym(vp9_sad16x16_mmx) -global sym(vp9_sad8x16_mmx) -global sym(vp9_sad8x8_mmx) -global sym(vp9_sad4x4_mmx) -global sym(vp9_sad16x8_mmx) - -;unsigned int vp9_sad16x16_mmx( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) -sym(vp9_sad16x16_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - lea rcx, [rsi+rax*8] - - lea rcx, [rcx+rax*8] - pxor mm7, mm7 - - pxor mm6, mm6 - -.x16x16sad_mmx_loop: - - movq mm0, QWORD PTR [rsi] - movq mm2, QWORD PTR [rsi+8] - - movq mm1, QWORD PTR [rdi] - movq mm3, QWORD PTR [rdi+8] - - movq mm4, mm0 - movq mm5, mm2 - - psubusb mm0, mm1 - psubusb mm1, mm4 - - psubusb mm2, mm3 - psubusb mm3, mm5 - - por mm0, mm1 - por mm2, mm3 - - movq mm1, mm0 - movq mm3, mm2 - - punpcklbw mm0, mm6 - punpcklbw mm2, mm6 - - punpckhbw mm1, mm6 - punpckhbw mm3, mm6 - - paddw mm0, mm2 - paddw mm1, mm3 - - - lea rsi, [rsi+rax] - add rdi, rdx - - paddw mm7, mm0 - paddw mm7, mm1 - - cmp rsi, rcx - jne .x16x16sad_mmx_loop - - - movq mm0, mm7 - - punpcklwd mm0, mm6 - punpckhwd mm7, mm6 - - paddw mm0, mm7 - movq mm7, mm0 - - - psrlq mm0, 32 - paddw mm7, mm0 - - movq rax, mm7 - - pop rdi - pop rsi - mov rsp, rbp - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret - - -;unsigned int vp9_sad8x16_mmx( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) -sym(vp9_sad8x16_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 
- push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - lea rcx, [rsi+rax*8] - - lea rcx, [rcx+rax*8] - pxor mm7, mm7 - - pxor mm6, mm6 - -.x8x16sad_mmx_loop: - - movq mm0, QWORD PTR [rsi] - movq mm1, QWORD PTR [rdi] - - movq mm2, mm0 - psubusb mm0, mm1 - - psubusb mm1, mm2 - por mm0, mm1 - - movq mm2, mm0 - punpcklbw mm0, mm6 - - punpckhbw mm2, mm6 - lea rsi, [rsi+rax] - - add rdi, rdx - paddw mm7, mm0 - - paddw mm7, mm2 - cmp rsi, rcx - - jne .x8x16sad_mmx_loop - - movq mm0, mm7 - punpcklwd mm0, mm6 - - punpckhwd mm7, mm6 - paddw mm0, mm7 - - movq mm7, mm0 - psrlq mm0, 32 - - paddw mm7, mm0 - movq rax, mm7 - - pop rdi - pop rsi - mov rsp, rbp - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret - - -;unsigned int vp9_sad8x8_mmx( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) -sym(vp9_sad8x8_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - lea rcx, [rsi+rax*8] - pxor mm7, mm7 - - pxor mm6, mm6 - -.x8x8sad_mmx_loop: - - movq mm0, QWORD PTR [rsi] - movq mm1, QWORD PTR [rdi] - - movq mm2, mm0 - psubusb mm0, mm1 - - psubusb mm1, mm2 - por mm0, mm1 - - movq mm2, mm0 - punpcklbw mm0, mm6 - - punpckhbw mm2, mm6 - paddw mm0, mm2 - - lea rsi, [rsi+rax] - add rdi, rdx - - paddw mm7, mm0 - cmp rsi, rcx - - jne .x8x8sad_mmx_loop - - movq mm0, mm7 - punpcklwd mm0, mm6 - - punpckhwd mm7, mm6 - paddw mm0, mm7 - - movq mm7, mm0 - psrlq mm0, 32 - - paddw mm7, mm0 - movq rax, mm7 - - pop rdi - pop rsi - mov rsp, rbp - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret - - -;unsigned int vp9_sad4x4_mmx( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) -sym(vp9_sad4x4_mmx): - push rbp - 
mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - movd mm0, DWORD PTR [rsi] - movd mm1, DWORD PTR [rdi] - - movd mm2, DWORD PTR [rsi+rax] - movd mm3, DWORD PTR [rdi+rdx] - - punpcklbw mm0, mm2 - punpcklbw mm1, mm3 - - movq mm2, mm0 - psubusb mm0, mm1 - - psubusb mm1, mm2 - por mm0, mm1 - - movq mm2, mm0 - pxor mm3, mm3 - - punpcklbw mm0, mm3 - punpckhbw mm2, mm3 - - paddw mm0, mm2 - - lea rsi, [rsi+rax*2] - lea rdi, [rdi+rdx*2] - - movd mm4, DWORD PTR [rsi] - movd mm5, DWORD PTR [rdi] - - movd mm6, DWORD PTR [rsi+rax] - movd mm7, DWORD PTR [rdi+rdx] - - punpcklbw mm4, mm6 - punpcklbw mm5, mm7 - - movq mm6, mm4 - psubusb mm4, mm5 - - psubusb mm5, mm6 - por mm4, mm5 - - movq mm5, mm4 - punpcklbw mm4, mm3 - - punpckhbw mm5, mm3 - paddw mm4, mm5 - - paddw mm0, mm4 - movq mm1, mm0 - - punpcklwd mm0, mm3 - punpckhwd mm1, mm3 - - paddw mm0, mm1 - movq mm1, mm0 - - psrlq mm0, 32 - paddw mm0, mm1 - - movq rax, mm0 - - pop rdi - pop rsi - mov rsp, rbp - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret - - -;unsigned int vp9_sad16x8_mmx( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) -sym(vp9_sad16x8_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - lea rcx, [rsi+rax*8] - pxor mm7, mm7 - - pxor mm6, mm6 - -.x16x8sad_mmx_loop: - - movq mm0, [rsi] - movq mm1, [rdi] - - movq mm2, [rsi+8] - movq mm3, [rdi+8] - - movq mm4, mm0 - movq mm5, mm2 - - psubusb mm0, mm1 - psubusb mm1, mm4 - - psubusb mm2, mm3 - psubusb mm3, mm5 - - por mm0, mm1 - por mm2, mm3 - - movq mm1, mm0 - movq mm3, mm2 - - punpcklbw mm0, mm6 - punpckhbw mm1, mm6 - - punpcklbw mm2, mm6 - punpckhbw mm3, mm6 - - - 
paddw mm0, mm2 - paddw mm1, mm3 - - paddw mm0, mm1 - lea rsi, [rsi+rax] - - add rdi, rdx - paddw mm7, mm0 - - cmp rsi, rcx - jne .x16x8sad_mmx_loop - - movq mm0, mm7 - punpcklwd mm0, mm6 - - punpckhwd mm7, mm6 - paddw mm0, mm7 - - movq mm7, mm0 - psrlq mm0, 32 - - paddw mm7, mm0 - movq rax, mm7 - - pop rdi - pop rsi - mov rsp, rbp - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret diff --git a/vp8/encoder/x86/sad_sse2.asm b/vp8/encoder/x86/sad_sse2.asm deleted file mode 100644 index fe9fc4d55..000000000 --- a/vp8/encoder/x86/sad_sse2.asm +++ /dev/null @@ -1,410 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - -;unsigned int vp9_sad16x16_wmt( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) -global sym(vp9_sad16x16_wmt) -sym(vp9_sad16x16_wmt): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - SAVE_XMM 6 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - lea rcx, [rsi+rax*8] - - lea rcx, [rcx+rax*8] - pxor xmm6, xmm6 - -.x16x16sad_wmt_loop: - - movq xmm0, QWORD PTR [rsi] - movq xmm2, QWORD PTR [rsi+8] - - movq xmm1, QWORD PTR [rdi] - movq xmm3, QWORD PTR [rdi+8] - - movq xmm4, QWORD PTR [rsi+rax] - movq xmm5, QWORD PTR [rdi+rdx] - - - punpcklbw xmm0, xmm2 - punpcklbw xmm1, xmm3 - - psadbw xmm0, xmm1 - movq xmm2, QWORD PTR [rsi+rax+8] - - movq xmm3, QWORD PTR [rdi+rdx+8] - lea rsi, [rsi+rax*2] - - lea rdi, [rdi+rdx*2] - punpcklbw xmm4, xmm2 - - punpcklbw xmm5, xmm3 - psadbw xmm4, xmm5 - - 
paddw xmm6, xmm0 - paddw xmm6, xmm4 - - cmp rsi, rcx - jne .x16x16sad_wmt_loop - - movq xmm0, xmm6 - psrldq xmm6, 8 - - paddw xmm0, xmm6 - movq rax, xmm0 - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;unsigned int vp9_sad8x16_wmt( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride, -; int max_err) -global sym(vp9_sad8x16_wmt) -sym(vp9_sad8x16_wmt): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rbx - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rbx, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - lea rcx, [rsi+rbx*8] - - lea rcx, [rcx+rbx*8] - pxor mm7, mm7 - -.x8x16sad_wmt_loop: - - movq rax, mm7 - cmp eax, arg(4) - jg .x8x16sad_wmt_early_exit - - movq mm0, QWORD PTR [rsi] - movq mm1, QWORD PTR [rdi] - - movq mm2, QWORD PTR [rsi+rbx] - movq mm3, QWORD PTR [rdi+rdx] - - psadbw mm0, mm1 - psadbw mm2, mm3 - - lea rsi, [rsi+rbx*2] - lea rdi, [rdi+rdx*2] - - paddw mm7, mm0 - paddw mm7, mm2 - - cmp rsi, rcx - jne .x8x16sad_wmt_loop - - movq rax, mm7 - -.x8x16sad_wmt_early_exit: - - ; begin epilog - pop rdi - pop rsi - pop rbx - UNSHADOW_ARGS - pop rbp - ret - - -;unsigned int vp9_sad8x8_wmt( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) -global sym(vp9_sad8x8_wmt) -sym(vp9_sad8x8_wmt): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rbx - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rbx, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - lea rcx, [rsi+rbx*8] - pxor mm7, mm7 - -.x8x8sad_wmt_loop: - - movq rax, mm7 - cmp eax, arg(4) - jg .x8x8sad_wmt_early_exit - - movq mm0, QWORD PTR [rsi] - movq mm1, QWORD PTR [rdi] - - psadbw mm0, mm1 - lea rsi, [rsi+rbx] - - add rdi, rdx - paddw mm7, mm0 - - cmp rsi, rcx - jne .x8x8sad_wmt_loop - - movq rax, mm7 
-.x8x8sad_wmt_early_exit: - - ; begin epilog - pop rdi - pop rsi - pop rbx - UNSHADOW_ARGS - pop rbp - ret - -;unsigned int vp9_sad4x4_wmt( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) -global sym(vp9_sad4x4_wmt) -sym(vp9_sad4x4_wmt): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - movd mm0, DWORD PTR [rsi] - movd mm1, DWORD PTR [rdi] - - movd mm2, DWORD PTR [rsi+rax] - movd mm3, DWORD PTR [rdi+rdx] - - punpcklbw mm0, mm2 - punpcklbw mm1, mm3 - - psadbw mm0, mm1 - lea rsi, [rsi+rax*2] - - lea rdi, [rdi+rdx*2] - movd mm4, DWORD PTR [rsi] - - movd mm5, DWORD PTR [rdi] - movd mm6, DWORD PTR [rsi+rax] - - movd mm7, DWORD PTR [rdi+rdx] - punpcklbw mm4, mm6 - - punpcklbw mm5, mm7 - psadbw mm4, mm5 - - paddw mm0, mm4 - movq rax, mm0 - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - - -;unsigned int vp9_sad16x8_wmt( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) -global sym(vp9_sad16x8_wmt) -sym(vp9_sad16x8_wmt): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rbx - push rsi - push rdi - ; end prolog - - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rbx, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - lea rcx, [rsi+rbx*8] - pxor mm7, mm7 - -.x16x8sad_wmt_loop: - - movq rax, mm7 - cmp eax, arg(4) - jg .x16x8sad_wmt_early_exit - - movq mm0, QWORD PTR [rsi] - movq mm2, QWORD PTR [rsi+8] - - movq mm1, QWORD PTR [rdi] - movq mm3, QWORD PTR [rdi+8] - - movq mm4, QWORD PTR [rsi+rbx] - movq mm5, QWORD PTR [rdi+rdx] - - psadbw mm0, mm1 - psadbw mm2, mm3 - - movq mm1, QWORD PTR [rsi+rbx+8] - movq mm3, QWORD PTR [rdi+rdx+8] - - psadbw mm4, mm5 - psadbw mm1, mm3 - - lea rsi, [rsi+rbx*2] - lea rdi, [rdi+rdx*2] - - paddw mm0, mm2 - 
paddw mm4, mm1 - - paddw mm7, mm0 - paddw mm7, mm4 - - cmp rsi, rcx - jne .x16x8sad_wmt_loop - - movq rax, mm7 - -.x16x8sad_wmt_early_exit: - - ; begin epilog - pop rdi - pop rsi - pop rbx - UNSHADOW_ARGS - pop rbp - ret - -;void vp9_copy32xn_sse2( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *dst_ptr, -; int dst_stride, -; int height); -global sym(vp9_copy32xn_sse2) -sym(vp9_copy32xn_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;dst_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;dst_stride - movsxd rcx, dword ptr arg(4) ;height - -.block_copy_sse2_loopx4: - movdqu xmm0, XMMWORD PTR [rsi] - movdqu xmm1, XMMWORD PTR [rsi + 16] - movdqu xmm2, XMMWORD PTR [rsi + rax] - movdqu xmm3, XMMWORD PTR [rsi + rax + 16] - - lea rsi, [rsi+rax*2] - - movdqu xmm4, XMMWORD PTR [rsi] - movdqu xmm5, XMMWORD PTR [rsi + 16] - movdqu xmm6, XMMWORD PTR [rsi + rax] - movdqu xmm7, XMMWORD PTR [rsi + rax + 16] - - lea rsi, [rsi+rax*2] - - movdqa XMMWORD PTR [rdi], xmm0 - movdqa XMMWORD PTR [rdi + 16], xmm1 - movdqa XMMWORD PTR [rdi + rdx], xmm2 - movdqa XMMWORD PTR [rdi + rdx + 16], xmm3 - - lea rdi, [rdi+rdx*2] - - movdqa XMMWORD PTR [rdi], xmm4 - movdqa XMMWORD PTR [rdi + 16], xmm5 - movdqa XMMWORD PTR [rdi + rdx], xmm6 - movdqa XMMWORD PTR [rdi + rdx + 16], xmm7 - - lea rdi, [rdi+rdx*2] - - sub rcx, 4 - cmp rcx, 4 - jge .block_copy_sse2_loopx4 - - cmp rcx, 0 - je .copy_is_done - -.block_copy_sse2_loop: - movdqu xmm0, XMMWORD PTR [rsi] - movdqu xmm1, XMMWORD PTR [rsi + 16] - lea rsi, [rsi+rax] - - movdqa XMMWORD PTR [rdi], xmm0 - movdqa XMMWORD PTR [rdi + 16], xmm1 - lea rdi, [rdi+rdx] - - sub rcx, 1 - jne .block_copy_sse2_loop - -.copy_is_done: - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret diff --git a/vp8/encoder/x86/sad_sse3.asm b/vp8/encoder/x86/sad_sse3.asm deleted file mode 100644 index 
e17485e5b..000000000 --- a/vp8/encoder/x86/sad_sse3.asm +++ /dev/null @@ -1,960 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - -%include "vpx_ports/x86_abi_support.asm" - -%macro STACK_FRAME_CREATE_X3 0 -%if ABI_IS_32BIT - %define src_ptr rsi - %define src_stride rax - %define ref_ptr rdi - %define ref_stride rdx - %define end_ptr rcx - %define ret_var rbx - %define result_ptr arg(4) - %define max_err arg(4) - %define height dword ptr arg(4) - push rbp - mov rbp, rsp - push rsi - push rdi - push rbx - - mov rsi, arg(0) ; src_ptr - mov rdi, arg(2) ; ref_ptr - - movsxd rax, dword ptr arg(1) ; src_stride - movsxd rdx, dword ptr arg(3) ; ref_stride -%else - %ifidn __OUTPUT_FORMAT__,x64 - SAVE_XMM 7, u - %define src_ptr rcx - %define src_stride rdx - %define ref_ptr r8 - %define ref_stride r9 - %define end_ptr r10 - %define ret_var r11 - %define result_ptr [rsp+xmm_stack_space+8+4*8] - %define max_err [rsp+xmm_stack_space+8+4*8] - %define height dword ptr [rsp+xmm_stack_space+8+4*8] - %else - %define src_ptr rdi - %define src_stride rsi - %define ref_ptr rdx - %define ref_stride rcx - %define end_ptr r9 - %define ret_var r10 - %define result_ptr r8 - %define max_err r8 - %define height r8 - %endif -%endif - -%endmacro - -%macro STACK_FRAME_DESTROY_X3 0 - %define src_ptr - %define src_stride - %define ref_ptr - %define ref_stride - %define end_ptr - %define ret_var - %define result_ptr - %define max_err - %define height - -%if ABI_IS_32BIT - pop rbx - pop rdi - pop rsi - pop rbp -%else - %ifidn __OUTPUT_FORMAT__,x64 - RESTORE_XMM - %endif -%endif - ret -%endmacro - -%macro STACK_FRAME_CREATE_X4 0 -%if 
ABI_IS_32BIT - %define src_ptr rsi - %define src_stride rax - %define r0_ptr rcx - %define r1_ptr rdx - %define r2_ptr rbx - %define r3_ptr rdi - %define ref_stride rbp - %define result_ptr arg(4) - push rbp - mov rbp, rsp - push rsi - push rdi - push rbx - - push rbp - mov rdi, arg(2) ; ref_ptr_base - - LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi - - mov rsi, arg(0) ; src_ptr - - movsxd rbx, dword ptr arg(1) ; src_stride - movsxd rbp, dword ptr arg(3) ; ref_stride - - xchg rbx, rax -%else - %ifidn __OUTPUT_FORMAT__,x64 - SAVE_XMM 7, u - %define src_ptr rcx - %define src_stride rdx - %define r0_ptr rsi - %define r1_ptr r10 - %define r2_ptr r11 - %define r3_ptr r8 - %define ref_stride r9 - %define result_ptr [rsp+xmm_stack_space+16+4*8] - push rsi - - LOAD_X4_ADDRESSES r8, r0_ptr, r1_ptr, r2_ptr, r3_ptr - %else - %define src_ptr rdi - %define src_stride rsi - %define r0_ptr r9 - %define r1_ptr r10 - %define r2_ptr r11 - %define r3_ptr rdx - %define ref_stride rcx - %define result_ptr r8 - - LOAD_X4_ADDRESSES rdx, r0_ptr, r1_ptr, r2_ptr, r3_ptr - - %endif -%endif -%endmacro - -%macro STACK_FRAME_DESTROY_X4 0 - %define src_ptr - %define src_stride - %define r0_ptr - %define r1_ptr - %define r2_ptr - %define r3_ptr - %define ref_stride - %define result_ptr - -%if ABI_IS_32BIT - pop rbx - pop rdi - pop rsi - pop rbp -%else - %ifidn __OUTPUT_FORMAT__,x64 - pop rsi - RESTORE_XMM - %endif -%endif - ret -%endmacro - -%macro PROCESS_16X2X3 5 -%if %1==0 - movdqa xmm0, XMMWORD PTR [%2] - lddqu xmm5, XMMWORD PTR [%3] - lddqu xmm6, XMMWORD PTR [%3+1] - lddqu xmm7, XMMWORD PTR [%3+2] - - psadbw xmm5, xmm0 - psadbw xmm6, xmm0 - psadbw xmm7, xmm0 -%else - movdqa xmm0, XMMWORD PTR [%2] - lddqu xmm1, XMMWORD PTR [%3] - lddqu xmm2, XMMWORD PTR [%3+1] - lddqu xmm3, XMMWORD PTR [%3+2] - - psadbw xmm1, xmm0 - psadbw xmm2, xmm0 - psadbw xmm3, xmm0 - - paddw xmm5, xmm1 - paddw xmm6, xmm2 - paddw xmm7, xmm3 -%endif - movdqa xmm0, XMMWORD PTR [%2+%4] - lddqu xmm1, XMMWORD PTR [%3+%5] - lddqu 
xmm2, XMMWORD PTR [%3+%5+1] - lddqu xmm3, XMMWORD PTR [%3+%5+2] - -%if %1==0 || %1==1 - lea %2, [%2+%4*2] - lea %3, [%3+%5*2] -%endif - - psadbw xmm1, xmm0 - psadbw xmm2, xmm0 - psadbw xmm3, xmm0 - - paddw xmm5, xmm1 - paddw xmm6, xmm2 - paddw xmm7, xmm3 -%endmacro - -%macro PROCESS_8X2X3 5 -%if %1==0 - movq mm0, QWORD PTR [%2] - movq mm5, QWORD PTR [%3] - movq mm6, QWORD PTR [%3+1] - movq mm7, QWORD PTR [%3+2] - - psadbw mm5, mm0 - psadbw mm6, mm0 - psadbw mm7, mm0 -%else - movq mm0, QWORD PTR [%2] - movq mm1, QWORD PTR [%3] - movq mm2, QWORD PTR [%3+1] - movq mm3, QWORD PTR [%3+2] - - psadbw mm1, mm0 - psadbw mm2, mm0 - psadbw mm3, mm0 - - paddw mm5, mm1 - paddw mm6, mm2 - paddw mm7, mm3 -%endif - movq mm0, QWORD PTR [%2+%4] - movq mm1, QWORD PTR [%3+%5] - movq mm2, QWORD PTR [%3+%5+1] - movq mm3, QWORD PTR [%3+%5+2] - -%if %1==0 || %1==1 - lea %2, [%2+%4*2] - lea %3, [%3+%5*2] -%endif - - psadbw mm1, mm0 - psadbw mm2, mm0 - psadbw mm3, mm0 - - paddw mm5, mm1 - paddw mm6, mm2 - paddw mm7, mm3 -%endmacro - -%macro LOAD_X4_ADDRESSES 5 - mov %2, [%1+REG_SZ_BYTES*0] - mov %3, [%1+REG_SZ_BYTES*1] - - mov %4, [%1+REG_SZ_BYTES*2] - mov %5, [%1+REG_SZ_BYTES*3] -%endmacro - -%macro PROCESS_16X2X4 8 -%if %1==0 - movdqa xmm0, XMMWORD PTR [%2] - lddqu xmm4, XMMWORD PTR [%3] - lddqu xmm5, XMMWORD PTR [%4] - lddqu xmm6, XMMWORD PTR [%5] - lddqu xmm7, XMMWORD PTR [%6] - - psadbw xmm4, xmm0 - psadbw xmm5, xmm0 - psadbw xmm6, xmm0 - psadbw xmm7, xmm0 -%else - movdqa xmm0, XMMWORD PTR [%2] - lddqu xmm1, XMMWORD PTR [%3] - lddqu xmm2, XMMWORD PTR [%4] - lddqu xmm3, XMMWORD PTR [%5] - - psadbw xmm1, xmm0 - psadbw xmm2, xmm0 - psadbw xmm3, xmm0 - - paddw xmm4, xmm1 - lddqu xmm1, XMMWORD PTR [%6] - paddw xmm5, xmm2 - paddw xmm6, xmm3 - - psadbw xmm1, xmm0 - paddw xmm7, xmm1 -%endif - movdqa xmm0, XMMWORD PTR [%2+%7] - lddqu xmm1, XMMWORD PTR [%3+%8] - lddqu xmm2, XMMWORD PTR [%4+%8] - lddqu xmm3, XMMWORD PTR [%5+%8] - - psadbw xmm1, xmm0 - psadbw xmm2, xmm0 - psadbw xmm3, xmm0 - - 
paddw xmm4, xmm1 - lddqu xmm1, XMMWORD PTR [%6+%8] - paddw xmm5, xmm2 - paddw xmm6, xmm3 - -%if %1==0 || %1==1 - lea %2, [%2+%7*2] - lea %3, [%3+%8*2] - - lea %4, [%4+%8*2] - lea %5, [%5+%8*2] - - lea %6, [%6+%8*2] -%endif - psadbw xmm1, xmm0 - paddw xmm7, xmm1 - -%endmacro - -%macro PROCESS_8X2X4 8 -%if %1==0 - movq mm0, QWORD PTR [%2] - movq mm4, QWORD PTR [%3] - movq mm5, QWORD PTR [%4] - movq mm6, QWORD PTR [%5] - movq mm7, QWORD PTR [%6] - - psadbw mm4, mm0 - psadbw mm5, mm0 - psadbw mm6, mm0 - psadbw mm7, mm0 -%else - movq mm0, QWORD PTR [%2] - movq mm1, QWORD PTR [%3] - movq mm2, QWORD PTR [%4] - movq mm3, QWORD PTR [%5] - - psadbw mm1, mm0 - psadbw mm2, mm0 - psadbw mm3, mm0 - - paddw mm4, mm1 - movq mm1, QWORD PTR [%6] - paddw mm5, mm2 - paddw mm6, mm3 - - psadbw mm1, mm0 - paddw mm7, mm1 -%endif - movq mm0, QWORD PTR [%2+%7] - movq mm1, QWORD PTR [%3+%8] - movq mm2, QWORD PTR [%4+%8] - movq mm3, QWORD PTR [%5+%8] - - psadbw mm1, mm0 - psadbw mm2, mm0 - psadbw mm3, mm0 - - paddw mm4, mm1 - movq mm1, QWORD PTR [%6+%8] - paddw mm5, mm2 - paddw mm6, mm3 - -%if %1==0 || %1==1 - lea %2, [%2+%7*2] - lea %3, [%3+%8*2] - - lea %4, [%4+%8*2] - lea %5, [%5+%8*2] - - lea %6, [%6+%8*2] -%endif - psadbw mm1, mm0 - paddw mm7, mm1 - -%endmacro - -;void int vp9_sad16x16x3_sse3( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride, -; int *results) -global sym(vp9_sad16x16x3_sse3) -sym(vp9_sad16x16x3_sse3): - - STACK_FRAME_CREATE_X3 - - PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_16X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride - - mov rcx, 
result_ptr - - movq xmm0, xmm5 - psrldq xmm5, 8 - - paddw xmm0, xmm5 - movd [rcx], xmm0 -;- - movq xmm0, xmm6 - psrldq xmm6, 8 - - paddw xmm0, xmm6 - movd [rcx+4], xmm0 -;- - movq xmm0, xmm7 - psrldq xmm7, 8 - - paddw xmm0, xmm7 - movd [rcx+8], xmm0 - - STACK_FRAME_DESTROY_X3 - -;void int vp9_sad16x8x3_sse3( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride, -; int *results) -global sym(vp9_sad16x8x3_sse3) -sym(vp9_sad16x8x3_sse3): - - STACK_FRAME_CREATE_X3 - - PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_16X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride - - mov rcx, result_ptr - - movq xmm0, xmm5 - psrldq xmm5, 8 - - paddw xmm0, xmm5 - movd [rcx], xmm0 -;- - movq xmm0, xmm6 - psrldq xmm6, 8 - - paddw xmm0, xmm6 - movd [rcx+4], xmm0 -;- - movq xmm0, xmm7 - psrldq xmm7, 8 - - paddw xmm0, xmm7 - movd [rcx+8], xmm0 - - STACK_FRAME_DESTROY_X3 - -;void int vp9_sad8x16x3_sse3( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride, -; int *results) -global sym(vp9_sad8x16x3_sse3) -sym(vp9_sad8x16x3_sse3): - - STACK_FRAME_CREATE_X3 - - PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride - - mov rcx, result_ptr - - punpckldq mm5, mm6 - - movq [rcx], mm5 - movd [rcx+8], mm7 - - STACK_FRAME_DESTROY_X3 - -;void int vp9_sad8x8x3_sse3( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride, -; int 
*results) -global sym(vp9_sad8x8x3_sse3) -sym(vp9_sad8x8x3_sse3): - - STACK_FRAME_CREATE_X3 - - PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride - - mov rcx, result_ptr - - punpckldq mm5, mm6 - - movq [rcx], mm5 - movd [rcx+8], mm7 - - STACK_FRAME_DESTROY_X3 - -;void int vp9_sad4x4x3_sse3( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride, -; int *results) -global sym(vp9_sad4x4x3_sse3) -sym(vp9_sad4x4x3_sse3): - - STACK_FRAME_CREATE_X3 - - movd mm0, DWORD PTR [src_ptr] - movd mm1, DWORD PTR [ref_ptr] - - movd mm2, DWORD PTR [src_ptr+src_stride] - movd mm3, DWORD PTR [ref_ptr+ref_stride] - - punpcklbw mm0, mm2 - punpcklbw mm1, mm3 - - movd mm4, DWORD PTR [ref_ptr+1] - movd mm5, DWORD PTR [ref_ptr+2] - - movd mm2, DWORD PTR [ref_ptr+ref_stride+1] - movd mm3, DWORD PTR [ref_ptr+ref_stride+2] - - psadbw mm1, mm0 - - punpcklbw mm4, mm2 - punpcklbw mm5, mm3 - - psadbw mm4, mm0 - psadbw mm5, mm0 - - lea src_ptr, [src_ptr+src_stride*2] - lea ref_ptr, [ref_ptr+ref_stride*2] - - movd mm0, DWORD PTR [src_ptr] - movd mm2, DWORD PTR [ref_ptr] - - movd mm3, DWORD PTR [src_ptr+src_stride] - movd mm6, DWORD PTR [ref_ptr+ref_stride] - - punpcklbw mm0, mm3 - punpcklbw mm2, mm6 - - movd mm3, DWORD PTR [ref_ptr+1] - movd mm7, DWORD PTR [ref_ptr+2] - - psadbw mm2, mm0 - - paddw mm1, mm2 - - movd mm2, DWORD PTR [ref_ptr+ref_stride+1] - movd mm6, DWORD PTR [ref_ptr+ref_stride+2] - - punpcklbw mm3, mm2 - punpcklbw mm7, mm6 - - psadbw mm3, mm0 - psadbw mm7, mm0 - - paddw mm3, mm4 - paddw mm7, mm5 - - mov rcx, result_ptr - - punpckldq mm1, mm3 - - movq [rcx], mm1 - movd [rcx+8], mm7 - - STACK_FRAME_DESTROY_X3 - -;unsigned int vp9_sad16x16_sse3( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride, -; int max_err) -;%define 
lddqu movdqu -global sym(vp9_sad16x16_sse3) -sym(vp9_sad16x16_sse3): - - STACK_FRAME_CREATE_X3 - - mov end_ptr, 4 - pxor xmm7, xmm7 - -.vp9_sad16x16_sse3_loop: - movdqa xmm0, XMMWORD PTR [src_ptr] - movdqu xmm1, XMMWORD PTR [ref_ptr] - movdqa xmm2, XMMWORD PTR [src_ptr+src_stride] - movdqu xmm3, XMMWORD PTR [ref_ptr+ref_stride] - - lea src_ptr, [src_ptr+src_stride*2] - lea ref_ptr, [ref_ptr+ref_stride*2] - - movdqa xmm4, XMMWORD PTR [src_ptr] - movdqu xmm5, XMMWORD PTR [ref_ptr] - movdqa xmm6, XMMWORD PTR [src_ptr+src_stride] - - psadbw xmm0, xmm1 - - movdqu xmm1, XMMWORD PTR [ref_ptr+ref_stride] - - psadbw xmm2, xmm3 - psadbw xmm4, xmm5 - psadbw xmm6, xmm1 - - lea src_ptr, [src_ptr+src_stride*2] - lea ref_ptr, [ref_ptr+ref_stride*2] - - paddw xmm7, xmm0 - paddw xmm7, xmm2 - paddw xmm7, xmm4 - paddw xmm7, xmm6 - - sub end_ptr, 1 - jne .vp9_sad16x16_sse3_loop - - movq xmm0, xmm7 - psrldq xmm7, 8 - paddw xmm0, xmm7 - movq rax, xmm0 - - STACK_FRAME_DESTROY_X3 - -;void vp9_copy32xn_sse3( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *dst_ptr, -; int dst_stride, -; int height); -global sym(vp9_copy32xn_sse3) -sym(vp9_copy32xn_sse3): - - STACK_FRAME_CREATE_X3 - -.block_copy_sse3_loopx4: - lea end_ptr, [src_ptr+src_stride*2] - - movdqu xmm0, XMMWORD PTR [src_ptr] - movdqu xmm1, XMMWORD PTR [src_ptr + 16] - movdqu xmm2, XMMWORD PTR [src_ptr + src_stride] - movdqu xmm3, XMMWORD PTR [src_ptr + src_stride + 16] - movdqu xmm4, XMMWORD PTR [end_ptr] - movdqu xmm5, XMMWORD PTR [end_ptr + 16] - movdqu xmm6, XMMWORD PTR [end_ptr + src_stride] - movdqu xmm7, XMMWORD PTR [end_ptr + src_stride + 16] - - lea src_ptr, [src_ptr+src_stride*4] - - lea end_ptr, [ref_ptr+ref_stride*2] - - movdqa XMMWORD PTR [ref_ptr], xmm0 - movdqa XMMWORD PTR [ref_ptr + 16], xmm1 - movdqa XMMWORD PTR [ref_ptr + ref_stride], xmm2 - movdqa XMMWORD PTR [ref_ptr + ref_stride + 16], xmm3 - movdqa XMMWORD PTR [end_ptr], xmm4 - movdqa XMMWORD PTR [end_ptr + 16], xmm5 - movdqa XMMWORD PTR [end_ptr 
+ ref_stride], xmm6 - movdqa XMMWORD PTR [end_ptr + ref_stride + 16], xmm7 - - lea ref_ptr, [ref_ptr+ref_stride*4] - - sub height, 4 - cmp height, 4 - jge .block_copy_sse3_loopx4 - - ;Check to see if there is more rows need to be copied. - cmp height, 0 - je .copy_is_done - -.block_copy_sse3_loop: - movdqu xmm0, XMMWORD PTR [src_ptr] - movdqu xmm1, XMMWORD PTR [src_ptr + 16] - lea src_ptr, [src_ptr+src_stride] - - movdqa XMMWORD PTR [ref_ptr], xmm0 - movdqa XMMWORD PTR [ref_ptr + 16], xmm1 - lea ref_ptr, [ref_ptr+ref_stride] - - sub height, 1 - jne .block_copy_sse3_loop - -.copy_is_done: - STACK_FRAME_DESTROY_X3 - -;void vp9_sad16x16x4d_sse3( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr_base, -; int ref_stride, -; int *results) -global sym(vp9_sad16x16x4d_sse3) -sym(vp9_sad16x16x4d_sse3): - - STACK_FRAME_CREATE_X4 - - PROCESS_16X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_16X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - -%if ABI_IS_32BIT - pop rbp -%endif - mov rcx, result_ptr - - movq xmm0, xmm4 - psrldq xmm4, 8 - - paddw xmm0, xmm4 - movd [rcx], xmm0 -;- - movq xmm0, xmm5 - psrldq xmm5, 8 - - paddw xmm0, xmm5 - movd [rcx+4], xmm0 -;- - movq xmm0, xmm6 - psrldq xmm6, 8 - - paddw xmm0, xmm6 - movd [rcx+8], xmm0 -;- - movq xmm0, xmm7 - psrldq xmm7, 8 - - paddw xmm0, xmm7 - movd [rcx+12], xmm0 - - STACK_FRAME_DESTROY_X4 - -;void vp9_sad16x8x4d_sse3( -; unsigned char *src_ptr, -; int 
src_stride, -; unsigned char *ref_ptr_base, -; int ref_stride, -; int *results) -global sym(vp9_sad16x8x4d_sse3) -sym(vp9_sad16x8x4d_sse3): - - STACK_FRAME_CREATE_X4 - - PROCESS_16X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_16X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - -%if ABI_IS_32BIT - pop rbp -%endif - mov rcx, result_ptr - - movq xmm0, xmm4 - psrldq xmm4, 8 - - paddw xmm0, xmm4 - movd [rcx], xmm0 -;- - movq xmm0, xmm5 - psrldq xmm5, 8 - - paddw xmm0, xmm5 - movd [rcx+4], xmm0 -;- - movq xmm0, xmm6 - psrldq xmm6, 8 - - paddw xmm0, xmm6 - movd [rcx+8], xmm0 -;- - movq xmm0, xmm7 - psrldq xmm7, 8 - - paddw xmm0, xmm7 - movd [rcx+12], xmm0 - - STACK_FRAME_DESTROY_X4 - -;void int vp9_sad8x16x4d_sse3( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride, -; int *results) -global sym(vp9_sad8x16x4d_sse3) -sym(vp9_sad8x16x4d_sse3): - - STACK_FRAME_CREATE_X4 - - PROCESS_8X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_8X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - -%if ABI_IS_32BIT - pop rbp -%endif - mov rcx, result_ptr - - punpckldq mm4, mm5 - punpckldq mm6, mm7 - - movq [rcx], mm4 - movq [rcx+8], mm6 - - STACK_FRAME_DESTROY_X4 - -;void int 
vp9_sad8x8x4d_sse3( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride, -; int *results) -global sym(vp9_sad8x8x4d_sse3) -sym(vp9_sad8x8x4d_sse3): - - STACK_FRAME_CREATE_X4 - - PROCESS_8X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - PROCESS_8X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride - -%if ABI_IS_32BIT - pop rbp -%endif - mov rcx, result_ptr - - punpckldq mm4, mm5 - punpckldq mm6, mm7 - - movq [rcx], mm4 - movq [rcx+8], mm6 - - STACK_FRAME_DESTROY_X4 - -;void int vp9_sad4x4x4d_sse3( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride, -; int *results) -global sym(vp9_sad4x4x4d_sse3) -sym(vp9_sad4x4x4d_sse3): - - STACK_FRAME_CREATE_X4 - - movd mm0, DWORD PTR [src_ptr] - movd mm1, DWORD PTR [r0_ptr] - - movd mm2, DWORD PTR [src_ptr+src_stride] - movd mm3, DWORD PTR [r0_ptr+ref_stride] - - punpcklbw mm0, mm2 - punpcklbw mm1, mm3 - - movd mm4, DWORD PTR [r1_ptr] - movd mm5, DWORD PTR [r2_ptr] - - movd mm6, DWORD PTR [r3_ptr] - movd mm2, DWORD PTR [r1_ptr+ref_stride] - - movd mm3, DWORD PTR [r2_ptr+ref_stride] - movd mm7, DWORD PTR [r3_ptr+ref_stride] - - psadbw mm1, mm0 - - punpcklbw mm4, mm2 - punpcklbw mm5, mm3 - - punpcklbw mm6, mm7 - psadbw mm4, mm0 - - psadbw mm5, mm0 - psadbw mm6, mm0 - - - - lea src_ptr, [src_ptr+src_stride*2] - lea r0_ptr, [r0_ptr+ref_stride*2] - - lea r1_ptr, [r1_ptr+ref_stride*2] - lea r2_ptr, [r2_ptr+ref_stride*2] - - lea r3_ptr, [r3_ptr+ref_stride*2] - - movd mm0, DWORD PTR [src_ptr] - movd mm2, DWORD PTR [r0_ptr] - - movd mm3, DWORD PTR [src_ptr+src_stride] - movd mm7, DWORD PTR [r0_ptr+ref_stride] - - punpcklbw mm0, mm3 - punpcklbw mm2, mm7 - - movd mm3, DWORD PTR [r1_ptr] - movd mm7, DWORD PTR [r2_ptr] - - psadbw mm2, mm0 -%if ABI_IS_32BIT - 
mov rax, rbp - - pop rbp -%define ref_stride rax -%endif - mov rsi, result_ptr - - paddw mm1, mm2 - movd [rsi], mm1 - - movd mm2, DWORD PTR [r1_ptr+ref_stride] - movd mm1, DWORD PTR [r2_ptr+ref_stride] - - punpcklbw mm3, mm2 - punpcklbw mm7, mm1 - - psadbw mm3, mm0 - psadbw mm7, mm0 - - movd mm2, DWORD PTR [r3_ptr] - movd mm1, DWORD PTR [r3_ptr+ref_stride] - - paddw mm3, mm4 - paddw mm7, mm5 - - movd [rsi+4], mm3 - punpcklbw mm2, mm1 - - movd [rsi+8], mm7 - psadbw mm2, mm0 - - paddw mm2, mm6 - movd [rsi+12], mm2 - - - STACK_FRAME_DESTROY_X4 - diff --git a/vp8/encoder/x86/sad_sse4.asm b/vp8/encoder/x86/sad_sse4.asm deleted file mode 100644 index 25980d624..000000000 --- a/vp8/encoder/x86/sad_sse4.asm +++ /dev/null @@ -1,353 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - -%include "vpx_ports/x86_abi_support.asm" - -%macro PROCESS_16X2X8 1 -%if %1 - movdqa xmm0, XMMWORD PTR [rsi] - movq xmm1, MMWORD PTR [rdi] - movq xmm3, MMWORD PTR [rdi+8] - movq xmm2, MMWORD PTR [rdi+16] - punpcklqdq xmm1, xmm3 - punpcklqdq xmm3, xmm2 - - movdqa xmm2, xmm1 - mpsadbw xmm1, xmm0, 0x0 - mpsadbw xmm2, xmm0, 0x5 - - psrldq xmm0, 8 - - movdqa xmm4, xmm3 - mpsadbw xmm3, xmm0, 0x0 - mpsadbw xmm4, xmm0, 0x5 - - paddw xmm1, xmm2 - paddw xmm1, xmm3 - paddw xmm1, xmm4 -%else - movdqa xmm0, XMMWORD PTR [rsi] - movq xmm5, MMWORD PTR [rdi] - movq xmm3, MMWORD PTR [rdi+8] - movq xmm2, MMWORD PTR [rdi+16] - punpcklqdq xmm5, xmm3 - punpcklqdq xmm3, xmm2 - - movdqa xmm2, xmm5 - mpsadbw xmm5, xmm0, 0x0 - mpsadbw xmm2, xmm0, 0x5 - - psrldq xmm0, 8 - - movdqa xmm4, xmm3 - mpsadbw xmm3, xmm0, 0x0 - mpsadbw xmm4, xmm0, 0x5 - - paddw xmm5, xmm2 - paddw xmm5, xmm3 - paddw xmm5, xmm4 - - paddw xmm1, xmm5 -%endif - movdqa xmm0, XMMWORD PTR [rsi + rax] - movq xmm5, MMWORD PTR [rdi+ rdx] - movq xmm3, MMWORD PTR [rdi+ rdx+8] - movq xmm2, MMWORD PTR [rdi+ rdx+16] - punpcklqdq xmm5, xmm3 - punpcklqdq xmm3, xmm2 - - lea rsi, [rsi+rax*2] - lea rdi, [rdi+rdx*2] - - movdqa xmm2, xmm5 - mpsadbw xmm5, xmm0, 0x0 - mpsadbw xmm2, xmm0, 0x5 - - psrldq xmm0, 8 - movdqa xmm4, xmm3 - mpsadbw xmm3, xmm0, 0x0 - mpsadbw xmm4, xmm0, 0x5 - - paddw xmm5, xmm2 - paddw xmm5, xmm3 - paddw xmm5, xmm4 - - paddw xmm1, xmm5 -%endmacro - -%macro PROCESS_8X2X8 1 -%if %1 - movq xmm0, MMWORD PTR [rsi] - movq xmm1, MMWORD PTR [rdi] - movq xmm3, MMWORD PTR [rdi+8] - punpcklqdq xmm1, xmm3 - - movdqa xmm2, xmm1 - mpsadbw xmm1, xmm0, 0x0 - mpsadbw xmm2, xmm0, 0x5 - paddw xmm1, xmm2 -%else - movq xmm0, MMWORD PTR [rsi] - movq xmm5, MMWORD PTR [rdi] - movq xmm3, MMWORD PTR [rdi+8] - punpcklqdq xmm5, xmm3 - - movdqa xmm2, xmm5 - mpsadbw xmm5, xmm0, 0x0 - mpsadbw xmm2, xmm0, 0x5 - paddw xmm5, xmm2 - - paddw xmm1, xmm5 -%endif - movq xmm0, MMWORD PTR [rsi + rax] - movq xmm5, MMWORD PTR [rdi+ rdx] - movq xmm3, 
MMWORD PTR [rdi+ rdx+8] - punpcklqdq xmm5, xmm3 - - lea rsi, [rsi+rax*2] - lea rdi, [rdi+rdx*2] - - movdqa xmm2, xmm5 - mpsadbw xmm5, xmm0, 0x0 - mpsadbw xmm2, xmm0, 0x5 - paddw xmm5, xmm2 - - paddw xmm1, xmm5 -%endmacro - -%macro PROCESS_4X2X8 1 -%if %1 - movd xmm0, [rsi] - movq xmm1, MMWORD PTR [rdi] - movq xmm3, MMWORD PTR [rdi+8] - punpcklqdq xmm1, xmm3 - - mpsadbw xmm1, xmm0, 0x0 -%else - movd xmm0, [rsi] - movq xmm5, MMWORD PTR [rdi] - movq xmm3, MMWORD PTR [rdi+8] - punpcklqdq xmm5, xmm3 - - mpsadbw xmm5, xmm0, 0x0 - - paddw xmm1, xmm5 -%endif - movd xmm0, [rsi + rax] - movq xmm5, MMWORD PTR [rdi+ rdx] - movq xmm3, MMWORD PTR [rdi+ rdx+8] - punpcklqdq xmm5, xmm3 - - lea rsi, [rsi+rax*2] - lea rdi, [rdi+rdx*2] - - mpsadbw xmm5, xmm0, 0x0 - - paddw xmm1, xmm5 -%endmacro - - -;void vp9_sad16x16x8_sse4( -; const unsigned char *src_ptr, -; int src_stride, -; const unsigned char *ref_ptr, -; int ref_stride, -; unsigned short *sad_array); -global sym(vp9_sad16x16x8_sse4) -sym(vp9_sad16x16x8_sse4): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - PROCESS_16X2X8 1 - PROCESS_16X2X8 0 - PROCESS_16X2X8 0 - PROCESS_16X2X8 0 - PROCESS_16X2X8 0 - PROCESS_16X2X8 0 - PROCESS_16X2X8 0 - PROCESS_16X2X8 0 - - mov rdi, arg(4) ;Results - movdqa XMMWORD PTR [rdi], xmm1 - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - - -;void vp9_sad16x8x8_sse4( -; const unsigned char *src_ptr, -; int src_stride, -; const unsigned char *ref_ptr, -; int ref_stride, -; unsigned short *sad_array -;); -global sym(vp9_sad16x8x8_sse4) -sym(vp9_sad16x8x8_sse4): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - 
PROCESS_16X2X8 1 - PROCESS_16X2X8 0 - PROCESS_16X2X8 0 - PROCESS_16X2X8 0 - - mov rdi, arg(4) ;Results - movdqa XMMWORD PTR [rdi], xmm1 - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - - -;void vp9_sad8x8x8_sse4( -; const unsigned char *src_ptr, -; int src_stride, -; const unsigned char *ref_ptr, -; int ref_stride, -; unsigned short *sad_array -;); -global sym(vp9_sad8x8x8_sse4) -sym(vp9_sad8x8x8_sse4): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - PROCESS_8X2X8 1 - PROCESS_8X2X8 0 - PROCESS_8X2X8 0 - PROCESS_8X2X8 0 - - mov rdi, arg(4) ;Results - movdqa XMMWORD PTR [rdi], xmm1 - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - - -;void vp9_sad8x16x8_sse4( -; const unsigned char *src_ptr, -; int src_stride, -; const unsigned char *ref_ptr, -; int ref_stride, -; unsigned short *sad_array -;); -global sym(vp9_sad8x16x8_sse4) -sym(vp9_sad8x16x8_sse4): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - PROCESS_8X2X8 1 - PROCESS_8X2X8 0 - PROCESS_8X2X8 0 - PROCESS_8X2X8 0 - PROCESS_8X2X8 0 - PROCESS_8X2X8 0 - PROCESS_8X2X8 0 - PROCESS_8X2X8 0 - mov rdi, arg(4) ;Results - movdqa XMMWORD PTR [rdi], xmm1 - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - - -;void vp9_sad4x4x8_c( -; const unsigned char *src_ptr, -; int src_stride, -; const unsigned char *ref_ptr, -; int ref_stride, -; unsigned short *sad_array -;); -global sym(vp9_sad4x4x8_sse4) -sym(vp9_sad4x4x8_sse4): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rax, dword ptr 
arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - PROCESS_4X2X8 1 - PROCESS_4X2X8 0 - - mov rdi, arg(4) ;Results - movdqa XMMWORD PTR [rdi], xmm1 - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - - - - diff --git a/vp8/encoder/x86/sad_ssse3.asm b/vp8/encoder/x86/sad_ssse3.asm deleted file mode 100644 index 5623d8be4..000000000 --- a/vp8/encoder/x86/sad_ssse3.asm +++ /dev/null @@ -1,370 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - -%macro PROCESS_16X2X3 1 -%if %1 - movdqa xmm0, XMMWORD PTR [rsi] - lddqu xmm5, XMMWORD PTR [rdi] - lddqu xmm6, XMMWORD PTR [rdi+1] - lddqu xmm7, XMMWORD PTR [rdi+2] - - psadbw xmm5, xmm0 - psadbw xmm6, xmm0 - psadbw xmm7, xmm0 -%else - movdqa xmm0, XMMWORD PTR [rsi] - lddqu xmm1, XMMWORD PTR [rdi] - lddqu xmm2, XMMWORD PTR [rdi+1] - lddqu xmm3, XMMWORD PTR [rdi+2] - - psadbw xmm1, xmm0 - psadbw xmm2, xmm0 - psadbw xmm3, xmm0 - - paddw xmm5, xmm1 - paddw xmm6, xmm2 - paddw xmm7, xmm3 -%endif - movdqa xmm0, XMMWORD PTR [rsi+rax] - lddqu xmm1, XMMWORD PTR [rdi+rdx] - lddqu xmm2, XMMWORD PTR [rdi+rdx+1] - lddqu xmm3, XMMWORD PTR [rdi+rdx+2] - - lea rsi, [rsi+rax*2] - lea rdi, [rdi+rdx*2] - - psadbw xmm1, xmm0 - psadbw xmm2, xmm0 - psadbw xmm3, xmm0 - - paddw xmm5, xmm1 - paddw xmm6, xmm2 - paddw xmm7, xmm3 -%endmacro - -%macro PROCESS_16X2X3_OFFSET 2 -%if %1 - movdqa xmm0, XMMWORD PTR [rsi] - movdqa xmm4, XMMWORD PTR [rdi] - movdqa xmm7, XMMWORD PTR [rdi+16] - - movdqa xmm5, xmm7 - palignr xmm5, xmm4, %2 - - movdqa xmm6, xmm7 - palignr xmm6, xmm4, (%2+1) - - palignr xmm7, xmm4, (%2+2) - - psadbw 
xmm5, xmm0 - psadbw xmm6, xmm0 - psadbw xmm7, xmm0 -%else - movdqa xmm0, XMMWORD PTR [rsi] - movdqa xmm4, XMMWORD PTR [rdi] - movdqa xmm3, XMMWORD PTR [rdi+16] - - movdqa xmm1, xmm3 - palignr xmm1, xmm4, %2 - - movdqa xmm2, xmm3 - palignr xmm2, xmm4, (%2+1) - - palignr xmm3, xmm4, (%2+2) - - psadbw xmm1, xmm0 - psadbw xmm2, xmm0 - psadbw xmm3, xmm0 - - paddw xmm5, xmm1 - paddw xmm6, xmm2 - paddw xmm7, xmm3 -%endif - movdqa xmm0, XMMWORD PTR [rsi+rax] - movdqa xmm4, XMMWORD PTR [rdi+rdx] - movdqa xmm3, XMMWORD PTR [rdi+rdx+16] - - movdqa xmm1, xmm3 - palignr xmm1, xmm4, %2 - - movdqa xmm2, xmm3 - palignr xmm2, xmm4, (%2+1) - - palignr xmm3, xmm4, (%2+2) - - lea rsi, [rsi+rax*2] - lea rdi, [rdi+rdx*2] - - psadbw xmm1, xmm0 - psadbw xmm2, xmm0 - psadbw xmm3, xmm0 - - paddw xmm5, xmm1 - paddw xmm6, xmm2 - paddw xmm7, xmm3 -%endmacro - -%macro PROCESS_16X16X3_OFFSET 2 -%2_aligned_by_%1: - - sub rdi, %1 - - PROCESS_16X2X3_OFFSET 1, %1 - PROCESS_16X2X3_OFFSET 0, %1 - PROCESS_16X2X3_OFFSET 0, %1 - PROCESS_16X2X3_OFFSET 0, %1 - PROCESS_16X2X3_OFFSET 0, %1 - PROCESS_16X2X3_OFFSET 0, %1 - PROCESS_16X2X3_OFFSET 0, %1 - PROCESS_16X2X3_OFFSET 0, %1 - - jmp %2_store_off - -%endmacro - -%macro PROCESS_16X8X3_OFFSET 2 -%2_aligned_by_%1: - - sub rdi, %1 - - PROCESS_16X2X3_OFFSET 1, %1 - PROCESS_16X2X3_OFFSET 0, %1 - PROCESS_16X2X3_OFFSET 0, %1 - PROCESS_16X2X3_OFFSET 0, %1 - - jmp %2_store_off - -%endmacro - -;void int vp9_sad16x16x3_ssse3( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride, -; int *results) -global sym(vp9_sad16x16x3_ssse3) -sym(vp9_sad16x16x3_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - SAVE_XMM 7 - push rsi - push rdi - push rcx - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - mov rdx, 0xf - and rdx, rdi - - jmp .vp9_sad16x16x3_ssse3_skiptable -.vp9_sad16x16x3_ssse3_jumptable: - dd .vp9_sad16x16x3_ssse3_aligned_by_0 - .vp9_sad16x16x3_ssse3_do_jump - dd 
.vp9_sad16x16x3_ssse3_aligned_by_1 - .vp9_sad16x16x3_ssse3_do_jump - dd .vp9_sad16x16x3_ssse3_aligned_by_2 - .vp9_sad16x16x3_ssse3_do_jump - dd .vp9_sad16x16x3_ssse3_aligned_by_3 - .vp9_sad16x16x3_ssse3_do_jump - dd .vp9_sad16x16x3_ssse3_aligned_by_4 - .vp9_sad16x16x3_ssse3_do_jump - dd .vp9_sad16x16x3_ssse3_aligned_by_5 - .vp9_sad16x16x3_ssse3_do_jump - dd .vp9_sad16x16x3_ssse3_aligned_by_6 - .vp9_sad16x16x3_ssse3_do_jump - dd .vp9_sad16x16x3_ssse3_aligned_by_7 - .vp9_sad16x16x3_ssse3_do_jump - dd .vp9_sad16x16x3_ssse3_aligned_by_8 - .vp9_sad16x16x3_ssse3_do_jump - dd .vp9_sad16x16x3_ssse3_aligned_by_9 - .vp9_sad16x16x3_ssse3_do_jump - dd .vp9_sad16x16x3_ssse3_aligned_by_10 - .vp9_sad16x16x3_ssse3_do_jump - dd .vp9_sad16x16x3_ssse3_aligned_by_11 - .vp9_sad16x16x3_ssse3_do_jump - dd .vp9_sad16x16x3_ssse3_aligned_by_12 - .vp9_sad16x16x3_ssse3_do_jump - dd .vp9_sad16x16x3_ssse3_aligned_by_13 - .vp9_sad16x16x3_ssse3_do_jump - dd .vp9_sad16x16x3_ssse3_aligned_by_14 - .vp9_sad16x16x3_ssse3_do_jump - dd .vp9_sad16x16x3_ssse3_aligned_by_15 - .vp9_sad16x16x3_ssse3_do_jump -.vp9_sad16x16x3_ssse3_skiptable: - - call .vp9_sad16x16x3_ssse3_do_jump -.vp9_sad16x16x3_ssse3_do_jump: - pop rcx ; get the address of do_jump - mov rax, .vp9_sad16x16x3_ssse3_jumptable - .vp9_sad16x16x3_ssse3_do_jump - add rax, rcx ; get the absolute address of vp9_sad16x16x3_ssse3_jumptable - - movsxd rax, dword [rax + 4*rdx] ; get the 32 bit offset from the jumptable - add rcx, rax - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - jmp rcx - - PROCESS_16X16X3_OFFSET 0, .vp9_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 1, .vp9_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 2, .vp9_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 3, .vp9_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 4, .vp9_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 5, .vp9_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 6, .vp9_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 7, .vp9_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 8, 
.vp9_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 9, .vp9_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 10, .vp9_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 11, .vp9_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 12, .vp9_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 13, .vp9_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 14, .vp9_sad16x16x3_ssse3 - -.vp9_sad16x16x3_ssse3_aligned_by_15: - PROCESS_16X2X3 1 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - -.vp9_sad16x16x3_ssse3_store_off: - mov rdi, arg(4) ;Results - - movq xmm0, xmm5 - psrldq xmm5, 8 - - paddw xmm0, xmm5 - movd [rdi], xmm0 -;- - movq xmm0, xmm6 - psrldq xmm6, 8 - - paddw xmm0, xmm6 - movd [rdi+4], xmm0 -;- - movq xmm0, xmm7 - psrldq xmm7, 8 - - paddw xmm0, xmm7 - movd [rdi+8], xmm0 - - ; begin epilog - pop rcx - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void int vp9_sad16x8x3_ssse3( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride, -; int *results) -global sym(vp9_sad16x8x3_ssse3) -sym(vp9_sad16x8x3_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - SAVE_XMM 7 - push rsi - push rdi - push rcx - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - mov rdx, 0xf - and rdx, rdi - - jmp .vp9_sad16x8x3_ssse3_skiptable -.vp9_sad16x8x3_ssse3_jumptable: - dd .vp9_sad16x8x3_ssse3_aligned_by_0 - .vp9_sad16x8x3_ssse3_do_jump - dd .vp9_sad16x8x3_ssse3_aligned_by_1 - .vp9_sad16x8x3_ssse3_do_jump - dd .vp9_sad16x8x3_ssse3_aligned_by_2 - .vp9_sad16x8x3_ssse3_do_jump - dd .vp9_sad16x8x3_ssse3_aligned_by_3 - .vp9_sad16x8x3_ssse3_do_jump - dd .vp9_sad16x8x3_ssse3_aligned_by_4 - .vp9_sad16x8x3_ssse3_do_jump - dd .vp9_sad16x8x3_ssse3_aligned_by_5 - .vp9_sad16x8x3_ssse3_do_jump - dd .vp9_sad16x8x3_ssse3_aligned_by_6 - .vp9_sad16x8x3_ssse3_do_jump - dd .vp9_sad16x8x3_ssse3_aligned_by_7 - .vp9_sad16x8x3_ssse3_do_jump - dd .vp9_sad16x8x3_ssse3_aligned_by_8 - 
.vp9_sad16x8x3_ssse3_do_jump - dd .vp9_sad16x8x3_ssse3_aligned_by_9 - .vp9_sad16x8x3_ssse3_do_jump - dd .vp9_sad16x8x3_ssse3_aligned_by_10 - .vp9_sad16x8x3_ssse3_do_jump - dd .vp9_sad16x8x3_ssse3_aligned_by_11 - .vp9_sad16x8x3_ssse3_do_jump - dd .vp9_sad16x8x3_ssse3_aligned_by_12 - .vp9_sad16x8x3_ssse3_do_jump - dd .vp9_sad16x8x3_ssse3_aligned_by_13 - .vp9_sad16x8x3_ssse3_do_jump - dd .vp9_sad16x8x3_ssse3_aligned_by_14 - .vp9_sad16x8x3_ssse3_do_jump - dd .vp9_sad16x8x3_ssse3_aligned_by_15 - .vp9_sad16x8x3_ssse3_do_jump -.vp9_sad16x8x3_ssse3_skiptable: - - call .vp9_sad16x8x3_ssse3_do_jump -.vp9_sad16x8x3_ssse3_do_jump: - pop rcx ; get the address of do_jump - mov rax, .vp9_sad16x8x3_ssse3_jumptable - .vp9_sad16x8x3_ssse3_do_jump - add rax, rcx ; get the absolute address of vp9_sad16x8x3_ssse3_jumptable - - movsxd rax, dword [rax + 4*rdx] ; get the 32 bit offset from the jumptable - add rcx, rax - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - jmp rcx - - PROCESS_16X8X3_OFFSET 0, .vp9_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 1, .vp9_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 2, .vp9_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 3, .vp9_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 4, .vp9_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 5, .vp9_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 6, .vp9_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 7, .vp9_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 8, .vp9_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 9, .vp9_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 10, .vp9_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 11, .vp9_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 12, .vp9_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 13, .vp9_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 14, .vp9_sad16x8x3_ssse3 - -.vp9_sad16x8x3_ssse3_aligned_by_15: - - PROCESS_16X2X3 1 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - -.vp9_sad16x8x3_ssse3_store_off: - mov rdi, arg(4) ;Results - - movq xmm0, xmm5 - psrldq xmm5, 8 - - paddw xmm0, xmm5 - movd [rdi], xmm0 -;- - 
movq xmm0, xmm6 - psrldq xmm6, 8 - - paddw xmm0, xmm6 - movd [rdi+4], xmm0 -;- - movq xmm0, xmm7 - psrldq xmm7, 8 - - paddw xmm0, xmm7 - movd [rdi+8], xmm0 - - ; begin epilog - pop rcx - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret diff --git a/vp8/encoder/x86/ssim_opt.asm b/vp8/encoder/x86/ssim_opt.asm deleted file mode 100644 index 905c263a6..000000000 --- a/vp8/encoder/x86/ssim_opt.asm +++ /dev/null @@ -1,216 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - -%include "vpx_ports/x86_abi_support.asm" - -; tabulate_ssim - sums sum_s,sum_r,sum_sq_s,sum_sq_r, sum_sxr -%macro TABULATE_SSIM 0 - paddusw xmm15, xmm3 ; sum_s - paddusw xmm14, xmm4 ; sum_r - movdqa xmm1, xmm3 - pmaddwd xmm1, xmm1 - paddd xmm13, xmm1 ; sum_sq_s - movdqa xmm2, xmm4 - pmaddwd xmm2, xmm2 - paddd xmm12, xmm2 ; sum_sq_r - pmaddwd xmm3, xmm4 - paddd xmm11, xmm3 ; sum_sxr -%endmacro - -; Sum across the register %1 starting with q words -%macro SUM_ACROSS_Q 1 - movdqa xmm2,%1 - punpckldq %1,xmm0 - punpckhdq xmm2,xmm0 - paddq %1,xmm2 - movdqa xmm2,%1 - punpcklqdq %1,xmm0 - punpckhqdq xmm2,xmm0 - paddq %1,xmm2 -%endmacro - -; Sum across the register %1 starting with q words -%macro SUM_ACROSS_W 1 - movdqa xmm1, %1 - punpcklwd %1,xmm0 - punpckhwd xmm1,xmm0 - paddd %1, xmm1 - SUM_ACROSS_Q %1 -%endmacro -;void ssim_parms_sse2( -; unsigned char *s, -; int sp, -; unsigned char *r, -; int rp -; unsigned long *sum_s, -; unsigned long *sum_r, -; unsigned long *sum_sq_s, -; unsigned long *sum_sq_r, -; unsigned long *sum_sxr); -; -; TODO: Use parm passing through structure, probably don't need the pxors -; ( calling app will initialize 
to 0 ) could easily fit everything in sse2 -; without too much hastle, and can probably do better estimates with psadw -; or pavgb At this point this is just meant to be first pass for calculating -; all the parms needed for 16x16 ssim so we can play with dssim as distortion -; in mode selection code. -global sym(vp9_ssim_parms_16x16_sse2) -sym(vp9_ssim_parms_16x16_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 9 - SAVE_XMM 15 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;s - mov rcx, arg(1) ;sp - mov rdi, arg(2) ;r - mov rax, arg(3) ;rp - - pxor xmm0, xmm0 - pxor xmm15,xmm15 ;sum_s - pxor xmm14,xmm14 ;sum_r - pxor xmm13,xmm13 ;sum_sq_s - pxor xmm12,xmm12 ;sum_sq_r - pxor xmm11,xmm11 ;sum_sxr - - mov rdx, 16 ;row counter -.NextRow: - - ;grab source and reference pixels - movdqu xmm5, [rsi] - movdqu xmm6, [rdi] - movdqa xmm3, xmm5 - movdqa xmm4, xmm6 - punpckhbw xmm3, xmm0 ; high_s - punpckhbw xmm4, xmm0 ; high_r - - TABULATE_SSIM - - movdqa xmm3, xmm5 - movdqa xmm4, xmm6 - punpcklbw xmm3, xmm0 ; low_s - punpcklbw xmm4, xmm0 ; low_r - - TABULATE_SSIM - - add rsi, rcx ; next s row - add rdi, rax ; next r row - - dec rdx ; counter - jnz .NextRow - - SUM_ACROSS_W xmm15 - SUM_ACROSS_W xmm14 - SUM_ACROSS_Q xmm13 - SUM_ACROSS_Q xmm12 - SUM_ACROSS_Q xmm11 - - mov rdi,arg(4) - movd [rdi], xmm15; - mov rdi,arg(5) - movd [rdi], xmm14; - mov rdi,arg(6) - movd [rdi], xmm13; - mov rdi,arg(7) - movd [rdi], xmm12; - mov rdi,arg(8) - movd [rdi], xmm11; - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void ssim_parms_sse2( -; unsigned char *s, -; int sp, -; unsigned char *r, -; int rp -; unsigned long *sum_s, -; unsigned long *sum_r, -; unsigned long *sum_sq_s, -; unsigned long *sum_sq_r, -; unsigned long *sum_sxr); -; -; TODO: Use parm passing through structure, probably don't need the pxors -; ( calling app will initialize to 0 ) could easily fit everything in sse2 -; without too much hastle, and can probably do better 
estimates with psadw -; or pavgb At this point this is just meant to be first pass for calculating -; all the parms needed for 16x16 ssim so we can play with dssim as distortion -; in mode selection code. -global sym(vp9_ssim_parms_8x8_sse2) -sym(vp9_ssim_parms_8x8_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 9 - SAVE_XMM 15 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;s - mov rcx, arg(1) ;sp - mov rdi, arg(2) ;r - mov rax, arg(3) ;rp - - pxor xmm0, xmm0 - pxor xmm15,xmm15 ;sum_s - pxor xmm14,xmm14 ;sum_r - pxor xmm13,xmm13 ;sum_sq_s - pxor xmm12,xmm12 ;sum_sq_r - pxor xmm11,xmm11 ;sum_sxr - - mov rdx, 8 ;row counter -.NextRow: - - ;grab source and reference pixels - movq xmm3, [rsi] - movq xmm4, [rdi] - punpcklbw xmm3, xmm0 ; low_s - punpcklbw xmm4, xmm0 ; low_r - - TABULATE_SSIM - - add rsi, rcx ; next s row - add rdi, rax ; next r row - - dec rdx ; counter - jnz .NextRow - - SUM_ACROSS_W xmm15 - SUM_ACROSS_W xmm14 - SUM_ACROSS_Q xmm13 - SUM_ACROSS_Q xmm12 - SUM_ACROSS_Q xmm11 - - mov rdi,arg(4) - movd [rdi], xmm15; - mov rdi,arg(5) - movd [rdi], xmm14; - mov rdi,arg(6) - movd [rdi], xmm13; - mov rdi,arg(7) - movd [rdi], xmm12; - mov rdi,arg(8) - movd [rdi], xmm11; - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret diff --git a/vp8/encoder/x86/subtract_mmx.asm b/vp8/encoder/x86/subtract_mmx.asm deleted file mode 100644 index 5b0e249ca..000000000 --- a/vp8/encoder/x86/subtract_mmx.asm +++ /dev/null @@ -1,432 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - -%include "vpx_ports/x86_abi_support.asm" - -;void vp9_subtract_b_mmx_impl(unsigned char *z, int src_stride, -; short *diff, unsigned char *Predictor, -; int pitch); -global sym(vp9_subtract_b_mmx_impl) -sym(vp9_subtract_b_mmx_impl): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - ; end prolog - - - mov rdi, arg(2) ;diff - mov rax, arg(3) ;Predictor - mov rsi, arg(0) ;z - movsxd rdx, dword ptr arg(1);src_stride; - movsxd rcx, dword ptr arg(4);pitch - pxor mm7, mm7 - - movd mm0, [rsi] - movd mm1, [rax] - punpcklbw mm0, mm7 - punpcklbw mm1, mm7 - psubw mm0, mm1 - movq [rdi], mm0 - - - movd mm0, [rsi+rdx] - movd mm1, [rax+rcx] - punpcklbw mm0, mm7 - punpcklbw mm1, mm7 - psubw mm0, mm1 - movq [rdi+rcx*2],mm0 - - - movd mm0, [rsi+rdx*2] - movd mm1, [rax+rcx*2] - punpcklbw mm0, mm7 - punpcklbw mm1, mm7 - psubw mm0, mm1 - movq [rdi+rcx*4], mm0 - - lea rsi, [rsi+rdx*2] - lea rcx, [rcx+rcx*2] - - - - movd mm0, [rsi+rdx] - movd mm1, [rax+rcx] - punpcklbw mm0, mm7 - punpcklbw mm1, mm7 - psubw mm0, mm1 - movq [rdi+rcx*2], mm0 - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -;void vp9_subtract_mby_mmx(short *diff, unsigned char *src, unsigned char *pred, int stride) -global sym(vp9_subtract_mby_mmx) -sym(vp9_subtract_mby_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - push rsi - push rdi - ; end prolog - - - mov rsi, arg(1) ;src - mov rdi, arg(0) ;diff - - mov rax, arg(2) ;pred - movsxd rdx, dword ptr arg(3) ;stride - - mov rcx, 16 - pxor mm0, mm0 - -.submby_loop: - - movq mm1, [rsi] - movq mm3, [rax] - - movq mm2, mm1 - movq mm4, mm3 - - punpcklbw mm1, mm0 - punpcklbw mm3, mm0 - - punpckhbw mm2, mm0 - punpckhbw mm4, mm0 - - psubw mm1, mm3 - psubw mm2, mm4 - - movq [rdi], mm1 - movq [rdi+8], mm2 - - - movq mm1, [rsi+8] - movq mm3, [rax+8] - - movq mm2, mm1 - movq mm4, mm3 - - punpcklbw mm1, mm0 - punpcklbw mm3, mm0 - - punpckhbw mm2, mm0 - punpckhbw mm4, mm0 - - psubw mm1, mm3 - psubw mm2, mm4 - - movq 
[rdi+16], mm1 - movq [rdi+24], mm2 - - - add rdi, 32 - add rax, 16 - - lea rsi, [rsi+rdx] - - sub rcx, 1 - jnz .submby_loop - - pop rdi - pop rsi - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret - - -;void vp9_subtract_mbuv_mmx(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride) -global sym(vp9_subtract_mbuv_mmx) -sym(vp9_subtract_mbuv_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - ; end prolog - - ;short *udiff = diff + 256; - ;short *vdiff = diff + 320; - ;unsigned char *upred = pred + 256; - ;unsigned char *vpred = pred + 320; - - ;unsigned char *z = usrc; - ;unsigned short *diff = udiff; - ;unsigned char *Predictor= upred; - - mov rdi, arg(0) ;diff - mov rax, arg(3) ;pred - mov rsi, arg(1) ;z = usrc - add rdi, 256*2 ;diff = diff + 256 (shorts) - add rax, 256 ;Predictor = pred + 256 - movsxd rdx, dword ptr arg(4) ;stride; - pxor mm7, mm7 - - movq mm0, [rsi] - movq mm1, [rax] - movq mm3, mm0 - movq mm4, mm1 - punpcklbw mm0, mm7 - punpcklbw mm1, mm7 - punpckhbw mm3, mm7 - punpckhbw mm4, mm7 - psubw mm0, mm1 - psubw mm3, mm4 - movq [rdi], mm0 - movq [rdi+8], mm3 - - - movq mm0, [rsi+rdx] - movq mm1, [rax+8] - movq mm3, mm0 - movq mm4, mm1 - punpcklbw mm0, mm7 - punpcklbw mm1, mm7 - punpckhbw mm3, mm7 - punpckhbw mm4, mm7 - psubw mm0, mm1 - psubw mm3, mm4 - movq [rdi+16], mm0 - movq [rdi+24], mm3 - - movq mm0, [rsi+rdx*2] - movq mm1, [rax+16] - movq mm3, mm0 - movq mm4, mm1 - punpcklbw mm0, mm7 - punpcklbw mm1, mm7 - punpckhbw mm3, mm7 - punpckhbw mm4, mm7 - psubw mm0, mm1 - psubw mm3, mm4 - movq [rdi+32], mm0 - movq [rdi+40], mm3 - lea rsi, [rsi+rdx*2] - - - movq mm0, [rsi+rdx] - movq mm1, [rax+24] - movq mm3, mm0 - movq mm4, mm1 - punpcklbw mm0, mm7 - punpcklbw mm1, mm7 - punpckhbw mm3, mm7 - punpckhbw mm4, mm7 - psubw mm0, mm1 - psubw mm3, mm4 - - movq [rdi+48], mm0 - movq [rdi+56], mm3 - - - add rdi, 64 - add rax, 32 - lea rsi, [rsi+rdx*2] - - - movq mm0, [rsi] - movq mm1, [rax] - movq mm3, 
mm0 - movq mm4, mm1 - punpcklbw mm0, mm7 - punpcklbw mm1, mm7 - punpckhbw mm3, mm7 - punpckhbw mm4, mm7 - psubw mm0, mm1 - psubw mm3, mm4 - movq [rdi], mm0 - movq [rdi+8], mm3 - - - movq mm0, [rsi+rdx] - movq mm1, [rax+8] - movq mm3, mm0 - movq mm4, mm1 - punpcklbw mm0, mm7 - punpcklbw mm1, mm7 - punpckhbw mm3, mm7 - punpckhbw mm4, mm7 - psubw mm0, mm1 - psubw mm3, mm4 - movq [rdi+16], mm0 - movq [rdi+24], mm3 - - movq mm0, [rsi+rdx*2] - movq mm1, [rax+16] - movq mm3, mm0 - movq mm4, mm1 - punpcklbw mm0, mm7 - punpcklbw mm1, mm7 - punpckhbw mm3, mm7 - punpckhbw mm4, mm7 - psubw mm0, mm1 - psubw mm3, mm4 - movq [rdi+32], mm0 - movq [rdi+40], mm3 - lea rsi, [rsi+rdx*2] - - - movq mm0, [rsi+rdx] - movq mm1, [rax+24] - movq mm3, mm0 - movq mm4, mm1 - punpcklbw mm0, mm7 - punpcklbw mm1, mm7 - punpckhbw mm3, mm7 - punpckhbw mm4, mm7 - psubw mm0, mm1 - psubw mm3, mm4 - - movq [rdi+48], mm0 - movq [rdi+56], mm3 - - ;unsigned char *z = vsrc; - ;unsigned short *diff = vdiff; - ;unsigned char *Predictor= vpred; - - mov rdi, arg(0) ;diff - mov rax, arg(3) ;pred - mov rsi, arg(2) ;z = usrc - add rdi, 320*2 ;diff = diff + 320 (shorts) - add rax, 320 ;Predictor = pred + 320 - movsxd rdx, dword ptr arg(4) ;stride; - pxor mm7, mm7 - - movq mm0, [rsi] - movq mm1, [rax] - movq mm3, mm0 - movq mm4, mm1 - punpcklbw mm0, mm7 - punpcklbw mm1, mm7 - punpckhbw mm3, mm7 - punpckhbw mm4, mm7 - psubw mm0, mm1 - psubw mm3, mm4 - movq [rdi], mm0 - movq [rdi+8], mm3 - - - movq mm0, [rsi+rdx] - movq mm1, [rax+8] - movq mm3, mm0 - movq mm4, mm1 - punpcklbw mm0, mm7 - punpcklbw mm1, mm7 - punpckhbw mm3, mm7 - punpckhbw mm4, mm7 - psubw mm0, mm1 - psubw mm3, mm4 - movq [rdi+16], mm0 - movq [rdi+24], mm3 - - movq mm0, [rsi+rdx*2] - movq mm1, [rax+16] - movq mm3, mm0 - movq mm4, mm1 - punpcklbw mm0, mm7 - punpcklbw mm1, mm7 - punpckhbw mm3, mm7 - punpckhbw mm4, mm7 - psubw mm0, mm1 - psubw mm3, mm4 - movq [rdi+32], mm0 - movq [rdi+40], mm3 - lea rsi, [rsi+rdx*2] - - - movq mm0, [rsi+rdx] - movq mm1, 
[rax+24] - movq mm3, mm0 - movq mm4, mm1 - punpcklbw mm0, mm7 - punpcklbw mm1, mm7 - punpckhbw mm3, mm7 - punpckhbw mm4, mm7 - psubw mm0, mm1 - psubw mm3, mm4 - - movq [rdi+48], mm0 - movq [rdi+56], mm3 - - - add rdi, 64 - add rax, 32 - lea rsi, [rsi+rdx*2] - - - movq mm0, [rsi] - movq mm1, [rax] - movq mm3, mm0 - movq mm4, mm1 - punpcklbw mm0, mm7 - punpcklbw mm1, mm7 - punpckhbw mm3, mm7 - punpckhbw mm4, mm7 - psubw mm0, mm1 - psubw mm3, mm4 - movq [rdi], mm0 - movq [rdi+8], mm3 - - - movq mm0, [rsi+rdx] - movq mm1, [rax+8] - movq mm3, mm0 - movq mm4, mm1 - punpcklbw mm0, mm7 - punpcklbw mm1, mm7 - punpckhbw mm3, mm7 - punpckhbw mm4, mm7 - psubw mm0, mm1 - psubw mm3, mm4 - movq [rdi+16], mm0 - movq [rdi+24], mm3 - - movq mm0, [rsi+rdx*2] - movq mm1, [rax+16] - movq mm3, mm0 - movq mm4, mm1 - punpcklbw mm0, mm7 - punpcklbw mm1, mm7 - punpckhbw mm3, mm7 - punpckhbw mm4, mm7 - psubw mm0, mm1 - psubw mm3, mm4 - movq [rdi+32], mm0 - movq [rdi+40], mm3 - lea rsi, [rsi+rdx*2] - - - movq mm0, [rsi+rdx] - movq mm1, [rax+24] - movq mm3, mm0 - movq mm4, mm1 - punpcklbw mm0, mm7 - punpcklbw mm1, mm7 - punpckhbw mm3, mm7 - punpckhbw mm4, mm7 - psubw mm0, mm1 - psubw mm3, mm4 - - movq [rdi+48], mm0 - movq [rdi+56], mm3 - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret diff --git a/vp8/encoder/x86/subtract_sse2.asm b/vp8/encoder/x86/subtract_sse2.asm deleted file mode 100644 index f84ed0697..000000000 --- a/vp8/encoder/x86/subtract_sse2.asm +++ /dev/null @@ -1,356 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - -%include "vpx_ports/x86_abi_support.asm" - -;void vp9_subtract_b_sse2_impl(unsigned char *z, int src_stride, -; short *diff, unsigned char *Predictor, -; int pitch); -global sym(vp9_subtract_b_sse2_impl) -sym(vp9_subtract_b_sse2_impl): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - mov rdi, arg(2) ;diff - mov rax, arg(3) ;Predictor - mov rsi, arg(0) ;z - movsxd rdx, dword ptr arg(1);src_stride; - movsxd rcx, dword ptr arg(4);pitch - pxor mm7, mm7 - - movd mm0, [rsi] - movd mm1, [rax] - punpcklbw mm0, mm7 - punpcklbw mm1, mm7 - psubw mm0, mm1 - movq MMWORD PTR [rdi], mm0 - - movd mm0, [rsi+rdx] - movd mm1, [rax+rcx] - punpcklbw mm0, mm7 - punpcklbw mm1, mm7 - psubw mm0, mm1 - movq MMWORD PTR [rdi+rcx*2], mm0 - - movd mm0, [rsi+rdx*2] - movd mm1, [rax+rcx*2] - punpcklbw mm0, mm7 - punpcklbw mm1, mm7 - psubw mm0, mm1 - movq MMWORD PTR [rdi+rcx*4], mm0 - - lea rsi, [rsi+rdx*2] - lea rcx, [rcx+rcx*2] - - movd mm0, [rsi+rdx] - movd mm1, [rax+rcx] - punpcklbw mm0, mm7 - punpcklbw mm1, mm7 - psubw mm0, mm1 - movq MMWORD PTR [rdi+rcx*2], mm0 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -;void vp9_subtract_mby_sse2(short *diff, unsigned char *src, unsigned char *pred, int stride) -global sym(vp9_subtract_mby_sse2) -sym(vp9_subtract_mby_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - mov rsi, arg(1) ;src - mov rdi, arg(0) ;diff - - mov rax, arg(2) ;pred - movsxd rdx, dword ptr arg(3) ;stride - - mov rcx, 8 ; do two lines at one time - -.submby_loop: - movdqa xmm0, XMMWORD PTR [rsi] ; src - movdqa xmm1, XMMWORD PTR [rax] ; pred - - movdqa xmm2, xmm0 - psubb xmm0, xmm1 - - pxor xmm1, [GLOBAL(t80)] ;convert to signed values - pxor xmm2, [GLOBAL(t80)] - pcmpgtb xmm1, xmm2 ; obtain sign information - - movdqa xmm2, xmm0 - movdqa xmm3, xmm1 - punpcklbw xmm0, xmm1 ; put sign back to subtraction - 
punpckhbw xmm2, xmm3 ; put sign back to subtraction - - movdqa XMMWORD PTR [rdi], xmm0 - movdqa XMMWORD PTR [rdi +16], xmm2 - - movdqa xmm4, XMMWORD PTR [rsi + rdx] - movdqa xmm5, XMMWORD PTR [rax + 16] - - movdqa xmm6, xmm4 - psubb xmm4, xmm5 - - pxor xmm5, [GLOBAL(t80)] ;convert to signed values - pxor xmm6, [GLOBAL(t80)] - pcmpgtb xmm5, xmm6 ; obtain sign information - - movdqa xmm6, xmm4 - movdqa xmm7, xmm5 - punpcklbw xmm4, xmm5 ; put sign back to subtraction - punpckhbw xmm6, xmm7 ; put sign back to subtraction - - movdqa XMMWORD PTR [rdi +32], xmm4 - movdqa XMMWORD PTR [rdi +48], xmm6 - - add rdi, 64 - add rax, 32 - lea rsi, [rsi+rdx*2] - - sub rcx, 1 - jnz .submby_loop - - pop rdi - pop rsi - ; begin epilog - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -;void vp9_subtract_mbuv_sse2(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride) -global sym(vp9_subtract_mbuv_sse2) -sym(vp9_subtract_mbuv_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - mov rdi, arg(0) ;diff - mov rax, arg(3) ;pred - mov rsi, arg(1) ;z = usrc - add rdi, 256*2 ;diff = diff + 256 (shorts) - add rax, 256 ;Predictor = pred + 256 - movsxd rdx, dword ptr arg(4) ;stride; - lea rcx, [rdx + rdx*2] - - ;u - ;line 0 1 - movq xmm0, MMWORD PTR [rsi] ; src - movq xmm2, MMWORD PTR [rsi+rdx] - movdqa xmm1, XMMWORD PTR [rax] ; pred - punpcklqdq xmm0, xmm2 - - movdqa xmm2, xmm0 - psubb xmm0, xmm1 ; subtraction with sign missed - - pxor xmm1, [GLOBAL(t80)] ;convert to signed values - pxor xmm2, [GLOBAL(t80)] - pcmpgtb xmm1, xmm2 ; obtain sign information - - movdqa xmm2, xmm0 - movdqa xmm3, xmm1 - punpcklbw xmm0, xmm1 ; put sign back to subtraction - punpckhbw xmm2, xmm3 ; put sign back to subtraction - - movdqa XMMWORD PTR [rdi], xmm0 - movdqa XMMWORD PTR [rdi +16], xmm2 - - ;line 2 3 - movq xmm0, MMWORD PTR [rsi+rdx*2] ; src - movq xmm2, MMWORD PTR [rsi+rcx] - movdqa xmm1, XMMWORD PTR 
[rax+16] ; pred - punpcklqdq xmm0, xmm2 - - movdqa xmm2, xmm0 - psubb xmm0, xmm1 ; subtraction with sign missed - - pxor xmm1, [GLOBAL(t80)] ;convert to signed values - pxor xmm2, [GLOBAL(t80)] - pcmpgtb xmm1, xmm2 ; obtain sign information - - movdqa xmm2, xmm0 - movdqa xmm3, xmm1 - punpcklbw xmm0, xmm1 ; put sign back to subtraction - punpckhbw xmm2, xmm3 ; put sign back to subtraction - - movdqa XMMWORD PTR [rdi + 32], xmm0 - movdqa XMMWORD PTR [rdi + 48], xmm2 - - ;line 4 5 - lea rsi, [rsi + rdx*4] - - movq xmm0, MMWORD PTR [rsi] ; src - movq xmm2, MMWORD PTR [rsi+rdx] - movdqa xmm1, XMMWORD PTR [rax + 32] ; pred - punpcklqdq xmm0, xmm2 - - movdqa xmm2, xmm0 - psubb xmm0, xmm1 ; subtraction with sign missed - - pxor xmm1, [GLOBAL(t80)] ;convert to signed values - pxor xmm2, [GLOBAL(t80)] - pcmpgtb xmm1, xmm2 ; obtain sign information - - movdqa xmm2, xmm0 - movdqa xmm3, xmm1 - punpcklbw xmm0, xmm1 ; put sign back to subtraction - punpckhbw xmm2, xmm3 ; put sign back to subtraction - - movdqa XMMWORD PTR [rdi + 64], xmm0 - movdqa XMMWORD PTR [rdi + 80], xmm2 - - ;line 6 7 - movq xmm0, MMWORD PTR [rsi+rdx*2] ; src - movq xmm2, MMWORD PTR [rsi+rcx] - movdqa xmm1, XMMWORD PTR [rax+ 48] ; pred - punpcklqdq xmm0, xmm2 - - movdqa xmm2, xmm0 - psubb xmm0, xmm1 ; subtraction with sign missed - - pxor xmm1, [GLOBAL(t80)] ;convert to signed values - pxor xmm2, [GLOBAL(t80)] - pcmpgtb xmm1, xmm2 ; obtain sign information - - movdqa xmm2, xmm0 - movdqa xmm3, xmm1 - punpcklbw xmm0, xmm1 ; put sign back to subtraction - punpckhbw xmm2, xmm3 ; put sign back to subtraction - - movdqa XMMWORD PTR [rdi + 96], xmm0 - movdqa XMMWORD PTR [rdi + 112], xmm2 - - ;v - mov rsi, arg(2) ;z = vsrc - add rdi, 64*2 ;diff = diff + 320 (shorts) - add rax, 64 ;Predictor = pred + 320 - - ;line 0 1 - movq xmm0, MMWORD PTR [rsi] ; src - movq xmm2, MMWORD PTR [rsi+rdx] - movdqa xmm1, XMMWORD PTR [rax] ; pred - punpcklqdq xmm0, xmm2 - - movdqa xmm2, xmm0 - psubb xmm0, xmm1 ; subtraction with sign 
missed - - pxor xmm1, [GLOBAL(t80)] ;convert to signed values - pxor xmm2, [GLOBAL(t80)] - pcmpgtb xmm1, xmm2 ; obtain sign information - - movdqa xmm2, xmm0 - movdqa xmm3, xmm1 - punpcklbw xmm0, xmm1 ; put sign back to subtraction - punpckhbw xmm2, xmm3 ; put sign back to subtraction - - movdqa XMMWORD PTR [rdi], xmm0 - movdqa XMMWORD PTR [rdi +16], xmm2 - - ;line 2 3 - movq xmm0, MMWORD PTR [rsi+rdx*2] ; src - movq xmm2, MMWORD PTR [rsi+rcx] - movdqa xmm1, XMMWORD PTR [rax+16] ; pred - punpcklqdq xmm0, xmm2 - - movdqa xmm2, xmm0 - psubb xmm0, xmm1 ; subtraction with sign missed - - pxor xmm1, [GLOBAL(t80)] ;convert to signed values - pxor xmm2, [GLOBAL(t80)] - pcmpgtb xmm1, xmm2 ; obtain sign information - - movdqa xmm2, xmm0 - movdqa xmm3, xmm1 - punpcklbw xmm0, xmm1 ; put sign back to subtraction - punpckhbw xmm2, xmm3 ; put sign back to subtraction - - movdqa XMMWORD PTR [rdi + 32], xmm0 - movdqa XMMWORD PTR [rdi + 48], xmm2 - - ;line 4 5 - lea rsi, [rsi + rdx*4] - - movq xmm0, MMWORD PTR [rsi] ; src - movq xmm2, MMWORD PTR [rsi+rdx] - movdqa xmm1, XMMWORD PTR [rax + 32] ; pred - punpcklqdq xmm0, xmm2 - - movdqa xmm2, xmm0 - psubb xmm0, xmm1 ; subtraction with sign missed - - pxor xmm1, [GLOBAL(t80)] ;convert to signed values - pxor xmm2, [GLOBAL(t80)] - pcmpgtb xmm1, xmm2 ; obtain sign information - - movdqa xmm2, xmm0 - movdqa xmm3, xmm1 - punpcklbw xmm0, xmm1 ; put sign back to subtraction - punpckhbw xmm2, xmm3 ; put sign back to subtraction - - movdqa XMMWORD PTR [rdi + 64], xmm0 - movdqa XMMWORD PTR [rdi + 80], xmm2 - - ;line 6 7 - movq xmm0, MMWORD PTR [rsi+rdx*2] ; src - movq xmm2, MMWORD PTR [rsi+rcx] - movdqa xmm1, XMMWORD PTR [rax+ 48] ; pred - punpcklqdq xmm0, xmm2 - - movdqa xmm2, xmm0 - psubb xmm0, xmm1 ; subtraction with sign missed - - pxor xmm1, [GLOBAL(t80)] ;convert to signed values - pxor xmm2, [GLOBAL(t80)] - pcmpgtb xmm1, xmm2 ; obtain sign information - - movdqa xmm2, xmm0 - movdqa xmm3, xmm1 - punpcklbw xmm0, xmm1 ; put sign back to 
subtraction - punpckhbw xmm2, xmm3 ; put sign back to subtraction - - movdqa XMMWORD PTR [rdi + 96], xmm0 - movdqa XMMWORD PTR [rdi + 112], xmm2 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - -SECTION_RODATA -align 16 -t80: - times 16 db 0x80 diff --git a/vp8/encoder/x86/temporal_filter_apply_sse2.asm b/vp8/encoder/x86/temporal_filter_apply_sse2.asm deleted file mode 100644 index 60cc80f15..000000000 --- a/vp8/encoder/x86/temporal_filter_apply_sse2.asm +++ /dev/null @@ -1,207 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - -; void vp9_temporal_filter_apply_sse2 | arg -; (unsigned char *frame1, | 0 -; unsigned int stride, | 1 -; unsigned char *frame2, | 2 -; unsigned int block_size, | 3 -; int strength, | 4 -; int filter_weight, | 5 -; unsigned int *accumulator, | 6 -; unsigned short *count) | 7 -global sym(vp9_temporal_filter_apply_sse2) -sym(vp9_temporal_filter_apply_sse2): - - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 8 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ALIGN_STACK 16, rax - %define block_size 0 - %define strength 16 - %define filter_weight 32 - %define rounding_bit 48 - %define rbp_backup 64 - %define stack_size 80 - sub rsp, stack_size - mov [rsp + rbp_backup], rbp - ; end prolog - - mov rdx, arg(3) - mov [rsp + block_size], rdx - movd xmm6, arg(4) - movdqa [rsp + strength], xmm6 ; where strength is used, all 16 bytes are read - - ; calculate the rounding bit outside the loop - ; 0x8000 >> (16 - strength) - mov rdx, 16 - sub rdx, arg(4) ; 16 - strength - movd xmm4, rdx ; can't use rdx w/ shift - 
movdqa xmm5, [GLOBAL(_const_top_bit)] - psrlw xmm5, xmm4 - movdqa [rsp + rounding_bit], xmm5 - - mov rsi, arg(0) ; src/frame1 - mov rdx, arg(2) ; predictor frame - mov rdi, arg(6) ; accumulator - mov rax, arg(7) ; count - - ; dup the filter weight and store for later - movd xmm0, arg(5) ; filter_weight - pshuflw xmm0, xmm0, 0 - punpcklwd xmm0, xmm0 - movdqa [rsp + filter_weight], xmm0 - - mov rbp, arg(1) ; stride - pxor xmm7, xmm7 ; zero for extraction - - lea rcx, [rdx + 16*16*1] - cmp dword ptr [rsp + block_size], 8 - jne .temporal_filter_apply_load_16 - lea rcx, [rdx + 8*8*1] - -.temporal_filter_apply_load_8: - movq xmm0, [rsi] ; first row - lea rsi, [rsi + rbp] ; += stride - punpcklbw xmm0, xmm7 ; src[ 0- 7] - movq xmm1, [rsi] ; second row - lea rsi, [rsi + rbp] ; += stride - punpcklbw xmm1, xmm7 ; src[ 8-15] - jmp .temporal_filter_apply_load_finished - -.temporal_filter_apply_load_16: - movdqa xmm0, [rsi] ; src (frame1) - lea rsi, [rsi + rbp] ; += stride - movdqa xmm1, xmm0 - punpcklbw xmm0, xmm7 ; src[ 0- 7] - punpckhbw xmm1, xmm7 ; src[ 8-15] - -.temporal_filter_apply_load_finished: - movdqa xmm2, [rdx] ; predictor (frame2) - movdqa xmm3, xmm2 - punpcklbw xmm2, xmm7 ; pred[ 0- 7] - punpckhbw xmm3, xmm7 ; pred[ 8-15] - - ; modifier = src_byte - pixel_value - psubw xmm0, xmm2 ; src - pred[ 0- 7] - psubw xmm1, xmm3 ; src - pred[ 8-15] - - ; modifier *= modifier - pmullw xmm0, xmm0 ; modifer[ 0- 7]^2 - pmullw xmm1, xmm1 ; modifer[ 8-15]^2 - - ; modifier *= 3 - pmullw xmm0, [GLOBAL(_const_3w)] - pmullw xmm1, [GLOBAL(_const_3w)] - - ; modifer += 0x8000 >> (16 - strength) - paddw xmm0, [rsp + rounding_bit] - paddw xmm1, [rsp + rounding_bit] - - ; modifier >>= strength - psrlw xmm0, [rsp + strength] - psrlw xmm1, [rsp + strength] - - ; modifier = 16 - modifier - ; saturation takes care of modifier > 16 - movdqa xmm3, [GLOBAL(_const_16w)] - movdqa xmm2, [GLOBAL(_const_16w)] - psubusw xmm3, xmm1 - psubusw xmm2, xmm0 - - ; modifier *= filter_weight - pmullw xmm2, [rsp 
+ filter_weight] - pmullw xmm3, [rsp + filter_weight] - - ; count - movdqa xmm4, [rax] - movdqa xmm5, [rax+16] - ; += modifier - paddw xmm4, xmm2 - paddw xmm5, xmm3 - ; write back - movdqa [rax], xmm4 - movdqa [rax+16], xmm5 - lea rax, [rax + 16*2] ; count += 16*(sizeof(short)) - - ; load and extract the predictor up to shorts - pxor xmm7, xmm7 - movdqa xmm0, [rdx] - lea rdx, [rdx + 16*1] ; pred += 16*(sizeof(char)) - movdqa xmm1, xmm0 - punpcklbw xmm0, xmm7 ; pred[ 0- 7] - punpckhbw xmm1, xmm7 ; pred[ 8-15] - - ; modifier *= pixel_value - pmullw xmm0, xmm2 - pmullw xmm1, xmm3 - - ; expand to double words - movdqa xmm2, xmm0 - punpcklwd xmm0, xmm7 ; [ 0- 3] - punpckhwd xmm2, xmm7 ; [ 4- 7] - movdqa xmm3, xmm1 - punpcklwd xmm1, xmm7 ; [ 8-11] - punpckhwd xmm3, xmm7 ; [12-15] - - ; accumulator - movdqa xmm4, [rdi] - movdqa xmm5, [rdi+16] - movdqa xmm6, [rdi+32] - movdqa xmm7, [rdi+48] - ; += modifier - paddd xmm4, xmm0 - paddd xmm5, xmm2 - paddd xmm6, xmm1 - paddd xmm7, xmm3 - ; write back - movdqa [rdi], xmm4 - movdqa [rdi+16], xmm5 - movdqa [rdi+32], xmm6 - movdqa [rdi+48], xmm7 - lea rdi, [rdi + 16*4] ; accumulator += 16*(sizeof(int)) - - cmp rdx, rcx - je .temporal_filter_apply_epilog - pxor xmm7, xmm7 ; zero for extraction - cmp dword ptr [rsp + block_size], 16 - je .temporal_filter_apply_load_16 - jmp .temporal_filter_apply_load_8 - -.temporal_filter_apply_epilog: - ; begin epilog - mov rbp, [rsp + rbp_backup] - add rsp, stack_size - pop rsp - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -SECTION_RODATA -align 16 -_const_3w: - times 8 dw 3 -align 16 -_const_top_bit: - times 8 dw 1<<15 -align 16 -_const_16w - times 8 dw 16 diff --git a/vp8/encoder/x86/temporal_filter_x86.h b/vp8/encoder/x86/temporal_filter_x86.h deleted file mode 100644 index 8fae2200d..000000000 --- a/vp8/encoder/x86/temporal_filter_x86.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef __INC_TEMPORAL_FILTER_X86_H -#define __INC_TEMPORAL_FILTER_X86_H - -#if HAVE_SSE2 -extern prototype_apply(vp9_temporal_filter_apply_sse2); - -#if !CONFIG_RUNTIME_CPU_DETECT - -#undef vp9_temporal_filter_apply -#define vp9_temporal_filter_apply vp9_temporal_filter_apply_sse2 - -#endif - -#endif - -#endif // __INC_TEMPORAL_FILTER_X86_H diff --git a/vp8/encoder/x86/variance_impl_mmx.asm b/vp8/encoder/x86/variance_impl_mmx.asm deleted file mode 100644 index 45c30b089..000000000 --- a/vp8/encoder/x86/variance_impl_mmx.asm +++ /dev/null @@ -1,851 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - -%include "vpx_ports/x86_abi_support.asm" - -;unsigned int vp9_get_mb_ss_mmx( short *src_ptr ) -global sym(vp9_get_mb_ss_mmx) -sym(vp9_get_mb_ss_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - GET_GOT rbx - push rsi - push rdi - sub rsp, 8 - ; end prolog - - mov rax, arg(0) ;src_ptr - mov rcx, 16 - pxor mm4, mm4 - -.NEXTROW: - movq mm0, [rax] - movq mm1, [rax+8] - movq mm2, [rax+16] - movq mm3, [rax+24] - pmaddwd mm0, mm0 - pmaddwd mm1, mm1 - pmaddwd mm2, mm2 - pmaddwd mm3, mm3 - - paddd mm4, mm0 - paddd mm4, mm1 - paddd mm4, mm2 - paddd mm4, mm3 - - add rax, 32 - dec rcx - ja .NEXTROW - movq QWORD PTR [rsp], mm4 - - ;return sum[0]+sum[1]; - movsxd rax, dword ptr [rsp] - movsxd rcx, dword ptr [rsp+4] - add rax, rcx - - - ; begin epilog - add rsp, 8 - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -;unsigned int vp9_get8x8var_mmx -;( -; unsigned char *src_ptr, -; int source_stride, -; unsigned char *ref_ptr, -; int recon_stride, -; unsigned int *SSE, -; int *Sum -;) -global sym(vp9_get8x8var_mmx) -sym(vp9_get8x8var_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - push rsi - push rdi - push rbx - sub rsp, 16 - ; end prolog - - - pxor mm5, mm5 ; Blank mmx6 - pxor mm6, mm6 ; Blank mmx7 - pxor mm7, mm7 ; Blank mmx7 - - mov rax, arg(0) ;[src_ptr] ; Load base addresses - mov rbx, arg(2) ;[ref_ptr] - movsxd rcx, dword ptr arg(1) ;[source_stride] - movsxd rdx, dword ptr arg(3) ;[recon_stride] - - ; Row 1 - movq mm0, [rax] ; Copy eight bytes to mm0 - movq mm1, [rbx] ; Copy eight bytes to mm1 - movq mm2, mm0 ; Take copies - movq mm3, mm1 ; Take copies - - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - punpckhbw mm2, mm6 ; unpack to higher prrcision - punpckhbw mm3, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - psubsw mm2, mm3 ; A-B (high order) to MM2 - - paddw mm5, mm0 ; accumulate differences in mm5 - paddw mm5, mm2 ; accumulate differences in mm5 - - pmaddwd mm0, mm0 ; square and accumulate - 
pmaddwd mm2, mm2 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movq mm1, [rbx] ; Copy eight bytes to mm1 - paddd mm7, mm0 ; accumulate in mm7 - paddd mm7, mm2 ; accumulate in mm7 - - - ; Row 2 - movq mm0, [rax] ; Copy eight bytes to mm0 - movq mm2, mm0 ; Take copies - movq mm3, mm1 ; Take copies - - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - punpckhbw mm2, mm6 ; unpack to higher prrcision - punpckhbw mm3, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - psubsw mm2, mm3 ; A-B (high order) to MM2 - - paddw mm5, mm0 ; accumulate differences in mm5 - paddw mm5, mm2 ; accumulate differences in mm5 - - pmaddwd mm0, mm0 ; square and accumulate - pmaddwd mm2, mm2 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movq mm1, [rbx] ; Copy eight bytes to mm1 - paddd mm7, mm0 ; accumulate in mm7 - paddd mm7, mm2 ; accumulate in mm7 - - ; Row 3 - movq mm0, [rax] ; Copy eight bytes to mm0 - movq mm2, mm0 ; Take copies - movq mm3, mm1 ; Take copies - - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - punpckhbw mm2, mm6 ; unpack to higher prrcision - punpckhbw mm3, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - psubsw mm2, mm3 ; A-B (high order) to MM2 - - paddw mm5, mm0 ; accumulate differences in mm5 - paddw mm5, mm2 ; accumulate differences in mm5 - - pmaddwd mm0, mm0 ; square and accumulate - pmaddwd mm2, mm2 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movq mm1, [rbx] ; Copy eight bytes to mm1 - paddd mm7, mm0 ; accumulate in mm7 - paddd mm7, mm2 ; accumulate in mm7 - - ; Row 4 - movq mm0, [rax] ; Copy eight bytes to mm0 - movq mm2, mm0 ; Take copies - movq mm3, mm1 ; Take copies - - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - punpckhbw mm2, mm6 ; unpack to higher prrcision - punpckhbw mm3, mm6 - psubsw 
mm0, mm1 ; A-B (low order) to MM0 - psubsw mm2, mm3 ; A-B (high order) to MM2 - - paddw mm5, mm0 ; accumulate differences in mm5 - paddw mm5, mm2 ; accumulate differences in mm5 - - pmaddwd mm0, mm0 ; square and accumulate - pmaddwd mm2, mm2 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movq mm1, [rbx] ; Copy eight bytes to mm1 - paddd mm7, mm0 ; accumulate in mm7 - paddd mm7, mm2 ; accumulate in mm7 - - ; Row 5 - movq mm0, [rax] ; Copy eight bytes to mm0 - movq mm2, mm0 ; Take copies - movq mm3, mm1 ; Take copies - - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - punpckhbw mm2, mm6 ; unpack to higher prrcision - punpckhbw mm3, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - psubsw mm2, mm3 ; A-B (high order) to MM2 - - paddw mm5, mm0 ; accumulate differences in mm5 - paddw mm5, mm2 ; accumulate differences in mm5 - - pmaddwd mm0, mm0 ; square and accumulate - pmaddwd mm2, mm2 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movq mm1, [rbx] ; Copy eight bytes to mm1 - ; movq mm4, [rbx + rdx] - paddd mm7, mm0 ; accumulate in mm7 - paddd mm7, mm2 ; accumulate in mm7 - - ; Row 6 - movq mm0, [rax] ; Copy eight bytes to mm0 - movq mm2, mm0 ; Take copies - movq mm3, mm1 ; Take copies - - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - punpckhbw mm2, mm6 ; unpack to higher prrcision - punpckhbw mm3, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - psubsw mm2, mm3 ; A-B (high order) to MM2 - - paddw mm5, mm0 ; accumulate differences in mm5 - paddw mm5, mm2 ; accumulate differences in mm5 - - pmaddwd mm0, mm0 ; square and accumulate - pmaddwd mm2, mm2 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movq mm1, [rbx] ; Copy eight bytes to mm1 - paddd mm7, mm0 ; accumulate in mm7 - paddd mm7, mm2 ; accumulate in mm7 - - ; Row 7 - movq 
mm0, [rax] ; Copy eight bytes to mm0 - movq mm2, mm0 ; Take copies - movq mm3, mm1 ; Take copies - - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - punpckhbw mm2, mm6 ; unpack to higher prrcision - punpckhbw mm3, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - psubsw mm2, mm3 ; A-B (high order) to MM2 - - paddw mm5, mm0 ; accumulate differences in mm5 - paddw mm5, mm2 ; accumulate differences in mm5 - - pmaddwd mm0, mm0 ; square and accumulate - pmaddwd mm2, mm2 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movq mm1, [rbx] ; Copy eight bytes to mm1 - paddd mm7, mm0 ; accumulate in mm7 - paddd mm7, mm2 ; accumulate in mm7 - - ; Row 8 - movq mm0, [rax] ; Copy eight bytes to mm0 - movq mm2, mm0 ; Take copies - movq mm3, mm1 ; Take copies - - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - punpckhbw mm2, mm6 ; unpack to higher prrcision - punpckhbw mm3, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - psubsw mm2, mm3 ; A-B (high order) to MM2 - - paddw mm5, mm0 ; accumulate differences in mm5 - paddw mm5, mm2 ; accumulate differences in mm5 - - pmaddwd mm0, mm0 ; square and accumulate - pmaddwd mm2, mm2 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - paddd mm7, mm0 ; accumulate in mm7 - paddd mm7, mm2 ; accumulate in mm7 - - ; Now accumulate the final results. 
- movq QWORD PTR [rsp+8], mm5 ; copy back accumulated results into normal memory - movq QWORD PTR [rsp], mm7 ; copy back accumulated results into normal memory - movsx rdx, WORD PTR [rsp+8] - movsx rcx, WORD PTR [rsp+10] - movsx rbx, WORD PTR [rsp+12] - movsx rax, WORD PTR [rsp+14] - add rdx, rcx - add rbx, rax - add rdx, rbx ;XSum - movsxd rax, DWORD PTR [rsp] - movsxd rcx, DWORD PTR [rsp+4] - add rax, rcx ;XXSum - mov rsi, arg(4) ;SSE - mov rdi, arg(5) ;Sum - mov dword ptr [rsi], eax - mov dword ptr [rdi], edx - xor rax, rax ; return 0 - - - ; begin epilog - add rsp, 16 - pop rbx - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - - - -;unsigned int -;vp9_get4x4var_mmx -;( -; unsigned char *src_ptr, -; int source_stride, -; unsigned char *ref_ptr, -; int recon_stride, -; unsigned int *SSE, -; int *Sum -;) -global sym(vp9_get4x4var_mmx) -sym(vp9_get4x4var_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - push rsi - push rdi - push rbx - sub rsp, 16 - ; end prolog - - - pxor mm5, mm5 ; Blank mmx6 - pxor mm6, mm6 ; Blank mmx7 - pxor mm7, mm7 ; Blank mmx7 - - mov rax, arg(0) ;[src_ptr] ; Load base addresses - mov rbx, arg(2) ;[ref_ptr] - movsxd rcx, dword ptr arg(1) ;[source_stride] - movsxd rdx, dword ptr arg(3) ;[recon_stride] - - ; Row 1 - movq mm0, [rax] ; Copy eight bytes to mm0 - movq mm1, [rbx] ; Copy eight bytes to mm1 - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - paddw mm5, mm0 ; accumulate differences in mm5 - pmaddwd mm0, mm0 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movq mm1, [rbx] ; Copy eight bytes to mm1 - paddd mm7, mm0 ; accumulate in mm7 - - - ; Row 2 - movq mm0, [rax] ; Copy eight bytes to mm0 - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - paddw mm5, mm0 ; accumulate differences in mm5 - - pmaddwd mm0, mm0 ; square and accumulate 
- add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movq mm1, [rbx] ; Copy eight bytes to mm1 - paddd mm7, mm0 ; accumulate in mm7 - - ; Row 3 - movq mm0, [rax] ; Copy eight bytes to mm0 - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - paddw mm5, mm0 ; accumulate differences in mm5 - - pmaddwd mm0, mm0 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movq mm1, [rbx] ; Copy eight bytes to mm1 - paddd mm7, mm0 ; accumulate in mm7 - - ; Row 4 - movq mm0, [rax] ; Copy eight bytes to mm0 - - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - - paddw mm5, mm0 ; accumulate differences in mm5 - - pmaddwd mm0, mm0 ; square and accumulate - paddd mm7, mm0 ; accumulate in mm7 - - - ; Now accumulate the final results. - movq QWORD PTR [rsp+8], mm5 ; copy back accumulated results into normal memory - movq QWORD PTR [rsp], mm7 ; copy back accumulated results into normal memory - movsx rdx, WORD PTR [rsp+8] - movsx rcx, WORD PTR [rsp+10] - movsx rbx, WORD PTR [rsp+12] - movsx rax, WORD PTR [rsp+14] - add rdx, rcx - add rbx, rax - add rdx, rbx ;XSum - movsxd rax, DWORD PTR [rsp] - movsxd rcx, DWORD PTR [rsp+4] - add rax, rcx ;XXSum - mov rsi, arg(4) ;SSE - mov rdi, arg(5) ;Sum - mov dword ptr [rsi], eax - mov dword ptr [rdi], edx - xor rax, rax ; return 0 - - - ; begin epilog - add rsp, 16 - pop rbx - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - - - -;unsigned int -;vp9_get4x4sse_cs_mmx -;( -; unsigned char *src_ptr, -; int source_stride, -; unsigned char *ref_ptr, -; int recon_stride -;) -global sym(vp9_get4x4sse_cs_mmx) -sym(vp9_get4x4sse_cs_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - push rsi - push rdi - push rbx - ; end prolog - - - pxor mm6, mm6 ; Blank mmx7 - pxor mm7, mm7 ; Blank mmx7 - - mov rax, arg(0) ;[src_ptr] ; Load 
base addresses - mov rbx, arg(2) ;[ref_ptr] - movsxd rcx, dword ptr arg(1) ;[source_stride] - movsxd rdx, dword ptr arg(3) ;[recon_stride] - ; Row 1 - movd mm0, [rax] ; Copy eight bytes to mm0 - movd mm1, [rbx] ; Copy eight bytes to mm1 - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - pmaddwd mm0, mm0 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movd mm1, [rbx] ; Copy eight bytes to mm1 - paddd mm7, mm0 ; accumulate in mm7 - - ; Row 2 - movd mm0, [rax] ; Copy eight bytes to mm0 - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - pmaddwd mm0, mm0 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movd mm1, [rbx] ; Copy eight bytes to mm1 - paddd mm7, mm0 ; accumulate in mm7 - - ; Row 3 - movd mm0, [rax] ; Copy eight bytes to mm0 - punpcklbw mm1, mm6 - punpcklbw mm0, mm6 ; unpack to higher prrcision - psubsw mm0, mm1 ; A-B (low order) to MM0 - - pmaddwd mm0, mm0 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movd mm1, [rbx] ; Copy eight bytes to mm1 - paddd mm7, mm0 ; accumulate in mm7 - - ; Row 4 - movd mm0, [rax] ; Copy eight bytes to mm0 - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - pmaddwd mm0, mm0 ; square and accumulate - paddd mm7, mm0 ; accumulate in mm7 - - movq mm0, mm7 ; - psrlq mm7, 32 - - paddd mm0, mm7 - movq rax, mm0 - - - ; begin epilog - pop rbx - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -%define mmx_filter_shift 7 - -;void vp9_filter_block2d_bil4x4_var_mmx -;( -; unsigned char *ref_ptr, -; int ref_pixels_per_line, -; unsigned char *src_ptr, -; int src_pixels_per_line, -; unsigned short *HFilter, -; unsigned short *VFilter, -; int *sum, -; 
unsigned int *sumsquared -;) -global sym(vp9_filter_block2d_bil4x4_var_mmx) -sym(vp9_filter_block2d_bil4x4_var_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 8 - GET_GOT rbx - push rsi - push rdi - sub rsp, 16 - ; end prolog - - - pxor mm6, mm6 ; - pxor mm7, mm7 ; - - mov rax, arg(4) ;HFilter ; - mov rdx, arg(5) ;VFilter ; - - mov rsi, arg(0) ;ref_ptr ; - mov rdi, arg(2) ;src_ptr ; - - mov rcx, 4 ; - pxor mm0, mm0 ; - - movd mm1, [rsi] ; - movd mm3, [rsi+1] ; - - punpcklbw mm1, mm0 ; - pmullw mm1, [rax] ; - - punpcklbw mm3, mm0 ; - pmullw mm3, [rax+8] ; - - paddw mm1, mm3 ; - paddw mm1, [GLOBAL(mmx_bi_rd)] ; - - psraw mm1, mmx_filter_shift ; - movq mm5, mm1 - -%if ABI_IS_32BIT - add rsi, dword ptr arg(1) ;ref_pixels_per_line ; -%else - movsxd r8, dword ptr arg(1) ;ref_pixels_per_line ; - add rsi, r8 -%endif - -.filter_block2d_bil4x4_var_mmx_loop: - - movd mm1, [rsi] ; - movd mm3, [rsi+1] ; - - punpcklbw mm1, mm0 ; - pmullw mm1, [rax] ; - - punpcklbw mm3, mm0 ; - pmullw mm3, [rax+8] ; - - paddw mm1, mm3 ; - paddw mm1, [GLOBAL(mmx_bi_rd)] ; - - psraw mm1, mmx_filter_shift ; - movq mm3, mm5 ; - - movq mm5, mm1 ; - pmullw mm3, [rdx] ; - - pmullw mm1, [rdx+8] ; - paddw mm1, mm3 ; - - - paddw mm1, [GLOBAL(mmx_bi_rd)] ; - psraw mm1, mmx_filter_shift ; - - movd mm3, [rdi] ; - punpcklbw mm3, mm0 ; - - psubw mm1, mm3 ; - paddw mm6, mm1 ; - - pmaddwd mm1, mm1 ; - paddd mm7, mm1 ; - -%if ABI_IS_32BIT - add rsi, dword ptr arg(1) ;ref_pixels_per_line ; - add rdi, dword ptr arg(3) ;src_pixels_per_line ; -%else - movsxd r8, dword ptr arg(1) ;ref_pixels_per_line - movsxd r9, dword ptr arg(3) ;src_pixels_per_line - add rsi, r8 - add rdi, r9 -%endif - sub rcx, 1 ; - jnz .filter_block2d_bil4x4_var_mmx_loop ; - - - pxor mm3, mm3 ; - pxor mm2, mm2 ; - - punpcklwd mm2, mm6 ; - punpckhwd mm3, mm6 ; - - paddd mm2, mm3 ; - movq mm6, mm2 ; - - psrlq mm6, 32 ; - paddd mm2, mm6 ; - - psrad mm2, 16 ; - movq mm4, mm7 ; - - psrlq mm4, 32 ; - paddd mm4, mm7 ; - - mov rdi, arg(6) ;sum - mov 
rsi, arg(7) ;sumsquared - - movd dword ptr [rdi], mm2 ; - movd dword ptr [rsi], mm4 ; - - - - ; begin epilog - add rsp, 16 - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - - - -;void vp9_filter_block2d_bil_var_mmx -;( -; unsigned char *ref_ptr, -; int ref_pixels_per_line, -; unsigned char *src_ptr, -; int src_pixels_per_line, -; unsigned int Height, -; unsigned short *HFilter, -; unsigned short *VFilter, -; int *sum, -; unsigned int *sumsquared -;) -global sym(vp9_filter_block2d_bil_var_mmx) -sym(vp9_filter_block2d_bil_var_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 9 - GET_GOT rbx - push rsi - push rdi - sub rsp, 16 - ; end prolog - - pxor mm6, mm6 ; - pxor mm7, mm7 ; - mov rax, arg(5) ;HFilter ; - - mov rdx, arg(6) ;VFilter ; - mov rsi, arg(0) ;ref_ptr ; - - mov rdi, arg(2) ;src_ptr ; - movsxd rcx, dword ptr arg(4) ;Height ; - - pxor mm0, mm0 ; - movq mm1, [rsi] ; - - movq mm3, [rsi+1] ; - movq mm2, mm1 ; - - movq mm4, mm3 ; - punpcklbw mm1, mm0 ; - - punpckhbw mm2, mm0 ; - pmullw mm1, [rax] ; - - pmullw mm2, [rax] ; - punpcklbw mm3, mm0 ; - - punpckhbw mm4, mm0 ; - pmullw mm3, [rax+8] ; - - pmullw mm4, [rax+8] ; - paddw mm1, mm3 ; - - paddw mm2, mm4 ; - paddw mm1, [GLOBAL(mmx_bi_rd)] ; - - psraw mm1, mmx_filter_shift ; - paddw mm2, [GLOBAL(mmx_bi_rd)] ; - - psraw mm2, mmx_filter_shift ; - movq mm5, mm1 - - packuswb mm5, mm2 ; -%if ABI_IS_32BIT - add rsi, dword ptr arg(1) ;ref_pixels_per_line -%else - movsxd r8, dword ptr arg(1) ;ref_pixels_per_line - add rsi, r8 -%endif - -.filter_block2d_bil_var_mmx_loop: - - movq mm1, [rsi] ; - movq mm3, [rsi+1] ; - - movq mm2, mm1 ; - movq mm4, mm3 ; - - punpcklbw mm1, mm0 ; - punpckhbw mm2, mm0 ; - - pmullw mm1, [rax] ; - pmullw mm2, [rax] ; - - punpcklbw mm3, mm0 ; - punpckhbw mm4, mm0 ; - - pmullw mm3, [rax+8] ; - pmullw mm4, [rax+8] ; - - paddw mm1, mm3 ; - paddw mm2, mm4 ; - - paddw mm1, [GLOBAL(mmx_bi_rd)] ; - psraw mm1, mmx_filter_shift ; - - paddw mm2, [GLOBAL(mmx_bi_rd)] ; - psraw mm2, 
mmx_filter_shift ; - - movq mm3, mm5 ; - movq mm4, mm5 ; - - punpcklbw mm3, mm0 ; - punpckhbw mm4, mm0 ; - - movq mm5, mm1 ; - packuswb mm5, mm2 ; - - pmullw mm3, [rdx] ; - pmullw mm4, [rdx] ; - - pmullw mm1, [rdx+8] ; - pmullw mm2, [rdx+8] ; - - paddw mm1, mm3 ; - paddw mm2, mm4 ; - - paddw mm1, [GLOBAL(mmx_bi_rd)] ; - paddw mm2, [GLOBAL(mmx_bi_rd)] ; - - psraw mm1, mmx_filter_shift ; - psraw mm2, mmx_filter_shift ; - - movq mm3, [rdi] ; - movq mm4, mm3 ; - - punpcklbw mm3, mm0 ; - punpckhbw mm4, mm0 ; - - psubw mm1, mm3 ; - psubw mm2, mm4 ; - - paddw mm6, mm1 ; - pmaddwd mm1, mm1 ; - - paddw mm6, mm2 ; - pmaddwd mm2, mm2 ; - - paddd mm7, mm1 ; - paddd mm7, mm2 ; - -%if ABI_IS_32BIT - add rsi, dword ptr arg(1) ;ref_pixels_per_line ; - add rdi, dword ptr arg(3) ;src_pixels_per_line ; -%else - movsxd r8, dword ptr arg(1) ;ref_pixels_per_line ; - movsxd r9, dword ptr arg(3) ;src_pixels_per_line ; - add rsi, r8 - add rdi, r9 -%endif - sub rcx, 1 ; - jnz .filter_block2d_bil_var_mmx_loop ; - - - pxor mm3, mm3 ; - pxor mm2, mm2 ; - - punpcklwd mm2, mm6 ; - punpckhwd mm3, mm6 ; - - paddd mm2, mm3 ; - movq mm6, mm2 ; - - psrlq mm6, 32 ; - paddd mm2, mm6 ; - - psrad mm2, 16 ; - movq mm4, mm7 ; - - psrlq mm4, 32 ; - paddd mm4, mm7 ; - - mov rdi, arg(7) ;sum - mov rsi, arg(8) ;sumsquared - - movd dword ptr [rdi], mm2 ; - movd dword ptr [rsi], mm4 ; - - ; begin epilog - add rsp, 16 - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -SECTION_RODATA -;short mmx_bi_rd[4] = { 64, 64, 64, 64}; -align 16 -mmx_bi_rd: - times 4 dw 64 diff --git a/vp8/encoder/x86/variance_impl_sse2.asm b/vp8/encoder/x86/variance_impl_sse2.asm deleted file mode 100644 index 5b20f3b32..000000000 --- a/vp8/encoder/x86/variance_impl_sse2.asm +++ /dev/null @@ -1,1367 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. 
An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - -%define xmm_filter_shift 7 - -;unsigned int vp9_get_mb_ss_sse2 -;( -; short *src_ptr -;) -global sym(vp9_get_mb_ss_sse2) -sym(vp9_get_mb_ss_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 1 - GET_GOT rbx - push rsi - push rdi - sub rsp, 16 - ; end prolog - - - mov rax, arg(0) ;[src_ptr] - mov rcx, 8 - pxor xmm4, xmm4 - -.NEXTROW: - movdqa xmm0, [rax] - movdqa xmm1, [rax+16] - movdqa xmm2, [rax+32] - movdqa xmm3, [rax+48] - pmaddwd xmm0, xmm0 - pmaddwd xmm1, xmm1 - pmaddwd xmm2, xmm2 - pmaddwd xmm3, xmm3 - - paddd xmm0, xmm1 - paddd xmm2, xmm3 - paddd xmm4, xmm0 - paddd xmm4, xmm2 - - add rax, 0x40 - dec rcx - ja .NEXTROW - - movdqa xmm3,xmm4 - psrldq xmm4,8 - paddd xmm4,xmm3 - movdqa xmm3,xmm4 - psrldq xmm4,4 - paddd xmm4,xmm3 - movq rax,xmm4 - - - ; begin epilog - add rsp, 16 - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -;unsigned int vp9_get16x16var_sse2 -;( -; unsigned char * src_ptr, -; int source_stride, -; unsigned char * ref_ptr, -; int recon_stride, -; unsigned int * SSE, -; int * Sum -;) -global sym(vp9_get16x16var_sse2) -sym(vp9_get16x16var_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rbx - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;[src_ptr] - mov rdi, arg(2) ;[ref_ptr] - - movsxd rax, DWORD PTR arg(1) ;[source_stride] - movsxd rdx, DWORD PTR arg(3) ;[recon_stride] - - ; Prefetch data - lea rcx, [rax+rax*2] - prefetcht0 [rsi] - prefetcht0 [rsi+rax] - prefetcht0 [rsi+rax*2] - prefetcht0 [rsi+rcx] - lea rbx, [rsi+rax*4] - prefetcht0 [rbx] - prefetcht0 [rbx+rax] - prefetcht0 [rbx+rax*2] - prefetcht0 [rbx+rcx] - - lea rcx, [rdx+rdx*2] - prefetcht0 [rdi] - prefetcht0 [rdi+rdx] - prefetcht0 [rdi+rdx*2] - prefetcht0 [rdi+rcx] - lea rbx, [rdi+rdx*4] 
- prefetcht0 [rbx] - prefetcht0 [rbx+rdx] - prefetcht0 [rbx+rdx*2] - prefetcht0 [rbx+rcx] - - pxor xmm0, xmm0 ; clear xmm0 for unpack - pxor xmm7, xmm7 ; clear xmm7 for accumulating diffs - - pxor xmm6, xmm6 ; clear xmm6 for accumulating sse - mov rcx, 16 - -.var16loop: - movdqu xmm1, XMMWORD PTR [rsi] - movdqu xmm2, XMMWORD PTR [rdi] - - prefetcht0 [rsi+rax*8] - prefetcht0 [rdi+rdx*8] - - movdqa xmm3, xmm1 - movdqa xmm4, xmm2 - - - punpcklbw xmm1, xmm0 - punpckhbw xmm3, xmm0 - - punpcklbw xmm2, xmm0 - punpckhbw xmm4, xmm0 - - - psubw xmm1, xmm2 - psubw xmm3, xmm4 - - paddw xmm7, xmm1 - pmaddwd xmm1, xmm1 - - paddw xmm7, xmm3 - pmaddwd xmm3, xmm3 - - paddd xmm6, xmm1 - paddd xmm6, xmm3 - - add rsi, rax - add rdi, rdx - - sub rcx, 1 - jnz .var16loop - - - movdqa xmm1, xmm6 - pxor xmm6, xmm6 - - pxor xmm5, xmm5 - punpcklwd xmm6, xmm7 - - punpckhwd xmm5, xmm7 - psrad xmm5, 16 - - psrad xmm6, 16 - paddd xmm6, xmm5 - - movdqa xmm2, xmm1 - punpckldq xmm1, xmm0 - - punpckhdq xmm2, xmm0 - movdqa xmm7, xmm6 - - paddd xmm1, xmm2 - punpckldq xmm6, xmm0 - - punpckhdq xmm7, xmm0 - paddd xmm6, xmm7 - - movdqa xmm2, xmm1 - movdqa xmm7, xmm6 - - psrldq xmm1, 8 - psrldq xmm6, 8 - - paddd xmm7, xmm6 - paddd xmm1, xmm2 - - mov rax, arg(5) ;[Sum] - mov rdi, arg(4) ;[SSE] - - movd DWORD PTR [rax], xmm7 - movd DWORD PTR [rdi], xmm1 - - - ; begin epilog - pop rdi - pop rsi - pop rbx - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - - - -;unsigned int vp9_get8x8var_sse2 -;( -; unsigned char * src_ptr, -; int source_stride, -; unsigned char * ref_ptr, -; int recon_stride, -; unsigned int * SSE, -; int * Sum -;) -global sym(vp9_get8x8var_sse2) -sym(vp9_get8x8var_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - sub rsp, 16 - ; end prolog - - mov rsi, arg(0) ;[src_ptr] - mov rdi, arg(2) ;[ref_ptr] - - movsxd rax, DWORD PTR arg(1) ;[source_stride] - movsxd rdx, DWORD PTR arg(3) ;[recon_stride] - - pxor xmm0, xmm0 ; clear xmm0 for 
unpack - pxor xmm7, xmm7 ; clear xmm7 for accumulating diffs - - movq xmm1, QWORD PTR [rsi] - movq xmm2, QWORD PTR [rdi] - - punpcklbw xmm1, xmm0 - punpcklbw xmm2, xmm0 - - psubsw xmm1, xmm2 - paddw xmm7, xmm1 - - pmaddwd xmm1, xmm1 - - movq xmm2, QWORD PTR[rsi + rax] - movq xmm3, QWORD PTR[rdi + rdx] - - punpcklbw xmm2, xmm0 - punpcklbw xmm3, xmm0 - - psubsw xmm2, xmm3 - paddw xmm7, xmm2 - - pmaddwd xmm2, xmm2 - paddd xmm1, xmm2 - - - movq xmm2, QWORD PTR[rsi + rax * 2] - movq xmm3, QWORD PTR[rdi + rdx * 2] - - punpcklbw xmm2, xmm0 - punpcklbw xmm3, xmm0 - - psubsw xmm2, xmm3 - paddw xmm7, xmm2 - - pmaddwd xmm2, xmm2 - paddd xmm1, xmm2 - - - lea rsi, [rsi + rax * 2] - lea rdi, [rdi + rdx * 2] - movq xmm2, QWORD PTR[rsi + rax] - movq xmm3, QWORD PTR[rdi + rdx] - - punpcklbw xmm2, xmm0 - punpcklbw xmm3, xmm0 - - psubsw xmm2, xmm3 - paddw xmm7, xmm2 - - pmaddwd xmm2, xmm2 - paddd xmm1, xmm2 - - movq xmm2, QWORD PTR[rsi + rax *2] - movq xmm3, QWORD PTR[rdi + rdx *2] - - punpcklbw xmm2, xmm0 - punpcklbw xmm3, xmm0 - - psubsw xmm2, xmm3 - paddw xmm7, xmm2 - - pmaddwd xmm2, xmm2 - paddd xmm1, xmm2 - - - lea rsi, [rsi + rax * 2] - lea rdi, [rdi + rdx * 2] - - - movq xmm2, QWORD PTR[rsi + rax] - movq xmm3, QWORD PTR[rdi + rdx] - - punpcklbw xmm2, xmm0 - punpcklbw xmm3, xmm0 - - psubsw xmm2, xmm3 - paddw xmm7, xmm2 - - pmaddwd xmm2, xmm2 - paddd xmm1, xmm2 - - movq xmm2, QWORD PTR[rsi + rax *2] - movq xmm3, QWORD PTR[rdi + rdx *2] - - punpcklbw xmm2, xmm0 - punpcklbw xmm3, xmm0 - - psubsw xmm2, xmm3 - paddw xmm7, xmm2 - - pmaddwd xmm2, xmm2 - paddd xmm1, xmm2 - - - lea rsi, [rsi + rax * 2] - lea rdi, [rdi + rdx * 2] - - movq xmm2, QWORD PTR[rsi + rax] - movq xmm3, QWORD PTR[rdi + rdx] - - punpcklbw xmm2, xmm0 - punpcklbw xmm3, xmm0 - - psubsw xmm2, xmm3 - paddw xmm7, xmm2 - - pmaddwd xmm2, xmm2 - paddd xmm1, xmm2 - - - movdqa xmm6, xmm7 - punpcklwd xmm6, xmm0 - - punpckhwd xmm7, xmm0 - movdqa xmm2, xmm1 - - paddw xmm6, xmm7 - punpckldq xmm1, xmm0 - - punpckhdq xmm2, xmm0 - 
movdqa xmm7, xmm6 - - paddd xmm1, xmm2 - punpckldq xmm6, xmm0 - - punpckhdq xmm7, xmm0 - paddw xmm6, xmm7 - - movdqa xmm2, xmm1 - movdqa xmm7, xmm6 - - psrldq xmm1, 8 - psrldq xmm6, 8 - - paddw xmm7, xmm6 - paddd xmm1, xmm2 - - mov rax, arg(5) ;[Sum] - mov rdi, arg(4) ;[SSE] - - movq rdx, xmm7 - movsx rcx, dx - - mov dword ptr [rax], ecx - movd DWORD PTR [rdi], xmm1 - - ; begin epilog - add rsp, 16 - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void vp9_filter_block2d_bil_var_sse2 -;( -; unsigned char *ref_ptr, -; int ref_pixels_per_line, -; unsigned char *src_ptr, -; int src_pixels_per_line, -; unsigned int Height, -; int xoffset, -; int yoffset, -; int *sum, -; unsigned int *sumsquared;; -; -;) -global sym(vp9_filter_block2d_bil_var_sse2) -sym(vp9_filter_block2d_bil_var_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 9 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - push rbx - ; end prolog - - pxor xmm6, xmm6 ; - pxor xmm7, xmm7 ; - - lea rsi, [GLOBAL(xmm_bi_rd)] ; rounding - movdqa xmm4, XMMWORD PTR [rsi] - - lea rcx, [GLOBAL(bilinear_filters_sse2)] - movsxd rax, dword ptr arg(5) ; xoffset - - cmp rax, 0 ; skip first_pass filter if xoffset=0 - je filter_block2d_bil_var_sse2_sp_only - - shl rax, 5 ; point to filter coeff with xoffset - lea rax, [rax + rcx] ; HFilter - - movsxd rdx, dword ptr arg(6) ; yoffset - - cmp rdx, 0 ; skip second_pass filter if yoffset=0 - je filter_block2d_bil_var_sse2_fp_only - - shl rdx, 5 - lea rdx, [rdx + rcx] ; VFilter - - mov rsi, arg(0) ;ref_ptr - mov rdi, arg(2) ;src_ptr - movsxd rcx, dword ptr arg(4) ;Height - - pxor xmm0, xmm0 ; - movq xmm1, QWORD PTR [rsi] ; - movq xmm3, QWORD PTR [rsi+1] ; - - punpcklbw xmm1, xmm0 ; - pmullw xmm1, [rax] ; - punpcklbw xmm3, xmm0 - pmullw xmm3, [rax+16] ; - - paddw xmm1, xmm3 ; - paddw xmm1, xmm4 ; - psraw xmm1, xmm_filter_shift ; - movdqa xmm5, xmm1 - - movsxd rbx, dword ptr arg(1) ;ref_pixels_per_line - lea rsi, [rsi + rbx] -%if ABI_IS_32BIT=0 - 
movsxd r9, dword ptr arg(3) ;src_pixels_per_line -%endif - -filter_block2d_bil_var_sse2_loop: - movq xmm1, QWORD PTR [rsi] ; - movq xmm3, QWORD PTR [rsi+1] ; - - punpcklbw xmm1, xmm0 ; - pmullw xmm1, [rax] ; - punpcklbw xmm3, xmm0 ; - pmullw xmm3, [rax+16] ; - - paddw xmm1, xmm3 ; - paddw xmm1, xmm4 ; - psraw xmm1, xmm_filter_shift ; - - movdqa xmm3, xmm5 ; - movdqa xmm5, xmm1 ; - - pmullw xmm3, [rdx] ; - pmullw xmm1, [rdx+16] ; - paddw xmm1, xmm3 ; - paddw xmm1, xmm4 ; - psraw xmm1, xmm_filter_shift ; - - movq xmm3, QWORD PTR [rdi] ; - punpcklbw xmm3, xmm0 ; - - psubw xmm1, xmm3 ; - paddw xmm6, xmm1 ; - - pmaddwd xmm1, xmm1 ; - paddd xmm7, xmm1 ; - - lea rsi, [rsi + rbx] ;ref_pixels_per_line -%if ABI_IS_32BIT - add rdi, dword ptr arg(3) ;src_pixels_per_line -%else - lea rdi, [rdi + r9] -%endif - - sub rcx, 1 ; - jnz filter_block2d_bil_var_sse2_loop ; - - jmp filter_block2d_bil_variance - -filter_block2d_bil_var_sse2_sp_only: - movsxd rdx, dword ptr arg(6) ; yoffset - - cmp rdx, 0 ; skip all if both xoffset=0 and yoffset=0 - je filter_block2d_bil_var_sse2_full_pixel - - shl rdx, 5 - lea rdx, [rdx + rcx] ; VFilter - - mov rsi, arg(0) ;ref_ptr - mov rdi, arg(2) ;src_ptr - movsxd rcx, dword ptr arg(4) ;Height - movsxd rax, dword ptr arg(1) ;ref_pixels_per_line - - pxor xmm0, xmm0 ; - movq xmm1, QWORD PTR [rsi] ; - punpcklbw xmm1, xmm0 ; - - movsxd rbx, dword ptr arg(3) ;src_pixels_per_line - lea rsi, [rsi + rax] - -filter_block2d_bil_sp_only_loop: - movq xmm3, QWORD PTR [rsi] ; - punpcklbw xmm3, xmm0 ; - movdqa xmm5, xmm3 - - pmullw xmm1, [rdx] ; - pmullw xmm3, [rdx+16] ; - paddw xmm1, xmm3 ; - paddw xmm1, xmm4 ; - psraw xmm1, xmm_filter_shift ; - - movq xmm3, QWORD PTR [rdi] ; - punpcklbw xmm3, xmm0 ; - - psubw xmm1, xmm3 ; - paddw xmm6, xmm1 ; - - pmaddwd xmm1, xmm1 ; - paddd xmm7, xmm1 ; - - movdqa xmm1, xmm5 ; - lea rsi, [rsi + rax] ;ref_pixels_per_line - lea rdi, [rdi + rbx] ;src_pixels_per_line - - sub rcx, 1 ; - jnz filter_block2d_bil_sp_only_loop ; - - jmp 
filter_block2d_bil_variance - -filter_block2d_bil_var_sse2_full_pixel: - mov rsi, arg(0) ;ref_ptr - mov rdi, arg(2) ;src_ptr - movsxd rcx, dword ptr arg(4) ;Height - movsxd rax, dword ptr arg(1) ;ref_pixels_per_line - movsxd rbx, dword ptr arg(3) ;src_pixels_per_line - pxor xmm0, xmm0 ; - -filter_block2d_bil_full_pixel_loop: - movq xmm1, QWORD PTR [rsi] ; - punpcklbw xmm1, xmm0 ; - - movq xmm2, QWORD PTR [rdi] ; - punpcklbw xmm2, xmm0 ; - - psubw xmm1, xmm2 ; - paddw xmm6, xmm1 ; - - pmaddwd xmm1, xmm1 ; - paddd xmm7, xmm1 ; - - lea rsi, [rsi + rax] ;ref_pixels_per_line - lea rdi, [rdi + rbx] ;src_pixels_per_line - - sub rcx, 1 ; - jnz filter_block2d_bil_full_pixel_loop ; - - jmp filter_block2d_bil_variance - -filter_block2d_bil_var_sse2_fp_only: - mov rsi, arg(0) ;ref_ptr - mov rdi, arg(2) ;src_ptr - movsxd rcx, dword ptr arg(4) ;Height - movsxd rdx, dword ptr arg(1) ;ref_pixels_per_line - - pxor xmm0, xmm0 ; - movsxd rbx, dword ptr arg(3) ;src_pixels_per_line - -filter_block2d_bil_fp_only_loop: - movq xmm1, QWORD PTR [rsi] ; - movq xmm3, QWORD PTR [rsi+1] ; - - punpcklbw xmm1, xmm0 ; - pmullw xmm1, [rax] ; - punpcklbw xmm3, xmm0 ; - pmullw xmm3, [rax+16] ; - - paddw xmm1, xmm3 ; - paddw xmm1, xmm4 ; - psraw xmm1, xmm_filter_shift ; - - movq xmm3, QWORD PTR [rdi] ; - punpcklbw xmm3, xmm0 ; - - psubw xmm1, xmm3 ; - paddw xmm6, xmm1 ; - - pmaddwd xmm1, xmm1 ; - paddd xmm7, xmm1 ; - lea rsi, [rsi + rdx] - lea rdi, [rdi + rbx] ;src_pixels_per_line - - sub rcx, 1 ; - jnz filter_block2d_bil_fp_only_loop ; - - jmp filter_block2d_bil_variance - -filter_block2d_bil_variance: - movdq2q mm6, xmm6 ; - movdq2q mm7, xmm7 ; - - psrldq xmm6, 8 - psrldq xmm7, 8 - - movdq2q mm2, xmm6 - movdq2q mm3, xmm7 - - paddw mm6, mm2 - paddd mm7, mm3 - - pxor mm3, mm3 ; - pxor mm2, mm2 ; - - punpcklwd mm2, mm6 ; - punpckhwd mm3, mm6 ; - - paddd mm2, mm3 ; - movq mm6, mm2 ; - - psrlq mm6, 32 ; - paddd mm2, mm6 ; - - psrad mm2, 16 ; - movq mm4, mm7 ; - - psrlq mm4, 32 ; - paddd mm4, mm7 ; - - 
mov rsi, arg(7) ; sum - mov rdi, arg(8) ; sumsquared - - movd [rsi], mm2 ; xsum - movd [rdi], mm4 ; xxsum - - ; begin epilog - pop rbx - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -;void vp9_half_horiz_vert_variance8x_h_sse2 -;( -; unsigned char *ref_ptr, -; int ref_pixels_per_line, -; unsigned char *src_ptr, -; int src_pixels_per_line, -; unsigned int Height, -; int *sum, -; unsigned int *sumsquared -;) -global sym(vp9_half_horiz_vert_variance8x_h_sse2) -sym(vp9_half_horiz_vert_variance8x_h_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - -%if ABI_IS_32BIT=0 - movsxd r8, dword ptr arg(1) ;ref_pixels_per_line - movsxd r9, dword ptr arg(3) ;src_pixels_per_line -%endif - - pxor xmm6, xmm6 ; error accumulator - pxor xmm7, xmm7 ; sse eaccumulator - mov rsi, arg(0) ;ref_ptr ; - - mov rdi, arg(2) ;src_ptr ; - movsxd rcx, dword ptr arg(4) ;Height ; - movsxd rax, dword ptr arg(1) ;ref_pixels_per_line - - pxor xmm0, xmm0 ; - - movq xmm5, QWORD PTR [rsi] ; xmm5 = s0,s1,s2..s8 - movq xmm3, QWORD PTR [rsi+1] ; xmm3 = s1,s2,s3..s9 - pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3) horizontal line 1 - -%if ABI_IS_32BIT - add rsi, dword ptr arg(1) ;ref_pixels_per_line ; next source -%else - add rsi, r8 -%endif - -.half_horiz_vert_variance8x_h_1: - - movq xmm1, QWORD PTR [rsi] ; - movq xmm2, QWORD PTR [rsi+1] ; - pavgb xmm1, xmm2 ; xmm1 = avg(xmm1,xmm3) horizontal line i+1 - - pavgb xmm5, xmm1 ; xmm = vertical average of the above - punpcklbw xmm5, xmm0 ; xmm5 = words of above - - movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d8 - punpcklbw xmm3, xmm0 ; xmm3 = words of above - - psubw xmm5, xmm3 ; xmm5 -= xmm3 - paddw xmm6, xmm5 ; xmm6 += accumulated column differences - pmaddwd xmm5, xmm5 ; xmm5 *= xmm5 - paddd xmm7, xmm5 ; xmm7 += accumulated square column differences - - movdqa xmm5, xmm1 ; save xmm1 for use on the next row - -%if ABI_IS_32BIT - add esi, dword ptr arg(1) 
;ref_pixels_per_line ; next source - add edi, dword ptr arg(3) ;src_pixels_per_line ; next destination -%else - add rsi, r8 - add rdi, r9 -%endif - - sub rcx, 1 ; - jnz .half_horiz_vert_variance8x_h_1 ; - - movdq2q mm6, xmm6 ; - movdq2q mm7, xmm7 ; - - psrldq xmm6, 8 - psrldq xmm7, 8 - - movdq2q mm2, xmm6 - movdq2q mm3, xmm7 - - paddw mm6, mm2 - paddd mm7, mm3 - - pxor mm3, mm3 ; - pxor mm2, mm2 ; - - punpcklwd mm2, mm6 ; - punpckhwd mm3, mm6 ; - - paddd mm2, mm3 ; - movq mm6, mm2 ; - - psrlq mm6, 32 ; - paddd mm2, mm6 ; - - psrad mm2, 16 ; - movq mm4, mm7 ; - - psrlq mm4, 32 ; - paddd mm4, mm7 ; - - mov rsi, arg(5) ; sum - mov rdi, arg(6) ; sumsquared - - movd [rsi], mm2 ; - movd [rdi], mm4 ; - - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void vp9_half_horiz_vert_variance16x_h_sse2 -;( -; unsigned char *ref_ptr, -; int ref_pixels_per_line, -; unsigned char *src_ptr, -; int src_pixels_per_line, -; unsigned int Height, -; int *sum, -; unsigned int *sumsquared -;) -global sym(vp9_half_horiz_vert_variance16x_h_sse2) -sym(vp9_half_horiz_vert_variance16x_h_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - pxor xmm6, xmm6 ; error accumulator - pxor xmm7, xmm7 ; sse eaccumulator - mov rsi, arg(0) ;ref_ptr ; - - mov rdi, arg(2) ;src_ptr ; - movsxd rcx, dword ptr arg(4) ;Height ; - movsxd rax, dword ptr arg(1) ;ref_pixels_per_line - movsxd rdx, dword ptr arg(3) ;src_pixels_per_line - - pxor xmm0, xmm0 ; - - movdqu xmm5, XMMWORD PTR [rsi] - movdqu xmm3, XMMWORD PTR [rsi+1] - pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3) horizontal line 1 - - lea rsi, [rsi + rax] - -.half_horiz_vert_variance16x_h_1: - movdqu xmm1, XMMWORD PTR [rsi] ; - movdqu xmm2, XMMWORD PTR [rsi+1] ; - pavgb xmm1, xmm2 ; xmm1 = avg(xmm1,xmm3) horizontal line i+1 - - pavgb xmm5, xmm1 ; xmm = vertical average of the above - - movdqa xmm4, xmm5 - punpcklbw xmm5, xmm0 ; xmm5 = words 
of above - punpckhbw xmm4, xmm0 - - movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d7 - punpcklbw xmm3, xmm0 ; xmm3 = words of above - psubw xmm5, xmm3 ; xmm5 -= xmm3 - - movq xmm3, QWORD PTR [rdi+8] - punpcklbw xmm3, xmm0 - psubw xmm4, xmm3 - - paddw xmm6, xmm5 ; xmm6 += accumulated column differences - paddw xmm6, xmm4 - pmaddwd xmm5, xmm5 ; xmm5 *= xmm5 - pmaddwd xmm4, xmm4 - paddd xmm7, xmm5 ; xmm7 += accumulated square column differences - paddd xmm7, xmm4 - - movdqa xmm5, xmm1 ; save xmm1 for use on the next row - - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - - sub rcx, 1 ; - jnz .half_horiz_vert_variance16x_h_1 ; - - pxor xmm1, xmm1 - pxor xmm5, xmm5 - - punpcklwd xmm0, xmm6 - punpckhwd xmm1, xmm6 - psrad xmm0, 16 - psrad xmm1, 16 - paddd xmm0, xmm1 - movdqa xmm1, xmm0 - - movdqa xmm6, xmm7 - punpckldq xmm6, xmm5 - punpckhdq xmm7, xmm5 - paddd xmm6, xmm7 - - punpckldq xmm0, xmm5 - punpckhdq xmm1, xmm5 - paddd xmm0, xmm1 - - movdqa xmm7, xmm6 - movdqa xmm1, xmm0 - - psrldq xmm7, 8 - psrldq xmm1, 8 - - paddd xmm6, xmm7 - paddd xmm0, xmm1 - - mov rsi, arg(5) ;[Sum] - mov rdi, arg(6) ;[SSE] - - movd [rsi], xmm0 - movd [rdi], xmm6 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -;void vp9_half_vert_variance8x_h_sse2 -;( -; unsigned char *ref_ptr, -; int ref_pixels_per_line, -; unsigned char *src_ptr, -; int src_pixels_per_line, -; unsigned int Height, -; int *sum, -; unsigned int *sumsquared -;) -global sym(vp9_half_vert_variance8x_h_sse2) -sym(vp9_half_vert_variance8x_h_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - -%if ABI_IS_32BIT=0 - movsxd r8, dword ptr arg(1) ;ref_pixels_per_line - movsxd r9, dword ptr arg(3) ;src_pixels_per_line -%endif - - pxor xmm6, xmm6 ; error accumulator - pxor xmm7, xmm7 ; sse eaccumulator - mov rsi, arg(0) ;ref_ptr ; - - mov rdi, arg(2) ;src_ptr ; - movsxd rcx, dword ptr arg(4) ;Height ; - movsxd rax, 
dword ptr arg(1) ;ref_pixels_per_line - - pxor xmm0, xmm0 ; -.half_vert_variance8x_h_1: - movq xmm5, QWORD PTR [rsi] ; xmm5 = s0,s1,s2..s8 - movq xmm3, QWORD PTR [rsi+rax] ; xmm3 = s1,s2,s3..s9 - - pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3) - punpcklbw xmm5, xmm0 ; xmm5 = words of above - - movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d8 - punpcklbw xmm3, xmm0 ; xmm3 = words of above - - psubw xmm5, xmm3 ; xmm5 -= xmm3 - paddw xmm6, xmm5 ; xmm6 += accumulated column differences - pmaddwd xmm5, xmm5 ; xmm5 *= xmm5 - paddd xmm7, xmm5 ; xmm7 += accumulated square column differences - -%if ABI_IS_32BIT - add esi, dword ptr arg(1) ;ref_pixels_per_line ; next source - add edi, dword ptr arg(3) ;src_pixels_per_line ; next destination -%else - add rsi, r8 - add rdi, r9 -%endif - - sub rcx, 1 ; - jnz .half_vert_variance8x_h_1 ; - - movdq2q mm6, xmm6 ; - movdq2q mm7, xmm7 ; - - psrldq xmm6, 8 - psrldq xmm7, 8 - - movdq2q mm2, xmm6 - movdq2q mm3, xmm7 - - paddw mm6, mm2 - paddd mm7, mm3 - - pxor mm3, mm3 ; - pxor mm2, mm2 ; - - punpcklwd mm2, mm6 ; - punpckhwd mm3, mm6 ; - - paddd mm2, mm3 ; - movq mm6, mm2 ; - - psrlq mm6, 32 ; - paddd mm2, mm6 ; - - psrad mm2, 16 ; - movq mm4, mm7 ; - - psrlq mm4, 32 ; - paddd mm4, mm7 ; - - mov rsi, arg(5) ; sum - mov rdi, arg(6) ; sumsquared - - movd [rsi], mm2 ; - movd [rdi], mm4 ; - - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void vp9_half_vert_variance16x_h_sse2 -;( -; unsigned char *ref_ptr, -; int ref_pixels_per_line, -; unsigned char *src_ptr, -; int src_pixels_per_line, -; unsigned int Height, -; int *sum, -; unsigned int *sumsquared -;) -global sym(vp9_half_vert_variance16x_h_sse2) -sym(vp9_half_vert_variance16x_h_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - pxor xmm6, xmm6 ; error accumulator - pxor xmm7, xmm7 ; sse eaccumulator - mov rsi, arg(0) ;ref_ptr - - mov rdi, arg(2) ;src_ptr - movsxd 
rcx, dword ptr arg(4) ;Height - movsxd rax, dword ptr arg(1) ;ref_pixels_per_line - movsxd rdx, dword ptr arg(3) ;src_pixels_per_line - - movdqu xmm5, XMMWORD PTR [rsi] - lea rsi, [rsi + rax ] - pxor xmm0, xmm0 - -.half_vert_variance16x_h_1: - movdqu xmm3, XMMWORD PTR [rsi] - - pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3) - movdqa xmm4, xmm5 - punpcklbw xmm5, xmm0 - punpckhbw xmm4, xmm0 - - movq xmm2, QWORD PTR [rdi] - punpcklbw xmm2, xmm0 - psubw xmm5, xmm2 - movq xmm2, QWORD PTR [rdi+8] - punpcklbw xmm2, xmm0 - psubw xmm4, xmm2 - - paddw xmm6, xmm5 ; xmm6 += accumulated column differences - paddw xmm6, xmm4 - pmaddwd xmm5, xmm5 ; xmm5 *= xmm5 - pmaddwd xmm4, xmm4 - paddd xmm7, xmm5 ; xmm7 += accumulated square column differences - paddd xmm7, xmm4 - - movdqa xmm5, xmm3 - - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - - sub rcx, 1 - jnz .half_vert_variance16x_h_1 - - pxor xmm1, xmm1 - pxor xmm5, xmm5 - - punpcklwd xmm0, xmm6 - punpckhwd xmm1, xmm6 - psrad xmm0, 16 - psrad xmm1, 16 - paddd xmm0, xmm1 - movdqa xmm1, xmm0 - - movdqa xmm6, xmm7 - punpckldq xmm6, xmm5 - punpckhdq xmm7, xmm5 - paddd xmm6, xmm7 - - punpckldq xmm0, xmm5 - punpckhdq xmm1, xmm5 - paddd xmm0, xmm1 - - movdqa xmm7, xmm6 - movdqa xmm1, xmm0 - - psrldq xmm7, 8 - psrldq xmm1, 8 - - paddd xmm6, xmm7 - paddd xmm0, xmm1 - - mov rsi, arg(5) ;[Sum] - mov rdi, arg(6) ;[SSE] - - movd [rsi], xmm0 - movd [rdi], xmm6 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -;void vp9_half_horiz_variance8x_h_sse2 -;( -; unsigned char *ref_ptr, -; int ref_pixels_per_line, -; unsigned char *src_ptr, -; int src_pixels_per_line, -; unsigned int Height, -; int *sum, -; unsigned int *sumsquared -;) -global sym(vp9_half_horiz_variance8x_h_sse2) -sym(vp9_half_horiz_variance8x_h_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - -%if ABI_IS_32BIT=0 - movsxd r8, dword ptr arg(1) ;ref_pixels_per_line - 
movsxd r9, dword ptr arg(3) ;src_pixels_per_line -%endif - - pxor xmm6, xmm6 ; error accumulator - pxor xmm7, xmm7 ; sse eaccumulator - mov rsi, arg(0) ;ref_ptr ; - - mov rdi, arg(2) ;src_ptr ; - movsxd rcx, dword ptr arg(4) ;Height ; - - pxor xmm0, xmm0 ; -.half_horiz_variance8x_h_1: - movq xmm5, QWORD PTR [rsi] ; xmm5 = s0,s1,s2..s8 - movq xmm3, QWORD PTR [rsi+1] ; xmm3 = s1,s2,s3..s9 - - pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3) - punpcklbw xmm5, xmm0 ; xmm5 = words of above - - movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d8 - punpcklbw xmm3, xmm0 ; xmm3 = words of above - - psubw xmm5, xmm3 ; xmm5 -= xmm3 - paddw xmm6, xmm5 ; xmm6 += accumulated column differences - pmaddwd xmm5, xmm5 ; xmm5 *= xmm5 - paddd xmm7, xmm5 ; xmm7 += accumulated square column differences - -%if ABI_IS_32BIT - add esi, dword ptr arg(1) ;ref_pixels_per_line ; next source - add edi, dword ptr arg(3) ;src_pixels_per_line ; next destination -%else - add rsi, r8 - add rdi, r9 -%endif - sub rcx, 1 ; - jnz .half_horiz_variance8x_h_1 ; - - movdq2q mm6, xmm6 ; - movdq2q mm7, xmm7 ; - - psrldq xmm6, 8 - psrldq xmm7, 8 - - movdq2q mm2, xmm6 - movdq2q mm3, xmm7 - - paddw mm6, mm2 - paddd mm7, mm3 - - pxor mm3, mm3 ; - pxor mm2, mm2 ; - - punpcklwd mm2, mm6 ; - punpckhwd mm3, mm6 ; - - paddd mm2, mm3 ; - movq mm6, mm2 ; - - psrlq mm6, 32 ; - paddd mm2, mm6 ; - - psrad mm2, 16 ; - movq mm4, mm7 ; - - psrlq mm4, 32 ; - paddd mm4, mm7 ; - - mov rsi, arg(5) ; sum - mov rdi, arg(6) ; sumsquared - - movd [rsi], mm2 ; - movd [rdi], mm4 ; - - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void vp9_half_horiz_variance16x_h_sse2 -;( -; unsigned char *ref_ptr, -; int ref_pixels_per_line, -; unsigned char *src_ptr, -; int src_pixels_per_line, -; unsigned int Height, -; int *sum, -; unsigned int *sumsquared -;) -global sym(vp9_half_horiz_variance16x_h_sse2) -sym(vp9_half_horiz_variance16x_h_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - 
SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - pxor xmm6, xmm6 ; error accumulator - pxor xmm7, xmm7 ; sse eaccumulator - mov rsi, arg(0) ;ref_ptr ; - - mov rdi, arg(2) ;src_ptr ; - movsxd rcx, dword ptr arg(4) ;Height ; - movsxd rax, dword ptr arg(1) ;ref_pixels_per_line - movsxd rdx, dword ptr arg(3) ;src_pixels_per_line - - pxor xmm0, xmm0 ; - -.half_horiz_variance16x_h_1: - movdqu xmm5, XMMWORD PTR [rsi] ; xmm5 = s0,s1,s2..s15 - movdqu xmm3, XMMWORD PTR [rsi+1] ; xmm3 = s1,s2,s3..s16 - - pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3) - movdqa xmm1, xmm5 - punpcklbw xmm5, xmm0 ; xmm5 = words of above - punpckhbw xmm1, xmm0 - - movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d7 - punpcklbw xmm3, xmm0 ; xmm3 = words of above - movq xmm2, QWORD PTR [rdi+8] - punpcklbw xmm2, xmm0 - - psubw xmm5, xmm3 ; xmm5 -= xmm3 - psubw xmm1, xmm2 - paddw xmm6, xmm5 ; xmm6 += accumulated column differences - paddw xmm6, xmm1 - pmaddwd xmm5, xmm5 ; xmm5 *= xmm5 - pmaddwd xmm1, xmm1 - paddd xmm7, xmm5 ; xmm7 += accumulated square column differences - paddd xmm7, xmm1 - - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - - sub rcx, 1 ; - jnz .half_horiz_variance16x_h_1 ; - - pxor xmm1, xmm1 - pxor xmm5, xmm5 - - punpcklwd xmm0, xmm6 - punpckhwd xmm1, xmm6 - psrad xmm0, 16 - psrad xmm1, 16 - paddd xmm0, xmm1 - movdqa xmm1, xmm0 - - movdqa xmm6, xmm7 - punpckldq xmm6, xmm5 - punpckhdq xmm7, xmm5 - paddd xmm6, xmm7 - - punpckldq xmm0, xmm5 - punpckhdq xmm1, xmm5 - paddd xmm0, xmm1 - - movdqa xmm7, xmm6 - movdqa xmm1, xmm0 - - psrldq xmm7, 8 - psrldq xmm1, 8 - - paddd xmm6, xmm7 - paddd xmm0, xmm1 - - mov rsi, arg(5) ;[Sum] - mov rdi, arg(6) ;[SSE] - - movd [rsi], xmm0 - movd [rdi], xmm6 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -SECTION_RODATA -; short xmm_bi_rd[8] = { 64, 64, 64, 64,64, 64, 64, 64}; -align 16 -xmm_bi_rd: - times 8 dw 64 -align 16 -bilinear_filters_sse2: - dw 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 
0, 0, 0, 0, 0, 0 - dw 120, 120, 120, 120, 120, 120, 120, 120, 8, 8, 8, 8, 8, 8, 8, 8 - dw 112, 112, 112, 112, 112, 112, 112, 112, 16, 16, 16, 16, 16, 16, 16, 16 - dw 104, 104, 104, 104, 104, 104, 104, 104, 24, 24, 24, 24, 24, 24, 24, 24 - dw 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32 - dw 88, 88, 88, 88, 88, 88, 88, 88, 40, 40, 40, 40, 40, 40, 40, 40 - dw 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48 - dw 72, 72, 72, 72, 72, 72, 72, 72, 56, 56, 56, 56, 56, 56, 56, 56 - dw 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 - dw 56, 56, 56, 56, 56, 56, 56, 56, 72, 72, 72, 72, 72, 72, 72, 72 - dw 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80 - dw 40, 40, 40, 40, 40, 40, 40, 40, 88, 88, 88, 88, 88, 88, 88, 88 - dw 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96 - dw 24, 24, 24, 24, 24, 24, 24, 24, 104, 104, 104, 104, 104, 104, 104, 104 - dw 16, 16, 16, 16, 16, 16, 16, 16, 112, 112, 112, 112, 112, 112, 112, 112 - dw 8, 8, 8, 8, 8, 8, 8, 8, 120, 120, 120, 120, 120, 120, 120, 120 diff --git a/vp8/encoder/x86/variance_impl_ssse3.asm b/vp8/encoder/x86/variance_impl_ssse3.asm deleted file mode 100644 index 30c75a6ae..000000000 --- a/vp8/encoder/x86/variance_impl_ssse3.asm +++ /dev/null @@ -1,372 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - -%include "vpx_ports/x86_abi_support.asm" - -%define xmm_filter_shift 7 - - -;void vp9_filter_block2d_bil_var_ssse3 -;( -; unsigned char *ref_ptr, -; int ref_pixels_per_line, -; unsigned char *src_ptr, -; int src_pixels_per_line, -; unsigned int Height, -; int xoffset, -; int yoffset, -; int *sum, -; unsigned int *sumsquared;; -; -;) -;Note: The filter coefficient at offset=0 is 128. Since the second register -;for Pmaddubsw is signed bytes, we must calculate zero offset seperately. -global sym(vp9_filter_block2d_bil_var_ssse3) -sym(vp9_filter_block2d_bil_var_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 9 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - pxor xmm6, xmm6 - pxor xmm7, xmm7 - - lea rcx, [GLOBAL(bilinear_filters_ssse3)] - movsxd rax, dword ptr arg(5) ; xoffset - - cmp rax, 0 ; skip first_pass filter if xoffset=0 - je .filter_block2d_bil_var_ssse3_sp_only - - shl rax, 4 ; point to filter coeff with xoffset - lea rax, [rax + rcx] ; HFilter - - movsxd rdx, dword ptr arg(6) ; yoffset - - cmp rdx, 0 ; skip second_pass filter if yoffset=0 - je .filter_block2d_bil_var_ssse3_fp_only - - shl rdx, 4 - lea rdx, [rdx + rcx] ; VFilter - - mov rsi, arg(0) ;ref_ptr - mov rdi, arg(2) ;src_ptr - movsxd rcx, dword ptr arg(4) ;Height - - movdqu xmm0, XMMWORD PTR [rsi] - movdqu xmm1, XMMWORD PTR [rsi+1] - movdqa xmm2, xmm0 - - punpcklbw xmm0, xmm1 - punpckhbw xmm2, xmm1 - pmaddubsw xmm0, [rax] - pmaddubsw xmm2, [rax] - - paddw xmm0, [GLOBAL(xmm_bi_rd)] - paddw xmm2, [GLOBAL(xmm_bi_rd)] - psraw xmm0, xmm_filter_shift - psraw xmm2, xmm_filter_shift - - packuswb xmm0, xmm2 - -%if ABI_IS_32BIT - add rsi, dword ptr arg(1) ;ref_pixels_per_line -%else - movsxd r8, dword ptr arg(1) ;ref_pixels_per_line - movsxd r9, dword ptr arg(3) ;src_pixels_per_line - lea rsi, [rsi + r8] -%endif - -.filter_block2d_bil_var_ssse3_loop: - movdqu xmm1, XMMWORD PTR [rsi] - movdqu xmm2, XMMWORD PTR [rsi+1] - movdqa xmm3, xmm1 - - punpcklbw xmm1, xmm2 - punpckhbw 
xmm3, xmm2 - pmaddubsw xmm1, [rax] - pmaddubsw xmm3, [rax] - - paddw xmm1, [GLOBAL(xmm_bi_rd)] - paddw xmm3, [GLOBAL(xmm_bi_rd)] - psraw xmm1, xmm_filter_shift - psraw xmm3, xmm_filter_shift - packuswb xmm1, xmm3 - - movdqa xmm2, xmm0 - movdqa xmm0, xmm1 - movdqa xmm3, xmm2 - - punpcklbw xmm2, xmm1 - punpckhbw xmm3, xmm1 - pmaddubsw xmm2, [rdx] - pmaddubsw xmm3, [rdx] - - paddw xmm2, [GLOBAL(xmm_bi_rd)] - paddw xmm3, [GLOBAL(xmm_bi_rd)] - psraw xmm2, xmm_filter_shift - psraw xmm3, xmm_filter_shift - - movq xmm1, QWORD PTR [rdi] - pxor xmm4, xmm4 - punpcklbw xmm1, xmm4 - movq xmm5, QWORD PTR [rdi+8] - punpcklbw xmm5, xmm4 - - psubw xmm2, xmm1 - psubw xmm3, xmm5 - paddw xmm6, xmm2 - paddw xmm6, xmm3 - pmaddwd xmm2, xmm2 - pmaddwd xmm3, xmm3 - paddd xmm7, xmm2 - paddd xmm7, xmm3 - -%if ABI_IS_32BIT - add rsi, dword ptr arg(1) ;ref_pixels_per_line - add rdi, dword ptr arg(3) ;src_pixels_per_line -%else - lea rsi, [rsi + r8] - lea rdi, [rdi + r9] -%endif - - sub rcx, 1 - jnz .filter_block2d_bil_var_ssse3_loop - - jmp .filter_block2d_bil_variance - -.filter_block2d_bil_var_ssse3_sp_only: - movsxd rdx, dword ptr arg(6) ; yoffset - - cmp rdx, 0 ; Both xoffset =0 and yoffset=0 - je .filter_block2d_bil_var_ssse3_full_pixel - - shl rdx, 4 - lea rdx, [rdx + rcx] ; VFilter - - mov rsi, arg(0) ;ref_ptr - mov rdi, arg(2) ;src_ptr - movsxd rcx, dword ptr arg(4) ;Height - movsxd rax, dword ptr arg(1) ;ref_pixels_per_line - - movdqu xmm1, XMMWORD PTR [rsi] - movdqa xmm0, xmm1 - -%if ABI_IS_32BIT=0 - movsxd r9, dword ptr arg(3) ;src_pixels_per_line -%endif - - lea rsi, [rsi + rax] - -.filter_block2d_bil_sp_only_loop: - movdqu xmm3, XMMWORD PTR [rsi] - movdqa xmm2, xmm1 - movdqa xmm0, xmm3 - - punpcklbw xmm1, xmm3 - punpckhbw xmm2, xmm3 - pmaddubsw xmm1, [rdx] - pmaddubsw xmm2, [rdx] - - paddw xmm1, [GLOBAL(xmm_bi_rd)] - paddw xmm2, [GLOBAL(xmm_bi_rd)] - psraw xmm1, xmm_filter_shift - psraw xmm2, xmm_filter_shift - - movq xmm3, QWORD PTR [rdi] - pxor xmm4, xmm4 - punpcklbw xmm3, xmm4 
- movq xmm5, QWORD PTR [rdi+8] - punpcklbw xmm5, xmm4 - - psubw xmm1, xmm3 - psubw xmm2, xmm5 - paddw xmm6, xmm1 - paddw xmm6, xmm2 - pmaddwd xmm1, xmm1 - pmaddwd xmm2, xmm2 - paddd xmm7, xmm1 - paddd xmm7, xmm2 - - movdqa xmm1, xmm0 - lea rsi, [rsi + rax] ;ref_pixels_per_line - -%if ABI_IS_32BIT - add rdi, dword ptr arg(3) ;src_pixels_per_line -%else - lea rdi, [rdi + r9] -%endif - - sub rcx, 1 - jnz .filter_block2d_bil_sp_only_loop - - jmp .filter_block2d_bil_variance - -.filter_block2d_bil_var_ssse3_full_pixel: - mov rsi, arg(0) ;ref_ptr - mov rdi, arg(2) ;src_ptr - movsxd rcx, dword ptr arg(4) ;Height - movsxd rax, dword ptr arg(1) ;ref_pixels_per_line - movsxd rdx, dword ptr arg(3) ;src_pixels_per_line - pxor xmm0, xmm0 - -.filter_block2d_bil_full_pixel_loop: - movq xmm1, QWORD PTR [rsi] - punpcklbw xmm1, xmm0 - movq xmm2, QWORD PTR [rsi+8] - punpcklbw xmm2, xmm0 - - movq xmm3, QWORD PTR [rdi] - punpcklbw xmm3, xmm0 - movq xmm4, QWORD PTR [rdi+8] - punpcklbw xmm4, xmm0 - - psubw xmm1, xmm3 - psubw xmm2, xmm4 - paddw xmm6, xmm1 - paddw xmm6, xmm2 - pmaddwd xmm1, xmm1 - pmaddwd xmm2, xmm2 - paddd xmm7, xmm1 - paddd xmm7, xmm2 - - lea rsi, [rsi + rax] ;ref_pixels_per_line - lea rdi, [rdi + rdx] ;src_pixels_per_line - sub rcx, 1 - jnz .filter_block2d_bil_full_pixel_loop - - jmp .filter_block2d_bil_variance - -.filter_block2d_bil_var_ssse3_fp_only: - mov rsi, arg(0) ;ref_ptr - mov rdi, arg(2) ;src_ptr - movsxd rcx, dword ptr arg(4) ;Height - movsxd rdx, dword ptr arg(1) ;ref_pixels_per_line - - pxor xmm0, xmm0 - -%if ABI_IS_32BIT=0 - movsxd r9, dword ptr arg(3) ;src_pixels_per_line -%endif - -.filter_block2d_bil_fp_only_loop: - movdqu xmm1, XMMWORD PTR [rsi] - movdqu xmm2, XMMWORD PTR [rsi+1] - movdqa xmm3, xmm1 - - punpcklbw xmm1, xmm2 - punpckhbw xmm3, xmm2 - pmaddubsw xmm1, [rax] - pmaddubsw xmm3, [rax] - - paddw xmm1, [GLOBAL(xmm_bi_rd)] - paddw xmm3, [GLOBAL(xmm_bi_rd)] - psraw xmm1, xmm_filter_shift - psraw xmm3, xmm_filter_shift - - movq xmm2, XMMWORD PTR 
[rdi] - pxor xmm4, xmm4 - punpcklbw xmm2, xmm4 - movq xmm5, QWORD PTR [rdi+8] - punpcklbw xmm5, xmm4 - - psubw xmm1, xmm2 - psubw xmm3, xmm5 - paddw xmm6, xmm1 - paddw xmm6, xmm3 - pmaddwd xmm1, xmm1 - pmaddwd xmm3, xmm3 - paddd xmm7, xmm1 - paddd xmm7, xmm3 - - lea rsi, [rsi + rdx] -%if ABI_IS_32BIT - add rdi, dword ptr arg(3) ;src_pixels_per_line -%else - lea rdi, [rdi + r9] -%endif - - sub rcx, 1 - jnz .filter_block2d_bil_fp_only_loop - - jmp .filter_block2d_bil_variance - -.filter_block2d_bil_variance: - pxor xmm0, xmm0 - pxor xmm1, xmm1 - pxor xmm5, xmm5 - - punpcklwd xmm0, xmm6 - punpckhwd xmm1, xmm6 - psrad xmm0, 16 - psrad xmm1, 16 - paddd xmm0, xmm1 - movdqa xmm1, xmm0 - - movdqa xmm6, xmm7 - punpckldq xmm6, xmm5 - punpckhdq xmm7, xmm5 - paddd xmm6, xmm7 - - punpckldq xmm0, xmm5 - punpckhdq xmm1, xmm5 - paddd xmm0, xmm1 - - movdqa xmm7, xmm6 - movdqa xmm1, xmm0 - - psrldq xmm7, 8 - psrldq xmm1, 8 - - paddd xmm6, xmm7 - paddd xmm0, xmm1 - - mov rsi, arg(7) ;[Sum] - mov rdi, arg(8) ;[SSE] - - movd [rsi], xmm0 - movd [rdi], xmm6 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -SECTION_RODATA -align 16 -xmm_bi_rd: - times 8 dw 64 -align 16 -bilinear_filters_ssse3: - times 8 db 128, 0 - times 8 db 120, 8 - times 8 db 112, 16 - times 8 db 104, 24 - times 8 db 96, 32 - times 8 db 88, 40 - times 8 db 80, 48 - times 8 db 72, 56 - times 8 db 64, 64 - times 8 db 56, 72 - times 8 db 48, 80 - times 8 db 40, 88 - times 8 db 32, 96 - times 8 db 24, 104 - times 8 db 16, 112 - times 8 db 8, 120 diff --git a/vp8/encoder/x86/variance_mmx.c b/vp8/encoder/x86/variance_mmx.c deleted file mode 100644 index 2d72d50a8..000000000 --- a/vp8/encoder/x86/variance_mmx.c +++ /dev/null @@ -1,406 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. 
An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vpx_config.h" -#include "vp8/encoder/variance.h" -#include "vp8/common/pragmas.h" -#include "vpx_ports/mem.h" - -extern void filter_block1d_h6_mmx -( - const unsigned char *src_ptr, - unsigned short *output_ptr, - unsigned int src_pixels_per_line, - unsigned int pixel_step, - unsigned int output_height, - unsigned int output_width, - short *vp7_filter -); -extern void filter_block1d_v6_mmx -( - const short *src_ptr, - unsigned char *output_ptr, - unsigned int pixels_per_line, - unsigned int pixel_step, - unsigned int output_height, - unsigned int output_width, - short *vp7_filter -); - -extern unsigned int vp9_get_mb_ss_mmx(const short *src_ptr); -extern unsigned int vp9_get8x8var_mmx -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *SSE, - int *Sum -); -extern unsigned int vp9_get4x4var_mmx -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *SSE, - int *Sum -); -extern void vp9_filter_block2d_bil4x4_var_mmx -( - const unsigned char *ref_ptr, - int ref_pixels_per_line, - const unsigned char *src_ptr, - int src_pixels_per_line, - const short *HFilter, - const short *VFilter, - int *sum, - unsigned int *sumsquared -); -extern void vp9_filter_block2d_bil_var_mmx -( - const unsigned char *ref_ptr, - int ref_pixels_per_line, - const unsigned char *src_ptr, - int src_pixels_per_line, - unsigned int Height, - const short *HFilter, - const short *VFilter, - int *sum, - unsigned int *sumsquared -); - - -unsigned int vp9_variance4x4_mmx( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - unsigned int var; - int avg; - - vp9_get4x4var_mmx(src_ptr, 
source_stride, ref_ptr, recon_stride, &var, &avg); - *sse = var; - return (var - ((avg * avg) >> 4)); - -} - -unsigned int vp9_variance8x8_mmx( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - unsigned int var; - int avg; - - vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg); - *sse = var; - - return (var - ((avg * avg) >> 6)); - -} - -unsigned int vp9_mse16x16_mmx( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - unsigned int sse0, sse1, sse2, sse3, var; - int sum0, sum1, sum2, sum3; - - - vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0); - vp9_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1); - vp9_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2); - vp9_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3); - - var = sse0 + sse1 + sse2 + sse3; - *sse = var; - return var; -} - - -unsigned int vp9_variance16x16_mmx( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - unsigned int sse0, sse1, sse2, sse3, var; - int sum0, sum1, sum2, sum3, avg; - - - vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0); - vp9_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1); - vp9_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2); - vp9_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3); - - var = sse0 + sse1 + sse2 + sse3; - avg = sum0 + sum1 + sum2 + sum3; - *sse = var; - return (var - ((avg * avg) >> 8)); -} - -unsigned int 
vp9_variance16x8_mmx( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - unsigned int sse0, sse1, var; - int sum0, sum1, avg; - - vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0); - vp9_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1); - - var = sse0 + sse1; - avg = sum0 + sum1; - *sse = var; - return (var - ((avg * avg) >> 7)); - -} - - -unsigned int vp9_variance8x16_mmx( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - unsigned int sse0, sse1, var; - int sum0, sum1, avg; - - vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0); - vp9_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1); - - var = sse0 + sse1; - avg = sum0 + sum1; - *sse = var; - - return (var - ((avg * avg) >> 7)); - -} - - - - -/////////////////////////////////////////////////////////////////////////// -// the mmx function that does the bilinear filtering and var calculation // -// int one pass // -/////////////////////////////////////////////////////////////////////////// -DECLARE_ALIGNED(16, const short, vp9_bilinear_filters_mmx[16][8]) = { - { 128, 128, 128, 128, 0, 0, 0, 0 }, - { 120, 120, 120, 120, 8, 8, 8, 8 }, - { 112, 112, 112, 112, 16, 16, 16, 16 }, - { 104, 104, 104, 104, 24, 24, 24, 24 }, - { 96, 96, 96, 96, 32, 32, 32, 32 }, - { 88, 88, 88, 88, 40, 40, 40, 40 }, - { 80, 80, 80, 80, 48, 48, 48, 48 }, - { 72, 72, 72, 72, 56, 56, 56, 56 }, - { 64, 64, 64, 64, 64, 64, 64, 64 }, - { 56, 56, 56, 56, 72, 72, 72, 72 }, - { 48, 48, 48, 48, 80, 80, 80, 80 }, - { 40, 40, 40, 40, 88, 88, 88, 88 }, - { 32, 32, 32, 32, 96, 96, 96, 96 }, - { 24, 24, 24, 24, 104, 104, 104, 104 }, - { 16, 16, 16, 16, 112, 112, 112, 112 }, - { 8, 8, 8, 8, 120, 120, 120, 120 } -}; - -unsigned int 
vp9_sub_pixel_variance4x4_mmx -( - const unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse) - -{ - int xsum; - unsigned int xxsum; - vp9_filter_block2d_bil4x4_var_mmx( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, - vp9_bilinear_filters_mmx[xoffset], vp9_bilinear_filters_mmx[yoffset], - &xsum, &xxsum - ); - *sse = xxsum; - return (xxsum - ((xsum * xsum) >> 4)); -} - - -unsigned int vp9_sub_pixel_variance8x8_mmx -( - const unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse -) { - - int xsum; - unsigned int xxsum; - vp9_filter_block2d_bil_var_mmx( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 8, - vp9_bilinear_filters_mmx[xoffset], vp9_bilinear_filters_mmx[yoffset], - &xsum, &xxsum - ); - *sse = xxsum; - return (xxsum - ((xsum * xsum) >> 6)); -} - -unsigned int vp9_sub_pixel_variance16x16_mmx -( - const unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse -) { - - int xsum0, xsum1; - unsigned int xxsum0, xxsum1; - - vp9_filter_block2d_bil_var_mmx( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 16, - vp9_bilinear_filters_mmx[xoffset], vp9_bilinear_filters_mmx[yoffset], - &xsum0, &xxsum0 - ); - - vp9_filter_block2d_bil_var_mmx( - src_ptr + 8, src_pixels_per_line, - dst_ptr + 8, dst_pixels_per_line, 16, - vp9_bilinear_filters_mmx[xoffset], vp9_bilinear_filters_mmx[yoffset], - &xsum1, &xxsum1 - ); - - xsum0 += xsum1; - xxsum0 += xxsum1; - - *sse = xxsum0; - return (xxsum0 - ((xsum0 * xsum0) >> 8)); - - -} - -unsigned int vp9_sub_pixel_mse16x16_mmx( - const unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - 
unsigned int *sse -) { - vp9_sub_pixel_variance16x16_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse); - return *sse; -} - -unsigned int vp9_sub_pixel_variance16x8_mmx -( - const unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse -) { - int xsum0, xsum1; - unsigned int xxsum0, xxsum1; - - - vp9_filter_block2d_bil_var_mmx( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 8, - vp9_bilinear_filters_mmx[xoffset], vp9_bilinear_filters_mmx[yoffset], - &xsum0, &xxsum0 - ); - - - vp9_filter_block2d_bil_var_mmx( - src_ptr + 8, src_pixels_per_line, - dst_ptr + 8, dst_pixels_per_line, 8, - vp9_bilinear_filters_mmx[xoffset], vp9_bilinear_filters_mmx[yoffset], - &xsum1, &xxsum1 - ); - - xsum0 += xsum1; - xxsum0 += xxsum1; - - *sse = xxsum0; - return (xxsum0 - ((xsum0 * xsum0) >> 7)); -} - -unsigned int vp9_sub_pixel_variance8x16_mmx -( - const unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse -) { - int xsum; - unsigned int xxsum; - vp9_filter_block2d_bil_var_mmx( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 16, - vp9_bilinear_filters_mmx[xoffset], vp9_bilinear_filters_mmx[yoffset], - &xsum, &xxsum - ); - *sse = xxsum; - return (xxsum - ((xsum * xsum) >> 7)); -} - - -unsigned int vp9_variance_halfpixvar16x16_h_mmx( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - return vp9_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 8, 0, - ref_ptr, recon_stride, sse); -} - - -unsigned int vp9_variance_halfpixvar16x16_v_mmx( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - return vp9_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 0, 8, 
- ref_ptr, recon_stride, sse); -} - - -unsigned int vp9_variance_halfpixvar16x16_hv_mmx( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - return vp9_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 8, 8, - ref_ptr, recon_stride, sse); -} diff --git a/vp8/encoder/x86/variance_sse2.c b/vp8/encoder/x86/variance_sse2.c deleted file mode 100644 index f3b0b600a..000000000 --- a/vp8/encoder/x86/variance_sse2.c +++ /dev/null @@ -1,517 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vpx_config.h" -#include "vp8/encoder/variance.h" -#include "vp8/common/pragmas.h" -#include "vpx_ports/mem.h" - -#define HALFNDX 8 - -extern void filter_block1d_h6_mmx(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter); -extern void filter_block1d_v6_mmx(const short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter); -extern void filter_block1d8_h6_sse2(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter); -extern void filter_block1d8_v6_sse2(const short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter); - -extern void 
vp9_filter_block2d_bil4x4_var_mmx -( - const unsigned char *ref_ptr, - int ref_pixels_per_line, - const unsigned char *src_ptr, - int src_pixels_per_line, - const short *HFilter, - const short *VFilter, - int *sum, - unsigned int *sumsquared -); - -extern unsigned int vp9_get4x4var_mmx -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *SSE, - int *Sum -); - -unsigned int vp9_get_mb_ss_sse2 -( - const short *src_ptr -); -unsigned int vp9_get16x16var_sse2 -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *SSE, - int *Sum -); -unsigned int vp9_get8x8var_sse2 -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *SSE, - int *Sum -); -void vp9_filter_block2d_bil_var_sse2 -( - const unsigned char *ref_ptr, - int ref_pixels_per_line, - const unsigned char *src_ptr, - int src_pixels_per_line, - unsigned int Height, - int xoffset, - int yoffset, - int *sum, - unsigned int *sumsquared -); -void vp9_half_horiz_vert_variance8x_h_sse2 -( - const unsigned char *ref_ptr, - int ref_pixels_per_line, - const unsigned char *src_ptr, - int src_pixels_per_line, - unsigned int Height, - int *sum, - unsigned int *sumsquared -); -void vp9_half_horiz_vert_variance16x_h_sse2 -( - const unsigned char *ref_ptr, - int ref_pixels_per_line, - const unsigned char *src_ptr, - int src_pixels_per_line, - unsigned int Height, - int *sum, - unsigned int *sumsquared -); -void vp9_half_horiz_variance8x_h_sse2 -( - const unsigned char *ref_ptr, - int ref_pixels_per_line, - const unsigned char *src_ptr, - int src_pixels_per_line, - unsigned int Height, - int *sum, - unsigned int *sumsquared -); -void vp9_half_horiz_variance16x_h_sse2 -( - const unsigned char *ref_ptr, - int ref_pixels_per_line, - const unsigned char *src_ptr, - int src_pixels_per_line, - unsigned int Height, - int *sum, - 
unsigned int *sumsquared -); -void vp9_half_vert_variance8x_h_sse2 -( - const unsigned char *ref_ptr, - int ref_pixels_per_line, - const unsigned char *src_ptr, - int src_pixels_per_line, - unsigned int Height, - int *sum, - unsigned int *sumsquared -); -void vp9_half_vert_variance16x_h_sse2 -( - const unsigned char *ref_ptr, - int ref_pixels_per_line, - const unsigned char *src_ptr, - int src_pixels_per_line, - unsigned int Height, - int *sum, - unsigned int *sumsquared -); - -DECLARE_ALIGNED(16, extern short, vp9_bilinear_filters_mmx[16][8]); - -unsigned int vp9_variance4x4_wmt( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - unsigned int var; - int avg; - - vp9_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg); - *sse = var; - return (var - ((avg * avg) >> 4)); - -} - -unsigned int vp9_variance8x8_wmt -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - unsigned int var; - int avg; - - vp9_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg); - *sse = var; - return (var - ((avg * avg) >> 6)); - -} - - -unsigned int vp9_variance16x16_wmt -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - unsigned int sse0; - int sum0; - - - vp9_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0); - *sse = sse0; - return (sse0 - ((sum0 * sum0) >> 8)); -} -unsigned int vp9_mse16x16_wmt( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - - unsigned int sse0; - int sum0; - vp9_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0); - *sse = sse0; - return sse0; - -} - - -unsigned int vp9_variance16x8_wmt -( - const unsigned char *src_ptr, - int source_stride, - const unsigned 
char *ref_ptr, - int recon_stride, - unsigned int *sse) { - unsigned int sse0, sse1, var; - int sum0, sum1, avg; - - vp9_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0); - vp9_get8x8var_sse2(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1); - - var = sse0 + sse1; - avg = sum0 + sum1; - *sse = var; - return (var - ((avg * avg) >> 7)); - -} - -unsigned int vp9_variance8x16_wmt -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - unsigned int sse0, sse1, var; - int sum0, sum1, avg; - - vp9_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0); - vp9_get8x8var_sse2(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1); - - var = sse0 + sse1; - avg = sum0 + sum1; - *sse = var; - return (var - ((avg * avg) >> 7)); - -} - -unsigned int vp9_sub_pixel_variance4x4_wmt -( - const unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse -) { - int xsum; - unsigned int xxsum; - vp9_filter_block2d_bil4x4_var_mmx( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, - vp9_bilinear_filters_mmx[xoffset], vp9_bilinear_filters_mmx[yoffset], - &xsum, &xxsum - ); - *sse = xxsum; - return (xxsum - ((xsum * xsum) >> 4)); -} - - -unsigned int vp9_sub_pixel_variance8x8_wmt -( - const unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse -) { - int xsum; - unsigned int xxsum; - - if (xoffset == HALFNDX && yoffset == 0) { - vp9_half_horiz_variance8x_h_sse2( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 8, - &xsum, &xxsum); - } else if (xoffset == 0 && yoffset == HALFNDX) { - vp9_half_vert_variance8x_h_sse2( - src_ptr, src_pixels_per_line, - dst_ptr, 
dst_pixels_per_line, 8, - &xsum, &xxsum); - } else if (xoffset == HALFNDX && yoffset == HALFNDX) { - vp9_half_horiz_vert_variance8x_h_sse2( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 8, - &xsum, &xxsum); - } else { - vp9_filter_block2d_bil_var_sse2( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 8, - xoffset, yoffset, - &xsum, &xxsum); - } - - *sse = xxsum; - return (xxsum - ((xsum * xsum) >> 6)); -} - -unsigned int vp9_sub_pixel_variance16x16_wmt -( - const unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse -) { - int xsum0, xsum1; - unsigned int xxsum0, xxsum1; - - - // note we could avoid these if statements if the calling function - // just called the appropriate functions inside. - if (xoffset == HALFNDX && yoffset == 0) { - vp9_half_horiz_variance16x_h_sse2( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 16, - &xsum0, &xxsum0); - } else if (xoffset == 0 && yoffset == HALFNDX) { - vp9_half_vert_variance16x_h_sse2( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 16, - &xsum0, &xxsum0); - } else if (xoffset == HALFNDX && yoffset == HALFNDX) { - vp9_half_horiz_vert_variance16x_h_sse2( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 16, - &xsum0, &xxsum0); - } else { - vp9_filter_block2d_bil_var_sse2( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 16, - xoffset, yoffset, - &xsum0, &xxsum0 - ); - - vp9_filter_block2d_bil_var_sse2( - src_ptr + 8, src_pixels_per_line, - dst_ptr + 8, dst_pixels_per_line, 16, - xoffset, yoffset, - &xsum1, &xxsum1 - ); - xsum0 += xsum1; - xxsum0 += xxsum1; - } - - *sse = xxsum0; - return (xxsum0 - ((xsum0 * xsum0) >> 8)); -} - -unsigned int vp9_sub_pixel_mse16x16_wmt( - const unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int 
*sse -) { - vp9_sub_pixel_variance16x16_wmt(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse); - return *sse; -} - -unsigned int vp9_sub_pixel_variance16x8_wmt -( - const unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse - -) { - int xsum0, xsum1; - unsigned int xxsum0, xxsum1; - - if (xoffset == HALFNDX && yoffset == 0) { - vp9_half_horiz_variance16x_h_sse2( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 8, - &xsum0, &xxsum0); - } else if (xoffset == 0 && yoffset == HALFNDX) { - vp9_half_vert_variance16x_h_sse2( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 8, - &xsum0, &xxsum0); - } else if (xoffset == HALFNDX && yoffset == HALFNDX) { - vp9_half_horiz_vert_variance16x_h_sse2( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 8, - &xsum0, &xxsum0); - } else { - vp9_filter_block2d_bil_var_sse2( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 8, - xoffset, yoffset, - &xsum0, &xxsum0); - - vp9_filter_block2d_bil_var_sse2( - src_ptr + 8, src_pixels_per_line, - dst_ptr + 8, dst_pixels_per_line, 8, - xoffset, yoffset, - &xsum1, &xxsum1); - xsum0 += xsum1; - xxsum0 += xxsum1; - } - - *sse = xxsum0; - return (xxsum0 - ((xsum0 * xsum0) >> 7)); -} - -unsigned int vp9_sub_pixel_variance8x16_wmt -( - const unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse -) { - int xsum; - unsigned int xxsum; - - if (xoffset == HALFNDX && yoffset == 0) { - vp9_half_horiz_variance8x_h_sse2( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 16, - &xsum, &xxsum); - } else if (xoffset == 0 && yoffset == HALFNDX) { - vp9_half_vert_variance8x_h_sse2( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 16, - &xsum, &xxsum); - } else if (xoffset == HALFNDX && 
yoffset == HALFNDX) { - vp9_half_horiz_vert_variance8x_h_sse2( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 16, - &xsum, &xxsum); - } else { - vp9_filter_block2d_bil_var_sse2( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 16, - xoffset, yoffset, - &xsum, &xxsum); - } - - *sse = xxsum; - return (xxsum - ((xsum * xsum) >> 7)); -} - - -unsigned int vp9_variance_halfpixvar16x16_h_wmt( - const unsigned char *src_ptr, - int src_pixels_per_line, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse) { - int xsum0; - unsigned int xxsum0; - - vp9_half_horiz_variance16x_h_sse2( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 16, - &xsum0, &xxsum0); - - *sse = xxsum0; - return (xxsum0 - ((xsum0 * xsum0) >> 8)); -} - - -unsigned int vp9_variance_halfpixvar16x16_v_wmt( - const unsigned char *src_ptr, - int src_pixels_per_line, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse) { - int xsum0; - unsigned int xxsum0; - vp9_half_vert_variance16x_h_sse2( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 16, - &xsum0, &xxsum0); - - *sse = xxsum0; - return (xxsum0 - ((xsum0 * xsum0) >> 8)); -} - - -unsigned int vp9_variance_halfpixvar16x16_hv_wmt( - const unsigned char *src_ptr, - int src_pixels_per_line, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse) { - int xsum0; - unsigned int xxsum0; - - vp9_half_horiz_vert_variance16x_h_sse2( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 16, - &xsum0, &xxsum0); - - *sse = xxsum0; - return (xxsum0 - ((xsum0 * xsum0) >> 8)); -} diff --git a/vp8/encoder/x86/variance_ssse3.c b/vp8/encoder/x86/variance_ssse3.c deleted file mode 100644 index f33c662e1..000000000 --- a/vp8/encoder/x86/variance_ssse3.c +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vpx_config.h" -#include "vp8/encoder/variance.h" -#include "vp8/common/pragmas.h" -#include "vpx_ports/mem.h" - -#define HALFNDX 8 - -extern unsigned int vp9_get16x16var_sse2 -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *SSE, - int *Sum -); -extern void vp9_half_horiz_vert_variance16x_h_sse2 -( - const unsigned char *ref_ptr, - int ref_pixels_per_line, - const unsigned char *src_ptr, - int src_pixels_per_line, - unsigned int Height, - int *sum, - unsigned int *sumsquared -); -extern void vp9_half_horiz_variance16x_h_sse2 -( - const unsigned char *ref_ptr, - int ref_pixels_per_line, - const unsigned char *src_ptr, - int src_pixels_per_line, - unsigned int Height, - int *sum, - unsigned int *sumsquared -); -extern void vp9_half_vert_variance16x_h_sse2 -( - const unsigned char *ref_ptr, - int ref_pixels_per_line, - const unsigned char *src_ptr, - int src_pixels_per_line, - unsigned int Height, - int *sum, - unsigned int *sumsquared -); -extern void vp9_filter_block2d_bil_var_ssse3 -( - const unsigned char *ref_ptr, - int ref_pixels_per_line, - const unsigned char *src_ptr, - int src_pixels_per_line, - unsigned int Height, - int xoffset, - int yoffset, - int *sum, - unsigned int *sumsquared -); - -unsigned int vp9_sub_pixel_variance16x16_ssse3 -( - const unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse -) { - int xsum0; - unsigned int xxsum0; - - // note we could avoid these if statements if the calling function - // just called the 
appropriate functions inside. - if (xoffset == HALFNDX && yoffset == 0) { - vp9_half_horiz_variance16x_h_sse2( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 16, - &xsum0, &xxsum0); - } else if (xoffset == 0 && yoffset == HALFNDX) { - vp9_half_vert_variance16x_h_sse2( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 16, - &xsum0, &xxsum0); - } else if (xoffset == HALFNDX && yoffset == HALFNDX) { - vp9_half_horiz_vert_variance16x_h_sse2( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 16, - &xsum0, &xxsum0); - } else { - vp9_filter_block2d_bil_var_ssse3( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 16, - xoffset, yoffset, - &xsum0, &xxsum0); - } - - *sse = xxsum0; - return (xxsum0 - ((xsum0 * xsum0) >> 8)); -} - -unsigned int vp9_sub_pixel_variance16x8_ssse3 -( - const unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse - -) { - int xsum0; - unsigned int xxsum0; - - if (xoffset == HALFNDX && yoffset == 0) { - vp9_half_horiz_variance16x_h_sse2( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 8, - &xsum0, &xxsum0); - } else if (xoffset == 0 && yoffset == HALFNDX) { - vp9_half_vert_variance16x_h_sse2( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 8, - &xsum0, &xxsum0); - } else if (xoffset == HALFNDX && yoffset == HALFNDX) { - vp9_half_horiz_vert_variance16x_h_sse2( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 8, - &xsum0, &xxsum0); - } else { - vp9_filter_block2d_bil_var_ssse3( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 8, - xoffset, yoffset, - &xsum0, &xxsum0); - } - - *sse = xxsum0; - return (xxsum0 - ((xsum0 * xsum0) >> 7)); -} diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c deleted file mode 100644 index 4af69c719..000000000 --- a/vp8/encoder/x86/x86_csystemdependent.c +++ 
/dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vpx_ports/config.h" -#include "vpx_ports/x86.h" -#include "vp8/encoder/variance.h" -#include "vp8/encoder/onyx_int.h" - - -#if HAVE_MMX -void vp9_short_fdct8x4_mmx(short *input, short *output, int pitch) { - vp9_short_fdct4x4_mmx(input, output, pitch); - vp9_short_fdct4x4_mmx(input + 4, output + 16, pitch); -} - -int vp9_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc); -int vp9_mbblock_error_mmx(MACROBLOCK *mb, int dc) { - short *coeff_ptr = mb->block[0].coeff; - short *dcoef_ptr = mb->e_mbd.block[0].dqcoeff; - return vp9_mbblock_error_mmx_impl(coeff_ptr, dcoef_ptr, dc); -} - -int vp9_mbuverror_mmx_impl(short *s_ptr, short *d_ptr); -int vp9_mbuverror_mmx(MACROBLOCK *mb) { - short *s_ptr = &mb->coeff[256]; - short *d_ptr = &mb->e_mbd.dqcoeff[256]; - return vp9_mbuverror_mmx_impl(s_ptr, d_ptr); -} - -void vp9_subtract_b_mmx_impl(unsigned char *z, int src_stride, - short *diff, unsigned char *predictor, - int pitch); -void vp9_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch) { - unsigned char *z = *(be->base_src) + be->src; - unsigned int src_stride = be->src_stride; - short *diff = &be->src_diff[0]; - unsigned char *predictor = &bd->predictor[0]; - vp9_subtract_b_mmx_impl(z, src_stride, diff, predictor, pitch); -} - -#endif - -#if HAVE_SSE2 -int vp9_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc); -int vp9_mbblock_error_xmm(MACROBLOCK *mb, int dc) { - short *coeff_ptr = mb->block[0].coeff; - short *dcoef_ptr = mb->e_mbd.block[0].dqcoeff; - return 
vp9_mbblock_error_xmm_impl(coeff_ptr, dcoef_ptr, dc); -} - -int vp9_mbuverror_xmm_impl(short *s_ptr, short *d_ptr); -int vp9_mbuverror_xmm(MACROBLOCK *mb) { - short *s_ptr = &mb->coeff[256]; - short *d_ptr = &mb->e_mbd.dqcoeff[256]; - return vp9_mbuverror_xmm_impl(s_ptr, d_ptr); -} - -void vp9_subtract_b_sse2_impl(unsigned char *z, int src_stride, - short *diff, unsigned char *predictor, - int pitch); -void vp9_subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch) { - unsigned char *z = *(be->base_src) + be->src; - unsigned int src_stride = be->src_stride; - short *diff = &be->src_diff[0]; - unsigned char *predictor = &bd->predictor[0]; - vp9_subtract_b_sse2_impl(z, src_stride, diff, predictor, pitch); -} - -#endif - -void vp9_arch_x86_encoder_init(VP9_COMP *cpi) { -#if CONFIG_RUNTIME_CPU_DETECT - int flags = x86_simd_caps(); - - /* Note: - * - * This platform can be built without runtime CPU detection as well. If - * you modify any of the function mappings present in this file, be sure - * to also update them in static mapings (<arch>/filename_<arch>.h) - */ - - /* Override default functions with fastest ones for this CPU. */ -#if HAVE_SSE2 - if (flags & HAS_SSE2) { - cpi->rtcd.temporal.apply = vp9_temporal_filter_apply_sse2; - - } -#endif - -#if HAVE_SSE3 - if (flags & HAS_SSE3) { - cpi->rtcd.search.full_search = vp9_full_search_sadx3; - cpi->rtcd.search.diamond_search = vp9_diamond_search_sadx4; - cpi->rtcd.search.refining_search = vp9_refining_search_sadx4; - } -#endif - - -#if HAVE_SSE4_1 - if (flags & HAS_SSE4_1) { - cpi->rtcd.search.full_search = vp9_full_search_sadx8; - } -#endif - -#endif -} |