diff options
Diffstat (limited to 'vp8')
-rw-r--r--              | vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm |   2
-rw-r--r--              | vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm     |   4
-rw-r--r--              | vp8/encoder/arm/variance_arm.c                              |  36
-rw-r--r--              | vp8/encoder/arm/variance_arm.h                              |   1
-rw-r--r--              | vp8/encoder/firstpass.c                                     |  31
-rw-r--r--              | vp8/encoder/quantize.c                                      |   9
-rw-r--r--              | vp8/encoder/x86/quantize_sse2.asm                           | 316
-rw-r--r--[-rwxr-xr-x]  | vp8/encoder/x86/quantize_ssse3.asm                          |   0
-rw-r--r--              | vp8/encoder/x86/quantize_x86.h                              |  10
-rw-r--r--              | vp8/encoder/x86/x86_csystemdependent.c                      |  49
-rw-r--r--              | vp8/vp8cx_arm.mk                                            |   1
11 files changed, 228 insertions(+), 231 deletions(-)
diff --git a/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm index 57cd318ee..42dae13de 100644 --- a/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm +++ b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm @@ -65,6 +65,8 @@ numparts_loop ldr r10, [sp, #40] ; ptr ldr r5, [sp, #36] ; move mb_rows to the counting section + sub r5, r5, r11 ; move start point with each partition + ; mb_rows starts at i str r5, [sp, #12] ; Reset all of the VP8 Writer data for each partition that diff --git a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm index 1b09cfe4c..1475f76df 100644 --- a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm +++ b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm @@ -9,7 +9,7 @@ ; - EXPORT |vp8_sub_pixel_variance16x16_neon| + EXPORT |vp8_sub_pixel_variance16x16_neon_func| ARM REQUIRE8 PRESERVE8 @@ -24,7 +24,7 @@ ; stack(r6) unsigned int *sse ;note: most of the code is copied from bilinear_predict16x16_neon and vp8_variance16x16_neon. -|vp8_sub_pixel_variance16x16_neon| PROC +|vp8_sub_pixel_variance16x16_neon_func| PROC push {r4-r6, lr} ldr r12, _BilinearTaps_coeff_ diff --git a/vp8/encoder/arm/variance_arm.c b/vp8/encoder/arm/variance_arm.c new file mode 100644 index 000000000..4c7248543 --- /dev/null +++ b/vp8/encoder/arm/variance_arm.c @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "vpx_config.h" + +#if HAVE_ARMV7 + +unsigned int vp8_sub_pixel_variance16x16_neon +( + const unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + const unsigned char *dst_ptr, + int dst_pixels_per_line, + unsigned int *sse +) +{ + if (xoffset == 4 && yoffset == 0) + return vp8_variance_halfpixvar16x16_h_neon(src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse); + else if (xoffset == 0 && yoffset == 4) + return vp8_variance_halfpixvar16x16_v_neon(src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse); + else if (xoffset == 4 && yoffset == 4) + return vp8_variance_halfpixvar16x16_hv_neon(src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse); + else + return vp8_sub_pixel_variance16x16_neon_func(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse); +} + +#endif diff --git a/vp8/encoder/arm/variance_arm.h b/vp8/encoder/arm/variance_arm.h index 0e5f62fcf..3cbacfac3 100644 --- a/vp8/encoder/arm/variance_arm.h +++ b/vp8/encoder/arm/variance_arm.h @@ -30,6 +30,7 @@ extern prototype_subpixvariance(vp8_sub_pixel_variance8x8_neon); //extern prototype_subpixvariance(vp8_sub_pixel_variance8x16_c); //extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_c); extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_neon); +extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_neon_func); extern prototype_variance(vp8_variance_halfpixvar16x16_h_neon); extern prototype_variance(vp8_variance_halfpixvar16x16_v_neon); extern prototype_variance(vp8_variance_halfpixvar16x16_hv_neon); diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c index c47265548..26c9e72e1 100644 --- a/vp8/encoder/firstpass.c +++ b/vp8/encoder/firstpass.c @@ -2423,12 +2423,35 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) if (cpi->oxcf.auto_key && cpi->frames_to_key > (int)cpi->key_frame_frequency ) { + int current_pos = cpi->stats_in; + FIRSTPASS_STATS 
tmp_frame; + cpi->frames_to_key /= 2; - // Estimate corrected kf group error - kf_group_err /= 2.0; - kf_group_intra_err /= 2.0; - kf_group_coded_err /= 2.0; + // Copy first frame details + vpx_memcpy(&tmp_frame, &first_frame, sizeof(first_frame)); + + // Reset to the start of the group + reset_fpf_position(cpi, start_position); + + kf_group_err = 0; + kf_group_intra_err = 0; + kf_group_coded_err = 0; + + // Rescan to get the correct error data for the forced kf group + for( i = 0; i < cpi->frames_to_key; i++ ) + { + // Accumulate kf group errors + kf_group_err += calculate_modified_err(cpi, &tmp_frame); + kf_group_intra_err += tmp_frame.intra_error; + kf_group_coded_err += tmp_frame.coded_error; + + // Load a the next frame's stats + vp8_input_stats(cpi, &tmp_frame); + } + + // Reset to the start of the group + reset_fpf_position(cpi, current_pos); cpi->next_key_frame_forced = TRUE; } diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c index be9f26c7f..4a2329fc1 100644 --- a/vp8/encoder/quantize.c +++ b/vp8/encoder/quantize.c @@ -129,9 +129,6 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d) rc = vp8_default_zig_zag1d[i]; z = coeff_ptr[rc]; - //if ( i == 0 ) - // zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value/2; - //else zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value; zbin_boost_ptr ++; @@ -144,13 +141,13 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d) y = (((x * quant_ptr[rc]) >> 16) + x) >> quant_shift_ptr[rc]; // quantize (x) x = (y ^ sz) - sz; // get the sign back - qcoeff_ptr[rc] = x; // write to destination - dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value + qcoeff_ptr[rc] = x; // write to destination + dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value if (y) { eob = i; // last nonzero coeffs - zbin_boost_ptr = &b->zrun_zbin_boost[0]; // reset zero runlength + zbin_boost_ptr = b->zrun_zbin_boost; // reset zero runlength } } } diff --git a/vp8/encoder/x86/quantize_sse2.asm b/vp8/encoder/x86/quantize_sse2.asm 
index 57bf3c93a..45e1a2ad3 100644 --- a/vp8/encoder/x86/quantize_sse2.asm +++ b/vp8/encoder/x86/quantize_sse2.asm @@ -11,220 +11,169 @@ %include "vpx_ports/x86_abi_support.asm" -;int vp8_regular_quantize_b_impl_sse2(short *coeff_ptr, short *zbin_ptr, -; short *qcoeff_ptr,short *dequant_ptr, -; const int *default_zig_zag, short *round_ptr, -; short *quant_ptr, short *dqcoeff_ptr, +;int vp8_regular_quantize_b_impl_sse2( +; short *coeff_ptr, +; short *zbin_ptr, +; short *qcoeff_ptr, +; short *dequant_ptr, +; const int *default_zig_zag, +; short *round_ptr, +; short *quant_ptr, +; short *dqcoeff_ptr, ; unsigned short zbin_oq_value, -; short *zbin_boost_ptr); +; short *zbin_boost_ptr, +; short *quant_shift); ; global sym(vp8_regular_quantize_b_impl_sse2) sym(vp8_regular_quantize_b_impl_sse2): push rbp mov rbp, rsp - SHADOW_ARGS_TO_STACK 10 + SHADOW_ARGS_TO_STACK 11 + SAVE_XMM push rsi push rdi push rbx - ; end prolog - ALIGN_STACK 16, rax + %define abs_minus_zbin 0 + %define temp_qcoeff 32 + %define qcoeff 64 + %define eob_tmp 96 + %define stack_size 112 + sub rsp, stack_size + ; end prolog - %define abs_minus_zbin_lo 0 - %define abs_minus_zbin_hi 16 - %define temp_qcoeff_lo 32 - %define temp_qcoeff_hi 48 - %define save_xmm6 64 - %define save_xmm7 80 - %define eob 96 - - %define vp8_regularquantizeb_stack_size eob + 16 - - sub rsp, vp8_regularquantizeb_stack_size - - movdqa OWORD PTR[rsp + save_xmm6], xmm6 - movdqa OWORD PTR[rsp + save_xmm7], xmm7 - - mov rdx, arg(0) ;coeff_ptr - mov eax, arg(8) ;zbin_oq_value - - mov rcx, arg(1) ;zbin_ptr - movd xmm7, eax + mov rdx, arg(0) ; coeff_ptr + mov rcx, arg(1) ; zbin_ptr + movd xmm7, arg(8) ; zbin_oq_value + mov rdi, arg(5) ; round_ptr + mov rsi, arg(6) ; quant_ptr + ; z movdqa xmm0, OWORD PTR[rdx] movdqa xmm4, OWORD PTR[rdx + 16] + pshuflw xmm7, xmm7, 0 + punpcklwd xmm7, xmm7 ; duplicated zbin_oq_value + movdqa xmm1, xmm0 movdqa xmm5, xmm4 - psraw xmm0, 15 ;sign of z (aka sz) - psraw xmm4, 15 ;sign of z (aka sz) + ; sz + 
psraw xmm0, 15 + psraw xmm4, 15 + ; (z ^ sz) pxor xmm1, xmm0 pxor xmm5, xmm4 - movdqa xmm2, OWORD PTR[rcx] ;load zbin_ptr - movdqa xmm3, OWORD PTR[rcx + 16] ;load zbin_ptr - - pshuflw xmm7, xmm7, 0 - psubw xmm1, xmm0 ;x = abs(z) + ; x = abs(z) + psubw xmm1, xmm0 + psubw xmm5, xmm4 - punpcklwd xmm7, xmm7 ;duplicated zbin_oq_value - psubw xmm5, xmm4 ;x = abs(z) + movdqa xmm2, OWORD PTR[rcx] + movdqa xmm3, OWORD PTR[rcx + 16] + ; *zbin_ptr + zbin_oq_value paddw xmm2, xmm7 paddw xmm3, xmm7 - psubw xmm1, xmm2 ;sub (zbin_ptr + zbin_oq_value) - psubw xmm5, xmm3 ;sub (zbin_ptr + zbin_oq_value) - - mov rdi, arg(5) ;round_ptr - mov rsi, arg(6) ;quant_ptr + ; x - (*zbin_ptr + zbin_oq_value) + psubw xmm1, xmm2 + psubw xmm5, xmm3 + movdqa OWORD PTR[rsp + abs_minus_zbin], xmm1 + movdqa OWORD PTR[rsp + abs_minus_zbin + 16], xmm5 - movdqa OWORD PTR[rsp + abs_minus_zbin_lo], xmm1 - movdqa OWORD PTR[rsp + abs_minus_zbin_hi], xmm5 - - paddw xmm1, xmm2 ;add (zbin_ptr + zbin_oq_value) back - paddw xmm5, xmm3 ;add (zbin_ptr + zbin_oq_value) back + ; add (zbin_ptr + zbin_oq_value) back + paddw xmm1, xmm2 + paddw xmm5, xmm3 movdqa xmm2, OWORD PTR[rdi] - movdqa xmm3, OWORD PTR[rsi] - movdqa xmm6, OWORD PTR[rdi + 16] + + movdqa xmm3, OWORD PTR[rsi] movdqa xmm7, OWORD PTR[rsi + 16] + ; x + round paddw xmm1, xmm2 paddw xmm5, xmm6 - pmulhw xmm1, xmm3 - pmulhw xmm5, xmm7 - - mov rsi, arg(2) ;qcoeff_ptr - pxor xmm6, xmm6 - - pxor xmm1, xmm0 - pxor xmm5, xmm4 - - psubw xmm1, xmm0 - psubw xmm5, xmm4 - - movdqa OWORD PTR[rsp + temp_qcoeff_lo], xmm1 - movdqa OWORD PTR[rsp + temp_qcoeff_hi], xmm5 - - movdqa OWORD PTR[rsi], xmm6 ;zero qcoeff - movdqa OWORD PTR[rsi + 16], xmm6 ;zero qcoeff - - xor rax, rax - mov rcx, -1 - - mov [rsp + eob], rcx - mov rsi, arg(9) ;zbin_boost_ptr - - mov rbx, arg(4) ;default_zig_zag + ; y = x * quant_ptr >> 16 + pmulhw xmm3, xmm1 + pmulhw xmm7, xmm5 -rq_zigzag_loop: - movsxd rcx, DWORD PTR[rbx + rax*4] ;now we have rc - movsx edi, WORD PTR [rsi] ;*zbin_boost_ptr aka zbin 
- lea rsi, [rsi + 2] ;zbin_boost_ptr++ + ; y += x + paddw xmm1, xmm3 + paddw xmm5, xmm7 - movsx edx, WORD PTR[rsp + abs_minus_zbin_lo + rcx *2] + movdqa OWORD PTR[rsp + temp_qcoeff], xmm1 + movdqa OWORD PTR[rsp + temp_qcoeff + 16], xmm5 - sub edx, edi ;x - zbin - jl rq_zigzag_1 - - mov rdi, arg(2) ;qcoeff_ptr - - movsx edx, WORD PTR[rsp + temp_qcoeff_lo + rcx *2] - - cmp edx, 0 - je rq_zigzag_1 - - mov WORD PTR[rdi + rcx * 2], dx ;qcoeff_ptr[rc] = temp_qcoeff[rc] - - mov rsi, arg(9) ;zbin_boost_ptr - mov [rsp + eob], rax ;eob = i - -rq_zigzag_1: - movsxd rcx, DWORD PTR[rbx + rax*4 + 4] - movsx edi, WORD PTR [rsi] ;*zbin_boost_ptr aka zbin - lea rsi, [rsi + 2] ;zbin_boost_ptr++ - - movsx edx, WORD PTR[rsp + abs_minus_zbin_lo + rcx *2] - lea rax, [rax + 1] - - sub edx, edi ;x - zbin - jl rq_zigzag_1a - - mov rdi, arg(2) ;qcoeff_ptr - - movsx edx, WORD PTR[rsp + temp_qcoeff_lo + rcx *2] - - cmp edx, 0 - je rq_zigzag_1a - - mov WORD PTR[rdi + rcx * 2], dx ;qcoeff_ptr[rc] = temp_qcoeff[rc] - - mov rsi, arg(9) ;zbin_boost_ptr - mov [rsp + eob], rax ;eob = i - -rq_zigzag_1a: - movsxd rcx, DWORD PTR[rbx + rax*4 + 4] - movsx edi, WORD PTR [rsi] ;*zbin_boost_ptr aka zbin - lea rsi, [rsi + 2] ;zbin_boost_ptr++ - - movsx edx, WORD PTR[rsp + abs_minus_zbin_lo + rcx *2] - lea rax, [rax + 1] - - sub edx, edi ;x - zbin - jl rq_zigzag_1b - - mov rdi, arg(2) ;qcoeff_ptr - - movsx edx, WORD PTR[rsp + temp_qcoeff_lo + rcx *2] - - cmp edx, 0 - je rq_zigzag_1b - - mov WORD PTR[rdi + rcx * 2], dx ;qcoeff_ptr[rc] = temp_qcoeff[rc] - - mov rsi, arg(9) ;zbin_boost_ptr - mov [rsp + eob], rax ;eob = i - -rq_zigzag_1b: - movsxd rcx, DWORD PTR[rbx + rax*4 + 4] - movsx edi, WORD PTR [rsi] ;*zbin_boost_ptr aka zbin - lea rsi, [rsi + 2] ;zbin_boost_ptr++ - - movsx edx, WORD PTR[rsp + abs_minus_zbin_lo + rcx *2] - lea rax, [rax + 1] - - sub edx, edi ;x - zbin - jl rq_zigzag_1c - - mov rdi, arg(2) ;qcoeff_ptr - - movsx edx, WORD PTR[rsp + temp_qcoeff_lo + rcx *2] - - cmp edx, 0 - je rq_zigzag_1c - - 
mov WORD PTR[rdi + rcx * 2], dx ;qcoeff_ptr[rc] = temp_qcoeff[rc] - - mov rsi, arg(9) ;zbin_boost_ptr - mov [rsp + eob], rax ;eob = i - -rq_zigzag_1c: - lea rax, [rax + 1] - - cmp rax, 16 - jl rq_zigzag_loop - - mov rdi, arg(2) ;qcoeff_ptr - mov rcx, arg(3) ;dequant_ptr - mov rsi, arg(7) ;dqcoeff_ptr - - movdqa xmm2, OWORD PTR[rdi] - movdqa xmm3, OWORD PTR[rdi + 16] + pxor xmm6, xmm6 + ; zero qcoeff + movdqa OWORD PTR[rsp + qcoeff], xmm6 + movdqa OWORD PTR[rsp + qcoeff + 16], xmm6 + + mov [rsp + eob_tmp], DWORD -1 ; eob + mov rsi, arg(9) ; zbin_boost_ptr + mov rdi, arg(4) ; default_zig_zag + mov rax, arg(10) ; quant_shift_ptr + +%macro ZIGZAG_LOOP 2 +rq_zigzag_loop_%1: + movsxd rdx, DWORD PTR[rdi + (%1 * 4)] ; rc + movsx ebx, WORD PTR [rsi] ; *zbin_boost_ptr + lea rsi, [rsi + 2] ; zbin_boost_ptr++ + + ; x + movsx ecx, WORD PTR[rsp + abs_minus_zbin + rdx *2] + + ; if (x >= zbin) + sub ecx, ebx ; x - zbin + jl rq_zigzag_loop_%2 ; x < zbin + + movsx ebx, WORD PTR[rsp + temp_qcoeff + rdx *2] + + ; downshift by quant_shift[rdx] + movsx ecx, WORD PTR[rax + rdx*2] ; quant_shift_ptr[rc] + sar ebx, cl ; also sets Z bit + je rq_zigzag_loop_%2 ; !y + mov WORD PTR[rsp + qcoeff + rdx * 2], bx ;qcoeff_ptr[rc] = temp_qcoeff[rc] + + mov rsi, arg(9) ; reset to b->zrun_zbin_boost + mov [rsp + eob_tmp], DWORD %1 ; eob = i +%endmacro +ZIGZAG_LOOP 0, 1 +ZIGZAG_LOOP 1, 2 +ZIGZAG_LOOP 2, 3 +ZIGZAG_LOOP 3, 4 +ZIGZAG_LOOP 4, 5 +ZIGZAG_LOOP 5, 6 +ZIGZAG_LOOP 6, 7 +ZIGZAG_LOOP 7, 8 +ZIGZAG_LOOP 8, 9 +ZIGZAG_LOOP 9, 10 +ZIGZAG_LOOP 10, 11 +ZIGZAG_LOOP 11, 12 +ZIGZAG_LOOP 12, 13 +ZIGZAG_LOOP 13, 14 +ZIGZAG_LOOP 14, 15 +ZIGZAG_LOOP 15, end +rq_zigzag_loop_end: + + mov rbx, arg(2) ; qcoeff_ptr + mov rcx, arg(3) ; dequant_ptr + mov rsi, arg(7) ; dqcoeff_ptr + mov rax, [rsp + eob_tmp] ; eob + + movdqa xmm2, OWORD PTR[rsp + qcoeff] + movdqa xmm3, OWORD PTR[rsp + qcoeff + 16] + + ; y ^ sz + pxor xmm2, xmm0 + pxor xmm3, xmm4 + ; x = (y ^ sz) - sz + psubw xmm2, xmm0 + psubw xmm3, xmm4 movdqa xmm0, 
OWORD PTR[rcx] movdqa xmm1, OWORD PTR[rcx + 16] @@ -232,23 +181,20 @@ rq_zigzag_1c: pmullw xmm0, xmm2 pmullw xmm1, xmm3 - movdqa OWORD PTR[rsi], xmm0 ;store dqcoeff - movdqa OWORD PTR[rsi + 16], xmm1 ;store dqcoeff - - mov rax, [rsp + eob] - - movdqa xmm6, OWORD PTR[rsp + save_xmm6] - movdqa xmm7, OWORD PTR[rsp + save_xmm7] + movdqa OWORD PTR[rbx], xmm2 + movdqa OWORD PTR[rbx + 16], xmm3 + movdqa OWORD PTR[rsi], xmm0 ; store dqcoeff + movdqa OWORD PTR[rsi + 16], xmm1 ; store dqcoeff add rax, 1 - add rsp, vp8_regularquantizeb_stack_size - pop rsp - ; begin epilog + add rsp, stack_size + pop rsp pop rbx pop rdi pop rsi + RESTORE_XMM UNSHADOW_ARGS pop rbp ret diff --git a/vp8/encoder/x86/quantize_ssse3.asm b/vp8/encoder/x86/quantize_ssse3.asm index 2f33199e5..2f33199e5 100755..100644 --- a/vp8/encoder/x86/quantize_ssse3.asm +++ b/vp8/encoder/x86/quantize_ssse3.asm diff --git a/vp8/encoder/x86/quantize_x86.h b/vp8/encoder/x86/quantize_x86.h index b5b22c022..266efb446 100644 --- a/vp8/encoder/x86/quantize_x86.h +++ b/vp8/encoder/x86/quantize_x86.h @@ -27,11 +27,11 @@ extern prototype_quantize_block(vp8_regular_quantize_b_sse2); #if !CONFIG_RUNTIME_CPU_DETECT -/* The sse2 quantizer has not been updated to match the new exact - * quantizer introduced in commit e04e2935 - *#undef vp8_quantize_quantb - *#define vp8_quantize_quantb vp8_regular_quantize_b_sse2 - */ +// Currently, this function realizes a gain on x86 and a loss on x86_64 +#if ARCH_X86 +#undef vp8_quantize_quantb +#define vp8_quantize_quantb vp8_regular_quantize_b_sse2 +#endif #endif diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c index f9b3ea1d8..31438f916 100644 --- a/vp8/encoder/x86/x86_csystemdependent.c +++ b/vp8/encoder/x86/x86_csystemdependent.c @@ -108,37 +108,26 @@ void vp8_fast_quantize_b_sse2(BLOCK *b, BLOCKD *d) int vp8_regular_quantize_b_impl_sse2(short *coeff_ptr, short *zbin_ptr, - short *qcoeff_ptr,short *dequant_ptr, - const int *default_zig_zag, 
short *round_ptr, - short *quant_ptr, short *dqcoeff_ptr, - unsigned short zbin_oq_value, - short *zbin_boost_ptr); + short *qcoeff_ptr,short *dequant_ptr, + const int *default_zig_zag, short *round_ptr, + short *quant_ptr, short *dqcoeff_ptr, + unsigned short zbin_oq_value, + short *zbin_boost_ptr, + short *quant_shift_ptr); void vp8_regular_quantize_b_sse2(BLOCK *b,BLOCKD *d) { - short *zbin_boost_ptr = b->zrun_zbin_boost; - short *coeff_ptr = b->coeff; - short *zbin_ptr = b->zbin; - short *round_ptr = b->round; - short *quant_ptr = b->quant; - short *qcoeff_ptr = d->qcoeff; - short *dqcoeff_ptr = d->dqcoeff; - short *dequant_ptr = d->dequant; - short zbin_oq_value = b->zbin_extra; - - d->eob = vp8_regular_quantize_b_impl_sse2( - coeff_ptr, - zbin_ptr, - qcoeff_ptr, - dequant_ptr, - vp8_default_zig_zag1d, - - round_ptr, - quant_ptr, - dqcoeff_ptr, - zbin_oq_value, - zbin_boost_ptr - ); + d->eob = vp8_regular_quantize_b_impl_sse2(b->coeff, + b->zbin, + d->qcoeff, + d->dequant, + vp8_default_zig_zag1d, + b->round, + b->quant, + d->dqcoeff, + b->zbin_extra, + b->zrun_zbin_boost, + b->quant_shift); } int vp8_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc); @@ -307,7 +296,9 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) cpi->rtcd.encodemb.submby = vp8_subtract_mby_sse2; cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_sse2; - /*cpi->rtcd.quantize.quantb = vp8_regular_quantize_b_sse2;*/ +#if ARCH_X86 + cpi->rtcd.quantize.quantb = vp8_regular_quantize_b_sse2; +#endif cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_sse2; cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_sse2; diff --git a/vp8/vp8cx_arm.mk b/vp8/vp8cx_arm.mk index da27e0897..4113f2395 100644 --- a/vp8/vp8cx_arm.mk +++ b/vp8/vp8cx_arm.mk @@ -16,6 +16,7 @@ VP8_CX_SRCS-$(ARCH_ARM) += encoder/arm/arm_csystemdependent.c VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/encodemb_arm.c +VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/variance_arm.c VP8_CX_SRCS-$(HAVE_ARMV7) += 
encoder/arm/quantize_arm.c VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/picklpf_arm.c VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/boolhuff_arm.c