diff options
39 files changed, 435 insertions, 222 deletions
diff --git a/build/make/ads2gas.pl b/build/make/ads2gas.pl index efdfce7a1..be4658253 100755 --- a/build/make/ads2gas.pl +++ b/build/make/ads2gas.pl @@ -82,7 +82,10 @@ while (<STDIN>) s/CODE([0-9][0-9])/.code $1/; # No AREA required - s/^\s*AREA.*$/.text/; + # But ALIGNs in AREA must be obeyed + s/^\s*AREA.*ALIGN=([0-9])$/.text\n.p2align $1/; + # If no ALIGN, strip the AREA and align to 4 bytes + s/^\s*AREA.*$/.text\n.p2align 2/; # DCD to .word # This one is for incoming symbols diff --git a/build/make/ads2gas_apple.pl b/build/make/ads2gas_apple.pl index 1b3039374..78f4a97f5 100755 --- a/build/make/ads2gas_apple.pl +++ b/build/make/ads2gas_apple.pl @@ -100,7 +100,10 @@ while (<STDIN>) s/CODE([0-9][0-9])/.code $1/; # No AREA required - s/^\s*AREA.*$/.text/; + # But ALIGNs in AREA must be obeyed + s/^\s*AREA.*ALIGN=([0-9])$/.text\n.p2align $1/; + # If no ALIGN, strip the AREA and align to 4 bytes + s/^\s*AREA.*$/.text\n.p2align 2/; # DCD to .word # This one is for incoming symbols diff --git a/vp8/common/arm/armv6/bilinearfilter_v6.asm b/vp8/common/arm/armv6/bilinearfilter_v6.asm index a86ed5d0a..9704b4210 100644 --- a/vp8/common/arm/armv6/bilinearfilter_v6.asm +++ b/vp8/common/arm/armv6/bilinearfilter_v6.asm @@ -30,11 +30,11 @@ ldr r4, [sp, #36] ; width mov r12, r3 ; outer-loop counter - sub r2, r2, r4 ; src increment for height loop - ;;IF ARCHITECTURE=6 - pld [r0] - ;;ENDIF + add r7, r2, r4 ; preload next row + pld [r0, r7] + + sub r2, r2, r4 ; src increment for height loop ldr r5, [r11] ; load up filter coefficients @@ -96,9 +96,8 @@ add r0, r0, r2 ; move to next input row subs r12, r12, #1 - ;;IF ARCHITECTURE=6 - pld [r0] - ;;ENDIF + add r9, r2, r4, lsl #1 ; adding back block width + pld [r0, r9] ; preload next row add r11, r11, #2 ; move over to next column mov r1, r11 diff --git a/vp8/common/arm/armv6/copymem16x16_v6.asm b/vp8/common/arm/armv6/copymem16x16_v6.asm index fca91a0db..abf048c2f 100644 --- a/vp8/common/arm/armv6/copymem16x16_v6.asm +++ b/vp8/common/arm/armv6/copymem16x16_v6.asm @@ -22,9 +22,7 @@ ;push {r4-r7} ;preload - pld [r0] - pld [r0, r1] - pld [r0, r1, lsl #1] + pld [r0, #31] ; preload for next 16x16 block ands r4, r0, #15 beq copy_mem16x16_fast @@ -90,6 +88,8 @@ copy_mem16x16_1_loop ldrneb r6, [r0, #2] ldrneb r7, [r0, #3] + pld [r0, #31] ; preload for next 16x16 block + bne copy_mem16x16_1_loop ldmia sp!, {r4 - r7} @@ -121,6 +121,8 @@ copy_mem16x16_4_loop ldrne r6, [r0, #8] ldrne r7, [r0, #12] + pld [r0, #31] ; preload for next 16x16 block + bne copy_mem16x16_4_loop ldmia sp!, {r4 - r7} @@ -148,6 +150,7 @@ copy_mem16x16_8_loop add r2, r2, r3 + pld [r0, #31] ; preload for next 16x16 block bne copy_mem16x16_8_loop ldmia sp!, {r4 - r7} @@ -171,6 +174,7 @@ copy_mem16x16_fast_loop ;stm r2, {r4-r7} add r2, r2, r3 + pld [r0, #31] ; preload for next 16x16 block bne copy_mem16x16_fast_loop ldmia sp!, {r4 - r7} diff --git a/vp8/common/arm/armv6/filter_v6.asm b/vp8/common/arm/armv6/filter_v6.asm index 03b5bccd7..1ba91ddd6 100644 --- a/vp8/common/arm/armv6/filter_v6.asm +++ b/vp8/common/arm/armv6/filter_v6.asm @@ -10,6 +10,8 @@ EXPORT |vp8_filter_block2d_first_pass_armv6| + EXPORT |vp8_filter_block2d_first_pass_16x16_armv6| + EXPORT |vp8_filter_block2d_first_pass_8x8_armv6| EXPORT |vp8_filter_block2d_second_pass_armv6| EXPORT |vp8_filter4_block2d_second_pass_armv6| EXPORT |vp8_filter_block2d_first_pass_only_armv6| @@ -40,11 +42,6 @@ add r12, r3, #16 ; square off the output sub sp, sp, #4 - ;;IF ARCHITECTURE=6 - ;pld [r0, #-2] - ;;pld [r0, #30] - ;;ENDIF - ldr r4, [r11] ; load up packed filter coefficients ldr r5, [r11, #4] ldr r6, [r11, #8] @@ -101,15 +98,10 @@ bne width_loop_1st_6 - ;;add r9, r2, #30 ; attempt to load 2 adjacent cache lines - ;;IF ARCHITECTURE=6 - ;pld [r0, r2] - ;;pld [r0, r9] - ;;ENDIF - ldr r1, [sp] ; load and update dst address subs r7, r7, #0x10000 add r0, r0, r2 ; move to next input line + add r1, r1, #2 ; move over to next column str r1, [sp] @@ -120,6 +112,192 @@ ENDP +; -------------------------- +; 16x16 version +; ----------------------------- +|vp8_filter_block2d_first_pass_16x16_armv6| PROC + stmdb sp!, {r4 - r11, lr} + + ldr r11, [sp, #40] ; vp8_filter address + ldr r7, [sp, #36] ; output height + + add r4, r2, #18 ; preload next low + pld [r0, r4] + + sub r2, r2, r3 ; inside loop increments input array, + ; so the height loop only needs to add + ; r2 - width to the input pointer + + mov r3, r3, lsl #1 ; multiply width by 2 because using shorts + add r12, r3, #16 ; square off the output + sub sp, sp, #4 + + ldr r4, [r11] ; load up packed filter coefficients + ldr r5, [r11, #4] + ldr r6, [r11, #8] + + str r1, [sp] ; push destination to stack + mov r7, r7, lsl #16 ; height is top part of counter + +; six tap filter +|height_loop_1st_16_6| + ldrb r8, [r0, #-2] ; load source data + ldrb r9, [r0, #-1] + ldrb r10, [r0], #2 + orr r7, r7, r3, lsr #2 ; construct loop counter + +|width_loop_1st_16_6| + ldrb r11, [r0, #-1] + + pkhbt lr, r8, r9, lsl #16 ; r9 | r8 + pkhbt r8, r9, r10, lsl #16 ; r10 | r9 + + ldrb r9, [r0] + + smuad lr, lr, r4 ; apply the filter + pkhbt r10, r10, r11, lsl #16 ; r11 | r10 + smuad r8, r8, r4 + pkhbt r11, r11, r9, lsl #16 ; r9 | r11 + + smlad lr, r10, r5, lr + ldrb r10, [r0, #1] + smlad r8, r11, r5, r8 + ldrb r11, [r0, #2] + + sub r7, r7, #1 + + pkhbt r9, r9, r10, lsl #16 ; r10 | r9 + pkhbt r10, r10, r11, lsl #16 ; r11 | r10 + + smlad lr, r9, r6, lr + smlad r11, r10, r6, r8 + + ands r10, r7, #0xff ; test loop counter + + add lr, lr, #0x40 ; round_shift_and_clamp + ldrneb r8, [r0, #-2] ; load data for next loop + usat lr, #8, lr, asr #7 + add r11, r11, #0x40 + ldrneb r9, [r0, #-1] + usat r11, #8, r11, asr #7 + + strh lr, [r1], r12 ; result is transposed and stored, which + ; will make second pass filtering easier. + ldrneb r10, [r0], #2 + strh r11, [r1], r12 + + bne width_loop_1st_16_6 + + ldr r1, [sp] ; load and update dst address + subs r7, r7, #0x10000 + add r0, r0, r2 ; move to next input line + + add r11, r2, #34 ; adding back block width(=16) + pld [r0, r11] ; preload next low + + add r1, r1, #2 ; move over to next column + str r1, [sp] + + bne height_loop_1st_16_6 + + add sp, sp, #4 + ldmia sp!, {r4 - r11, pc} + + ENDP + +; -------------------------- +; 8x8 version +; ----------------------------- +|vp8_filter_block2d_first_pass_8x8_armv6| PROC + stmdb sp!, {r4 - r11, lr} + + ldr r11, [sp, #40] ; vp8_filter address + ldr r7, [sp, #36] ; output height + + add r4, r2, #10 ; preload next low + pld [r0, r4] + + sub r2, r2, r3 ; inside loop increments input array, + ; so the height loop only needs to add + ; r2 - width to the input pointer + + mov r3, r3, lsl #1 ; multiply width by 2 because using shorts + add r12, r3, #16 ; square off the output + sub sp, sp, #4 + + ldr r4, [r11] ; load up packed filter coefficients + ldr r5, [r11, #4] + ldr r6, [r11, #8] + + str r1, [sp] ; push destination to stack + mov r7, r7, lsl #16 ; height is top part of counter + +; six tap filter +|height_loop_1st_8_6| + ldrb r8, [r0, #-2] ; load source data + ldrb r9, [r0, #-1] + ldrb r10, [r0], #2 + orr r7, r7, r3, lsr #2 ; construct loop counter + +|width_loop_1st_8_6| + ldrb r11, [r0, #-1] + + pkhbt lr, r8, r9, lsl #16 ; r9 | r8 + pkhbt r8, r9, r10, lsl #16 ; r10 | r9 + + ldrb r9, [r0] + + smuad lr, lr, r4 ; apply the filter + pkhbt r10, r10, r11, lsl #16 ; r11 | r10 + smuad r8, r8, r4 + pkhbt r11, r11, r9, lsl #16 ; r9 | r11 + + smlad lr, r10, r5, lr + ldrb r10, [r0, #1] + smlad r8, r11, r5, r8 + ldrb r11, [r0, #2] + + sub r7, r7, #1 + + pkhbt r9, r9, r10, lsl #16 ; r10 | r9 + pkhbt r10, r10, r11, lsl #16 ; r11 | r10 + + smlad lr, r9, r6, lr + smlad r11, r10, r6, r8 + + ands r10, r7, #0xff ; test loop counter + + add lr, lr, #0x40 ; round_shift_and_clamp + ldrneb r8, [r0, #-2] ; load data for next loop + usat lr, #8, lr, asr #7 + add r11, r11, #0x40 + ldrneb r9, [r0, #-1] + usat r11, #8, r11, asr #7 + + strh lr, [r1], r12 ; result is transposed and stored, which + ; will make second pass filtering easier. + ldrneb r10, [r0], #2 + strh r11, [r1], r12 + + bne width_loop_1st_8_6 + + ldr r1, [sp] ; load and update dst address + subs r7, r7, #0x10000 + add r0, r0, r2 ; move to next input line + + add r11, r2, #18 ; adding back block width(=8) + pld [r0, r11] ; preload next low + + add r1, r1, #2 ; move over to next column + str r1, [sp] + + bne height_loop_1st_8_6 + + add sp, sp, #4 + ldmia sp!, {r4 - r11, pc} + + ENDP + ;--------------------------------- ; r0 short *src_ptr, ; r1 unsigned char *output_ptr, @@ -262,6 +440,10 @@ |vp8_filter_block2d_first_pass_only_armv6| PROC stmdb sp!, {r4 - r11, lr} + add r7, r2, r3 ; preload next low + add r7, r7, #2 + pld [r0, r7] + ldr r4, [sp, #36] ; output pitch ldr r11, [sp, #40] ; HFilter address sub sp, sp, #8 @@ -330,16 +512,15 @@ bne width_loop_1st_only_6 - ;;add r9, r2, #30 ; attempt to load 2 adjacent cache lines - ;;IF ARCHITECTURE=6 - ;pld [r0, r2] - ;;pld [r0, r9] - ;;ENDIF - ldr lr, [sp] ; load back output pitch ldr r12, [sp, #4] ; load back output pitch subs r7, r7, #1 add r0, r0, r12 ; updata src for next loop + + add r11, r12, r3 ; preload next low + add r11, r11, #2 + pld [r0, r11] + add r1, r1, lr ; update dst for next loop bne height_loop_1st_only_6 diff --git a/vp8/common/arm/armv6/loopfilter_v6.asm b/vp8/common/arm/armv6/loopfilter_v6.asm index b6417dee6..c7441b055 100644 --- a/vp8/common/arm/armv6/loopfilter_v6.asm +++ b/vp8/common/arm/armv6/loopfilter_v6.asm @@ -253,12 +253,6 @@ count RN r5 subs count, count, #1 - ;pld [src] - ;pld [src, pstep] - ;pld [src, pstep, lsl #1] - ;pld [src, pstep, lsl #2] - ;pld [src, pstep, lsl #3] - ldrne r9, [src], pstep ; p3 ldrne r10, [src], pstep ; p2 ldrne r11, [src], pstep ; p1 @@ -857,15 +851,19 @@ count RN r5 sub src, src, #4 ; move src pointer down by 4 ldr count, [sp, #40] ; count for 8-in-parallel ldr r12, [sp, #36] ; load thresh address + pld [src, #23] ; preload for next block sub sp, sp, #16 ; create temp buffer ldr r6, [src], pstep ; load source data ldr r4, [r2], #4 ; flimit + pld [src, #23] ldr r7, [src], pstep ldr r2, [r3], #4 ; limit + pld [src, #23] ldr r8, [src], pstep uadd8 r4, r4, r4 ; flimit * 2 ldr r3, [r12], #4 ; thresh + pld [src, #23] ldr lr, [src], pstep mov count, count, lsl #1 ; 4-in-parallel uadd8 r4, r4, r2 ; flimit * 2 + limit @@ -1242,9 +1240,13 @@ count RN r5 sub src, src, #4 subs count, count, #1 + pld [src, #23] ; preload for next block ldrne r6, [src], pstep ; load source data + pld [src, #23] ldrne r7, [src], pstep + pld [src, #23] ldrne r8, [src], pstep + pld [src, #23] ldrne lr, [src], pstep bne MBVnext8 diff --git a/vp8/common/arm/armv6/simpleloopfilter_v6.asm b/vp8/common/arm/armv6/simpleloopfilter_v6.asm index 013712036..40a71f49d 100644 --- a/vp8/common/arm/armv6/simpleloopfilter_v6.asm +++ b/vp8/common/arm/armv6/simpleloopfilter_v6.asm @@ -154,22 +154,26 @@ pstep RN r1 ; load soure data to r7, r8, r9, r10 ldrh r3, [src, #-2] + pld [src, #23] ; preload for next block ldrh r4, [src], pstep uadd8 r12, r12, r12 ; flimit * 2 ldrh r5, [src, #-2] + pld [src, #23] ldrh r6, [src], pstep uadd8 r12, r12, r7 ; flimit * 2 + limit pkhbt r7, r3, r4, lsl #16 ldrh r3, [src, #-2] + pld [src, #23] ldrh r4, [src], pstep ldr r11, [sp, #40] ; count (r11) for 8-in-parallel pkhbt r8, r5, r6, lsl #16 ldrh r5, [src, #-2] + pld [src, #23] ldrh r6, [src], pstep mov r11, r11, lsl #1 ; 4-in-parallel @@ -259,19 +263,23 @@ pstep RN r1 ; load soure data to r7, r8, r9, r10 ldrneh r3, [src, #-2] + pld [src, #23] ; preload for next block ldrneh r4, [src], pstep ldrneh r5, [src, #-2] + pld [src, #23] ldrneh r6, [src], pstep pkhbt r7, r3, r4, lsl #16 ldrneh r3, [src, #-2] + pld [src, #23] ldrneh r4, [src], pstep pkhbt r8, r5, r6, lsl #16 ldrneh r5, [src, #-2] + pld [src, #23] ldrneh r6, [src], pstep bne simple_vnext8 diff --git a/vp8/common/arm/armv6/sixtappredict8x4_v6.asm b/vp8/common/arm/armv6/sixtappredict8x4_v6.asm index 029e02aa0..e81aef53d 100644 --- a/vp8/common/arm/armv6/sixtappredict8x4_v6.asm +++ b/vp8/common/arm/armv6/sixtappredict8x4_v6.asm @@ -32,9 +32,12 @@ beq skip_firstpass_filter ;first-pass filter - ldr r12, _filter8_coeff_ + adr r12, filter8_coeff sub r0, r0, r1, lsl #1 + add r3, r1, #10 ; preload next low + pld [r0, r3] + add r2, r12, r2, lsl #4 ;calculate filter location add r0, r0, #3 ;adjust src only for loading convinience @@ -110,6 +113,9 @@ add r0, r0, r1 ; move to next input line + add r11, r1, #18 ; preload next low. adding back block width(=8), which is subtracted earlier + pld [r0, r11] + bne first_pass_hloop_v6 ;second pass filter @@ -121,7 +127,7 @@ secondpass_filter cmp r3, #0 beq skip_secondpass_filter - ldr r12, _filter8_coeff_ + adr r12, filter8_coeff add lr, r12, r3, lsl #4 ;calculate filter location mov r2, #0x00080000 @@ -245,8 +251,6 @@ skip_secondpass_hloop ;----------------- ;One word each is reserved. Label filter_coeff can be used to access the data. ;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ... -_filter8_coeff_ - DCD filter8_coeff filter8_coeff DCD 0x00000000, 0x00000080, 0x00000000, 0x00000000 DCD 0xfffa0000, 0x000c007b, 0x0000ffff, 0x00000000 diff --git a/vp8/common/arm/filter_arm.c b/vp8/common/arm/filter_arm.c index fe3c5a52e..6582fb29a 100644 --- a/vp8/common/arm/filter_arm.c +++ b/vp8/common/arm/filter_arm.c @@ -25,6 +25,28 @@ extern void vp8_filter_block2d_first_pass_armv6 const short *vp8_filter ); +// 8x8 +extern void vp8_filter_block2d_first_pass_8x8_armv6 +( + unsigned char *src_ptr, + short *output_ptr, + unsigned int src_pixels_per_line, + unsigned int output_width, + unsigned int output_height, + const short *vp8_filter +); + +// 16x16 +extern void vp8_filter_block2d_first_pass_16x16_armv6 +( + unsigned char *src_ptr, + short *output_ptr, + unsigned int src_pixels_per_line, + unsigned int output_width, + unsigned int output_height, + const short *vp8_filter +); + extern void vp8_filter_block2d_second_pass_armv6 ( short *src_ptr, @@ -143,12 +165,12 @@ void vp8_sixtap_predict8x8_armv6 { if (yoffset & 0x1) { - vp8_filter_block2d_first_pass_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 8, 11, HFilter); + vp8_filter_block2d_first_pass_8x8_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 8, 11, HFilter); vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 8, VFilter); } else { - vp8_filter_block2d_first_pass_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 8, 13, HFilter); + vp8_filter_block2d_first_pass_8x8_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 8, 13, HFilter); vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 8, VFilter); } } @@ -185,12 +207,12 @@ void vp8_sixtap_predict16x16_armv6 { if (yoffset & 0x1) { - vp8_filter_block2d_first_pass_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 16, 19, HFilter); + vp8_filter_block2d_first_pass_16x16_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 16, 19, HFilter); vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 16, VFilter); } else { - vp8_filter_block2d_first_pass_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 16, 21, HFilter); + vp8_filter_block2d_first_pass_16x16_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 16, 21, HFilter); vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 16, VFilter); } } diff --git a/vp8/common/arm/neon/bilinearpredict16x16_neon.asm b/vp8/common/arm/neon/bilinearpredict16x16_neon.asm index 79e1a6935..e392786d4 100644 --- a/vp8/common/arm/neon/bilinearpredict16x16_neon.asm +++ b/vp8/common/arm/neon/bilinearpredict16x16_neon.asm @@ -25,7 +25,7 @@ |vp8_bilinear_predict16x16_neon| PROC push {r4-r5, lr} - ldr r12, _bifilter16_coeff_ + adr r12, bifilter16_coeff ldr r4, [sp, #12] ;load parameters from stack ldr r5, [sp, #16] ;load parameters from stack @@ -351,8 +351,6 @@ filt_blk2d_spo16x16_loop_neon ;----------------- -_bifilter16_coeff_ - DCD bifilter16_coeff bifilter16_coeff DCD 128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112 diff --git a/vp8/common/arm/neon/bilinearpredict4x4_neon.asm b/vp8/common/arm/neon/bilinearpredict4x4_neon.asm index 10cd1b8bd..0ac62436f 100644 --- a/vp8/common/arm/neon/bilinearpredict4x4_neon.asm +++ b/vp8/common/arm/neon/bilinearpredict4x4_neon.asm @@ -25,7 +25,7 @@ |vp8_bilinear_predict4x4_neon| PROC push {r4, lr} - ldr r12, _bifilter4_coeff_ + adr r12, bifilter4_coeff ldr r4, [sp, #8] ;load parameters from stack ldr lr, [sp, #12] ;load parameters from stack @@ -124,8 +124,6 @@ skip_secondpass_filter ;----------------- -_bifilter4_coeff_ - DCD bifilter4_coeff bifilter4_coeff DCD 128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112 diff --git a/vp8/common/arm/neon/bilinearpredict8x4_neon.asm b/vp8/common/arm/neon/bilinearpredict8x4_neon.asm index bf37bb0d6..41f5c45ff 100644 --- a/vp8/common/arm/neon/bilinearpredict8x4_neon.asm +++ b/vp8/common/arm/neon/bilinearpredict8x4_neon.asm @@ -25,7 +25,7 @@ |vp8_bilinear_predict8x4_neon| PROC push {r4, lr} - ldr r12, _bifilter8x4_coeff_ + adr r12, bifilter8x4_coeff ldr r4, [sp, #8] ;load parameters from stack ldr lr, [sp, #12] ;load parameters from stack @@ -129,8 +129,6 @@ skip_secondpass_filter ;----------------- -_bifilter8x4_coeff_ - DCD bifilter8x4_coeff bifilter8x4_coeff DCD 128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112 diff --git a/vp8/common/arm/neon/bilinearpredict8x8_neon.asm b/vp8/common/arm/neon/bilinearpredict8x8_neon.asm index 9b29df6c3..c4711bc4d 100644 --- a/vp8/common/arm/neon/bilinearpredict8x8_neon.asm +++ b/vp8/common/arm/neon/bilinearpredict8x8_neon.asm @@ -25,7 +25,7 @@ |vp8_bilinear_predict8x8_neon| PROC push {r4, lr} - ldr r12, _bifilter8_coeff_ + adr r12, bifilter8_coeff ldr r4, [sp, #8] ;load parameters from stack ldr lr, [sp, #12] ;load parameters from stack @@ -177,8 +177,6 @@ skip_secondpass_filter ;----------------- -_bifilter8_coeff_ - DCD bifilter8_coeff bifilter8_coeff DCD 128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112 diff --git a/vp8/common/arm/neon/shortidct4x4llm_neon.asm b/vp8/common/arm/neon/shortidct4x4llm_neon.asm index 16c4d2d24..b74c31521 100644 --- a/vp8/common/arm/neon/shortidct4x4llm_neon.asm +++ b/vp8/common/arm/neon/shortidct4x4llm_neon.asm @@ -31,7 +31,7 @@ ;result of the multiplication that is needed in IDCT. |vp8_short_idct4x4llm_neon| PROC - ldr r12, _idct_coeff_ + adr r12, idct_coeff vld1.16 {q1, q2}, [r0] vld1.16 {d0}, [r12] @@ -114,8 +114,6 @@ ;----------------- -_idct_coeff_ - DCD idct_coeff idct_coeff DCD 0x4e7b4e7b, 0x8a8c8a8c diff --git a/vp8/common/arm/neon/sixtappredict16x16_neon.asm b/vp8/common/arm/neon/sixtappredict16x16_neon.asm index 3c22fa19b..9fdafd360 100644 --- a/vp8/common/arm/neon/sixtappredict16x16_neon.asm +++ b/vp8/common/arm/neon/sixtappredict16x16_neon.asm @@ -15,6 +15,17 @@ PRESERVE8 AREA ||.text||, CODE, READONLY, ALIGN=2 + +filter16_coeff + DCD 0, 0, 128, 0, 0, 0, 0, 0 + DCD 0, -6, 123, 12, -1, 0, 0, 0 + DCD 2, -11, 108, 36, -8, 1, 0, 0 + DCD 0, -9, 93, 50, -6, 0, 0, 0 + DCD 3, -16, 77, 77, -16, 3, 0, 0 + DCD 0, -6, 50, 93, -9, 0, 0, 0 + DCD 1, -8, 36, 108, -11, 2, 0, 0 + DCD 0, -1, 12, 123, -6, 0, 0, 0 + ; r0 unsigned char *src_ptr, ; r1 int src_pixels_per_line, ; r2 int xoffset, @@ -33,7 +44,7 @@ |vp8_sixtap_predict16x16_neon| PROC push {r4-r5, lr} - ldr r12, _filter16_coeff_ + adr r12, filter16_coeff ldr r4, [sp, #12] ;load parameters from stack ldr r5, [sp, #16] ;load parameters from stack @@ -476,17 +487,4 @@ secondpass_only_inner_loop_neon ENDP ;----------------- - -_filter16_coeff_ - DCD filter16_coeff -filter16_coeff - DCD 0, 0, 128, 0, 0, 0, 0, 0 - DCD 0, -6, 123, 12, -1, 0, 0, 0 - DCD 2, -11, 108, 36, -8, 1, 0, 0 - DCD 0, -9, 93, 50, -6, 0, 0, 0 - DCD 3, -16, 77, 77, -16, 3, 0, 0 - DCD 0, -6, 50, 93, -9, 0, 0, 0 - DCD 1, -8, 36, 108, -11, 2, 0, 0 - DCD 0, -1, 12, 123, -6, 0, 0, 0 - END diff --git a/vp8/common/arm/neon/sixtappredict4x4_neon.asm b/vp8/common/arm/neon/sixtappredict4x4_neon.asm index 2dc3f591f..41510e854 100644 --- a/vp8/common/arm/neon/sixtappredict4x4_neon.asm +++ b/vp8/common/arm/neon/sixtappredict4x4_neon.asm @@ -15,6 +15,17 @@ PRESERVE8 AREA ||.text||, CODE, READONLY, ALIGN=2 + +filter4_coeff + DCD 0, 0, 128, 0, 0, 0, 0, 0 + DCD 0, -6, 123, 12, -1, 0, 0, 0 + DCD 2, -11, 108, 36, -8, 1, 0, 0 + DCD 0, -9, 93, 50, -6, 0, 0, 0 + DCD 3, -16, 77, 77, -16, 3, 0, 0 + DCD 0, -6, 50, 93, -9, 0, 0, 0 + DCD 1, -8, 36, 108, -11, 2, 0, 0 + DCD 0, -1, 12, 123, -6, 0, 0, 0 + ; r0 unsigned char *src_ptr, ; r1 int src_pixels_per_line, ; r2 int xoffset, @@ -25,7 +36,7 @@ |vp8_sixtap_predict_neon| PROC push {r4, lr} - ldr r12, _filter4_coeff_ + adr r12, filter4_coeff ldr r4, [sp, #8] ;load parameters from stack ldr lr, [sp, #12] ;load parameters from stack @@ -408,16 +419,4 @@ secondpass_filter4x4_only ;----------------- -_filter4_coeff_ - DCD filter4_coeff -filter4_coeff - DCD 0, 0, 128, 0, 0, 0, 0, 0 - DCD 0, -6, 123, 12, -1, 0, 0, 0 - DCD 2, -11, 108, 36, -8, 1, 0, 0 - DCD 0, -9, 93, 50, -6, 0, 0, 0 - DCD 3, -16, 77, 77, -16, 3, 0, 0 - DCD 0, -6, 50, 93, -9, 0, 0, 0 - DCD 1, -8, 36, 108, -11, 2, 0, 0 - DCD 0, -1, 12, 123, -6, 0, 0, 0 - END diff --git a/vp8/common/arm/neon/sixtappredict8x4_neon.asm b/vp8/common/arm/neon/sixtappredict8x4_neon.asm index 0904f52ca..a57ec015f 100644 --- a/vp8/common/arm/neon/sixtappredict8x4_neon.asm +++ b/vp8/common/arm/neon/sixtappredict8x4_neon.asm @@ -15,6 +15,17 @@ PRESERVE8 AREA ||.text||, CODE, READONLY, ALIGN=2 + +filter8_coeff + DCD 0, 0, 128, 0, 0, 0, 0, 0 + DCD 0, -6, 123, 12, -1, 0, 0, 0 + DCD 2, -11, 108, 36, -8, 1, 0, 0 + DCD 0, -9, 93, 50, -6, 0, 0, 0 + DCD 3, -16, 77, 77, -16, 3, 0, 0 + DCD 0, -6, 50, 93, -9, 0, 0, 0 + DCD 1, -8, 36, 108, -11, 2, 0, 0 + DCD 0, -1, 12, 123, -6, 0, 0, 0 + ; r0 unsigned char *src_ptr, ; r1 int src_pixels_per_line, ; r2 int xoffset, @@ -25,7 +36,7 @@ |vp8_sixtap_predict8x4_neon| PROC push {r4-r5, lr} - ldr r12, _filter8_coeff_ + adr r12, filter8_coeff ldr r4, [sp, #12] ;load parameters from stack ldr r5, [sp, #16] ;load parameters from stack @@ -459,16 +470,4 @@ secondpass_filter8x4_only ;----------------- -_filter8_coeff_ - DCD filter8_coeff -filter8_coeff - DCD 0, 0, 128, 0, 0, 0, 0, 0 - DCD 0, -6, 123, 12, -1, 0, 0, 0 - DCD 2, -11, 108, 36, -8, 1, 0, 0 - DCD 0, -9, 93, 50, -6, 0, 0, 0 - DCD 3, -16, 77, 77, -16, 3, 0, 0 - DCD 0, -6, 50, 93, -9, 0, 0, 0 - DCD 1, -8, 36, 108, -11, 2, 0, 0 - DCD 0, -1, 12, 123, -6, 0, 0, 0 - END diff --git a/vp8/common/arm/neon/sixtappredict8x8_neon.asm b/vp8/common/arm/neon/sixtappredict8x8_neon.asm index 33af86f8f..00ed5aeef 100644 --- a/vp8/common/arm/neon/sixtappredict8x8_neon.asm +++ b/vp8/common/arm/neon/sixtappredict8x8_neon.asm @@ -15,6 +15,17 @@ PRESERVE8 AREA ||.text||, CODE, READONLY, ALIGN=2 + +filter8_coeff + DCD 0, 0, 128, 0, 0, 0, 0, 0 + DCD 0, -6, 123, 12, -1, 0, 0, 0 + DCD 2, -11, 108, 36, -8, 1, 0, 0 + DCD 0, -9, 93, 50, -6, 0, 0, 0 + DCD 3, -16, 77, 77, -16, 3, 0, 0 + DCD 0, -6, 50, 93, -9, 0, 0, 0 + DCD 1, -8, 36, 108, -11, 2, 0, 0 + DCD 0, -1, 12, 123, -6, 0, 0, 0 + ; r0 unsigned char *src_ptr, ; r1 int src_pixels_per_line, ; r2 int xoffset, @@ -25,7 +36,7 @@ |vp8_sixtap_predict8x8_neon| PROC push {r4-r5, lr} - ldr r12, _filter8_coeff_ + adr r12, filter8_coeff ldr r4, [sp, #12] ;load parameters from stack ldr r5, [sp, #16] ;load parameters from stack @@ -510,16 +521,4 @@ filt_blk2d_spo8x8_loop_neon ;----------------- -_filter8_coeff_ - DCD filter8_coeff -filter8_coeff - DCD 0, 0, 128, 0, 0, 0, 0, 0 - DCD 0, -6, 123, 12, -1, 0, 0, 0 - DCD 2, -11, 108, 36, -8, 1, 0, 0 - DCD 0, -9, 93, 50, -6, 0, 0, 0 - DCD 3, -16, 77, 77, -16, 3, 0, 0 - DCD 0, -6, 50, 93, -9, 0, 0, 0 - DCD 1, -8, 36, 108, -11, 2, 0, 0 - DCD 0, -1, 12, 123, -6, 0, 0, 0 - END diff --git a/vp8/common/coefupdateprobs.h b/vp8/common/coefupdateprobs.h index 785e3ff70..9e194dc9a 100644 --- a/vp8/common/coefupdateprobs.h +++ b/vp8/common/coefupdateprobs.h @@ -12,7 +12,7 @@ /* Update probabilities for the nodes in the token entropy tree. Generated file included by entropy.c */ -const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens-1] = +const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] = { { { diff --git a/vp8/common/defaultcoefcounts.c b/vp8/common/defaultcoefcounts.c index ebb78164c..b0e2e702a 100644 --- a/vp8/common/defaultcoefcounts.c +++ b/vp8/common/defaultcoefcounts.c @@ -15,7 +15,7 @@ const unsigned int vp8_default_coef_counts[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] - [vp8_coef_tokens] = + [MAX_ENTROPY_TOKENS] = { { diff --git a/vp8/common/defaultcoefcounts.h b/vp8/common/defaultcoefcounts.h index ebe8d7a2a..7a1e28b7b 100644 --- a/vp8/common/defaultcoefcounts.h +++ b/vp8/common/defaultcoefcounts.h @@ -16,6 +16,6 @@ extern const unsigned int vp8_default_coef_counts[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] - [vp8_coef_tokens]; + [MAX_ENTROPY_TOKENS]; #endif //__DEFAULTCOEFCOUNTS_H diff --git a/vp8/common/entropy.c b/vp8/common/entropy.c index b89ca92eb..fba7a07ff 100644 --- a/vp8/common/entropy.c +++ b/vp8/common/entropy.c @@ -65,7 +65,7 @@ const vp8_tree_index vp8_coef_tree[ 22] = /* corresponding _CONTEXT_NODEs */ -DCT_VAL_CATEGORY5, -DCT_VAL_CATEGORY6 /* 10 = CAT_FIVE */ }; -struct vp8_token_struct vp8_coef_encodings[vp8_coef_tokens]; +struct vp8_token_struct vp8_coef_encodings[MAX_ENTROPY_TOKENS]; /* Trees for extra bits. Probabilities are constant and do not depend on previously encoded bits */ @@ -145,9 +145,9 @@ void vp8_default_coef_probs(VP8_COMMON *pc) do { - unsigned int branch_ct [vp8_coef_tokens-1] [2]; + unsigned int branch_ct [ENTROPY_NODES] [2]; vp8_tree_probs_from_distribution( - vp8_coef_tokens, vp8_coef_encodings, vp8_coef_tree, + MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree, pc->fc.coef_probs[h][i][k], branch_ct, vp8_default_coef_counts[h][i][k], diff --git a/vp8/common/entropy.h b/vp8/common/entropy.h index d174e45b9..66d282b61 100644 --- a/vp8/common/entropy.h +++ b/vp8/common/entropy.h @@ -30,13 +30,12 @@ #define DCT_VAL_CATEGORY6 10 /* 67+ Extra Bits 11+1 */ #define DCT_EOB_TOKEN 11 /* EOB Extra Bits 0+0 */ -#define vp8_coef_tokens 12 -#define MAX_ENTROPY_TOKENS vp8_coef_tokens +#define MAX_ENTROPY_TOKENS 12 #define ENTROPY_NODES 11 extern const vp8_tree_index vp8_coef_tree[]; -extern struct vp8_token_struct vp8_coef_encodings[vp8_coef_tokens]; +extern struct vp8_token_struct vp8_coef_encodings[MAX_ENTROPY_TOKENS]; typedef struct { @@ -85,9 +84,9 @@ extern DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]); /*# define DC_TOKEN_CONTEXTS 3*/ /* 00, 0!0, !0!0 */ # define PREV_COEF_CONTEXTS 3 -extern DECLARE_ALIGNED(16, const unsigned char, vp8_prev_token_class[vp8_coef_tokens]); +extern DECLARE_ALIGNED(16, const unsigned char, vp8_prev_token_class[MAX_ENTROPY_TOKENS]); -extern const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens-1]; +extern const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; struct VP8Common; diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h index d366b9b1b..a381dfe87 100644 --- a/vp8/common/onyxc_int.h +++ b/vp8/common/onyxc_int.h @@ -43,7 +43,7 @@ typedef struct frame_contexts vp8_prob ymode_prob [VP8_YMODES-1]; /* interframe intra mode probs */ vp8_prob uv_mode_prob [VP8_UV_MODES-1]; vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1]; - vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens-1]; + vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; MV_CONTEXT mvc[2]; MV_CONTEXT pre_mvc[2]; /* not to caculate the mvcost for the frame if mvc doesn't change. */ } FRAME_CONTEXT; diff --git a/vp8/decoder/arm/neon/dequant_idct_neon.asm b/vp8/decoder/arm/neon/dequant_idct_neon.asm index 1923be42a..4bf661857 100644 --- a/vp8/decoder/arm/neon/dequant_idct_neon.asm +++ b/vp8/decoder/arm/neon/dequant_idct_neon.asm @@ -35,7 +35,7 @@ ldr r1, [sp, #4] ; stride - ldr r12, _CONSTANTS_ + adr r12, cospi8sqrt2minus1 ; pointer to the first constant vmul.i16 q1, q3, q5 ;input for short_idct4x4llm_neon vmul.i16 q2, q4, q6 @@ -123,7 +123,6 @@ ENDP ; |vp8_dequant_idct_add_neon| ; Constant Pool -_CONSTANTS_ DCD cospi8sqrt2minus1 cospi8sqrt2minus1 DCD 0x4e7b4e7b sinpi8sqrt2 DCD 0x8a8c8a8c diff --git a/vp8/decoder/arm/neon/idct_dequant_dc_full_2x_neon.asm b/vp8/decoder/arm/neon/idct_dequant_dc_full_2x_neon.asm index ad4364adc..61fa66075 100644 --- a/vp8/decoder/arm/neon/idct_dequant_dc_full_2x_neon.asm +++ b/vp8/decoder/arm/neon/idct_dequant_dc_full_2x_neon.asm @@ -41,7 +41,7 @@ ldr r1, [sp, #4] vld1.32 {d31[1]}, [r12] - ldr r2, _CONSTANTS_ + adr r2, cospi8sqrt2minus1 ; pointer to the first constant ldrh r12, [r1], #2 ; lo *dc ldrh r1, [r1] ; hi *dc @@ -198,7 +198,6 @@ ENDP ; |idct_dequant_dc_full_2x_neon| ; Constant Pool -_CONSTANTS_ DCD cospi8sqrt2minus1 cospi8sqrt2minus1 DCD 0x4e7b ; because the lowest bit in 0x8a8c is 0, we can pre-shift this sinpi8sqrt2 DCD 0x4546 diff --git a/vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm b/vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm index 85fff11b3..772ec4685 100644 --- a/vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm +++ b/vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm @@ -40,7 +40,7 @@ vld1.32 {d31[0]}, [r2] vld1.32 {d31[1]}, [r12] - ldr r2, _CONSTANTS_ + adr r2, cospi8sqrt2minus1 ; pointer to the first constant ; dequant: q[i] = q[i] * dq[i] vmul.i16 q2, q2, q0 @@ -190,7 +190,6 @@ ENDP ; |idct_dequant_full_2x_neon| ; Constant Pool -_CONSTANTS_ DCD cospi8sqrt2minus1 cospi8sqrt2minus1 DCD 0x4e7b ; because the lowest bit in 0x8a8c is 0, we can pre-shift this sinpi8sqrt2 DCD 0x4546 diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c index 7537ec648..a300bb5fd 100644 --- a/vp8/decoder/decodframe.c +++ b/vp8/decoder/decodframe.c @@ -183,7 +183,8 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, unsigned int mb_idx) { int eobtotal = 0; - int i, do_clamp = xd->mode_info_context->mbmi.need_to_clamp_mvs; + MB_PREDICTION_MODE mode; + int i; if (xd->mode_info_context->mbmi.mb_skip_coeff) { @@ -195,14 +196,14 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, } /* Perform temporary clamping of the MV to be used for prediction */ - if (do_clamp) + if (xd->mode_info_context->mbmi.need_to_clamp_mvs) { clamp_mvs(xd); } - eobtotal |= (xd->mode_info_context->mbmi.mode == B_PRED || - xd->mode_info_context->mbmi.mode == SPLITMV); - if (!eobtotal) + mode = xd->mode_info_context->mbmi.mode; + + if (eobtotal == 0 && mode != B_PRED && mode != SPLITMV) { /* Special case: Force the loopfilter to skip when eobtotal and * mb_skip_coeff are zero. @@ -221,15 +222,12 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, { RECON_INVOKE(&pbi->common.rtcd.recon, build_intra_predictors_mbuv)(xd); - if (xd->mode_info_context->mbmi.mode != B_PRED) + if (mode != B_PRED) { RECON_INVOKE(&pbi->common.rtcd.recon, build_intra_predictors_mby)(xd); } else { vp8_intra_prediction_down_copy(xd); - - - } } else @@ -252,41 +250,10 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, #endif /* dequantization and idct */ - if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV) - { - BLOCKD *b = &xd->block[24]; - - DEQUANT_INVOKE(&pbi->dequant, block)(b); - - /* do 2nd order transform on the dc block */ - if (xd->eobs[24] > 1) - { - IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], b->diff); - ((int *)b->qcoeff)[0] = 0; - ((int *)b->qcoeff)[1] = 0; - ((int *)b->qcoeff)[2] = 0; - ((int *)b->qcoeff)[3] = 0; - ((int *)b->qcoeff)[4] = 0; - ((int *)b->qcoeff)[5] = 0; - ((int *)b->qcoeff)[6] = 0; - ((int *)b->qcoeff)[7] = 0; - } - else - { - IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0], b->diff); - ((int *)b->qcoeff)[0] = 0; - } - - DEQUANT_INVOKE (&pbi->dequant, dc_idct_add_y_block) - (xd->qcoeff, xd->block[0].dequant, - xd->predictor, xd->dst.y_buffer, - xd->dst.y_stride, xd->eobs, xd->block[24].diff); - } - else if (xd->mode_info_context->mbmi.mode == B_PRED) + if (mode == B_PRED) { for (i = 0; i < 16; i++) { - BLOCKD *b = &xd->block[i]; RECON_INVOKE(RTCD_VTABLE(recon), intra4x4_predict) (b, b->bmi.as_mode, b->predictor); @@ -307,13 +274,43 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, } } - else + else if (mode == SPLITMV) { DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block) (xd->qcoeff, xd->block[0].dequant, xd->predictor, xd->dst.y_buffer, xd->dst.y_stride, xd->eobs); } + else + { + BLOCKD *b = &xd->block[24]; + + DEQUANT_INVOKE(&pbi->dequant, block)(b); + + /* do 2nd order transform on the dc block */ + if (xd->eobs[24] > 1) + { + IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], b->diff); + ((int *)b->qcoeff)[0] = 0; + ((int *)b->qcoeff)[1] = 0; + ((int *)b->qcoeff)[2] = 0; + ((int *)b->qcoeff)[3] = 0; + ((int *)b->qcoeff)[4] = 0; + ((int *)b->qcoeff)[5] = 0; + ((int *)b->qcoeff)[6] = 0; + ((int *)b->qcoeff)[7] = 0; + } + else + { + IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0], b->diff); + ((int *)b->qcoeff)[0] = 0; + } + + DEQUANT_INVOKE (&pbi->dequant, dc_idct_add_y_block) + (xd->qcoeff, xd->block[0].dequant, + xd->predictor, xd->dst.y_buffer, + xd->dst.y_stride, xd->eobs, xd->block[24].diff); + } DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block) (xd->qcoeff+16*16, xd->block[16].dequant, @@ -982,7 +979,7 @@ int vp8_decode_frame(VP8D_COMP *pbi) for (i = 0; i < BLOCK_TYPES; i++) for (j = 0; j < COEF_BANDS; j++) for (k = 0; k < PREV_COEF_CONTEXTS; k++) - for (l = 0; l < MAX_ENTROPY_TOKENS - 1; l++) + for (l = 0; l < ENTROPY_NODES; l++) { vp8_prob *const p = pc->fc.coef_probs [i][j][k] + l; diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c index ac2981263..134e84881 100644 --- a/vp8/encoder/bitstream.c +++ b/vp8/encoder/bitstream.c @@ -50,7 +50,7 @@ unsigned __int64 Sectionbits[500]; #ifdef ENTROPY_STATS int intra_mode_stats[10][10][10]; -static unsigned int tree_update_hist [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens-1] [2]; +static unsigned int tree_update_hist [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] [2]; extern unsigned int active_section; #endif @@ -1133,7 +1133,7 @@ static void write_kfmodes(VP8_COMP *cpi) /* This function is used for debugging probability trees. */ static void print_prob_tree(vp8_prob - coef_probs[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][vp8_coef_tokens-1]) + coef_probs[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]) { /* print coef probability tree */ int i,j,k,l; @@ -1148,7 +1148,7 @@ static void print_prob_tree(vp8_prob for (k = 0; k < PREV_COEF_CONTEXTS; k++) { fprintf(f, " {"); - for (l = 0; l < MAX_ENTROPY_TOKENS - 1; l++) + for (l = 0; l < ENTROPY_NODES; l++) { fprintf(f, "%3u, ", (unsigned int)(coef_probs [i][j][k][l])); @@ -1164,11 +1164,11 @@ static void print_prob_tree(vp8_prob } static void sum_probs_over_prev_coef_context( - const unsigned int probs[PREV_COEF_CONTEXTS][vp8_coef_tokens], + const unsigned int probs[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS], unsigned int* out) { int i, j; - for (i=0; i < vp8_coef_tokens; ++i) + for (i=0; i < MAX_ENTROPY_TOKENS; ++i) { for (j=0; j < PREV_COEF_CONTEXTS; ++j) { @@ -1203,8 +1203,8 @@ static int independent_coef_context_savings(VP8_COMP *cpi) do { int k = 0; - unsigned int prev_coef_count_sum[vp8_coef_tokens] = {0}; - int prev_coef_savings[vp8_coef_tokens] = {0}; + unsigned int prev_coef_count_sum[MAX_ENTROPY_TOKENS] = {0}; + int prev_coef_savings[MAX_ENTROPY_TOKENS] = {0}; /* Calculate new probabilities given the constraint that * they must be equal over the prev coef contexts */ @@ -1224,13 +1224,13 @@ static int independent_coef_context_savings(VP8_COMP *cpi) /* at every context */ /* calc probs and branch cts for this frame only */ - //vp8_prob new_p [vp8_coef_tokens-1]; - //unsigned int branch_ct [vp8_coef_tokens-1] [2]; + //vp8_prob new_p [ENTROPY_NODES]; + //unsigned int branch_ct [ENTROPY_NODES] [2]; int t = 0; /* token/prob index */ vp8_tree_probs_from_distribution( - vp8_coef_tokens, vp8_coef_encodings, vp8_coef_tree, + MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree, cpi->frame_coef_probs[i][j][k], cpi->frame_branch_ct [i][j][k], prev_coef_count_sum, @@ -1248,7 +1248,7 @@ static int independent_coef_context_savings(VP8_COMP *cpi) (cpi->common.frame_type == KEY_FRAME && newp != oldp)) prev_coef_savings[t] += s; } - while (++t < vp8_coef_tokens - 1); + while (++t < ENTROPY_NODES); } while (++k < PREV_COEF_CONTEXTS); k = 0; @@ -1263,7 +1263,7 @@ static int independent_coef_context_savings(VP8_COMP *cpi) cpi->common.frame_type == KEY_FRAME) savings += prev_coef_savings[k]; } - while (++k < vp8_coef_tokens - 1); + while (++k < ENTROPY_NODES); } while (++j < COEF_BANDS); } @@ -1286,14 +1286,14 @@ static int default_coef_context_savings(VP8_COMP *cpi) /* at every context */ /* calc probs and branch cts for this frame only */ - //vp8_prob new_p [vp8_coef_tokens-1]; - //unsigned int branch_ct [vp8_coef_tokens-1] [2]; + //vp8_prob new_p [ENTROPY_NODES]; + //unsigned int branch_ct [ENTROPY_NODES] [2]; int t = 0; /* token/prob index */ vp8_tree_probs_from_distribution( - vp8_coef_tokens, vp8_coef_encodings, vp8_coef_tree, + MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree, cpi->frame_coef_probs [i][j][k], cpi->frame_branch_ct [i][j][k], cpi->coef_counts [i][j][k], @@ -1313,7 +1313,7 @@ static int default_coef_context_savings(VP8_COMP *cpi) savings += s; } } - while (++t < vp8_coef_tokens - 1); + while (++t < ENTROPY_NODES); } while (++k < PREV_COEF_CONTEXTS); } @@ -1408,13 +1408,13 @@ static void update_coef_probs(VP8_COMP *cpi) do { int k = 0; - int prev_coef_savings[vp8_coef_tokens - 1] = {0}; + int prev_coef_savings[ENTROPY_NODES] = {0}; if (cpi->oxcf.error_resilient_mode & VPX_ERROR_RESILIENT_PARTITIONS) { for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { int t; /* token/prob index */ - for (t = 0; t < vp8_coef_tokens - 1; ++t) + for (t = 0; t < ENTROPY_NODES; ++t) { const unsigned int *ct = cpi->frame_branch_ct [i][j] [k][t]; @@ -1435,13 +1435,13 @@ static void update_coef_probs(VP8_COMP *cpi) /* at every context */ /* calc probs and branch cts for this frame only */ - //vp8_prob new_p [vp8_coef_tokens-1]; - //unsigned int branch_ct [vp8_coef_tokens-1] [2]; + //vp8_prob new_p [ENTROPY_NODES]; + //unsigned int branch_ct [ENTROPY_NODES] [2]; int t = 0; /* token/prob index */ //vp8_tree_probs_from_distribution( - // vp8_coef_tokens, vp8_coef_encodings, vp8_coef_tree, + // MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree, // new_p, branch_ct, (unsigned int *)cpi->coef_counts [i][j][k], // 256, 1 // ); @@ -1495,7 +1495,7 @@ static void update_coef_probs(VP8_COMP *cpi) } } - while (++t < vp8_coef_tokens - 1); + while (++t < ENTROPY_NODES); /* Accum token counts for generation of default statistics */ #ifdef ENTROPY_STATS @@ -1505,7 +1505,7 @@ static void update_coef_probs(VP8_COMP *cpi) { context_counters [i][j][k][t] += cpi->coef_counts [i][j][k][t]; } - while (++t < vp8_coef_tokens); + while (++t < MAX_ENTROPY_TOKENS); #endif @@ -1881,7 +1881,7 @@ void print_tree_update_probs() FILE *f = fopen("context.c", "a"); int Sum; fprintf(f, "\n/* Update probabilities for token entropy tree. */\n\n"); - fprintf(f, "const vp8_prob tree_update_probs[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens-1] = {\n"); + fprintf(f, "const vp8_prob tree_update_probs[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] = {\n"); for (i = 0; i < BLOCK_TYPES; i++) { @@ -1895,7 +1895,7 @@ void print_tree_update_probs() { fprintf(f, " {"); - for (l = 0; l < MAX_ENTROPY_TOKENS - 1; l++) + for (l = 0; l < ENTROPY_NODES; l++) { Sum = tree_update_hist[i][j][k][l][0] + tree_update_hist[i][j][k][l][1]; diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h index e8a5b78eb..82a93cdab 100644 --- a/vp8/encoder/block.h +++ b/vp8/encoder/block.h @@ -115,7 +115,7 @@ typedef struct unsigned char *active_ptr; MV_CONTEXT *mvc; - unsigned int token_costs[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens]; + unsigned int token_costs[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; int optimize; int q_index; diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h index 6dbdd9346..ce401fd19 100644 --- a/vp8/encoder/onyx_int.h +++ b/vp8/encoder/onyx_int.h @@ -401,11 +401,11 @@ typedef struct VP8_COMP unsigned int MVcount [2] [MVvals]; /* (row,col) MV cts this frame */ - unsigned int coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens]; /* for this frame */ - //DECLARE_ALIGNED(16, int, coef_counts_backup [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens]); //not used any more + unsigned int coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */ + //DECLARE_ALIGNED(16, int, coef_counts_backup [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]); //not used any more //save vp8_tree_probs_from_distribution result for each frame to avoid repeat calculation - vp8_prob frame_coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens-1]; - unsigned int frame_branch_ct [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens-1][2]; + vp8_prob frame_coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; + unsigned int frame_branch_ct [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2]; int gfu_boost; int kf_boost; diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c index e05ffdb1a..e0359057a 100644 --- a/vp8/encoder/rdopt.c +++ b/vp8/encoder/rdopt.c @@ -134,8 +134,8 @@ const MV_REFERENCE_FRAME vp8_ref_frame_order[MAX_MODES] = }; static void fill_token_costs( - unsigned int c [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens], - const vp8_prob p [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens-1] + unsigned int c [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS], + const vp8_prob p [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] ) { int i, j, k; diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c index 329abac68..e14e6fc92 100644 --- a/vp8/encoder/tokenize.c +++ b/vp8/encoder/tokenize.c @@ -21,7 +21,7 @@ compressions, then generating context.c = initial stats. */ #ifdef ENTROPY_STATS -_int64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens]; +_int64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; #endif void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ; void vp8_fix_contexts(MACROBLOCKD *x); @@ -282,9 +282,9 @@ void print_context_counters() fprintf(f, "\n/* *** GENERATED FILE: DO NOT EDIT *** */\n\n"); - fprintf(f, "int Contexts[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens];\n\n"); + fprintf(f, "int Contexts[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];\n\n"); - fprintf(f, "const int default_contexts[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens] = {"); + fprintf(f, "const int default_contexts[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS] = {"); # define Comma( X) (X? ",":"") @@ -317,7 +317,7 @@ void print_context_counters() fprintf(f, "%s %d", Comma(t), y); } - while (++t < vp8_coef_tokens); + while (++t < MAX_ENTROPY_TOKENS); fprintf(f, "}"); } diff --git a/vp8/encoder/tokenize.h b/vp8/encoder/tokenize.h index d87c1a3e7..04a8879cf 100644 --- a/vp8/encoder/tokenize.h +++ b/vp8/encoder/tokenize.h @@ -37,7 +37,7 @@ int rd_cost_mby(MACROBLOCKD *); void init_context_counters(); void print_context_counters(); -extern _int64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens]; +extern _int64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; #endif extern const int *vp8_dct_value_cost_ptr; @@ -30,7 +30,7 @@ */ #ifndef VP8_H #define VP8_H -#include "vpx/vpx_codec_impl_top.h" +#include "vpx_codec_impl_top.h" /*!\brief Control functions * @@ -126,5 +126,5 @@ VPX_CTRL_USE_TYPE(VP8_SET_DBG_DISPLAY_MV, int) DECLSPEC_DEPRECATED extern vpx_codec_iface_t vpx_codec_vp8_algo DEPRECATED; #endif -#include "vpx/vpx_codec_impl_bottom.h" +#include "vpx_codec_impl_bottom.h" #endif diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h index 6fd161ba2..b6bd7fc5f 100644 --- a/vpx/vp8cx.h +++ b/vpx/vp8cx.h @@ -22,7 +22,7 @@ */ #ifndef VP8CX_H #define VP8CX_H -#include "vpx/vpx_codec_impl_top.h" +#include "vpx_codec_impl_top.h" /*!\name Algorithm interface for VP8 * @@ -296,5 +296,5 @@ VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER, int *) VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER_64, int *) /*! @} - end defgroup vp8_encoder */ -#include "vpx/vpx_codec_impl_bottom.h" +#include "vpx_codec_impl_bottom.h" #endif diff --git a/vpx/vp8dx.h b/vpx/vp8dx.h index 4a3aef760..1d9d53165 100644 --- a/vpx/vp8dx.h +++ b/vpx/vp8dx.h @@ -22,7 +22,7 @@ */ #ifndef VP8DX_H #define VP8DX_H -#include "vpx/vpx_codec_impl_top.h" +#include "vpx_codec_impl_top.h" /*!\name Algorithm interface for VP8 * @@ -74,5 +74,5 @@ VPX_CTRL_USE_TYPE(VP8D_GET_FRAME_CORRUPTED, int *) /*! @} - end defgroup vp8_decoder */ -#include "vpx/vpx_codec_impl_bottom.h" +#include "vpx_codec_impl_bottom.h" #endif diff --git a/vpx/vp8e.h b/vpx/vp8e.h index abfce333a..ca907c96f 100644 --- a/vpx/vp8e.h +++ b/vpx/vp8e.h @@ -14,7 +14,7 @@ */ #ifndef VP8E_H #define VP8E_H -#include "vpx/vpx_codec_impl_top.h" +#include "vpx_codec_impl_top.h" #if defined(VPX_CODEC_DISABLE_COMPAT) && VPX_CODEC_DISABLE_COMPAT #error "Backwards compatibility disabled: don't include vp8e.h" @@ -59,5 +59,5 @@ VPX_CTRL_USE_TYPE_DEPRECATED(VP8E_SET_FRAMETYPE, int) * #VPX_DL_BEST_QUALITY constants to that parameter instead. */ VPX_CTRL_USE_TYPE_DEPRECATED(VP8E_SET_ENCODING_MODE, vp8e_encoding_mode) -#include "vpx/vpx_codec_impl_bottom.h" +#include "vpx_codec_impl_bottom.h" #endif @@ -1302,11 +1302,16 @@ struct rate_hist static void init_rate_histogram(struct rate_hist *hist, - const vpx_codec_enc_cfg_t *cfg) + const vpx_codec_enc_cfg_t *cfg, + const vpx_rational_t *fps) { int i; - hist->samples = cfg->rc_buf_sz * 60 / 1000; // max 60 fps + /* Determine the number of samples in the buffer. Use the file's framerate + * to determine the number of frames in rc_buf_sz milliseconds, with an + * adjustment (5/4) to account for alt-refs + */ + hist->samples = cfg->rc_buf_sz * 5 / 4 * fps->num / fps->den / 1000; hist->pts = calloc(hist->samples, sizeof(*hist->pts)); hist->sz = calloc(hist->samples, sizeof(*hist->sz)); for(i=0; i<RATE_BINS; i++) @@ -1343,7 +1348,7 @@ static void update_rate_histogram(struct rate_hist *hist, return; /* Sum the size over the past rc_buf_sz ms */ - for(i = hist->frames; i > 0; i--) + for(i = hist->frames; i > 0 && hist->frames - i < hist->samples; i--) { int i_idx = (i-1) % hist->samples; @@ -1699,8 +1704,6 @@ int main(int argc, const char **argv_) memset(&stats, 0, sizeof(stats)); - init_rate_histogram(&rate_hist, &cfg); - for (pass = one_pass_only ? one_pass_only - 1 : 0; pass < arg_passes; pass++) { int frames_in = 0, frames_out = 0; @@ -1827,6 +1830,8 @@ int main(int argc, const char **argv_) else vpx_img_alloc(&raw, arg_use_i420 ? VPX_IMG_FMT_I420 : VPX_IMG_FMT_YV12, cfg.g_w, cfg.g_h, 1); + + init_rate_histogram(&rate_hist, &cfg, &arg_framerate); } outfile = strcmp(out_fn, "-") ? fopen(out_fn, "wb") @@ -2040,10 +2045,14 @@ int main(int argc, const char **argv_) vpx_codec_destroy(&encoder); fclose(infile); + if (file_type == FILE_TYPE_Y4M) + y4m_input_close(&y4m); if(write_webm) { write_webm_file_footer(&ebml, hash); + free(ebml.cue_list); + ebml.cue_list = NULL; } else { |