Diffstat (limited to 'vp9')
35 files changed, 466 insertions, 7102 deletions
diff --git a/vp9/common/ppc/vp9_copy_altivec.asm b/vp9/common/ppc/vp9_copy_altivec.asm deleted file mode 100644 index a4ce91583..000000000 --- a/vp9/common/ppc/vp9_copy_altivec.asm +++ /dev/null @@ -1,47 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - .globl copy_mem16x16_ppc - -;# r3 unsigned char *src -;# r4 int src_stride -;# r5 unsigned char *dst -;# r6 int dst_stride - -;# Make the assumption that input will not be aligned, -;# but the output will be. So two reads and a perm -;# for the input, but only one store for the output. -copy_mem16x16_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xe000 - mtspr 256, r12 ;# set VRSAVE - - li r10, 16 - mtctr r10 - -cp_16x16_loop: - lvsl v0, 0, r3 ;# permutate value for alignment - - lvx v1, 0, r3 - lvx v2, r10, r3 - - vperm v1, v1, v2, v0 - - stvx v1, 0, r5 - - add r3, r3, r4 ;# increment source pointer - add r5, r5, r6 ;# increment destination pointer - - bdnz cp_16x16_loop - - mtspr 256, r11 ;# reset old VRSAVE - - blr diff --git a/vp9/common/ppc/vp9_filter_altivec.asm b/vp9/common/ppc/vp9_filter_altivec.asm deleted file mode 100644 index 4da2e94f9..000000000 --- a/vp9/common/ppc/vp9_filter_altivec.asm +++ /dev/null @@ -1,1013 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
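For reference while reading the deleted copy_mem16x16_ppc above: it is a strided 16x16 byte copy, with the lvsl/lvx/vperm trio handling an unaligned source. A minimal C model of the same behavior (the name copy_mem16x16_c is hypothetical, not from the tree):

    #include <string.h>

    /* Copy a 16x16 block of bytes; src may be unaligned, as in the
       assembly, which memcpy also tolerates. */
    static void copy_mem16x16_c(const unsigned char *src, int src_stride,
                                unsigned char *dst, int dst_stride) {
      int r;
      for (r = 0; r < 16; ++r) {
        memcpy(dst, src, 16);   /* one 16-byte row, like each stvx */
        src += src_stride;
        dst += dst_stride;
      }
    }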
-; - - - .globl sixtap_predict_ppc - .globl sixtap_predict8x4_ppc - .globl sixtap_predict8x8_ppc - .globl sixtap_predict16x16_ppc - -.macro load_c V, LABEL, OFF, R0, R1 - lis \R0, \LABEL@ha - la \R1, \LABEL@l(\R0) - lvx \V, \OFF, \R1 -.endm - -.macro load_hfilter V0, V1 - load_c \V0, HFilter, r5, r9, r10 - - addi r5, r5, 16 - lvx \V1, r5, r10 -.endm - -;# Vertical filtering -.macro Vprolog - load_c v0, VFilter, r6, r3, r10 - - vspltish v5, 8 - vspltish v6, 3 - vslh v6, v5, v6 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040 - - vspltb v1, v0, 1 - vspltb v2, v0, 2 - vspltb v3, v0, 3 - vspltb v4, v0, 4 - vspltb v5, v0, 5 - vspltb v0, v0, 0 -.endm - -.macro vpre_load - Vprolog - li r10, 16 - lvx v10, 0, r9 ;# v10..v14 = first 5 rows - lvx v11, r10, r9 - addi r9, r9, 32 - lvx v12, 0, r9 - lvx v13, r10, r9 - addi r9, r9, 32 - lvx v14, 0, r9 -.endm - -.macro Msum Re, Ro, V, T, TMP - ;# (Re,Ro) += (V*T) - vmuleub \TMP, \V, \T ;# trashes v8 - vadduhm \Re, \Re, \TMP ;# Re = evens, saturation unnecessary - vmuloub \TMP, \V, \T - vadduhm \Ro, \Ro, \TMP ;# Ro = odds -.endm - -.macro vinterp_no_store P0 P1 P2 P3 P4 P5 - vmuleub v8, \P0, v0 ;# 64 + 4 positive taps - vadduhm v16, v6, v8 - vmuloub v8, \P0, v0 - vadduhm v17, v6, v8 - Msum v16, v17, \P2, v2, v8 - Msum v16, v17, \P3, v3, v8 - Msum v16, v17, \P5, v5, v8 - - vmuleub v18, \P1, v1 ;# 2 negative taps - vmuloub v19, \P1, v1 - Msum v18, v19, \P4, v4, v8 - - vsubuhs v16, v16, v18 ;# subtract neg from pos - vsubuhs v17, v17, v19 - vsrh v16, v16, v7 ;# divide by 128 - vsrh v17, v17, v7 ;# v16 v17 = evens, odds - vmrghh v18, v16, v17 ;# v18 v19 = 16-bit result in order - vmrglh v19, v16, v17 - vpkuhus \P0, v18, v19 ;# P0 = 8-bit result -.endm - -.macro vinterp_no_store_8x8 P0 P1 P2 P3 P4 P5 - vmuleub v24, \P0, v13 ;# 64 + 4 positive taps - vadduhm v21, v20, v24 - vmuloub v24, \P0, v13 - vadduhm v22, v20, v24 - Msum v21, v22, \P2, v15, v25 - Msum v21, v22, \P3, v16, v25 - Msum v21, v22, \P5, v18, v25 - - vmuleub v23, \P1, v14 ;# 2 negative taps - vmuloub v24, \P1, v14 - Msum v23, v24, \P4, v17, v25 - - vsubuhs v21, v21, v23 ;# subtract neg from pos - vsubuhs v22, v22, v24 - vsrh v21, v21, v19 ;# divide by 128 - vsrh v22, v22, v19 ;# v16 v17 = evens, odds - vmrghh v23, v21, v22 ;# v18 v19 = 16-bit result in order - vmrglh v24, v21, v22 - vpkuhus \P0, v23, v24 ;# P0 = 8-bit result -.endm - - -.macro Vinterp P0 P1 P2 P3 P4 P5 - vinterp_no_store \P0, \P1, \P2, \P3, \P4, \P5 - stvx \P0, 0, r7 - add r7, r7, r8 ;# 33 ops per 16 pels -.endm - - -.macro luma_v P0, P1, P2, P3, P4, P5 - addi r9, r9, 16 ;# P5 = newest input row - lvx \P5, 0, r9 - Vinterp \P0, \P1, \P2, \P3, \P4, \P5 -.endm - -.macro luma_vtwo - luma_v v10, v11, v12, v13, v14, v15 - luma_v v11, v12, v13, v14, v15, v10 -.endm - -.macro luma_vfour - luma_vtwo - luma_v v12, v13, v14, v15, v10, v11 - luma_v v13, v14, v15, v10, v11, v12 -.endm - -.macro luma_vsix - luma_vfour - luma_v v14, v15, v10, v11, v12, v13 - luma_v v15, v10, v11, v12, v13, v14 -.endm - -.macro Interp4 R I I4 - vmsummbm \R, v13, \I, v15 - vmsummbm \R, v14, \I4, \R -.endm - -.macro Read8x8 VD, RS, RP, increment_counter - lvsl v21, 0, \RS ;# permutate value for alignment - - ;# input to filter is 21 bytes wide, output is 16 bytes. - ;# input will can span three vectors if not aligned correctly. 
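The Msum/vinterp_no_store macros above spell out the 6-tap filter arithmetic: four positive and two negative taps whose magnitudes sum to 128, a rounding constant of 64 folded in before the shift, a shift right by 7 ("divide by 128"), and unsigned saturation on the pack. A scalar sketch of one output pixel (sixtap_pixel is a hypothetical name, and the taps are given as a plain signed 6-element array rather than the interleaved HFilter/VFilter vector tables):

    /* One 6-tap output pixel: p points at the pixel being filtered and
       must have 2 valid bytes before it and 3 after ("Back off input
       buffer by 2 bytes. Need 2 before and 3 after"). taps[] sums to 128. */
    static unsigned char sixtap_pixel(const unsigned char *p,
                                      const int taps[6]) {
      int i, acc = 64;                 /* the 0x0040 rounding splat */
      for (i = 0; i < 6; ++i)
        acc += taps[i] * p[i - 2];
      acc >>= 7;                       /* "divide by 128" */
      if (acc < 0) acc = 0;            /* vpkuhus-style saturation */
      if (acc > 255) acc = 255;
      return (unsigned char)acc;
    }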
- lvx \VD, 0, \RS - lvx v20, r10, \RS - -.if \increment_counter - add \RS, \RS, \RP -.endif - - vperm \VD, \VD, v20, v21 -.endm - -.macro interp_8x8 R - vperm v20, \R, \R, v16 ;# v20 = 0123 1234 2345 3456 - vperm v21, \R, \R, v17 ;# v21 = 4567 5678 6789 789A - Interp4 v20, v20, v21 ;# v20 = result 0 1 2 3 - vperm \R, \R, \R, v18 ;# R = 89AB 9ABC ABCx BCxx - Interp4 v21, v21, \R ;# v21 = result 4 5 6 7 - - vpkswus \R, v20, v21 ;# R = 0 1 2 3 4 5 6 7 - vsrh \R, \R, v19 - - vpkuhus \R, \R, \R ;# saturate and pack - -.endm - -.macro Read4x4 VD, RS, RP, increment_counter - lvsl v21, 0, \RS ;# permutate value for alignment - - ;# input to filter is 21 bytes wide, output is 16 bytes. - ;# input will can span three vectors if not aligned correctly. - lvx v20, 0, \RS - -.if \increment_counter - add \RS, \RS, \RP -.endif - - vperm \VD, v20, v20, v21 -.endm - .text - - .align 2 -;# r3 unsigned char * src -;# r4 int src_pitch -;# r5 int x_offset -;# r6 int y_offset -;# r7 unsigned char * dst -;# r8 int dst_pitch -sixtap_predict_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xff87 - ori r12, r12, 0xffc0 - mtspr 256, r12 ;# set VRSAVE - - stwu r1,-32(r1) ;# create space on the stack - - slwi. r5, r5, 5 ;# index into horizontal filter array - - vspltish v19, 7 - - ;# If there isn't any filtering to be done for the horizontal, then - ;# just skip to the second pass. - beq- vertical_only_4x4 - - ;# load up horizontal filter - load_hfilter v13, v14 - - ;# rounding added in on the multiply - vspltisw v16, 8 - vspltisw v15, 3 - vslw v15, v16, v15 ;# 0x00000040000000400000004000000040 - - ;# Load up permutation constants - load_c v16, B_0123, 0, r9, r10 - load_c v17, B_4567, 0, r9, r10 - load_c v18, B_89AB, 0, r9, r10 - - ;# Back off input buffer by 2 bytes. Need 2 before and 3 after - addi r3, r3, -2 - - addi r9, r3, 0 - li r10, 16 - Read8x8 v2, r3, r4, 1 - Read8x8 v3, r3, r4, 1 - Read8x8 v4, r3, r4, 1 - Read8x8 v5, r3, r4, 1 - - slwi. r6, r6, 4 ;# index into vertical filter array - - ;# filter a line - interp_8x8 v2 - interp_8x8 v3 - interp_8x8 v4 - interp_8x8 v5 - - ;# Finished filtering main horizontal block. If there is no - ;# vertical filtering, jump to storing the data. Otherwise - ;# load up and filter the additional 5 lines that are needed - ;# for the vertical filter. - beq- store_4x4 - - ;# only needed if there is a vertical filter present - ;# if the second filter is not null then need to back off by 2*pitch - sub r9, r9, r4 - sub r9, r9, r4 - - Read8x8 v0, r9, r4, 1 - Read8x8 v1, r9, r4, 0 - Read8x8 v6, r3, r4, 1 - Read8x8 v7, r3, r4, 1 - Read8x8 v8, r3, r4, 0 - - interp_8x8 v0 - interp_8x8 v1 - interp_8x8 v6 - interp_8x8 v7 - interp_8x8 v8 - - b second_pass_4x4 - -vertical_only_4x4: - ;# only needed if there is a vertical filter present - ;# if the second filter is not null then need to back off by 2*pitch - sub r3, r3, r4 - sub r3, r3, r4 - li r10, 16 - - Read8x8 v0, r3, r4, 1 - Read8x8 v1, r3, r4, 1 - Read8x8 v2, r3, r4, 1 - Read8x8 v3, r3, r4, 1 - Read8x8 v4, r3, r4, 1 - Read8x8 v5, r3, r4, 1 - Read8x8 v6, r3, r4, 1 - Read8x8 v7, r3, r4, 1 - Read8x8 v8, r3, r4, 0 - - slwi r6, r6, 4 ;# index into vertical filter array - -second_pass_4x4: - load_c v20, b_hilo_4x4, 0, r9, r10 - load_c v21, b_hilo, 0, r9, r10 - - ;# reposition input so that it can go through the - ;# filtering phase with one pass. 
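Stepping back from the register choreography: the 4x4 predictor is a separable two-pass filter. Rows are filtered horizontally into a temporary that carries 5 extra rows for the vertical taps, then each column is filtered. A sketch under the same assumptions as the sixtap_pixel helper above, ignoring the null-filter shortcuts the assembly takes when x_offset or y_offset is zero:

    /* Hypothetical scalar model of the two-pass 4x4 six-tap predictor.
       src must have a 2-pixel/2-row border before the block and 3 after. */
    static void sixtap_predict4x4_model(const unsigned char *src,
                                        int src_pitch,
                                        const int hf[6], const int vf[6],
                                        unsigned char *dst, int dst_pitch) {
      unsigned char tmp[9][4];   /* 4 output rows + 5 extra input rows */
      int r, c, k;
      for (r = 0; r < 9; ++r)
        for (c = 0; c < 4; ++c)
          tmp[r][c] = sixtap_pixel(&src[(r - 2) * src_pitch + c], hf);
      for (r = 0; r < 4; ++r)
        for (c = 0; c < 4; ++c) {
          unsigned char col[6];
          for (k = 0; k < 6; ++k)
            col[k] = tmp[r + k][c];
          dst[r * dst_pitch + c] = sixtap_pixel(&col[2], vf);
        }
    }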
- vperm v0, v0, v1, v20 ;# 0 1 x x - vperm v2, v2, v3, v20 ;# 2 3 x x - vperm v4, v4, v5, v20 ;# 4 5 x x - vperm v6, v6, v7, v20 ;# 6 7 x x - - vperm v0, v0, v2, v21 ;# 0 1 2 3 - vperm v4, v4, v6, v21 ;# 4 5 6 7 - - vsldoi v1, v0, v4, 4 - vsldoi v2, v0, v4, 8 - vsldoi v3, v0, v4, 12 - - vsldoi v5, v4, v8, 4 - - load_c v13, VFilter, r6, r9, r10 - - vspltish v15, 8 - vspltish v20, 3 - vslh v20, v15, v20 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040 - - vspltb v14, v13, 1 - vspltb v15, v13, 2 - vspltb v16, v13, 3 - vspltb v17, v13, 4 - vspltb v18, v13, 5 - vspltb v13, v13, 0 - - vinterp_no_store_8x8 v0, v1, v2, v3, v4, v5 - - stvx v0, 0, r1 - - lwz r0, 0(r1) - stw r0, 0(r7) - add r7, r7, r8 - - lwz r0, 4(r1) - stw r0, 0(r7) - add r7, r7, r8 - - lwz r0, 8(r1) - stw r0, 0(r7) - add r7, r7, r8 - - lwz r0, 12(r1) - stw r0, 0(r7) - - b exit_4x4 - -store_4x4: - - stvx v2, 0, r1 - lwz r0, 0(r1) - stw r0, 0(r7) - add r7, r7, r8 - - stvx v3, 0, r1 - lwz r0, 0(r1) - stw r0, 0(r7) - add r7, r7, r8 - - stvx v4, 0, r1 - lwz r0, 0(r1) - stw r0, 0(r7) - add r7, r7, r8 - - stvx v5, 0, r1 - lwz r0, 0(r1) - stw r0, 0(r7) - -exit_4x4: - - addi r1, r1, 32 ;# recover stack - - mtspr 256, r11 ;# reset old VRSAVE - - blr - -.macro w_8x8 V, D, R, P - stvx \V, 0, r1 - lwz \R, 0(r1) - stw \R, 0(r7) - lwz \R, 4(r1) - stw \R, 4(r7) - add \D, \D, \P -.endm - - .align 2 -;# r3 unsigned char * src -;# r4 int src_pitch -;# r5 int x_offset -;# r6 int y_offset -;# r7 unsigned char * dst -;# r8 int dst_pitch - -sixtap_predict8x4_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffff - ori r12, r12, 0xffc0 - mtspr 256, r12 ;# set VRSAVE - - stwu r1,-32(r1) ;# create space on the stack - - slwi. r5, r5, 5 ;# index into horizontal filter array - - vspltish v19, 7 - - ;# If there isn't any filtering to be done for the horizontal, then - ;# just skip to the second pass. - beq- second_pass_pre_copy_8x4 - - load_hfilter v13, v14 - - ;# rounding added in on the multiply - vspltisw v16, 8 - vspltisw v15, 3 - vslw v15, v16, v15 ;# 0x00000040000000400000004000000040 - - ;# Load up permutation constants - load_c v16, B_0123, 0, r9, r10 - load_c v17, B_4567, 0, r9, r10 - load_c v18, B_89AB, 0, r9, r10 - - ;# Back off input buffer by 2 bytes. Need 2 before and 3 after - addi r3, r3, -2 - - addi r9, r3, 0 - li r10, 16 - Read8x8 v2, r3, r4, 1 - Read8x8 v3, r3, r4, 1 - Read8x8 v4, r3, r4, 1 - Read8x8 v5, r3, r4, 1 - - slwi. r6, r6, 4 ;# index into vertical filter array - - ;# filter a line - interp_8x8 v2 - interp_8x8 v3 - interp_8x8 v4 - interp_8x8 v5 - - ;# Finished filtering main horizontal block. If there is no - ;# vertical filtering, jump to storing the data. Otherwise - ;# load up and filter the additional 5 lines that are needed - ;# for the vertical filter. 
- beq- store_8x4 - - ;# only needed if there is a vertical filter present - ;# if the second filter is not null then need to back off by 2*pitch - sub r9, r9, r4 - sub r9, r9, r4 - - Read8x8 v0, r9, r4, 1 - Read8x8 v1, r9, r4, 0 - Read8x8 v6, r3, r4, 1 - Read8x8 v7, r3, r4, 1 - Read8x8 v8, r3, r4, 0 - - interp_8x8 v0 - interp_8x8 v1 - interp_8x8 v6 - interp_8x8 v7 - interp_8x8 v8 - - b second_pass_8x4 - -second_pass_pre_copy_8x4: - ;# only needed if there is a vertical filter present - ;# if the second filter is not null then need to back off by 2*pitch - sub r3, r3, r4 - sub r3, r3, r4 - li r10, 16 - - Read8x8 v0, r3, r4, 1 - Read8x8 v1, r3, r4, 1 - Read8x8 v2, r3, r4, 1 - Read8x8 v3, r3, r4, 1 - Read8x8 v4, r3, r4, 1 - Read8x8 v5, r3, r4, 1 - Read8x8 v6, r3, r4, 1 - Read8x8 v7, r3, r4, 1 - Read8x8 v8, r3, r4, 1 - - slwi r6, r6, 4 ;# index into vertical filter array - -second_pass_8x4: - load_c v13, VFilter, r6, r9, r10 - - vspltish v15, 8 - vspltish v20, 3 - vslh v20, v15, v20 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040 - - vspltb v14, v13, 1 - vspltb v15, v13, 2 - vspltb v16, v13, 3 - vspltb v17, v13, 4 - vspltb v18, v13, 5 - vspltb v13, v13, 0 - - vinterp_no_store_8x8 v0, v1, v2, v3, v4, v5 - vinterp_no_store_8x8 v1, v2, v3, v4, v5, v6 - vinterp_no_store_8x8 v2, v3, v4, v5, v6, v7 - vinterp_no_store_8x8 v3, v4, v5, v6, v7, v8 - - cmpi cr0, r8, 8 - beq cr0, store_aligned_8x4 - - w_8x8 v0, r7, r0, r8 - w_8x8 v1, r7, r0, r8 - w_8x8 v2, r7, r0, r8 - w_8x8 v3, r7, r0, r8 - - b exit_8x4 - -store_aligned_8x4: - - load_c v10, b_hilo, 0, r9, r10 - - vperm v0, v0, v1, v10 - vperm v2, v2, v3, v10 - - stvx v0, 0, r7 - addi r7, r7, 16 - stvx v2, 0, r7 - - b exit_8x4 - -store_8x4: - cmpi cr0, r8, 8 - beq cr0, store_aligned2_8x4 - - w_8x8 v2, r7, r0, r8 - w_8x8 v3, r7, r0, r8 - w_8x8 v4, r7, r0, r8 - w_8x8 v5, r7, r0, r8 - - b exit_8x4 - -store_aligned2_8x4: - load_c v10, b_hilo, 0, r9, r10 - - vperm v2, v2, v3, v10 - vperm v4, v4, v5, v10 - - stvx v2, 0, r7 - addi r7, r7, 16 - stvx v4, 0, r7 - -exit_8x4: - - addi r1, r1, 32 ;# recover stack - - mtspr 256, r11 ;# reset old VRSAVE - - - blr - - .align 2 -;# r3 unsigned char * src -;# r4 int src_pitch -;# r5 int x_offset -;# r6 int y_offset -;# r7 unsigned char * dst -;# r8 int dst_pitch - -;# Because the width that needs to be filtered will fit in a single altivec -;# register there is no need to loop. Everything can stay in registers. -sixtap_predict8x8_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffff - ori r12, r12, 0xffc0 - mtspr 256, r12 ;# set VRSAVE - - stwu r1,-32(r1) ;# create space on the stack - - slwi. r5, r5, 5 ;# index into horizontal filter array - - vspltish v19, 7 - - ;# If there isn't any filtering to be done for the horizontal, then - ;# just skip to the second pass. - beq- second_pass_pre_copy_8x8 - - load_hfilter v13, v14 - - ;# rounding added in on the multiply - vspltisw v16, 8 - vspltisw v15, 3 - vslw v15, v16, v15 ;# 0x00000040000000400000004000000040 - - ;# Load up permutation constants - load_c v16, B_0123, 0, r9, r10 - load_c v17, B_4567, 0, r9, r10 - load_c v18, B_89AB, 0, r9, r10 - - ;# Back off input buffer by 2 bytes. Need 2 before and 3 after - addi r3, r3, -2 - - addi r9, r3, 0 - li r10, 16 - Read8x8 v2, r3, r4, 1 - Read8x8 v3, r3, r4, 1 - Read8x8 v4, r3, r4, 1 - Read8x8 v5, r3, r4, 1 - Read8x8 v6, r3, r4, 1 - Read8x8 v7, r3, r4, 1 - Read8x8 v8, r3, r4, 1 - Read8x8 v9, r3, r4, 1 - - slwi. 
r6, r6, 4 ;# index into vertical filter array - - ;# filter a line - interp_8x8 v2 - interp_8x8 v3 - interp_8x8 v4 - interp_8x8 v5 - interp_8x8 v6 - interp_8x8 v7 - interp_8x8 v8 - interp_8x8 v9 - - ;# Finished filtering main horizontal block. If there is no - ;# vertical filtering, jump to storing the data. Otherwise - ;# load up and filter the additional 5 lines that are needed - ;# for the vertical filter. - beq- store_8x8 - - ;# only needed if there is a vertical filter present - ;# if the second filter is not null then need to back off by 2*pitch - sub r9, r9, r4 - sub r9, r9, r4 - - Read8x8 v0, r9, r4, 1 - Read8x8 v1, r9, r4, 0 - Read8x8 v10, r3, r4, 1 - Read8x8 v11, r3, r4, 1 - Read8x8 v12, r3, r4, 0 - - interp_8x8 v0 - interp_8x8 v1 - interp_8x8 v10 - interp_8x8 v11 - interp_8x8 v12 - - b second_pass_8x8 - -second_pass_pre_copy_8x8: - ;# only needed if there is a vertical filter present - ;# if the second filter is not null then need to back off by 2*pitch - sub r3, r3, r4 - sub r3, r3, r4 - li r10, 16 - - Read8x8 v0, r3, r4, 1 - Read8x8 v1, r3, r4, 1 - Read8x8 v2, r3, r4, 1 - Read8x8 v3, r3, r4, 1 - Read8x8 v4, r3, r4, 1 - Read8x8 v5, r3, r4, 1 - Read8x8 v6, r3, r4, 1 - Read8x8 v7, r3, r4, 1 - Read8x8 v8, r3, r4, 1 - Read8x8 v9, r3, r4, 1 - Read8x8 v10, r3, r4, 1 - Read8x8 v11, r3, r4, 1 - Read8x8 v12, r3, r4, 0 - - slwi r6, r6, 4 ;# index into vertical filter array - -second_pass_8x8: - load_c v13, VFilter, r6, r9, r10 - - vspltish v15, 8 - vspltish v20, 3 - vslh v20, v15, v20 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040 - - vspltb v14, v13, 1 - vspltb v15, v13, 2 - vspltb v16, v13, 3 - vspltb v17, v13, 4 - vspltb v18, v13, 5 - vspltb v13, v13, 0 - - vinterp_no_store_8x8 v0, v1, v2, v3, v4, v5 - vinterp_no_store_8x8 v1, v2, v3, v4, v5, v6 - vinterp_no_store_8x8 v2, v3, v4, v5, v6, v7 - vinterp_no_store_8x8 v3, v4, v5, v6, v7, v8 - vinterp_no_store_8x8 v4, v5, v6, v7, v8, v9 - vinterp_no_store_8x8 v5, v6, v7, v8, v9, v10 - vinterp_no_store_8x8 v6, v7, v8, v9, v10, v11 - vinterp_no_store_8x8 v7, v8, v9, v10, v11, v12 - - cmpi cr0, r8, 8 - beq cr0, store_aligned_8x8 - - w_8x8 v0, r7, r0, r8 - w_8x8 v1, r7, r0, r8 - w_8x8 v2, r7, r0, r8 - w_8x8 v3, r7, r0, r8 - w_8x8 v4, r7, r0, r8 - w_8x8 v5, r7, r0, r8 - w_8x8 v6, r7, r0, r8 - w_8x8 v7, r7, r0, r8 - - b exit_8x8 - -store_aligned_8x8: - - load_c v10, b_hilo, 0, r9, r10 - - vperm v0, v0, v1, v10 - vperm v2, v2, v3, v10 - vperm v4, v4, v5, v10 - vperm v6, v6, v7, v10 - - stvx v0, 0, r7 - addi r7, r7, 16 - stvx v2, 0, r7 - addi r7, r7, 16 - stvx v4, 0, r7 - addi r7, r7, 16 - stvx v6, 0, r7 - - b exit_8x8 - -store_8x8: - cmpi cr0, r8, 8 - beq cr0, store_aligned2_8x8 - - w_8x8 v2, r7, r0, r8 - w_8x8 v3, r7, r0, r8 - w_8x8 v4, r7, r0, r8 - w_8x8 v5, r7, r0, r8 - w_8x8 v6, r7, r0, r8 - w_8x8 v7, r7, r0, r8 - w_8x8 v8, r7, r0, r8 - w_8x8 v9, r7, r0, r8 - - b exit_8x8 - -store_aligned2_8x8: - load_c v10, b_hilo, 0, r9, r10 - - vperm v2, v2, v3, v10 - vperm v4, v4, v5, v10 - vperm v6, v6, v7, v10 - vperm v8, v8, v9, v10 - - stvx v2, 0, r7 - addi r7, r7, 16 - stvx v4, 0, r7 - addi r7, r7, 16 - stvx v6, 0, r7 - addi r7, r7, 16 - stvx v8, 0, r7 - -exit_8x8: - - addi r1, r1, 32 ;# recover stack - - mtspr 256, r11 ;# reset old VRSAVE - - blr - - .align 2 -;# r3 unsigned char * src -;# r4 int src_pitch -;# r5 int x_offset -;# r6 int y_offset -;# r7 unsigned char * dst -;# r8 int dst_pitch - -;# Two pass filtering. First pass is Horizontal edges, second pass is vertical -;# edges. One of the filters can be null, but both won't be. 
Needs to use a -;# temporary buffer because the source buffer can't be modified and the buffer -;# for the destination is not large enough to hold the temporary data. -sixtap_predict16x16_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffff - ori r12, r12, 0xf000 - mtspr 256, r12 ;# set VRSAVE - - stwu r1,-416(r1) ;# create space on the stack - - ;# Three possiblities - ;# 1. First filter is null. Don't use a temp buffer. - ;# 2. Second filter is null. Don't use a temp buffer. - ;# 3. Neither are null, use temp buffer. - - ;# First Pass (horizontal edge) - ;# setup pointers for src - ;# if possiblity (1) then setup the src pointer to be the orginal and jump - ;# to second pass. this is based on if x_offset is 0. - - ;# load up horizontal filter - slwi. r5, r5, 5 ;# index into horizontal filter array - - load_hfilter v4, v5 - - beq- copy_horizontal_16x21 - - ;# Back off input buffer by 2 bytes. Need 2 before and 3 after - addi r3, r3, -2 - - slwi. r6, r6, 4 ;# index into vertical filter array - - ;# setup constants - ;# v14 permutation value for alignment - load_c v14, b_hperm, 0, r9, r10 - - ;# These statements are guessing that there won't be a second pass, - ;# but if there is then inside the bypass they need to be set - li r0, 16 ;# prepare for no vertical filter - - ;# Change the output pointer and pitch to be the actual - ;# desination instead of a temporary buffer. - addi r9, r7, 0 - addi r5, r8, 0 - - ;# no vertical filter, so write the output from the first pass - ;# directly into the output buffer. - beq- no_vertical_filter_bypass - - ;# if the second filter is not null then need to back off by 2*pitch - sub r3, r3, r4 - sub r3, r3, r4 - - ;# setup counter for the number of lines that are going to be filtered - li r0, 21 - - ;# use the stack as temporary storage - la r9, 48(r1) - li r5, 16 - -no_vertical_filter_bypass: - - mtctr r0 - - ;# rounding added in on the multiply - vspltisw v10, 8 - vspltisw v12, 3 - vslw v12, v10, v12 ;# 0x00000040000000400000004000000040 - - ;# downshift by 7 ( divide by 128 ) at the end - vspltish v13, 7 - - ;# index to the next set of vectors in the row. - li r10, 16 - li r12, 32 - -horizontal_loop_16x16: - - lvsl v15, 0, r3 ;# permutate value for alignment - - ;# input to filter is 21 bytes wide, output is 16 bytes. - ;# input will can span three vectors if not aligned correctly. - lvx v1, 0, r3 - lvx v2, r10, r3 - lvx v3, r12, r3 - - vperm v8, v1, v2, v15 - vperm v9, v2, v3, v15 ;# v8 v9 = 21 input pixels left-justified - - vsldoi v11, v8, v9, 4 - - ;# set 0 - vmsummbm v6, v4, v8, v12 ;# taps times elements - vmsummbm v0, v5, v11, v6 - - ;# set 1 - vsldoi v10, v8, v9, 1 - vsldoi v11, v8, v9, 5 - - vmsummbm v6, v4, v10, v12 - vmsummbm v1, v5, v11, v6 - - ;# set 2 - vsldoi v10, v8, v9, 2 - vsldoi v11, v8, v9, 6 - - vmsummbm v6, v4, v10, v12 - vmsummbm v2, v5, v11, v6 - - ;# set 3 - vsldoi v10, v8, v9, 3 - vsldoi v11, v8, v9, 7 - - vmsummbm v6, v4, v10, v12 - vmsummbm v3, v5, v11, v6 - - vpkswus v0, v0, v1 ;# v0 = 0 4 8 C 1 5 9 D (16-bit) - vpkswus v1, v2, v3 ;# v1 = 2 6 A E 3 7 B F - - vsrh v0, v0, v13 ;# divide v0, v1 by 128 - vsrh v1, v1, v13 - - vpkuhus v0, v0, v1 ;# v0 = scrambled 8-bit result - vperm v0, v0, v0, v14 ;# v0 = correctly-ordered result - - stvx v0, 0, r9 - add r9, r9, r5 - - add r3, r3, r4 - - bdnz horizontal_loop_16x16 - - ;# check again to see if vertical filter needs to be done. 
- cmpi cr0, r6, 0 - beq cr0, end_16x16 - - ;# yes there is, so go to the second pass - b second_pass_16x16 - -copy_horizontal_16x21: - li r10, 21 - mtctr r10 - - li r10, 16 - - sub r3, r3, r4 - sub r3, r3, r4 - - ;# this is done above if there is a horizontal filter, - ;# if not it needs to be done down here. - slwi r6, r6, 4 ;# index into vertical filter array - - ;# always write to the stack when doing a horizontal copy - la r9, 48(r1) - -copy_horizontal_loop_16x21: - lvsl v15, 0, r3 ;# permutate value for alignment - - lvx v1, 0, r3 - lvx v2, r10, r3 - - vperm v8, v1, v2, v15 - - stvx v8, 0, r9 - addi r9, r9, 16 - - add r3, r3, r4 - - bdnz copy_horizontal_loop_16x21 - -second_pass_16x16: - - ;# always read from the stack when doing a vertical filter - la r9, 48(r1) - - ;# downshift by 7 ( divide by 128 ) at the end - vspltish v7, 7 - - vpre_load - - luma_vsix - luma_vsix - luma_vfour - -end_16x16: - - addi r1, r1, 416 ;# recover stack - - mtspr 256, r11 ;# reset old VRSAVE - - blr - - .data - - .align 4 -HFilter: - .byte 0, 0,128, 0, 0, 0,128, 0, 0, 0,128, 0, 0, 0,128, 0 - .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 0, -6,123, 12, 0, -6,123, 12, 0, -6,123, 12, 0, -6,123, 12 - .byte -1, 0, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0 - .byte 2,-11,108, 36, 2,-11,108, 36, 2,-11,108, 36, 2,-11,108, 36 - .byte -8, 1, 0, 0, -8, 1, 0, 0, -8, 1, 0, 0, -8, 1, 0, 0 - .byte 0, -9, 93, 50, 0, -9, 93, 50, 0, -9, 93, 50, 0, -9, 93, 50 - .byte -6, 0, 0, 0, -6, 0, 0, 0, -6, 0, 0, 0, -6, 0, 0, 0 - .byte 3,-16, 77, 77, 3,-16, 77, 77, 3,-16, 77, 77, 3,-16, 77, 77 - .byte -16, 3, 0, 0,-16, 3, 0, 0,-16, 3, 0, 0,-16, 3, 0, 0 - .byte 0, -6, 50, 93, 0, -6, 50, 93, 0, -6, 50, 93, 0, -6, 50, 93 - .byte -9, 0, 0, 0, -9, 0, 0, 0, -9, 0, 0, 0, -9, 0, 0, 0 - .byte 1, -8, 36,108, 1, -8, 36,108, 1, -8, 36,108, 1, -8, 36,108 - .byte -11, 2, 0, 0,-11, 2, 0, 0,-11, 2, 0, 0,-11, 2, 0, 0 - .byte 0, -1, 12,123, 0, -1, 12,123, 0, -1, 12,123, 0, -1, 12,123 - .byte -6, 0, 0, 0, -6, 0, 0, 0, -6, 0, 0, 0, -6, 0, 0, 0 - - .align 4 -VFilter: - .byte 0, 0,128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 0, 6,123, 12, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 2, 11,108, 36, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 0, 9, 93, 50, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 3, 16, 77, 77, 16, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 0, 6, 50, 93, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 1, 8, 36,108, 11, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 0, 1, 12,123, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - - .align 4 -b_hperm: - .byte 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 - - .align 4 -B_0123: - .byte 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6 - - .align 4 -B_4567: - .byte 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10 - - .align 4 -B_89AB: - .byte 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12, 13, 14 - - .align 4 -b_hilo: - .byte 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 - - .align 4 -b_hilo_4x4: - .byte 0, 1, 2, 3, 16, 17, 18, 19, 0, 0, 0, 0, 0, 0, 0, 0 diff --git a/vp9/common/ppc/vp9_filter_bilinear_altivec.asm b/vp9/common/ppc/vp9_filter_bilinear_altivec.asm deleted file mode 100644 index fd8aa665f..000000000 --- a/vp9/common/ppc/vp9_filter_bilinear_altivec.asm +++ /dev/null @@ -1,677 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. 
An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - .globl bilinear_predict4x4_ppc - .globl bilinear_predict8x4_ppc - .globl bilinear_predict8x8_ppc - .globl bilinear_predict16x16_ppc - -.macro load_c V, LABEL, OFF, R0, R1 - lis \R0, \LABEL@ha - la \R1, \LABEL@l(\R0) - lvx \V, \OFF, \R1 -.endm - -.macro load_vfilter V0, V1 - load_c \V0, vfilter_b, r6, r9, r10 - - addi r6, r6, 16 - lvx \V1, r6, r10 -.endm - -.macro HProlog jump_label - ;# load up horizontal filter - slwi. r5, r5, 4 ;# index into horizontal filter array - - ;# index to the next set of vectors in the row. - li r10, 16 - li r12, 32 - - ;# downshift by 7 ( divide by 128 ) at the end - vspltish v19, 7 - - ;# If there isn't any filtering to be done for the horizontal, then - ;# just skip to the second pass. - beq \jump_label - - load_c v20, hfilter_b, r5, r9, r0 - - ;# setup constants - ;# v14 permutation value for alignment - load_c v28, b_hperm_b, 0, r9, r0 - - ;# rounding added in on the multiply - vspltisw v21, 8 - vspltisw v18, 3 - vslw v18, v21, v18 ;# 0x00000040000000400000004000000040 - - slwi. r6, r6, 5 ;# index into vertical filter array -.endm - -;# Filters a horizontal line -;# expects: -;# r3 src_ptr -;# r4 pitch -;# r10 16 -;# r12 32 -;# v17 perm intput -;# v18 rounding -;# v19 shift -;# v20 filter taps -;# v21 tmp -;# v22 tmp -;# v23 tmp -;# v24 tmp -;# v25 tmp -;# v26 tmp -;# v27 tmp -;# v28 perm output -;# -.macro HFilter V - vperm v24, v21, v21, v10 ;# v20 = 0123 1234 2345 3456 - vperm v25, v21, v21, v11 ;# v21 = 4567 5678 6789 789A - - vmsummbm v24, v20, v24, v18 - vmsummbm v25, v20, v25, v18 - - vpkswus v24, v24, v25 ;# v24 = 0 4 8 C 1 5 9 D (16-bit) - - vsrh v24, v24, v19 ;# divide v0, v1 by 128 - - vpkuhus \V, v24, v24 ;# \V = scrambled 8-bit result -.endm - -.macro hfilter_8 V, increment_counter - lvsl v17, 0, r3 ;# permutate value for alignment - - ;# input to filter is 9 bytes wide, output is 8 bytes. - lvx v21, 0, r3 - lvx v22, r10, r3 - -.if \increment_counter - add r3, r3, r4 -.endif - vperm v21, v21, v22, v17 - - HFilter \V -.endm - - -.macro load_and_align_8 V, increment_counter - lvsl v17, 0, r3 ;# permutate value for alignment - - ;# input to filter is 21 bytes wide, output is 16 bytes. - ;# input will can span three vectors if not aligned correctly. 
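The lvsl/lvx/vperm idiom that load_and_align_8 (and Read8x8 earlier) is built on is the standard AltiVec unaligned load: two aligned 16-byte loads plus a permute left-justify the misaligned data. The same thing as a compiler-intrinsics sketch (assumes a toolchain providing <altivec.h> with AltiVec enabled, e.g. -maltivec):

    #include <altivec.h>

    /* Load 16 bytes from a possibly unaligned address. */
    static vector unsigned char load_unaligned16(const unsigned char *p) {
      vector unsigned char perm = vec_lvsl(0, p);  /* lvsl: shift mask */
      vector unsigned char lo   = vec_ld(0, p);    /* lvx, rounds down */
      vector unsigned char hi   = vec_ld(16, p);   /* next aligned 16B */
      return vec_perm(lo, hi, perm);               /* vperm */
    }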
- lvx v21, 0, r3 - lvx v22, r10, r3 - -.if \increment_counter - add r3, r3, r4 -.endif - - vperm \V, v21, v22, v17 -.endm - -.macro write_aligned_8 V, increment_counter - stvx \V, 0, r7 - -.if \increment_counter - add r7, r7, r8 -.endif -.endm - -.macro vfilter_16 P0 P1 - vmuleub v22, \P0, v20 ;# 64 + 4 positive taps - vadduhm v22, v18, v22 - vmuloub v23, \P0, v20 - vadduhm v23, v18, v23 - - vmuleub v24, \P1, v21 - vadduhm v22, v22, v24 ;# Re = evens, saturation unnecessary - vmuloub v25, \P1, v21 - vadduhm v23, v23, v25 ;# Ro = odds - - vsrh v22, v22, v19 ;# divide by 128 - vsrh v23, v23, v19 ;# v16 v17 = evens, odds - vmrghh \P0, v22, v23 ;# v18 v19 = 16-bit result in order - vmrglh v23, v22, v23 - vpkuhus \P0, \P0, v23 ;# P0 = 8-bit result -.endm - - -.macro w_8x8 V, D, R, P - stvx \V, 0, r1 - lwz \R, 0(r1) - stw \R, 0(r7) - lwz \R, 4(r1) - stw \R, 4(r7) - add \D, \D, \P -.endm - - - .align 2 -;# r3 unsigned char * src -;# r4 int src_pitch -;# r5 int x_offset -;# r6 int y_offset -;# r7 unsigned char * dst -;# r8 int dst_pitch -bilinear_predict4x4_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xf830 - ori r12, r12, 0xfff8 - mtspr 256, r12 ;# set VRSAVE - - stwu r1,-32(r1) ;# create space on the stack - - HProlog second_pass_4x4_pre_copy_b - - ;# Load up permutation constants - load_c v10, b_0123_b, 0, r9, r12 - load_c v11, b_4567_b, 0, r9, r12 - - hfilter_8 v0, 1 - hfilter_8 v1, 1 - hfilter_8 v2, 1 - hfilter_8 v3, 1 - - ;# Finished filtering main horizontal block. If there is no - ;# vertical filtering, jump to storing the data. Otherwise - ;# load up and filter the additional line that is needed - ;# for the vertical filter. - beq store_out_4x4_b - - hfilter_8 v4, 0 - - b second_pass_4x4_b - -second_pass_4x4_pre_copy_b: - slwi r6, r6, 5 ;# index into vertical filter array - - load_and_align_8 v0, 1 - load_and_align_8 v1, 1 - load_and_align_8 v2, 1 - load_and_align_8 v3, 1 - load_and_align_8 v4, 1 - -second_pass_4x4_b: - vspltish v20, 8 - vspltish v18, 3 - vslh v18, v20, v18 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040 - - load_vfilter v20, v21 - - vfilter_16 v0, v1 - vfilter_16 v1, v2 - vfilter_16 v2, v3 - vfilter_16 v3, v4 - -store_out_4x4_b: - - stvx v0, 0, r1 - lwz r0, 0(r1) - stw r0, 0(r7) - add r7, r7, r8 - - stvx v1, 0, r1 - lwz r0, 0(r1) - stw r0, 0(r7) - add r7, r7, r8 - - stvx v2, 0, r1 - lwz r0, 0(r1) - stw r0, 0(r7) - add r7, r7, r8 - - stvx v3, 0, r1 - lwz r0, 0(r1) - stw r0, 0(r7) - -exit_4x4: - - addi r1, r1, 32 ;# recover stack - mtspr 256, r11 ;# reset old VRSAVE - - blr - - .align 2 -;# r3 unsigned char * src -;# r4 int src_pitch -;# r5 int x_offset -;# r6 int y_offset -;# r7 unsigned char * dst -;# r8 int dst_pitch -bilinear_predict8x4_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xf830 - ori r12, r12, 0xfff8 - mtspr 256, r12 ;# set VRSAVE - - stwu r1,-32(r1) ;# create space on the stack - - HProlog second_pass_8x4_pre_copy_b - - ;# Load up permutation constants - load_c v10, b_0123_b, 0, r9, r12 - load_c v11, b_4567_b, 0, r9, r12 - - hfilter_8 v0, 1 - hfilter_8 v1, 1 - hfilter_8 v2, 1 - hfilter_8 v3, 1 - - ;# Finished filtering main horizontal block. If there is no - ;# vertical filtering, jump to storing the data. Otherwise - ;# load up and filter the additional line that is needed - ;# for the vertical filter. 
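The vfilter_16 macro above is plain bilinear interpolation: two non-negative taps that sum to 128 (see the vfilter_b table further down), with the usual +64 rounding and shift right by 7. One pixel of it in scalar C (bilinear_pixel is a hypothetical name):

    /* The taps sum to 128, so the result already fits in 8 bits
       and needs no clamp. */
    static unsigned char bilinear_pixel(unsigned char a, unsigned char b,
                                        int tap0, int tap1) {
      return (unsigned char)((a * tap0 + b * tap1 + 64) >> 7);
    }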
- beq store_out_8x4_b - - hfilter_8 v4, 0 - - b second_pass_8x4_b - -second_pass_8x4_pre_copy_b: - slwi r6, r6, 5 ;# index into vertical filter array - - load_and_align_8 v0, 1 - load_and_align_8 v1, 1 - load_and_align_8 v2, 1 - load_and_align_8 v3, 1 - load_and_align_8 v4, 1 - -second_pass_8x4_b: - vspltish v20, 8 - vspltish v18, 3 - vslh v18, v20, v18 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040 - - load_vfilter v20, v21 - - vfilter_16 v0, v1 - vfilter_16 v1, v2 - vfilter_16 v2, v3 - vfilter_16 v3, v4 - -store_out_8x4_b: - - cmpi cr0, r8, 8 - beq cr0, store_aligned_8x4_b - - w_8x8 v0, r7, r0, r8 - w_8x8 v1, r7, r0, r8 - w_8x8 v2, r7, r0, r8 - w_8x8 v3, r7, r0, r8 - - b exit_8x4 - -store_aligned_8x4_b: - load_c v10, b_hilo_b, 0, r9, r10 - - vperm v0, v0, v1, v10 - vperm v2, v2, v3, v10 - - stvx v0, 0, r7 - addi r7, r7, 16 - stvx v2, 0, r7 - -exit_8x4: - - addi r1, r1, 32 ;# recover stack - mtspr 256, r11 ;# reset old VRSAVE - - blr - - .align 2 -;# r3 unsigned char * src -;# r4 int src_pitch -;# r5 int x_offset -;# r6 int y_offset -;# r7 unsigned char * dst -;# r8 int dst_pitch -bilinear_predict8x8_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xfff0 - ori r12, r12, 0xffff - mtspr 256, r12 ;# set VRSAVE - - stwu r1,-32(r1) ;# create space on the stack - - HProlog second_pass_8x8_pre_copy_b - - ;# Load up permutation constants - load_c v10, b_0123_b, 0, r9, r12 - load_c v11, b_4567_b, 0, r9, r12 - - hfilter_8 v0, 1 - hfilter_8 v1, 1 - hfilter_8 v2, 1 - hfilter_8 v3, 1 - hfilter_8 v4, 1 - hfilter_8 v5, 1 - hfilter_8 v6, 1 - hfilter_8 v7, 1 - - ;# Finished filtering main horizontal block. If there is no - ;# vertical filtering, jump to storing the data. Otherwise - ;# load up and filter the additional line that is needed - ;# for the vertical filter. - beq store_out_8x8_b - - hfilter_8 v8, 0 - - b second_pass_8x8_b - -second_pass_8x8_pre_copy_b: - slwi r6, r6, 5 ;# index into vertical filter array - - load_and_align_8 v0, 1 - load_and_align_8 v1, 1 - load_and_align_8 v2, 1 - load_and_align_8 v3, 1 - load_and_align_8 v4, 1 - load_and_align_8 v5, 1 - load_and_align_8 v6, 1 - load_and_align_8 v7, 1 - load_and_align_8 v8, 0 - -second_pass_8x8_b: - vspltish v20, 8 - vspltish v18, 3 - vslh v18, v20, v18 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040 - - load_vfilter v20, v21 - - vfilter_16 v0, v1 - vfilter_16 v1, v2 - vfilter_16 v2, v3 - vfilter_16 v3, v4 - vfilter_16 v4, v5 - vfilter_16 v5, v6 - vfilter_16 v6, v7 - vfilter_16 v7, v8 - -store_out_8x8_b: - - cmpi cr0, r8, 8 - beq cr0, store_aligned_8x8_b - - w_8x8 v0, r7, r0, r8 - w_8x8 v1, r7, r0, r8 - w_8x8 v2, r7, r0, r8 - w_8x8 v3, r7, r0, r8 - w_8x8 v4, r7, r0, r8 - w_8x8 v5, r7, r0, r8 - w_8x8 v6, r7, r0, r8 - w_8x8 v7, r7, r0, r8 - - b exit_8x8 - -store_aligned_8x8_b: - load_c v10, b_hilo_b, 0, r9, r10 - - vperm v0, v0, v1, v10 - vperm v2, v2, v3, v10 - vperm v4, v4, v5, v10 - vperm v6, v6, v7, v10 - - stvx v0, 0, r7 - addi r7, r7, 16 - stvx v2, 0, r7 - addi r7, r7, 16 - stvx v4, 0, r7 - addi r7, r7, 16 - stvx v6, 0, r7 - -exit_8x8: - - addi r1, r1, 32 ;# recover stack - mtspr 256, r11 ;# reset old VRSAVE - - blr - -;# Filters a horizontal line -;# expects: -;# r3 src_ptr -;# r4 pitch -;# r10 16 -;# r12 32 -;# v17 perm intput -;# v18 rounding -;# v19 shift -;# v20 filter taps -;# v21 tmp -;# v22 tmp -;# v23 tmp -;# v24 tmp -;# v25 tmp -;# v26 tmp -;# v27 tmp -;# v28 perm output -;# -.macro hfilter_16 V, increment_counter - - lvsl v17, 0, r3 ;# permutate value for alignment - - ;# input to filter is 21 bytes wide, output is 16 bytes. 
- ;# input will can span three vectors if not aligned correctly. - lvx v21, 0, r3 - lvx v22, r10, r3 - lvx v23, r12, r3 - -.if \increment_counter - add r3, r3, r4 -.endif - vperm v21, v21, v22, v17 - vperm v22, v22, v23, v17 ;# v8 v9 = 21 input pixels left-justified - - ;# set 0 - vmsummbm v24, v20, v21, v18 ;# taps times elements - - ;# set 1 - vsldoi v23, v21, v22, 1 - vmsummbm v25, v20, v23, v18 - - ;# set 2 - vsldoi v23, v21, v22, 2 - vmsummbm v26, v20, v23, v18 - - ;# set 3 - vsldoi v23, v21, v22, 3 - vmsummbm v27, v20, v23, v18 - - vpkswus v24, v24, v25 ;# v24 = 0 4 8 C 1 5 9 D (16-bit) - vpkswus v25, v26, v27 ;# v25 = 2 6 A E 3 7 B F - - vsrh v24, v24, v19 ;# divide v0, v1 by 128 - vsrh v25, v25, v19 - - vpkuhus \V, v24, v25 ;# \V = scrambled 8-bit result - vperm \V, \V, v0, v28 ;# \V = correctly-ordered result -.endm - -.macro load_and_align_16 V, increment_counter - lvsl v17, 0, r3 ;# permutate value for alignment - - ;# input to filter is 21 bytes wide, output is 16 bytes. - ;# input will can span three vectors if not aligned correctly. - lvx v21, 0, r3 - lvx v22, r10, r3 - -.if \increment_counter - add r3, r3, r4 -.endif - - vperm \V, v21, v22, v17 -.endm - -.macro write_16 V, increment_counter - stvx \V, 0, r7 - -.if \increment_counter - add r7, r7, r8 -.endif -.endm - - .align 2 -;# r3 unsigned char * src -;# r4 int src_pitch -;# r5 int x_offset -;# r6 int y_offset -;# r7 unsigned char * dst -;# r8 int dst_pitch -bilinear_predict16x16_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffff - ori r12, r12, 0xfff8 - mtspr 256, r12 ;# set VRSAVE - - HProlog second_pass_16x16_pre_copy_b - - hfilter_16 v0, 1 - hfilter_16 v1, 1 - hfilter_16 v2, 1 - hfilter_16 v3, 1 - hfilter_16 v4, 1 - hfilter_16 v5, 1 - hfilter_16 v6, 1 - hfilter_16 v7, 1 - hfilter_16 v8, 1 - hfilter_16 v9, 1 - hfilter_16 v10, 1 - hfilter_16 v11, 1 - hfilter_16 v12, 1 - hfilter_16 v13, 1 - hfilter_16 v14, 1 - hfilter_16 v15, 1 - - ;# Finished filtering main horizontal block. If there is no - ;# vertical filtering, jump to storing the data. Otherwise - ;# load up and filter the additional line that is needed - ;# for the vertical filter. 
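Before the second pass below: the 16x16 bilinear predictor filters 17 rows horizontally (one extra row feeds the vertical taps) and then filters each column. A sketch using the bilinear_pixel helper above, again ignoring the null-filter shortcuts taken when an offset is zero; hf0/hf1 and vf0/vf1 stand for the two taps that x_offset/y_offset select from hfilter_b/vfilter_b:

    static void bilinear16x16_model(const unsigned char *src, int src_pitch,
                                    int hf0, int hf1, int vf0, int vf1,
                                    unsigned char *dst, int dst_pitch) {
      unsigned char tmp[17][16];   /* 16 rows + 1 for the vertical taps */
      int r, c;
      for (r = 0; r < 17; ++r)
        for (c = 0; c < 16; ++c)
          tmp[r][c] = bilinear_pixel(src[r * src_pitch + c],
                                     src[r * src_pitch + c + 1], hf0, hf1);
      for (r = 0; r < 16; ++r)
        for (c = 0; c < 16; ++c)
          dst[r * dst_pitch + c] = bilinear_pixel(tmp[r][c], tmp[r + 1][c],
                                                  vf0, vf1);
    }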
- beq store_out_16x16_b - - hfilter_16 v16, 0 - - b second_pass_16x16_b - -second_pass_16x16_pre_copy_b: - slwi r6, r6, 5 ;# index into vertical filter array - - load_and_align_16 v0, 1 - load_and_align_16 v1, 1 - load_and_align_16 v2, 1 - load_and_align_16 v3, 1 - load_and_align_16 v4, 1 - load_and_align_16 v5, 1 - load_and_align_16 v6, 1 - load_and_align_16 v7, 1 - load_and_align_16 v8, 1 - load_and_align_16 v9, 1 - load_and_align_16 v10, 1 - load_and_align_16 v11, 1 - load_and_align_16 v12, 1 - load_and_align_16 v13, 1 - load_and_align_16 v14, 1 - load_and_align_16 v15, 1 - load_and_align_16 v16, 0 - -second_pass_16x16_b: - vspltish v20, 8 - vspltish v18, 3 - vslh v18, v20, v18 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040 - - load_vfilter v20, v21 - - vfilter_16 v0, v1 - vfilter_16 v1, v2 - vfilter_16 v2, v3 - vfilter_16 v3, v4 - vfilter_16 v4, v5 - vfilter_16 v5, v6 - vfilter_16 v6, v7 - vfilter_16 v7, v8 - vfilter_16 v8, v9 - vfilter_16 v9, v10 - vfilter_16 v10, v11 - vfilter_16 v11, v12 - vfilter_16 v12, v13 - vfilter_16 v13, v14 - vfilter_16 v14, v15 - vfilter_16 v15, v16 - -store_out_16x16_b: - - write_16 v0, 1 - write_16 v1, 1 - write_16 v2, 1 - write_16 v3, 1 - write_16 v4, 1 - write_16 v5, 1 - write_16 v6, 1 - write_16 v7, 1 - write_16 v8, 1 - write_16 v9, 1 - write_16 v10, 1 - write_16 v11, 1 - write_16 v12, 1 - write_16 v13, 1 - write_16 v14, 1 - write_16 v15, 0 - - mtspr 256, r11 ;# reset old VRSAVE - - blr - - .data - - .align 4 -hfilter_b: - .byte 128, 0, 0, 0,128, 0, 0, 0,128, 0, 0, 0,128, 0, 0, 0 - .byte 112, 16, 0, 0,112, 16, 0, 0,112, 16, 0, 0,112, 16, 0, 0 - .byte 96, 32, 0, 0, 96, 32, 0, 0, 96, 32, 0, 0, 96, 32, 0, 0 - .byte 80, 48, 0, 0, 80, 48, 0, 0, 80, 48, 0, 0, 80, 48, 0, 0 - .byte 64, 64, 0, 0, 64, 64, 0, 0, 64, 64, 0, 0, 64, 64, 0, 0 - .byte 48, 80, 0, 0, 48, 80, 0, 0, 48, 80, 0, 0, 48, 80, 0, 0 - .byte 32, 96, 0, 0, 32, 96, 0, 0, 32, 96, 0, 0, 32, 96, 0, 0 - .byte 16,112, 0, 0, 16,112, 0, 0, 16,112, 0, 0, 16,112, 0, 0 - - .align 4 -vfilter_b: - .byte 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128 - .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112 - .byte 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16 - .byte 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96 - .byte 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32 - .byte 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80 - .byte 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48 - .byte 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 - .byte 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 - .byte 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48 - .byte 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80 - .byte 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32 - .byte 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96 - .byte 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16 - .byte 112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112 - - .align 4 -b_hperm_b: - .byte 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 - - .align 4 -b_0123_b: - .byte 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6 - - .align 4 -b_4567_b: - .byte 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10 - -b_hilo_b: - .byte 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 diff --git a/vp9/common/ppc/vp9_idct_altivec.asm b/vp9/common/ppc/vp9_idct_altivec.asm deleted file 
mode 100644 index b87aa4200..000000000 --- a/vp9/common/ppc/vp9_idct_altivec.asm +++ /dev/null @@ -1,189 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - .globl short_idct4x4_ppc - -.macro load_c V, LABEL, OFF, R0, R1 - lis \R0, \LABEL@ha - la \R1, \LABEL@l(\R0) - lvx \V, \OFF, \R1 -.endm - -;# r3 short *input -;# r4 short *output -;# r5 int pitch - .align 2 -short_idct4x4_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xfff8 - mtspr 256, r12 ;# set VRSAVE - - load_c v8, sinpi8sqrt2, 0, r9, r10 - load_c v9, cospi8sqrt2minus1, 0, r9, r10 - load_c v10, hi_hi, 0, r9, r10 - load_c v11, lo_lo, 0, r9, r10 - load_c v12, shift_16, 0, r9, r10 - - li r10, 16 - lvx v0, 0, r3 ;# input ip[0], ip[ 4] - lvx v1, r10, r3 ;# input ip[8], ip[12] - - ;# first pass - vupkhsh v2, v0 - vupkhsh v3, v1 - vaddsws v6, v2, v3 ;# a1 = ip[0]+ip[8] - vsubsws v7, v2, v3 ;# b1 = ip[0]-ip[8] - - vupklsh v0, v0 - vmulosh v4, v0, v8 - vsraw v4, v4, v12 - vaddsws v4, v4, v0 ;# ip[ 4] * sin(pi/8) * sqrt(2) - - vupklsh v1, v1 - vmulosh v5, v1, v9 - vsraw v5, v5, v12 ;# ip[12] * cos(pi/8) * sqrt(2) - vaddsws v5, v5, v1 - - vsubsws v4, v4, v5 ;# c1 - - vmulosh v3, v1, v8 - vsraw v3, v3, v12 - vaddsws v3, v3, v1 ;# ip[12] * sin(pi/8) * sqrt(2) - - vmulosh v5, v0, v9 - vsraw v5, v5, v12 ;# ip[ 4] * cos(pi/8) * sqrt(2) - vaddsws v5, v5, v0 - - vaddsws v3, v3, v5 ;# d1 - - vaddsws v0, v6, v3 ;# a1 + d1 - vsubsws v3, v6, v3 ;# a1 - d1 - - vaddsws v1, v7, v4 ;# b1 + c1 - vsubsws v2, v7, v4 ;# b1 - c1 - - ;# transpose input - vmrghw v4, v0, v1 ;# a0 b0 a1 b1 - vmrghw v5, v2, v3 ;# c0 d0 c1 d1 - - vmrglw v6, v0, v1 ;# a2 b2 a3 b3 - vmrglw v7, v2, v3 ;# c2 d2 c3 d3 - - vperm v0, v4, v5, v10 ;# a0 b0 c0 d0 - vperm v1, v4, v5, v11 ;# a1 b1 c1 d1 - - vperm v2, v6, v7, v10 ;# a2 b2 c2 d2 - vperm v3, v6, v7, v11 ;# a3 b3 c3 d3 - - ;# second pass - vaddsws v6, v0, v2 ;# a1 = ip[0]+ip[8] - vsubsws v7, v0, v2 ;# b1 = ip[0]-ip[8] - - vmulosh v4, v1, v8 - vsraw v4, v4, v12 - vaddsws v4, v4, v1 ;# ip[ 4] * sin(pi/8) * sqrt(2) - - vmulosh v5, v3, v9 - vsraw v5, v5, v12 ;# ip[12] * cos(pi/8) * sqrt(2) - vaddsws v5, v5, v3 - - vsubsws v4, v4, v5 ;# c1 - - vmulosh v2, v3, v8 - vsraw v2, v2, v12 - vaddsws v2, v2, v3 ;# ip[12] * sin(pi/8) * sqrt(2) - - vmulosh v5, v1, v9 - vsraw v5, v5, v12 ;# ip[ 4] * cos(pi/8) * sqrt(2) - vaddsws v5, v5, v1 - - vaddsws v3, v2, v5 ;# d1 - - vaddsws v0, v6, v3 ;# a1 + d1 - vsubsws v3, v6, v3 ;# a1 - d1 - - vaddsws v1, v7, v4 ;# b1 + c1 - vsubsws v2, v7, v4 ;# b1 - c1 - - vspltish v6, 4 - vspltish v7, 3 - - vpkswss v0, v0, v1 - vpkswss v1, v2, v3 - - vaddshs v0, v0, v6 - vaddshs v1, v1, v6 - - vsrah v0, v0, v7 - vsrah v1, v1, v7 - - ;# transpose output - vmrghh v2, v0, v1 ;# a0 c0 a1 c1 a2 c2 a3 c3 - vmrglh v3, v0, v1 ;# b0 d0 b1 d1 b2 d2 b3 d3 - - vmrghh v0, v2, v3 ;# a0 b0 c0 d0 a1 b1 c1 d1 - vmrglh v1, v2, v3 ;# a2 b2 c2 d2 a3 b3 c3 d3 - - stwu r1,-416(r1) ;# create space on the stack - - stvx v0, 0, r1 - lwz r6, 0(r1) - stw r6, 0(r4) - lwz r6, 4(r1) - stw r6, 4(r4) - - add r4, r4, r5 - - lwz r6, 8(r1) - stw r6, 0(r4) - lwz r6, 12(r1) - stw r6, 4(r4) - - add r4, r4, r5 - - stvx v1, 0, r1 - lwz r6, 0(r1) - stw r6, 0(r4) - lwz r6, 4(r1) - stw r6, 
4(r4) - - add r4, r4, r5 - - lwz r6, 8(r1) - stw r6, 0(r4) - lwz r6, 12(r1) - stw r6, 4(r4) - - addi r1, r1, 416 ;# recover stack - - mtspr 256, r11 ;# reset old VRSAVE - - blr - - .align 4 -sinpi8sqrt2: - .short 35468, 35468, 35468, 35468, 35468, 35468, 35468, 35468 - - .align 4 -cospi8sqrt2minus1: - .short 20091, 20091, 20091, 20091, 20091, 20091, 20091, 20091 - - .align 4 -shift_16: - .long 16, 16, 16, 16 - - .align 4 -hi_hi: - .byte 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 - - .align 4 -lo_lo: - .byte 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 diff --git a/vp9/common/ppc/vp9_loopfilter_altivec.c b/vp9/common/ppc/vp9_loopfilter_altivec.c deleted file mode 100644 index 599070a75..000000000 --- a/vp9/common/ppc/vp9_loopfilter_altivec.c +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vp9/common/vp9_loopfilter.h" -#include "vp9/common/vp9_onyxc_int.h" - -typedef void loop_filter_function_y_ppc -( - unsigned char *s, // source pointer - int p, // pitch - const signed char *flimit, - const signed char *limit, - const signed char *thresh -); - -typedef void loop_filter_function_uv_ppc -( - unsigned char *u, // source pointer - unsigned char *v, // source pointer - int p, // pitch - const signed char *flimit, - const signed char *limit, - const signed char *thresh -); - -typedef void loop_filter_function_s_ppc -( - unsigned char *s, // source pointer - int p, // pitch - const signed char *flimit -); - -loop_filter_function_y_ppc mbloop_filter_horizontal_edge_y_ppc; -loop_filter_function_y_ppc mbloop_filter_vertical_edge_y_ppc; -loop_filter_function_y_ppc loop_filter_horizontal_edge_y_ppc; -loop_filter_function_y_ppc loop_filter_vertical_edge_y_ppc; - -loop_filter_function_uv_ppc mbloop_filter_horizontal_edge_uv_ppc; -loop_filter_function_uv_ppc mbloop_filter_vertical_edge_uv_ppc; -loop_filter_function_uv_ppc loop_filter_horizontal_edge_uv_ppc; -loop_filter_function_uv_ppc loop_filter_vertical_edge_uv_ppc; - -loop_filter_function_s_ppc loop_filter_simple_horizontal_edge_ppc; -loop_filter_function_s_ppc loop_filter_simple_vertical_edge_ppc; - -// Horizontal MB filtering -void loop_filter_mbh_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) { - mbloop_filter_horizontal_edge_y_ppc(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr); - - if (u_ptr) - mbloop_filter_horizontal_edge_uv_ppc(u_ptr, v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr); -} - -void loop_filter_mbhs_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) { - (void)u_ptr; - (void)v_ptr; - (void)uv_stride; - loop_filter_simple_horizontal_edge_ppc(y_ptr, y_stride, lfi->mbflim); -} - -// Vertical MB Filtering -void loop_filter_mbv_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) { - mbloop_filter_vertical_edge_y_ppc(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr); - - if (u_ptr) - mbloop_filter_vertical_edge_uv_ppc(u_ptr, v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr); -} - 
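The function-pointer typedefs at the top of this file exist so the wrappers can drive every y-edge (or uv-edge) filter through one signature. A hypothetical illustration of calling through one of them (assumes the loop_filter_info definition from vp9_loopfilter.h, as the file itself does):

    static void apply_y_edge_filter(loop_filter_function_y_ppc *fn,
                                    unsigned char *y_ptr, int y_stride,
                                    loop_filter_info *lfi) {
      fn(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr);
    }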
-void loop_filter_mbvs_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) { - (void)u_ptr; - (void)v_ptr; - (void)uv_stride; - loop_filter_simple_vertical_edge_ppc(y_ptr, y_stride, lfi->mbflim); -} - -// Horizontal B Filtering -void loop_filter_bh_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) { - // These should all be done at once with one call, instead of 3 - loop_filter_horizontal_edge_y_ppc(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr); - loop_filter_horizontal_edge_y_ppc(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr); - loop_filter_horizontal_edge_y_ppc(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr); - - if (u_ptr) - loop_filter_horizontal_edge_uv_ppc(u_ptr + 4 * uv_stride, v_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr); -} - -void loop_filter_bhs_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) { - (void)u_ptr; - (void)v_ptr; - (void)uv_stride; - loop_filter_simple_horizontal_edge_ppc(y_ptr + 4 * y_stride, y_stride, lfi->flim); - loop_filter_simple_horizontal_edge_ppc(y_ptr + 8 * y_stride, y_stride, lfi->flim); - loop_filter_simple_horizontal_edge_ppc(y_ptr + 12 * y_stride, y_stride, lfi->flim); -} - -// Vertical B Filtering -void loop_filter_bv_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) { - loop_filter_vertical_edge_y_ppc(y_ptr, y_stride, lfi->flim, lfi->lim, lfi->thr); - - if (u_ptr) - loop_filter_vertical_edge_uv_ppc(u_ptr + 4, v_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr); -} - -void loop_filter_bvs_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) { - (void)u_ptr; - (void)v_ptr; - (void)uv_stride; - loop_filter_simple_vertical_edge_ppc(y_ptr + 4, y_stride, lfi->flim); - loop_filter_simple_vertical_edge_ppc(y_ptr + 8, y_stride, lfi->flim); - loop_filter_simple_vertical_edge_ppc(y_ptr + 12, y_stride, lfi->flim); -} diff --git a/vp9/common/ppc/vp9_loopfilter_filters_altivec.asm b/vp9/common/ppc/vp9_loopfilter_filters_altivec.asm deleted file mode 100644 index 61df4e976..000000000 --- a/vp9/common/ppc/vp9_loopfilter_filters_altivec.asm +++ /dev/null @@ -1,1253 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - .globl mbloop_filter_horizontal_edge_y_ppc - .globl loop_filter_horizontal_edge_y_ppc - .globl mbloop_filter_vertical_edge_y_ppc - .globl loop_filter_vertical_edge_y_ppc - - .globl mbloop_filter_horizontal_edge_uv_ppc - .globl loop_filter_horizontal_edge_uv_ppc - .globl mbloop_filter_vertical_edge_uv_ppc - .globl loop_filter_vertical_edge_uv_ppc - - .globl loop_filter_simple_horizontal_edge_ppc - .globl loop_filter_simple_vertical_edge_ppc - - .text -;# We often need to perform transposes (and other transpose-like operations) -;# on matrices of data. 
This is simplified by the fact that we usually -;# operate on hunks of data whose dimensions are powers of 2, or at least -;# divisible by highish powers of 2. -;# -;# These operations can be very confusing. They become more straightforward -;# when we think of them as permutations of address bits: Concatenate a -;# group of vector registers and think of it as occupying a block of -;# memory beginning at address zero. The low four bits 0...3 of the -;# address then correspond to position within a register, the higher-order -;# address bits select the register. -;# -;# Although register selection, at the code level, is arbitrary, things -;# are simpler if we use contiguous ranges of register numbers, simpler -;# still if the low-order bits of the register number correspond to -;# conceptual address bits. We do this whenever reasonable. -;# -;# A 16x16 transpose can then be thought of as an operation on -;# a 256-element block of memory. It takes 8 bits 0...7 to address this -;# memory and the effect of a transpose is to interchange address bit -;# 0 with 4, 1 with 5, 2 with 6, and 3 with 7. Bits 0...3 index the -;# column, which is interchanged with the row addressed by bits 4..7. -;# -;# The altivec merge instructions provide a rapid means of effecting -;# many of these transforms. They operate at three widths (8,16,32). -;# Writing V(x) for vector register #x, paired merges permute address -;# indices as follows. -;# -;# 0->1 1->2 2->3 3->(4+d) (4+s)->0: -;# -;# vmrghb V( x), V( y), V( y + (1<<s)) -;# vmrglb V( x + (1<<d)), V( y), V( y + (1<<s)) -;# -;# -;# =0= 1->2 2->3 3->(4+d) (4+s)->1: -;# -;# vmrghh V( x), V( y), V( y + (1<<s)) -;# vmrglh V( x + (1<<d)), V( y), V( y + (1<<s)) -;# -;# -;# =0= =1= 2->3 3->(4+d) (4+s)->2: -;# -;# vmrghw V( x), V( y), V( y + (1<<s)) -;# vmrglw V( x + (1<<d)), V( y), V( y + (1<<s)) -;# -;# -;# Unfortunately, there is no doubleword merge instruction. -;# The following sequence uses "vperm" is a substitute. -;# Assuming that the selection masks b_hihi and b_lolo (defined in LFppc.c) -;# are in registers Vhihi and Vlolo, we can also effect the permutation -;# -;# =0= =1= =2= 3->(4+d) (4+s)->3 by the sequence: -;# -;# vperm V( x), V( y), V( y + (1<<s)), Vhihi -;# vperm V( x + (1<<d)), V( y), V( y + (1<<s)), Vlolo -;# -;# -;# Except for bits s and d, the other relationships between register -;# number (= high-order part of address) bits are at the disposal of -;# the programmer. -;# - -;# To avoid excess transposes, we filter all 3 vertical luma subblock -;# edges together. This requires a single 16x16 transpose, which, in -;# the above language, amounts to the following permutation of address -;# indices: 0<->4 1<->5 2<->6 3<->7, which we accomplish by -;# 4 iterations of the cyclic transform 0->1->2->3->4->5->6->7->0. -;# -;# Except for the fact that the destination registers get written -;# before we are done referencing the old contents, the cyclic transform -;# is effected by -;# -;# x = 0; do { -;# vmrghb V(2x), V(x), V(x+8); -;# vmrghb V(2x+1), V(x), V(x+8); -;# } while( ++x < 8); -;# -;# For clarity, and because we can afford it, we do this transpose -;# using all 32 registers, alternating the banks 0..15 and 16 .. 31, -;# leaving the final result in 16 .. 31, as the lower registers are -;# used in the filtering itself. 
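The address-bit description above can be checked with a scalar model: for a 16x16 block stored row-major in 256 bytes, a transpose simply swaps the low and high nibbles of each 8-bit index (bit 0 with 4, 1 with 5, 2 with 6, 3 with 7):

    /* Reference model of the 16x16 byte transpose the merge
       instructions implement. */
    static void transpose16x16_model(const unsigned char in[256],
                                     unsigned char out[256]) {
      int i;
      for (i = 0; i < 256; ++i)
        out[((i & 0x0f) << 4) | (i >> 4)] = in[i];
    }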
-;# -.macro Tpair A, B, X, Y - vmrghb \A, \X, \Y - vmrglb \B, \X, \Y -.endm - -;# Each step takes 8*2 = 16 instructions - -.macro t16_even - Tpair v16,v17, v0,v8 - Tpair v18,v19, v1,v9 - Tpair v20,v21, v2,v10 - Tpair v22,v23, v3,v11 - Tpair v24,v25, v4,v12 - Tpair v26,v27, v5,v13 - Tpair v28,v29, v6,v14 - Tpair v30,v31, v7,v15 -.endm - -.macro t16_odd - Tpair v0,v1, v16,v24 - Tpair v2,v3, v17,v25 - Tpair v4,v5, v18,v26 - Tpair v6,v7, v19,v27 - Tpair v8,v9, v20,v28 - Tpair v10,v11, v21,v29 - Tpair v12,v13, v22,v30 - Tpair v14,v15, v23,v31 -.endm - -;# Whole transpose takes 4*16 = 64 instructions - -.macro t16_full - t16_odd - t16_even - t16_odd - t16_even -.endm - -;# Vertical edge filtering requires transposes. For the simple filter, -;# we need to convert 16 rows of 4 pels each into 4 registers of 16 pels -;# each. Writing 0 ... 63 for the pixel indices, the desired result is: -;# -;# v0 = 0 1 ... 14 15 -;# v1 = 16 17 ... 30 31 -;# v2 = 32 33 ... 47 48 -;# v3 = 49 50 ... 62 63 -;# -;# In frame-buffer memory, the layout is: -;# -;# 0 16 32 48 -;# 1 17 33 49 -;# ... -;# 15 31 47 63. -;# -;# We begin by reading the data 32 bits at a time (using scalar operations) -;# into a temporary array, reading the rows of the array into vector registers, -;# with the following layout: -;# -;# v0 = 0 16 32 48 4 20 36 52 8 24 40 56 12 28 44 60 -;# v1 = 1 17 33 49 5 21 ... 45 61 -;# v2 = 2 18 ... 46 62 -;# v3 = 3 19 ... 47 63 -;# -;# From the "address-bit" perspective discussed above, we simply need to -;# interchange bits 0 <-> 4 and 1 <-> 5, leaving bits 2 and 3 alone. -;# In other words, we transpose each of the four 4x4 submatrices. -;# -;# This transformation is its own inverse, and we need to perform it -;# again before writing the pixels back into the frame buffer. -;# -;# It acts in place on registers v0...v3, uses v4...v7 as temporaries, -;# and assumes that v14/v15 contain the b_hihi/b_lolo selectors -;# defined above. We think of both groups of 4 registers as having -;# "addresses" {0,1,2,3} * 16. -;# -.macro Transpose4times4x4 Vlo, Vhi - - ;# d=s=0 0->1 1->2 2->3 3->4 4->0 =5= - - vmrghb v4, v0, v1 - vmrglb v5, v0, v1 - vmrghb v6, v2, v3 - vmrglb v7, v2, v3 - - ;# d=0 s=1 =0= 1->2 2->3 3->4 4->5 5->1 - - vmrghh v0, v4, v6 - vmrglh v1, v4, v6 - vmrghh v2, v5, v7 - vmrglh v3, v5, v7 - - ;# d=s=0 =0= =1= 2->3 3->4 4->2 =5= - - vmrghw v4, v0, v1 - vmrglw v5, v0, v1 - vmrghw v6, v2, v3 - vmrglw v7, v2, v3 - - ;# d=0 s=1 =0= =1= =2= 3->4 4->5 5->3 - - vperm v0, v4, v6, \Vlo - vperm v1, v4, v6, \Vhi - vperm v2, v5, v7, \Vlo - vperm v3, v5, v7, \Vhi -.endm -;# end Transpose4times4x4 - - -;# Normal mb vertical edge filter transpose. -;# -;# We read 8 columns of data, initially in the following pattern: -;# -;# (0,0) (1,0) ... (7,0) (0,1) (1,1) ... (7,1) -;# (0,2) (1,2) ... (7,2) (0,3) (1,3) ... (7,3) -;# ... -;# (0,14) (1,14) .. (7,14) (0,15) (1,15) .. (7,15) -;# -;# and wish to convert to: -;# -;# (0,0) ... (0,15) -;# (1,0) ... (1,15) -;# ... -;# (7,0) ... (7,15). -;# -;# In "address bit" language, we wish to map -;# -;# 0->4 1->5 2->6 3->0 4->1 5->2 6->3, i.e., I -> (I+4) mod 7. -;# -;# This can be accomplished by 4 iterations of the cyclic transform -;# -;# I -> (I+1) mod 7; -;# -;# each iteration can be realized by (d=0, s=2): -;# -;# x = 0; do Tpair( V(2x),V(2x+1), V(x),V(x+4)) while( ++x < 4); -;# -;# The input/output is in registers v0...v7. We use v10...v17 as mirrors; -;# preserving v8 = sign converter. 
-;# -;# Inverse transpose is similar, except here I -> (I+3) mod 7 and the -;# result lands in the "mirror" registers v10...v17 -;# -.macro t8x16_odd - Tpair v10, v11, v0, v4 - Tpair v12, v13, v1, v5 - Tpair v14, v15, v2, v6 - Tpair v16, v17, v3, v7 -.endm - -.macro t8x16_even - Tpair v0, v1, v10, v14 - Tpair v2, v3, v11, v15 - Tpair v4, v5, v12, v16 - Tpair v6, v7, v13, v17 -.endm - -.macro transpose8x16_fwd - t8x16_odd - t8x16_even - t8x16_odd - t8x16_even -.endm - -.macro transpose8x16_inv - t8x16_odd - t8x16_even - t8x16_odd -.endm - -.macro Transpose16x16 - vmrghb v0, v16, v24 - vmrglb v1, v16, v24 - vmrghb v2, v17, v25 - vmrglb v3, v17, v25 - vmrghb v4, v18, v26 - vmrglb v5, v18, v26 - vmrghb v6, v19, v27 - vmrglb v7, v19, v27 - vmrghb v8, v20, v28 - vmrglb v9, v20, v28 - vmrghb v10, v21, v29 - vmrglb v11, v21, v29 - vmrghb v12, v22, v30 - vmrglb v13, v22, v30 - vmrghb v14, v23, v31 - vmrglb v15, v23, v31 - vmrghb v16, v0, v8 - vmrglb v17, v0, v8 - vmrghb v18, v1, v9 - vmrglb v19, v1, v9 - vmrghb v20, v2, v10 - vmrglb v21, v2, v10 - vmrghb v22, v3, v11 - vmrglb v23, v3, v11 - vmrghb v24, v4, v12 - vmrglb v25, v4, v12 - vmrghb v26, v5, v13 - vmrglb v27, v5, v13 - vmrghb v28, v6, v14 - vmrglb v29, v6, v14 - vmrghb v30, v7, v15 - vmrglb v31, v7, v15 - vmrghb v0, v16, v24 - vmrglb v1, v16, v24 - vmrghb v2, v17, v25 - vmrglb v3, v17, v25 - vmrghb v4, v18, v26 - vmrglb v5, v18, v26 - vmrghb v6, v19, v27 - vmrglb v7, v19, v27 - vmrghb v8, v20, v28 - vmrglb v9, v20, v28 - vmrghb v10, v21, v29 - vmrglb v11, v21, v29 - vmrghb v12, v22, v30 - vmrglb v13, v22, v30 - vmrghb v14, v23, v31 - vmrglb v15, v23, v31 - vmrghb v16, v0, v8 - vmrglb v17, v0, v8 - vmrghb v18, v1, v9 - vmrglb v19, v1, v9 - vmrghb v20, v2, v10 - vmrglb v21, v2, v10 - vmrghb v22, v3, v11 - vmrglb v23, v3, v11 - vmrghb v24, v4, v12 - vmrglb v25, v4, v12 - vmrghb v26, v5, v13 - vmrglb v27, v5, v13 - vmrghb v28, v6, v14 - vmrglb v29, v6, v14 - vmrghb v30, v7, v15 - vmrglb v31, v7, v15 -.endm - -;# load_g loads a global vector (whose address is in the local variable Gptr) -;# into vector register Vreg. Trashes r0 -.macro load_g Vreg, Gptr - lwz r0, \Gptr - lvx \Vreg, 0, r0 -.endm - -;# exploit the saturation here. if the answer is negative -;# it will be clamped to 0. orring 0 with a positive -;# number will be the positive number (abs) -;# RES = abs( A-B), trashes TMP -.macro Abs RES, TMP, A, B - vsububs \RES, \A, \B - vsububs \TMP, \B, \A - vor \RES, \RES, \TMP -.endm - -;# RES = Max( RES, abs( A-B)), trashes TMP -.macro max_abs RES, TMP, A, B - vsububs \TMP, \A, \B - vmaxub \RES, \RES, \TMP - vsububs \TMP, \B, \A - vmaxub \RES, \RES, \TMP -.endm - -.macro Masks - ;# build masks - ;# input is all 8 bit unsigned (0-255). need to - ;# do abs(vala-valb) > limit. but no need to compare each - ;# value to the limit. find the max of the absolute differences - ;# and compare that to the limit. 
- ;# First hev - Abs v14, v13, v2, v3 ;# |P1 - P0| - max_abs v14, v13, v5, v4 ;# |Q1 - Q0| - - vcmpgtub v10, v14, v10 ;# HEV = true if thresh exceeded - - ;# Next limit - max_abs v14, v13, v0, v1 ;# |P3 - P2| - max_abs v14, v13, v1, v2 ;# |P2 - P1| - max_abs v14, v13, v6, v5 ;# |Q2 - Q1| - max_abs v14, v13, v7, v6 ;# |Q3 - Q2| - - vcmpgtub v9, v14, v9 ;# R = true if limit exceeded - - ;# flimit - Abs v14, v13, v3, v4 ;# |P0 - Q0| - - vcmpgtub v8, v14, v8 ;# X = true if flimit exceeded - - vor v8, v8, v9 ;# R = true if flimit or limit exceeded - ;# done building masks -.endm - -.macro build_constants RFL, RLI, RTH, FL, LI, TH - ;# build constants - lvx \FL, 0, \RFL ;# flimit - lvx \LI, 0, \RLI ;# limit - lvx \TH, 0, \RTH ;# thresh - - vspltisb v11, 8 - vspltisb v12, 4 - vslb v11, v11, v12 ;# 0x80808080808080808080808080808080 -.endm - -.macro load_data_y - ;# setup strides/pointers to be able to access - ;# all of the data - add r5, r4, r4 ;# r5 = 2 * stride - sub r6, r3, r5 ;# r6 -> 2 rows back - neg r7, r4 ;# r7 = -stride - - ;# load 16 pixels worth of data to work on - sub r0, r6, r5 ;# r0 -> 4 rows back (temp) - lvx v0, 0, r0 ;# P3 (read only) - lvx v1, r7, r6 ;# P2 - lvx v2, 0, r6 ;# P1 - lvx v3, r7, r3 ;# P0 - lvx v4, 0, r3 ;# Q0 - lvx v5, r4, r3 ;# Q1 - lvx v6, r5, r3 ;# Q2 - add r0, r3, r5 ;# r0 -> 2 rows fwd (temp) - lvx v7, r4, r0 ;# Q3 (read only) -.endm - -;# Expects -;# v10 == HEV -;# v13 == tmp -;# v14 == tmp -.macro common_adjust P0, Q0, P1, Q1, HEV_PRESENT - vxor \P1, \P1, v11 ;# SP1 - vxor \P0, \P0, v11 ;# SP0 - vxor \Q0, \Q0, v11 ;# SQ0 - vxor \Q1, \Q1, v11 ;# SQ1 - - vsubsbs v13, \P1, \Q1 ;# f = c (P1 - Q1) -.if \HEV_PRESENT - vand v13, v13, v10 ;# f &= hev -.endif - vsubsbs v14, \Q0, \P0 ;# -126 <= X = Q0-P0 <= +126 - vaddsbs v13, v13, v14 - vaddsbs v13, v13, v14 - vaddsbs v13, v13, v14 ;# A = c( c(P1-Q1) + 3*(Q0-P0)) - - vandc v13, v13, v8 ;# f &= mask - - vspltisb v8, 3 - vspltisb v9, 4 - - vaddsbs v14, v13, v9 ;# f1 = c (f+4) - vaddsbs v15, v13, v8 ;# f2 = c (f+3) - - vsrab v13, v14, v8 ;# f1 >>= 3 - vsrab v15, v15, v8 ;# f2 >>= 3 - - vsubsbs \Q0, \Q0, v13 ;# u1 = c (SQ0 - f1) - vaddsbs \P0, \P0, v15 ;# u2 = c (SP0 + f2) -.endm - -.macro vp8_mbfilter - Masks - - ;# start the filtering here - vxor v1, v1, v11 ;# SP2 - vxor v2, v2, v11 ;# SP1 - vxor v3, v3, v11 ;# SP0 - vxor v4, v4, v11 ;# SQ0 - vxor v5, v5, v11 ;# SQ1 - vxor v6, v6, v11 ;# SQ2 - - ;# add outer taps if we have high edge variance - vsubsbs v13, v2, v5 ;# f = c (SP1-SQ1) - - vsubsbs v14, v4, v3 ;# SQ0-SP0 - vaddsbs v13, v13, v14 - vaddsbs v13, v13, v14 - vaddsbs v13, v13, v14 ;# f = c( c(SP1-SQ1) + 3*(SQ0-SP0)) - - vandc v13, v13, v8 ;# f &= mask - vand v15, v13, v10 ;# f2 = f & hev - - ;# save bottom 3 bits so that we round one side +4 and the other +3 - vspltisb v8, 3 - vspltisb v9, 4 - - vaddsbs v14, v15, v9 ;# f1 = c (f+4) - vaddsbs v15, v15, v8 ;# f2 = c (f+3) - - vsrab v14, v14, v8 ;# f1 >>= 3 - vsrab v15, v15, v8 ;# f2 >>= 3 - - vsubsbs v4, v4, v14 ;# u1 = c (SQ0 - f1) - vaddsbs v3, v3, v15 ;# u2 = c (SP0 + f2) - - ;# only apply wider filter if not high edge variance - vandc v13, v13, v10 ;# f &= ~hev - - vspltisb v9, 2 - vnor v8, v8, v8 - vsrb v9, v8, v9 ;# 0x3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f - vupkhsb v9, v9 ;# 0x003f003f003f003f003f003f003f003f - vspltisb v8, 9 - - ;# roughly 1/7th difference across boundary - vspltish v10, 7 - vmulosb v14, v8, v13 ;# A = c( c(P1-Q1) + 3*(Q0-P0)) - vmulesb v15, v8, v13 - vaddshs v14, v14, v9 ;# += 63 - vaddshs v15, v15, v9 - vsrah v14, v14, v10 ;# >>= 7 - vsrah
v15, v15, v10 - vmrglh v10, v15, v14 - vmrghh v15, v15, v14 - - vpkshss v10, v15, v10 ;# X = saturated down to bytes - - vsubsbs v6, v6, v10 ;# subtract from Q and add to P - vaddsbs v1, v1, v10 - - vxor v6, v6, v11 - vxor v1, v1, v11 - - ;# roughly 2/7th difference across boundary - vspltish v10, 7 - vaddubm v12, v8, v8 - vmulosb v14, v12, v13 ;# A = c( c(P1-Q1) + 3*(Q0-P0)) - vmulesb v15, v12, v13 - vaddshs v14, v14, v9 - vaddshs v15, v15, v9 - vsrah v14, v14, v10 ;# >>= 7 - vsrah v15, v15, v10 - vmrglh v10, v15, v14 - vmrghh v15, v15, v14 - - vpkshss v10, v15, v10 ;# X = saturated down to bytes - - vsubsbs v5, v5, v10 ;# subtract from Q and add to P - vaddsbs v2, v2, v10 - - vxor v5, v5, v11 - vxor v2, v2, v11 - - ;# roughly 3/7th difference across boundary - vspltish v10, 7 - vaddubm v12, v12, v8 - vmulosb v14, v12, v13 ;# A = c( c(P1-Q1) + 3*(Q0-P0)) - vmulesb v15, v12, v13 - vaddshs v14, v14, v9 - vaddshs v15, v15, v9 - vsrah v14, v14, v10 ;# >>= 7 - vsrah v15, v15, v10 - vmrglh v10, v15, v14 - vmrghh v15, v15, v14 - - vpkshss v10, v15, v10 ;# X = saturated down to bytes - - vsubsbs v4, v4, v10 ;# subtract from Q and add to P - vaddsbs v3, v3, v10 - - vxor v4, v4, v11 - vxor v3, v3, v11 -.endm - -.macro SBFilter - Masks - - common_adjust v3, v4, v2, v5, 1 - - ;# outer tap adjustments - vspltisb v8, 1 - - vaddubm v13, v13, v8 ;# f += 1 - vsrab v13, v13, v8 ;# f >>= 1 - - vandc v13, v13, v10 ;# f &= ~hev - - vsubsbs v5, v5, v13 ;# u1 = c (SQ1 - f) - vaddsbs v2, v2, v13 ;# u2 = c (SP1 + f) - - vxor v2, v2, v11 - vxor v3, v3, v11 - vxor v4, v4, v11 - vxor v5, v5, v11 -.endm - - .align 2 -mbloop_filter_horizontal_edge_y_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffff - mtspr 256, r12 ;# set VRSAVE - - build_constants r5, r6, r7, v8, v9, v10 - - load_data_y - - vp8_mbfilter - - stvx v1, r7, r6 ;# P2 - stvx v2, 0, r6 ;# P1 - stvx v3, r7, r3 ;# P0 - stvx v4, 0, r3 ;# Q0 - stvx v5, r4, r3 ;# Q1 - stvx v6, r5, r3 ;# Q2 - - mtspr 256, r11 ;# reset old VRSAVE - - blr - - .align 2 -;# r3 unsigned char *s -;# r4 int p -;# r5 const signed char *flimit -;# r6 const signed char *limit -;# r7 const signed char *thresh -loop_filter_horizontal_edge_y_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffff - mtspr 256, r12 ;# set VRSAVE - - build_constants r5, r6, r7, v8, v9, v10 - - load_data_y - - SBFilter - - stvx v2, 0, r6 ;# P1 - stvx v3, r7, r3 ;# P0 - stvx v4, 0, r3 ;# Q0 - stvx v5, r4, r3 ;# Q1 - - mtspr 256, r11 ;# reset old VRSAVE - - blr - -;# Filtering a vertical mb. Each mb is aligned on a 16 byte boundary. -;# So we can read in an entire mb aligned. However, if we want to filter the mb -;# edge we run into problems. For the loopfilter we require 4 bytes before the mb -;# and 4 after for a total of 8 bytes. Reading 16 bytes in order to get 4 is a bit -;# of a waste. So this is an even uglier way to get around that. -;# Using the regular register file, words are read in and then saved back out to -;# memory to align and order them up. Then they are read in using the -;# vector register file.
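The scalar staging just described is easy to model in C. A minimal sketch, assuming a stage_two_rows helper of our own naming (the asm below does these moves with lwzux/stw word pairs rather than memcpy): each 16-byte temp vector receives two rows of the 8 pels straddling the edge, so one aligned lvx replaces two misaligned vector loads per row.

#include <stdint.h>
#include <string.h>

/* Stage rows n and n+1 into one aligned 16-byte buffer: 4 pels before
 * the vertical edge and 4 after, per row, which is what RLVmb gathers. */
static void stage_two_rows(const uint8_t *edge, int stride, uint8_t buf[16]) {
  memcpy(buf + 0, edge - 4, 8);           /* row n:   pels s[-4..3] */
  memcpy(buf + 8, edge + stride - 4, 8);  /* row n+1: pels s[-4..3] */
}

Eight such staged vectors cover the 16 rows of a macroblock; transpose8x16_fwd then turns them into the eight P3..Q3 registers the filter expects.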
-.macro RLVmb V, R - lwzux r0, r3, r4 - stw r0, 4(\R) - lwz r0,-4(r3) - stw r0, 0(\R) - lwzux r0, r3, r4 - stw r0,12(\R) - lwz r0,-4(r3) - stw r0, 8(\R) - lvx \V, 0, \R -.endm - -.macro WLVmb V, R - stvx \V, 0, \R - lwz r0,12(\R) - stwux r0, r3, r4 - lwz r0, 8(\R) - stw r0,-4(r3) - lwz r0, 4(\R) - stwux r0, r3, r4 - lwz r0, 0(\R) - stw r0,-4(r3) -.endm - - .align 2 -;# r3 unsigned char *s -;# r4 int p -;# r5 const signed char *flimit -;# r6 const signed char *limit -;# r7 const signed char *thresh -mbloop_filter_vertical_edge_y_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffff - ori r12, r12, 0xc000 - mtspr 256, r12 ;# set VRSAVE - - la r9, -48(r1) ;# temporary space for reading in vectors - sub r3, r3, r4 - - RLVmb v0, r9 - RLVmb v1, r9 - RLVmb v2, r9 - RLVmb v3, r9 - RLVmb v4, r9 - RLVmb v5, r9 - RLVmb v6, r9 - RLVmb v7, r9 - - transpose8x16_fwd - - build_constants r5, r6, r7, v8, v9, v10 - - vp8_mbfilter - - transpose8x16_inv - - add r3, r3, r4 - neg r4, r4 - - WLVmb v17, r9 - WLVmb v16, r9 - WLVmb v15, r9 - WLVmb v14, r9 - WLVmb v13, r9 - WLVmb v12, r9 - WLVmb v11, r9 - WLVmb v10, r9 - - mtspr 256, r11 ;# reset old VRSAVE - - blr - -.macro RL V, R, P - lvx \V, 0, \R - add \R, \R, \P -.endm - -.macro WL V, R, P - stvx \V, 0, \R - add \R, \R, \P -.endm - -.macro Fil P3, P2, P1, P0, Q0, Q1, Q2, Q3 - ;# K = |P0-P1| already - Abs v14, v13, \Q0, \Q1 ;# M = |Q0-Q1| - vmaxub v14, v14, v4 ;# M = max( |P0-P1|, |Q0-Q1|) - vcmpgtub v10, v14, v0 - - Abs v4, v5, \Q2, \Q3 ;# K = |Q2-Q3| = next |P0-P1| - - max_abs v14, v13, \Q1, \Q2 ;# M = max( M, |Q1-Q2|) - max_abs v14, v13, \P1, \P2 ;# M = max( M, |P1-P2|) - max_abs v14, v13, \P2, \P3 ;# M = max( M, |P2-P3|) - - vmaxub v14, v14, v4 ;# M = max interior abs diff - vcmpgtub v9, v14, v2 ;# M = true if int_l exceeded - - Abs v14, v13, \P0, \Q0 ;# X = Abs( P0-Q0) - vcmpgtub v8, v14, v3 ;# X = true if edge_l exceeded - vor v8, v8, v9 ;# M = true if edge_l or int_l exceeded - - ;# replace P1,Q1 w/signed versions - common_adjust \P0, \Q0, \P1, \Q1, 1 - - vaddubm v13, v13, v1 ;# -16 <= M <= 15, saturation irrelevant - vsrab v13, v13, v1 - vandc v13, v13, v10 ;# adjust P1,Q1 by (M+1)>>1 if !
hev - vsubsbs \Q1, \Q1, v13 - vaddsbs \P1, \P1, v13 - - vxor \P1, \P1, v11 ;# P1 - vxor \P0, \P0, v11 ;# P0 - vxor \Q0, \Q0, v11 ;# Q0 - vxor \Q1, \Q1, v11 ;# Q1 -.endm - - - .align 2 -;# r3 unsigned char *s -;# r4 int p -;# r5 const signed char *flimit -;# r6 const signed char *limit -;# r7 const signed char *thresh -loop_filter_vertical_edge_y_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffff - ori r12, r12, 0xffff - mtspr 256, r12 ;# set VRSAVE - - addi r9, r3, 0 - RL v16, r9, r4 - RL v17, r9, r4 - RL v18, r9, r4 - RL v19, r9, r4 - RL v20, r9, r4 - RL v21, r9, r4 - RL v22, r9, r4 - RL v23, r9, r4 - RL v24, r9, r4 - RL v25, r9, r4 - RL v26, r9, r4 - RL v27, r9, r4 - RL v28, r9, r4 - RL v29, r9, r4 - RL v30, r9, r4 - lvx v31, 0, r9 - - Transpose16x16 - - vspltisb v1, 1 - - build_constants r5, r6, r7, v3, v2, v0 - - Abs v4, v5, v19, v18 ;# K(v14) = first |P0-P1| - - Fil v16, v17, v18, v19, v20, v21, v22, v23 - Fil v20, v21, v22, v23, v24, v25, v26, v27 - Fil v24, v25, v26, v27, v28, v29, v30, v31 - - Transpose16x16 - - addi r9, r3, 0 - WL v16, r9, r4 - WL v17, r9, r4 - WL v18, r9, r4 - WL v19, r9, r4 - WL v20, r9, r4 - WL v21, r9, r4 - WL v22, r9, r4 - WL v23, r9, r4 - WL v24, r9, r4 - WL v25, r9, r4 - WL v26, r9, r4 - WL v27, r9, r4 - WL v28, r9, r4 - WL v29, r9, r4 - WL v30, r9, r4 - stvx v31, 0, r9 - - mtspr 256, r11 ;# reset old VRSAVE - - blr - -;# -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- UV FILTERING -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- -.macro active_chroma_sel V - andi. r7, r3, 8 ;# row origin modulo 16 - add r7, r7, r7 ;# selects selectors - lis r12, _chromaSelectors@ha - la r0, _chromaSelectors@l(r12) - lwzux r0, r7, r0 ;# leave selector addr in r7 - - lvx \V, 0, r0 ;# mask to concatenate active U,V pels -.endm - -.macro hread_uv Dest, U, V, Offs, VMask - lvx \U, \Offs, r3 - lvx \V, \Offs, r4 - vperm \Dest, \U, \V, \VMask ;# Dest = active part of U then V -.endm - -.macro hwrite_uv New, U, V, Offs, Umask, Vmask - vperm \U, \New, \U, \Umask ;# Combine new pels with siblings - vperm \V, \New, \V, \Vmask - stvx \U, \Offs, r3 ;# Write to frame buffer - stvx \V, \Offs, r4 -.endm - -;# Process U,V in parallel. 
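Chroma rows are only 8 pels wide, so rather than filtering U and V separately, the macros that follow build one 16-pel vector from the active U half-row and the active V half-row (the vperm selectors under _chromaSelectors further below choose which half of each plane is live), filter it once, and scatter the results back. A rough C model of the pack/unpack, with hypothetical names and the half-selection simplified away:

#include <stdint.h>
#include <string.h>

/* Sketch of hread_uv: concatenate 8 active U pels and 8 active V pels. */
static void pack_uv(const uint8_t *u, const uint8_t *v, uint8_t row[16]) {
  memcpy(row + 0, u, 8);
  memcpy(row + 8, v, 8);
}

/* Sketch of hwrite_uv: scatter the filtered pels back to both planes. */
static void unpack_uv(const uint8_t row[16], uint8_t *u, uint8_t *v) {
  memcpy(u, row + 0, 8);
  memcpy(v, row + 8, 8);
}

The payoff is that vp8_mbfilter and SBFilter run once per chroma edge instead of once per plane.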
-.macro load_chroma_h - neg r9, r5 ;# r9 = -1 * stride - add r8, r9, r9 ;# r8 = -2 * stride - add r10, r5, r5 ;# r10 = 2 * stride - - active_chroma_sel v12 - - ;# P3, Q3 are read-only; need not save addresses or sibling pels - add r6, r8, r8 ;# r6 = -4 * stride - hread_uv v0, v14, v15, r6, v12 - add r6, r10, r5 ;# r6 = 3 * stride - hread_uv v7, v14, v15, r6, v12 - - ;# Others are read/write; save addresses and sibling pels - - add r6, r8, r9 ;# r6 = -3 * stride - hread_uv v1, v16, v17, r6, v12 - hread_uv v2, v18, v19, r8, v12 - hread_uv v3, v20, v21, r9, v12 - hread_uv v4, v22, v23, 0, v12 - hread_uv v5, v24, v25, r5, v12 - hread_uv v6, v26, v27, r10, v12 -.endm - -.macro uresult_sel V - load_g \V, 4(r7) -.endm - -.macro vresult_sel V - load_g \V, 8(r7) -.endm - -;# always write P1,P0,Q0,Q1 -.macro store_chroma_h - uresult_sel v11 - vresult_sel v12 - hwrite_uv v2, v18, v19, r8, v11, v12 - hwrite_uv v3, v20, v21, r9, v11, v12 - hwrite_uv v4, v22, v23, 0, v11, v12 - hwrite_uv v5, v24, v25, r5, v11, v12 -.endm - - .align 2 -;# r3 unsigned char *u -;# r4 unsigned char *v -;# r5 int p -;# r6 const signed char *flimit -;# r7 const signed char *limit -;# r8 const signed char *thresh -mbloop_filter_horizontal_edge_uv_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffff - ori r12, r12, 0xffff - mtspr 256, r12 ;# set VRSAVE - - build_constants r6, r7, r8, v8, v9, v10 - - load_chroma_h - - vp8_mbfilter - - store_chroma_h - - hwrite_uv v1, v16, v17, r6, v11, v12 ;# v1 == P2 - hwrite_uv v6, v26, v27, r10, v11, v12 ;# v6 == Q2 - - mtspr 256, r11 ;# reset old VRSAVE - - blr - - .align 2 -;# r3 unsigned char *u -;# r4 unsigned char *v -;# r5 int p -;# r6 const signed char *flimit -;# r7 const signed char *limit -;# r8 const signed char *thresh -loop_filter_horizontal_edge_uv_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffff - ori r12, r12, 0xffff - mtspr 256, r12 ;# set VRSAVE - - build_constants r6, r7, r8, v8, v9, v10 - - load_chroma_h - - SBFilter - - store_chroma_h - - mtspr 256, r11 ;# reset old VRSAVE - - blr - -.macro R V, R - lwzux r0, r3, r5 - stw r0, 4(\R) - lwz r0,-4(r3) - stw r0, 0(\R) - lwzux r0, r4, r5 - stw r0,12(\R) - lwz r0,-4(r4) - stw r0, 8(\R) - lvx \V, 0, \R -.endm - - -.macro W V, R - stvx \V, 0, \R - lwz r0,12(\R) - stwux r0, r4, r5 - lwz r0, 8(\R) - stw r0,-4(r4) - lwz r0, 4(\R) - stwux r0, r3, r5 - lwz r0, 0(\R) - stw r0,-4(r3) -.endm - -.macro chroma_vread R - sub r3, r3, r5 ;# back up one line for simplicity - sub r4, r4, r5 - - R v0, \R - R v1, \R - R v2, \R - R v3, \R - R v4, \R - R v5, \R - R v6, \R - R v7, \R - - transpose8x16_fwd -.endm - -.macro chroma_vwrite R - - transpose8x16_inv - - add r3, r3, r5 - add r4, r4, r5 - neg r5, r5 ;# Write rows back in reverse order - - W v17, \R - W v16, \R - W v15, \R - W v14, \R - W v13, \R - W v12, \R - W v11, \R - W v10, \R -.endm - - .align 2 -;# r3 unsigned char *u -;# r4 unsigned char *v -;# r5 int p -;# r6 const signed char *flimit -;# r7 const signed char *limit -;# r8 const signed char *thresh -mbloop_filter_vertical_edge_uv_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffff - ori r12, r12, 0xc000 - mtspr 256, r12 ;# set VRSAVE - - la r9, -48(r1) ;# temporary space for reading in vectors - - chroma_vread r9 - - build_constants r6, r7, r8, v8, v9, v10 - - vp8_mbfilter - - chroma_vwrite r9 - - mtspr 256, r11 ;# reset old VRSAVE - - blr - - .align 2 -;# r3 unsigned char *u -;# r4 unsigned char *v -;# r5 int p -;# r6 const signed char *flimit -;# r7 const signed char *limit -;# r8 const signed 
char *thresh -loop_filter_vertical_edge_uv_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffff - ori r12, r12, 0xc000 - mtspr 256, r12 ;# set VRSAVE - - la r9, -48(r1) ;# temporary space for reading in vectors - - chroma_vread r9 - - build_constants r6, r7, r8, v8, v9, v10 - - SBFilter - - chroma_vwrite r9 - - mtspr 256, r11 ;# reset old VRSAVE - - blr - -;# -=-=-=-=-=-=-=-=-=-=-=-=-=-= SIMPLE LOOP FILTER =-=-=-=-=-=-=-=-=-=-=-=-=-=- - -.macro vp8_simple_filter - Abs v14, v13, v1, v2 ;# M = abs( P0 - Q0) - vcmpgtub v8, v14, v8 ;# v5 = true if _over_ limit - - ;# preserve unsigned v0 and v3 - common_adjust v1, v2, v0, v3, 0 - - vxor v1, v1, v11 - vxor v2, v2, v11 ;# cvt Q0, P0 back to pels -.endm - -.macro simple_vertical - addi r8, 0, 16 - addi r7, r5, 32 - - lvx v0, 0, r5 - lvx v1, r8, r5 - lvx v2, 0, r7 - lvx v3, r8, r7 - - lis r12, _B_hihi@ha - la r0, _B_hihi@l(r12) - lvx v16, 0, r0 - - lis r12, _B_lolo@ha - la r0, _B_lolo@l(r12) - lvx v17, 0, r0 - - Transpose4times4x4 v16, v17 - vp8_simple_filter - - vxor v0, v0, v11 - vxor v3, v3, v11 ;# cvt Q0, P0 back to pels - - Transpose4times4x4 v16, v17 - - stvx v0, 0, r5 - stvx v1, r8, r5 - stvx v2, 0, r7 - stvx v3, r8, r7 -.endm - - .align 2 -;# r3 unsigned char *s -;# r4 int p -;# r5 const signed char *flimit -loop_filter_simple_horizontal_edge_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffff - mtspr 256, r12 ;# set VRSAVE - - ;# build constants - lvx v8, 0, r5 ;# flimit - - vspltisb v11, 8 - vspltisb v12, 4 - vslb v11, v11, v12 ;# 0x80808080808080808080808080808080 - - neg r5, r4 ;# r5 = -1 * stride - add r6, r5, r5 ;# r6 = -2 * stride - - lvx v0, r6, r3 ;# v0 = P1 = 16 pels two rows above edge - lvx v1, r5, r3 ;# v1 = P0 = 16 pels one row above edge - lvx v2, 0, r3 ;# v2 = Q0 = 16 pels one row below edge - lvx v3, r4, r3 ;# v3 = Q1 = 16 pels two rows below edge - - vp8_simple_filter - - stvx v1, r5, r3 ;# store P0 - stvx v2, 0, r3 ;# store Q0 - - mtspr 256, r11 ;# reset old VRSAVE - - blr - -.macro RLV Offs - stw r0, (\Offs*4)(r5) - lwzux r0, r7, r4 -.endm - -.macro WLV Offs - lwz r0, (\Offs*4)(r5) - stwux r0, r7, r4 -.endm - - .align 2 -;# r3 unsigned char *s -;# r4 int p -;# r5 const signed char *flimit -loop_filter_simple_vertical_edge_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffff - ori r12, r12, 0xc000 - mtspr 256, r12 ;# set VRSAVE - - ;# build constants - lvx v8, 0, r5 ;# flimit - - vspltisb v11, 8 - vspltisb v12, 4 - vslb v11, v11, v12 ;# 0x80808080808080808080808080808080 - - la r5, -96(r1) ;# temporary space for reading in vectors - - ;# Store 4 pels at word "Offs" in temp array, then advance r7 - ;# to next row and read another 4 pels from the frame buffer. - - subi r7, r3, 2 ;# r7 -> 2 pels before start - lwzx r0, 0, r7 ;# read first 4 pels - - ;# 16 unaligned word accesses - RLV 0 - RLV 4 - RLV 8 - RLV 12 - RLV 1 - RLV 5 - RLV 9 - RLV 13 - RLV 2 - RLV 6 - RLV 10 - RLV 14 - RLV 3 - RLV 7 - RLV 11 - - stw r0, (15*4)(r5) ;# write last 4 pels - - simple_vertical - - ;# Read temp array, write frame buffer. 
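;# Note: the RLV sequence above stores row r at word offset
;# w(r) = ((r & 3) << 2) | (r >> 2) of the temp array (w is our
;# shorthand, not a symbol in this file). That is a 4x4 transpose at
;# 32-bit-word granularity, obtained for free during the scalar copy;
;# and since w is its own inverse, the WLV sequence below replays the
;# same offsets on write-back, leaving simple_vertical to transpose
;# only within the 4x4 byte submatrices.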
- subi r7, r3, 2 ;# r7 -> 2 pels before start - lwzx r0, 0, r5 ;# read/write first 4 pels - stwx r0, 0, r7 - - WLV 4 - WLV 8 - WLV 12 - WLV 1 - WLV 5 - WLV 9 - WLV 13 - WLV 2 - WLV 6 - WLV 10 - WLV 14 - WLV 3 - WLV 7 - WLV 11 - WLV 15 - - mtspr 256, r11 ;# reset old VRSAVE - - blr - - .data - -_chromaSelectors: - .long _B_hihi - .long _B_Ures0 - .long _B_Vres0 - .long 0 - .long _B_lolo - .long _B_Ures8 - .long _B_Vres8 - .long 0 - - .align 4 -_B_Vres8: - .byte 16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15 - - .align 4 -_B_Ures8: - .byte 16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7 - - .align 4 -_B_lolo: - .byte 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 - - .align 4 -_B_Vres0: - .byte 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 - .align 4 -_B_Ures0: - .byte 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 - - .align 4 -_B_hihi: - .byte 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 diff --git a/vp9/common/ppc/vp9_platform_altivec.asm b/vp9/common/ppc/vp9_platform_altivec.asm deleted file mode 100644 index f81d86f74..000000000 --- a/vp9/common/ppc/vp9_platform_altivec.asm +++ /dev/null @@ -1,59 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - .globl save_platform_context - .globl restore_platform_context - -.macro W V P - stvx \V, 0, \P - addi \P, \P, 16 -.endm - -.macro R V P - lvx \V, 0, \P - addi \P, \P, 16 -.endm - -;# r3 context_ptr - .align 2 -save_platform_context: - W v20, r3 - W v21, r3 - W v22, r3 - W v23, r3 - W v24, r3 - W v25, r3 - W v26, r3 - W v27, r3 - W v28, r3 - W v29, r3 - W v30, r3 - W v31, r3 - - blr - -;# r3 context_ptr - .align 2 -restore_platform_context: - R v20, r3 - R v21, r3 - R v22, r3 - R v23, r3 - R v24, r3 - R v25, r3 - R v26, r3 - R v27, r3 - R v28, r3 - R v29, r3 - R v30, r3 - R v31, r3 - - blr diff --git a/vp9/common/ppc/vp9_recon_altivec.asm b/vp9/common/ppc/vp9_recon_altivec.asm deleted file mode 100644 index dd39e05a8..000000000 --- a/vp9/common/ppc/vp9_recon_altivec.asm +++ /dev/null @@ -1,175 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree.
-; - - - .globl recon4b_ppc - .globl recon2b_ppc - .globl recon_b_ppc - -.macro row_of16 Diff Pred Dst Stride - lvx v1, 0, \Pred ;# v1 = pred = p0..p15 - addi \Pred, \Pred, 16 ;# next pred - vmrghb v2, v0, v1 ;# v2 = 16-bit p0..p7 - lvx v3, 0, \Diff ;# v3 = d0..d7 - vaddshs v2, v2, v3 ;# v2 = r0..r7 - vmrglb v1, v0, v1 ;# v1 = 16-bit p8..p15 - lvx v3, r8, \Diff ;# v3 = d8..d15 - addi \Diff, \Diff, 32 ;# next diff - vaddshs v3, v3, v1 ;# v3 = r8..r15 - vpkshus v2, v2, v3 ;# v2 = 8-bit r0..r15 - stvx v2, 0, \Dst ;# to dst - add \Dst, \Dst, \Stride ;# next dst -.endm - - .text - .align 2 -;# r3 = short *diff_ptr, -;# r4 = unsigned char *pred_ptr, -;# r5 = unsigned char *dst_ptr, -;# r6 = int stride -recon4b_ppc: - mfspr r0, 256 ;# get old VRSAVE - stw r0, -8(r1) ;# save old VRSAVE to stack - oris r0, r0, 0xf000 - mtspr 256,r0 ;# set VRSAVE - - vxor v0, v0, v0 - li r8, 16 - - row_of16 r3, r4, r5, r6 - row_of16 r3, r4, r5, r6 - row_of16 r3, r4, r5, r6 - row_of16 r3, r4, r5, r6 - - lwz r12, -8(r1) ;# restore old VRSAVE from stack - mtspr 256, r12 ;# reset old VRSAVE - - blr - -.macro two_rows_of8 Diff Pred Dst Stride write_first_four_pels - lvx v1, 0, \Pred ;# v1 = pred = p0..p15 - vmrghb v2, v0, v1 ;# v2 = 16-bit p0..p7 - lvx v3, 0, \Diff ;# v3 = d0..d7 - vaddshs v2, v2, v3 ;# v2 = r0..r7 - vmrglb v1, v0, v1 ;# v1 = 16-bit p8..p15 - lvx v3, r8, \Diff ;# v3 = d8..d15 - vaddshs v3, v3, v1 ;# v3 = r8..r15 - vpkshus v2, v2, v3 ;# v2 = 8-bit r0..r15 - stvx v2, 0, r10 ;# 2 rows to dst from buf - lwz r0, 0(r10) -.if \write_first_four_pels - stw r0, 0(\Dst) - .else - stwux r0, \Dst, \Stride -.endif - lwz r0, 4(r10) - stw r0, 4(\Dst) - lwz r0, 8(r10) - stwux r0, \Dst, \Stride ;# advance dst to next row - lwz r0, 12(r10) - stw r0, 4(\Dst) -.endm - - .align 2 -;# r3 = short *diff_ptr, -;# r4 = unsigned char *pred_ptr, -;# r5 = unsigned char *dst_ptr, -;# r6 = int stride - -recon2b_ppc: - mfspr r0, 256 ;# get old VRSAVE - stw r0, -8(r1) ;# save old VRSAVE to stack - oris r0, r0, 0xf000 - mtspr 256,r0 ;# set VRSAVE - - vxor v0, v0, v0 - li r8, 16 - - la r10, -48(r1) ;# buf - - two_rows_of8 r3, r4, r5, r6, 1 - - addi r4, r4, 16; ;# next pred - addi r3, r3, 32; ;# next diff - - two_rows_of8 r3, r4, r5, r6, 0 - - lwz r12, -8(r1) ;# restore old VRSAVE from stack - mtspr 256, r12 ;# reset old VRSAVE - - blr - -.macro get_two_diff_rows - stw r0, 0(r10) - lwz r0, 4(r3) - stw r0, 4(r10) - lwzu r0, 32(r3) - stw r0, 8(r10) - lwz r0, 4(r3) - stw r0, 12(r10) - lvx v3, 0, r10 -.endm - - .align 2 -;# r3 = short *diff_ptr, -;# r4 = unsigned char *pred_ptr, -;# r5 = unsigned char *dst_ptr, -;# r6 = int stride -recon_b_ppc: - mfspr r0, 256 ;# get old VRSAVE - stw r0, -8(r1) ;# save old VRSAVE to stack - oris r0, r0, 0xf000 - mtspr 256,r0 ;# set VRSAVE - - vxor v0, v0, v0 - - la r10, -48(r1) ;# buf - - lwz r0, 0(r4) - stw r0, 0(r10) - lwz r0, 16(r4) - stw r0, 4(r10) - lwz r0, 32(r4) - stw r0, 8(r10) - lwz r0, 48(r4) - stw r0, 12(r10) - - lvx v1, 0, r10; ;# v1 = pred = p0..p15 - - lwz r0, 0(r3) ;# v3 = d0..d7 - - get_two_diff_rows - - vmrghb v2, v0, v1; ;# v2 = 16-bit p0..p7 - vaddshs v2, v2, v3; ;# v2 = r0..r7 - - lwzu r0, 32(r3) ;# v3 = d8..d15 - - get_two_diff_rows - - vmrglb v1, v0, v1; ;# v1 = 16-bit p8..p15 - vaddshs v3, v3, v1; ;# v3 = r8..r15 - - vpkshus v2, v2, v3; ;# v2 = 8-bit r0..r15 - stvx v2, 0, r10; ;# 16 pels to dst from buf - - lwz r0, 0(r10) - stw r0, 0(r5) - lwz r0, 4(r10) - stwux r0, r5, r6 - lwz r0, 8(r10) - stwux r0, r5, r6 - lwz r0, 12(r10) - stwx r0, r5, r6 - - lwz r12, -8(r1) ;# restore old VRSAVE
from stack - mtspr 256, r12 ;# reset old VRSAVE - - blr diff --git a/vp9/common/ppc/vp9_systemdependent.c b/vp9/common/ppc/vp9_systemdependent.c deleted file mode 100644 index a6be550a1..000000000 --- a/vp9/common/ppc/vp9_systemdependent.c +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vp9/common/vp9_loopfilter.h" -#include "recon.h" -#include "vp9/common/vp9_onyxc_int.h" - -void (*vp8_short_idct4x4)(short *input, short *output, int pitch); -void (*vp8_short_idct4x4_1)(short *input, short *output, int pitch); -void (*vp8_dc_only_idct)(short input_dc, short *output, int pitch); - -extern void (*vp9_post_proc_down_and_across)(unsigned char *src_ptr, - unsigned char *dst_ptr, - int src_pixels_per_line, - int dst_pixels_per_line, - int rows, int cols, int flimit); - -extern void (*vp9_mbpost_proc_down)(unsigned char *dst, int pitch, - int rows, int cols, int flimit); -extern void vp9_mbpost_proc_down_c(unsigned char *dst, int pitch, - int rows, int cols, int flimit); -extern void (*vp9_mbpost_proc_across_ip)(unsigned char *src, int pitch, - int rows, int cols, int flimit); -extern void vp9_mbpost_proc_across_ip_c(unsigned char *src, int pitch, - int rows, int cols, int flimit); -extern void vp9_post_proc_down_and_across_c(unsigned char *src_ptr, - unsigned char *dst_ptr, - int src_pixels_per_line, - int dst_pixels_per_line, - int rows, int cols, int flimit); -void vp9_plane_add_noise_c(unsigned char *start, - unsigned int width, unsigned int height, - int pitch, int q, int a); - -extern copy_mem_block_function *vp9_copy_mem16x16; -extern copy_mem_block_function *vp9_copy_mem8x8; -extern copy_mem_block_function *vp9_copy_mem8x4; - -// PPC -extern subpixel_predict_function sixtap_predict_ppc; -extern subpixel_predict_function sixtap_predict8x4_ppc; -extern subpixel_predict_function sixtap_predict8x8_ppc; -extern subpixel_predict_function sixtap_predict16x16_ppc; -extern subpixel_predict_function bilinear_predict4x4_ppc; -extern subpixel_predict_function bilinear_predict8x4_ppc; -extern subpixel_predict_function bilinear_predict8x8_ppc; -extern subpixel_predict_function bilinear_predict16x16_ppc; - -extern copy_mem_block_function copy_mem16x16_ppc; - -void recon_b_ppc(short *diff_ptr, unsigned char *pred_ptr, - unsigned char *dst_ptr, int stride); -void recon2b_ppc(short *diff_ptr, unsigned char *pred_ptr, - unsigned char *dst_ptr, int stride); -void recon4b_ppc(short *diff_ptr, unsigned char *pred_ptr, - unsigned char *dst_ptr, int stride); - -extern void short_idct4x4_ppc(short *input, short *output, int pitch); - -// Generic C -extern subpixel_predict_function vp9_sixtap_predict_c; -extern subpixel_predict_function vp9_sixtap_predict8x4_c; -extern subpixel_predict_function vp9_sixtap_predict8x8_c; -extern subpixel_predict_function vp9_sixtap_predict16x16_c; -extern subpixel_predict_function vp9_bilinear_predict4x4_c; -extern subpixel_predict_function vp9_bilinear_predict8x4_c; -extern subpixel_predict_function vp9_bilinear_predict8x8_c; -extern subpixel_predict_function vp9_bilinear_predict16x16_c; - -extern copy_mem_block_function vp9_copy_mem16x16_c; -extern copy_mem_block_function 
vp9_copy_mem8x8_c; -extern copy_mem_block_function vp9_copy_mem8x4_c; - -void vp9_recon_b_c(short *diff_ptr, unsigned char *pred_ptr, - unsigned char *dst_ptr, int stride); -void vp9_recon2b_c(short *diff_ptr, unsigned char *pred_ptr, - unsigned char *dst_ptr, int stride); -void vp9_recon4b_c(short *diff_ptr, unsigned char *pred_ptr, - unsigned char *dst_ptr, int stride); - -extern void vp9_short_idct4x4_1_c(short *input, short *output, int pitch); -extern void vp9_short_idct4x4_c(short *input, short *output, int pitch); -extern void vp8_dc_only_idct_c(short input_dc, short *output, int pitch); - -// PPC -extern loop_filter_block_function loop_filter_mbv_ppc; -extern loop_filter_block_function loop_filter_bv_ppc; -extern loop_filter_block_function loop_filter_mbh_ppc; -extern loop_filter_block_function loop_filter_bh_ppc; - -extern loop_filter_block_function loop_filter_mbvs_ppc; -extern loop_filter_block_function loop_filter_bvs_ppc; -extern loop_filter_block_function loop_filter_mbhs_ppc; -extern loop_filter_block_function loop_filter_bhs_ppc; - -// Generic C -extern loop_filter_block_function vp9_loop_filter_mbv_c; -extern loop_filter_block_function vp9_loop_filter_bv_c; -extern loop_filter_block_function vp9_loop_filter_mbh_c; -extern loop_filter_block_function vp9_loop_filter_bh_c; - -extern loop_filter_block_function vp9_loop_filter_mbvs_c; -extern loop_filter_block_function vp9_loop_filter_bvs_c; -extern loop_filter_block_function vp9_loop_filter_mbhs_c; -extern loop_filter_block_function vp9_loop_filter_bhs_c; - -extern loop_filter_block_function *vp8_lf_mbvfull; -extern loop_filter_block_function *vp8_lf_mbhfull; -extern loop_filter_block_function *vp8_lf_bvfull; -extern loop_filter_block_function *vp8_lf_bhfull; - -extern loop_filter_block_function *vp8_lf_mbvsimple; -extern loop_filter_block_function *vp8_lf_mbhsimple; -extern loop_filter_block_function *vp8_lf_bvsimple; -extern loop_filter_block_function *vp8_lf_bhsimple; - -void vp9_clear_c(void) { -} - -void vp9_machine_specific_config(void) { - // Pure C: - vp9_clear_system_state = vp9_clear_c; - vp9_recon_b = vp9_recon_b_c; - vp9_recon4b = vp9_recon4b_c; - vp9_recon2b = vp9_recon2b_c; - - vp9_bilinear_predict16x16 = bilinear_predict16x16_ppc; - vp9_bilinear_predict8x8 = bilinear_predict8x8_ppc; - vp9_bilinear_predict8x4 = bilinear_predict8x4_ppc; - vp8_bilinear_predict = bilinear_predict4x4_ppc; - - vp9_sixtap_predict16x16 = sixtap_predict16x16_ppc; - vp9_sixtap_predict8x8 = sixtap_predict8x8_ppc; - vp9_sixtap_predict8x4 = sixtap_predict8x4_ppc; - vp9_sixtap_predict = sixtap_predict_ppc; - - vp8_short_idct4x4_1 = vp9_short_idct4x4_1_c; - vp8_short_idct4x4 = short_idct4x4_ppc; - vp8_dc_only_idct = vp8_dc_only_idct_c; - - vp8_lf_mbvfull = loop_filter_mbv_ppc; - vp8_lf_bvfull = loop_filter_bv_ppc; - vp8_lf_mbhfull = loop_filter_mbh_ppc; - vp8_lf_bhfull = loop_filter_bh_ppc; - - vp8_lf_mbvsimple = loop_filter_mbvs_ppc; - vp8_lf_bvsimple = loop_filter_bvs_ppc; - vp8_lf_mbhsimple = loop_filter_mbhs_ppc; - vp8_lf_bhsimple = loop_filter_bhs_ppc; - - vp9_post_proc_down_and_across = vp9_post_proc_down_and_across_c; - vp9_mbpost_proc_down = vp9_mbpost_proc_down_c; - vp9_mbpost_proc_across_ip = vp9_mbpost_proc_across_ip_c; - vp9_plane_add_noise = vp9_plane_add_noise_c; - - vp9_copy_mem16x16 = copy_mem16x16_ppc; - vp9_copy_mem8x8 = vp9_copy_mem8x8_c; - vp9_copy_mem8x4 = vp9_copy_mem8x4_c; - -} diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index d372325a6..ada2ded10 100644 --- a/vp9/common/vp9_blockd.h +++ 
b/vp9/common/vp9_blockd.h @@ -425,9 +425,10 @@ typedef struct macroblockd { void (*itxm_add)(int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *output, int pitch, int stride, int eob); void (*itxm_add_y_block)(int16_t *q, const int16_t *dq, - uint8_t *pre, uint8_t *dst, int stride, struct macroblockd *xd); + uint8_t *pre, int pre_stride, uint8_t *dst, int stride, + struct macroblockd *xd); void (*itxm_add_uv_block)(int16_t *q, const int16_t *dq, - uint8_t *pre, uint8_t *dst, int stride, + uint8_t *pre, int pre_stride, uint8_t *dst, int stride, uint16_t *eobs); struct subpix_fn_table subpix; @@ -758,5 +759,92 @@ static INLINE struct plane_block_idx plane_block_idx(int y_blocks, return res; } +/* TODO(jkoleszar): Probably best to remove instances that require this, + * as the data likely becomes per-plane and stored in the per-plane structures. + * This is a stub to work with the existing code. + */ +static INLINE int old_block_idx_4x4(MACROBLOCKD* const xd, int block_size_b, + int plane, int i) { + const int luma_blocks = 1 << block_size_b; + assert(xd->plane[0].subsampling_x == 0); + assert(xd->plane[0].subsampling_y == 0); + assert(xd->plane[1].subsampling_x == 1); + assert(xd->plane[1].subsampling_y == 1); + assert(xd->plane[2].subsampling_x == 1); + assert(xd->plane[2].subsampling_y == 1); + return plane == 0 ? i : + plane == 1 ? luma_blocks + i : + luma_blocks * 5 / 4 + i; +} + +typedef void (*foreach_transformed_block_visitor)(int plane, int block, + int block_size_b, + int ss_txfrm_size, + void *arg); +static INLINE void foreach_transformed_block_in_plane( + const MACROBLOCKD* const xd, int block_size, int plane, + int is_split, foreach_transformed_block_visitor visit, void *arg) { + // block and transform sizes, in number of 4x4 blocks log 2 ("*_b") + // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8 + const TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; + const int block_size_b = block_size; + const int txfrm_size_b = tx_size * 2; + + // subsampled size of the block + const int ss_sum = xd->plane[plane].subsampling_x + + xd->plane[plane].subsampling_y; + const int ss_block_size = block_size_b - ss_sum; + + // size of the transform to use. scale the transform down if it's larger + // than the size of the subsampled data, or forced externally by the mb mode. + const int ss_max = MAX(xd->plane[plane].subsampling_x, + xd->plane[plane].subsampling_y); + const int ss_txfrm_size = txfrm_size_b > ss_block_size || is_split + ? txfrm_size_b - ss_max * 2 + : txfrm_size_b; + + // TODO(jkoleszar): 1 may not be correct here with larger chroma planes. + const int inc = is_split ? 
1 : (1 << ss_txfrm_size); + int i; + + assert(txfrm_size_b <= block_size_b); + assert(ss_txfrm_size <= ss_block_size); + for (i = 0; i < (1 << ss_block_size); i += inc) { + visit(plane, i, block_size_b, ss_txfrm_size, arg); + } +} + +static INLINE void foreach_transformed_block( + const MACROBLOCKD* const xd, int block_size, + foreach_transformed_block_visitor visit, void *arg) { + const MB_PREDICTION_MODE mode = xd->mode_info_context->mbmi.mode; + const int is_split = + xd->mode_info_context->mbmi.txfm_size == TX_8X8 && + (mode == I8X8_PRED || mode == SPLITMV); + int plane; + + for (plane = 0; plane < MAX_MB_PLANE; plane++) { + const int is_split_chroma = is_split && + xd->plane[plane].plane_type == PLANE_TYPE_UV; + + foreach_transformed_block_in_plane(xd, block_size, plane, is_split_chroma, + visit, arg); + } +} + +static INLINE void foreach_transformed_block_uv( + const MACROBLOCKD* const xd, int block_size, + foreach_transformed_block_visitor visit, void *arg) { + const MB_PREDICTION_MODE mode = xd->mode_info_context->mbmi.mode; + const int is_split = + xd->mode_info_context->mbmi.txfm_size == TX_8X8 && + (mode == I8X8_PRED || mode == SPLITMV); + int plane; + + for (plane = 1; plane < MAX_MB_PLANE; plane++) { + foreach_transformed_block_in_plane(xd, block_size, plane, is_split, + visit, arg); + } +} #endif // VP9_COMMON_VP9_BLOCKD_H_ diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c index 25cea5680..500a278ff 100644 --- a/vp9/common/vp9_entropy.c +++ b/vp9/common/vp9_entropy.c @@ -461,267 +461,7 @@ const int vp9_basenzcvalue[NZC32X32_TOKENS] = { #if CONFIG_MODELCOEFPROB -const vp9_prob vp9_modelcoefprobs_gg875[COEFPROB_MODELS][ENTROPY_NODES - 1] = { - // Probs generated with a Generalized Gaussian (with shape parameter 0.875) - // source model with varying quantizer step size for a uniform quantizer - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, // do not use - {1, 2, 6, 86, 129, 11, 87, 42, 92, 52,}, - {2, 4, 12, 87, 129, 22, 89, 75, 97, 91,}, - {3, 6, 17, 88, 130, 32, 90, 102, 102, 121,}, - {4, 8, 22, 89, 131, 41, 91, 125, 107, 145,}, - {5, 10, 28, 90, 131, 50, 93, 144, 112, 164,}, - {6, 12, 33, 90, 132, 59, 94, 160, 117, 180,}, - {7, 14, 38, 91, 132, 67, 95, 173, 122, 193,}, - {8, 15, 42, 92, 133, 75, 97, 185, 126, 204,}, - {9, 17, 47, 92, 133, 82, 98, 195, 131, 212,}, - {10, 19, 52, 93, 134, 89, 99, 203, 135, 220,}, - {11, 21, 56, 94, 134, 96, 101, 211, 140, 226,}, - {12, 23, 60, 95, 135, 102, 102, 217, 144, 231,}, - {13, 25, 65, 95, 135, 109, 103, 222, 148, 235,}, - {14, 26, 69, 96, 136, 115, 105, 227, 153, 238,}, - {15, 28, 73, 97, 136, 120, 106, 231, 157, 241,}, - {16, 30, 77, 97, 137, 126, 107, 234, 161, 244,}, - {17, 32, 81, 98, 138, 131, 108, 237, 164, 246,}, - {18, 34, 85, 99, 138, 136, 110, 240, 168, 247,}, - {19, 35, 89, 100, 139, 141, 111, 242, 172, 249,}, - {20, 37, 92, 100, 139, 145, 112, 244, 175, 250,}, - {21, 39, 96, 101, 140, 150, 113, 246, 179, 251,}, - {22, 41, 99, 102, 140, 154, 115, 247, 182, 252,}, - {23, 42, 103, 102, 141, 158, 116, 248, 185, 252,}, - {24, 44, 106, 103, 141, 162, 117, 249, 188, 253,}, - {25, 46, 110, 104, 142, 166, 118, 250, 191, 253,}, - {26, 48, 113, 104, 142, 170, 120, 251, 194, 254,}, - {27, 49, 116, 105, 143, 173, 121, 252, 197, 254,}, - {28, 51, 119, 106, 143, 176, 122, 252, 200, 254,}, - {29, 53, 122, 107, 144, 180, 123, 253, 202, 255,}, - {30, 54, 125, 107, 144, 183, 125, 253, 205, 255,}, - {31, 56, 128, 108, 145, 186, 126, 254, 207, 255,}, - {32, 58, 131, 109, 145, 189, 127, 254, 209, 255,}, - {33, 59, 134, 109, 146, 191, 128, 254, 
212, 255,}, - {34, 61, 137, 110, 146, 194, 130, 254, 214, 255,}, - {35, 62, 139, 111, 147, 196, 131, 255, 216, 255,}, - {36, 64, 142, 112, 147, 199, 132, 255, 218, 255,}, - {37, 66, 145, 112, 148, 201, 134, 255, 220, 255,}, - {38, 67, 147, 113, 148, 203, 135, 255, 221, 255,}, - {39, 69, 150, 114, 149, 206, 136, 255, 223, 255,}, - {40, 70, 152, 114, 149, 208, 137, 255, 225, 255,}, - {41, 72, 155, 115, 150, 210, 138, 255, 226, 255,}, - {42, 74, 157, 116, 150, 212, 140, 255, 228, 255,}, - {43, 75, 159, 117, 151, 213, 141, 255, 229, 255,}, - {44, 77, 161, 117, 151, 215, 142, 255, 230, 255,}, - {45, 78, 164, 118, 152, 217, 143, 255, 232, 255,}, - {46, 80, 166, 119, 152, 219, 145, 255, 233, 255,}, - {47, 81, 168, 120, 153, 220, 146, 255, 234, 255,}, - {48, 83, 170, 120, 153, 222, 147, 255, 235, 255,}, - {49, 84, 172, 121, 154, 223, 148, 255, 236, 255,}, - {50, 86, 174, 122, 154, 225, 150, 255, 237, 255,}, - {51, 87, 176, 123, 155, 226, 151, 255, 238, 255,}, - {52, 89, 178, 123, 155, 227, 152, 255, 239, 255,}, - {53, 90, 180, 124, 156, 228, 153, 255, 240, 255,}, - {54, 92, 182, 125, 156, 230, 154, 255, 241, 255,}, - {55, 93, 183, 126, 157, 231, 156, 255, 242, 255,}, - {56, 95, 185, 126, 157, 232, 157, 255, 242, 255,}, - {57, 96, 187, 127, 158, 233, 158, 255, 243, 255,}, - {58, 98, 189, 128, 158, 234, 159, 255, 244, 255,}, - {59, 99, 190, 129, 159, 235, 160, 255, 244, 255,}, - {60, 101, 192, 129, 159, 236, 162, 255, 245, 255,}, - {61, 102, 193, 130, 160, 237, 163, 255, 246, 255,}, - {62, 104, 195, 131, 160, 238, 164, 255, 246, 255,}, - {63, 105, 197, 132, 161, 238, 165, 255, 247, 255,}, - {64, 106, 198, 132, 162, 239, 166, 255, 247, 255,}, - {65, 108, 199, 133, 162, 240, 167, 255, 248, 255,}, - {66, 109, 201, 134, 163, 241, 169, 255, 248, 255,}, - {67, 111, 202, 135, 163, 241, 170, 255, 249, 255,}, - {68, 112, 204, 135, 164, 242, 171, 255, 249, 255,}, - {69, 113, 205, 136, 164, 243, 172, 255, 249, 255,}, - {70, 115, 206, 137, 165, 243, 173, 255, 250, 255,}, - {71, 116, 208, 138, 165, 244, 174, 255, 250, 255,}, - {72, 117, 209, 138, 166, 244, 175, 255, 250, 255,}, - {73, 119, 210, 139, 166, 245, 177, 255, 251, 255,}, - {74, 120, 211, 140, 167, 245, 178, 255, 251, 255,}, - {75, 121, 212, 141, 167, 246, 179, 255, 251, 255,}, - {76, 123, 214, 142, 168, 246, 180, 255, 252, 255,}, - {77, 124, 215, 142, 168, 247, 181, 255, 252, 255,}, - {78, 125, 216, 143, 169, 247, 182, 255, 252, 255,}, - {79, 127, 217, 144, 170, 248, 183, 255, 252, 255,}, - {80, 128, 218, 145, 170, 248, 184, 255, 253, 255,}, - {81, 129, 219, 146, 171, 248, 185, 255, 253, 255,}, - {82, 131, 220, 146, 171, 249, 186, 255, 253, 255,}, - {83, 132, 221, 147, 172, 249, 187, 255, 253, 255,}, - {84, 133, 222, 148, 172, 249, 188, 255, 253, 255,}, - {85, 134, 223, 149, 173, 250, 189, 255, 253, 255,}, - {86, 136, 224, 149, 173, 250, 190, 255, 254, 255,}, - {87, 137, 225, 150, 174, 250, 191, 255, 254, 255,}, - {88, 138, 226, 151, 174, 251, 192, 255, 254, 255,}, - {89, 139, 226, 152, 175, 251, 193, 255, 254, 255,}, - {90, 141, 227, 153, 175, 251, 194, 255, 254, 255,}, - {91, 142, 228, 153, 176, 251, 195, 255, 254, 255,}, - {92, 143, 229, 154, 177, 252, 196, 255, 254, 255,}, - {93, 144, 230, 155, 177, 252, 197, 255, 254, 255,}, - {94, 146, 230, 156, 178, 252, 198, 255, 255, 255,}, - {95, 147, 231, 157, 178, 252, 199, 255, 255, 255,}, - {96, 148, 232, 157, 179, 252, 200, 255, 255, 255,}, - {97, 149, 233, 158, 179, 253, 201, 255, 255, 255,}, - {98, 150, 233, 159, 180, 253, 202, 255, 255, 255,}, - {99, 152, 234, 160, 180, 253, 203, 255, 255, 255,}, - 
{100, 153, 235, 161, 181, 253, 204, 255, 255, 255,}, - {101, 154, 235, 161, 182, 253, 205, 255, 255, 255,}, - {102, 155, 236, 162, 182, 253, 206, 255, 255, 255,}, - {103, 156, 236, 163, 183, 254, 207, 255, 255, 255,}, - {104, 157, 237, 164, 183, 254, 207, 255, 255, 255,}, - {105, 159, 238, 165, 184, 254, 208, 255, 255, 255,}, - {106, 160, 238, 166, 184, 254, 209, 255, 255, 255,}, - {107, 161, 239, 166, 185, 254, 210, 255, 255, 255,}, - {108, 162, 239, 167, 185, 254, 211, 255, 255, 255,}, - {109, 163, 240, 168, 186, 254, 212, 255, 255, 255,}, - {110, 164, 240, 169, 187, 254, 212, 255, 255, 255,}, - {111, 165, 241, 170, 187, 254, 213, 255, 255, 255,}, - {112, 166, 241, 170, 188, 255, 214, 255, 255, 255,}, - {113, 167, 242, 171, 188, 255, 215, 255, 255, 255,}, - {114, 169, 242, 172, 189, 255, 216, 255, 255, 255,}, - {115, 170, 243, 173, 189, 255, 216, 255, 255, 255,}, - {116, 171, 243, 174, 190, 255, 217, 255, 255, 255,}, - {117, 172, 244, 174, 190, 255, 218, 255, 255, 255,}, - {118, 173, 244, 175, 191, 255, 219, 255, 255, 255,}, - {119, 174, 244, 176, 192, 255, 219, 255, 255, 255,}, - {120, 175, 245, 177, 192, 255, 220, 255, 255, 255,}, - {121, 176, 245, 178, 193, 255, 221, 255, 255, 255,}, - {122, 177, 245, 178, 193, 255, 222, 255, 255, 255,}, - {123, 178, 246, 179, 194, 255, 222, 255, 255, 255,}, - {124, 179, 246, 180, 194, 255, 223, 255, 255, 255,}, - {125, 180, 247, 181, 195, 255, 224, 255, 255, 255,}, - {126, 181, 247, 182, 196, 255, 224, 255, 255, 255,}, - {127, 182, 247, 182, 196, 255, 225, 255, 255, 255,}, - {128, 183, 247, 183, 197, 255, 226, 255, 255, 255,}, - {129, 184, 248, 184, 197, 255, 226, 255, 255, 255,}, - {130, 185, 248, 185, 198, 255, 227, 255, 255, 255,}, - {131, 186, 248, 186, 198, 255, 228, 255, 255, 255,}, - {132, 187, 249, 186, 199, 255, 228, 255, 255, 255,}, - {133, 188, 249, 187, 200, 255, 229, 255, 255, 255,}, - {134, 189, 249, 188, 200, 255, 230, 255, 255, 255,}, - {135, 190, 249, 189, 201, 255, 230, 255, 255, 255,}, - {136, 191, 250, 190, 201, 255, 231, 255, 255, 255,}, - {137, 192, 250, 190, 202, 255, 231, 255, 255, 255,}, - {138, 193, 250, 191, 202, 255, 232, 255, 255, 255,}, - {139, 194, 250, 192, 203, 255, 232, 255, 255, 255,}, - {140, 195, 251, 193, 204, 255, 233, 255, 255, 255,}, - {141, 195, 251, 194, 204, 255, 234, 255, 255, 255,}, - {142, 196, 251, 194, 205, 255, 234, 255, 255, 255,}, - {143, 197, 251, 195, 205, 255, 235, 255, 255, 255,}, - {144, 198, 251, 196, 206, 255, 235, 255, 255, 255,}, - {145, 199, 252, 197, 206, 255, 236, 255, 255, 255,}, - {146, 200, 252, 197, 207, 255, 236, 255, 255, 255,}, - {147, 201, 252, 198, 208, 255, 237, 255, 255, 255,}, - {148, 202, 252, 199, 208, 255, 237, 255, 255, 255,}, - {149, 203, 252, 200, 209, 255, 238, 255, 255, 255,}, - {150, 203, 252, 201, 209, 255, 238, 255, 255, 255,}, - {151, 204, 253, 201, 210, 255, 239, 255, 255, 255,}, - {152, 205, 253, 202, 210, 255, 239, 255, 255, 255,}, - {153, 206, 253, 203, 211, 255, 239, 255, 255, 255,}, - {154, 207, 253, 204, 212, 255, 240, 255, 255, 255,}, - {155, 208, 253, 204, 212, 255, 240, 255, 255, 255,}, - {156, 209, 253, 205, 213, 255, 241, 255, 255, 255,}, - {157, 209, 253, 206, 213, 255, 241, 255, 255, 255,}, - {158, 210, 254, 207, 214, 255, 242, 255, 255, 255,}, - {159, 211, 254, 207, 214, 255, 242, 255, 255, 255,}, - {160, 212, 254, 208, 215, 255, 242, 255, 255, 255,}, - {161, 213, 254, 209, 215, 255, 243, 255, 255, 255,}, - {162, 213, 254, 210, 216, 255, 243, 255, 255, 255,}, - {163, 214, 254, 210, 217, 255, 244, 255, 255, 255,}, - {164, 215, 254, 211, 217, 255, 
244, 255, 255, 255,}, - {165, 216, 254, 212, 218, 255, 244, 255, 255, 255,}, - {166, 216, 254, 212, 218, 255, 245, 255, 255, 255,}, - {167, 217, 254, 213, 219, 255, 245, 255, 255, 255,}, - {168, 218, 254, 214, 219, 255, 245, 255, 255, 255,}, - {169, 219, 255, 215, 220, 255, 246, 255, 255, 255,}, - {170, 219, 255, 215, 221, 255, 246, 255, 255, 255,}, - {171, 220, 255, 216, 221, 255, 246, 255, 255, 255,}, - {172, 221, 255, 217, 222, 255, 247, 255, 255, 255,}, - {173, 222, 255, 217, 222, 255, 247, 255, 255, 255,}, - {174, 222, 255, 218, 223, 255, 247, 255, 255, 255,}, - {175, 223, 255, 219, 223, 255, 248, 255, 255, 255,}, - {176, 224, 255, 220, 224, 255, 248, 255, 255, 255,}, - {177, 224, 255, 220, 224, 255, 248, 255, 255, 255,}, - {178, 225, 255, 221, 225, 255, 248, 255, 255, 255,}, - {179, 226, 255, 222, 225, 255, 249, 255, 255, 255,}, - {180, 226, 255, 222, 226, 255, 249, 255, 255, 255,}, - {181, 227, 255, 223, 227, 255, 249, 255, 255, 255,}, - {182, 228, 255, 224, 227, 255, 249, 255, 255, 255,}, - {183, 228, 255, 224, 228, 255, 250, 255, 255, 255,}, - {184, 229, 255, 225, 228, 255, 250, 255, 255, 255,}, - {185, 230, 255, 226, 229, 255, 250, 255, 255, 255,}, - {186, 230, 255, 226, 229, 255, 250, 255, 255, 255,}, - {187, 231, 255, 227, 230, 255, 251, 255, 255, 255,}, - {188, 232, 255, 228, 230, 255, 251, 255, 255, 255,}, - {189, 232, 255, 228, 231, 255, 251, 255, 255, 255,}, - {190, 233, 255, 229, 231, 255, 251, 255, 255, 255,}, - {191, 233, 255, 229, 232, 255, 251, 255, 255, 255,}, - {192, 234, 255, 230, 232, 255, 252, 255, 255, 255,}, - {193, 234, 255, 231, 233, 255, 252, 255, 255, 255,}, - {194, 235, 255, 231, 233, 255, 252, 255, 255, 255,}, - {195, 236, 255, 232, 234, 255, 252, 255, 255, 255,}, - {196, 236, 255, 232, 234, 255, 252, 255, 255, 255,}, - {197, 237, 255, 233, 235, 255, 252, 255, 255, 255,}, - {198, 237, 255, 234, 235, 255, 253, 255, 255, 255,}, - {199, 238, 255, 234, 236, 255, 253, 255, 255, 255,}, - {200, 238, 255, 235, 236, 255, 253, 255, 255, 255,}, - {201, 239, 255, 235, 237, 255, 253, 255, 255, 255,}, - {202, 239, 255, 236, 237, 255, 253, 255, 255, 255,}, - {203, 240, 255, 237, 238, 255, 253, 255, 255, 255,}, - {204, 240, 255, 237, 238, 255, 254, 255, 255, 255,}, - {205, 241, 255, 238, 239, 255, 254, 255, 255, 255,}, - {206, 241, 255, 238, 239, 255, 254, 255, 255, 255,}, - {207, 242, 255, 239, 240, 255, 254, 255, 255, 255,}, - {208, 242, 255, 239, 240, 255, 254, 255, 255, 255,}, - {209, 243, 255, 240, 241, 255, 254, 255, 255, 255,}, - {210, 243, 255, 240, 241, 255, 254, 255, 255, 255,}, - {211, 244, 255, 241, 242, 255, 254, 255, 255, 255,}, - {212, 244, 255, 241, 242, 255, 254, 255, 255, 255,}, - {213, 245, 255, 242, 243, 255, 255, 255, 255, 255,}, - {214, 245, 255, 242, 243, 255, 255, 255, 255, 255,}, - {215, 246, 255, 243, 244, 255, 255, 255, 255, 255,}, - {216, 246, 255, 243, 244, 255, 255, 255, 255, 255,}, - {217, 246, 255, 244, 244, 255, 255, 255, 255, 255,}, - {218, 247, 255, 244, 245, 255, 255, 255, 255, 255,}, - {219, 247, 255, 245, 245, 255, 255, 255, 255, 255,}, - {220, 248, 255, 245, 246, 255, 255, 255, 255, 255,}, - {221, 248, 255, 246, 246, 255, 255, 255, 255, 255,}, - {222, 248, 255, 246, 247, 255, 255, 255, 255, 255,}, - {223, 249, 255, 247, 247, 255, 255, 255, 255, 255,}, - {224, 249, 255, 247, 247, 255, 255, 255, 255, 255,}, - {225, 250, 255, 247, 248, 255, 255, 255, 255, 255,}, - {226, 250, 255, 248, 248, 255, 255, 255, 255, 255,}, - {227, 250, 255, 248, 249, 255, 255, 255, 255, 255,}, - {228, 251, 255, 249, 249, 255, 255, 255, 255, 255,}, - {229, 251, 
255, 249, 249, 255, 255, 255, 255, 255,}, - {230, 251, 255, 249, 250, 255, 255, 255, 255, 255,}, - {231, 251, 255, 250, 250, 255, 255, 255, 255, 255,}, - {232, 252, 255, 250, 250, 255, 255, 255, 255, 255,}, - {233, 252, 255, 251, 251, 255, 255, 255, 255, 255,}, - {234, 252, 255, 251, 251, 255, 255, 255, 255, 255,}, - {235, 253, 255, 251, 251, 255, 255, 255, 255, 255,}, - {236, 253, 255, 252, 252, 255, 255, 255, 255, 255,}, - {237, 253, 255, 252, 252, 255, 255, 255, 255, 255,}, - {238, 253, 255, 252, 252, 255, 255, 255, 255, 255,}, - {239, 254, 255, 253, 253, 255, 255, 255, 255, 255,}, - {240, 254, 255, 253, 253, 255, 255, 255, 255, 255,}, - {241, 254, 255, 253, 253, 255, 255, 255, 255, 255,}, - {242, 254, 255, 253, 254, 255, 255, 255, 255, 255,}, - {243, 254, 255, 254, 254, 255, 255, 255, 255, 255,}, - {244, 255, 255, 254, 254, 255, 255, 255, 255, 255,}, - {245, 255, 255, 254, 254, 255, 255, 255, 255, 255,}, - {246, 255, 255, 254, 254, 255, 255, 255, 255, 255,}, - {247, 255, 255, 255, 255, 255, 255, 255, 255, 255,}, - {248, 255, 255, 255, 255, 255, 255, 255, 255, 255,}, - {249, 255, 255, 255, 255, 255, 255, 255, 255, 255,}, - {250, 255, 255, 255, 255, 255, 255, 255, 255, 255,}, - {251, 255, 255, 255, 255, 255, 255, 255, 255, 255,}, - {252, 255, 255, 255, 255, 255, 255, 255, 255, 255,}, - {253, 255, 255, 255, 255, 255, 255, 255, 255, 255,}, - {254, 255, 255, 255, 255, 255, 255, 255, 255, 255,}, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255,}, -}; - +#if UNCONSTRAINED_NODES == 2 const vp9_prob vp9_modelcoefprobs_gg75[COEFPROB_MODELS][ENTROPY_NODES - 1] = { // Probs generated with a Generalized Gaussian (with shape parameter 0.75) // source model with varying quantizer step size for a uniform quantizer @@ -1244,788 +984,274 @@ const vp9_prob vp9_modelcoefprobs_gg625[COEFPROB_MODELS][ENTROPY_NODES - 1] = { {255, 255, 255, 254, 254, 255, 255, 255, 255, 255,}, }; -const vp9_prob vp9_modelcoefprobs_gg875p1[COEFPROB_MODELS][ENTROPY_NODES - 1] = { - // Probs generated with a Generalized Gaussian (with shape parameter 0.625) - // source model with varying quantizer step size for a uniform quantizer - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, // do not use - {1, 1, 3, 86, 128, 6, 86, 22, 89, 28,}, - {1, 2, 6, 86, 129, 11, 87, 42, 92, 52,}, - {2, 3, 9, 87, 129, 17, 88, 59, 94, 73,}, - {2, 4, 12, 87, 129, 22, 89, 75, 97, 92,}, - {3, 5, 14, 88, 130, 27, 89, 90, 100, 108,}, - {3, 6, 17, 88, 130, 33, 90, 103, 102, 122,}, - {4, 7, 20, 88, 130, 37, 91, 115, 105, 135,}, - {4, 8, 23, 89, 131, 42, 92, 126, 108, 147,}, - {5, 9, 25, 89, 131, 47, 92, 137, 110, 157,}, - {5, 10, 28, 90, 131, 52, 93, 146, 113, 167,}, - {6, 11, 31, 90, 132, 56, 94, 154, 115, 175,}, - {6, 12, 33, 90, 132, 60, 94, 162, 118, 183,}, - {7, 13, 36, 91, 132, 65, 95, 170, 120, 190,}, - {7, 14, 39, 91, 132, 69, 96, 176, 123, 196,}, - {8, 15, 41, 92, 133, 73, 96, 182, 125, 201,}, - {8, 16, 44, 92, 133, 77, 97, 188, 128, 206,}, - {9, 17, 46, 92, 133, 81, 98, 193, 130, 211,}, - {9, 18, 49, 93, 134, 85, 99, 198, 133, 215,}, - {10, 19, 51, 93, 134, 89, 99, 203, 135, 219,}, - {10, 20, 54, 93, 134, 92, 100, 207, 137, 222,}, - {11, 21, 56, 94, 134, 96, 101, 211, 140, 226,}, - {12, 22, 58, 94, 135, 100, 101, 214, 142, 228,}, - {12, 23, 61, 95, 135, 103, 102, 217, 145, 231,}, - {13, 24, 63, 95, 135, 106, 103, 220, 147, 233,}, - {13, 25, 66, 95, 136, 110, 103, 223, 149, 235,}, - {14, 26, 68, 96, 136, 113, 104, 226, 151, 237,}, - {14, 27, 70, 96, 136, 116, 105, 228, 154, 239,}, - {15, 28, 72, 97, 136, 119, 106, 230, 156, 241,}, - {15, 29, 75, 97, 137, 122, 
106, 232, 158, 242,}, - {16, 30, 77, 97, 137, 125, 107, 234, 160, 243,}, - {17, 31, 79, 98, 137, 128, 108, 236, 163, 245,}, - {17, 32, 81, 98, 138, 131, 108, 237, 165, 246,}, - {18, 33, 83, 99, 138, 134, 109, 239, 167, 247,}, - {18, 34, 86, 99, 138, 137, 110, 240, 169, 248,}, - {19, 35, 88, 99, 138, 140, 111, 242, 171, 248,}, - {19, 36, 90, 100, 139, 142, 111, 243, 173, 249,}, - {20, 37, 92, 100, 139, 145, 112, 244, 175, 250,}, - {20, 38, 94, 101, 139, 148, 113, 245, 177, 250,}, - {21, 39, 96, 101, 140, 150, 113, 246, 179, 251,}, - {22, 40, 98, 101, 140, 153, 114, 246, 181, 251,}, - {22, 41, 100, 102, 140, 155, 115, 247, 183, 252,}, - {23, 42, 102, 102, 140, 157, 116, 248, 185, 252,}, - {23, 43, 104, 103, 141, 160, 116, 249, 186, 253,}, - {24, 44, 106, 103, 141, 162, 117, 249, 188, 253,}, - {25, 45, 108, 103, 141, 164, 118, 250, 190, 253,}, - {25, 46, 110, 104, 142, 166, 119, 250, 192, 253,}, - {26, 47, 112, 104, 142, 168, 119, 251, 193, 254,}, - {26, 48, 114, 105, 142, 171, 120, 251, 195, 254,}, - {27, 49, 116, 105, 143, 173, 121, 252, 197, 254,}, - {27, 50, 118, 105, 143, 175, 122, 252, 198, 254,}, - {28, 51, 119, 106, 143, 177, 122, 252, 200, 254,}, - {29, 52, 121, 106, 143, 179, 123, 253, 201, 255,}, - {29, 53, 123, 107, 144, 180, 124, 253, 203, 255,}, - {30, 54, 125, 107, 144, 182, 125, 253, 204, 255,}, - {30, 55, 127, 108, 144, 184, 125, 253, 206, 255,}, - {31, 56, 128, 108, 145, 186, 126, 254, 207, 255,}, - {32, 57, 130, 108, 145, 188, 127, 254, 209, 255,}, - {32, 58, 132, 109, 145, 189, 128, 254, 210, 255,}, - {33, 59, 134, 109, 146, 191, 128, 254, 211, 255,}, - {33, 60, 135, 110, 146, 193, 129, 254, 213, 255,}, - {34, 61, 137, 110, 146, 194, 130, 254, 214, 255,}, - {35, 62, 139, 111, 146, 196, 131, 255, 215, 255,}, - {35, 63, 140, 111, 147, 197, 131, 255, 216, 255,}, - {36, 64, 142, 112, 147, 199, 132, 255, 218, 255,}, - {37, 65, 144, 112, 147, 200, 133, 255, 219, 255,}, - {37, 66, 145, 112, 148, 202, 134, 255, 220, 255,}, - {38, 67, 147, 113, 148, 203, 135, 255, 221, 255,}, - {38, 68, 148, 113, 148, 204, 135, 255, 222, 255,}, - {39, 69, 150, 114, 149, 206, 136, 255, 223, 255,}, - {40, 70, 151, 114, 149, 207, 137, 255, 224, 255,}, - {40, 71, 153, 115, 149, 208, 138, 255, 225, 255,}, - {41, 72, 154, 115, 150, 210, 138, 255, 226, 255,}, - {42, 73, 156, 116, 150, 211, 139, 255, 227, 255,}, - {42, 74, 157, 116, 150, 212, 140, 255, 228, 255,}, - {43, 75, 159, 117, 151, 213, 141, 255, 229, 255,}, - {44, 76, 160, 117, 151, 214, 142, 255, 230, 255,}, - {44, 77, 162, 117, 151, 216, 142, 255, 231, 255,}, - {45, 78, 163, 118, 152, 217, 143, 255, 231, 255,}, - {45, 79, 165, 118, 152, 218, 144, 255, 232, 255,}, - {46, 80, 166, 119, 152, 219, 145, 255, 233, 255,}, - {47, 81, 167, 119, 153, 220, 146, 255, 234, 255,}, - {47, 82, 169, 120, 153, 221, 146, 255, 235, 255,}, - {48, 83, 170, 120, 153, 222, 147, 255, 235, 255,}, - {49, 84, 171, 121, 154, 223, 148, 255, 236, 255,}, - {49, 85, 173, 121, 154, 224, 149, 255, 237, 255,}, - {50, 86, 174, 122, 154, 225, 150, 255, 237, 255,}, - {51, 87, 175, 122, 155, 225, 150, 255, 238, 255,}, - {51, 88, 177, 123, 155, 226, 151, 255, 239, 255,}, - {52, 89, 178, 123, 155, 227, 152, 255, 239, 255,}, - {53, 90, 179, 124, 156, 228, 153, 255, 240, 255,}, - {53, 91, 180, 124, 156, 229, 154, 255, 240, 255,}, - {54, 92, 182, 125, 156, 230, 154, 255, 241, 255,}, - {55, 93, 183, 125, 157, 230, 155, 255, 241, 255,}, - {55, 94, 184, 126, 157, 231, 156, 255, 242, 255,}, - {56, 95, 185, 126, 157, 232, 157, 255, 242, 255,}, - {57, 96, 187, 127, 158, 233, 158, 255, 243, 
255,}, - {57, 97, 188, 127, 158, 233, 159, 255, 243, 255,}, - {58, 98, 189, 128, 158, 234, 159, 255, 244, 255,}, - {59, 99, 190, 128, 159, 235, 160, 255, 244, 255,}, - {60, 100, 191, 129, 159, 235, 161, 255, 245, 255,}, - {60, 101, 192, 129, 160, 236, 162, 255, 245, 255,}, - {61, 102, 193, 130, 160, 237, 163, 255, 246, 255,}, - {62, 103, 194, 131, 160, 237, 164, 255, 246, 255,}, - {62, 104, 196, 131, 161, 238, 164, 255, 246, 255,}, - {63, 105, 197, 132, 161, 238, 165, 255, 247, 255,}, - {64, 106, 198, 132, 161, 239, 166, 255, 247, 255,}, - {64, 107, 199, 133, 162, 239, 167, 255, 247, 255,}, - {65, 108, 200, 133, 162, 240, 168, 255, 248, 255,}, - {66, 109, 201, 134, 163, 241, 168, 255, 248, 255,}, - {67, 110, 202, 134, 163, 241, 169, 255, 248, 255,}, - {67, 111, 203, 135, 163, 242, 170, 255, 249, 255,}, - {68, 112, 204, 135, 164, 242, 171, 255, 249, 255,}, - {69, 113, 205, 136, 164, 242, 172, 255, 249, 255,}, - {69, 114, 206, 137, 164, 243, 173, 255, 250, 255,}, - {70, 115, 207, 137, 165, 243, 173, 255, 250, 255,}, - {71, 116, 208, 138, 165, 244, 174, 255, 250, 255,}, - {72, 117, 208, 138, 166, 244, 175, 255, 250, 255,}, - {72, 118, 209, 139, 166, 245, 176, 255, 251, 255,}, - {73, 119, 210, 139, 166, 245, 177, 255, 251, 255,}, - {74, 120, 211, 140, 167, 245, 178, 255, 251, 255,}, - {75, 121, 212, 141, 167, 246, 178, 255, 251, 255,}, - {75, 122, 213, 141, 168, 246, 179, 255, 251, 255,}, - {76, 123, 214, 142, 168, 246, 180, 255, 252, 255,}, - {77, 124, 215, 142, 168, 247, 181, 255, 252, 255,}, - {78, 125, 215, 143, 169, 247, 182, 255, 252, 255,}, - {78, 126, 216, 144, 169, 247, 182, 255, 252, 255,}, - {79, 127, 217, 144, 170, 248, 183, 255, 252, 255,}, - {80, 128, 218, 145, 170, 248, 184, 255, 253, 255,}, - {81, 129, 219, 145, 170, 248, 185, 255, 253, 255,}, - {82, 130, 219, 146, 171, 249, 186, 255, 253, 255,}, - {82, 131, 220, 147, 171, 249, 187, 255, 253, 255,}, - {83, 132, 221, 147, 172, 249, 187, 255, 253, 255,}, - {84, 133, 222, 148, 172, 249, 188, 255, 253, 255,}, - {85, 134, 222, 148, 173, 250, 189, 255, 253, 255,}, - {85, 135, 223, 149, 173, 250, 190, 255, 254, 255,}, - {86, 136, 224, 150, 173, 250, 191, 255, 254, 255,}, - {87, 137, 225, 150, 174, 250, 191, 255, 254, 255,}, - {88, 138, 225, 151, 174, 251, 192, 255, 254, 255,}, - {89, 139, 226, 152, 175, 251, 193, 255, 254, 255,}, - {89, 140, 227, 152, 175, 251, 194, 255, 254, 255,}, - {90, 141, 227, 153, 176, 251, 195, 255, 254, 255,}, - {91, 142, 228, 153, 176, 251, 195, 255, 254, 255,}, - {92, 143, 229, 154, 176, 252, 196, 255, 254, 255,}, - {93, 144, 229, 155, 177, 252, 197, 255, 254, 255,}, - {93, 145, 230, 155, 177, 252, 198, 255, 255, 255,}, - {94, 146, 231, 156, 178, 252, 199, 255, 255, 255,}, - {95, 147, 231, 157, 178, 252, 199, 255, 255, 255,}, - {96, 148, 232, 157, 179, 252, 200, 255, 255, 255,}, - {97, 149, 232, 158, 179, 253, 201, 255, 255, 255,}, - {98, 150, 233, 159, 180, 253, 202, 255, 255, 255,}, - {99, 151, 234, 159, 180, 253, 202, 255, 255, 255,}, - {99, 152, 234, 160, 181, 253, 203, 255, 255, 255,}, - {100, 153, 235, 161, 181, 253, 204, 255, 255, 255,}, - {101, 154, 235, 162, 182, 253, 205, 255, 255, 255,}, - {102, 155, 236, 162, 182, 253, 206, 255, 255, 255,}, - {103, 156, 236, 163, 183, 254, 206, 255, 255, 255,}, - {104, 157, 237, 164, 183, 254, 207, 255, 255, 255,}, - {105, 158, 237, 164, 183, 254, 208, 255, 255, 255,}, - {105, 159, 238, 165, 184, 254, 209, 255, 255, 255,}, - {106, 160, 238, 166, 184, 254, 209, 255, 255, 255,}, - {107, 161, 239, 166, 185, 254, 210, 255, 255, 255,}, - {108, 162, 239, 167, 185, 
254, 211, 255, 255, 255,}, - {109, 163, 240, 168, 186, 254, 212, 255, 255, 255,}, - {110, 164, 240, 169, 186, 254, 212, 255, 255, 255,}, - {111, 165, 241, 169, 187, 254, 213, 255, 255, 255,}, - {112, 166, 241, 170, 187, 255, 214, 255, 255, 255,}, - {113, 167, 242, 171, 188, 255, 215, 255, 255, 255,}, - {114, 168, 242, 172, 189, 255, 215, 255, 255, 255,}, - {114, 169, 242, 172, 189, 255, 216, 255, 255, 255,}, - {115, 170, 243, 173, 190, 255, 217, 255, 255, 255,}, - {116, 171, 243, 174, 190, 255, 217, 255, 255, 255,}, - {117, 172, 244, 175, 191, 255, 218, 255, 255, 255,}, - {118, 173, 244, 175, 191, 255, 219, 255, 255, 255,}, - {119, 174, 244, 176, 192, 255, 220, 255, 255, 255,}, - {120, 175, 245, 177, 192, 255, 220, 255, 255, 255,}, - {121, 176, 245, 178, 193, 255, 221, 255, 255, 255,}, - {122, 177, 245, 178, 193, 255, 222, 255, 255, 255,}, - {123, 178, 246, 179, 194, 255, 222, 255, 255, 255,}, - {124, 179, 246, 180, 194, 255, 223, 255, 255, 255,}, - {125, 180, 247, 181, 195, 255, 224, 255, 255, 255,}, - {126, 181, 247, 182, 196, 255, 224, 255, 255, 255,}, - {127, 182, 247, 182, 196, 255, 225, 255, 255, 255,}, - {128, 183, 247, 183, 197, 255, 226, 255, 255, 255,}, - {129, 184, 248, 184, 197, 255, 226, 255, 255, 255,}, - {130, 185, 248, 185, 198, 255, 227, 255, 255, 255,}, - {131, 186, 248, 186, 198, 255, 228, 255, 255, 255,}, - {132, 187, 249, 186, 199, 255, 228, 255, 255, 255,}, - {133, 188, 249, 187, 200, 255, 229, 255, 255, 255,}, - {134, 189, 249, 188, 200, 255, 230, 255, 255, 255,}, - {135, 190, 249, 189, 201, 255, 230, 255, 255, 255,}, - {136, 191, 250, 190, 201, 255, 231, 255, 255, 255,}, - {137, 192, 250, 191, 202, 255, 231, 255, 255, 255,}, - {138, 193, 250, 191, 203, 255, 232, 255, 255, 255,}, - {139, 194, 250, 192, 203, 255, 233, 255, 255, 255,}, - {140, 195, 251, 193, 204, 255, 233, 255, 255, 255,}, - {142, 196, 251, 194, 204, 255, 234, 255, 255, 255,}, - {143, 197, 251, 195, 205, 255, 234, 255, 255, 255,}, - {144, 198, 251, 196, 206, 255, 235, 255, 255, 255,}, - {145, 199, 252, 197, 206, 255, 236, 255, 255, 255,}, - {146, 200, 252, 197, 207, 255, 236, 255, 255, 255,}, - {147, 201, 252, 198, 208, 255, 237, 255, 255, 255,}, - {148, 202, 252, 199, 208, 255, 237, 255, 255, 255,}, - {149, 203, 252, 200, 209, 255, 238, 255, 255, 255,}, - {151, 204, 253, 201, 210, 255, 238, 255, 255, 255,}, - {152, 205, 253, 202, 210, 255, 239, 255, 255, 255,}, - {153, 206, 253, 203, 211, 255, 239, 255, 255, 255,}, - {154, 207, 253, 204, 212, 255, 240, 255, 255, 255,}, - {155, 208, 253, 205, 212, 255, 241, 255, 255, 255,}, - {157, 209, 253, 206, 213, 255, 241, 255, 255, 255,}, - {158, 210, 253, 206, 214, 255, 242, 255, 255, 255,}, - {159, 211, 254, 207, 214, 255, 242, 255, 255, 255,}, - {160, 212, 254, 208, 215, 255, 243, 255, 255, 255,}, - {162, 213, 254, 209, 216, 255, 243, 255, 255, 255,}, - {163, 214, 254, 210, 217, 255, 244, 255, 255, 255,}, - {164, 215, 254, 211, 217, 255, 244, 255, 255, 255,}, - {165, 216, 254, 212, 218, 255, 244, 255, 255, 255,}, - {167, 217, 254, 213, 219, 255, 245, 255, 255, 255,}, - {168, 218, 254, 214, 219, 255, 245, 255, 255, 255,}, - {169, 219, 255, 215, 220, 255, 246, 255, 255, 255,}, - {171, 220, 255, 216, 221, 255, 246, 255, 255, 255,}, - {172, 221, 255, 217, 222, 255, 247, 255, 255, 255,}, - {174, 222, 255, 218, 223, 255, 247, 255, 255, 255,}, - {175, 223, 255, 219, 223, 255, 248, 255, 255, 255,}, - {177, 224, 255, 220, 224, 255, 248, 255, 255, 255,}, - {178, 225, 255, 221, 225, 255, 248, 255, 255, 255,}, - {179, 226, 255, 222, 226, 255, 249, 255, 255, 255,}, - {181, 
227, 255, 223, 227, 255, 249, 255, 255, 255,}, - {182, 228, 255, 224, 227, 255, 250, 255, 255, 255,}, - {184, 229, 255, 225, 228, 255, 250, 255, 255, 255,}, - {186, 230, 255, 226, 229, 255, 250, 255, 255, 255,}, - {187, 231, 255, 227, 230, 255, 251, 255, 255, 255,}, - {189, 232, 255, 228, 231, 255, 251, 255, 255, 255,}, - {190, 233, 255, 229, 232, 255, 251, 255, 255, 255,}, - {192, 234, 255, 230, 232, 255, 252, 255, 255, 255,}, - {194, 235, 255, 231, 233, 255, 252, 255, 255, 255,}, - {196, 236, 255, 232, 234, 255, 252, 255, 255, 255,}, - {197, 237, 255, 233, 235, 255, 253, 255, 255, 255,}, - {199, 238, 255, 234, 236, 255, 253, 255, 255, 255,}, - {201, 239, 255, 235, 237, 255, 253, 255, 255, 255,}, - {203, 240, 255, 237, 238, 255, 253, 255, 255, 255,}, - {205, 241, 255, 238, 239, 255, 254, 255, 255, 255,}, - {207, 242, 255, 239, 240, 255, 254, 255, 255, 255,}, - {209, 243, 255, 240, 241, 255, 254, 255, 255, 255,}, - {211, 244, 255, 241, 242, 255, 254, 255, 255, 255,}, - {214, 245, 255, 242, 243, 255, 255, 255, 255, 255,}, - {216, 246, 255, 243, 244, 255, 255, 255, 255, 255,}, - {218, 247, 255, 244, 245, 255, 255, 255, 255, 255,}, - {221, 248, 255, 246, 246, 255, 255, 255, 255, 255,}, - {224, 249, 255, 247, 247, 255, 255, 255, 255, 255,}, - {226, 250, 255, 248, 248, 255, 255, 255, 255, 255,}, - {229, 251, 255, 249, 249, 255, 255, 255, 255, 255,}, - {233, 252, 255, 251, 251, 255, 255, 255, 255, 255,}, - {236, 253, 255, 252, 252, 255, 255, 255, 255, 255,}, - {241, 254, 255, 253, 253, 255, 255, 255, 255, 255,}, - {246, 255, 255, 254, 254, 255, 255, 255, 255, 255,}, -}; +#else const vp9_prob vp9_modelcoefprobs_gg75p1[COEFPROB_MODELS][ENTROPY_NODES - 1] = { - // Probs generated with a Generalized Gaussian (with shape parameter 0.625) + // Probs generated with a Generalized Gaussian (with shape parameter 0.75) // source model with varying quantizer step size for a uniform quantizer - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, // do not use {1, 1, 3, 86, 129, 6, 87, 21, 90, 26,}, - {1, 2, 6, 87, 129, 11, 88, 39, 93, 47,}, {2, 3, 9, 87, 130, 16, 89, 55, 96, 65,}, - {2, 4, 11, 88, 130, 21, 89, 69, 98, 81,}, {3, 5, 14, 88, 130, 26, 90, 82, 101, 95,}, - {3, 6, 17, 89, 131, 31, 91, 94, 103, 107,}, {4, 7, 20, 89, 131, 35, 92, 105, 105, 119,}, - {4, 8, 22, 90, 131, 40, 92, 115, 108, 129,}, {5, 9, 25, 90, 132, 44, 93, 124, 110, 138,}, - {5, 10, 27, 91, 132, 48, 94, 133, 112, 147,}, {6, 11, 30, 91, 132, 52, 95, 141, 114, 155,}, - {6, 12, 32, 92, 133, 56, 95, 148, 116, 162,}, {7, 13, 35, 92, 133, 60, 96, 155, 118, 168,}, - {7, 14, 37, 92, 133, 64, 97, 161, 121, 174,}, {8, 15, 40, 93, 134, 68, 97, 167, 123, 180,}, - {9, 16, 42, 93, 134, 71, 98, 173, 125, 185,}, {9, 17, 44, 94, 134, 75, 99, 178, 127, 190,}, - {10, 18, 47, 94, 135, 78, 99, 182, 129, 195,}, {10, 19, 49, 94, 135, 82, 100, 187, 131, 199,}, - {11, 20, 51, 95, 135, 85, 100, 191, 133, 202,}, {11, 21, 54, 95, 135, 88, 101, 195, 135, 206,}, - {12, 22, 56, 96, 136, 92, 102, 199, 137, 209,}, {13, 23, 58, 96, 136, 95, 102, 202, 138, 213,}, - {13, 24, 61, 96, 136, 98, 103, 206, 140, 215,}, {14, 25, 63, 97, 137, 101, 104, 209, 142, 218,}, - {14, 26, 65, 97, 137, 104, 104, 211, 144, 221,}, {15, 27, 67, 98, 137, 107, 105, 214, 146, 223,}, - {15, 28, 69, 98, 138, 110, 106, 217, 148, 225,}, {16, 29, 71, 98, 138, 113, 106, 219, 150, 227,}, - {17, 30, 73, 99, 138, 115, 107, 221, 151, 229,}, {17, 31, 76, 99, 138, 118, 107, 223, 153, 231,}, - {18, 32, 78, 100, 139, 121, 108, 225, 155, 232,}, {18, 33, 80, 100, 139, 123, 109, 227, 157, 234,}, - {19, 34, 82, 100, 139, 126, 109, 
229, 158, 235,}, {20, 35, 84, 101, 140, 128, 110, 231, 160, 237,}, - {20, 36, 86, 101, 140, 131, 111, 232, 162, 238,}, {21, 37, 88, 102, 140, 133, 111, 234, 164, 239,}, - {21, 38, 90, 102, 140, 136, 112, 235, 165, 240,}, {22, 39, 92, 102, 141, 138, 112, 236, 167, 241,}, - {23, 40, 94, 103, 141, 140, 113, 237, 169, 242,}, {23, 41, 95, 103, 141, 143, 114, 238, 170, 243,}, - {24, 42, 97, 103, 142, 145, 114, 240, 172, 244,}, {25, 43, 99, 104, 142, 147, 115, 241, 173, 245,}, - {25, 44, 101, 104, 142, 149, 116, 242, 175, 246,}, {26, 45, 103, 105, 142, 151, 116, 242, 176, 246,}, - {26, 46, 105, 105, 143, 153, 117, 243, 178, 247,}, {27, 47, 107, 105, 143, 156, 117, 244, 180, 248,}, - {28, 48, 108, 106, 143, 158, 118, 245, 181, 248,}, {28, 49, 110, 106, 144, 159, 119, 245, 182, 249,}, - {29, 50, 112, 107, 144, 161, 119, 246, 184, 249,}, {30, 51, 114, 107, 144, 163, 120, 247, 185, 250,}, - {30, 52, 115, 108, 144, 165, 121, 247, 187, 250,}, {31, 53, 117, 108, 145, 167, 121, 248, 188, 250,}, - {32, 54, 119, 108, 145, 169, 122, 248, 190, 251,}, {32, 55, 121, 109, 145, 171, 123, 249, 191, 251,}, - {33, 56, 122, 109, 146, 172, 123, 249, 192, 251,}, {34, 57, 124, 110, 146, 174, 124, 250, 194, 252,}, - {34, 58, 126, 110, 146, 176, 125, 250, 195, 252,}, {35, 59, 127, 110, 147, 177, 125, 250, 196, 252,}, - {36, 60, 129, 111, 147, 179, 126, 251, 197, 253,}, {36, 61, 130, 111, 147, 181, 127, 251, 199, 253,}, - {37, 62, 132, 112, 147, 182, 127, 251, 200, 253,}, {38, 63, 134, 112, 148, 184, 128, 252, 201, 253,}, - {38, 64, 135, 112, 148, 185, 128, 252, 202, 253,}, {39, 65, 137, 113, 148, 187, 129, 252, 204, 254,}, - {40, 66, 138, 113, 149, 188, 130, 253, 205, 254,}, {40, 67, 140, 114, 149, 190, 130, 253, 206, 254,}, - {41, 68, 141, 114, 149, 191, 131, 253, 207, 254,}, {42, 69, 143, 115, 150, 192, 132, 253, 208, 254,}, - {42, 70, 144, 115, 150, 194, 132, 253, 209, 254,}, {43, 71, 146, 115, 150, 195, 133, 254, 210, 254,}, - {44, 72, 147, 116, 150, 197, 134, 254, 211, 255,}, {44, 73, 149, 116, 151, 198, 134, 254, 212, 255,}, - {45, 74, 150, 117, 151, 199, 135, 254, 213, 255,}, {46, 75, 152, 117, 151, 200, 136, 254, 214, 255,}, - {46, 76, 153, 118, 152, 202, 136, 254, 215, 255,}, {47, 77, 154, 118, 152, 203, 137, 254, 216, 255,}, - {48, 78, 156, 119, 152, 204, 138, 254, 217, 255,}, {49, 79, 157, 119, 153, 205, 139, 255, 218, 255,}, - {49, 80, 159, 119, 153, 206, 139, 255, 219, 255,}, {50, 81, 160, 120, 153, 207, 140, 255, 220, 255,}, - {51, 82, 161, 120, 154, 208, 141, 255, 221, 255,}, {51, 83, 163, 121, 154, 210, 141, 255, 222, 255,}, - {52, 84, 164, 121, 154, 211, 142, 255, 223, 255,}, {53, 85, 165, 122, 154, 212, 143, 255, 223, 255,}, - {54, 86, 166, 122, 155, 213, 143, 255, 224, 255,}, {54, 87, 168, 123, 155, 214, 144, 255, 225, 255,}, - {55, 88, 169, 123, 155, 215, 145, 255, 226, 255,}, {56, 89, 170, 123, 156, 216, 145, 255, 227, 255,}, - {57, 90, 172, 124, 156, 217, 146, 255, 227, 255,}, {57, 91, 173, 124, 156, 218, 147, 255, 228, 255,}, - {58, 92, 174, 125, 157, 218, 147, 255, 229, 255,}, {59, 93, 175, 125, 157, 219, 148, 255, 230, 255,}, - {60, 94, 176, 126, 157, 220, 149, 255, 230, 255,}, {60, 95, 178, 126, 158, 221, 150, 255, 231, 255,}, - {61, 96, 179, 127, 158, 222, 150, 255, 232, 255,}, {62, 97, 180, 127, 158, 223, 151, 255, 232, 255,}, - {63, 98, 181, 128, 159, 224, 152, 255, 233, 255,}, {63, 99, 182, 128, 159, 224, 152, 255, 234, 255,}, - {64, 100, 183, 129, 159, 225, 153, 255, 234, 255,}, {65, 101, 184, 129, 160, 226, 154, 255, 235, 255,}, - {66, 102, 186, 130, 160, 227, 154, 255, 235, 255,}, {66, 
103, 187, 130, 160, 227, 155, 255, 236, 255,}, - {67, 104, 188, 131, 161, 228, 156, 255, 236, 255,}, {68, 105, 189, 131, 161, 229, 157, 255, 237, 255,}, - {69, 106, 190, 132, 161, 230, 157, 255, 238, 255,}, {69, 107, 191, 132, 162, 230, 158, 255, 238, 255,}, - {70, 108, 192, 133, 162, 231, 159, 255, 239, 255,}, {71, 109, 193, 133, 163, 232, 159, 255, 239, 255,}, - {72, 110, 194, 134, 163, 232, 160, 255, 240, 255,}, {73, 111, 195, 134, 163, 233, 161, 255, 240, 255,}, - {73, 112, 196, 135, 164, 233, 162, 255, 241, 255,}, {74, 113, 197, 135, 164, 234, 162, 255, 241, 255,}, - {75, 114, 198, 136, 164, 235, 163, 255, 241, 255,}, {76, 115, 199, 136, 165, 235, 164, 255, 242, 255,}, - {77, 116, 200, 137, 165, 236, 165, 255, 242, 255,}, {77, 117, 201, 137, 165, 236, 165, 255, 243, 255,}, - {78, 118, 202, 138, 166, 237, 166, 255, 243, 255,}, {79, 119, 203, 138, 166, 237, 167, 255, 244, 255,}, - {80, 120, 204, 139, 166, 238, 167, 255, 244, 255,}, {81, 121, 205, 139, 167, 238, 168, 255, 244, 255,}, - {82, 122, 206, 140, 167, 239, 169, 255, 245, 255,}, {82, 123, 206, 141, 168, 239, 170, 255, 245, 255,}, - {83, 124, 207, 141, 168, 240, 170, 255, 245, 255,}, {84, 125, 208, 142, 168, 240, 171, 255, 246, 255,}, - {85, 126, 209, 142, 169, 241, 172, 255, 246, 255,}, {86, 127, 210, 143, 169, 241, 173, 255, 246, 255,}, - {87, 128, 211, 143, 169, 242, 173, 255, 247, 255,}, {87, 129, 212, 144, 170, 242, 174, 255, 247, 255,}, - {88, 130, 212, 144, 170, 242, 175, 255, 247, 255,}, {89, 131, 213, 145, 171, 243, 176, 255, 248, 255,}, - {90, 132, 214, 146, 171, 243, 176, 255, 248, 255,}, {91, 133, 215, 146, 171, 244, 177, 255, 248, 255,}, - {92, 134, 216, 147, 172, 244, 178, 255, 248, 255,}, {93, 135, 216, 147, 172, 244, 179, 255, 249, 255,}, - {93, 136, 217, 148, 173, 245, 179, 255, 249, 255,}, {94, 137, 218, 148, 173, 245, 180, 255, 249, 255,}, - {95, 138, 219, 149, 173, 245, 181, 255, 249, 255,}, {96, 139, 220, 150, 174, 246, 181, 255, 250, 255,}, - {97, 140, 220, 150, 174, 246, 182, 255, 250, 255,}, {98, 141, 221, 151, 175, 246, 183, 255, 250, 255,}, - {99, 142, 222, 151, 175, 247, 184, 255, 250, 255,}, {100, 143, 222, 152, 175, 247, 184, 255, 251, 255,}, - {100, 144, 223, 153, 176, 247, 185, 255, 251, 255,}, {101, 145, 224, 153, 176, 248, 186, 255, 251, 255,}, - {102, 146, 224, 154, 177, 248, 187, 255, 251, 255,}, {103, 147, 225, 154, 177, 248, 187, 255, 251, 255,}, - {104, 148, 226, 155, 178, 248, 188, 255, 252, 255,}, {105, 149, 226, 156, 178, 249, 189, 255, 252, 255,}, - {106, 150, 227, 156, 178, 249, 190, 255, 252, 255,}, {107, 151, 228, 157, 179, 249, 190, 255, 252, 255,}, - {108, 152, 228, 158, 179, 249, 191, 255, 252, 255,}, {109, 153, 229, 158, 180, 250, 192, 255, 252, 255,}, - {110, 154, 230, 159, 180, 250, 193, 255, 253, 255,}, {111, 155, 230, 159, 181, 250, 193, 255, 253, 255,}, - {111, 156, 231, 160, 181, 250, 194, 255, 253, 255,}, {112, 157, 231, 161, 181, 251, 195, 255, 253, 255,}, - {113, 158, 232, 161, 182, 251, 196, 255, 253, 255,}, {114, 159, 233, 162, 182, 251, 196, 255, 253, 255,}, - {115, 160, 233, 163, 183, 251, 197, 255, 253, 255,}, {116, 161, 234, 163, 183, 251, 198, 255, 253, 255,}, - {117, 162, 234, 164, 184, 252, 199, 255, 254, 255,}, {118, 163, 235, 165, 184, 252, 199, 255, 254, 255,}, - {119, 164, 235, 165, 185, 252, 200, 255, 254, 255,}, {120, 165, 236, 166, 185, 252, 201, 255, 254, 255,}, - {121, 166, 236, 167, 186, 252, 202, 255, 254, 255,}, {122, 167, 237, 167, 186, 252, 202, 255, 254, 255,}, - {123, 168, 237, 168, 187, 253, 203, 255, 254, 255,}, {124, 169, 238, 169, 187, 253, 204, 
255, 254, 255,}, - {125, 170, 238, 169, 188, 253, 205, 255, 254, 255,}, {126, 171, 239, 170, 188, 253, 205, 255, 254, 255,}, - {127, 172, 239, 171, 189, 253, 206, 255, 254, 255,}, {128, 173, 240, 172, 189, 253, 207, 255, 255, 255,}, - {129, 174, 240, 172, 190, 253, 208, 255, 255, 255,}, {130, 175, 241, 173, 190, 253, 208, 255, 255, 255,}, - {131, 176, 241, 174, 191, 254, 209, 255, 255, 255,}, {132, 177, 242, 175, 191, 254, 210, 255, 255, 255,}, - {133, 178, 242, 175, 192, 254, 210, 255, 255, 255,}, {134, 179, 242, 176, 192, 254, 211, 255, 255, 255,}, - {135, 180, 243, 177, 193, 254, 212, 255, 255, 255,}, {137, 181, 243, 177, 193, 254, 213, 255, 255, 255,}, - {138, 182, 244, 178, 194, 254, 213, 255, 255, 255,}, {139, 183, 244, 179, 194, 254, 214, 255, 255, 255,}, - {140, 184, 244, 180, 195, 254, 215, 255, 255, 255,}, {141, 185, 245, 181, 195, 254, 216, 255, 255, 255,}, - {142, 186, 245, 181, 196, 255, 216, 255, 255, 255,}, {143, 187, 245, 182, 196, 255, 217, 255, 255, 255,}, - {144, 188, 246, 183, 197, 255, 218, 255, 255, 255,}, {145, 189, 246, 184, 197, 255, 218, 255, 255, 255,}, - {146, 190, 247, 184, 198, 255, 219, 255, 255, 255,}, {147, 191, 247, 185, 199, 255, 220, 255, 255, 255,}, - {149, 192, 247, 186, 199, 255, 221, 255, 255, 255,}, {150, 193, 247, 187, 200, 255, 221, 255, 255, 255,}, - {151, 194, 248, 188, 200, 255, 222, 255, 255, 255,}, {152, 195, 248, 188, 201, 255, 223, 255, 255, 255,}, - {153, 196, 248, 189, 201, 255, 223, 255, 255, 255,}, {154, 197, 249, 190, 202, 255, 224, 255, 255, 255,}, - {156, 198, 249, 191, 203, 255, 225, 255, 255, 255,}, {157, 199, 249, 192, 203, 255, 225, 255, 255, 255,}, - {158, 200, 250, 193, 204, 255, 226, 255, 255, 255,}, {159, 201, 250, 193, 205, 255, 227, 255, 255, 255,}, - {160, 202, 250, 194, 205, 255, 227, 255, 255, 255,}, {162, 203, 250, 195, 206, 255, 228, 255, 255, 255,}, - {163, 204, 251, 196, 206, 255, 229, 255, 255, 255,}, {164, 205, 251, 197, 207, 255, 229, 255, 255, 255,}, - {165, 206, 251, 198, 208, 255, 230, 255, 255, 255,}, {166, 207, 251, 199, 208, 255, 231, 255, 255, 255,}, - {168, 208, 251, 200, 209, 255, 231, 255, 255, 255,}, {169, 209, 252, 201, 210, 255, 232, 255, 255, 255,}, - {170, 210, 252, 201, 210, 255, 233, 255, 255, 255,}, {172, 211, 252, 202, 211, 255, 233, 255, 255, 255,}, - {173, 212, 252, 203, 212, 255, 234, 255, 255, 255,}, {174, 213, 252, 204, 212, 255, 235, 255, 255, 255,}, - {175, 214, 253, 205, 213, 255, 235, 255, 255, 255,}, {177, 215, 253, 206, 214, 255, 236, 255, 255, 255,}, - {178, 216, 253, 207, 215, 255, 237, 255, 255, 255,}, {179, 217, 253, 208, 215, 255, 237, 255, 255, 255,}, - {181, 218, 253, 209, 216, 255, 238, 255, 255, 255,}, {182, 219, 254, 210, 217, 255, 238, 255, 255, 255,}, - {184, 220, 254, 211, 217, 255, 239, 255, 255, 255,}, {185, 221, 254, 212, 218, 255, 240, 255, 255, 255,}, - {186, 222, 254, 213, 219, 255, 240, 255, 255, 255,}, {188, 223, 254, 214, 220, 255, 241, 255, 255, 255,}, - {189, 224, 254, 215, 221, 255, 241, 255, 255, 255,}, {191, 225, 254, 216, 221, 255, 242, 255, 255, 255,}, - {192, 226, 254, 217, 222, 255, 243, 255, 255, 255,}, {194, 227, 255, 218, 223, 255, 243, 255, 255, 255,}, - {195, 228, 255, 219, 224, 255, 244, 255, 255, 255,}, {197, 229, 255, 220, 225, 255, 244, 255, 255, 255,}, - {198, 230, 255, 221, 225, 255, 245, 255, 255, 255,}, {200, 231, 255, 222, 226, 255, 245, 255, 255, 255,}, - {201, 232, 255, 223, 227, 255, 246, 255, 255, 255,}, {203, 233, 255, 224, 228, 255, 247, 255, 255, 255,}, - {205, 234, 255, 226, 229, 255, 247, 255, 255, 255,}, {206, 235, 255, 227, 230, 
255, 248, 255, 255, 255,}, - {208, 236, 255, 228, 231, 255, 248, 255, 255, 255,}, {210, 237, 255, 229, 232, 255, 249, 255, 255, 255,}, - {211, 238, 255, 230, 233, 255, 249, 255, 255, 255,}, {213, 239, 255, 231, 234, 255, 250, 255, 255, 255,}, - {215, 240, 255, 233, 235, 255, 250, 255, 255, 255,}, {217, 241, 255, 234, 236, 255, 251, 255, 255, 255,}, - {219, 242, 255, 235, 237, 255, 251, 255, 255, 255,}, {221, 243, 255, 236, 238, 255, 252, 255, 255, 255,}, - {223, 244, 255, 237, 239, 255, 252, 255, 255, 255,}, {225, 245, 255, 239, 240, 255, 252, 255, 255, 255,}, - {227, 246, 255, 240, 241, 255, 253, 255, 255, 255,}, {229, 247, 255, 241, 242, 255, 253, 255, 255, 255,}, - {231, 248, 255, 243, 244, 255, 254, 255, 255, 255,}, {233, 249, 255, 244, 245, 255, 254, 255, 255, 255,}, - {236, 250, 255, 246, 246, 255, 254, 255, 255, 255,}, {238, 251, 255, 247, 247, 255, 255, 255, 255, 255,}, - {241, 252, 255, 249, 249, 255, 255, 255, 255, 255,}, {244, 253, 255, 250, 250, 255, 255, 255, 255, 255,}, - {247, 254, 255, 252, 252, 255, 255, 255, 255, 255,}, {251, 255, 255, 254, 254, 255, 255, 255, 255, 255,}, }; const vp9_prob vp9_modelcoefprobs_gg625p1[COEFPROB_MODELS][ENTROPY_NODES - 1] = { // Probs generated with a Generalized Gaussian (with shape parameter 0.625) // source model with varying quantizer step size for a uniform quantizer - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, // do not use {1, 1, 3, 87, 129, 6, 87, 20, 91, 24,}, - {1, 2, 6, 88, 130, 11, 89, 36, 94, 41,}, {2, 3, 8, 88, 130, 15, 90, 50, 97, 56,}, - {2, 4, 11, 89, 131, 20, 90, 62, 99, 70,}, {3, 5, 14, 90, 131, 24, 91, 74, 102, 81,}, - {3, 6, 16, 90, 132, 29, 92, 84, 104, 92,}, {4, 7, 19, 91, 132, 33, 93, 93, 106, 101,}, - {4, 8, 21, 91, 132, 37, 93, 102, 108, 110,}, {5, 9, 24, 92, 133, 40, 94, 110, 110, 118,}, - {5, 10, 26, 92, 133, 44, 95, 118, 111, 125,}, {6, 11, 29, 93, 134, 48, 96, 125, 113, 132,}, - {7, 12, 31, 93, 134, 51, 96, 132, 115, 139,}, {7, 13, 33, 93, 134, 55, 97, 138, 117, 145,}, - {8, 14, 36, 94, 135, 58, 97, 144, 119, 150,}, {8, 15, 38, 94, 135, 62, 98, 149, 120, 155,}, - {9, 16, 40, 95, 135, 65, 99, 154, 122, 160,}, {10, 17, 42, 95, 136, 68, 99, 159, 124, 165,}, - {10, 18, 45, 96, 136, 71, 100, 164, 125, 169,}, {11, 19, 47, 96, 136, 74, 100, 168, 127, 174,}, - {11, 20, 49, 96, 136, 77, 101, 173, 128, 177,}, {12, 21, 51, 97, 137, 80, 102, 176, 130, 181,}, - {13, 22, 53, 97, 137, 83, 102, 180, 131, 185,}, {13, 23, 55, 98, 137, 86, 103, 184, 133, 188,}, - {14, 24, 57, 98, 138, 89, 103, 187, 135, 191,}, {14, 25, 59, 98, 138, 91, 104, 190, 136, 194,}, - {15, 26, 61, 99, 138, 94, 104, 193, 138, 197,}, {16, 27, 64, 99, 139, 97, 105, 196, 139, 200,}, - {16, 28, 66, 100, 139, 99, 106, 199, 141, 202,}, {17, 29, 68, 100, 139, 102, 106, 201, 142, 205,}, - {18, 30, 69, 100, 139, 104, 107, 204, 143, 207,}, {18, 31, 71, 101, 140, 107, 107, 206, 145, 209,}, - {19, 32, 73, 101, 140, 109, 108, 209, 146, 211,}, {20, 33, 75, 102, 140, 112, 108, 211, 148, 213,}, - {20, 34, 77, 102, 141, 114, 109, 213, 149, 215,}, {21, 35, 79, 102, 141, 116, 109, 215, 150, 217,}, - {22, 36, 81, 103, 141, 119, 110, 217, 152, 219,}, {22, 37, 83, 103, 141, 121, 110, 218, 153, 220,}, - {23, 38, 85, 103, 142, 123, 111, 220, 155, 222,}, {24, 39, 87, 104, 142, 125, 112, 222, 156, 224,}, - {24, 40, 88, 104, 142, 127, 112, 223, 157, 225,}, {25, 41, 90, 105, 143, 129, 113, 225, 159, 226,}, - {26, 42, 92, 105, 143, 131, 113, 226, 160, 228,}, {26, 43, 94, 105, 143, 133, 114, 227, 161, 229,}, - {27, 44, 95, 106, 143, 135, 114, 229, 162, 230,}, {28, 45, 97, 106, 144, 137, 115, 230, 
164, 231,}, - {28, 46, 99, 107, 144, 139, 115, 231, 165, 232,}, {29, 47, 101, 107, 144, 141, 116, 232, 166, 233,}, - {30, 48, 102, 107, 145, 143, 116, 233, 168, 234,}, {31, 49, 104, 108, 145, 145, 117, 234, 169, 235,}, - {31, 50, 106, 108, 145, 147, 118, 235, 170, 236,}, {32, 51, 107, 108, 145, 149, 118, 236, 171, 237,}, - {33, 52, 109, 109, 146, 150, 119, 237, 172, 238,}, {33, 53, 111, 109, 146, 152, 119, 238, 174, 239,}, - {34, 54, 112, 110, 146, 154, 120, 239, 175, 240,}, {35, 55, 114, 110, 146, 156, 120, 240, 176, 240,}, - {36, 56, 115, 110, 147, 157, 121, 240, 177, 241,}, {36, 57, 117, 111, 147, 159, 121, 241, 178, 242,}, - {37, 58, 119, 111, 147, 161, 122, 242, 180, 242,}, {38, 59, 120, 112, 148, 162, 122, 242, 181, 243,}, - {38, 60, 122, 112, 148, 164, 123, 243, 182, 244,}, {39, 61, 123, 112, 148, 165, 124, 244, 183, 244,}, - {40, 62, 125, 113, 148, 167, 124, 244, 184, 245,}, {41, 63, 126, 113, 149, 168, 125, 245, 185, 245,}, - {41, 64, 128, 114, 149, 170, 125, 245, 186, 246,}, {42, 65, 129, 114, 149, 171, 126, 246, 187, 246,}, - {43, 66, 131, 114, 150, 173, 126, 246, 188, 247,}, {44, 67, 132, 115, 150, 174, 127, 247, 189, 247,}, - {44, 68, 134, 115, 150, 176, 127, 247, 191, 247,}, {45, 69, 135, 116, 150, 177, 128, 248, 192, 248,}, - {46, 70, 136, 116, 151, 178, 129, 248, 193, 248,}, {47, 71, 138, 116, 151, 180, 129, 248, 194, 249,}, - {48, 72, 139, 117, 151, 181, 130, 249, 195, 249,}, {48, 73, 141, 117, 152, 183, 130, 249, 196, 249,}, - {49, 74, 142, 118, 152, 184, 131, 249, 197, 250,}, {50, 75, 143, 118, 152, 185, 131, 250, 198, 250,}, - {51, 76, 145, 118, 152, 186, 132, 250, 199, 250,}, {51, 77, 146, 119, 153, 188, 132, 250, 200, 250,}, - {52, 78, 148, 119, 153, 189, 133, 251, 201, 251,}, {53, 79, 149, 120, 153, 190, 134, 251, 201, 251,}, - {54, 80, 150, 120, 154, 191, 134, 251, 202, 251,}, {55, 81, 151, 120, 154, 192, 135, 251, 203, 251,}, - {55, 82, 153, 121, 154, 194, 135, 252, 204, 252,}, {56, 83, 154, 121, 155, 195, 136, 252, 205, 252,}, - {57, 84, 155, 122, 155, 196, 136, 252, 206, 252,}, {58, 85, 157, 122, 155, 197, 137, 252, 207, 252,}, - {59, 86, 158, 123, 155, 198, 138, 252, 208, 252,}, {59, 87, 159, 123, 156, 199, 138, 253, 209, 253,}, - {60, 88, 160, 123, 156, 200, 139, 253, 210, 253,}, {61, 89, 162, 124, 156, 201, 139, 253, 210, 253,}, - {62, 90, 163, 124, 157, 202, 140, 253, 211, 253,}, {63, 91, 164, 125, 157, 203, 140, 253, 212, 253,}, - {64, 92, 165, 125, 157, 204, 141, 253, 213, 253,}, {64, 93, 166, 126, 158, 205, 142, 254, 214, 253,}, - {65, 94, 168, 126, 158, 206, 142, 254, 214, 254,}, {66, 95, 169, 126, 158, 207, 143, 254, 215, 254,}, - {67, 96, 170, 127, 158, 208, 143, 254, 216, 254,}, {68, 97, 171, 127, 159, 209, 144, 254, 217, 254,}, - {69, 98, 172, 128, 159, 210, 145, 254, 218, 254,}, {69, 99, 173, 128, 159, 211, 145, 254, 218, 254,}, - {70, 100, 175, 129, 160, 212, 146, 254, 219, 254,}, {71, 101, 176, 129, 160, 213, 146, 254, 220, 254,}, - {72, 102, 177, 130, 160, 214, 147, 254, 220, 254,}, {73, 103, 178, 130, 161, 214, 148, 255, 221, 255,}, - {74, 104, 179, 130, 161, 215, 148, 255, 222, 255,}, {75, 105, 180, 131, 161, 216, 149, 255, 223, 255,}, - {75, 106, 181, 131, 162, 217, 149, 255, 223, 255,}, {76, 107, 182, 132, 162, 218, 150, 255, 224, 255,}, - {77, 108, 183, 132, 162, 219, 151, 255, 225, 255,}, {78, 109, 184, 133, 163, 219, 151, 255, 225, 255,}, - {79, 110, 185, 133, 163, 220, 152, 255, 226, 255,}, {80, 111, 186, 134, 163, 221, 152, 255, 226, 255,}, - {81, 112, 187, 134, 164, 222, 153, 255, 227, 255,}, {82, 113, 188, 135, 164, 222, 154, 255, 228, 
255,}, - {83, 114, 189, 135, 164, 223, 154, 255, 228, 255,}, {83, 115, 190, 136, 165, 224, 155, 255, 229, 255,}, - {84, 116, 191, 136, 165, 224, 156, 255, 230, 255,}, {85, 117, 192, 137, 165, 225, 156, 255, 230, 255,}, - {86, 118, 193, 137, 166, 226, 157, 255, 231, 255,}, {87, 119, 194, 137, 166, 226, 157, 255, 231, 255,}, - {88, 120, 195, 138, 166, 227, 158, 255, 232, 255,}, {89, 121, 196, 138, 167, 228, 159, 255, 232, 255,}, - {90, 122, 197, 139, 167, 228, 159, 255, 233, 255,}, {91, 123, 198, 139, 167, 229, 160, 255, 233, 255,}, - {92, 124, 199, 140, 168, 230, 161, 255, 234, 255,}, {93, 125, 200, 140, 168, 230, 161, 255, 234, 255,}, - {93, 126, 201, 141, 168, 231, 162, 255, 235, 255,}, {94, 127, 202, 141, 169, 231, 163, 255, 235, 255,}, - {95, 128, 203, 142, 169, 232, 163, 255, 236, 255,}, {96, 129, 203, 142, 169, 233, 164, 255, 236, 255,}, - {97, 130, 204, 143, 170, 233, 164, 255, 237, 255,}, {98, 131, 205, 143, 170, 234, 165, 255, 237, 255,}, - {99, 132, 206, 144, 170, 234, 166, 255, 238, 255,}, {100, 133, 207, 145, 171, 235, 166, 255, 238, 255,}, - {101, 134, 208, 145, 171, 235, 167, 255, 239, 255,}, {102, 135, 209, 146, 171, 236, 168, 255, 239, 255,}, - {103, 136, 209, 146, 172, 236, 168, 255, 240, 255,}, {104, 137, 210, 147, 172, 237, 169, 255, 240, 255,}, - {105, 138, 211, 147, 173, 237, 170, 255, 240, 255,}, {106, 139, 212, 148, 173, 238, 170, 255, 241, 255,}, - {107, 140, 213, 148, 173, 238, 171, 255, 241, 255,}, {108, 141, 213, 149, 174, 239, 172, 255, 242, 255,}, - {109, 142, 214, 149, 174, 239, 172, 255, 242, 255,}, {110, 143, 215, 150, 174, 240, 173, 255, 242, 255,}, - {111, 144, 216, 150, 175, 240, 174, 255, 243, 255,}, {112, 145, 216, 151, 175, 240, 174, 255, 243, 255,}, - {113, 146, 217, 152, 176, 241, 175, 255, 243, 255,}, {114, 147, 218, 152, 176, 241, 176, 255, 244, 255,}, - {115, 148, 219, 153, 176, 242, 176, 255, 244, 255,}, {116, 149, 219, 153, 177, 242, 177, 255, 244, 255,}, - {117, 150, 220, 154, 177, 242, 178, 255, 245, 255,}, {118, 151, 221, 154, 178, 243, 178, 255, 245, 255,}, - {119, 152, 221, 155, 178, 243, 179, 255, 245, 255,}, {120, 153, 222, 156, 178, 244, 180, 255, 246, 255,}, - {121, 154, 223, 156, 179, 244, 180, 255, 246, 255,}, {122, 155, 223, 157, 179, 244, 181, 255, 246, 255,}, - {123, 156, 224, 157, 180, 245, 182, 255, 247, 255,}, {124, 157, 225, 158, 180, 245, 183, 255, 247, 255,}, - {125, 158, 225, 159, 180, 245, 183, 255, 247, 255,}, {126, 159, 226, 159, 181, 246, 184, 255, 247, 255,}, - {127, 160, 227, 160, 181, 246, 185, 255, 248, 255,}, {128, 161, 227, 160, 182, 246, 185, 255, 248, 255,}, - {129, 162, 228, 161, 182, 246, 186, 255, 248, 255,}, {130, 163, 229, 162, 183, 247, 187, 255, 248, 255,}, - {131, 164, 229, 162, 183, 247, 187, 255, 249, 255,}, {132, 165, 230, 163, 183, 247, 188, 255, 249, 255,}, - {133, 166, 230, 163, 184, 248, 189, 255, 249, 255,}, {135, 167, 231, 164, 184, 248, 190, 255, 249, 255,}, - {136, 168, 232, 165, 185, 248, 190, 255, 250, 255,}, {137, 169, 232, 165, 185, 248, 191, 255, 250, 255,}, - {138, 170, 233, 166, 186, 249, 192, 255, 250, 255,}, {139, 171, 233, 167, 186, 249, 192, 255, 250, 255,}, - {140, 172, 234, 167, 187, 249, 193, 255, 251, 255,}, {141, 173, 234, 168, 187, 249, 194, 255, 251, 255,}, - {142, 174, 235, 169, 187, 250, 195, 255, 251, 255,}, {143, 175, 235, 169, 188, 250, 195, 255, 251, 255,}, - {144, 176, 236, 170, 188, 250, 196, 255, 251, 255,}, {146, 177, 236, 171, 189, 250, 197, 255, 251, 255,}, - {147, 178, 237, 171, 189, 251, 197, 255, 252, 255,}, {148, 179, 237, 172, 190, 251, 198, 255, 252, 255,}, - 
{149, 180, 238, 173, 190, 251, 199, 255, 252, 255,}, {150, 181, 238, 173, 191, 251, 200, 255, 252, 255,}, - {151, 182, 239, 174, 191, 251, 200, 255, 252, 255,}, {152, 183, 239, 175, 192, 251, 201, 255, 252, 255,}, - {153, 184, 240, 176, 192, 252, 202, 255, 253, 255,}, {155, 185, 240, 176, 193, 252, 203, 255, 253, 255,}, - {156, 186, 241, 177, 193, 252, 203, 255, 253, 255,}, {157, 187, 241, 178, 194, 252, 204, 255, 253, 255,}, - {158, 188, 242, 179, 194, 252, 205, 255, 253, 255,}, {159, 189, 242, 179, 195, 252, 206, 255, 253, 255,}, - {160, 190, 242, 180, 195, 253, 206, 255, 253, 255,}, {162, 191, 243, 181, 196, 253, 207, 255, 253, 255,}, - {163, 192, 243, 182, 196, 253, 208, 255, 254, 255,}, {164, 193, 244, 182, 197, 253, 209, 255, 254, 255,}, - {165, 194, 244, 183, 198, 253, 209, 255, 254, 255,}, {166, 195, 244, 184, 198, 253, 210, 255, 254, 255,}, - {168, 196, 245, 185, 199, 253, 211, 255, 254, 255,}, {169, 197, 245, 185, 199, 254, 212, 255, 254, 255,}, - {170, 198, 246, 186, 200, 254, 212, 255, 254, 255,}, {171, 199, 246, 187, 200, 254, 213, 255, 254, 255,}, - {172, 200, 246, 188, 201, 254, 214, 255, 254, 255,}, {174, 201, 247, 189, 201, 254, 215, 255, 254, 255,}, - {175, 202, 247, 189, 202, 254, 215, 255, 255, 255,}, {176, 203, 247, 190, 203, 254, 216, 255, 255, 255,}, - {177, 204, 248, 191, 203, 254, 217, 255, 255, 255,}, {179, 205, 248, 192, 204, 254, 218, 255, 255, 255,}, - {180, 206, 248, 193, 204, 254, 218, 255, 255, 255,}, {181, 207, 249, 194, 205, 255, 219, 255, 255, 255,}, - {183, 208, 249, 195, 206, 255, 220, 255, 255, 255,}, {184, 209, 249, 195, 206, 255, 221, 255, 255, 255,}, - {185, 210, 250, 196, 207, 255, 221, 255, 255, 255,}, {186, 211, 250, 197, 208, 255, 222, 255, 255, 255,}, - {188, 212, 250, 198, 208, 255, 223, 255, 255, 255,}, {189, 213, 250, 199, 209, 255, 224, 255, 255, 255,}, - {190, 214, 251, 200, 210, 255, 224, 255, 255, 255,}, {192, 215, 251, 201, 210, 255, 225, 255, 255, 255,}, - {193, 216, 251, 202, 211, 255, 226, 255, 255, 255,}, {194, 217, 251, 203, 212, 255, 227, 255, 255, 255,}, - {196, 218, 252, 204, 212, 255, 228, 255, 255, 255,}, {197, 219, 252, 205, 213, 255, 228, 255, 255, 255,}, - {198, 220, 252, 206, 214, 255, 229, 255, 255, 255,}, {200, 221, 252, 207, 215, 255, 230, 255, 255, 255,}, - {201, 222, 252, 208, 215, 255, 231, 255, 255, 255,}, {202, 223, 253, 209, 216, 255, 231, 255, 255, 255,}, - {204, 224, 253, 210, 217, 255, 232, 255, 255, 255,}, {205, 225, 253, 211, 218, 255, 233, 255, 255, 255,}, - {207, 226, 253, 212, 218, 255, 234, 255, 255, 255,}, {208, 227, 253, 213, 219, 255, 234, 255, 255, 255,}, - {209, 228, 254, 214, 220, 255, 235, 255, 255, 255,}, {211, 229, 254, 215, 221, 255, 236, 255, 255, 255,}, - {212, 230, 254, 216, 222, 255, 237, 255, 255, 255,}, {214, 231, 254, 217, 223, 255, 238, 255, 255, 255,}, - {215, 232, 254, 218, 223, 255, 238, 255, 255, 255,}, {217, 233, 254, 219, 224, 255, 239, 255, 255, 255,}, - {218, 234, 255, 221, 225, 255, 240, 255, 255, 255,}, {220, 235, 255, 222, 226, 255, 241, 255, 255, 255,}, - {221, 236, 255, 223, 227, 255, 241, 255, 255, 255,}, {223, 237, 255, 224, 228, 255, 242, 255, 255, 255,}, - {224, 238, 255, 225, 229, 255, 243, 255, 255, 255,}, {226, 239, 255, 227, 230, 255, 244, 255, 255, 255,}, - {227, 240, 255, 228, 231, 255, 244, 255, 255, 255,}, {229, 241, 255, 229, 232, 255, 245, 255, 255, 255,}, - {231, 242, 255, 231, 233, 255, 246, 255, 255, 255,}, {232, 243, 255, 232, 234, 255, 247, 255, 255, 255,}, - {234, 244, 255, 233, 236, 255, 247, 255, 255, 255,}, {235, 245, 255, 235, 237, 255, 248, 255, 255, 
255,}, - {237, 246, 255, 236, 238, 255, 249, 255, 255, 255,}, {239, 247, 255, 238, 239, 255, 250, 255, 255, 255,}, - {241, 248, 255, 239, 241, 255, 250, 255, 255, 255,}, {242, 249, 255, 241, 242, 255, 251, 255, 255, 255,}, - {244, 250, 255, 243, 243, 255, 252, 255, 255, 255,}, {246, 251, 255, 244, 245, 255, 253, 255, 255, 255,}, - {248, 252, 255, 246, 247, 255, 253, 255, 255, 255,}, {250, 253, 255, 248, 248, 255, 254, 255, 255, 255,}, - {252, 254, 255, 250, 250, 255, 255, 255, 255, 255,}, {254, 255, 255, 253, 253, 255, 255, 255, 255, 255,}, }; +#endif void vp9_get_model_distribution(vp9_prob p, vp9_prob *tree_probs, int b, int r) { @@ -2047,11 +1273,18 @@ void vp9_get_model_distribution(vp9_prob p, vp9_prob *tree_probs, else if (r != INTRA_FRAME && b == PLANE_TYPE_Y_WITH_DC) model = vp9_modelcoefprobs_gg75p1; else - model = vp9_modelcoefprobs_gg75p1; + model = vp9_modelcoefprobs_gg625p1; #endif - vpx_memcpy(tree_probs + UNCONSTRAINED_NODES, - model[p] + UNCONSTRAINED_NODES - 1, - (ENTROPY_NODES - UNCONSTRAINED_NODES) * sizeof(vp9_prob)); + if (p & 1) { // odd + vpx_memcpy(tree_probs + UNCONSTRAINED_NODES, + model[(p - 1) / 2] + UNCONSTRAINED_NODES - 1, + (ENTROPY_NODES - UNCONSTRAINED_NODES) * sizeof(vp9_prob)); + } else { + // interpolate + int i; + for (i = UNCONSTRAINED_NODES; i < ENTROPY_NODES; ++i) + tree_probs[i] = (model[p / 2 - 1][i - 1] + model[p / 2][i - 1]) >> 1; + } } #endif @@ -2216,66 +1449,6 @@ void vp9_default_coef_probs(VP9_COMMON *pc) { #endif } -#if CONFIG_MODELCOEFPROB -// This is a placeholder function that will enable the default coef probs to -// change for key frames based on the base_qindex. If base_qindex is large, -// we can expect probabilities of zeros to be bigger, and vice versa. The rest -// of the probabilities are derived from the nodel. -void vp9_adjust_default_coef_probs(VP9_COMMON *cm) { - static const int factor_bits = 4; - static const int factor_rnd = 8; // (1 << (factor_bits - 1)) - int b, r, c, p; - int factor = (1 << factor_bits); - /* - if (cm->base_qindex < 32) - factor -= ((32 - cm->base_qindex) >> 4); - */ - if (cm->base_qindex > 128) - factor += ((cm->base_qindex - 128) >> 4); - // printf(" Q %d factor %d\n", cm->base_qindex, factor); - - for (b = 0; b < BLOCK_TYPES; ++b) - for (r = 0; r < REF_TYPES; ++r) - for (c = 0; c < COEF_BANDS; ++c) - for (p = 0; p < PREV_COEF_CONTEXTS; ++p) { - int t, x; - vp9_prob prob; - for (t = 0; t < UNCONSTRAINED_NODES; t++) { - x = (default_coef_probs_4x4[b][r][c][p][t] * factor + factor_rnd) - >> factor_bits; - prob = (x > 255 ? 255 : (x < 1 ? 1 : x)); - cm->fc.coef_probs_4x4[b][r][c][p][t] = prob; - } - vp9_get_model_distribution( - prob, cm->fc.coef_probs_4x4[b][r][c][p], b, r); - for (t = 0; t < UNCONSTRAINED_NODES; t++) { - x = (default_coef_probs_8x8[b][r][c][p][t] * factor + factor_rnd) - >> factor_bits; - prob = (x > 255 ? 255 : (x < 1 ? 1 : x)); - cm->fc.coef_probs_8x8[b][r][c][p][t] = prob; - } - vp9_get_model_distribution( - prob, cm->fc.coef_probs_8x8[b][r][c][p], b, r); - for (t = 0; t < UNCONSTRAINED_NODES; t++) { - x = (default_coef_probs_16x16[b][r][c][p][t] * factor + factor_rnd) - >> factor_bits; - prob = (x > 255 ? 255 : (x < 1 ? 1 : x)); - cm->fc.coef_probs_16x16[b][r][c][p][t] = prob; - } - vp9_get_model_distribution( - prob, cm->fc.coef_probs_16x16[b][r][c][p], b, r); - for (t = 0; t < UNCONSTRAINED_NODES; t++) { - x = (default_coef_probs_32x32[b][r][c][p][t] * factor + factor_rnd) - >> factor_bits; - prob = (x > 255 ? 255 : (x < 1 ? 
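The tables above are described as generated from a Generalized Gaussian source model with varying quantizer step size for a uniform quantizer. As a rough illustration of what that means, the sketch below numerically integrates a Generalized Gaussian density over the quantizer's zero bin; the scale `alpha`, the step sizes, and the function names are assumptions for illustration, not the project's actual table generator.

```c
#include <math.h>
#include <stdio.h>

/* Generalized Gaussian density with scale alpha and shape beta:
 *   f(x) = beta / (2 * alpha * Gamma(1/beta)) * exp(-(|x|/alpha)^beta)
 * beta = 2 is Gaussian, beta = 1 is Laplacian; the tables above use
 * shape parameters 0.75 and 0.625. */
static double gg_pdf(double x, double alpha, double beta) {
  return beta / (2.0 * alpha * tgamma(1.0 / beta)) *
         exp(-pow(fabs(x) / alpha, beta));
}

/* Probability that a coefficient drawn from this source lands in the
 * uniform quantizer's zero bin [-q/2, q/2] (trapezoidal integration). */
static double prob_zero(double q, double alpha, double beta) {
  const int n = 4096;
  const double h = 0.5 * q / n;  /* integrate over [0, q/2], then double */
  double sum = 0.0;
  int i;
  for (i = 0; i < n; ++i)
    sum += 0.5 * h * (gg_pdf(i * h, alpha, beta) +
                      gg_pdf((i + 1) * h, alpha, beta));
  return 2.0 * sum;  /* the density is symmetric about zero */
}

int main(void) {
  double q;
  for (q = 0.5; q <= 4.0; q *= 2.0)  /* larger step -> more zeros */
    printf("q = %4.1f  P(zero) = %.3f\n", q, prob_zero(q, 1.0, 0.75));
  return 0;
}
```

The code change itself halves the storage for these tables: under `UNCONSTRAINED_NODES == 2` only the `gg75`/`gg625` tables are compiled in; otherwise only the odd-indexed rows of `gg75p1`/`gg625p1` survive, and the patched `vp9_get_model_distribution()` rebuilds even-indexed rows by averaging the two stored neighbours. A minimal stand-alone sketch of that lookup (the four-row `half_model` table is hypothetical, standing in for the real `COEFPROB_MODELS x (ENTROPY_NODES - 1)` arrays; valid `p` here is 1..7):

```c
#include <stdint.h>
#include <stdio.h>

typedef uint8_t vp9_prob;

/* Hypothetical stand-in for a halved model table: rows for p = 1, 3, 5, 7. */
static const vp9_prob half_model[4][2] = {
  { 10, 20 }, { 30, 40 }, { 50, 60 }, { 70, 80 },
};

/* Mirrors the patched vp9_get_model_distribution(): odd p is stored
 * directly; even p is the rounded-down average of its two neighbours.
 * Assumes p >= 1, consistent with the dropped "do not use" row at index 0. */
static vp9_prob model_lookup(int p, int node) {
  if (p & 1)
    return half_model[(p - 1) / 2][node];
  return (half_model[p / 2 - 1][node] + half_model[p / 2][node]) >> 1;
}

int main(void) {
  /* p = 4 interpolates the rows for p = 3 and p = 5: (30 + 50) >> 1 = 40 */
  printf("p=3: %d  p=4: %d  p=5: %d\n",
         model_lookup(3, 0), model_lookup(4, 0), model_lookup(5, 0));
  return 0;
}
```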
1 : x)); - cm->fc.coef_probs_32x32[b][r][c][p][t] = prob; - } - vp9_get_model_distribution( - prob, cm->fc.coef_probs_32x32[b][r][c][p], b, r); - } -} -#endif - // Neighborhood 5-tuples for various scans and blocksizes, // in {top, left, topleft, topright, bottomleft} order // for each position in raster scan order. @@ -3528,7 +2701,7 @@ void vp9_update_nzc_counts(VP9_COMMON *cm, static void adapt_coef_probs(vp9_coeff_probs *dst_coef_probs, vp9_coeff_probs *pre_coef_probs, - int block_types, vp9_coeff_count *coef_counts, + int qindex, vp9_coeff_count *coef_counts, unsigned int (*eob_branch_count)[REF_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS], @@ -3543,7 +2716,7 @@ static void adapt_coef_probs(vp9_coeff_probs *dst_coef_probs, int entropy_nodes_adapt = ENTROPY_NODES; #endif - for (i = 0; i < block_types; ++i) + for (i = 0; i < BLOCK_TYPES; ++i) for (j = 0; j < REF_TYPES; ++j) for (k = 0; k < COEF_BANDS; ++k) for (l = 0; l < PREV_COEF_CONTEXTS; ++l) { @@ -3587,19 +2760,19 @@ void vp9_adapt_coef_probs(VP9_COMMON *cm) { } adapt_coef_probs(cm->fc.coef_probs_4x4, cm->fc.pre_coef_probs_4x4, - BLOCK_TYPES, cm->fc.coef_counts_4x4, + cm->base_qindex, cm->fc.coef_counts_4x4, cm->fc.eob_branch_counts[TX_4X4], count_sat, update_factor); adapt_coef_probs(cm->fc.coef_probs_8x8, cm->fc.pre_coef_probs_8x8, - BLOCK_TYPES, cm->fc.coef_counts_8x8, + cm->base_qindex, cm->fc.coef_counts_8x8, cm->fc.eob_branch_counts[TX_8X8], count_sat, update_factor); adapt_coef_probs(cm->fc.coef_probs_16x16, cm->fc.pre_coef_probs_16x16, - BLOCK_TYPES, cm->fc.coef_counts_16x16, + cm->base_qindex, cm->fc.coef_counts_16x16, cm->fc.eob_branch_counts[TX_16X16], count_sat, update_factor); adapt_coef_probs(cm->fc.coef_probs_32x32, cm->fc.pre_coef_probs_32x32, - BLOCK_TYPES, cm->fc.coef_counts_32x32, + cm->base_qindex, cm->fc.coef_counts_32x32, cm->fc.eob_branch_counts[TX_32X32], count_sat, update_factor); } diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h index d23f8c442..645faa2c6 100644 --- a/vp9/common/vp9_entropy.h +++ b/vp9/common/vp9_entropy.h @@ -116,12 +116,6 @@ extern DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]); void vp9_coef_tree_initialize(void); void vp9_adapt_coef_probs(struct VP9Common *); -static INLINE void vp9_reset_mb_tokens_context(MACROBLOCKD* const xd) { - /* Clear entropy contexts */ - vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); -} - static INLINE void vp9_reset_sb_tokens_context(MACROBLOCKD* const xd, BLOCK_SIZE_TYPE bsize) { /* Clear entropy contexts */ @@ -159,22 +153,18 @@ const int *vp9_get_coef_neighbors_handle(const int *scan, int *pad); #define UNCONSTRAINED_NODES 3 // Choose one of 2 or 3 // whether forward updates are model-based -#define MODEL_BASED_UPDATE 0 +#define MODEL_BASED_UPDATE 1 // if model-based how many nodes are unconstrained #define UNCONSTRAINED_UPDATE_NODES 3 // whether backward updates are model-based #define MODEL_BASED_ADAPT 0 #define UNCONSTRAINED_ADAPT_NODES 3 -// whether to adjust the coef probs for key frames based on qindex -#define ADJUST_KF_COEF_PROBS 0 - typedef vp9_prob vp9_coeff_probs_model[REF_TYPES][COEF_BANDS] [PREV_COEF_CONTEXTS][2]; extern const vp9_prob vp9_modelcoefprobs[COEFPROB_MODELS][ENTROPY_NODES - 1]; void vp9_get_model_distribution(vp9_prob model, vp9_prob *tree_probs, int b, int r); -void vp9_adjust_default_coef_probs(struct VP9Common *cm); #endif // CONFIG_MODELCOEFPROB #if CONFIG_CODE_NONZEROCOUNT diff --git 
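The `adapt_coef_probs` hunks above swap the `block_types` parameter for the frame's `base_qindex` (the block-type loop now runs over the `BLOCK_TYPES` constant directly). The adaptation step itself, visible in the `count_sat`/`update_factor` arguments, is a count-saturated blend of the previous frame's probabilities with the probabilities implied by this frame's branch counts. The routine below is an illustrative reconstruction of that idea, not the exact library function; the names, the 8-bit factor scale, and the constants in `main` are assumptions.

```c
#include <stdint.h>
#include <stdio.h>

typedef uint8_t vp9_prob;

/* Illustrative count-saturated adaptation: blend the prior probability
 * with the maximum-likelihood estimate from this frame's counts, weighting
 * the new estimate more heavily the more observations there were, up to
 * count_sat. Names and the 8-bit factor scale are assumptions. */
static vp9_prob adapt_prob_sketch(vp9_prob pre, unsigned ct0, unsigned ct1,
                                  unsigned count_sat,
                                  unsigned max_update_factor) {
  const unsigned den = ct0 + ct1;
  /* probability of the 0-branch implied by the counts (128 if unseen) */
  const unsigned mle = den ? (255 * ct0 + den / 2) / den : 128;
  const unsigned count = den > count_sat ? count_sat : den;
  const unsigned factor = max_update_factor * count / count_sat;
  const unsigned p = (pre * (256 - factor) + mle * factor + 128) >> 8;
  return (vp9_prob)(p < 1 ? 1 : p > 255 ? 255 : p);  /* keep in [1, 255] */
}

int main(void) {
  /* few observations barely move the prior; many observations track it */
  printf("%u %u\n", adapt_prob_sketch(200, 1, 9, 24, 112),
         adapt_prob_sketch(200, 10, 90, 24, 112));
  return 0;
}
```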
a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c index 5fa63d7c2..22d4b0449 100644 --- a/vp9/common/vp9_reconinter.c +++ b/vp9/common/vp9_reconinter.c @@ -473,8 +473,11 @@ static void build_2x1_inter_predictor(const BLOCKD *d0, const BLOCKD *d1, int block_size, int stride, int which_mv, int weight, const struct subpix_fn_table *subpix, - int row, int col) { - assert(d1->predictor - d0->predictor == block_size); + int row, int col, int use_dst) { + uint8_t *d0_predictor = use_dst ? *(d0->base_dst) + d0->dst : d0->predictor; + uint8_t *d1_predictor = use_dst ? *(d1->base_dst) + d1->dst : d1->predictor; + stride = use_dst ? d0->dst_stride : stride; + assert(d1_predictor - d0_predictor == block_size); assert(d1->pre == d0->pre + block_size); set_scaled_offsets(&scale[which_mv], row, col); @@ -484,19 +487,18 @@ static void build_2x1_inter_predictor(const BLOCKD *d0, const BLOCKD *d1, vp9_build_inter_predictor(*base_pre + d0->pre, d0->pre_stride, - d0->predictor, stride, + d0_predictor, stride, &d0->bmi.as_mv[which_mv], &scale[which_mv], 2 * block_size, block_size, weight, subpix); - } else { uint8_t **base_pre0 = which_mv ? d0->base_second_pre : d0->base_pre; uint8_t **base_pre1 = which_mv ? d1->base_second_pre : d1->base_pre; vp9_build_inter_predictor(*base_pre0 + d0->pre, d0->pre_stride, - d0->predictor, stride, + d0_predictor, stride, &d0->bmi.as_mv[which_mv], &scale[which_mv], block_size, block_size, @@ -506,7 +508,7 @@ static void build_2x1_inter_predictor(const BLOCKD *d0, const BLOCKD *d1, vp9_build_inter_predictor(*base_pre1 + d1->pre, d1->pre_stride, - d1->predictor, stride, + d1_predictor, stride, &d1->bmi.as_mv[which_mv], &scale[which_mv], block_size, block_size, @@ -1533,7 +1535,8 @@ void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *mb, } static void build_inter4x4_predictors_mb(MACROBLOCKD *xd, - int mb_row, int mb_col) { + int mb_row, int mb_col, + int use_dst) { int i; MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; BLOCKD *blockd = xd->block; @@ -1562,7 +1565,8 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *xd, build_2x1_inter_predictor(d0, d1, xd->scale_factor, 8, 16, which_mv, which_mv ? weight : 0, - &xd->subpix, mb_row * 16 + y, mb_col * 16); + &xd->subpix, mb_row * 16 + y, mb_col * 16, + use_dst); } } } else { @@ -1579,7 +1583,8 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *xd, build_2x1_inter_predictor(d0, d1, xd->scale_factor, 4, 16, which_mv, which_mv ? weight : 0, &xd->subpix, - mb_row * 16 + y, mb_col * 16 + x); + mb_row * 16 + y, mb_col * 16 + x, + use_dst); } } } @@ -1597,7 +1602,8 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *xd, for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { build_2x1_inter_predictor(d0, d1, xd->scale_factor_uv, 4, 8, which_mv, which_mv ? 
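The `use_dst` flag introduced in this file lets `build_2x1_inter_predictor()` write either into the intermediate predictor buffer or straight into the destination frame; everything downstream is untouched because only the pointer and stride are resolved up front. A reduced sketch of that selection, with the struct trimmed to the fields the pattern needs (names are stand-ins for the BLOCKD fields):

```c
#include <stdint.h>
#include <stdio.h>

/* Trimmed stand-in for BLOCKD with just the fields the selection uses. */
struct blockd_sketch {
  uint8_t *predictor;  /* intermediate prediction buffer (fixed stride) */
  uint8_t **base_dst;  /* frame buffer base */
  int dst;             /* offset of this block within the frame */
  int dst_stride;      /* frame buffer stride */
};

/* Resolve the prediction target the way the patched
 * build_2x1_inter_predictor() does: scratch buffer by default,
 * the destination frame itself when use_dst is set. */
static uint8_t *resolve_pred(const struct blockd_sketch *d, int use_dst,
                             int *stride) { /* in: scratch stride; out: actual */
  if (use_dst) {
    *stride = d->dst_stride;
    return *(d->base_dst) + d->dst;
  }
  return d->predictor;
}

int main(void) {
  static uint8_t scratch[16 * 16], frame[64 * 64];
  uint8_t *base = frame;
  struct blockd_sketch d = { scratch, &base, 4 * 64 + 8, 64 };
  int stride = 16;
  printf("scratch? %d\n", resolve_pred(&d, 0, &stride) == scratch);
  printf("frame?   %d (stride %d)\n",
         resolve_pred(&d, 1, &stride) == frame + 4 * 64 + 8, stride);
  return 0;
}
```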
weight : 0, &xd->subpix, - mb_row * 8 + y, mb_col * 8 + x); + mb_row * 8 + y, mb_col * 8 + x, + use_dst); } } } @@ -1714,10 +1720,26 @@ void vp9_build_inter_predictors_mb(MACROBLOCKD *xd, } else { build_4x4uvmvs(xd); - build_inter4x4_predictors_mb(xd, mb_row, mb_col); + build_inter4x4_predictors_mb(xd, mb_row, mb_col, 0); } } +void vp9_build_inter_predictors_mb_s(MACROBLOCKD *xd, + int mb_row, + int mb_col) { + if (xd->mode_info_context->mbmi.mode != SPLITMV) { + vp9_build_inter16x16_predictors_mb(xd, xd->dst.y_buffer, + xd->dst.u_buffer, + xd->dst.v_buffer, + xd->dst.y_stride, + xd->dst.uv_stride, + mb_row, mb_col); + + } else { + build_4x4uvmvs(xd); + build_inter4x4_predictors_mb(xd, mb_row, mb_col, 1); + } +} /*encoder only*/ void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd, int mb_row, int mb_col) { @@ -1766,7 +1788,8 @@ void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd, for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { build_2x1_inter_predictor(d0, d1, xd->scale_factor_uv, 4, 8, which_mv, which_mv ? weight : 0, - &xd->subpix, mb_row * 8 + y, mb_col * 8 + x); + &xd->subpix, mb_row * 8 + y, mb_col * 8 + x, + 0); } } } diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h index f09c8e45c..5268607fd 100644 --- a/vp9/common/vp9_reconinter.h +++ b/vp9/common/vp9_reconinter.h @@ -48,6 +48,10 @@ void vp9_build_inter_predictors_mb(MACROBLOCKD *xd, int mb_row, int mb_col); +void vp9_build_inter_predictors_mb_s(MACROBLOCKD *xd, + int mb_row, + int mb_col); + void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd, int mb_row, int mb_col); diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c index 186532c8b..640ce295d 100644 --- a/vp9/common/vp9_reconintra.c +++ b/vp9/common/vp9_reconintra.c @@ -751,7 +751,7 @@ void vp9_build_intra_predictors_sb64uv_s(MACROBLOCKD *xd) { void vp9_intra8x8_predict(MACROBLOCKD *xd, BLOCKD *b, int mode, - uint8_t *predictor) { + uint8_t *predictor, int pre_stride) { const int block4x4_idx = (b - xd->block); const int block_idx = (block4x4_idx >> 2) | !!(block4x4_idx & 2); const int have_top = (block_idx >> 1) || xd->up_available; @@ -759,7 +759,7 @@ void vp9_intra8x8_predict(MACROBLOCKD *xd, const int have_right = !(block_idx & 1) || xd->right_available; vp9_build_intra_predictors_internal(*(b->base_dst) + b->dst, - b->dst_stride, predictor, 16, + b->dst_stride, predictor, pre_stride, mode, 8, have_top, have_left, have_right); } @@ -767,14 +767,14 @@ void vp9_intra8x8_predict(MACROBLOCKD *xd, void vp9_intra_uv4x4_predict(MACROBLOCKD *xd, BLOCKD *b, int mode, - uint8_t *predictor) { + uint8_t *predictor, int pre_stride) { const int block_idx = (b - xd->block) & 3; const int have_top = (block_idx >> 1) || xd->up_available; const int have_left = (block_idx & 1) || xd->left_available; const int have_right = !(block_idx & 1) || xd->right_available; vp9_build_intra_predictors_internal(*(b->base_dst) + b->dst, - b->dst_stride, predictor, 8, + b->dst_stride, predictor, pre_stride, mode, 4, have_top, have_left, have_right); } diff --git a/vp9/common/vp9_reconintra4x4.c b/vp9/common/vp9_reconintra4x4.c index eab5ab495..08bfd1d8e 100644 --- a/vp9/common/vp9_reconintra4x4.c +++ b/vp9/common/vp9_reconintra4x4.c @@ -164,7 +164,8 @@ B_PREDICTION_MODE vp9_find_bpred_context(MACROBLOCKD *xd, BLOCKD *x) { void vp9_intra4x4_predict(MACROBLOCKD *xd, BLOCKD *x, int b_mode, - uint8_t *predictor) { + uint8_t *predictor, + int ps) { int i, r, c; const int block_idx = x - xd->block; const int have_top = (block_idx >> 2) || 
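The intra prediction changes in this and the following hunks replace hard-coded destination strides (16 for the luma scratch buffer, 8 for chroma) with a caller-supplied `pre_stride`/`ps`, so the same predictor can fill the scratch buffer or write directly into the frame; every `predictor[r * 16 + c]` index becomes `predictor[r * ps + c]`. A minimal stride-parameterized 4x4 predictor in the same spirit, illustrative rather than the vp9 routine:

```c
#include <stdint.h>

/* A 4x4 vertical predictor with the destination stride as a parameter:
 * the same code can fill the old 16-byte-stride scratch buffer (ps = 16)
 * or write into the frame itself (ps = dst_stride). */
static void vert_predict_4x4(const uint8_t *above, uint8_t *predictor,
                             int ps) {
  int r, c;
  for (r = 0; r < 4; ++r) {
    for (c = 0; c < 4; ++c)
      predictor[c] = above[c];  /* each row copies the row above the block */
    predictor += ps;            /* previously a hard-coded `predictor += 16` */
  }
}
```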
xd->up_available; @@ -276,7 +277,7 @@ void vp9_intra4x4_predict(MACROBLOCKD *xd, predictor[c] = expected_dc; } - predictor += 16; + predictor += ps; } } break; @@ -287,7 +288,7 @@ void vp9_intra4x4_predict(MACROBLOCKD *xd, predictor[c] = clip_pixel(above[c] - top_left + left[r]); } - predictor += 16; + predictor += ps; } } break; @@ -305,7 +306,7 @@ void vp9_intra4x4_predict(MACROBLOCKD *xd, predictor[c] = ap[c]; } - predictor += 16; + predictor += ps; } } break; @@ -323,29 +324,29 @@ void vp9_intra4x4_predict(MACROBLOCKD *xd, predictor[c] = lp[r]; } - predictor += 16; + predictor += ps; } } break; case B_LD_PRED: { uint8_t *ptr = above; - predictor[0 * 16 + 0] = (ptr[0] + ptr[1] * 2 + ptr[2] + 2) >> 2; - predictor[0 * 16 + 1] = - predictor[1 * 16 + 0] = (ptr[1] + ptr[2] * 2 + ptr[3] + 2) >> 2; - predictor[0 * 16 + 2] = - predictor[1 * 16 + 1] = - predictor[2 * 16 + 0] = (ptr[2] + ptr[3] * 2 + ptr[4] + 2) >> 2; - predictor[0 * 16 + 3] = - predictor[1 * 16 + 2] = - predictor[2 * 16 + 1] = - predictor[3 * 16 + 0] = (ptr[3] + ptr[4] * 2 + ptr[5] + 2) >> 2; - predictor[1 * 16 + 3] = - predictor[2 * 16 + 2] = - predictor[3 * 16 + 1] = (ptr[4] + ptr[5] * 2 + ptr[6] + 2) >> 2; - predictor[2 * 16 + 3] = - predictor[3 * 16 + 2] = (ptr[5] + ptr[6] * 2 + ptr[7] + 2) >> 2; - predictor[3 * 16 + 3] = (ptr[6] + ptr[7] * 2 + ptr[7] + 2) >> 2; + predictor[0 * ps + 0] = (ptr[0] + ptr[1] * 2 + ptr[2] + 2) >> 2; + predictor[0 * ps + 1] = + predictor[1 * ps + 0] = (ptr[1] + ptr[2] * 2 + ptr[3] + 2) >> 2; + predictor[0 * ps + 2] = + predictor[1 * ps + 1] = + predictor[2 * ps + 0] = (ptr[2] + ptr[3] * 2 + ptr[4] + 2) >> 2; + predictor[0 * ps + 3] = + predictor[1 * ps + 2] = + predictor[2 * ps + 1] = + predictor[3 * ps + 0] = (ptr[3] + ptr[4] * 2 + ptr[5] + 2) >> 2; + predictor[1 * ps + 3] = + predictor[2 * ps + 2] = + predictor[3 * ps + 1] = (ptr[4] + ptr[5] * 2 + ptr[6] + 2) >> 2; + predictor[2 * ps + 3] = + predictor[3 * ps + 2] = (ptr[5] + ptr[6] * 2 + ptr[7] + 2) >> 2; + predictor[3 * ps + 3] = (ptr[6] + ptr[7] * 2 + ptr[7] + 2) >> 2; } break; @@ -362,22 +363,22 @@ void vp9_intra4x4_predict(MACROBLOCKD *xd, pp[7] = above[2]; pp[8] = above[3]; - predictor[3 * 16 + 0] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2; - predictor[3 * 16 + 1] = - predictor[2 * 16 + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2; - predictor[3 * 16 + 2] = - predictor[2 * 16 + 1] = - predictor[1 * 16 + 0] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2; - predictor[3 * 16 + 3] = - predictor[2 * 16 + 2] = - predictor[1 * 16 + 1] = - predictor[0 * 16 + 0] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2; - predictor[2 * 16 + 3] = - predictor[1 * 16 + 2] = - predictor[0 * 16 + 1] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2; - predictor[1 * 16 + 3] = - predictor[0 * 16 + 2] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2; - predictor[0 * 16 + 3] = (pp[6] + pp[7] * 2 + pp[8] + 2) >> 2; + predictor[3 * ps + 0] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2; + predictor[3 * ps + 1] = + predictor[2 * ps + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2; + predictor[3 * ps + 2] = + predictor[2 * ps + 1] = + predictor[1 * ps + 0] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2; + predictor[3 * ps + 3] = + predictor[2 * ps + 2] = + predictor[1 * ps + 1] = + predictor[0 * ps + 0] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2; + predictor[2 * ps + 3] = + predictor[1 * ps + 2] = + predictor[0 * ps + 1] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2; + predictor[1 * ps + 3] = + predictor[0 * ps + 2] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2; + predictor[0 * ps + 3] = (pp[6] + pp[7] * 2 + pp[8] + 2) >> 2; } break; @@ -394,44 +395,44 
@@ void vp9_intra4x4_predict(MACROBLOCKD *xd, pp[7] = above[2]; pp[8] = above[3]; - predictor[3 * 16 + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2; - predictor[2 * 16 + 0] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2; - predictor[3 * 16 + 1] = - predictor[1 * 16 + 0] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2; - predictor[2 * 16 + 1] = - predictor[0 * 16 + 0] = (pp[4] + pp[5] + 1) >> 1; - predictor[3 * 16 + 2] = - predictor[1 * 16 + 1] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2; - predictor[2 * 16 + 2] = - predictor[0 * 16 + 1] = (pp[5] + pp[6] + 1) >> 1; - predictor[3 * 16 + 3] = - predictor[1 * 16 + 2] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2; - predictor[2 * 16 + 3] = - predictor[0 * 16 + 2] = (pp[6] + pp[7] + 1) >> 1; - predictor[1 * 16 + 3] = (pp[6] + pp[7] * 2 + pp[8] + 2) >> 2; - predictor[0 * 16 + 3] = (pp[7] + pp[8] + 1) >> 1; + predictor[3 * ps + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2; + predictor[2 * ps + 0] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2; + predictor[3 * ps + 1] = + predictor[1 * ps + 0] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2; + predictor[2 * ps + 1] = + predictor[0 * ps + 0] = (pp[4] + pp[5] + 1) >> 1; + predictor[3 * ps + 2] = + predictor[1 * ps + 1] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2; + predictor[2 * ps + 2] = + predictor[0 * ps + 1] = (pp[5] + pp[6] + 1) >> 1; + predictor[3 * ps + 3] = + predictor[1 * ps + 2] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2; + predictor[2 * ps + 3] = + predictor[0 * ps + 2] = (pp[6] + pp[7] + 1) >> 1; + predictor[1 * ps + 3] = (pp[6] + pp[7] * 2 + pp[8] + 2) >> 2; + predictor[0 * ps + 3] = (pp[7] + pp[8] + 1) >> 1; } break; case B_VL_PRED: { uint8_t *pp = above; - predictor[0 * 16 + 0] = (pp[0] + pp[1] + 1) >> 1; - predictor[1 * 16 + 0] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2; - predictor[2 * 16 + 0] = - predictor[0 * 16 + 1] = (pp[1] + pp[2] + 1) >> 1; - predictor[1 * 16 + 1] = - predictor[3 * 16 + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2; - predictor[2 * 16 + 1] = - predictor[0 * 16 + 2] = (pp[2] + pp[3] + 1) >> 1; - predictor[3 * 16 + 1] = - predictor[1 * 16 + 2] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2; - predictor[0 * 16 + 3] = - predictor[2 * 16 + 2] = (pp[3] + pp[4] + 1) >> 1; - predictor[1 * 16 + 3] = - predictor[3 * 16 + 2] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2; - predictor[2 * 16 + 3] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2; - predictor[3 * 16 + 3] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2; + predictor[0 * ps + 0] = (pp[0] + pp[1] + 1) >> 1; + predictor[1 * ps + 0] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2; + predictor[2 * ps + 0] = + predictor[0 * ps + 1] = (pp[1] + pp[2] + 1) >> 1; + predictor[1 * ps + 1] = + predictor[3 * ps + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2; + predictor[2 * ps + 1] = + predictor[0 * ps + 2] = (pp[2] + pp[3] + 1) >> 1; + predictor[3 * ps + 1] = + predictor[1 * ps + 2] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2; + predictor[0 * ps + 3] = + predictor[2 * ps + 2] = (pp[3] + pp[4] + 1) >> 1; + predictor[1 * ps + 3] = + predictor[3 * ps + 2] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2; + predictor[2 * ps + 3] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2; + predictor[3 * ps + 3] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2; } break; @@ -449,44 +450,44 @@ void vp9_intra4x4_predict(MACROBLOCKD *xd, pp[8] = above[3]; - predictor[3 * 16 + 0] = (pp[0] + pp[1] + 1) >> 1; - predictor[3 * 16 + 1] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2; - predictor[2 * 16 + 0] = - predictor[3 * 16 + 2] = (pp[1] + pp[2] + 1) >> 1; - predictor[2 * 16 + 1] = - predictor[3 * 16 + 3] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2; - predictor[2 * 16 + 2] = - predictor[1 * 16 + 0] = 
(pp[2] + pp[3] + 1) >> 1; - predictor[2 * 16 + 3] = - predictor[1 * 16 + 1] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2; - predictor[1 * 16 + 2] = - predictor[0 * 16 + 0] = (pp[3] + pp[4] + 1) >> 1; - predictor[1 * 16 + 3] = - predictor[0 * 16 + 1] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2; - predictor[0 * 16 + 2] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2; - predictor[0 * 16 + 3] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2; + predictor[3 * ps + 0] = (pp[0] + pp[1] + 1) >> 1; + predictor[3 * ps + 1] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2; + predictor[2 * ps + 0] = + predictor[3 * ps + 2] = (pp[1] + pp[2] + 1) >> 1; + predictor[2 * ps + 1] = + predictor[3 * ps + 3] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2; + predictor[2 * ps + 2] = + predictor[1 * ps + 0] = (pp[2] + pp[3] + 1) >> 1; + predictor[2 * ps + 3] = + predictor[1 * ps + 1] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2; + predictor[1 * ps + 2] = + predictor[0 * ps + 0] = (pp[3] + pp[4] + 1) >> 1; + predictor[1 * ps + 3] = + predictor[0 * ps + 1] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2; + predictor[0 * ps + 2] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2; + predictor[0 * ps + 3] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2; } break; case B_HU_PRED: { uint8_t *pp = left; - predictor[0 * 16 + 0] = (pp[0] + pp[1] + 1) >> 1; - predictor[0 * 16 + 1] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2; - predictor[0 * 16 + 2] = - predictor[1 * 16 + 0] = (pp[1] + pp[2] + 1) >> 1; - predictor[0 * 16 + 3] = - predictor[1 * 16 + 1] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2; - predictor[1 * 16 + 2] = - predictor[2 * 16 + 0] = (pp[2] + pp[3] + 1) >> 1; - predictor[1 * 16 + 3] = - predictor[2 * 16 + 1] = (pp[2] + pp[3] * 2 + pp[3] + 2) >> 2; - predictor[2 * 16 + 2] = - predictor[2 * 16 + 3] = - predictor[3 * 16 + 0] = - predictor[3 * 16 + 1] = - predictor[3 * 16 + 2] = - predictor[3 * 16 + 3] = pp[3]; + predictor[0 * ps + 0] = (pp[0] + pp[1] + 1) >> 1; + predictor[0 * ps + 1] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2; + predictor[0 * ps + 2] = + predictor[1 * ps + 0] = (pp[1] + pp[2] + 1) >> 1; + predictor[0 * ps + 3] = + predictor[1 * ps + 1] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2; + predictor[1 * ps + 2] = + predictor[2 * ps + 0] = (pp[2] + pp[3] + 1) >> 1; + predictor[1 * ps + 3] = + predictor[2 * ps + 1] = (pp[2] + pp[3] * 2 + pp[3] + 2) >> 2; + predictor[2 * ps + 2] = + predictor[2 * ps + 3] = + predictor[3 * ps + 0] = + predictor[3 * ps + 1] = + predictor[3 * ps + 2] = + predictor[3 * ps + 3] = pp[3]; } break; diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index a6a2af06d..ec7f29df7 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -27,7 +27,7 @@ forward_decls vp9_common_forward_decls # # Dequant # -prototype void vp9_dequant_idct_add_y_block_8x8 "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, struct macroblockd *xd" +prototype void vp9_dequant_idct_add_y_block_8x8 "int16_t *q, const int16_t *dq, uint8_t *pre, int pre_stride, uint8_t *dst, int stride, struct macroblockd *xd" specialize vp9_dequant_idct_add_y_block_8x8 prototype void vp9_dequant_idct_add_16x16 "int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride, int eob" @@ -39,10 +39,10 @@ specialize vp9_dequant_idct_add_8x8 prototype void vp9_dequant_idct_add "int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride, int eob" specialize vp9_dequant_idct_add -prototype void vp9_dequant_idct_add_y_block "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, struct macroblockd *xd" +prototype 
void vp9_dequant_idct_add_y_block "int16_t *q, const int16_t *dq, uint8_t *pre, int pre_stride, uint8_t *dst, int stride, struct macroblockd *xd" specialize vp9_dequant_idct_add_y_block -prototype void vp9_dequant_idct_add_uv_block "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs" +prototype void vp9_dequant_idct_add_uv_block "int16_t *q, const int16_t *dq, uint8_t *pre, int pre_stride, uint8_t *dst, int stride, uint16_t *eobs" specialize vp9_dequant_idct_add_uv_block prototype void vp9_dequant_idct_add_32x32 "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int pitch, int stride, int eob" @@ -119,13 +119,13 @@ specialize vp9_build_intra_predictors_sb64y_s; prototype void vp9_build_intra_predictors_sb64uv_s "struct macroblockd *x" specialize vp9_build_intra_predictors_sb64uv_s; -prototype void vp9_intra4x4_predict "struct macroblockd *xd, struct blockd *x, int b_mode, uint8_t *predictor" +prototype void vp9_intra4x4_predict "struct macroblockd *xd, struct blockd *x, int b_mode, uint8_t *predictor, int pre_stride" specialize vp9_intra4x4_predict; -prototype void vp9_intra8x8_predict "struct macroblockd *xd, struct blockd *x, int b_mode, uint8_t *predictor" +prototype void vp9_intra8x8_predict "struct macroblockd *xd, struct blockd *x, int b_mode, uint8_t *predictor, int pre_stride" specialize vp9_intra8x8_predict; -prototype void vp9_intra_uv4x4_predict "struct macroblockd *xd, struct blockd *x, int b_mode, uint8_t *predictor" +prototype void vp9_intra_uv4x4_predict "struct macroblockd *xd, struct blockd *x, int b_mode, uint8_t *predictor, int pre_stride" specialize vp9_intra_uv4x4_predict; if [ "$CONFIG_VP9_DECODER" = "yes" ]; then diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index 28327ff40..6013591f4 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -226,24 +226,28 @@ static void skip_recon_mb(VP9D_COMP *pbi, MACROBLOCKD *xd, static void decode_16x16(VP9D_COMP *pbi, MACROBLOCKD *xd, BOOL_DECODER* const bc) { const TX_TYPE tx_type = get_tx_type_16x16(xd, 0); + if (tx_type != DCT_DCT) { vp9_dequant_iht_add_16x16_c(tx_type, xd->plane[0].qcoeff, - xd->block[0].dequant, xd->predictor, - xd->dst.y_buffer, 16, xd->dst.y_stride, - xd->plane[0].eobs[0]); + xd->block[0].dequant, xd->dst.y_buffer, + xd->dst.y_buffer, xd->dst.y_stride, + xd->dst.y_stride, xd->plane[0].eobs[0]); } else { vp9_dequant_idct_add_16x16(xd->plane[0].qcoeff, xd->block[0].dequant, - xd->predictor, xd->dst.y_buffer, - 16, xd->dst.y_stride, xd->plane[0].eobs[0]); + xd->dst.y_buffer, xd->dst.y_buffer, + xd->dst.y_stride, xd->dst.y_stride, + xd->plane[0].eobs[0]); } vp9_dequant_idct_add_8x8(xd->plane[1].qcoeff, xd->block[16].dequant, - xd->predictor + 16 * 16, xd->dst.u_buffer, 8, - xd->dst.uv_stride, xd->plane[1].eobs[0]); + xd->dst.u_buffer, xd->dst.u_buffer, + xd->dst.uv_stride, xd->dst.uv_stride, + xd->plane[1].eobs[0]); vp9_dequant_idct_add_8x8(xd->plane[2].qcoeff, xd->block[20].dequant, - xd->predictor + 16 * 16 + 64, xd->dst.v_buffer, 8, - xd->dst.uv_stride, xd->plane[2].eobs[0]); + xd->dst.v_buffer, xd->dst.v_buffer, + xd->dst.uv_stride, xd->dst.uv_stride, + xd->plane[2].eobs[0]); } static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd, @@ -259,27 +263,27 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd, int idx = (ib & 0x02) ? 
(ib + 2) : ib; int16_t *q = BLOCK_OFFSET(xd->plane[0].qcoeff, idx, 16); int16_t *dq = xd->block[0].dequant; - uint8_t *pre = xd->block[ib].predictor; uint8_t *dst = *(xd->block[ib].base_dst) + xd->block[ib].dst; int stride = xd->dst.y_stride; - BLOCKD *b = &xd->block[ib]; if (mode == I8X8_PRED) { + BLOCKD *b = &xd->block[ib]; int i8x8mode = b->bmi.as_mode.first; - vp9_intra8x8_predict(xd, b, i8x8mode, b->predictor); + vp9_intra8x8_predict(xd, b, i8x8mode, dst, stride); } tx_type = get_tx_type_8x8(xd, ib); if (tx_type != DCT_DCT) { - vp9_dequant_iht_add_8x8_c(tx_type, q, dq, pre, dst, 16, stride, + vp9_dequant_iht_add_8x8_c(tx_type, q, dq, dst, dst, stride, stride, xd->plane[0].eobs[idx]); } else { - vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride, + vp9_dequant_idct_add_8x8_c(q, dq, dst, dst, stride, stride, xd->plane[0].eobs[idx]); } } } else { vp9_dequant_idct_add_y_block_8x8(xd->plane[0].qcoeff, xd->block[0].dequant, - xd->predictor, + xd->dst.y_buffer, + xd->dst.y_stride, xd->dst.y_buffer, xd->dst.y_stride, xd); @@ -294,34 +298,38 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd, int i8x8mode = b->bmi.as_mode.first; b = &xd->block[16 + i]; - vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor); + vp9_intra_uv4x4_predict(xd, b, i8x8mode, *(b->base_dst) + b->dst, + b->dst_stride); xd->itxm_add(BLOCK_OFFSET(xd->plane[1].qcoeff, i, 16), - b->dequant, b->predictor, - *(b->base_dst) + b->dst, 8, b->dst_stride, + b->dequant, *(b->base_dst) + b->dst, + *(b->base_dst) + b->dst, b->dst_stride, b->dst_stride, xd->plane[1].eobs[i]); b = &xd->block[20 + i]; - vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor); + vp9_intra_uv4x4_predict(xd, b, i8x8mode, *(b->base_dst) + b->dst, + b->dst_stride); xd->itxm_add(BLOCK_OFFSET(xd->plane[2].qcoeff, i, 16), - b->dequant, b->predictor, - *(b->base_dst) + b->dst, 8, b->dst_stride, + b->dequant, *(b->base_dst) + b->dst, + *(b->base_dst) + b->dst, b->dst_stride, b->dst_stride, xd->plane[2].eobs[i]); } } else if (mode == SPLITMV) { xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant, - xd->predictor + 16 * 16, xd->dst.u_buffer, + xd->dst.u_buffer, xd->dst.uv_stride, xd->dst.u_buffer, xd->dst.uv_stride, xd->plane[1].eobs); xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant, - xd->predictor + 16 * 16 + 64, xd->dst.v_buffer, + xd->dst.v_buffer, xd->dst.uv_stride, xd->dst.v_buffer, xd->dst.uv_stride, xd->plane[2].eobs); } else { vp9_dequant_idct_add_8x8(xd->plane[1].qcoeff, xd->block[16].dequant, - xd->predictor + 16 * 16, xd->dst.u_buffer, 8, - xd->dst.uv_stride, xd->plane[1].eobs[0]); + xd->dst.u_buffer, xd->dst.u_buffer, + xd->dst.uv_stride, xd->dst.uv_stride, + xd->plane[1].eobs[0]); vp9_dequant_idct_add_8x8(xd->plane[2].qcoeff, xd->block[16].dequant, - xd->predictor + 16 * 16 + 64, xd->dst.v_buffer, 8, - xd->dst.uv_stride, xd->plane[2].eobs[0]); + xd->dst.v_buffer, xd->dst.v_buffer, + xd->dst.uv_stride, xd->dst.uv_stride, + xd->plane[2].eobs[0]); } } @@ -337,35 +345,38 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, int j; BLOCKD *b = &xd->block[ib]; int i8x8mode = b->bmi.as_mode.first; - vp9_intra8x8_predict(xd, b, i8x8mode, b->predictor); + vp9_intra8x8_predict(xd, b, i8x8mode, *(b->base_dst) + b->dst, + b->dst_stride); for (j = 0; j < 4; j++) { b = &xd->block[ib + iblock[j]]; tx_type = get_tx_type_4x4(xd, ib + iblock[j]); if (tx_type != DCT_DCT) { vp9_dequant_iht_add_c(tx_type, BLOCK_OFFSET(xd->plane[0].qcoeff, ib + iblock[j], 16), - b->dequant, b->predictor, - *(b->base_dst) + b->dst, 16, + 
b->dequant, *(b->base_dst) + b->dst, + *(b->base_dst) + b->dst, b->dst_stride, b->dst_stride, xd->plane[0].eobs[ib + iblock[j]]); } else { xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, ib + iblock[j], 16), - b->dequant, b->predictor, - *(b->base_dst) + b->dst, 16, b->dst_stride, + b->dequant, *(b->base_dst) + b->dst, + *(b->base_dst) + b->dst, b->dst_stride, b->dst_stride, xd->plane[0].eobs[ib + iblock[j]]); } } b = &xd->block[16 + i]; - vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor); + vp9_intra_uv4x4_predict(xd, b, i8x8mode, *(b->base_dst) + b->dst, + b->dst_stride); xd->itxm_add(BLOCK_OFFSET(xd->plane[1].qcoeff, i, 16), - b->dequant, b->predictor, - *(b->base_dst) + b->dst, 8, b->dst_stride, + b->dequant, *(b->base_dst) + b->dst, + *(b->base_dst) + b->dst, b->dst_stride, b->dst_stride, xd->plane[1].eobs[i]); b = &xd->block[20 + i]; - vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor); + vp9_intra_uv4x4_predict(xd, b, i8x8mode, *(b->base_dst) + b->dst, + b->dst_stride); xd->itxm_add(BLOCK_OFFSET(xd->plane[2].qcoeff, i, 16), - b->dequant, b->predictor, - *(b->base_dst) + b->dst, 8, b->dst_stride, + b->dequant, *(b->base_dst) + b->dst, + *(b->base_dst) + b->dst, b->dst_stride, b->dst_stride, xd->plane[2].eobs[i]); } } else if (mode == B_PRED) { @@ -378,18 +389,19 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, if (!xd->mode_info_context->mbmi.mb_skip_coeff) vp9_decode_coefs_4x4(pbi, xd, bc, PLANE_TYPE_Y_WITH_DC, i); #endif - vp9_intra4x4_predict(xd, b, b_mode, b->predictor); + vp9_intra4x4_predict(xd, b, b_mode, *(b->base_dst) + b->dst, + b->dst_stride); tx_type = get_tx_type_4x4(xd, i); if (tx_type != DCT_DCT) { vp9_dequant_iht_add_c(tx_type, BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16), - b->dequant, b->predictor, - *(b->base_dst) + b->dst, 16, b->dst_stride, - xd->plane[0].eobs[i]); + b->dequant, *(b->base_dst) + b->dst, + *(b->base_dst) + b->dst, b->dst_stride, + b->dst_stride, xd->plane[0].eobs[i]); } else { xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16), - b->dequant, b->predictor, - *(b->base_dst) + b->dst, 16, b->dst_stride, + b->dequant, *(b->base_dst) + b->dst, + *(b->base_dst) + b->dst, b->dst_stride, b->dst_stride, xd->plane[0].eobs[i]); } } @@ -397,25 +409,25 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, if (!xd->mode_info_context->mbmi.mb_skip_coeff) vp9_decode_mb_tokens_4x4_uv(pbi, xd, bc); #endif - vp9_build_intra_predictors_mbuv(xd); + vp9_build_intra_predictors_mbuv_s(xd); xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant, - xd->predictor + 16 * 16, xd->dst.u_buffer, + xd->dst.u_buffer, xd->dst.uv_stride, xd->dst.u_buffer, xd->dst.uv_stride, xd->plane[1].eobs); xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant, - xd->predictor + 16 * 16 + 64, xd->dst.v_buffer, + xd->dst.v_buffer, xd->dst.uv_stride, xd->dst.v_buffer, xd->dst.uv_stride, xd->plane[2].eobs); } else if (mode == SPLITMV || get_tx_type_4x4(xd, 0) == DCT_DCT) { xd->itxm_add_y_block(xd->plane[0].qcoeff, xd->block[0].dequant, - xd->predictor, + xd->dst.y_buffer, xd->dst.y_stride, xd->dst.y_buffer, xd->dst.y_stride, xd); xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant, - xd->predictor + 16 * 16, xd->dst.u_buffer, + xd->dst.u_buffer, xd->dst.uv_stride, xd->dst.u_buffer, xd->dst.uv_stride, xd->plane[1].eobs); xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant, - xd->predictor + 16 * 16 + 64, xd->dst.v_buffer, + xd->dst.v_buffer, xd->dst.uv_stride, xd->dst.v_buffer, xd->dst.uv_stride, xd->plane[2].eobs); } else { for (i 
= 0; i < 16; i++) { @@ -424,21 +436,21 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, if (tx_type != DCT_DCT) { vp9_dequant_iht_add_c(tx_type, BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16), - b->dequant, b->predictor, - *(b->base_dst) + b->dst, 16, + b->dequant, *(b->base_dst) + b->dst, + *(b->base_dst) + b->dst, b->dst_stride, b->dst_stride, xd->plane[0].eobs[i]); } else { xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16), - b->dequant, b->predictor, - *(b->base_dst) + b->dst, 16, b->dst_stride, + b->dequant, *(b->base_dst) + b->dst, + *(b->base_dst) + b->dst, b->dst_stride, b->dst_stride, xd->plane[0].eobs[i]); } } xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant, - xd->predictor + 16 * 16, xd->dst.u_buffer, + xd->dst.u_buffer, xd->dst.uv_stride, xd->dst.u_buffer, xd->dst.uv_stride, xd->plane[1].eobs); xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant, - xd->predictor + 16 * 16 + 64, xd->dst.v_buffer, + xd->dst.v_buffer, xd->dst.uv_stride, xd->dst.v_buffer, xd->dst.uv_stride, xd->plane[2].eobs); } } @@ -771,7 +783,7 @@ static void decode_mb(VP9D_COMP *pbi, MACROBLOCKD *xd, mb_init_dequantizer(pbi, xd); if (xd->mode_info_context->mbmi.mb_skip_coeff) { - vp9_reset_mb_tokens_context(xd); + vp9_reset_sb_tokens_context(xd, BLOCK_SIZE_MB16X16); } else if (!bool_error(bc)) { #if CONFIG_NEWBINTRAMODES if (mode != B_PRED) @@ -807,9 +819,9 @@ static void decode_mb(VP9D_COMP *pbi, MACROBLOCKD *xd, // do prediction if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { if (mode != I8X8_PRED) { - vp9_build_intra_predictors_mbuv(xd); + vp9_build_intra_predictors_mbuv_s(xd); if (mode != B_PRED) - vp9_build_intra_predictors_mby(xd); + vp9_build_intra_predictors_mby_s(xd); } } else { #if 0 // def DEC_DEBUG @@ -818,7 +830,7 @@ static void decode_mb(VP9D_COMP *pbi, MACROBLOCKD *xd, xd->mode_info_context->mbmi.mode, tx_size, xd->mode_info_context->mbmi.interp_filter); #endif - vp9_build_inter_predictors_mb(xd, mb_row, mb_col); + vp9_build_inter_predictors_mb_s(xd, mb_row, mb_col); } if (tx_size == TX_16X16) { @@ -1127,7 +1139,8 @@ static void read_nzc_probs(VP9_COMMON *cm, } #endif // CONFIG_CODE_NONZEROCOUNT -static void read_coef_probs_common(BOOL_DECODER* const bc, +static void read_coef_probs_common(VP9D_COMP *pbi, + BOOL_DECODER* const bc, vp9_coeff_probs *coef_probs, TX_SIZE tx_size) { #if CONFIG_MODELCOEFPROB && MODEL_BASED_UPDATE @@ -1172,16 +1185,16 @@ static void read_coef_probs_common(BOOL_DECODER* const bc, static void read_coef_probs(VP9D_COMP *pbi, BOOL_DECODER* const bc) { VP9_COMMON *const pc = &pbi->common; - read_coef_probs_common(bc, pc->fc.coef_probs_4x4, TX_4X4); + read_coef_probs_common(pbi, bc, pc->fc.coef_probs_4x4, TX_4X4); if (pbi->common.txfm_mode != ONLY_4X4) - read_coef_probs_common(bc, pc->fc.coef_probs_8x8, TX_8X8); + read_coef_probs_common(pbi, bc, pc->fc.coef_probs_8x8, TX_8X8); if (pbi->common.txfm_mode > ALLOW_8X8) - read_coef_probs_common(bc, pc->fc.coef_probs_16x16, TX_16X16); + read_coef_probs_common(pbi, bc, pc->fc.coef_probs_16x16, TX_16X16); if (pbi->common.txfm_mode > ALLOW_16X16) - read_coef_probs_common(bc, pc->fc.coef_probs_32x32, TX_32X32); + read_coef_probs_common(pbi, bc, pc->fc.coef_probs_32x32, TX_32X32); } static void update_frame_size(VP9D_COMP *pbi) { @@ -1707,9 +1720,10 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { if (vp9_read(&header_bc, 252)) pc->fc.vp9_mode_contexts[i][j] = vp9_read_prob(&header_bc); } -#if CONFIG_MODELCOEFPROB && ADJUST_KF_COEF_PROBS - if (pc->frame_type == 
KEY_FRAME) - vp9_adjust_default_coef_probs(pc); +#if CONFIG_MODELCOEFPROB + if (pc->frame_type == KEY_FRAME) { + vp9_default_coef_probs(pc); + } #endif #if CONFIG_NEW_MVREF diff --git a/vp9/decoder/vp9_dequantize.h b/vp9/decoder/vp9_dequantize.h index da9e2b72f..8b53dd9cb 100644 --- a/vp9/decoder/vp9_dequantize.h +++ b/vp9/decoder/vp9_dequantize.h @@ -33,13 +33,14 @@ void vp9_dequant_dc_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dc); void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq, - unsigned char *pre, + unsigned char *pre, int pre_stride, unsigned char *dst, int stride, struct macroblockd *xd); void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, const int16_t *dq, unsigned char *pre, + int pre_stride, unsigned char *dst, int stride, uint16_t *eobs); diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c index 457c739b2..3df841b88 100644 --- a/vp9/decoder/vp9_detokenize.c +++ b/vp9/decoder/vp9_detokenize.c @@ -10,6 +10,7 @@ #include "vp9/common/vp9_blockd.h" +#include "vp9/common/vp9_common.h" #include "vp9/decoder/vp9_onyxd_int.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" @@ -382,101 +383,34 @@ static int get_eob(MACROBLOCKD* const xd, int segment_id, int eob_max) { return vp9_get_segdata(xd, segment_id, SEG_LVL_SKIP) ? 0 : eob_max; } -/* TODO(jkoleszar): Probably best to remove instances that require this, - * as the data likely becomes per-plane and stored in the per-plane structures. - * This is a stub to work with the existing code. - */ -static INLINE int block_idx_4x4(MACROBLOCKD* const xd, int block_size_b, - int plane, int i) { - const int luma_blocks = 1 << block_size_b; - assert(xd->plane[0].subsampling_x == 0); - assert(xd->plane[0].subsampling_y == 0); - assert(xd->plane[1].subsampling_x == 1); - assert(xd->plane[1].subsampling_y == 1); - assert(xd->plane[2].subsampling_x == 1); - assert(xd->plane[2].subsampling_y == 1); - return plane == 0 ? i : - plane == 1 ? luma_blocks + i : - luma_blocks * 5 / 4 + i; -} - -static INLINE int decode_block_plane(VP9D_COMP* const pbi, - MACROBLOCKD* const xd, - BOOL_DECODER* const bc, - int block_size, - int segment_id, - int plane, - int is_split) { - // block and transform sizes, in number of 4x4 blocks log 2 ("*_b") - // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8 - const TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; - const int block_size_b = block_size; - const int txfrm_size_b = tx_size * 2; - - // subsampled size of the block - const int ss_sum = xd->plane[plane].subsampling_x + - xd->plane[plane].subsampling_y; - const int ss_block_size = block_size_b - ss_sum; - - // size of the transform to use. scale the transform down if it's larger - // than the size of the subsampled data, or forced externally by the mb mode. - const int ss_max = MAX(xd->plane[plane].subsampling_x, - xd->plane[plane].subsampling_y); - const int ss_txfrm_size = txfrm_size_b > ss_block_size || is_split - ? txfrm_size_b - ss_max * 2 - : txfrm_size_b; - const TX_SIZE ss_tx_size = ss_txfrm_size / 2; - // TODO(jkoleszar): 1 may not be correct here with larger chroma planes. - const int inc = is_split ? 
1 : (1 << ss_txfrm_size); +struct decode_block_args { + VP9D_COMP *pbi; + MACROBLOCKD *xd; + BOOL_DECODER *bc; + int *eobtotal; +}; +static void decode_block(int plane, int block, + int block_size_b, + int ss_txfrm_size, + void *argv) { + const struct decode_block_args* const arg = argv; + const int old_block_idx = old_block_idx_4x4(arg->xd, block_size_b, + plane, block); // find the maximum eob for this transform size, adjusted by segment - const int seg_eob = get_eob(xd, segment_id, 16 << ss_txfrm_size); - - int i, eobtotal = 0; - - assert(txfrm_size_b <= block_size_b); - assert(ss_txfrm_size <= ss_block_size); - - // step through the block by the size of the transform in use. - for (i = 0; i < (1 << ss_block_size); i += inc) { - const int block_idx = block_idx_4x4(xd, block_size_b, plane, i); + const int segment_id = arg->xd->mode_info_context->mbmi.segment_id; + const TX_SIZE ss_tx_size = ss_txfrm_size / 2; + const int seg_eob = get_eob(arg->xd, segment_id, 16 << ss_txfrm_size); + int16_t* const qcoeff_base = arg->xd->plane[plane].qcoeff; - const int c = decode_coefs(pbi, xd, bc, block_idx, - xd->plane[plane].plane_type, seg_eob, - BLOCK_OFFSET(xd->plane[plane].qcoeff, i, 16), + const int eob = decode_coefs(arg->pbi, arg->xd, arg->bc, old_block_idx, + arg->xd->plane[plane].plane_type, seg_eob, + BLOCK_OFFSET(qcoeff_base, block, 16), ss_tx_size); - xd->plane[plane].eobs[i] = c; - eobtotal += c; - } - return eobtotal; -} - -static INLINE int decode_blocks_helper(VP9D_COMP* const pbi, - MACROBLOCKD* const xd, - BOOL_DECODER* const bc, - int block_size, - int is_split_chroma) { - const int segment_id = xd->mode_info_context->mbmi.segment_id; - int plane, eobtotal = 0; - - for (plane = 0; plane < MAX_MB_PLANE; plane++) { - const int is_split = is_split_chroma && - xd->plane[plane].plane_type == PLANE_TYPE_UV; - eobtotal += decode_block_plane(pbi, xd, bc, block_size, segment_id, - plane, is_split); - } - return eobtotal; -} -static INLINE int decode_blocks(VP9D_COMP* const pbi, - MACROBLOCKD* const xd, - BOOL_DECODER* const bc, - int block_size) { - const MB_PREDICTION_MODE mode = xd->mode_info_context->mbmi.mode; - const TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; - return decode_blocks_helper(pbi, xd, bc, block_size, - tx_size == TX_8X8 && (mode == I8X8_PRED || mode == SPLITMV)); + arg->xd->plane[plane].eobs[block] = eob; + arg->eobtotal[0] += eob; } int vp9_decode_tokens(VP9D_COMP* const pbi, @@ -484,7 +418,10 @@ int vp9_decode_tokens(VP9D_COMP* const pbi, BOOL_DECODER* const bc, BLOCK_SIZE_TYPE bsize) { const int bwl = mb_width_log2(bsize) + 2, bhl = mb_height_log2(bsize) + 2; - return decode_blocks(pbi, xd, bc, bwl + bhl); + int eobtotal = 0; + struct decode_block_args args = {pbi, xd, bc, &eobtotal}; + foreach_transformed_block(xd, bwl + bhl, decode_block, &args); + return eobtotal; } #if CONFIG_NEWBINTRAMODES diff --git a/vp9/decoder/vp9_idct_blk.c b/vp9/decoder/vp9_idct_blk.c index bcf7dfdb9..0e3560189 100644 --- a/vp9/decoder/vp9_idct_blk.c +++ b/vp9/decoder/vp9_idct_blk.c @@ -13,85 +13,86 @@ #include "vp9/decoder/vp9_dequantize.h" void vp9_dequant_idct_add_y_block_c(int16_t *q, const int16_t *dq, - uint8_t *pre, + uint8_t *pre, int pre_stride, uint8_t *dst, int stride, MACROBLOCKD *xd) { int i, j; for (i = 0; i < 4; i++) { for (j = 0; j < 4; j++) { - vp9_dequant_idct_add(q, dq, pre, dst, 16, stride, + vp9_dequant_idct_add(q, dq, pre, dst, pre_stride, stride, xd->plane[0].eobs[i * 4 + j]); q += 16; pre += 4; dst += 4; } - pre += 64 - 16; + pre += 4 * pre_stride - 16; 
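
Note on the hunk above: with the prediction now written straight into the destination buffer, the hard-coded predictor pitch of 16 becomes the pre_stride argument, and the end-of-row step becomes 4 * pre_stride - 16. A minimal sketch of the same traversal, assuming a 16x16 luma block split into 4x4 transforms; process_4x4() and walk_4x4_subblocks() are illustrative names, not libvpx API:

/* Stand-in for one dequant+idct+add call on a 4x4 sub-block. */
static void process_4x4(unsigned char *p, int stride) {
  (void)p;
  (void)stride;
}

/* Visit the 16 4x4 sub-blocks of a 16x16 area in raster order. After
 * the four sub-blocks of a row, the pointer sits 16 bytes past the row
 * start, so 4 * stride - 16 moves it down four pixel rows and back to
 * column 0. */
static void walk_4x4_subblocks(unsigned char *p, int stride) {
  int i, j;
  for (i = 0; i < 4; i++) {
    for (j = 0; j < 4; j++) {
      process_4x4(p, stride);
      p += 4;             /* next sub-block to the right */
    }
    p += 4 * stride - 16; /* down 4 rows, minus the 16 pixels consumed */
  }
}
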
dst += 4 * stride - 16; } } void vp9_dequant_idct_add_uv_block_c(int16_t *q, const int16_t *dq, - uint8_t *pre, uint8_t *dst, + uint8_t *pre, int pre_stride, uint8_t *dst, int stride, uint16_t *eobs) { int i, j; for (i = 0; i < 2; i++) { for (j = 0; j < 2; j++) { - vp9_dequant_idct_add(q, dq, pre, dst, 8, stride, eobs[i * 2 + j]); + vp9_dequant_idct_add(q, dq, pre, dst, pre_stride, stride, + eobs[i * 2 + j]); q += 16; pre += 4; dst += 4; } - pre += 32 - 8; + pre += 4 * pre_stride - 8; dst += 4 * stride - 8; } } void vp9_dequant_idct_add_y_block_8x8_c(int16_t *q, const int16_t *dq, - uint8_t *pre, + uint8_t *pre, int pre_stride, uint8_t *dst, int stride, MACROBLOCKD *xd) { uint8_t *origdest = dst; uint8_t *origpred = pre; - vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride, + vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, pre_stride, stride, xd->plane[0].eobs[0]); vp9_dequant_idct_add_8x8_c(&q[64], dq, origpred + 8, - origdest + 8, 16, stride, + origdest + 8, pre_stride, stride, xd->plane[0].eobs[4]); - vp9_dequant_idct_add_8x8_c(&q[128], dq, origpred + 8 * 16, - origdest + 8 * stride, 16, stride, + vp9_dequant_idct_add_8x8_c(&q[128], dq, origpred + 8 * pre_stride, + origdest + 8 * stride, pre_stride, stride, xd->plane[0].eobs[8]); - vp9_dequant_idct_add_8x8_c(&q[192], dq, origpred + 8 * 16 + 8, - origdest + 8 * stride + 8, 16, stride, + vp9_dequant_idct_add_8x8_c(&q[192], dq, origpred + 8 * pre_stride + 8, + origdest + 8 * stride + 8, pre_stride, stride, xd->plane[0].eobs[12]); } void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq, - uint8_t *pre, + uint8_t *pre, int pre_stride, uint8_t *dst, int stride, MACROBLOCKD *xd) { int i, j; for (i = 0; i < 4; i++) { for (j = 0; j < 4; j++) { - vp9_dequant_idct_add_lossless_c(q, dq, pre, dst, 16, stride, + vp9_dequant_idct_add_lossless_c(q, dq, pre, dst, pre_stride, stride, xd->plane[0].eobs[i * 4 + j]); q += 16; pre += 4; dst += 4; } - pre += 64 - 16; + pre += 4 * pre_stride - 16; dst += 4 * stride - 16; } } void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, const int16_t *dq, - uint8_t *pre, + uint8_t *pre, int pre_stride, uint8_t *dst, int stride, uint16_t *eobs) { @@ -99,14 +100,14 @@ void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, const int16_t *dq, for (i = 0; i < 2; i++) { for (j = 0; j < 2; j++) { - vp9_dequant_idct_add_lossless_c(q, dq, pre, dst, 8, stride, + vp9_dequant_idct_add_lossless_c(q, dq, pre, dst, pre_stride, stride, eobs[i * 2 + j]); q += 16; pre += 4; dst += 4; } - pre += 32 - 8; + pre += 4 * pre_stride - 8; dst += 4 * stride - 8; } } diff --git a/vp9/encoder/ppc/vp9_csystemdependent.c b/vp9/encoder/ppc/vp9_csystemdependent.c deleted file mode 100644 index cc67625e7..000000000 --- a/vp9/encoder/ppc/vp9_csystemdependent.c +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include "vp9/encoder/vp9_variance.h" -#include "vp9/encoder/vp9_onyx_int.h" - -SADFunction *vp9_sad16x16; -SADFunction *vp9_sad16x8; -SADFunction *vp9_sad8x16; -SADFunction *vp9_sad8x8; -SADFunction *vp9_sad4x4; - -variance_function *vp9_variance4x4; -variance_function *vp9_variance8x8; -variance_function *vp9_variance8x16; -variance_function *vp9_variance16x8; -variance_function *vp9_variance16x16; - -variance_function *vp9_mse16x16; - -sub_pixel_variance_function *vp9_sub_pixel_variance4x4; -sub_pixel_variance_function *vp9_sub_pixel_variance8x8; -sub_pixel_variance_function *vp9_sub_pixel_variance8x16; -sub_pixel_variance_function *vp9_sub_pixel_variance16x8; -sub_pixel_variance_function *vp9_sub_pixel_variance16x16; - -int (*vp9_block_error)(short *coeff, short *dqcoeff); -int (*vp9_mbblock_error)(MACROBLOCK *mb, int dc); - -int (*vp9_mbuverror)(MACROBLOCK *mb); -unsigned int (*vp9_get_mb_ss)(short *); -void (*vp9_short_fdct4x4)(short *input, short *output, int pitch); -void (*vp9_short_fdct8x4)(short *input, short *output, int pitch); -void (*vp8_fast_fdct4x4)(short *input, short *output, int pitch); -void (*vp8_fast_fdct8x4)(short *input, short *output, int pitch); -void (*short_walsh4x4)(short *input, short *output, int pitch); - -void (*vp9_subtract_b)(BLOCK *be, BLOCKD *bd, int pitch); -void (*vp9_subtract_mby)(short *diff, unsigned char *src, unsigned char *pred, int stride); -void (*vp9_subtract_mbuv)(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride); -void (*vp8_fast_quantize_b)(BLOCK *b, BLOCKD *d); - -// c imports -extern int block_error_c(short *coeff, short *dqcoeff); -extern int vp9_mbblock_error_c(MACROBLOCK *mb, int dc); - -extern int vp9_mbuverror_c(MACROBLOCK *mb); -extern unsigned int vp8_get8x8var_c(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum); -extern void short_fdct4x4_c(short *input, short *output, int pitch); -extern void short_fdct8x4_c(short *input, short *output, int pitch); -extern void vp9_short_walsh4x4_c(short *input, short *output, int pitch); - -extern void vp9_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch); -extern void subtract_mby_c(short *diff, unsigned char *src, unsigned char *pred, int stride); -extern void subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride); -extern void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d); - -extern SADFunction sad16x16_c; -extern SADFunction sad16x8_c; -extern SADFunction sad8x16_c; -extern SADFunction sad8x8_c; -extern SADFunction sad4x4_c; - -extern variance_function variance16x16_c; -extern variance_function variance8x16_c; -extern variance_function variance16x8_c; -extern variance_function variance8x8_c; -extern variance_function variance4x4_c; -extern variance_function mse16x16_c; - -extern sub_pixel_variance_function sub_pixel_variance4x4_c; -extern sub_pixel_variance_function sub_pixel_variance8x8_c; -extern sub_pixel_variance_function sub_pixel_variance8x16_c; -extern sub_pixel_variance_function sub_pixel_variance16x8_c; -extern sub_pixel_variance_function sub_pixel_variance16x16_c; - -extern unsigned int vp9_get_mb_ss_c(short *); - -// ppc -extern int vp9_block_error_ppc(short *coeff, short *dqcoeff); - -extern void vp9_short_fdct4x4_ppc(short *input, short *output, int pitch); -extern void vp9_short_fdct8x4_ppc(short *input, short *output, int pitch); - -extern void vp9_subtract_mby_ppc(short *diff, unsigned char *src, unsigned char *pred, int 
stride); -extern void vp9_subtract_mbuv_ppc(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride); - -extern SADFunction vp9_sad16x16_ppc; -extern SADFunction vp9_sad16x8_ppc; -extern SADFunction vp9_sad8x16_ppc; -extern SADFunction vp9_sad8x8_ppc; -extern SADFunction vp9_sad4x4_ppc; - -extern variance_function vp9_variance16x16_ppc; -extern variance_function vp9_variance8x16_ppc; -extern variance_function vp9_variance16x8_ppc; -extern variance_function vp9_variance8x8_ppc; -extern variance_function vp9_variance4x4_ppc; -extern variance_function vp9_mse16x16_ppc; - -extern sub_pixel_variance_function vp9_sub_pixel_variance4x4_ppc; -extern sub_pixel_variance_function vp9_sub_pixel_variance8x8_ppc; -extern sub_pixel_variance_function vp9_sub_pixel_variance8x16_ppc; -extern sub_pixel_variance_function vp9_sub_pixel_variance16x8_ppc; -extern sub_pixel_variance_function vp9_sub_pixel_variance16x16_ppc; - -extern unsigned int vp8_get8x8var_ppc(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum); -extern unsigned int vp8_get16x16var_ppc(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum); - -void vp9_cmachine_specific_config(void) { - // Pure C: - vp9_mbuverror = vp9_mbuverror_c; - vp8_fast_quantize_b = vp8_fast_quantize_b_c; - vp9_short_fdct4x4 = vp9_short_fdct4x4_ppc; - vp9_short_fdct8x4 = vp9_short_fdct8x4_ppc; - vp8_fast_fdct4x4 = vp9_short_fdct4x4_ppc; - vp8_fast_fdct8x4 = vp9_short_fdct8x4_ppc; - short_walsh4x4 = vp9_short_walsh4x4_c; - - vp9_variance4x4 = vp9_variance4x4_ppc; - vp9_variance8x8 = vp9_variance8x8_ppc; - vp9_variance8x16 = vp9_variance8x16_ppc; - vp9_variance16x8 = vp9_variance16x8_ppc; - vp9_variance16x16 = vp9_variance16x16_ppc; - vp9_mse16x16 = vp9_mse16x16_ppc; - - vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_ppc; - vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_ppc; - vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_ppc; - vp9_sub_pixel_variance16x8 = vp9_sub_pixel_variance16x8_ppc; - vp9_sub_pixel_variance16x16 = vp9_sub_pixel_variance16x16_ppc; - - vp9_get_mb_ss = vp9_get_mb_ss_c; - - vp9_sad16x16 = vp9_sad16x16_ppc; - vp9_sad16x8 = vp9_sad16x8_ppc; - vp9_sad8x16 = vp9_sad8x16_ppc; - vp9_sad8x8 = vp9_sad8x8_ppc; - vp9_sad4x4 = vp9_sad4x4_ppc; - - vp9_block_error = vp9_block_error_ppc; - vp9_mbblock_error = vp9_mbblock_error_c; - - vp9_subtract_b = vp9_subtract_b_c; - vp9_subtract_mby = vp9_subtract_mby_ppc; - vp9_subtract_mbuv = vp9_subtract_mbuv_ppc; -} diff --git a/vp9/encoder/ppc/vp9_encodemb_altivec.asm b/vp9/encoder/ppc/vp9_encodemb_altivec.asm deleted file mode 100644 index 6e0099ddc..000000000 --- a/vp9/encoder/ppc/vp9_encodemb_altivec.asm +++ /dev/null @@ -1,153 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
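
The deleted routines below compute the prediction residual that feeds the forward transform: diff = src - pred, widened to 16 bits, with the source strided and the predictor packed at 16 bytes per row (hence the fixed addi of 16 per row). A scalar sketch of the luma case under those pitch assumptions; subtract_mby_ref() is an illustrative name:

/* 16x16 luma residual: src is strided, pred and diff use a fixed pitch
 * of 16, matching the addi r5, r5, 16 / addi r3, r3, 32 steps in the
 * AltiVec version (32 bytes = 16 shorts of diff per row). */
static void subtract_mby_ref(short *diff, const unsigned char *src,
                             const unsigned char *pred, int stride) {
  int r, c;
  for (r = 0; r < 16; r++) {
    for (c = 0; c < 16; c++)
      diff[c] = (short)(src[c] - pred[c]);
    diff += 16;
    pred += 16;
    src += stride;
  }
}
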
-; - - - .globl vp8_subtract_mbuv_ppc - .globl vp8_subtract_mby_ppc - -;# r3 short *diff -;# r4 unsigned char *usrc -;# r5 unsigned char *vsrc -;# r6 unsigned char *pred -;# r7 int stride -vp8_subtract_mbuv_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xf000 - mtspr 256, r12 ;# set VRSAVE - - li r9, 256 - add r3, r3, r9 - add r3, r3, r9 - add r6, r6, r9 - - li r10, 16 - li r9, 4 - mtctr r9 - - vspltisw v0, 0 - -mbu_loop: - lvsl v5, 0, r4 ;# permutate value for alignment - lvx v1, 0, r4 ;# src - lvx v2, 0, r6 ;# pred - - add r4, r4, r7 - addi r6, r6, 16 - - vperm v1, v1, v0, v5 - - vmrghb v3, v0, v1 ;# unpack high src to short - vmrghb v4, v0, v2 ;# unpack high pred to short - - lvsl v5, 0, r4 ;# permutate value for alignment - lvx v1, 0, r4 ;# src - - add r4, r4, r7 - - vsubshs v3, v3, v4 - - stvx v3, 0, r3 ;# store out diff - - vperm v1, v1, v0, v5 - - vmrghb v3, v0, v1 ;# unpack high src to short - vmrglb v4, v0, v2 ;# unpack high pred to short - - vsubshs v3, v3, v4 - - stvx v3, r10, r3 ;# store out diff - - addi r3, r3, 32 - - bdnz mbu_loop - - mtctr r9 - -mbv_loop: - lvsl v5, 0, r5 ;# permutate value for alignment - lvx v1, 0, r5 ;# src - lvx v2, 0, r6 ;# pred - - add r5, r5, r7 - addi r6, r6, 16 - - vperm v1, v1, v0, v5 - - vmrghb v3, v0, v1 ;# unpack high src to short - vmrghb v4, v0, v2 ;# unpack high pred to short - - lvsl v5, 0, r5 ;# permutate value for alignment - lvx v1, 0, r5 ;# src - - add r5, r5, r7 - - vsubshs v3, v3, v4 - - stvx v3, 0, r3 ;# store out diff - - vperm v1, v1, v0, v5 - - vmrghb v3, v0, v1 ;# unpack high src to short - vmrglb v4, v0, v2 ;# unpack high pred to short - - vsubshs v3, v3, v4 - - stvx v3, r10, r3 ;# store out diff - - addi r3, r3, 32 - - bdnz mbv_loop - - mtspr 256, r11 ;# reset old VRSAVE - - blr - -;# r3 short *diff -;# r4 unsigned char *src -;# r5 unsigned char *pred -;# r6 int stride -vp8_subtract_mby_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xf800 - mtspr 256, r12 ;# set VRSAVE - - li r10, 16 - mtctr r10 - - vspltisw v0, 0 - -mby_loop: - lvx v1, 0, r4 ;# src - lvx v2, 0, r5 ;# pred - - add r4, r4, r6 - addi r5, r5, 16 - - vmrghb v3, v0, v1 ;# unpack high src to short - vmrghb v4, v0, v2 ;# unpack high pred to short - - vsubshs v3, v3, v4 - - stvx v3, 0, r3 ;# store out diff - - vmrglb v3, v0, v1 ;# unpack low src to short - vmrglb v4, v0, v2 ;# unpack low pred to short - - vsubshs v3, v3, v4 - - stvx v3, r10, r3 ;# store out diff - - addi r3, r3, 32 - - bdnz mby_loop - - mtspr 256, r11 ;# reset old VRSAVE - - blr diff --git a/vp9/encoder/ppc/vp9_fdct_altivec.asm b/vp9/encoder/ppc/vp9_fdct_altivec.asm deleted file mode 100644 index 935d0cb09..000000000 --- a/vp9/encoder/ppc/vp9_fdct_altivec.asm +++ /dev/null @@ -1,205 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - .globl vp8_short_fdct4x4_ppc - .globl vp8_short_fdct8x4_ppc - -.macro load_c V, LABEL, OFF, R0, R1 - lis \R0, \LABEL@ha - la \R1, \LABEL@l(\R0) - lvx \V, \OFF, \R1 -.endm - -;# Forward and inverse DCTs are nearly identical; only differences are -;# in normalization (fwd is twice unitary, inv is half unitary) -;# and that they are of course transposes of each other. 
-;# -;# The following three accomplish most of the implementation and -;# are used only by ppc_idct.c and ppc_fdct.c. -.macro prologue - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xfffc - mtspr 256, r12 ;# set VRSAVE - - stwu r1,-32(r1) ;# create space on the stack - - li r6, 16 - - load_c v0, dct_tab, 0, r9, r10 - lvx v1, r6, r10 - addi r10, r10, 32 - lvx v2, 0, r10 - lvx v3, r6, r10 - - load_c v4, ppc_dctperm_tab, 0, r9, r10 - load_c v5, ppc_dctperm_tab, r6, r9, r10 - - load_c v6, round_tab, 0, r10, r9 -.endm - -.macro epilogue - addi r1, r1, 32 ;# recover stack - - mtspr 256, r11 ;# reset old VRSAVE -.endm - -;# Do horiz xf on two rows of coeffs v8 = a0 a1 a2 a3 b0 b1 b2 b3. -;# a/A are the even rows 0,2 b/B are the odd rows 1,3 -;# For fwd transform, indices are horizontal positions, then frequencies. -;# For inverse transform, frequencies then positions. -;# The two resulting A0..A3 B0..B3 are later combined -;# and vertically transformed. - -.macro two_rows_horiz Dst - vperm v9, v8, v8, v4 ;# v9 = a2 a3 a0 a1 b2 b3 b0 b1 - - vmsumshm v10, v0, v8, v6 - vmsumshm v10, v1, v9, v10 - vsraw v10, v10, v7 ;# v10 = A0 A1 B0 B1 - - vmsumshm v11, v2, v8, v6 - vmsumshm v11, v3, v9, v11 - vsraw v11, v11, v7 ;# v11 = A2 A3 B2 B3 - - vpkuwum v10, v10, v11 ;# v10 = A0 A1 B0 B1 A2 A3 B2 B3 - vperm \Dst, v10, v10, v5 ;# Dest = A0 B0 A1 B1 A2 B2 A3 B3 -.endm - -;# Vertical xf on two rows. DCT values in comments are for inverse transform; -;# forward transform uses transpose. - -.macro two_rows_vert Ceven, Codd - vspltw v8, \Ceven, 0 ;# v8 = c00 c10 or c02 c12 four times - vspltw v9, \Codd, 0 ;# v9 = c20 c30 or c22 c32 "" - vmsumshm v8, v8, v12, v6 - vmsumshm v8, v9, v13, v8 - vsraw v10, v8, v7 - - vspltw v8, \Codd, 1 ;# v8 = c01 c11 or c03 c13 - vspltw v9, \Ceven, 1 ;# v9 = c21 c31 or c23 c33 - vmsumshm v8, v8, v12, v6 - vmsumshm v8, v9, v13, v8 - vsraw v8, v8, v7 - - vpkuwum v8, v10, v8 ;# v8 = rows 0,1 or 2,3 -.endm - -.macro two_rows_h Dest - stw r0, 0(r8) - lwz r0, 4(r3) - stw r0, 4(r8) - lwzux r0, r3,r5 - stw r0, 8(r8) - lwz r0, 4(r3) - stw r0, 12(r8) - lvx v8, 0,r8 - two_rows_horiz \Dest -.endm - - .align 2 -;# r3 short *input -;# r4 short *output -;# r5 int pitch -vp8_short_fdct4x4_ppc: - - prologue - - vspltisw v7, 14 ;# == 14, fits in 5 signed bits - addi r8, r1, 0 - - - lwz r0, 0(r3) - two_rows_h v12 ;# v12 = H00 H10 H01 H11 H02 H12 H03 H13 - - lwzux r0, r3, r5 - two_rows_h v13 ;# v13 = H20 H30 H21 H31 H22 H32 H23 H33 - - lvx v6, r6, r9 ;# v6 = Vround - vspltisw v7, -16 ;# == 16 == -16, only low 5 bits matter - - two_rows_vert v0, v1 - stvx v8, 0, r4 - two_rows_vert v2, v3 - stvx v8, r6, r4 - - epilogue - - blr - - .align 2 -;# r3 short *input -;# r4 short *output -;# r5 int pitch -vp8_short_fdct8x4_ppc: - prologue - - vspltisw v7, 14 ;# == 14, fits in 5 signed bits - addi r8, r1, 0 - addi r10, r3, 0 - - lwz r0, 0(r3) - two_rows_h v12 ;# v12 = H00 H10 H01 H11 H02 H12 H03 H13 - - lwzux r0, r3, r5 - two_rows_h v13 ;# v13 = H20 H30 H21 H31 H22 H32 H23 H33 - - lvx v6, r6, r9 ;# v6 = Vround - vspltisw v7, -16 ;# == 16
== -16, only low 5 bits matter - - two_rows_vert v0, v1 - stvx v8, 0, r4 - two_rows_vert v2, v3 - stvx v8, r6, r4 - - epilogue - - blr - - .data - .align 4 -ppc_dctperm_tab: - .byte 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 - .byte 0,1,4,5, 2,3,6,7, 8,9,12,13, 10,11,14,15 - - .align 4 -dct_tab: - .short 23170, 23170,-12540,-30274, 23170, 23170,-12540,-30274 - .short 23170, 23170, 30274, 12540, 23170, 23170, 30274, 12540 - - .short 23170,-23170, 30274,-12540, 23170,-23170, 30274,-12540 - .short -23170, 23170, 12540,-30274,-23170, 23170, 12540,-30274 - - .align 4 -round_tab: - .long (1 << (14-1)), (1 << (14-1)), (1 << (14-1)), (1 << (14-1)) - .long (1 << (16-1)), (1 << (16-1)), (1 << (16-1)), (1 << (16-1)) diff --git a/vp9/encoder/ppc/vp9_rdopt_altivec.asm b/vp9/encoder/ppc/vp9_rdopt_altivec.asm deleted file mode 100644 index ba4823009..000000000 --- a/vp9/encoder/ppc/vp9_rdopt_altivec.asm +++ /dev/null @@ -1,51 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - .globl vp8_block_error_ppc - - .align 2 -;# r3 short *Coeff -;# r4 short *dqcoeff -vp8_block_error_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xf800 - mtspr 256, r12 ;# set VRSAVE - - stwu r1,-32(r1) ;# create space on the stack - - stw r5, 12(r1) ;# transfer dc to vector register - - lvx v0, 0, r3 ;# Coeff - lvx v1, 0, r4 ;# dqcoeff - - li r10, 16 - - vspltisw v3, 0 - - vsubshs v0, v0, v1 - - vmsumshm v2, v0, v0, v3 ;# multiply differences - - lvx v0, r10, r3 ;# Coeff - lvx v1, r10, r4 ;# dqcoeff - - vsubshs v0, v0, v1 - - vmsumshm v1, v0, v0, v2 ;# multiply differences - vsumsws v1, v1, v3 ;# sum up - - stvx v1, 0, r1 - lwz r3, 12(r1) ;# return value - - addi r1, r1, 32 ;# recover stack - mtspr 256, r11 ;# reset old VRSAVE - - blr diff --git a/vp9/encoder/ppc/vp9_sad_altivec.asm b/vp9/encoder/ppc/vp9_sad_altivec.asm deleted file mode 100644 index e5f26380f..000000000 --- a/vp9/encoder/ppc/vp9_sad_altivec.asm +++ /dev/null @@ -1,277 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
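
The file removed below implemented sum-of-absolute-differences for the standard block sizes; the vector kernel computes |a - b| for unsigned bytes as the OR of two saturating subtracts, then accumulates with vsum4ubs. A scalar sketch of the 16x16 case, with an illustrative name:

#include <stdlib.h>

/* Reference SAD over a 16x16 block; this is the value the
 * vsububs/vor/vsum4ubs sequence below accumulates row by row. */
static unsigned int sad16x16_ref(const unsigned char *src, int src_stride,
                                 const unsigned char *ref, int ref_stride) {
  unsigned int sad = 0;
  int r, c;
  for (r = 0; r < 16; r++) {
    for (c = 0; c < 16; c++)
      sad += (unsigned int)abs(src[c] - ref[c]);
    src += src_stride;
    ref += ref_stride;
  }
  return sad;
}
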
-; - - - .globl vp8_sad16x16_ppc - .globl vp8_sad16x8_ppc - .globl vp8_sad8x16_ppc - .globl vp8_sad8x8_ppc - .globl vp8_sad4x4_ppc - -.macro load_aligned_16 V R O - lvsl v3, 0, \R ;# permutate value for alignment - - lvx v1, 0, \R - lvx v2, \O, \R - - vperm \V, v1, v2, v3 -.endm - -.macro prologue - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffc0 - mtspr 256, r12 ;# set VRSAVE - - stwu r1, -32(r1) ;# create space on the stack - - li r10, 16 ;# load offset and loop counter - - vspltisw v8, 0 ;# zero out total to start -.endm - -.macro epilogue - addi r1, r1, 32 ;# recover stack - - mtspr 256, r11 ;# reset old VRSAVE -.endm - -.macro SAD_16 - ;# v6 = abs (v4 - v5) - vsububs v6, v4, v5 - vsububs v7, v5, v4 - vor v6, v6, v7 - - ;# v8 += abs (v4 - v5) - vsum4ubs v8, v6, v8 -.endm - -.macro sad_16_loop loop_label - lvsl v3, 0, r5 ;# only needs to be done once per block - - ;# preload a line of data before getting into the loop - lvx v4, 0, r3 - lvx v1, 0, r5 - lvx v2, r10, r5 - - add r5, r5, r6 - add r3, r3, r4 - - vperm v5, v1, v2, v3 - - .align 4 -\loop_label: - ;# compute difference on first row - vsububs v6, v4, v5 - vsububs v7, v5, v4 - - ;# load up next set of data - lvx v9, 0, r3 - lvx v1, 0, r5 - lvx v2, r10, r5 - - ;# perform abs() of difference - vor v6, v6, v7 - add r3, r3, r4 - - ;# add to the running tally - vsum4ubs v8, v6, v8 - - ;# now onto the next line - vperm v5, v1, v2, v3 - add r5, r5, r6 - lvx v4, 0, r3 - - ;# compute difference on second row - vsububs v6, v9, v5 - lvx v1, 0, r5 - vsububs v7, v5, v9 - lvx v2, r10, r5 - vor v6, v6, v7 - add r3, r3, r4 - vsum4ubs v8, v6, v8 - vperm v5, v1, v2, v3 - add r5, r5, r6 - - bdnz \loop_label - - vspltisw v7, 0 - - vsumsws v8, v8, v7 - - stvx v8, 0, r1 - lwz r3, 12(r1) -.endm - -.macro sad_8_loop loop_label - .align 4 -\loop_label: - ;# only one of the inputs should need to be aligned. - load_aligned_16 v4, r3, r10 - load_aligned_16 v5, r5, r10 - - ;# move onto the next line - add r3, r3, r4 - add r5, r5, r6 - - ;# only one of the inputs should need to be aligned. 
- load_aligned_16 v6, r3, r10 - load_aligned_16 v7, r5, r10 - - ;# move onto the next line - add r3, r3, r4 - add r5, r5, r6 - - vmrghb v4, v4, v6 - vmrghb v5, v5, v7 - - SAD_16 - - bdnz \loop_label - - vspltisw v7, 0 - - vsumsws v8, v8, v7 - - stvx v8, 0, r1 - lwz r3, 12(r1) -.endm - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int src_stride -;# r5 unsigned char *ref_ptr -;# r6 int ref_stride -;# -;# r3 return value -vp8_sad16x16_ppc: - - prologue - - li r9, 8 - mtctr r9 - - sad_16_loop sad16x16_loop - - epilogue - - blr - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int src_stride -;# r5 unsigned char *ref_ptr -;# r6 int ref_stride -;# -;# r3 return value -vp8_sad16x8_ppc: - - prologue - - li r9, 4 - mtctr r9 - - sad_16_loop sad16x8_loop - - epilogue - - blr - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int src_stride -;# r5 unsigned char *ref_ptr -;# r6 int ref_stride -;# -;# r3 return value -vp8_sad8x16_ppc: - - prologue - - li r9, 8 - mtctr r9 - - sad_8_loop sad8x16_loop - - epilogue - - blr - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int src_stride -;# r5 unsigned char *ref_ptr -;# r6 int ref_stride -;# -;# r3 return value -vp8_sad8x8_ppc: - - prologue - - li r9, 4 - mtctr r9 - - sad_8_loop sad8x8_loop - - epilogue - - blr - -.macro transfer_4x4 I P - lwz r0, 0(\I) - add \I, \I, \P - - lwz r7, 0(\I) - add \I, \I, \P - - lwz r8, 0(\I) - add \I, \I, \P - - lwz r9, 0(\I) - - stw r0, 0(r1) - stw r7, 4(r1) - stw r8, 8(r1) - stw r9, 12(r1) -.endm - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int src_stride -;# r5 unsigned char *ref_ptr -;# r6 int ref_stride -;# -;# r3 return value -vp8_sad4x4_ppc: - - prologue - - transfer_4x4 r3, r4 - lvx v4, 0, r1 - - transfer_4x4 r5, r6 - lvx v5, 0, r1 - - vspltisw v8, 0 ;# zero out total to start - - ;# v6 = abs (v4 - v5) - vsububs v6, v4, v5 - vsububs v7, v5, v4 - vor v6, v6, v7 - - ;# v8 += abs (v4 - v5) - vsum4ubs v7, v6, v8 - vsumsws v7, v7, v8 - - stvx v7, 0, r1 - lwz r3, 12(r1) - - epilogue - - blr diff --git a/vp9/encoder/ppc/vp9_variance_altivec.asm b/vp9/encoder/ppc/vp9_variance_altivec.asm deleted file mode 100644 index ad2664143..000000000 --- a/vp9/encoder/ppc/vp9_variance_altivec.asm +++ /dev/null @@ -1,375 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - .globl vp8_get8x8var_ppc - .globl vp8_get16x16var_ppc - .globl vp8_mse16x16_ppc - .globl vp9_variance16x16_ppc - .globl vp9_variance16x8_ppc - .globl vp9_variance8x16_ppc - .globl vp9_variance8x8_ppc - .globl vp9_variance4x4_ppc - -.macro load_aligned_16 V R O - lvsl v3, 0, \R ;# permutate value for alignment - - lvx v1, 0, \R - lvx v2, \O, \R - - vperm \V, v1, v2, v3 -.endm - -.macro prologue - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffc0 - mtspr 256, r12 ;# set VRSAVE - - stwu r1, -32(r1) ;# create space on the stack - - li r10, 16 ;# load offset and loop counter - - vspltisw v7, 0 ;# zero for merging - vspltisw v8, 0 ;# zero out total to start - vspltisw v9, 0 ;# zero out total for dif^2 -.endm - -.macro epilogue - addi r1, r1, 32 ;# recover stack - - mtspr 256, r11 ;# reset old VRSAVE -.endm - -.macro compute_sum_sse - ;# Compute sum first. Unpack so that signed subtract - ;# can be used. 
Only have a half word signed - ;# subtract. Do high, then low. - vmrghb v2, v7, v4 - vmrghb v3, v7, v5 - vsubshs v2, v2, v3 - vsum4shs v8, v2, v8 - - vmrglb v2, v7, v4 - vmrglb v3, v7, v5 - vsubshs v2, v2, v3 - vsum4shs v8, v2, v8 - - ;# Now compute sse. - vsububs v2, v4, v5 - vsububs v3, v5, v4 - vor v2, v2, v3 - - vmsumubm v9, v2, v2, v9 -.endm - -.macro variance_16 DS loop_label store_sum -\loop_label: - ;# only one of the inputs should need to be aligned. - load_aligned_16 v4, r3, r10 - load_aligned_16 v5, r5, r10 - - ;# move onto the next line - add r3, r3, r4 - add r5, r5, r6 - - compute_sum_sse - - bdnz \loop_label - - vsumsws v8, v8, v7 - vsumsws v9, v9, v7 - - stvx v8, 0, r1 - lwz r3, 12(r1) - - stvx v9, 0, r1 - lwz r4, 12(r1) - -.if \store_sum - stw r3, 0(r8) ;# sum -.endif - stw r4, 0(r7) ;# sse - - mullw r3, r3, r3 ;# sum*sum - srawi r3, r3, \DS ;# (sum*sum) >> DS - subf r3, r3, r4 ;# sse - ((sum*sum) >> DS) -.endm - -.macro variance_8 DS loop_label store_sum -\loop_label: - ;# only one of the inputs should need to be aligned. - load_aligned_16 v4, r3, r10 - load_aligned_16 v5, r5, r10 - - ;# move onto the next line - add r3, r3, r4 - add r5, r5, r6 - - ;# only one of the inputs should need to be aligned. - load_aligned_16 v6, r3, r10 - load_aligned_16 v0, r5, r10 - - ;# move onto the next line - add r3, r3, r4 - add r5, r5, r6 - - vmrghb v4, v4, v6 - vmrghb v5, v5, v0 - - compute_sum_sse - - bdnz \loop_label - - vsumsws v8, v8, v7 - vsumsws v9, v9, v7 - - stvx v8, 0, r1 - lwz r3, 12(r1) - - stvx v9, 0, r1 - lwz r4, 12(r1) - -.if \store_sum - stw r3, 0(r8) ;# sum -.endif - stw r4, 0(r7) ;# sse - - mullw r3, r3, r3 ;# sum*sum - srawi r3, r3, \DS ;# (sum*sum) >> DS - subf r3, r3, r4 ;# sse - ((sum*sum) >> DS) -.endm - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int source_stride -;# r5 unsigned char *ref_ptr -;# r6 int recon_stride -;# r7 unsigned int *SSE -;# r8 int *Sum -;# -;# r3 return value -vp8_get8x8var_ppc: - - prologue - - li r9, 4 - mtctr r9 - - variance_8 6, get8x8var_loop, 1 - - epilogue - - blr - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int source_stride -;# r5 unsigned char *ref_ptr -;# r6 int recon_stride -;# r7 unsigned int *SSE -;# r8 int *Sum -;# -;# r3 return value -vp8_get16x16var_ppc: - - prologue - - mtctr r10 - - variance_16 8, get16x16var_loop, 1 - - epilogue - - blr - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int source_stride -;# r5 unsigned char *ref_ptr -;# r6 int recon_stride -;# r7 unsigned int *sse -;# -;# r3 return value -vp8_mse16x16_ppc: - prologue - - mtctr r10 - -mse16x16_loop: - ;# only one of the inputs should need to be aligned. - load_aligned_16 v4, r3, r10 - load_aligned_16 v5, r5, r10 - - ;# move onto the next line - add r3, r3, r4 - add r5, r5, r6 - - ;# Now compute sse. 
- vsububs v2, v4, v5 - vsububs v3, v5, v4 - vor v2, v2, v3 - - vmsumubm v9, v2, v2, v9 - - bdnz mse16x16_loop - - vsumsws v9, v9, v7 - - stvx v9, 0, r1 - lwz r3, 12(r1) - - stvx v9, 0, r1 - lwz r3, 12(r1) - - stw r3, 0(r7) ;# sse - - epilogue - - blr - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int source_stride -;# r5 unsigned char *ref_ptr -;# r6 int recon_stride -;# r7 unsigned int *sse -;# -;# r3 return value -vp9_variance16x16_ppc: - - prologue - - mtctr r10 - - variance_16 8, variance16x16_loop, 0 - - epilogue - - blr - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int source_stride -;# r5 unsigned char *ref_ptr -;# r6 int recon_stride -;# r7 unsigned int *sse -;# -;# r3 return value -vp9_variance16x8_ppc: - - prologue - - li r9, 8 - mtctr r9 - - variance_16 7, variance16x8_loop, 0 - - epilogue - - blr - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int source_stride -;# r5 unsigned char *ref_ptr -;# r6 int recon_stride -;# r7 unsigned int *sse -;# -;# r3 return value -vp9_variance8x16_ppc: - - prologue - - li r9, 8 - mtctr r9 - - variance_8 7, variance8x16_loop, 0 - - epilogue - - blr - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int source_stride -;# r5 unsigned char *ref_ptr -;# r6 int recon_stride -;# r7 unsigned int *sse -;# -;# r3 return value -vp9_variance8x8_ppc: - - prologue - - li r9, 4 - mtctr r9 - - variance_8 6, variance8x8_loop, 0 - - epilogue - - blr - -.macro transfer_4x4 I P - lwz r0, 0(\I) - add \I, \I, \P - - lwz r10,0(\I) - add \I, \I, \P - - lwz r8, 0(\I) - add \I, \I, \P - - lwz r9, 0(\I) - - stw r0, 0(r1) - stw r10, 4(r1) - stw r8, 8(r1) - stw r9, 12(r1) -.endm - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int source_stride -;# r5 unsigned char *ref_ptr -;# r6 int recon_stride -;# r7 unsigned int *sse -;# -;# r3 return value -vp9_variance4x4_ppc: - - prologue - - transfer_4x4 r3, r4 - lvx v4, 0, r1 - - transfer_4x4 r5, r6 - lvx v5, 0, r1 - - compute_sum_sse - - vsumsws v8, v8, v7 - vsumsws v9, v9, v7 - - stvx v8, 0, r1 - lwz r3, 12(r1) - - stvx v9, 0, r1 - lwz r4, 12(r1) - - stw r4, 0(r7) ;# sse - - mullw r3, r3, r3 ;# sum*sum - srawi r3, r3, 4 ;# (sum*sum) >> 4 - subf r3, r3, r4 ;# sse - ((sum*sum) >> 4) - - epilogue - - blr diff --git a/vp9/encoder/ppc/vp9_variance_subpixel_altivec.asm b/vp9/encoder/ppc/vp9_variance_subpixel_altivec.asm deleted file mode 100644 index 26cc76f73..000000000 --- a/vp9/encoder/ppc/vp9_variance_subpixel_altivec.asm +++ /dev/null @@ -1,865 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - .globl vp9_sub_pixel_variance4x4_ppc - .globl vp9_sub_pixel_variance8x8_ppc - .globl vp9_sub_pixel_variance8x16_ppc - .globl vp9_sub_pixel_variance16x8_ppc - .globl vp9_sub_pixel_variance16x16_ppc - -.macro load_c V, LABEL, OFF, R0, R1 - lis \R0, \LABEL@ha - la \R1, \LABEL@l(\R0) - lvx \V, \OFF, \R1 -.endm - -.macro load_vfilter V0, V1 - load_c \V0, vfilter_b, r6, r12, r10 - - addi r6, r6, 16 - lvx \V1, r6, r10 -.endm - -.macro HProlog jump_label - ;# load up horizontal filter - slwi. r5, r5, 4 ;# index into horizontal filter array - - ;# index to the next set of vectors in the row. 
- li r10, 16 - - ;# downshift by 7 ( divide by 128 ) at the end - vspltish v19, 7 - - ;# If there isn't any filtering to be done for the horizontal, then - ;# just skip to the second pass. - beq \jump_label - - load_c v20, hfilter_b, r5, r12, r0 - - ;# setup constants - ;# v14 permutation value for alignment - load_c v28, b_hperm_b, 0, r12, r0 - - ;# index to the next set of vectors in the row. - li r12, 32 - - ;# rounding added in on the multiply - vspltisw v21, 8 - vspltisw v18, 3 - vslw v18, v21, v18 ;# 0x00000040000000400000004000000040 - - slwi. r6, r6, 5 ;# index into vertical filter array -.endm - -;# Filters a horizontal line -;# expects: -;# r3 src_ptr -;# r4 pitch -;# r10 16 -;# r12 32 -;# v17 perm input -;# v18 rounding -;# v19 shift -;# v20 filter taps -;# v21 tmp -;# v22 tmp -;# v23 tmp -;# v24 tmp -;# v25 tmp -;# v26 tmp -;# v27 tmp -;# v28 perm output -;# - -.macro hfilter_8 V, hp, lp, increment_counter - lvsl v17, 0, r3 ;# permutate value for alignment - - ;# input to filter is 9 bytes wide, output is 8 bytes. - lvx v21, 0, r3 - lvx v22, r10, r3 - -.if \increment_counter - add r3, r3, r4 -.endif - vperm v21, v21, v22, v17 - - vperm v24, v21, v21, \hp ;# v20 = 0123 1234 2345 3456 - vperm v25, v21, v21, \lp ;# v21 = 4567 5678 6789 789A - - vmsummbm v24, v20, v24, v18 - vmsummbm v25, v20, v25, v18 - - vpkswus v24, v24, v25 ;# v24 = 0 4 8 C 1 5 9 D (16-bit) - - vsrh v24, v24, v19 ;# divide v0, v1 by 128 - - vpkuhus \V, v24, v24 ;# \V = scrambled 8-bit result -.endm - -.macro vfilter_16 P0 P1 - vmuleub v22, \P0, v20 ;# 64 + 4 positive taps - vadduhm v22, v18, v22 - vmuloub v23, \P0, v20 - vadduhm v23, v18, v23 - - vmuleub v24, \P1, v21 - vadduhm v22, v22, v24 ;# Re = evens, saturation unnecessary - vmuloub v25, \P1, v21 - vadduhm v23, v23, v25 ;# Ro = odds - - vsrh v22, v22, v19 ;# divide by 128 - vsrh v23, v23, v19 ;# v16 v17 = evens, odds - vmrghh \P0, v22, v23 ;# v18 v19 = 16-bit result in order - vmrglh v23, v22, v23 - vpkuhus \P0, \P0, v23 ;# P0 = 8-bit result -.endm - -.macro compute_sum_sse src, ref, sum, sse, t1, t2, z0 - ;# Compute sum first. Unpack so that signed subtract - ;# can be used. Only have a half word signed - ;# subtract. Do high, then low. - vmrghb \t1, \z0, \src - vmrghb \t2, \z0, \ref - vsubshs \t1, \t1, \t2 - vsum4shs \sum, \t1, \sum - - vmrglb \t1, \z0, \src - vmrglb \t2, \z0, \ref - vsubshs \t1, \t1, \t2 - vsum4shs \sum, \t1, \sum - - ;# Now compute sse. - vsububs \t1, \src, \ref - vsububs \t2, \ref, \src - vor \t1, \t1, \t2 - - vmsumubm \sse, \t1, \t1, \sse -.endm - -.macro variance_final sum, sse, z0, DS - vsumsws \sum, \sum, \z0 - vsumsws \sse, \sse, \z0 - - stvx \sum, 0, r1 - lwz r3, 12(r1) - - stvx \sse, 0, r1 - lwz r4, 12(r1) - - stw r4, 0(r9) ;# sse - - mullw r3, r3, r3 ;# sum*sum - srawi r3, r3, \DS ;# (sum*sum) >> DS - subf r3, r3, r4 ;# sse - ((sum*sum) >> DS) -.endm - -.macro compute_sum_sse_16 V, increment_counter - load_and_align_16 v16, r7, r8, \increment_counter - compute_sum_sse \V, v16, v18, v19, v20, v21, v23 -.endm - -.macro load_and_align_16 V, R, P, increment_counter - lvsl v17, 0, \R ;# permutate value for alignment - - ;# input to filter is 21 bytes wide, output is 16 bytes. - ;# input can span three vectors if not aligned correctly. 
- lvx v21, 0, \R - lvx v22, r10, \R - -.if \increment_counter - add \R, \R, \P -.endif - - vperm \V, v21, v22, v17 -.endm - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int src_pixels_per_line -;# r5 int xoffset -;# r6 int yoffset -;# r7 unsigned char *dst_ptr -;# r8 int dst_pixels_per_line -;# r9 unsigned int *sse -;# -;# r3 return value -vp9_sub_pixel_variance4x4_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xf830 - ori r12, r12, 0xfff8 - mtspr 256, r12 ;# set VRSAVE - - stwu r1,-32(r1) ;# create space on the stack - - HProlog second_pass_4x4_pre_copy_b - - ;# Load up permutation constants - load_c v10, b_0123_b, 0, r12, r0 - load_c v11, b_4567_b, 0, r12, r0 - - hfilter_8 v0, v10, v11, 1 - hfilter_8 v1, v10, v11, 1 - hfilter_8 v2, v10, v11, 1 - hfilter_8 v3, v10, v11, 1 - - ;# Finished filtering main horizontal block. If there is no - ;# vertical filtering, jump to storing the data. Otherwise - ;# load up and filter the additional line that is needed - ;# for the vertical filter. - beq compute_sum_sse_4x4_b - - hfilter_8 v4, v10, v11, 0 - - b second_pass_4x4_b - -second_pass_4x4_pre_copy_b: - slwi r6, r6, 5 ;# index into vertical filter array - - load_and_align_16 v0, r3, r4, 1 - load_and_align_16 v1, r3, r4, 1 - load_and_align_16 v2, r3, r4, 1 - load_and_align_16 v3, r3, r4, 1 - load_and_align_16 v4, r3, r4, 0 - -second_pass_4x4_b: - vspltish v20, 8 - vspltish v18, 3 - vslh v18, v20, v18 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040 - - load_vfilter v20, v21 - - vfilter_16 v0, v1 - vfilter_16 v1, v2 - vfilter_16 v2, v3 - vfilter_16 v3, v4 - -compute_sum_sse_4x4_b: - vspltish v18, 0 ;# sum - vspltish v19, 0 ;# sse - vspltish v23, 0 ;# unpack - li r10, 16 - - load_and_align_16 v4, r7, r8, 1 - load_and_align_16 v5, r7, r8, 1 - load_and_align_16 v6, r7, r8, 1 - load_and_align_16 v7, r7, r8, 1 - - vmrghb v0, v0, v1 - vmrghb v1, v2, v3 - - vmrghb v2, v4, v5 - vmrghb v3, v6, v7 - - load_c v10, b_hilo_b, 0, r12, r0 - - vperm v0, v0, v1, v10 - vperm v1, v2, v3, v10 - - compute_sum_sse v0, v1, v18, v19, v20, v21, v23 - - variance_final v18, v19, v23, 4 - - addi r1, r1, 32 ;# recover stack - mtspr 256, r11 ;# reset old VRSAVE - - blr - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int src_pixels_per_line -;# r5 int xoffset -;# r6 int yoffset -;# r7 unsigned char *dst_ptr -;# r8 int dst_pixels_per_line -;# r9 unsigned int *sse -;# -;# r3 return value -vp9_sub_pixel_variance8x8_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xfff0 - ori r12, r12, 0xffff - mtspr 256, r12 ;# set VRSAVE - - stwu r1,-32(r1) ;# create space on the stack - - HProlog second_pass_8x8_pre_copy_b - - ;# Load up permutation constants - load_c v10, b_0123_b, 0, r12, r0 - load_c v11, b_4567_b, 0, r12, r0 - - hfilter_8 v0, v10, v11, 1 - hfilter_8 v1, v10, v11, 1 - hfilter_8 v2, v10, v11, 1 - hfilter_8 v3, v10, v11, 1 - hfilter_8 v4, v10, v11, 1 - hfilter_8 v5, v10, v11, 1 - hfilter_8 v6, v10, v11, 1 - hfilter_8 v7, v10, v11, 1 - - ;# Finished filtering main horizontal block. If there is no - ;# vertical filtering, jump to storing the data. Otherwise - ;# load up and filter the additional line that is needed - ;# for the vertical filter. - beq compute_sum_sse_8x8_b - - hfilter_8 v8, v10, v11, 0 - - b second_pass_8x8_b - -second_pass_8x8_pre_copy_b: - slwi. 
r6, r6, 5 ;# index into vertical filter array - - load_and_align_16 v0, r3, r4, 1 - load_and_align_16 v1, r3, r4, 1 - load_and_align_16 v2, r3, r4, 1 - load_and_align_16 v3, r3, r4, 1 - load_and_align_16 v4, r3, r4, 1 - load_and_align_16 v5, r3, r4, 1 - load_and_align_16 v6, r3, r4, 1 - load_and_align_16 v7, r3, r4, 1 - load_and_align_16 v8, r3, r4, 0 - - beq compute_sum_sse_8x8_b - -second_pass_8x8_b: - vspltish v20, 8 - vspltish v18, 3 - vslh v18, v20, v18 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040 - - load_vfilter v20, v21 - - vfilter_16 v0, v1 - vfilter_16 v1, v2 - vfilter_16 v2, v3 - vfilter_16 v3, v4 - vfilter_16 v4, v5 - vfilter_16 v5, v6 - vfilter_16 v6, v7 - vfilter_16 v7, v8 - -compute_sum_sse_8x8_b: - vspltish v18, 0 ;# sum - vspltish v19, 0 ;# sse - vspltish v23, 0 ;# unpack - li r10, 16 - - vmrghb v0, v0, v1 - vmrghb v1, v2, v3 - vmrghb v2, v4, v5 - vmrghb v3, v6, v7 - - load_and_align_16 v4, r7, r8, 1 - load_and_align_16 v5, r7, r8, 1 - load_and_align_16 v6, r7, r8, 1 - load_and_align_16 v7, r7, r8, 1 - load_and_align_16 v8, r7, r8, 1 - load_and_align_16 v9, r7, r8, 1 - load_and_align_16 v10, r7, r8, 1 - load_and_align_16 v11, r7, r8, 0 - - vmrghb v4, v4, v5 - vmrghb v5, v6, v7 - vmrghb v6, v8, v9 - vmrghb v7, v10, v11 - - compute_sum_sse v0, v4, v18, v19, v20, v21, v23 - compute_sum_sse v1, v5, v18, v19, v20, v21, v23 - compute_sum_sse v2, v6, v18, v19, v20, v21, v23 - compute_sum_sse v3, v7, v18, v19, v20, v21, v23 - - variance_final v18, v19, v23, 6 - - addi r1, r1, 32 ;# recover stack - mtspr 256, r11 ;# reset old VRSAVE - blr - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int src_pixels_per_line -;# r5 int xoffset -;# r6 int yoffset -;# r7 unsigned char *dst_ptr -;# r8 int dst_pixels_per_line -;# r9 unsigned int *sse -;# -;# r3 return value -vp9_sub_pixel_variance8x16_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffff - ori r12, r12, 0xfffc - mtspr 256, r12 ;# set VRSAVE - - stwu r1,-32(r1) ;# create space on the stack - - HProlog second_pass_8x16_pre_copy_b - - ;# Load up permutation constants - load_c v29, b_0123_b, 0, r12, r0 - load_c v30, b_4567_b, 0, r12, r0 - - hfilter_8 v0, v29, v30, 1 - hfilter_8 v1, v29, v30, 1 - hfilter_8 v2, v29, v30, 1 - hfilter_8 v3, v29, v30, 1 - hfilter_8 v4, v29, v30, 1 - hfilter_8 v5, v29, v30, 1 - hfilter_8 v6, v29, v30, 1 - hfilter_8 v7, v29, v30, 1 - hfilter_8 v8, v29, v30, 1 - hfilter_8 v9, v29, v30, 1 - hfilter_8 v10, v29, v30, 1 - hfilter_8 v11, v29, v30, 1 - hfilter_8 v12, v29, v30, 1 - hfilter_8 v13, v29, v30, 1 - hfilter_8 v14, v29, v30, 1 - hfilter_8 v15, v29, v30, 1 - - ;# Finished filtering main horizontal block. If there is no - ;# vertical filtering, jump to storing the data. Otherwise - ;# load up and filter the additional line that is needed - ;# for the vertical filter. - beq compute_sum_sse_8x16_b - - hfilter_8 v16, v29, v30, 0 - - b second_pass_8x16_b - -second_pass_8x16_pre_copy_b: - slwi. 
r6, r6, 5 ;# index into vertical filter array
-
- load_and_align_16 v0, r3, r4, 1
- load_and_align_16 v1, r3, r4, 1
- load_and_align_16 v2, r3, r4, 1
- load_and_align_16 v3, r3, r4, 1
- load_and_align_16 v4, r3, r4, 1
- load_and_align_16 v5, r3, r4, 1
- load_and_align_16 v6, r3, r4, 1
- load_and_align_16 v7, r3, r4, 1
- load_and_align_16 v8, r3, r4, 1
- load_and_align_16 v9, r3, r4, 1
- load_and_align_16 v10, r3, r4, 1
- load_and_align_16 v11, r3, r4, 1
- load_and_align_16 v12, r3, r4, 1
- load_and_align_16 v13, r3, r4, 1
- load_and_align_16 v14, r3, r4, 1
- load_and_align_16 v15, r3, r4, 1
- load_and_align_16 v16, r3, r4, 0
-
- beq compute_sum_sse_8x16_b
-
-second_pass_8x16_b:
- vspltish v20, 8
- vspltish v18, 3
- vslh v18, v20, v18 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040
-
- load_vfilter v20, v21
-
- vfilter_16 v0, v1
- vfilter_16 v1, v2
- vfilter_16 v2, v3
- vfilter_16 v3, v4
- vfilter_16 v4, v5
- vfilter_16 v5, v6
- vfilter_16 v6, v7
- vfilter_16 v7, v8
- vfilter_16 v8, v9
- vfilter_16 v9, v10
- vfilter_16 v10, v11
- vfilter_16 v11, v12
- vfilter_16 v12, v13
- vfilter_16 v13, v14
- vfilter_16 v14, v15
- vfilter_16 v15, v16
-
-compute_sum_sse_8x16_b:
- vspltish v18, 0 ;# sum
- vspltish v19, 0 ;# sse
- vspltish v23, 0 ;# unpack
- li r10, 16
-
- vmrghb v0, v0, v1
- vmrghb v1, v2, v3
- vmrghb v2, v4, v5
- vmrghb v3, v6, v7
- vmrghb v4, v8, v9
- vmrghb v5, v10, v11
- vmrghb v6, v12, v13
- vmrghb v7, v14, v15
-
- load_and_align_16 v8, r7, r8, 1
- load_and_align_16 v9, r7, r8, 1
- load_and_align_16 v10, r7, r8, 1
- load_and_align_16 v11, r7, r8, 1
- load_and_align_16 v12, r7, r8, 1
- load_and_align_16 v13, r7, r8, 1
- load_and_align_16 v14, r7, r8, 1
- load_and_align_16 v15, r7, r8, 1
-
- vmrghb v8, v8, v9
- vmrghb v9, v10, v11
- vmrghb v10, v12, v13
- vmrghb v11, v14, v15
-
- compute_sum_sse v0, v8, v18, v19, v20, v21, v23
- compute_sum_sse v1, v9, v18, v19, v20, v21, v23
- compute_sum_sse v2, v10, v18, v19, v20, v21, v23
- compute_sum_sse v3, v11, v18, v19, v20, v21, v23
-
- load_and_align_16 v8, r7, r8, 1
- load_and_align_16 v9, r7, r8, 1
- load_and_align_16 v10, r7, r8, 1
- load_and_align_16 v11, r7, r8, 1
- load_and_align_16 v12, r7, r8, 1
- load_and_align_16 v13, r7, r8, 1
- load_and_align_16 v14, r7, r8, 1
- load_and_align_16 v15, r7, r8, 0
-
- vmrghb v8, v8, v9
- vmrghb v9, v10, v11
- vmrghb v10, v12, v13
- vmrghb v11, v14, v15
-
- compute_sum_sse v4, v8, v18, v19, v20, v21, v23
- compute_sum_sse v5, v9, v18, v19, v20, v21, v23
- compute_sum_sse v6, v10, v18, v19, v20, v21, v23
- compute_sum_sse v7, v11, v18, v19, v20, v21, v23
-
- variance_final v18, v19, v23, 7
-
- addi r1, r1, 32 ;# recover stack
- mtspr 256, r11 ;# reset old VRSAVE
- blr
-
-;# Filters a horizontal line
-;# expects:
-;# r3 src_ptr
-;# r4 pitch
-;# r10 16
-;# r12 32
-;# v17 perm input
-;# v18 rounding
-;# v19 shift
-;# v20 filter taps
-;# v21 tmp
-;# v22 tmp
-;# v23 tmp
-;# v24 tmp
-;# v25 tmp
-;# v26 tmp
-;# v27 tmp
-;# v28 perm output
-;#
-.macro hfilter_16 V, increment_counter
-
- lvsl v17, 0, r3 ;# permute value for alignment
-
- ;# input to filter is 21 bytes wide, output is 16 bytes.
- ;# input can span three vectors if not aligned correctly.
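Before hfilter_16's body (which follows below), a scalar sketch of the quantity the compute_sum_sse / variance_final pair produces. This is a model under assumptions read off the callers, not the shipped code: DS is log2 of the pixel count, and the names and signature are illustrative.

/* Scalar model of compute_sum_sse + variance_final. The callers pass
 * DS = 4 (4x4), 6 (8x8), 7 (8x16 and 16x8) and 8 (16x16), so the
 * return value is SSE - sum^2 / (w*h), the usual unnormalized
 * variance, with SSE also written through *sse. */
static unsigned int variance_model(const unsigned char *src, int src_stride,
                                   const unsigned char *ref, int ref_stride,
                                   int w, int h, int DS, unsigned int *sse) {
  int i, j, sum = 0;
  unsigned int s = 0;
  for (i = 0; i < h; i++) {
    for (j = 0; j < w; j++) {
      const int d = src[i * src_stride + j] - ref[i * ref_stride + j];
      sum += d;    /* vsubshs/vsum4shs accumulation */
      s += d * d;  /* vsububs/vor/vmsumubm accumulation */
    }
  }
  *sse = s;                                      /* stw r4, 0(r9) */
  return s - (unsigned int)((sum * sum) >> DS);  /* subf r3, r3, r4 */
}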
- lvx v21, 0, r3 - lvx v22, r10, r3 - lvx v23, r12, r3 - -.if \increment_counter - add r3, r3, r4 -.endif - vperm v21, v21, v22, v17 - vperm v22, v22, v23, v17 ;# v8 v9 = 21 input pixels left-justified - - ;# set 0 - vmsummbm v24, v20, v21, v18 ;# taps times elements - - ;# set 1 - vsldoi v23, v21, v22, 1 - vmsummbm v25, v20, v23, v18 - - ;# set 2 - vsldoi v23, v21, v22, 2 - vmsummbm v26, v20, v23, v18 - - ;# set 3 - vsldoi v23, v21, v22, 3 - vmsummbm v27, v20, v23, v18 - - vpkswus v24, v24, v25 ;# v24 = 0 4 8 C 1 5 9 D (16-bit) - vpkswus v25, v26, v27 ;# v25 = 2 6 A E 3 7 B F - - vsrh v24, v24, v19 ;# divide v0, v1 by 128 - vsrh v25, v25, v19 - - vpkuhus \V, v24, v25 ;# \V = scrambled 8-bit result - vperm \V, \V, v0, v28 ;# \V = correctly-ordered result -.endm - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int src_pixels_per_line -;# r5 int xoffset -;# r6 int yoffset -;# r7 unsigned char *dst_ptr -;# r8 int dst_pixels_per_line -;# r9 unsigned int *sse -;# -;# r3 return value -vp9_sub_pixel_variance16x8_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffff - ori r12, r12, 0xfff8 - mtspr 256, r12 ;# set VRSAVE - - stwu r1, -32(r1) ;# create space on the stack - - HProlog second_pass_16x8_pre_copy_b - - hfilter_16 v0, 1 - hfilter_16 v1, 1 - hfilter_16 v2, 1 - hfilter_16 v3, 1 - hfilter_16 v4, 1 - hfilter_16 v5, 1 - hfilter_16 v6, 1 - hfilter_16 v7, 1 - - ;# Finished filtering main horizontal block. If there is no - ;# vertical filtering, jump to storing the data. Otherwise - ;# load up and filter the additional line that is needed - ;# for the vertical filter. - beq compute_sum_sse_16x8_b - - hfilter_16 v8, 0 - - b second_pass_16x8_b - -second_pass_16x8_pre_copy_b: - slwi. r6, r6, 5 ;# index into vertical filter array - - load_and_align_16 v0, r3, r4, 1 - load_and_align_16 v1, r3, r4, 1 - load_and_align_16 v2, r3, r4, 1 - load_and_align_16 v3, r3, r4, 1 - load_and_align_16 v4, r3, r4, 1 - load_and_align_16 v5, r3, r4, 1 - load_and_align_16 v6, r3, r4, 1 - load_and_align_16 v7, r3, r4, 1 - load_and_align_16 v8, r3, r4, 1 - - beq compute_sum_sse_16x8_b - -second_pass_16x8_b: - vspltish v20, 8 - vspltish v18, 3 - vslh v18, v20, v18 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040 - - load_vfilter v20, v21 - - vfilter_16 v0, v1 - vfilter_16 v1, v2 - vfilter_16 v2, v3 - vfilter_16 v3, v4 - vfilter_16 v4, v5 - vfilter_16 v5, v6 - vfilter_16 v6, v7 - vfilter_16 v7, v8 - -compute_sum_sse_16x8_b: - vspltish v18, 0 ;# sum - vspltish v19, 0 ;# sse - vspltish v23, 0 ;# unpack - li r10, 16 - - compute_sum_sse_16 v0, 1 - compute_sum_sse_16 v1, 1 - compute_sum_sse_16 v2, 1 - compute_sum_sse_16 v3, 1 - compute_sum_sse_16 v4, 1 - compute_sum_sse_16 v5, 1 - compute_sum_sse_16 v6, 1 - compute_sum_sse_16 v7, 0 - - variance_final v18, v19, v23, 7 - - addi r1, r1, 32 ;# recover stack - - mtspr 256, r11 ;# reset old VRSAVE - - blr - - .align 2 -;# r3 unsigned char *src_ptr -;# r4 int src_pixels_per_line -;# r5 int xoffset -;# r6 int yoffset -;# r7 unsigned char *dst_ptr -;# r8 int dst_pixels_per_line -;# r9 unsigned int *sse -;# -;# r3 return value -vp9_sub_pixel_variance16x16_ppc: - mfspr r11, 256 ;# get old VRSAVE - oris r12, r11, 0xffff - ori r12, r12, 0xfff8 - mtspr 256, r12 ;# set VRSAVE - - stwu r1, -32(r1) ;# create space on the stack - - HProlog second_pass_16x16_pre_copy_b - - hfilter_16 v0, 1 - hfilter_16 v1, 1 - hfilter_16 v2, 1 - hfilter_16 v3, 1 - hfilter_16 v4, 1 - hfilter_16 v5, 1 - hfilter_16 v6, 1 - hfilter_16 v7, 1 - hfilter_16 v8, 1 - hfilter_16 v9, 1 - hfilter_16 v10, 1 - hfilter_16 
v11, 1 - hfilter_16 v12, 1 - hfilter_16 v13, 1 - hfilter_16 v14, 1 - hfilter_16 v15, 1 - - ;# Finished filtering main horizontal block. If there is no - ;# vertical filtering, jump to storing the data. Otherwise - ;# load up and filter the additional line that is needed - ;# for the vertical filter. - beq compute_sum_sse_16x16_b - - hfilter_16 v16, 0 - - b second_pass_16x16_b - -second_pass_16x16_pre_copy_b: - slwi. r6, r6, 5 ;# index into vertical filter array - - load_and_align_16 v0, r3, r4, 1 - load_and_align_16 v1, r3, r4, 1 - load_and_align_16 v2, r3, r4, 1 - load_and_align_16 v3, r3, r4, 1 - load_and_align_16 v4, r3, r4, 1 - load_and_align_16 v5, r3, r4, 1 - load_and_align_16 v6, r3, r4, 1 - load_and_align_16 v7, r3, r4, 1 - load_and_align_16 v8, r3, r4, 1 - load_and_align_16 v9, r3, r4, 1 - load_and_align_16 v10, r3, r4, 1 - load_and_align_16 v11, r3, r4, 1 - load_and_align_16 v12, r3, r4, 1 - load_and_align_16 v13, r3, r4, 1 - load_and_align_16 v14, r3, r4, 1 - load_and_align_16 v15, r3, r4, 1 - load_and_align_16 v16, r3, r4, 0 - - beq compute_sum_sse_16x16_b - -second_pass_16x16_b: - vspltish v20, 8 - vspltish v18, 3 - vslh v18, v20, v18 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040 - - load_vfilter v20, v21 - - vfilter_16 v0, v1 - vfilter_16 v1, v2 - vfilter_16 v2, v3 - vfilter_16 v3, v4 - vfilter_16 v4, v5 - vfilter_16 v5, v6 - vfilter_16 v6, v7 - vfilter_16 v7, v8 - vfilter_16 v8, v9 - vfilter_16 v9, v10 - vfilter_16 v10, v11 - vfilter_16 v11, v12 - vfilter_16 v12, v13 - vfilter_16 v13, v14 - vfilter_16 v14, v15 - vfilter_16 v15, v16 - -compute_sum_sse_16x16_b: - vspltish v18, 0 ;# sum - vspltish v19, 0 ;# sse - vspltish v23, 0 ;# unpack - li r10, 16 - - compute_sum_sse_16 v0, 1 - compute_sum_sse_16 v1, 1 - compute_sum_sse_16 v2, 1 - compute_sum_sse_16 v3, 1 - compute_sum_sse_16 v4, 1 - compute_sum_sse_16 v5, 1 - compute_sum_sse_16 v6, 1 - compute_sum_sse_16 v7, 1 - compute_sum_sse_16 v8, 1 - compute_sum_sse_16 v9, 1 - compute_sum_sse_16 v10, 1 - compute_sum_sse_16 v11, 1 - compute_sum_sse_16 v12, 1 - compute_sum_sse_16 v13, 1 - compute_sum_sse_16 v14, 1 - compute_sum_sse_16 v15, 0 - - variance_final v18, v19, v23, 8 - - addi r1, r1, 32 ;# recover stack - - mtspr 256, r11 ;# reset old VRSAVE - - blr - - .data - - .align 4 -hfilter_b: - .byte 128, 0, 0, 0,128, 0, 0, 0,128, 0, 0, 0,128, 0, 0, 0 - .byte 112, 16, 0, 0,112, 16, 0, 0,112, 16, 0, 0,112, 16, 0, 0 - .byte 96, 32, 0, 0, 96, 32, 0, 0, 96, 32, 0, 0, 96, 32, 0, 0 - .byte 80, 48, 0, 0, 80, 48, 0, 0, 80, 48, 0, 0, 80, 48, 0, 0 - .byte 64, 64, 0, 0, 64, 64, 0, 0, 64, 64, 0, 0, 64, 64, 0, 0 - .byte 48, 80, 0, 0, 48, 80, 0, 0, 48, 80, 0, 0, 48, 80, 0, 0 - .byte 32, 96, 0, 0, 32, 96, 0, 0, 32, 96, 0, 0, 32, 96, 0, 0 - .byte 16,112, 0, 0, 16,112, 0, 0, 16,112, 0, 0, 16,112, 0, 0 - - .align 4 -vfilter_b: - .byte 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128 - .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112 - .byte 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16 - .byte 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96 - .byte 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32 - .byte 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80 - .byte 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48 - .byte 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 - .byte 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 - .byte 48, 48, 48, 48, 48, 48, 48, 48, 48, 
48, 48, 48, 48, 48, 48, 48 - .byte 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80 - .byte 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32 - .byte 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96 - .byte 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16 - .byte 112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112 - - .align 4 -b_hperm_b: - .byte 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 - - .align 4 -b_0123_b: - .byte 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6 - - .align 4 -b_4567_b: - .byte 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10 - -b_hilo_b: - .byte 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 886e1fcdc..932fc0df1 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -404,7 +404,7 @@ static int prob_diff_update_savings_search_model(const unsigned int *ct, const vp9_prob *oldp, vp9_prob *bestp, const vp9_prob upd, - int b, int r) { + int b, int r, int q) { int i, old_b, new_b, update_b, savings, bestsavings, step; int newp; vp9_prob bestnewp, newplist[ENTROPY_NODES]; @@ -2068,8 +2068,8 @@ static void update_nzc_probs(VP9_COMP* cpi, #endif // CONFIG_CODE_NONZEROCOUNT static void update_coef_probs_common(vp9_writer* const bc, -#ifdef ENTROPY_STATS VP9_COMP *cpi, +#ifdef ENTROPY_STATS vp9_coeff_stats *tree_update_hist, #endif vp9_coeff_probs *new_frame_coef_probs, @@ -2112,7 +2112,8 @@ static void update_coef_probs_common(vp9_writer* const bc, if (t == UNCONSTRAINED_NODES - 1) s = prob_diff_update_savings_search_model( frame_branch_ct[i][j][k][l][0], - old_frame_coef_probs[i][j][k][l], &newp, upd, i, j); + old_frame_coef_probs[i][j][k][l], &newp, upd, i, j, + cpi->common.base_qindex); else #endif s = prob_diff_update_savings_search( @@ -2166,7 +2167,8 @@ static void update_coef_probs_common(vp9_writer* const bc, if (t == UNCONSTRAINED_NODES - 1) s = prob_diff_update_savings_search_model( frame_branch_ct[i][j][k][l][0], - old_frame_coef_probs[i][j][k][l], &newp, upd, i, j); + old_frame_coef_probs[i][j][k][l], &newp, upd, i, j, + cpi->common.base_qindex); else #endif s = prob_diff_update_savings_search( @@ -2209,8 +2211,8 @@ static void update_coef_probs(VP9_COMP* const cpi, vp9_writer* const bc) { build_coeff_contexts(cpi); update_coef_probs_common(bc, -#ifdef ENTROPY_STATS cpi, +#ifdef ENTROPY_STATS tree_update_hist_4x4, #endif cpi->frame_coef_probs_4x4, @@ -2221,8 +2223,8 @@ static void update_coef_probs(VP9_COMP* const cpi, vp9_writer* const bc) { /* do not do this if not even allowed */ if (cpi->common.txfm_mode != ONLY_4X4) { update_coef_probs_common(bc, -#ifdef ENTROPY_STATS cpi, +#ifdef ENTROPY_STATS tree_update_hist_8x8, #endif cpi->frame_coef_probs_8x8, @@ -2233,8 +2235,8 @@ static void update_coef_probs(VP9_COMP* const cpi, vp9_writer* const bc) { if (cpi->common.txfm_mode > ALLOW_8X8) { update_coef_probs_common(bc, -#ifdef ENTROPY_STATS cpi, +#ifdef ENTROPY_STATS tree_update_hist_16x16, #endif cpi->frame_coef_probs_16x16, @@ -2245,8 +2247,8 @@ static void update_coef_probs(VP9_COMP* const cpi, vp9_writer* const bc) { if (cpi->common.txfm_mode > ALLOW_16X16) { update_coef_probs_common(bc, -#ifdef ENTROPY_STATS cpi, +#ifdef ENTROPY_STATS tree_update_hist_32x32, #endif cpi->frame_coef_probs_32x32, diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index b0df15207..2e9a93cdc 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -2083,7 
+2083,7 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, mbmi->mb_skip_coeff = 1; if (output_enabled) cpi->skip_true_count[mb_skip_context]++; - vp9_reset_mb_tokens_context(xd); + vp9_reset_sb_tokens_context(xd, BLOCK_SIZE_MB16X16); } else { vp9_stuff_mb(cpi, xd, t, !output_enabled); mbmi->mb_skip_coeff = 0; diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c index a6ca2b6a3..355867ba7 100644 --- a/vp9/encoder/vp9_encodeintra.c +++ b/vp9/encoder/vp9_encodeintra.c @@ -52,7 +52,7 @@ static void encode_intra4x4block(MACROBLOCK *x, int ib) { b->bmi.as_mode.context = vp9_find_bpred_context(&x->e_mbd, b); #endif - vp9_intra4x4_predict(&x->e_mbd, b, b->bmi.as_mode.first, b->predictor); + vp9_intra4x4_predict(&x->e_mbd, b, b->bmi.as_mode.first, b->predictor, 16); vp9_subtract_b(be, b, 16); tx_type = get_tx_type_4x4(&x->e_mbd, ib); @@ -152,7 +152,7 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) { int i; TX_TYPE tx_type; - vp9_intra8x8_predict(xd, b, b->bmi.as_mode.first, b->predictor); + vp9_intra8x8_predict(xd, b, b->bmi.as_mode.first, b->predictor, 16); // generate residual blocks vp9_subtract_4b_c(be, b, 16); @@ -227,7 +227,7 @@ static void encode_intra_uv4x4(MACROBLOCK *x, int ib, int mode) { const int block = ib < 20 ? ib - 16 : ib - 20; assert(ib >= 16 && ib < 24); - vp9_intra_uv4x4_predict(&x->e_mbd, b, mode, b->predictor); + vp9_intra_uv4x4_predict(&x->e_mbd, b, mode, b->predictor, 8); vp9_subtract_b(be, b, 8); diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index c1203f04b..21413408f 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -2935,25 +2935,26 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // Set up entropy depending on frame type. if (cm->frame_type == KEY_FRAME) { /* Choose which entropy context to use. When using a forward reference - * frame, it immediately follows the keyframe, and thus benefits from - * using the same entropy context established by the keyframe. Otherwise, - * use the default context 0. - */ + * frame, it immediately follows the keyframe, and thus benefits from + * using the same entropy context established by the keyframe. Otherwise, + * use the default context 0. + */ cm->frame_context_idx = cpi->oxcf.play_alternate; vp9_setup_key_frame(cpi); } else { - /* Choose which entropy context to use. Currently there are only two - * contexts used, one for normal frames and one for alt ref frames. - */ + /* Choose which entropy context to use. Currently there are only two + * contexts used, one for normal frames and one for alt ref frames. 
+ */ cpi->common.frame_context_idx = cpi->refresh_alt_ref_frame; vp9_setup_inter_frame(cpi); } } // transform / motion compensation build reconstruction frame -#if CONFIG_MODELCOEFPROB && ADJUST_KF_COEF_PROBS - if (cm->frame_type == KEY_FRAME) - vp9_adjust_default_coef_probs(cm); +#if CONFIG_MODELCOEFPROB + if (cm->frame_type == KEY_FRAME) { + vp9_default_coef_probs(cm); + } #endif vp9_encode_frame(cpi); diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 735d6958f..a1eea1ec0 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -766,7 +766,7 @@ static void super_block_yrd_4x4(VP9_COMMON *const cm, MACROBLOCK *x, *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 16 << (bwl + bhl), 2); *rate = rdcost_sby_4x4(cm, x, bsize); - *skippable = vp9_sby_is_skippable(xd, bsize, TX_4X4); + *skippable = vp9_sby_is_skippable(xd, bsize); } static int rdcost_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x, @@ -806,7 +806,7 @@ static void super_block_yrd_8x8(VP9_COMMON *const cm, MACROBLOCK *x, *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 64 << (bhl + bwl), 2); *rate = rdcost_sby_8x8(cm, x, bsize); - *skippable = vp9_sby_is_skippable(xd, bsize, TX_8X8); + *skippable = vp9_sby_is_skippable(xd, bsize); } static int rdcost_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x, @@ -844,7 +844,7 @@ static void super_block_yrd_16x16(VP9_COMMON *const cm, MACROBLOCK *x, *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 256 << (bwl + bhl), 2); *rate = rdcost_sby_16x16(cm, x, bsize); - *skippable = vp9_sby_is_skippable(xd, bsize, TX_16X16); + *skippable = vp9_sby_is_skippable(xd, bsize); } static int rdcost_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x, @@ -884,7 +884,7 @@ static void super_block_yrd_32x32(VP9_COMMON *const cm, MACROBLOCK *x, *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 1024 << (bwl + bhl), 0); *rate = rdcost_sby_32x32(cm, x, bsize); - *skippable = vp9_sby_is_skippable(xd, bsize, TX_32X32); + *skippable = vp9_sby_is_skippable(xd, bsize); } static void super_block_yrd(VP9_COMP *cpi, @@ -970,7 +970,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, rate = bmode_costs[mode]; #endif - vp9_intra4x4_predict(xd, b, mode, b->predictor); + vp9_intra4x4_predict(xd, b, mode, b->predictor, 16); vp9_subtract_b(be, b, 16); b->bmi.as_mode.first = mode; @@ -1180,7 +1180,7 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, rate = mode_costs[mode]; b->bmi.as_mode.first = mode; - vp9_intra8x8_predict(xd, b, mode, b->predictor); + vp9_intra8x8_predict(xd, b, mode, b->predictor, 16); vp9_subtract_4b_c(be, b, 16); @@ -1446,7 +1446,7 @@ static void super_block_uvrd_4x4(VP9_COMMON *const cm, MACROBLOCK *x, xd->plane[1].dqcoeff, xd->plane[2].dqcoeff, 32 << (bwl + bhl - 2), 2); - *skip = vp9_sbuv_is_skippable(xd, bsize, TX_4X4); + *skip = vp9_sbuv_is_skippable(xd, bsize); } static int rd_cost_sbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x, @@ -1491,7 +1491,7 @@ static void super_block_uvrd_8x8(VP9_COMMON *const cm, MACROBLOCK *x, xd->plane[1].dqcoeff, xd->plane[2].dqcoeff, 128 << (bwl + bhl - 2), 2); - *skip = vp9_sbuv_is_skippable(xd, bsize, TX_8X8); + *skip = vp9_sbuv_is_skippable(xd, bsize); } static int rd_cost_sbuv_16x16(VP9_COMMON *const cm, MACROBLOCK *x, @@ -1536,7 +1536,7 @@ static void super_block_uvrd_16x16(VP9_COMMON *const cm, MACROBLOCK *x, xd->plane[1].dqcoeff, xd->plane[2].dqcoeff, 512 << (bwl + bhl - 2), 2); - *skip = vp9_sbuv_is_skippable(xd, bsize, 
TX_16X16); + *skip = vp9_sbuv_is_skippable(xd, bsize); } static int rd_cost_sbuv_32x32(VP9_COMMON *const cm, MACROBLOCK *x, @@ -1582,7 +1582,7 @@ static void super_block_uvrd_32x32(VP9_COMMON *const cm, MACROBLOCK *x, xd->plane[1].dqcoeff, xd->plane[2].dqcoeff, 2048 << (bwl + bhl - 2), 0); - *skip = vp9_sbuv_is_skippable(xd, bsize, TX_32X32); + *skip = vp9_sbuv_is_skippable(xd, bsize); } static void super_block_uvrd(VP9_COMMON *const cm, MACROBLOCK *x, @@ -2507,13 +2507,6 @@ static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, x->e_mbd.plane[0].eobs[i] = bsi.eobs[i]; } - *returntotrate = bsi.r; - *returndistortion = bsi.d; - *returnyrate = bsi.segment_yrate; - *skippable = bsi.txfm_size == TX_4X4 ? - vp9_mby_is_skippable_4x4(&x->e_mbd) : - vp9_mby_is_skippable_8x8(&x->e_mbd); - /* save partitions */ mbmi->txfm_size = bsi.txfm_size; mbmi->partitioning = bsi.segment_num; @@ -2536,6 +2529,11 @@ static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, if (mbmi->second_ref_frame > 0) x->partition_info->bmi[15].second_mv.as_int = bsi.second_mvs[15].as_int; + *returntotrate = bsi.r; + *returndistortion = bsi.d; + *returnyrate = bsi.segment_yrate; + *skippable = vp9_sby_is_skippable(&x->e_mbd, BLOCK_SIZE_MB16X16); + return (int)(bsi.segment_rd); } diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index e85f85193..891eb25fd 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -339,95 +339,38 @@ static void tokenize_b(VP9_COMP *cpi, } } -int vp9_mby_is_skippable_4x4(MACROBLOCKD *xd) { - int skip = 1; - int i = 0; - - for (i = 0; i < 16; i++) - skip &= (!xd->plane[0].eobs[i]); - - return skip; -} - -int vp9_mbuv_is_skippable_4x4(MACROBLOCKD *xd) { - int skip = 1; - int i; - - for (i = 0; i < 4; i++) - skip &= (!xd->plane[1].eobs[i]); - for (i = 0; i < 4; i++) - skip &= (!xd->plane[2].eobs[i]); - return skip; -} - -static int mb_is_skippable_4x4(MACROBLOCKD *xd) { - return (vp9_mby_is_skippable_4x4(xd) & - vp9_mbuv_is_skippable_4x4(xd)); -} - -int vp9_mby_is_skippable_8x8(MACROBLOCKD *xd) { - int skip = 1; - int i = 0; - - for (i = 0; i < 16; i += 4) - skip &= (!xd->plane[0].eobs[i]); - - return skip; -} - -int vp9_mbuv_is_skippable_8x8(MACROBLOCKD *xd) { - return (!xd->plane[1].eobs[0]) & (!xd->plane[2].eobs[0]); -} - -static int mb_is_skippable_8x8(MACROBLOCKD *xd) { - return (vp9_mby_is_skippable_8x8(xd) & - vp9_mbuv_is_skippable_8x8(xd)); +struct is_skippable_args { + MACROBLOCKD *xd; + int *skippable; +}; +static void is_skippable(int plane, int block, + int block_size_b, int ss_txfrm_size, void *argv) { + struct is_skippable_args *args = argv; + args->skippable[0] &= (!args->xd->plane[plane].eobs[block]); } -static int mb_is_skippable_8x8_4x4uv(MACROBLOCKD *xd) { - return (vp9_mby_is_skippable_8x8(xd) & - vp9_mbuv_is_skippable_4x4(xd)); -} - -int vp9_mby_is_skippable_16x16(MACROBLOCKD *xd) { - return (!xd->plane[0].eobs[0]); -} - -static int mb_is_skippable_16x16(MACROBLOCKD *xd) { - return (vp9_mby_is_skippable_16x16(xd) & vp9_mbuv_is_skippable_8x8(xd)); -} - -int vp9_sby_is_skippable(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize, - TX_SIZE sz) { - const int inc = 1 << (sz * 2); +int vp9_sb_is_skippable(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) { const int bwl = mb_width_log2(bsize) + 2, bhl = mb_height_log2(bsize) + 2; - int skip = 1; - int i = 0; - - for (i = 0; i < (1 << (bwl + bhl)); i += inc) - skip &= (!xd->plane[0].eobs[i]); - - return skip; + int result = 1; + struct is_skippable_args args = {xd, &result}; + 
foreach_transformed_block(xd, bwl + bhl, is_skippable, &args); + return result; } -int vp9_sbuv_is_skippable(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize, TX_SIZE sz) { - const int inc = 1 << (sz * 2); - const int bwl = mb_width_log2(bsize) + 1, bhl = mb_height_log2(bsize) + 1; - int skip = 1; - int i = 0; - - for (i = 0; i < (1 << (bwl + bhl)); i += inc) { - skip &= (!xd->plane[1].eobs[i]); - skip &= (!xd->plane[2].eobs[i]); - } - - return skip; +int vp9_sby_is_skippable(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) { + const int bwl = mb_width_log2(bsize) + 2, bhl = mb_height_log2(bsize) + 2; + int result = 1; + struct is_skippable_args args = {xd, &result}; + foreach_transformed_block_in_plane(xd, bwl + bhl, 0, 0, is_skippable, &args); + return result; } -static int sb_is_skippable(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize, - TX_SIZE ysz, TX_SIZE uvsz) { - return vp9_sby_is_skippable(xd, bsize, ysz) & - vp9_sbuv_is_skippable(xd, bsize, uvsz); +int vp9_sbuv_is_skippable(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) { + const int bwl = mb_width_log2(bsize) + 2, bhl = mb_height_log2(bsize) + 2; + int result = 1; + struct is_skippable_args args = {xd, &result}; + foreach_transformed_block_uv(xd, bwl + bhl, is_skippable, &args); + return result; } void vp9_tokenize_sb(VP9_COMP *cpi, @@ -449,7 +392,7 @@ void vp9_tokenize_sb(VP9_COMP *cpi, int b; const int n_y = (1 << (bwl + bhl)), n_uv = (n_y * 3) >> 1; - mbmi->mb_skip_coeff = sb_is_skippable(xd, bsize, txfm_size, uv_txfm_size); + mbmi->mb_skip_coeff = vp9_sb_is_skippable(xd, bsize); if (mbmi->mb_skip_coeff) { if (!dry_run) @@ -541,26 +484,8 @@ void vp9_tokenize_mb(VP9_COMP *cpi, } else skip_inc = 0; - switch (tx_size) { - case TX_16X16: - - xd->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable_16x16(xd); - break; - case TX_8X8: - if (xd->mode_info_context->mbmi.mode == I8X8_PRED || - xd->mode_info_context->mbmi.mode == SPLITMV) - xd->mode_info_context->mbmi.mb_skip_coeff = - mb_is_skippable_8x8_4x4uv(xd); - else - xd->mode_info_context->mbmi.mb_skip_coeff = - mb_is_skippable_8x8(xd); - break; - - default: - xd->mode_info_context->mbmi.mb_skip_coeff = - mb_is_skippable_4x4(xd); - break; - } + xd->mode_info_context->mbmi.mb_skip_coeff = vp9_sb_is_skippable(xd, + BLOCK_SIZE_MB16X16); if (xd->mode_info_context->mbmi.mb_skip_coeff) { if (!dry_run) @@ -568,7 +493,7 @@ void vp9_tokenize_mb(VP9_COMP *cpi, if (!cpi->common.mb_no_coeff_skip) { vp9_stuff_mb(cpi, xd, t, dry_run); } else { - vp9_reset_mb_tokens_context(xd); + vp9_reset_sb_tokens_context(xd, BLOCK_SIZE_MB16X16); } if (dry_run) diff --git a/vp9/encoder/vp9_tokenize.h b/vp9/encoder/vp9_tokenize.h index decb34a4a..2dcbd3002 100644 --- a/vp9/encoder/vp9_tokenize.h +++ b/vp9/encoder/vp9_tokenize.h @@ -31,15 +31,9 @@ typedef struct { typedef int64_t vp9_coeff_accum[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS + 1]; -int vp9_mby_is_skippable_4x4(MACROBLOCKD *xd); -int vp9_mbuv_is_skippable_4x4(MACROBLOCKD *xd); -int vp9_mby_is_skippable_8x8(MACROBLOCKD *xd); -int vp9_mbuv_is_skippable_8x8(MACROBLOCKD *xd); -int vp9_mby_is_skippable_16x16(MACROBLOCKD *xd); - -int vp9_sby_is_skippable(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize, TX_SIZE sz); -int vp9_sbuv_is_skippable(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize, TX_SIZE sz); - +int vp9_sb_is_skippable(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize); +int vp9_sby_is_skippable(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize); +int vp9_sbuv_is_skippable(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize); struct VP9_COMP; void vp9_tokenize_mb(struct VP9_COMP *cpi, MACROBLOCKD *xd, |
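A closing note on the tokenize changes above: the family of per-transform-size skip tests (vp9_mby_is_skippable_4x4 and friends) collapses into the single is_skippable visitor driven by foreach_transformed_block, so the hand-maintained stride arithmetic (inc = 1 << (sz * 2)) disappears along with the TX_SIZE parameter, and new transform sizes are covered by the iterator rather than by new helpers. A minimal scalar sketch of the invariant being computed, with illustrative names and an assumed uint16_t eob array:

#include <stdint.h>

/* Sketch: a (sub-)block is skippable iff every transformed block in
 * the visited plane(s) has a zero end-of-block marker. n_blocks is a
 * stand-in for whatever the iterator enumerates for the block size. */
static int all_eobs_zero(const uint16_t *eobs, int n_blocks) {
  int i, skip = 1;
  for (i = 0; i < n_blocks; i++)
    skip &= (eobs[i] == 0);
  return skip;
}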