From a53d5a4c442a84cacbd8225fac72db3789b3e10c Mon Sep 17 00:00:00 2001 From: Scott LaVarnway Date: Thu, 15 Dec 2011 14:23:36 -0500 Subject: Moved dequant idct into common These functions are now used by the encoder. This is WIP with the goal of creating a common idct/add for the encoder and decoder. A boost of 1.8% was seen for the HD rt test clip used. [Tero] Added needed changes to ARM side. Change-Id: Ibbb8000be09034203d7adffc457d3c3f8b06a5bf --- vp8/decoder/arm/arm_dsystemdependent.c | 11 - vp8/decoder/arm/armv6/dequant_idct_v6.asm | 190 --------------- vp8/decoder/arm/armv6/dequantize_v6.asm | 69 ------ vp8/decoder/arm/armv6/idct_blk_v6.c | 116 --------- vp8/decoder/arm/dequantize_arm.c | 46 ---- vp8/decoder/arm/dequantize_arm.h | 59 ----- vp8/decoder/arm/neon/dequant_idct_neon.asm | 131 ----------- vp8/decoder/arm/neon/dequantizeb_neon.asm | 34 --- vp8/decoder/arm/neon/idct_blk_neon.c | 97 -------- vp8/decoder/arm/neon/idct_dequant_0_2x_neon.asm | 79 ------- vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm | 196 ---------------- vp8/decoder/decodframe.c | 12 +- vp8/decoder/dequantize.c | 44 ---- vp8/decoder/dequantize.h | 85 ------- vp8/decoder/generic/dsystemdependent.c | 8 +- vp8/decoder/idct_blk.c | 90 ------- vp8/decoder/onyxd_int.h | 8 +- vp8/decoder/threading.c | 8 +- vp8/decoder/x86/dequantize_mmx.asm | 258 --------------------- vp8/decoder/x86/dequantize_x86.h | 58 ----- vp8/decoder/x86/idct_blk_mmx.c | 117 ---------- vp8/decoder/x86/idct_blk_sse2.c | 90 ------- vp8/decoder/x86/x86_dsystemdependent.c | 40 ---- 23 files changed, 14 insertions(+), 1832 deletions(-) delete mode 100644 vp8/decoder/arm/armv6/dequant_idct_v6.asm delete mode 100644 vp8/decoder/arm/armv6/dequantize_v6.asm delete mode 100644 vp8/decoder/arm/armv6/idct_blk_v6.c delete mode 100644 vp8/decoder/arm/dequantize_arm.c delete mode 100644 vp8/decoder/arm/dequantize_arm.h delete mode 100644 vp8/decoder/arm/neon/dequant_idct_neon.asm delete mode 100644 vp8/decoder/arm/neon/dequantizeb_neon.asm delete mode 100644 vp8/decoder/arm/neon/idct_blk_neon.c delete mode 100644 vp8/decoder/arm/neon/idct_dequant_0_2x_neon.asm delete mode 100644 vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm delete mode 100644 vp8/decoder/dequantize.c delete mode 100644 vp8/decoder/dequantize.h delete mode 100644 vp8/decoder/idct_blk.c delete mode 100644 vp8/decoder/x86/dequantize_mmx.asm delete mode 100644 vp8/decoder/x86/dequantize_x86.h delete mode 100644 vp8/decoder/x86/idct_blk_mmx.c delete mode 100644 vp8/decoder/x86/idct_blk_sse2.c (limited to 'vp8/decoder') diff --git a/vp8/decoder/arm/arm_dsystemdependent.c b/vp8/decoder/arm/arm_dsystemdependent.c index f802c5181..bf0a3481a 100644 --- a/vp8/decoder/arm/arm_dsystemdependent.c +++ b/vp8/decoder/arm/arm_dsystemdependent.c @@ -11,9 +11,6 @@ #include "vpx_config.h" #include "vpx_ports/arm.h" -#include "vp8/common/blockd.h" -#include "vp8/common/pragmas.h" -#include "vp8/decoder/dequantize.h" #include "vp8/decoder/onyxd_int.h" void vp8_arch_arm_decode_init(VP8D_COMP *pbi) @@ -30,20 +27,12 @@ void vp8_arch_arm_decode_init(VP8D_COMP *pbi) #if HAVE_ARMV6 if (flags & HAS_MEDIA) { - pbi->dequant.block = vp8_dequantize_b_v6; - pbi->dequant.idct_add = vp8_dequant_idct_add_v6; - pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_v6; - pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_v6; } #endif #if HAVE_ARMV7 if (flags & HAS_NEON) { - pbi->dequant.block = vp8_dequantize_b_neon; - pbi->dequant.idct_add = vp8_dequant_idct_add_neon; - pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_neon; - pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_neon; } #endif #endif diff --git a/vp8/decoder/arm/armv6/dequant_idct_v6.asm b/vp8/decoder/arm/armv6/dequant_idct_v6.asm deleted file mode 100644 index 2510ad838..000000000 --- a/vp8/decoder/arm/armv6/dequant_idct_v6.asm +++ /dev/null @@ -1,190 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. -; - - EXPORT |vp8_dequant_idct_add_v6| - - AREA |.text|, CODE, READONLY -;void vp8_dequant_idct_v6(short *input, short *dq, -; unsigned char *dest, int stride) -; r0 = q -; r1 = dq -; r2 = dst -; r3 = stride - -|vp8_dequant_idct_add_v6| PROC - stmdb sp!, {r4-r11, lr} - - ldr r4, [r0] ;input - ldr r5, [r1], #4 ;dq - - sub sp, sp, #4 - str r3, [sp] - - mov r12, #4 - -vp8_dequant_add_loop - smulbb r6, r4, r5 - smultt r7, r4, r5 - - ldr r4, [r0, #4] ;input - ldr r5, [r1], #4 ;dq - - strh r6, [r0], #2 - strh r7, [r0], #2 - - smulbb r6, r4, r5 - smultt r7, r4, r5 - - subs r12, r12, #1 - - ldrne r4, [r0, #4] - ldrne r5, [r1], #4 - - strh r6, [r0], #2 - strh r7, [r0], #2 - - bne vp8_dequant_add_loop - - sub r0, r0, #32 - mov r1, r0 - -; short_idct4x4llm_v6_dual - ldr r3, cospi8sqrt2minus1 - ldr r4, sinpi8sqrt2 - ldr r6, [r0, #8] - mov r5, #2 -vp8_dequant_idct_loop1_v6 - ldr r12, [r0, #24] - ldr r14, [r0, #16] - smulwt r9, r3, r6 - smulwb r7, r3, r6 - smulwt r10, r4, r6 - smulwb r8, r4, r6 - pkhbt r7, r7, r9, lsl #16 - smulwt r11, r3, r12 - pkhbt r8, r8, r10, lsl #16 - uadd16 r6, r6, r7 - smulwt r7, r4, r12 - smulwb r9, r3, r12 - smulwb r10, r4, r12 - subs r5, r5, #1 - pkhbt r9, r9, r11, lsl #16 - ldr r11, [r0], #4 - pkhbt r10, r10, r7, lsl #16 - uadd16 r7, r12, r9 - usub16 r7, r8, r7 - uadd16 r6, r6, r10 - uadd16 r10, r11, r14 - usub16 r8, r11, r14 - uadd16 r9, r10, r6 - usub16 r10, r10, r6 - uadd16 r6, r8, r7 - usub16 r7, r8, r7 - str r6, [r1, #8] - ldrne r6, [r0, #8] - str r7, [r1, #16] - str r10, [r1, #24] - str r9, [r1], #4 - bne vp8_dequant_idct_loop1_v6 - - mov r5, #2 - sub r0, r1, #8 -vp8_dequant_idct_loop2_v6 - ldr r6, [r0], #4 - ldr r7, [r0], #4 - ldr r8, [r0], #4 - ldr r9, [r0], #4 - smulwt r1, r3, r6 - smulwt r12, r4, r6 - smulwt lr, r3, r8 - smulwt r10, r4, r8 - pkhbt r11, r8, r6, lsl #16 - pkhbt r1, lr, r1, lsl #16 - pkhbt r12, r10, r12, lsl #16 - pkhtb r6, r6, r8, asr #16 - uadd16 r6, r1, r6 - pkhbt lr, r9, r7, lsl #16 - uadd16 r10, r11, lr - usub16 lr, r11, lr - pkhtb r8, r7, r9, asr #16 - subs r5, r5, #1 - smulwt r1, r3, r8 - smulwb r7, r3, r8 - smulwt r11, r4, r8 - smulwb r9, r4, r8 - pkhbt r1, r7, r1, lsl #16 - uadd16 r8, r1, r8 - pkhbt r11, r9, r11, lsl #16 - usub16 r1, r12, r8 - uadd16 r8, r11, r6 - ldr r9, c0x00040004 - ldr r12, [sp] ; get stride from stack - uadd16 r6, r10, r8 - usub16 r7, r10, r8 - uadd16 r7, r7, r9 - uadd16 r6, r6, r9 - uadd16 r10, r14, r1 - usub16 r1, r14, r1 - uadd16 r10, r10, r9 - uadd16 r1, r1, r9 - ldr r11, [r2] ; load input from dst - mov r8, r7, asr #3 - pkhtb r9, r8, r10, asr #19 - mov r8, r1, asr #3 - pkhtb r8, r8, r6, asr #19 - uxtb16 lr, r11, ror #8 - qadd16 r9, r9, lr - uxtb16 lr, r11 - qadd16 r8, r8, lr - usat16 r9, #8, r9 - usat16 r8, #8, r8 - orr r9, r8, r9, lsl #8 - ldr r11, [r2, r12] ; load input from dst - mov r7, r7, lsl #16 - mov r1, r1, lsl #16 - mov r10, r10, lsl #16 - mov r6, r6, lsl #16 - mov r7, r7, asr #3 - pkhtb r7, r7, r10, asr #19 - mov r1, r1, asr #3 - pkhtb r1, r1, r6, asr #19 - uxtb16 r8, r11, ror #8 - qadd16 r7, r7, r8 - uxtb16 r8, r11 - qadd16 r1, r1, r8 - usat16 r7, #8, r7 - usat16 r1, #8, r1 - orr r1, r1, r7, lsl #8 - str r9, [r2], r12 ; store output to dst - str r1, [r2], r12 ; store output to dst - bne vp8_dequant_idct_loop2_v6 - -; vpx_memset - sub r0, r0, #32 - add sp, sp, #4 - - mov r12, #0 - str r12, [r0] - str r12, [r0, #4] - str r12, [r0, #8] - str r12, [r0, #12] - str r12, [r0, #16] - str r12, [r0, #20] - str r12, [r0, #24] - str r12, [r0, #28] - - ldmia sp!, {r4 - r11, pc} - ENDP ; |vp8_dequant_idct_add_v6| - -; Constant Pool -cospi8sqrt2minus1 DCD 0x00004E7B -sinpi8sqrt2 DCD 0x00008A8C -c0x00040004 DCD 0x00040004 - - END diff --git a/vp8/decoder/arm/armv6/dequantize_v6.asm b/vp8/decoder/arm/armv6/dequantize_v6.asm deleted file mode 100644 index 72f7e0ee5..000000000 --- a/vp8/decoder/arm/armv6/dequantize_v6.asm +++ /dev/null @@ -1,69 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_dequantize_b_loop_v6| - - AREA |.text|, CODE, READONLY ; name this block of code -;------------------------------- -;void vp8_dequantize_b_loop_v6(short *Q, short *DQC, short *DQ); -; r0 short *Q, -; r1 short *DQC -; r2 short *DQ -|vp8_dequantize_b_loop_v6| PROC - stmdb sp!, {r4-r9, lr} - - ldr r3, [r0] ;load Q - ldr r4, [r1] ;load DQC - ldr r5, [r0, #4] - ldr r6, [r1, #4] - - mov r12, #2 ;loop counter - -dequant_loop - smulbb r7, r3, r4 ;multiply - smultt r8, r3, r4 - smulbb r9, r5, r6 - smultt lr, r5, r6 - - ldr r3, [r0, #8] - ldr r4, [r1, #8] - ldr r5, [r0, #12] - ldr r6, [r1, #12] - - strh r7, [r2], #2 ;store result - smulbb r7, r3, r4 ;multiply - strh r8, [r2], #2 - smultt r8, r3, r4 - strh r9, [r2], #2 - smulbb r9, r5, r6 - strh lr, [r2], #2 - smultt lr, r5, r6 - - subs r12, r12, #1 - - add r0, r0, #16 - add r1, r1, #16 - - ldrne r3, [r0] - strh r7, [r2], #2 ;store result - ldrne r4, [r1] - strh r8, [r2], #2 - ldrne r5, [r0, #4] - strh r9, [r2], #2 - ldrne r6, [r1, #4] - strh lr, [r2], #2 - - bne dequant_loop - - ldmia sp!, {r4-r9, pc} - ENDP ;|vp8_dequantize_b_loop_v6| - - END diff --git a/vp8/decoder/arm/armv6/idct_blk_v6.c b/vp8/decoder/arm/armv6/idct_blk_v6.c deleted file mode 100644 index c1ef2852f..000000000 --- a/vp8/decoder/arm/armv6/idct_blk_v6.c +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vpx_config.h" -#include "vp8/common/idct.h" -#include "vp8/decoder/dequantize.h" - - -void vp8_dequant_idct_add_y_block_v6(short *q, short *dq, - unsigned char *dst, - int stride, char *eobs) -{ - int i; - - for (i = 0; i < 4; i++) - { - if (eobs[0] > 1) - vp8_dequant_idct_add_v6 (q, dq, dst, stride); - else if (eobs[0] == 1) - { - vp8_dc_only_idct_add_v6 (q[0]*dq[0], dst, stride, dst, stride); - ((int *)q)[0] = 0; - } - - if (eobs[1] > 1) - vp8_dequant_idct_add_v6 (q+16, dq, dst+4, stride); - else if (eobs[1] == 1) - { - vp8_dc_only_idct_add_v6 (q[16]*dq[0], dst+4, stride, dst+4, stride); - ((int *)(q+16))[0] = 0; - } - - if (eobs[2] > 1) - vp8_dequant_idct_add_v6 (q+32, dq, dst+8, stride); - else if (eobs[2] == 1) - { - vp8_dc_only_idct_add_v6 (q[32]*dq[0], dst+8, stride, dst+8, stride); - ((int *)(q+32))[0] = 0; - } - - if (eobs[3] > 1) - vp8_dequant_idct_add_v6 (q+48, dq, dst+12, stride); - else if (eobs[3] == 1) - { - vp8_dc_only_idct_add_v6 (q[48]*dq[0], dst+12, stride,dst+12,stride); - ((int *)(q+48))[0] = 0; - } - - q += 64; - dst += 4*stride; - eobs += 4; - } -} - -void vp8_dequant_idct_add_uv_block_v6(short *q, short *dq, - unsigned char *dstu, - unsigned char *dstv, - int stride, char *eobs) -{ - int i; - - for (i = 0; i < 2; i++) - { - if (eobs[0] > 1) - vp8_dequant_idct_add_v6 (q, dq, dstu, stride); - else if (eobs[0] == 1) - { - vp8_dc_only_idct_add_v6 (q[0]*dq[0], dstu, stride, dstu, stride); - ((int *)q)[0] = 0; - } - - if (eobs[1] > 1) - vp8_dequant_idct_add_v6 (q+16, dq, dstu+4, stride); - else if (eobs[1] == 1) - { - vp8_dc_only_idct_add_v6 (q[16]*dq[0], dstu+4, stride, - dstu+4, stride); - ((int *)(q+16))[0] = 0; - } - - q += 32; - dstu += 4*stride; - eobs += 2; - } - - for (i = 0; i < 2; i++) - { - if (eobs[0] > 1) - vp8_dequant_idct_add_v6 (q, dq, dstv, stride); - else if (eobs[0] == 1) - { - vp8_dc_only_idct_add_v6 (q[0]*dq[0], dstv, stride, dstv, stride); - ((int *)q)[0] = 0; - } - - if (eobs[1] > 1) - vp8_dequant_idct_add_v6 (q+16, dq, dstv+4, stride); - else if (eobs[1] == 1) - { - vp8_dc_only_idct_add_v6 (q[16]*dq[0], dstv+4, stride, - dstv+4, stride); - ((int *)(q+16))[0] = 0; - } - - q += 32; - dstv += 4*stride; - eobs += 2; - } -} diff --git a/vp8/decoder/arm/dequantize_arm.c b/vp8/decoder/arm/dequantize_arm.c deleted file mode 100644 index 2918e0512..000000000 --- a/vp8/decoder/arm/dequantize_arm.c +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vpx_config.h" -#include "vp8/decoder/dequantize.h" -#include "vp8/common/idct.h" -#include "vpx_mem/vpx_mem.h" - -#if HAVE_ARMV7 -extern void vp8_dequantize_b_loop_neon(short *Q, short *DQC, short *DQ); -#endif - -#if HAVE_ARMV6 -extern void vp8_dequantize_b_loop_v6(short *Q, short *DQC, short *DQ); -#endif - -#if HAVE_ARMV7 - -void vp8_dequantize_b_neon(BLOCKD *d) -{ - short *DQ = d->dqcoeff; - short *Q = d->qcoeff; - short *DQC = d->dequant; - - vp8_dequantize_b_loop_neon(Q, DQC, DQ); -} -#endif - -#if HAVE_ARMV6 -void vp8_dequantize_b_v6(BLOCKD *d) -{ - short *DQ = d->dqcoeff; - short *Q = d->qcoeff; - short *DQC = d->dequant; - - vp8_dequantize_b_loop_v6(Q, DQC, DQ); -} -#endif diff --git a/vp8/decoder/arm/dequantize_arm.h b/vp8/decoder/arm/dequantize_arm.h deleted file mode 100644 index 1123e8446..000000000 --- a/vp8/decoder/arm/dequantize_arm.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef DEQUANTIZE_ARM_H -#define DEQUANTIZE_ARM_H - -#if HAVE_ARMV6 -extern prototype_dequant_block(vp8_dequantize_b_v6); -extern prototype_dequant_idct_add(vp8_dequant_idct_add_v6); -extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_v6); -extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_v6); - -#if !CONFIG_RUNTIME_CPU_DETECT -#undef vp8_dequant_block -#define vp8_dequant_block vp8_dequantize_b_v6 - -#undef vp8_dequant_idct_add -#define vp8_dequant_idct_add vp8_dequant_idct_add_v6 - -#undef vp8_dequant_idct_add_y_block -#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_v6 - -#undef vp8_dequant_idct_add_uv_block -#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_v6 -#endif -#endif - -#if HAVE_ARMV7 -extern prototype_dequant_block(vp8_dequantize_b_neon); -extern prototype_dequant_idct_add(vp8_dequant_idct_add_neon); -extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_neon); -extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_neon); - - -#if !CONFIG_RUNTIME_CPU_DETECT -#undef vp8_dequant_block -#define vp8_dequant_block vp8_dequantize_b_neon - -#undef vp8_dequant_idct_add -#define vp8_dequant_idct_add vp8_dequant_idct_add_neon - -#undef vp8_dequant_idct_add_y_block -#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_neon - -#undef vp8_dequant_idct_add_uv_block -#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_neon -#endif - -#endif - -#endif diff --git a/vp8/decoder/arm/neon/dequant_idct_neon.asm b/vp8/decoder/arm/neon/dequant_idct_neon.asm deleted file mode 100644 index 602cce676..000000000 --- a/vp8/decoder/arm/neon/dequant_idct_neon.asm +++ /dev/null @@ -1,131 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_dequant_idct_add_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -;void vp8_dequant_idct_add_neon(short *input, short *dq, -; unsigned char *dest, int stride) -; r0 short *input, -; r1 short *dq, -; r2 unsigned char *dest -; r3 int stride - -|vp8_dequant_idct_add_neon| PROC - vld1.16 {q3, q4}, [r0] - vld1.16 {q5, q6}, [r1] - - add r1, r2, r3 ; r1 = dest + stride - lsl r3, #1 ; 2x stride - - vld1.32 {d14[0]}, [r2], r3 - vld1.32 {d14[1]}, [r1], r3 - vld1.32 {d15[0]}, [r2] - vld1.32 {d15[1]}, [r1] - - adr r12, cospi8sqrt2minus1 ; pointer to the first constant - - vmul.i16 q1, q3, q5 ;input for short_idct4x4llm_neon - vmul.i16 q2, q4, q6 - -;|short_idct4x4llm_neon| PROC - vld1.16 {d0}, [r12] - vswp d3, d4 ;q2(vp[4] vp[12]) - - vqdmulh.s16 q3, q2, d0[2] - vqdmulh.s16 q4, q2, d0[0] - - vqadd.s16 d12, d2, d3 ;a1 - vqsub.s16 d13, d2, d3 ;b1 - - vshr.s16 q3, q3, #1 - vshr.s16 q4, q4, #1 - - vqadd.s16 q3, q3, q2 - vqadd.s16 q4, q4, q2 - - vqsub.s16 d10, d6, d9 ;c1 - vqadd.s16 d11, d7, d8 ;d1 - - vqadd.s16 d2, d12, d11 - vqadd.s16 d3, d13, d10 - vqsub.s16 d4, d13, d10 - vqsub.s16 d5, d12, d11 - - vtrn.32 d2, d4 - vtrn.32 d3, d5 - vtrn.16 d2, d3 - vtrn.16 d4, d5 - -; memset(input, 0, 32) -- 32bytes - vmov.i16 q14, #0 - - vswp d3, d4 - vqdmulh.s16 q3, q2, d0[2] - vqdmulh.s16 q4, q2, d0[0] - - vqadd.s16 d12, d2, d3 ;a1 - vqsub.s16 d13, d2, d3 ;b1 - - vmov q15, q14 - - vshr.s16 q3, q3, #1 - vshr.s16 q4, q4, #1 - - vqadd.s16 q3, q3, q2 - vqadd.s16 q4, q4, q2 - - vqsub.s16 d10, d6, d9 ;c1 - vqadd.s16 d11, d7, d8 ;d1 - - vqadd.s16 d2, d12, d11 - vqadd.s16 d3, d13, d10 - vqsub.s16 d4, d13, d10 - vqsub.s16 d5, d12, d11 - - vst1.16 {q14, q15}, [r0] - - vrshr.s16 d2, d2, #3 - vrshr.s16 d3, d3, #3 - vrshr.s16 d4, d4, #3 - vrshr.s16 d5, d5, #3 - - vtrn.32 d2, d4 - vtrn.32 d3, d5 - vtrn.16 d2, d3 - vtrn.16 d4, d5 - - vaddw.u8 q1, q1, d14 - vaddw.u8 q2, q2, d15 - - sub r2, r2, r3 - sub r1, r1, r3 - - vqmovun.s16 d0, q1 - vqmovun.s16 d1, q2 - - vst1.32 {d0[0]}, [r2], r3 - vst1.32 {d0[1]}, [r1], r3 - vst1.32 {d1[0]}, [r2] - vst1.32 {d1[1]}, [r1] - - bx lr - - ENDP ; |vp8_dequant_idct_add_neon| - -; Constant Pool -cospi8sqrt2minus1 DCD 0x4e7b4e7b -sinpi8sqrt2 DCD 0x8a8c8a8c - - END diff --git a/vp8/decoder/arm/neon/dequantizeb_neon.asm b/vp8/decoder/arm/neon/dequantizeb_neon.asm deleted file mode 100644 index c8e0c31f2..000000000 --- a/vp8/decoder/arm/neon/dequantizeb_neon.asm +++ /dev/null @@ -1,34 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_dequantize_b_loop_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -; r0 short *Q, -; r1 short *DQC -; r2 short *DQ -|vp8_dequantize_b_loop_neon| PROC - vld1.16 {q0, q1}, [r0] - vld1.16 {q2, q3}, [r1] - - vmul.i16 q4, q0, q2 - vmul.i16 q5, q1, q3 - - vst1.16 {q4, q5}, [r2] - - bx lr - - ENDP - - END diff --git a/vp8/decoder/arm/neon/idct_blk_neon.c b/vp8/decoder/arm/neon/idct_blk_neon.c deleted file mode 100644 index 185895f05..000000000 --- a/vp8/decoder/arm/neon/idct_blk_neon.c +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vpx_config.h" -#include "vp8/common/idct.h" -#include "vp8/decoder/dequantize.h" - -/* place these declarations here because we don't want to maintain them - * outside of this scope - */ -void idct_dequant_full_2x_neon(short *q, short *dq, - unsigned char *dst, int stride); -void idct_dequant_0_2x_neon(short *q, short dq, - unsigned char *dst, int stride); - - -void vp8_dequant_idct_add_y_block_neon(short *q, short *dq, - unsigned char *dst, - int stride, char *eobs) -{ - int i; - - for (i = 0; i < 4; i++) - { - if (((short *)(eobs))[0]) - { - if (((short *)eobs)[0] & 0xfefe) - idct_dequant_full_2x_neon (q, dq, dst, stride); - else - idct_dequant_0_2x_neon (q, dq[0], dst, stride); - } - - if (((short *)(eobs))[1]) - { - if (((short *)eobs)[1] & 0xfefe) - idct_dequant_full_2x_neon (q+32, dq, dst+8, stride); - else - idct_dequant_0_2x_neon (q+32, dq[0], dst+8, stride); - } - q += 64; - dst += 4*stride; - eobs += 4; - } -} - -void vp8_dequant_idct_add_uv_block_neon(short *q, short *dq, - unsigned char *dstu, - unsigned char *dstv, - int stride, char *eobs) -{ - if (((short *)(eobs))[0]) - { - if (((short *)eobs)[0] & 0xfefe) - idct_dequant_full_2x_neon (q, dq, dstu, stride); - else - idct_dequant_0_2x_neon (q, dq[0], dstu, stride); - } - - q += 32; - dstu += 4*stride; - - if (((short *)(eobs))[1]) - { - if (((short *)eobs)[1] & 0xfefe) - idct_dequant_full_2x_neon (q, dq, dstu, stride); - else - idct_dequant_0_2x_neon (q, dq[0], dstu, stride); - } - - q += 32; - - if (((short *)(eobs))[2]) - { - if (((short *)eobs)[2] & 0xfefe) - idct_dequant_full_2x_neon (q, dq, dstv, stride); - else - idct_dequant_0_2x_neon (q, dq[0], dstv, stride); - } - - q += 32; - dstv += 4*stride; - - if (((short *)(eobs))[3]) - { - if (((short *)eobs)[3] & 0xfefe) - idct_dequant_full_2x_neon (q, dq, dstv, stride); - else - idct_dequant_0_2x_neon (q, dq[0], dstv, stride); - } -} diff --git a/vp8/decoder/arm/neon/idct_dequant_0_2x_neon.asm b/vp8/decoder/arm/neon/idct_dequant_0_2x_neon.asm deleted file mode 100644 index 6c29c5586..000000000 --- a/vp8/decoder/arm/neon/idct_dequant_0_2x_neon.asm +++ /dev/null @@ -1,79 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. -; - - - EXPORT |idct_dequant_0_2x_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -;void idct_dequant_0_2x_neon(short *q, short dq, -; unsigned char *dst, int stride); -; r0 *q -; r1 dq -; r2 *dst -; r3 stride -|idct_dequant_0_2x_neon| PROC - push {r4, r5} - - add r12, r2, #4 - vld1.32 {d2[0]}, [r2], r3 - vld1.32 {d8[0]}, [r12], r3 - vld1.32 {d2[1]}, [r2], r3 - vld1.32 {d8[1]}, [r12], r3 - vld1.32 {d4[0]}, [r2], r3 - vld1.32 {d10[0]}, [r12], r3 - vld1.32 {d4[1]}, [r2], r3 - vld1.32 {d10[1]}, [r12], r3 - - ldrh r12, [r0] ; lo q - ldrh r4, [r0, #32] ; hi q - mov r5, #0 - strh r5, [r0] - strh r5, [r0, #32] - - sxth r12, r12 ; lo - mul r0, r12, r1 - add r0, r0, #4 - asr r0, r0, #3 - vdup.16 q0, r0 - sxth r4, r4 ; hi - mul r0, r4, r1 - add r0, r0, #4 - asr r0, r0, #3 - vdup.16 q3, r0 - - vaddw.u8 q1, q0, d2 ; lo - vaddw.u8 q2, q0, d4 - vaddw.u8 q4, q3, d8 ; hi - vaddw.u8 q5, q3, d10 - - sub r2, r2, r3, lsl #2 ; dst - 4*stride - add r0, r2, #4 - - vqmovun.s16 d2, q1 ; lo - vqmovun.s16 d4, q2 - vqmovun.s16 d8, q4 ; hi - vqmovun.s16 d10, q5 - - vst1.32 {d2[0]}, [r2], r3 ; lo - vst1.32 {d8[0]}, [r0], r3 ; hi - vst1.32 {d2[1]}, [r2], r3 - vst1.32 {d8[1]}, [r0], r3 - vst1.32 {d4[0]}, [r2], r3 - vst1.32 {d10[0]}, [r0], r3 - vst1.32 {d4[1]}, [r2] - vst1.32 {d10[1]}, [r0] - - pop {r4, r5} - bx lr - - ENDP ; |idct_dequant_0_2x_neon| - END diff --git a/vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm b/vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm deleted file mode 100644 index d5dce63f6..000000000 --- a/vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm +++ /dev/null @@ -1,196 +0,0 @@ -; -; Copyright (c) 2010 The Webm project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |idct_dequant_full_2x_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -;void idct_dequant_full_2x_neon(short *q, short *dq, -; unsigned char *dst, int stride); -; r0 *q, -; r1 *dq, -; r2 *dst -; r3 stride -|idct_dequant_full_2x_neon| PROC - vld1.16 {q0, q1}, [r1] ; dq (same l/r) - vld1.16 {q2, q3}, [r0] ; l q - add r0, r0, #32 - vld1.16 {q4, q5}, [r0] ; r q - add r12, r2, #4 - - ; interleave the predictors - vld1.32 {d28[0]}, [r2], r3 ; l pre - vld1.32 {d28[1]}, [r12], r3 ; r pre - vld1.32 {d29[0]}, [r2], r3 - vld1.32 {d29[1]}, [r12], r3 - vld1.32 {d30[0]}, [r2], r3 - vld1.32 {d30[1]}, [r12], r3 - vld1.32 {d31[0]}, [r2], r3 - vld1.32 {d31[1]}, [r12] - - adr r1, cospi8sqrt2minus1 ; pointer to the first constant - - ; dequant: q[i] = q[i] * dq[i] - vmul.i16 q2, q2, q0 - vmul.i16 q3, q3, q1 - vmul.i16 q4, q4, q0 - vmul.i16 q5, q5, q1 - - vld1.16 {d0}, [r1] - - ; q2: l0r0 q3: l8r8 - ; q4: l4r4 q5: l12r12 - vswp d5, d8 - vswp d7, d10 - - ; _CONSTANTS_ * 4,12 >> 16 - ; q6: 4 * sinpi : c1/temp1 - ; q7: 12 * sinpi : d1/temp2 - ; q8: 4 * cospi - ; q9: 12 * cospi - vqdmulh.s16 q6, q4, d0[2] ; sinpi8sqrt2 - vqdmulh.s16 q7, q5, d0[2] - vqdmulh.s16 q8, q4, d0[0] ; cospi8sqrt2minus1 - vqdmulh.s16 q9, q5, d0[0] - - vqadd.s16 q10, q2, q3 ; a1 = 0 + 8 - vqsub.s16 q11, q2, q3 ; b1 = 0 - 8 - - ; vqdmulh only accepts signed values. this was a problem because - ; our constant had the high bit set, and was treated as a negative value. - ; vqdmulh also doubles the value before it shifts by 16. we need to - ; compensate for this. in the case of sinpi8sqrt2, the lowest bit is 0, - ; so we can shift the constant without losing precision. this avoids - ; shift again afterward, but also avoids the sign issue. win win! - ; for cospi8sqrt2minus1 the lowest bit is 1, so we lose precision if we - ; pre-shift it - vshr.s16 q8, q8, #1 - vshr.s16 q9, q9, #1 - - ; q4: 4 + 4 * cospi : d1/temp1 - ; q5: 12 + 12 * cospi : c1/temp2 - vqadd.s16 q4, q4, q8 - vqadd.s16 q5, q5, q9 - - ; c1 = temp1 - temp2 - ; d1 = temp1 + temp2 - vqsub.s16 q2, q6, q5 - vqadd.s16 q3, q4, q7 - - ; [0]: a1+d1 - ; [1]: b1+c1 - ; [2]: b1-c1 - ; [3]: a1-d1 - vqadd.s16 q4, q10, q3 - vqadd.s16 q5, q11, q2 - vqsub.s16 q6, q11, q2 - vqsub.s16 q7, q10, q3 - - ; rotate - vtrn.32 q4, q6 - vtrn.32 q5, q7 - vtrn.16 q4, q5 - vtrn.16 q6, q7 - ; idct loop 2 - ; q4: l 0, 4, 8,12 r 0, 4, 8,12 - ; q5: l 1, 5, 9,13 r 1, 5, 9,13 - ; q6: l 2, 6,10,14 r 2, 6,10,14 - ; q7: l 3, 7,11,15 r 3, 7,11,15 - - ; q8: 1 * sinpi : c1/temp1 - ; q9: 3 * sinpi : d1/temp2 - ; q10: 1 * cospi - ; q11: 3 * cospi - vqdmulh.s16 q8, q5, d0[2] ; sinpi8sqrt2 - vqdmulh.s16 q9, q7, d0[2] - vqdmulh.s16 q10, q5, d0[0] ; cospi8sqrt2minus1 - vqdmulh.s16 q11, q7, d0[0] - - vqadd.s16 q2, q4, q6 ; a1 = 0 + 2 - vqsub.s16 q3, q4, q6 ; b1 = 0 - 2 - - ; see note on shifting above - vshr.s16 q10, q10, #1 - vshr.s16 q11, q11, #1 - - ; q10: 1 + 1 * cospi : d1/temp1 - ; q11: 3 + 3 * cospi : c1/temp2 - vqadd.s16 q10, q5, q10 - vqadd.s16 q11, q7, q11 - - ; q8: c1 = temp1 - temp2 - ; q9: d1 = temp1 + temp2 - vqsub.s16 q8, q8, q11 - vqadd.s16 q9, q10, q9 - - ; a1+d1 - ; b1+c1 - ; b1-c1 - ; a1-d1 - vqadd.s16 q4, q2, q9 - vqadd.s16 q5, q3, q8 - vqsub.s16 q6, q3, q8 - vqsub.s16 q7, q2, q9 - - ; +4 >> 3 (rounding) - vrshr.s16 q4, q4, #3 ; lo - vrshr.s16 q5, q5, #3 - vrshr.s16 q6, q6, #3 ; hi - vrshr.s16 q7, q7, #3 - - vtrn.32 q4, q6 - vtrn.32 q5, q7 - vtrn.16 q4, q5 - vtrn.16 q6, q7 - - ; adding pre - ; input is still packed. pre was read interleaved - vaddw.u8 q4, q4, d28 - vaddw.u8 q5, q5, d29 - vaddw.u8 q6, q6, d30 - vaddw.u8 q7, q7, d31 - - vmov.i16 q14, #0 - vmov q15, q14 - vst1.16 {q14, q15}, [r0] ; write over high input - sub r0, r0, #32 - vst1.16 {q14, q15}, [r0] ; write over low input - - sub r2, r2, r3, lsl #2 ; dst - 4*stride - add r1, r2, #4 ; hi - - ;saturate and narrow - vqmovun.s16 d0, q4 ; lo - vqmovun.s16 d1, q5 - vqmovun.s16 d2, q6 ; hi - vqmovun.s16 d3, q7 - - vst1.32 {d0[0]}, [r2], r3 ; lo - vst1.32 {d0[1]}, [r1], r3 ; hi - vst1.32 {d1[0]}, [r2], r3 - vst1.32 {d1[1]}, [r1], r3 - vst1.32 {d2[0]}, [r2], r3 - vst1.32 {d2[1]}, [r1], r3 - vst1.32 {d3[0]}, [r2] - vst1.32 {d3[1]}, [r1] - - bx lr - - ENDP ; |idct_dequant_full_2x_neon| - -; Constant Pool -cospi8sqrt2minus1 DCD 0x4e7b -; because the lowest bit in 0x8a8c is 0, we can pre-shift this -sinpi8sqrt2 DCD 0x4546 - - END diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c index 31eafcf54..4ab09c596 100644 --- a/vp8/decoder/decodframe.c +++ b/vp8/decoder/decodframe.c @@ -15,7 +15,7 @@ #include "vp8/common/reconintra4x4.h" #include "vp8/common/recon.h" #include "vp8/common/reconinter.h" -#include "dequantize.h" +#include "vp8/common/dequantize.h" #include "detokenize.h" #include "vp8/common/invtrans.h" #include "vp8/common/alloccommon.h" @@ -32,7 +32,7 @@ #endif #include "vpx_mem/vpx_mem.h" #include "vp8/common/idct.h" -#include "dequantize.h" + #include "vp8/common/threading.h" #include "decoderthreading.h" #include "dboolhuff.h" @@ -218,7 +218,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, { if (xd->eobs[i] > 1) { - DEQUANT_INVOKE(&pbi->dequant, idct_add) + DEQUANT_INVOKE(&pbi->common.rtcd.dequant, idct_add) (b->qcoeff, b->dequant, *(b->base_dst) + b->dst, b->dst_stride); } @@ -247,7 +247,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, /* do 2nd order transform on the dc block */ if (xd->eobs[24] > 1) { - DEQUANT_INVOKE(&pbi->dequant, block)(b); + DEQUANT_INVOKE(&pbi->common.rtcd.dequant, block)(b); IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], xd->qcoeff); @@ -272,7 +272,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, DQC[0] = 1; } - DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block) + DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_y_block) (xd->qcoeff, xd->block[0].dequant, xd->dst.y_buffer, xd->dst.y_stride, xd->eobs); @@ -281,7 +281,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, DQC[0] = dc_dequant_temp; } - DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block) + DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_uv_block) (xd->qcoeff+16*16, xd->block[16].dequant, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride, xd->eobs+16); diff --git a/vp8/decoder/dequantize.c b/vp8/decoder/dequantize.c deleted file mode 100644 index 4a48a3192..000000000 --- a/vp8/decoder/dequantize.c +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vpx_config.h" -#include "dequantize.h" -#include "vp8/common/idct.h" -#include "vpx_mem/vpx_mem.h" - -void vp8_dequantize_b_c(BLOCKD *d) -{ - int i; - short *DQ = d->dqcoeff; - short *Q = d->qcoeff; - short *DQC = d->dequant; - - for (i = 0; i < 16; i++) - { - DQ[i] = Q[i] * DQC[i]; - } -} - -void vp8_dequant_idct_add_c(short *input, short *dq, - unsigned char *dest, int stride) -{ - int i; - - for (i = 0; i < 16; i++) - { - input[i] = dq[i] * input[i]; - } - - vp8_short_idct4x4llm_c(input, dest, stride, dest, stride); - - vpx_memset(input, 0, 32); - -} diff --git a/vp8/decoder/dequantize.h b/vp8/decoder/dequantize.h deleted file mode 100644 index f66cf2bac..000000000 --- a/vp8/decoder/dequantize.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef DEQUANTIZE_H -#define DEQUANTIZE_H -#include "vp8/common/blockd.h" - -#define prototype_dequant_block(sym) \ - void sym(BLOCKD *x) - -#define prototype_dequant_idct_add(sym) \ - void sym(short *input, short *dq, \ - unsigned char *output, \ - int stride) - -#define prototype_dequant_idct_add_y_block(sym) \ - void sym(short *q, short *dq, \ - unsigned char *dst, \ - int stride, char *eobs) - -#define prototype_dequant_idct_add_uv_block(sym) \ - void sym(short *q, short *dq, \ - unsigned char *dst_u, \ - unsigned char *dst_v, int stride, char *eobs) - -#if ARCH_X86 || ARCH_X86_64 -#include "x86/dequantize_x86.h" -#endif - -#if ARCH_ARM -#include "arm/dequantize_arm.h" -#endif - -#ifndef vp8_dequant_block -#define vp8_dequant_block vp8_dequantize_b_c -#endif -extern prototype_dequant_block(vp8_dequant_block); - -#ifndef vp8_dequant_idct_add -#define vp8_dequant_idct_add vp8_dequant_idct_add_c -#endif -extern prototype_dequant_idct_add(vp8_dequant_idct_add); - -#ifndef vp8_dequant_idct_add_y_block -#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_c -#endif -extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block); - -#ifndef vp8_dequant_idct_add_uv_block -#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_c -#endif -extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block); - - -typedef prototype_dequant_block((*vp8_dequant_block_fn_t)); - -typedef prototype_dequant_idct_add((*vp8_dequant_idct_add_fn_t)); - -typedef prototype_dequant_idct_add_y_block((*vp8_dequant_idct_add_y_block_fn_t)); - -typedef prototype_dequant_idct_add_uv_block((*vp8_dequant_idct_add_uv_block_fn_t)); - -typedef struct -{ - vp8_dequant_block_fn_t block; - vp8_dequant_idct_add_fn_t idct_add; - vp8_dequant_idct_add_y_block_fn_t idct_add_y_block; - vp8_dequant_idct_add_uv_block_fn_t idct_add_uv_block; -} vp8_dequant_rtcd_vtable_t; - -#if CONFIG_RUNTIME_CPU_DETECT -#define DEQUANT_INVOKE(ctx,fn) (ctx)->fn -#else -#define DEQUANT_INVOKE(ctx,fn) vp8_dequant_##fn -#endif - -#endif diff --git a/vp8/decoder/generic/dsystemdependent.c b/vp8/decoder/generic/dsystemdependent.c index d9f9ba3c8..8a84e566a 100644 --- a/vp8/decoder/generic/dsystemdependent.c +++ b/vp8/decoder/generic/dsystemdependent.c @@ -10,7 +10,7 @@ #include "vpx_config.h" -#include "vp8/decoder/dequantize.h" +#include "vp8/common/dequantize.h" #include "vp8/decoder/onyxd_int.h" extern void vp8_arch_x86_decode_init(VP8D_COMP *pbi); @@ -20,11 +20,7 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi) { /* Pure C: */ #if CONFIG_RUNTIME_CPU_DETECT - pbi->mb.rtcd = &pbi->common.rtcd; - pbi->dequant.block = vp8_dequantize_b_c; - pbi->dequant.idct_add = vp8_dequant_idct_add_c; - pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_c; - pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_c; + pbi->mb.rtcd = &pbi->common.rtcd; #endif #if ARCH_X86 || ARCH_X86_64 diff --git a/vp8/decoder/idct_blk.c b/vp8/decoder/idct_blk.c deleted file mode 100644 index 249fad4ea..000000000 --- a/vp8/decoder/idct_blk.c +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vpx_config.h" -#include "vp8/common/idct.h" -#include "dequantize.h" - -void vp8_dequant_idct_add_c(short *input, short *dq, - unsigned char *dest, int stride); -void vp8_dc_only_idct_add_c(short input_dc, unsigned char * pred, - int pred_stride, unsigned char *dst_ptr, - int dst_stride); - -void vp8_dequant_idct_add_y_block_c - (short *q, short *dq, - unsigned char *dst, int stride, char *eobs) -{ - int i, j; - - for (i = 0; i < 4; i++) - { - for (j = 0; j < 4; j++) - { - if (*eobs++ > 1) - vp8_dequant_idct_add_c (q, dq, dst, stride); - else - { - vp8_dc_only_idct_add_c (q[0]*dq[0], dst, stride, dst, stride); - ((int *)q)[0] = 0; - } - - q += 16; - dst += 4; - } - - dst += 4*stride - 16; - } -} - -void vp8_dequant_idct_add_uv_block_c - (short *q, short *dq, - unsigned char *dstu, unsigned char *dstv, int stride, char *eobs) -{ - int i, j; - - for (i = 0; i < 2; i++) - { - for (j = 0; j < 2; j++) - { - if (*eobs++ > 1) - vp8_dequant_idct_add_c (q, dq, dstu, stride); - else - { - vp8_dc_only_idct_add_c (q[0]*dq[0], dstu, stride, dstu, stride); - ((int *)q)[0] = 0; - } - - q += 16; - dstu += 4; - } - - dstu += 4*stride - 8; - } - - for (i = 0; i < 2; i++) - { - for (j = 0; j < 2; j++) - { - if (*eobs++ > 1) - vp8_dequant_idct_add_c (q, dq, dstv, stride); - else - { - vp8_dc_only_idct_add_c (q[0]*dq[0], dstv, stride, dstv, stride); - ((int *)q)[0] = 0; - } - - q += 16; - dstv += 4; - } - - dstv += 4*stride - 8; - } -} diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h index 519a7f2b9..f48f5173b 100644 --- a/vp8/decoder/onyxd_int.h +++ b/vp8/decoder/onyxd_int.h @@ -16,7 +16,8 @@ #include "treereader.h" #include "vp8/common/onyxc_int.h" #include "vp8/common/threading.h" -#include "dequantize.h" + + #if CONFIG_ERROR_CONCEALMENT #include "ec_types.h" #endif @@ -93,11 +94,6 @@ typedef struct VP8Decompressor DATARATE dr[16]; -#if CONFIG_RUNTIME_CPU_DETECT - vp8_dequant_rtcd_vtable_t dequant; -#endif - - vp8_prob prob_intra; vp8_prob prob_last; vp8_prob prob_gf; diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c index 1967781eb..947b3a1c6 100644 --- a/vp8/decoder/threading.c +++ b/vp8/decoder/threading.c @@ -189,7 +189,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m { if (xd->eobs[i] > 1) { - DEQUANT_INVOKE(&pbi->dequant, idct_add) + DEQUANT_INVOKE(&pbi->common.rtcd.dequant, idct_add) (b->qcoeff, b->dequant, *(b->base_dst) + b->dst, b->dst_stride); } @@ -217,7 +217,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m /* do 2nd order transform on the dc block */ if (xd->eobs[24] > 1) { - DEQUANT_INVOKE(&pbi->dequant, block)(b); + DEQUANT_INVOKE(&pbi->common.rtcd.dequant, block)(b); IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], xd->qcoeff); @@ -248,13 +248,13 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m DQC = local_dequant; } - DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block) + DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_y_block) (xd->qcoeff, DQC, xd->dst.y_buffer, xd->dst.y_stride, xd->eobs); } - DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block) + DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_uv_block) (xd->qcoeff+16*16, xd->block[16].dequant, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride, xd->eobs+16); diff --git a/vp8/decoder/x86/dequantize_mmx.asm b/vp8/decoder/x86/dequantize_mmx.asm deleted file mode 100644 index de9eba89f..000000000 --- a/vp8/decoder/x86/dequantize_mmx.asm +++ /dev/null @@ -1,258 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - - -;void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q) -global sym(vp8_dequantize_b_impl_mmx) -sym(vp8_dequantize_b_impl_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 3 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;sq - mov rdi, arg(1) ;dq - mov rax, arg(2) ;q - - movq mm1, [rsi] - pmullw mm1, [rax+0] ; mm4 *= kernel 0 modifiers. - movq [rdi], mm1 - - movq mm1, [rsi+8] - pmullw mm1, [rax+8] ; mm4 *= kernel 0 modifiers. - movq [rdi+8], mm1 - - movq mm1, [rsi+16] - pmullw mm1, [rax+16] ; mm4 *= kernel 0 modifiers. - movq [rdi+16], mm1 - - movq mm1, [rsi+24] - pmullw mm1, [rax+24] ; mm4 *= kernel 0 modifiers. - movq [rdi+24], mm1 - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - - -;void dequant_idct_add_mmx( -;short *input, 0 -;short *dq, 1 -;unsigned char *dest, 2 -;int stride) 3 -global sym(vp8_dequant_idct_add_mmx) -sym(vp8_dequant_idct_add_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - GET_GOT rbx - push rdi - ; end prolog - - mov rax, arg(0) ;input - mov rdx, arg(1) ;dq - - - movq mm0, [rax ] - pmullw mm0, [rdx] - - movq mm1, [rax +8] - pmullw mm1, [rdx +8] - - movq mm2, [rax+16] - pmullw mm2, [rdx+16] - - movq mm3, [rax+24] - pmullw mm3, [rdx+24] - - mov rdx, arg(2) ;dest - - pxor mm7, mm7 - - - movq [rax], mm7 - movq [rax+8], mm7 - - movq [rax+16],mm7 - movq [rax+24],mm7 - - - movsxd rdi, dword ptr arg(3) ;stride - - psubw mm0, mm2 ; b1= 0-2 - paddw mm2, mm2 ; - - movq mm5, mm1 - paddw mm2, mm0 ; a1 =0+2 - - pmulhw mm5, [GLOBAL(x_s1sqr2)]; - paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2) - - movq mm7, mm3 ; - pmulhw mm7, [GLOBAL(x_c1sqr2less1)]; - - paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2) - psubw mm7, mm5 ; c1 - - movq mm5, mm1 - movq mm4, mm3 - - pmulhw mm5, [GLOBAL(x_c1sqr2less1)] - paddw mm5, mm1 - - pmulhw mm3, [GLOBAL(x_s1sqr2)] - paddw mm3, mm4 - - paddw mm3, mm5 ; d1 - movq mm6, mm2 ; a1 - - movq mm4, mm0 ; b1 - paddw mm2, mm3 ;0 - - paddw mm4, mm7 ;1 - psubw mm0, mm7 ;2 - - psubw mm6, mm3 ;3 - - movq mm1, mm2 ; 03 02 01 00 - movq mm3, mm4 ; 23 22 21 20 - - punpcklwd mm1, mm0 ; 11 01 10 00 - punpckhwd mm2, mm0 ; 13 03 12 02 - - punpcklwd mm3, mm6 ; 31 21 30 20 - punpckhwd mm4, mm6 ; 33 23 32 22 - - movq mm0, mm1 ; 11 01 10 00 - movq mm5, mm2 ; 13 03 12 02 - - punpckldq mm0, mm3 ; 30 20 10 00 - punpckhdq mm1, mm3 ; 31 21 11 01 - - punpckldq mm2, mm4 ; 32 22 12 02 - punpckhdq mm5, mm4 ; 33 23 13 03 - - movq mm3, mm5 ; 33 23 13 03 - - psubw mm0, mm2 ; b1= 0-2 - paddw mm2, mm2 ; - - movq mm5, mm1 - paddw mm2, mm0 ; a1 =0+2 - - pmulhw mm5, [GLOBAL(x_s1sqr2)]; - paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2) - - movq mm7, mm3 ; - pmulhw mm7, [GLOBAL(x_c1sqr2less1)]; - - paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2) - psubw mm7, mm5 ; c1 - - movq mm5, mm1 - movq mm4, mm3 - - pmulhw mm5, [GLOBAL(x_c1sqr2less1)] - paddw mm5, mm1 - - pmulhw mm3, [GLOBAL(x_s1sqr2)] - paddw mm3, mm4 - - paddw mm3, mm5 ; d1 - paddw mm0, [GLOBAL(fours)] - - paddw mm2, [GLOBAL(fours)] - movq mm6, mm2 ; a1 - - movq mm4, mm0 ; b1 - paddw mm2, mm3 ;0 - - paddw mm4, mm7 ;1 - psubw mm0, mm7 ;2 - - psubw mm6, mm3 ;3 - psraw mm2, 3 - - psraw mm0, 3 - psraw mm4, 3 - - psraw mm6, 3 - - movq mm1, mm2 ; 03 02 01 00 - movq mm3, mm4 ; 23 22 21 20 - - punpcklwd mm1, mm0 ; 11 01 10 00 - punpckhwd mm2, mm0 ; 13 03 12 02 - - punpcklwd mm3, mm6 ; 31 21 30 20 - punpckhwd mm4, mm6 ; 33 23 32 22 - - movq mm0, mm1 ; 11 01 10 00 - movq mm5, mm2 ; 13 03 12 02 - - punpckldq mm0, mm3 ; 30 20 10 00 - punpckhdq mm1, mm3 ; 31 21 11 01 - - punpckldq mm2, mm4 ; 32 22 12 02 - punpckhdq mm5, mm4 ; 33 23 13 03 - - pxor mm7, mm7 - - movd mm4, [rdx] - punpcklbw mm4, mm7 - paddsw mm0, mm4 - packuswb mm0, mm7 - movd [rdx], mm0 - - movd mm4, [rdx+rdi] - punpcklbw mm4, mm7 - paddsw mm1, mm4 - packuswb mm1, mm7 - movd [rdx+rdi], mm1 - - movd mm4, [rdx+2*rdi] - punpcklbw mm4, mm7 - paddsw mm2, mm4 - packuswb mm2, mm7 - movd [rdx+rdi*2], mm2 - - add rdx, rdi - - movd mm4, [rdx+2*rdi] - punpcklbw mm4, mm7 - paddsw mm5, mm4 - packuswb mm5, mm7 - movd [rdx+rdi*2], mm5 - - ; begin epilog - pop rdi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - -SECTION_RODATA -align 16 -x_s1sqr2: - times 4 dw 0x8A8C -align 16 -x_c1sqr2less1: - times 4 dw 0x4E7B -align 16 -fours: - times 4 dw 0x0004 diff --git a/vp8/decoder/x86/dequantize_x86.h b/vp8/decoder/x86/dequantize_x86.h deleted file mode 100644 index 49bcb7f19..000000000 --- a/vp8/decoder/x86/dequantize_x86.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef DEQUANTIZE_X86_H -#define DEQUANTIZE_X86_H - - -/* Note: - * - * This platform is commonly built for runtime CPU detection. If you modify - * any of the function mappings present in this file, be sure to also update - * them in the function pointer initialization code - */ -#if HAVE_MMX -extern prototype_dequant_block(vp8_dequantize_b_mmx); -extern prototype_dequant_idct_add(vp8_dequant_idct_add_mmx); -extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_mmx); -extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_mmx); - -#if !CONFIG_RUNTIME_CPU_DETECT -#undef vp8_dequant_block -#define vp8_dequant_block vp8_dequantize_b_mmx - -#undef vp8_dequant_idct_add -#define vp8_dequant_idct_add vp8_dequant_idct_add_mmx - -#undef vp8_dequant_idct_add_y_block -#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_mmx - -#undef vp8_dequant_idct_add_uv_block -#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_mmx - -#endif -#endif - -#if HAVE_SSE2 -extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_sse2); -extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_sse2); - -#if !CONFIG_RUNTIME_CPU_DETECT -#undef vp8_dequant_idct_add_y_block -#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_sse2 - -#undef vp8_dequant_idct_add_uv_block -#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_sse2 - -#endif -#endif - -#endif diff --git a/vp8/decoder/x86/idct_blk_mmx.c b/vp8/decoder/x86/idct_blk_mmx.c deleted file mode 100644 index 29276e5d7..000000000 --- a/vp8/decoder/x86/idct_blk_mmx.c +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vpx_config.h" -#include "vp8/common/idct.h" -#include "vp8/decoder/dequantize.h" - -void vp8_dequant_idct_add_y_block_mmx - (short *q, short *dq, - unsigned char *dst, int stride, char *eobs) -{ - int i; - - for (i = 0; i < 4; i++) - { - if (eobs[0] > 1) - vp8_dequant_idct_add_mmx (q, dq, dst, stride); - else if (eobs[0] == 1) - { - vp8_dc_only_idct_add_mmx (q[0]*dq[0], dst, stride, dst, stride); - ((int *)q)[0] = 0; - } - - if (eobs[1] > 1) - vp8_dequant_idct_add_mmx (q+16, dq, dst+4, stride); - else if (eobs[1] == 1) - { - vp8_dc_only_idct_add_mmx (q[16]*dq[0], dst+4, stride, - dst+4, stride); - ((int *)(q+16))[0] = 0; - } - - if (eobs[2] > 1) - vp8_dequant_idct_add_mmx (q+32, dq, dst+8, stride); - else if (eobs[2] == 1) - { - vp8_dc_only_idct_add_mmx (q[32]*dq[0], dst+8, stride, - dst+8, stride); - ((int *)(q+32))[0] = 0; - } - - if (eobs[3] > 1) - vp8_dequant_idct_add_mmx (q+48, dq, dst+12, stride); - else if (eobs[3] == 1) - { - vp8_dc_only_idct_add_mmx (q[48]*dq[0], dst+12, stride, - dst+12, stride); - ((int *)(q+48))[0] = 0; - } - - q += 64; - dst += 4*stride; - eobs += 4; - } -} - -void vp8_dequant_idct_add_uv_block_mmx - (short *q, short *dq, - unsigned char *dstu, unsigned char *dstv, int stride, char *eobs) -{ - int i; - - for (i = 0; i < 2; i++) - { - if (eobs[0] > 1) - vp8_dequant_idct_add_mmx (q, dq, dstu, stride); - else if (eobs[0] == 1) - { - vp8_dc_only_idct_add_mmx (q[0]*dq[0], dstu, stride, dstu, stride); - ((int *)q)[0] = 0; - } - - if (eobs[1] > 1) - vp8_dequant_idct_add_mmx (q+16, dq, dstu+4, stride); - else if (eobs[1] == 1) - { - vp8_dc_only_idct_add_mmx (q[16]*dq[0], dstu+4, stride, - dstu+4, stride); - ((int *)(q+16))[0] = 0; - } - - q += 32; - dstu += 4*stride; - eobs += 2; - } - - for (i = 0; i < 2; i++) - { - if (eobs[0] > 1) - vp8_dequant_idct_add_mmx (q, dq, dstv, stride); - else if (eobs[0] == 1) - { - vp8_dc_only_idct_add_mmx (q[0]*dq[0], dstv, stride, dstv, stride); - ((int *)q)[0] = 0; - } - - if (eobs[1] > 1) - vp8_dequant_idct_add_mmx (q+16, dq, dstv+4, stride); - else if (eobs[1] == 1) - { - vp8_dc_only_idct_add_mmx (q[16]*dq[0], dstv+4, stride, - dstv+4, stride); - ((int *)(q+16))[0] = 0; - } - - q += 32; - dstv += 4*stride; - eobs += 2; - } -} diff --git a/vp8/decoder/x86/idct_blk_sse2.c b/vp8/decoder/x86/idct_blk_sse2.c deleted file mode 100644 index 03c2878c1..000000000 --- a/vp8/decoder/x86/idct_blk_sse2.c +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vpx_config.h" -#include "vp8/common/idct.h" -#include "vp8/decoder/dequantize.h" - -void vp8_idct_dequant_0_2x_sse2 - (short *q, short *dq , - unsigned char *dst, int dst_stride); -void vp8_idct_dequant_full_2x_sse2 - (short *q, short *dq , - unsigned char *dst, int dst_stride); - -void vp8_dequant_idct_add_y_block_sse2 - (short *q, short *dq, - unsigned char *dst, int stride, char *eobs) -{ - int i; - - for (i = 0; i < 4; i++) - { - if (((short *)(eobs))[0]) - { - if (((short *)(eobs))[0] & 0xfefe) - vp8_idct_dequant_full_2x_sse2 (q, dq, dst, stride); - else - vp8_idct_dequant_0_2x_sse2 (q, dq, dst, stride); - } - if (((short *)(eobs))[1]) - { - if (((short *)(eobs))[1] & 0xfefe) - vp8_idct_dequant_full_2x_sse2 (q+32, dq, dst+8, stride); - else - vp8_idct_dequant_0_2x_sse2 (q+32, dq, dst+8, stride); - } - q += 64; - dst += stride*4; - eobs += 4; - } -} - -void vp8_dequant_idct_add_uv_block_sse2 - (short *q, short *dq, - unsigned char *dstu, unsigned char *dstv, int stride, char *eobs) -{ - if (((short *)(eobs))[0]) - { - if (((short *)(eobs))[0] & 0xfefe) - vp8_idct_dequant_full_2x_sse2 (q, dq, dstu, stride); - else - vp8_idct_dequant_0_2x_sse2 (q, dq, dstu, stride); - } - q += 32; - dstu += stride*4; - - if (((short *)(eobs))[1]) - { - if (((short *)(eobs))[1] & 0xfefe) - vp8_idct_dequant_full_2x_sse2 (q, dq, dstu, stride); - else - vp8_idct_dequant_0_2x_sse2 (q, dq, dstu, stride); - } - q += 32; - - if (((short *)(eobs))[2]) - { - if (((short *)(eobs))[2] & 0xfefe) - vp8_idct_dequant_full_2x_sse2 (q, dq, dstv, stride); - else - vp8_idct_dequant_0_2x_sse2 (q, dq, dstv, stride); - } - q += 32; - dstv += stride*4; - - if (((short *)(eobs))[3]) - { - if (((short *)(eobs))[3] & 0xfefe) - vp8_idct_dequant_full_2x_sse2 (q, dq, dstv, stride); - else - vp8_idct_dequant_0_2x_sse2 (q, dq, dstv, stride); - } -} diff --git a/vp8/decoder/x86/x86_dsystemdependent.c b/vp8/decoder/x86/x86_dsystemdependent.c index 91dba7e1a..27bf5ddbd 100644 --- a/vp8/decoder/x86/x86_dsystemdependent.c +++ b/vp8/decoder/x86/x86_dsystemdependent.c @@ -13,47 +13,7 @@ #include "vpx_ports/x86.h" #include "vp8/decoder/onyxd_int.h" - -#if HAVE_MMX -void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q); - -void vp8_dequantize_b_mmx(BLOCKD *d) -{ - short *sq = (short *) d->qcoeff; - short *dq = (short *) d->dqcoeff; - short *q = (short *) d->dequant; - vp8_dequantize_b_impl_mmx(sq, dq, q); -} -#endif - void vp8_arch_x86_decode_init(VP8D_COMP *pbi) { -#if CONFIG_RUNTIME_CPU_DETECT - int flags = x86_simd_caps(); - - /* Note: - * - * This platform can be built without runtime CPU detection as well. If - * you modify any of the function mappings present in this file, be sure - * to also update them in static mapings (/filename_.h) - */ - /* Override default functions with fastest ones for this CPU. */ -#if HAVE_MMX - if (flags & HAS_MMX) - { - pbi->dequant.block = vp8_dequantize_b_mmx; - pbi->dequant.idct_add = vp8_dequant_idct_add_mmx; - pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_mmx; - pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_mmx; - } -#endif -#if HAVE_SSE2 - if (flags & HAS_SSE2) - { - pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_sse2; - pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_sse2; - } -#endif -#endif } -- cgit v1.2.3