summary | refs | log | tree | commit | diff
path: root/vp8/decoder
diff options
context:
space:
mode:
author Scott LaVarnway <slavarnway@google.com> 2011-12-15 14:23:36 -0500
committer Scott LaVarnway <slavarnway@google.com> 2011-12-15 14:23:41 -0500
commit a53d5a4c442a84cacbd8225fac72db3789b3e10c (patch)
tree 85b3431c866bcfcf3a7ce9412ca26857b5dafd7a /vp8/decoder
parent c8df1656bd94928059204242e778bd5b8b9dc7aa (diff)
download libvpx-a53d5a4c442a84cacbd8225fac72db3789b3e10c.tar
libvpx-a53d5a4c442a84cacbd8225fac72db3789b3e10c.tar.gz
libvpx-a53d5a4c442a84cacbd8225fac72db3789b3e10c.tar.bz2
libvpx-a53d5a4c442a84cacbd8225fac72db3789b3e10c.zip
Moved dequant idct into common
These functions are now used by the encoder. This is WIP with the goal of creating a common idct/add for the encoder and decoder. A boost of 1.8% was seen for the HD rt test clip used.

[Tero] Added needed changes to ARM side.

Change-Id: Ibbb8000be09034203d7adffc457d3c3f8b06a5bf
Diffstat (limited to 'vp8/decoder')
-rw-r--r-- vp8/decoder/arm/arm_dsystemdependent.c 11
-rw-r--r-- vp8/decoder/arm/armv6/dequant_idct_v6.asm 190
-rw-r--r-- vp8/decoder/arm/armv6/dequantize_v6.asm 69
-rw-r--r-- vp8/decoder/arm/armv6/idct_blk_v6.c 116
-rw-r--r-- vp8/decoder/arm/dequantize_arm.c 46
-rw-r--r-- vp8/decoder/arm/dequantize_arm.h 59
-rw-r--r-- vp8/decoder/arm/neon/dequant_idct_neon.asm 131
-rw-r--r-- vp8/decoder/arm/neon/dequantizeb_neon.asm 34
-rw-r--r-- vp8/decoder/arm/neon/idct_blk_neon.c 97
-rw-r--r-- vp8/decoder/arm/neon/idct_dequant_0_2x_neon.asm 79
-rw-r--r-- vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm 196
-rw-r--r-- vp8/decoder/decodframe.c 12
-rw-r--r-- vp8/decoder/dequantize.c 44
-rw-r--r-- vp8/decoder/dequantize.h 85
-rw-r--r-- vp8/decoder/generic/dsystemdependent.c 8
-rw-r--r-- vp8/decoder/idct_blk.c 90
-rw-r--r-- vp8/decoder/onyxd_int.h 8
-rw-r--r-- vp8/decoder/threading.c 8
-rw-r--r-- vp8/decoder/x86/dequantize_mmx.asm 258
-rw-r--r-- vp8/decoder/x86/dequantize_x86.h 58
-rw-r--r-- vp8/decoder/x86/idct_blk_mmx.c 117
-rw-r--r-- vp8/decoder/x86/idct_blk_sse2.c 90
-rw-r--r-- vp8/decoder/x86/x86_dsystemdependent.c 40
23 files changed, 14 insertions, 1832 deletions
diff --git a/vp8/decoder/arm/arm_dsystemdependent.c b/vp8/decoder/arm/arm_dsystemdependent.c
index f802c5181..bf0a3481a 100644
--- a/vp8/decoder/arm/arm_dsystemdependent.c
+++ b/vp8/decoder/arm/arm_dsystemdependent.c
@@ -11,9 +11,6 @@
#include "vpx_config.h"
#include "vpx_ports/arm.h"
-#include "vp8/common/blockd.h"
-#include "vp8/common/pragmas.h"
-#include "vp8/decoder/dequantize.h"
#include "vp8/decoder/onyxd_int.h"
void vp8_arch_arm_decode_init(VP8D_COMP *pbi)
@@ -30,20 +27,12 @@ void vp8_arch_arm_decode_init(VP8D_COMP *pbi)
#if HAVE_ARMV6
if (flags & HAS_MEDIA)
{
- pbi->dequant.block = vp8_dequantize_b_v6;
- pbi->dequant.idct_add = vp8_dequant_idct_add_v6;
- pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_v6;
- pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_v6;
}
#endif
#if HAVE_ARMV7
if (flags & HAS_NEON)
{
- pbi->dequant.block = vp8_dequantize_b_neon;
- pbi->dequant.idct_add = vp8_dequant_idct_add_neon;
- pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_neon;
- pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_neon;
}
#endif
#endif
diff --git a/vp8/decoder/arm/armv6/dequant_idct_v6.asm b/vp8/decoder/arm/armv6/dequant_idct_v6.asm
deleted file mode 100644
index 2510ad838..000000000
--- a/vp8/decoder/arm/armv6/dequant_idct_v6.asm
+++ /dev/null
@@ -1,190 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
-;
-
- EXPORT |vp8_dequant_idct_add_v6|
-
- AREA |.text|, CODE, READONLY
-;void vp8_dequant_idct_v6(short *input, short *dq,
-; unsigned char *dest, int stride)
-; r0 = q
-; r1 = dq
-; r2 = dst
-; r3 = stride
-
-|vp8_dequant_idct_add_v6| PROC
- stmdb sp!, {r4-r11, lr}
-
- ldr r4, [r0] ;input
- ldr r5, [r1], #4 ;dq
-
- sub sp, sp, #4
- str r3, [sp]
-
- mov r12, #4
-
-vp8_dequant_add_loop
- smulbb r6, r4, r5
- smultt r7, r4, r5
-
- ldr r4, [r0, #4] ;input
- ldr r5, [r1], #4 ;dq
-
- strh r6, [r0], #2
- strh r7, [r0], #2
-
- smulbb r6, r4, r5
- smultt r7, r4, r5
-
- subs r12, r12, #1
-
- ldrne r4, [r0, #4]
- ldrne r5, [r1], #4
-
- strh r6, [r0], #2
- strh r7, [r0], #2
-
- bne vp8_dequant_add_loop
-
- sub r0, r0, #32
- mov r1, r0
-
-; short_idct4x4llm_v6_dual
- ldr r3, cospi8sqrt2minus1
- ldr r4, sinpi8sqrt2
- ldr r6, [r0, #8]
- mov r5, #2
-vp8_dequant_idct_loop1_v6
- ldr r12, [r0, #24]
- ldr r14, [r0, #16]
- smulwt r9, r3, r6
- smulwb r7, r3, r6
- smulwt r10, r4, r6
- smulwb r8, r4, r6
- pkhbt r7, r7, r9, lsl #16
- smulwt r11, r3, r12
- pkhbt r8, r8, r10, lsl #16
- uadd16 r6, r6, r7
- smulwt r7, r4, r12
- smulwb r9, r3, r12
- smulwb r10, r4, r12
- subs r5, r5, #1
- pkhbt r9, r9, r11, lsl #16
- ldr r11, [r0], #4
- pkhbt r10, r10, r7, lsl #16
- uadd16 r7, r12, r9
- usub16 r7, r8, r7
- uadd16 r6, r6, r10
- uadd16 r10, r11, r14
- usub16 r8, r11, r14
- uadd16 r9, r10, r6
- usub16 r10, r10, r6
- uadd16 r6, r8, r7
- usub16 r7, r8, r7
- str r6, [r1, #8]
- ldrne r6, [r0, #8]
- str r7, [r1, #16]
- str r10, [r1, #24]
- str r9, [r1], #4
- bne vp8_dequant_idct_loop1_v6
-
- mov r5, #2
- sub r0, r1, #8
-vp8_dequant_idct_loop2_v6
- ldr r6, [r0], #4
- ldr r7, [r0], #4
- ldr r8, [r0], #4
- ldr r9, [r0], #4
- smulwt r1, r3, r6
- smulwt r12, r4, r6
- smulwt lr, r3, r8
- smulwt r10, r4, r8
- pkhbt r11, r8, r6, lsl #16
- pkhbt r1, lr, r1, lsl #16
- pkhbt r12, r10, r12, lsl #16
- pkhtb r6, r6, r8, asr #16
- uadd16 r6, r1, r6
- pkhbt lr, r9, r7, lsl #16
- uadd16 r10, r11, lr
- usub16 lr, r11, lr
- pkhtb r8, r7, r9, asr #16
- subs r5, r5, #1
- smulwt r1, r3, r8
- smulwb r7, r3, r8
- smulwt r11, r4, r8
- smulwb r9, r4, r8
- pkhbt r1, r7, r1, lsl #16
- uadd16 r8, r1, r8
- pkhbt r11, r9, r11, lsl #16
- usub16 r1, r12, r8
- uadd16 r8, r11, r6
- ldr r9, c0x00040004
- ldr r12, [sp] ; get stride from stack
- uadd16 r6, r10, r8
- usub16 r7, r10, r8
- uadd16 r7, r7, r9
- uadd16 r6, r6, r9
- uadd16 r10, r14, r1
- usub16 r1, r14, r1
- uadd16 r10, r10, r9
- uadd16 r1, r1, r9
- ldr r11, [r2] ; load input from dst
- mov r8, r7, asr #3
- pkhtb r9, r8, r10, asr #19
- mov r8, r1, asr #3
- pkhtb r8, r8, r6, asr #19
- uxtb16 lr, r11, ror #8
- qadd16 r9, r9, lr
- uxtb16 lr, r11
- qadd16 r8, r8, lr
- usat16 r9, #8, r9
- usat16 r8, #8, r8
- orr r9, r8, r9, lsl #8
- ldr r11, [r2, r12] ; load input from dst
- mov r7, r7, lsl #16
- mov r1, r1, lsl #16
- mov r10, r10, lsl #16
- mov r6, r6, lsl #16
- mov r7, r7, asr #3
- pkhtb r7, r7, r10, asr #19
- mov r1, r1, asr #3
- pkhtb r1, r1, r6, asr #19
- uxtb16 r8, r11, ror #8
- qadd16 r7, r7, r8
- uxtb16 r8, r11
- qadd16 r1, r1, r8
- usat16 r7, #8, r7
- usat16 r1, #8, r1
- orr r1, r1, r7, lsl #8
- str r9, [r2], r12 ; store output to dst
- str r1, [r2], r12 ; store output to dst
- bne vp8_dequant_idct_loop2_v6
-
-; vpx_memset
- sub r0, r0, #32
- add sp, sp, #4
-
- mov r12, #0
- str r12, [r0]
- str r12, [r0, #4]
- str r12, [r0, #8]
- str r12, [r0, #12]
- str r12, [r0, #16]
- str r12, [r0, #20]
- str r12, [r0, #24]
- str r12, [r0, #28]
-
- ldmia sp!, {r4 - r11, pc}
- ENDP ; |vp8_dequant_idct_add_v6|
-
-; Constant Pool
-cospi8sqrt2minus1 DCD 0x00004E7B
-sinpi8sqrt2 DCD 0x00008A8C
-c0x00040004 DCD 0x00040004
-
- END
diff --git a/vp8/decoder/arm/armv6/dequantize_v6.asm b/vp8/decoder/arm/armv6/dequantize_v6.asm
deleted file mode 100644
index 72f7e0ee5..000000000
--- a/vp8/decoder/arm/armv6/dequantize_v6.asm
+++ /dev/null
@@ -1,69 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_dequantize_b_loop_v6|
-
- AREA |.text|, CODE, READONLY ; name this block of code
-;-------------------------------
-;void vp8_dequantize_b_loop_v6(short *Q, short *DQC, short *DQ);
-; r0 short *Q,
-; r1 short *DQC
-; r2 short *DQ
-|vp8_dequantize_b_loop_v6| PROC
- stmdb sp!, {r4-r9, lr}
-
- ldr r3, [r0] ;load Q
- ldr r4, [r1] ;load DQC
- ldr r5, [r0, #4]
- ldr r6, [r1, #4]
-
- mov r12, #2 ;loop counter
-
-dequant_loop
- smulbb r7, r3, r4 ;multiply
- smultt r8, r3, r4
- smulbb r9, r5, r6
- smultt lr, r5, r6
-
- ldr r3, [r0, #8]
- ldr r4, [r1, #8]
- ldr r5, [r0, #12]
- ldr r6, [r1, #12]
-
- strh r7, [r2], #2 ;store result
- smulbb r7, r3, r4 ;multiply
- strh r8, [r2], #2
- smultt r8, r3, r4
- strh r9, [r2], #2
- smulbb r9, r5, r6
- strh lr, [r2], #2
- smultt lr, r5, r6
-
- subs r12, r12, #1
-
- add r0, r0, #16
- add r1, r1, #16
-
- ldrne r3, [r0]
- strh r7, [r2], #2 ;store result
- ldrne r4, [r1]
- strh r8, [r2], #2
- ldrne r5, [r0, #4]
- strh r9, [r2], #2
- ldrne r6, [r1, #4]
- strh lr, [r2], #2
-
- bne dequant_loop
-
- ldmia sp!, {r4-r9, pc}
- ENDP ;|vp8_dequantize_b_loop_v6|
-
- END
diff --git a/vp8/decoder/arm/armv6/idct_blk_v6.c b/vp8/decoder/arm/armv6/idct_blk_v6.c
deleted file mode 100644
index c1ef2852f..000000000
--- a/vp8/decoder/arm/armv6/idct_blk_v6.c
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "vpx_config.h"
-#include "vp8/common/idct.h"
-#include "vp8/decoder/dequantize.h"
-
-
-void vp8_dequant_idct_add_y_block_v6(short *q, short *dq,
- unsigned char *dst,
- int stride, char *eobs)
-{
- int i;
-
- for (i = 0; i < 4; i++)
- {
- if (eobs[0] > 1)
- vp8_dequant_idct_add_v6 (q, dq, dst, stride);
- else if (eobs[0] == 1)
- {
- vp8_dc_only_idct_add_v6 (q[0]*dq[0], dst, stride, dst, stride);
- ((int *)q)[0] = 0;
- }
-
- if (eobs[1] > 1)
- vp8_dequant_idct_add_v6 (q+16, dq, dst+4, stride);
- else if (eobs[1] == 1)
- {
- vp8_dc_only_idct_add_v6 (q[16]*dq[0], dst+4, stride, dst+4, stride);
- ((int *)(q+16))[0] = 0;
- }
-
- if (eobs[2] > 1)
- vp8_dequant_idct_add_v6 (q+32, dq, dst+8, stride);
- else if (eobs[2] == 1)
- {
- vp8_dc_only_idct_add_v6 (q[32]*dq[0], dst+8, stride, dst+8, stride);
- ((int *)(q+32))[0] = 0;
- }
-
- if (eobs[3] > 1)
- vp8_dequant_idct_add_v6 (q+48, dq, dst+12, stride);
- else if (eobs[3] == 1)
- {
- vp8_dc_only_idct_add_v6 (q[48]*dq[0], dst+12, stride,dst+12,stride);
- ((int *)(q+48))[0] = 0;
- }
-
- q += 64;
- dst += 4*stride;
- eobs += 4;
- }
-}
-
-void vp8_dequant_idct_add_uv_block_v6(short *q, short *dq,
- unsigned char *dstu,
- unsigned char *dstv,
- int stride, char *eobs)
-{
- int i;
-
- for (i = 0; i < 2; i++)
- {
- if (eobs[0] > 1)
- vp8_dequant_idct_add_v6 (q, dq, dstu, stride);
- else if (eobs[0] == 1)
- {
- vp8_dc_only_idct_add_v6 (q[0]*dq[0], dstu, stride, dstu, stride);
- ((int *)q)[0] = 0;
- }
-
- if (eobs[1] > 1)
- vp8_dequant_idct_add_v6 (q+16, dq, dstu+4, stride);
- else if (eobs[1] == 1)
- {
- vp8_dc_only_idct_add_v6 (q[16]*dq[0], dstu+4, stride,
- dstu+4, stride);
- ((int *)(q+16))[0] = 0;
- }
-
- q += 32;
- dstu += 4*stride;
- eobs += 2;
- }
-
- for (i = 0; i < 2; i++)
- {
- if (eobs[0] > 1)
- vp8_dequant_idct_add_v6 (q, dq, dstv, stride);
- else if (eobs[0] == 1)
- {
- vp8_dc_only_idct_add_v6 (q[0]*dq[0], dstv, stride, dstv, stride);
- ((int *)q)[0] = 0;
- }
-
- if (eobs[1] > 1)
- vp8_dequant_idct_add_v6 (q+16, dq, dstv+4, stride);
- else if (eobs[1] == 1)
- {
- vp8_dc_only_idct_add_v6 (q[16]*dq[0], dstv+4, stride,
- dstv+4, stride);
- ((int *)(q+16))[0] = 0;
- }
-
- q += 32;
- dstv += 4*stride;
- eobs += 2;
- }
-}
diff --git a/vp8/decoder/arm/dequantize_arm.c b/vp8/decoder/arm/dequantize_arm.c
deleted file mode 100644
index 2918e0512..000000000
--- a/vp8/decoder/arm/dequantize_arm.c
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "vpx_config.h"
-#include "vp8/decoder/dequantize.h"
-#include "vp8/common/idct.h"
-#include "vpx_mem/vpx_mem.h"
-
-#if HAVE_ARMV7
-extern void vp8_dequantize_b_loop_neon(short *Q, short *DQC, short *DQ);
-#endif
-
-#if HAVE_ARMV6
-extern void vp8_dequantize_b_loop_v6(short *Q, short *DQC, short *DQ);
-#endif
-
-#if HAVE_ARMV7
-
-void vp8_dequantize_b_neon(BLOCKD *d)
-{
- short *DQ = d->dqcoeff;
- short *Q = d->qcoeff;
- short *DQC = d->dequant;
-
- vp8_dequantize_b_loop_neon(Q, DQC, DQ);
-}
-#endif
-
-#if HAVE_ARMV6
-void vp8_dequantize_b_v6(BLOCKD *d)
-{
- short *DQ = d->dqcoeff;
- short *Q = d->qcoeff;
- short *DQC = d->dequant;
-
- vp8_dequantize_b_loop_v6(Q, DQC, DQ);
-}
-#endif
diff --git a/vp8/decoder/arm/dequantize_arm.h b/vp8/decoder/arm/dequantize_arm.h
deleted file mode 100644
index 1123e8446..000000000
--- a/vp8/decoder/arm/dequantize_arm.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef DEQUANTIZE_ARM_H
-#define DEQUANTIZE_ARM_H
-
-#if HAVE_ARMV6
-extern prototype_dequant_block(vp8_dequantize_b_v6);
-extern prototype_dequant_idct_add(vp8_dequant_idct_add_v6);
-extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_v6);
-extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_v6);
-
-#if !CONFIG_RUNTIME_CPU_DETECT
-#undef vp8_dequant_block
-#define vp8_dequant_block vp8_dequantize_b_v6
-
-#undef vp8_dequant_idct_add
-#define vp8_dequant_idct_add vp8_dequant_idct_add_v6
-
-#undef vp8_dequant_idct_add_y_block
-#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_v6
-
-#undef vp8_dequant_idct_add_uv_block
-#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_v6
-#endif
-#endif
-
-#if HAVE_ARMV7
-extern prototype_dequant_block(vp8_dequantize_b_neon);
-extern prototype_dequant_idct_add(vp8_dequant_idct_add_neon);
-extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_neon);
-extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_neon);
-
-
-#if !CONFIG_RUNTIME_CPU_DETECT
-#undef vp8_dequant_block
-#define vp8_dequant_block vp8_dequantize_b_neon
-
-#undef vp8_dequant_idct_add
-#define vp8_dequant_idct_add vp8_dequant_idct_add_neon
-
-#undef vp8_dequant_idct_add_y_block
-#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_neon
-
-#undef vp8_dequant_idct_add_uv_block
-#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_neon
-#endif
-
-#endif
-
-#endif
diff --git a/vp8/decoder/arm/neon/dequant_idct_neon.asm b/vp8/decoder/arm/neon/dequant_idct_neon.asm
deleted file mode 100644
index 602cce676..000000000
--- a/vp8/decoder/arm/neon/dequant_idct_neon.asm
+++ /dev/null
@@ -1,131 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_dequant_idct_add_neon|
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-;void vp8_dequant_idct_add_neon(short *input, short *dq,
-; unsigned char *dest, int stride)
-; r0 short *input,
-; r1 short *dq,
-; r2 unsigned char *dest
-; r3 int stride
-
-|vp8_dequant_idct_add_neon| PROC
- vld1.16 {q3, q4}, [r0]
- vld1.16 {q5, q6}, [r1]
-
- add r1, r2, r3 ; r1 = dest + stride
- lsl r3, #1 ; 2x stride
-
- vld1.32 {d14[0]}, [r2], r3
- vld1.32 {d14[1]}, [r1], r3
- vld1.32 {d15[0]}, [r2]
- vld1.32 {d15[1]}, [r1]
-
- adr r12, cospi8sqrt2minus1 ; pointer to the first constant
-
- vmul.i16 q1, q3, q5 ;input for short_idct4x4llm_neon
- vmul.i16 q2, q4, q6
-
-;|short_idct4x4llm_neon| PROC
- vld1.16 {d0}, [r12]
- vswp d3, d4 ;q2(vp[4] vp[12])
-
- vqdmulh.s16 q3, q2, d0[2]
- vqdmulh.s16 q4, q2, d0[0]
-
- vqadd.s16 d12, d2, d3 ;a1
- vqsub.s16 d13, d2, d3 ;b1
-
- vshr.s16 q3, q3, #1
- vshr.s16 q4, q4, #1
-
- vqadd.s16 q3, q3, q2
- vqadd.s16 q4, q4, q2
-
- vqsub.s16 d10, d6, d9 ;c1
- vqadd.s16 d11, d7, d8 ;d1
-
- vqadd.s16 d2, d12, d11
- vqadd.s16 d3, d13, d10
- vqsub.s16 d4, d13, d10
- vqsub.s16 d5, d12, d11
-
- vtrn.32 d2, d4
- vtrn.32 d3, d5
- vtrn.16 d2, d3
- vtrn.16 d4, d5
-
-; memset(input, 0, 32) -- 32bytes
- vmov.i16 q14, #0
-
- vswp d3, d4
- vqdmulh.s16 q3, q2, d0[2]
- vqdmulh.s16 q4, q2, d0[0]
-
- vqadd.s16 d12, d2, d3 ;a1
- vqsub.s16 d13, d2, d3 ;b1
-
- vmov q15, q14
-
- vshr.s16 q3, q3, #1
- vshr.s16 q4, q4, #1
-
- vqadd.s16 q3, q3, q2
- vqadd.s16 q4, q4, q2
-
- vqsub.s16 d10, d6, d9 ;c1
- vqadd.s16 d11, d7, d8 ;d1
-
- vqadd.s16 d2, d12, d11
- vqadd.s16 d3, d13, d10
- vqsub.s16 d4, d13, d10
- vqsub.s16 d5, d12, d11
-
- vst1.16 {q14, q15}, [r0]
-
- vrshr.s16 d2, d2, #3
- vrshr.s16 d3, d3, #3
- vrshr.s16 d4, d4, #3
- vrshr.s16 d5, d5, #3
-
- vtrn.32 d2, d4
- vtrn.32 d3, d5
- vtrn.16 d2, d3
- vtrn.16 d4, d5
-
- vaddw.u8 q1, q1, d14
- vaddw.u8 q2, q2, d15
-
- sub r2, r2, r3
- sub r1, r1, r3
-
- vqmovun.s16 d0, q1
- vqmovun.s16 d1, q2
-
- vst1.32 {d0[0]}, [r2], r3
- vst1.32 {d0[1]}, [r1], r3
- vst1.32 {d1[0]}, [r2]
- vst1.32 {d1[1]}, [r1]
-
- bx lr
-
- ENDP ; |vp8_dequant_idct_add_neon|
-
-; Constant Pool
-cospi8sqrt2minus1 DCD 0x4e7b4e7b
-sinpi8sqrt2 DCD 0x8a8c8a8c
-
- END
diff --git a/vp8/decoder/arm/neon/dequantizeb_neon.asm b/vp8/decoder/arm/neon/dequantizeb_neon.asm
deleted file mode 100644
index c8e0c31f2..000000000
--- a/vp8/decoder/arm/neon/dequantizeb_neon.asm
+++ /dev/null
@@ -1,34 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_dequantize_b_loop_neon|
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-; r0 short *Q,
-; r1 short *DQC
-; r2 short *DQ
-|vp8_dequantize_b_loop_neon| PROC
- vld1.16 {q0, q1}, [r0]
- vld1.16 {q2, q3}, [r1]
-
- vmul.i16 q4, q0, q2
- vmul.i16 q5, q1, q3
-
- vst1.16 {q4, q5}, [r2]
-
- bx lr
-
- ENDP
-
- END
diff --git a/vp8/decoder/arm/neon/idct_blk_neon.c b/vp8/decoder/arm/neon/idct_blk_neon.c
deleted file mode 100644
index 185895f05..000000000
--- a/vp8/decoder/arm/neon/idct_blk_neon.c
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "vpx_config.h"
-#include "vp8/common/idct.h"
-#include "vp8/decoder/dequantize.h"
-
-/* place these declarations here because we don't want to maintain them
- * outside of this scope
- */
-void idct_dequant_full_2x_neon(short *q, short *dq,
- unsigned char *dst, int stride);
-void idct_dequant_0_2x_neon(short *q, short dq,
- unsigned char *dst, int stride);
-
-
-void vp8_dequant_idct_add_y_block_neon(short *q, short *dq,
- unsigned char *dst,
- int stride, char *eobs)
-{
- int i;
-
- for (i = 0; i < 4; i++)
- {
- if (((short *)(eobs))[0])
- {
- if (((short *)eobs)[0] & 0xfefe)
- idct_dequant_full_2x_neon (q, dq, dst, stride);
- else
- idct_dequant_0_2x_neon (q, dq[0], dst, stride);
- }
-
- if (((short *)(eobs))[1])
- {
- if (((short *)eobs)[1] & 0xfefe)
- idct_dequant_full_2x_neon (q+32, dq, dst+8, stride);
- else
- idct_dequant_0_2x_neon (q+32, dq[0], dst+8, stride);
- }
- q += 64;
- dst += 4*stride;
- eobs += 4;
- }
-}
-
-void vp8_dequant_idct_add_uv_block_neon(short *q, short *dq,
- unsigned char *dstu,
- unsigned char *dstv,
- int stride, char *eobs)
-{
- if (((short *)(eobs))[0])
- {
- if (((short *)eobs)[0] & 0xfefe)
- idct_dequant_full_2x_neon (q, dq, dstu, stride);
- else
- idct_dequant_0_2x_neon (q, dq[0], dstu, stride);
- }
-
- q += 32;
- dstu += 4*stride;
-
- if (((short *)(eobs))[1])
- {
- if (((short *)eobs)[1] & 0xfefe)
- idct_dequant_full_2x_neon (q, dq, dstu, stride);
- else
- idct_dequant_0_2x_neon (q, dq[0], dstu, stride);
- }
-
- q += 32;
-
- if (((short *)(eobs))[2])
- {
- if (((short *)eobs)[2] & 0xfefe)
- idct_dequant_full_2x_neon (q, dq, dstv, stride);
- else
- idct_dequant_0_2x_neon (q, dq[0], dstv, stride);
- }
-
- q += 32;
- dstv += 4*stride;
-
- if (((short *)(eobs))[3])
- {
- if (((short *)eobs)[3] & 0xfefe)
- idct_dequant_full_2x_neon (q, dq, dstv, stride);
- else
- idct_dequant_0_2x_neon (q, dq[0], dstv, stride);
- }
-}
diff --git a/vp8/decoder/arm/neon/idct_dequant_0_2x_neon.asm b/vp8/decoder/arm/neon/idct_dequant_0_2x_neon.asm
deleted file mode 100644
index 6c29c5586..000000000
--- a/vp8/decoder/arm/neon/idct_dequant_0_2x_neon.asm
+++ /dev/null
@@ -1,79 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
-;
-
-
- EXPORT |idct_dequant_0_2x_neon|
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-;void idct_dequant_0_2x_neon(short *q, short dq,
-; unsigned char *dst, int stride);
-; r0 *q
-; r1 dq
-; r2 *dst
-; r3 stride
-|idct_dequant_0_2x_neon| PROC
- push {r4, r5}
-
- add r12, r2, #4
- vld1.32 {d2[0]}, [r2], r3
- vld1.32 {d8[0]}, [r12], r3
- vld1.32 {d2[1]}, [r2], r3
- vld1.32 {d8[1]}, [r12], r3
- vld1.32 {d4[0]}, [r2], r3
- vld1.32 {d10[0]}, [r12], r3
- vld1.32 {d4[1]}, [r2], r3
- vld1.32 {d10[1]}, [r12], r3
-
- ldrh r12, [r0] ; lo q
- ldrh r4, [r0, #32] ; hi q
- mov r5, #0
- strh r5, [r0]
- strh r5, [r0, #32]
-
- sxth r12, r12 ; lo
- mul r0, r12, r1
- add r0, r0, #4
- asr r0, r0, #3
- vdup.16 q0, r0
- sxth r4, r4 ; hi
- mul r0, r4, r1
- add r0, r0, #4
- asr r0, r0, #3
- vdup.16 q3, r0
-
- vaddw.u8 q1, q0, d2 ; lo
- vaddw.u8 q2, q0, d4
- vaddw.u8 q4, q3, d8 ; hi
- vaddw.u8 q5, q3, d10
-
- sub r2, r2, r3, lsl #2 ; dst - 4*stride
- add r0, r2, #4
-
- vqmovun.s16 d2, q1 ; lo
- vqmovun.s16 d4, q2
- vqmovun.s16 d8, q4 ; hi
- vqmovun.s16 d10, q5
-
- vst1.32 {d2[0]}, [r2], r3 ; lo
- vst1.32 {d8[0]}, [r0], r3 ; hi
- vst1.32 {d2[1]}, [r2], r3
- vst1.32 {d8[1]}, [r0], r3
- vst1.32 {d4[0]}, [r2], r3
- vst1.32 {d10[0]}, [r0], r3
- vst1.32 {d4[1]}, [r2]
- vst1.32 {d10[1]}, [r0]
-
- pop {r4, r5}
- bx lr
-
- ENDP ; |idct_dequant_0_2x_neon|
- END
diff --git a/vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm b/vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm
deleted file mode 100644
index d5dce63f6..000000000
--- a/vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm
+++ /dev/null
@@ -1,196 +0,0 @@
-;
-; Copyright (c) 2010 The Webm project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |idct_dequant_full_2x_neon|
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-;void idct_dequant_full_2x_neon(short *q, short *dq,
-; unsigned char *dst, int stride);
-; r0 *q,
-; r1 *dq,
-; r2 *dst
-; r3 stride
-|idct_dequant_full_2x_neon| PROC
- vld1.16 {q0, q1}, [r1] ; dq (same l/r)
- vld1.16 {q2, q3}, [r0] ; l q
- add r0, r0, #32
- vld1.16 {q4, q5}, [r0] ; r q
- add r12, r2, #4
-
- ; interleave the predictors
- vld1.32 {d28[0]}, [r2], r3 ; l pre
- vld1.32 {d28[1]}, [r12], r3 ; r pre
- vld1.32 {d29[0]}, [r2], r3
- vld1.32 {d29[1]}, [r12], r3
- vld1.32 {d30[0]}, [r2], r3
- vld1.32 {d30[1]}, [r12], r3
- vld1.32 {d31[0]}, [r2], r3
- vld1.32 {d31[1]}, [r12]
-
- adr r1, cospi8sqrt2minus1 ; pointer to the first constant
-
- ; dequant: q[i] = q[i] * dq[i]
- vmul.i16 q2, q2, q0
- vmul.i16 q3, q3, q1
- vmul.i16 q4, q4, q0
- vmul.i16 q5, q5, q1
-
- vld1.16 {d0}, [r1]
-
- ; q2: l0r0 q3: l8r8
- ; q4: l4r4 q5: l12r12
- vswp d5, d8
- vswp d7, d10
-
- ; _CONSTANTS_ * 4,12 >> 16
- ; q6: 4 * sinpi : c1/temp1
- ; q7: 12 * sinpi : d1/temp2
- ; q8: 4 * cospi
- ; q9: 12 * cospi
- vqdmulh.s16 q6, q4, d0[2] ; sinpi8sqrt2
- vqdmulh.s16 q7, q5, d0[2]
- vqdmulh.s16 q8, q4, d0[0] ; cospi8sqrt2minus1
- vqdmulh.s16 q9, q5, d0[0]
-
- vqadd.s16 q10, q2, q3 ; a1 = 0 + 8
- vqsub.s16 q11, q2, q3 ; b1 = 0 - 8
-
- ; vqdmulh only accepts signed values. this was a problem because
- ; our constant had the high bit set, and was treated as a negative value.
- ; vqdmulh also doubles the value before it shifts by 16. we need to
- ; compensate for this. in the case of sinpi8sqrt2, the lowest bit is 0,
- ; so we can shift the constant without losing precision. this avoids
- ; shift again afterward, but also avoids the sign issue. win win!
- ; for cospi8sqrt2minus1 the lowest bit is 1, so we lose precision if we
- ; pre-shift it
- vshr.s16 q8, q8, #1
- vshr.s16 q9, q9, #1
-
- ; q4: 4 + 4 * cospi : d1/temp1
- ; q5: 12 + 12 * cospi : c1/temp2
- vqadd.s16 q4, q4, q8
- vqadd.s16 q5, q5, q9
-
- ; c1 = temp1 - temp2
- ; d1 = temp1 + temp2
- vqsub.s16 q2, q6, q5
- vqadd.s16 q3, q4, q7
-
- ; [0]: a1+d1
- ; [1]: b1+c1
- ; [2]: b1-c1
- ; [3]: a1-d1
- vqadd.s16 q4, q10, q3
- vqadd.s16 q5, q11, q2
- vqsub.s16 q6, q11, q2
- vqsub.s16 q7, q10, q3
-
- ; rotate
- vtrn.32 q4, q6
- vtrn.32 q5, q7
- vtrn.16 q4, q5
- vtrn.16 q6, q7
- ; idct loop 2
- ; q4: l 0, 4, 8,12 r 0, 4, 8,12
- ; q5: l 1, 5, 9,13 r 1, 5, 9,13
- ; q6: l 2, 6,10,14 r 2, 6,10,14
- ; q7: l 3, 7,11,15 r 3, 7,11,15
-
- ; q8: 1 * sinpi : c1/temp1
- ; q9: 3 * sinpi : d1/temp2
- ; q10: 1 * cospi
- ; q11: 3 * cospi
- vqdmulh.s16 q8, q5, d0[2] ; sinpi8sqrt2
- vqdmulh.s16 q9, q7, d0[2]
- vqdmulh.s16 q10, q5, d0[0] ; cospi8sqrt2minus1
- vqdmulh.s16 q11, q7, d0[0]
-
- vqadd.s16 q2, q4, q6 ; a1 = 0 + 2
- vqsub.s16 q3, q4, q6 ; b1 = 0 - 2
-
- ; see note on shifting above
- vshr.s16 q10, q10, #1
- vshr.s16 q11, q11, #1
-
- ; q10: 1 + 1 * cospi : d1/temp1
- ; q11: 3 + 3 * cospi : c1/temp2
- vqadd.s16 q10, q5, q10
- vqadd.s16 q11, q7, q11
-
- ; q8: c1 = temp1 - temp2
- ; q9: d1 = temp1 + temp2
- vqsub.s16 q8, q8, q11
- vqadd.s16 q9, q10, q9
-
- ; a1+d1
- ; b1+c1
- ; b1-c1
- ; a1-d1
- vqadd.s16 q4, q2, q9
- vqadd.s16 q5, q3, q8
- vqsub.s16 q6, q3, q8
- vqsub.s16 q7, q2, q9
-
- ; +4 >> 3 (rounding)
- vrshr.s16 q4, q4, #3 ; lo
- vrshr.s16 q5, q5, #3
- vrshr.s16 q6, q6, #3 ; hi
- vrshr.s16 q7, q7, #3
-
- vtrn.32 q4, q6
- vtrn.32 q5, q7
- vtrn.16 q4, q5
- vtrn.16 q6, q7
-
- ; adding pre
- ; input is still packed. pre was read interleaved
- vaddw.u8 q4, q4, d28
- vaddw.u8 q5, q5, d29
- vaddw.u8 q6, q6, d30
- vaddw.u8 q7, q7, d31
-
- vmov.i16 q14, #0
- vmov q15, q14
- vst1.16 {q14, q15}, [r0] ; write over high input
- sub r0, r0, #32
- vst1.16 {q14, q15}, [r0] ; write over low input
-
- sub r2, r2, r3, lsl #2 ; dst - 4*stride
- add r1, r2, #4 ; hi
-
- ;saturate and narrow
- vqmovun.s16 d0, q4 ; lo
- vqmovun.s16 d1, q5
- vqmovun.s16 d2, q6 ; hi
- vqmovun.s16 d3, q7
-
- vst1.32 {d0[0]}, [r2], r3 ; lo
- vst1.32 {d0[1]}, [r1], r3 ; hi
- vst1.32 {d1[0]}, [r2], r3
- vst1.32 {d1[1]}, [r1], r3
- vst1.32 {d2[0]}, [r2], r3
- vst1.32 {d2[1]}, [r1], r3
- vst1.32 {d3[0]}, [r2]
- vst1.32 {d3[1]}, [r1]
-
- bx lr
-
- ENDP ; |idct_dequant_full_2x_neon|
-
-; Constant Pool
-cospi8sqrt2minus1 DCD 0x4e7b
-; because the lowest bit in 0x8a8c is 0, we can pre-shift this
-sinpi8sqrt2 DCD 0x4546
-
- END
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 31eafcf54..4ab09c596 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -15,7 +15,7 @@
#include "vp8/common/reconintra4x4.h"
#include "vp8/common/recon.h"
#include "vp8/common/reconinter.h"
-#include "dequantize.h"
+#include "vp8/common/dequantize.h"
#include "detokenize.h"
#include "vp8/common/invtrans.h"
#include "vp8/common/alloccommon.h"
@@ -32,7 +32,7 @@
#endif
#include "vpx_mem/vpx_mem.h"
#include "vp8/common/idct.h"
-#include "dequantize.h"
+
#include "vp8/common/threading.h"
#include "decoderthreading.h"
#include "dboolhuff.h"
@@ -218,7 +218,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
{
if (xd->eobs[i] > 1)
{
- DEQUANT_INVOKE(&pbi->dequant, idct_add)
+ DEQUANT_INVOKE(&pbi->common.rtcd.dequant, idct_add)
(b->qcoeff, b->dequant,
*(b->base_dst) + b->dst, b->dst_stride);
}
@@ -247,7 +247,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
/* do 2nd order transform on the dc block */
if (xd->eobs[24] > 1)
{
- DEQUANT_INVOKE(&pbi->dequant, block)(b);
+ DEQUANT_INVOKE(&pbi->common.rtcd.dequant, block)(b);
IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0],
xd->qcoeff);
@@ -272,7 +272,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
DQC[0] = 1;
}
- DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block)
+ DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_y_block)
(xd->qcoeff, xd->block[0].dequant,
xd->dst.y_buffer,
xd->dst.y_stride, xd->eobs);
@@ -281,7 +281,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
DQC[0] = dc_dequant_temp;
}
- DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block)
+ DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_uv_block)
(xd->qcoeff+16*16, xd->block[16].dequant,
xd->dst.u_buffer, xd->dst.v_buffer,
xd->dst.uv_stride, xd->eobs+16);
diff --git a/vp8/decoder/dequantize.c b/vp8/decoder/dequantize.c
deleted file mode 100644
index 4a48a3192..000000000
--- a/vp8/decoder/dequantize.c
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "vpx_config.h"
-#include "dequantize.h"
-#include "vp8/common/idct.h"
-#include "vpx_mem/vpx_mem.h"
-
-void vp8_dequantize_b_c(BLOCKD *d)
-{
- int i;
- short *DQ = d->dqcoeff;
- short *Q = d->qcoeff;
- short *DQC = d->dequant;
-
- for (i = 0; i < 16; i++)
- {
- DQ[i] = Q[i] * DQC[i];
- }
-}
-
-void vp8_dequant_idct_add_c(short *input, short *dq,
- unsigned char *dest, int stride)
-{
- int i;
-
- for (i = 0; i < 16; i++)
- {
- input[i] = dq[i] * input[i];
- }
-
- vp8_short_idct4x4llm_c(input, dest, stride, dest, stride);
-
- vpx_memset(input, 0, 32);
-
-}
diff --git a/vp8/decoder/dequantize.h b/vp8/decoder/dequantize.h
deleted file mode 100644
index f66cf2bac..000000000
--- a/vp8/decoder/dequantize.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef DEQUANTIZE_H
-#define DEQUANTIZE_H
-#include "vp8/common/blockd.h"
-
-#define prototype_dequant_block(sym) \
- void sym(BLOCKD *x)
-
-#define prototype_dequant_idct_add(sym) \
- void sym(short *input, short *dq, \
- unsigned char *output, \
- int stride)
-
-#define prototype_dequant_idct_add_y_block(sym) \
- void sym(short *q, short *dq, \
- unsigned char *dst, \
- int stride, char *eobs)
-
-#define prototype_dequant_idct_add_uv_block(sym) \
- void sym(short *q, short *dq, \
- unsigned char *dst_u, \
- unsigned char *dst_v, int stride, char *eobs)
-
-#if ARCH_X86 || ARCH_X86_64
-#include "x86/dequantize_x86.h"
-#endif
-
-#if ARCH_ARM
-#include "arm/dequantize_arm.h"
-#endif
-
-#ifndef vp8_dequant_block
-#define vp8_dequant_block vp8_dequantize_b_c
-#endif
-extern prototype_dequant_block(vp8_dequant_block);
-
-#ifndef vp8_dequant_idct_add
-#define vp8_dequant_idct_add vp8_dequant_idct_add_c
-#endif
-extern prototype_dequant_idct_add(vp8_dequant_idct_add);
-
-#ifndef vp8_dequant_idct_add_y_block
-#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_c
-#endif
-extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block);
-
-#ifndef vp8_dequant_idct_add_uv_block
-#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_c
-#endif
-extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block);
-
-
-typedef prototype_dequant_block((*vp8_dequant_block_fn_t));
-
-typedef prototype_dequant_idct_add((*vp8_dequant_idct_add_fn_t));
-
-typedef prototype_dequant_idct_add_y_block((*vp8_dequant_idct_add_y_block_fn_t));
-
-typedef prototype_dequant_idct_add_uv_block((*vp8_dequant_idct_add_uv_block_fn_t));
-
-typedef struct
-{
- vp8_dequant_block_fn_t block;
- vp8_dequant_idct_add_fn_t idct_add;
- vp8_dequant_idct_add_y_block_fn_t idct_add_y_block;
- vp8_dequant_idct_add_uv_block_fn_t idct_add_uv_block;
-} vp8_dequant_rtcd_vtable_t;
-
-#if CONFIG_RUNTIME_CPU_DETECT
-#define DEQUANT_INVOKE(ctx,fn) (ctx)->fn
-#else
-#define DEQUANT_INVOKE(ctx,fn) vp8_dequant_##fn
-#endif
-
-#endif
diff --git a/vp8/decoder/generic/dsystemdependent.c b/vp8/decoder/generic/dsystemdependent.c
index d9f9ba3c8..8a84e566a 100644
--- a/vp8/decoder/generic/dsystemdependent.c
+++ b/vp8/decoder/generic/dsystemdependent.c
@@ -10,7 +10,7 @@
#include "vpx_config.h"
-#include "vp8/decoder/dequantize.h"
+#include "vp8/common/dequantize.h"
#include "vp8/decoder/onyxd_int.h"
extern void vp8_arch_x86_decode_init(VP8D_COMP *pbi);
@@ -20,11 +20,7 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi)
{
/* Pure C: */
#if CONFIG_RUNTIME_CPU_DETECT
- pbi->mb.rtcd = &pbi->common.rtcd;
- pbi->dequant.block = vp8_dequantize_b_c;
- pbi->dequant.idct_add = vp8_dequant_idct_add_c;
- pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_c;
- pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_c;
+ pbi->mb.rtcd = &pbi->common.rtcd;
#endif
#if ARCH_X86 || ARCH_X86_64
diff --git a/vp8/decoder/idct_blk.c b/vp8/decoder/idct_blk.c
deleted file mode 100644
index 249fad4ea..000000000
--- a/vp8/decoder/idct_blk.c
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "vpx_config.h"
-#include "vp8/common/idct.h"
-#include "dequantize.h"
-
-void vp8_dequant_idct_add_c(short *input, short *dq,
- unsigned char *dest, int stride);
-void vp8_dc_only_idct_add_c(short input_dc, unsigned char * pred,
- int pred_stride, unsigned char *dst_ptr,
- int dst_stride);
-
-void vp8_dequant_idct_add_y_block_c
- (short *q, short *dq,
- unsigned char *dst, int stride, char *eobs)
-{
- int i, j;
-
- for (i = 0; i < 4; i++)
- {
- for (j = 0; j < 4; j++)
- {
- if (*eobs++ > 1)
- vp8_dequant_idct_add_c (q, dq, dst, stride);
- else
- {
- vp8_dc_only_idct_add_c (q[0]*dq[0], dst, stride, dst, stride);
- ((int *)q)[0] = 0;
- }
-
- q += 16;
- dst += 4;
- }
-
- dst += 4*stride - 16;
- }
-}
-
-void vp8_dequant_idct_add_uv_block_c
- (short *q, short *dq,
- unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
-{
- int i, j;
-
- for (i = 0; i < 2; i++)
- {
- for (j = 0; j < 2; j++)
- {
- if (*eobs++ > 1)
- vp8_dequant_idct_add_c (q, dq, dstu, stride);
- else
- {
- vp8_dc_only_idct_add_c (q[0]*dq[0], dstu, stride, dstu, stride);
- ((int *)q)[0] = 0;
- }
-
- q += 16;
- dstu += 4;
- }
-
- dstu += 4*stride - 8;
- }
-
- for (i = 0; i < 2; i++)
- {
- for (j = 0; j < 2; j++)
- {
- if (*eobs++ > 1)
- vp8_dequant_idct_add_c (q, dq, dstv, stride);
- else
- {
- vp8_dc_only_idct_add_c (q[0]*dq[0], dstv, stride, dstv, stride);
- ((int *)q)[0] = 0;
- }
-
- q += 16;
- dstv += 4;
- }
-
- dstv += 4*stride - 8;
- }
-}
diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h
index 519a7f2b9..f48f5173b 100644
--- a/vp8/decoder/onyxd_int.h
+++ b/vp8/decoder/onyxd_int.h
@@ -16,7 +16,8 @@
#include "treereader.h"
#include "vp8/common/onyxc_int.h"
#include "vp8/common/threading.h"
-#include "dequantize.h"
+
+
#if CONFIG_ERROR_CONCEALMENT
#include "ec_types.h"
#endif
@@ -93,11 +94,6 @@ typedef struct VP8Decompressor
DATARATE dr[16];
-#if CONFIG_RUNTIME_CPU_DETECT
- vp8_dequant_rtcd_vtable_t dequant;
-#endif
-
-
vp8_prob prob_intra;
vp8_prob prob_last;
vp8_prob prob_gf;
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index 1967781eb..947b3a1c6 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -189,7 +189,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m
{
if (xd->eobs[i] > 1)
{
- DEQUANT_INVOKE(&pbi->dequant, idct_add)
+ DEQUANT_INVOKE(&pbi->common.rtcd.dequant, idct_add)
(b->qcoeff, b->dequant,
*(b->base_dst) + b->dst, b->dst_stride);
}
@@ -217,7 +217,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m
/* do 2nd order transform on the dc block */
if (xd->eobs[24] > 1)
{
- DEQUANT_INVOKE(&pbi->dequant, block)(b);
+ DEQUANT_INVOKE(&pbi->common.rtcd.dequant, block)(b);
IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0],
xd->qcoeff);
@@ -248,13 +248,13 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m
DQC = local_dequant;
}
- DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block)
+ DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_y_block)
(xd->qcoeff, DQC,
xd->dst.y_buffer,
xd->dst.y_stride, xd->eobs);
}
- DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block)
+ DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_uv_block)
(xd->qcoeff+16*16, xd->block[16].dequant,
xd->dst.u_buffer, xd->dst.v_buffer,
xd->dst.uv_stride, xd->eobs+16);
diff --git a/vp8/decoder/x86/dequantize_mmx.asm b/vp8/decoder/x86/dequantize_mmx.asm
deleted file mode 100644
index de9eba89f..000000000
--- a/vp8/decoder/x86/dequantize_mmx.asm
+++ /dev/null
@@ -1,258 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
-%include "vpx_ports/x86_abi_support.asm"
-
-
-;void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q)
-global sym(vp8_dequantize_b_impl_mmx)
-sym(vp8_dequantize_b_impl_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 3
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;sq
- mov rdi, arg(1) ;dq
- mov rax, arg(2) ;q
-
- movq mm1, [rsi]
- pmullw mm1, [rax+0] ; mm4 *= kernel 0 modifiers.
- movq [rdi], mm1
-
- movq mm1, [rsi+8]
- pmullw mm1, [rax+8] ; mm4 *= kernel 0 modifiers.
- movq [rdi+8], mm1
-
- movq mm1, [rsi+16]
- pmullw mm1, [rax+16] ; mm4 *= kernel 0 modifiers.
- movq [rdi+16], mm1
-
- movq mm1, [rsi+24]
- pmullw mm1, [rax+24] ; mm4 *= kernel 0 modifiers.
- movq [rdi+24], mm1
-
- ; begin epilog
- pop rdi
- pop rsi
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;void dequant_idct_add_mmx(
-;short *input, 0
-;short *dq, 1
-;unsigned char *dest, 2
-;int stride) 3
-global sym(vp8_dequant_idct_add_mmx)
-sym(vp8_dequant_idct_add_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
- GET_GOT rbx
- push rdi
- ; end prolog
-
- mov rax, arg(0) ;input
- mov rdx, arg(1) ;dq
-
-
- movq mm0, [rax ]
- pmullw mm0, [rdx]
-
- movq mm1, [rax +8]
- pmullw mm1, [rdx +8]
-
- movq mm2, [rax+16]
- pmullw mm2, [rdx+16]
-
- movq mm3, [rax+24]
- pmullw mm3, [rdx+24]
-
- mov rdx, arg(2) ;dest
-
- pxor mm7, mm7
-
-
- movq [rax], mm7
- movq [rax+8], mm7
-
- movq [rax+16],mm7
- movq [rax+24],mm7
-
-
- movsxd rdi, dword ptr arg(3) ;stride
-
- psubw mm0, mm2 ; b1= 0-2
- paddw mm2, mm2 ;
-
- movq mm5, mm1
- paddw mm2, mm0 ; a1 =0+2
-
- pmulhw mm5, [GLOBAL(x_s1sqr2)];
- paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2)
-
- movq mm7, mm3 ;
- pmulhw mm7, [GLOBAL(x_c1sqr2less1)];
-
- paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2)
- psubw mm7, mm5 ; c1
-
- movq mm5, mm1
- movq mm4, mm3
-
- pmulhw mm5, [GLOBAL(x_c1sqr2less1)]
- paddw mm5, mm1
-
- pmulhw mm3, [GLOBAL(x_s1sqr2)]
- paddw mm3, mm4
-
- paddw mm3, mm5 ; d1
- movq mm6, mm2 ; a1
-
- movq mm4, mm0 ; b1
- paddw mm2, mm3 ;0
-
- paddw mm4, mm7 ;1
- psubw mm0, mm7 ;2
-
- psubw mm6, mm3 ;3
-
- movq mm1, mm2 ; 03 02 01 00
- movq mm3, mm4 ; 23 22 21 20
-
- punpcklwd mm1, mm0 ; 11 01 10 00
- punpckhwd mm2, mm0 ; 13 03 12 02
-
- punpcklwd mm3, mm6 ; 31 21 30 20
- punpckhwd mm4, mm6 ; 33 23 32 22
-
- movq mm0, mm1 ; 11 01 10 00
- movq mm5, mm2 ; 13 03 12 02
-
- punpckldq mm0, mm3 ; 30 20 10 00
- punpckhdq mm1, mm3 ; 31 21 11 01
-
- punpckldq mm2, mm4 ; 32 22 12 02
- punpckhdq mm5, mm4 ; 33 23 13 03
-
- movq mm3, mm5 ; 33 23 13 03
-
- psubw mm0, mm2 ; b1= 0-2
- paddw mm2, mm2 ;
-
- movq mm5, mm1
- paddw mm2, mm0 ; a1 =0+2
-
- pmulhw mm5, [GLOBAL(x_s1sqr2)];
- paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2)
-
- movq mm7, mm3 ;
- pmulhw mm7, [GLOBAL(x_c1sqr2less1)];
-
- paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2)
- psubw mm7, mm5 ; c1
-
- movq mm5, mm1
- movq mm4, mm3
-
- pmulhw mm5, [GLOBAL(x_c1sqr2less1)]
- paddw mm5, mm1
-
- pmulhw mm3, [GLOBAL(x_s1sqr2)]
- paddw mm3, mm4
-
- paddw mm3, mm5 ; d1
- paddw mm0, [GLOBAL(fours)]
-
- paddw mm2, [GLOBAL(fours)]
- movq mm6, mm2 ; a1
-
- movq mm4, mm0 ; b1
- paddw mm2, mm3 ;0
-
- paddw mm4, mm7 ;1
- psubw mm0, mm7 ;2
-
- psubw mm6, mm3 ;3
- psraw mm2, 3
-
- psraw mm0, 3
- psraw mm4, 3
-
- psraw mm6, 3
-
- movq mm1, mm2 ; 03 02 01 00
- movq mm3, mm4 ; 23 22 21 20
-
- punpcklwd mm1, mm0 ; 11 01 10 00
- punpckhwd mm2, mm0 ; 13 03 12 02
-
- punpcklwd mm3, mm6 ; 31 21 30 20
- punpckhwd mm4, mm6 ; 33 23 32 22
-
- movq mm0, mm1 ; 11 01 10 00
- movq mm5, mm2 ; 13 03 12 02
-
- punpckldq mm0, mm3 ; 30 20 10 00
- punpckhdq mm1, mm3 ; 31 21 11 01
-
- punpckldq mm2, mm4 ; 32 22 12 02
- punpckhdq mm5, mm4 ; 33 23 13 03
-
- pxor mm7, mm7
-
- movd mm4, [rdx]
- punpcklbw mm4, mm7
- paddsw mm0, mm4
- packuswb mm0, mm7
- movd [rdx], mm0
-
- movd mm4, [rdx+rdi]
- punpcklbw mm4, mm7
- paddsw mm1, mm4
- packuswb mm1, mm7
- movd [rdx+rdi], mm1
-
- movd mm4, [rdx+2*rdi]
- punpcklbw mm4, mm7
- paddsw mm2, mm4
- packuswb mm2, mm7
- movd [rdx+rdi*2], mm2
-
- add rdx, rdi
-
- movd mm4, [rdx+2*rdi]
- punpcklbw mm4, mm7
- paddsw mm5, mm4
- packuswb mm5, mm7
- movd [rdx+rdi*2], mm5
-
- ; begin epilog
- pop rdi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-SECTION_RODATA
-align 16
-x_s1sqr2:
- times 4 dw 0x8A8C
-align 16
-x_c1sqr2less1:
- times 4 dw 0x4E7B
-align 16
-fours:
- times 4 dw 0x0004
diff --git a/vp8/decoder/x86/dequantize_x86.h b/vp8/decoder/x86/dequantize_x86.h
deleted file mode 100644
index 49bcb7f19..000000000
--- a/vp8/decoder/x86/dequantize_x86.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef DEQUANTIZE_X86_H
-#define DEQUANTIZE_X86_H
-
-
-/* Note:
- *
- * This platform is commonly built for runtime CPU detection. If you modify
- * any of the function mappings present in this file, be sure to also update
- * them in the function pointer initialization code
- */
-#if HAVE_MMX
-extern prototype_dequant_block(vp8_dequantize_b_mmx);
-extern prototype_dequant_idct_add(vp8_dequant_idct_add_mmx);
-extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_mmx);
-extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_mmx);
-
-#if !CONFIG_RUNTIME_CPU_DETECT
-#undef vp8_dequant_block
-#define vp8_dequant_block vp8_dequantize_b_mmx
-
-#undef vp8_dequant_idct_add
-#define vp8_dequant_idct_add vp8_dequant_idct_add_mmx
-
-#undef vp8_dequant_idct_add_y_block
-#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_mmx
-
-#undef vp8_dequant_idct_add_uv_block
-#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_mmx
-
-#endif
-#endif
-
-#if HAVE_SSE2
-extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_sse2);
-extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_sse2);
-
-#if !CONFIG_RUNTIME_CPU_DETECT
-#undef vp8_dequant_idct_add_y_block
-#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_sse2
-
-#undef vp8_dequant_idct_add_uv_block
-#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_sse2
-
-#endif
-#endif
-
-#endif
diff --git a/vp8/decoder/x86/idct_blk_mmx.c b/vp8/decoder/x86/idct_blk_mmx.c
deleted file mode 100644
index 29276e5d7..000000000
--- a/vp8/decoder/x86/idct_blk_mmx.c
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "vpx_config.h"
-#include "vp8/common/idct.h"
-#include "vp8/decoder/dequantize.h"
-
-void vp8_dequant_idct_add_y_block_mmx
- (short *q, short *dq,
- unsigned char *dst, int stride, char *eobs)
-{
- int i;
-
- for (i = 0; i < 4; i++)
- {
- if (eobs[0] > 1)
- vp8_dequant_idct_add_mmx (q, dq, dst, stride);
- else if (eobs[0] == 1)
- {
- vp8_dc_only_idct_add_mmx (q[0]*dq[0], dst, stride, dst, stride);
- ((int *)q)[0] = 0;
- }
-
- if (eobs[1] > 1)
- vp8_dequant_idct_add_mmx (q+16, dq, dst+4, stride);
- else if (eobs[1] == 1)
- {
- vp8_dc_only_idct_add_mmx (q[16]*dq[0], dst+4, stride,
- dst+4, stride);
- ((int *)(q+16))[0] = 0;
- }
-
- if (eobs[2] > 1)
- vp8_dequant_idct_add_mmx (q+32, dq, dst+8, stride);
- else if (eobs[2] == 1)
- {
- vp8_dc_only_idct_add_mmx (q[32]*dq[0], dst+8, stride,
- dst+8, stride);
- ((int *)(q+32))[0] = 0;
- }
-
- if (eobs[3] > 1)
- vp8_dequant_idct_add_mmx (q+48, dq, dst+12, stride);
- else if (eobs[3] == 1)
- {
- vp8_dc_only_idct_add_mmx (q[48]*dq[0], dst+12, stride,
- dst+12, stride);
- ((int *)(q+48))[0] = 0;
- }
-
- q += 64;
- dst += 4*stride;
- eobs += 4;
- }
-}
-
-void vp8_dequant_idct_add_uv_block_mmx
- (short *q, short *dq,
- unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
-{
- int i;
-
- for (i = 0; i < 2; i++)
- {
- if (eobs[0] > 1)
- vp8_dequant_idct_add_mmx (q, dq, dstu, stride);
- else if (eobs[0] == 1)
- {
- vp8_dc_only_idct_add_mmx (q[0]*dq[0], dstu, stride, dstu, stride);
- ((int *)q)[0] = 0;
- }
-
- if (eobs[1] > 1)
- vp8_dequant_idct_add_mmx (q+16, dq, dstu+4, stride);
- else if (eobs[1] == 1)
- {
- vp8_dc_only_idct_add_mmx (q[16]*dq[0], dstu+4, stride,
- dstu+4, stride);
- ((int *)(q+16))[0] = 0;
- }
-
- q += 32;
- dstu += 4*stride;
- eobs += 2;
- }
-
- for (i = 0; i < 2; i++)
- {
- if (eobs[0] > 1)
- vp8_dequant_idct_add_mmx (q, dq, dstv, stride);
- else if (eobs[0] == 1)
- {
- vp8_dc_only_idct_add_mmx (q[0]*dq[0], dstv, stride, dstv, stride);
- ((int *)q)[0] = 0;
- }
-
- if (eobs[1] > 1)
- vp8_dequant_idct_add_mmx (q+16, dq, dstv+4, stride);
- else if (eobs[1] == 1)
- {
- vp8_dc_only_idct_add_mmx (q[16]*dq[0], dstv+4, stride,
- dstv+4, stride);
- ((int *)(q+16))[0] = 0;
- }
-
- q += 32;
- dstv += 4*stride;
- eobs += 2;
- }
-}
diff --git a/vp8/decoder/x86/idct_blk_sse2.c b/vp8/decoder/x86/idct_blk_sse2.c
deleted file mode 100644
index 03c2878c1..000000000
--- a/vp8/decoder/x86/idct_blk_sse2.c
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "vpx_config.h"
-#include "vp8/common/idct.h"
-#include "vp8/decoder/dequantize.h"
-
-void vp8_idct_dequant_0_2x_sse2
- (short *q, short *dq ,
- unsigned char *dst, int dst_stride);
-void vp8_idct_dequant_full_2x_sse2
- (short *q, short *dq ,
- unsigned char *dst, int dst_stride);
-
-void vp8_dequant_idct_add_y_block_sse2
- (short *q, short *dq,
- unsigned char *dst, int stride, char *eobs)
-{
- int i;
-
- for (i = 0; i < 4; i++)
- {
- if (((short *)(eobs))[0])
- {
- if (((short *)(eobs))[0] & 0xfefe)
- vp8_idct_dequant_full_2x_sse2 (q, dq, dst, stride);
- else
- vp8_idct_dequant_0_2x_sse2 (q, dq, dst, stride);
- }
- if (((short *)(eobs))[1])
- {
- if (((short *)(eobs))[1] & 0xfefe)
- vp8_idct_dequant_full_2x_sse2 (q+32, dq, dst+8, stride);
- else
- vp8_idct_dequant_0_2x_sse2 (q+32, dq, dst+8, stride);
- }
- q += 64;
- dst += stride*4;
- eobs += 4;
- }
-}
-
-void vp8_dequant_idct_add_uv_block_sse2
- (short *q, short *dq,
- unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
-{
- if (((short *)(eobs))[0])
- {
- if (((short *)(eobs))[0] & 0xfefe)
- vp8_idct_dequant_full_2x_sse2 (q, dq, dstu, stride);
- else
- vp8_idct_dequant_0_2x_sse2 (q, dq, dstu, stride);
- }
- q += 32;
- dstu += stride*4;
-
- if (((short *)(eobs))[1])
- {
- if (((short *)(eobs))[1] & 0xfefe)
- vp8_idct_dequant_full_2x_sse2 (q, dq, dstu, stride);
- else
- vp8_idct_dequant_0_2x_sse2 (q, dq, dstu, stride);
- }
- q += 32;
-
- if (((short *)(eobs))[2])
- {
- if (((short *)(eobs))[2] & 0xfefe)
- vp8_idct_dequant_full_2x_sse2 (q, dq, dstv, stride);
- else
- vp8_idct_dequant_0_2x_sse2 (q, dq, dstv, stride);
- }
- q += 32;
- dstv += stride*4;
-
- if (((short *)(eobs))[3])
- {
- if (((short *)(eobs))[3] & 0xfefe)
- vp8_idct_dequant_full_2x_sse2 (q, dq, dstv, stride);
- else
- vp8_idct_dequant_0_2x_sse2 (q, dq, dstv, stride);
- }
-}
diff --git a/vp8/decoder/x86/x86_dsystemdependent.c b/vp8/decoder/x86/x86_dsystemdependent.c
index 91dba7e1a..27bf5ddbd 100644
--- a/vp8/decoder/x86/x86_dsystemdependent.c
+++ b/vp8/decoder/x86/x86_dsystemdependent.c
@@ -13,47 +13,7 @@
#include "vpx_ports/x86.h"
#include "vp8/decoder/onyxd_int.h"
-
-#if HAVE_MMX
-void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q);
-
-void vp8_dequantize_b_mmx(BLOCKD *d)
-{
- short *sq = (short *) d->qcoeff;
- short *dq = (short *) d->dqcoeff;
- short *q = (short *) d->dequant;
- vp8_dequantize_b_impl_mmx(sq, dq, q);
-}
-#endif
-
void vp8_arch_x86_decode_init(VP8D_COMP *pbi)
{
-#if CONFIG_RUNTIME_CPU_DETECT
- int flags = x86_simd_caps();
-
- /* Note:
- *
- * This platform can be built without runtime CPU detection as well. If
- * you modify any of the function mappings present in this file, be sure
- * to also update them in static mapings (<arch>/filename_<arch>.h)
- */
- /* Override default functions with fastest ones for this CPU. */
-#if HAVE_MMX
- if (flags & HAS_MMX)
- {
- pbi->dequant.block = vp8_dequantize_b_mmx;
- pbi->dequant.idct_add = vp8_dequant_idct_add_mmx;
- pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_mmx;
- pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_mmx;
- }
-#endif
-#if HAVE_SSE2
- if (flags & HAS_SSE2)
- {
- pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_sse2;
- pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_sse2;
- }
-#endif
-#endif
}