summaryrefslogtreecommitdiff
path: root/vp8
diff options
context:
space:
mode:
Diffstat (limited to 'vp8')
-rw-r--r--vp8/common/x86/filter_x86.c35
-rw-r--r--vp8/common/x86/filter_x86.h19
-rw-r--r--vp8/common/x86/subpixel_mmx.asm45
-rw-r--r--vp8/common/x86/subpixel_sse2.asm16
-rw-r--r--vp8/common/x86/subpixel_x86.h2
-rw-r--r--vp8/common/x86/vp8_asm_stubs.c2
-rw-r--r--vp8/encoder/x86/variance_mmx.c37
-rw-r--r--vp8/encoder/x86/variance_sse2.c13
-rw-r--r--vp8/vp8_common.mk2
9 files changed, 93 insertions, 78 deletions
diff --git a/vp8/common/x86/filter_x86.c b/vp8/common/x86/filter_x86.c
new file mode 100644
index 000000000..ebab814f4
--- /dev/null
+++ b/vp8/common/x86/filter_x86.c
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_ports/mem.h"
+
+DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_4[8][8]) =
+{
+ { 128, 128, 128, 128, 0, 0, 0, 0 },
+ { 112, 112, 112, 112, 16, 16, 16, 16 },
+ { 96, 96, 96, 96, 32, 32, 32, 32 },
+ { 80, 80, 80, 80, 48, 48, 48, 48 },
+ { 64, 64, 64, 64, 64, 64, 64, 64 },
+ { 48, 48, 48, 48, 80, 80, 80, 80 },
+ { 32, 32, 32, 32, 96, 96, 96, 96 },
+ { 16, 16, 16, 16, 112, 112, 112, 112 }
+};
+
+DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_8[8][16]) =
+{
+ { 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { 112, 112, 112, 112, 112, 112, 112, 112, 16, 16, 16, 16, 16, 16, 16, 16 },
+ { 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32 },
+ { 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48 },
+ { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 },
+ { 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80 },
+ { 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96 },
+ { 16, 16, 16, 16, 16, 16, 16, 16, 112, 112, 112, 112, 112, 112, 112, 112 }
+};
diff --git a/vp8/common/x86/filter_x86.h b/vp8/common/x86/filter_x86.h
new file mode 100644
index 000000000..efcc4dc2a
--- /dev/null
+++ b/vp8/common/x86/filter_x86.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef FILTER_X86_H
+#define FILTER_X86_H
+
+/* x86 assembly specific copy of vp8/common/filter.c:vp8_bilinear_filters with
+ * duplicated values */
+extern const short vp8_bilinear_filters_x86_4[8][8]; /* duplicated 4x */
+extern const short vp8_bilinear_filters_x86_8[8][16]; /* duplicated 8x */
+
+#endif /* FILTER_X86_H */
diff --git a/vp8/common/x86/subpixel_mmx.asm b/vp8/common/x86/subpixel_mmx.asm
index e68d950ad..5528fd0e6 100644
--- a/vp8/common/x86/subpixel_mmx.asm
+++ b/vp8/common/x86/subpixel_mmx.asm
@@ -10,6 +10,7 @@
%include "vpx_ports/x86_abi_support.asm"
+extern sym(vp8_bilinear_filters_x86_8)
%define BLOCK_HEIGHT_WIDTH 4
@@ -222,14 +223,14 @@ sym(vp8_bilinear_predict8x8_mmx):
push rdi
; end prolog
- ;const short *HFilter = bilinear_filters_mmx[xoffset];
- ;const short *VFilter = bilinear_filters_mmx[yoffset];
+ ;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset];
+ ;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset];
movsxd rax, dword ptr arg(2) ;xoffset
mov rdi, arg(4) ;dst_ptr ;
shl rax, 5 ; offset * 32
- lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))]
+ lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))]
add rax, rcx ; HFilter
mov rsi, arg(0) ;src_ptr ;
@@ -379,13 +380,13 @@ sym(vp8_bilinear_predict8x4_mmx):
push rdi
; end prolog
- ;const short *HFilter = bilinear_filters_mmx[xoffset];
- ;const short *VFilter = bilinear_filters_mmx[yoffset];
+ ;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset];
+ ;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset];
movsxd rax, dword ptr arg(2) ;xoffset
mov rdi, arg(4) ;dst_ptr ;
- lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))]
+ lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))]
shl rax, 5
mov rsi, arg(0) ;src_ptr ;
@@ -534,13 +535,13 @@ sym(vp8_bilinear_predict4x4_mmx):
push rdi
; end prolog
- ;const short *HFilter = bilinear_filters_mmx[xoffset];
- ;const short *VFilter = bilinear_filters_mmx[yoffset];
+ ;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset];
+ ;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset];
movsxd rax, dword ptr arg(2) ;xoffset
mov rdi, arg(4) ;dst_ptr ;
- lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))]
+ lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))]
shl rax, 5
add rax, rcx ; HFilter
@@ -699,29 +700,3 @@ sym(vp8_six_tap_mmx):
times 8 dw 0
-align 16
-global HIDDEN_DATA(sym(vp8_bilinear_filters_mmx))
-sym(vp8_bilinear_filters_mmx):
- times 8 dw 128
- times 8 dw 0
-
- times 8 dw 112
- times 8 dw 16
-
- times 8 dw 96
- times 8 dw 32
-
- times 8 dw 80
- times 8 dw 48
-
- times 8 dw 64
- times 8 dw 64
-
- times 8 dw 48
- times 8 dw 80
-
- times 8 dw 32
- times 8 dw 96
-
- times 8 dw 16
- times 8 dw 112
diff --git a/vp8/common/x86/subpixel_sse2.asm b/vp8/common/x86/subpixel_sse2.asm
index b62b5c68d..cb550af59 100644
--- a/vp8/common/x86/subpixel_sse2.asm
+++ b/vp8/common/x86/subpixel_sse2.asm
@@ -10,6 +10,7 @@
%include "vpx_ports/x86_abi_support.asm"
+extern sym(vp8_bilinear_filters_x86_8)
%define BLOCK_HEIGHT_WIDTH 4
%define VP8_FILTER_WEIGHT 128
@@ -961,7 +962,7 @@ sym(vp8_unpack_block1d16_h6_sse2):
; unsigned char *dst_ptr,
; int dst_pitch
;)
-extern sym(vp8_bilinear_filters_mmx)
+extern sym(vp8_bilinear_filters_x86_8)
global sym(vp8_bilinear_predict16x16_sse2)
sym(vp8_bilinear_predict16x16_sse2):
push rbp
@@ -973,10 +974,10 @@ sym(vp8_bilinear_predict16x16_sse2):
push rdi
; end prolog
- ;const short *HFilter = bilinear_filters_mmx[xoffset]
- ;const short *VFilter = bilinear_filters_mmx[yoffset]
+ ;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset]
+ ;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset]
- lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))]
+ lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))]
movsxd rax, dword ptr arg(2) ;xoffset
cmp rax, 0 ;skip first_pass filter if xoffset=0
@@ -1230,7 +1231,6 @@ sym(vp8_bilinear_predict16x16_sse2):
; unsigned char *dst_ptr,
; int dst_pitch
;)
-extern sym(vp8_bilinear_filters_mmx)
global sym(vp8_bilinear_predict8x8_sse2)
sym(vp8_bilinear_predict8x8_sse2):
push rbp
@@ -1245,9 +1245,9 @@ sym(vp8_bilinear_predict8x8_sse2):
ALIGN_STACK 16, rax
sub rsp, 144 ; reserve 144 bytes
- ;const short *HFilter = bilinear_filters_mmx[xoffset]
- ;const short *VFilter = bilinear_filters_mmx[yoffset]
- lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))]
+ ;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset]
+ ;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset]
+ lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))]
mov rsi, arg(0) ;src_ptr
movsxd rdx, dword ptr arg(1) ;src_pixels_per_line
diff --git a/vp8/common/x86/subpixel_x86.h b/vp8/common/x86/subpixel_x86.h
index 75991cc4f..01ec9e210 100644
--- a/vp8/common/x86/subpixel_x86.h
+++ b/vp8/common/x86/subpixel_x86.h
@@ -12,6 +12,8 @@
#ifndef SUBPIXEL_X86_H
#define SUBPIXEL_X86_H
+#include "filter_x86.h"
+
/* Note:
*
* This platform is commonly built for runtime CPU detection. If you modify
diff --git a/vp8/common/x86/vp8_asm_stubs.c b/vp8/common/x86/vp8_asm_stubs.c
index bce7bc38e..a623c69b4 100644
--- a/vp8/common/x86/vp8_asm_stubs.c
+++ b/vp8/common/x86/vp8_asm_stubs.c
@@ -12,9 +12,9 @@
#include "vpx_config.h"
#include "vpx_ports/mem.h"
#include "vp8/common/subpixel.h"
+#include "filter_x86.h"
extern const short vp8_six_tap_mmx[8][6*8];
-extern const short vp8_bilinear_filters_mmx[8][2*8];
extern void vp8_filter_block1d_h6_mmx
(
diff --git a/vp8/encoder/x86/variance_mmx.c b/vp8/encoder/x86/variance_mmx.c
index 92b695f17..e2524b46a 100644
--- a/vp8/encoder/x86/variance_mmx.c
+++ b/vp8/encoder/x86/variance_mmx.c
@@ -12,6 +12,7 @@
#include "vp8/encoder/variance.h"
#include "vp8/common/pragmas.h"
#include "vpx_ports/mem.h"
+#include "vp8/common/x86/filter_x86.h"
extern void filter_block1d_h6_mmx
(
@@ -21,7 +22,7 @@ extern void filter_block1d_h6_mmx
unsigned int pixel_step,
unsigned int output_height,
unsigned int output_width,
- short *vp7_filter
+ short *filter
);
extern void filter_block1d_v6_mmx
(
@@ -31,7 +32,7 @@ extern void filter_block1d_v6_mmx
unsigned int pixel_step,
unsigned int output_height,
unsigned int output_width,
- short *vp7_filter
+ short *filter
);
extern unsigned int vp8_get_mb_ss_mmx(const short *src_ptr);
@@ -198,24 +199,6 @@ unsigned int vp8_variance8x16_mmx(
}
-
-
-///////////////////////////////////////////////////////////////////////////
-// the mmx function that does the bilinear filtering and var calculation //
-// int one pass //
-///////////////////////////////////////////////////////////////////////////
-DECLARE_ALIGNED(16, const short, vp8_vp7_bilinear_filters_mmx[8][8]) =
-{
- { 128, 128, 128, 128, 0, 0, 0, 0 },
- { 112, 112, 112, 112, 16, 16, 16, 16 },
- { 96, 96, 96, 96, 32, 32, 32, 32 },
- { 80, 80, 80, 80, 48, 48, 48, 48 },
- { 64, 64, 64, 64, 64, 64, 64, 64 },
- { 48, 48, 48, 48, 80, 80, 80, 80 },
- { 32, 32, 32, 32, 96, 96, 96, 96 },
- { 16, 16, 16, 16, 112, 112, 112, 112 }
-};
-
unsigned int vp8_sub_pixel_variance4x4_mmx
(
const unsigned char *src_ptr,
@@ -232,7 +215,7 @@ unsigned int vp8_sub_pixel_variance4x4_mmx
vp8_filter_block2d_bil4x4_var_mmx(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line,
- vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
+ vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
&xsum, &xxsum
);
*sse = xxsum;
@@ -257,7 +240,7 @@ unsigned int vp8_sub_pixel_variance8x8_mmx
vp8_filter_block2d_bil_var_mmx(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 8,
- vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
+ vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
&xsum, &xxsum
);
*sse = xxsum;
@@ -283,7 +266,7 @@ unsigned int vp8_sub_pixel_variance16x16_mmx
vp8_filter_block2d_bil_var_mmx(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 16,
- vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
+ vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
&xsum0, &xxsum0
);
@@ -291,7 +274,7 @@ unsigned int vp8_sub_pixel_variance16x16_mmx
vp8_filter_block2d_bil_var_mmx(
src_ptr + 8, src_pixels_per_line,
dst_ptr + 8, dst_pixels_per_line, 16,
- vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
+ vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
&xsum1, &xxsum1
);
@@ -336,7 +319,7 @@ unsigned int vp8_sub_pixel_variance16x8_mmx
vp8_filter_block2d_bil_var_mmx(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 8,
- vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
+ vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
&xsum0, &xxsum0
);
@@ -344,7 +327,7 @@ unsigned int vp8_sub_pixel_variance16x8_mmx
vp8_filter_block2d_bil_var_mmx(
src_ptr + 8, src_pixels_per_line,
dst_ptr + 8, dst_pixels_per_line, 8,
- vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
+ vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
&xsum1, &xxsum1
);
@@ -371,7 +354,7 @@ unsigned int vp8_sub_pixel_variance8x16_mmx
vp8_filter_block2d_bil_var_mmx(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 16,
- vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
+ vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
&xsum, &xxsum
);
*sse = xxsum;
diff --git a/vp8/encoder/x86/variance_sse2.c b/vp8/encoder/x86/variance_sse2.c
index 24062eb9b..39213b03d 100644
--- a/vp8/encoder/x86/variance_sse2.c
+++ b/vp8/encoder/x86/variance_sse2.c
@@ -12,11 +12,12 @@
#include "vp8/encoder/variance.h"
#include "vp8/common/pragmas.h"
#include "vpx_ports/mem.h"
+#include "vp8/common/x86/filter_x86.h"
-extern void filter_block1d_h6_mmx(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
-extern void filter_block1d_v6_mmx(const short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
-extern void filter_block1d8_h6_sse2(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
-extern void filter_block1d8_v6_sse2(const short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
+extern void filter_block1d_h6_mmx(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *filter);
+extern void filter_block1d_v6_mmx(const short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *filter);
+extern void filter_block1d8_h6_sse2(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *filter);
+extern void filter_block1d8_v6_sse2(const short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *filter);
extern void vp8_filter_block2d_bil4x4_var_mmx
(
@@ -135,8 +136,6 @@ void vp8_half_vert_variance16x_h_sse2
unsigned int *sumsquared
);
-DECLARE_ALIGNED(16, extern short, vp8_vp7_bilinear_filters_mmx[8][8]);
-
unsigned int vp8_variance4x4_wmt(
const unsigned char *src_ptr,
int source_stride,
@@ -262,7 +261,7 @@ unsigned int vp8_sub_pixel_variance4x4_wmt
vp8_filter_block2d_bil4x4_var_mmx(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line,
- vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
+ vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
&xsum, &xxsum
);
*sse = xxsum;
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index 5c15a3e4f..683af34b0 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -72,6 +72,8 @@ VP8_COMMON_SRCS-yes += common/swapyv12buffer.c
VP8_COMMON_SRCS-$(CONFIG_POSTPROC_VISUALIZER) += common/textblit.c
VP8_COMMON_SRCS-yes += common/treecoder.c
+VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/filter_x86.c
+VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/filter_x86.h
VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/idct_x86.h
VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/subpixel_x86.h
VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/recon_x86.h