summary | refs | log | tree | commit | diff
path: root/vp9/common
diff options
context:
space:
mode:
author John Koleszar <jkoleszar@google.com> 2013-04-19 10:22:54 -0700
committer Gerrit Code Review <gerrit@gerrit.golo.chromium.org> 2013-04-19 10:22:54 -0700
commit 6e5d2ac54cecc08ae29e962695a7d56c3bf67c71 (patch)
tree 727f71c70147e41a1f00c9a7e5d37f6ab0532af6 /vp9/common
parent 2a1efafd29bcaa3b9b5c53b48d98a89d56c13ee4 (diff)
parent 5b8a7d6e2556e16bfeee2ba8101080f079051aa3 (diff)
download libvpx-6e5d2ac54cecc08ae29e962695a7d56c3bf67c71.tar
libvpx-6e5d2ac54cecc08ae29e962695a7d56c3bf67c71.tar.gz
libvpx-6e5d2ac54cecc08ae29e962695a7d56c3bf67c71.tar.bz2
libvpx-6e5d2ac54cecc08ae29e962695a7d56c3bf67c71.zip
Merge "Use SSSE3 for 2d filters larger than 16" into experimental
Diffstat (limited to 'vp9/common')
-rw-r--r-- vp9/common/x86/vp9_asm_stubs.c 91
1 files changed, 23 insertions, 68 deletions
diff --git a/vp9/common/x86/vp9_asm_stubs.c b/vp9/common/x86/vp9_asm_stubs.c
index 310f8ed24..2b66834a7 100644
--- a/vp9/common/x86/vp9_asm_stubs.c
+++ b/vp9/common/x86/vp9_asm_stubs.c
@@ -278,43 +278,20 @@ void vp9_convolve8_ssse3(const uint8_t *src, int src_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h) {
- DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 16*71);
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64*71);
+ assert(w <= 64);
assert(h <= 64);
-
- if (x_step_q4 == 16 && y_step_q4 == 16 &&
- filter_x[3] != 128 && filter_y[3] != 128) {
- if (w == 16) {
- vp9_filter_block1d16_h8_ssse3(src - 3 * src_stride, src_stride,
- fdata2, 16,
- h + 7, filter_x);
- vp9_filter_block1d16_v8_ssse3(fdata2, 16,
- dst, dst_stride,
- h, filter_y);
- return;
- }
- if (w == 8) {
- vp9_filter_block1d8_h8_ssse3(src - 3 * src_stride, src_stride,
- fdata2, 16,
- h + 7, filter_x);
- vp9_filter_block1d8_v8_ssse3(fdata2, 16,
- dst, dst_stride,
- h, filter_y);
- return;
- }
- if (w == 4) {
- vp9_filter_block1d4_h8_ssse3(src - 3 * src_stride, src_stride,
- fdata2, 16,
- h + 7, filter_x);
- vp9_filter_block1d4_v8_ssse3(fdata2, 16,
- dst, dst_stride,
- h, filter_y);
- return;
- }
+ if (x_step_q4 == 16 && y_step_q4 == 16) {
+ vp9_convolve8_horiz_ssse3(src - 3 * src_stride, src_stride, fdata2, 64,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h + 7);
+ vp9_convolve8_vert_ssse3(fdata2 + 3 * 64, 64, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4, w, h);
+ } else {
+ vp9_convolve8_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4, w, h);
}
- vp9_convolve8_c(src, src_stride, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, h);
}
void vp9_convolve8_avg_ssse3(const uint8_t *src, int src_stride,
@@ -322,42 +299,20 @@ void vp9_convolve8_avg_ssse3(const uint8_t *src, int src_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h) {
- DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 16*71);
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64*71);
+ assert(w <= 64);
assert(h <= 64);
-
- if (x_step_q4 == 16 && y_step_q4 == 16 &&
- filter_x[3] != 128 && filter_y[3] != 128) {
- if (w == 16) {
- vp9_filter_block1d16_h8_ssse3(src - 3 * src_stride, src_stride,
- fdata2, 16,
- h + 7, filter_x);
- vp9_filter_block1d16_v8_avg_ssse3(fdata2, 16,
- dst, dst_stride,
- h, filter_y);
- return;
- }
- if (w == 8) {
- vp9_filter_block1d8_h8_ssse3(src - 3 * src_stride, src_stride,
- fdata2, 16,
- h + 7, filter_x);
- vp9_filter_block1d8_v8_avg_ssse3(fdata2, 16,
- dst, dst_stride,
- h, filter_y);
- return;
- }
- if (w == 4) {
- vp9_filter_block1d4_h8_ssse3(src - 3 * src_stride, src_stride,
- fdata2, 16,
- h + 7, filter_x);
- vp9_filter_block1d4_v8_avg_ssse3(fdata2, 16,
- dst, dst_stride,
- h, filter_y);
- return;
- }
+ if (x_step_q4 == 16 && y_step_q4 == 16) {
+ vp9_convolve8_horiz_ssse3(src - 3 * src_stride, src_stride, fdata2, 64,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h + 7);
+ vp9_convolve8_avg_vert_ssse3(fdata2 + 3 * 64, 64, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h);
+ } else {
+ vp9_convolve8_avg_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4, w, h);
}
- vp9_convolve8_avg_c(src, src_stride, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, h);
}
#endif