summaryrefslogtreecommitdiff
path: root/vp9/common/x86
diff options
context:
space:
mode:
author John Koleszar <jkoleszar@google.com> 2013-04-18 13:05:38 -0700
committer John Koleszar <jkoleszar@google.com> 2013-04-18 13:57:59 -0700
commit a9ebbcc33836d8666f98de7a6d416a6dcd0cae9a (patch)
tree 466865fec91567c8c72481c194ad9b1c6b8d5db8 /vp9/common/x86
parent ff3f93639c147fa9b8dcf6d014b3c80bf3b7c8ef (diff)
download libvpx-a9ebbcc33836d8666f98de7a6d416a6dcd0cae9a.tar
libvpx-a9ebbcc33836d8666f98de7a6d416a6dcd0cae9a.tar.gz
libvpx-a9ebbcc33836d8666f98de7a6d416a6dcd0cae9a.tar.bz2
libvpx-a9ebbcc33836d8666f98de7a6d416a6dcd0cae9a.zip
convolve: support larger blocks, fix asm saturation bug
Updates the common convolution code to support blocks larger than 16x16, and rectangular blocks. This uncovered a bug in the SSSE3 filtering routines due to the order of application of saturation. This commit fixes that bug, adjusts the unit test to bias its random values towards the extremes, and adds a test to ensure that all filters conform to the expected pairwise addition structure. Change-Id: I81f69668b1de0de5a8ed43f0643845641525c8f0
Diffstat (limited to 'vp9/common/x86')
-rw-r--r-- vp9/common/x86/vp9_asm_stubs.c 12
-rw-r--r-- vp9/common/x86/vp9_subpixel_8t_ssse3.asm 16
2 files changed, 12 insertions, 16 deletions
diff --git a/vp9/common/x86/vp9_asm_stubs.c b/vp9/common/x86/vp9_asm_stubs.c
index 6d3bb021a..310f8ed24 100644
--- a/vp9/common/x86/vp9_asm_stubs.c
+++ b/vp9/common/x86/vp9_asm_stubs.c
@@ -278,11 +278,9 @@ void vp9_convolve8_ssse3(const uint8_t *src, int src_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h) {
- DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 16*23);
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 16*71);
- // check w/h due to fixed size fdata2 array
- assert(w <= 16);
- assert(h <= 16);
+ assert(h <= 64);
if (x_step_q4 == 16 && y_step_q4 == 16 &&
filter_x[3] != 128 && filter_y[3] != 128) {
@@ -324,11 +322,9 @@ void vp9_convolve8_avg_ssse3(const uint8_t *src, int src_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h) {
- DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 16*23);
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 16*71);
- // check w/h due to fixed size fdata2 array
- assert(w <= 16);
- assert(h <= 16);
+ assert(h <= 64);
if (x_step_q4 == 16 && y_step_q4 == 16 &&
filter_x[3] != 128 && filter_y[3] != 128) {
diff --git a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
index 32f00e289..bbf9888ca 100644
--- a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
+++ b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
@@ -81,10 +81,10 @@
pmaddubsw xmm4, k4k5
pmaddubsw xmm6, k6k7
+ paddsw xmm0, xmm6
paddsw xmm0, xmm2
- paddsw xmm0, krd
- paddsw xmm4, xmm6
paddsw xmm0, xmm4
+ paddsw xmm0, krd
psraw xmm0, 7
packuswb xmm0, xmm0
@@ -165,10 +165,10 @@
pmaddubsw xmm4, k4k5
pmaddubsw xmm6, k6k7
+ paddsw xmm0, xmm6
paddsw xmm0, xmm2
- paddsw xmm0, krd
- paddsw xmm4, xmm6
paddsw xmm0, xmm4
+ paddsw xmm0, krd
psraw xmm0, 7
packuswb xmm0, xmm0
@@ -250,10 +250,10 @@
pmaddubsw xmm4, k4k5
pmaddubsw xmm6, k6k7
+ paddsw xmm0, xmm6
paddsw xmm0, xmm2
- paddsw xmm0, krd
- paddsw xmm4, xmm6
paddsw xmm0, xmm4
+ paddsw xmm0, krd
psraw xmm0, 7
packuswb xmm0, xmm0
@@ -285,10 +285,10 @@
pmaddubsw xmm4, k4k5
pmaddubsw xmm6, k6k7
+ paddsw xmm0, xmm6
paddsw xmm0, xmm2
- paddsw xmm4, xmm6
- paddsw xmm0, krd
paddsw xmm0, xmm4
+ paddsw xmm0, krd
psraw xmm0, 7
packuswb xmm0, xmm0