diff options
author | John Koleszar <jkoleszar@google.com> | 2013-04-18 13:05:38 -0700 |
---|---|---|
committer | John Koleszar <jkoleszar@google.com> | 2013-04-18 13:57:59 -0700 |
commit | a9ebbcc33836d8666f98de7a6d416a6dcd0cae9a (patch) | |
tree | 466865fec91567c8c72481c194ad9b1c6b8d5db8 /vp9/common/x86 | |
parent | ff3f93639c147fa9b8dcf6d014b3c80bf3b7c8ef (diff) | |
download | libvpx-a9ebbcc33836d8666f98de7a6d416a6dcd0cae9a.tar libvpx-a9ebbcc33836d8666f98de7a6d416a6dcd0cae9a.tar.gz libvpx-a9ebbcc33836d8666f98de7a6d416a6dcd0cae9a.tar.bz2 libvpx-a9ebbcc33836d8666f98de7a6d416a6dcd0cae9a.zip |
convolve: support larger blocks, fix asm saturation bug
Updates the common convolution code to support blocks larger than
16x16, and rectangular blocks. This uncovered a bug in the SSSE3
filtering routines due to the order of application of saturation.
This commit fixes that bug, adjusts the unit test to bias its
random values towards the extremes, and adds a test to ensure that
all filters conform to the expected pairwise addition structure.
Change-Id: I81f69668b1de0de5a8ed43f0643845641525c8f0
Diffstat (limited to 'vp9/common/x86')
-rw-r--r-- | vp9/common/x86/vp9_asm_stubs.c | 12 | ||||
-rw-r--r-- | vp9/common/x86/vp9_subpixel_8t_ssse3.asm | 16 |
2 files changed, 12 insertions, 16 deletions
diff --git a/vp9/common/x86/vp9_asm_stubs.c b/vp9/common/x86/vp9_asm_stubs.c index 6d3bb021a..310f8ed24 100644 --- a/vp9/common/x86/vp9_asm_stubs.c +++ b/vp9/common/x86/vp9_asm_stubs.c @@ -278,11 +278,9 @@ void vp9_convolve8_ssse3(const uint8_t *src, int src_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h) { - DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 16*23); + DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 16*71); - // check w/h due to fixed size fdata2 array - assert(w <= 16); - assert(h <= 16); + assert(h <= 64); if (x_step_q4 == 16 && y_step_q4 == 16 && filter_x[3] != 128 && filter_y[3] != 128) { @@ -324,11 +322,9 @@ void vp9_convolve8_avg_ssse3(const uint8_t *src, int src_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h) { - DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 16*23); + DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 16*71); - // check w/h due to fixed size fdata2 array - assert(w <= 16); - assert(h <= 16); + assert(h <= 64); if (x_step_q4 == 16 && y_step_q4 == 16 && filter_x[3] != 128 && filter_y[3] != 128) { diff --git a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm index 32f00e289..bbf9888ca 100644 --- a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm +++ b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm @@ -81,10 +81,10 @@ pmaddubsw xmm4, k4k5 pmaddubsw xmm6, k6k7 + paddsw xmm0, xmm6 paddsw xmm0, xmm2 - paddsw xmm0, krd - paddsw xmm4, xmm6 paddsw xmm0, xmm4 + paddsw xmm0, krd psraw xmm0, 7 packuswb xmm0, xmm0 @@ -165,10 +165,10 @@ pmaddubsw xmm4, k4k5 pmaddubsw xmm6, k6k7 + paddsw xmm0, xmm6 paddsw xmm0, xmm2 - paddsw xmm0, krd - paddsw xmm4, xmm6 paddsw xmm0, xmm4 + paddsw xmm0, krd psraw xmm0, 7 packuswb xmm0, xmm0 @@ -250,10 +250,10 @@ pmaddubsw xmm4, k4k5 pmaddubsw xmm6, k6k7 + paddsw xmm0, xmm6 paddsw xmm0, xmm2 - paddsw xmm0, krd - paddsw xmm4, xmm6 paddsw xmm0, xmm4 + paddsw xmm0, krd psraw xmm0, 7 
packuswb xmm0, xmm0 @@ -285,10 +285,10 @@ pmaddubsw xmm4, k4k5 pmaddubsw xmm6, k6k7 + paddsw xmm0, xmm6 paddsw xmm0, xmm2 - paddsw xmm4, xmm6 - paddsw xmm0, krd paddsw xmm0, xmm4 + paddsw xmm0, krd psraw xmm0, 7 packuswb xmm0, xmm0 |