diff options
-rw-r--r-- | test/convolve_test.cc | 6 | ||||
-rw-r--r-- | vpx_dsp/mips/convolve8_avg_dspr2.c | 3 | ||||
-rw-r--r-- | vpx_dsp/mips/convolve8_avg_horiz_dspr2.c | 3 | ||||
-rw-r--r-- | vpx_dsp/mips/convolve8_dspr2.c | 4 | ||||
-rw-r--r-- | vpx_dsp/mips/convolve8_horiz_dspr2.c | 2 | ||||
-rw-r--r-- | vpx_dsp/mips/convolve8_vert_dspr2.c | 2 | ||||
-rw-r--r-- | vpx_dsp/mips/vpx_convolve8_avg_horiz_msa.c | 2 | ||||
-rw-r--r-- | vpx_dsp/mips/vpx_convolve8_avg_msa.c | 8 | ||||
-rw-r--r-- | vpx_dsp/mips/vpx_convolve8_avg_vert_msa.c | 2 | ||||
-rw-r--r-- | vpx_dsp/mips/vpx_convolve8_horiz_msa.c | 2 | ||||
-rw-r--r-- | vpx_dsp/mips/vpx_convolve8_msa.c | 8 | ||||
-rw-r--r-- | vpx_dsp/mips/vpx_convolve8_vert_msa.c | 2 | ||||
-rw-r--r-- | vpx_dsp/vpx_filter.h | 9 |
13 files changed, 29 insertions, 24 deletions
diff --git a/test/convolve_test.cc b/test/convolve_test.cc index a45db8eba..c4b3922e2 100644 --- a/test/convolve_test.cc +++ b/test/convolve_test.cc @@ -789,13 +789,7 @@ TEST_P(ConvolveTest, Copy2D) { } } -#if HAVE_MSA -// TODO(any) MSA optimizations doesn't work with 4-tap interp filter. Need to be -// fixed. -const int kNumFilterBanks = 4; -#else const int kNumFilterBanks = 5; -#endif const int kNumFilters = 16; TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) { diff --git a/vpx_dsp/mips/convolve8_avg_dspr2.c b/vpx_dsp/mips/convolve8_avg_dspr2.c index d9c2bef69..cc458c861 100644 --- a/vpx_dsp/mips/convolve8_avg_dspr2.c +++ b/vpx_dsp/mips/convolve8_avg_dspr2.c @@ -15,6 +15,7 @@ #include "vpx_dsp/mips/convolve_common_dspr2.h" #include "vpx_dsp/vpx_convolve.h" #include "vpx_dsp/vpx_dsp_common.h" +#include "vpx_dsp/vpx_filter.h" #include "vpx_ports/mem.h" #if HAVE_DSPR2 @@ -341,7 +342,7 @@ void vpx_convolve8_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride, assert(y_step_q4 == 16); assert(((const int32_t *)filter_y)[1] != 0x800000); - if (((const int32_t *)filter_y)[0] == 0) { + if (vpx_get_filter_taps(filter_y) == 2) { vpx_convolve2_avg_vert_dspr2(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); } else { diff --git a/vpx_dsp/mips/convolve8_avg_horiz_dspr2.c b/vpx_dsp/mips/convolve8_avg_horiz_dspr2.c index fb68ad881..7a9aa49d8 100644 --- a/vpx_dsp/mips/convolve8_avg_horiz_dspr2.c +++ b/vpx_dsp/mips/convolve8_avg_horiz_dspr2.c @@ -15,6 +15,7 @@ #include "vpx_dsp/mips/convolve_common_dspr2.h" #include "vpx_dsp/vpx_convolve.h" #include "vpx_dsp/vpx_dsp_common.h" +#include "vpx_dsp/vpx_filter.h" #include "vpx_ports/mem.h" #if HAVE_DSPR2 @@ -945,7 +946,7 @@ void vpx_convolve8_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride, assert(x_step_q4 == 16); assert(((const int32_t *)filter_x)[1] != 0x800000); - if (((const int32_t *)filter_x)[0] == 0) { + if (vpx_get_filter_taps(filter_x) == 2) { vpx_convolve2_avg_horiz_dspr2(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); } else { diff --git a/vpx_dsp/mips/convolve8_dspr2.c b/vpx_dsp/mips/convolve8_dspr2.c index 89f0f4196..1e7052f6c 100644 --- a/vpx_dsp/mips/convolve8_dspr2.c +++ b/vpx_dsp/mips/convolve8_dspr2.c @@ -1322,7 +1322,7 @@ void vpx_convolve8_dspr2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, if (filter_x[3] == 0x80) { copy_horiz_transposed(src - src_stride * 3, src_stride, temp, intermediate_height, w, intermediate_height); - } else if (((const int32_t *)filter_x)[0] == 0) { + } else if (vpx_get_filter_taps(filter_x) == 2) { vpx_convolve2_dspr2(src - src_stride * 3, src_stride, temp, intermediate_height, filter_x, w, intermediate_height); } else { @@ -1365,7 +1365,7 @@ void vpx_convolve8_dspr2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, /* copy the src to dst */ if (filter_y[3] == 0x80) { copy_horiz_transposed(temp + 3, intermediate_height, dst, dst_stride, h, w); - } else if (((const int32_t *)filter_y)[0] == 0) { + } else if (vpx_get_filter_taps(filter_y) == 2) { vpx_convolve2_dspr2(temp + 3, intermediate_height, dst, dst_stride, filter_y, h, w); } else { diff --git a/vpx_dsp/mips/convolve8_horiz_dspr2.c b/vpx_dsp/mips/convolve8_horiz_dspr2.c index 77e95c844..09d6f36e5 100644 --- a/vpx_dsp/mips/convolve8_horiz_dspr2.c +++ b/vpx_dsp/mips/convolve8_horiz_dspr2.c @@ -825,7 +825,7 @@ void vpx_convolve8_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride, assert(x_step_q4 == 16); assert(((const int32_t *)filter_x)[1] != 0x800000); - if (((const int32_t *)filter_x)[0] == 0) { + if (vpx_get_filter_taps(filter_x) == 2) { vpx_convolve2_horiz_dspr2(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); } else { diff --git a/vpx_dsp/mips/convolve8_vert_dspr2.c b/vpx_dsp/mips/convolve8_vert_dspr2.c index c329f71cc..fd977b533 100644 --- a/vpx_dsp/mips/convolve8_vert_dspr2.c +++ b/vpx_dsp/mips/convolve8_vert_dspr2.c @@ -325,7 +325,7 @@ void vpx_convolve8_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride, assert(y_step_q4 == 16); assert(((const int32_t *)filter_y)[1] != 0x800000); - if (((const int32_t *)filter_y)[0] == 0) { + if (vpx_get_filter_taps(filter_y) == 2) { vpx_convolve2_vert_dspr2(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); } else { diff --git a/vpx_dsp/mips/vpx_convolve8_avg_horiz_msa.c b/vpx_dsp/mips/vpx_convolve8_avg_horiz_msa.c index 187a01342..5b5a1cbc3 100644 --- a/vpx_dsp/mips/vpx_convolve8_avg_horiz_msa.c +++ b/vpx_dsp/mips/vpx_convolve8_avg_horiz_msa.c @@ -658,7 +658,7 @@ void vpx_convolve8_avg_horiz_msa(const uint8_t *src, ptrdiff_t src_stride, filt_hor[cnt] = filter_x[cnt]; } - if (((const int32_t *)filter_x)[0] == 0) { + if (vpx_get_filter_taps(filter_x) == 2) { switch (w) { case 4: common_hz_2t_and_aver_dst_4w_msa(src, (int32_t)src_stride, dst, diff --git a/vpx_dsp/mips/vpx_convolve8_avg_msa.c b/vpx_dsp/mips/vpx_convolve8_avg_msa.c index 5187cea21..ba816192a 100644 --- a/vpx_dsp/mips/vpx_convolve8_avg_msa.c +++ b/vpx_dsp/mips/vpx_convolve8_avg_msa.c @@ -538,8 +538,8 @@ void vpx_convolve8_avg_msa(const uint8_t *src, ptrdiff_t src_stride, filt_ver[cnt] = filter_y[cnt]; } - if (((const int32_t *)filter_x)[0] == 0 && - ((const int32_t *)filter_y)[0] == 0) { + if (vpx_get_filter_taps(filter_x) == 2 && + vpx_get_filter_taps(filter_y) == 2) { switch (w) { case 4: common_hv_2ht_2vt_and_aver_dst_4w_msa(src, (int32_t)src_stride, dst, @@ -571,8 +571,8 @@ void vpx_convolve8_avg_msa(const uint8_t *src, ptrdiff_t src_stride, x_step_q4, y0_q4, y_step_q4, w, h); break; } - } else if (((const int32_t *)filter_x)[0] == 0 || - ((const int32_t *)filter_y)[0] == 0) { + } else if (vpx_get_filter_taps(filter_x) == 2 || + vpx_get_filter_taps(filter_y) == 2) { vpx_convolve8_avg_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); } else { diff --git a/vpx_dsp/mips/vpx_convolve8_avg_vert_msa.c b/vpx_dsp/mips/vpx_convolve8_avg_vert_msa.c index ef8c90114..e6a790dfc 100644 --- a/vpx_dsp/mips/vpx_convolve8_avg_vert_msa.c +++ b/vpx_dsp/mips/vpx_convolve8_avg_vert_msa.c @@ -625,7 +625,7 @@ void vpx_convolve8_avg_vert_msa(const uint8_t *src, ptrdiff_t src_stride, filt_ver[cnt] = filter_y[cnt]; } - if (((const int32_t *)filter_y)[0] == 0) { + if (vpx_get_filter_taps(filter_y) == 2) { switch (w) { case 4: common_vt_2t_and_aver_dst_4w_msa(src, (int32_t)src_stride, dst, diff --git a/vpx_dsp/mips/vpx_convolve8_horiz_msa.c b/vpx_dsp/mips/vpx_convolve8_horiz_msa.c index 152dc2610..792c0f709 100644 --- a/vpx_dsp/mips/vpx_convolve8_horiz_msa.c +++ b/vpx_dsp/mips/vpx_convolve8_horiz_msa.c @@ -634,7 +634,7 @@ void vpx_convolve8_horiz_msa(const uint8_t *src, ptrdiff_t src_stride, filt_hor[cnt] = filter_x[cnt]; } - if (((const int32_t *)filter_x)[0] == 0) { + if (vpx_get_filter_taps(filter_x) == 2) { switch (w) { case 4: common_hz_2t_4w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, diff --git a/vpx_dsp/mips/vpx_convolve8_msa.c b/vpx_dsp/mips/vpx_convolve8_msa.c index d35a5a7a6..c94216758 100644 --- a/vpx_dsp/mips/vpx_convolve8_msa.c +++ b/vpx_dsp/mips/vpx_convolve8_msa.c @@ -558,8 +558,8 @@ void vpx_convolve8_msa(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, filt_ver[cnt] = filter_y[cnt]; } - if (((const int32_t *)filter_x)[0] == 0 && - ((const int32_t *)filter_y)[0] == 0) { + if (vpx_get_filter_taps(filter_x) == 2 && + vpx_get_filter_taps(filter_y) == 2) { switch (w) { case 4: common_hv_2ht_2vt_4w_msa(src, (int32_t)src_stride, dst, @@ -591,8 +591,8 @@ void vpx_convolve8_msa(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, x_step_q4, y0_q4, y_step_q4, w, h); break; } - } else if (((const int32_t *)filter_x)[0] == 0 || - ((const int32_t *)filter_y)[0] == 0) { + } else if (vpx_get_filter_taps(filter_x) == 2 || + vpx_get_filter_taps(filter_y) == 2) { vpx_convolve8_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); } else { diff --git a/vpx_dsp/mips/vpx_convolve8_vert_msa.c b/vpx_dsp/mips/vpx_convolve8_vert_msa.c index 13fce0077..195228689 100644 --- a/vpx_dsp/mips/vpx_convolve8_vert_msa.c +++ b/vpx_dsp/mips/vpx_convolve8_vert_msa.c @@ -641,7 +641,7 @@ void vpx_convolve8_vert_msa(const uint8_t *src, ptrdiff_t src_stride, filt_ver[cnt] = filter_y[cnt]; } - if (((const int32_t *)filter_y)[0] == 0) { + if (vpx_get_filter_taps(filter_y) == 2) { switch (w) { case 4: common_vt_2t_4w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, diff --git a/vpx_dsp/vpx_filter.h b/vpx_dsp/vpx_filter.h index 05eb57265..54357ee6c 100644 --- a/vpx_dsp/vpx_filter.h +++ b/vpx_dsp/vpx_filter.h @@ -11,6 +11,7 @@ #ifndef VPX_VPX_DSP_VPX_FILTER_H_ #define VPX_VPX_DSP_VPX_FILTER_H_ +#include <assert.h> #include "vpx/vpx_integer.h" #ifdef __cplusplus @@ -26,6 +27,14 @@ extern "C" { typedef int16_t InterpKernel[SUBPEL_TAPS]; +static INLINE int vpx_get_filter_taps(const int16_t *const filter) { + assert(filter[3] != 128); + if (!filter[0] && !filter[1] && !filter[2]) + return 2; + else + return 8; +} + #ifdef __cplusplus } // extern "C" #endif |