summaryrefslogtreecommitdiff
path: root/vpx_dsp
diff options
context:
space:
mode:
authorYunqing Wang <yunqingwang@google.com>2018-10-16 09:24:18 -0700
committerYunqing Wang <yunqingwang@google.com>2018-10-16 09:35:23 -0700
commitbcd17e32c9498cb7a2375999226a60c500cc29da (patch)
tree588ecdd1374ea67fab92de3ed3584c9e213d0488 /vpx_dsp
parentc875803def7344aa69334f4dada2a3812a10a964 (diff)
downloadlibvpx-bcd17e32c9498cb7a2375999226a60c500cc29da.tar
libvpx-bcd17e32c9498cb7a2375999226a60c500cc29da.tar.gz
libvpx-bcd17e32c9498cb7a2375999226a60c500cc29da.tar.bz2
libvpx-bcd17e32c9498cb7a2375999226a60c500cc29da.zip
Fix the filter tap calculation in mips optimizations
The interpolation filter tap calculation could not accurately distinguish 2-tap filters from 4-tap filters. This patch fixes that bug and resolves the Jenkins test failures in the MIPS sub-pel filter optimizations. BUG=webm:1568 Change-Id: I51eb8adb7ed194ef2ea7dd4aa57aa9870ee38cfc
Diffstat (limited to 'vpx_dsp')
-rw-r--r--vpx_dsp/mips/convolve8_avg_dspr2.c3
-rw-r--r--vpx_dsp/mips/convolve8_avg_horiz_dspr2.c3
-rw-r--r--vpx_dsp/mips/convolve8_dspr2.c4
-rw-r--r--vpx_dsp/mips/convolve8_horiz_dspr2.c2
-rw-r--r--vpx_dsp/mips/convolve8_vert_dspr2.c2
-rw-r--r--vpx_dsp/mips/vpx_convolve8_avg_horiz_msa.c2
-rw-r--r--vpx_dsp/mips/vpx_convolve8_avg_msa.c8
-rw-r--r--vpx_dsp/mips/vpx_convolve8_avg_vert_msa.c2
-rw-r--r--vpx_dsp/mips/vpx_convolve8_horiz_msa.c2
-rw-r--r--vpx_dsp/mips/vpx_convolve8_msa.c8
-rw-r--r--vpx_dsp/mips/vpx_convolve8_vert_msa.c2
-rw-r--r--vpx_dsp/vpx_filter.h9
12 files changed, 29 insertions, 18 deletions
diff --git a/vpx_dsp/mips/convolve8_avg_dspr2.c b/vpx_dsp/mips/convolve8_avg_dspr2.c
index d9c2bef69..cc458c861 100644
--- a/vpx_dsp/mips/convolve8_avg_dspr2.c
+++ b/vpx_dsp/mips/convolve8_avg_dspr2.c
@@ -15,6 +15,7 @@
#include "vpx_dsp/mips/convolve_common_dspr2.h"
#include "vpx_dsp/vpx_convolve.h"
#include "vpx_dsp/vpx_dsp_common.h"
+#include "vpx_dsp/vpx_filter.h"
#include "vpx_ports/mem.h"
#if HAVE_DSPR2
@@ -341,7 +342,7 @@ void vpx_convolve8_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
assert(y_step_q4 == 16);
assert(((const int32_t *)filter_y)[1] != 0x800000);
- if (((const int32_t *)filter_y)[0] == 0) {
+ if (vpx_get_filter_taps(filter_y) == 2) {
vpx_convolve2_avg_vert_dspr2(src, src_stride, dst, dst_stride, filter,
x0_q4, x_step_q4, y0_q4, y_step_q4, w, h);
} else {
diff --git a/vpx_dsp/mips/convolve8_avg_horiz_dspr2.c b/vpx_dsp/mips/convolve8_avg_horiz_dspr2.c
index fb68ad881..7a9aa49d8 100644
--- a/vpx_dsp/mips/convolve8_avg_horiz_dspr2.c
+++ b/vpx_dsp/mips/convolve8_avg_horiz_dspr2.c
@@ -15,6 +15,7 @@
#include "vpx_dsp/mips/convolve_common_dspr2.h"
#include "vpx_dsp/vpx_convolve.h"
#include "vpx_dsp/vpx_dsp_common.h"
+#include "vpx_dsp/vpx_filter.h"
#include "vpx_ports/mem.h"
#if HAVE_DSPR2
@@ -945,7 +946,7 @@ void vpx_convolve8_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
assert(x_step_q4 == 16);
assert(((const int32_t *)filter_x)[1] != 0x800000);
- if (((const int32_t *)filter_x)[0] == 0) {
+ if (vpx_get_filter_taps(filter_x) == 2) {
vpx_convolve2_avg_horiz_dspr2(src, src_stride, dst, dst_stride, filter,
x0_q4, x_step_q4, y0_q4, y_step_q4, w, h);
} else {
diff --git a/vpx_dsp/mips/convolve8_dspr2.c b/vpx_dsp/mips/convolve8_dspr2.c
index 89f0f4196..1e7052f6c 100644
--- a/vpx_dsp/mips/convolve8_dspr2.c
+++ b/vpx_dsp/mips/convolve8_dspr2.c
@@ -1322,7 +1322,7 @@ void vpx_convolve8_dspr2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
if (filter_x[3] == 0x80) {
copy_horiz_transposed(src - src_stride * 3, src_stride, temp,
intermediate_height, w, intermediate_height);
- } else if (((const int32_t *)filter_x)[0] == 0) {
+ } else if (vpx_get_filter_taps(filter_x) == 2) {
vpx_convolve2_dspr2(src - src_stride * 3, src_stride, temp,
intermediate_height, filter_x, w, intermediate_height);
} else {
@@ -1365,7 +1365,7 @@ void vpx_convolve8_dspr2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
/* copy the src to dst */
if (filter_y[3] == 0x80) {
copy_horiz_transposed(temp + 3, intermediate_height, dst, dst_stride, h, w);
- } else if (((const int32_t *)filter_y)[0] == 0) {
+ } else if (vpx_get_filter_taps(filter_y) == 2) {
vpx_convolve2_dspr2(temp + 3, intermediate_height, dst, dst_stride,
filter_y, h, w);
} else {
diff --git a/vpx_dsp/mips/convolve8_horiz_dspr2.c b/vpx_dsp/mips/convolve8_horiz_dspr2.c
index 77e95c844..09d6f36e5 100644
--- a/vpx_dsp/mips/convolve8_horiz_dspr2.c
+++ b/vpx_dsp/mips/convolve8_horiz_dspr2.c
@@ -825,7 +825,7 @@ void vpx_convolve8_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
assert(x_step_q4 == 16);
assert(((const int32_t *)filter_x)[1] != 0x800000);
- if (((const int32_t *)filter_x)[0] == 0) {
+ if (vpx_get_filter_taps(filter_x) == 2) {
vpx_convolve2_horiz_dspr2(src, src_stride, dst, dst_stride, filter, x0_q4,
x_step_q4, y0_q4, y_step_q4, w, h);
} else {
diff --git a/vpx_dsp/mips/convolve8_vert_dspr2.c b/vpx_dsp/mips/convolve8_vert_dspr2.c
index c329f71cc..fd977b533 100644
--- a/vpx_dsp/mips/convolve8_vert_dspr2.c
+++ b/vpx_dsp/mips/convolve8_vert_dspr2.c
@@ -325,7 +325,7 @@ void vpx_convolve8_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
assert(y_step_q4 == 16);
assert(((const int32_t *)filter_y)[1] != 0x800000);
- if (((const int32_t *)filter_y)[0] == 0) {
+ if (vpx_get_filter_taps(filter_y) == 2) {
vpx_convolve2_vert_dspr2(src, src_stride, dst, dst_stride, filter, x0_q4,
x_step_q4, y0_q4, y_step_q4, w, h);
} else {
diff --git a/vpx_dsp/mips/vpx_convolve8_avg_horiz_msa.c b/vpx_dsp/mips/vpx_convolve8_avg_horiz_msa.c
index 187a01342..5b5a1cbc3 100644
--- a/vpx_dsp/mips/vpx_convolve8_avg_horiz_msa.c
+++ b/vpx_dsp/mips/vpx_convolve8_avg_horiz_msa.c
@@ -658,7 +658,7 @@ void vpx_convolve8_avg_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
filt_hor[cnt] = filter_x[cnt];
}
- if (((const int32_t *)filter_x)[0] == 0) {
+ if (vpx_get_filter_taps(filter_x) == 2) {
switch (w) {
case 4:
common_hz_2t_and_aver_dst_4w_msa(src, (int32_t)src_stride, dst,
diff --git a/vpx_dsp/mips/vpx_convolve8_avg_msa.c b/vpx_dsp/mips/vpx_convolve8_avg_msa.c
index 5187cea21..ba816192a 100644
--- a/vpx_dsp/mips/vpx_convolve8_avg_msa.c
+++ b/vpx_dsp/mips/vpx_convolve8_avg_msa.c
@@ -538,8 +538,8 @@ void vpx_convolve8_avg_msa(const uint8_t *src, ptrdiff_t src_stride,
filt_ver[cnt] = filter_y[cnt];
}
- if (((const int32_t *)filter_x)[0] == 0 &&
- ((const int32_t *)filter_y)[0] == 0) {
+ if (vpx_get_filter_taps(filter_x) == 2 &&
+ vpx_get_filter_taps(filter_y) == 2) {
switch (w) {
case 4:
common_hv_2ht_2vt_and_aver_dst_4w_msa(src, (int32_t)src_stride, dst,
@@ -571,8 +571,8 @@ void vpx_convolve8_avg_msa(const uint8_t *src, ptrdiff_t src_stride,
x_step_q4, y0_q4, y_step_q4, w, h);
break;
}
- } else if (((const int32_t *)filter_x)[0] == 0 ||
- ((const int32_t *)filter_y)[0] == 0) {
+ } else if (vpx_get_filter_taps(filter_x) == 2 ||
+ vpx_get_filter_taps(filter_y) == 2) {
vpx_convolve8_avg_c(src, src_stride, dst, dst_stride, filter, x0_q4,
x_step_q4, y0_q4, y_step_q4, w, h);
} else {
diff --git a/vpx_dsp/mips/vpx_convolve8_avg_vert_msa.c b/vpx_dsp/mips/vpx_convolve8_avg_vert_msa.c
index ef8c90114..e6a790dfc 100644
--- a/vpx_dsp/mips/vpx_convolve8_avg_vert_msa.c
+++ b/vpx_dsp/mips/vpx_convolve8_avg_vert_msa.c
@@ -625,7 +625,7 @@ void vpx_convolve8_avg_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
filt_ver[cnt] = filter_y[cnt];
}
- if (((const int32_t *)filter_y)[0] == 0) {
+ if (vpx_get_filter_taps(filter_y) == 2) {
switch (w) {
case 4:
common_vt_2t_and_aver_dst_4w_msa(src, (int32_t)src_stride, dst,
diff --git a/vpx_dsp/mips/vpx_convolve8_horiz_msa.c b/vpx_dsp/mips/vpx_convolve8_horiz_msa.c
index 152dc2610..792c0f709 100644
--- a/vpx_dsp/mips/vpx_convolve8_horiz_msa.c
+++ b/vpx_dsp/mips/vpx_convolve8_horiz_msa.c
@@ -634,7 +634,7 @@ void vpx_convolve8_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
filt_hor[cnt] = filter_x[cnt];
}
- if (((const int32_t *)filter_x)[0] == 0) {
+ if (vpx_get_filter_taps(filter_x) == 2) {
switch (w) {
case 4:
common_hz_2t_4w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
diff --git a/vpx_dsp/mips/vpx_convolve8_msa.c b/vpx_dsp/mips/vpx_convolve8_msa.c
index d35a5a7a6..c94216758 100644
--- a/vpx_dsp/mips/vpx_convolve8_msa.c
+++ b/vpx_dsp/mips/vpx_convolve8_msa.c
@@ -558,8 +558,8 @@ void vpx_convolve8_msa(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
filt_ver[cnt] = filter_y[cnt];
}
- if (((const int32_t *)filter_x)[0] == 0 &&
- ((const int32_t *)filter_y)[0] == 0) {
+ if (vpx_get_filter_taps(filter_x) == 2 &&
+ vpx_get_filter_taps(filter_y) == 2) {
switch (w) {
case 4:
common_hv_2ht_2vt_4w_msa(src, (int32_t)src_stride, dst,
@@ -591,8 +591,8 @@ void vpx_convolve8_msa(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
x_step_q4, y0_q4, y_step_q4, w, h);
break;
}
- } else if (((const int32_t *)filter_x)[0] == 0 ||
- ((const int32_t *)filter_y)[0] == 0) {
+ } else if (vpx_get_filter_taps(filter_x) == 2 ||
+ vpx_get_filter_taps(filter_y) == 2) {
vpx_convolve8_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4,
y0_q4, y_step_q4, w, h);
} else {
diff --git a/vpx_dsp/mips/vpx_convolve8_vert_msa.c b/vpx_dsp/mips/vpx_convolve8_vert_msa.c
index 13fce0077..195228689 100644
--- a/vpx_dsp/mips/vpx_convolve8_vert_msa.c
+++ b/vpx_dsp/mips/vpx_convolve8_vert_msa.c
@@ -641,7 +641,7 @@ void vpx_convolve8_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
filt_ver[cnt] = filter_y[cnt];
}
- if (((const int32_t *)filter_y)[0] == 0) {
+ if (vpx_get_filter_taps(filter_y) == 2) {
switch (w) {
case 4:
common_vt_2t_4w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
diff --git a/vpx_dsp/vpx_filter.h b/vpx_dsp/vpx_filter.h
index 05eb57265..54357ee6c 100644
--- a/vpx_dsp/vpx_filter.h
+++ b/vpx_dsp/vpx_filter.h
@@ -11,6 +11,7 @@
#ifndef VPX_VPX_DSP_VPX_FILTER_H_
#define VPX_VPX_DSP_VPX_FILTER_H_
+#include <assert.h>
#include "vpx/vpx_integer.h"
#ifdef __cplusplus
@@ -26,6 +27,14 @@ extern "C" {
typedef int16_t InterpKernel[SUBPEL_TAPS];
+static INLINE int vpx_get_filter_taps(const int16_t *const filter) {  // Classifies a kernel by its first three coefficients; the result is 2 or 8, never 4.
+ assert(filter[3] != 128);  // Precondition, checked only when assertions are enabled: filter[3] must not be 128 (the vpx_convolve8_dspr2 call sites test filter[3] == 0x80 before calling).
+ if (!filter[0] && !filter[1] && !filter[2])  // True only when filter[0], filter[1] and filter[2] are all zero.
+ return 2;  // Reported as a 2-tap kernel.
+ else
+ return 8;  // Any nonzero value among the first three coefficients is reported as 8 taps.
+}
+
#ifdef __cplusplus
} // extern "C"
#endif