summary | refs | log | tree | commit | diff
path: root/vpx_dsp
diff options
context:
space:
mode:
author: Kaustubh Raste <kaustubh.raste@imgtec.com> 2017-03-23 07:44:16 +0000
committer: Gerrit Code Review <noreply-gerritcodereview@google.com> 2017-03-23 07:44:16 +0000
commit: 8ee9b855a0cb9930dd859e391552b40a1ca6c35d (patch)
tree: 81e3b2c05b16b623eb679a1a9acc4d59d72c129f /vpx_dsp
parent: f16ea6a6eb3b0e06b9e3e586ef15be0d2b0e870c (diff)
parent: e45c1f55b4e8d10a3fe66a986749c849c72fae58 (diff)
download: libvpx-8ee9b855a0cb9930dd859e391552b40a1ca6c35d.tar
libvpx-8ee9b855a0cb9930dd859e391552b40a1ca6c35d.tar.gz
libvpx-8ee9b855a0cb9930dd859e391552b40a1ca6c35d.tar.bz2
libvpx-8ee9b855a0cb9930dd859e391552b40a1ca6c35d.zip
Merge "Fix mips msa fwd xform mismatch"
Diffstat (limited to 'vpx_dsp')
-rw-r--r-- vpx_dsp/mips/fwd_dct32x32_msa.c | 32
-rw-r--r-- vpx_dsp/mips/fwd_txfm_msa.c | 34
-rw-r--r-- vpx_dsp/mips/fwd_txfm_msa.h | 16
3 files changed, 45 insertions, 37 deletions
diff --git a/vpx_dsp/mips/fwd_dct32x32_msa.c b/vpx_dsp/mips/fwd_dct32x32_msa.c
index e41a90480..06fdc951e 100644
--- a/vpx_dsp/mips/fwd_dct32x32_msa.c
+++ b/vpx_dsp/mips/fwd_dct32x32_msa.c
@@ -927,21 +927,21 @@ void vpx_fdct32x32_rd_msa(const int16_t *input, int16_t *out,
}
void vpx_fdct32x32_1_msa(const int16_t *input, int16_t *out, int32_t stride) {
- int sum = LD_HADD(input, stride);
- sum += LD_HADD(input + 8, stride);
- sum += LD_HADD(input + 16, stride);
- sum += LD_HADD(input + 24, stride);
- sum += LD_HADD(input + 32 * 8, stride);
- sum += LD_HADD(input + 32 * 8 + 8, stride);
- sum += LD_HADD(input + 32 * 8 + 16, stride);
- sum += LD_HADD(input + 32 * 8 + 24, stride);
- sum += LD_HADD(input + 32 * 16, stride);
- sum += LD_HADD(input + 32 * 16 + 8, stride);
- sum += LD_HADD(input + 32 * 16 + 16, stride);
- sum += LD_HADD(input + 32 * 16 + 24, stride);
- sum += LD_HADD(input + 32 * 24, stride);
- sum += LD_HADD(input + 32 * 24 + 8, stride);
- sum += LD_HADD(input + 32 * 24 + 16, stride);
- sum += LD_HADD(input + 32 * 24 + 24, stride);
+ int sum, i;
+ v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
+ v4i32 vec_w = { 0 };
+
+ for (i = 0; i < 16; ++i) {
+ LD_SH4(input, 8, in0, in1, in2, in3);
+ input += stride;
+ LD_SH4(input, 8, in4, in5, in6, in7);
+ input += stride;
+ ADD4(in0, in1, in2, in3, in4, in5, in6, in7, in0, in2, in4, in6);
+ ADD2(in0, in2, in4, in6, in0, in4);
+ vec_w += __msa_hadd_s_w(in0, in0);
+ vec_w += __msa_hadd_s_w(in4, in4);
+ }
+
+ sum = HADD_SW_S32(vec_w);
out[0] = (int16_t)(sum >> 3);
}
diff --git a/vpx_dsp/mips/fwd_txfm_msa.c b/vpx_dsp/mips/fwd_txfm_msa.c
index fdead5050..f786664bb 100644
--- a/vpx_dsp/mips/fwd_txfm_msa.c
+++ b/vpx_dsp/mips/fwd_txfm_msa.c
@@ -216,7 +216,15 @@ void vpx_fdct8x8_msa(const int16_t *input, int16_t *output,
}
void vpx_fdct8x8_1_msa(const int16_t *input, int16_t *out, int32_t stride) {
- out[0] = LD_HADD(input, stride);
+ v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
+ v4i32 vec_w;
+
+ LD_SH8(input, stride, in0, in1, in2, in3, in4, in5, in6, in7);
+ ADD4(in0, in1, in2, in3, in4, in5, in6, in7, in0, in2, in4, in6);
+ ADD2(in0, in2, in4, in6, in0, in4);
+ vec_w = __msa_hadd_s_w(in0, in0);
+ vec_w += __msa_hadd_s_w(in4, in4);
+ out[0] = HADD_SW_S32(vec_w);
out[1] = 0;
}
@@ -237,9 +245,25 @@ void vpx_fdct16x16_msa(const int16_t *input, int16_t *output,
}
void vpx_fdct16x16_1_msa(const int16_t *input, int16_t *out, int32_t stride) {
- int sum = LD_HADD(input, stride);
- sum += LD_HADD(input + 8, stride);
- sum += LD_HADD(input + 16 * 8, stride);
- sum += LD_HADD(input + 16 * 8 + 8, stride);
+ int sum, i;
+ v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
+ v4i32 vec_w = { 0 };
+
+ for (i = 0; i < 4; ++i) {
+ LD_SH2(input, 8, in0, in1);
+ input += stride;
+ LD_SH2(input, 8, in2, in3);
+ input += stride;
+ LD_SH2(input, 8, in4, in5);
+ input += stride;
+ LD_SH2(input, 8, in6, in7);
+ input += stride;
+ ADD4(in0, in1, in2, in3, in4, in5, in6, in7, in0, in2, in4, in6);
+ ADD2(in0, in2, in4, in6, in0, in4);
+ vec_w += __msa_hadd_s_w(in0, in0);
+ vec_w += __msa_hadd_s_w(in4, in4);
+ }
+
+ sum = HADD_SW_S32(vec_w);
out[0] = (int16_t)(sum >> 1);
}
diff --git a/vpx_dsp/mips/fwd_txfm_msa.h b/vpx_dsp/mips/fwd_txfm_msa.h
index db5e90e7b..fd589224d 100644
--- a/vpx_dsp/mips/fwd_txfm_msa.h
+++ b/vpx_dsp/mips/fwd_txfm_msa.h
@@ -14,22 +14,6 @@
#include "vpx_dsp/mips/txfm_macros_msa.h"
#include "vpx_dsp/txfm_common.h"
-#define LD_HADD(psrc, stride) \
- ({ \
- v8i16 in0_m, in1_m, in2_m, in3_m, in4_m, in5_m, in6_m, in7_m; \
- v4i32 vec_w_m; \
- \
- LD_SH4((psrc), stride, in0_m, in1_m, in2_m, in3_m); \
- ADD2(in0_m, in1_m, in2_m, in3_m, in0_m, in2_m); \
- LD_SH4(((psrc) + 4 * stride), stride, in4_m, in5_m, in6_m, in7_m); \
- ADD4(in4_m, in5_m, in6_m, in7_m, in0_m, in2_m, in4_m, in6_m, in4_m, in6_m, \
- in0_m, in4_m); \
- in0_m += in4_m; \
- \
- vec_w_m = __msa_hadd_s_w(in0_m, in0_m); \
- HADD_SW_S32(vec_w_m); \
- })
-
#define VP9_FDCT4(in0, in1, in2, in3, out0, out1, out2, out3) \
{ \
v8i16 cnst0_m, cnst1_m, cnst2_m, cnst3_m; \