diff options
author | Luca Barbato <lu_zero@gentoo.org> | 2018-05-15 17:01:54 +0000 |
---|---|---|
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2018-05-15 17:01:54 +0000 |
commit | 689ac413314db76b92fe3f34792502d72e2ea0cd (patch) | |
tree | f8af582410b3e9d2701830e0902eec9f4eda903a | |
parent | 8266c45b01a4da284a2fded11e8dbf41683f5d86 (diff) | |
parent | d8c36c94807270631d375aa3eee72a87ff593958 (diff) | |
download | libvpx-689ac413314db76b92fe3f34792502d72e2ea0cd.tar libvpx-689ac413314db76b92fe3f34792502d72e2ea0cd.tar.gz libvpx-689ac413314db76b92fe3f34792502d72e2ea0cd.tar.bz2 libvpx-689ac413314db76b92fe3f34792502d72e2ea0cd.zip |
Merge "Add vpx_varianceNxM_vsx and vpx_mseNxM_vsx"
-rw-r--r-- | test/variance_test.cc | 21 | ||||
-rw-r--r-- | vpx_dsp/ppc/variance_vsx.c | 174 | ||||
-rw-r--r-- | vpx_dsp/vpx_dsp_rtcd_defs.pl | 38 |
3 files changed, 213 insertions, 20 deletions
diff --git a/test/variance_test.cc b/test/variance_test.cc index 725821ae6..fce7a1475 100644 --- a/test/variance_test.cc +++ b/test/variance_test.cc @@ -1533,6 +1533,27 @@ INSTANTIATE_TEST_CASE_P(VSX, SumOfSquaresTest, INSTANTIATE_TEST_CASE_P(VSX, VpxSseTest, ::testing::Values(SseParams(2, 2, &vpx_get4x4sse_cs_vsx))); +INSTANTIATE_TEST_CASE_P(VSX, VpxMseTest, + ::testing::Values(MseParams(4, 4, &vpx_mse16x16_vsx), + MseParams(4, 3, &vpx_mse16x8_vsx), + MseParams(3, 4, &vpx_mse8x16_vsx), + MseParams(3, 3, &vpx_mse8x8_vsx))); + +INSTANTIATE_TEST_CASE_P( + VSX, VpxVarianceTest, + ::testing::Values(VarianceParams(6, 6, &vpx_variance64x64_vsx), + VarianceParams(6, 5, &vpx_variance64x32_vsx), + VarianceParams(5, 6, &vpx_variance32x64_vsx), + VarianceParams(5, 5, &vpx_variance32x32_vsx), + VarianceParams(5, 4, &vpx_variance32x16_vsx), + VarianceParams(4, 5, &vpx_variance16x32_vsx), + VarianceParams(4, 4, &vpx_variance16x16_vsx), + VarianceParams(4, 3, &vpx_variance16x8_vsx), + VarianceParams(3, 4, &vpx_variance8x16_vsx), + VarianceParams(3, 3, &vpx_variance8x8_vsx), + VarianceParams(3, 2, &vpx_variance8x4_vsx), + VarianceParams(2, 3, &vpx_variance4x8_vsx), + VarianceParams(2, 2, &vpx_variance4x4_vsx))); #endif // HAVE_VSX #if HAVE_MMI diff --git a/vpx_dsp/ppc/variance_vsx.c b/vpx_dsp/ppc/variance_vsx.c index 1efe2f005..d3f257b63 100644 --- a/vpx_dsp/ppc/variance_vsx.c +++ b/vpx_dsp/ppc/variance_vsx.c @@ -10,10 +10,11 @@ #include <assert.h> +#include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/ppc/types_vsx.h" -static inline uint8x16_t read4x2(const uint8_t *a, int stride) { +static INLINE uint8x16_t read4x2(const uint8_t *a, int stride) { const uint32x4_t a0 = (uint32x4_t)vec_vsx_ld(0, a); const uint32x4_t a1 = (uint32x4_t)vec_vsx_ld(0, a + stride); @@ -101,3 +102,174 @@ void vpx_comp_avg_pred_vsx(uint8_t *comp_pred, const uint8_t *pred, int width, } } } + +static INLINE void variance_inner_32(const uint8_t *a, const uint8_t *b, + int32x4_t *sum_squared, int32x4_t *sum) { + int32x4_t s = *sum; + int32x4_t ss = *sum_squared; + + const uint8x16_t va0 = vec_vsx_ld(0, a); + const uint8x16_t vb0 = vec_vsx_ld(0, b); + const uint8x16_t va1 = vec_vsx_ld(16, a); + const uint8x16_t vb1 = vec_vsx_ld(16, b); + + const int16x8_t a0 = unpack_to_s16_h(va0); + const int16x8_t b0 = unpack_to_s16_h(vb0); + const int16x8_t a1 = unpack_to_s16_l(va0); + const int16x8_t b1 = unpack_to_s16_l(vb0); + const int16x8_t a2 = unpack_to_s16_h(va1); + const int16x8_t b2 = unpack_to_s16_h(vb1); + const int16x8_t a3 = unpack_to_s16_l(va1); + const int16x8_t b3 = unpack_to_s16_l(vb1); + const int16x8_t d0 = vec_sub(a0, b0); + const int16x8_t d1 = vec_sub(a1, b1); + const int16x8_t d2 = vec_sub(a2, b2); + const int16x8_t d3 = vec_sub(a3, b3); + + s = vec_sum4s(d0, s); + ss = vec_msum(d0, d0, ss); + s = vec_sum4s(d1, s); + ss = vec_msum(d1, d1, ss); + s = vec_sum4s(d2, s); + ss = vec_msum(d2, d2, ss); + s = vec_sum4s(d3, s); + ss = vec_msum(d3, d3, ss); + *sum = s; + *sum_squared = ss; +} + +static INLINE void variance(const uint8_t *a, int a_stride, const uint8_t *b, + int b_stride, int w, int h, uint32_t *sse, + int *sum) { + int i; + + int32x4_t s = vec_splat_s32(0); + int32x4_t ss = vec_splat_s32(0); + + switch (w) { + case 4: + for (i = 0; i < h / 2; ++i) { + const int16x8_t a0 = unpack_to_s16_h(read4x2(a, a_stride)); + const int16x8_t b0 = unpack_to_s16_h(read4x2(b, b_stride)); + const int16x8_t d = vec_sub(a0, b0); + s = vec_sum4s(d, s); + ss = vec_msum(d, d, ss); + a += a_stride * 2; + b += b_stride * 2; + } + break; + case 8: + for (i = 0; i < h; ++i) { + const int16x8_t a0 = unpack_to_s16_h(vec_vsx_ld(0, a)); + const int16x8_t b0 = unpack_to_s16_h(vec_vsx_ld(0, b)); + const int16x8_t d = vec_sub(a0, b0); + + s = vec_sum4s(d, s); + ss = vec_msum(d, d, ss); + a += a_stride; + b += b_stride; + } + break; + case 16: + for (i = 0; i < h; ++i) { + const uint8x16_t va = vec_vsx_ld(0, a); + const uint8x16_t vb = vec_vsx_ld(0, b); + const int16x8_t a0 = unpack_to_s16_h(va); + const int16x8_t b0 = unpack_to_s16_h(vb); + const int16x8_t a1 = unpack_to_s16_l(va); + const int16x8_t b1 = unpack_to_s16_l(vb); + const int16x8_t d0 = vec_sub(a0, b0); + const int16x8_t d1 = vec_sub(a1, b1); + + s = vec_sum4s(d0, s); + ss = vec_msum(d0, d0, ss); + s = vec_sum4s(d1, s); + ss = vec_msum(d1, d1, ss); + + a += a_stride; + b += b_stride; + } + break; + case 32: + for (i = 0; i < h; ++i) { + variance_inner_32(a, b, &ss, &s); + a += a_stride; + b += b_stride; + } + break; + case 64: + for (i = 0; i < h; ++i) { + variance_inner_32(a, b, &ss, &s); + variance_inner_32(a + 32, b + 32, &ss, &s); + + a += a_stride; + b += b_stride; + } + break; + } + + s = vec_splat(vec_sums(s, vec_splat_s32(0)), 3); + + vec_ste(s, 0, sum); + + ss = vec_splat(vec_sums(ss, vec_splat_s32(0)), 3); + + vec_ste((uint32x4_t)ss, 0, sse); +} + +/* Identical to the variance call except it takes an additional parameter, sum, + * and returns that value using pass-by-reference instead of returning + * sse - sum^2 / w*h + */ +#define GET_VAR(W, H) \ + void vpx_get##W##x##H##var_vsx(const uint8_t *a, int a_stride, \ + const uint8_t *b, int b_stride, \ + uint32_t *sse, int *sum) { \ + variance(a, a_stride, b, b_stride, W, H, sse, sum); \ + } + +/* Identical to the variance call except it does not calculate the + * sse - sum^2 / w*h and returns sse in addtion to modifying the passed in + * variable. + */ +#define MSE(W, H) \ + uint32_t vpx_mse##W##x##H##_vsx(const uint8_t *a, int a_stride, \ + const uint8_t *b, int b_stride, \ + uint32_t *sse) { \ + int sum; \ + variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ + return *sse; \ + } + +#define VAR(W, H) \ + uint32_t vpx_variance##W##x##H##_vsx(const uint8_t *a, int a_stride, \ + const uint8_t *b, int b_stride, \ + uint32_t *sse) { \ + int sum; \ + variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ + return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H)); \ + } + +#define VARIANCES(W, H) VAR(W, H) + +VARIANCES(64, 64) +VARIANCES(64, 32) +VARIANCES(32, 64) +VARIANCES(32, 32) +VARIANCES(32, 16) +VARIANCES(16, 32) +VARIANCES(16, 16) +VARIANCES(16, 8) +VARIANCES(8, 16) +VARIANCES(8, 8) +VARIANCES(8, 4) +VARIANCES(4, 8) +VARIANCES(4, 4) + +GET_VAR(16, 16) +GET_VAR(8, 8) + +MSE(16, 16) +MSE(16, 8) +MSE(8, 16) +MSE(8, 8) diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index f9347ae5f..824ae0f43 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -1082,64 +1082,64 @@ if (vpx_config("CONFIG_ENCODERS") eq "yes" || vpx_config("CONFIG_POSTPROC") eq " # Variance # add_proto qw/unsigned int vpx_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance64x64 sse2 avx2 neon msa mmi/; + specialize qw/vpx_variance64x64 sse2 avx2 neon msa mmi vsx/; add_proto qw/unsigned int vpx_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance64x32 sse2 avx2 neon msa mmi/; + specialize qw/vpx_variance64x32 sse2 avx2 neon msa mmi vsx/; add_proto qw/unsigned int vpx_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance32x64 sse2 avx2 neon msa mmi/; + specialize qw/vpx_variance32x64 sse2 avx2 neon msa mmi vsx/; add_proto qw/unsigned int vpx_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance32x32 sse2 avx2 neon msa mmi/; + specialize qw/vpx_variance32x32 sse2 avx2 neon msa mmi vsx/; add_proto qw/unsigned int vpx_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance32x16 sse2 avx2 neon msa mmi/; + specialize qw/vpx_variance32x16 sse2 avx2 neon msa mmi vsx/; add_proto qw/unsigned int vpx_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance16x32 sse2 avx2 neon msa mmi/; + specialize qw/vpx_variance16x32 sse2 avx2 neon msa mmi vsx/; add_proto qw/unsigned int vpx_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance16x16 sse2 avx2 neon msa mmi/; + specialize qw/vpx_variance16x16 sse2 avx2 neon msa mmi vsx/; add_proto qw/unsigned int vpx_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance16x8 sse2 avx2 neon msa mmi/; + specialize qw/vpx_variance16x8 sse2 avx2 neon msa mmi vsx/; add_proto qw/unsigned int vpx_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance8x16 sse2 neon msa mmi/; + specialize qw/vpx_variance8x16 sse2 neon msa mmi vsx/; add_proto qw/unsigned int vpx_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance8x8 sse2 neon msa mmi/; + specialize qw/vpx_variance8x8 sse2 neon msa mmi vsx/; add_proto qw/unsigned int vpx_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance8x4 sse2 neon msa mmi/; + specialize qw/vpx_variance8x4 sse2 neon msa mmi vsx/; add_proto qw/unsigned int vpx_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance4x8 sse2 neon msa mmi/; + specialize qw/vpx_variance4x8 sse2 neon msa mmi vsx/; add_proto qw/unsigned int vpx_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance4x4 sse2 neon msa mmi/; + specialize qw/vpx_variance4x4 sse2 neon msa mmi vsx/; # # Specialty Variance # add_proto qw/void vpx_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; - specialize qw/vpx_get16x16var sse2 avx2 neon msa/; + specialize qw/vpx_get16x16var sse2 avx2 neon msa vsx/; add_proto qw/void vpx_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; - specialize qw/vpx_get8x8var sse2 neon msa/; + specialize qw/vpx_get8x8var sse2 neon msa vsx/; add_proto qw/unsigned int vpx_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; - specialize qw/vpx_mse16x16 sse2 avx2 neon msa mmi/; + specialize qw/vpx_mse16x16 sse2 avx2 neon msa mmi vsx/; add_proto qw/unsigned int vpx_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; - specialize qw/vpx_mse16x8 sse2 avx2 msa mmi/; + specialize qw/vpx_mse16x8 sse2 avx2 msa mmi vsx/; add_proto qw/unsigned int vpx_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; - specialize qw/vpx_mse8x16 sse2 msa mmi/; + specialize qw/vpx_mse8x16 sse2 msa mmi vsx/; add_proto qw/unsigned int vpx_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; - specialize qw/vpx_mse8x8 sse2 msa mmi/; + specialize qw/vpx_mse8x8 sse2 msa mmi vsx/; add_proto qw/unsigned int vpx_get_mb_ss/, "const int16_t *"; specialize qw/vpx_get_mb_ss sse2 msa vsx/; |