diff options
-rw-r--r-- | test/variance_test.cc | 6 | ||||
-rw-r--r-- | vpx_dsp/ppc/variance_vsx.c | 25 | ||||
-rw-r--r-- | vpx_dsp/vpx_dsp_rtcd_defs.pl | 2 |
3 files changed, 32 insertions, 1 deletion
diff --git a/test/variance_test.cc b/test/variance_test.cc
index 57d57c972..e1c549aa8 100644
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -1338,4 +1338,10 @@ INSTANTIATE_TEST_CASE_P(
         make_tuple(2, 3, &vpx_sub_pixel_avg_variance4x8_msa, 0),
         make_tuple(2, 2, &vpx_sub_pixel_avg_variance4x4_msa, 0)));
 #endif // HAVE_MSA
+
+#if HAVE_VSX
+INSTANTIATE_TEST_CASE_P(VSX, VpxSseTest,
+                        ::testing::Values(SseParams(2, 2,
+                                                    &vpx_get4x4sse_cs_vsx)));
+#endif // HAVE_VSX
 } // namespace
diff --git a/vpx_dsp/ppc/variance_vsx.c b/vpx_dsp/ppc/variance_vsx.c
index 4f238dc95..8eb6f1a12 100644
--- a/vpx_dsp/ppc/variance_vsx.c
+++ b/vpx_dsp/ppc/variance_vsx.c
@@ -13,6 +13,31 @@
 #include "./vpx_dsp_rtcd.h"
 #include "vpx_dsp/ppc/types_vsx.h"
 
+static inline uint8x16_t read4x2(const uint8_t *a, int stride) {
+  const uint32x4_t a0 = (uint32x4_t)vec_vsx_ld(0, a);
+  const uint32x4_t a1 = (uint32x4_t)vec_vsx_ld(0, a + stride);
+
+  return (uint8x16_t)vec_mergeh(a0, a1);
+}
+
+uint32_t vpx_get4x4sse_cs_vsx(const uint8_t *a, int a_stride, const uint8_t *b,
+                              int b_stride) {
+  int distortion;
+
+  const int16x8_t a0 = unpack_to_s16_h(read4x2(a, a_stride));
+  const int16x8_t a1 = unpack_to_s16_h(read4x2(a + a_stride * 2, a_stride));
+  const int16x8_t b0 = unpack_to_s16_h(read4x2(b, b_stride));
+  const int16x8_t b1 = unpack_to_s16_h(read4x2(b + b_stride * 2, b_stride));
+  const int16x8_t d0 = vec_sub(a0, b0);
+  const int16x8_t d1 = vec_sub(a1, b1);
+  const int32x4_t ds = vec_msum(d1, d1, vec_msum(d0, d0, vec_splat_s32(0)));
+  const int32x4_t d = vec_splat(vec_sums(ds, vec_splat_s32(0)), 3);
+
+  vec_ste(d, 0, &distortion);
+
+  return distortion;
+}
+
 void vpx_comp_avg_pred_vsx(uint8_t *comp_pred, const uint8_t *pred, int width,
                            int height, const uint8_t *ref, int ref_stride) {
   int i, j;
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 5570f9b4c..7368d90ea 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -1174,7 +1174,7 @@ add_proto qw/unsigned int vpx_get_mb_ss/, "const int16_t *";
   specialize qw/vpx_get_mb_ss sse2 msa/;
 
 add_proto qw/unsigned int vpx_get4x4sse_cs/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride";
-  specialize qw/vpx_get4x4sse_cs neon msa/;
+  specialize qw/vpx_get4x4sse_cs neon msa vsx/;
 
 add_proto qw/void vpx_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride";
   specialize qw/vpx_comp_avg_pred sse2 vsx/;