ppc: Add get4x4sse_cs_vsx

Change-Id: I3028bdadf653665d18e781d28e9625f62804b3d8
author: Luca Barbato <lu_zero@gentoo.org> 2017-05-11 05:16:45 +0200
committer: James Zern <jzern@google.com> 2017-05-12 17:23:00 +0200
commit: 6d225eb5f972f6a0d7b6eac7df652f5b664e2b31 (patch)
tree: 5bd914b2371f11418306b6bbd8bdce8f953a540c
parent: a7f8bd451b82f52fb9bb5d9e7926f4d09cc66831 (diff)
download: libvpx-6d225eb5f972f6a0d7b6eac7df652f5b664e2b31.tar
libvpx-6d225eb5f972f6a0d7b6eac7df652f5b664e2b31.tar.gz
libvpx-6d225eb5f972f6a0d7b6eac7df652f5b664e2b31.tar.bz2
libvpx-6d225eb5f972f6a0d7b6eac7df652f5b664e2b31.zip
3 files changed, 32 insertions, 1 deletion
diff --git a/test/variance_test.cc b/test/variance_test.cc
index 57d57c972..e1c549aa8 100644
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -1338,4 +1338,10 @@ INSTANTIATE_TEST_CASE_P(
                       make_tuple(2, 3, &vpx_sub_pixel_avg_variance4x8_msa, 0),
                       make_tuple(2, 2, &vpx_sub_pixel_avg_variance4x4_msa, 0)));
 #endif  // HAVE_MSA
+
+#if HAVE_VSX
+INSTANTIATE_TEST_CASE_P(VSX, VpxSseTest,
+                        ::testing::Values(SseParams(2, 2,
+                                                    &vpx_get4x4sse_cs_vsx)));
+#endif  // HAVE_VSX
 }  // namespace
diff --git a/vpx_dsp/ppc/variance_vsx.c b/vpx_dsp/ppc/variance_vsx.c
index 4f238dc95..8eb6f1a12 100644
--- a/vpx_dsp/ppc/variance_vsx.c
+++ b/vpx_dsp/ppc/variance_vsx.c
@@ -13,6 +13,31 @@
 #include "./vpx_dsp_rtcd.h"
 #include "vpx_dsp/ppc/types_vsx.h"
 
+static inline uint8x16_t read4x2(const uint8_t *a, int stride) {  // 2 rows
+  const uint32x4_t a0 = (uint32x4_t)vec_vsx_ld(0, a);  // vector starting at a
+  const uint32x4_t a1 = (uint32x4_t)vec_vsx_ld(0, a + stride);  // one stride on
+  // Interleave as 32-bit elements so 4 bytes of a0 sit next to 4 bytes of a1.
+  return (uint8x16_t)vec_mergeh(a0, a1);
+}
+
+uint32_t vpx_get4x4sse_cs_vsx(const uint8_t *a, int a_stride, const uint8_t *b,
+                              int b_stride) {
+  int distortion;
+  // Sum of squared differences between four rows of a and four rows of b.
+  const int16x8_t a0 = unpack_to_s16_h(read4x2(a, a_stride));  // a rows 0-1
+  const int16x8_t a1 = unpack_to_s16_h(read4x2(a + a_stride * 2, a_stride));
+  const int16x8_t b0 = unpack_to_s16_h(read4x2(b, b_stride));  // b rows 0-1
+  const int16x8_t b1 = unpack_to_s16_h(read4x2(b + b_stride * 2, b_stride));
+  const int16x8_t d0 = vec_sub(a0, b0);  // NOTE(review): presumably s16 lanes
+  const int16x8_t d1 = vec_sub(a1, b1);  // widened from u8 - confirm the helper
+  const int32x4_t ds = vec_msum(d1, d1, vec_msum(d0, d0, vec_splat_s32(0)));
+  const int32x4_t d = vec_splat(vec_sums(ds, vec_splat_s32(0)), 3);
+  // vec_sums leaves the total in element 3; splat it so vec_ste stores it.
+  vec_ste(d, 0, &distortion);
+
+  return distortion;
+}
+
 void vpx_comp_avg_pred_vsx(uint8_t *comp_pred, const uint8_t *pred, int width,
                            int height, const uint8_t *ref, int ref_stride) {
   int i, j;
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 5570f9b4c..7368d90ea 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -1174,7 +1174,7 @@ add_proto qw/unsigned int vpx_get_mb_ss/, "const int16_t *";
   specialize qw/vpx_get_mb_ss sse2 msa/;
 
 add_proto qw/unsigned int vpx_get4x4sse_cs/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride";
-  specialize qw/vpx_get4x4sse_cs neon msa/;
+  specialize qw/vpx_get4x4sse_cs neon msa vsx/;
 
 add_proto qw/void vpx_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride";
   specialize qw/vpx_comp_avg_pred sse2 vsx/;
author	Luca Barbato <lu_zero@gentoo.org>	2017-05-11 05:16:45 +0200
committer	James Zern <jzern@google.com>	2017-05-12 17:23:00 +0200
commit	6d225eb5f972f6a0d7b6eac7df652f5b664e2b31 (patch)
tree	5bd914b2371f11418306b6bbd8bdce8f953a540c
parent	a7f8bd451b82f52fb9bb5d9e7926f4d09cc66831 (diff)
download	libvpx-6d225eb5f972f6a0d7b6eac7df652f5b664e2b31.tar libvpx-6d225eb5f972f6a0d7b6eac7df652f5b664e2b31.tar.gz libvpx-6d225eb5f972f6a0d7b6eac7df652f5b664e2b31.tar.bz2 libvpx-6d225eb5f972f6a0d7b6eac7df652f5b664e2b31.zip