summaryrefslogtreecommitdiff
path: root/vpx_dsp
diff options
context:
space:
mode:
author Luc Trudeau <luc@trud.ca> 2018-06-13 13:39:04 -0400
committer Luc Trudeau <luc@trud.ca> 2018-06-13 19:21:06 +0000
commit e3ce12cfc1c2d2cc245e1a6d49eaf3ff18538547 (patch)
tree 76c392d2752ac15a5466f0878db5f3b11aac2db4 /vpx_dsp
parent f950248b9b357b21e974e3ace94359d7ee8c7b29 (diff)
downloadlibvpx-e3ce12cfc1c2d2cc245e1a6d49eaf3ff18538547.tar
libvpx-e3ce12cfc1c2d2cc245e1a6d49eaf3ff18538547.tar.gz
libvpx-e3ce12cfc1c2d2cc245e1a6d49eaf3ff18538547.tar.bz2
libvpx-e3ce12cfc1c2d2cc245e1a6d49eaf3ff18538547.zip
VSX Version of SAD8xN
VSX versions of the SAD functions of width 8.

SADTest Speed Test (POWER8 Model 2.1)
8x4 C time = 68.7 ms (±0.3 ms), VSX time = 31.8 ms (±0.1 ms) [2.2x]
8x8 C time = 55.6 ms (±0.3 ms), VSX time = 18.3 ms (±0.1 ms) [3.0x]
8x16 C time = 46.5 ms (±0.1 ms), VSX time = 15.6 ms (±0.1 ms) [3.0x]

Change-Id: I843f3b34e103b72deeade4a939193d8b53cee460
Diffstat (limited to 'vpx_dsp')
-rw-r--r-- vpx_dsp/ppc/sad_vsx.c 25
-rw-r--r-- vpx_dsp/ppc/types_vsx.h 1
-rw-r--r-- vpx_dsp/vpx_dsp_rtcd_defs.pl 6
3 files changed, 29 insertions, 3 deletions
diff --git a/vpx_dsp/ppc/sad_vsx.c b/vpx_dsp/ppc/sad_vsx.c
index bb49addae..b41c05b48 100644
--- a/vpx_dsp/ppc/sad_vsx.c
+++ b/vpx_dsp/ppc/sad_vsx.c
@@ -31,6 +31,28 @@
v_sad = vec_sum4s(v_absh, v_sad); \
v_sad = vec_sum4s(v_absl, v_sad);
+#define SAD8(height) /* Defines vpx_sad8x<height>_vsx(): sum of absolute differences between blocks a and b over <height> rows (width-8 variant). */ \
+ unsigned int vpx_sad8x##height##_vsx(const uint8_t *a, int a_stride, \
+ const uint8_t *b, int b_stride) { \
+ int y = 0; /* number of rows processed so far */ \
+ uint8x16_t v_a, v_b, v_abs; \
+ uint32x4_t v_sad = vec_zeros_u32; /* running per-lane sums, starting from zero */ \
+ \
+ do { /* do-while: the body always runs at least once */ \
+ v_a = vec_vsx_ld(0, a); /* NOTE(review): full-vector load; presumably reads past the 8 bytes of the row - confirm callers tolerate it */ \
+ v_b = vec_vsx_ld(0, b); \
+ \
+ v_abs = vec_sub(vec_max(v_a, v_b), vec_min(v_a, v_b)); /* max minus min gives the per-element absolute difference */ \
+ v_sad = vec_sum4s(v_abs, v_sad); /* fold the differences into the accumulator lanes */ \
+ \
+ a += a_stride; /* advance both pointers to the next row */ \
+ b += b_stride; \
+ y++; \
+ } while (y < height); \
+ \
+ return v_sad[1] + v_sad[0]; /* only lanes 0 and 1 are summed; lanes 2 and 3 are ignored (presumably they cover the bytes beyond the 8-wide row - TODO confirm lane order) */ \
+ }
+
#define SAD16(height) \
unsigned int vpx_sad16x##height##_vsx(const uint8_t *a, int a_stride, \
const uint8_t *b, int b_stride) { \
@@ -95,6 +117,9 @@
return sad[3] + sad[2] + sad[1] + sad[0]; \
}
+SAD8(4);
+SAD8(8);
+SAD8(16);
SAD16(8);
SAD16(16);
SAD16(32);
diff --git a/vpx_dsp/ppc/types_vsx.h b/vpx_dsp/ppc/types_vsx.h
index 803d0377a..81c7b970a 100644
--- a/vpx_dsp/ppc/types_vsx.h
+++ b/vpx_dsp/ppc/types_vsx.h
@@ -82,6 +82,7 @@ static const int16x8_t vec_ones_s16 = { 1, 1, 1, 1, 1, 1, 1, 1 };
static const uint16x8_t vec_ones_u16 = { 1, 1, 1, 1, 1, 1, 1, 1 };
static const uint32x4_t vec_ones_u32 = { 1, 1, 1, 1 };
static const int32x4_t vec_zeros_s32 = { 0, 0, 0, 0 };
+static const uint32x4_t vec_zeros_u32 = { 0, 0, 0, 0 };
static const uint16x8_t vec_shift_sign_s16 = { 15, 15, 15, 15, 15, 15, 15, 15 };
static const uint32x4_t vec_shift_sign_s32 = { 31, 31, 31, 31 };
static const uint8x16_t vec_perm64 = { 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D,
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index f237e5503..9f3e268cc 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -748,13 +748,13 @@ add_proto qw/unsigned int vpx_sad16x8/, "const uint8_t *src_ptr, int src_stride,
specialize qw/vpx_sad16x8 neon msa sse2 vsx mmi/;
add_proto qw/unsigned int vpx_sad8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad8x16 neon msa sse2 mmi/;
+specialize qw/vpx_sad8x16 neon msa sse2 vsx mmi/;
add_proto qw/unsigned int vpx_sad8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad8x8 neon msa sse2 mmi/;
+specialize qw/vpx_sad8x8 neon msa sse2 vsx mmi/;
add_proto qw/unsigned int vpx_sad8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad8x4 neon msa sse2 mmi/;
+specialize qw/vpx_sad8x4 neon msa sse2 vsx mmi/;
add_proto qw/unsigned int vpx_sad4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
specialize qw/vpx_sad4x8 neon msa sse2 mmi/;