summaryrefslogtreecommitdiff
path: root/vpx_dsp/x86/variance_avx2.c
diff options
context:
space:
mode:
author: Johann <johannkoenig@google.com> 2015-06-05 09:54:19 -0700
committer: Johann <johannkoenig@google.com> 2015-07-07 15:51:04 -0700
commit: 6a82f0d7fb9ee908c389e8d55444bbaed3d54e9c (patch)
tree: 5b346f932d7256defc451958f474a33cd8b51205 /vpx_dsp/x86/variance_avx2.c
parent: 155b9416b36d9708b18f22ef2bc396fba264f513 (diff)
downloadlibvpx-6a82f0d7fb9ee908c389e8d55444bbaed3d54e9c.tar
libvpx-6a82f0d7fb9ee908c389e8d55444bbaed3d54e9c.tar.gz
libvpx-6a82f0d7fb9ee908c389e8d55444bbaed3d54e9c.tar.bz2
libvpx-6a82f0d7fb9ee908c389e8d55444bbaed3d54e9c.zip
Move sub pixel variance to vpx_dsp
Change-Id: I66bf6720c396c89aa2d1fd26d5d52bf5d5e3dff1
Diffstat (limited to 'vpx_dsp/x86/variance_avx2.c')
-rw-r--r--vpx_dsp/x86/variance_avx2.c90
1 files changed, 90 insertions, 0 deletions
diff --git a/vpx_dsp/x86/variance_avx2.c b/vpx_dsp/x86/variance_avx2.c
index 82cef4af0..7851a98b1 100644
--- a/vpx_dsp/x86/variance_avx2.c
+++ b/vpx_dsp/x86/variance_avx2.c
@@ -91,3 +91,93 @@ unsigned int vpx_variance64x32_avx2(const uint8_t *src, int src_stride,
sse, &sum, vpx_get32x32var_avx2, 32);
return *sse - (((int64_t)sum * sum) >> 11);
}
+
+// Forward declaration only: the definition of this helper is not part of
+// this section of the file (presumably it lives in a separate AVX2
+// implementation file -- TODO confirm).
+// The fixed-size wrappers in this file pass the block height in `height`,
+// read a squared-error total back through `sse`, and store the return value
+// in a signed int that they use as the sum term of the variance.
+// NOTE(review): the "32xh" in the name and the callers' `+ 32` pointer steps
+// suggest it processes a 32-pixel-wide column -- confirm against the
+// definition.
+unsigned int vpx_sub_pixel_variance32xh_avx2(const uint8_t *src, int src_stride,
+ int x_offset, int y_offset,
+ const uint8_t *dst, int dst_stride,
+ int height,
+ unsigned int *sse);
+
+// Forward declaration only: the definition of this helper is not part of
+// this section of the file (presumably it lives in a separate AVX2
+// implementation file -- TODO confirm).
+// Compared with vpx_sub_pixel_variance32xh_avx2 it takes an additional
+// buffer `sec` with its own stride `sec_stride`.
+// NOTE(review): the "avg" in the name suggests `sec` is a second prediction
+// that gets averaged in before the comparison -- confirm against the
+// definition.
+// The fixed-size wrappers in this file read a squared-error total back
+// through `sseptr` and store the return value in a signed int that they use
+// as the sum term of the variance.
+unsigned int vpx_sub_pixel_avg_variance32xh_avx2(const uint8_t *src,
+ int src_stride,
+ int x_offset,
+ int y_offset,
+ const uint8_t *dst,
+ int dst_stride,
+ const uint8_t *sec,
+ int sec_stride,
+ int height,
+ unsigned int *sseptr);
+
+// Sub-pixel variance for a 64x64 block.
+//
+// The work is delegated to vpx_sub_pixel_variance32xh_avx2, which is called
+// twice with a height of 64: once at `src`/`dst` and once at `src + 32`/
+// `dst + 32`. The two squared-error totals are added and stored in `*sse`;
+// the two returned sums are added, squared, and scaled down by 4096
+// (64 * 64) before being subtracted from that total.
+//
+// Returns `*sse - ((sum * sum) >> 12)`.
+unsigned int vpx_sub_pixel_variance64x64_avx2(
+    const uint8_t *src, int src_stride,
+    int x_offset, int y_offset,
+    const uint8_t *dst, int dst_stride,
+    unsigned int *sse) {
+  unsigned int sse_left;
+  unsigned int sse_right;
+
+  // First 32 columns.
+  const int sum_left =
+      vpx_sub_pixel_variance32xh_avx2(src, src_stride, x_offset, y_offset,
+                                      dst, dst_stride, 64, &sse_left);
+  // Next 32 columns, same offsets and strides.
+  const int sum_right =
+      vpx_sub_pixel_variance32xh_avx2(src + 32, src_stride, x_offset, y_offset,
+                                      dst + 32, dst_stride, 64, &sse_right);
+
+  const int sum = sum_left + sum_right;
+  // Widen before squaring so the product is computed in 64 bits.
+  const int64_t sum_sq = (int64_t)sum * sum;
+  const unsigned int total_sse = sse_left + sse_right;
+
+  *sse = total_sse;
+  return total_sse - (sum_sq >> 12);
+}
+
+// Sub-pixel variance for a 32x32 block.
+//
+// A single call to vpx_sub_pixel_variance32xh_avx2 with a height of 32 does
+// the work; the helper is handed the caller's `sse` pointer directly, and its
+// return value is used as the sum term.
+//
+// Returns `*sse - ((sum * sum) >> 10)`, where 1 << 10 == 32 * 32.
+unsigned int vpx_sub_pixel_variance32x32_avx2(
+    const uint8_t *src, int src_stride,
+    int x_offset, int y_offset,
+    const uint8_t *dst, int dst_stride,
+    unsigned int *sse) {
+  const int sum =
+      vpx_sub_pixel_variance32xh_avx2(src, src_stride, x_offset, y_offset,
+                                      dst, dst_stride, 32, sse);
+  // Widen before squaring so the product is computed in 64 bits.
+  const int64_t sum_sq = (int64_t)sum * sum;
+
+  return *sse - (sum_sq >> 10);
+}
+
+// Sub-pixel "avg" variance for a 64x64 block.
+//
+// The work is delegated to vpx_sub_pixel_avg_variance32xh_avx2, which is
+// called twice with a height of 64 and a `sec` stride of 64: once at
+// `src`/`dst`/`sec` and once with all three pointers advanced by 32. The two
+// squared-error totals are added and stored in `*sse`; the two returned sums
+// are added, squared, and scaled down by 4096 (64 * 64) before being
+// subtracted from that total.
+//
+// Returns `*sse - ((sum * sum) >> 12)`.
+unsigned int vpx_sub_pixel_avg_variance64x64_avx2(
+    const uint8_t *src, int src_stride,
+    int x_offset, int y_offset,
+    const uint8_t *dst, int dst_stride,
+    unsigned int *sse,
+    const uint8_t *sec) {
+  unsigned int sse_left;
+  unsigned int sse_right;
+
+  // First 32 columns.
+  const int sum_left = vpx_sub_pixel_avg_variance32xh_avx2(
+      src, src_stride, x_offset, y_offset, dst, dst_stride,
+      sec, 64, 64, &sse_left);
+  // Next 32 columns, same offsets and strides.
+  const int sum_right = vpx_sub_pixel_avg_variance32xh_avx2(
+      src + 32, src_stride, x_offset, y_offset, dst + 32, dst_stride,
+      sec + 32, 64, 64, &sse_right);
+
+  const int sum = sum_left + sum_right;
+  // Widen before squaring so the product is computed in 64 bits.
+  const int64_t sum_sq = (int64_t)sum * sum;
+  const unsigned int total_sse = sse_left + sse_right;
+
+  *sse = total_sse;
+  return total_sse - (sum_sq >> 12);
+}
+
+// Sub-pixel "avg" variance for a 32x32 block.
+//
+// A single call to vpx_sub_pixel_avg_variance32xh_avx2 with a height of 32
+// and a `sec` stride of 32 does the work; the helper is handed the caller's
+// `sse` pointer directly, and its return value is used as the sum term.
+//
+// Returns `*sse - ((sum * sum) >> 10)`, where 1 << 10 == 32 * 32.
+unsigned int vpx_sub_pixel_avg_variance32x32_avx2(
+    const uint8_t *src, int src_stride,
+    int x_offset, int y_offset,
+    const uint8_t *dst, int dst_stride,
+    unsigned int *sse,
+    const uint8_t *sec) {
+  const int sum = vpx_sub_pixel_avg_variance32xh_avx2(
+      src, src_stride, x_offset, y_offset, dst, dst_stride,
+      sec, 32, 32, sse);
+  // Widen before squaring so the product is computed in 64 bits.
+  const int64_t sum_sq = (int64_t)sum * sum;
+
+  return *sse - (sum_sq >> 10);
+}