diff options
author | Johann <johannkoenig@google.com> | 2015-06-05 09:54:19 -0700 |
---|---|---|
committer | Johann <johannkoenig@google.com> | 2015-07-07 15:51:04 -0700 |
commit | 6a82f0d7fb9ee908c389e8d55444bbaed3d54e9c (patch) | |
tree | 5b346f932d7256defc451958f474a33cd8b51205 /vpx_dsp/x86/variance_avx2.c | |
parent | 155b9416b36d9708b18f22ef2bc396fba264f513 (diff) | |
download | libvpx-6a82f0d7fb9ee908c389e8d55444bbaed3d54e9c.tar libvpx-6a82f0d7fb9ee908c389e8d55444bbaed3d54e9c.tar.gz libvpx-6a82f0d7fb9ee908c389e8d55444bbaed3d54e9c.tar.bz2 libvpx-6a82f0d7fb9ee908c389e8d55444bbaed3d54e9c.zip |
Move sub pixel variance to vpx_dsp
Change-Id: I66bf6720c396c89aa2d1fd26d5d52bf5d5e3dff1
Diffstat (limited to 'vpx_dsp/x86/variance_avx2.c')
-rw-r--r-- | vpx_dsp/x86/variance_avx2.c | 90 |
1 files changed, 90 insertions, 0 deletions
/*
 * Provenance: diff --git a/vpx_dsp/x86/variance_avx2.c
 *                          b/vpx_dsp/x86/variance_avx2.c
 *             index 82cef4af0..7851a98b1 100644
 *             hunk @@ -91,3 +91,93 @@
 *
 * The code below is what this hunk appends to the file, directly after the
 * closing lines of vpx_variance64x32_avx2() (whose body starts outside this
 * view and is therefore not reproduced here).
 */

/*
 * 32-column-wide sub-pixel variance kernel, defined in another translation
 * unit.  It stores a sum of squared errors through |sse|; the wrappers below
 * use its return value as the "sum" term of the variance formula.
 * NOTE(review): the return type is unsigned int but callers store it in an
 * int -- presumably it is a signed sum of differences; confirm in the kernel.
 */
unsigned int vpx_sub_pixel_variance32xh_avx2(const uint8_t *src, int src_stride,
                                             int x_offset, int y_offset,
                                             const uint8_t *dst, int dst_stride,
                                             int height,
                                             unsigned int *sse);

/*
 * Same as above with an additional |sec| buffer (and its stride); also
 * defined in another translation unit.
 * NOTE(review): presumably |sec| is a second predictor that is averaged in
 * before the comparison -- confirm in the kernel.
 */
unsigned int vpx_sub_pixel_avg_variance32xh_avx2(const uint8_t *src,
                                                 int src_stride,
                                                 int x_offset,
                                                 int y_offset,
                                                 const uint8_t *dst,
                                                 int dst_stride,
                                                 const uint8_t *sec,
                                                 int sec_stride,
                                                 int height,
                                                 unsigned int *sseptr);

/*
 * 64x64 sub-pixel variance.
 *
 * The block is handled as two 32-column halves of 64 rows each (columns
 * 0..31, then columns 32..63).  The combined sum of squared errors is written
 * to |*sse|; the return value is |*sse| minus (sum * sum) >> 12, where
 * 1 << 12 == 64 * 64.
 */
unsigned int vpx_sub_pixel_variance64x64_avx2(const uint8_t *src,
                                              int src_stride,
                                              int x_offset,
                                              int y_offset,
                                              const uint8_t *dst,
                                              int dst_stride,
                                              unsigned int *sse) {
  unsigned int sse_left;
  unsigned int sse_right;
  int sum_left;
  int sum_right;
  int sum_total;
  int64_t correction;

  /* Left half: columns 0..31. */
  sum_left = (int)vpx_sub_pixel_variance32xh_avx2(
      src, src_stride, x_offset, y_offset, dst, dst_stride, 64, &sse_left);

  /* Right half: columns 32..63. */
  sum_right = (int)vpx_sub_pixel_variance32xh_avx2(
      src + 32, src_stride, x_offset, y_offset, dst + 32, dst_stride, 64,
      &sse_right);

  sum_total = sum_left + sum_right;
  *sse = sse_left + sse_right;

  /* Square in 64 bits so the product cannot overflow an int. */
  correction = ((int64_t)sum_total * sum_total) >> 12;
  return *sse - correction;
}

/*
 * 32x32 sub-pixel variance.
 *
 * A single call to the 32-wide kernel covers the whole block; the kernel
 * writes the sum of squared errors straight into |*sse|.  The return value is
 * |*sse| minus (sum * sum) >> 10, where 1 << 10 == 32 * 32.
 */
unsigned int vpx_sub_pixel_variance32x32_avx2(const uint8_t *src,
                                              int src_stride,
                                              int x_offset,
                                              int y_offset,
                                              const uint8_t *dst,
                                              int dst_stride,
                                              unsigned int *sse) {
  int sum;
  int64_t correction;

  sum = (int)vpx_sub_pixel_variance32xh_avx2(
      src, src_stride, x_offset, y_offset, dst, dst_stride, 32, sse);

  correction = ((int64_t)sum * sum) >> 10;
  return *sse - correction;
}

/*
 * 64x64 sub-pixel variance with the extra |sec| input.
 *
 * Works like vpx_sub_pixel_variance64x64_avx2(): two 32-column halves of 64
 * rows each, with |sec| advanced by 32 for the right half.  |sec| is passed
 * to the kernel with a stride of 64.
 */
unsigned int vpx_sub_pixel_avg_variance64x64_avx2(const uint8_t *src,
                                                  int src_stride,
                                                  int x_offset,
                                                  int y_offset,
                                                  const uint8_t *dst,
                                                  int dst_stride,
                                                  unsigned int *sse,
                                                  const uint8_t *sec) {
  unsigned int sse_left;
  unsigned int sse_right;
  int sum_left;
  int sum_right;
  int sum_total;
  int64_t correction;

  /* Left half: columns 0..31. */
  sum_left = (int)vpx_sub_pixel_avg_variance32xh_avx2(
      src, src_stride, x_offset, y_offset, dst, dst_stride, sec, 64, 64,
      &sse_left);

  /* Right half: columns 32..63. */
  sum_right = (int)vpx_sub_pixel_avg_variance32xh_avx2(
      src + 32, src_stride, x_offset, y_offset, dst + 32, dst_stride,
      sec + 32, 64, 64, &sse_right);

  sum_total = sum_left + sum_right;
  *sse = sse_left + sse_right;

  /* Square in 64 bits so the product cannot overflow an int. */
  correction = ((int64_t)sum_total * sum_total) >> 12;
  return *sse - correction;
}

/*
 * 32x32 sub-pixel variance with the extra |sec| input.
 *
 * One kernel call processes the full 32-element-wide block; |sec| is passed
 * with a stride of 32 and the kernel writes directly into |*sse|.
 */
unsigned int vpx_sub_pixel_avg_variance32x32_avx2(const uint8_t *src,
                                                  int src_stride,
                                                  int x_offset,
                                                  int y_offset,
                                                  const uint8_t *dst,
                                                  int dst_stride,
                                                  unsigned int *sse,
                                                  const uint8_t *sec) {
  int sum;
  int64_t correction;

  /* Process 32 elements in parallel. */
  sum = (int)vpx_sub_pixel_avg_variance32xh_avx2(
      src, src_stride, x_offset, y_offset, dst, dst_stride, sec, 32, 32, sse);

  correction = ((int64_t)sum * sum) >> 10;
  return *sse - correction;
}