diff options
author | Johann Koenig <johannkoenig@google.com> | 2018-09-24 23:11:49 +0000 |
---|---|---|
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2018-09-24 23:11:49 +0000 |
commit | 78f1ae5ffc4ccbcf3501fa782f5ed42ce1bc634d (patch) | |
tree | 90557f642b1d391d981ece205dcf95cc1dfb6ca1 /vpx_dsp | |
parent | af2ba81b94e362531af3cb118e406bc7d9115c98 (diff) | |
parent | 4fa0727fbcf9bb790139fa34b10d610af824a7ef (diff) | |
download | libvpx-78f1ae5ffc4ccbcf3501fa782f5ed42ce1bc634d.tar libvpx-78f1ae5ffc4ccbcf3501fa782f5ed42ce1bc634d.tar.gz libvpx-78f1ae5ffc4ccbcf3501fa782f5ed42ce1bc634d.tar.bz2 libvpx-78f1ae5ffc4ccbcf3501fa782f5ed42ce1bc634d.zip |
Merge "sanitizer: sse2 - fix unaligned double stores"
Diffstat (limited to 'vpx_dsp')
-rw-r--r-- | vpx_dsp/x86/loopfilter_sse2.c | 24 | ||||
-rw-r--r-- | vpx_dsp/x86/mem_sse2.h | 3 |
2 files changed, 11 insertions, 16 deletions
diff --git a/vpx_dsp/x86/loopfilter_sse2.c b/vpx_dsp/x86/loopfilter_sse2.c
index 1a76d670e..853c4d270 100644
--- a/vpx_dsp/x86/loopfilter_sse2.c
+++ b/vpx_dsp/x86/loopfilter_sse2.c
@@ -1627,16 +1627,12 @@ static INLINE void transpose(unsigned char *src[], int in_p,
     x5 = _mm_unpacklo_epi16(x2, x3);
     // 00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71
     x6 = _mm_unpacklo_epi32(x4, x5);
-    _mm_storel_pd((double *)(out + 0 * out_p),
-                  _mm_castsi128_pd(x6));  // 00 10 20 30 40 50 60 70
-    _mm_storeh_pd((double *)(out + 1 * out_p),
-                  _mm_castsi128_pd(x6));  // 01 11 21 31 41 51 61 71
+    mm_storelu(out + 0 * out_p, x6);  // 00 10 20 30 40 50 60 70
+    mm_storehu(out + 1 * out_p, x6);  // 01 11 21 31 41 51 61 71
     // 02 12 22 32 42 52 62 72 03 13 23 33 43 53 63 73
     x7 = _mm_unpackhi_epi32(x4, x5);
-    _mm_storel_pd((double *)(out + 2 * out_p),
-                  _mm_castsi128_pd(x7));  // 02 12 22 32 42 52 62 72
-    _mm_storeh_pd((double *)(out + 3 * out_p),
-                  _mm_castsi128_pd(x7));  // 03 13 23 33 43 53 63 73
+    mm_storelu(out + 2 * out_p, x7);  // 02 12 22 32 42 52 62 72
+    mm_storehu(out + 3 * out_p, x7);  // 03 13 23 33 43 53 63 73
 
     // 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37
     x4 = _mm_unpackhi_epi16(x0, x1);
@@ -1644,17 +1640,13 @@ static INLINE void transpose(unsigned char *src[], int in_p,
     x5 = _mm_unpackhi_epi16(x2, x3);
     // 04 14 24 34 44 54 64 74 05 15 25 35 45 55 65 75
     x6 = _mm_unpacklo_epi32(x4, x5);
-    _mm_storel_pd((double *)(out + 4 * out_p),
-                  _mm_castsi128_pd(x6));  // 04 14 24 34 44 54 64 74
-    _mm_storeh_pd((double *)(out + 5 * out_p),
-                  _mm_castsi128_pd(x6));  // 05 15 25 35 45 55 65 75
+    mm_storelu(out + 4 * out_p, x6);  // 04 14 24 34 44 54 64 74
+    mm_storehu(out + 5 * out_p, x6);  // 05 15 25 35 45 55 65 75
 
     // 06 16 26 36 46 56 66 76 07 17 27 37 47 57 67 77
     x7 = _mm_unpackhi_epi32(x4, x5);
-    _mm_storel_pd((double *)(out + 6 * out_p),
-                  _mm_castsi128_pd(x7));  // 06 16 26 36 46 56 66 76
-    _mm_storeh_pd((double *)(out + 7 * out_p),
-                  _mm_castsi128_pd(x7));  // 07 17 27 37 47 57 67 77
+    mm_storelu(out + 6 * out_p, x7);  // 06 16 26 36 46 56 66 76
+    mm_storehu(out + 7 * out_p, x7);  // 07 17 27 37 47 57 67 77
   } while (++idx8x8 < num_8x8_to_transpose);
 }
 
diff --git a/vpx_dsp/x86/mem_sse2.h b/vpx_dsp/x86/mem_sse2.h
index 48dc97970..5209a0628 100644
--- a/vpx_dsp/x86/mem_sse2.h
+++ b/vpx_dsp/x86/mem_sse2.h
@@ -26,6 +26,9 @@ static INLINE uint32_t loadu_uint32(const void *src) {
   return v;
 }
 
+#define mm_storelu(dst, v) memcpy((dst), (const char *)&(v), 8)
+#define mm_storehu(dst, v) memcpy((dst), (const char *)&(v) + 8, 8)
+
 static INLINE __m128i loadh_epi64(const __m128i s, const void *const src) {
   return _mm_castps_si128(
       _mm_loadh_pi(_mm_castsi128_ps(s), (const __m64 *)src));