diff options
author | Johann <johann.koenig@duck.com> | 2018-10-25 12:23:03 -0700 |
---|---|---|
committer | Johann <johann.koenig@duck.com> | 2018-10-25 15:05:28 -0700 |
commit | 5caec339bea5e37ae8707f9d783feb2aa51cd111 (patch) | |
tree | daa592ba5c5e5ab14d6605cf4c286228b65e547e /vpx_dsp/x86 | |
parent | 13a946ec77d9ea8e3547f6016847e8d2c8f331ab (diff) | |
download | libvpx-5caec339bea5e37ae8707f9d783feb2aa51cd111.tar libvpx-5caec339bea5e37ae8707f9d783feb2aa51cd111.tar.gz libvpx-5caec339bea5e37ae8707f9d783feb2aa51cd111.tar.bz2 libvpx-5caec339bea5e37ae8707f9d783feb2aa51cd111.zip |
vp8 bilinear: rewrite 4x4
~20% faster than the MMX version. Removes the last usage of
vp8_bilinear_filters_x86_[48].
Change-Id: Iee976fab9655d0020440f26c4403ce50103af913
Diffstat (limited to 'vpx_dsp/x86')
-rw-r--r-- | vpx_dsp/x86/mem_sse2.h | 11 |
1 file changed, 11 insertions, 0 deletions
```diff
diff --git a/vpx_dsp/x86/mem_sse2.h b/vpx_dsp/x86/mem_sse2.h
index 5209a0628..258ab38e6 100644
--- a/vpx_dsp/x86/mem_sse2.h
+++ b/vpx_dsp/x86/mem_sse2.h
@@ -26,6 +26,17 @@ static INLINE uint32_t loadu_uint32(const void *src) {
   return v;
 }
 
+static INLINE __m128i load_unaligned_u32(const void *a) {
+  uint32_t val;
+  memcpy(&val, a, sizeof(val));
+  return _mm_cvtsi32_si128(val);
+}
+
+static INLINE void store_unaligned_u32(void *const a, const __m128i v) {
+  const uint32_t val = _mm_cvtsi128_si32(v);
+  memcpy(a, &val, sizeof(val));
+}
+
 #define mm_storelu(dst, v) memcpy((dst), (const char *)&(v), 8)
 #define mm_storehu(dst, v) memcpy((dst), (const char *)&(v) + 8, 8)
 
```