summary | refs | log | tree | commit | diff
path: root/vpx_dsp/x86
diff options
context:
space:
mode:
author: Johann <johann.koenig@duck.com>, 2018-10-25 12:23:03 -0700
committer: Johann <johann.koenig@duck.com>, 2018-10-25 15:05:28 -0700
commit: 5caec339bea5e37ae8707f9d783feb2aa51cd111 (patch)
tree: daa592ba5c5e5ab14d6605cf4c286228b65e547e /vpx_dsp/x86
parent: 13a946ec77d9ea8e3547f6016847e8d2c8f331ab (diff)
download: libvpx-5caec339bea5e37ae8707f9d783feb2aa51cd111.tar
libvpx-5caec339bea5e37ae8707f9d783feb2aa51cd111.tar.gz
libvpx-5caec339bea5e37ae8707f9d783feb2aa51cd111.tar.bz2
libvpx-5caec339bea5e37ae8707f9d783feb2aa51cd111.zip
vp8 bilinear: rewrite 4x4
~20% faster than the MMX.

Removes the last usage of vp8_bilinear_filters_x86_[48].

Change-Id: Iee976fab9655d0020440f26c4403ce50103af913
Diffstat (limited to 'vpx_dsp/x86')
-rw-r--r-- vpx_dsp/x86/mem_sse2.h 11
1 file changed, 11 insertions, 0 deletions
diff --git a/vpx_dsp/x86/mem_sse2.h b/vpx_dsp/x86/mem_sse2.h
index 5209a0628..258ab38e6 100644
--- a/vpx_dsp/x86/mem_sse2.h
+++ b/vpx_dsp/x86/mem_sse2.h
@@ -26,6 +26,17 @@ static INLINE uint32_t loadu_uint32(const void *src) {
return v;
}
+static INLINE __m128i load_unaligned_u32(const void *a) {  // Load 4 bytes from a into the low 32 bits of an __m128i.
+ uint32_t val;
+ memcpy(&val, a, sizeof(val));  // byte-wise copy, so a does not have to be 4-byte aligned
+ return _mm_cvtsi32_si128(val);  // val lands in bits 0..31; the intrinsic zeroes the upper 96 bits
+}
+
+static INLINE void store_unaligned_u32(void *const a, const __m128i v) {  // Store the low 32 bits of v to the 4 bytes at a.
+ const uint32_t val = _mm_cvtsi128_si32(v);  // extract bits 0..31 of v; the upper lanes are ignored
+ memcpy(a, &val, sizeof(val));  // byte-wise copy, so a does not have to be 4-byte aligned
+}
+
#define mm_storelu(dst, v) memcpy((dst), (const char *)&(v), 8) /* copy bytes 0..7 of v to dst; v must be an lvalue because its address is taken */
#define mm_storehu(dst, v) memcpy((dst), (const char *)&(v) + 8, 8) /* copy 8 bytes starting at byte offset 8 of v to dst; v must be an lvalue */