diff options
author | Jingning Han <jingning@google.com> | 2015-02-24 12:04:09 -0800 |
---|---|---|
committer | Gerrit Code Review <gerrit@gerrit.golo.chromium.org> | 2015-02-25 09:59:50 -0800 |
commit | 2080e4b20604e6bdd2afac39941bf72f3fa1ec14 (patch) | |
tree | ddd15dc8591f638fa04ccfa05d41fa6ebef740ce /vp9/common/x86 | |
parent | 0f57d0a682f21d27747935fe80d75cb21769f576 (diff) | |
download | libvpx-2080e4b20604e6bdd2afac39941bf72f3fa1ec14.tar libvpx-2080e4b20604e6bdd2afac39941bf72f3fa1ec14.tar.gz libvpx-2080e4b20604e6bdd2afac39941bf72f3fa1ec14.tar.bz2 libvpx-2080e4b20604e6bdd2afac39941bf72f3fa1ec14.zip |
Fix high bit-depth loop-filter sse2 compiling issue - part 1
The intrinsic _mm_slli_epi16() should take an immediate as its shift count.
Passing a variable as that argument causes a compile failure
with older versions of gcc.
Change-Id: I6a71efcc8d3b16b84715e0a9bcfa818494eea3f4
Diffstat (limited to 'vp9/common/x86')
-rw-r--r-- | vp9/common/x86/vp9_high_loopfilter_intrin_sse2.c | 50 |
1 files changed, 32 insertions, 18 deletions
diff --git a/vp9/common/x86/vp9_high_loopfilter_intrin_sse2.c b/vp9/common/x86/vp9_high_loopfilter_intrin_sse2.c
index 7e63f389e..c56c1ffb6 100644
--- a/vp9/common/x86/vp9_high_loopfilter_intrin_sse2.c
+++ b/vp9/common/x86/vp9_high_loopfilter_intrin_sse2.c
@@ -15,24 +15,38 @@
 #include "vpx_ports/emmintrin_compat.h"
 static INLINE __m128i signed_char_clamp_bd_sse2(__m128i value, int bd) {
- __m128i ubounded;
- __m128i lbounded;
- __m128i retval;
-
- const __m128i zero = _mm_set1_epi16(0);
- const __m128i one = _mm_set1_epi16(1);
- const __m128i t80 = _mm_slli_epi16(_mm_set1_epi16(0x80), bd - 8);
- const __m128i max = _mm_subs_epi16(
- _mm_subs_epi16(_mm_slli_epi16(one, bd), one), t80);
- const __m128i min = _mm_subs_epi16(zero, t80);
- ubounded = _mm_cmpgt_epi16(value, max);
- lbounded = _mm_cmplt_epi16(value, min);
- retval = _mm_andnot_si128(_mm_or_si128(ubounded, lbounded), value);
- ubounded = _mm_and_si128(ubounded, max);
- lbounded = _mm_and_si128(lbounded, min);
- retval = _mm_or_si128(retval, ubounded);
- retval = _mm_or_si128(retval, lbounded);
- return retval;
+ __m128i ubounded;
+ __m128i lbounded;
+ __m128i retval;
+
+ const __m128i zero = _mm_set1_epi16(0);
+ const __m128i one = _mm_set1_epi16(1);
+ __m128i t80, max, min;
+
+ if (bd == 8) {
+ t80 = _mm_set1_epi16(0x80);
+ max = _mm_subs_epi16(
+ _mm_subs_epi16(_mm_slli_epi16(one, 8), one), t80);
+ } else if (bd == 10) {
+ t80 = _mm_set1_epi16(0x200);
+ max = _mm_subs_epi16(
+ _mm_subs_epi16(_mm_slli_epi16(one, 10), one), t80);
+ } else { // bd == 12
+ t80 = _mm_set1_epi16(0x800);
+ max = _mm_subs_epi16(
+ _mm_subs_epi16(_mm_slli_epi16(one, 12), one), t80);
+ }
+
+ min = _mm_subs_epi16(zero, t80);
+
+ ubounded = _mm_cmpgt_epi16(value, max);
+ lbounded = _mm_cmplt_epi16(value, min);
+ retval = _mm_andnot_si128(_mm_or_si128(ubounded, lbounded), value);
+ ubounded = _mm_and_si128(ubounded, max);
+ lbounded = _mm_and_si128(lbounded, min);
+ retval = _mm_or_si128(retval, ubounded);
+ retval = _mm_or_si128(retval, lbounded);
+ return retval;
 }
 // TODO(debargha, peter): Break up large functions into smaller ones