summaryrefslogtreecommitdiff
path: root/vp9/common/x86
diff options
context:
space:
mode:
author Jingning Han <jingning@google.com> 2015-02-24 12:04:09 -0800
committer Gerrit Code Review <gerrit@gerrit.golo.chromium.org> 2015-02-25 09:59:50 -0800
commit 2080e4b20604e6bdd2afac39941bf72f3fa1ec14 (patch)
tree ddd15dc8591f638fa04ccfa05d41fa6ebef740ce /vp9/common/x86
parent 0f57d0a682f21d27747935fe80d75cb21769f576 (diff)
download libvpx-2080e4b20604e6bdd2afac39941bf72f3fa1ec14.tar
libvpx-2080e4b20604e6bdd2afac39941bf72f3fa1ec14.tar.gz
libvpx-2080e4b20604e6bdd2afac39941bf72f3fa1ec14.tar.bz2
libvpx-2080e4b20604e6bdd2afac39941bf72f3fa1ec14.zip
Fix high bit-depth loop-filter sse2 compiling issue - part 1
The shift-count argument of the intrinsic _mm_slli_epi16() should be an immediate. Passing a variable as that argument causes a compile failure in older versions of gcc.
Change-Id: I6a71efcc8d3b16b84715e0a9bcfa818494eea3f4
Diffstat (limited to 'vp9/common/x86')
-rw-r--r-- vp9/common/x86/vp9_high_loopfilter_intrin_sse2.c 50
1 files changed, 32 insertions, 18 deletions
diff --git a/vp9/common/x86/vp9_high_loopfilter_intrin_sse2.c b/vp9/common/x86/vp9_high_loopfilter_intrin_sse2.c
index 7e63f389e..c56c1ffb6 100644
--- a/vp9/common/x86/vp9_high_loopfilter_intrin_sse2.c
+++ b/vp9/common/x86/vp9_high_loopfilter_intrin_sse2.c
@@ -15,24 +15,38 @@
#include "vpx_ports/emmintrin_compat.h"
static INLINE __m128i signed_char_clamp_bd_sse2(__m128i value, int bd) {
- __m128i ubounded;
- __m128i lbounded;
- __m128i retval;
-
- const __m128i zero = _mm_set1_epi16(0);
- const __m128i one = _mm_set1_epi16(1);
- const __m128i t80 = _mm_slli_epi16(_mm_set1_epi16(0x80), bd - 8);
- const __m128i max = _mm_subs_epi16(
- _mm_subs_epi16(_mm_slli_epi16(one, bd), one), t80);
- const __m128i min = _mm_subs_epi16(zero, t80);
- ubounded = _mm_cmpgt_epi16(value, max);
- lbounded = _mm_cmplt_epi16(value, min);
- retval = _mm_andnot_si128(_mm_or_si128(ubounded, lbounded), value);
- ubounded = _mm_and_si128(ubounded, max);
- lbounded = _mm_and_si128(lbounded, min);
- retval = _mm_or_si128(retval, ubounded);
- retval = _mm_or_si128(retval, lbounded);
- return retval;
+ __m128i ubounded;
+ __m128i lbounded;
+ __m128i retval;
+
+ const __m128i zero = _mm_set1_epi16(0);
+ const __m128i one = _mm_set1_epi16(1);
+ __m128i t80, max, min;
+
+ if (bd == 8) {
+ t80 = _mm_set1_epi16(0x80);
+ max = _mm_subs_epi16(
+ _mm_subs_epi16(_mm_slli_epi16(one, 8), one), t80);
+ } else if (bd == 10) {
+ t80 = _mm_set1_epi16(0x200);
+ max = _mm_subs_epi16(
+ _mm_subs_epi16(_mm_slli_epi16(one, 10), one), t80);
+ } else { // bd == 12
+ t80 = _mm_set1_epi16(0x800);
+ max = _mm_subs_epi16(
+ _mm_subs_epi16(_mm_slli_epi16(one, 12), one), t80);
+ }
+
+ min = _mm_subs_epi16(zero, t80);
+
+ ubounded = _mm_cmpgt_epi16(value, max);
+ lbounded = _mm_cmplt_epi16(value, min);
+ retval = _mm_andnot_si128(_mm_or_si128(ubounded, lbounded), value);
+ ubounded = _mm_and_si128(ubounded, max);
+ lbounded = _mm_and_si128(lbounded, min);
+ retval = _mm_or_si128(retval, ubounded);
+ retval = _mm_or_si128(retval, lbounded);
+ return retval;
}
// TODO(debargha, peter): Break up large functions into smaller ones