summaryrefslogtreecommitdiff
path: root/third_party/libyuv/source/compare_win.cc
diff options
context:
space:
mode:
authorJim Bankoski <jimbankoski@google.com>2016-08-25 06:39:38 -0700
committerJim Bankoski <jimbankoski@google.com>2016-08-25 06:39:38 -0700
commit6d7a9f3e9cdff1a7d714dcb021eb353dff867dbe (patch)
treea7de0122036885928d5e45c0a92d6533593659bd /third_party/libyuv/source/compare_win.cc
parentce634bbf4d4e995067f47494f57056727751df15 (diff)
downloadlibvpx-6d7a9f3e9cdff1a7d714dcb021eb353dff867dbe.tar
libvpx-6d7a9f3e9cdff1a7d714dcb021eb353dff867dbe.tar.gz
libvpx-6d7a9f3e9cdff1a7d714dcb021eb353dff867dbe.tar.bz2
libvpx-6d7a9f3e9cdff1a7d714dcb021eb353dff867dbe.zip
libyuv: update to c244a3e9
Fixes color issue when scaling without breaking mingw. BUG=https://bugs.chromium.org/p/libyuv/issues/detail?id=605 BUG=https://bugs.chromium.org/p/webm/issues/detail?id=1252 Change-Id: I09437d93fd65964ad57113274d8c819f3eaf2e57
Diffstat (limited to 'third_party/libyuv/source/compare_win.cc')
-rw-r--r--third_party/libyuv/source/compare_win.cc87
1 files changed, 40 insertions, 47 deletions
diff --git a/third_party/libyuv/source/compare_win.cc b/third_party/libyuv/source/compare_win.cc
index 19806f275..dc86fe25b 100644
--- a/third_party/libyuv/source/compare_win.cc
+++ b/third_party/libyuv/source/compare_win.cc
@@ -9,6 +9,8 @@
*/
#include "libyuv/basic_types.h"
+
+#include "libyuv/compare_row.h"
#include "libyuv/row.h"
#ifdef __cplusplus
@@ -16,9 +18,8 @@ namespace libyuv {
extern "C" {
#endif
-// This module is for Visual C x86.
-#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
- defined(_MSC_VER) && !defined(__clang__)
+// This module is for 32 bit Visual C x86 and clangcl
+#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
__declspec(naked)
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
@@ -100,41 +101,32 @@ uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
}
#endif // _MSC_VER >= 1700
-#define HAS_HASHDJB2_SSE41
-static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16
-static uvec32 kHashMul0 = {
+uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16
+uvec32 kHashMul0 = {
0x0c3525e1, // 33 ^ 15
0xa3476dc1, // 33 ^ 14
0x3b4039a1, // 33 ^ 13
0x4f5f0981, // 33 ^ 12
};
-static uvec32 kHashMul1 = {
+uvec32 kHashMul1 = {
0x30f35d61, // 33 ^ 11
0x855cb541, // 33 ^ 10
0x040a9121, // 33 ^ 9
0x747c7101, // 33 ^ 8
};
-static uvec32 kHashMul2 = {
+uvec32 kHashMul2 = {
0xec41d4e1, // 33 ^ 7
0x4cfa3cc1, // 33 ^ 6
0x025528a1, // 33 ^ 5
0x00121881, // 33 ^ 4
};
-static uvec32 kHashMul3 = {
+uvec32 kHashMul3 = {
0x00008c61, // 33 ^ 3
0x00000441, // 33 ^ 2
0x00000021, // 33 ^ 1
0x00000001, // 33 ^ 0
};
-// 27: 66 0F 38 40 C6 pmulld xmm0,xmm6
-// 44: 66 0F 38 40 DD pmulld xmm3,xmm5
-// 59: 66 0F 38 40 E5 pmulld xmm4,xmm5
-// 72: 66 0F 38 40 D5 pmulld xmm2,xmm5
-// 83: 66 0F 38 40 CD pmulld xmm1,xmm5
-#define pmulld(reg) _asm _emit 0x66 _asm _emit 0x0F _asm _emit 0x38 \
- _asm _emit 0x40 _asm _emit reg
-
__declspec(naked)
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
__asm {
@@ -143,30 +135,30 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
movd xmm0, [esp + 12] // seed
pxor xmm7, xmm7 // constant 0 for unpck
- movdqa xmm6, kHash16x33
+ movdqa xmm6, xmmword ptr kHash16x33
wloop:
movdqu xmm1, [eax] // src[0-15]
lea eax, [eax + 16]
- pmulld(0xc6) // pmulld xmm0,xmm6 hash *= 33 ^ 16
- movdqa xmm5, kHashMul0
+ pmulld xmm0, xmm6 // hash *= 33 ^ 16
+ movdqa xmm5, xmmword ptr kHashMul0
movdqa xmm2, xmm1
punpcklbw xmm2, xmm7 // src[0-7]
movdqa xmm3, xmm2
punpcklwd xmm3, xmm7 // src[0-3]
- pmulld(0xdd) // pmulld xmm3, xmm5
- movdqa xmm5, kHashMul1
+ pmulld xmm3, xmm5
+ movdqa xmm5, xmmword ptr kHashMul1
movdqa xmm4, xmm2
punpckhwd xmm4, xmm7 // src[4-7]
- pmulld(0xe5) // pmulld xmm4, xmm5
- movdqa xmm5, kHashMul2
+ pmulld xmm4, xmm5
+ movdqa xmm5, xmmword ptr kHashMul2
punpckhbw xmm1, xmm7 // src[8-15]
movdqa xmm2, xmm1
punpcklwd xmm2, xmm7 // src[8-11]
- pmulld(0xd5) // pmulld xmm2, xmm5
- movdqa xmm5, kHashMul3
+ pmulld xmm2, xmm5
+ movdqa xmm5, xmmword ptr kHashMul3
punpckhwd xmm1, xmm7 // src[12-15]
- pmulld(0xcd) // pmulld xmm1, xmm5
+ pmulld xmm1, xmm5
paddd xmm3, xmm4 // add 16 results
paddd xmm1, xmm2
paddd xmm1, xmm3
@@ -191,36 +183,37 @@ uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
__asm {
mov eax, [esp + 4] // src
mov ecx, [esp + 8] // count
- movd xmm0, [esp + 12] // seed
- movdqa xmm6, kHash16x33
+ vmovd xmm0, [esp + 12] // seed
wloop:
- vpmovzxbd xmm3, dword ptr [eax] // src[0-3]
- pmulld xmm0, xmm6 // hash *= 33 ^ 16
- vpmovzxbd xmm4, dword ptr [eax + 4] // src[4-7]
- pmulld xmm3, kHashMul0
- vpmovzxbd xmm2, dword ptr [eax + 8] // src[8-11]
- pmulld xmm4, kHashMul1
- vpmovzxbd xmm1, dword ptr [eax + 12] // src[12-15]
- pmulld xmm2, kHashMul2
+ vpmovzxbd xmm3, [eax] // src[0-3]
+ vpmulld xmm0, xmm0, xmmword ptr kHash16x33 // hash *= 33 ^ 16
+ vpmovzxbd xmm4, [eax + 4] // src[4-7]
+ vpmulld xmm3, xmm3, xmmword ptr kHashMul0
+ vpmovzxbd xmm2, [eax + 8] // src[8-11]
+ vpmulld xmm4, xmm4, xmmword ptr kHashMul1
+ vpmovzxbd xmm1, [eax + 12] // src[12-15]
+ vpmulld xmm2, xmm2, xmmword ptr kHashMul2
lea eax, [eax + 16]
- pmulld xmm1, kHashMul3
- paddd xmm3, xmm4 // add 16 results
- paddd xmm1, xmm2
- paddd xmm1, xmm3
- pshufd xmm2, xmm1, 0x0e // upper 2 dwords
- paddd xmm1, xmm2
- pshufd xmm2, xmm1, 0x01
- paddd xmm1, xmm2
- paddd xmm0, xmm1
+ vpmulld xmm1, xmm1, xmmword ptr kHashMul3
+ vpaddd xmm3, xmm3, xmm4 // add 16 results
+ vpaddd xmm1, xmm1, xmm2
+ vpaddd xmm1, xmm1, xmm3
+ vpshufd xmm2, xmm1, 0x0e // upper 2 dwords
+ vpaddd xmm1, xmm1,xmm2
+ vpshufd xmm2, xmm1, 0x01
+ vpaddd xmm1, xmm1, xmm2
+ vpaddd xmm0, xmm0, xmm1
sub ecx, 16
jg wloop
- movd eax, xmm0 // return hash
+ vmovd eax, xmm0 // return hash
+ vzeroupper
ret
}
}
#endif // _MSC_VER >= 1700
+
#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
#ifdef __cplusplus