diff options
author | James Bankoski <jimbankoski@google.com> | 2016-07-01 19:08:04 +0000 |
---|---|---|
committer | James Bankoski <jimbankoski@google.com> | 2016-07-01 19:14:28 +0000 |
commit | c5372cf077394856eb1aa10e72bcc8e25bb9b3ec (patch) | |
tree | 7cfaf2e17b6c1374a3c3d10026dd74c1ed9992c0 /third_party/libyuv/source/row_any.cc | |
parent | aa81375d73ee33d382e7f717c519db6159e497ee (diff) | |
download | libvpx-c5372cf077394856eb1aa10e72bcc8e25bb9b3ec.tar libvpx-c5372cf077394856eb1aa10e72bcc8e25bb9b3ec.tar.gz libvpx-c5372cf077394856eb1aa10e72bcc8e25bb9b3ec.tar.bz2 libvpx-c5372cf077394856eb1aa10e72bcc8e25bb9b3ec.zip |
Revert "libyuv: update to 2f101fdb"
Compile failures on linux platform.
BUG=webm:1253
This reverts commit aa81375d73ee33d382e7f717c519db6159e497ee.
Change-Id: Ibab2c4827bc21518dc03c6e9716b5015cff56fc7
Diffstat (limited to 'third_party/libyuv/source/row_any.cc')
-rw-r--r-- | third_party/libyuv/source/row_any.cc | 370 |
1 files changed, 113 insertions, 257 deletions
diff --git a/third_party/libyuv/source/row_any.cc b/third_party/libyuv/source/row_any.cc index 494164fd0..1cb1f6b93 100644 --- a/third_party/libyuv/source/row_any.cc +++ b/third_party/libyuv/source/row_any.cc @@ -22,39 +22,6 @@ extern "C" { // Subsampled source needs to be increase by 1 of not even. #define SS(width, shift) (((width) + (1 << (shift)) - 1) >> (shift)) -// Any 4 planes to 1 with yuvconstants -#define ANY41C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \ - void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \ - const uint8* a_buf, uint8* dst_ptr, \ - const struct YuvConstants* yuvconstants, int width) { \ - SIMD_ALIGNED(uint8 temp[64 * 5]); \ - memset(temp, 0, 64 * 4); /* for msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, n); \ - } \ - memcpy(temp, y_buf + n, r); \ - memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ - memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ - memcpy(temp + 192, a_buf + n, r); \ - ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, temp + 256, \ - yuvconstants, MASK + 1); \ - memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 256, \ - SS(r, DUVSHIFT) * BPP); \ - } - -#ifdef HAS_I422ALPHATOARGBROW_SSSE3 -ANY41C(I422AlphaToARGBRow_Any_SSSE3, I422AlphaToARGBRow_SSSE3, 1, 0, 4, 7) -#endif -#ifdef HAS_I422ALPHATOARGBROW_AVX2 -ANY41C(I422AlphaToARGBRow_Any_AVX2, I422AlphaToARGBRow_AVX2, 1, 0, 4, 15) -#endif -#ifdef HAS_I422ALPHATOARGBROW_NEON -ANY41C(I422AlphaToARGBRow_Any_NEON, I422AlphaToARGBRow_NEON, 1, 0, 4, 7) -#endif -#undef ANY41C - // Any 3 planes to 1. #define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \ void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \ @@ -73,100 +40,83 @@ ANY41C(I422AlphaToARGBRow_Any_NEON, I422AlphaToARGBRow_NEON, 1, 0, 4, 7) memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \ SS(r, DUVSHIFT) * BPP); \ } -#ifdef HAS_I422TOYUY2ROW_SSE2 -ANY31(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, 1, 1, 4, 15) -ANY31(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, 1, 1, 4, 15) -#endif -#ifdef HAS_I422TOYUY2ROW_NEON -ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15) -#endif -#ifdef HAS_I422TOUYVYROW_NEON -ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15) -#endif -#ifdef HAS_BLENDPLANEROW_AVX2 -ANY31(BlendPlaneRow_Any_AVX2, BlendPlaneRow_AVX2, 0, 0, 1, 31) -#endif -#ifdef HAS_BLENDPLANEROW_SSSE3 -ANY31(BlendPlaneRow_Any_SSSE3, BlendPlaneRow_SSSE3, 0, 0, 1, 7) -#endif -#undef ANY31 - -// Note that odd width replication includes 444 due to implementation -// on arm that subsamples 444 to 422 internally. -// Any 3 planes to 1 with yuvconstants -#define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \ - void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \ - uint8* dst_ptr, const struct YuvConstants* yuvconstants, \ - int width) { \ - SIMD_ALIGNED(uint8 temp[64 * 4]); \ - memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \ - } \ - memcpy(temp, y_buf + n, r); \ - memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ - memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ - if (width & 1) { \ - temp[64 + SS(r, UVSHIFT)] = temp[64 + SS(r, UVSHIFT) - 1]; \ - temp[128 + SS(r, UVSHIFT)] = temp[128 + SS(r, UVSHIFT) - 1]; \ - } \ - ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, \ - yuvconstants, MASK + 1); \ - memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \ - SS(r, DUVSHIFT) * BPP); \ - } #ifdef HAS_I422TOARGBROW_SSSE3 -ANY31C(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7) -#endif -#ifdef HAS_I411TOARGBROW_SSSE3 -ANY31C(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_SSSE3, 2, 0, 4, 7) +ANY31(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7) #endif #ifdef HAS_I444TOARGBROW_SSSE3 -ANY31C(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7) -ANY31C(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7) -ANY31C(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7) -ANY31C(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7) -ANY31C(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7) -ANY31C(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 7) +ANY31(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7) +ANY31(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_SSSE3, 2, 0, 4, 7) +ANY31(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_SSSE3, 1, 0, 4, 7) +ANY31(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_SSSE3, 1, 0, 4, 7) +ANY31(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7) +ANY31(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7) +ANY31(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7) +ANY31(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7) +ANY31(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 7) +ANY31(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, 1, 0, 3, 7) +ANY31(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, 1, 1, 4, 15) +ANY31(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, 1, 1, 4, 15) #endif // HAS_I444TOARGBROW_SSSE3 #ifdef HAS_I422TORGB24ROW_AVX2 -ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 15) +ANY31(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 15) +#endif +#ifdef HAS_I422TORAWROW_AVX2 +ANY31(I422ToRAWRow_Any_AVX2, I422ToRAWRow_AVX2, 1, 0, 3, 15) +#endif +#ifdef HAS_J422TOARGBROW_SSSE3 +ANY31(J422ToARGBRow_Any_SSSE3, J422ToARGBRow_SSSE3, 1, 0, 4, 7) +#endif +#ifdef HAS_J422TOARGBROW_AVX2 +ANY31(J422ToARGBRow_Any_AVX2, J422ToARGBRow_AVX2, 1, 0, 4, 15) #endif #ifdef HAS_I422TOARGBROW_AVX2 -ANY31C(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15) +ANY31(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15) +#endif +#ifdef HAS_I422TOBGRAROW_AVX2 +ANY31(I422ToBGRARow_Any_AVX2, I422ToBGRARow_AVX2, 1, 0, 4, 15) #endif #ifdef HAS_I422TORGBAROW_AVX2 -ANY31C(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15) +ANY31(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15) +#endif +#ifdef HAS_I422TOABGRROW_AVX2 +ANY31(I422ToABGRRow_Any_AVX2, I422ToABGRRow_AVX2, 1, 0, 4, 15) #endif #ifdef HAS_I444TOARGBROW_AVX2 -ANY31C(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15) +ANY31(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15) #endif #ifdef HAS_I411TOARGBROW_AVX2 -ANY31C(I411ToARGBRow_Any_AVX2, I411ToARGBRow_AVX2, 2, 0, 4, 15) +ANY31(I411ToARGBRow_Any_AVX2, I411ToARGBRow_AVX2, 2, 0, 4, 15) #endif #ifdef HAS_I422TOARGB4444ROW_AVX2 -ANY31C(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 7) +ANY31(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 7) #endif #ifdef HAS_I422TOARGB1555ROW_AVX2 -ANY31C(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 7) +ANY31(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 7) #endif #ifdef HAS_I422TORGB565ROW_AVX2 -ANY31C(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 7) +ANY31(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 7) #endif #ifdef HAS_I422TOARGBROW_NEON -ANY31C(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7) -ANY31C(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7) -ANY31C(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, 2, 0, 4, 7) -ANY31C(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7) -ANY31C(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7) -ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7) -ANY31C(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7) -ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7) -#endif -#undef ANY31C +ANY31(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7) +ANY31(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7) +ANY31(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, 2, 0, 4, 7) +ANY31(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, 1, 0, 4, 7) +ANY31(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, 1, 0, 4, 7) +ANY31(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7) +ANY31(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7) +ANY31(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, 1, 0, 3, 7) +ANY31(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7) +ANY31(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7) +ANY31(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7) +#endif +#ifdef HAS_I422TOYUY2ROW_NEON +ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15) +#endif +#ifdef HAS_I422TOUYVYROW_NEON +ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15) +#endif +#undef ANY31 // Any 2 planes to 1. #define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \ @@ -186,6 +136,32 @@ ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7) memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ } +// Biplanar to RGB. +#ifdef HAS_NV12TOARGBROW_SSSE3 +ANY21(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_SSSE3, 1, 1, 2, 4, 7) +ANY21(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7) +#endif +#ifdef HAS_NV12TOARGBROW_AVX2 +ANY21(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15) +ANY21(NV21ToARGBRow_Any_AVX2, NV21ToARGBRow_AVX2, 1, 1, 2, 4, 15) +#endif +#ifdef HAS_NV12TOARGBROW_NEON +ANY21(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7) +ANY21(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, 1, 1, 2, 4, 7) +#endif +#ifdef HAS_NV12TORGB565ROW_SSSE3 +ANY21(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7) +ANY21(NV21ToRGB565Row_Any_SSSE3, NV21ToRGB565Row_SSSE3, 1, 1, 2, 2, 7) +#endif +#ifdef HAS_NV12TORGB565ROW_AVX2 +ANY21(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, 1, 1, 2, 2, 15) +ANY21(NV21ToRGB565Row_Any_AVX2, NV21ToRGB565Row_AVX2, 1, 1, 2, 2, 15) +#endif +#ifdef HAS_NV12TORGB565ROW_NEON +ANY21(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7) +ANY21(NV21ToRGB565Row_Any_NEON, NV21ToRGB565Row_NEON, 1, 1, 2, 2, 7) +#endif + // Merge functions. #ifdef HAS_MERGEUVROW_SSE2 ANY21(MergeUVRow_Any_SSE2, MergeUVRow_SSE2, 0, 1, 1, 2, 15) @@ -245,55 +221,6 @@ ANY21(SobelXYRow_Any_NEON, SobelXYRow_NEON, 0, 1, 1, 4, 7) #endif #undef ANY21 -// Any 2 planes to 1 with yuvconstants -#define ANY21C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \ - void NAMEANY(const uint8* y_buf, const uint8* uv_buf, \ - uint8* dst_ptr, const struct YuvConstants* yuvconstants, \ - int width) { \ - SIMD_ALIGNED(uint8 temp[64 * 3]); \ - memset(temp, 0, 64 * 2); /* for msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n); \ - } \ - memcpy(temp, y_buf + n * SBPP, r * SBPP); \ - memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2, \ - SS(r, UVSHIFT) * SBPP2); \ - ANY_SIMD(temp, temp + 64, temp + 128, yuvconstants, MASK + 1); \ - memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ - } - -// Biplanar to RGB. -#ifdef HAS_NV12TOARGBROW_SSSE3 -ANY21C(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_SSSE3, 1, 1, 2, 4, 7) -#endif -#ifdef HAS_NV12TOARGBROW_AVX2 -ANY21C(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15) -#endif -#ifdef HAS_NV12TOARGBROW_NEON -ANY21C(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7) -#endif -#ifdef HAS_NV21TOARGBROW_SSSE3 -ANY21C(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7) -#endif -#ifdef HAS_NV21TOARGBROW_AVX2 -ANY21C(NV21ToARGBRow_Any_AVX2, NV21ToARGBRow_AVX2, 1, 1, 2, 4, 15) -#endif -#ifdef HAS_NV21TOARGBROW_NEON -ANY21C(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, 1, 1, 2, 4, 7) -#endif -#ifdef HAS_NV12TORGB565ROW_SSSE3 -ANY21C(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7) -#endif -#ifdef HAS_NV12TORGB565ROW_AVX2 -ANY21C(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, 1, 1, 2, 2, 15) -#endif -#ifdef HAS_NV12TORGB565ROW_NEON -ANY21C(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7) -#endif -#undef ANY21C - // Any 1 to 1. #define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \ @@ -325,10 +252,8 @@ ANY11(ARGBToRGB565Row_Any_SSE2, ARGBToRGB565Row_SSE2, 0, 4, 2, 3) ANY11(ARGBToARGB1555Row_Any_SSE2, ARGBToARGB1555Row_SSE2, 0, 4, 2, 3) ANY11(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, 0, 4, 2, 3) #endif -#if defined(HAS_ARGBTORGB565ROW_AVX2) -ANY11(ARGBToRGB565Row_Any_AVX2, ARGBToRGB565Row_AVX2, 0, 4, 2, 7) -#endif #if defined(HAS_ARGBTOARGB4444ROW_AVX2) +ANY11(ARGBToRGB565Row_Any_AVX2, ARGBToRGB565Row_AVX2, 0, 4, 2, 7) ANY11(ARGBToARGB1555Row_Any_AVX2, ARGBToARGB1555Row_AVX2, 0, 4, 2, 7) ANY11(ARGBToARGB4444Row_Any_AVX2, ARGBToARGB4444Row_AVX2, 0, 4, 2, 7) #endif @@ -344,16 +269,15 @@ ANY11(I400ToARGBRow_Any_SSE2, I400ToARGBRow_SSE2, 0, 1, 4, 7) #if defined(HAS_I400TOARGBROW_AVX2) ANY11(I400ToARGBRow_Any_AVX2, I400ToARGBRow_AVX2, 0, 1, 4, 15) #endif -#if defined(HAS_RGB24TOARGBROW_SSSE3) +#if defined(HAS_YUY2TOARGBROW_SSSE3) +ANY11(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15) +ANY11(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15) ANY11(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, 0, 3, 4, 15) ANY11(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, 0, 3, 4, 15) ANY11(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, 0, 2, 4, 7) ANY11(ARGB1555ToARGBRow_Any_SSE2, ARGB1555ToARGBRow_SSE2, 0, 2, 4, 7) ANY11(ARGB4444ToARGBRow_Any_SSE2, ARGB4444ToARGBRow_SSE2, 0, 2, 4, 7) #endif -#if defined(HAS_RAWTORGB24ROW_SSSE3) -ANY11(RAWToRGB24Row_Any_SSSE3, RAWToRGB24Row_SSSE3, 0, 3, 3, 7) -#endif #if defined(HAS_RGB565TOARGBROW_AVX2) ANY11(RGB565ToARGBRow_Any_AVX2, RGB565ToARGBRow_AVX2, 0, 2, 4, 15) #endif @@ -363,6 +287,10 @@ ANY11(ARGB1555ToARGBRow_Any_AVX2, ARGB1555ToARGBRow_AVX2, 0, 2, 4, 15) #if defined(HAS_ARGB4444TOARGBROW_AVX2) ANY11(ARGB4444ToARGBRow_Any_AVX2, ARGB4444ToARGBRow_AVX2, 0, 2, 4, 15) #endif +#if defined(HAS_YUY2TOARGBROW_AVX2) +ANY11(YUY2ToARGBRow_Any_AVX2, YUY2ToARGBRow_AVX2, 1, 4, 4, 31) +ANY11(UYVYToARGBRow_Any_AVX2, UYVYToARGBRow_AVX2, 1, 4, 4, 31) +#endif #if defined(HAS_ARGBTORGB24ROW_NEON) ANY11(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, 0, 4, 3, 7) ANY11(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, 0, 4, 3, 7) @@ -371,9 +299,8 @@ ANY11(ARGBToARGB1555Row_Any_NEON, ARGBToARGB1555Row_NEON, 0, 4, 2, 7) ANY11(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, 0, 4, 2, 7) ANY11(J400ToARGBRow_Any_NEON, J400ToARGBRow_NEON, 0, 1, 4, 7) ANY11(I400ToARGBRow_Any_NEON, I400ToARGBRow_NEON, 0, 1, 4, 7) -#endif -#if defined(HAS_RAWTORGB24ROW_NEON) -ANY11(RAWToRGB24Row_Any_NEON, RAWToRGB24Row_NEON, 0, 3, 3, 7) +ANY11(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, 1, 4, 4, 7) +ANY11(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7) #endif #ifdef HAS_ARGBTOYROW_AVX2 ANY11(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 0, 4, 1, 31) @@ -454,6 +381,9 @@ ANY11(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 0, 2, 4, 7) #ifdef HAS_ARGBATTENUATEROW_SSSE3 ANY11(ARGBAttenuateRow_Any_SSSE3, ARGBAttenuateRow_SSSE3, 0, 4, 4, 3) #endif +#ifdef HAS_ARGBATTENUATEROW_SSE2 +ANY11(ARGBAttenuateRow_Any_SSE2, ARGBAttenuateRow_SSE2, 0, 4, 4, 3) +#endif #ifdef HAS_ARGBUNATTENUATEROW_SSE2 ANY11(ARGBUnattenuateRow_Any_SSE2, ARGBUnattenuateRow_SSE2, 0, 4, 4, 3) #endif @@ -466,44 +396,8 @@ ANY11(ARGBUnattenuateRow_Any_AVX2, ARGBUnattenuateRow_AVX2, 0, 4, 4, 7) #ifdef HAS_ARGBATTENUATEROW_NEON ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7) #endif -#ifdef HAS_ARGBEXTRACTALPHAROW_SSE2 -ANY11(ARGBExtractAlphaRow_Any_SSE2, ARGBExtractAlphaRow_SSE2, 0, 4, 1, 7) -#endif -#ifdef HAS_ARGBEXTRACTALPHAROW_NEON -ANY11(ARGBExtractAlphaRow_Any_NEON, ARGBExtractAlphaRow_NEON, 0, 4, 1, 15) -#endif #undef ANY11 -// Any 1 to 1 blended. Destination is read, modify, write. -#define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ - void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \ - SIMD_ALIGNED(uint8 temp[128 * 2]); \ - memset(temp, 0, 128 * 2); /* for YUY2 and msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(src_ptr, dst_ptr, n); \ - } \ - memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \ - memcpy(temp + 128, dst_ptr + n * BPP, r * BPP); \ - ANY_SIMD(temp, temp + 128, MASK + 1); \ - memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ - } - -#ifdef HAS_ARGBCOPYALPHAROW_AVX2 -ANY11B(ARGBCopyAlphaRow_Any_AVX2, ARGBCopyAlphaRow_AVX2, 0, 4, 4, 15) -#endif -#ifdef HAS_ARGBCOPYALPHAROW_SSE2 -ANY11B(ARGBCopyAlphaRow_Any_SSE2, ARGBCopyAlphaRow_SSE2, 0, 4, 4, 7) -#endif -#ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2 -ANY11B(ARGBCopyYToAlphaRow_Any_AVX2, ARGBCopyYToAlphaRow_AVX2, 0, 1, 4, 15) -#endif -#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2 -ANY11B(ARGBCopyYToAlphaRow_Any_SSE2, ARGBCopyYToAlphaRow_SSE2, 0, 1, 4, 7) -#endif -#undef ANY11B - // Any 1 to 1 with parameter. #define ANY11P(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \ void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, \ @@ -546,35 +440,6 @@ ANY11P(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, const uint8*, 4, 4, 3) #endif #undef ANY11P -// Any 1 to 1 with yuvconstants -#define ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ - void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, \ - const struct YuvConstants* yuvconstants, int width) { \ - SIMD_ALIGNED(uint8 temp[128 * 2]); \ - memset(temp, 0, 128); /* for YUY2 and msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(src_ptr, dst_ptr, yuvconstants, n); \ - } \ - memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \ - ANY_SIMD(temp, temp + 128, yuvconstants, MASK + 1); \ - memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ - } -#if defined(HAS_YUY2TOARGBROW_SSSE3) -ANY11C(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15) -ANY11C(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15) -#endif -#if defined(HAS_YUY2TOARGBROW_AVX2) -ANY11C(YUY2ToARGBRow_Any_AVX2, YUY2ToARGBRow_AVX2, 1, 4, 4, 31) -ANY11C(UYVYToARGBRow_Any_AVX2, UYVYToARGBRow_AVX2, 1, 4, 4, 31) -#endif -#if defined(HAS_YUY2TOARGBROW_NEON) -ANY11C(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, 1, 4, 4, 7) -ANY11C(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7) -#endif -#undef ANY11C - // Any 1 to 1 interpolate. Takes 2 rows of source via stride. #define ANY11T(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \ void NAMEANY(uint8* dst_ptr, const uint8* src_ptr, \ @@ -599,11 +464,14 @@ ANY11T(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, 1, 1, 31) #ifdef HAS_INTERPOLATEROW_SSSE3 ANY11T(InterpolateRow_Any_SSSE3, InterpolateRow_SSSE3, 1, 1, 15) #endif +#ifdef HAS_INTERPOLATEROW_SSE2 +ANY11T(InterpolateRow_Any_SSE2, InterpolateRow_SSE2, 1, 1, 15) +#endif #ifdef HAS_INTERPOLATEROW_NEON ANY11T(InterpolateRow_Any_NEON, InterpolateRow_NEON, 1, 1, 15) #endif -#ifdef HAS_INTERPOLATEROW_DSPR2 -ANY11T(InterpolateRow_Any_DSPR2, InterpolateRow_DSPR2, 1, 1, 3) +#ifdef HAS_INTERPOLATEROW_MIPS_DSPR2 +ANY11T(InterpolateRow_Any_MIPS_DSPR2, InterpolateRow_MIPS_DSPR2, 1, 1, 3) #endif #undef ANY11T @@ -628,6 +496,9 @@ ANY11M(MirrorRow_Any_AVX2, MirrorRow_AVX2, 1, 31) #ifdef HAS_MIRRORROW_SSSE3 ANY11M(MirrorRow_Any_SSSE3, MirrorRow_SSSE3, 1, 15) #endif +#ifdef HAS_MIRRORROW_SSE2 +ANY11M(MirrorRow_Any_SSE2, MirrorRow_SSE2, 1, 15) +#endif #ifdef HAS_MIRRORROW_NEON ANY11M(MirrorRow_Any_NEON, MirrorRow_NEON, 1, 15) #endif @@ -677,25 +548,9 @@ ANY1(ARGBSetRow_Any_NEON, ARGBSetRow_NEON, uint32, 4, 3) ANY_SIMD(src_ptr, dst_u, dst_v, n); \ } \ memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \ - /* repeat last 4 bytes for 422 subsampler */ \ - if ((width & 1) && BPP == 4 && DUVSHIFT == 1) { \ - memcpy(temp + SS(r, UVSHIFT) * BPP, \ - temp + SS(r, UVSHIFT) * BPP - BPP, BPP); \ - } \ - /* repeat last 4 - 12 bytes for 411 subsampler */ \ - if (((width & 3) == 1) && BPP == 4 && DUVSHIFT == 2) { \ - memcpy(temp + SS(r, UVSHIFT) * BPP, \ - temp + SS(r, UVSHIFT) * BPP - BPP, BPP); \ - memcpy(temp + SS(r, UVSHIFT) * BPP + BPP, \ - temp + SS(r, UVSHIFT) * BPP - BPP, BPP * 2); \ - } \ - if (((width & 3) == 2) && BPP == 4 && DUVSHIFT == 2) { \ + if ((width & 1) && BPP == 4) { /* repeat last 4 bytes for subsampler */ \ memcpy(temp + SS(r, UVSHIFT) * BPP, \ - temp + SS(r, UVSHIFT) * BPP - BPP * 2, BPP * 2); \ - } \ - if (((width & 3) == 3) && BPP == 4 && DUVSHIFT == 2) { \ - memcpy(temp + SS(r, UVSHIFT) * BPP, \ - temp + SS(r, UVSHIFT) * BPP - BPP, BPP); \ + temp + SS(r, UVSHIFT) * BPP - BPP, 4); \ } \ ANY_SIMD(temp, temp + 128, temp + 256, MASK + 1); \ memcpy(dst_u + (n >> DUVSHIFT), temp + 128, SS(r, DUVSHIFT)); \ @@ -711,8 +566,8 @@ ANY12(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, 0, 2, 0, 31) #ifdef HAS_SPLITUVROW_NEON ANY12(SplitUVRow_Any_NEON, SplitUVRow_NEON, 0, 2, 0, 15) #endif -#ifdef HAS_SPLITUVROW_DSPR2 -ANY12(SplitUVRow_Any_DSPR2, SplitUVRow_DSPR2, 0, 2, 0, 15) +#ifdef HAS_SPLITUVROW_MIPS_DSPR2 +ANY12(SplitUVRow_Any_MIPS_DSPR2, SplitUVRow_MIPS_DSPR2, 0, 2, 0, 15) #endif #ifdef HAS_ARGBTOUV444ROW_SSSE3 ANY12(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_SSSE3, 0, 4, 0, 15) @@ -721,12 +576,16 @@ ANY12(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_SSSE3, 0, 4, 0, 15) ANY12(YUY2ToUV422Row_Any_AVX2, YUY2ToUV422Row_AVX2, 1, 4, 1, 31) ANY12(UYVYToUV422Row_Any_AVX2, UYVYToUV422Row_AVX2, 1, 4, 1, 31) #endif +#ifdef HAS_ARGBTOUV422ROW_SSSE3 +ANY12(ARGBToUV422Row_Any_SSSE3, ARGBToUV422Row_SSSE3, 0, 4, 1, 15) +#endif #ifdef HAS_YUY2TOUV422ROW_SSE2 ANY12(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_SSE2, 1, 4, 1, 15) ANY12(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_SSE2, 1, 4, 1, 15) #endif #ifdef HAS_YUY2TOUV422ROW_NEON ANY12(ARGBToUV444Row_Any_NEON, ARGBToUV444Row_NEON, 0, 4, 0, 7) +ANY12(ARGBToUV422Row_Any_NEON, ARGBToUV422Row_NEON, 0, 4, 1, 15) ANY12(ARGBToUV411Row_Any_NEON, ARGBToUV411Row_NEON, 0, 4, 2, 31) ANY12(YUY2ToUV422Row_Any_NEON, YUY2ToUV422Row_NEON, 1, 4, 1, 15) ANY12(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, 1, 4, 1, 15) @@ -748,11 +607,11 @@ ANY12(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, 1, 4, 1, 15) memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \ memcpy(temp + 128, src_ptr + src_stride_ptr + (n >> UVSHIFT) * BPP, \ SS(r, UVSHIFT) * BPP); \ - if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */\ + if ((width & 1) && BPP == 4) { /* repeat last 4 bytes for subsampler */ \ memcpy(temp + SS(r, UVSHIFT) * BPP, \ - temp + SS(r, UVSHIFT) * BPP - BPP, BPP); \ + temp + SS(r, UVSHIFT) * BPP - BPP, 4); \ memcpy(temp + 128 + SS(r, UVSHIFT) * BPP, \ - temp + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP); \ + temp + 128 + SS(r, UVSHIFT) * BPP - BPP, 4); \ } \ ANY_SIMD(temp, 128, temp + 256, temp + 384, MASK + 1); \ memcpy(dst_u + (n >> 1), temp + 256, SS(r, 1)); \ @@ -762,9 +621,6 @@ ANY12(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, 1, 4, 1, 15) #ifdef HAS_ARGBTOUVROW_AVX2 ANY12S(ARGBToUVRow_Any_AVX2, ARGBToUVRow_AVX2, 0, 4, 31) #endif -#ifdef HAS_ARGBTOUVJROW_AVX2 -ANY12S(ARGBToUVJRow_Any_AVX2, ARGBToUVJRow_AVX2, 0, 4, 31) -#endif #ifdef HAS_ARGBTOUVROW_SSSE3 ANY12S(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_SSSE3, 0, 4, 15) ANY12S(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_SSSE3, 0, 4, 15) |