diff options
author | Jim Bankoski <jimbankoski@google.com> | 2016-06-29 16:18:01 -0700 |
---|---|---|
committer | James Bankoski <jimbankoski@google.com> | 2016-06-30 13:25:39 +0000 |
commit | aa81375d73ee33d382e7f717c519db6159e497ee (patch) | |
tree | b20b8b92299aff97cee21c68d519e1c5816b9386 /third_party/libyuv/include | |
parent | e85607410e7c3ddf50b52944972ed8a48607117b (diff) | |
download | libvpx-aa81375d73ee33d382e7f717c519db6159e497ee.tar libvpx-aa81375d73ee33d382e7f717c519db6159e497ee.tar.gz libvpx-aa81375d73ee33d382e7f717c519db6159e497ee.tar.bz2 libvpx-aa81375d73ee33d382e7f717c519db6159e497ee.zip |
libyuv: update to 2f101fdb
Fixes color issue when scaling without breaking mingw.
BUG=https://bugs.chromium.org/p/libyuv/issues/detail?id=605
BUG=https://bugs.chromium.org/p/webm/issues/detail?id=1252
Change-Id: Ifba747feb0c6a08f2b353b820a24c6c145d440ad
Diffstat (limited to 'third_party/libyuv/include')
-rw-r--r-- | third_party/libyuv/include/libyuv/convert.h | 6 | ||||
-rw-r--r-- | third_party/libyuv/include/libyuv/convert_argb.h | 88 | ||||
-rw-r--r-- | third_party/libyuv/include/libyuv/convert_from.h | 2 | ||||
-rw-r--r-- | third_party/libyuv/include/libyuv/cpu_id.h | 11 | ||||
-rw-r--r-- | third_party/libyuv/include/libyuv/planar_functions.h | 94 | ||||
-rw-r--r-- | third_party/libyuv/include/libyuv/rotate_row.h | 71 | ||||
-rw-r--r-- | third_party/libyuv/include/libyuv/row.h | 1143 | ||||
-rw-r--r-- | third_party/libyuv/include/libyuv/scale_argb.h | 1 | ||||
-rw-r--r-- | third_party/libyuv/include/libyuv/scale_row.h | 135 | ||||
-rw-r--r-- | third_party/libyuv/include/libyuv/version.h | 2 | ||||
-rw-r--r-- | third_party/libyuv/include/libyuv/video_common.h | 6 |
11 files changed, 890 insertions, 669 deletions
diff --git a/third_party/libyuv/include/libyuv/convert.h b/third_party/libyuv/include/libyuv/convert.h index a8d3fa07a..a2cdc5718 100644 --- a/third_party/libyuv/include/libyuv/convert.h +++ b/third_party/libyuv/include/libyuv/convert.h @@ -12,10 +12,8 @@ #define INCLUDE_LIBYUV_CONVERT_H_ #include "libyuv/basic_types.h" -// TODO(fbarchard): Remove the following headers includes. -#include "libyuv/convert_from.h" -#include "libyuv/planar_functions.h" -#include "libyuv/rotate.h" + +#include "libyuv/rotate.h" // For enum RotationMode. #ifdef __cplusplus namespace libyuv { diff --git a/third_party/libyuv/include/libyuv/convert_argb.h b/third_party/libyuv/include/libyuv/convert_argb.h index 360c6d359..079d273b1 100644 --- a/third_party/libyuv/include/libyuv/convert_argb.h +++ b/third_party/libyuv/include/libyuv/convert_argb.h @@ -12,10 +12,8 @@ #define INCLUDE_LIBYUV_CONVERT_ARGB_H_ #include "libyuv/basic_types.h" -// TODO(fbarchard): Remove the following headers includes -#include "libyuv/convert_from.h" -#include "libyuv/planar_functions.h" -#include "libyuv/rotate.h" + +#include "libyuv/rotate.h" // For enum RotationMode. // TODO(fbarchard): This set of functions should exactly match convert.h // TODO(fbarchard): Add tests. Create random content of right size and convert @@ -60,6 +58,22 @@ int I444ToARGB(const uint8* src_y, int src_stride_y, uint8* dst_argb, int dst_stride_argb, int width, int height); +// Convert J444 to ARGB. +LIBYUV_API +int J444ToARGB(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_argb, int dst_stride_argb, + int width, int height); + +// Convert I444 to ABGR. +LIBYUV_API +int I444ToABGR(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_abgr, int dst_stride_abgr, + int width, int height); + // Convert I411 to ARGB. LIBYUV_API int I411ToARGB(const uint8* src_y, int src_stride_y, @@ -68,6 +82,24 @@ int I411ToARGB(const uint8* src_y, int src_stride_y, uint8* dst_argb, int dst_stride_argb, int width, int height); +// Convert I420 with Alpha to preattenuated ARGB. +LIBYUV_API +int I420AlphaToARGB(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + const uint8* src_a, int src_stride_a, + uint8* dst_argb, int dst_stride_argb, + int width, int height, int attenuate); + +// Convert I420 with Alpha to preattenuated ABGR. +LIBYUV_API +int I420AlphaToABGR(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + const uint8* src_a, int src_stride_a, + uint8* dst_abgr, int dst_stride_abgr, + int width, int height, int attenuate); + // Convert I400 (grey) to ARGB. Reverse of ARGBToI400. LIBYUV_API int I400ToARGB(const uint8* src_y, int src_stride_y, @@ -131,6 +163,54 @@ int J422ToARGB(const uint8* src_y, int src_stride_y, uint8* dst_argb, int dst_stride_argb, int width, int height); +// Convert J420 to ABGR. +LIBYUV_API +int J420ToABGR(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_abgr, int dst_stride_abgr, + int width, int height); + +// Convert J422 to ABGR. +LIBYUV_API +int J422ToABGR(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_abgr, int dst_stride_abgr, + int width, int height); + +// Convert H420 to ARGB. +LIBYUV_API +int H420ToARGB(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_argb, int dst_stride_argb, + int width, int height); + +// Convert H422 to ARGB. +LIBYUV_API +int H422ToARGB(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_argb, int dst_stride_argb, + int width, int height); + +// Convert H420 to ABGR. +LIBYUV_API +int H420ToABGR(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_abgr, int dst_stride_abgr, + int width, int height); + +// Convert H422 to ABGR. +LIBYUV_API +int H422ToABGR(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_abgr, int dst_stride_abgr, + int width, int height); + // BGRA little endian (argb in memory) to ARGB. LIBYUV_API int BGRAToARGB(const uint8* src_frame, int src_stride_frame, diff --git a/third_party/libyuv/include/libyuv/convert_from.h b/third_party/libyuv/include/libyuv/convert_from.h index 9fd8d4de5..39e1578a0 100644 --- a/third_party/libyuv/include/libyuv/convert_from.h +++ b/third_party/libyuv/include/libyuv/convert_from.h @@ -56,8 +56,6 @@ int I400Copy(const uint8* src_y, int src_stride_y, uint8* dst_y, int dst_stride_y, int width, int height); -// TODO(fbarchard): I420ToM420 - LIBYUV_API int I420ToNV12(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, diff --git a/third_party/libyuv/include/libyuv/cpu_id.h b/third_party/libyuv/include/libyuv/cpu_id.h index dc858a814..dfb7445e2 100644 --- a/third_party/libyuv/include/libyuv/cpu_id.h +++ b/third_party/libyuv/include/libyuv/cpu_id.h @@ -18,9 +18,8 @@ namespace libyuv { extern "C" { #endif -// TODO(fbarchard): Consider overlapping bits for different architectures. // Internal flag to indicate cpuid requires initialization. -#define kCpuInit 0x1 +static const int kCpuInitialized = 0x1; // These flags are only valid on ARM processors. static const int kCpuHasARM = 0x2; @@ -37,12 +36,12 @@ static const int kCpuHasAVX = 0x200; static const int kCpuHasAVX2 = 0x400; static const int kCpuHasERMS = 0x800; static const int kCpuHasFMA3 = 0x1000; +static const int kCpuHasAVX3 = 0x2000; // 0x2000, 0x4000, 0x8000 reserved for future X86 flags. // These flags are only valid on MIPS processors. static const int kCpuHasMIPS = 0x10000; -static const int kCpuHasMIPS_DSP = 0x20000; -static const int kCpuHasMIPS_DSPR2 = 0x40000; +static const int kCpuHasDSPR2 = 0x20000; // Internal function used to auto-init. LIBYUV_API @@ -57,13 +56,13 @@ int ArmCpuCaps(const char* cpuinfo_name); // returns non-zero if instruction set is detected static __inline int TestCpuFlag(int test_flag) { LIBYUV_API extern int cpu_info_; - return (cpu_info_ == kCpuInit ? InitCpuFlags() : cpu_info_) & test_flag; + return (!cpu_info_ ? InitCpuFlags() : cpu_info_) & test_flag; } // For testing, allow CPU flags to be disabled. // ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3. // MaskCpuFlags(-1) to enable all cpu specific optimizations. -// MaskCpuFlags(0) to disable all cpu specific optimizations. +// MaskCpuFlags(1) to disable all cpu specific optimizations. LIBYUV_API void MaskCpuFlags(int enable_flags); diff --git a/third_party/libyuv/include/libyuv/planar_functions.h b/third_party/libyuv/include/libyuv/planar_functions.h index ae994db89..881b0c5c6 100644 --- a/third_party/libyuv/include/libyuv/planar_functions.h +++ b/third_party/libyuv/include/libyuv/planar_functions.h @@ -145,13 +145,6 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y, uint8* dst_rgb565, int dst_stride_rgb565, int width, int height); -// Convert NV21 to RGB565. -LIBYUV_API -int NV21ToRGB565(const uint8* src_y, int src_stride_y, - const uint8* src_uv, int src_stride_uv, - uint8* dst_rgb565, int dst_stride_rgb565, - int width, int height); - // I422ToARGB is in convert_argb.h // Convert I422 to BGRA. LIBYUV_API @@ -177,6 +170,14 @@ int I422ToRGBA(const uint8* src_y, int src_stride_y, uint8* dst_rgba, int dst_stride_rgba, int width, int height); +// Alias +#define RGB24ToRAW RAWToRGB24 + +LIBYUV_API +int RAWToRGB24(const uint8* src_raw, int src_stride_raw, + uint8* dst_rgb24, int dst_stride_rgb24, + int width, int height); + // Draw a rectangle into I420. LIBYUV_API int I420Rect(uint8* dst_y, int dst_stride_y, @@ -281,13 +282,19 @@ int ARGBCopy(const uint8* src_argb, int src_stride_argb, uint8* dst_argb, int dst_stride_argb, int width, int height); -// Copy ARGB to ARGB. +// Copy Alpha channel of ARGB to alpha of ARGB. LIBYUV_API int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb, uint8* dst_argb, int dst_stride_argb, int width, int height); -// Copy ARGB to ARGB. +// Extract the alpha channel from ARGB. +LIBYUV_API +int ARGBExtractAlpha(const uint8* src_argb, int src_stride_argb, + uint8* dst_a, int dst_stride_a, + int width, int height); + +// Copy Y channel to Alpha of ARGB. LIBYUV_API int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y, uint8* dst_argb, int dst_stride_argb, @@ -301,6 +308,7 @@ LIBYUV_API ARGBBlendRow GetARGBBlend(); // Alpha Blend ARGB images and store to destination. +// Source is pre-multiplied by alpha using ARGBAttenuate. // Alpha of destination is set to 255. LIBYUV_API int ARGBBlend(const uint8* src_argb0, int src_stride_argb0, @@ -308,6 +316,31 @@ int ARGBBlend(const uint8* src_argb0, int src_stride_argb0, uint8* dst_argb, int dst_stride_argb, int width, int height); +// Alpha Blend plane and store to destination. +// Source is not pre-multiplied by alpha. +LIBYUV_API +int BlendPlane(const uint8* src_y0, int src_stride_y0, + const uint8* src_y1, int src_stride_y1, + const uint8* alpha, int alpha_stride, + uint8* dst_y, int dst_stride_y, + int width, int height); + +// Alpha Blend YUV images and store to destination. +// Source is not pre-multiplied by alpha. +// Alpha is full width x height and subsampled to half size to apply to UV. +LIBYUV_API +int I420Blend(const uint8* src_y0, int src_stride_y0, + const uint8* src_u0, int src_stride_u0, + const uint8* src_v0, int src_stride_v0, + const uint8* src_y1, int src_stride_y1, + const uint8* src_u1, int src_stride_u1, + const uint8* src_v1, int src_stride_v1, + const uint8* alpha, int alpha_stride, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + // Multiply ARGB image by ARGB image. Shifted down by 8. Saturates to 255. LIBYUV_API int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0, @@ -357,12 +390,6 @@ int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb, uint8* dst_argb, int dst_stride_argb, int width, int height); -// Convert MJPG to ARGB. -LIBYUV_API -int MJPGToARGB(const uint8* sample, size_t sample_size, - uint8* argb, int argb_stride, - int w, int h, int dw, int dh); - // Internal function - do not call directly. // Computes table of cumulative sum for image where the value is the sum // of all values above and to the left of the entry. Used by ARGBBlur. @@ -389,22 +416,49 @@ int ARGBShade(const uint8* src_argb, int src_stride_argb, uint8* dst_argb, int dst_stride_argb, int width, int height, uint32 value); -// Interpolate between two ARGB images using specified amount of interpolation +// Interpolate between two images using specified amount of interpolation // (0 to 255) and store to destination. -// 'interpolation' is specified as 8 bit fraction where 0 means 100% src_argb0 -// and 255 means 1% src_argb0 and 99% src_argb1. -// Internally uses ARGBScale bilinear filtering. -// Caveat: This function will write up to 16 bytes beyond the end of dst_argb. +// 'interpolation' is specified as 8 bit fraction where 0 means 100% src0 +// and 255 means 1% src0 and 99% src1. +LIBYUV_API +int InterpolatePlane(const uint8* src0, int src_stride0, + const uint8* src1, int src_stride1, + uint8* dst, int dst_stride, + int width, int height, int interpolation); + +// Interpolate between two ARGB images using specified amount of interpolation +// Internally calls InterpolatePlane with width * 4 (bpp). LIBYUV_API int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0, const uint8* src_argb1, int src_stride_argb1, uint8* dst_argb, int dst_stride_argb, int width, int height, int interpolation); +// Interpolate between two YUV images using specified amount of interpolation +// Internally calls InterpolatePlane on each plane where the U and V planes +// are half width and half height. +LIBYUV_API +int I420Interpolate(const uint8* src0_y, int src0_stride_y, + const uint8* src0_u, int src0_stride_u, + const uint8* src0_v, int src0_stride_v, + const uint8* src1_y, int src1_stride_y, + const uint8* src1_u, int src1_stride_u, + const uint8* src1_v, int src1_stride_v, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height, int interpolation); + #if defined(__pnacl__) || defined(__CLR_VER) || \ (defined(__i386__) && !defined(__SSE2__)) #define LIBYUV_DISABLE_X86 #endif +// MemorySanitizer does not support assembly code yet. http://crbug.com/344505 +#if defined(__has_feature) +#if __has_feature(memory_sanitizer) +#define LIBYUV_DISABLE_X86 +#endif +#endif // The following are available on all x86 platforms: #if !defined(LIBYUV_DISABLE_X86) && \ (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) diff --git a/third_party/libyuv/include/libyuv/rotate_row.h b/third_party/libyuv/include/libyuv/rotate_row.h index c41cf3273..ebc487f9a 100644 --- a/third_party/libyuv/include/libyuv/rotate_row.h +++ b/third_party/libyuv/include/libyuv/rotate_row.h @@ -22,53 +22,24 @@ extern "C" { (defined(__i386__) && !defined(__SSE2__)) #define LIBYUV_DISABLE_X86 #endif - -// Visual C 2012 required for AVX2. -#if defined(_M_IX86) && !defined(__clang__) && \ - defined(_MSC_VER) && _MSC_VER >= 1700 -#define VISUALC_HAS_AVX2 1 -#endif // VisualStudio >= 2012 - -// TODO(fbarchard): switch to standard form of inline; fails on clangcl. -#if !defined(LIBYUV_DISABLE_X86) && \ - (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) -#if defined(__APPLE__) && defined(__i386__) -#define DECLARE_FUNCTION(name) \ - ".text \n" \ - ".private_extern _" #name " \n" \ - ".align 4,0x90 \n" \ -"_" #name ": \n" -#elif defined(__MINGW32__) || defined(__CYGWIN__) && defined(__i386__) -#define DECLARE_FUNCTION(name) \ - ".text \n" \ - ".align 4,0x90 \n" \ -"_" #name ": \n" -#else -#define DECLARE_FUNCTION(name) \ - ".text \n" \ - ".align 4,0x90 \n" \ -#name ": \n" +// MemorySanitizer does not support assembly code yet. http://crbug.com/344505 +#if defined(__has_feature) +#if __has_feature(memory_sanitizer) +#define LIBYUV_DISABLE_X86 #endif #endif - -// The following are available for Visual C: -#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \ - defined(_MSC_VER) && !defined(__clang__) +// The following are available for Visual C and clangcl 32 bit: +#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) #define HAS_TRANSPOSEWX8_SSSE3 #define HAS_TRANSPOSEUVWX8_SSE2 #endif -// The following are available for GCC but not NaCL: +// The following are available for GCC 32 or 64 bit but not NaCL for 64 bit: #if !defined(LIBYUV_DISABLE_X86) && \ (defined(__i386__) || (defined(__x86_64__) && !defined(__native_client__))) #define HAS_TRANSPOSEWX8_SSSE3 #endif -// The following are available for 32 bit GCC: -#if !defined(LIBYUV_DISABLE_X86) && defined(__i386__) && !defined(__clang__) -#define HAS_TRANSPOSEUVWX8_SSE2 -#endif - // The following are available for 64 bit GCC but not NaCL: #if !defined(LIBYUV_DISABLE_X86) && !defined(__native_client__) && \ defined(__x86_64__) @@ -85,8 +56,8 @@ extern "C" { #if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \ defined(__mips__) && \ defined(__mips_dsp) && (__mips_dsp_rev >= 2) -#define HAS_TRANSPOSEWX8_MIPS_DSPR2 -#define HAS_TRANSPOSEUVWx8_MIPS_DSPR2 +#define HAS_TRANSPOSEWX8_DSPR2 +#define HAS_TRANSPOSEUVWX8_DSPR2 #endif // defined(__mips__) void TransposeWxH_C(const uint8* src, int src_stride, @@ -100,7 +71,9 @@ void TransposeWx8_SSSE3(const uint8* src, int src_stride, uint8* dst, int dst_stride, int width); void TransposeWx8_Fast_SSSE3(const uint8* src, int src_stride, uint8* dst, int dst_stride, int width); -void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride, +void TransposeWx8_DSPR2(const uint8* src, int src_stride, + uint8* dst, int dst_stride, int width); +void TransposeWx8_Fast_DSPR2(const uint8* src, int src_stride, uint8* dst, int dst_stride, int width); void TransposeWx8_Any_NEON(const uint8* src, int src_stride, @@ -109,8 +82,8 @@ void TransposeWx8_Any_SSSE3(const uint8* src, int src_stride, uint8* dst, int dst_stride, int width); void TransposeWx8_Fast_Any_SSSE3(const uint8* src, int src_stride, uint8* dst, int dst_stride, int width); -void TransposeWx8_Any_MIPS_DSPR2(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width); +void TransposeWx8_Any_DSPR2(const uint8* src, int src_stride, + uint8* dst, int dst_stride, int width); void TransposeUVWxH_C(const uint8* src, int src_stride, uint8* dst_a, int dst_stride_a, @@ -126,9 +99,19 @@ void TransposeUVWx8_SSE2(const uint8* src, int src_stride, void TransposeUVWx8_NEON(const uint8* src, int src_stride, uint8* dst_a, int dst_stride_a, uint8* dst_b, int dst_stride_b, int width); -void TransposeUVWx8_MIPS_DSPR2(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, int width); +void TransposeUVWx8_DSPR2(const uint8* src, int src_stride, + uint8* dst_a, int dst_stride_a, + uint8* dst_b, int dst_stride_b, int width); + +void TransposeUVWx8_Any_SSE2(const uint8* src, int src_stride, + uint8* dst_a, int dst_stride_a, + uint8* dst_b, int dst_stride_b, int width); +void TransposeUVWx8_Any_NEON(const uint8* src, int src_stride, + uint8* dst_a, int dst_stride_a, + uint8* dst_b, int dst_stride_b, int width); +void TransposeUVWx8_Any_DSPR2(const uint8* src, int src_stride, + uint8* dst_a, int dst_stride_a, + uint8* dst_b, int dst_stride_b, int width); #ifdef __cplusplus } // extern "C" diff --git a/third_party/libyuv/include/libyuv/row.h b/third_party/libyuv/include/libyuv/row.h index ebae3e719..055880ba5 100644 --- a/third_party/libyuv/include/libyuv/row.h +++ b/third_party/libyuv/include/libyuv/row.h @@ -41,6 +41,12 @@ extern "C" { (defined(__i386__) && !defined(__SSE2__)) #define LIBYUV_DISABLE_X86 #endif +// MemorySanitizer does not support assembly code yet. http://crbug.com/344505 +#if defined(__has_feature) +#if __has_feature(memory_sanitizer) +#define LIBYUV_DISABLE_X86 +#endif +#endif // True if compiling for SSSE3 as a requirement. #if defined(__SSSE3__) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 3)) #define LIBYUV_SSSE3_ONLY @@ -56,6 +62,26 @@ extern "C" { #endif // clang >= 3.5 #endif // __clang__ +// GCC >= 4.7.0 required for AVX2. +#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__)) +#if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7)) +#define GCC_HAS_AVX2 1 +#endif // GNUC >= 4.7 +#endif // __GNUC__ + +// clang >= 3.4.0 required for AVX2. +#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) +#if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4)) +#define CLANG_HAS_AVX2 1 +#endif // clang >= 3.4 +#endif // __clang__ + +// Visual C 2012 required for AVX2. +#if defined(_M_IX86) && !defined(__clang__) && \ + defined(_MSC_VER) && _MSC_VER >= 1700 +#define VISUALC_HAS_AVX2 1 +#endif // VisualStudio >= 2012 + // The following are available on all x86 platforms: #if !defined(LIBYUV_DISABLE_X86) && \ (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) @@ -71,25 +97,23 @@ extern "C" { #define HAS_ARGBTOARGB4444ROW_SSE2 #define HAS_ARGBTORAWROW_SSSE3 #define HAS_ARGBTORGB24ROW_SSSE3 +#define HAS_ARGBTORGB565DITHERROW_SSE2 #define HAS_ARGBTORGB565ROW_SSE2 -#define HAS_ARGBTOUV422ROW_SSSE3 #define HAS_ARGBTOUV444ROW_SSSE3 #define HAS_ARGBTOUVJROW_SSSE3 #define HAS_ARGBTOUVROW_SSSE3 #define HAS_ARGBTOYJROW_SSSE3 #define HAS_ARGBTOYROW_SSSE3 +#define HAS_ARGBEXTRACTALPHAROW_SSE2 #define HAS_BGRATOUVROW_SSSE3 #define HAS_BGRATOYROW_SSSE3 #define HAS_COPYROW_ERMS #define HAS_COPYROW_SSE2 +#define HAS_H422TOARGBROW_SSSE3 #define HAS_I400TOARGBROW_SSE2 -#define HAS_I411TOARGBROW_SSSE3 -#define HAS_I422TOABGRROW_SSSE3 #define HAS_I422TOARGB1555ROW_SSSE3 #define HAS_I422TOARGB4444ROW_SSSE3 #define HAS_I422TOARGBROW_SSSE3 -#define HAS_I422TOBGRAROW_SSSE3 -#define HAS_I422TORAWROW_SSSE3 #define HAS_I422TORGB24ROW_SSSE3 #define HAS_I422TORGB565ROW_SSSE3 #define HAS_I422TORGBAROW_SSSE3 @@ -99,15 +123,13 @@ extern "C" { #define HAS_J400TOARGBROW_SSE2 #define HAS_J422TOARGBROW_SSSE3 #define HAS_MERGEUVROW_SSE2 -#define HAS_MIRRORROW_SSE2 #define HAS_MIRRORROW_SSSE3 -#define HAS_MIRRORROW_UV_SSSE3 #define HAS_MIRRORUVROW_SSSE3 #define HAS_NV12TOARGBROW_SSSE3 #define HAS_NV12TORGB565ROW_SSSE3 #define HAS_NV21TOARGBROW_SSSE3 -#define HAS_NV21TORGB565ROW_SSSE3 #define HAS_RAWTOARGBROW_SSSE3 +#define HAS_RAWTORGB24ROW_SSSE3 #define HAS_RAWTOYROW_SSSE3 #define HAS_RGB24TOARGBROW_SSSE3 #define HAS_RGB24TOYROW_SSSE3 @@ -145,9 +167,9 @@ extern "C" { #define HAS_ARGBSHADEROW_SSE2 #define HAS_ARGBSUBTRACTROW_SSE2 #define HAS_ARGBUNATTENUATEROW_SSE2 +#define HAS_BLENDPLANEROW_SSSE3 #define HAS_COMPUTECUMULATIVESUMROW_SSE2 #define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2 -#define HAS_INTERPOLATEROW_SSE2 #define HAS_INTERPOLATEROW_SSSE3 #define HAS_RGBCOLORTABLEROW_X86 #define HAS_SOBELROW_SSE2 @@ -155,54 +177,18 @@ extern "C" { #define HAS_SOBELXROW_SSE2 #define HAS_SOBELXYROW_SSE2 #define HAS_SOBELYROW_SSE2 -#endif -// The following are available on x64 Visual C and clangcl. -#if !defined(LIBYUV_DISABLE_X86) && defined (_M_X64) && \ - (!defined(__clang__) || defined(__SSSE3__)) -#define HAS_I422TOARGBROW_SSSE3 +// The following functions fail on gcc/clang 32 bit with fpic and framepointer. +// caveat: clangcl uses row_win.cc which works. +#if defined(NDEBUG) || !(defined(_DEBUG) && defined(__i386__)) || \ + !defined(__i386__) || defined(_MSC_VER) +// TODO(fbarchard): fix build error on x86 debug +// https://code.google.com/p/libyuv/issues/detail?id=524 +#define HAS_I411TOARGBROW_SSSE3 +// TODO(fbarchard): fix build error on android_full_debug=1 +// https://code.google.com/p/libyuv/issues/detail?id=517 +#define HAS_I422ALPHATOARGBROW_SSSE3 #endif - -// GCC >= 4.7.0 required for AVX2. -#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__)) -#if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7)) -#define GCC_HAS_AVX2 1 -#endif // GNUC >= 4.7 -#endif // __GNUC__ - -// clang >= 3.4.0 required for AVX2. -#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) -#if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4)) -#define CLANG_HAS_AVX2 1 -#endif // clang >= 3.4 -#endif // __clang__ - -// Visual C 2012 required for AVX2. -#if defined(_M_IX86) && !defined(__clang__) && \ - defined(_MSC_VER) && _MSC_VER >= 1700 -#define VISUALC_HAS_AVX2 1 -#endif // VisualStudio >= 2012 - -// The following are available require VS2012. Port to GCC. -#if !defined(LIBYUV_DISABLE_X86) && defined(VISUALC_HAS_AVX2) -#define HAS_ARGB1555TOARGBROW_AVX2 -#define HAS_ARGB4444TOARGBROW_AVX2 -#define HAS_ARGBTOARGB1555ROW_AVX2 -#define HAS_ARGBTOARGB4444ROW_AVX2 -#define HAS_ARGBTORGB565DITHERROW_AVX2 -#define HAS_ARGBTORGB565DITHERROW_SSE2 -#define HAS_ARGBTORGB565ROW_AVX2 -#define HAS_I411TOARGBROW_AVX2 -#define HAS_I422TOARGB1555ROW_AVX2 -#define HAS_I422TOARGB4444ROW_AVX2 -#define HAS_I422TORGB565ROW_AVX2 -#define HAS_I444TOARGBROW_AVX2 -#define HAS_J400TOARGBROW_AVX2 -#define HAS_NV12TOARGBROW_AVX2 -#define HAS_NV12TORGB565ROW_AVX2 -#define HAS_NV21TOARGBROW_AVX2 -#define HAS_NV21TORGB565ROW_AVX2 -#define HAS_RGB565TOARGBROW_AVX2 #endif // The following are available on all x86 platforms, but @@ -215,21 +201,34 @@ extern "C" { #define HAS_ARGBMIRRORROW_AVX2 #define HAS_ARGBPOLYNOMIALROW_AVX2 #define HAS_ARGBSHUFFLEROW_AVX2 +#define HAS_ARGBTORGB565DITHERROW_AVX2 +#define HAS_ARGBTOUVJROW_AVX2 #define HAS_ARGBTOUVROW_AVX2 #define HAS_ARGBTOYJROW_AVX2 #define HAS_ARGBTOYROW_AVX2 #define HAS_COPYROW_AVX +#define HAS_H422TOARGBROW_AVX2 #define HAS_I400TOARGBROW_AVX2 -#define HAS_I422TOABGRROW_AVX2 +#if !(defined(_DEBUG) && defined(__i386__)) +// TODO(fbarchard): fix build error on android_full_debug=1 +// https://code.google.com/p/libyuv/issues/detail?id=517 +#define HAS_I422ALPHATOARGBROW_AVX2 +#endif +#define HAS_I411TOARGBROW_AVX2 +#define HAS_I422TOARGB1555ROW_AVX2 +#define HAS_I422TOARGB4444ROW_AVX2 #define HAS_I422TOARGBROW_AVX2 -#define HAS_I422TOBGRAROW_AVX2 -#define HAS_I422TORAWROW_AVX2 #define HAS_I422TORGB24ROW_AVX2 +#define HAS_I422TORGB565ROW_AVX2 #define HAS_I422TORGBAROW_AVX2 +#define HAS_I444TOARGBROW_AVX2 #define HAS_INTERPOLATEROW_AVX2 #define HAS_J422TOARGBROW_AVX2 #define HAS_MERGEUVROW_AVX2 #define HAS_MIRRORROW_AVX2 +#define HAS_NV12TOARGBROW_AVX2 +#define HAS_NV12TORGB565ROW_AVX2 +#define HAS_NV21TOARGBROW_AVX2 #define HAS_SPLITUVROW_AVX2 #define HAS_UYVYTOARGBROW_AVX2 #define HAS_UYVYTOUV422ROW_AVX2 @@ -246,15 +245,27 @@ extern "C" { #define HAS_ARGBMULTIPLYROW_AVX2 #define HAS_ARGBSUBTRACTROW_AVX2 #define HAS_ARGBUNATTENUATEROW_AVX2 +#define HAS_BLENDPLANEROW_AVX2 #endif -// The following are disabled when SSSE3 is available: -#if !defined(LIBYUV_DISABLE_X86) && \ - (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \ - !defined(LIBYUV_SSSE3_ONLY) -#define HAS_ARGBATTENUATEROW_SSE2 -#define HAS_ARGBBLENDROW_SSE2 -#define HAS_MIRRORROW_SSE2 +// The following are available for AVX2 Visual C and clangcl 32 bit: +// TODO(fbarchard): Port to gcc. +#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \ + (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2)) +#define HAS_ARGB1555TOARGBROW_AVX2 +#define HAS_ARGB4444TOARGBROW_AVX2 +#define HAS_ARGBTOARGB1555ROW_AVX2 +#define HAS_ARGBTOARGB4444ROW_AVX2 +#define HAS_ARGBTORGB565ROW_AVX2 +#define HAS_J400TOARGBROW_AVX2 +#define HAS_RGB565TOARGBROW_AVX2 +#endif + +// The following are also available on x64 Visual C. +#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && defined(_M_X64) && \ + (!defined(__clang__) || defined(__SSSE3__)) +#define HAS_I422ALPHATOARGBROW_SSSE3 +#define HAS_I422TOARGBROW_SSSE3 #endif // The following are available on Neon platforms: @@ -268,43 +279,44 @@ extern "C" { #define HAS_ARGB4444TOARGBROW_NEON #define HAS_ARGB4444TOUVROW_NEON #define HAS_ARGB4444TOYROW_NEON +#define HAS_ARGBSETROW_NEON #define HAS_ARGBTOARGB1555ROW_NEON #define HAS_ARGBTOARGB4444ROW_NEON #define HAS_ARGBTORAWROW_NEON #define HAS_ARGBTORGB24ROW_NEON +#define HAS_ARGBTORGB565DITHERROW_NEON #define HAS_ARGBTORGB565ROW_NEON #define HAS_ARGBTOUV411ROW_NEON -#define HAS_ARGBTOUV422ROW_NEON #define HAS_ARGBTOUV444ROW_NEON #define HAS_ARGBTOUVJROW_NEON #define HAS_ARGBTOUVROW_NEON #define HAS_ARGBTOYJROW_NEON #define HAS_ARGBTOYROW_NEON +#define HAS_ARGBEXTRACTALPHAROW_NEON #define HAS_BGRATOUVROW_NEON #define HAS_BGRATOYROW_NEON #define HAS_COPYROW_NEON -#define HAS_J400TOARGBROW_NEON +#define HAS_I400TOARGBROW_NEON #define HAS_I411TOARGBROW_NEON -#define HAS_I422TOABGRROW_NEON +#define HAS_I422ALPHATOARGBROW_NEON #define HAS_I422TOARGB1555ROW_NEON #define HAS_I422TOARGB4444ROW_NEON #define HAS_I422TOARGBROW_NEON -#define HAS_I422TOBGRAROW_NEON -#define HAS_I422TORAWROW_NEON #define HAS_I422TORGB24ROW_NEON #define HAS_I422TORGB565ROW_NEON #define HAS_I422TORGBAROW_NEON #define HAS_I422TOUYVYROW_NEON #define HAS_I422TOYUY2ROW_NEON #define HAS_I444TOARGBROW_NEON +#define HAS_J400TOARGBROW_NEON #define HAS_MERGEUVROW_NEON #define HAS_MIRRORROW_NEON #define HAS_MIRRORUVROW_NEON #define HAS_NV12TOARGBROW_NEON #define HAS_NV12TORGB565ROW_NEON #define HAS_NV21TOARGBROW_NEON -#define HAS_NV21TORGB565ROW_NEON #define HAS_RAWTOARGBROW_NEON +#define HAS_RAWTORGB24ROW_NEON #define HAS_RAWTOUVROW_NEON #define HAS_RAWTOYROW_NEON #define HAS_RGB24TOARGBROW_NEON @@ -316,29 +328,28 @@ extern "C" { #define HAS_RGBATOUVROW_NEON #define HAS_RGBATOYROW_NEON #define HAS_SETROW_NEON -#define HAS_ARGBSETROW_NEON #define HAS_SPLITUVROW_NEON #define HAS_UYVYTOARGBROW_NEON #define HAS_UYVYTOUV422ROW_NEON #define HAS_UYVYTOUVROW_NEON #define HAS_UYVYTOYROW_NEON -#define HAS_I400TOARGBROW_NEON #define HAS_YUY2TOARGBROW_NEON #define HAS_YUY2TOUV422ROW_NEON #define HAS_YUY2TOUVROW_NEON #define HAS_YUY2TOYROW_NEON -#define HAS_ARGBTORGB565DITHERROW_NEON // Effects: #define HAS_ARGBADDROW_NEON #define HAS_ARGBATTENUATEROW_NEON #define HAS_ARGBBLENDROW_NEON +#define HAS_ARGBCOLORMATRIXROW_NEON #define HAS_ARGBGRAYROW_NEON #define HAS_ARGBMIRRORROW_NEON #define HAS_ARGBMULTIPLYROW_NEON #define HAS_ARGBQUANTIZEROW_NEON #define HAS_ARGBSEPIAROW_NEON #define HAS_ARGBSHADEROW_NEON +#define HAS_ARGBSHUFFLEROW_NEON #define HAS_ARGBSUBTRACTROW_NEON #define HAS_INTERPOLATEROW_NEON #define HAS_SOBELROW_NEON @@ -346,8 +357,6 @@ extern "C" { #define HAS_SOBELXROW_NEON #define HAS_SOBELXYROW_NEON #define HAS_SOBELYROW_NEON -#define HAS_ARGBCOLORMATRIXROW_NEON -#define HAS_ARGBSHUFFLEROW_NEON #endif // The following are available on Mips platforms: @@ -355,17 +364,15 @@ extern "C" { (_MIPS_SIM == _MIPS_SIM_ABI32) && (__mips_isa_rev < 6) #define HAS_COPYROW_MIPS #if defined(__mips_dsp) && (__mips_dsp_rev >= 2) -#define HAS_I422TOABGRROW_MIPS_DSPR2 -#define HAS_I422TOARGBROW_MIPS_DSPR2 -#define HAS_I422TOBGRAROW_MIPS_DSPR2 -#define HAS_INTERPOLATEROW_MIPS_DSPR2 -#define HAS_MIRRORROW_MIPS_DSPR2 -#define HAS_MIRRORUVROW_MIPS_DSPR2 -#define HAS_SPLITUVROW_MIPS_DSPR2 +#define HAS_I422TOARGBROW_DSPR2 +#define HAS_INTERPOLATEROW_DSPR2 +#define HAS_MIRRORROW_DSPR2 +#define HAS_MIRRORUVROW_DSPR2 +#define HAS_SPLITUVROW_DSPR2 #endif #endif -#if defined(_MSC_VER) && !defined(__CLR_VER) +#if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__) #define SIMD_ALIGNED(var) __declspec(align(16)) var #define SIMD_ALIGNED32(var) __declspec(align(64)) var typedef __declspec(align(16)) int16 vec16[8]; @@ -380,7 +387,7 @@ typedef __declspec(align(32)) int8 lvec8[32]; typedef __declspec(align(32)) uint16 ulvec16[16]; typedef __declspec(align(32)) uint32 ulvec32[8]; typedef __declspec(align(32)) uint8 ulvec8[32]; -#elif defined(__GNUC__) +#elif !defined(__pnacl__) && (defined(__GNUC__) || defined(__clang__)) // Caveat GCC 4.2 to 4.7 have a known issue using vectors with const. #define SIMD_ALIGNED(var) var __attribute__((aligned(16))) #define SIMD_ALIGNED32(var) var __attribute__((aligned(64))) @@ -413,6 +420,56 @@ typedef uint32 ulvec32[8]; typedef uint8 ulvec8[32]; #endif +#if defined(__aarch64__) +// This struct is for Arm64 color conversion. +struct YuvConstants { + uvec16 kUVToRB; + uvec16 kUVToRB2; + uvec16 kUVToG; + uvec16 kUVToG2; + vec16 kUVBiasBGR; + vec32 kYToRgb; +}; +#elif defined(__arm__) +// This struct is for ArmV7 color conversion. +struct YuvConstants { + uvec8 kUVToRB; + uvec8 kUVToG; + vec16 kUVBiasBGR; + vec32 kYToRgb; +}; +#else +// This struct is for Intel color conversion. +struct YuvConstants { + lvec8 kUVToB; + lvec8 kUVToG; + lvec8 kUVToR; + lvec16 kUVBiasB; + lvec16 kUVBiasG; + lvec16 kUVBiasR; + lvec16 kYToRgb; +}; + +// Offsets into YuvConstants structure +#define KUVTOB 0 +#define KUVTOG 32 +#define KUVTOR 64 +#define KUVBIASB 96 +#define KUVBIASG 128 +#define KUVBIASR 160 +#define KYTORGB 192 +#endif + +// Conversion matrix for YUV to RGB +extern const struct YuvConstants kYuvI601Constants; // BT.601 +extern const struct YuvConstants kYuvJPEGConstants; // JPeg color space +extern const struct YuvConstants kYuvH709Constants; // BT.709 + +// Conversion matrix for YVU to BGR +extern const struct YuvConstants kYvuI601Constants; // BT.601 +extern const struct YuvConstants kYvuJPEGConstants; // JPeg color space +extern const struct YuvConstants kYvuH709Constants; // BT.709 + #if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__) #define OMITFP #else @@ -502,159 +559,166 @@ void I444ToARGBRow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void I422ToARGBRow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); -void I411ToARGBRow_NEON(const uint8* src_y, +void I422AlphaToARGBRow_NEON(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + const uint8* a_buf, + uint8* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I422ToARGBRow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); -void I422ToBGRARow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_bgra, - int width); -void I422ToABGRRow_NEON(const uint8* src_y, +void I411ToARGBRow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, - uint8* dst_abgr, + uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void I422ToRGBARow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgba, + const struct YuvConstants* yuvconstants, int width); void I422ToRGB24Row_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgb24, + const struct YuvConstants* yuvconstants, int width); -void I422ToRAWRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_raw, - int width); void I422ToRGB565Row_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgb565, + const struct YuvConstants* yuvconstants, int width); void I422ToARGB1555Row_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb1555, + const struct YuvConstants* yuvconstants, int width); void I422ToARGB4444Row_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb4444, + const struct YuvConstants* yuvconstants, int width); void NV12ToARGBRow_NEON(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, - int width); -void NV21ToARGBRow_NEON(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void NV12ToRGB565Row_NEON(const uint8* src_y, const uint8* src_uv, uint8* dst_rgb565, + const struct YuvConstants* yuvconstants, int width); -void NV21ToRGB565Row_NEON(const uint8* src_y, - const uint8* src_vu, - uint8* dst_rgb565, - int width); +void NV21ToARGBRow_NEON(const uint8* src_y, + const uint8* src_vu, + uint8* dst_argb, + const struct YuvConstants* yuvconstants, + int width); void YUY2ToARGBRow_NEON(const uint8* src_yuy2, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void UYVYToARGBRow_NEON(const uint8* src_uyvy, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); -void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix); -void ARGBToYRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int pix); -void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); -void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix); -void ARGBToYJRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int pix); -void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); -void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix); -void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix); -void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix); -void RGB24ToYRow_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix); -void RAWToYRow_SSSE3(const uint8* src_raw, uint8* dst_y, int pix); -void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix); -void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix); +void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int width); +void ARGBToYRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int width); +void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width); +void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int width); +void ARGBToYJRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int width); +void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width); +void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int width); +void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int width); +void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int width); +void RGB24ToYRow_SSSE3(const uint8* src_rgb24, uint8* dst_y, int width); +void RAWToYRow_SSSE3(const uint8* src_raw, uint8* dst_y, int width); +void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int width); +void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width); void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int pix); -void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int pix); + int width); void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int pix); + int width); void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int pix); + uint8* dst_u, uint8* dst_v, int width); void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int pix); + uint8* dst_u, uint8* dst_v, int width); void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra, - uint8* dst_u, uint8* dst_v, int pix); + uint8* dst_u, uint8* dst_v, int width); void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr, - uint8* dst_u, uint8* dst_v, int pix); + uint8* dst_u, uint8* dst_v, int width); void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba, - uint8* dst_u, uint8* dst_v, int pix); + uint8* dst_u, uint8* dst_v, int width); void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24, - uint8* dst_u, uint8* dst_v, int pix); + uint8* dst_u, uint8* dst_v, int width); void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw, - uint8* dst_u, uint8* dst_v, int pix); + uint8* dst_u, uint8* dst_v, int width); void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565, - uint8* dst_u, uint8* dst_v, int pix); + uint8* dst_u, uint8* dst_v, int width); void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555, - uint8* dst_u, uint8* dst_v, int pix); + uint8* dst_u, uint8* dst_v, int width); void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444, - uint8* dst_u, uint8* dst_v, int pix); -void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix); -void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix); -void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix); -void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int pix); -void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix); -void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix); -void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int pix); -void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int pix); -void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int pix); -void ARGBToYJRow_C(const uint8* src_argb, uint8* dst_y, int pix); -void BGRAToYRow_C(const uint8* src_bgra, uint8* dst_y, int pix); -void ABGRToYRow_C(const uint8* src_abgr, uint8* dst_y, int pix); -void RGBAToYRow_C(const uint8* src_rgba, uint8* dst_y, int pix); -void RGB24ToYRow_C(const uint8* src_rgb24, uint8* dst_y, int pix); -void RAWToYRow_C(const uint8* src_raw, uint8* dst_y, int pix); -void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int pix); -void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int pix); -void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int pix); -void ARGBToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); -void ARGBToYJRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); -void BGRAToYRow_Any_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix); -void ABGRToYRow_Any_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix); -void RGBAToYRow_Any_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix); -void RGB24ToYRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix); -void RAWToYRow_Any_SSSE3(const uint8* src_raw, uint8* dst_y, int pix); -void ARGBToYRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int pix); -void ARGBToYJRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int pix); -void BGRAToYRow_Any_NEON(const uint8* src_bgra, uint8* dst_y, int pix); -void ABGRToYRow_Any_NEON(const uint8* src_abgr, uint8* dst_y, int pix); -void RGBAToYRow_Any_NEON(const uint8* src_rgba, uint8* dst_y, int pix); -void RGB24ToYRow_Any_NEON(const uint8* src_rgb24, uint8* dst_y, int pix); -void RAWToYRow_Any_NEON(const uint8* src_raw, uint8* dst_y, int pix); -void RGB565ToYRow_Any_NEON(const uint8* src_rgb565, uint8* dst_y, int pix); -void ARGB1555ToYRow_Any_NEON(const uint8* src_argb1555, uint8* dst_y, int pix); -void ARGB4444ToYRow_Any_NEON(const uint8* src_argb4444, uint8* dst_y, int pix); + uint8* dst_u, uint8* dst_v, int width); +void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int width); +void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int width); +void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int width); +void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int width); +void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int width); +void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int width); +void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int width); +void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int width); +void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int width); +void ARGBToYJRow_C(const uint8* src_argb, uint8* dst_y, int width); +void BGRAToYRow_C(const uint8* src_bgra, uint8* dst_y, int width); +void ABGRToYRow_C(const uint8* src_abgr, uint8* dst_y, int width); +void RGBAToYRow_C(const uint8* src_rgba, uint8* dst_y, int width); +void RGB24ToYRow_C(const uint8* src_rgb24, uint8* dst_y, int width); +void RAWToYRow_C(const uint8* src_raw, uint8* dst_y, int width); +void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width); +void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int width); +void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width); +void ARGBToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int width); +void ARGBToYJRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int width); +void BGRAToYRow_Any_SSSE3(const uint8* src_bgra, uint8* dst_y, int width); +void ABGRToYRow_Any_SSSE3(const uint8* src_abgr, uint8* dst_y, int width); +void RGBAToYRow_Any_SSSE3(const uint8* src_rgba, uint8* dst_y, int width); +void RGB24ToYRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_y, int width); +void RAWToYRow_Any_SSSE3(const uint8* src_raw, uint8* dst_y, int width); +void ARGBToYRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int width); +void ARGBToYJRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int width); +void BGRAToYRow_Any_NEON(const uint8* src_bgra, uint8* dst_y, int width); +void ABGRToYRow_Any_NEON(const uint8* src_abgr, uint8* dst_y, int width); +void RGBAToYRow_Any_NEON(const uint8* src_rgba, uint8* dst_y, int width); +void RGB24ToYRow_Any_NEON(const uint8* src_rgb24, uint8* dst_y, int width); +void RAWToYRow_Any_NEON(const uint8* src_raw, uint8* dst_y, int width); +void RGB565ToYRow_Any_NEON(const uint8* src_rgb565, uint8* dst_y, int width); +void ARGB1555ToYRow_Any_NEON(const uint8* src_argb1555, uint8* dst_y, + int width); +void ARGB4444ToYRow_Any_NEON(const uint8* src_argb4444, uint8* dst_y, + int width); void ARGBToUVRow_AVX2(const uint8* src_argb, int src_stride_argb, uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVRow_Any_AVX2(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); +void ARGBToUVJRow_AVX2(const uint8* src_argb, int src_stride_argb, + uint8* dst_u, uint8* dst_v, int width); void ARGBToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb, uint8* dst_u, uint8* dst_v, int width); void ARGBToUVJRow_SSSE3(const uint8* src_argb, int src_stride_argb, @@ -665,6 +729,10 @@ void ABGRToUVRow_SSSE3(const uint8* src_abgr, int src_stride_abgr, uint8* dst_u, uint8* dst_v, int width); void RGBAToUVRow_SSSE3(const uint8* src_rgba, int src_stride_rgba, uint8* dst_u, uint8* dst_v, int width); +void ARGBToUVRow_Any_AVX2(const uint8* src_argb, int src_stride_argb, + uint8* dst_u, uint8* dst_v, int width); +void ARGBToUVJRow_Any_AVX2(const uint8* src_argb, int src_stride_argb, + uint8* dst_u, uint8* dst_v, int width); void ARGBToUVRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb, uint8* dst_u, uint8* dst_v, int width); void ARGBToUVJRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb, @@ -676,33 +744,31 @@ void ABGRToUVRow_Any_SSSE3(const uint8* src_abgr, int src_stride_abgr, void RGBAToUVRow_Any_SSSE3(const uint8* src_rgba, int src_stride_rgba, uint8* dst_u, uint8* dst_v, int width); void ARGBToUV444Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int pix); -void ARGBToUV422Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int pix); + int width); void ARGBToUV411Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int pix); + int width); void ARGBToUVRow_Any_NEON(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int pix); + uint8* dst_u, uint8* dst_v, int width); void ARGBToUVJRow_Any_NEON(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int pix); + uint8* dst_u, uint8* dst_v, int width); void BGRAToUVRow_Any_NEON(const uint8* src_bgra, int src_stride_bgra, - uint8* dst_u, uint8* dst_v, int pix); + uint8* dst_u, uint8* dst_v, int width); void ABGRToUVRow_Any_NEON(const uint8* src_abgr, int src_stride_abgr, - uint8* dst_u, uint8* dst_v, int pix); + uint8* dst_u, uint8* dst_v, int width); void RGBAToUVRow_Any_NEON(const uint8* src_rgba, int src_stride_rgba, - uint8* dst_u, uint8* dst_v, int pix); + uint8* dst_u, uint8* dst_v, int width); void RGB24ToUVRow_Any_NEON(const uint8* src_rgb24, int src_stride_rgb24, - uint8* dst_u, uint8* dst_v, int pix); + uint8* dst_u, uint8* dst_v, int width); void RAWToUVRow_Any_NEON(const uint8* src_raw, int src_stride_raw, - uint8* dst_u, uint8* dst_v, int pix); + uint8* dst_u, uint8* dst_v, int width); void RGB565ToUVRow_Any_NEON(const uint8* src_rgb565, int src_stride_rgb565, - uint8* dst_u, uint8* dst_v, int pix); + uint8* dst_u, uint8* dst_v, int width); void ARGB1555ToUVRow_Any_NEON(const uint8* src_argb1555, int src_stride_argb1555, - uint8* dst_u, uint8* dst_v, int pix); + uint8* dst_u, uint8* dst_v, int width); void ARGB4444ToUVRow_Any_NEON(const uint8* src_argb4444, int src_stride_argb4444, - uint8* dst_u, uint8* dst_v, int pix); + uint8* dst_u, uint8* dst_v, int width); void ARGBToUVRow_C(const uint8* src_argb, int src_stride_argb, uint8* dst_u, uint8* dst_v, int width); void ARGBToUVJRow_C(const uint8* src_argb, int src_stride_argb, @@ -729,25 +795,15 @@ void ARGBToUV444Row_SSSE3(const uint8* src_argb, void ARGBToUV444Row_Any_SSSE3(const uint8* src_argb, uint8* dst_u, uint8* dst_v, int width); -void ARGBToUV422Row_SSSE3(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUV422Row_Any_SSSE3(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width); - void ARGBToUV444Row_C(const uint8* src_argb, uint8* dst_u, uint8* dst_v, int width); -void ARGBToUV422Row_C(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width); void ARGBToUV411Row_C(const uint8* src_argb, uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVJ422Row_C(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width); void MirrorRow_AVX2(const uint8* src, uint8* dst, int width); void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width); -void MirrorRow_SSE2(const uint8* src, uint8* dst, int width); void MirrorRow_NEON(const uint8* src, uint8* dst, int width); -void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width); +void MirrorRow_DSPR2(const uint8* src, uint8* dst, int width); void MirrorRow_C(const uint8* src, uint8* dst, int width); void MirrorRow_Any_AVX2(const uint8* src, uint8* dst, int width); void MirrorRow_Any_SSSE3(const uint8* src, uint8* dst, int width); @@ -758,10 +814,9 @@ void MirrorUVRow_SSSE3(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width); void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width); -void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int width); -void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int width); +void MirrorUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, + int width); +void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width); void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width); void ARGBMirrorRow_SSE2(const uint8* src, uint8* dst, int width); @@ -771,20 +826,23 @@ void ARGBMirrorRow_Any_AVX2(const uint8* src, uint8* dst, int width); void ARGBMirrorRow_Any_SSE2(const uint8* src, uint8* dst, int width); void ARGBMirrorRow_Any_NEON(const uint8* src, uint8* dst, int width); -void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix); -void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix); -void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix); -void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix); -void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int pix); +void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width); +void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, + int width); +void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, + int width); +void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, + int width); +void SplitUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, + int width); void SplitUVRow_Any_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int pix); + int width); void SplitUVRow_Any_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int pix); + int width); void SplitUVRow_Any_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int pix); -void SplitUVRow_Any_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int pix); + int width); +void SplitUVRow_Any_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, + int width); void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv, int width); @@ -816,10 +874,26 @@ void CopyRow_16_C(const uint16* src, uint16* dst, int count); void ARGBCopyAlphaRow_C(const uint8* src_argb, uint8* dst_argb, int width); void ARGBCopyAlphaRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width); void ARGBCopyAlphaRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width); +void ARGBCopyAlphaRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb, + int width); +void ARGBCopyAlphaRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb, + int width); + +void ARGBExtractAlphaRow_C(const uint8* src_argb, uint8* dst_a, int width); +void ARGBExtractAlphaRow_SSE2(const uint8* src_argb, uint8* dst_a, int width); +void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width); +void ARGBExtractAlphaRow_Any_SSE2(const uint8* src_argb, uint8* dst_a, + int width); +void ARGBExtractAlphaRow_Any_NEON(const uint8* src_argb, uint8* dst_a, + int width); void ARGBCopyYToAlphaRow_C(const uint8* src_y, uint8* dst_argb, int width); void ARGBCopyYToAlphaRow_SSE2(const uint8* src_y, uint8* dst_argb, int width); void ARGBCopyYToAlphaRow_AVX2(const uint8* src_y, uint8* dst_argb, int width); +void ARGBCopyYToAlphaRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, + int width); +void ARGBCopyYToAlphaRow_Any_AVX2(const uint8* src_y, uint8* dst_argb, + int width); void SetRow_C(uint8* dst, uint8 v8, int count); void SetRow_X86(uint8* dst, uint8 v8, int count); @@ -835,524 +909,541 @@ void ARGBSetRow_Any_NEON(uint8* dst_argb, uint32 v32, int count); // ARGBShufflers for BGRAToARGB etc. void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix); + const uint8* shuffler, int width); void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix); + const uint8* shuffler, int width); void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix); + const uint8* shuffler, int width); void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix); + const uint8* shuffler, int width); void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix); + const uint8* shuffler, int width); void ARGBShuffleRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix); + const uint8* shuffler, int width); void ARGBShuffleRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix); + const uint8* shuffler, int width); void ARGBShuffleRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix); + const uint8* shuffler, int width); void ARGBShuffleRow_Any_NEON(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix); + const uint8* shuffler, int width); -void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix); -void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix); -void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb, int pix); +void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int width); +void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int width); +void RAWToRGB24Row_SSSE3(const uint8* src_raw, uint8* dst_rgb24, int width); +void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb, int width); void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb, - int pix); + int width); void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb, - int pix); -void RGB565ToARGBRow_AVX2(const uint8* src_rgb565, uint8* dst_argb, int pix); + int width); +void RGB565ToARGBRow_AVX2(const uint8* src_rgb565, uint8* dst_argb, int width); void ARGB1555ToARGBRow_AVX2(const uint8* src_argb1555, uint8* dst_argb, - int pix); + int width); void ARGB4444ToARGBRow_AVX2(const uint8* src_argb4444, uint8* dst_argb, - int pix); + int width); -void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix); -void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix); -void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix); +void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int width); +void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int width); +void RAWToRGB24Row_NEON(const uint8* src_raw, uint8* dst_rgb24, int width); +void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int width); void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb, - int pix); + int width); void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb, - int pix); -void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int pix); -void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int pix); -void RGB565ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int pix); -void ARGB1555ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix); -void ARGB4444ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix); -void RGB24ToARGBRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix); -void RAWToARGBRow_Any_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix); + int width); +void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width); +void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width); +void RAWToRGB24Row_C(const uint8* src_raw, uint8* dst_rgb24, int width); +void RGB565ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int width); +void ARGB1555ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int width); +void ARGB4444ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int width); +void RGB24ToARGBRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_argb, + int width); +void RAWToARGBRow_Any_SSSE3(const uint8* src_raw, uint8* dst_argb, int width); +void RAWToRGB24Row_Any_SSSE3(const uint8* src_raw, uint8* dst_rgb24, int width); void RGB565ToARGBRow_Any_SSE2(const uint8* src_rgb565, uint8* dst_argb, - int pix); + int width); void ARGB1555ToARGBRow_Any_SSE2(const uint8* src_argb1555, uint8* dst_argb, - int pix); + int width); void ARGB4444ToARGBRow_Any_SSE2(const uint8* src_argb4444, uint8* dst_argb, - int pix); + int width); void RGB565ToARGBRow_Any_AVX2(const uint8* src_rgb565, uint8* dst_argb, - int pix); + int width); void ARGB1555ToARGBRow_Any_AVX2(const uint8* src_argb1555, uint8* dst_argb, - int pix); + int width); void ARGB4444ToARGBRow_Any_AVX2(const uint8* src_argb4444, uint8* dst_argb, - int pix); + int width); -void RGB24ToARGBRow_Any_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix); -void RAWToARGBRow_Any_NEON(const uint8* src_raw, uint8* dst_argb, int pix); +void RGB24ToARGBRow_Any_NEON(const uint8* src_rgb24, uint8* dst_argb, + int width); +void RAWToARGBRow_Any_NEON(const uint8* src_raw, uint8* dst_argb, int width); +void RAWToRGB24Row_Any_NEON(const uint8* src_raw, uint8* dst_rgb24, int width); void RGB565ToARGBRow_Any_NEON(const uint8* src_rgb565, uint8* dst_argb, - int pix); + int width); void ARGB1555ToARGBRow_Any_NEON(const uint8* src_argb1555, uint8* dst_argb, - int pix); + int width); void ARGB4444ToARGBRow_Any_NEON(const uint8* src_argb4444, uint8* dst_argb, - int pix); + int width); -void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix); +void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width); +void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width); +void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width); +void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width); +void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width); void ARGBToRGB565DitherRow_C(const uint8* src_argb, uint8* dst_rgb, - const uint32 dither4, int pix); + const uint32 dither4, int width); void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb, uint8* dst_rgb, - const uint32 dither4, int pix); + const uint32 dither4, int width); void ARGBToRGB565DitherRow_AVX2(const uint8* src_argb, uint8* dst_rgb, - const uint32 dither4, int pix); + const uint32 dither4, int width); -void ARGBToRGB565Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToARGB1555Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToARGB4444Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix); +void ARGBToRGB565Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width); +void ARGBToARGB1555Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width); +void ARGBToARGB4444Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); +void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb, int width); +void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_rgb, int width); +void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb, int width); +void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_rgb, int width); +void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_rgb, int width); void ARGBToRGB565DitherRow_NEON(const uint8* src_argb, uint8* dst_rgb, const uint32 dither4, int width); -void ARGBToRGBARow_C(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int pix); - -void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix); -void J400ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, int pix); -void J400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int pix); -void J400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix); -void J400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int pix); -void J400ToARGBRow_Any_AVX2(const uint8* src_y, uint8* dst_argb, int pix); -void J400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int pix); +void ARGBToRGBARow_C(const uint8* src_argb, uint8* dst_rgb, int width); +void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width); +void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width); +void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width); +void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width); +void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width); + +void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int width); +void J400ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, int width); +void J400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width); +void J400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width); +void J400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int width); +void J400ToARGBRow_Any_AVX2(const uint8* src_y, uint8* dst_argb, int width); +void J400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int width); void I444ToARGBRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I422ToARGBRow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void I422ToARGBRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); +void I422AlphaToARGBRow_C(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + const uint8* a_buf, + uint8* dst_argb, + const struct YuvConstants* yuvconstants, + int width); void I411ToARGBRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void NV12ToARGBRow_C(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); -void NV21ToRGB565Row_C(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, - int width); void NV12ToRGB565Row_C(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void NV21ToARGBRow_C(const uint8* src_y, - const uint8* src_vu, + const uint8* src_uv, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void YUY2ToARGBRow_C(const uint8* src_yuy2, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void UYVYToARGBRow_C(const uint8* src_uyvy, uint8* dst_argb, - int width); -void J422ToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToBGRARow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_bgra, - int width); -void I422ToABGRRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_abgr, + const struct YuvConstants* yuvconstants, int width); void I422ToRGBARow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgba, + const struct YuvConstants* yuvconstants, int width); void I422ToRGB24Row_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgb24, + const struct YuvConstants* yuvconstants, int width); -void I422ToRAWRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_raw, - int width); void I422ToARGB4444Row_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb4444, + const struct YuvConstants* yuvconstants, int width); void I422ToARGB1555Row_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb4444, + const struct YuvConstants* yuvconstants, int width); void I422ToRGB565Row_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgb565, + const struct YuvConstants* yuvconstants, int width); void I422ToARGBRow_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); -void I422ToBGRARow_AVX2(const uint8* src_y, +void I422ToARGBRow_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void I422ToRGBARow_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); -void I422ToABGRRow_AVX2(const uint8* src_y, +void I444ToARGBRow_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I444ToARGBRow_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void I444ToARGBRow_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void I444ToARGBRow_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void I422ToARGBRow_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I422AlphaToARGBRow_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + const uint8* a_buf, + uint8* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I422AlphaToARGBRow_AVX2(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + const uint8* a_buf, + uint8* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I422ToARGBRow_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void I411ToARGBRow_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void I411ToARGBRow_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void NV12ToARGBRow_SSSE3(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, - int width); -void NV21ToARGBRow_SSSE3(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void NV12ToARGBRow_AVX2(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, - int width); -void NV21ToARGBRow_AVX2(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void NV12ToRGB565Row_SSSE3(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, - int width); -void NV21ToRGB565Row_SSSE3(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void NV12ToRGB565Row_AVX2(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); -void NV21ToRGB565Row_AVX2(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, - int width); +void NV21ToARGBRow_SSSE3(const uint8* src_y, + const uint8* src_uv, + uint8* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void NV21ToARGBRow_AVX2(const uint8* src_y, + const uint8* src_uv, + uint8* dst_argb, + const struct YuvConstants* yuvconstants, + int width); void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void UYVYToARGBRow_SSSE3(const uint8* src_uyvy, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void YUY2ToARGBRow_AVX2(const uint8* src_yuy2, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void UYVYToARGBRow_AVX2(const uint8* src_uyvy, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); -void J422ToARGBRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void J422ToARGBRow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToBGRARow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_bgra, - int width); -void I422ToABGRRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_abgr, - int width); void I422ToRGBARow_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgba, + const struct YuvConstants* yuvconstants, int width); void I422ToARGB4444Row_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void I422ToARGB4444Row_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void I422ToARGB1555Row_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void I422ToARGB1555Row_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void I422ToRGB565Row_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void I422ToRGB565Row_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void I422ToRGB24Row_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgb24, + const struct YuvConstants* yuvconstants, int width); void I422ToRGB24Row_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgb24, + const struct YuvConstants* yuvconstants, int width); -void I422ToRAWRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_raw, - int width); -void I422ToRAWRow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_raw, - int width); void I422ToARGBRow_Any_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, - int width); -void I422ToBGRARow_Any_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void I422ToRGBARow_Any_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, - int width); -void I422ToABGRRow_Any_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void I444ToARGBRow_Any_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void I444ToARGBRow_Any_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void I422ToARGBRow_Any_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); +void I422AlphaToARGBRow_Any_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + const uint8* a_buf, + uint8* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I422AlphaToARGBRow_Any_AVX2(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + const uint8* a_buf, + uint8* dst_argb, + const struct YuvConstants* yuvconstants, + int width); void I411ToARGBRow_Any_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void I411ToARGBRow_Any_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void NV12ToARGBRow_Any_SSSE3(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, - int width); -void NV21ToARGBRow_Any_SSSE3(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void NV12ToARGBRow_Any_AVX2(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); +void NV21ToARGBRow_Any_SSSE3(const uint8* src_y, + const uint8* src_vu, + uint8* dst_argb, + const struct YuvConstants* yuvconstants, + int width); void NV21ToARGBRow_Any_AVX2(const uint8* src_y, const uint8* src_vu, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void NV12ToRGB565Row_Any_SSSE3(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, - int width); -void NV21ToRGB565Row_Any_SSSE3(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void NV12ToRGB565Row_Any_AVX2(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, - int width); -void NV21ToRGB565Row_Any_AVX2(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void YUY2ToARGBRow_Any_SSSE3(const uint8* src_yuy2, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void UYVYToARGBRow_Any_SSSE3(const uint8* src_uyvy, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void YUY2ToARGBRow_Any_AVX2(const uint8* src_yuy2, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void UYVYToARGBRow_Any_AVX2(const uint8* src_uyvy, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); -void J422ToARGBRow_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void J422ToARGBRow_Any_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToBGRARow_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_bgra, - int width); -void I422ToABGRRow_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_abgr, - int width); void I422ToRGBARow_Any_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgba, + const struct YuvConstants* yuvconstants, int width); void I422ToARGB4444Row_Any_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgba, + const struct YuvConstants* yuvconstants, int width); void I422ToARGB4444Row_Any_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgba, + const struct YuvConstants* yuvconstants, int width); void I422ToARGB1555Row_Any_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgba, + const struct YuvConstants* yuvconstants, int width); void I422ToARGB1555Row_Any_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgba, + const struct YuvConstants* yuvconstants, int width); void I422ToRGB565Row_Any_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgba, + const struct YuvConstants* yuvconstants, int width); void I422ToRGB565Row_Any_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgba, + const struct YuvConstants* yuvconstants, int width); void I422ToRGB24Row_Any_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void I422ToRGB24Row_Any_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); -void I422ToRAWRow_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToRAWRow_Any_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width); void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int width); @@ -1365,13 +1456,23 @@ void I400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int width); // ARGB preattenuated alpha blend. void ARGBBlendRow_SSSE3(const uint8* src_argb, const uint8* src_argb1, uint8* dst_argb, int width); -void ARGBBlendRow_SSE2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); void ARGBBlendRow_NEON(const uint8* src_argb, const uint8* src_argb1, uint8* dst_argb, int width); void ARGBBlendRow_C(const uint8* src_argb, const uint8* src_argb1, uint8* dst_argb, int width); +// Unattenuated planar alpha blend. +void BlendPlaneRow_SSSE3(const uint8* src0, const uint8* src1, + const uint8* alpha, uint8* dst, int width); +void BlendPlaneRow_Any_SSSE3(const uint8* src0, const uint8* src1, + const uint8* alpha, uint8* dst, int width); +void BlendPlaneRow_AVX2(const uint8* src0, const uint8* src1, + const uint8* alpha, uint8* dst, int width); +void BlendPlaneRow_Any_AVX2(const uint8* src0, const uint8* src1, + const uint8* alpha, uint8* dst, int width); +void BlendPlaneRow_C(const uint8* src0, const uint8* src1, + const uint8* alpha, uint8* dst, int width); + // ARGB multiply images. Same API as Blend, but these require // pointer and width alignment for SSE2. void ARGBMultiplyRow_C(const uint8* src_argb, const uint8* src_argb1, @@ -1422,26 +1523,32 @@ void ARGBSubtractRow_NEON(const uint8* src_argb, const uint8* src_argb1, void ARGBSubtractRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1, uint8* dst_argb, int width); -void ARGBToRGB24Row_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToRAWRow_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToRGB565Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToARGB1555Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToARGB4444Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix); +void ARGBToRGB24Row_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width); +void ARGBToRAWRow_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width); +void ARGBToRGB565Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int width); +void ARGBToARGB1555Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, + int width); +void ARGBToARGB4444Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, + int width); void ARGBToRGB565DitherRow_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, - const uint32 dither4, int pix); + const uint32 dither4, int width); void ARGBToRGB565DitherRow_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, - const uint32 dither4, int pix); + const uint32 dither4, int width); -void ARGBToRGB565Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToARGB1555Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToARGB4444Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix); +void ARGBToRGB565Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, int width); +void ARGBToARGB1555Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, + int width); +void ARGBToARGB4444Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, + int width); -void ARGBToRGB24Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToRAWRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToRGB565Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToARGB1555Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToARGB4444Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); +void ARGBToRGB24Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int width); +void ARGBToRAWRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int width); +void ARGBToRGB565Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int width); +void ARGBToARGB1555Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, + int width); +void ARGBToARGB4444Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, + int width); void ARGBToRGB565DitherRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb, const uint32 dither4, int width); @@ -1449,186 +1556,169 @@ void I444ToARGBRow_Any_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void I422ToARGBRow_Any_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); +void I422AlphaToARGBRow_Any_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + const uint8* src_a, + uint8* dst_argb, + const struct YuvConstants* yuvconstants, + int width); void I411ToARGBRow_Any_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, - int width); -void I422ToBGRARow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToABGRRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void I422ToRGBARow_Any_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void I422ToRGB24Row_Any_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); -void I422ToRAWRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); void I422ToARGB4444Row_Any_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void I422ToARGB1555Row_Any_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void I422ToRGB565Row_Any_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void NV12ToARGBRow_Any_NEON(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void NV21ToARGBRow_Any_NEON(const uint8* src_y, - const uint8* src_uv, + const uint8* src_vu, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void NV12ToRGB565Row_Any_NEON(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, - int width); -void NV21ToRGB565Row_Any_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void YUY2ToARGBRow_Any_NEON(const uint8* src_yuy2, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); void UYVYToARGBRow_Any_NEON(const uint8* src_uyvy, uint8* dst_argb, + const struct YuvConstants* yuvconstants, int width); -void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToABGRRow_MIPS_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToABGRRow_MIPS_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); +void I422ToARGBRow_DSPR2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I422ToARGBRow_DSPR2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, + const struct YuvConstants* yuvconstants, + int width); -void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int pix); +void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int width); void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int pix); + uint8* dst_u, uint8* dst_v, int width); void YUY2ToUV422Row_AVX2(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int pix); -void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix); + uint8* dst_u, uint8* dst_v, int width); +void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int width); void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int pix); + uint8* dst_u, uint8* dst_v, int width); void YUY2ToUV422Row_SSE2(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int pix); -void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix); + uint8* dst_u, uint8* dst_v, int width); +void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int width); void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int pix); + uint8* dst_u, uint8* dst_v, int width); void YUY2ToUV422Row_NEON(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int pix); -void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int pix); + uint8* dst_u, uint8* dst_v, int width); +void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width); void YUY2ToUVRow_C(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int pix); + uint8* dst_u, uint8* dst_v, int width); void YUY2ToUV422Row_C(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int pix); -void YUY2ToYRow_Any_AVX2(const uint8* src_yuy2, uint8* dst_y, int pix); + uint8* dst_u, uint8* dst_v, int width); +void YUY2ToYRow_Any_AVX2(const uint8* src_yuy2, uint8* dst_y, int width); void YUY2ToUVRow_Any_AVX2(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int pix); + uint8* dst_u, uint8* dst_v, int width); void YUY2ToUV422Row_Any_AVX2(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int pix); -void YUY2ToYRow_Any_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix); + uint8* dst_u, uint8* dst_v, int width); +void YUY2ToYRow_Any_SSE2(const uint8* src_yuy2, uint8* dst_y, int width); void YUY2ToUVRow_Any_SSE2(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int pix); + uint8* dst_u, uint8* dst_v, int width); void YUY2ToUV422Row_Any_SSE2(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int pix); -void YUY2ToYRow_Any_NEON(const uint8* src_yuy2, uint8* dst_y, int pix); + uint8* dst_u, uint8* dst_v, int width); +void YUY2ToYRow_Any_NEON(const uint8* src_yuy2, uint8* dst_y, int width); void YUY2ToUVRow_Any_NEON(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int pix); + uint8* dst_u, uint8* dst_v, int width); void YUY2ToUV422Row_Any_NEON(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int pix); -void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix); + uint8* dst_u, uint8* dst_v, int width); +void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int width); void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int pix); + uint8* dst_u, uint8* dst_v, int width); void UYVYToUV422Row_AVX2(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int pix); -void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix); + uint8* dst_u, uint8* dst_v, int width); +void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int width); void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int pix); + uint8* dst_u, uint8* dst_v, int width); void UYVYToUV422Row_SSE2(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int pix); -void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix); + uint8* dst_u, uint8* dst_v, int width); +void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int width); void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int pix); + uint8* dst_u, uint8* dst_v, int width); void UYVYToUV422Row_AVX2(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int pix); -void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int pix); + uint8* dst_u, uint8* dst_v, int width); +void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int width); void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int pix); + uint8* dst_u, uint8* dst_v, int width); void UYVYToUV422Row_NEON(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int pix); + uint8* dst_u, uint8* dst_v, int width); -void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int pix); +void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width); void UYVYToUVRow_C(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int pix); + uint8* dst_u, uint8* dst_v, int width); void UYVYToUV422Row_C(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int pix); -void UYVYToYRow_Any_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix); + uint8* dst_u, uint8* dst_v, int width); +void UYVYToYRow_Any_AVX2(const uint8* src_uyvy, uint8* dst_y, int width); void UYVYToUVRow_Any_AVX2(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int pix); + uint8* dst_u, uint8* dst_v, int width); void UYVYToUV422Row_Any_AVX2(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int pix); -void UYVYToYRow_Any_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix); + uint8* dst_u, uint8* dst_v, int width); +void UYVYToYRow_Any_SSE2(const uint8* src_uyvy, uint8* dst_y, int width); void UYVYToUVRow_Any_SSE2(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int pix); + uint8* dst_u, uint8* dst_v, int width); void UYVYToUV422Row_Any_SSE2(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int pix); -void UYVYToYRow_Any_NEON(const uint8* src_uyvy, uint8* dst_y, int pix); + uint8* dst_u, uint8* dst_v, int width); +void UYVYToYRow_Any_NEON(const uint8* src_uyvy, uint8* dst_y, int width); void UYVYToUVRow_Any_NEON(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int pix); + uint8* dst_u, uint8* dst_v, int width); void UYVYToUV422Row_Any_NEON(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int pix); + uint8* dst_u, uint8* dst_v, int width); void I422ToYUY2Row_C(const uint8* src_y, const uint8* src_u, @@ -1673,7 +1763,6 @@ void I422ToUYVYRow_Any_NEON(const uint8* src_y, // Effects related row functions. void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width); void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width); void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width); void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width); @@ -1753,9 +1842,6 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr, ptrdiff_t src_stride_ptr, int width, int source_y_fraction); -void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, - int source_y_fraction); void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, ptrdiff_t src_stride_ptr, int width, int source_y_fraction); @@ -1765,24 +1851,21 @@ void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr, void InterpolateRow_NEON(uint8* dst_ptr, const uint8* src_ptr, ptrdiff_t src_stride_ptr, int width, int source_y_fraction); -void InterpolateRow_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, - int source_y_fraction); +void InterpolateRow_DSPR2(uint8* dst_ptr, const uint8* src_ptr, + ptrdiff_t src_stride_ptr, int width, + int source_y_fraction); void InterpolateRow_Any_NEON(uint8* dst_ptr, const uint8* src_ptr, ptrdiff_t src_stride_ptr, int width, int source_y_fraction); -void InterpolateRow_Any_SSE2(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, - int source_y_fraction); void InterpolateRow_Any_SSSE3(uint8* dst_ptr, const uint8* src_ptr, ptrdiff_t src_stride_ptr, int width, int source_y_fraction); void InterpolateRow_Any_AVX2(uint8* dst_ptr, const uint8* src_ptr, ptrdiff_t src_stride_ptr, int width, int source_y_fraction); -void InterpolateRow_Any_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, - int source_y_fraction); +void InterpolateRow_Any_DSPR2(uint8* dst_ptr, const uint8* src_ptr, + ptrdiff_t src_stride_ptr, int width, + int source_y_fraction); void InterpolateRow_16_C(uint16* dst_ptr, const uint16* src_ptr, ptrdiff_t src_stride_ptr, diff --git a/third_party/libyuv/include/libyuv/scale_argb.h b/third_party/libyuv/include/libyuv/scale_argb.h index 0c9b36257..b56cf5209 100644 --- a/third_party/libyuv/include/libyuv/scale_argb.h +++ b/third_party/libyuv/include/libyuv/scale_argb.h @@ -35,7 +35,6 @@ int ARGBScaleClip(const uint8* src_argb, int src_stride_argb, int clip_x, int clip_y, int clip_width, int clip_height, enum FilterMode filtering); -// TODO(fbarchard): Implement this. // Scale with YUV conversion to ARGB and clipping. LIBYUV_API int YUVToARGBScaleClip(const uint8* src_y, int src_stride_y, diff --git a/third_party/libyuv/include/libyuv/scale_row.h b/third_party/libyuv/include/libyuv/scale_row.h index 94ad9cf86..df699e6c2 100644 --- a/third_party/libyuv/include/libyuv/scale_row.h +++ b/third_party/libyuv/include/libyuv/scale_row.h @@ -23,6 +23,26 @@ extern "C" { (defined(__i386__) && !defined(__SSE2__)) #define LIBYUV_DISABLE_X86 #endif +// MemorySanitizer does not support assembly code yet. http://crbug.com/344505 +#if defined(__has_feature) +#if __has_feature(memory_sanitizer) +#define LIBYUV_DISABLE_X86 +#endif +#endif + +// GCC >= 4.7.0 required for AVX2. +#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__)) +#if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7)) +#define GCC_HAS_AVX2 1 +#endif // GNUC >= 4.7 +#endif // __GNUC__ + +// clang >= 3.4.0 required for AVX2. +#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) +#if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4)) +#define CLANG_HAS_AVX2 1 +#endif // clang >= 3.4 +#endif // __clang__ // Visual C 2012 required for AVX2. #if defined(_M_IX86) && !defined(__clang__) && \ @@ -42,24 +62,23 @@ extern "C" { #define HAS_SCALEARGBROWDOWNEVEN_SSE2 #define HAS_SCALECOLSUP2_SSE2 #define HAS_SCALEFILTERCOLS_SSSE3 -#define HAS_SCALEROWDOWN2_SSE2 +#define HAS_SCALEROWDOWN2_SSSE3 #define HAS_SCALEROWDOWN34_SSSE3 #define HAS_SCALEROWDOWN38_SSSE3 -#define HAS_SCALEROWDOWN4_SSE2 +#define HAS_SCALEROWDOWN4_SSSE3 +#define HAS_SCALEADDROW_SSE2 #endif -// The following are available on VS2012: -#if !defined(LIBYUV_DISABLE_X86) && defined(VISUALC_HAS_AVX2) +// The following are available on all x86 platforms, but +// require VS2012, clang 3.4 or gcc 4.7. +// The code supports NaCL but requires a new compiler and validator. +#if !defined(LIBYUV_DISABLE_X86) && (defined(VISUALC_HAS_AVX2) || \ + defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)) #define HAS_SCALEADDROW_AVX2 #define HAS_SCALEROWDOWN2_AVX2 #define HAS_SCALEROWDOWN4_AVX2 #endif -// The following are available on Visual C: -#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && !defined(__clang__) -#define HAS_SCALEADDROW_SSE2 -#endif - // The following are available on Neon platforms: #if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \ (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__)) @@ -77,10 +96,10 @@ extern "C" { // The following are available on Mips platforms: #if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \ defined(__mips__) && defined(__mips_dsp) && (__mips_dsp_rev >= 2) -#define HAS_SCALEROWDOWN2_MIPS_DSPR2 -#define HAS_SCALEROWDOWN4_MIPS_DSPR2 -#define HAS_SCALEROWDOWN34_MIPS_DSPR2 -#define HAS_SCALEROWDOWN38_MIPS_DSPR2 +#define HAS_SCALEROWDOWN2_DSPR2 +#define HAS_SCALEROWDOWN4_DSPR2 +#define HAS_SCALEROWDOWN34_DSPR2 +#define HAS_SCALEROWDOWN38_DSPR2 #endif // Scale ARGB vertically with bilinear interpolation. @@ -133,6 +152,8 @@ void ScaleRowDown2Linear_16_C(const uint16* src_ptr, ptrdiff_t src_stride, uint16* dst, int dst_width); void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst, int dst_width); +void ScaleRowDown2Box_Odd_C(const uint8* src_ptr, ptrdiff_t src_stride, + uint8* dst, int dst_width); void ScaleRowDown2Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride, uint16* dst, int dst_width); void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride, @@ -214,22 +235,22 @@ void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb, int dst_width, int x, int dx); // Specialized scalers for x86. -void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); +void ScaleRowDown2_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, + uint8* dst_ptr, int dst_width); +void ScaleRowDown2Linear_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, + uint8* dst_ptr, int dst_width); +void ScaleRowDown2Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, + uint8* dst_ptr, int dst_width); void ScaleRowDown2_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, int dst_width); void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, int dst_width); void ScaleRowDown2Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, int dst_width); -void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); +void ScaleRowDown4_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, + uint8* dst_ptr, int dst_width); +void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, + uint8* dst_ptr, int dst_width); void ScaleRowDown4_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, int dst_width); void ScaleRowDown4Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, @@ -251,22 +272,26 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr, void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, int dst_width); -void ScaleRowDown2_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown2Linear_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown2Box_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); +void ScaleRowDown2_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, + uint8* dst_ptr, int dst_width); +void ScaleRowDown2Linear_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, + uint8* dst_ptr, int dst_width); +void ScaleRowDown2Box_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, + uint8* dst_ptr, int dst_width); +void ScaleRowDown2Box_Odd_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, + uint8* dst_ptr, int dst_width); void ScaleRowDown2_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, int dst_width); void ScaleRowDown2Linear_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, int dst_width); void ScaleRowDown2Box_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown4_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown4Box_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, int dst_width); +void ScaleRowDown2Box_Odd_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, + uint8* dst_ptr, int dst_width); +void ScaleRowDown4_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, + uint8* dst_ptr, int dst_width); +void ScaleRowDown4Box_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, + uint8* dst_ptr, int dst_width); void ScaleRowDown4_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, int dst_width); void ScaleRowDown4Box_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, @@ -418,6 +443,8 @@ void ScaleRowDown2Linear_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst, int dst_width); void ScaleRowDown2Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst, int dst_width); +void ScaleRowDown2Box_Odd_NEON(const uint8* src_ptr, ptrdiff_t src_stride, + uint8* dst, int dst_width); void ScaleRowDown4_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, int dst_width); void ScaleRowDown4Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, @@ -447,28 +474,26 @@ void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr, void ScaleFilterCols_Any_NEON(uint8* dst_ptr, const uint8* src_ptr, int dst_width, int x, int dx); - -void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown2Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown4_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown4Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown34_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown34_0_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* d, int dst_width); -void ScaleRowDown34_1_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* d, int dst_width); -void ScaleRowDown38_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown38_2_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown38_3_Box_MIPS_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); +void ScaleRowDown2_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, + uint8* dst, int dst_width); +void ScaleRowDown2Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, + uint8* dst, int dst_width); +void ScaleRowDown4_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, + uint8* dst, int dst_width); +void ScaleRowDown4Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, + uint8* dst, int dst_width); +void ScaleRowDown34_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, + uint8* dst, int dst_width); +void ScaleRowDown34_0_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, + uint8* d, int dst_width); +void ScaleRowDown34_1_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, + uint8* d, int dst_width); +void ScaleRowDown38_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, + uint8* dst, int dst_width); +void ScaleRowDown38_2_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, + uint8* dst_ptr, int dst_width); +void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, + uint8* dst_ptr, int dst_width); #ifdef __cplusplus } // extern "C" diff --git a/third_party/libyuv/include/libyuv/version.h b/third_party/libyuv/include/libyuv/version.h index 9d1d746c2..ca0c062eb 100644 --- a/third_party/libyuv/include/libyuv/version.h +++ b/third_party/libyuv/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1456 +#define LIBYUV_VERSION 1602 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/third_party/libyuv/include/libyuv/video_common.h b/third_party/libyuv/include/libyuv/video_common.h index cb6582f24..ad934e424 100644 --- a/third_party/libyuv/include/libyuv/video_common.h +++ b/third_party/libyuv/include/libyuv/video_common.h @@ -62,7 +62,7 @@ enum FourCC { // 2 Secondary YUV formats: row biplanar. FOURCC_M420 = FOURCC('M', '4', '2', '0'), - FOURCC_Q420 = FOURCC('Q', '4', '2', '0'), // deprecated. + FOURCC_Q420 = FOURCC('Q', '4', '2', '0'), // deprecated. // 9 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp. FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'), @@ -90,7 +90,8 @@ enum FourCC { FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'), FOURCC_YU12 = FOURCC('Y', 'U', '1', '2'), // Linux version of I420. FOURCC_J420 = FOURCC('J', '4', '2', '0'), - FOURCC_J400 = FOURCC('J', '4', '0', '0'), + FOURCC_J400 = FOURCC('J', '4', '0', '0'), // unofficial fourcc + FOURCC_H420 = FOURCC('H', '4', '2', '0'), // unofficial fourcc // 14 Auxiliary aliases. CanonicalFourCC() maps these to canonical fourcc. FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'), // Alias for I420. @@ -150,6 +151,7 @@ enum FourCCBpp { FOURCC_BPP_YU12 = 12, FOURCC_BPP_J420 = 12, FOURCC_BPP_J400 = 8, + FOURCC_BPP_H420 = 12, FOURCC_BPP_MJPG = 0, // 0 means unknown. FOURCC_BPP_H264 = 0, FOURCC_BPP_IYUV = 12, |