diff options
55 files changed, 1560 insertions, 1607 deletions
@@ -79,9 +79,6 @@ COMPILING THE APPLICATIONS/LIBRARIES: x86-os2-gcc x86-solaris-gcc x86-win32-gcc - x86-win32-vs7 - x86-win32-vs8 - x86-win32-vs9 x86-win32-vs10 x86-win32-vs11 x86-win32-vs12 @@ -98,8 +95,6 @@ COMPILING THE APPLICATIONS/LIBRARIES: x86_64-linux-icc x86_64-solaris-gcc x86_64-win64-gcc - x86_64-win64-vs8 - x86_64-win64-vs9 x86_64-win64-vs10 x86_64-win64-vs11 x86_64-win64-vs12 diff --git a/build/make/Makefile b/build/make/Makefile index 3e8c02490..dfb7e4b7a 100644 --- a/build/make/Makefile +++ b/build/make/Makefile @@ -418,7 +418,6 @@ ifneq ($(call enabled,DIST-SRCS),) DIST-SRCS-yes += build/make/gen_asm_deps.sh DIST-SRCS-yes += build/make/Makefile DIST-SRCS-$(CONFIG_MSVS) += build/make/gen_msvs_def.sh - DIST-SRCS-$(CONFIG_MSVS) += build/make/gen_msvs_proj.sh DIST-SRCS-$(CONFIG_MSVS) += build/make/gen_msvs_sln.sh DIST-SRCS-$(CONFIG_MSVS) += build/make/gen_msvs_vcxproj.sh DIST-SRCS-$(CONFIG_MSVS) += build/make/msvs_common.sh diff --git a/build/make/configure.sh b/build/make/configure.sh index f2b8b3765..2e1597779 100644 --- a/build/make/configure.sh +++ b/build/make/configure.sh @@ -1396,10 +1396,6 @@ EOF fi fi - if [ "${tgt_isa}" = "x86_64" ] || [ "${tgt_isa}" = "x86" ]; then - soft_enable use_x86inc - fi - # Position Independent Code (PIC) support, for building relocatable # shared objects enabled gcc && enabled pic && check_add_cflags -fPIC diff --git a/build/make/gen_msvs_proj.sh b/build/make/gen_msvs_proj.sh deleted file mode 100755 index 2b91fbfbc..000000000 --- a/build/make/gen_msvs_proj.sh +++ /dev/null @@ -1,490 +0,0 @@ -#!/bin/bash -## -## Copyright (c) 2010 The WebM project authors. All Rights Reserved. -## -## Use of this source code is governed by a BSD-style license -## that can be found in the LICENSE file in the root of the source -## tree. An additional intellectual property rights grant can be found -## in the file PATENTS. All contributing project authors may -## be found in the AUTHORS file in the root of the source tree. -## - -self=$0 -self_basename=${self##*/} -self_dirname=$(dirname "$0") - -. "$self_dirname/msvs_common.sh"|| exit 127 - -show_help() { - cat <<EOF -Usage: ${self_basename} --name=projname [options] file1 [file2 ...] - -This script generates a Visual Studio project file from a list of source -code files. - -Options: - --help Print this message - --exe Generate a project for building an Application - --lib Generate a project for creating a static library - --dll Generate a project for creating a dll - --static-crt Use the static C runtime (/MT) - --target=isa-os-cc Target specifier (required) - --out=filename Write output to a file [stdout] - --name=project_name Name of the project (required) - --proj-guid=GUID GUID to use for the project - --module-def=filename File containing export definitions (for DLLs) - --ver=version Version (7,8,9) of visual studio to generate for - --src-path-bare=dir Path to root of source tree - -Ipath/to/include Additional include directories - -DFLAG[=value] Preprocessor macros to define - -Lpath/to/lib Additional library search paths - -llibname Library to link against -EOF - exit 1 -} - -generate_filter() { - local var=$1 - local name=$2 - local pats=$3 - local file_list_sz - local i - local f - local saveIFS="$IFS" - local pack - echo "generating filter '$name' from ${#file_list[@]} files" >&2 - IFS=* - - open_tag Filter \ - Name=$name \ - Filter=$pats \ - UniqueIdentifier=`generate_uuid` \ - - file_list_sz=${#file_list[@]} - for i in ${!file_list[@]}; do - f=${file_list[i]} - for pat in ${pats//;/$IFS}; do - if [ "${f##*.}" == "$pat" ]; then - unset file_list[i] - - objf=$(echo ${f%.*}.obj \ - | sed -e "s,$src_path_bare,," \ - -e 's/^[\./]\+//g' -e 's,[:/ ],_,g') - open_tag File RelativePath="$f" - - if [ "$pat" == "asm" ] && $asm_use_custom_step; then - # Avoid object file name collisions, i.e. vpx_config.c and - # vpx_config.asm produce the same object file without - # this additional suffix. - objf=${objf%.obj}_asm.obj - for plat in "${platforms[@]}"; do - for cfg in Debug Release; do - open_tag FileConfiguration \ - Name="${cfg}|${plat}" \ - - tag Tool \ - Name="VCCustomBuildTool" \ - Description="Assembling \$(InputFileName)" \ - CommandLine="$(eval echo \$asm_${cfg}_cmdline) -o \$(IntDir)\\$objf" \ - Outputs="\$(IntDir)\\$objf" \ - - close_tag FileConfiguration - done - done - fi - if [ "$pat" == "c" ] || \ - [ "$pat" == "cc" ] || [ "$pat" == "cpp" ]; then - for plat in "${platforms[@]}"; do - for cfg in Debug Release; do - open_tag FileConfiguration \ - Name="${cfg}|${plat}" \ - - tag Tool \ - Name="VCCLCompilerTool" \ - ObjectFile="\$(IntDir)\\$objf" \ - - close_tag FileConfiguration - done - done - fi - close_tag File - - break - fi - done - done - - close_tag Filter - IFS="$saveIFS" -} - -# Process command line -unset target -for opt in "$@"; do - optval="${opt#*=}" - case "$opt" in - --help|-h) show_help - ;; - --target=*) target="${optval}" - ;; - --out=*) outfile="$optval" - ;; - --name=*) name="${optval}" - ;; - --proj-guid=*) guid="${optval}" - ;; - --module-def=*) link_opts="${link_opts} ModuleDefinitionFile=${optval}" - ;; - --exe) proj_kind="exe" - ;; - --dll) proj_kind="dll" - ;; - --lib) proj_kind="lib" - ;; - --src-path-bare=*) - src_path_bare=$(fix_path "$optval") - src_path_bare=${src_path_bare%/} - ;; - --static-crt) use_static_runtime=true - ;; - --ver=*) - vs_ver="$optval" - case "$optval" in - [789]) - ;; - *) die Unrecognized Visual Studio Version in $opt - ;; - esac - ;; - -I*) - opt=${opt##-I} - opt=$(fix_path "$opt") - opt="${opt%/}" - incs="${incs}${incs:+;}"${opt}"" - yasmincs="${yasmincs} -I"${opt}"" - ;; - -D*) defines="${defines}${defines:+;}${opt##-D}" - ;; - -L*) # fudge . to $(OutDir) - if [ "${opt##-L}" == "." ]; then - libdirs="${libdirs}${libdirs:+;}"\$(OutDir)"" - else - # Also try directories for this platform/configuration - opt=${opt##-L} - opt=$(fix_path "$opt") - libdirs="${libdirs}${libdirs:+;}"${opt}"" - libdirs="${libdirs}${libdirs:+;}"${opt}/\$(PlatformName)/\$(ConfigurationName)"" - libdirs="${libdirs}${libdirs:+;}"${opt}/\$(PlatformName)"" - fi - ;; - -l*) libs="${libs}${libs:+ }${opt##-l}.lib" - ;; - -*) die_unknown $opt - ;; - *) - # The paths in file_list are fixed outside of the loop. - file_list[${#file_list[@]}]="$opt" - case "$opt" in - *.asm) uses_asm=true - ;; - esac - ;; - esac -done - -# Make one call to fix_path for file_list to improve performance. -fix_file_list file_list - -outfile=${outfile:-/dev/stdout} -guid=${guid:-`generate_uuid`} -asm_use_custom_step=false -uses_asm=${uses_asm:-false} -case "${vs_ver:-8}" in - 7) vs_ver_id="7.10" - asm_use_custom_step=$uses_asm - warn_64bit='Detect64BitPortabilityProblems=true' - ;; - 8) vs_ver_id="8.00" - asm_use_custom_step=$uses_asm - warn_64bit='Detect64BitPortabilityProblems=true' - ;; - 9) vs_ver_id="9.00" - asm_use_custom_step=$uses_asm - warn_64bit='Detect64BitPortabilityProblems=false' - ;; -esac - -[ -n "$name" ] || die "Project name (--name) must be specified!" -[ -n "$target" ] || die "Target (--target) must be specified!" - -if ${use_static_runtime:-false}; then - release_runtime=0 - debug_runtime=1 - lib_sfx=mt -else - release_runtime=2 - debug_runtime=3 - lib_sfx=md -fi - -# Calculate debug lib names: If a lib ends in ${lib_sfx}.lib, then rename -# it to ${lib_sfx}d.lib. This precludes linking to release libs from a -# debug exe, so this may need to be refactored later. -for lib in ${libs}; do - if [ "$lib" != "${lib%${lib_sfx}.lib}" ]; then - lib=${lib%.lib}d.lib - fi - debug_libs="${debug_libs}${debug_libs:+ }${lib}" -done - - -# List Keyword for this target -case "$target" in - x86*) keyword="ManagedCProj" - ;; - *) die "Unsupported target $target!" -esac - -# List of all platforms supported for this target -case "$target" in - x86_64*) - platforms[0]="x64" - asm_Debug_cmdline="yasm -Xvc -g cv8 -f win64 ${yasmincs} "\$(InputPath)"" - asm_Release_cmdline="yasm -Xvc -f win64 ${yasmincs} "\$(InputPath)"" - ;; - x86*) - platforms[0]="Win32" - asm_Debug_cmdline="yasm -Xvc -g cv8 -f win32 ${yasmincs} "\$(InputPath)"" - asm_Release_cmdline="yasm -Xvc -f win32 ${yasmincs} "\$(InputPath)"" - ;; - *) die "Unsupported target $target!" - ;; -esac - -generate_vcproj() { - case "$proj_kind" in - exe) vs_ConfigurationType=1 - ;; - dll) vs_ConfigurationType=2 - ;; - *) vs_ConfigurationType=4 - ;; - esac - - echo "<?xml version=\"1.0\" encoding=\"Windows-1252\"?>" - open_tag VisualStudioProject \ - ProjectType="Visual C++" \ - Version="${vs_ver_id}" \ - Name="${name}" \ - ProjectGUID="{${guid}}" \ - RootNamespace="${name}" \ - Keyword="${keyword}" \ - - open_tag Platforms - for plat in "${platforms[@]}"; do - tag Platform Name="$plat" - done - close_tag Platforms - - open_tag Configurations - for plat in "${platforms[@]}"; do - plat_no_ws=`echo $plat | sed 's/[^A-Za-z0-9_]/_/g'` - open_tag Configuration \ - Name="Debug|$plat" \ - OutputDirectory="\$(SolutionDir)$plat_no_ws/\$(ConfigurationName)" \ - IntermediateDirectory="$plat_no_ws/\$(ConfigurationName)/${name}" \ - ConfigurationType="$vs_ConfigurationType" \ - CharacterSet="1" \ - - case "$target" in - x86*) - case "$name" in - vpx) - tag Tool \ - Name="VCCLCompilerTool" \ - Optimization="0" \ - AdditionalIncludeDirectories="$incs" \ - PreprocessorDefinitions="WIN32;_DEBUG;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE;$defines" \ - RuntimeLibrary="$debug_runtime" \ - UsePrecompiledHeader="0" \ - WarningLevel="3" \ - DebugInformationFormat="2" \ - $warn_64bit \ - - $uses_asm && tag Tool Name="YASM" IncludePaths="$incs" Debug="true" - ;; - *) - tag Tool \ - Name="VCCLCompilerTool" \ - Optimization="0" \ - AdditionalIncludeDirectories="$incs" \ - PreprocessorDefinitions="WIN32;_DEBUG;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE;$defines" \ - RuntimeLibrary="$debug_runtime" \ - UsePrecompiledHeader="0" \ - WarningLevel="3" \ - DebugInformationFormat="2" \ - $warn_64bit \ - - $uses_asm && tag Tool Name="YASM" IncludePaths="$incs" Debug="true" - ;; - esac - ;; - esac - - case "$proj_kind" in - exe) - case "$target" in - x86*) - case "$name" in - *) - tag Tool \ - Name="VCLinkerTool" \ - AdditionalDependencies="$debug_libs \$(NoInherit)" \ - AdditionalLibraryDirectories="$libdirs" \ - GenerateDebugInformation="true" \ - ProgramDatabaseFile="\$(OutDir)/${name}.pdb" \ - ;; - esac - ;; - esac - ;; - lib) - case "$target" in - x86*) - tag Tool \ - Name="VCLibrarianTool" \ - OutputFile="\$(OutDir)/${name}${lib_sfx}d.lib" \ - - ;; - esac - ;; - dll) - tag Tool \ - Name="VCLinkerTool" \ - AdditionalDependencies="\$(NoInherit)" \ - LinkIncremental="2" \ - GenerateDebugInformation="true" \ - AssemblyDebug="1" \ - TargetMachine="1" \ - $link_opts \ - - ;; - esac - - close_tag Configuration - - open_tag Configuration \ - Name="Release|$plat" \ - OutputDirectory="\$(SolutionDir)$plat_no_ws/\$(ConfigurationName)" \ - IntermediateDirectory="$plat_no_ws/\$(ConfigurationName)/${name}" \ - ConfigurationType="$vs_ConfigurationType" \ - CharacterSet="1" \ - WholeProgramOptimization="0" \ - - case "$target" in - x86*) - case "$name" in - vpx) - tag Tool \ - Name="VCCLCompilerTool" \ - Optimization="2" \ - FavorSizeorSpeed="1" \ - AdditionalIncludeDirectories="$incs" \ - PreprocessorDefinitions="WIN32;NDEBUG;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE;$defines" \ - RuntimeLibrary="$release_runtime" \ - UsePrecompiledHeader="0" \ - WarningLevel="3" \ - DebugInformationFormat="0" \ - $warn_64bit \ - - $uses_asm && tag Tool Name="YASM" IncludePaths="$incs" - ;; - *) - tag Tool \ - Name="VCCLCompilerTool" \ - AdditionalIncludeDirectories="$incs" \ - Optimization="2" \ - FavorSizeorSpeed="1" \ - PreprocessorDefinitions="WIN32;NDEBUG;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE;$defines" \ - RuntimeLibrary="$release_runtime" \ - UsePrecompiledHeader="0" \ - WarningLevel="3" \ - DebugInformationFormat="0" \ - $warn_64bit \ - - $uses_asm && tag Tool Name="YASM" IncludePaths="$incs" - ;; - esac - ;; - esac - - case "$proj_kind" in - exe) - case "$target" in - x86*) - case "$name" in - *) - tag Tool \ - Name="VCLinkerTool" \ - AdditionalDependencies="$libs \$(NoInherit)" \ - AdditionalLibraryDirectories="$libdirs" \ - - ;; - esac - ;; - esac - ;; - lib) - case "$target" in - x86*) - tag Tool \ - Name="VCLibrarianTool" \ - OutputFile="\$(OutDir)/${name}${lib_sfx}.lib" \ - - ;; - esac - ;; - dll) # note differences to debug version: LinkIncremental, AssemblyDebug - tag Tool \ - Name="VCLinkerTool" \ - AdditionalDependencies="\$(NoInherit)" \ - LinkIncremental="1" \ - GenerateDebugInformation="true" \ - TargetMachine="1" \ - $link_opts \ - - ;; - esac - - close_tag Configuration - done - close_tag Configurations - - open_tag Files - generate_filter srcs "Source Files" "c;cc;cpp;def;odl;idl;hpj;bat;asm;asmx" - generate_filter hdrs "Header Files" "h;hm;inl;inc;xsd" - generate_filter resrcs "Resource Files" "rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav" - generate_filter resrcs "Build Files" "mk" - close_tag Files - - tag Globals - close_tag VisualStudioProject - - # This must be done from within the {} subshell - echo "Ignored files list (${#file_list[@]} items) is:" >&2 - for f in "${file_list[@]}"; do - echo " $f" >&2 - done -} - -generate_vcproj | - sed -e '/"/s;\([^ "]\)/;\1\\;g' > ${outfile} - -exit -<!-- -TODO: Add any files not captured by filters. - <File - RelativePath=".\ReadMe.txt" - > - </File> ---> diff --git a/build/make/gen_msvs_sln.sh b/build/make/gen_msvs_sln.sh index 664b404c9..7d5f46810 100755 --- a/build/make/gen_msvs_sln.sh +++ b/build/make/gen_msvs_sln.sh @@ -55,16 +55,11 @@ indent_pop() { parse_project() { local file=$1 - if [ "$sfx" = "vcproj" ]; then - local name=`grep Name "$file" | awk 'BEGIN {FS="\""}{if (NR==1) print $2}'` - local guid=`grep ProjectGUID "$file" | awk 'BEGIN {FS="\""}{if (NR==1) print $2}'` - else - local name=`grep RootNamespace "$file" | sed 's,.*<.*>\(.*\)</.*>.*,\1,'` - local guid=`grep ProjectGuid "$file" | sed 's,.*<.*>\(.*\)</.*>.*,\1,'` - fi + local name=`grep RootNamespace "$file" | sed 's,.*<.*>\(.*\)</.*>.*,\1,'` + local guid=`grep ProjectGuid "$file" | sed 's,.*<.*>\(.*\)</.*>.*,\1,'` # save the project GUID to a varaible, normalizing to the basename of the - # vcproj file without the extension + # vcxproj file without the extension local var var=${file##*/} var=${var%%.${sfx}} @@ -72,13 +67,8 @@ parse_project() { eval "${var}_name=$name" eval "${var}_guid=$guid" - if [ "$sfx" = "vcproj" ]; then - cur_config_list=`grep -A1 '<Configuration' $file | - grep Name | cut -d\" -f2` - else - cur_config_list=`grep -B1 'Label="Configuration"' $file | - grep Condition | cut -d\' -f4` - fi + cur_config_list=`grep -B1 'Label="Configuration"' $file | + grep Condition | cut -d\' -f4` new_config_list=$(for i in $config_list $cur_config_list; do echo $i done | sort | uniq) @@ -103,25 +93,6 @@ process_project() { eval "${var}_guid=$guid" echo "Project(\"{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}\") = \"$name\", \"$file\", \"$guid\"" - indent_push - - eval "local deps=\"\${${var}_deps}\"" - if [ -n "$deps" ] && [ "$sfx" = "vcproj" ]; then - echo "${indent}ProjectSection(ProjectDependencies) = postProject" - indent_push - - for dep in $deps; do - eval "local dep_guid=\${${dep}_guid}" - [ -z "${dep_guid}" ] && die "Unknown GUID for $dep (dependency of $var)" - echo "${indent}$dep_guid = $dep_guid" - done - - indent_pop - echo "${indent}EndProjectSection" - - fi - - indent_pop echo "EndProject" } @@ -191,11 +162,7 @@ process_makefile() { IFS=$'\r'$'\n' local TAB=$'\t' cat <<EOF -ifeq (\$(CONFIG_VS_VERSION),7) -MSBUILD_TOOL := devenv.com -else MSBUILD_TOOL := msbuild.exe -endif found_devenv := \$(shell which \$(MSBUILD_TOOL) >/dev/null 2>&1 && echo yes) .nodevenv.once: ${TAB}@echo " * \$(MSBUILD_TOOL) not found in path." @@ -204,7 +171,7 @@ ${TAB}@echo " * You will have to build all configurations manually using the" ${TAB}@echo " * Visual Studio IDE. To allow make to build them automatically," ${TAB}@echo " * add the Common7/IDE directory of your Visual Studio" ${TAB}@echo " * installation to your path, eg:" -${TAB}@echo " * C:\Program Files\Microsoft Visual Studio 8\Common7\IDE" +${TAB}@echo " * C:\Program Files\Microsoft Visual Studio 10.0\Common7\IDE" ${TAB}@echo " * " ${TAB}@touch \$@ CLEAN-OBJS += \$(if \$(found_devenv),,.nodevenv.once) @@ -221,16 +188,9 @@ clean:: ${TAB}rm -rf "$platform"/"$config" .PHONY: $nows_sln_config ifneq (\$(found_devenv),) - ifeq (\$(CONFIG_VS_VERSION),7) -$nows_sln_config: $outfile -${TAB}\$(MSBUILD_TOOL) $outfile -build "$config" - - else $nows_sln_config: $outfile ${TAB}\$(MSBUILD_TOOL) $outfile -m -t:Build \\ ${TAB}${TAB}-p:Configuration="$config" -p:Platform="$platform" - - endif else $nows_sln_config: $outfile .nodevenv.once ${TAB}@echo " * Skipping build of $sln_config (\$(MSBUILD_TOOL) not in path)." @@ -255,23 +215,12 @@ for opt in "$@"; do ;; --ver=*) vs_ver="$optval" case $optval in - [789]|10|11|12|14) + 10|11|12|14) ;; *) die Unrecognized Visual Studio Version in $opt ;; esac ;; - --ver=*) vs_ver="$optval" - case $optval in - 7) sln_vers="8.00" - sln_vers_str="Visual Studio .NET 2003" - ;; - [89]) - ;; - *) die "Unrecognized Visual Studio Version '$optval' in $opt" - ;; - esac - ;; --target=*) target="${optval}" ;; -*) die_unknown $opt @@ -281,16 +230,7 @@ for opt in "$@"; do done outfile=${outfile:-/dev/stdout} mkoutfile=${mkoutfile:-/dev/stdout} -case "${vs_ver:-8}" in - 7) sln_vers="8.00" - sln_vers_str="Visual Studio .NET 2003" - ;; - 8) sln_vers="9.00" - sln_vers_str="Visual Studio 2005" - ;; - 9) sln_vers="10.00" - sln_vers_str="Visual Studio 2008" - ;; +case "${vs_ver:-10}" in 10) sln_vers="11.00" sln_vers_str="Visual Studio 2010" ;; @@ -304,14 +244,7 @@ case "${vs_ver:-8}" in sln_vers_str="Visual Studio 2015" ;; esac -case "${vs_ver:-8}" in - [789]) - sfx=vcproj - ;; - 10|11|12|14) - sfx=vcxproj - ;; -esac +sfx=vcxproj for f in "${file_list[@]}"; do parse_project $f @@ -132,9 +132,6 @@ all_platforms="${all_platforms} x86-linux-icc" all_platforms="${all_platforms} x86-os2-gcc" all_platforms="${all_platforms} x86-solaris-gcc" all_platforms="${all_platforms} x86-win32-gcc" -all_platforms="${all_platforms} x86-win32-vs7" -all_platforms="${all_platforms} x86-win32-vs8" -all_platforms="${all_platforms} x86-win32-vs9" all_platforms="${all_platforms} x86-win32-vs10" all_platforms="${all_platforms} x86-win32-vs11" all_platforms="${all_platforms} x86-win32-vs12" @@ -152,8 +149,6 @@ all_platforms="${all_platforms} x86_64-linux-gcc" all_platforms="${all_platforms} x86_64-linux-icc" all_platforms="${all_platforms} x86_64-solaris-gcc" all_platforms="${all_platforms} x86_64-win64-gcc" -all_platforms="${all_platforms} x86_64-win64-vs8" -all_platforms="${all_platforms} x86_64-win64-vs9" all_platforms="${all_platforms} x86_64-win64-vs10" all_platforms="${all_platforms} x86_64-win64-vs11" all_platforms="${all_platforms} x86_64-win64-vs12" @@ -272,7 +267,6 @@ CONFIG_LIST=" install_bins install_libs install_srcs - use_x86inc debug gprof gcov @@ -334,7 +328,6 @@ CMDLINE_SELECT=" gprof gcov pic - use_x86inc optimizations ccache runtime_cpu_detect @@ -646,17 +639,9 @@ process_toolchain() { vs*) enable_feature msvs enable_feature solution vs_version=${tgt_cc##vs} - case $vs_version in - [789]) - VCPROJ_SFX=vcproj - gen_vcproj_cmd=${source_path}/build/make/gen_msvs_proj.sh - ;; - 10|11|12|14) - VCPROJ_SFX=vcxproj - gen_vcproj_cmd=${source_path}/build/make/gen_msvs_vcxproj.sh - enabled werror && gen_vcproj_cmd="${gen_vcproj_cmd} --enable-werror" - ;; - esac + VCPROJ_SFX=vcxproj + gen_vcproj_cmd=${source_path}/build/make/gen_msvs_vcxproj.sh + enabled werror && gen_vcproj_cmd="${gen_vcproj_cmd} --enable-werror" all_targets="${all_targets} solution" INLINE="__forceinline" ;; diff --git a/examples.mk b/examples.mk index c891a5496..cc7fb1ddc 100644 --- a/examples.mk +++ b/examples.mk @@ -215,6 +215,17 @@ vp8cx_set_ref.SRCS += vpx_ports/msvc.h vp8cx_set_ref.GUID = C5E31F7F-96F6-48BD-BD3E-10EBF6E8057A vp8cx_set_ref.DESCRIPTION = VP8 set encoder reference frame +ifeq ($(CONFIG_VP9_ENCODER),yes) +ifeq ($(CONFIG_DECODERS),yes) +EXAMPLES-yes += vp9cx_set_ref.c +vp9cx_set_ref.SRCS += ivfenc.h ivfenc.c +vp9cx_set_ref.SRCS += tools_common.h tools_common.c +vp9cx_set_ref.SRCS += video_common.h +vp9cx_set_ref.SRCS += video_writer.h video_writer.c +vp9cx_set_ref.GUID = 65D7F14A-2EE6-4293-B958-AB5107A03B55 +vp9cx_set_ref.DESCRIPTION = VP9 set encoder reference frame +endif +endif ifeq ($(CONFIG_MULTI_RES_ENCODING),yes) ifeq ($(CONFIG_LIBYUV),yes) diff --git a/examples/vp9cx_set_ref.c b/examples/vp9cx_set_ref.c new file mode 100644 index 000000000..acf79dbca --- /dev/null +++ b/examples/vp9cx_set_ref.c @@ -0,0 +1,453 @@ +/* + * Copyright (c) 2016 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +// VP9 Set Reference Frame +// ============================ +// +// This is an example demonstrating how to overwrite the VP9 encoder's +// internal reference frame. In the sample we set the last frame to the +// current frame. This technique could be used to bounce between two cameras. +// +// The decoder would also have to set the reference frame to the same value +// on the same frame, or the video will become corrupt. The 'test_decode' +// variable is set to 1 in this example that tests if the encoder and decoder +// results are matching. +// +// Usage +// ----- +// This example encodes a raw video. And the last argument passed in specifies +// the frame number to update the reference frame on. For example, run +// examples/vp9cx_set_ref 352 288 in.yuv out.ivf 4 30 +// The parameter is parsed as follows: +// +// +// Extra Variables +// --------------- +// This example maintains the frame number passed on the command line +// in the `update_frame_num` variable. +// +// +// Configuration +// ------------- +// +// The reference frame is updated on the frame specified on the command +// line. +// +// Observing The Effects +// --------------------- +// The encoder and decoder results should be matching when the same reference +// frame setting operation is done in both encoder and decoder. Otherwise, +// the encoder/decoder mismatch would be seen. + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "vpx/vp8cx.h" +#include "vpx/vpx_decoder.h" +#include "vpx/vpx_encoder.h" + +#include "./tools_common.h" +#include "./video_writer.h" + +static const char *exec_name; + +void usage_exit() { + fprintf(stderr, "Usage: %s <width> <height> <infile> <outfile> " + "<frame> <limit(optional)>\n", + exec_name); + exit(EXIT_FAILURE); +} + +static int compare_img(const vpx_image_t *const img1, + const vpx_image_t *const img2) { + uint32_t l_w = img1->d_w; + uint32_t c_w = + (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift; + const uint32_t c_h = + (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift; + uint32_t i; + int match = 1; + + match &= (img1->fmt == img2->fmt); + match &= (img1->d_w == img2->d_w); + match &= (img1->d_h == img2->d_h); + + for (i = 0; i < img1->d_h; ++i) + match &= (memcmp(img1->planes[VPX_PLANE_Y] + i * img1->stride[VPX_PLANE_Y], + img2->planes[VPX_PLANE_Y] + i * img2->stride[VPX_PLANE_Y], + l_w) == 0); + + for (i = 0; i < c_h; ++i) + match &= (memcmp(img1->planes[VPX_PLANE_U] + i * img1->stride[VPX_PLANE_U], + img2->planes[VPX_PLANE_U] + i * img2->stride[VPX_PLANE_U], + c_w) == 0); + + for (i = 0; i < c_h; ++i) + match &= (memcmp(img1->planes[VPX_PLANE_V] + i * img1->stride[VPX_PLANE_V], + img2->planes[VPX_PLANE_V] + i * img2->stride[VPX_PLANE_V], + c_w) == 0); + + return match; +} + +#define mmin(a, b) ((a) < (b) ? (a) : (b)) +static void find_mismatch(const vpx_image_t *const img1, + const vpx_image_t *const img2, + int yloc[4], int uloc[4], int vloc[4]) { + const uint32_t bsize = 64; + const uint32_t bsizey = bsize >> img1->y_chroma_shift; + const uint32_t bsizex = bsize >> img1->x_chroma_shift; + const uint32_t c_w = + (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift; + const uint32_t c_h = + (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift; + int match = 1; + uint32_t i, j; + yloc[0] = yloc[1] = yloc[2] = yloc[3] = -1; + for (i = 0, match = 1; match && i < img1->d_h; i += bsize) { + for (j = 0; match && j < img1->d_w; j += bsize) { + int k, l; + const int si = mmin(i + bsize, img1->d_h) - i; + const int sj = mmin(j + bsize, img1->d_w) - j; + for (k = 0; match && k < si; ++k) { + for (l = 0; match && l < sj; ++l) { + if (*(img1->planes[VPX_PLANE_Y] + + (i + k) * img1->stride[VPX_PLANE_Y] + j + l) != + *(img2->planes[VPX_PLANE_Y] + + (i + k) * img2->stride[VPX_PLANE_Y] + j + l)) { + yloc[0] = i + k; + yloc[1] = j + l; + yloc[2] = *(img1->planes[VPX_PLANE_Y] + + (i + k) * img1->stride[VPX_PLANE_Y] + j + l); + yloc[3] = *(img2->planes[VPX_PLANE_Y] + + (i + k) * img2->stride[VPX_PLANE_Y] + j + l); + match = 0; + break; + } + } + } + } + } + + uloc[0] = uloc[1] = uloc[2] = uloc[3] = -1; + for (i = 0, match = 1; match && i < c_h; i += bsizey) { + for (j = 0; match && j < c_w; j += bsizex) { + int k, l; + const int si = mmin(i + bsizey, c_h - i); + const int sj = mmin(j + bsizex, c_w - j); + for (k = 0; match && k < si; ++k) { + for (l = 0; match && l < sj; ++l) { + if (*(img1->planes[VPX_PLANE_U] + + (i + k) * img1->stride[VPX_PLANE_U] + j + l) != + *(img2->planes[VPX_PLANE_U] + + (i + k) * img2->stride[VPX_PLANE_U] + j + l)) { + uloc[0] = i + k; + uloc[1] = j + l; + uloc[2] = *(img1->planes[VPX_PLANE_U] + + (i + k) * img1->stride[VPX_PLANE_U] + j + l); + uloc[3] = *(img2->planes[VPX_PLANE_U] + + (i + k) * img2->stride[VPX_PLANE_U] + j + l); + match = 0; + break; + } + } + } + } + } + vloc[0] = vloc[1] = vloc[2] = vloc[3] = -1; + for (i = 0, match = 1; match && i < c_h; i += bsizey) { + for (j = 0; match && j < c_w; j += bsizex) { + int k, l; + const int si = mmin(i + bsizey, c_h - i); + const int sj = mmin(j + bsizex, c_w - j); + for (k = 0; match && k < si; ++k) { + for (l = 0; match && l < sj; ++l) { + if (*(img1->planes[VPX_PLANE_V] + + (i + k) * img1->stride[VPX_PLANE_V] + j + l) != + *(img2->planes[VPX_PLANE_V] + + (i + k) * img2->stride[VPX_PLANE_V] + j + l)) { + vloc[0] = i + k; + vloc[1] = j + l; + vloc[2] = *(img1->planes[VPX_PLANE_V] + + (i + k) * img1->stride[VPX_PLANE_V] + j + l); + vloc[3] = *(img2->planes[VPX_PLANE_V] + + (i + k) * img2->stride[VPX_PLANE_V] + j + l); + match = 0; + break; + } + } + } + } + } +} + +static void testing_decode(vpx_codec_ctx_t *encoder, + vpx_codec_ctx_t *decoder, + vpx_codec_enc_cfg_t *cfg, + unsigned int frame_out, + int *mismatch_seen) { + vpx_image_t enc_img, dec_img; + struct vp9_ref_frame ref_enc, ref_dec; + + if (*mismatch_seen) + return; + + ref_enc.idx = 0; + ref_dec.idx = 0; + if (vpx_codec_control(encoder, VP9_GET_REFERENCE, &ref_enc)) + die_codec(encoder, "Failed to get encoder reference frame"); + enc_img = ref_enc.img; + if (vpx_codec_control(decoder, VP9_GET_REFERENCE, &ref_dec)) + die_codec(decoder, "Failed to get decoder reference frame"); + dec_img = ref_dec.img; + + if (!compare_img(&enc_img, &dec_img)) { + int y[4], u[4], v[4]; + + *mismatch_seen = 1; + + find_mismatch(&enc_img, &dec_img, y, u, v); + printf("Encode/decode mismatch on frame %d at" + " Y[%d, %d] {%d/%d}," + " U[%d, %d] {%d/%d}," + " V[%d, %d] {%d/%d}", + frame_out, + y[0], y[1], y[2], y[3], + u[0], u[1], u[2], u[3], + v[0], v[1], v[2], v[3]); + } + + vpx_img_free(&enc_img); + vpx_img_free(&dec_img); +} + +static int encode_frame(vpx_codec_ctx_t *ecodec, + vpx_codec_enc_cfg_t *cfg, + vpx_image_t *img, + unsigned int frame_in, + VpxVideoWriter *writer, + int test_decode, + vpx_codec_ctx_t *dcodec, + unsigned int *frame_out, + int *mismatch_seen) { + int got_pkts = 0; + vpx_codec_iter_t iter = NULL; + const vpx_codec_cx_pkt_t *pkt = NULL; + int got_data; + const vpx_codec_err_t res = vpx_codec_encode(ecodec, img, frame_in, 1, + 0, VPX_DL_GOOD_QUALITY); + if (res != VPX_CODEC_OK) + die_codec(ecodec, "Failed to encode frame"); + + got_data = 0; + + while ((pkt = vpx_codec_get_cx_data(ecodec, &iter)) != NULL) { + got_pkts = 1; + + if (pkt->kind == VPX_CODEC_CX_FRAME_PKT) { + const int keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) != 0; + + if (!(pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT)) { + *frame_out += 1; + } + + if (!vpx_video_writer_write_frame(writer, + pkt->data.frame.buf, + pkt->data.frame.sz, + pkt->data.frame.pts)) { + die_codec(ecodec, "Failed to write compressed frame"); + } + printf(keyframe ? "K" : "."); + fflush(stdout); + got_data = 1; + + // Decode 1 frame. + if (test_decode) { + if (vpx_codec_decode(dcodec, pkt->data.frame.buf, + (unsigned int)pkt->data.frame.sz, NULL, 0)) + die_codec(dcodec, "Failed to decode frame."); + } + } + } + + // Mismatch checking + if (got_data && test_decode) { + testing_decode(ecodec, dcodec, cfg, *frame_out, mismatch_seen); + } + + return got_pkts; +} + +int main(int argc, char **argv) { + FILE *infile = NULL; + // Encoder + vpx_codec_ctx_t ecodec = {0}; + vpx_codec_enc_cfg_t cfg = {0}; + unsigned int frame_in = 0; + vpx_image_t raw; + vpx_codec_err_t res; + VpxVideoInfo info = {0}; + VpxVideoWriter *writer = NULL; + const VpxInterface *encoder = NULL; + + // Test encoder/decoder mismatch. + int test_decode = 1; + // Decoder + vpx_codec_ctx_t dcodec; + unsigned int frame_out = 0; + + // The frame number to set reference frame on + int update_frame_num = 0; + int mismatch_seen = 0; + + const int fps = 30; + const int bitrate = 500; + + const char *width_arg = NULL; + const char *height_arg = NULL; + const char *infile_arg = NULL; + const char *outfile_arg = NULL; + unsigned int limit = 0; + exec_name = argv[0]; + + if (argc < 6) + die("Invalid number of arguments"); + + width_arg = argv[1]; + height_arg = argv[2]; + infile_arg = argv[3]; + outfile_arg = argv[4]; + + encoder = get_vpx_encoder_by_name("vp9"); + if (!encoder) + die("Unsupported codec."); + + update_frame_num = atoi(argv[5]); + // In VP9, the reference buffers (cm->buffer_pool->frame_bufs[i].buf) are + // allocated while calling vpx_codec_encode(), thus, setting reference for + // 1st frame isn't supported. + if (update_frame_num <= 1) + die("Couldn't parse frame number '%s'\n", argv[5]); + + if (argc > 6) { + limit = atoi(argv[6]); + if (update_frame_num > limit) + die("Update frame number couldn't larger than limit\n"); + } + + info.codec_fourcc = encoder->fourcc; + info.frame_width = strtol(width_arg, NULL, 0); + info.frame_height = strtol(height_arg, NULL, 0); + info.time_base.numerator = 1; + info.time_base.denominator = fps; + + if (info.frame_width <= 0 || + info.frame_height <= 0 || + (info.frame_width % 2) != 0 || + (info.frame_height % 2) != 0) { + die("Invalid frame size: %dx%d", info.frame_width, info.frame_height); + } + + if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, info.frame_width, + info.frame_height, 1)) { + die("Failed to allocate image."); + } + + printf("Using %s\n", vpx_codec_iface_name(encoder->codec_interface())); + + res = vpx_codec_enc_config_default(encoder->codec_interface(), &cfg, 0); + if (res) + die_codec(&ecodec, "Failed to get default codec config."); + + cfg.g_w = info.frame_width; + cfg.g_h = info.frame_height; + cfg.g_timebase.num = info.time_base.numerator; + cfg.g_timebase.den = info.time_base.denominator; + cfg.rc_target_bitrate = bitrate; + cfg.g_lag_in_frames = 3; + + writer = vpx_video_writer_open(outfile_arg, kContainerIVF, &info); + if (!writer) + die("Failed to open %s for writing.", outfile_arg); + + if (!(infile = fopen(infile_arg, "rb"))) + die("Failed to open %s for reading.", infile_arg); + + if (vpx_codec_enc_init(&ecodec, encoder->codec_interface(), &cfg, 0)) + die_codec(&ecodec, "Failed to initialize encoder"); + + // Disable alt_ref. + if (vpx_codec_control(&ecodec, VP8E_SET_ENABLEAUTOALTREF, 0)) + die_codec(&ecodec, "Failed to set enable auto alt ref"); + + if (test_decode) { + const VpxInterface *decoder = get_vpx_decoder_by_name("vp9"); + if (vpx_codec_dec_init(&dcodec, decoder->codec_interface(), NULL, 0)) + die_codec(&dcodec, "Failed to initialize decoder."); + } + + // Encode frames. + while (vpx_img_read(&raw, infile)) { + if (limit && frame_in >= limit) + break; + if (update_frame_num > 1 && frame_out + 1 == update_frame_num) { + vpx_ref_frame_t ref; + ref.frame_type = VP8_LAST_FRAME; + ref.img = raw; + // Set reference frame in encoder. + if (vpx_codec_control(&ecodec, VP8_SET_REFERENCE, &ref)) + die_codec(&ecodec, "Failed to set reference frame"); + printf(" <SET_REF>"); + + // If set_reference in decoder is commented out, the enc/dec mismatch + // would be seen. + if (test_decode) { + if (vpx_codec_control(&dcodec, VP8_SET_REFERENCE, &ref)) + die_codec(&dcodec, "Failed to set reference frame"); + } + } + + encode_frame(&ecodec, &cfg, &raw, frame_in, writer, test_decode, + &dcodec, &frame_out, &mismatch_seen); + frame_in++; + if (mismatch_seen) + break; + } + + // Flush encoder. + if (!mismatch_seen) + while (encode_frame(&ecodec, &cfg, NULL, frame_in, writer, test_decode, + &dcodec, &frame_out, &mismatch_seen)) {} + + printf("\n"); + fclose(infile); + printf("Processed %d frames.\n", frame_out); + + if (test_decode) { + if (!mismatch_seen) + printf("Encoder/decoder results are matching.\n"); + else + printf("Encoder/decoder results are NOT matching.\n"); + } + + if (test_decode) + if (vpx_codec_destroy(&dcodec)) + die_codec(&dcodec, "Failed to destroy decoder"); + + vpx_img_free(&raw); + if (vpx_codec_destroy(&ecodec)) + die_codec(&ecodec, "Failed to destroy encoder."); + + vpx_video_writer_close(writer); + + return EXIT_SUCCESS; +} diff --git a/test/convolve_test.cc b/test/convolve_test.cc index 73b0edb99..5933a62b0 100644 --- a/test/convolve_test.cc +++ b/test/convolve_test.cc @@ -431,7 +431,8 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> { void CopyOutputToRef() { memcpy(output_ref_, output_, kOutputBufferSize); #if CONFIG_VP9_HIGHBITDEPTH - memcpy(output16_ref_, output16_, kOutputBufferSize); + memcpy(output16_ref_, output16_, + kOutputBufferSize * sizeof(output16_ref_[0])); #endif } @@ -443,41 +444,41 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> { } uint8_t *input() const { + const int offset = BorderTop() * kOuterBlockSize + BorderLeft(); #if CONFIG_VP9_HIGHBITDEPTH if (UUT_->use_highbd_ == 0) { - return input_ + BorderTop() * kOuterBlockSize + BorderLeft(); + return input_ + offset; } else { - return CONVERT_TO_BYTEPTR(input16_ + BorderTop() * kOuterBlockSize + - BorderLeft()); + return CONVERT_TO_BYTEPTR(input16_) + offset; } #else - return input_ + BorderTop() * kOuterBlockSize + BorderLeft(); + return input_ + offset; #endif } uint8_t *output() const { + const int offset = BorderTop() * kOuterBlockSize + BorderLeft(); #if CONFIG_VP9_HIGHBITDEPTH if (UUT_->use_highbd_ == 0) { - return output_ + BorderTop() * kOuterBlockSize + BorderLeft(); + return output_ + offset; } else { - return CONVERT_TO_BYTEPTR(output16_ + BorderTop() * kOuterBlockSize + - BorderLeft()); + return CONVERT_TO_BYTEPTR(output16_) + offset; } #else - return output_ + BorderTop() * kOuterBlockSize + BorderLeft(); + return output_ + offset; #endif } uint8_t *output_ref() const { + const int offset = BorderTop() * kOuterBlockSize + BorderLeft(); #if CONFIG_VP9_HIGHBITDEPTH if (UUT_->use_highbd_ == 0) { - return output_ref_ + BorderTop() * kOuterBlockSize + BorderLeft(); + return output_ref_ + offset; } else { - return CONVERT_TO_BYTEPTR(output16_ref_ + BorderTop() * kOuterBlockSize + - BorderLeft()); + return CONVERT_TO_BYTEPTR(output16_ref_) + offset; } #else - return output_ref_ + BorderTop() * kOuterBlockSize + BorderLeft(); + return output_ref_ + offset; #endif } @@ -989,14 +990,12 @@ void wrap_ ## func ## _ ## bd(const uint8_t *src, ptrdiff_t src_stride, \ w, h, bd); \ } #if HAVE_SSE2 && ARCH_X86_64 -#if CONFIG_USE_X86INC WRAP(convolve_copy_sse2, 8) WRAP(convolve_avg_sse2, 8) WRAP(convolve_copy_sse2, 10) WRAP(convolve_avg_sse2, 10) WRAP(convolve_copy_sse2, 12) WRAP(convolve_avg_sse2, 12) -#endif // CONFIG_USE_X86INC WRAP(convolve8_horiz_sse2, 8) WRAP(convolve8_avg_horiz_sse2, 8) WRAP(convolve8_vert_sse2, 8) @@ -1090,11 +1089,7 @@ INSTANTIATE_TEST_CASE_P(C, ConvolveTest, #if HAVE_SSE2 && ARCH_X86_64 #if CONFIG_VP9_HIGHBITDEPTH const ConvolveFunctions convolve8_sse2( -#if CONFIG_USE_X86INC wrap_convolve_copy_sse2_8, wrap_convolve_avg_sse2_8, -#else - wrap_convolve_copy_c_8, wrap_convolve_avg_c_8, -#endif // CONFIG_USE_X86INC wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8, wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8, wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8, @@ -1102,11 +1097,7 @@ const ConvolveFunctions convolve8_sse2( wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8, wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8, 8); const ConvolveFunctions convolve10_sse2( -#if CONFIG_USE_X86INC wrap_convolve_copy_sse2_10, wrap_convolve_avg_sse2_10, -#else - wrap_convolve_copy_c_10, wrap_convolve_avg_c_10, -#endif // CONFIG_USE_X86INC wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10, wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10, wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10, @@ -1114,11 +1105,7 @@ const ConvolveFunctions convolve10_sse2( wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10, wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10, 10); const ConvolveFunctions convolve12_sse2( -#if CONFIG_USE_X86INC wrap_convolve_copy_sse2_12, wrap_convolve_avg_sse2_12, -#else - wrap_convolve_copy_c_12, wrap_convolve_avg_c_12, -#endif // CONFIG_USE_X86INC wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12, wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12, wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, @@ -1132,11 +1119,7 @@ const ConvolveParam kArrayConvolve_sse2[] = { }; #else const ConvolveFunctions convolve8_sse2( -#if CONFIG_USE_X86INC vpx_convolve_copy_sse2, vpx_convolve_avg_sse2, -#else - vpx_convolve_copy_c, vpx_convolve_avg_c, -#endif // CONFIG_USE_X86INC vpx_convolve8_horiz_sse2, vpx_convolve8_avg_horiz_sse2, vpx_convolve8_vert_sse2, vpx_convolve8_avg_vert_sse2, vpx_convolve8_sse2, vpx_convolve8_avg_sse2, diff --git a/test/vp8cx_set_ref.sh b/test/cx_set_ref.sh index 5d760bcde..0a58dc187 100755 --- a/test/vp8cx_set_ref.sh +++ b/test/cx_set_ref.sh @@ -1,6 +1,6 @@ #!/bin/sh ## -## Copyright (c) 2014 The WebM project authors. All Rights Reserved. +## Copyright (c) 2016 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source @@ -8,30 +8,27 @@ ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## -## This file tests the libvpx vp8cx_set_ref example. To add new tests to this +## This file tests the libvpx cx_set_ref example. To add new tests to this ## file, do the following: ## 1. Write a shell function (this is your test). -## 2. Add the function to vp8cx_set_ref_tests (on a new line). +## 2. Add the function to cx_set_ref_tests (on a new line). ## . $(dirname $0)/tools_common.sh # Environment check: $YUV_RAW_INPUT is required. -vp8cx_set_ref_verify_environment() { +cx_set_ref_verify_environment() { if [ ! -e "${YUV_RAW_INPUT}" ]; then echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH." return 1 fi } -# Runs vp8cx_set_ref and updates the reference frame before encoding frame 90. -# $1 is the codec name, which vp8cx_set_ref does not support at present: It's -# currently used only to name the output file. -# TODO(tomfinegan): Pass the codec param once the example is updated to support -# VP9. +# Runs cx_set_ref and updates the reference frame before encoding frame 90. +# $1 is the codec name. vpx_set_ref() { - local encoder="${LIBVPX_BIN_PATH}/vp8cx_set_ref${VPX_TEST_EXE_SUFFIX}" local codec="$1" - local output_file="${VPX_TEST_OUTPUT_DIR}/vp8cx_set_ref_${codec}.ivf" + local encoder="${LIBVPX_BIN_PATH}/${codec}cx_set_ref${VPX_TEST_EXE_SUFFIX}" + local output_file="${VPX_TEST_OUTPUT_DIR}/${codec}cx_set_ref_${codec}.ivf" local ref_frame_num=90 if [ ! -x "${encoder}" ]; then @@ -46,12 +43,18 @@ vpx_set_ref() { [ -e "${output_file}" ] || return 1 } -vp8cx_set_ref_vp8() { +cx_set_ref_vp8() { if [ "$(vp8_encode_available)" = "yes" ]; then vpx_set_ref vp8 || return 1 fi } -vp8cx_set_ref_tests="vp8cx_set_ref_vp8" +cx_set_ref_vp9() { + if [ "$(vp9_encode_available)" = "yes" ]; then + vpx_set_ref vp9 || return 1 + fi +} + +cx_set_ref_tests="cx_set_ref_vp8 cx_set_ref_vp9" -run_tests vp8cx_set_ref_verify_environment "${vp8cx_set_ref_tests}" +run_tests cx_set_ref_verify_environment "${cx_set_ref_tests}" diff --git a/test/dct16x16_test.cc b/test/dct16x16_test.cc index ddaf9395b..e6224b21a 100644 --- a/test/dct16x16_test.cc +++ b/test/dct16x16_test.cc @@ -25,20 +25,12 @@ #include "vpx/vpx_codec.h" #include "vpx/vpx_integer.h" #include "vpx_ports/mem.h" +#include "vpx_ports/msvc.h" // for round() using libvpx_test::ACMRandom; namespace { -#ifdef _MSC_VER -static int round(double x) { - if (x < 0) - return static_cast<int>(ceil(x - 0.5)); - else - return static_cast<int>(floor(x + 0.5)); -} -#endif - const int kNumCoeffs = 256; const double C1 = 0.995184726672197; const double C2 = 0.98078528040323; diff --git a/test/dct32x32_test.cc b/test/dct32x32_test.cc index 16d88255e..278d72dfa 100644 --- a/test/dct32x32_test.cc +++ b/test/dct32x32_test.cc @@ -25,18 +25,11 @@ #include "vpx/vpx_codec.h" #include "vpx/vpx_integer.h" #include "vpx_ports/mem.h" +#include "vpx_ports/msvc.h" // for round() using libvpx_test::ACMRandom; namespace { -#ifdef _MSC_VER -static int round(double x) { - if (x < 0) - return static_cast<int>(ceil(x - 0.5)); - else - return static_cast<int>(floor(x + 0.5)); -} -#endif const int kNumCoeffs = 1024; const double kPi = 3.141592653589793238462643383279502884; diff --git a/test/fdct4x4_test.cc b/test/fdct4x4_test.cc index 735cccf8d..a24085606 100644 --- a/test/fdct4x4_test.cc +++ b/test/fdct4x4_test.cc @@ -487,7 +487,7 @@ INSTANTIATE_TEST_CASE_P( make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 3, VPX_BITS_8))); #endif // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE -#if CONFIG_USE_X86INC && HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE +#if HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE INSTANTIATE_TEST_CASE_P( SSE2, Trans4x4WHT, ::testing::Values( diff --git a/test/fdct8x8_test.cc b/test/fdct8x8_test.cc index 29f215817..083ee6628 100644 --- a/test/fdct8x8_test.cc +++ b/test/fdct8x8_test.cc @@ -766,7 +766,7 @@ INSTANTIATE_TEST_CASE_P( &idct8x8_64_add_12_sse2, 6225, VPX_BITS_12))); #endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE -#if HAVE_SSSE3 && CONFIG_USE_X86INC && ARCH_X86_64 && \ +#if HAVE_SSSE3 && ARCH_X86_64 && \ !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE INSTANTIATE_TEST_CASE_P( SSSE3, FwdTrans8x8DCT, diff --git a/test/hadamard_test.cc b/test/hadamard_test.cc index 7a5bd5b4c..b8eec523f 100644 --- a/test/hadamard_test.cc +++ b/test/hadamard_test.cc @@ -152,10 +152,10 @@ INSTANTIATE_TEST_CASE_P(SSE2, Hadamard8x8Test, ::testing::Values(&vpx_hadamard_8x8_sse2)); #endif // HAVE_SSE2 -#if HAVE_SSSE3 && CONFIG_USE_X86INC && ARCH_X86_64 +#if HAVE_SSSE3 && ARCH_X86_64 INSTANTIATE_TEST_CASE_P(SSSE3, Hadamard8x8Test, ::testing::Values(&vpx_hadamard_8x8_ssse3)); -#endif // HAVE_SSSE3 && CONFIG_USE_X86INC && ARCH_X86_64 +#endif // HAVE_SSSE3 && ARCH_X86_64 #if HAVE_NEON INSTANTIATE_TEST_CASE_P(NEON, Hadamard8x8Test, diff --git a/test/idct8x8_test.cc b/test/idct8x8_test.cc index 7f9d751d6..04487c452 100644 --- a/test/idct8x8_test.cc +++ b/test/idct8x8_test.cc @@ -17,20 +17,12 @@ #include "./vpx_dsp_rtcd.h" #include "test/acm_random.h" #include "vpx/vpx_integer.h" +#include "vpx_ports/msvc.h" // for round() using libvpx_test::ACMRandom; namespace { -#ifdef _MSC_VER -static int round(double x) { - if (x < 0) - return static_cast<int>(ceil(x - 0.5)); - else - return static_cast<int>(floor(x + 0.5)); -} -#endif - void reference_dct_1d(double input[8], double output[8]) { const double kPi = 3.141592653589793238462643383279502884; const double kInvSqrt2 = 0.707106781186547524400844362104; @@ -86,7 +78,7 @@ TEST(VP9Idct8x8Test, AccuracyCheck) { reference_dct_2d(input, output_r); for (int j = 0; j < 64; ++j) - coeff[j] = round(output_r[j]); + coeff[j] = static_cast<tran_low_t>(round(output_r[j])); vpx_idct8x8_64_add_c(coeff, dst, 8); for (int j = 0; j < 64; ++j) { const int diff = dst[j] - src[j]; diff --git a/test/partial_idct_test.cc b/test/partial_idct_test.cc index 6c824128b..1efb1a4eb 100644 --- a/test/partial_idct_test.cc +++ b/test/partial_idct_test.cc @@ -295,7 +295,7 @@ INSTANTIATE_TEST_CASE_P( TX_4X4, 1))); #endif -#if HAVE_SSSE3 && CONFIG_USE_X86INC && ARCH_X86_64 && \ +#if HAVE_SSSE3 && ARCH_X86_64 && \ !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE INSTANTIATE_TEST_CASE_P( SSSE3_64, PartialIDctTest, diff --git a/test/sad_test.cc b/test/sad_test.cc index e6bd0d793..fa7b6c840 100644 --- a/test/sad_test.cc +++ b/test/sad_test.cc @@ -690,7 +690,6 @@ INSTANTIATE_TEST_CASE_P(NEON, SADx4Test, ::testing::ValuesIn(x4d_neon_tests)); //------------------------------------------------------------------------------ // x86 functions #if HAVE_SSE2 -#if CONFIG_USE_X86INC const SadMxNParam sse2_tests[] = { make_tuple(64, 64, &vpx_sad64x64_sse2, -1), make_tuple(64, 32, &vpx_sad64x32_sse2, -1), @@ -852,7 +851,6 @@ const SadMxNx4Param x4d_sse2_tests[] = { #endif // CONFIG_VP9_HIGHBITDEPTH }; INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::ValuesIn(x4d_sse2_tests)); -#endif // CONFIG_USE_X86INC #endif // HAVE_SSE2 #if HAVE_SSE3 diff --git a/test/test_intra_pred_speed.cc b/test/test_intra_pred_speed.cc index 2acf744d5..8928bf87c 100644 --- a/test/test_intra_pred_speed.cc +++ b/test/test_intra_pred_speed.cc @@ -187,21 +187,21 @@ INTRA_PRED_TEST(C, TestIntraPred4, vpx_dc_predictor_4x4_c, vpx_d153_predictor_4x4_c, vpx_d207_predictor_4x4_c, vpx_d63_predictor_4x4_c, vpx_tm_predictor_4x4_c) -#if HAVE_SSE2 && CONFIG_USE_X86INC +#if HAVE_SSE2 INTRA_PRED_TEST(SSE2, TestIntraPred4, vpx_dc_predictor_4x4_sse2, vpx_dc_left_predictor_4x4_sse2, vpx_dc_top_predictor_4x4_sse2, vpx_dc_128_predictor_4x4_sse2, vpx_v_predictor_4x4_sse2, vpx_h_predictor_4x4_sse2, vpx_d45_predictor_4x4_sse2, NULL, NULL, NULL, vpx_d207_predictor_4x4_sse2, NULL, vpx_tm_predictor_4x4_sse2) -#endif // HAVE_SSE2 && CONFIG_USE_X86INC +#endif // HAVE_SSE2 -#if HAVE_SSSE3 && CONFIG_USE_X86INC +#if HAVE_SSSE3 INTRA_PRED_TEST(SSSE3, TestIntraPred4, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, vpx_d153_predictor_4x4_ssse3, NULL, vpx_d63_predictor_4x4_ssse3, NULL) -#endif // HAVE_SSSE3 && CONFIG_USE_X86INC +#endif // HAVE_SSSE3 #if HAVE_DSPR2 INTRA_PRED_TEST(DSPR2, TestIntraPred4, vpx_dc_predictor_4x4_dspr2, NULL, NULL, @@ -237,20 +237,20 @@ INTRA_PRED_TEST(C, TestIntraPred8, vpx_dc_predictor_8x8_c, vpx_d153_predictor_8x8_c, vpx_d207_predictor_8x8_c, vpx_d63_predictor_8x8_c, vpx_tm_predictor_8x8_c) -#if HAVE_SSE2 && CONFIG_USE_X86INC +#if HAVE_SSE2 INTRA_PRED_TEST(SSE2, TestIntraPred8, vpx_dc_predictor_8x8_sse2, vpx_dc_left_predictor_8x8_sse2, vpx_dc_top_predictor_8x8_sse2, vpx_dc_128_predictor_8x8_sse2, vpx_v_predictor_8x8_sse2, vpx_h_predictor_8x8_sse2, vpx_d45_predictor_8x8_sse2, NULL, NULL, NULL, NULL, NULL, vpx_tm_predictor_8x8_sse2) -#endif // HAVE_SSE2 && CONFIG_USE_X86INC +#endif // HAVE_SSE2 -#if HAVE_SSSE3 && CONFIG_USE_X86INC +#if HAVE_SSSE3 INTRA_PRED_TEST(SSSE3, TestIntraPred8, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, vpx_d153_predictor_8x8_ssse3, vpx_d207_predictor_8x8_ssse3, vpx_d63_predictor_8x8_ssse3, NULL) -#endif // HAVE_SSSE3 && CONFIG_USE_X86INC +#endif // HAVE_SSSE3 #if HAVE_DSPR2 INTRA_PRED_TEST(DSPR2, TestIntraPred8, vpx_dc_predictor_8x8_dspr2, NULL, NULL, @@ -286,22 +286,22 @@ INTRA_PRED_TEST(C, TestIntraPred16, vpx_dc_predictor_16x16_c, vpx_d153_predictor_16x16_c, vpx_d207_predictor_16x16_c, vpx_d63_predictor_16x16_c, vpx_tm_predictor_16x16_c) -#if HAVE_SSE2 && CONFIG_USE_X86INC +#if HAVE_SSE2 INTRA_PRED_TEST(SSE2, TestIntraPred16, vpx_dc_predictor_16x16_sse2, vpx_dc_left_predictor_16x16_sse2, vpx_dc_top_predictor_16x16_sse2, vpx_dc_128_predictor_16x16_sse2, vpx_v_predictor_16x16_sse2, vpx_h_predictor_16x16_sse2, NULL, NULL, NULL, NULL, NULL, NULL, vpx_tm_predictor_16x16_sse2) -#endif // HAVE_SSE2 && CONFIG_USE_X86INC +#endif // HAVE_SSE2 -#if HAVE_SSSE3 && CONFIG_USE_X86INC +#if HAVE_SSSE3 INTRA_PRED_TEST(SSSE3, TestIntraPred16, NULL, NULL, NULL, NULL, NULL, NULL, vpx_d45_predictor_16x16_ssse3, NULL, NULL, vpx_d153_predictor_16x16_ssse3, vpx_d207_predictor_16x16_ssse3, vpx_d63_predictor_16x16_ssse3, NULL) -#endif // HAVE_SSSE3 && CONFIG_USE_X86INC +#endif // HAVE_SSSE3 #if HAVE_DSPR2 INTRA_PRED_TEST(DSPR2, TestIntraPred16, vpx_dc_predictor_16x16_dspr2, NULL, @@ -337,21 +337,21 @@ INTRA_PRED_TEST(C, TestIntraPred32, vpx_dc_predictor_32x32_c, vpx_d153_predictor_32x32_c, vpx_d207_predictor_32x32_c, vpx_d63_predictor_32x32_c, vpx_tm_predictor_32x32_c) -#if HAVE_SSE2 && CONFIG_USE_X86INC +#if HAVE_SSE2 INTRA_PRED_TEST(SSE2, TestIntraPred32, vpx_dc_predictor_32x32_sse2, vpx_dc_left_predictor_32x32_sse2, vpx_dc_top_predictor_32x32_sse2, vpx_dc_128_predictor_32x32_sse2, vpx_v_predictor_32x32_sse2, vpx_h_predictor_32x32_sse2, NULL, NULL, NULL, NULL, NULL, NULL, vpx_tm_predictor_32x32_sse2) -#endif // HAVE_SSE2 && CONFIG_USE_X86INC +#endif // HAVE_SSE2 -#if HAVE_SSSE3 && CONFIG_USE_X86INC +#if HAVE_SSSE3 INTRA_PRED_TEST(SSSE3, TestIntraPred32, NULL, NULL, NULL, NULL, NULL, NULL, vpx_d45_predictor_32x32_ssse3, NULL, NULL, vpx_d153_predictor_32x32_ssse3, vpx_d207_predictor_32x32_ssse3, vpx_d63_predictor_32x32_ssse3, NULL) -#endif // HAVE_SSSE3 && CONFIG_USE_X86INC +#endif // HAVE_SSSE3 #if HAVE_NEON INTRA_PRED_TEST(NEON, TestIntraPred32, vpx_dc_predictor_32x32_neon, diff --git a/test/variance_test.cc b/test/variance_test.cc index cb6339041..08c84a613 100644 --- a/test/variance_test.cc +++ b/test/variance_test.cc @@ -1002,7 +1002,6 @@ INSTANTIATE_TEST_CASE_P( make_tuple(2, 3, &vpx_variance4x8_sse2, 0), make_tuple(2, 2, &vpx_variance4x4_sse2, 0))); -#if CONFIG_USE_X86INC INSTANTIATE_TEST_CASE_P( SSE2, VpxSubpelVarianceTest, ::testing::Values(make_tuple(6, 6, &vpx_sub_pixel_variance64x64_sse2, 0), @@ -1035,7 +1034,6 @@ INSTANTIATE_TEST_CASE_P( make_tuple(3, 2, &vpx_sub_pixel_avg_variance8x4_sse2, 0), make_tuple(2, 3, &vpx_sub_pixel_avg_variance4x8_sse2, 0), make_tuple(2, 2, &vpx_sub_pixel_avg_variance4x4_sse2, 0))); -#endif // CONFIG_USE_X86INC #if CONFIG_VP9_HIGHBITDEPTH /* TODO(debargha): This test does not support the highbd version @@ -1088,7 +1086,6 @@ INSTANTIATE_TEST_CASE_P( make_tuple(3, 4, &vpx_highbd_8_variance8x16_sse2, 8), make_tuple(3, 3, &vpx_highbd_8_variance8x8_sse2, 8))); -#if CONFIG_USE_X86INC INSTANTIATE_TEST_CASE_P( SSE2, VpxHBDSubpelVarianceTest, ::testing::Values( @@ -1162,12 +1159,10 @@ INSTANTIATE_TEST_CASE_P( make_tuple(3, 4, &vpx_highbd_8_sub_pixel_avg_variance8x16_sse2, 8), make_tuple(3, 3, &vpx_highbd_8_sub_pixel_avg_variance8x8_sse2, 8), make_tuple(3, 2, &vpx_highbd_8_sub_pixel_avg_variance8x4_sse2, 8))); -#endif // CONFIG_USE_X86INC #endif // CONFIG_VP9_HIGHBITDEPTH #endif // HAVE_SSE2 #if HAVE_SSSE3 -#if CONFIG_USE_X86INC INSTANTIATE_TEST_CASE_P( SSSE3, VpxSubpelVarianceTest, ::testing::Values(make_tuple(6, 6, &vpx_sub_pixel_variance64x64_ssse3, 0), @@ -1200,7 +1195,6 @@ INSTANTIATE_TEST_CASE_P( make_tuple(3, 2, &vpx_sub_pixel_avg_variance8x4_ssse3, 0), make_tuple(2, 3, &vpx_sub_pixel_avg_variance4x8_ssse3, 0), make_tuple(2, 2, &vpx_sub_pixel_avg_variance4x4_ssse3, 0))); -#endif // CONFIG_USE_X86INC #endif // HAVE_SSSE3 #if HAVE_AVX2 diff --git a/test/vp9_error_block_test.cc b/test/vp9_error_block_test.cc index 23a249e2b..341cc19cb 100644 --- a/test/vp9_error_block_test.cc +++ b/test/vp9_error_block_test.cc @@ -157,9 +157,9 @@ TEST_P(ErrorBlockTest, ExtremeValues) { << "First failed at test case " << first_failure; } +#if HAVE_SSE2 || HAVE_AVX using std::tr1::make_tuple; -#if CONFIG_USE_X86INC int64_t wrap_vp9_highbd_block_error_8bit_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, @@ -167,6 +167,7 @@ int64_t wrap_vp9_highbd_block_error_8bit_c(const tran_low_t *coeff, EXPECT_EQ(8, bps); return vp9_highbd_block_error_8bit_c(coeff, dqcoeff, block_size, ssz); } +#endif // HAVE_SSE2 || HAVE_AVX #if HAVE_SSE2 int64_t wrap_vp9_highbd_block_error_8bit_sse2(const tran_low_t *coeff, @@ -206,6 +207,5 @@ INSTANTIATE_TEST_CASE_P( &wrap_vp9_highbd_block_error_8bit_c, VPX_BITS_8))); #endif // HAVE_AVX -#endif // CONFIG_USE_X86INC #endif // CONFIG_VP9_HIGHBITDEPTH } // namespace diff --git a/test/vp9_intrapred_test.cc b/test/vp9_intrapred_test.cc index 416f3c322..ea8abfb77 100644 --- a/test/vp9_intrapred_test.cc +++ b/test/vp9_intrapred_test.cc @@ -131,7 +131,6 @@ using std::tr1::make_tuple; #if HAVE_SSE2 #if CONFIG_VP9_HIGHBITDEPTH -#if CONFIG_USE_X86INC INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest, ::testing::Values( make_tuple(&vpx_highbd_dc_predictor_32x32_sse2, @@ -225,7 +224,6 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest, make_tuple(&vpx_highbd_tm_predictor_8x8_sse2, &vpx_highbd_tm_predictor_8x8_c, 8, 12))); -#endif // CONFIG_USE_X86INC #endif // CONFIG_VP9_HIGHBITDEPTH #endif // HAVE_SSE2 } // namespace diff --git a/test/vp9_subtract_test.cc b/test/vp9_subtract_test.cc index 3cad4d7e6..4793a9716 100644 --- a/test/vp9_subtract_test.cc +++ b/test/vp9_subtract_test.cc @@ -93,7 +93,7 @@ TEST_P(VP9SubtractBlockTest, SimpleSubtract) { INSTANTIATE_TEST_CASE_P(C, VP9SubtractBlockTest, ::testing::Values(vpx_subtract_block_c)); -#if HAVE_SSE2 && CONFIG_USE_X86INC +#if HAVE_SSE2 INSTANTIATE_TEST_CASE_P(SSE2, VP9SubtractBlockTest, ::testing::Values(vpx_subtract_block_sse2)); #endif diff --git a/vp8/common/reconintra4x4.c b/vp8/common/reconintra4x4.c index 35ad891ef..cd7c24821 100644 --- a/vp8/common/reconintra4x4.c +++ b/vp8/common/reconintra4x4.c @@ -14,6 +14,7 @@ #include "./vpx_dsp_rtcd.h" #include "vp8_rtcd.h" #include "blockd.h" +#include "reconintra4x4.h" typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left); diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c index 7da3d71ad..96d00cbf7 100644 --- a/vp8/encoder/ratectrl.c +++ b/vp8/encoder/ratectrl.c @@ -1593,7 +1593,7 @@ int vp8_drop_encodedframe_overshoot(VP8_COMP *cpi, int Q) { if (Q < thresh_qp && cpi->projected_frame_size > thresh_rate && pred_err_mb > thresh_pred_err_mb) { - double new_correction_factor = cpi->rate_correction_factor; + double new_correction_factor; const int target_size = cpi->av_per_frame_bandwidth; int target_bits_per_mb; // Drop this frame: advance frame counters, and set force_maxqp flag. diff --git a/vp9/common/vp9_blockd.c b/vp9/common/vp9_blockd.c index 7bab27d4f..88320584c 100644 --- a/vp9/common/vp9_blockd.c +++ b/vp9/common/vp9_blockd.c @@ -66,7 +66,7 @@ void vp9_foreach_transformed_block_in_plane( for (r = 0; r < max_blocks_high; r += (1 << tx_size)) { // Skip visiting the sub blocks that are wholly within the UMV. for (c = 0; c < max_blocks_wide; c += (1 << tx_size)) { - visit(plane, i, plane_bsize, tx_size, arg); + visit(plane, i, r, c, plane_bsize, tx_size, arg); i += step; } i += extra_step; diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index 3d26fb2b5..85b99c4bc 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -270,6 +270,7 @@ static INLINE const vpx_prob *get_y_mode_probs(const MODE_INFO *mi, } typedef void (*foreach_transformed_block_visitor)(int plane, int block, + int row, int col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg); @@ -283,17 +284,6 @@ void vp9_foreach_transformed_block( const MACROBLOCKD* const xd, BLOCK_SIZE bsize, foreach_transformed_block_visitor visit, void *arg); -static INLINE void txfrm_block_to_raster_xy(BLOCK_SIZE plane_bsize, - TX_SIZE tx_size, int block, - int *x, int *y) { - const int bwl = b_width_log2_lookup[plane_bsize]; - const int tx_cols_log2 = bwl - tx_size; - const int tx_cols = 1 << tx_cols_log2; - const int raster_mb = block >> (tx_size << 1); - *x = (raster_mb & (tx_cols - 1)) << tx_size; - *y = (raster_mb >> tx_cols_log2) << tx_size; -} - void vp9_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int has_eob, int aoff, int loff); diff --git a/vp9/common/vp9_postproc.c b/vp9/common/vp9_postproc.c index c04cc8f05..5dad81d64 100644 --- a/vp9/common/vp9_postproc.c +++ b/vp9/common/vp9_postproc.c @@ -618,15 +618,16 @@ int vp9_post_proc_frame(struct VP9Common *cm, // Alloc memory for prev_mip in the first frame. if (cm->current_video_frame == 1) { - cm->postproc_state.last_base_qindex = cm->base_qindex; - cm->postproc_state.last_frame_valid = 1; + ppstate->last_base_qindex = cm->base_qindex; + ppstate->last_frame_valid = 1; + } + + if ((flags & VP9D_MFQE) && ppstate->prev_mip == NULL) { ppstate->prev_mip = vpx_calloc(cm->mi_alloc_size, sizeof(*cm->mip)); if (!ppstate->prev_mip) { return 1; } ppstate->prev_mi = ppstate->prev_mip + cm->mi_stride + 1; - memset(ppstate->prev_mip, 0, - cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mip)); } // Allocate post_proc_buffer_int if needed. @@ -664,9 +665,9 @@ int vp9_post_proc_frame(struct VP9Common *cm, "Failed to allocate post-processing buffer"); if ((flags & VP9D_MFQE) && cm->current_video_frame >= 2 && - cm->postproc_state.last_frame_valid && cm->bit_depth == 8 && - cm->postproc_state.last_base_qindex <= last_q_thresh && - cm->base_qindex - cm->postproc_state.last_base_qindex >= q_diff_thresh) { + ppstate->last_frame_valid && cm->bit_depth == 8 && + ppstate->last_base_qindex <= last_q_thresh && + cm->base_qindex - ppstate->last_base_qindex >= q_diff_thresh) { vp9_mfqe(cm); // TODO(jackychen): Consider whether enable deblocking by default // if mfqe is enabled. Need to take both the quality and the speed @@ -692,8 +693,8 @@ int vp9_post_proc_frame(struct VP9Common *cm, vp8_yv12_copy_frame(cm->frame_to_show, ppbuf); } - cm->postproc_state.last_base_qindex = cm->base_qindex; - cm->postproc_state.last_frame_valid = 1; + ppstate->last_base_qindex = cm->base_qindex; + ppstate->last_frame_valid = 1; if (flags & VP9D_ADDNOISE) { const int noise_level = ppflags->noise_level; @@ -714,7 +715,8 @@ int vp9_post_proc_frame(struct VP9Common *cm, dest->uv_width = dest->y_width >> cm->subsampling_x; dest->uv_height = dest->y_height >> cm->subsampling_y; - swap_mi_and_prev_mi(cm); + if (flags & VP9D_MFQE) + swap_mi_and_prev_mi(cm); return 0; } #endif // CONFIG_VP9_POSTPROC diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c index 84718e970..67fe18c7e 100644 --- a/vp9/common/vp9_reconinter.c +++ b/vp9/common/vp9_reconinter.c @@ -178,7 +178,7 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, // Co-ordinate of containing block to pixel precision. const int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)); const int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)); -#if CONFIG_BETTER_HW_COMPATIBILITY +#if 0 // CONFIG_BETTER_HW_COMPATIBILITY assert(xd->mi[0]->sb_type != BLOCK_4X8 && xd->mi[0]->sb_type != BLOCK_8X4); assert(mv_q4.row == mv.row * (1 << (1 - pd->subsampling_y)) && diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl index 846133674..276f14554 100644 --- a/vp9/common/vp9_rtcd_defs.pl +++ b/vp9/common/vp9_rtcd_defs.pl @@ -21,29 +21,6 @@ EOF } forward_decls qw/vp9_common_forward_decls/; -# x86inc.asm had specific constraints. break it out so it's easy to disable. -# zero all the variables to avoid tricky else conditions. -$mmx_x86inc = $sse_x86inc = $sse2_x86inc = $ssse3_x86inc = $avx_x86inc = - $avx2_x86inc = ''; -$mmx_x86_64_x86inc = $sse_x86_64_x86inc = $sse2_x86_64_x86inc = - $ssse3_x86_64_x86inc = $avx_x86_64_x86inc = $avx2_x86_64_x86inc = ''; -if (vpx_config("CONFIG_USE_X86INC") eq "yes") { - $mmx_x86inc = 'mmx'; - $sse_x86inc = 'sse'; - $sse2_x86inc = 'sse2'; - $ssse3_x86inc = 'ssse3'; - $avx_x86inc = 'avx'; - $avx2_x86inc = 'avx2'; - if ($opts{arch} eq "x86_64") { - $mmx_x86_64_x86inc = 'mmx'; - $sse_x86_64_x86inc = 'sse'; - $sse2_x86_64_x86inc = 'sse2'; - $ssse3_x86_64_x86inc = 'ssse3'; - $avx_x86_64_x86inc = 'avx'; - $avx2_x86_64_x86inc = 'avx2'; - } -} - # functions that are 64 bit only. $mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 = $avx_x86_64 = $avx2_x86_64 = ''; if ($opts{arch} eq "x86_64") { @@ -202,10 +179,10 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vp9_block_error/; add_proto qw/int64_t vp9_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd"; - specialize qw/vp9_highbd_block_error/, "$sse2_x86inc"; + specialize qw/vp9_highbd_block_error sse2/; add_proto qw/int64_t vp9_highbd_block_error_8bit/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz"; - specialize qw/vp9_highbd_block_error_8bit/, "$sse2_x86inc", "$avx_x86inc"; + specialize qw/vp9_highbd_block_error_8bit sse2 avx/; add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/vp9_quantize_fp/; @@ -217,16 +194,16 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vp9_fdct8x8_quant/; } else { add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz"; - specialize qw/vp9_block_error avx2 msa/, "$sse2_x86inc"; + specialize qw/vp9_block_error avx2 msa sse2/; add_proto qw/int64_t vp9_block_error_fp/, "const int16_t *coeff, const int16_t *dqcoeff, int block_size"; - specialize qw/vp9_block_error_fp neon/, "$sse2_x86inc"; + specialize qw/vp9_block_error_fp neon sse2/; add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - specialize qw/vp9_quantize_fp neon sse2/, "$ssse3_x86_64_x86inc"; + specialize qw/vp9_quantize_fp neon sse2/, "$ssse3_x86_64"; add_proto qw/void vp9_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - specialize qw/vp9_quantize_fp_32x32/, "$ssse3_x86_64_x86inc"; + specialize qw/vp9_quantize_fp_32x32/, "$ssse3_x86_64"; add_proto qw/void vp9_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/vp9_fdct8x8_quant sse2 ssse3 neon/; @@ -245,7 +222,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vp9_fht16x16 sse2/; add_proto qw/void vp9_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride"; - specialize qw/vp9_fwht4x4/, "$sse2_x86inc"; + specialize qw/vp9_fwht4x4 sse2/; } else { add_proto qw/void vp9_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type"; specialize qw/vp9_fht4x4 sse2 msa/; @@ -257,7 +234,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vp9_fht16x16 sse2 msa/; add_proto qw/void vp9_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride"; - specialize qw/vp9_fwht4x4 msa/, "$sse2_x86inc"; + specialize qw/vp9_fwht4x4 msa sse2/; } # diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c index d63912932..3199b98aa 100644 --- a/vp9/decoder/vp9_decodeframe.c +++ b/vp9/decoder/vp9_decodeframe.c @@ -589,7 +589,7 @@ static void dec_build_inter_predictors(VPxWorker *const worker, MACROBLOCKD *xd, // Co-ordinate of containing block to pixel precision. int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)); int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)); -#if CONFIG_BETTER_HW_COMPATIBILITY +#if 0 // CONFIG_BETTER_HW_COMPATIBILITY assert(xd->mi[0]->sb_type != BLOCK_4X8 && xd->mi[0]->sb_type != BLOCK_8X4); assert(mv_q4.row == mv->row * (1 << (1 - pd->subsampling_y)) && diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index ffc6839ad..6869036bc 100644 --- a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c @@ -902,10 +902,10 @@ void vp9_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd, frame_mvs += cm->mi_cols; } } -#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH - if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && - (xd->above_mi == NULL || xd->left_mi == NULL) && - !is_inter_block(mi) && need_top_left[mi->uv_mode]) - assert(0); +#if 0 // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH + if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && + (xd->above_mi == NULL || xd->left_mi == NULL) && + !is_inter_block(mi) && need_top_left[mi->uv_mode]) + assert(0); #endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH } diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c index 935c04f3a..9ed980081 100644 --- a/vp9/decoder/vp9_decoder.c +++ b/vp9/decoder/vp9_decoder.c @@ -187,47 +187,45 @@ vpx_codec_err_t vp9_copy_reference_dec(VP9Decoder *pbi, vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) { - RefBuffer *ref_buf = NULL; - RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; + int idx; + YV12_BUFFER_CONFIG *ref_buf = NULL; // TODO(jkoleszar): The decoder doesn't have any real knowledge of what the // encoder is using the frame buffers for. This is just a stub to keep the // vpxenc --test-decode functionality working, and will be replaced in a // later commit that adds VP9-specific controls for this functionality. + // (Yunqing) The set_reference control depends on the following setting in + // encoder. + // cpi->lst_fb_idx = 0; + // cpi->gld_fb_idx = 1; + // cpi->alt_fb_idx = 2; if (ref_frame_flag == VP9_LAST_FLAG) { - ref_buf = &cm->frame_refs[0]; + idx = cm->ref_frame_map[0]; } else if (ref_frame_flag == VP9_GOLD_FLAG) { - ref_buf = &cm->frame_refs[1]; + idx = cm->ref_frame_map[1]; } else if (ref_frame_flag == VP9_ALT_FLAG) { - ref_buf = &cm->frame_refs[2]; + idx = cm->ref_frame_map[2]; } else { vpx_internal_error(&cm->error, VPX_CODEC_ERROR, "Invalid reference frame"); return cm->error.error_code; } - if (!equal_dimensions(ref_buf->buf, sd)) { + if (idx < 0 || idx >= FRAME_BUFFERS) { vpx_internal_error(&cm->error, VPX_CODEC_ERROR, - "Incorrect buffer dimensions"); - } else { - int *ref_fb_ptr = &ref_buf->idx; - - // Find an empty frame buffer. - const int free_fb = get_free_fb(cm); - if (cm->new_fb_idx == INVALID_IDX) { - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Unable to find free frame buffer"); - return cm->error.error_code; - } + "Invalid reference frame map"); + return cm->error.error_code; + } - // Decrease ref_count since it will be increased again in - // ref_cnt_fb() below. - --frame_bufs[free_fb].ref_count; + // Get the destination reference buffer. + ref_buf = &cm->buffer_pool->frame_bufs[idx].buf; - // Manage the reference counters and copy image. - ref_cnt_fb(frame_bufs, ref_fb_ptr, free_fb); - ref_buf->buf = &frame_bufs[*ref_fb_ptr].buf; - vp8_yv12_copy_frame(sd, ref_buf->buf); + if (!equal_dimensions(ref_buf, sd)) { + vpx_internal_error(&cm->error, VPX_CODEC_ERROR, + "Incorrect buffer dimensions"); + } else { + // Overwrite the reference frame buffer. + vp8_yv12_copy_frame(sd, ref_buf); } return cm->error.error_code; diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 169943c10..05fbc4194 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -31,6 +31,9 @@ struct optimize_ctx { ENTROPY_CONTEXT tl[MAX_MB_PLANE][16]; }; +#define HETEROMULT 12 +#define HETEROCOEF 4 + void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { struct macroblock_plane *const p = &x->plane[plane]; const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane]; @@ -67,6 +70,48 @@ static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] ={ {10, 6}, {8, 7}, }; rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);\ } +// This function eliminates isolated small nonzero coefficients. +static void eliminate_small_coeff(const tran_low_t *const coeff_ptr, + const TX_SIZE tx_size, + const int16_t *const zbin_ptr, + tran_low_t *const qcoeff_ptr, + tran_low_t *const dqcoeff_ptr, + uint16_t *const eob_ptr, + const int16_t *const scan) { + const int zbins[2] = + {tx_size == TX_32X32 ? ROUND_POWER_OF_TWO(zbin_ptr[0], 1) : zbin_ptr[0], + tx_size == TX_32X32 ? ROUND_POWER_OF_TWO(zbin_ptr[1], 1) : zbin_ptr[1]}; + const int nzbins[2] = {zbins[0] * -1, zbins[1] * -1}; + const int hetero_zbins[2] = {(HETEROCOEF + 1) * zbins[0] / HETEROCOEF, + (HETEROCOEF + 1) * zbins[1] / HETEROCOEF}; + const int hetero_nzbins[2] = {hetero_zbins[0] * -1, hetero_zbins[1] * -1}; + int eob = *eob_ptr, i = eob - 1, rc, tail_count = 0; + + assert(i >= 0); + rc = scan[i]; + while (i > 0 && coeff_ptr[rc] <= hetero_zbins[rc != 0] && + coeff_ptr[rc] >= hetero_nzbins[rc != 0]) { + if (coeff_ptr[rc] > zbins[rc != 0] || coeff_ptr[rc] < nzbins[rc != 0]) + ++tail_count; + if ((eob - i) * HETEROMULT >= tail_count * zbins[1]) { + eob = i; + tail_count = 0; + } + --i; + rc = scan[i]; + } + + for (i = eob; i < (*eob_ptr); ++i) { + rc = scan[i]; + qcoeff_ptr[rc] = 0; + dqcoeff_ptr[rc] = 0; + } + + while (eob > 0 && qcoeff_ptr[scan[eob - 1]] == 0) --eob; + + *eob_ptr = eob; +} + // This function is a place holder for now but may ultimately need // to scan previous tokens to work out the correct context. static int trellis_get_coeff_context(const int16_t *scan, @@ -335,7 +380,7 @@ static INLINE void highbd_fdct32x32(int rd_transform, const int16_t *src, } #endif // CONFIG_VP9_HIGHBITDEPTH -void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, +void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { MACROBLOCKD *const xd = &x->e_mbd; const struct macroblock_plane *const p = &x->plane[plane]; @@ -346,10 +391,8 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); uint16_t *const eob = &p->eobs[block]; const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; - int i, j; const int16_t *src_diff; - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); - src_diff = &p->src_diff[4 * (j * diff_stride + i)]; + src_diff = &p->src_diff[4 * (row * diff_stride + col)]; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { @@ -425,7 +468,7 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, } } -void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, +void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, int row, int col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { MACROBLOCKD *const xd = &x->e_mbd; const struct macroblock_plane *const p = &x->plane[plane]; @@ -435,12 +478,8 @@ void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); uint16_t *const eob = &p->eobs[block]; const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; - int i, j; const int16_t *src_diff; - - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); - src_diff = &p->src_diff[4 * (j * diff_stride + i)]; - + src_diff = &p->src_diff[4 * (row * diff_stride + col)]; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { switch (tx_size) { @@ -506,7 +545,7 @@ void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, } } -void vp9_xform_quant(MACROBLOCK *x, int plane, int block, +void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { MACROBLOCKD *const xd = &x->e_mbd; const struct macroblock_plane *const p = &x->plane[plane]; @@ -517,10 +556,8 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block, tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); uint16_t *const eob = &p->eobs[block]; const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; - int i, j; const int16_t *src_diff; - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); - src_diff = &p->src_diff[4 * (j * diff_stride + i)]; + src_diff = &p->src_diff[4 * (row * diff_stride + col)]; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { @@ -593,9 +630,14 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block, assert(0); break; } + if (!x->skip_block && *eob > 0) { + eliminate_small_coeff(coeff, tx_size, p->zbin, qcoeff, dqcoeff, eob, + scan_order->scan); + } } -static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, +static void encode_block(int plane, int block, int row, int col, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { struct encode_b_args *const args = arg; MACROBLOCK *const x = args->x; @@ -604,13 +646,11 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, struct macroblock_plane *const p = &x->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane]; tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); - int i, j; uint8_t *dst; ENTROPY_CONTEXT *a, *l; - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); - dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i]; - a = &ctx->ta[plane][i]; - l = &ctx->tl[plane][j]; + dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col]; + a = &ctx->ta[plane][col]; + l = &ctx->tl[plane][row]; // TODO(jingning): per transformed block zero forcing only enabled for // luma component. will integrate chroma components as well. @@ -629,17 +669,17 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, *a = *l = 0; return; } else { - vp9_xform_quant_fp(x, plane, block, plane_bsize, tx_size); + vp9_xform_quant_fp(x, plane, block, row, col, plane_bsize, tx_size); } } else { if (max_txsize_lookup[plane_bsize] == tx_size) { int txfm_blk_index = (plane << 2) + (block >> (tx_size << 1)); if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_NONE) { // full forward transform and quantization - vp9_xform_quant(x, plane, block, plane_bsize, tx_size); + vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size); } else if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_AC_ONLY) { // fast path forward transform and quantization - vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size); + vp9_xform_quant_dc(x, plane, block, row, col, plane_bsize, tx_size); } else { // skip forward transform p->eobs[block] = 0; @@ -647,7 +687,7 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, return; } } else { - vp9_xform_quant(x, plane, block, plane_bsize, tx_size); + vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size); } } } @@ -715,19 +755,18 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, } } -static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize, +static void encode_block_pass1(int plane, int block, int row, int col, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { MACROBLOCK *const x = (MACROBLOCK *)arg; MACROBLOCKD *const xd = &x->e_mbd; struct macroblock_plane *const p = &x->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane]; tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); - int i, j; uint8_t *dst; - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); - dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i]; + dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col]; - vp9_xform_quant(x, plane, block, plane_bsize, tx_size); + vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size); if (p->eobs[block] > 0) { #if CONFIG_VP9_HIGHBITDEPTH @@ -774,7 +813,8 @@ void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) { } } -void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, +void vp9_encode_block_intra(int plane, int block, int row, int col, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { struct encode_b_args* const args = arg; MACROBLOCK *const x = args->x; @@ -795,18 +835,16 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, uint16_t *eob = &p->eobs[block]; const int src_stride = p->src.stride; const int dst_stride = pd->dst.stride; - int i, j; struct optimize_ctx *const ctx = args->ctx; ENTROPY_CONTEXT *a = NULL; ENTROPY_CONTEXT *l = NULL; int entropy_ctx = 0; - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); - dst = &pd->dst.buf[4 * (j * dst_stride + i)]; - src = &p->src.buf[4 * (j * src_stride + i)]; - src_diff = &p->src_diff[4 * (j * diff_stride + i)]; + dst = &pd->dst.buf[4 * (row * dst_stride + col)]; + src = &p->src.buf[4 * (row * src_stride + col)]; + src_diff = &p->src_diff[4 * (row * diff_stride + col)]; if (args->ctx != NULL) { - a = &ctx->ta[plane][i]; - l = &ctx->tl[plane][j]; + a = &ctx->ta[plane][col]; + l = &ctx->tl[plane][row]; entropy_ctx = combine_entropy_contexts(*a, *l); } @@ -826,7 +864,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, vp9_predict_intra_block(xd, bwl, tx_size, mode, x->skip_encode ? src : dst, x->skip_encode ? src_stride : dst_stride, - dst, dst_stride, i, j, plane); + dst, dst_stride, col, row, plane); #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { @@ -926,6 +964,10 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); + if (!x->skip_block && *eob > 0) { + eliminate_small_coeff(coeff, tx_size, p->zbin, qcoeff, dqcoeff, + eob, scan_order->scan); + } } if (args->ctx != NULL && !x->skip_recode) { *a = *l = optimize_b(x, plane, block, tx_size, entropy_ctx) > 0; @@ -942,6 +984,10 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); + if (!x->skip_block && *eob > 0) { + eliminate_small_coeff(coeff, tx_size, p->zbin, qcoeff, dqcoeff, + eob, scan_order->scan); + } } if (args->ctx != NULL && !x->skip_recode) { *a = *l = optimize_b(x, plane, block, tx_size, entropy_ctx) > 0; @@ -958,6 +1004,10 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); + if (!x->skip_block && *eob > 0) { + eliminate_small_coeff(coeff, tx_size, p->zbin, qcoeff, dqcoeff, + eob, scan_order->scan); + } } if (args->ctx != NULL && !x->skip_recode) { *a = *l = optimize_b(x, plane, block, tx_size, entropy_ctx) > 0; @@ -977,6 +1027,10 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); + if (!x->skip_block && *eob > 0) { + eliminate_small_coeff(coeff, tx_size, p->zbin, qcoeff, dqcoeff, + eob, scan_order->scan); + } } if (args->ctx != NULL && !x->skip_recode) { *a = *l = optimize_b(x, plane, block, tx_size, entropy_ctx) > 0; diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h index 25b0b23e0..62d75a129 100644 --- a/vp9/encoder/vp9_encodemb.h +++ b/vp9/encoder/vp9_encodemb.h @@ -25,16 +25,17 @@ struct encode_b_args { }; void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize); void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize); -void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, +void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size); -void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, +void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, int row, int col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size); -void vp9_xform_quant(MACROBLOCK *x, int plane, int block, +void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size); void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane); -void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, +void vp9_encode_block_intra(int plane, int block, int row, int col, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg); void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane, diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index 53a3ec7de..66ccc92c4 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -506,10 +506,10 @@ static int get_ul_intra_threshold(VP9_COMMON *cm) { ret_val = UL_INTRA_THRESH; break; case VPX_BITS_10: - ret_val = UL_INTRA_THRESH >> 2; + ret_val = UL_INTRA_THRESH << 2; break; case VPX_BITS_12: - ret_val = UL_INTRA_THRESH >> 4; + ret_val = UL_INTRA_THRESH << 4; break; default: assert(0 && "cm->bit_depth should be VPX_BITS_8, " @@ -532,10 +532,10 @@ static int get_smooth_intra_threshold(VP9_COMMON *cm) { ret_val = SMOOTH_INTRA_THRESH; break; case VPX_BITS_10: - ret_val = SMOOTH_INTRA_THRESH >> 2; + ret_val = SMOOTH_INTRA_THRESH << 4; break; case VPX_BITS_12: - ret_val = SMOOTH_INTRA_THRESH >> 4; + ret_val = SMOOTH_INTRA_THRESH << 8; break; default: assert(0 && "cm->bit_depth should be VPX_BITS_8, " diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index 7db6ef2b0..5ccadd005 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -932,7 +932,8 @@ struct estimate_block_intra_args { RD_COST *rdc; }; -static void estimate_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, +static void estimate_block_intra(int plane, int block, int row, int col, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { struct estimate_block_intra_args* const args = arg; VP9_COMP *const cpi = args->cpi; @@ -945,20 +946,19 @@ static void estimate_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, uint8_t *const dst_buf_base = pd->dst.buf; const int src_stride = p->src.stride; const int dst_stride = pd->dst.stride; - int i, j; RD_COST this_rdc; - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); + (void)block; - p->src.buf = &src_buf_base[4 * (j * src_stride + i)]; - pd->dst.buf = &dst_buf_base[4 * (j * dst_stride + i)]; + p->src.buf = &src_buf_base[4 * (row * src_stride + col)]; + pd->dst.buf = &dst_buf_base[4 * (row * dst_stride + col)]; // Use source buffer as an approximation for the fully reconstructed buffer. vp9_predict_intra_block(xd, b_width_log2_lookup[plane_bsize], tx_size, args->mode, x->skip_encode ? p->src.buf : pd->dst.buf, x->skip_encode ? src_stride : dst_stride, pd->dst.buf, dst_stride, - i, j, plane); + col, row, plane); if (plane == 0) { int64_t this_sse = INT64_MAX; @@ -1744,7 +1744,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, #if CONFIG_VP9_HIGHBITDEPTH const int large_block = bsize > BLOCK_32X32; #else - const int large_block = bsize >= BLOCK_32X32; + const int large_block = + x->sb_is_skin ? bsize > BLOCK_32X32 : bsize >= BLOCK_32X32; #endif mi->interp_filter = (filter_ref == SWITCHABLE) ? EIGHTTAP : filter_ref; vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 6e4ffee92..28530386c 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -498,18 +498,16 @@ static void dist_block(MACROBLOCK *x, int plane, int block, TX_SIZE tx_size, } } -static int rate_block(int plane, int block, BLOCK_SIZE plane_bsize, +static int rate_block(int plane, int block, int row, int col, TX_SIZE tx_size, struct rdcost_block_args* args) { - int x_idx, y_idx; - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x_idx, &y_idx); - - return cost_coeffs(args->x, plane, block, args->t_above + x_idx, - args->t_left + y_idx, tx_size, + return cost_coeffs(args->x, plane, block, args->t_above + col, + args->t_left + row, tx_size, args->so->scan, args->so->neighbors, args->use_fast_coef_costing); } -static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, +static void block_rd_txfm(int plane, int block, int row, int col, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { struct rdcost_block_args *args = arg; MACROBLOCK *const x = args->x; @@ -525,20 +523,20 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, if (!is_inter_block(mi)) { struct encode_b_args arg = {x, NULL, &mi->skip}; - vp9_encode_block_intra(plane, block, plane_bsize, tx_size, &arg); + vp9_encode_block_intra(plane, block, row, col, plane_bsize, tx_size, &arg); dist_block(x, plane, block, tx_size, &dist, &sse); } else if (max_txsize_lookup[plane_bsize] == tx_size) { if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == SKIP_TXFM_NONE) { // full forward transform and quantization - vp9_xform_quant(x, plane, block, plane_bsize, tx_size); + vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size); dist_block(x, plane, block, tx_size, &dist, &sse); } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == SKIP_TXFM_AC_ONLY) { // compute DC coefficient tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block); tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block); - vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size); + vp9_xform_quant_dc(x, plane, block, row, col, plane_bsize, tx_size); sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4; dist = sse; if (x->plane[plane].eobs[block]) { @@ -562,7 +560,7 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, } } else { // full forward transform and quantization - vp9_xform_quant(x, plane, block, plane_bsize, tx_size); + vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size); dist_block(x, plane, block, tx_size, &dist, &sse); } @@ -572,7 +570,7 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, return; } - rate = rate_block(plane, block, plane_bsize, tx_size, args); + rate = rate_block(plane, block, row, col, tx_size, args); rd1 = RDCOST(x->rdmult, x->rddiv, rate, dist); rd2 = RDCOST(x->rdmult, x->rddiv, 0, sse); @@ -2465,9 +2463,9 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (pred_filter_search) { INTERP_FILTER af = SWITCHABLE, lf = SWITCHABLE; - if (xd->above_mi) + if (xd->above_mi && is_inter_block(xd->above_mi)) af = xd->above_mi->interp_filter; - if (xd->left_mi) + if (xd->left_mi && is_inter_block(xd->left_mi)) lf = xd->left_mi->interp_filter; if ((this_mode != NEWMV) || (af == lf)) diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index edec755dd..4400da42d 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -319,7 +319,8 @@ struct tokenize_b_args { TOKENEXTRA **tp; }; -static void set_entropy_context_b(int plane, int block, BLOCK_SIZE plane_bsize, +static void set_entropy_context_b(int plane, int block, int row, int col, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { struct tokenize_b_args* const args = arg; ThreadData *const td = args->td; @@ -327,10 +328,8 @@ static void set_entropy_context_b(int plane, int block, BLOCK_SIZE plane_bsize, MACROBLOCKD *const xd = &x->e_mbd; struct macroblock_plane *p = &x->plane[plane]; struct macroblockd_plane *pd = &xd->plane[plane]; - int aoff, loff; - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &aoff, &loff); vp9_set_contexts(xd, pd, plane_bsize, tx_size, p->eobs[block] > 0, - aoff, loff); + col, row); } static INLINE void add_token(TOKENEXTRA **t, const vpx_prob *context_tree, @@ -353,7 +352,8 @@ static INLINE void add_token_no_extra(TOKENEXTRA **t, ++counts[token]; } -static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize, +static void tokenize_b(int plane, int block, int row, int col, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { struct tokenize_b_args* const args = arg; VP9_COMP *cpi = args->cpi; @@ -384,11 +384,8 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize, const int tx_eob = 16 << (tx_size << 1); int16_t token; EXTRABIT extra; - int aoff, loff; - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &aoff, &loff); - - pt = get_entropy_context(tx_size, pd->above_context + aoff, - pd->left_context + loff); + pt = get_entropy_context(tx_size, pd->above_context + col, + pd->left_context + row); so = get_scan(xd, tx_size, type, block); scan = so->scan; nb = so->neighbors; @@ -426,20 +423,23 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize, *tp = t; - vp9_set_contexts(xd, pd, plane_bsize, tx_size, c > 0, aoff, loff); + vp9_set_contexts(xd, pd, plane_bsize, tx_size, c > 0, col, row); } struct is_skippable_args { uint16_t *eobs; int *skippable; }; -static void is_skippable(int plane, int block, + +static void is_skippable(int plane, int block, int row, int col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *argv) { struct is_skippable_args *args = argv; (void)plane; (void)plane_bsize; (void)tx_size; + (void)row; + (void)col; args->skippable[0] &= (!args->eobs[block]); } @@ -453,14 +453,15 @@ int vp9_is_skippable_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { return result; } -static void has_high_freq_coeff(int plane, int block, +static void has_high_freq_coeff(int plane, int block, int row, int col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *argv) { struct is_skippable_args *args = argv; int eobs = (tx_size == TX_4X4) ? 3 : 10; (void) plane; (void) plane_bsize; - + (void) row; + (void) col; *(args->skippable) |= (args->eobs[block] > eobs); } diff --git a/vp9/encoder/x86/vp9_dct_ssse3.c b/vp9/encoder/x86/vp9_dct_ssse3.c index b09eac0d1..1a1d4eabc 100644 --- a/vp9/encoder/x86/vp9_dct_ssse3.c +++ b/vp9/encoder/x86/vp9_dct_ssse3.c @@ -9,11 +9,6 @@ */ #include <assert.h> -#if defined(_MSC_VER) && _MSC_VER <= 1500 -// Need to include math.h before calling tmmintrin.h/intrin.h -// in certain versions of MSVS. -#include <math.h> -#endif #include <tmmintrin.h> // SSSE3 #include "./vp9_rtcd.h" diff --git a/vp9/encoder/x86/vp9_frame_scale_ssse3.c b/vp9/encoder/x86/vp9_frame_scale_ssse3.c index 38af3b13a..23325d63b 100644 --- a/vp9/encoder/x86/vp9_frame_scale_ssse3.c +++ b/vp9/encoder/x86/vp9_frame_scale_ssse3.c @@ -8,11 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. */ -#if defined(_MSC_VER) && _MSC_VER <= 1500 -// Need to include math.h before calling tmmintrin.h/intrin.h -// in certain versions of MSVS. -#include <math.h> -#endif #include <tmmintrin.h> // SSSE3 #include "./vp9_rtcd.h" diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c index 9ad86cbf8..51c6fbb02 100644 --- a/vp9/vp9_cx_iface.c +++ b/vp9/vp9_cx_iface.c @@ -105,19 +105,6 @@ struct vpx_codec_alg_priv { BufferPool *buffer_pool; }; -static VP9_REFFRAME ref_frame_to_vp9_reframe(vpx_ref_frame_type_t frame) { - switch (frame) { - case VP8_LAST_FRAME: - return VP9_LAST_FLAG; - case VP8_GOLD_FRAME: - return VP9_GOLD_FLAG; - case VP8_ALTR_FRAME: - return VP9_ALT_FLAG; - } - assert(0 && "Invalid Reference Frame"); - return VP9_LAST_FLAG; -} - static vpx_codec_err_t update_error_state(vpx_codec_alg_priv_t *ctx, const struct vpx_internal_error_info *error) { const vpx_codec_err_t res = error->error_code; diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c index 6531e2c61..08adea026 100644 --- a/vp9/vp9_dx_iface.c +++ b/vp9/vp9_dx_iface.c @@ -785,7 +785,8 @@ static vpx_codec_err_t ctrl_set_reference(vpx_codec_alg_priv_t *ctx, FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; image2yuvconfig(&frame->img, &sd); return vp9_set_reference_dec(&frame_worker_data->pbi->common, - (VP9_REFFRAME)frame->frame_type, &sd); + ref_frame_to_vp9_reframe(frame->frame_type), + &sd); } else { return VPX_CODEC_INVALID_PARAM; } diff --git a/vp9/vp9_iface_common.h b/vp9/vp9_iface_common.h index 938d4224b..44a5e8157 100644 --- a/vp9/vp9_iface_common.h +++ b/vp9/vp9_iface_common.h @@ -133,4 +133,16 @@ static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img, return VPX_CODEC_OK; } +static VP9_REFFRAME ref_frame_to_vp9_reframe(vpx_ref_frame_type_t frame) { + switch (frame) { + case VP8_LAST_FRAME: + return VP9_LAST_FLAG; + case VP8_GOLD_FRAME: + return VP9_GOLD_FLAG; + case VP8_ALTR_FRAME: + return VP9_ALT_FLAG; + } + assert(0 && "Invalid Reference Frame"); + return VP9_LAST_FLAG; +} #endif // VP9_VP9_IFACE_COMMON_H_ diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk index 5f3de8f8a..b8342b9e1 100644 --- a/vp9/vp9cx.mk +++ b/vp9/vp9cx.mk @@ -101,7 +101,6 @@ ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_block_error_intrin_sse2.c endif -ifeq ($(CONFIG_USE_X86INC),yes) VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.asm ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_error_sse2.asm @@ -109,13 +108,10 @@ VP9_CX_SRCS-$(HAVE_AVX) += encoder/x86/vp9_highbd_error_avx.asm else VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_error_sse2.asm endif -endif ifeq ($(ARCH_X86_64),yes) -ifeq ($(CONFIG_USE_X86INC),yes) VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_quantize_ssse3_x86_64.asm endif -endif VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_intrin_sse2.c VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_dct_ssse3.c diff --git a/vpx/vpx_integer.h b/vpx/vpx_integer.h index 829c9d132..2945c87ca 100644 --- a/vpx/vpx_integer.h +++ b/vpx/vpx_integer.h @@ -24,7 +24,7 @@ #define VPX_INLINE inline #endif -#if (defined(_MSC_VER) && (_MSC_VER < 1600)) || defined(VPX_EMULATE_INTTYPES) +#if defined(VPX_EMULATE_INTTYPES) typedef signed char int8_t; typedef signed short int16_t; typedef signed int int32_t; @@ -33,16 +33,6 @@ typedef unsigned char uint8_t; typedef unsigned short uint16_t; typedef unsigned int uint32_t; -#if (defined(_MSC_VER) && (_MSC_VER < 1600)) -typedef signed __int64 int64_t; -typedef unsigned __int64 uint64_t; -#define INT64_MAX _I64_MAX -#define INT32_MAX _I32_MAX -#define INT32_MIN _I32_MIN -#define INT16_MAX _I16_MAX -#define INT16_MIN _I16_MIN -#endif - #ifndef _UINTPTR_T_DEFINED typedef size_t uintptr_t; #endif diff --git a/vpx_dsp/vpx_dsp.mk b/vpx_dsp/vpx_dsp.mk index 018126d4b..93855c5ad 100644 --- a/vpx_dsp/vpx_dsp.mk +++ b/vpx_dsp/vpx_dsp.mk @@ -40,18 +40,14 @@ endif # intra predictions DSP_SRCS-yes += intrapred.c -ifeq ($(CONFIG_USE_X86INC),yes) DSP_SRCS-$(HAVE_SSE) += x86/intrapred_sse2.asm DSP_SRCS-$(HAVE_SSE2) += x86/intrapred_sse2.asm DSP_SRCS-$(HAVE_SSSE3) += x86/intrapred_ssse3.asm DSP_SRCS-$(HAVE_SSSE3) += x86/vpx_subpixel_8t_ssse3.asm -endif # CONFIG_USE_X86INC ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) -ifeq ($(CONFIG_USE_X86INC),yes) DSP_SRCS-$(HAVE_SSE) += x86/highbd_intrapred_sse2.asm DSP_SRCS-$(HAVE_SSE2) += x86/highbd_intrapred_sse2.asm -endif # CONFIG_USE_X86INC endif # CONFIG_VP9_HIGHBITDEPTH ifneq ($(filter yes,$(CONFIG_POSTPROC) $(CONFIG_VP9_POSTPROC)),) @@ -87,9 +83,8 @@ ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) DSP_SRCS-$(HAVE_SSE2) += x86/vpx_high_subpixel_8t_sse2.asm DSP_SRCS-$(HAVE_SSE2) += x86/vpx_high_subpixel_bilinear_sse2.asm endif -ifeq ($(CONFIG_USE_X86INC),yes) + DSP_SRCS-$(HAVE_SSE2) += x86/vpx_convolve_copy_sse2.asm -endif ifeq ($(HAVE_NEON_ASM),yes) DSP_SRCS-yes += arm/vpx_convolve_copy_neon_asm$(ASM) @@ -179,10 +174,8 @@ DSP_SRCS-$(HAVE_SSE2) += x86/fwd_txfm_sse2.c DSP_SRCS-$(HAVE_SSE2) += x86/fwd_txfm_impl_sse2.h DSP_SRCS-$(HAVE_SSE2) += x86/fwd_dct32x32_impl_sse2.h ifeq ($(ARCH_X86_64),yes) -ifeq ($(CONFIG_USE_X86INC),yes) DSP_SRCS-$(HAVE_SSSE3) += x86/fwd_txfm_ssse3_x86_64.asm endif -endif DSP_SRCS-$(HAVE_AVX2) += x86/fwd_txfm_avx2.c DSP_SRCS-$(HAVE_AVX2) += x86/fwd_dct32x32_impl_avx2.h DSP_SRCS-$(HAVE_NEON) += arm/fwd_txfm_neon.c @@ -197,12 +190,10 @@ DSP_SRCS-yes += inv_txfm.h DSP_SRCS-yes += inv_txfm.c DSP_SRCS-$(HAVE_SSE2) += x86/inv_txfm_sse2.h DSP_SRCS-$(HAVE_SSE2) += x86/inv_txfm_sse2.c -ifeq ($(CONFIG_USE_X86INC),yes) DSP_SRCS-$(HAVE_SSE2) += x86/inv_wht_sse2.asm ifeq ($(ARCH_X86_64),yes) DSP_SRCS-$(HAVE_SSSE3) += x86/inv_txfm_ssse3_x86_64.asm endif # ARCH_X86_64 -endif # CONFIG_USE_X86INC ifeq ($(HAVE_NEON_ASM),yes) DSP_SRCS-yes += arm/save_reg_neon$(ASM) @@ -254,11 +245,9 @@ ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) DSP_SRCS-$(HAVE_SSE2) += x86/highbd_quantize_intrin_sse2.c endif ifeq ($(ARCH_X86_64),yes) -ifeq ($(CONFIG_USE_X86INC),yes) DSP_SRCS-$(HAVE_SSSE3) += x86/quantize_ssse3_x86_64.asm DSP_SRCS-$(HAVE_AVX) += x86/quantize_avx_x86_64.asm endif -endif # avg DSP_SRCS-yes += avg.c @@ -267,10 +256,8 @@ DSP_SRCS-$(HAVE_NEON) += arm/avg_neon.c DSP_SRCS-$(HAVE_MSA) += mips/avg_msa.c DSP_SRCS-$(HAVE_NEON) += arm/hadamard_neon.c ifeq ($(ARCH_X86_64),yes) -ifeq ($(CONFIG_USE_X86INC),yes) DSP_SRCS-$(HAVE_SSSE3) += x86/avg_ssse3_x86_64.asm endif -endif endif # CONFIG_VP9_ENCODER @@ -292,7 +279,6 @@ DSP_SRCS-$(HAVE_SSE4_1) += x86/sad_sse4.asm DSP_SRCS-$(HAVE_AVX2) += x86/sad4d_avx2.c DSP_SRCS-$(HAVE_AVX2) += x86/sad_avx2.c -ifeq ($(CONFIG_USE_X86INC),yes) DSP_SRCS-$(HAVE_SSE) += x86/sad4d_sse2.asm DSP_SRCS-$(HAVE_SSE) += x86/sad_sse2.asm DSP_SRCS-$(HAVE_SSE2) += x86/sad4d_sse2.asm @@ -303,7 +289,6 @@ ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) DSP_SRCS-$(HAVE_SSE2) += x86/highbd_sad4d_sse2.asm DSP_SRCS-$(HAVE_SSE2) += x86/highbd_sad_sse2.asm endif # CONFIG_VP9_HIGHBITDEPTH -endif # CONFIG_USE_X86INC endif # CONFIG_ENCODERS @@ -334,17 +319,13 @@ ifeq ($(ARCH_X86_64),yes) DSP_SRCS-$(HAVE_SSE2) += x86/ssim_opt_x86_64.asm endif # ARCH_X86_64 -ifeq ($(CONFIG_USE_X86INC),yes) DSP_SRCS-$(HAVE_SSE) += x86/subpel_variance_sse2.asm DSP_SRCS-$(HAVE_SSE2) += x86/subpel_variance_sse2.asm # Contains SSE2 and SSSE3 -endif # CONFIG_USE_X86INC ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) DSP_SRCS-$(HAVE_SSE2) += x86/highbd_variance_sse2.c DSP_SRCS-$(HAVE_SSE2) += x86/highbd_variance_impl_sse2.asm -ifeq ($(CONFIG_USE_X86INC),yes) DSP_SRCS-$(HAVE_SSE2) += x86/highbd_subpel_variance_impl_sse2.asm -endif # CONFIG_USE_X86INC endif # CONFIG_VP9_HIGHBITDEPTH endif # CONFIG_ENCODERS || CONFIG_POSTPROC || CONFIG_VP9_POSTPROC diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index 37239a195..7b61415b6 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -11,29 +11,6 @@ EOF } forward_decls qw/vpx_dsp_forward_decls/; -# x86inc.asm had specific constraints. break it out so it's easy to disable. -# zero all the variables to avoid tricky else conditions. -$mmx_x86inc = $sse_x86inc = $sse2_x86inc = $ssse3_x86inc = $avx_x86inc = - $avx2_x86inc = ''; -$mmx_x86_64_x86inc = $sse_x86_64_x86inc = $sse2_x86_64_x86inc = - $ssse3_x86_64_x86inc = $avx_x86_64_x86inc = $avx2_x86_64_x86inc = ''; -if (vpx_config("CONFIG_USE_X86INC") eq "yes") { - $mmx_x86inc = 'mmx'; - $sse_x86inc = 'sse'; - $sse2_x86inc = 'sse2'; - $ssse3_x86inc = 'ssse3'; - $avx_x86inc = 'avx'; - $avx2_x86inc = 'avx2'; - if ($opts{arch} eq "x86_64") { - $mmx_x86_64_x86inc = 'mmx'; - $sse_x86_64_x86inc = 'sse'; - $sse2_x86_64_x86inc = 'sse2'; - $ssse3_x86_64_x86inc = 'ssse3'; - $avx_x86_64_x86inc = 'avx'; - $avx2_x86_64_x86inc = 'avx2'; - } -} - # optimizations which depend on multiple features $avx2_ssse3 = ''; if ((vpx_config("HAVE_AVX2") eq "yes") && (vpx_config("HAVE_SSSE3") eq "yes")) { @@ -55,19 +32,19 @@ if ($opts{arch} eq "x86_64") { # add_proto qw/void vpx_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_d207_predictor_4x4/, "$sse2_x86inc"; +specialize qw/vpx_d207_predictor_4x4 sse2/; add_proto qw/void vpx_d207e_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d207e_predictor_4x4/; add_proto qw/void vpx_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_d45_predictor_4x4 neon/, "$sse2_x86inc"; +specialize qw/vpx_d45_predictor_4x4 neon sse2/; add_proto qw/void vpx_d45e_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d45e_predictor_4x4/; add_proto qw/void vpx_d63_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_d63_predictor_4x4/, "$ssse3_x86inc"; +specialize qw/vpx_d63_predictor_4x4 ssse3/; add_proto qw/void vpx_d63e_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d63e_predictor_4x4/; @@ -76,7 +53,7 @@ add_proto qw/void vpx_d63f_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, co specialize qw/vpx_d63f_predictor_4x4/; add_proto qw/void vpx_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_h_predictor_4x4 neon dspr2 msa/, "$sse2_x86inc"; +specialize qw/vpx_h_predictor_4x4 neon dspr2 msa sse2/; add_proto qw/void vpx_he_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_he_predictor_4x4/; @@ -88,49 +65,49 @@ add_proto qw/void vpx_d135_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, co specialize qw/vpx_d135_predictor_4x4 neon/; add_proto qw/void vpx_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_d153_predictor_4x4/, "$ssse3_x86inc"; +specialize qw/vpx_d153_predictor_4x4 ssse3/; add_proto qw/void vpx_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_v_predictor_4x4 neon msa/, "$sse2_x86inc"; +specialize qw/vpx_v_predictor_4x4 neon msa sse2/; add_proto qw/void vpx_ve_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_ve_predictor_4x4/; add_proto qw/void vpx_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_tm_predictor_4x4 neon dspr2 msa/, "$sse2_x86inc"; +specialize qw/vpx_tm_predictor_4x4 neon dspr2 msa sse2/; add_proto qw/void vpx_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_dc_predictor_4x4 dspr2 msa neon/, "$sse2_x86inc"; +specialize qw/vpx_dc_predictor_4x4 dspr2 msa neon sse2/; add_proto qw/void vpx_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_dc_top_predictor_4x4 msa neon/, "$sse2_x86inc"; +specialize qw/vpx_dc_top_predictor_4x4 msa neon sse2/; add_proto qw/void vpx_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_dc_left_predictor_4x4 msa neon/, "$sse2_x86inc"; +specialize qw/vpx_dc_left_predictor_4x4 msa neon sse2/; add_proto qw/void vpx_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_dc_128_predictor_4x4 msa neon/, "$sse2_x86inc"; +specialize qw/vpx_dc_128_predictor_4x4 msa neon sse2/; add_proto qw/void vpx_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_d207_predictor_8x8/, "$ssse3_x86inc"; +specialize qw/vpx_d207_predictor_8x8 ssse3/; add_proto qw/void vpx_d207e_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d207e_predictor_8x8/; add_proto qw/void vpx_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_d45_predictor_8x8 neon/, "$sse2_x86inc"; +specialize qw/vpx_d45_predictor_8x8 neon sse2/; add_proto qw/void vpx_d45e_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d45e_predictor_8x8/; add_proto qw/void vpx_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_d63_predictor_8x8/, "$ssse3_x86inc"; +specialize qw/vpx_d63_predictor_8x8 ssse3/; add_proto qw/void vpx_d63e_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d63e_predictor_8x8/; add_proto qw/void vpx_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_h_predictor_8x8 neon dspr2 msa/, "$sse2_x86inc"; +specialize qw/vpx_h_predictor_8x8 neon dspr2 msa sse2/; add_proto qw/void vpx_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d117_predictor_8x8/; @@ -139,46 +116,46 @@ add_proto qw/void vpx_d135_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, co specialize qw/vpx_d135_predictor_8x8/; add_proto qw/void vpx_d153_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_d153_predictor_8x8/, "$ssse3_x86inc"; +specialize qw/vpx_d153_predictor_8x8 ssse3/; add_proto qw/void vpx_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_v_predictor_8x8 neon msa/, "$sse2_x86inc"; +specialize qw/vpx_v_predictor_8x8 neon msa sse2/; add_proto qw/void vpx_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_tm_predictor_8x8 neon dspr2 msa/, "$sse2_x86inc"; +specialize qw/vpx_tm_predictor_8x8 neon dspr2 msa sse2/; add_proto qw/void vpx_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_dc_predictor_8x8 dspr2 neon msa/, "$sse2_x86inc"; +specialize qw/vpx_dc_predictor_8x8 dspr2 neon msa sse2/; add_proto qw/void vpx_dc_top_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_dc_top_predictor_8x8 neon msa/, "$sse2_x86inc"; +specialize qw/vpx_dc_top_predictor_8x8 neon msa sse2/; add_proto qw/void vpx_dc_left_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_dc_left_predictor_8x8 neon msa/, "$sse2_x86inc"; +specialize qw/vpx_dc_left_predictor_8x8 neon msa sse2/; add_proto qw/void vpx_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_dc_128_predictor_8x8 neon msa/, "$sse2_x86inc"; +specialize qw/vpx_dc_128_predictor_8x8 neon msa sse2/; add_proto qw/void vpx_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_d207_predictor_16x16/, "$ssse3_x86inc"; +specialize qw/vpx_d207_predictor_16x16 ssse3/; add_proto qw/void vpx_d207e_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d207e_predictor_16x16/; add_proto qw/void vpx_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_d45_predictor_16x16 neon/, "$ssse3_x86inc"; +specialize qw/vpx_d45_predictor_16x16 neon ssse3/; add_proto qw/void vpx_d45e_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d45e_predictor_16x16/; add_proto qw/void vpx_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_d63_predictor_16x16/, "$ssse3_x86inc"; +specialize qw/vpx_d63_predictor_16x16 ssse3/; add_proto qw/void vpx_d63e_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d63e_predictor_16x16/; add_proto qw/void vpx_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_h_predictor_16x16 neon dspr2 msa/, "$sse2_x86inc"; +specialize qw/vpx_h_predictor_16x16 neon dspr2 msa sse2/; add_proto qw/void vpx_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d117_predictor_16x16/; @@ -187,46 +164,46 @@ add_proto qw/void vpx_d135_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, specialize qw/vpx_d135_predictor_16x16/; add_proto qw/void vpx_d153_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_d153_predictor_16x16/, "$ssse3_x86inc"; +specialize qw/vpx_d153_predictor_16x16 ssse3/; add_proto qw/void vpx_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_v_predictor_16x16 neon msa/, "$sse2_x86inc"; +specialize qw/vpx_v_predictor_16x16 neon msa sse2/; add_proto qw/void vpx_tm_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_tm_predictor_16x16 neon msa/, "$sse2_x86inc"; +specialize qw/vpx_tm_predictor_16x16 neon msa sse2/; add_proto qw/void vpx_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_dc_predictor_16x16 dspr2 neon msa/, "$sse2_x86inc"; +specialize qw/vpx_dc_predictor_16x16 dspr2 neon msa sse2/; add_proto qw/void vpx_dc_top_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_dc_top_predictor_16x16 neon msa/, "$sse2_x86inc"; +specialize qw/vpx_dc_top_predictor_16x16 neon msa sse2/; add_proto qw/void vpx_dc_left_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_dc_left_predictor_16x16 neon msa/, "$sse2_x86inc"; +specialize qw/vpx_dc_left_predictor_16x16 neon msa sse2/; add_proto qw/void vpx_dc_128_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_dc_128_predictor_16x16 neon msa/, "$sse2_x86inc"; +specialize qw/vpx_dc_128_predictor_16x16 neon msa sse2/; add_proto qw/void vpx_d207_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_d207_predictor_32x32/, "$ssse3_x86inc"; +specialize qw/vpx_d207_predictor_32x32 ssse3/; add_proto qw/void vpx_d207e_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d207e_predictor_32x32/; add_proto qw/void vpx_d45_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_d45_predictor_32x32/, "$ssse3_x86inc"; +specialize qw/vpx_d45_predictor_32x32 ssse3/; add_proto qw/void vpx_d45e_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d45e_predictor_32x32/; add_proto qw/void vpx_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_d63_predictor_32x32/, "$ssse3_x86inc"; +specialize qw/vpx_d63_predictor_32x32 ssse3/; add_proto qw/void vpx_d63e_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d63e_predictor_32x32/; add_proto qw/void vpx_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_h_predictor_32x32 neon msa/, "$sse2_x86inc"; +specialize qw/vpx_h_predictor_32x32 neon msa sse2/; add_proto qw/void vpx_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d117_predictor_32x32/; @@ -235,25 +212,25 @@ add_proto qw/void vpx_d135_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, specialize qw/vpx_d135_predictor_32x32/; add_proto qw/void vpx_d153_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_d153_predictor_32x32/, "$ssse3_x86inc"; +specialize qw/vpx_d153_predictor_32x32 ssse3/; add_proto qw/void vpx_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_v_predictor_32x32 neon msa/, "$sse2_x86inc"; +specialize qw/vpx_v_predictor_32x32 neon msa sse2/; add_proto qw/void vpx_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_tm_predictor_32x32 neon msa/, "$sse2_x86inc"; +specialize qw/vpx_tm_predictor_32x32 neon msa sse2/; add_proto qw/void vpx_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_dc_predictor_32x32 msa neon/, "$sse2_x86inc"; +specialize qw/vpx_dc_predictor_32x32 msa neon sse2/; add_proto qw/void vpx_dc_top_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_dc_top_predictor_32x32 msa neon/, "$sse2_x86inc"; +specialize qw/vpx_dc_top_predictor_32x32 msa neon sse2/; add_proto qw/void vpx_dc_left_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_dc_left_predictor_32x32 msa neon/, "$sse2_x86inc"; +specialize qw/vpx_dc_left_predictor_32x32 msa neon sse2/; add_proto qw/void vpx_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_dc_128_predictor_32x32 msa neon/, "$sse2_x86inc"; +specialize qw/vpx_dc_128_predictor_32x32 msa neon sse2/; # High bitdepth functions if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { @@ -288,13 +265,13 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vpx_highbd_d153_predictor_4x4/; add_proto qw/void vpx_highbd_v_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_v_predictor_4x4/, "$sse2_x86inc"; + specialize qw/vpx_highbd_v_predictor_4x4 sse2/; add_proto qw/void vpx_highbd_tm_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_tm_predictor_4x4/, "$sse2_x86inc"; + specialize qw/vpx_highbd_tm_predictor_4x4 sse2/; add_proto qw/void vpx_highbd_dc_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_dc_predictor_4x4/, "$sse2_x86inc"; + specialize qw/vpx_highbd_dc_predictor_4x4 sse2/; add_proto qw/void vpx_highbd_dc_top_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; specialize qw/vpx_highbd_dc_top_predictor_4x4/; @@ -336,13 +313,13 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vpx_highbd_d153_predictor_8x8/; add_proto qw/void vpx_highbd_v_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_v_predictor_8x8/, "$sse2_x86inc"; + specialize qw/vpx_highbd_v_predictor_8x8 sse2/; add_proto qw/void vpx_highbd_tm_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_tm_predictor_8x8/, "$sse2_x86inc"; + specialize qw/vpx_highbd_tm_predictor_8x8 sse2/; add_proto qw/void vpx_highbd_dc_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_dc_predictor_8x8/, "$sse2_x86inc";; + specialize qw/vpx_highbd_dc_predictor_8x8 sse2/;; add_proto qw/void vpx_highbd_dc_top_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; specialize qw/vpx_highbd_dc_top_predictor_8x8/; @@ -384,13 +361,13 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vpx_highbd_d153_predictor_16x16/; add_proto qw/void vpx_highbd_v_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_v_predictor_16x16/, "$sse2_x86inc"; + specialize qw/vpx_highbd_v_predictor_16x16 sse2/; add_proto qw/void vpx_highbd_tm_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_tm_predictor_16x16/, "$sse2_x86inc"; + specialize qw/vpx_highbd_tm_predictor_16x16 sse2/; add_proto qw/void vpx_highbd_dc_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_dc_predictor_16x16/, "$sse2_x86inc"; + specialize qw/vpx_highbd_dc_predictor_16x16 sse2/; add_proto qw/void vpx_highbd_dc_top_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; specialize qw/vpx_highbd_dc_top_predictor_16x16/; @@ -432,13 +409,13 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vpx_highbd_d153_predictor_32x32/; add_proto qw/void vpx_highbd_v_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_v_predictor_32x32/, "$sse2_x86inc"; + specialize qw/vpx_highbd_v_predictor_32x32 sse2/; add_proto qw/void vpx_highbd_tm_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_tm_predictor_32x32/, "$sse2_x86inc"; + specialize qw/vpx_highbd_tm_predictor_32x32 sse2/; add_proto qw/void vpx_highbd_dc_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_dc_predictor_32x32/, "$sse2_x86inc"; + specialize qw/vpx_highbd_dc_predictor_32x32 sse2/; add_proto qw/void vpx_highbd_dc_top_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; specialize qw/vpx_highbd_dc_top_predictor_32x32/; @@ -454,10 +431,10 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # Sub Pixel Filters # add_proto qw/void vpx_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; -specialize qw/vpx_convolve_copy neon dspr2 msa/, "$sse2_x86inc"; +specialize qw/vpx_convolve_copy neon dspr2 msa sse2/; add_proto qw/void vpx_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; -specialize qw/vpx_convolve_avg neon dspr2 msa/, "$sse2_x86inc"; +specialize qw/vpx_convolve_avg neon dspr2 msa sse2/; add_proto qw/void vpx_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; specialize qw/vpx_convolve8 sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3"; @@ -500,10 +477,10 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # Sub Pixel Filters # add_proto qw/void vpx_highbd_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps"; - specialize qw/vpx_highbd_convolve_copy/, "$sse2_x86inc"; + specialize qw/vpx_highbd_convolve_copy sse2/; add_proto qw/void vpx_highbd_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps"; - specialize qw/vpx_highbd_convolve_avg/, "$sse2_x86inc"; + specialize qw/vpx_highbd_convolve_avg sse2/; add_proto qw/void vpx_highbd_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps"; specialize qw/vpx_highbd_convolve8/, "$sse2_x86_64"; @@ -674,7 +651,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vpx_fdct4x4_1 sse2/; add_proto qw/void vpx_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride"; - specialize qw/vpx_fdct8x8 sse2 neon msa/, "$ssse3_x86_64_x86inc"; + specialize qw/vpx_fdct8x8 sse2 neon msa/, "$ssse3_x86_64"; add_proto qw/void vpx_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride"; specialize qw/vpx_fdct8x8_1 sse2 neon msa/; @@ -706,7 +683,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vpx_iwht4x4_1_add/; add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vpx_iwht4x4_16_add/, "$sse2_x86inc"; + specialize qw/vpx_iwht4x4_16_add sse2/; add_proto qw/void vpx_highbd_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; specialize qw/vpx_highbd_idct4x4_1_add/; @@ -792,10 +769,10 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vpx_idct4x4_1_add sse2/; add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vpx_idct8x8_64_add sse2/, "$ssse3_x86_64_x86inc"; + specialize qw/vpx_idct8x8_64_add sse2/, "$ssse3_x86_64"; add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vpx_idct8x8_12_add sse2/, "$ssse3_x86_64_x86inc"; + specialize qw/vpx_idct8x8_12_add sse2/, "$ssse3_x86_64"; add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; specialize qw/vpx_idct8x8_1_add sse2/; @@ -810,15 +787,15 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vpx_idct16x16_1_add sse2/; add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vpx_idct32x32_1024_add sse2/, "$ssse3_x86_64_x86inc"; + specialize qw/vpx_idct32x32_1024_add sse2/, "$ssse3_x86_64"; add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vpx_idct32x32_135_add sse2/, "$ssse3_x86_64_x86inc"; + specialize qw/vpx_idct32x32_135_add sse2/, "$ssse3_x86_64"; # Need to add 135 eob idct32x32 implementations. $vpx_idct32x32_135_add_sse2=vpx_idct32x32_1024_add_sse2; add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vpx_idct32x32_34_add sse2/, "$ssse3_x86_64_x86inc"; + specialize qw/vpx_idct32x32_34_add sse2/, "$ssse3_x86_64"; add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; specialize qw/vpx_idct32x32_1_add sse2/; @@ -893,10 +870,10 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vpx_idct8x8_1_add sse2 neon dspr2 msa/; add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vpx_idct8x8_64_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc"; + specialize qw/vpx_idct8x8_64_add sse2 neon dspr2 msa/, "$ssse3_x86_64"; add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vpx_idct8x8_12_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc"; + specialize qw/vpx_idct8x8_12_add sse2 neon dspr2 msa/, "$ssse3_x86_64"; add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; specialize qw/vpx_idct16x16_1_add sse2 neon dspr2 msa/; @@ -908,10 +885,10 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vpx_idct16x16_10_add sse2 neon dspr2 msa/; add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vpx_idct32x32_1024_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc"; + specialize qw/vpx_idct32x32_1024_add sse2 neon dspr2 msa/, "$ssse3_x86_64"; add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vpx_idct32x32_135_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc"; + specialize qw/vpx_idct32x32_135_add sse2 neon dspr2 msa/, "$ssse3_x86_64"; # Need to add 135 eob idct32x32 implementations. $vpx_idct32x32_135_add_sse2=vpx_idct32x32_1024_add_sse2; $vpx_idct32x32_135_add_neon=vpx_idct32x32_1024_add_neon; @@ -919,7 +896,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { $vpx_idct32x32_135_add_msa=vpx_idct32x32_1024_add_msa; add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vpx_idct32x32_34_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc"; + specialize qw/vpx_idct32x32_34_add sse2 neon dspr2 msa/, "$ssse3_x86_64"; # Need to add 34 eob idct32x32 neon implementation. $vpx_idct32x32_34_add_neon=vpx_idct32x32_1024_add_neon; @@ -930,7 +907,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vpx_iwht4x4_1_add msa/; add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; - specialize qw/vpx_iwht4x4_16_add msa/, "$sse2_x86inc"; + specialize qw/vpx_iwht4x4_16_add msa sse2/; } # CONFIG_EMULATE_HARDWARE } # CONFIG_VP9_HIGHBITDEPTH } # CONFIG_VP9 @@ -940,10 +917,10 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") { add_proto qw/void vpx_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - specialize qw/vpx_quantize_b sse2/, "$ssse3_x86_64_x86inc", "$avx_x86_64_x86inc"; + specialize qw/vpx_quantize_b sse2/, "$ssse3_x86_64", "$avx_x86_64"; add_proto qw/void vpx_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - specialize qw/vpx_quantize_b_32x32/, "$ssse3_x86_64_x86inc", "$avx_x86_64_x86inc"; + specialize qw/vpx_quantize_b_32x32/, "$ssse3_x86_64", "$avx_x86_64"; if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vpx_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; @@ -959,49 +936,49 @@ if (vpx_config("CONFIG_ENCODERS") eq "yes") { # Block subtraction # add_proto qw/void vpx_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride"; -specialize qw/vpx_subtract_block neon msa/, "$sse2_x86inc"; +specialize qw/vpx_subtract_block neon msa sse2/; # # Single block SAD # add_proto qw/unsigned int vpx_sad64x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vpx_sad64x64 avx2 neon msa/, "$sse2_x86inc"; +specialize qw/vpx_sad64x64 avx2 neon msa sse2/; add_proto qw/unsigned int vpx_sad64x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vpx_sad64x32 avx2 msa/, "$sse2_x86inc"; +specialize qw/vpx_sad64x32 avx2 msa sse2/; add_proto qw/unsigned int vpx_sad32x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vpx_sad32x64 avx2 msa/, "$sse2_x86inc"; +specialize qw/vpx_sad32x64 avx2 msa sse2/; add_proto qw/unsigned int vpx_sad32x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vpx_sad32x32 avx2 neon msa/, "$sse2_x86inc"; +specialize qw/vpx_sad32x32 avx2 neon msa sse2/; add_proto qw/unsigned int vpx_sad32x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vpx_sad32x16 avx2 msa/, "$sse2_x86inc"; +specialize qw/vpx_sad32x16 avx2 msa sse2/; add_proto qw/unsigned int vpx_sad16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vpx_sad16x32 msa/, "$sse2_x86inc"; +specialize qw/vpx_sad16x32 msa sse2/; add_proto qw/unsigned int vpx_sad16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vpx_sad16x16 media neon msa/, "$sse2_x86inc"; +specialize qw/vpx_sad16x16 media neon msa sse2/; add_proto qw/unsigned int vpx_sad16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vpx_sad16x8 neon msa/, "$sse2_x86inc"; +specialize qw/vpx_sad16x8 neon msa sse2/; add_proto qw/unsigned int vpx_sad8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vpx_sad8x16 neon msa/, "$sse2_x86inc"; +specialize qw/vpx_sad8x16 neon msa sse2/; add_proto qw/unsigned int vpx_sad8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vpx_sad8x8 neon msa/, "$sse2_x86inc"; +specialize qw/vpx_sad8x8 neon msa sse2/; add_proto qw/unsigned int vpx_sad8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vpx_sad8x4 msa/, "$sse2_x86inc"; +specialize qw/vpx_sad8x4 msa sse2/; add_proto qw/unsigned int vpx_sad4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vpx_sad4x8 msa/, "$sse2_x86inc"; +specialize qw/vpx_sad4x8 msa sse2/; add_proto qw/unsigned int vpx_sad4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vpx_sad4x4 neon msa/, "$sse2_x86inc"; +specialize qw/vpx_sad4x4 neon msa sse2/; # # Avg @@ -1017,7 +994,7 @@ if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") { specialize qw/vpx_minmax_8x8 sse2 neon/; add_proto qw/void vpx_hadamard_8x8/, "const int16_t *src_diff, int src_stride, int16_t *coeff"; - specialize qw/vpx_hadamard_8x8 sse2 neon/, "$ssse3_x86_64_x86inc"; + specialize qw/vpx_hadamard_8x8 sse2 neon/, "$ssse3_x86_64"; add_proto qw/void vpx_hadamard_16x16/, "const int16_t *src_diff, int src_stride, int16_t *coeff"; specialize qw/vpx_hadamard_16x16 sse2 neon/; @@ -1036,43 +1013,43 @@ if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") { } # CONFIG_VP9_ENCODER add_proto qw/unsigned int vpx_sad64x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; -specialize qw/vpx_sad64x64_avg avx2 msa/, "$sse2_x86inc"; +specialize qw/vpx_sad64x64_avg avx2 msa sse2/; add_proto qw/unsigned int vpx_sad64x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; -specialize qw/vpx_sad64x32_avg avx2 msa/, "$sse2_x86inc"; +specialize qw/vpx_sad64x32_avg avx2 msa sse2/; add_proto qw/unsigned int vpx_sad32x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; -specialize qw/vpx_sad32x64_avg avx2 msa/, "$sse2_x86inc"; +specialize qw/vpx_sad32x64_avg avx2 msa sse2/; add_proto qw/unsigned int vpx_sad32x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; -specialize qw/vpx_sad32x32_avg avx2 msa/, "$sse2_x86inc"; +specialize qw/vpx_sad32x32_avg avx2 msa sse2/; add_proto qw/unsigned int vpx_sad32x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; -specialize qw/vpx_sad32x16_avg avx2 msa/, "$sse2_x86inc"; +specialize qw/vpx_sad32x16_avg avx2 msa sse2/; add_proto qw/unsigned int vpx_sad16x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; -specialize qw/vpx_sad16x32_avg msa/, "$sse2_x86inc"; +specialize qw/vpx_sad16x32_avg msa sse2/; add_proto qw/unsigned int vpx_sad16x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; -specialize qw/vpx_sad16x16_avg msa/, "$sse2_x86inc"; +specialize qw/vpx_sad16x16_avg msa sse2/; add_proto qw/unsigned int vpx_sad16x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; -specialize qw/vpx_sad16x8_avg msa/, "$sse2_x86inc"; +specialize qw/vpx_sad16x8_avg msa sse2/; add_proto qw/unsigned int vpx_sad8x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; -specialize qw/vpx_sad8x16_avg msa/, "$sse2_x86inc"; +specialize qw/vpx_sad8x16_avg msa sse2/; add_proto qw/unsigned int vpx_sad8x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; -specialize qw/vpx_sad8x8_avg msa/, "$sse2_x86inc"; +specialize qw/vpx_sad8x8_avg msa sse2/; add_proto qw/unsigned int vpx_sad8x4_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; -specialize qw/vpx_sad8x4_avg msa/, "$sse2_x86inc"; +specialize qw/vpx_sad8x4_avg msa sse2/; add_proto qw/unsigned int vpx_sad4x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; -specialize qw/vpx_sad4x8_avg msa/, "$sse2_x86inc"; +specialize qw/vpx_sad4x8_avg msa sse2/; add_proto qw/unsigned int vpx_sad4x4_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; -specialize qw/vpx_sad4x4_avg msa/, "$sse2_x86inc"; +specialize qw/vpx_sad4x4_avg msa sse2/; # # Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally @@ -1131,43 +1108,43 @@ specialize qw/vpx_sad4x4x8 sse4_1 msa/; # Multi-block SAD, comparing a reference to N independent blocks # add_proto qw/void vpx_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array"; -specialize qw/vpx_sad64x64x4d avx2 neon msa/, "$sse2_x86inc"; +specialize qw/vpx_sad64x64x4d avx2 neon msa sse2/; add_proto qw/void vpx_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array"; -specialize qw/vpx_sad64x32x4d msa/, "$sse2_x86inc"; +specialize qw/vpx_sad64x32x4d msa sse2/; add_proto qw/void vpx_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array"; -specialize qw/vpx_sad32x64x4d msa/, "$sse2_x86inc"; +specialize qw/vpx_sad32x64x4d msa sse2/; add_proto qw/void vpx_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array"; -specialize qw/vpx_sad32x32x4d avx2 neon msa/, "$sse2_x86inc"; +specialize qw/vpx_sad32x32x4d avx2 neon msa sse2/; add_proto qw/void vpx_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array"; -specialize qw/vpx_sad32x16x4d msa/, "$sse2_x86inc"; +specialize qw/vpx_sad32x16x4d msa sse2/; add_proto qw/void vpx_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array"; -specialize qw/vpx_sad16x32x4d msa/, "$sse2_x86inc"; +specialize qw/vpx_sad16x32x4d msa sse2/; add_proto qw/void vpx_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array"; -specialize qw/vpx_sad16x16x4d neon msa/, "$sse2_x86inc"; +specialize qw/vpx_sad16x16x4d neon msa sse2/; add_proto qw/void vpx_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array"; -specialize qw/vpx_sad16x8x4d msa/, "$sse2_x86inc"; +specialize qw/vpx_sad16x8x4d msa sse2/; add_proto qw/void vpx_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array"; -specialize qw/vpx_sad8x16x4d msa/, "$sse2_x86inc"; +specialize qw/vpx_sad8x16x4d msa sse2/; add_proto qw/void vpx_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array"; -specialize qw/vpx_sad8x8x4d msa/, "$sse2_x86inc"; +specialize qw/vpx_sad8x8x4d msa sse2/; add_proto qw/void vpx_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array"; -specialize qw/vpx_sad8x4x4d msa/, "$sse2_x86inc"; +specialize qw/vpx_sad8x4x4d msa sse2/; add_proto qw/void vpx_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array"; -specialize qw/vpx_sad4x8x4d msa/, "$sse2_x86inc"; +specialize qw/vpx_sad4x8x4d msa sse2/; add_proto qw/void vpx_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array"; -specialize qw/vpx_sad4x4x4d msa/, "$sse2_x86inc"; +specialize qw/vpx_sad4x4x4d msa sse2/; # # Structured Similarity (SSIM) @@ -1191,37 +1168,37 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # Single block SAD # add_proto qw/unsigned int vpx_highbd_sad64x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; - specialize qw/vpx_highbd_sad64x64/, "$sse2_x86inc"; + specialize qw/vpx_highbd_sad64x64 sse2/; add_proto qw/unsigned int vpx_highbd_sad64x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; - specialize qw/vpx_highbd_sad64x32/, "$sse2_x86inc"; + specialize qw/vpx_highbd_sad64x32 sse2/; add_proto qw/unsigned int vpx_highbd_sad32x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; - specialize qw/vpx_highbd_sad32x64/, "$sse2_x86inc"; + specialize qw/vpx_highbd_sad32x64 sse2/; add_proto qw/unsigned int vpx_highbd_sad32x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; - specialize qw/vpx_highbd_sad32x32/, "$sse2_x86inc"; + specialize qw/vpx_highbd_sad32x32 sse2/; add_proto qw/unsigned int vpx_highbd_sad32x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; - specialize qw/vpx_highbd_sad32x16/, "$sse2_x86inc"; + specialize qw/vpx_highbd_sad32x16 sse2/; add_proto qw/unsigned int vpx_highbd_sad16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; - specialize qw/vpx_highbd_sad16x32/, "$sse2_x86inc"; + specialize qw/vpx_highbd_sad16x32 sse2/; add_proto qw/unsigned int vpx_highbd_sad16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; - specialize qw/vpx_highbd_sad16x16/, "$sse2_x86inc"; + specialize qw/vpx_highbd_sad16x16 sse2/; add_proto qw/unsigned int vpx_highbd_sad16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; - specialize qw/vpx_highbd_sad16x8/, "$sse2_x86inc"; + specialize qw/vpx_highbd_sad16x8 sse2/; add_proto qw/unsigned int vpx_highbd_sad8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; - specialize qw/vpx_highbd_sad8x16/, "$sse2_x86inc"; + specialize qw/vpx_highbd_sad8x16 sse2/; add_proto qw/unsigned int vpx_highbd_sad8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; - specialize qw/vpx_highbd_sad8x8/, "$sse2_x86inc"; + specialize qw/vpx_highbd_sad8x8 sse2/; add_proto qw/unsigned int vpx_highbd_sad8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; - specialize qw/vpx_highbd_sad8x4/, "$sse2_x86inc"; + specialize qw/vpx_highbd_sad8x4 sse2/; add_proto qw/unsigned int vpx_highbd_sad4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vpx_highbd_sad4x8/; @@ -1240,37 +1217,37 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vpx_highbd_minmax_8x8/; add_proto qw/unsigned int vpx_highbd_sad64x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; - specialize qw/vpx_highbd_sad64x64_avg/, "$sse2_x86inc"; + specialize qw/vpx_highbd_sad64x64_avg sse2/; add_proto qw/unsigned int vpx_highbd_sad64x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; - specialize qw/vpx_highbd_sad64x32_avg/, "$sse2_x86inc"; + specialize qw/vpx_highbd_sad64x32_avg sse2/; add_proto qw/unsigned int vpx_highbd_sad32x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; - specialize qw/vpx_highbd_sad32x64_avg/, "$sse2_x86inc"; + specialize qw/vpx_highbd_sad32x64_avg sse2/; add_proto qw/unsigned int vpx_highbd_sad32x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; - specialize qw/vpx_highbd_sad32x32_avg/, "$sse2_x86inc"; + specialize qw/vpx_highbd_sad32x32_avg sse2/; add_proto qw/unsigned int vpx_highbd_sad32x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; - specialize qw/vpx_highbd_sad32x16_avg/, "$sse2_x86inc"; + specialize qw/vpx_highbd_sad32x16_avg sse2/; add_proto qw/unsigned int vpx_highbd_sad16x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; - specialize qw/vpx_highbd_sad16x32_avg/, "$sse2_x86inc"; + specialize qw/vpx_highbd_sad16x32_avg sse2/; add_proto qw/unsigned int vpx_highbd_sad16x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; - specialize qw/vpx_highbd_sad16x16_avg/, "$sse2_x86inc"; + specialize qw/vpx_highbd_sad16x16_avg sse2/; add_proto qw/unsigned int vpx_highbd_sad16x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; - specialize qw/vpx_highbd_sad16x8_avg/, "$sse2_x86inc"; + specialize qw/vpx_highbd_sad16x8_avg sse2/; add_proto qw/unsigned int vpx_highbd_sad8x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; - specialize qw/vpx_highbd_sad8x16_avg/, "$sse2_x86inc"; + specialize qw/vpx_highbd_sad8x16_avg sse2/; add_proto qw/unsigned int vpx_highbd_sad8x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; - specialize qw/vpx_highbd_sad8x8_avg/, "$sse2_x86inc"; + specialize qw/vpx_highbd_sad8x8_avg sse2/; add_proto qw/unsigned int vpx_highbd_sad8x4_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; - specialize qw/vpx_highbd_sad8x4_avg/, "$sse2_x86inc"; + specialize qw/vpx_highbd_sad8x4_avg sse2/; add_proto qw/unsigned int vpx_highbd_sad4x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vpx_highbd_sad4x8_avg/; @@ -1335,43 +1312,43 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # Multi-block SAD, comparing a reference to N independent blocks # add_proto qw/void vpx_highbd_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array"; - specialize qw/vpx_highbd_sad64x64x4d/, "$sse2_x86inc"; + specialize qw/vpx_highbd_sad64x64x4d sse2/; add_proto qw/void vpx_highbd_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array"; - specialize qw/vpx_highbd_sad64x32x4d/, "$sse2_x86inc"; + specialize qw/vpx_highbd_sad64x32x4d sse2/; add_proto qw/void vpx_highbd_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array"; - specialize qw/vpx_highbd_sad32x64x4d/, "$sse2_x86inc"; + specialize qw/vpx_highbd_sad32x64x4d sse2/; add_proto qw/void vpx_highbd_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array"; - specialize qw/vpx_highbd_sad32x32x4d/, "$sse2_x86inc"; + specialize qw/vpx_highbd_sad32x32x4d sse2/; add_proto qw/void vpx_highbd_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array"; - specialize qw/vpx_highbd_sad32x16x4d/, "$sse2_x86inc"; + specialize qw/vpx_highbd_sad32x16x4d sse2/; add_proto qw/void vpx_highbd_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array"; - specialize qw/vpx_highbd_sad16x32x4d/, "$sse2_x86inc"; + specialize qw/vpx_highbd_sad16x32x4d sse2/; add_proto qw/void vpx_highbd_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array"; - specialize qw/vpx_highbd_sad16x16x4d/, "$sse2_x86inc"; + specialize qw/vpx_highbd_sad16x16x4d sse2/; add_proto qw/void vpx_highbd_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array"; - specialize qw/vpx_highbd_sad16x8x4d/, "$sse2_x86inc"; + specialize qw/vpx_highbd_sad16x8x4d sse2/; add_proto qw/void vpx_highbd_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array"; - specialize qw/vpx_highbd_sad8x16x4d/, "$sse2_x86inc"; + specialize qw/vpx_highbd_sad8x16x4d sse2/; add_proto qw/void vpx_highbd_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array"; - specialize qw/vpx_highbd_sad8x8x4d/, "$sse2_x86inc"; + specialize qw/vpx_highbd_sad8x8x4d sse2/; add_proto qw/void vpx_highbd_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array"; - specialize qw/vpx_highbd_sad8x4x4d/, "$sse2_x86inc"; + specialize qw/vpx_highbd_sad8x4x4d sse2/; add_proto qw/void vpx_highbd_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array"; - specialize qw/vpx_highbd_sad4x8x4d/, "$sse2_x86inc"; + specialize qw/vpx_highbd_sad4x8x4d sse2/; add_proto qw/void vpx_highbd_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array"; - specialize qw/vpx_highbd_sad4x4x4d/, "$sse2_x86inc"; + specialize qw/vpx_highbd_sad4x4x4d sse2/; # # Structured Similarity (SSIM) @@ -1460,82 +1437,82 @@ add_proto qw/void vpx_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred, # Subpixel Variance # add_proto qw/uint32_t vpx_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_sub_pixel_variance64x64 avx2 neon msa/, "$sse2_x86inc", "$ssse3_x86inc"; + specialize qw/vpx_sub_pixel_variance64x64 avx2 neon msa sse2 ssse3/; add_proto qw/uint32_t vpx_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_sub_pixel_variance64x32 msa/, "$sse2_x86inc", "$ssse3_x86inc"; + specialize qw/vpx_sub_pixel_variance64x32 msa sse2 ssse3/; add_proto qw/uint32_t vpx_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_sub_pixel_variance32x64 msa/, "$sse2_x86inc", "$ssse3_x86inc"; + specialize qw/vpx_sub_pixel_variance32x64 msa sse2 ssse3/; add_proto qw/uint32_t vpx_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_sub_pixel_variance32x32 avx2 neon msa/, "$sse2_x86inc", "$ssse3_x86inc"; + specialize qw/vpx_sub_pixel_variance32x32 avx2 neon msa sse2 ssse3/; add_proto qw/uint32_t vpx_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_sub_pixel_variance32x16 msa/, "$sse2_x86inc", "$ssse3_x86inc"; + specialize qw/vpx_sub_pixel_variance32x16 msa sse2 ssse3/; add_proto qw/uint32_t vpx_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_sub_pixel_variance16x32 msa/, "$sse2_x86inc", "$ssse3_x86inc"; + specialize qw/vpx_sub_pixel_variance16x32 msa sse2 ssse3/; add_proto qw/uint32_t vpx_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_sub_pixel_variance16x16 media neon msa/, "$sse2_x86inc", "$ssse3_x86inc"; + specialize qw/vpx_sub_pixel_variance16x16 media neon msa sse2 ssse3/; add_proto qw/uint32_t vpx_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_sub_pixel_variance16x8 msa/, "$sse2_x86inc", "$ssse3_x86inc"; + specialize qw/vpx_sub_pixel_variance16x8 msa sse2 ssse3/; add_proto qw/uint32_t vpx_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_sub_pixel_variance8x16 msa/, "$sse2_x86inc", "$ssse3_x86inc"; + specialize qw/vpx_sub_pixel_variance8x16 msa sse2 ssse3/; add_proto qw/uint32_t vpx_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_sub_pixel_variance8x8 media neon msa/, "$sse2_x86inc", "$ssse3_x86inc"; + specialize qw/vpx_sub_pixel_variance8x8 media neon msa sse2 ssse3/; add_proto qw/uint32_t vpx_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_sub_pixel_variance8x4 msa/, "$sse2_x86inc", "$ssse3_x86inc"; + specialize qw/vpx_sub_pixel_variance8x4 msa sse2 ssse3/; add_proto qw/uint32_t vpx_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_sub_pixel_variance4x8 msa/, "$sse2_x86inc", "$ssse3_x86inc"; + specialize qw/vpx_sub_pixel_variance4x8 msa sse2 ssse3/; add_proto qw/uint32_t vpx_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_sub_pixel_variance4x4 msa/, "$sse2_x86inc", "$ssse3_x86inc"; + specialize qw/vpx_sub_pixel_variance4x4 msa sse2 ssse3/; add_proto qw/uint32_t vpx_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_sub_pixel_avg_variance64x64 avx2 msa/, "$sse2_x86inc", "$ssse3_x86inc"; + specialize qw/vpx_sub_pixel_avg_variance64x64 avx2 msa sse2 ssse3/; add_proto qw/uint32_t vpx_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_sub_pixel_avg_variance64x32 msa/, "$sse2_x86inc", "$ssse3_x86inc"; + specialize qw/vpx_sub_pixel_avg_variance64x32 msa sse2 ssse3/; add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_sub_pixel_avg_variance32x64 msa/, "$sse2_x86inc", "$ssse3_x86inc"; + specialize qw/vpx_sub_pixel_avg_variance32x64 msa sse2 ssse3/; add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_sub_pixel_avg_variance32x32 avx2 msa/, "$sse2_x86inc", "$ssse3_x86inc"; + specialize qw/vpx_sub_pixel_avg_variance32x32 avx2 msa sse2 ssse3/; add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_sub_pixel_avg_variance32x16 msa/, "$sse2_x86inc", "$ssse3_x86inc"; + specialize qw/vpx_sub_pixel_avg_variance32x16 msa sse2 ssse3/; add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_sub_pixel_avg_variance16x32 msa/, "$sse2_x86inc", "$ssse3_x86inc"; + specialize qw/vpx_sub_pixel_avg_variance16x32 msa sse2 ssse3/; add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_sub_pixel_avg_variance16x16 msa/, "$sse2_x86inc", "$ssse3_x86inc"; + specialize qw/vpx_sub_pixel_avg_variance16x16 msa sse2 ssse3/; add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_sub_pixel_avg_variance16x8 msa/, "$sse2_x86inc", "$ssse3_x86inc"; + specialize qw/vpx_sub_pixel_avg_variance16x8 msa sse2 ssse3/; add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_sub_pixel_avg_variance8x16 msa/, "$sse2_x86inc", "$ssse3_x86inc"; + specialize qw/vpx_sub_pixel_avg_variance8x16 msa sse2 ssse3/; add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_sub_pixel_avg_variance8x8 msa/, "$sse2_x86inc", "$ssse3_x86inc"; + specialize qw/vpx_sub_pixel_avg_variance8x8 msa sse2 ssse3/; add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_sub_pixel_avg_variance8x4 msa/, "$sse2_x86inc", "$ssse3_x86inc"; + specialize qw/vpx_sub_pixel_avg_variance8x4 msa sse2 ssse3/; add_proto qw/uint32_t vpx_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_sub_pixel_avg_variance4x8 msa/, "$sse2_x86inc", "$ssse3_x86inc"; + specialize qw/vpx_sub_pixel_avg_variance4x8 msa sse2 ssse3/; add_proto qw/uint32_t vpx_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_sub_pixel_avg_variance4x4 msa/, "$sse2_x86inc", "$ssse3_x86inc"; + specialize qw/vpx_sub_pixel_avg_variance4x4 msa sse2 ssse3/; # # Specialty Subpixel @@ -1691,217 +1668,217 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # Subpixel Variance # add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_highbd_12_sub_pixel_variance64x64/, "$sse2_x86inc"; + specialize qw/vpx_highbd_12_sub_pixel_variance64x64 sse2/; add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_highbd_12_sub_pixel_variance64x32/, "$sse2_x86inc"; + specialize qw/vpx_highbd_12_sub_pixel_variance64x32 sse2/; add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_highbd_12_sub_pixel_variance32x64/, "$sse2_x86inc"; + specialize qw/vpx_highbd_12_sub_pixel_variance32x64 sse2/; add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_highbd_12_sub_pixel_variance32x32/, "$sse2_x86inc"; + specialize qw/vpx_highbd_12_sub_pixel_variance32x32 sse2/; add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_highbd_12_sub_pixel_variance32x16/, "$sse2_x86inc"; + specialize qw/vpx_highbd_12_sub_pixel_variance32x16 sse2/; add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_highbd_12_sub_pixel_variance16x32/, "$sse2_x86inc"; + specialize qw/vpx_highbd_12_sub_pixel_variance16x32 sse2/; add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_highbd_12_sub_pixel_variance16x16/, "$sse2_x86inc"; + specialize qw/vpx_highbd_12_sub_pixel_variance16x16 sse2/; add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_highbd_12_sub_pixel_variance16x8/, "$sse2_x86inc"; + specialize qw/vpx_highbd_12_sub_pixel_variance16x8 sse2/; add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_highbd_12_sub_pixel_variance8x16/, "$sse2_x86inc"; + specialize qw/vpx_highbd_12_sub_pixel_variance8x16 sse2/; add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_highbd_12_sub_pixel_variance8x8/, "$sse2_x86inc"; + specialize qw/vpx_highbd_12_sub_pixel_variance8x8 sse2/; add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_highbd_12_sub_pixel_variance8x4/, "$sse2_x86inc"; + specialize qw/vpx_highbd_12_sub_pixel_variance8x4 sse2/; add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_highbd_10_sub_pixel_variance64x64/, "$sse2_x86inc"; + specialize qw/vpx_highbd_10_sub_pixel_variance64x64 sse2/; add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_highbd_10_sub_pixel_variance64x32/, "$sse2_x86inc"; + specialize qw/vpx_highbd_10_sub_pixel_variance64x32 sse2/; add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_highbd_10_sub_pixel_variance32x64/, "$sse2_x86inc"; + specialize qw/vpx_highbd_10_sub_pixel_variance32x64 sse2/; add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_highbd_10_sub_pixel_variance32x32/, "$sse2_x86inc"; + specialize qw/vpx_highbd_10_sub_pixel_variance32x32 sse2/; add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_highbd_10_sub_pixel_variance32x16/, "$sse2_x86inc"; + specialize qw/vpx_highbd_10_sub_pixel_variance32x16 sse2/; add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_highbd_10_sub_pixel_variance16x32/, "$sse2_x86inc"; + specialize qw/vpx_highbd_10_sub_pixel_variance16x32 sse2/; add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_highbd_10_sub_pixel_variance16x16/, "$sse2_x86inc"; + specialize qw/vpx_highbd_10_sub_pixel_variance16x16 sse2/; add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_highbd_10_sub_pixel_variance16x8/, "$sse2_x86inc"; + specialize qw/vpx_highbd_10_sub_pixel_variance16x8 sse2/; add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_highbd_10_sub_pixel_variance8x16/, "$sse2_x86inc"; + specialize qw/vpx_highbd_10_sub_pixel_variance8x16 sse2/; add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_highbd_10_sub_pixel_variance8x8/, "$sse2_x86inc"; + specialize qw/vpx_highbd_10_sub_pixel_variance8x8 sse2/; add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_highbd_10_sub_pixel_variance8x4/, "$sse2_x86inc"; + specialize qw/vpx_highbd_10_sub_pixel_variance8x4 sse2/; add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_highbd_8_sub_pixel_variance64x64/, "$sse2_x86inc"; + specialize qw/vpx_highbd_8_sub_pixel_variance64x64 sse2/; add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_highbd_8_sub_pixel_variance64x32/, "$sse2_x86inc"; + specialize qw/vpx_highbd_8_sub_pixel_variance64x32 sse2/; add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_highbd_8_sub_pixel_variance32x64/, "$sse2_x86inc"; + specialize qw/vpx_highbd_8_sub_pixel_variance32x64 sse2/; add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_highbd_8_sub_pixel_variance32x32/, "$sse2_x86inc"; + specialize qw/vpx_highbd_8_sub_pixel_variance32x32 sse2/; add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_highbd_8_sub_pixel_variance32x16/, "$sse2_x86inc"; + specialize qw/vpx_highbd_8_sub_pixel_variance32x16 sse2/; add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_highbd_8_sub_pixel_variance16x32/, "$sse2_x86inc"; + specialize qw/vpx_highbd_8_sub_pixel_variance16x32 sse2/; add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_highbd_8_sub_pixel_variance16x16/, "$sse2_x86inc"; + specialize qw/vpx_highbd_8_sub_pixel_variance16x16 sse2/; add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_highbd_8_sub_pixel_variance16x8/, "$sse2_x86inc"; + specialize qw/vpx_highbd_8_sub_pixel_variance16x8 sse2/; add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_highbd_8_sub_pixel_variance8x16/, "$sse2_x86inc"; + specialize qw/vpx_highbd_8_sub_pixel_variance8x16 sse2/; add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_highbd_8_sub_pixel_variance8x8/, "$sse2_x86inc"; + specialize qw/vpx_highbd_8_sub_pixel_variance8x8 sse2/; add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/vpx_highbd_8_sub_pixel_variance8x4/, "$sse2_x86inc"; + specialize qw/vpx_highbd_8_sub_pixel_variance8x4 sse2/; add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_highbd_12_sub_pixel_avg_variance64x64/, "$sse2_x86inc"; + specialize qw/vpx_highbd_12_sub_pixel_avg_variance64x64 sse2/; add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_highbd_12_sub_pixel_avg_variance64x32/, "$sse2_x86inc"; + specialize qw/vpx_highbd_12_sub_pixel_avg_variance64x32 sse2/; add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_highbd_12_sub_pixel_avg_variance32x64/, "$sse2_x86inc"; + specialize qw/vpx_highbd_12_sub_pixel_avg_variance32x64 sse2/; add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_highbd_12_sub_pixel_avg_variance32x32/, "$sse2_x86inc"; + specialize qw/vpx_highbd_12_sub_pixel_avg_variance32x32 sse2/; add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_highbd_12_sub_pixel_avg_variance32x16/, "$sse2_x86inc"; + specialize qw/vpx_highbd_12_sub_pixel_avg_variance32x16 sse2/; add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_highbd_12_sub_pixel_avg_variance16x32/, "$sse2_x86inc"; + specialize qw/vpx_highbd_12_sub_pixel_avg_variance16x32 sse2/; add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_highbd_12_sub_pixel_avg_variance16x16/, "$sse2_x86inc"; + specialize qw/vpx_highbd_12_sub_pixel_avg_variance16x16 sse2/; add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_highbd_12_sub_pixel_avg_variance16x8/, "$sse2_x86inc"; + specialize qw/vpx_highbd_12_sub_pixel_avg_variance16x8 sse2/; add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_highbd_12_sub_pixel_avg_variance8x16/, "$sse2_x86inc"; + specialize qw/vpx_highbd_12_sub_pixel_avg_variance8x16 sse2/; add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_highbd_12_sub_pixel_avg_variance8x8/, "$sse2_x86inc"; + specialize qw/vpx_highbd_12_sub_pixel_avg_variance8x8 sse2/; add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_highbd_12_sub_pixel_avg_variance8x4/, "$sse2_x86inc"; + specialize qw/vpx_highbd_12_sub_pixel_avg_variance8x4 sse2/; add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_highbd_10_sub_pixel_avg_variance64x64/, "$sse2_x86inc"; + specialize qw/vpx_highbd_10_sub_pixel_avg_variance64x64 sse2/; add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_highbd_10_sub_pixel_avg_variance64x32/, "$sse2_x86inc"; + specialize qw/vpx_highbd_10_sub_pixel_avg_variance64x32 sse2/; add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_highbd_10_sub_pixel_avg_variance32x64/, "$sse2_x86inc"; + specialize qw/vpx_highbd_10_sub_pixel_avg_variance32x64 sse2/; add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_highbd_10_sub_pixel_avg_variance32x32/, "$sse2_x86inc"; + specialize qw/vpx_highbd_10_sub_pixel_avg_variance32x32 sse2/; add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_highbd_10_sub_pixel_avg_variance32x16/, "$sse2_x86inc"; + specialize qw/vpx_highbd_10_sub_pixel_avg_variance32x16 sse2/; add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_highbd_10_sub_pixel_avg_variance16x32/, "$sse2_x86inc"; + specialize qw/vpx_highbd_10_sub_pixel_avg_variance16x32 sse2/; add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_highbd_10_sub_pixel_avg_variance16x16/, "$sse2_x86inc"; + specialize qw/vpx_highbd_10_sub_pixel_avg_variance16x16 sse2/; add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_highbd_10_sub_pixel_avg_variance16x8/, "$sse2_x86inc"; + specialize qw/vpx_highbd_10_sub_pixel_avg_variance16x8 sse2/; add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_highbd_10_sub_pixel_avg_variance8x16/, "$sse2_x86inc"; + specialize qw/vpx_highbd_10_sub_pixel_avg_variance8x16 sse2/; add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_highbd_10_sub_pixel_avg_variance8x8/, "$sse2_x86inc"; + specialize qw/vpx_highbd_10_sub_pixel_avg_variance8x8 sse2/; add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_highbd_10_sub_pixel_avg_variance8x4/, "$sse2_x86inc"; + specialize qw/vpx_highbd_10_sub_pixel_avg_variance8x4 sse2/; add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_highbd_8_sub_pixel_avg_variance64x64/, "$sse2_x86inc"; + specialize qw/vpx_highbd_8_sub_pixel_avg_variance64x64 sse2/; add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_highbd_8_sub_pixel_avg_variance64x32/, "$sse2_x86inc"; + specialize qw/vpx_highbd_8_sub_pixel_avg_variance64x32 sse2/; add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_highbd_8_sub_pixel_avg_variance32x64/, "$sse2_x86inc"; + specialize qw/vpx_highbd_8_sub_pixel_avg_variance32x64 sse2/; add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_highbd_8_sub_pixel_avg_variance32x32/, "$sse2_x86inc"; + specialize qw/vpx_highbd_8_sub_pixel_avg_variance32x32 sse2/; add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_highbd_8_sub_pixel_avg_variance32x16/, "$sse2_x86inc"; + specialize qw/vpx_highbd_8_sub_pixel_avg_variance32x16 sse2/; add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_highbd_8_sub_pixel_avg_variance16x32/, "$sse2_x86inc"; + specialize qw/vpx_highbd_8_sub_pixel_avg_variance16x32 sse2/; add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_highbd_8_sub_pixel_avg_variance16x16/, "$sse2_x86inc"; + specialize qw/vpx_highbd_8_sub_pixel_avg_variance16x16 sse2/; add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_highbd_8_sub_pixel_avg_variance16x8/, "$sse2_x86inc"; + specialize qw/vpx_highbd_8_sub_pixel_avg_variance16x8 sse2/; add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_highbd_8_sub_pixel_avg_variance8x16/, "$sse2_x86inc"; + specialize qw/vpx_highbd_8_sub_pixel_avg_variance8x16 sse2/; add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_highbd_8_sub_pixel_avg_variance8x8/, "$sse2_x86inc"; + specialize qw/vpx_highbd_8_sub_pixel_avg_variance8x8 sse2/; add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - specialize qw/vpx_highbd_8_sub_pixel_avg_variance8x4/, "$sse2_x86inc"; + specialize qw/vpx_highbd_8_sub_pixel_avg_variance8x4 sse2/; add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; diff --git a/vpx_dsp/x86/highbd_variance_sse2.c b/vpx_dsp/x86/highbd_variance_sse2.c index 14d029c9a..0d62adf8b 100644 --- a/vpx_dsp/x86/highbd_variance_sse2.c +++ b/vpx_dsp/x86/highbd_variance_sse2.c @@ -143,24 +143,28 @@ uint32_t vpx_highbd_10_variance##w##x##h##_sse2( \ const uint8_t *src8, int src_stride, \ const uint8_t *ref8, int ref_stride, uint32_t *sse) { \ int sum; \ + int64_t var; \ uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ highbd_10_variance_sse2( \ src, src_stride, ref, ref_stride, w, h, sse, &sum, \ vpx_highbd_calc##block_size##x##block_size##var_sse2, block_size); \ - return *sse - (((int64_t)sum * sum) >> shift); \ + var = (int64_t)(*sse) - (((int64_t)sum * sum) >> shift); \ + return (var >= 0) ? (uint32_t)var : 0; \ } \ \ uint32_t vpx_highbd_12_variance##w##x##h##_sse2( \ const uint8_t *src8, int src_stride, \ const uint8_t *ref8, int ref_stride, uint32_t *sse) { \ int sum; \ + int64_t var; \ uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ highbd_12_variance_sse2( \ src, src_stride, ref, ref_stride, w, h, sse, &sum, \ vpx_highbd_calc##block_size##x##block_size##var_sse2, block_size); \ - return *sse - (((int64_t)sum * sum) >> shift); \ + var = (int64_t)(*sse) - (((int64_t)sum * sum) >> shift); \ + return (var >= 0) ? (uint32_t)var : 0; \ } VAR_FN(64, 64, 16, 12); @@ -242,7 +246,6 @@ unsigned int vpx_highbd_12_mse8x8_sse2(const uint8_t *src8, int src_stride, return *sse; } -#if CONFIG_USE_X86INC // The 2 unused parameters are place holders for PIC enabled build. // These definitions are for functions defined in // highbd_subpel_variance_impl_sse2.asm @@ -589,4 +592,3 @@ FNS(sse2); #undef FNS #undef FN -#endif // CONFIG_USE_X86INC diff --git a/vpx_dsp/x86/variance_sse2.c b/vpx_dsp/x86/variance_sse2.c index 6987c2e24..92dc752c0 100644 --- a/vpx_dsp/x86/variance_sse2.c +++ b/vpx_dsp/x86/variance_sse2.c @@ -308,7 +308,6 @@ unsigned int vpx_mse16x16_sse2(const uint8_t *src, int src_stride, return *sse; } -#if CONFIG_USE_X86INC // The 2 unused parameters are place holders for PIC enabled build. // These definitions are for functions defined in subpel_variance.asm #define DECL(w, opt) \ @@ -474,4 +473,3 @@ FNS(ssse3, ssse3); #undef FNS #undef FN -#endif // CONFIG_USE_X86INC diff --git a/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c b/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c index b71867853..cbd22dcd0 100644 --- a/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c +++ b/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c @@ -8,10 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. */ -// Due to a header conflict between math.h and intrinsics includes with ceil() -// in certain configurations under vs9 this include needs to precede -// immintrin.h. - #include <immintrin.h> #include "./vpx_dsp_rtcd.h" diff --git a/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c b/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c index 6fd52087c..2bb0f4a24 100644 --- a/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c +++ b/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c @@ -8,10 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. */ -// Due to a header conflict between math.h and intrinsics includes with ceil() -// in certain configurations under vs9 this include needs to precede -// tmmintrin.h. - #include <tmmintrin.h> #include "./vpx_dsp_rtcd.h" diff --git a/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm b/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm index 9cc4f9d7d..c1a6f23ab 100644 --- a/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm +++ b/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm @@ -23,11 +23,7 @@ pw_64: times 8 dw 64 ; z = signed SAT(x + y) SECTION .text -%if ARCH_X86_64 - %define LOCAL_VARS_SIZE 16*4 -%else - %define LOCAL_VARS_SIZE 16*6 -%endif +%define LOCAL_VARS_SIZE 16*6 %macro SETUP_LOCAL_VARS 0 ; TODO(slavarnway): using xmm registers for these on ARCH_X86_64 + @@ -54,11 +50,11 @@ SECTION .text mova k6k7, m3 %if ARCH_X86_64 %define krd m12 - %define tmp m13 + %define tmp0 [rsp + 16*4] + %define tmp1 [rsp + 16*5] mova krd, [GLOBAL(pw_64)] %else - %define tmp [rsp + 16*4] - %define krd [rsp + 16*5] + %define krd [rsp + 16*4] %if CONFIG_PIC=0 mova m6, [GLOBAL(pw_64)] %else @@ -71,50 +67,31 @@ SECTION .text %endif %endm -%macro HORIZx4_ROW 2 - mova %2, %1 - punpcklbw %1, %1 - punpckhbw %2, %2 - - mova m3, %2 - palignr %2, %1, 1 - palignr m3, %1, 5 - - pmaddubsw %2, k0k1k4k5 - pmaddubsw m3, k2k3k6k7 - mova m4, %2 ;k0k1 - mova m5, m3 ;k2k3 - psrldq %2, 8 ;k4k5 - psrldq m3, 8 ;k6k7 - paddsw %2, m4 - paddsw m5, m3 - paddsw %2, m5 - paddsw %2, krd - psraw %2, 7 - packuswb %2, %2 -%endm - ;------------------------------------------------------------------------------- +%if ARCH_X86_64 + %define LOCAL_VARS_SIZE_H4 0 +%else + %define LOCAL_VARS_SIZE_H4 16*4 +%endif + %macro SUBPIX_HFILTER4 1 -cglobal filter_block1d4_%1, 6, 6+(ARCH_X86_64*2), 11, LOCAL_VARS_SIZE, \ +cglobal filter_block1d4_%1, 6, 6, 11, LOCAL_VARS_SIZE_H4, \ src, sstride, dst, dstride, height, filter mova m4, [filterq] packsswb m4, m4 %if ARCH_X86_64 - %define k0k1k4k5 m8 - %define k2k3k6k7 m9 - %define krd m10 - %define orig_height r7d + %define k0k1k4k5 m8 + %define k2k3k6k7 m9 + %define krd m10 mova krd, [GLOBAL(pw_64)] pshuflw k0k1k4k5, m4, 0b ;k0_k1 pshufhw k0k1k4k5, k0k1k4k5, 10101010b ;k0_k1_k4_k5 pshuflw k2k3k6k7, m4, 01010101b ;k2_k3 pshufhw k2k3k6k7, k2k3k6k7, 11111111b ;k2_k3_k6_k7 %else - %define k0k1k4k5 [rsp + 16*0] - %define k2k3k6k7 [rsp + 16*1] - %define krd [rsp + 16*2] - %define orig_height [rsp + 16*3] + %define k0k1k4k5 [rsp + 16*0] + %define k2k3k6k7 [rsp + 16*1] + %define krd [rsp + 16*2] pshuflw m6, m4, 0b ;k0_k1 pshufhw m6, m6, 10101010b ;k0_k1_k4_k5 pshuflw m7, m4, 01010101b ;k2_k3 @@ -131,61 +108,46 @@ cglobal filter_block1d4_%1, 6, 6+(ARCH_X86_64*2), 11, LOCAL_VARS_SIZE, \ mova k2k3k6k7, m7 mova krd, m1 %endif - mov orig_height, heightd - shr heightd, 1 + dec heightd + .loop: ;Do two rows at once - movh m0, [srcq - 3] - movh m1, [srcq + 5] - punpcklqdq m0, m1 - mova m1, m0 - movh m2, [srcq + sstrideq - 3] - movh m3, [srcq + sstrideq + 5] - punpcklqdq m2, m3 - mova m3, m2 - punpcklbw m0, m0 - punpckhbw m1, m1 - punpcklbw m2, m2 - punpckhbw m3, m3 - mova m4, m1 - palignr m4, m0, 1 - pmaddubsw m4, k0k1k4k5 - palignr m1, m0, 5 + movu m4, [srcq - 3] + movu m5, [srcq + sstrideq - 3] + punpckhbw m1, m4, m4 + punpcklbw m4, m4 + punpckhbw m3, m5, m5 + punpcklbw m5, m5 + palignr m0, m1, m4, 1 + pmaddubsw m0, k0k1k4k5 + palignr m1, m4, 5 pmaddubsw m1, k2k3k6k7 - mova m7, m3 - palignr m7, m2, 1 - pmaddubsw m7, k0k1k4k5 - palignr m3, m2, 5 + palignr m2, m3, m5, 1 + pmaddubsw m2, k0k1k4k5 + palignr m3, m5, 5 pmaddubsw m3, k2k3k6k7 - mova m0, m4 ;k0k1 - mova m5, m1 ;k2k3 - mova m2, m7 ;k0k1 upper - psrldq m4, 8 ;k4k5 - psrldq m1, 8 ;k6k7 - paddsw m4, m0 - paddsw m5, m1 - mova m1, m3 ;k2k3 upper - psrldq m7, 8 ;k4k5 upper - psrldq m3, 8 ;k6k7 upper - paddsw m7, m2 - paddsw m4, m5 - paddsw m1, m3 - paddsw m7, m1 - paddsw m4, krd - psraw m4, 7 - packuswb m4, m4 - paddsw m7, krd - psraw m7, 7 - packuswb m7, m7 + punpckhqdq m4, m0, m2 + punpcklqdq m0, m2 + punpckhqdq m5, m1, m3 + punpcklqdq m1, m3 + paddsw m0, m4 + paddsw m1, m5 +%ifidn %1, h8_avg + movd m4, [dstq] + movd m5, [dstq + dstrideq] +%endif + paddsw m0, m1 + paddsw m0, krd + psraw m0, 7 + packuswb m0, m0 + psrldq m1, m0, 4 %ifidn %1, h8_avg - movd m0, [dstq] - pavgb m4, m0 - movd m2, [dstq + dstrideq] - pavgb m7, m2 + pavgb m0, m4 + pavgb m1, m5 %endif - movd [dstq], m4 - movd [dstq + dstrideq], m7 + movd [dstq], m0 + movd [dstq + dstrideq], m1 lea srcq, [srcq + sstrideq ] prefetcht0 [srcq + 4 * sstrideq - 3] @@ -193,205 +155,156 @@ cglobal filter_block1d4_%1, 6, 6+(ARCH_X86_64*2), 11, LOCAL_VARS_SIZE, \ lea dstq, [dstq + 2 * dstrideq ] prefetcht0 [srcq + 2 * sstrideq - 3] - dec heightd - jnz .loop + sub heightd, 2 + jg .loop ; Do last row if output_height is odd - mov heightd, orig_height - and heightd, 1 - je .done - - movh m0, [srcq - 3] ; load src - movh m1, [srcq + 5] - punpcklqdq m0, m1 - - HORIZx4_ROW m0, m1 + jne .done + + movu m4, [srcq - 3] + punpckhbw m1, m4, m4 + punpcklbw m4, m4 + palignr m0, m1, m4, 1 + palignr m1, m4, 5 + pmaddubsw m0, k0k1k4k5 + pmaddubsw m1, k2k3k6k7 + psrldq m2, m0, 8 + psrldq m3, m1, 8 + paddsw m0, m2 + paddsw m1, m3 + paddsw m0, m1 + paddsw m0, krd + psraw m0, 7 + packuswb m0, m0 %ifidn %1, h8_avg - movd m0, [dstq] - pavgb m1, m0 + movd m4, [dstq] + pavgb m0, m4 %endif - movd [dstq], m1 + movd [dstq], m0 .done: - RET -%endm - -%macro HORIZx8_ROW 5 - mova %2, %1 - punpcklbw %1, %1 - punpckhbw %2, %2 - - mova %3, %2 - mova %4, %2 - mova %5, %2 - - palignr %2, %1, 1 - palignr %3, %1, 5 - palignr %4, %1, 9 - palignr %5, %1, 13 - - pmaddubsw %2, k0k1 - pmaddubsw %3, k2k3 - pmaddubsw %4, k4k5 - pmaddubsw %5, k6k7 - paddsw %2, %4 - paddsw %5, %3 - paddsw %2, %5 - paddsw %2, krd - psraw %2, 7 - packuswb %2, %2 - SWAP %1, %2 + REP_RET %endm ;------------------------------------------------------------------------------- %macro SUBPIX_HFILTER8 1 -cglobal filter_block1d8_%1, 6, 6+(ARCH_X86_64*1), 14, LOCAL_VARS_SIZE, \ +cglobal filter_block1d8_%1, 6, 6, 14, LOCAL_VARS_SIZE, \ src, sstride, dst, dstride, height, filter mova m4, [filterq] SETUP_LOCAL_VARS -%if ARCH_X86_64 - %define orig_height r7d -%else - %define orig_height heightmp -%endif - mov orig_height, heightd - shr heightd, 1 + dec heightd .loop: - movh m0, [srcq - 3] - movh m3, [srcq + 5] - movh m4, [srcq + sstrideq - 3] - movh m7, [srcq + sstrideq + 5] - punpcklqdq m0, m3 - mova m1, m0 + ;Do two rows at once + movu m0, [srcq - 3] + movu m4, [srcq + sstrideq - 3] + punpckhbw m1, m0, m0 punpcklbw m0, m0 - punpckhbw m1, m1 - mova m5, m1 - palignr m5, m0, 13 + palignr m5, m1, m0, 13 pmaddubsw m5, k6k7 - mova m2, m1 - mova m3, m1 + palignr m2, m1, m0, 5 + palignr m3, m1, m0, 9 palignr m1, m0, 1 pmaddubsw m1, k0k1 - punpcklqdq m4, m7 - mova m6, m4 + punpckhbw m6, m4, m4 punpcklbw m4, m4 - palignr m2, m0, 5 - punpckhbw m6, m6 - palignr m3, m0, 9 - mova m7, m6 pmaddubsw m2, k2k3 pmaddubsw m3, k4k5 - palignr m7, m4, 13 - mova m0, m6 - palignr m0, m4, 5 + palignr m7, m6, m4, 13 + palignr m0, m6, m4, 5 pmaddubsw m7, k6k7 paddsw m1, m3 paddsw m2, m5 paddsw m1, m2 - mova m5, m6 +%ifidn %1, h8_avg + movh m2, [dstq] + movhps m2, [dstq + dstrideq] +%endif + palignr m5, m6, m4, 9 palignr m6, m4, 1 pmaddubsw m0, k2k3 pmaddubsw m6, k0k1 - palignr m5, m4, 9 paddsw m1, krd pmaddubsw m5, k4k5 psraw m1, 7 paddsw m0, m7 -%ifidn %1, h8_avg - movh m7, [dstq] - movh m2, [dstq + dstrideq] -%endif - packuswb m1, m1 paddsw m6, m5 paddsw m6, m0 paddsw m6, krd psraw m6, 7 - packuswb m6, m6 + packuswb m1, m6 %ifidn %1, h8_avg - pavgb m1, m7 - pavgb m6, m2 + pavgb m1, m2 %endif - movh [dstq], m1 - movh [dstq + dstrideq], m6 + movh [dstq], m1 + movhps [dstq + dstrideq], m1 lea srcq, [srcq + sstrideq ] prefetcht0 [srcq + 4 * sstrideq - 3] lea srcq, [srcq + sstrideq ] lea dstq, [dstq + 2 * dstrideq ] prefetcht0 [srcq + 2 * sstrideq - 3] - dec heightd - jnz .loop - - ;Do last row if output_height is odd - mov heightd, orig_height - and heightd, 1 - je .done + sub heightd, 2 + jg .loop - movh m0, [srcq - 3] - movh m3, [srcq + 5] - punpcklqdq m0, m3 - - HORIZx8_ROW m0, m1, m2, m3, m4 + ; Do last row if output_height is odd + jne .done + movu m0, [srcq - 3] + punpckhbw m3, m0, m0 + punpcklbw m0, m0 + palignr m1, m3, m0, 1 + palignr m2, m3, m0, 5 + palignr m4, m3, m0, 13 + palignr m3, m0, 9 + pmaddubsw m1, k0k1 + pmaddubsw m2, k2k3 + pmaddubsw m3, k4k5 + pmaddubsw m4, k6k7 + paddsw m1, m3 + paddsw m4, m2 + paddsw m1, m4 + paddsw m1, krd + psraw m1, 7 + packuswb m1, m1 %ifidn %1, h8_avg - movh m1, [dstq] - pavgb m0, m1 + movh m0, [dstq] + pavgb m1, m0 %endif - movh [dstq], m0 + movh [dstq], m1 .done: - RET + REP_RET %endm ;------------------------------------------------------------------------------- %macro SUBPIX_HFILTER16 1 -cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*0), 14, LOCAL_VARS_SIZE, \ +cglobal filter_block1d16_%1, 6, 6, 14, LOCAL_VARS_SIZE, \ src, sstride, dst, dstride, height, filter mova m4, [filterq] SETUP_LOCAL_VARS + .loop: prefetcht0 [srcq + 2 * sstrideq -3] - movh m0, [srcq - 3] - movh m4, [srcq + 5] - movh m6, [srcq + 13] - punpcklqdq m0, m4 - mova m7, m0 - punpckhbw m0, m0 - mova m1, m0 - punpcklqdq m4, m6 - mova m3, m0 - punpcklbw m7, m7 - - palignr m3, m7, 13 - mova m2, m0 - pmaddubsw m3, k6k7 - palignr m0, m7, 1 + movu m0, [srcq - 3] + movu m4, [srcq - 2] pmaddubsw m0, k0k1 - palignr m1, m7, 5 - pmaddubsw m1, k2k3 - palignr m2, m7, 9 - pmaddubsw m2, k4k5 - paddsw m1, m3 - mova m3, m4 - punpckhbw m4, m4 - mova m5, m4 - punpcklbw m3, m3 - mova m7, m4 - palignr m5, m3, 5 - mova m6, m4 - palignr m4, m3, 1 pmaddubsw m4, k0k1 + movu m1, [srcq - 1] + movu m5, [srcq + 0] + pmaddubsw m1, k2k3 pmaddubsw m5, k2k3 - palignr m6, m3, 9 + movu m2, [srcq + 1] + movu m6, [srcq + 2] + pmaddubsw m2, k4k5 pmaddubsw m6, k4k5 - palignr m7, m3, 13 + movu m3, [srcq + 3] + movu m7, [srcq + 4] + pmaddubsw m3, k6k7 pmaddubsw m7, k6k7 paddsw m0, m2 + paddsw m1, m3 paddsw m0, m1 -%ifidn %1, h8_avg - mova m1, [dstq] -%endif paddsw m4, m6 paddsw m5, m7 paddsw m4, m5 @@ -399,16 +312,18 @@ cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*0), 14, LOCAL_VARS_SIZE, \ paddsw m4, krd psraw m0, 7 psraw m4, 7 - packuswb m0, m4 + packuswb m0, m0 + packuswb m4, m4 + punpcklbw m0, m4 %ifidn %1, h8_avg - pavgb m0, m1 + pavgb m0, [dstq] %endif lea srcq, [srcq + sstrideq] mova [dstq], m0 lea dstq, [dstq + dstrideq] dec heightd jnz .loop - RET + REP_RET %endm INIT_XMM ssse3 @@ -420,204 +335,463 @@ SUBPIX_HFILTER4 h8 SUBPIX_HFILTER4 h8_avg ;------------------------------------------------------------------------------- + +; TODO(Linfeng): Detect cpu type and choose the code with better performance. +%define X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON 1 + +%if ARCH_X86_64 && X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON + %define NUM_GENERAL_REG_USED 9 +%else + %define NUM_GENERAL_REG_USED 6 +%endif + %macro SUBPIX_VFILTER 2 -cglobal filter_block1d%2_%1, 6, 6+(ARCH_X86_64*3), 14, LOCAL_VARS_SIZE, \ +cglobal filter_block1d%2_%1, 6, NUM_GENERAL_REG_USED, 15, LOCAL_VARS_SIZE, \ src, sstride, dst, dstride, height, filter mova m4, [filterq] SETUP_LOCAL_VARS -%if ARCH_X86_64 - %define src1q r7 - %define sstride6q r8 - %define dst_stride dstrideq + +%ifidn %2, 8 + %define movx movh %else - %define src1q filterq - %define sstride6q dstrideq - %define dst_stride dstridemp + %define movx movd %endif - mov src1q, srcq - add src1q, sstrideq - lea sstride6q, [sstrideq + sstrideq * 4] - add sstride6q, sstrideq ;pitch * 6 -%ifidn %2, 8 - %define movx movh + dec heightd + +%if ARCH_X86 || X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON + +%if ARCH_X86_64 + %define src1q r7 + %define sstride6q r8 + %define dst_stride dstrideq %else - %define movx movd + %define src1q filterq + %define sstride6q dstrideq + %define dst_stride dstridemp %endif + mov src1q, srcq + add src1q, sstrideq + lea sstride6q, [sstrideq + sstrideq * 4] + add sstride6q, sstrideq ;pitch * 6 + .loop: - movx m0, [srcq ] ;A - movx m1, [srcq + sstrideq ] ;B - punpcklbw m0, m1 ;A B - movx m2, [srcq + sstrideq * 2 ] ;C - pmaddubsw m0, k0k1 - mova m6, m2 - movx m3, [src1q + sstrideq * 2] ;D - punpcklbw m2, m3 ;C D - pmaddubsw m2, k2k3 - movx m4, [srcq + sstrideq * 4 ] ;E - mova m7, m4 - movx m5, [src1q + sstrideq * 4] ;F - punpcklbw m4, m5 ;E F - pmaddubsw m4, k4k5 - punpcklbw m1, m6 ;A B next iter - movx m6, [srcq + sstride6q ] ;G - punpcklbw m5, m6 ;E F next iter - punpcklbw m3, m7 ;C D next iter - pmaddubsw m5, k4k5 - movx m7, [src1q + sstride6q ] ;H - punpcklbw m6, m7 ;G H - pmaddubsw m6, k6k7 - pmaddubsw m3, k2k3 - pmaddubsw m1, k0k1 - paddsw m0, m4 - paddsw m2, m6 - movx m6, [srcq + sstrideq * 8 ] ;H next iter - punpcklbw m7, m6 - pmaddubsw m7, k6k7 - paddsw m0, m2 - paddsw m0, krd - psraw m0, 7 - paddsw m1, m5 - packuswb m0, m0 - - paddsw m3, m7 - paddsw m1, m3 - paddsw m1, krd - psraw m1, 7 - lea srcq, [srcq + sstrideq * 2 ] - lea src1q, [src1q + sstrideq * 2] - packuswb m1, m1 + ;Do two rows at once + movx m0, [srcq ] ;A + movx m1, [src1q ] ;B + punpcklbw m0, m1 ;A B + movx m2, [srcq + sstrideq * 2 ] ;C + pmaddubsw m0, k0k1 + mova m6, m2 + movx m3, [src1q + sstrideq * 2] ;D + punpcklbw m2, m3 ;C D + pmaddubsw m2, k2k3 + movx m4, [srcq + sstrideq * 4 ] ;E + mova m7, m4 + movx m5, [src1q + sstrideq * 4] ;F + punpcklbw m4, m5 ;E F + pmaddubsw m4, k4k5 + punpcklbw m1, m6 ;A B next iter + movx m6, [srcq + sstride6q ] ;G + punpcklbw m5, m6 ;E F next iter + punpcklbw m3, m7 ;C D next iter + pmaddubsw m5, k4k5 + movx m7, [src1q + sstride6q ] ;H + punpcklbw m6, m7 ;G H + pmaddubsw m6, k6k7 + pmaddubsw m3, k2k3 + pmaddubsw m1, k0k1 + paddsw m0, m4 + paddsw m2, m6 + movx m6, [srcq + sstrideq * 8 ] ;H next iter + punpcklbw m7, m6 + pmaddubsw m7, k6k7 + paddsw m0, m2 + paddsw m0, krd + psraw m0, 7 + paddsw m1, m5 + packuswb m0, m0 + + paddsw m3, m7 + paddsw m1, m3 + paddsw m1, krd + psraw m1, 7 + lea srcq, [srcq + sstrideq * 2 ] + lea src1q, [src1q + sstrideq * 2] + packuswb m1, m1 + +%ifidn %1, v8_avg + movx m2, [dstq] + pavgb m0, m2 +%endif + movx [dstq], m0 + add dstq, dst_stride +%ifidn %1, v8_avg + movx m3, [dstq] + pavgb m1, m3 +%endif + movx [dstq], m1 + add dstq, dst_stride + sub heightd, 2 + jg .loop + ; Do last row if output_height is odd + jne .done + + movx m0, [srcq ] ;A + movx m1, [srcq + sstrideq ] ;B + movx m6, [srcq + sstride6q ] ;G + punpcklbw m0, m1 ;A B + movx m7, [src1q + sstride6q ] ;H + pmaddubsw m0, k0k1 + movx m2, [srcq + sstrideq * 2 ] ;C + punpcklbw m6, m7 ;G H + movx m3, [src1q + sstrideq * 2] ;D + pmaddubsw m6, k6k7 + movx m4, [srcq + sstrideq * 4 ] ;E + punpcklbw m2, m3 ;C D + movx m5, [src1q + sstrideq * 4] ;F + punpcklbw m4, m5 ;E F + pmaddubsw m2, k2k3 + pmaddubsw m4, k4k5 + paddsw m2, m6 + paddsw m0, m4 + paddsw m0, m2 + paddsw m0, krd + psraw m0, 7 + packuswb m0, m0 %ifidn %1, v8_avg - movx m2, [dstq] - pavgb m0, m2 + movx m1, [dstq] + pavgb m0, m1 %endif - movx [dstq], m0 - add dstq, dst_stride + movx [dstq], m0 + +%else + ; ARCH_X86_64 + + movx m0, [srcq ] ;A + movx m1, [srcq + sstrideq ] ;B + lea srcq, [srcq + sstrideq * 2 ] + movx m2, [srcq] ;C + movx m3, [srcq + sstrideq] ;D + lea srcq, [srcq + sstrideq * 2 ] + movx m4, [srcq] ;E + movx m5, [srcq + sstrideq] ;F + lea srcq, [srcq + sstrideq * 2 ] + movx m6, [srcq] ;G + punpcklbw m0, m1 ;A B + punpcklbw m1, m2 ;A B next iter + punpcklbw m2, m3 ;C D + punpcklbw m3, m4 ;C D next iter + punpcklbw m4, m5 ;E F + punpcklbw m5, m6 ;E F next iter + +.loop: + ;Do two rows at once + movx m7, [srcq + sstrideq] ;H + lea srcq, [srcq + sstrideq * 2 ] + movx m14, [srcq] ;H next iter + punpcklbw m6, m7 ;G H + punpcklbw m7, m14 ;G H next iter + pmaddubsw m8, m0, k0k1 + pmaddubsw m9, m1, k0k1 + mova m0, m2 + mova m1, m3 + pmaddubsw m10, m2, k2k3 + pmaddubsw m11, m3, k2k3 + mova m2, m4 + mova m3, m5 + pmaddubsw m4, k4k5 + pmaddubsw m5, k4k5 + paddsw m8, m4 + paddsw m9, m5 + mova m4, m6 + mova m5, m7 + pmaddubsw m6, k6k7 + pmaddubsw m7, k6k7 + paddsw m10, m6 + paddsw m11, m7 + paddsw m8, m10 + paddsw m9, m11 + mova m6, m14 + paddsw m8, krd + paddsw m9, krd + psraw m8, 7 + psraw m9, 7 +%ifidn %2, 4 + packuswb m8, m8 + packuswb m9, m9 +%else + packuswb m8, m9 +%endif + %ifidn %1, v8_avg - movx m3, [dstq] - pavgb m1, m3 + movx m7, [dstq] +%ifidn %2, 4 + movx m10, [dstq + dstrideq] + pavgb m9, m10 +%else + movhpd m7, [dstq + dstrideq] +%endif + pavgb m8, m7 %endif - movx [dstq], m1 - add dstq, dst_stride - sub heightd, 2 - cmp heightd, 1 - jg .loop - - cmp heightd, 0 - je .done - - movx m0, [srcq ] ;A - movx m1, [srcq + sstrideq ] ;B - movx m6, [srcq + sstride6q ] ;G - punpcklbw m0, m1 ;A B - movx m7, [src1q + sstride6q ] ;H - pmaddubsw m0, k0k1 - movx m2, [srcq + sstrideq * 2 ] ;C - punpcklbw m6, m7 ;G H - movx m3, [src1q + sstrideq * 2] ;D - pmaddubsw m6, k6k7 - movx m4, [srcq + sstrideq * 4 ] ;E - punpcklbw m2, m3 ;C D - movx m5, [src1q + sstrideq * 4] ;F - punpcklbw m4, m5 ;E F - pmaddubsw m2, k2k3 - pmaddubsw m4, k4k5 - paddsw m2, m6 - paddsw m0, m4 - paddsw m0, m2 - paddsw m0, krd - psraw m0, 7 - packuswb m0, m0 + movx [dstq], m8 +%ifidn %2, 4 + movx [dstq + dstrideq], m9 +%else + movhpd [dstq + dstrideq], m8 +%endif + + lea dstq, [dstq + dstrideq * 2 ] + sub heightd, 2 + jg .loop + + ; Do last row if output_height is odd + jne .done + + movx m7, [srcq + sstrideq] ;H + punpcklbw m6, m7 ;G H + pmaddubsw m0, k0k1 + pmaddubsw m2, k2k3 + pmaddubsw m4, k4k5 + pmaddubsw m6, k6k7 + paddsw m0, m4 + paddsw m2, m6 + paddsw m0, m2 + paddsw m0, krd + psraw m0, 7 + packuswb m0, m0 %ifidn %1, v8_avg - movx m1, [dstq] - pavgb m0, m1 + movx m1, [dstq] + pavgb m0, m1 %endif - movx [dstq], m0 + movx [dstq], m0 + +%endif ; ARCH_X86_64 + .done: - RET + REP_RET + %endm ;------------------------------------------------------------------------------- %macro SUBPIX_VFILTER16 1 -cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*3), 14, LOCAL_VARS_SIZE, \ +cglobal filter_block1d16_%1, 6, NUM_GENERAL_REG_USED, 16, LOCAL_VARS_SIZE, \ src, sstride, dst, dstride, height, filter - mova m4, [filterq] + mova m4, [filterq] SETUP_LOCAL_VARS + +%if ARCH_X86 || X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON + %if ARCH_X86_64 - %define src1q r7 - %define sstride6q r8 - %define dst_stride dstrideq + %define src1q r7 + %define sstride6q r8 + %define dst_stride dstrideq %else - %define src1q filterq - %define sstride6q dstrideq - %define dst_stride dstridemp + %define src1q filterq + %define sstride6q dstrideq + %define dst_stride dstridemp %endif - mov src1q, srcq - add src1q, sstrideq - lea sstride6q, [sstrideq + sstrideq * 4] - add sstride6q, sstrideq ;pitch * 6 + lea src1q, [srcq + sstrideq] + lea sstride6q, [sstrideq + sstrideq * 4] + add sstride6q, sstrideq ;pitch * 6 .loop: - movh m0, [srcq ] ;A - movh m1, [srcq + sstrideq ] ;B - movh m2, [srcq + sstrideq * 2 ] ;C - movh m3, [src1q + sstrideq * 2] ;D - movh m4, [srcq + sstrideq * 4 ] ;E - movh m5, [src1q + sstrideq * 4] ;F - - punpcklbw m0, m1 ;A B - movh m6, [srcq + sstride6q] ;G - punpcklbw m2, m3 ;C D - movh m7, [src1q + sstride6q] ;H - punpcklbw m4, m5 ;E F - pmaddubsw m0, k0k1 - movh m3, [srcq + 8] ;A - pmaddubsw m2, k2k3 - punpcklbw m6, m7 ;G H - movh m5, [srcq + sstrideq + 8] ;B - pmaddubsw m4, k4k5 - punpcklbw m3, m5 ;A B - movh m7, [srcq + sstrideq * 2 + 8] ;C - pmaddubsw m6, k6k7 - movh m5, [src1q + sstrideq * 2 + 8] ;D - punpcklbw m7, m5 ;C D - paddsw m2, m6 - pmaddubsw m3, k0k1 - movh m1, [srcq + sstrideq * 4 + 8] ;E - paddsw m0, m4 - pmaddubsw m7, k2k3 - movh m6, [src1q + sstrideq * 4 + 8] ;F - punpcklbw m1, m6 ;E F - paddsw m0, m2 - paddsw m0, krd - movh m2, [srcq + sstride6q + 8] ;G - pmaddubsw m1, k4k5 - movh m5, [src1q + sstride6q + 8] ;H - psraw m0, 7 - punpcklbw m2, m5 ;G H - pmaddubsw m2, k6k7 + movh m0, [srcq ] ;A + movh m1, [src1q ] ;B + movh m2, [srcq + sstrideq * 2 ] ;C + movh m3, [src1q + sstrideq * 2] ;D + movh m4, [srcq + sstrideq * 4 ] ;E + movh m5, [src1q + sstrideq * 4] ;F + + punpcklbw m0, m1 ;A B + movh m6, [srcq + sstride6q] ;G + punpcklbw m2, m3 ;C D + movh m7, [src1q + sstride6q] ;H + punpcklbw m4, m5 ;E F + pmaddubsw m0, k0k1 + movh m3, [srcq + 8] ;A + pmaddubsw m2, k2k3 + punpcklbw m6, m7 ;G H + movh m5, [srcq + sstrideq + 8] ;B + pmaddubsw m4, k4k5 + punpcklbw m3, m5 ;A B + movh m7, [srcq + sstrideq * 2 + 8] ;C + pmaddubsw m6, k6k7 + movh m5, [src1q + sstrideq * 2 + 8] ;D + punpcklbw m7, m5 ;C D + paddsw m2, m6 + pmaddubsw m3, k0k1 + movh m1, [srcq + sstrideq * 4 + 8] ;E + paddsw m0, m4 + pmaddubsw m7, k2k3 + movh m6, [src1q + sstrideq * 4 + 8] ;F + punpcklbw m1, m6 ;E F + paddsw m0, m2 + paddsw m0, krd + movh m2, [srcq + sstride6q + 8] ;G + pmaddubsw m1, k4k5 + movh m5, [src1q + sstride6q + 8] ;H + psraw m0, 7 + punpcklbw m2, m5 ;G H + pmaddubsw m2, k6k7 + paddsw m7, m2 + paddsw m3, m1 + paddsw m3, m7 + paddsw m3, krd + psraw m3, 7 + packuswb m0, m3 + + add srcq, sstrideq + add src1q, sstrideq +%ifidn %1, v8_avg + pavgb m0, [dstq] +%endif + mova [dstq], m0 + add dstq, dst_stride + dec heightd + jnz .loop + REP_RET + +%else + ; ARCH_X86_64 + dec heightd + + movu m1, [srcq ] ;A + movu m3, [srcq + sstrideq ] ;B + lea srcq, [srcq + sstrideq * 2] + punpcklbw m0, m1, m3 ;A B + punpckhbw m1, m3 ;A B + movu m5, [srcq] ;C + punpcklbw m2, m3, m5 ;A B next iter + punpckhbw m3, m5 ;A B next iter + mova tmp0, m2 ;store to stack + mova tmp1, m3 ;store to stack + movu m7, [srcq + sstrideq] ;D + lea srcq, [srcq + sstrideq * 2] + punpcklbw m4, m5, m7 ;C D + punpckhbw m5, m7 ;C D + movu m9, [srcq] ;E + punpcklbw m6, m7, m9 ;C D next iter + punpckhbw m7, m9 ;C D next iter + movu m11, [srcq + sstrideq] ;F + lea srcq, [srcq + sstrideq * 2] + punpcklbw m8, m9, m11 ;E F + punpckhbw m9, m11 ;E F + movu m2, [srcq] ;G + punpcklbw m10, m11, m2 ;E F next iter + punpckhbw m11, m2 ;E F next iter + +.loop: + ;Do two rows at once + pmaddubsw m13, m0, k0k1 + mova m0, m4 + pmaddubsw m14, m8, k4k5 + pmaddubsw m15, m4, k2k3 + mova m4, m8 + paddsw m13, m14 + movu m3, [srcq + sstrideq] ;H + lea srcq, [srcq + sstrideq * 2] + punpcklbw m14, m2, m3 ;G H + mova m8, m14 + pmaddubsw m14, k6k7 + paddsw m15, m14 + paddsw m13, m15 + paddsw m13, krd + psraw m13, 7 + + pmaddubsw m14, m1, k0k1 + pmaddubsw m1, m9, k4k5 + pmaddubsw m15, m5, k2k3 + paddsw m14, m1 + mova m1, m5 + mova m5, m9 + punpckhbw m2, m3 ;G H + mova m9, m2 + pmaddubsw m2, k6k7 + paddsw m15, m2 + paddsw m14, m15 + paddsw m14, krd + psraw m14, 7 + packuswb m13, m14 %ifidn %1, v8_avg - mova m4, [dstq] + pavgb m13, [dstq] %endif - movh [dstq], m0 - paddsw m7, m2 - paddsw m3, m1 - paddsw m3, m7 - paddsw m3, krd - psraw m3, 7 - packuswb m0, m3 - - add srcq, sstrideq - add src1q, sstrideq + mova [dstq], m13 + + ; next iter + pmaddubsw m15, tmp0, k0k1 + pmaddubsw m14, m10, k4k5 + pmaddubsw m13, m6, k2k3 + paddsw m15, m14 + mova tmp0, m6 + mova m6, m10 + movu m2, [srcq] ;G next iter + punpcklbw m14, m3, m2 ;G H next iter + mova m10, m14 + pmaddubsw m14, k6k7 + paddsw m13, m14 + paddsw m15, m13 + paddsw m15, krd + psraw m15, 7 + + pmaddubsw m14, tmp1, k0k1 + mova tmp1, m7 + pmaddubsw m13, m7, k2k3 + mova m7, m11 + pmaddubsw m11, k4k5 + paddsw m14, m11 + punpckhbw m3, m2 ;G H next iter + mova m11, m3 + pmaddubsw m3, k6k7 + paddsw m13, m3 + paddsw m14, m13 + paddsw m14, krd + psraw m14, 7 + packuswb m15, m14 %ifidn %1, v8_avg - pavgb m0, m4 + pavgb m15, [dstq + dstrideq] %endif - mova [dstq], m0 - add dstq, dst_stride - dec heightd - jnz .loop - RET + mova [dstq + dstrideq], m15 + lea dstq, [dstq + dstrideq * 2] + sub heightd, 2 + jg .loop + + ; Do last row if output_height is odd + jne .done + + movu m3, [srcq + sstrideq] ;H + punpcklbw m6, m2, m3 ;G H + punpckhbw m2, m3 ;G H + pmaddubsw m0, k0k1 + pmaddubsw m1, k0k1 + pmaddubsw m4, k2k3 + pmaddubsw m5, k2k3 + pmaddubsw m8, k4k5 + pmaddubsw m9, k4k5 + pmaddubsw m6, k6k7 + pmaddubsw m2, k6k7 + paddsw m0, m8 + paddsw m1, m9 + paddsw m4, m6 + paddsw m5, m2 + paddsw m0, m4 + paddsw m1, m5 + paddsw m0, krd + paddsw m1, krd + psraw m0, 7 + psraw m1, 7 + packuswb m0, m1 +%ifidn %1, v8_avg + pavgb m0, [dstq] +%endif + mova [dstq], m0 + +.done: + REP_RET + +%endif ; ARCH_X86_64 + %endm INIT_XMM ssse3 diff --git a/vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm b/vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm index 3c8cfd225..538b2129d 100644 --- a/vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm +++ b/vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm @@ -14,14 +14,14 @@ mov rdx, arg(5) ;filter ptr mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;output_ptr - mov rcx, 0x0400040 + mov ecx, 0x01000100 movdqa xmm3, [rdx] ;load filters psrldq xmm3, 6 packsswb xmm3, xmm3 pshuflw xmm3, xmm3, 0b ;k3_k4 - movq xmm2, rcx ;rounding + movd xmm2, ecx ;rounding_shift pshufd xmm2, xmm2, 0 movsxd rax, DWORD PTR arg(1) ;pixels_per_line @@ -33,8 +33,7 @@ punpcklbw xmm0, xmm1 pmaddubsw xmm0, xmm3 - paddsw xmm0, xmm2 ;rounding - psraw xmm0, 7 ;shift + pmulhrsw xmm0, xmm2 ;rounding(+64)+shift(>>7) packuswb xmm0, xmm0 ;pack to byte %if %1 @@ -51,7 +50,7 @@ mov rdx, arg(5) ;filter ptr mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;output_ptr - mov rcx, 0x0400040 + mov ecx, 0x01000100 movdqa xmm7, [rdx] ;load filters psrldq xmm7, 6 @@ -59,7 +58,7 @@ pshuflw xmm7, xmm7, 0b ;k3_k4 punpcklwd xmm7, xmm7 - movq xmm6, rcx ;rounding + movd xmm6, ecx ;rounding_shift pshufd xmm6, xmm6, 0 movsxd rax, DWORD PTR arg(1) ;pixels_per_line @@ -71,8 +70,7 @@ punpcklbw xmm0, xmm1 pmaddubsw xmm0, xmm7 - paddsw xmm0, xmm6 ;rounding - psraw xmm0, 7 ;shift + pmulhrsw xmm0, xmm6 ;rounding(+64)+shift(>>7) packuswb xmm0, xmm0 ;pack back to byte %if %1 @@ -92,10 +90,8 @@ pmaddubsw xmm0, xmm7 pmaddubsw xmm2, xmm7 - paddsw xmm0, xmm6 ;rounding - paddsw xmm2, xmm6 - psraw xmm0, 7 ;shift - psraw xmm2, 7 + pmulhrsw xmm0, xmm6 ;rounding(+64)+shift(>>7) + pmulhrsw xmm2, xmm6 packuswb xmm0, xmm2 ;pack back to byte %if %1 diff --git a/vpx_ports/bitops.h b/vpx_ports/bitops.h index 84ff3659f..19426faf0 100644 --- a/vpx_ports/bitops.h +++ b/vpx_ports/bitops.h @@ -16,8 +16,7 @@ #include "vpx_ports/msvc.h" #ifdef _MSC_VER -# include <math.h> // the ceil() definition must precede intrin.h -# if _MSC_VER > 1310 && (defined(_M_X64) || defined(_M_IX86)) +# if defined(_M_X64) || defined(_M_IX86) # include <intrin.h> # define USE_MSC_INTRINSICS # endif |