summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README5
-rw-r--r--build/make/Makefile1
-rw-r--r--build/make/configure.sh4
-rwxr-xr-xbuild/make/gen_msvs_proj.sh490
-rwxr-xr-xbuild/make/gen_msvs_sln.sh85
-rwxr-xr-xconfigure21
-rw-r--r--examples.mk11
-rw-r--r--examples/vp9cx_set_ref.c453
-rw-r--r--test/convolve_test.cc45
-rwxr-xr-xtest/cx_set_ref.sh (renamed from test/vp8cx_set_ref.sh)31
-rw-r--r--test/dct16x16_test.cc10
-rw-r--r--test/dct32x32_test.cc9
-rw-r--r--test/fdct4x4_test.cc2
-rw-r--r--test/fdct8x8_test.cc2
-rw-r--r--test/hadamard_test.cc4
-rw-r--r--test/idct8x8_test.cc12
-rw-r--r--test/partial_idct_test.cc2
-rw-r--r--test/sad_test.cc2
-rw-r--r--test/test_intra_pred_speed.cc32
-rw-r--r--test/variance_test.cc6
-rw-r--r--test/vp9_error_block_test.cc4
-rw-r--r--test/vp9_intrapred_test.cc2
-rw-r--r--test/vp9_subtract_test.cc2
-rw-r--r--vp8/common/reconintra4x4.c1
-rw-r--r--vp8/encoder/ratectrl.c2
-rw-r--r--vp9/common/vp9_blockd.c2
-rw-r--r--vp9/common/vp9_blockd.h12
-rw-r--r--vp9/common/vp9_postproc.c22
-rw-r--r--vp9/common/vp9_reconinter.c2
-rw-r--r--vp9/common/vp9_rtcd_defs.pl39
-rw-r--r--vp9/decoder/vp9_decodeframe.c2
-rw-r--r--vp9/decoder/vp9_decodemv.c10
-rw-r--r--vp9/decoder/vp9_decoder.c46
-rw-r--r--vp9/encoder/vp9_encodemb.c130
-rw-r--r--vp9/encoder/vp9_encodemb.h9
-rw-r--r--vp9/encoder/vp9_firstpass.c8
-rw-r--r--vp9/encoder/vp9_pickmode.c15
-rw-r--r--vp9/encoder/vp9_rdopt.c26
-rw-r--r--vp9/encoder/vp9_tokenize.c29
-rw-r--r--vp9/encoder/x86/vp9_dct_ssse3.c5
-rw-r--r--vp9/encoder/x86/vp9_frame_scale_ssse3.c5
-rw-r--r--vp9/vp9_cx_iface.c13
-rw-r--r--vp9/vp9_dx_iface.c3
-rw-r--r--vp9/vp9_iface_common.h12
-rw-r--r--vp9/vp9cx.mk4
-rw-r--r--vpx/vpx_integer.h12
-rw-r--r--vpx_dsp/vpx_dsp.mk21
-rw-r--r--vpx_dsp/vpx_dsp_rtcd_defs.pl509
-rw-r--r--vpx_dsp/x86/highbd_variance_sse2.c10
-rw-r--r--vpx_dsp/x86/variance_sse2.c2
-rw-r--r--vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c4
-rw-r--r--vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c4
-rw-r--r--vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm950
-rw-r--r--vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm20
-rw-r--r--vpx_ports/bitops.h3
55 files changed, 1560 insertions, 1607 deletions
diff --git a/README b/README
index 29072b919..460ad7390 100644
--- a/README
+++ b/README
@@ -79,9 +79,6 @@ COMPILING THE APPLICATIONS/LIBRARIES:
x86-os2-gcc
x86-solaris-gcc
x86-win32-gcc
- x86-win32-vs7
- x86-win32-vs8
- x86-win32-vs9
x86-win32-vs10
x86-win32-vs11
x86-win32-vs12
@@ -98,8 +95,6 @@ COMPILING THE APPLICATIONS/LIBRARIES:
x86_64-linux-icc
x86_64-solaris-gcc
x86_64-win64-gcc
- x86_64-win64-vs8
- x86_64-win64-vs9
x86_64-win64-vs10
x86_64-win64-vs11
x86_64-win64-vs12
diff --git a/build/make/Makefile b/build/make/Makefile
index 3e8c02490..dfb7e4b7a 100644
--- a/build/make/Makefile
+++ b/build/make/Makefile
@@ -418,7 +418,6 @@ ifneq ($(call enabled,DIST-SRCS),)
DIST-SRCS-yes += build/make/gen_asm_deps.sh
DIST-SRCS-yes += build/make/Makefile
DIST-SRCS-$(CONFIG_MSVS) += build/make/gen_msvs_def.sh
- DIST-SRCS-$(CONFIG_MSVS) += build/make/gen_msvs_proj.sh
DIST-SRCS-$(CONFIG_MSVS) += build/make/gen_msvs_sln.sh
DIST-SRCS-$(CONFIG_MSVS) += build/make/gen_msvs_vcxproj.sh
DIST-SRCS-$(CONFIG_MSVS) += build/make/msvs_common.sh
diff --git a/build/make/configure.sh b/build/make/configure.sh
index f2b8b3765..2e1597779 100644
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -1396,10 +1396,6 @@ EOF
fi
fi
- if [ "${tgt_isa}" = "x86_64" ] || [ "${tgt_isa}" = "x86" ]; then
- soft_enable use_x86inc
- fi
-
# Position Independent Code (PIC) support, for building relocatable
# shared objects
enabled gcc && enabled pic && check_add_cflags -fPIC
diff --git a/build/make/gen_msvs_proj.sh b/build/make/gen_msvs_proj.sh
deleted file mode 100755
index 2b91fbfbc..000000000
--- a/build/make/gen_msvs_proj.sh
+++ /dev/null
@@ -1,490 +0,0 @@
-#!/bin/bash
-##
-## Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-##
-## Use of this source code is governed by a BSD-style license
-## that can be found in the LICENSE file in the root of the source
-## tree. An additional intellectual property rights grant can be found
-## in the file PATENTS. All contributing project authors may
-## be found in the AUTHORS file in the root of the source tree.
-##
-
-self=$0
-self_basename=${self##*/}
-self_dirname=$(dirname "$0")
-
-. "$self_dirname/msvs_common.sh"|| exit 127
-
-show_help() {
- cat <<EOF
-Usage: ${self_basename} --name=projname [options] file1 [file2 ...]
-
-This script generates a Visual Studio project file from a list of source
-code files.
-
-Options:
- --help Print this message
- --exe Generate a project for building an Application
- --lib Generate a project for creating a static library
- --dll Generate a project for creating a dll
- --static-crt Use the static C runtime (/MT)
- --target=isa-os-cc Target specifier (required)
- --out=filename Write output to a file [stdout]
- --name=project_name Name of the project (required)
- --proj-guid=GUID GUID to use for the project
- --module-def=filename File containing export definitions (for DLLs)
- --ver=version Version (7,8,9) of visual studio to generate for
- --src-path-bare=dir Path to root of source tree
- -Ipath/to/include Additional include directories
- -DFLAG[=value] Preprocessor macros to define
- -Lpath/to/lib Additional library search paths
- -llibname Library to link against
-EOF
- exit 1
-}
-
-generate_filter() {
- local var=$1
- local name=$2
- local pats=$3
- local file_list_sz
- local i
- local f
- local saveIFS="$IFS"
- local pack
- echo "generating filter '$name' from ${#file_list[@]} files" >&2
- IFS=*
-
- open_tag Filter \
- Name=$name \
- Filter=$pats \
- UniqueIdentifier=`generate_uuid` \
-
- file_list_sz=${#file_list[@]}
- for i in ${!file_list[@]}; do
- f=${file_list[i]}
- for pat in ${pats//;/$IFS}; do
- if [ "${f##*.}" == "$pat" ]; then
- unset file_list[i]
-
- objf=$(echo ${f%.*}.obj \
- | sed -e "s,$src_path_bare,," \
- -e 's/^[\./]\+//g' -e 's,[:/ ],_,g')
- open_tag File RelativePath="$f"
-
- if [ "$pat" == "asm" ] && $asm_use_custom_step; then
- # Avoid object file name collisions, i.e. vpx_config.c and
- # vpx_config.asm produce the same object file without
- # this additional suffix.
- objf=${objf%.obj}_asm.obj
- for plat in "${platforms[@]}"; do
- for cfg in Debug Release; do
- open_tag FileConfiguration \
- Name="${cfg}|${plat}" \
-
- tag Tool \
- Name="VCCustomBuildTool" \
- Description="Assembling \$(InputFileName)" \
- CommandLine="$(eval echo \$asm_${cfg}_cmdline) -o \$(IntDir)\\$objf" \
- Outputs="\$(IntDir)\\$objf" \
-
- close_tag FileConfiguration
- done
- done
- fi
- if [ "$pat" == "c" ] || \
- [ "$pat" == "cc" ] || [ "$pat" == "cpp" ]; then
- for plat in "${platforms[@]}"; do
- for cfg in Debug Release; do
- open_tag FileConfiguration \
- Name="${cfg}|${plat}" \
-
- tag Tool \
- Name="VCCLCompilerTool" \
- ObjectFile="\$(IntDir)\\$objf" \
-
- close_tag FileConfiguration
- done
- done
- fi
- close_tag File
-
- break
- fi
- done
- done
-
- close_tag Filter
- IFS="$saveIFS"
-}
-
-# Process command line
-unset target
-for opt in "$@"; do
- optval="${opt#*=}"
- case "$opt" in
- --help|-h) show_help
- ;;
- --target=*) target="${optval}"
- ;;
- --out=*) outfile="$optval"
- ;;
- --name=*) name="${optval}"
- ;;
- --proj-guid=*) guid="${optval}"
- ;;
- --module-def=*) link_opts="${link_opts} ModuleDefinitionFile=${optval}"
- ;;
- --exe) proj_kind="exe"
- ;;
- --dll) proj_kind="dll"
- ;;
- --lib) proj_kind="lib"
- ;;
- --src-path-bare=*)
- src_path_bare=$(fix_path "$optval")
- src_path_bare=${src_path_bare%/}
- ;;
- --static-crt) use_static_runtime=true
- ;;
- --ver=*)
- vs_ver="$optval"
- case "$optval" in
- [789])
- ;;
- *) die Unrecognized Visual Studio Version in $opt
- ;;
- esac
- ;;
- -I*)
- opt=${opt##-I}
- opt=$(fix_path "$opt")
- opt="${opt%/}"
- incs="${incs}${incs:+;}&quot;${opt}&quot;"
- yasmincs="${yasmincs} -I&quot;${opt}&quot;"
- ;;
- -D*) defines="${defines}${defines:+;}${opt##-D}"
- ;;
- -L*) # fudge . to $(OutDir)
- if [ "${opt##-L}" == "." ]; then
- libdirs="${libdirs}${libdirs:+;}&quot;\$(OutDir)&quot;"
- else
- # Also try directories for this platform/configuration
- opt=${opt##-L}
- opt=$(fix_path "$opt")
- libdirs="${libdirs}${libdirs:+;}&quot;${opt}&quot;"
- libdirs="${libdirs}${libdirs:+;}&quot;${opt}/\$(PlatformName)/\$(ConfigurationName)&quot;"
- libdirs="${libdirs}${libdirs:+;}&quot;${opt}/\$(PlatformName)&quot;"
- fi
- ;;
- -l*) libs="${libs}${libs:+ }${opt##-l}.lib"
- ;;
- -*) die_unknown $opt
- ;;
- *)
- # The paths in file_list are fixed outside of the loop.
- file_list[${#file_list[@]}]="$opt"
- case "$opt" in
- *.asm) uses_asm=true
- ;;
- esac
- ;;
- esac
-done
-
-# Make one call to fix_path for file_list to improve performance.
-fix_file_list file_list
-
-outfile=${outfile:-/dev/stdout}
-guid=${guid:-`generate_uuid`}
-asm_use_custom_step=false
-uses_asm=${uses_asm:-false}
-case "${vs_ver:-8}" in
- 7) vs_ver_id="7.10"
- asm_use_custom_step=$uses_asm
- warn_64bit='Detect64BitPortabilityProblems=true'
- ;;
- 8) vs_ver_id="8.00"
- asm_use_custom_step=$uses_asm
- warn_64bit='Detect64BitPortabilityProblems=true'
- ;;
- 9) vs_ver_id="9.00"
- asm_use_custom_step=$uses_asm
- warn_64bit='Detect64BitPortabilityProblems=false'
- ;;
-esac
-
-[ -n "$name" ] || die "Project name (--name) must be specified!"
-[ -n "$target" ] || die "Target (--target) must be specified!"
-
-if ${use_static_runtime:-false}; then
- release_runtime=0
- debug_runtime=1
- lib_sfx=mt
-else
- release_runtime=2
- debug_runtime=3
- lib_sfx=md
-fi
-
-# Calculate debug lib names: If a lib ends in ${lib_sfx}.lib, then rename
-# it to ${lib_sfx}d.lib. This precludes linking to release libs from a
-# debug exe, so this may need to be refactored later.
-for lib in ${libs}; do
- if [ "$lib" != "${lib%${lib_sfx}.lib}" ]; then
- lib=${lib%.lib}d.lib
- fi
- debug_libs="${debug_libs}${debug_libs:+ }${lib}"
-done
-
-
-# List Keyword for this target
-case "$target" in
- x86*) keyword="ManagedCProj"
- ;;
- *) die "Unsupported target $target!"
-esac
-
-# List of all platforms supported for this target
-case "$target" in
- x86_64*)
- platforms[0]="x64"
- asm_Debug_cmdline="yasm -Xvc -g cv8 -f win64 ${yasmincs} &quot;\$(InputPath)&quot;"
- asm_Release_cmdline="yasm -Xvc -f win64 ${yasmincs} &quot;\$(InputPath)&quot;"
- ;;
- x86*)
- platforms[0]="Win32"
- asm_Debug_cmdline="yasm -Xvc -g cv8 -f win32 ${yasmincs} &quot;\$(InputPath)&quot;"
- asm_Release_cmdline="yasm -Xvc -f win32 ${yasmincs} &quot;\$(InputPath)&quot;"
- ;;
- *) die "Unsupported target $target!"
- ;;
-esac
-
-generate_vcproj() {
- case "$proj_kind" in
- exe) vs_ConfigurationType=1
- ;;
- dll) vs_ConfigurationType=2
- ;;
- *) vs_ConfigurationType=4
- ;;
- esac
-
- echo "<?xml version=\"1.0\" encoding=\"Windows-1252\"?>"
- open_tag VisualStudioProject \
- ProjectType="Visual C++" \
- Version="${vs_ver_id}" \
- Name="${name}" \
- ProjectGUID="{${guid}}" \
- RootNamespace="${name}" \
- Keyword="${keyword}" \
-
- open_tag Platforms
- for plat in "${platforms[@]}"; do
- tag Platform Name="$plat"
- done
- close_tag Platforms
-
- open_tag Configurations
- for plat in "${platforms[@]}"; do
- plat_no_ws=`echo $plat | sed 's/[^A-Za-z0-9_]/_/g'`
- open_tag Configuration \
- Name="Debug|$plat" \
- OutputDirectory="\$(SolutionDir)$plat_no_ws/\$(ConfigurationName)" \
- IntermediateDirectory="$plat_no_ws/\$(ConfigurationName)/${name}" \
- ConfigurationType="$vs_ConfigurationType" \
- CharacterSet="1" \
-
- case "$target" in
- x86*)
- case "$name" in
- vpx)
- tag Tool \
- Name="VCCLCompilerTool" \
- Optimization="0" \
- AdditionalIncludeDirectories="$incs" \
- PreprocessorDefinitions="WIN32;_DEBUG;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE;$defines" \
- RuntimeLibrary="$debug_runtime" \
- UsePrecompiledHeader="0" \
- WarningLevel="3" \
- DebugInformationFormat="2" \
- $warn_64bit \
-
- $uses_asm && tag Tool Name="YASM" IncludePaths="$incs" Debug="true"
- ;;
- *)
- tag Tool \
- Name="VCCLCompilerTool" \
- Optimization="0" \
- AdditionalIncludeDirectories="$incs" \
- PreprocessorDefinitions="WIN32;_DEBUG;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE;$defines" \
- RuntimeLibrary="$debug_runtime" \
- UsePrecompiledHeader="0" \
- WarningLevel="3" \
- DebugInformationFormat="2" \
- $warn_64bit \
-
- $uses_asm && tag Tool Name="YASM" IncludePaths="$incs" Debug="true"
- ;;
- esac
- ;;
- esac
-
- case "$proj_kind" in
- exe)
- case "$target" in
- x86*)
- case "$name" in
- *)
- tag Tool \
- Name="VCLinkerTool" \
- AdditionalDependencies="$debug_libs \$(NoInherit)" \
- AdditionalLibraryDirectories="$libdirs" \
- GenerateDebugInformation="true" \
- ProgramDatabaseFile="\$(OutDir)/${name}.pdb" \
- ;;
- esac
- ;;
- esac
- ;;
- lib)
- case "$target" in
- x86*)
- tag Tool \
- Name="VCLibrarianTool" \
- OutputFile="\$(OutDir)/${name}${lib_sfx}d.lib" \
-
- ;;
- esac
- ;;
- dll)
- tag Tool \
- Name="VCLinkerTool" \
- AdditionalDependencies="\$(NoInherit)" \
- LinkIncremental="2" \
- GenerateDebugInformation="true" \
- AssemblyDebug="1" \
- TargetMachine="1" \
- $link_opts \
-
- ;;
- esac
-
- close_tag Configuration
-
- open_tag Configuration \
- Name="Release|$plat" \
- OutputDirectory="\$(SolutionDir)$plat_no_ws/\$(ConfigurationName)" \
- IntermediateDirectory="$plat_no_ws/\$(ConfigurationName)/${name}" \
- ConfigurationType="$vs_ConfigurationType" \
- CharacterSet="1" \
- WholeProgramOptimization="0" \
-
- case "$target" in
- x86*)
- case "$name" in
- vpx)
- tag Tool \
- Name="VCCLCompilerTool" \
- Optimization="2" \
- FavorSizeorSpeed="1" \
- AdditionalIncludeDirectories="$incs" \
- PreprocessorDefinitions="WIN32;NDEBUG;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE;$defines" \
- RuntimeLibrary="$release_runtime" \
- UsePrecompiledHeader="0" \
- WarningLevel="3" \
- DebugInformationFormat="0" \
- $warn_64bit \
-
- $uses_asm && tag Tool Name="YASM" IncludePaths="$incs"
- ;;
- *)
- tag Tool \
- Name="VCCLCompilerTool" \
- AdditionalIncludeDirectories="$incs" \
- Optimization="2" \
- FavorSizeorSpeed="1" \
- PreprocessorDefinitions="WIN32;NDEBUG;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE;$defines" \
- RuntimeLibrary="$release_runtime" \
- UsePrecompiledHeader="0" \
- WarningLevel="3" \
- DebugInformationFormat="0" \
- $warn_64bit \
-
- $uses_asm && tag Tool Name="YASM" IncludePaths="$incs"
- ;;
- esac
- ;;
- esac
-
- case "$proj_kind" in
- exe)
- case "$target" in
- x86*)
- case "$name" in
- *)
- tag Tool \
- Name="VCLinkerTool" \
- AdditionalDependencies="$libs \$(NoInherit)" \
- AdditionalLibraryDirectories="$libdirs" \
-
- ;;
- esac
- ;;
- esac
- ;;
- lib)
- case "$target" in
- x86*)
- tag Tool \
- Name="VCLibrarianTool" \
- OutputFile="\$(OutDir)/${name}${lib_sfx}.lib" \
-
- ;;
- esac
- ;;
- dll) # note differences to debug version: LinkIncremental, AssemblyDebug
- tag Tool \
- Name="VCLinkerTool" \
- AdditionalDependencies="\$(NoInherit)" \
- LinkIncremental="1" \
- GenerateDebugInformation="true" \
- TargetMachine="1" \
- $link_opts \
-
- ;;
- esac
-
- close_tag Configuration
- done
- close_tag Configurations
-
- open_tag Files
- generate_filter srcs "Source Files" "c;cc;cpp;def;odl;idl;hpj;bat;asm;asmx"
- generate_filter hdrs "Header Files" "h;hm;inl;inc;xsd"
- generate_filter resrcs "Resource Files" "rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
- generate_filter resrcs "Build Files" "mk"
- close_tag Files
-
- tag Globals
- close_tag VisualStudioProject
-
- # This must be done from within the {} subshell
- echo "Ignored files list (${#file_list[@]} items) is:" >&2
- for f in "${file_list[@]}"; do
- echo " $f" >&2
- done
-}
-
-generate_vcproj |
- sed -e '/"/s;\([^ "]\)/;\1\\;g' > ${outfile}
-
-exit
-<!--
-TODO: Add any files not captured by filters.
- <File
- RelativePath=".\ReadMe.txt"
- >
- </File>
--->
diff --git a/build/make/gen_msvs_sln.sh b/build/make/gen_msvs_sln.sh
index 664b404c9..7d5f46810 100755
--- a/build/make/gen_msvs_sln.sh
+++ b/build/make/gen_msvs_sln.sh
@@ -55,16 +55,11 @@ indent_pop() {
parse_project() {
local file=$1
- if [ "$sfx" = "vcproj" ]; then
- local name=`grep Name "$file" | awk 'BEGIN {FS="\""}{if (NR==1) print $2}'`
- local guid=`grep ProjectGUID "$file" | awk 'BEGIN {FS="\""}{if (NR==1) print $2}'`
- else
- local name=`grep RootNamespace "$file" | sed 's,.*<.*>\(.*\)</.*>.*,\1,'`
- local guid=`grep ProjectGuid "$file" | sed 's,.*<.*>\(.*\)</.*>.*,\1,'`
- fi
+ local name=`grep RootNamespace "$file" | sed 's,.*<.*>\(.*\)</.*>.*,\1,'`
+ local guid=`grep ProjectGuid "$file" | sed 's,.*<.*>\(.*\)</.*>.*,\1,'`
# save the project GUID to a varaible, normalizing to the basename of the
- # vcproj file without the extension
+ # vcxproj file without the extension
local var
var=${file##*/}
var=${var%%.${sfx}}
@@ -72,13 +67,8 @@ parse_project() {
eval "${var}_name=$name"
eval "${var}_guid=$guid"
- if [ "$sfx" = "vcproj" ]; then
- cur_config_list=`grep -A1 '<Configuration' $file |
- grep Name | cut -d\" -f2`
- else
- cur_config_list=`grep -B1 'Label="Configuration"' $file |
- grep Condition | cut -d\' -f4`
- fi
+ cur_config_list=`grep -B1 'Label="Configuration"' $file |
+ grep Condition | cut -d\' -f4`
new_config_list=$(for i in $config_list $cur_config_list; do
echo $i
done | sort | uniq)
@@ -103,25 +93,6 @@ process_project() {
eval "${var}_guid=$guid"
echo "Project(\"{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}\") = \"$name\", \"$file\", \"$guid\""
- indent_push
-
- eval "local deps=\"\${${var}_deps}\""
- if [ -n "$deps" ] && [ "$sfx" = "vcproj" ]; then
- echo "${indent}ProjectSection(ProjectDependencies) = postProject"
- indent_push
-
- for dep in $deps; do
- eval "local dep_guid=\${${dep}_guid}"
- [ -z "${dep_guid}" ] && die "Unknown GUID for $dep (dependency of $var)"
- echo "${indent}$dep_guid = $dep_guid"
- done
-
- indent_pop
- echo "${indent}EndProjectSection"
-
- fi
-
- indent_pop
echo "EndProject"
}
@@ -191,11 +162,7 @@ process_makefile() {
IFS=$'\r'$'\n'
local TAB=$'\t'
cat <<EOF
-ifeq (\$(CONFIG_VS_VERSION),7)
-MSBUILD_TOOL := devenv.com
-else
MSBUILD_TOOL := msbuild.exe
-endif
found_devenv := \$(shell which \$(MSBUILD_TOOL) >/dev/null 2>&1 && echo yes)
.nodevenv.once:
${TAB}@echo " * \$(MSBUILD_TOOL) not found in path."
@@ -204,7 +171,7 @@ ${TAB}@echo " * You will have to build all configurations manually using the"
${TAB}@echo " * Visual Studio IDE. To allow make to build them automatically,"
${TAB}@echo " * add the Common7/IDE directory of your Visual Studio"
${TAB}@echo " * installation to your path, eg:"
-${TAB}@echo " * C:\Program Files\Microsoft Visual Studio 8\Common7\IDE"
+${TAB}@echo " * C:\Program Files\Microsoft Visual Studio 10.0\Common7\IDE"
${TAB}@echo " * "
${TAB}@touch \$@
CLEAN-OBJS += \$(if \$(found_devenv),,.nodevenv.once)
@@ -221,16 +188,9 @@ clean::
${TAB}rm -rf "$platform"/"$config"
.PHONY: $nows_sln_config
ifneq (\$(found_devenv),)
- ifeq (\$(CONFIG_VS_VERSION),7)
-$nows_sln_config: $outfile
-${TAB}\$(MSBUILD_TOOL) $outfile -build "$config"
-
- else
$nows_sln_config: $outfile
${TAB}\$(MSBUILD_TOOL) $outfile -m -t:Build \\
${TAB}${TAB}-p:Configuration="$config" -p:Platform="$platform"
-
- endif
else
$nows_sln_config: $outfile .nodevenv.once
${TAB}@echo " * Skipping build of $sln_config (\$(MSBUILD_TOOL) not in path)."
@@ -255,23 +215,12 @@ for opt in "$@"; do
;;
--ver=*) vs_ver="$optval"
case $optval in
- [789]|10|11|12|14)
+ 10|11|12|14)
;;
*) die Unrecognized Visual Studio Version in $opt
;;
esac
;;
- --ver=*) vs_ver="$optval"
- case $optval in
- 7) sln_vers="8.00"
- sln_vers_str="Visual Studio .NET 2003"
- ;;
- [89])
- ;;
- *) die "Unrecognized Visual Studio Version '$optval' in $opt"
- ;;
- esac
- ;;
--target=*) target="${optval}"
;;
-*) die_unknown $opt
@@ -281,16 +230,7 @@ for opt in "$@"; do
done
outfile=${outfile:-/dev/stdout}
mkoutfile=${mkoutfile:-/dev/stdout}
-case "${vs_ver:-8}" in
- 7) sln_vers="8.00"
- sln_vers_str="Visual Studio .NET 2003"
- ;;
- 8) sln_vers="9.00"
- sln_vers_str="Visual Studio 2005"
- ;;
- 9) sln_vers="10.00"
- sln_vers_str="Visual Studio 2008"
- ;;
+case "${vs_ver:-10}" in
10) sln_vers="11.00"
sln_vers_str="Visual Studio 2010"
;;
@@ -304,14 +244,7 @@ case "${vs_ver:-8}" in
sln_vers_str="Visual Studio 2015"
;;
esac
-case "${vs_ver:-8}" in
- [789])
- sfx=vcproj
- ;;
- 10|11|12|14)
- sfx=vcxproj
- ;;
-esac
+sfx=vcxproj
for f in "${file_list[@]}"; do
parse_project $f
diff --git a/configure b/configure
index f82ee046b..c1ad24bfb 100755
--- a/configure
+++ b/configure
@@ -132,9 +132,6 @@ all_platforms="${all_platforms} x86-linux-icc"
all_platforms="${all_platforms} x86-os2-gcc"
all_platforms="${all_platforms} x86-solaris-gcc"
all_platforms="${all_platforms} x86-win32-gcc"
-all_platforms="${all_platforms} x86-win32-vs7"
-all_platforms="${all_platforms} x86-win32-vs8"
-all_platforms="${all_platforms} x86-win32-vs9"
all_platforms="${all_platforms} x86-win32-vs10"
all_platforms="${all_platforms} x86-win32-vs11"
all_platforms="${all_platforms} x86-win32-vs12"
@@ -152,8 +149,6 @@ all_platforms="${all_platforms} x86_64-linux-gcc"
all_platforms="${all_platforms} x86_64-linux-icc"
all_platforms="${all_platforms} x86_64-solaris-gcc"
all_platforms="${all_platforms} x86_64-win64-gcc"
-all_platforms="${all_platforms} x86_64-win64-vs8"
-all_platforms="${all_platforms} x86_64-win64-vs9"
all_platforms="${all_platforms} x86_64-win64-vs10"
all_platforms="${all_platforms} x86_64-win64-vs11"
all_platforms="${all_platforms} x86_64-win64-vs12"
@@ -272,7 +267,6 @@ CONFIG_LIST="
install_bins
install_libs
install_srcs
- use_x86inc
debug
gprof
gcov
@@ -334,7 +328,6 @@ CMDLINE_SELECT="
gprof
gcov
pic
- use_x86inc
optimizations
ccache
runtime_cpu_detect
@@ -646,17 +639,9 @@ process_toolchain() {
vs*) enable_feature msvs
enable_feature solution
vs_version=${tgt_cc##vs}
- case $vs_version in
- [789])
- VCPROJ_SFX=vcproj
- gen_vcproj_cmd=${source_path}/build/make/gen_msvs_proj.sh
- ;;
- 10|11|12|14)
- VCPROJ_SFX=vcxproj
- gen_vcproj_cmd=${source_path}/build/make/gen_msvs_vcxproj.sh
- enabled werror && gen_vcproj_cmd="${gen_vcproj_cmd} --enable-werror"
- ;;
- esac
+ VCPROJ_SFX=vcxproj
+ gen_vcproj_cmd=${source_path}/build/make/gen_msvs_vcxproj.sh
+ enabled werror && gen_vcproj_cmd="${gen_vcproj_cmd} --enable-werror"
all_targets="${all_targets} solution"
INLINE="__forceinline"
;;
diff --git a/examples.mk b/examples.mk
index c891a5496..cc7fb1ddc 100644
--- a/examples.mk
+++ b/examples.mk
@@ -215,6 +215,17 @@ vp8cx_set_ref.SRCS += vpx_ports/msvc.h
vp8cx_set_ref.GUID = C5E31F7F-96F6-48BD-BD3E-10EBF6E8057A
vp8cx_set_ref.DESCRIPTION = VP8 set encoder reference frame
+ifeq ($(CONFIG_VP9_ENCODER),yes)
+ifeq ($(CONFIG_DECODERS),yes)
+EXAMPLES-yes += vp9cx_set_ref.c
+vp9cx_set_ref.SRCS += ivfenc.h ivfenc.c
+vp9cx_set_ref.SRCS += tools_common.h tools_common.c
+vp9cx_set_ref.SRCS += video_common.h
+vp9cx_set_ref.SRCS += video_writer.h video_writer.c
+vp9cx_set_ref.GUID = 65D7F14A-2EE6-4293-B958-AB5107A03B55
+vp9cx_set_ref.DESCRIPTION = VP9 set encoder reference frame
+endif
+endif
ifeq ($(CONFIG_MULTI_RES_ENCODING),yes)
ifeq ($(CONFIG_LIBYUV),yes)
diff --git a/examples/vp9cx_set_ref.c b/examples/vp9cx_set_ref.c
new file mode 100644
index 000000000..acf79dbca
--- /dev/null
+++ b/examples/vp9cx_set_ref.c
@@ -0,0 +1,453 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+// VP9 Set Reference Frame
+// ============================
+//
+// This is an example demonstrating how to overwrite the VP9 encoder's
+// internal reference frame. In the sample we set the last frame to the
+// current frame. This technique could be used to bounce between two cameras.
+//
+// The decoder would also have to set the reference frame to the same value
+// on the same frame, or the video will become corrupt. The 'test_decode'
+// variable is set to 1 in this example that tests if the encoder and decoder
+// results are matching.
+//
+// Usage
+// -----
+// This example encodes a raw video. And the last argument passed in specifies
+// the frame number to update the reference frame on. For example, run
+// examples/vp9cx_set_ref 352 288 in.yuv out.ivf 4 30
+// The parameter is parsed as follows:
+//
+//
+// Extra Variables
+// ---------------
+// This example maintains the frame number passed on the command line
+// in the `update_frame_num` variable.
+//
+//
+// Configuration
+// -------------
+//
+// The reference frame is updated on the frame specified on the command
+// line.
+//
+// Observing The Effects
+// ---------------------
+// The encoder and decoder results should be matching when the same reference
+// frame setting operation is done in both encoder and decoder. Otherwise,
+// the encoder/decoder mismatch would be seen.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "vpx/vp8cx.h"
+#include "vpx/vpx_decoder.h"
+#include "vpx/vpx_encoder.h"
+
+#include "./tools_common.h"
+#include "./video_writer.h"
+
+static const char *exec_name;
+
+void usage_exit() {
+ fprintf(stderr, "Usage: %s <width> <height> <infile> <outfile> "
+ "<frame> <limit(optional)>\n",
+ exec_name);
+ exit(EXIT_FAILURE);
+}
+
+static int compare_img(const vpx_image_t *const img1,
+ const vpx_image_t *const img2) {
+ uint32_t l_w = img1->d_w;
+ uint32_t c_w =
+ (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift;
+ const uint32_t c_h =
+ (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift;
+ uint32_t i;
+ int match = 1;
+
+ match &= (img1->fmt == img2->fmt);
+ match &= (img1->d_w == img2->d_w);
+ match &= (img1->d_h == img2->d_h);
+
+ for (i = 0; i < img1->d_h; ++i)
+ match &= (memcmp(img1->planes[VPX_PLANE_Y] + i * img1->stride[VPX_PLANE_Y],
+ img2->planes[VPX_PLANE_Y] + i * img2->stride[VPX_PLANE_Y],
+ l_w) == 0);
+
+ for (i = 0; i < c_h; ++i)
+ match &= (memcmp(img1->planes[VPX_PLANE_U] + i * img1->stride[VPX_PLANE_U],
+ img2->planes[VPX_PLANE_U] + i * img2->stride[VPX_PLANE_U],
+ c_w) == 0);
+
+ for (i = 0; i < c_h; ++i)
+ match &= (memcmp(img1->planes[VPX_PLANE_V] + i * img1->stride[VPX_PLANE_V],
+ img2->planes[VPX_PLANE_V] + i * img2->stride[VPX_PLANE_V],
+ c_w) == 0);
+
+ return match;
+}
+
+#define mmin(a, b) ((a) < (b) ? (a) : (b))
+static void find_mismatch(const vpx_image_t *const img1,
+ const vpx_image_t *const img2,
+ int yloc[4], int uloc[4], int vloc[4]) {
+ const uint32_t bsize = 64;
+ const uint32_t bsizey = bsize >> img1->y_chroma_shift;
+ const uint32_t bsizex = bsize >> img1->x_chroma_shift;
+ const uint32_t c_w =
+ (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift;
+ const uint32_t c_h =
+ (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift;
+ int match = 1;
+ uint32_t i, j;
+ yloc[0] = yloc[1] = yloc[2] = yloc[3] = -1;
+ for (i = 0, match = 1; match && i < img1->d_h; i += bsize) {
+ for (j = 0; match && j < img1->d_w; j += bsize) {
+ int k, l;
+ const int si = mmin(i + bsize, img1->d_h) - i;
+ const int sj = mmin(j + bsize, img1->d_w) - j;
+ for (k = 0; match && k < si; ++k) {
+ for (l = 0; match && l < sj; ++l) {
+ if (*(img1->planes[VPX_PLANE_Y] +
+ (i + k) * img1->stride[VPX_PLANE_Y] + j + l) !=
+ *(img2->planes[VPX_PLANE_Y] +
+ (i + k) * img2->stride[VPX_PLANE_Y] + j + l)) {
+ yloc[0] = i + k;
+ yloc[1] = j + l;
+ yloc[2] = *(img1->planes[VPX_PLANE_Y] +
+ (i + k) * img1->stride[VPX_PLANE_Y] + j + l);
+ yloc[3] = *(img2->planes[VPX_PLANE_Y] +
+ (i + k) * img2->stride[VPX_PLANE_Y] + j + l);
+ match = 0;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ uloc[0] = uloc[1] = uloc[2] = uloc[3] = -1;
+ for (i = 0, match = 1; match && i < c_h; i += bsizey) {
+ for (j = 0; match && j < c_w; j += bsizex) {
+ int k, l;
+ const int si = mmin(i + bsizey, c_h - i);
+ const int sj = mmin(j + bsizex, c_w - j);
+ for (k = 0; match && k < si; ++k) {
+ for (l = 0; match && l < sj; ++l) {
+ if (*(img1->planes[VPX_PLANE_U] +
+ (i + k) * img1->stride[VPX_PLANE_U] + j + l) !=
+ *(img2->planes[VPX_PLANE_U] +
+ (i + k) * img2->stride[VPX_PLANE_U] + j + l)) {
+ uloc[0] = i + k;
+ uloc[1] = j + l;
+ uloc[2] = *(img1->planes[VPX_PLANE_U] +
+ (i + k) * img1->stride[VPX_PLANE_U] + j + l);
+ uloc[3] = *(img2->planes[VPX_PLANE_U] +
+ (i + k) * img2->stride[VPX_PLANE_U] + j + l);
+ match = 0;
+ break;
+ }
+ }
+ }
+ }
+ }
+ vloc[0] = vloc[1] = vloc[2] = vloc[3] = -1;
+ for (i = 0, match = 1; match && i < c_h; i += bsizey) {
+ for (j = 0; match && j < c_w; j += bsizex) {
+ int k, l;
+ const int si = mmin(i + bsizey, c_h - i);
+ const int sj = mmin(j + bsizex, c_w - j);
+ for (k = 0; match && k < si; ++k) {
+ for (l = 0; match && l < sj; ++l) {
+ if (*(img1->planes[VPX_PLANE_V] +
+ (i + k) * img1->stride[VPX_PLANE_V] + j + l) !=
+ *(img2->planes[VPX_PLANE_V] +
+ (i + k) * img2->stride[VPX_PLANE_V] + j + l)) {
+ vloc[0] = i + k;
+ vloc[1] = j + l;
+ vloc[2] = *(img1->planes[VPX_PLANE_V] +
+ (i + k) * img1->stride[VPX_PLANE_V] + j + l);
+ vloc[3] = *(img2->planes[VPX_PLANE_V] +
+ (i + k) * img2->stride[VPX_PLANE_V] + j + l);
+ match = 0;
+ break;
+ }
+ }
+ }
+ }
+ }
+}
+
+static void testing_decode(vpx_codec_ctx_t *encoder,
+ vpx_codec_ctx_t *decoder,
+ vpx_codec_enc_cfg_t *cfg,
+ unsigned int frame_out,
+ int *mismatch_seen) {
+ vpx_image_t enc_img, dec_img;
+ struct vp9_ref_frame ref_enc, ref_dec;
+
+ if (*mismatch_seen)
+ return;
+
+ ref_enc.idx = 0;
+ ref_dec.idx = 0;
+ if (vpx_codec_control(encoder, VP9_GET_REFERENCE, &ref_enc))
+ die_codec(encoder, "Failed to get encoder reference frame");
+ enc_img = ref_enc.img;
+ if (vpx_codec_control(decoder, VP9_GET_REFERENCE, &ref_dec))
+ die_codec(decoder, "Failed to get decoder reference frame");
+ dec_img = ref_dec.img;
+
+ if (!compare_img(&enc_img, &dec_img)) {
+ int y[4], u[4], v[4];
+
+ *mismatch_seen = 1;
+
+ find_mismatch(&enc_img, &dec_img, y, u, v);
+ printf("Encode/decode mismatch on frame %d at"
+ " Y[%d, %d] {%d/%d},"
+ " U[%d, %d] {%d/%d},"
+ " V[%d, %d] {%d/%d}",
+ frame_out,
+ y[0], y[1], y[2], y[3],
+ u[0], u[1], u[2], u[3],
+ v[0], v[1], v[2], v[3]);
+ }
+
+ vpx_img_free(&enc_img);
+ vpx_img_free(&dec_img);
+}
+
+static int encode_frame(vpx_codec_ctx_t *ecodec,
+ vpx_codec_enc_cfg_t *cfg,
+ vpx_image_t *img,
+ unsigned int frame_in,
+ VpxVideoWriter *writer,
+ int test_decode,
+ vpx_codec_ctx_t *dcodec,
+ unsigned int *frame_out,
+ int *mismatch_seen) {
+ int got_pkts = 0;
+ vpx_codec_iter_t iter = NULL;
+ const vpx_codec_cx_pkt_t *pkt = NULL;
+ int got_data;
+ const vpx_codec_err_t res = vpx_codec_encode(ecodec, img, frame_in, 1,
+ 0, VPX_DL_GOOD_QUALITY);
+ if (res != VPX_CODEC_OK)
+ die_codec(ecodec, "Failed to encode frame");
+
+ got_data = 0;
+
+ while ((pkt = vpx_codec_get_cx_data(ecodec, &iter)) != NULL) {
+ got_pkts = 1;
+
+ if (pkt->kind == VPX_CODEC_CX_FRAME_PKT) {
+ const int keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) != 0;
+
+ if (!(pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT)) {
+ *frame_out += 1;
+ }
+
+ if (!vpx_video_writer_write_frame(writer,
+ pkt->data.frame.buf,
+ pkt->data.frame.sz,
+ pkt->data.frame.pts)) {
+ die_codec(ecodec, "Failed to write compressed frame");
+ }
+ printf(keyframe ? "K" : ".");
+ fflush(stdout);
+ got_data = 1;
+
+ // Decode 1 frame.
+ if (test_decode) {
+ if (vpx_codec_decode(dcodec, pkt->data.frame.buf,
+ (unsigned int)pkt->data.frame.sz, NULL, 0))
+ die_codec(dcodec, "Failed to decode frame.");
+ }
+ }
+ }
+
+ // Mismatch checking
+ if (got_data && test_decode) {
+ testing_decode(ecodec, dcodec, cfg, *frame_out, mismatch_seen);
+ }
+
+ return got_pkts;
+}
+
+int main(int argc, char **argv) {
+ FILE *infile = NULL;
+ // Encoder
+ vpx_codec_ctx_t ecodec = {0};
+ vpx_codec_enc_cfg_t cfg = {0};
+ unsigned int frame_in = 0;
+ vpx_image_t raw;
+ vpx_codec_err_t res;
+ VpxVideoInfo info = {0};
+ VpxVideoWriter *writer = NULL;
+ const VpxInterface *encoder = NULL;
+
+ // Test encoder/decoder mismatch.
+ int test_decode = 1;
+ // Decoder
+ vpx_codec_ctx_t dcodec;
+ unsigned int frame_out = 0;
+
+ // The frame number to set reference frame on
+ int update_frame_num = 0;
+ int mismatch_seen = 0;
+
+ const int fps = 30;
+ const int bitrate = 500;
+
+ const char *width_arg = NULL;
+ const char *height_arg = NULL;
+ const char *infile_arg = NULL;
+ const char *outfile_arg = NULL;
+ unsigned int limit = 0;
+ exec_name = argv[0];
+
+ if (argc < 6)
+ die("Invalid number of arguments");
+
+ width_arg = argv[1];
+ height_arg = argv[2];
+ infile_arg = argv[3];
+ outfile_arg = argv[4];
+
+ encoder = get_vpx_encoder_by_name("vp9");
+ if (!encoder)
+ die("Unsupported codec.");
+
+ update_frame_num = atoi(argv[5]);
+ // In VP9, the reference buffers (cm->buffer_pool->frame_bufs[i].buf) are
+ // allocated while calling vpx_codec_encode(), thus, setting reference for
+ // 1st frame isn't supported.
+ if (update_frame_num <= 1)
+ die("Couldn't parse frame number '%s'\n", argv[5]);
+
+ if (argc > 6) {
+ limit = atoi(argv[6]);
+ if (update_frame_num > limit)
+ die("Update frame number couldn't larger than limit\n");
+ }
+
+ info.codec_fourcc = encoder->fourcc;
+ info.frame_width = strtol(width_arg, NULL, 0);
+ info.frame_height = strtol(height_arg, NULL, 0);
+ info.time_base.numerator = 1;
+ info.time_base.denominator = fps;
+
+ if (info.frame_width <= 0 ||
+ info.frame_height <= 0 ||
+ (info.frame_width % 2) != 0 ||
+ (info.frame_height % 2) != 0) {
+ die("Invalid frame size: %dx%d", info.frame_width, info.frame_height);
+ }
+
+ if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, info.frame_width,
+ info.frame_height, 1)) {
+ die("Failed to allocate image.");
+ }
+
+ printf("Using %s\n", vpx_codec_iface_name(encoder->codec_interface()));
+
+ res = vpx_codec_enc_config_default(encoder->codec_interface(), &cfg, 0);
+ if (res)
+ die_codec(&ecodec, "Failed to get default codec config.");
+
+ cfg.g_w = info.frame_width;
+ cfg.g_h = info.frame_height;
+ cfg.g_timebase.num = info.time_base.numerator;
+ cfg.g_timebase.den = info.time_base.denominator;
+ cfg.rc_target_bitrate = bitrate;
+ cfg.g_lag_in_frames = 3;
+
+ writer = vpx_video_writer_open(outfile_arg, kContainerIVF, &info);
+ if (!writer)
+ die("Failed to open %s for writing.", outfile_arg);
+
+ if (!(infile = fopen(infile_arg, "rb")))
+ die("Failed to open %s for reading.", infile_arg);
+
+ if (vpx_codec_enc_init(&ecodec, encoder->codec_interface(), &cfg, 0))
+ die_codec(&ecodec, "Failed to initialize encoder");
+
+ // Disable alt_ref.
+ if (vpx_codec_control(&ecodec, VP8E_SET_ENABLEAUTOALTREF, 0))
+ die_codec(&ecodec, "Failed to set enable auto alt ref");
+
+ if (test_decode) {
+ const VpxInterface *decoder = get_vpx_decoder_by_name("vp9");
+ if (vpx_codec_dec_init(&dcodec, decoder->codec_interface(), NULL, 0))
+ die_codec(&dcodec, "Failed to initialize decoder.");
+ }
+
+ // Encode frames.
+ while (vpx_img_read(&raw, infile)) {
+ if (limit && frame_in >= limit)
+ break;
+ if (update_frame_num > 1 && frame_out + 1 == update_frame_num) {
+ vpx_ref_frame_t ref;
+ ref.frame_type = VP8_LAST_FRAME;
+ ref.img = raw;
+ // Set reference frame in encoder.
+ if (vpx_codec_control(&ecodec, VP8_SET_REFERENCE, &ref))
+ die_codec(&ecodec, "Failed to set reference frame");
+ printf(" <SET_REF>");
+
+ // If set_reference in decoder is commented out, the enc/dec mismatch
+ // would be seen.
+ if (test_decode) {
+ if (vpx_codec_control(&dcodec, VP8_SET_REFERENCE, &ref))
+ die_codec(&dcodec, "Failed to set reference frame");
+ }
+ }
+
+ encode_frame(&ecodec, &cfg, &raw, frame_in, writer, test_decode,
+ &dcodec, &frame_out, &mismatch_seen);
+ frame_in++;
+ if (mismatch_seen)
+ break;
+ }
+
+ // Flush encoder.
+ if (!mismatch_seen)
+ while (encode_frame(&ecodec, &cfg, NULL, frame_in, writer, test_decode,
+ &dcodec, &frame_out, &mismatch_seen)) {}
+
+ printf("\n");
+ fclose(infile);
+ printf("Processed %d frames.\n", frame_out);
+
+ if (test_decode) {
+ if (!mismatch_seen)
+ printf("Encoder/decoder results are matching.\n");
+ else
+ printf("Encoder/decoder results are NOT matching.\n");
+ }
+
+ if (test_decode)
+ if (vpx_codec_destroy(&dcodec))
+ die_codec(&dcodec, "Failed to destroy decoder");
+
+ vpx_img_free(&raw);
+ if (vpx_codec_destroy(&ecodec))
+ die_codec(&ecodec, "Failed to destroy encoder.");
+
+ vpx_video_writer_close(writer);
+
+ return EXIT_SUCCESS;
+}
diff --git a/test/convolve_test.cc b/test/convolve_test.cc
index 73b0edb99..5933a62b0 100644
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -431,7 +431,8 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
void CopyOutputToRef() {
memcpy(output_ref_, output_, kOutputBufferSize);
#if CONFIG_VP9_HIGHBITDEPTH
- memcpy(output16_ref_, output16_, kOutputBufferSize);
+ memcpy(output16_ref_, output16_,
+ kOutputBufferSize * sizeof(output16_ref_[0]));
#endif
}
@@ -443,41 +444,41 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
}
uint8_t *input() const {
+ const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
#if CONFIG_VP9_HIGHBITDEPTH
if (UUT_->use_highbd_ == 0) {
- return input_ + BorderTop() * kOuterBlockSize + BorderLeft();
+ return input_ + offset;
} else {
- return CONVERT_TO_BYTEPTR(input16_ + BorderTop() * kOuterBlockSize +
- BorderLeft());
+ return CONVERT_TO_BYTEPTR(input16_) + offset;
}
#else
- return input_ + BorderTop() * kOuterBlockSize + BorderLeft();
+ return input_ + offset;
#endif
}
uint8_t *output() const {
+ const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
#if CONFIG_VP9_HIGHBITDEPTH
if (UUT_->use_highbd_ == 0) {
- return output_ + BorderTop() * kOuterBlockSize + BorderLeft();
+ return output_ + offset;
} else {
- return CONVERT_TO_BYTEPTR(output16_ + BorderTop() * kOuterBlockSize +
- BorderLeft());
+ return CONVERT_TO_BYTEPTR(output16_) + offset;
}
#else
- return output_ + BorderTop() * kOuterBlockSize + BorderLeft();
+ return output_ + offset;
#endif
}
uint8_t *output_ref() const {
+ const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
#if CONFIG_VP9_HIGHBITDEPTH
if (UUT_->use_highbd_ == 0) {
- return output_ref_ + BorderTop() * kOuterBlockSize + BorderLeft();
+ return output_ref_ + offset;
} else {
- return CONVERT_TO_BYTEPTR(output16_ref_ + BorderTop() * kOuterBlockSize +
- BorderLeft());
+ return CONVERT_TO_BYTEPTR(output16_ref_) + offset;
}
#else
- return output_ref_ + BorderTop() * kOuterBlockSize + BorderLeft();
+ return output_ref_ + offset;
#endif
}
@@ -989,14 +990,12 @@ void wrap_ ## func ## _ ## bd(const uint8_t *src, ptrdiff_t src_stride, \
w, h, bd); \
}
#if HAVE_SSE2 && ARCH_X86_64
-#if CONFIG_USE_X86INC
WRAP(convolve_copy_sse2, 8)
WRAP(convolve_avg_sse2, 8)
WRAP(convolve_copy_sse2, 10)
WRAP(convolve_avg_sse2, 10)
WRAP(convolve_copy_sse2, 12)
WRAP(convolve_avg_sse2, 12)
-#endif // CONFIG_USE_X86INC
WRAP(convolve8_horiz_sse2, 8)
WRAP(convolve8_avg_horiz_sse2, 8)
WRAP(convolve8_vert_sse2, 8)
@@ -1090,11 +1089,7 @@ INSTANTIATE_TEST_CASE_P(C, ConvolveTest,
#if HAVE_SSE2 && ARCH_X86_64
#if CONFIG_VP9_HIGHBITDEPTH
const ConvolveFunctions convolve8_sse2(
-#if CONFIG_USE_X86INC
wrap_convolve_copy_sse2_8, wrap_convolve_avg_sse2_8,
-#else
- wrap_convolve_copy_c_8, wrap_convolve_avg_c_8,
-#endif // CONFIG_USE_X86INC
wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8,
wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8,
@@ -1102,11 +1097,7 @@ const ConvolveFunctions convolve8_sse2(
wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8, 8);
const ConvolveFunctions convolve10_sse2(
-#if CONFIG_USE_X86INC
wrap_convolve_copy_sse2_10, wrap_convolve_avg_sse2_10,
-#else
- wrap_convolve_copy_c_10, wrap_convolve_avg_c_10,
-#endif // CONFIG_USE_X86INC
wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10,
@@ -1114,11 +1105,7 @@ const ConvolveFunctions convolve10_sse2(
wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10, 10);
const ConvolveFunctions convolve12_sse2(
-#if CONFIG_USE_X86INC
wrap_convolve_copy_sse2_12, wrap_convolve_avg_sse2_12,
-#else
- wrap_convolve_copy_c_12, wrap_convolve_avg_c_12,
-#endif // CONFIG_USE_X86INC
wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12,
@@ -1132,11 +1119,7 @@ const ConvolveParam kArrayConvolve_sse2[] = {
};
#else
const ConvolveFunctions convolve8_sse2(
-#if CONFIG_USE_X86INC
vpx_convolve_copy_sse2, vpx_convolve_avg_sse2,
-#else
- vpx_convolve_copy_c, vpx_convolve_avg_c,
-#endif // CONFIG_USE_X86INC
vpx_convolve8_horiz_sse2, vpx_convolve8_avg_horiz_sse2,
vpx_convolve8_vert_sse2, vpx_convolve8_avg_vert_sse2,
vpx_convolve8_sse2, vpx_convolve8_avg_sse2,
diff --git a/test/vp8cx_set_ref.sh b/test/cx_set_ref.sh
index 5d760bcde..0a58dc187 100755
--- a/test/vp8cx_set_ref.sh
+++ b/test/cx_set_ref.sh
@@ -1,6 +1,6 @@
#!/bin/sh
##
-## Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+## Copyright (c) 2016 The WebM project authors. All Rights Reserved.
##
## Use of this source code is governed by a BSD-style license
## that can be found in the LICENSE file in the root of the source
@@ -8,30 +8,27 @@
## in the file PATENTS. All contributing project authors may
## be found in the AUTHORS file in the root of the source tree.
##
-## This file tests the libvpx vp8cx_set_ref example. To add new tests to this
+## This file tests the libvpx cx_set_ref example. To add new tests to this
## file, do the following:
## 1. Write a shell function (this is your test).
-## 2. Add the function to vp8cx_set_ref_tests (on a new line).
+## 2. Add the function to cx_set_ref_tests (on a new line).
##
. $(dirname $0)/tools_common.sh
# Environment check: $YUV_RAW_INPUT is required.
-vp8cx_set_ref_verify_environment() {
+cx_set_ref_verify_environment() {
if [ ! -e "${YUV_RAW_INPUT}" ]; then
echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH."
return 1
fi
}
-# Runs vp8cx_set_ref and updates the reference frame before encoding frame 90.
-# $1 is the codec name, which vp8cx_set_ref does not support at present: It's
-# currently used only to name the output file.
-# TODO(tomfinegan): Pass the codec param once the example is updated to support
-# VP9.
+# Runs cx_set_ref and updates the reference frame before encoding frame 90.
+# $1 is the codec name.
vpx_set_ref() {
- local encoder="${LIBVPX_BIN_PATH}/vp8cx_set_ref${VPX_TEST_EXE_SUFFIX}"
local codec="$1"
- local output_file="${VPX_TEST_OUTPUT_DIR}/vp8cx_set_ref_${codec}.ivf"
+ local encoder="${LIBVPX_BIN_PATH}/${codec}cx_set_ref${VPX_TEST_EXE_SUFFIX}"
+ local output_file="${VPX_TEST_OUTPUT_DIR}/${codec}cx_set_ref_${codec}.ivf"
local ref_frame_num=90
if [ ! -x "${encoder}" ]; then
@@ -46,12 +43,18 @@ vpx_set_ref() {
[ -e "${output_file}" ] || return 1
}
-vp8cx_set_ref_vp8() {
+cx_set_ref_vp8() {
if [ "$(vp8_encode_available)" = "yes" ]; then
vpx_set_ref vp8 || return 1
fi
}
-vp8cx_set_ref_tests="vp8cx_set_ref_vp8"
+cx_set_ref_vp9() {
+ if [ "$(vp9_encode_available)" = "yes" ]; then
+ vpx_set_ref vp9 || return 1
+ fi
+}
+
+cx_set_ref_tests="cx_set_ref_vp8 cx_set_ref_vp9"
-run_tests vp8cx_set_ref_verify_environment "${vp8cx_set_ref_tests}"
+run_tests cx_set_ref_verify_environment "${cx_set_ref_tests}"
diff --git a/test/dct16x16_test.cc b/test/dct16x16_test.cc
index ddaf9395b..e6224b21a 100644
--- a/test/dct16x16_test.cc
+++ b/test/dct16x16_test.cc
@@ -25,20 +25,12 @@
#include "vpx/vpx_codec.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
+#include "vpx_ports/msvc.h" // for round()
using libvpx_test::ACMRandom;
namespace {
-#ifdef _MSC_VER
-static int round(double x) {
- if (x < 0)
- return static_cast<int>(ceil(x - 0.5));
- else
- return static_cast<int>(floor(x + 0.5));
-}
-#endif
-
const int kNumCoeffs = 256;
const double C1 = 0.995184726672197;
const double C2 = 0.98078528040323;
diff --git a/test/dct32x32_test.cc b/test/dct32x32_test.cc
index 16d88255e..278d72dfa 100644
--- a/test/dct32x32_test.cc
+++ b/test/dct32x32_test.cc
@@ -25,18 +25,11 @@
#include "vpx/vpx_codec.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
+#include "vpx_ports/msvc.h" // for round()
using libvpx_test::ACMRandom;
namespace {
-#ifdef _MSC_VER
-static int round(double x) {
- if (x < 0)
- return static_cast<int>(ceil(x - 0.5));
- else
- return static_cast<int>(floor(x + 0.5));
-}
-#endif
const int kNumCoeffs = 1024;
const double kPi = 3.141592653589793238462643383279502884;
diff --git a/test/fdct4x4_test.cc b/test/fdct4x4_test.cc
index 735cccf8d..a24085606 100644
--- a/test/fdct4x4_test.cc
+++ b/test/fdct4x4_test.cc
@@ -487,7 +487,7 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 3, VPX_BITS_8)));
#endif // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-#if CONFIG_USE_X86INC && HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE
+#if HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
SSE2, Trans4x4WHT,
::testing::Values(
diff --git a/test/fdct8x8_test.cc b/test/fdct8x8_test.cc
index 29f215817..083ee6628 100644
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@@ -766,7 +766,7 @@ INSTANTIATE_TEST_CASE_P(
&idct8x8_64_add_12_sse2, 6225, VPX_BITS_12)));
#endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-#if HAVE_SSSE3 && CONFIG_USE_X86INC && ARCH_X86_64 && \
+#if HAVE_SSSE3 && ARCH_X86_64 && \
!CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
SSSE3, FwdTrans8x8DCT,
diff --git a/test/hadamard_test.cc b/test/hadamard_test.cc
index 7a5bd5b4c..b8eec523f 100644
--- a/test/hadamard_test.cc
+++ b/test/hadamard_test.cc
@@ -152,10 +152,10 @@ INSTANTIATE_TEST_CASE_P(SSE2, Hadamard8x8Test,
::testing::Values(&vpx_hadamard_8x8_sse2));
#endif // HAVE_SSE2
-#if HAVE_SSSE3 && CONFIG_USE_X86INC && ARCH_X86_64
+#if HAVE_SSSE3 && ARCH_X86_64
INSTANTIATE_TEST_CASE_P(SSSE3, Hadamard8x8Test,
::testing::Values(&vpx_hadamard_8x8_ssse3));
-#endif // HAVE_SSSE3 && CONFIG_USE_X86INC && ARCH_X86_64
+#endif // HAVE_SSSE3 && ARCH_X86_64
#if HAVE_NEON
INSTANTIATE_TEST_CASE_P(NEON, Hadamard8x8Test,
diff --git a/test/idct8x8_test.cc b/test/idct8x8_test.cc
index 7f9d751d6..04487c452 100644
--- a/test/idct8x8_test.cc
+++ b/test/idct8x8_test.cc
@@ -17,20 +17,12 @@
#include "./vpx_dsp_rtcd.h"
#include "test/acm_random.h"
#include "vpx/vpx_integer.h"
+#include "vpx_ports/msvc.h" // for round()
using libvpx_test::ACMRandom;
namespace {
-#ifdef _MSC_VER
-static int round(double x) {
- if (x < 0)
- return static_cast<int>(ceil(x - 0.5));
- else
- return static_cast<int>(floor(x + 0.5));
-}
-#endif
-
void reference_dct_1d(double input[8], double output[8]) {
const double kPi = 3.141592653589793238462643383279502884;
const double kInvSqrt2 = 0.707106781186547524400844362104;
@@ -86,7 +78,7 @@ TEST(VP9Idct8x8Test, AccuracyCheck) {
reference_dct_2d(input, output_r);
for (int j = 0; j < 64; ++j)
- coeff[j] = round(output_r[j]);
+ coeff[j] = static_cast<tran_low_t>(round(output_r[j]));
vpx_idct8x8_64_add_c(coeff, dst, 8);
for (int j = 0; j < 64; ++j) {
const int diff = dst[j] - src[j];
diff --git a/test/partial_idct_test.cc b/test/partial_idct_test.cc
index 6c824128b..1efb1a4eb 100644
--- a/test/partial_idct_test.cc
+++ b/test/partial_idct_test.cc
@@ -295,7 +295,7 @@ INSTANTIATE_TEST_CASE_P(
TX_4X4, 1)));
#endif
-#if HAVE_SSSE3 && CONFIG_USE_X86INC && ARCH_X86_64 && \
+#if HAVE_SSSE3 && ARCH_X86_64 && \
!CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
SSSE3_64, PartialIDctTest,
diff --git a/test/sad_test.cc b/test/sad_test.cc
index e6bd0d793..fa7b6c840 100644
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
@@ -690,7 +690,6 @@ INSTANTIATE_TEST_CASE_P(NEON, SADx4Test, ::testing::ValuesIn(x4d_neon_tests));
//------------------------------------------------------------------------------
// x86 functions
#if HAVE_SSE2
-#if CONFIG_USE_X86INC
const SadMxNParam sse2_tests[] = {
make_tuple(64, 64, &vpx_sad64x64_sse2, -1),
make_tuple(64, 32, &vpx_sad64x32_sse2, -1),
@@ -852,7 +851,6 @@ const SadMxNx4Param x4d_sse2_tests[] = {
#endif // CONFIG_VP9_HIGHBITDEPTH
};
INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::ValuesIn(x4d_sse2_tests));
-#endif // CONFIG_USE_X86INC
#endif // HAVE_SSE2
#if HAVE_SSE3
diff --git a/test/test_intra_pred_speed.cc b/test/test_intra_pred_speed.cc
index 2acf744d5..8928bf87c 100644
--- a/test/test_intra_pred_speed.cc
+++ b/test/test_intra_pred_speed.cc
@@ -187,21 +187,21 @@ INTRA_PRED_TEST(C, TestIntraPred4, vpx_dc_predictor_4x4_c,
vpx_d153_predictor_4x4_c, vpx_d207_predictor_4x4_c,
vpx_d63_predictor_4x4_c, vpx_tm_predictor_4x4_c)
-#if HAVE_SSE2 && CONFIG_USE_X86INC
+#if HAVE_SSE2
INTRA_PRED_TEST(SSE2, TestIntraPred4, vpx_dc_predictor_4x4_sse2,
vpx_dc_left_predictor_4x4_sse2, vpx_dc_top_predictor_4x4_sse2,
vpx_dc_128_predictor_4x4_sse2, vpx_v_predictor_4x4_sse2,
vpx_h_predictor_4x4_sse2, vpx_d45_predictor_4x4_sse2, NULL,
NULL, NULL, vpx_d207_predictor_4x4_sse2, NULL,
vpx_tm_predictor_4x4_sse2)
-#endif // HAVE_SSE2 && CONFIG_USE_X86INC
+#endif // HAVE_SSE2
-#if HAVE_SSSE3 && CONFIG_USE_X86INC
+#if HAVE_SSSE3
INTRA_PRED_TEST(SSSE3, TestIntraPred4, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
vpx_d153_predictor_4x4_ssse3, NULL,
vpx_d63_predictor_4x4_ssse3, NULL)
-#endif // HAVE_SSSE3 && CONFIG_USE_X86INC
+#endif // HAVE_SSSE3
#if HAVE_DSPR2
INTRA_PRED_TEST(DSPR2, TestIntraPred4, vpx_dc_predictor_4x4_dspr2, NULL, NULL,
@@ -237,20 +237,20 @@ INTRA_PRED_TEST(C, TestIntraPred8, vpx_dc_predictor_8x8_c,
vpx_d153_predictor_8x8_c, vpx_d207_predictor_8x8_c,
vpx_d63_predictor_8x8_c, vpx_tm_predictor_8x8_c)
-#if HAVE_SSE2 && CONFIG_USE_X86INC
+#if HAVE_SSE2
INTRA_PRED_TEST(SSE2, TestIntraPred8, vpx_dc_predictor_8x8_sse2,
vpx_dc_left_predictor_8x8_sse2, vpx_dc_top_predictor_8x8_sse2,
vpx_dc_128_predictor_8x8_sse2, vpx_v_predictor_8x8_sse2,
vpx_h_predictor_8x8_sse2, vpx_d45_predictor_8x8_sse2, NULL,
NULL, NULL, NULL, NULL, vpx_tm_predictor_8x8_sse2)
-#endif // HAVE_SSE2 && CONFIG_USE_X86INC
+#endif // HAVE_SSE2
-#if HAVE_SSSE3 && CONFIG_USE_X86INC
+#if HAVE_SSSE3
INTRA_PRED_TEST(SSSE3, TestIntraPred8, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
vpx_d153_predictor_8x8_ssse3, vpx_d207_predictor_8x8_ssse3,
vpx_d63_predictor_8x8_ssse3, NULL)
-#endif // HAVE_SSSE3 && CONFIG_USE_X86INC
+#endif // HAVE_SSSE3
#if HAVE_DSPR2
INTRA_PRED_TEST(DSPR2, TestIntraPred8, vpx_dc_predictor_8x8_dspr2, NULL, NULL,
@@ -286,22 +286,22 @@ INTRA_PRED_TEST(C, TestIntraPred16, vpx_dc_predictor_16x16_c,
vpx_d153_predictor_16x16_c, vpx_d207_predictor_16x16_c,
vpx_d63_predictor_16x16_c, vpx_tm_predictor_16x16_c)
-#if HAVE_SSE2 && CONFIG_USE_X86INC
+#if HAVE_SSE2
INTRA_PRED_TEST(SSE2, TestIntraPred16, vpx_dc_predictor_16x16_sse2,
vpx_dc_left_predictor_16x16_sse2,
vpx_dc_top_predictor_16x16_sse2,
vpx_dc_128_predictor_16x16_sse2, vpx_v_predictor_16x16_sse2,
vpx_h_predictor_16x16_sse2, NULL, NULL, NULL, NULL, NULL, NULL,
vpx_tm_predictor_16x16_sse2)
-#endif // HAVE_SSE2 && CONFIG_USE_X86INC
+#endif // HAVE_SSE2
-#if HAVE_SSSE3 && CONFIG_USE_X86INC
+#if HAVE_SSSE3
INTRA_PRED_TEST(SSSE3, TestIntraPred16, NULL, NULL, NULL, NULL, NULL,
NULL, vpx_d45_predictor_16x16_ssse3,
NULL, NULL, vpx_d153_predictor_16x16_ssse3,
vpx_d207_predictor_16x16_ssse3, vpx_d63_predictor_16x16_ssse3,
NULL)
-#endif // HAVE_SSSE3 && CONFIG_USE_X86INC
+#endif // HAVE_SSSE3
#if HAVE_DSPR2
INTRA_PRED_TEST(DSPR2, TestIntraPred16, vpx_dc_predictor_16x16_dspr2, NULL,
@@ -337,21 +337,21 @@ INTRA_PRED_TEST(C, TestIntraPred32, vpx_dc_predictor_32x32_c,
vpx_d153_predictor_32x32_c, vpx_d207_predictor_32x32_c,
vpx_d63_predictor_32x32_c, vpx_tm_predictor_32x32_c)
-#if HAVE_SSE2 && CONFIG_USE_X86INC
+#if HAVE_SSE2
INTRA_PRED_TEST(SSE2, TestIntraPred32, vpx_dc_predictor_32x32_sse2,
vpx_dc_left_predictor_32x32_sse2,
vpx_dc_top_predictor_32x32_sse2,
vpx_dc_128_predictor_32x32_sse2, vpx_v_predictor_32x32_sse2,
vpx_h_predictor_32x32_sse2, NULL, NULL, NULL, NULL, NULL,
NULL, vpx_tm_predictor_32x32_sse2)
-#endif // HAVE_SSE2 && CONFIG_USE_X86INC
+#endif // HAVE_SSE2
-#if HAVE_SSSE3 && CONFIG_USE_X86INC
+#if HAVE_SSSE3
INTRA_PRED_TEST(SSSE3, TestIntraPred32, NULL, NULL, NULL, NULL, NULL,
NULL, vpx_d45_predictor_32x32_ssse3, NULL, NULL,
vpx_d153_predictor_32x32_ssse3, vpx_d207_predictor_32x32_ssse3,
vpx_d63_predictor_32x32_ssse3, NULL)
-#endif // HAVE_SSSE3 && CONFIG_USE_X86INC
+#endif // HAVE_SSSE3
#if HAVE_NEON
INTRA_PRED_TEST(NEON, TestIntraPred32, vpx_dc_predictor_32x32_neon,
diff --git a/test/variance_test.cc b/test/variance_test.cc
index cb6339041..08c84a613 100644
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -1002,7 +1002,6 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(2, 3, &vpx_variance4x8_sse2, 0),
make_tuple(2, 2, &vpx_variance4x4_sse2, 0)));
-#if CONFIG_USE_X86INC
INSTANTIATE_TEST_CASE_P(
SSE2, VpxSubpelVarianceTest,
::testing::Values(make_tuple(6, 6, &vpx_sub_pixel_variance64x64_sse2, 0),
@@ -1035,7 +1034,6 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(3, 2, &vpx_sub_pixel_avg_variance8x4_sse2, 0),
make_tuple(2, 3, &vpx_sub_pixel_avg_variance4x8_sse2, 0),
make_tuple(2, 2, &vpx_sub_pixel_avg_variance4x4_sse2, 0)));
-#endif // CONFIG_USE_X86INC
#if CONFIG_VP9_HIGHBITDEPTH
/* TODO(debargha): This test does not support the highbd version
@@ -1088,7 +1086,6 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(3, 4, &vpx_highbd_8_variance8x16_sse2, 8),
make_tuple(3, 3, &vpx_highbd_8_variance8x8_sse2, 8)));
-#if CONFIG_USE_X86INC
INSTANTIATE_TEST_CASE_P(
SSE2, VpxHBDSubpelVarianceTest,
::testing::Values(
@@ -1162,12 +1159,10 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(3, 4, &vpx_highbd_8_sub_pixel_avg_variance8x16_sse2, 8),
make_tuple(3, 3, &vpx_highbd_8_sub_pixel_avg_variance8x8_sse2, 8),
make_tuple(3, 2, &vpx_highbd_8_sub_pixel_avg_variance8x4_sse2, 8)));
-#endif // CONFIG_USE_X86INC
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_SSE2
#if HAVE_SSSE3
-#if CONFIG_USE_X86INC
INSTANTIATE_TEST_CASE_P(
SSSE3, VpxSubpelVarianceTest,
::testing::Values(make_tuple(6, 6, &vpx_sub_pixel_variance64x64_ssse3, 0),
@@ -1200,7 +1195,6 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(3, 2, &vpx_sub_pixel_avg_variance8x4_ssse3, 0),
make_tuple(2, 3, &vpx_sub_pixel_avg_variance4x8_ssse3, 0),
make_tuple(2, 2, &vpx_sub_pixel_avg_variance4x4_ssse3, 0)));
-#endif // CONFIG_USE_X86INC
#endif // HAVE_SSSE3
#if HAVE_AVX2
diff --git a/test/vp9_error_block_test.cc b/test/vp9_error_block_test.cc
index 23a249e2b..341cc19cb 100644
--- a/test/vp9_error_block_test.cc
+++ b/test/vp9_error_block_test.cc
@@ -157,9 +157,9 @@ TEST_P(ErrorBlockTest, ExtremeValues) {
<< "First failed at test case " << first_failure;
}
+#if HAVE_SSE2 || HAVE_AVX
using std::tr1::make_tuple;
-#if CONFIG_USE_X86INC
int64_t wrap_vp9_highbd_block_error_8bit_c(const tran_low_t *coeff,
const tran_low_t *dqcoeff,
intptr_t block_size,
@@ -167,6 +167,7 @@ int64_t wrap_vp9_highbd_block_error_8bit_c(const tran_low_t *coeff,
EXPECT_EQ(8, bps);
return vp9_highbd_block_error_8bit_c(coeff, dqcoeff, block_size, ssz);
}
+#endif // HAVE_SSE2 || HAVE_AVX
#if HAVE_SSE2
int64_t wrap_vp9_highbd_block_error_8bit_sse2(const tran_low_t *coeff,
@@ -206,6 +207,5 @@ INSTANTIATE_TEST_CASE_P(
&wrap_vp9_highbd_block_error_8bit_c, VPX_BITS_8)));
#endif // HAVE_AVX
-#endif // CONFIG_USE_X86INC
#endif // CONFIG_VP9_HIGHBITDEPTH
} // namespace
diff --git a/test/vp9_intrapred_test.cc b/test/vp9_intrapred_test.cc
index 416f3c322..ea8abfb77 100644
--- a/test/vp9_intrapred_test.cc
+++ b/test/vp9_intrapred_test.cc
@@ -131,7 +131,6 @@ using std::tr1::make_tuple;
#if HAVE_SSE2
#if CONFIG_VP9_HIGHBITDEPTH
-#if CONFIG_USE_X86INC
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
::testing::Values(
make_tuple(&vpx_highbd_dc_predictor_32x32_sse2,
@@ -225,7 +224,6 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
&vpx_highbd_tm_predictor_8x8_c, 8, 12)));
-#endif // CONFIG_USE_X86INC
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_SSE2
} // namespace
diff --git a/test/vp9_subtract_test.cc b/test/vp9_subtract_test.cc
index 3cad4d7e6..4793a9716 100644
--- a/test/vp9_subtract_test.cc
+++ b/test/vp9_subtract_test.cc
@@ -93,7 +93,7 @@ TEST_P(VP9SubtractBlockTest, SimpleSubtract) {
INSTANTIATE_TEST_CASE_P(C, VP9SubtractBlockTest,
::testing::Values(vpx_subtract_block_c));
-#if HAVE_SSE2 && CONFIG_USE_X86INC
+#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(SSE2, VP9SubtractBlockTest,
::testing::Values(vpx_subtract_block_sse2));
#endif
diff --git a/vp8/common/reconintra4x4.c b/vp8/common/reconintra4x4.c
index 35ad891ef..cd7c24821 100644
--- a/vp8/common/reconintra4x4.c
+++ b/vp8/common/reconintra4x4.c
@@ -14,6 +14,7 @@
#include "./vpx_dsp_rtcd.h"
#include "vp8_rtcd.h"
#include "blockd.h"
+#include "reconintra4x4.h"
typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left);
diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c
index 7da3d71ad..96d00cbf7 100644
--- a/vp8/encoder/ratectrl.c
+++ b/vp8/encoder/ratectrl.c
@@ -1593,7 +1593,7 @@ int vp8_drop_encodedframe_overshoot(VP8_COMP *cpi, int Q) {
if (Q < thresh_qp &&
cpi->projected_frame_size > thresh_rate &&
pred_err_mb > thresh_pred_err_mb) {
- double new_correction_factor = cpi->rate_correction_factor;
+ double new_correction_factor;
const int target_size = cpi->av_per_frame_bandwidth;
int target_bits_per_mb;
// Drop this frame: advance frame counters, and set force_maxqp flag.
diff --git a/vp9/common/vp9_blockd.c b/vp9/common/vp9_blockd.c
index 7bab27d4f..88320584c 100644
--- a/vp9/common/vp9_blockd.c
+++ b/vp9/common/vp9_blockd.c
@@ -66,7 +66,7 @@ void vp9_foreach_transformed_block_in_plane(
for (r = 0; r < max_blocks_high; r += (1 << tx_size)) {
// Skip visiting the sub blocks that are wholly within the UMV.
for (c = 0; c < max_blocks_wide; c += (1 << tx_size)) {
- visit(plane, i, plane_bsize, tx_size, arg);
+ visit(plane, i, r, c, plane_bsize, tx_size, arg);
i += step;
}
i += extra_step;
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index 3d26fb2b5..85b99c4bc 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -270,6 +270,7 @@ static INLINE const vpx_prob *get_y_mode_probs(const MODE_INFO *mi,
}
typedef void (*foreach_transformed_block_visitor)(int plane, int block,
+ int row, int col,
BLOCK_SIZE plane_bsize,
TX_SIZE tx_size,
void *arg);
@@ -283,17 +284,6 @@ void vp9_foreach_transformed_block(
const MACROBLOCKD* const xd, BLOCK_SIZE bsize,
foreach_transformed_block_visitor visit, void *arg);
-static INLINE void txfrm_block_to_raster_xy(BLOCK_SIZE plane_bsize,
- TX_SIZE tx_size, int block,
- int *x, int *y) {
- const int bwl = b_width_log2_lookup[plane_bsize];
- const int tx_cols_log2 = bwl - tx_size;
- const int tx_cols = 1 << tx_cols_log2;
- const int raster_mb = block >> (tx_size << 1);
- *x = (raster_mb & (tx_cols - 1)) << tx_size;
- *y = (raster_mb >> tx_cols_log2) << tx_size;
-}
-
void vp9_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int has_eob,
int aoff, int loff);
diff --git a/vp9/common/vp9_postproc.c b/vp9/common/vp9_postproc.c
index c04cc8f05..5dad81d64 100644
--- a/vp9/common/vp9_postproc.c
+++ b/vp9/common/vp9_postproc.c
@@ -618,15 +618,16 @@ int vp9_post_proc_frame(struct VP9Common *cm,
// Alloc memory for prev_mip in the first frame.
if (cm->current_video_frame == 1) {
- cm->postproc_state.last_base_qindex = cm->base_qindex;
- cm->postproc_state.last_frame_valid = 1;
+ ppstate->last_base_qindex = cm->base_qindex;
+ ppstate->last_frame_valid = 1;
+ }
+
+ if ((flags & VP9D_MFQE) && ppstate->prev_mip == NULL) {
ppstate->prev_mip = vpx_calloc(cm->mi_alloc_size, sizeof(*cm->mip));
if (!ppstate->prev_mip) {
return 1;
}
ppstate->prev_mi = ppstate->prev_mip + cm->mi_stride + 1;
- memset(ppstate->prev_mip, 0,
- cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mip));
}
// Allocate post_proc_buffer_int if needed.
@@ -664,9 +665,9 @@ int vp9_post_proc_frame(struct VP9Common *cm,
"Failed to allocate post-processing buffer");
if ((flags & VP9D_MFQE) && cm->current_video_frame >= 2 &&
- cm->postproc_state.last_frame_valid && cm->bit_depth == 8 &&
- cm->postproc_state.last_base_qindex <= last_q_thresh &&
- cm->base_qindex - cm->postproc_state.last_base_qindex >= q_diff_thresh) {
+ ppstate->last_frame_valid && cm->bit_depth == 8 &&
+ ppstate->last_base_qindex <= last_q_thresh &&
+ cm->base_qindex - ppstate->last_base_qindex >= q_diff_thresh) {
vp9_mfqe(cm);
// TODO(jackychen): Consider whether enable deblocking by default
// if mfqe is enabled. Need to take both the quality and the speed
@@ -692,8 +693,8 @@ int vp9_post_proc_frame(struct VP9Common *cm,
vp8_yv12_copy_frame(cm->frame_to_show, ppbuf);
}
- cm->postproc_state.last_base_qindex = cm->base_qindex;
- cm->postproc_state.last_frame_valid = 1;
+ ppstate->last_base_qindex = cm->base_qindex;
+ ppstate->last_frame_valid = 1;
if (flags & VP9D_ADDNOISE) {
const int noise_level = ppflags->noise_level;
@@ -714,7 +715,8 @@ int vp9_post_proc_frame(struct VP9Common *cm,
dest->uv_width = dest->y_width >> cm->subsampling_x;
dest->uv_height = dest->y_height >> cm->subsampling_y;
- swap_mi_and_prev_mi(cm);
+ if (flags & VP9D_MFQE)
+ swap_mi_and_prev_mi(cm);
return 0;
}
#endif // CONFIG_VP9_POSTPROC
diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c
index 84718e970..67fe18c7e 100644
--- a/vp9/common/vp9_reconinter.c
+++ b/vp9/common/vp9_reconinter.c
@@ -178,7 +178,7 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
// Co-ordinate of containing block to pixel precision.
const int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x));
const int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y));
-#if CONFIG_BETTER_HW_COMPATIBILITY
+#if 0 // CONFIG_BETTER_HW_COMPATIBILITY
assert(xd->mi[0]->sb_type != BLOCK_4X8 &&
xd->mi[0]->sb_type != BLOCK_8X4);
assert(mv_q4.row == mv.row * (1 << (1 - pd->subsampling_y)) &&
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index 846133674..276f14554 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -21,29 +21,6 @@ EOF
}
forward_decls qw/vp9_common_forward_decls/;
-# x86inc.asm had specific constraints. break it out so it's easy to disable.
-# zero all the variables to avoid tricky else conditions.
-$mmx_x86inc = $sse_x86inc = $sse2_x86inc = $ssse3_x86inc = $avx_x86inc =
- $avx2_x86inc = '';
-$mmx_x86_64_x86inc = $sse_x86_64_x86inc = $sse2_x86_64_x86inc =
- $ssse3_x86_64_x86inc = $avx_x86_64_x86inc = $avx2_x86_64_x86inc = '';
-if (vpx_config("CONFIG_USE_X86INC") eq "yes") {
- $mmx_x86inc = 'mmx';
- $sse_x86inc = 'sse';
- $sse2_x86inc = 'sse2';
- $ssse3_x86inc = 'ssse3';
- $avx_x86inc = 'avx';
- $avx2_x86inc = 'avx2';
- if ($opts{arch} eq "x86_64") {
- $mmx_x86_64_x86inc = 'mmx';
- $sse_x86_64_x86inc = 'sse';
- $sse2_x86_64_x86inc = 'sse2';
- $ssse3_x86_64_x86inc = 'ssse3';
- $avx_x86_64_x86inc = 'avx';
- $avx2_x86_64_x86inc = 'avx2';
- }
-}
-
# functions that are 64 bit only.
$mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 = $avx_x86_64 = $avx2_x86_64 = '';
if ($opts{arch} eq "x86_64") {
@@ -202,10 +179,10 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vp9_block_error/;
add_proto qw/int64_t vp9_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd";
- specialize qw/vp9_highbd_block_error/, "$sse2_x86inc";
+ specialize qw/vp9_highbd_block_error sse2/;
add_proto qw/int64_t vp9_highbd_block_error_8bit/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
- specialize qw/vp9_highbd_block_error_8bit/, "$sse2_x86inc", "$avx_x86inc";
+ specialize qw/vp9_highbd_block_error_8bit sse2 avx/;
add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_quantize_fp/;
@@ -217,16 +194,16 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vp9_fdct8x8_quant/;
} else {
add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
- specialize qw/vp9_block_error avx2 msa/, "$sse2_x86inc";
+ specialize qw/vp9_block_error avx2 msa sse2/;
add_proto qw/int64_t vp9_block_error_fp/, "const int16_t *coeff, const int16_t *dqcoeff, int block_size";
- specialize qw/vp9_block_error_fp neon/, "$sse2_x86inc";
+ specialize qw/vp9_block_error_fp neon sse2/;
add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vp9_quantize_fp neon sse2/, "$ssse3_x86_64_x86inc";
+ specialize qw/vp9_quantize_fp neon sse2/, "$ssse3_x86_64";
add_proto qw/void vp9_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vp9_quantize_fp_32x32/, "$ssse3_x86_64_x86inc";
+ specialize qw/vp9_quantize_fp_32x32/, "$ssse3_x86_64";
add_proto qw/void vp9_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_fdct8x8_quant sse2 ssse3 neon/;
@@ -245,7 +222,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vp9_fht16x16 sse2/;
add_proto qw/void vp9_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fwht4x4/, "$sse2_x86inc";
+ specialize qw/vp9_fwht4x4 sse2/;
} else {
add_proto qw/void vp9_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
specialize qw/vp9_fht4x4 sse2 msa/;
@@ -257,7 +234,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vp9_fht16x16 sse2 msa/;
add_proto qw/void vp9_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fwht4x4 msa/, "$sse2_x86inc";
+ specialize qw/vp9_fwht4x4 msa sse2/;
}
#
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index d63912932..3199b98aa 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -589,7 +589,7 @@ static void dec_build_inter_predictors(VPxWorker *const worker, MACROBLOCKD *xd,
// Co-ordinate of containing block to pixel precision.
int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x));
int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y));
-#if CONFIG_BETTER_HW_COMPATIBILITY
+#if 0 // CONFIG_BETTER_HW_COMPATIBILITY
assert(xd->mi[0]->sb_type != BLOCK_4X8 &&
xd->mi[0]->sb_type != BLOCK_8X4);
assert(mv_q4.row == mv->row * (1 << (1 - pd->subsampling_y)) &&
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index ffc6839ad..6869036bc 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -902,10 +902,10 @@ void vp9_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd,
frame_mvs += cm->mi_cols;
}
}
-#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
- if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) &&
- (xd->above_mi == NULL || xd->left_mi == NULL) &&
- !is_inter_block(mi) && need_top_left[mi->uv_mode])
- assert(0);
+#if 0 // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
+ if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) &&
+ (xd->above_mi == NULL || xd->left_mi == NULL) &&
+ !is_inter_block(mi) && need_top_left[mi->uv_mode])
+ assert(0);
#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
}
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index 935c04f3a..9ed980081 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -187,47 +187,45 @@ vpx_codec_err_t vp9_copy_reference_dec(VP9Decoder *pbi,
vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm,
VP9_REFFRAME ref_frame_flag,
YV12_BUFFER_CONFIG *sd) {
- RefBuffer *ref_buf = NULL;
- RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
+ int idx;
+ YV12_BUFFER_CONFIG *ref_buf = NULL;
// TODO(jkoleszar): The decoder doesn't have any real knowledge of what the
// encoder is using the frame buffers for. This is just a stub to keep the
// vpxenc --test-decode functionality working, and will be replaced in a
// later commit that adds VP9-specific controls for this functionality.
+ // (Yunqing) The set_reference control depends on the following setting in
+ // encoder.
+ // cpi->lst_fb_idx = 0;
+ // cpi->gld_fb_idx = 1;
+ // cpi->alt_fb_idx = 2;
if (ref_frame_flag == VP9_LAST_FLAG) {
- ref_buf = &cm->frame_refs[0];
+ idx = cm->ref_frame_map[0];
} else if (ref_frame_flag == VP9_GOLD_FLAG) {
- ref_buf = &cm->frame_refs[1];
+ idx = cm->ref_frame_map[1];
} else if (ref_frame_flag == VP9_ALT_FLAG) {
- ref_buf = &cm->frame_refs[2];
+ idx = cm->ref_frame_map[2];
} else {
vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
"Invalid reference frame");
return cm->error.error_code;
}
- if (!equal_dimensions(ref_buf->buf, sd)) {
+ if (idx < 0 || idx >= FRAME_BUFFERS) {
vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
- "Incorrect buffer dimensions");
- } else {
- int *ref_fb_ptr = &ref_buf->idx;
-
- // Find an empty frame buffer.
- const int free_fb = get_free_fb(cm);
- if (cm->new_fb_idx == INVALID_IDX) {
- vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
- "Unable to find free frame buffer");
- return cm->error.error_code;
- }
+ "Invalid reference frame map");
+ return cm->error.error_code;
+ }
- // Decrease ref_count since it will be increased again in
- // ref_cnt_fb() below.
- --frame_bufs[free_fb].ref_count;
+ // Get the destination reference buffer.
+ ref_buf = &cm->buffer_pool->frame_bufs[idx].buf;
- // Manage the reference counters and copy image.
- ref_cnt_fb(frame_bufs, ref_fb_ptr, free_fb);
- ref_buf->buf = &frame_bufs[*ref_fb_ptr].buf;
- vp8_yv12_copy_frame(sd, ref_buf->buf);
+ if (!equal_dimensions(ref_buf, sd)) {
+ vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
+ "Incorrect buffer dimensions");
+ } else {
+ // Overwrite the reference frame buffer.
+ vp8_yv12_copy_frame(sd, ref_buf);
}
return cm->error.error_code;
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 169943c10..05fbc4194 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -31,6 +31,9 @@ struct optimize_ctx {
ENTROPY_CONTEXT tl[MAX_MB_PLANE][16];
};
+#define HETEROMULT 12
+#define HETEROCOEF 4
+
void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
struct macroblock_plane *const p = &x->plane[plane];
const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];
@@ -67,6 +70,48 @@ static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] ={ {10, 6}, {8, 7}, };
rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);\
}
+// This function eliminates isolated small nonzero coefficients.
+static void eliminate_small_coeff(const tran_low_t *const coeff_ptr,
+ const TX_SIZE tx_size,
+ const int16_t *const zbin_ptr,
+ tran_low_t *const qcoeff_ptr,
+ tran_low_t *const dqcoeff_ptr,
+ uint16_t *const eob_ptr,
+ const int16_t *const scan) {
+ const int zbins[2] =
+ {tx_size == TX_32X32 ? ROUND_POWER_OF_TWO(zbin_ptr[0], 1) : zbin_ptr[0],
+ tx_size == TX_32X32 ? ROUND_POWER_OF_TWO(zbin_ptr[1], 1) : zbin_ptr[1]};
+ const int nzbins[2] = {zbins[0] * -1, zbins[1] * -1};
+ const int hetero_zbins[2] = {(HETEROCOEF + 1) * zbins[0] / HETEROCOEF,
+ (HETEROCOEF + 1) * zbins[1] / HETEROCOEF};
+ const int hetero_nzbins[2] = {hetero_zbins[0] * -1, hetero_zbins[1] * -1};
+ int eob = *eob_ptr, i = eob - 1, rc, tail_count = 0;
+
+ assert(i >= 0);
+ rc = scan[i];
+ while (i > 0 && coeff_ptr[rc] <= hetero_zbins[rc != 0] &&
+ coeff_ptr[rc] >= hetero_nzbins[rc != 0]) {
+ if (coeff_ptr[rc] > zbins[rc != 0] || coeff_ptr[rc] < nzbins[rc != 0])
+ ++tail_count;
+ if ((eob - i) * HETEROMULT >= tail_count * zbins[1]) {
+ eob = i;
+ tail_count = 0;
+ }
+ --i;
+ rc = scan[i];
+ }
+
+ for (i = eob; i < (*eob_ptr); ++i) {
+ rc = scan[i];
+ qcoeff_ptr[rc] = 0;
+ dqcoeff_ptr[rc] = 0;
+ }
+
+ while (eob > 0 && qcoeff_ptr[scan[eob - 1]] == 0) --eob;
+
+ *eob_ptr = eob;
+}
+
// This function is a place holder for now but may ultimately need
// to scan previous tokens to work out the correct context.
static int trellis_get_coeff_context(const int16_t *scan,
@@ -335,7 +380,7 @@ static INLINE void highbd_fdct32x32(int rd_transform, const int16_t *src,
}
#endif // CONFIG_VP9_HIGHBITDEPTH
-void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
+void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
MACROBLOCKD *const xd = &x->e_mbd;
const struct macroblock_plane *const p = &x->plane[plane];
@@ -346,10 +391,8 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
uint16_t *const eob = &p->eobs[block];
const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
- int i, j;
const int16_t *src_diff;
- txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
- src_diff = &p->src_diff[4 * (j * diff_stride + i)];
+ src_diff = &p->src_diff[4 * (row * diff_stride + col)];
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -425,7 +468,7 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
}
}
-void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block,
+void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, int row, int col,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
MACROBLOCKD *const xd = &x->e_mbd;
const struct macroblock_plane *const p = &x->plane[plane];
@@ -435,12 +478,8 @@ void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block,
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
uint16_t *const eob = &p->eobs[block];
const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
- int i, j;
const int16_t *src_diff;
-
- txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
- src_diff = &p->src_diff[4 * (j * diff_stride + i)];
-
+ src_diff = &p->src_diff[4 * (row * diff_stride + col)];
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
switch (tx_size) {
@@ -506,7 +545,7 @@ void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block,
}
}
-void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
+void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
MACROBLOCKD *const xd = &x->e_mbd;
const struct macroblock_plane *const p = &x->plane[plane];
@@ -517,10 +556,8 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
uint16_t *const eob = &p->eobs[block];
const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
- int i, j;
const int16_t *src_diff;
- txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
- src_diff = &p->src_diff[4 * (j * diff_stride + i)];
+ src_diff = &p->src_diff[4 * (row * diff_stride + col)];
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -593,9 +630,14 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
assert(0);
break;
}
+ if (!x->skip_block && *eob > 0) {
+ eliminate_small_coeff(coeff, tx_size, p->zbin, qcoeff, dqcoeff, eob,
+ scan_order->scan);
+ }
}
-static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
+static void encode_block(int plane, int block, int row, int col,
+ BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) {
struct encode_b_args *const args = arg;
MACROBLOCK *const x = args->x;
@@ -604,13 +646,11 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
struct macroblock_plane *const p = &x->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
- int i, j;
uint8_t *dst;
ENTROPY_CONTEXT *a, *l;
- txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
- dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i];
- a = &ctx->ta[plane][i];
- l = &ctx->tl[plane][j];
+ dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];
+ a = &ctx->ta[plane][col];
+ l = &ctx->tl[plane][row];
// TODO(jingning): per transformed block zero forcing only enabled for
// luma component. will integrate chroma components as well.
@@ -629,17 +669,17 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
*a = *l = 0;
return;
} else {
- vp9_xform_quant_fp(x, plane, block, plane_bsize, tx_size);
+ vp9_xform_quant_fp(x, plane, block, row, col, plane_bsize, tx_size);
}
} else {
if (max_txsize_lookup[plane_bsize] == tx_size) {
int txfm_blk_index = (plane << 2) + (block >> (tx_size << 1));
if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_NONE) {
// full forward transform and quantization
- vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+ vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
} else if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_AC_ONLY) {
// fast path forward transform and quantization
- vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
+ vp9_xform_quant_dc(x, plane, block, row, col, plane_bsize, tx_size);
} else {
// skip forward transform
p->eobs[block] = 0;
@@ -647,7 +687,7 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
return;
}
} else {
- vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+ vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
}
}
}
@@ -715,19 +755,18 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
}
}
-static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize,
+static void encode_block_pass1(int plane, int block, int row, int col,
+ BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) {
MACROBLOCK *const x = (MACROBLOCK *)arg;
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblock_plane *const p = &x->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
- int i, j;
uint8_t *dst;
- txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
- dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i];
+ dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];
- vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+ vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
if (p->eobs[block] > 0) {
#if CONFIG_VP9_HIGHBITDEPTH
@@ -774,7 +813,8 @@ void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
}
}
-void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
+void vp9_encode_block_intra(int plane, int block, int row, int col,
+ BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) {
struct encode_b_args* const args = arg;
MACROBLOCK *const x = args->x;
@@ -795,18 +835,16 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
uint16_t *eob = &p->eobs[block];
const int src_stride = p->src.stride;
const int dst_stride = pd->dst.stride;
- int i, j;
struct optimize_ctx *const ctx = args->ctx;
ENTROPY_CONTEXT *a = NULL;
ENTROPY_CONTEXT *l = NULL;
int entropy_ctx = 0;
- txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
- dst = &pd->dst.buf[4 * (j * dst_stride + i)];
- src = &p->src.buf[4 * (j * src_stride + i)];
- src_diff = &p->src_diff[4 * (j * diff_stride + i)];
+ dst = &pd->dst.buf[4 * (row * dst_stride + col)];
+ src = &p->src.buf[4 * (row * src_stride + col)];
+ src_diff = &p->src_diff[4 * (row * diff_stride + col)];
if (args->ctx != NULL) {
- a = &ctx->ta[plane][i];
- l = &ctx->tl[plane][j];
+ a = &ctx->ta[plane][col];
+ l = &ctx->tl[plane][row];
entropy_ctx = combine_entropy_contexts(*a, *l);
}
@@ -826,7 +864,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
vp9_predict_intra_block(xd, bwl, tx_size, mode, x->skip_encode ? src : dst,
x->skip_encode ? src_stride : dst_stride,
- dst, dst_stride, i, j, plane);
+ dst, dst_stride, col, row, plane);
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -926,6 +964,10 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob, scan_order->scan,
scan_order->iscan);
+ if (!x->skip_block && *eob > 0) {
+ eliminate_small_coeff(coeff, tx_size, p->zbin, qcoeff, dqcoeff,
+ eob, scan_order->scan);
+ }
}
if (args->ctx != NULL && !x->skip_recode) {
*a = *l = optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
@@ -942,6 +984,10 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob, scan_order->scan,
scan_order->iscan);
+ if (!x->skip_block && *eob > 0) {
+ eliminate_small_coeff(coeff, tx_size, p->zbin, qcoeff, dqcoeff,
+ eob, scan_order->scan);
+ }
}
if (args->ctx != NULL && !x->skip_recode) {
*a = *l = optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
@@ -958,6 +1004,10 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob, scan_order->scan,
scan_order->iscan);
+ if (!x->skip_block && *eob > 0) {
+ eliminate_small_coeff(coeff, tx_size, p->zbin, qcoeff, dqcoeff,
+ eob, scan_order->scan);
+ }
}
if (args->ctx != NULL && !x->skip_recode) {
*a = *l = optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
@@ -977,6 +1027,10 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob, scan_order->scan,
scan_order->iscan);
+ if (!x->skip_block && *eob > 0) {
+ eliminate_small_coeff(coeff, tx_size, p->zbin, qcoeff, dqcoeff,
+ eob, scan_order->scan);
+ }
}
if (args->ctx != NULL && !x->skip_recode) {
*a = *l = optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h
index 25b0b23e0..62d75a129 100644
--- a/vp9/encoder/vp9_encodemb.h
+++ b/vp9/encoder/vp9_encodemb.h
@@ -25,16 +25,17 @@ struct encode_b_args {
};
void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize);
void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize);
-void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
+void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
-void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block,
+void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, int row, int col,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
-void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
+void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);
-void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
+void vp9_encode_block_intra(int plane, int block, int row, int col,
+ BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg);
void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane,
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 53a3ec7de..66ccc92c4 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -506,10 +506,10 @@ static int get_ul_intra_threshold(VP9_COMMON *cm) {
ret_val = UL_INTRA_THRESH;
break;
case VPX_BITS_10:
- ret_val = UL_INTRA_THRESH >> 2;
+ ret_val = UL_INTRA_THRESH << 2;
break;
case VPX_BITS_12:
- ret_val = UL_INTRA_THRESH >> 4;
+ ret_val = UL_INTRA_THRESH << 4;
break;
default:
assert(0 && "cm->bit_depth should be VPX_BITS_8, "
@@ -532,10 +532,10 @@ static int get_smooth_intra_threshold(VP9_COMMON *cm) {
ret_val = SMOOTH_INTRA_THRESH;
break;
case VPX_BITS_10:
- ret_val = SMOOTH_INTRA_THRESH >> 2;
+ ret_val = SMOOTH_INTRA_THRESH << 4;
break;
case VPX_BITS_12:
- ret_val = SMOOTH_INTRA_THRESH >> 4;
+ ret_val = SMOOTH_INTRA_THRESH << 8;
break;
default:
assert(0 && "cm->bit_depth should be VPX_BITS_8, "
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 7db6ef2b0..5ccadd005 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -932,7 +932,8 @@ struct estimate_block_intra_args {
RD_COST *rdc;
};
-static void estimate_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
+static void estimate_block_intra(int plane, int block, int row, int col,
+ BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) {
struct estimate_block_intra_args* const args = arg;
VP9_COMP *const cpi = args->cpi;
@@ -945,20 +946,19 @@ static void estimate_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
uint8_t *const dst_buf_base = pd->dst.buf;
const int src_stride = p->src.stride;
const int dst_stride = pd->dst.stride;
- int i, j;
RD_COST this_rdc;
- txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
+ (void)block;
- p->src.buf = &src_buf_base[4 * (j * src_stride + i)];
- pd->dst.buf = &dst_buf_base[4 * (j * dst_stride + i)];
+ p->src.buf = &src_buf_base[4 * (row * src_stride + col)];
+ pd->dst.buf = &dst_buf_base[4 * (row * dst_stride + col)];
// Use source buffer as an approximation for the fully reconstructed buffer.
vp9_predict_intra_block(xd, b_width_log2_lookup[plane_bsize],
tx_size, args->mode,
x->skip_encode ? p->src.buf : pd->dst.buf,
x->skip_encode ? src_stride : dst_stride,
pd->dst.buf, dst_stride,
- i, j, plane);
+ col, row, plane);
if (plane == 0) {
int64_t this_sse = INT64_MAX;
@@ -1744,7 +1744,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
#if CONFIG_VP9_HIGHBITDEPTH
const int large_block = bsize > BLOCK_32X32;
#else
- const int large_block = bsize >= BLOCK_32X32;
+ const int large_block =
+ x->sb_is_skin ? bsize > BLOCK_32X32 : bsize >= BLOCK_32X32;
#endif
mi->interp_filter = (filter_ref == SWITCHABLE) ? EIGHTTAP : filter_ref;
vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 6e4ffee92..28530386c 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -498,18 +498,16 @@ static void dist_block(MACROBLOCK *x, int plane, int block, TX_SIZE tx_size,
}
}
-static int rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
+static int rate_block(int plane, int block, int row, int col,
TX_SIZE tx_size, struct rdcost_block_args* args) {
- int x_idx, y_idx;
- txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x_idx, &y_idx);
-
- return cost_coeffs(args->x, plane, block, args->t_above + x_idx,
- args->t_left + y_idx, tx_size,
+ return cost_coeffs(args->x, plane, block, args->t_above + col,
+ args->t_left + row, tx_size,
args->so->scan, args->so->neighbors,
args->use_fast_coef_costing);
}
-static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
+static void block_rd_txfm(int plane, int block, int row, int col,
+ BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) {
struct rdcost_block_args *args = arg;
MACROBLOCK *const x = args->x;
@@ -525,20 +523,20 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
if (!is_inter_block(mi)) {
struct encode_b_args arg = {x, NULL, &mi->skip};
- vp9_encode_block_intra(plane, block, plane_bsize, tx_size, &arg);
+ vp9_encode_block_intra(plane, block, row, col, plane_bsize, tx_size, &arg);
dist_block(x, plane, block, tx_size, &dist, &sse);
} else if (max_txsize_lookup[plane_bsize] == tx_size) {
if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] ==
SKIP_TXFM_NONE) {
// full forward transform and quantization
- vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+ vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
dist_block(x, plane, block, tx_size, &dist, &sse);
} else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] ==
SKIP_TXFM_AC_ONLY) {
// compute DC coefficient
tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
- vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
+ vp9_xform_quant_dc(x, plane, block, row, col, plane_bsize, tx_size);
sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
dist = sse;
if (x->plane[plane].eobs[block]) {
@@ -562,7 +560,7 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
}
} else {
// full forward transform and quantization
- vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+ vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
dist_block(x, plane, block, tx_size, &dist, &sse);
}
@@ -572,7 +570,7 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
return;
}
- rate = rate_block(plane, block, plane_bsize, tx_size, args);
+ rate = rate_block(plane, block, row, col, tx_size, args);
rd1 = RDCOST(x->rdmult, x->rddiv, rate, dist);
rd2 = RDCOST(x->rdmult, x->rddiv, 0, sse);
@@ -2465,9 +2463,9 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (pred_filter_search) {
INTERP_FILTER af = SWITCHABLE, lf = SWITCHABLE;
- if (xd->above_mi)
+ if (xd->above_mi && is_inter_block(xd->above_mi))
af = xd->above_mi->interp_filter;
- if (xd->left_mi)
+ if (xd->left_mi && is_inter_block(xd->left_mi))
lf = xd->left_mi->interp_filter;
if ((this_mode != NEWMV) || (af == lf))
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
index edec755dd..4400da42d 100644
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -319,7 +319,8 @@ struct tokenize_b_args {
TOKENEXTRA **tp;
};
-static void set_entropy_context_b(int plane, int block, BLOCK_SIZE plane_bsize,
+static void set_entropy_context_b(int plane, int block, int row, int col,
+ BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) {
struct tokenize_b_args* const args = arg;
ThreadData *const td = args->td;
@@ -327,10 +328,8 @@ static void set_entropy_context_b(int plane, int block, BLOCK_SIZE plane_bsize,
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblock_plane *p = &x->plane[plane];
struct macroblockd_plane *pd = &xd->plane[plane];
- int aoff, loff;
- txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &aoff, &loff);
vp9_set_contexts(xd, pd, plane_bsize, tx_size, p->eobs[block] > 0,
- aoff, loff);
+ col, row);
}
static INLINE void add_token(TOKENEXTRA **t, const vpx_prob *context_tree,
@@ -353,7 +352,8 @@ static INLINE void add_token_no_extra(TOKENEXTRA **t,
++counts[token];
}
-static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize,
+static void tokenize_b(int plane, int block, int row, int col,
+ BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) {
struct tokenize_b_args* const args = arg;
VP9_COMP *cpi = args->cpi;
@@ -384,11 +384,8 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize,
const int tx_eob = 16 << (tx_size << 1);
int16_t token;
EXTRABIT extra;
- int aoff, loff;
- txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &aoff, &loff);
-
- pt = get_entropy_context(tx_size, pd->above_context + aoff,
- pd->left_context + loff);
+ pt = get_entropy_context(tx_size, pd->above_context + col,
+ pd->left_context + row);
so = get_scan(xd, tx_size, type, block);
scan = so->scan;
nb = so->neighbors;
@@ -426,20 +423,23 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize,
*tp = t;
- vp9_set_contexts(xd, pd, plane_bsize, tx_size, c > 0, aoff, loff);
+ vp9_set_contexts(xd, pd, plane_bsize, tx_size, c > 0, col, row);
}
struct is_skippable_args {
uint16_t *eobs;
int *skippable;
};
-static void is_skippable(int plane, int block,
+
+static void is_skippable(int plane, int block, int row, int col,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
void *argv) {
struct is_skippable_args *args = argv;
(void)plane;
(void)plane_bsize;
(void)tx_size;
+ (void)row;
+ (void)col;
args->skippable[0] &= (!args->eobs[block]);
}
@@ -453,14 +453,15 @@ int vp9_is_skippable_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
return result;
}
-static void has_high_freq_coeff(int plane, int block,
+static void has_high_freq_coeff(int plane, int block, int row, int col,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
void *argv) {
struct is_skippable_args *args = argv;
int eobs = (tx_size == TX_4X4) ? 3 : 10;
(void) plane;
(void) plane_bsize;
-
+ (void) row;
+ (void) col;
*(args->skippable) |= (args->eobs[block] > eobs);
}
diff --git a/vp9/encoder/x86/vp9_dct_ssse3.c b/vp9/encoder/x86/vp9_dct_ssse3.c
index b09eac0d1..1a1d4eabc 100644
--- a/vp9/encoder/x86/vp9_dct_ssse3.c
+++ b/vp9/encoder/x86/vp9_dct_ssse3.c
@@ -9,11 +9,6 @@
*/
#include <assert.h>
-#if defined(_MSC_VER) && _MSC_VER <= 1500
-// Need to include math.h before calling tmmintrin.h/intrin.h
-// in certain versions of MSVS.
-#include <math.h>
-#endif
#include <tmmintrin.h> // SSSE3
#include "./vp9_rtcd.h"
diff --git a/vp9/encoder/x86/vp9_frame_scale_ssse3.c b/vp9/encoder/x86/vp9_frame_scale_ssse3.c
index 38af3b13a..23325d63b 100644
--- a/vp9/encoder/x86/vp9_frame_scale_ssse3.c
+++ b/vp9/encoder/x86/vp9_frame_scale_ssse3.c
@@ -8,11 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#if defined(_MSC_VER) && _MSC_VER <= 1500
-// Need to include math.h before calling tmmintrin.h/intrin.h
-// in certain versions of MSVS.
-#include <math.h>
-#endif
#include <tmmintrin.h> // SSSE3
#include "./vp9_rtcd.h"
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 9ad86cbf8..51c6fbb02 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -105,19 +105,6 @@ struct vpx_codec_alg_priv {
BufferPool *buffer_pool;
};
-static VP9_REFFRAME ref_frame_to_vp9_reframe(vpx_ref_frame_type_t frame) {
- switch (frame) {
- case VP8_LAST_FRAME:
- return VP9_LAST_FLAG;
- case VP8_GOLD_FRAME:
- return VP9_GOLD_FLAG;
- case VP8_ALTR_FRAME:
- return VP9_ALT_FLAG;
- }
- assert(0 && "Invalid Reference Frame");
- return VP9_LAST_FLAG;
-}
-
static vpx_codec_err_t update_error_state(vpx_codec_alg_priv_t *ctx,
const struct vpx_internal_error_info *error) {
const vpx_codec_err_t res = error->error_code;
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index 6531e2c61..08adea026 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -785,7 +785,8 @@ static vpx_codec_err_t ctrl_set_reference(vpx_codec_alg_priv_t *ctx,
FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1;
image2yuvconfig(&frame->img, &sd);
return vp9_set_reference_dec(&frame_worker_data->pbi->common,
- (VP9_REFFRAME)frame->frame_type, &sd);
+ ref_frame_to_vp9_reframe(frame->frame_type),
+ &sd);
} else {
return VPX_CODEC_INVALID_PARAM;
}
diff --git a/vp9/vp9_iface_common.h b/vp9/vp9_iface_common.h
index 938d4224b..44a5e8157 100644
--- a/vp9/vp9_iface_common.h
+++ b/vp9/vp9_iface_common.h
@@ -133,4 +133,16 @@ static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img,
return VPX_CODEC_OK;
}
+static VP9_REFFRAME ref_frame_to_vp9_reframe(vpx_ref_frame_type_t frame) {
+ switch (frame) {
+ case VP8_LAST_FRAME:
+ return VP9_LAST_FLAG;
+ case VP8_GOLD_FRAME:
+ return VP9_GOLD_FLAG;
+ case VP8_ALTR_FRAME:
+ return VP9_ALT_FLAG;
+ }
+ assert(0 && "Invalid Reference Frame");
+ return VP9_LAST_FLAG;
+}
#endif // VP9_VP9_IFACE_COMMON_H_
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index 5f3de8f8a..b8342b9e1 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -101,7 +101,6 @@ ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_block_error_intrin_sse2.c
endif
-ifeq ($(CONFIG_USE_X86INC),yes)
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.asm
ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_error_sse2.asm
@@ -109,13 +108,10 @@ VP9_CX_SRCS-$(HAVE_AVX) += encoder/x86/vp9_highbd_error_avx.asm
else
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_error_sse2.asm
endif
-endif
ifeq ($(ARCH_X86_64),yes)
-ifeq ($(CONFIG_USE_X86INC),yes)
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_quantize_ssse3_x86_64.asm
endif
-endif
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_intrin_sse2.c
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_dct_ssse3.c
diff --git a/vpx/vpx_integer.h b/vpx/vpx_integer.h
index 829c9d132..2945c87ca 100644
--- a/vpx/vpx_integer.h
+++ b/vpx/vpx_integer.h
@@ -24,7 +24,7 @@
#define VPX_INLINE inline
#endif
-#if (defined(_MSC_VER) && (_MSC_VER < 1600)) || defined(VPX_EMULATE_INTTYPES)
+#if defined(VPX_EMULATE_INTTYPES)
typedef signed char int8_t;
typedef signed short int16_t;
typedef signed int int32_t;
@@ -33,16 +33,6 @@ typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
-#if (defined(_MSC_VER) && (_MSC_VER < 1600))
-typedef signed __int64 int64_t;
-typedef unsigned __int64 uint64_t;
-#define INT64_MAX _I64_MAX
-#define INT32_MAX _I32_MAX
-#define INT32_MIN _I32_MIN
-#define INT16_MAX _I16_MAX
-#define INT16_MIN _I16_MIN
-#endif
-
#ifndef _UINTPTR_T_DEFINED
typedef size_t uintptr_t;
#endif
diff --git a/vpx_dsp/vpx_dsp.mk b/vpx_dsp/vpx_dsp.mk
index 018126d4b..93855c5ad 100644
--- a/vpx_dsp/vpx_dsp.mk
+++ b/vpx_dsp/vpx_dsp.mk
@@ -40,18 +40,14 @@ endif
# intra predictions
DSP_SRCS-yes += intrapred.c
-ifeq ($(CONFIG_USE_X86INC),yes)
DSP_SRCS-$(HAVE_SSE) += x86/intrapred_sse2.asm
DSP_SRCS-$(HAVE_SSE2) += x86/intrapred_sse2.asm
DSP_SRCS-$(HAVE_SSSE3) += x86/intrapred_ssse3.asm
DSP_SRCS-$(HAVE_SSSE3) += x86/vpx_subpixel_8t_ssse3.asm
-endif # CONFIG_USE_X86INC
ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
-ifeq ($(CONFIG_USE_X86INC),yes)
DSP_SRCS-$(HAVE_SSE) += x86/highbd_intrapred_sse2.asm
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_intrapred_sse2.asm
-endif # CONFIG_USE_X86INC
endif # CONFIG_VP9_HIGHBITDEPTH
ifneq ($(filter yes,$(CONFIG_POSTPROC) $(CONFIG_VP9_POSTPROC)),)
@@ -87,9 +83,8 @@ ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
DSP_SRCS-$(HAVE_SSE2) += x86/vpx_high_subpixel_8t_sse2.asm
DSP_SRCS-$(HAVE_SSE2) += x86/vpx_high_subpixel_bilinear_sse2.asm
endif
-ifeq ($(CONFIG_USE_X86INC),yes)
+
DSP_SRCS-$(HAVE_SSE2) += x86/vpx_convolve_copy_sse2.asm
-endif
ifeq ($(HAVE_NEON_ASM),yes)
DSP_SRCS-yes += arm/vpx_convolve_copy_neon_asm$(ASM)
@@ -179,10 +174,8 @@ DSP_SRCS-$(HAVE_SSE2) += x86/fwd_txfm_sse2.c
DSP_SRCS-$(HAVE_SSE2) += x86/fwd_txfm_impl_sse2.h
DSP_SRCS-$(HAVE_SSE2) += x86/fwd_dct32x32_impl_sse2.h
ifeq ($(ARCH_X86_64),yes)
-ifeq ($(CONFIG_USE_X86INC),yes)
DSP_SRCS-$(HAVE_SSSE3) += x86/fwd_txfm_ssse3_x86_64.asm
endif
-endif
DSP_SRCS-$(HAVE_AVX2) += x86/fwd_txfm_avx2.c
DSP_SRCS-$(HAVE_AVX2) += x86/fwd_dct32x32_impl_avx2.h
DSP_SRCS-$(HAVE_NEON) += arm/fwd_txfm_neon.c
@@ -197,12 +190,10 @@ DSP_SRCS-yes += inv_txfm.h
DSP_SRCS-yes += inv_txfm.c
DSP_SRCS-$(HAVE_SSE2) += x86/inv_txfm_sse2.h
DSP_SRCS-$(HAVE_SSE2) += x86/inv_txfm_sse2.c
-ifeq ($(CONFIG_USE_X86INC),yes)
DSP_SRCS-$(HAVE_SSE2) += x86/inv_wht_sse2.asm
ifeq ($(ARCH_X86_64),yes)
DSP_SRCS-$(HAVE_SSSE3) += x86/inv_txfm_ssse3_x86_64.asm
endif # ARCH_X86_64
-endif # CONFIG_USE_X86INC
ifeq ($(HAVE_NEON_ASM),yes)
DSP_SRCS-yes += arm/save_reg_neon$(ASM)
@@ -254,11 +245,9 @@ ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_quantize_intrin_sse2.c
endif
ifeq ($(ARCH_X86_64),yes)
-ifeq ($(CONFIG_USE_X86INC),yes)
DSP_SRCS-$(HAVE_SSSE3) += x86/quantize_ssse3_x86_64.asm
DSP_SRCS-$(HAVE_AVX) += x86/quantize_avx_x86_64.asm
endif
-endif
# avg
DSP_SRCS-yes += avg.c
@@ -267,10 +256,8 @@ DSP_SRCS-$(HAVE_NEON) += arm/avg_neon.c
DSP_SRCS-$(HAVE_MSA) += mips/avg_msa.c
DSP_SRCS-$(HAVE_NEON) += arm/hadamard_neon.c
ifeq ($(ARCH_X86_64),yes)
-ifeq ($(CONFIG_USE_X86INC),yes)
DSP_SRCS-$(HAVE_SSSE3) += x86/avg_ssse3_x86_64.asm
endif
-endif
endif # CONFIG_VP9_ENCODER
@@ -292,7 +279,6 @@ DSP_SRCS-$(HAVE_SSE4_1) += x86/sad_sse4.asm
DSP_SRCS-$(HAVE_AVX2) += x86/sad4d_avx2.c
DSP_SRCS-$(HAVE_AVX2) += x86/sad_avx2.c
-ifeq ($(CONFIG_USE_X86INC),yes)
DSP_SRCS-$(HAVE_SSE) += x86/sad4d_sse2.asm
DSP_SRCS-$(HAVE_SSE) += x86/sad_sse2.asm
DSP_SRCS-$(HAVE_SSE2) += x86/sad4d_sse2.asm
@@ -303,7 +289,6 @@ ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_sad4d_sse2.asm
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_sad_sse2.asm
endif # CONFIG_VP9_HIGHBITDEPTH
-endif # CONFIG_USE_X86INC
endif # CONFIG_ENCODERS
@@ -334,17 +319,13 @@ ifeq ($(ARCH_X86_64),yes)
DSP_SRCS-$(HAVE_SSE2) += x86/ssim_opt_x86_64.asm
endif # ARCH_X86_64
-ifeq ($(CONFIG_USE_X86INC),yes)
DSP_SRCS-$(HAVE_SSE) += x86/subpel_variance_sse2.asm
DSP_SRCS-$(HAVE_SSE2) += x86/subpel_variance_sse2.asm # Contains SSE2 and SSSE3
-endif # CONFIG_USE_X86INC
ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_variance_sse2.c
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_variance_impl_sse2.asm
-ifeq ($(CONFIG_USE_X86INC),yes)
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_subpel_variance_impl_sse2.asm
-endif # CONFIG_USE_X86INC
endif # CONFIG_VP9_HIGHBITDEPTH
endif # CONFIG_ENCODERS || CONFIG_POSTPROC || CONFIG_VP9_POSTPROC
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 37239a195..7b61415b6 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -11,29 +11,6 @@ EOF
}
forward_decls qw/vpx_dsp_forward_decls/;
-# x86inc.asm had specific constraints. break it out so it's easy to disable.
-# zero all the variables to avoid tricky else conditions.
-$mmx_x86inc = $sse_x86inc = $sse2_x86inc = $ssse3_x86inc = $avx_x86inc =
- $avx2_x86inc = '';
-$mmx_x86_64_x86inc = $sse_x86_64_x86inc = $sse2_x86_64_x86inc =
- $ssse3_x86_64_x86inc = $avx_x86_64_x86inc = $avx2_x86_64_x86inc = '';
-if (vpx_config("CONFIG_USE_X86INC") eq "yes") {
- $mmx_x86inc = 'mmx';
- $sse_x86inc = 'sse';
- $sse2_x86inc = 'sse2';
- $ssse3_x86inc = 'ssse3';
- $avx_x86inc = 'avx';
- $avx2_x86inc = 'avx2';
- if ($opts{arch} eq "x86_64") {
- $mmx_x86_64_x86inc = 'mmx';
- $sse_x86_64_x86inc = 'sse';
- $sse2_x86_64_x86inc = 'sse2';
- $ssse3_x86_64_x86inc = 'ssse3';
- $avx_x86_64_x86inc = 'avx';
- $avx2_x86_64_x86inc = 'avx2';
- }
-}
-
# optimizations which depend on multiple features
$avx2_ssse3 = '';
if ((vpx_config("HAVE_AVX2") eq "yes") && (vpx_config("HAVE_SSSE3") eq "yes")) {
@@ -55,19 +32,19 @@ if ($opts{arch} eq "x86_64") {
#
add_proto qw/void vpx_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_d207_predictor_4x4/, "$sse2_x86inc";
+specialize qw/vpx_d207_predictor_4x4 sse2/;
add_proto qw/void vpx_d207e_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d207e_predictor_4x4/;
add_proto qw/void vpx_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_d45_predictor_4x4 neon/, "$sse2_x86inc";
+specialize qw/vpx_d45_predictor_4x4 neon sse2/;
add_proto qw/void vpx_d45e_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d45e_predictor_4x4/;
add_proto qw/void vpx_d63_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_d63_predictor_4x4/, "$ssse3_x86inc";
+specialize qw/vpx_d63_predictor_4x4 ssse3/;
add_proto qw/void vpx_d63e_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d63e_predictor_4x4/;
@@ -76,7 +53,7 @@ add_proto qw/void vpx_d63f_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, co
specialize qw/vpx_d63f_predictor_4x4/;
add_proto qw/void vpx_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_h_predictor_4x4 neon dspr2 msa/, "$sse2_x86inc";
+specialize qw/vpx_h_predictor_4x4 neon dspr2 msa sse2/;
add_proto qw/void vpx_he_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_he_predictor_4x4/;
@@ -88,49 +65,49 @@ add_proto qw/void vpx_d135_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, co
specialize qw/vpx_d135_predictor_4x4 neon/;
add_proto qw/void vpx_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_d153_predictor_4x4/, "$ssse3_x86inc";
+specialize qw/vpx_d153_predictor_4x4 ssse3/;
add_proto qw/void vpx_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_v_predictor_4x4 neon msa/, "$sse2_x86inc";
+specialize qw/vpx_v_predictor_4x4 neon msa sse2/;
add_proto qw/void vpx_ve_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_ve_predictor_4x4/;
add_proto qw/void vpx_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_tm_predictor_4x4 neon dspr2 msa/, "$sse2_x86inc";
+specialize qw/vpx_tm_predictor_4x4 neon dspr2 msa sse2/;
add_proto qw/void vpx_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_dc_predictor_4x4 dspr2 msa neon/, "$sse2_x86inc";
+specialize qw/vpx_dc_predictor_4x4 dspr2 msa neon sse2/;
add_proto qw/void vpx_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_dc_top_predictor_4x4 msa neon/, "$sse2_x86inc";
+specialize qw/vpx_dc_top_predictor_4x4 msa neon sse2/;
add_proto qw/void vpx_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_dc_left_predictor_4x4 msa neon/, "$sse2_x86inc";
+specialize qw/vpx_dc_left_predictor_4x4 msa neon sse2/;
add_proto qw/void vpx_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_dc_128_predictor_4x4 msa neon/, "$sse2_x86inc";
+specialize qw/vpx_dc_128_predictor_4x4 msa neon sse2/;
add_proto qw/void vpx_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_d207_predictor_8x8/, "$ssse3_x86inc";
+specialize qw/vpx_d207_predictor_8x8 ssse3/;
add_proto qw/void vpx_d207e_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d207e_predictor_8x8/;
add_proto qw/void vpx_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_d45_predictor_8x8 neon/, "$sse2_x86inc";
+specialize qw/vpx_d45_predictor_8x8 neon sse2/;
add_proto qw/void vpx_d45e_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d45e_predictor_8x8/;
add_proto qw/void vpx_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_d63_predictor_8x8/, "$ssse3_x86inc";
+specialize qw/vpx_d63_predictor_8x8 ssse3/;
add_proto qw/void vpx_d63e_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d63e_predictor_8x8/;
add_proto qw/void vpx_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_h_predictor_8x8 neon dspr2 msa/, "$sse2_x86inc";
+specialize qw/vpx_h_predictor_8x8 neon dspr2 msa sse2/;
add_proto qw/void vpx_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d117_predictor_8x8/;
@@ -139,46 +116,46 @@ add_proto qw/void vpx_d135_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, co
specialize qw/vpx_d135_predictor_8x8/;
add_proto qw/void vpx_d153_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_d153_predictor_8x8/, "$ssse3_x86inc";
+specialize qw/vpx_d153_predictor_8x8 ssse3/;
add_proto qw/void vpx_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_v_predictor_8x8 neon msa/, "$sse2_x86inc";
+specialize qw/vpx_v_predictor_8x8 neon msa sse2/;
add_proto qw/void vpx_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_tm_predictor_8x8 neon dspr2 msa/, "$sse2_x86inc";
+specialize qw/vpx_tm_predictor_8x8 neon dspr2 msa sse2/;
add_proto qw/void vpx_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_dc_predictor_8x8 dspr2 neon msa/, "$sse2_x86inc";
+specialize qw/vpx_dc_predictor_8x8 dspr2 neon msa sse2/;
add_proto qw/void vpx_dc_top_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_dc_top_predictor_8x8 neon msa/, "$sse2_x86inc";
+specialize qw/vpx_dc_top_predictor_8x8 neon msa sse2/;
add_proto qw/void vpx_dc_left_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_dc_left_predictor_8x8 neon msa/, "$sse2_x86inc";
+specialize qw/vpx_dc_left_predictor_8x8 neon msa sse2/;
add_proto qw/void vpx_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_dc_128_predictor_8x8 neon msa/, "$sse2_x86inc";
+specialize qw/vpx_dc_128_predictor_8x8 neon msa sse2/;
add_proto qw/void vpx_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_d207_predictor_16x16/, "$ssse3_x86inc";
+specialize qw/vpx_d207_predictor_16x16 ssse3/;
add_proto qw/void vpx_d207e_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d207e_predictor_16x16/;
add_proto qw/void vpx_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_d45_predictor_16x16 neon/, "$ssse3_x86inc";
+specialize qw/vpx_d45_predictor_16x16 neon ssse3/;
add_proto qw/void vpx_d45e_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d45e_predictor_16x16/;
add_proto qw/void vpx_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_d63_predictor_16x16/, "$ssse3_x86inc";
+specialize qw/vpx_d63_predictor_16x16 ssse3/;
add_proto qw/void vpx_d63e_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d63e_predictor_16x16/;
add_proto qw/void vpx_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_h_predictor_16x16 neon dspr2 msa/, "$sse2_x86inc";
+specialize qw/vpx_h_predictor_16x16 neon dspr2 msa sse2/;
add_proto qw/void vpx_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d117_predictor_16x16/;
@@ -187,46 +164,46 @@ add_proto qw/void vpx_d135_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride,
specialize qw/vpx_d135_predictor_16x16/;
add_proto qw/void vpx_d153_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_d153_predictor_16x16/, "$ssse3_x86inc";
+specialize qw/vpx_d153_predictor_16x16 ssse3/;
add_proto qw/void vpx_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_v_predictor_16x16 neon msa/, "$sse2_x86inc";
+specialize qw/vpx_v_predictor_16x16 neon msa sse2/;
add_proto qw/void vpx_tm_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_tm_predictor_16x16 neon msa/, "$sse2_x86inc";
+specialize qw/vpx_tm_predictor_16x16 neon msa sse2/;
add_proto qw/void vpx_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_dc_predictor_16x16 dspr2 neon msa/, "$sse2_x86inc";
+specialize qw/vpx_dc_predictor_16x16 dspr2 neon msa sse2/;
add_proto qw/void vpx_dc_top_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_dc_top_predictor_16x16 neon msa/, "$sse2_x86inc";
+specialize qw/vpx_dc_top_predictor_16x16 neon msa sse2/;
add_proto qw/void vpx_dc_left_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_dc_left_predictor_16x16 neon msa/, "$sse2_x86inc";
+specialize qw/vpx_dc_left_predictor_16x16 neon msa sse2/;
add_proto qw/void vpx_dc_128_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_dc_128_predictor_16x16 neon msa/, "$sse2_x86inc";
+specialize qw/vpx_dc_128_predictor_16x16 neon msa sse2/;
add_proto qw/void vpx_d207_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_d207_predictor_32x32/, "$ssse3_x86inc";
+specialize qw/vpx_d207_predictor_32x32 ssse3/;
add_proto qw/void vpx_d207e_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d207e_predictor_32x32/;
add_proto qw/void vpx_d45_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_d45_predictor_32x32/, "$ssse3_x86inc";
+specialize qw/vpx_d45_predictor_32x32 ssse3/;
add_proto qw/void vpx_d45e_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d45e_predictor_32x32/;
add_proto qw/void vpx_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_d63_predictor_32x32/, "$ssse3_x86inc";
+specialize qw/vpx_d63_predictor_32x32 ssse3/;
add_proto qw/void vpx_d63e_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d63e_predictor_32x32/;
add_proto qw/void vpx_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_h_predictor_32x32 neon msa/, "$sse2_x86inc";
+specialize qw/vpx_h_predictor_32x32 neon msa sse2/;
add_proto qw/void vpx_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d117_predictor_32x32/;
@@ -235,25 +212,25 @@ add_proto qw/void vpx_d135_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride,
specialize qw/vpx_d135_predictor_32x32/;
add_proto qw/void vpx_d153_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_d153_predictor_32x32/, "$ssse3_x86inc";
+specialize qw/vpx_d153_predictor_32x32 ssse3/;
add_proto qw/void vpx_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_v_predictor_32x32 neon msa/, "$sse2_x86inc";
+specialize qw/vpx_v_predictor_32x32 neon msa sse2/;
add_proto qw/void vpx_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_tm_predictor_32x32 neon msa/, "$sse2_x86inc";
+specialize qw/vpx_tm_predictor_32x32 neon msa sse2/;
add_proto qw/void vpx_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_dc_predictor_32x32 msa neon/, "$sse2_x86inc";
+specialize qw/vpx_dc_predictor_32x32 msa neon sse2/;
add_proto qw/void vpx_dc_top_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_dc_top_predictor_32x32 msa neon/, "$sse2_x86inc";
+specialize qw/vpx_dc_top_predictor_32x32 msa neon sse2/;
add_proto qw/void vpx_dc_left_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_dc_left_predictor_32x32 msa neon/, "$sse2_x86inc";
+specialize qw/vpx_dc_left_predictor_32x32 msa neon sse2/;
add_proto qw/void vpx_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_dc_128_predictor_32x32 msa neon/, "$sse2_x86inc";
+specialize qw/vpx_dc_128_predictor_32x32 msa neon sse2/;
# High bitdepth functions
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
@@ -288,13 +265,13 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_highbd_d153_predictor_4x4/;
add_proto qw/void vpx_highbd_v_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_v_predictor_4x4/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_v_predictor_4x4 sse2/;
add_proto qw/void vpx_highbd_tm_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_tm_predictor_4x4/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_tm_predictor_4x4 sse2/;
add_proto qw/void vpx_highbd_dc_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_dc_predictor_4x4/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_dc_predictor_4x4 sse2/;
add_proto qw/void vpx_highbd_dc_top_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vpx_highbd_dc_top_predictor_4x4/;
@@ -336,13 +313,13 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_highbd_d153_predictor_8x8/;
add_proto qw/void vpx_highbd_v_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_v_predictor_8x8/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_v_predictor_8x8 sse2/;
add_proto qw/void vpx_highbd_tm_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_tm_predictor_8x8/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_tm_predictor_8x8 sse2/;
add_proto qw/void vpx_highbd_dc_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_dc_predictor_8x8/, "$sse2_x86inc";;
+ specialize qw/vpx_highbd_dc_predictor_8x8 sse2/;;
add_proto qw/void vpx_highbd_dc_top_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vpx_highbd_dc_top_predictor_8x8/;
@@ -384,13 +361,13 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_highbd_d153_predictor_16x16/;
add_proto qw/void vpx_highbd_v_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_v_predictor_16x16/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_v_predictor_16x16 sse2/;
add_proto qw/void vpx_highbd_tm_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_tm_predictor_16x16/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_tm_predictor_16x16 sse2/;
add_proto qw/void vpx_highbd_dc_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_dc_predictor_16x16/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_dc_predictor_16x16 sse2/;
add_proto qw/void vpx_highbd_dc_top_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vpx_highbd_dc_top_predictor_16x16/;
@@ -432,13 +409,13 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_highbd_d153_predictor_32x32/;
add_proto qw/void vpx_highbd_v_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_v_predictor_32x32/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_v_predictor_32x32 sse2/;
add_proto qw/void vpx_highbd_tm_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_tm_predictor_32x32/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_tm_predictor_32x32 sse2/;
add_proto qw/void vpx_highbd_dc_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_dc_predictor_32x32/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_dc_predictor_32x32 sse2/;
add_proto qw/void vpx_highbd_dc_top_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vpx_highbd_dc_top_predictor_32x32/;
@@ -454,10 +431,10 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
# Sub Pixel Filters
#
add_proto qw/void vpx_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vpx_convolve_copy neon dspr2 msa/, "$sse2_x86inc";
+specialize qw/vpx_convolve_copy neon dspr2 msa sse2/;
add_proto qw/void vpx_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vpx_convolve_avg neon dspr2 msa/, "$sse2_x86inc";
+specialize qw/vpx_convolve_avg neon dspr2 msa sse2/;
add_proto qw/void vpx_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vpx_convolve8 sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";
@@ -500,10 +477,10 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
# Sub Pixel Filters
#
add_proto qw/void vpx_highbd_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
- specialize qw/vpx_highbd_convolve_copy/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_convolve_copy sse2/;
add_proto qw/void vpx_highbd_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
- specialize qw/vpx_highbd_convolve_avg/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_convolve_avg sse2/;
add_proto qw/void vpx_highbd_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
specialize qw/vpx_highbd_convolve8/, "$sse2_x86_64";
@@ -674,7 +651,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_fdct4x4_1 sse2/;
add_proto qw/void vpx_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vpx_fdct8x8 sse2 neon msa/, "$ssse3_x86_64_x86inc";
+ specialize qw/vpx_fdct8x8 sse2 neon msa/, "$ssse3_x86_64";
add_proto qw/void vpx_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vpx_fdct8x8_1 sse2 neon msa/;
@@ -706,7 +683,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_iwht4x4_1_add/;
add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vpx_iwht4x4_16_add/, "$sse2_x86inc";
+ specialize qw/vpx_iwht4x4_16_add sse2/;
add_proto qw/void vpx_highbd_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vpx_highbd_idct4x4_1_add/;
@@ -792,10 +769,10 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_idct4x4_1_add sse2/;
add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vpx_idct8x8_64_add sse2/, "$ssse3_x86_64_x86inc";
+ specialize qw/vpx_idct8x8_64_add sse2/, "$ssse3_x86_64";
add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vpx_idct8x8_12_add sse2/, "$ssse3_x86_64_x86inc";
+ specialize qw/vpx_idct8x8_12_add sse2/, "$ssse3_x86_64";
add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vpx_idct8x8_1_add sse2/;
@@ -810,15 +787,15 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_idct16x16_1_add sse2/;
add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vpx_idct32x32_1024_add sse2/, "$ssse3_x86_64_x86inc";
+ specialize qw/vpx_idct32x32_1024_add sse2/, "$ssse3_x86_64";
add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vpx_idct32x32_135_add sse2/, "$ssse3_x86_64_x86inc";
+ specialize qw/vpx_idct32x32_135_add sse2/, "$ssse3_x86_64";
# Need to add 135 eob idct32x32 implementations.
$vpx_idct32x32_135_add_sse2=vpx_idct32x32_1024_add_sse2;
add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vpx_idct32x32_34_add sse2/, "$ssse3_x86_64_x86inc";
+ specialize qw/vpx_idct32x32_34_add sse2/, "$ssse3_x86_64";
add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vpx_idct32x32_1_add sse2/;
@@ -893,10 +870,10 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_idct8x8_1_add sse2 neon dspr2 msa/;
add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vpx_idct8x8_64_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc";
+ specialize qw/vpx_idct8x8_64_add sse2 neon dspr2 msa/, "$ssse3_x86_64";
add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vpx_idct8x8_12_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc";
+ specialize qw/vpx_idct8x8_12_add sse2 neon dspr2 msa/, "$ssse3_x86_64";
add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vpx_idct16x16_1_add sse2 neon dspr2 msa/;
@@ -908,10 +885,10 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_idct16x16_10_add sse2 neon dspr2 msa/;
add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vpx_idct32x32_1024_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc";
+ specialize qw/vpx_idct32x32_1024_add sse2 neon dspr2 msa/, "$ssse3_x86_64";
add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vpx_idct32x32_135_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc";
+ specialize qw/vpx_idct32x32_135_add sse2 neon dspr2 msa/, "$ssse3_x86_64";
# Need to add 135 eob idct32x32 implementations.
$vpx_idct32x32_135_add_sse2=vpx_idct32x32_1024_add_sse2;
$vpx_idct32x32_135_add_neon=vpx_idct32x32_1024_add_neon;
@@ -919,7 +896,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
$vpx_idct32x32_135_add_msa=vpx_idct32x32_1024_add_msa;
add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vpx_idct32x32_34_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc";
+ specialize qw/vpx_idct32x32_34_add sse2 neon dspr2 msa/, "$ssse3_x86_64";
# Need to add 34 eob idct32x32 neon implementation.
$vpx_idct32x32_34_add_neon=vpx_idct32x32_1024_add_neon;
@@ -930,7 +907,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_iwht4x4_1_add msa/;
add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vpx_iwht4x4_16_add msa/, "$sse2_x86inc";
+ specialize qw/vpx_iwht4x4_16_add msa sse2/;
} # CONFIG_EMULATE_HARDWARE
} # CONFIG_VP9_HIGHBITDEPTH
} # CONFIG_VP9
@@ -940,10 +917,10 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
#
if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
add_proto qw/void vpx_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vpx_quantize_b sse2/, "$ssse3_x86_64_x86inc", "$avx_x86_64_x86inc";
+ specialize qw/vpx_quantize_b sse2/, "$ssse3_x86_64", "$avx_x86_64";
add_proto qw/void vpx_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vpx_quantize_b_32x32/, "$ssse3_x86_64_x86inc", "$avx_x86_64_x86inc";
+ specialize qw/vpx_quantize_b_32x32/, "$ssse3_x86_64", "$avx_x86_64";
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vpx_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
@@ -959,49 +936,49 @@ if (vpx_config("CONFIG_ENCODERS") eq "yes") {
# Block subtraction
#
add_proto qw/void vpx_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
-specialize qw/vpx_subtract_block neon msa/, "$sse2_x86inc";
+specialize qw/vpx_subtract_block neon msa sse2/;
#
# Single block SAD
#
add_proto qw/unsigned int vpx_sad64x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad64x64 avx2 neon msa/, "$sse2_x86inc";
+specialize qw/vpx_sad64x64 avx2 neon msa sse2/;
add_proto qw/unsigned int vpx_sad64x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad64x32 avx2 msa/, "$sse2_x86inc";
+specialize qw/vpx_sad64x32 avx2 msa sse2/;
add_proto qw/unsigned int vpx_sad32x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad32x64 avx2 msa/, "$sse2_x86inc";
+specialize qw/vpx_sad32x64 avx2 msa sse2/;
add_proto qw/unsigned int vpx_sad32x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad32x32 avx2 neon msa/, "$sse2_x86inc";
+specialize qw/vpx_sad32x32 avx2 neon msa sse2/;
add_proto qw/unsigned int vpx_sad32x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad32x16 avx2 msa/, "$sse2_x86inc";
+specialize qw/vpx_sad32x16 avx2 msa sse2/;
add_proto qw/unsigned int vpx_sad16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad16x32 msa/, "$sse2_x86inc";
+specialize qw/vpx_sad16x32 msa sse2/;
add_proto qw/unsigned int vpx_sad16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad16x16 media neon msa/, "$sse2_x86inc";
+specialize qw/vpx_sad16x16 media neon msa sse2/;
add_proto qw/unsigned int vpx_sad16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad16x8 neon msa/, "$sse2_x86inc";
+specialize qw/vpx_sad16x8 neon msa sse2/;
add_proto qw/unsigned int vpx_sad8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad8x16 neon msa/, "$sse2_x86inc";
+specialize qw/vpx_sad8x16 neon msa sse2/;
add_proto qw/unsigned int vpx_sad8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad8x8 neon msa/, "$sse2_x86inc";
+specialize qw/vpx_sad8x8 neon msa sse2/;
add_proto qw/unsigned int vpx_sad8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad8x4 msa/, "$sse2_x86inc";
+specialize qw/vpx_sad8x4 msa sse2/;
add_proto qw/unsigned int vpx_sad4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad4x8 msa/, "$sse2_x86inc";
+specialize qw/vpx_sad4x8 msa sse2/;
add_proto qw/unsigned int vpx_sad4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad4x4 neon msa/, "$sse2_x86inc";
+specialize qw/vpx_sad4x4 neon msa sse2/;
#
# Avg
@@ -1017,7 +994,7 @@ if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
specialize qw/vpx_minmax_8x8 sse2 neon/;
add_proto qw/void vpx_hadamard_8x8/, "const int16_t *src_diff, int src_stride, int16_t *coeff";
- specialize qw/vpx_hadamard_8x8 sse2 neon/, "$ssse3_x86_64_x86inc";
+ specialize qw/vpx_hadamard_8x8 sse2 neon/, "$ssse3_x86_64";
add_proto qw/void vpx_hadamard_16x16/, "const int16_t *src_diff, int src_stride, int16_t *coeff";
specialize qw/vpx_hadamard_16x16 sse2 neon/;
@@ -1036,43 +1013,43 @@ if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
} # CONFIG_VP9_ENCODER
add_proto qw/unsigned int vpx_sad64x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad64x64_avg avx2 msa/, "$sse2_x86inc";
+specialize qw/vpx_sad64x64_avg avx2 msa sse2/;
add_proto qw/unsigned int vpx_sad64x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad64x32_avg avx2 msa/, "$sse2_x86inc";
+specialize qw/vpx_sad64x32_avg avx2 msa sse2/;
add_proto qw/unsigned int vpx_sad32x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad32x64_avg avx2 msa/, "$sse2_x86inc";
+specialize qw/vpx_sad32x64_avg avx2 msa sse2/;
add_proto qw/unsigned int vpx_sad32x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad32x32_avg avx2 msa/, "$sse2_x86inc";
+specialize qw/vpx_sad32x32_avg avx2 msa sse2/;
add_proto qw/unsigned int vpx_sad32x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad32x16_avg avx2 msa/, "$sse2_x86inc";
+specialize qw/vpx_sad32x16_avg avx2 msa sse2/;
add_proto qw/unsigned int vpx_sad16x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad16x32_avg msa/, "$sse2_x86inc";
+specialize qw/vpx_sad16x32_avg msa sse2/;
add_proto qw/unsigned int vpx_sad16x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad16x16_avg msa/, "$sse2_x86inc";
+specialize qw/vpx_sad16x16_avg msa sse2/;
add_proto qw/unsigned int vpx_sad16x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad16x8_avg msa/, "$sse2_x86inc";
+specialize qw/vpx_sad16x8_avg msa sse2/;
add_proto qw/unsigned int vpx_sad8x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad8x16_avg msa/, "$sse2_x86inc";
+specialize qw/vpx_sad8x16_avg msa sse2/;
add_proto qw/unsigned int vpx_sad8x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad8x8_avg msa/, "$sse2_x86inc";
+specialize qw/vpx_sad8x8_avg msa sse2/;
add_proto qw/unsigned int vpx_sad8x4_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad8x4_avg msa/, "$sse2_x86inc";
+specialize qw/vpx_sad8x4_avg msa sse2/;
add_proto qw/unsigned int vpx_sad4x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad4x8_avg msa/, "$sse2_x86inc";
+specialize qw/vpx_sad4x8_avg msa sse2/;
add_proto qw/unsigned int vpx_sad4x4_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad4x4_avg msa/, "$sse2_x86inc";
+specialize qw/vpx_sad4x4_avg msa sse2/;
#
# Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally
@@ -1131,43 +1108,43 @@ specialize qw/vpx_sad4x4x8 sse4_1 msa/;
# Multi-block SAD, comparing a reference to N independent blocks
#
add_proto qw/void vpx_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad64x64x4d avx2 neon msa/, "$sse2_x86inc";
+specialize qw/vpx_sad64x64x4d avx2 neon msa sse2/;
add_proto qw/void vpx_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad64x32x4d msa/, "$sse2_x86inc";
+specialize qw/vpx_sad64x32x4d msa sse2/;
add_proto qw/void vpx_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad32x64x4d msa/, "$sse2_x86inc";
+specialize qw/vpx_sad32x64x4d msa sse2/;
add_proto qw/void vpx_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad32x32x4d avx2 neon msa/, "$sse2_x86inc";
+specialize qw/vpx_sad32x32x4d avx2 neon msa sse2/;
add_proto qw/void vpx_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad32x16x4d msa/, "$sse2_x86inc";
+specialize qw/vpx_sad32x16x4d msa sse2/;
add_proto qw/void vpx_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad16x32x4d msa/, "$sse2_x86inc";
+specialize qw/vpx_sad16x32x4d msa sse2/;
add_proto qw/void vpx_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad16x16x4d neon msa/, "$sse2_x86inc";
+specialize qw/vpx_sad16x16x4d neon msa sse2/;
add_proto qw/void vpx_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad16x8x4d msa/, "$sse2_x86inc";
+specialize qw/vpx_sad16x8x4d msa sse2/;
add_proto qw/void vpx_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad8x16x4d msa/, "$sse2_x86inc";
+specialize qw/vpx_sad8x16x4d msa sse2/;
add_proto qw/void vpx_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad8x8x4d msa/, "$sse2_x86inc";
+specialize qw/vpx_sad8x8x4d msa sse2/;
add_proto qw/void vpx_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad8x4x4d msa/, "$sse2_x86inc";
+specialize qw/vpx_sad8x4x4d msa sse2/;
add_proto qw/void vpx_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad4x8x4d msa/, "$sse2_x86inc";
+specialize qw/vpx_sad4x8x4d msa sse2/;
add_proto qw/void vpx_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad4x4x4d msa/, "$sse2_x86inc";
+specialize qw/vpx_sad4x4x4d msa sse2/;
#
# Structured Similarity (SSIM)
@@ -1191,37 +1168,37 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
# Single block SAD
#
add_proto qw/unsigned int vpx_highbd_sad64x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vpx_highbd_sad64x64/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_sad64x64 sse2/;
add_proto qw/unsigned int vpx_highbd_sad64x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vpx_highbd_sad64x32/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_sad64x32 sse2/;
add_proto qw/unsigned int vpx_highbd_sad32x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vpx_highbd_sad32x64/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_sad32x64 sse2/;
add_proto qw/unsigned int vpx_highbd_sad32x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vpx_highbd_sad32x32/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_sad32x32 sse2/;
add_proto qw/unsigned int vpx_highbd_sad32x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vpx_highbd_sad32x16/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_sad32x16 sse2/;
add_proto qw/unsigned int vpx_highbd_sad16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vpx_highbd_sad16x32/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_sad16x32 sse2/;
add_proto qw/unsigned int vpx_highbd_sad16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vpx_highbd_sad16x16/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_sad16x16 sse2/;
add_proto qw/unsigned int vpx_highbd_sad16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vpx_highbd_sad16x8/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_sad16x8 sse2/;
add_proto qw/unsigned int vpx_highbd_sad8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vpx_highbd_sad8x16/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_sad8x16 sse2/;
add_proto qw/unsigned int vpx_highbd_sad8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vpx_highbd_sad8x8/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_sad8x8 sse2/;
add_proto qw/unsigned int vpx_highbd_sad8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/vpx_highbd_sad8x4/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_sad8x4 sse2/;
add_proto qw/unsigned int vpx_highbd_sad4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
specialize qw/vpx_highbd_sad4x8/;
@@ -1240,37 +1217,37 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_highbd_minmax_8x8/;
add_proto qw/unsigned int vpx_highbd_sad64x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vpx_highbd_sad64x64_avg/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_sad64x64_avg sse2/;
add_proto qw/unsigned int vpx_highbd_sad64x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vpx_highbd_sad64x32_avg/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_sad64x32_avg sse2/;
add_proto qw/unsigned int vpx_highbd_sad32x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vpx_highbd_sad32x64_avg/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_sad32x64_avg sse2/;
add_proto qw/unsigned int vpx_highbd_sad32x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vpx_highbd_sad32x32_avg/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_sad32x32_avg sse2/;
add_proto qw/unsigned int vpx_highbd_sad32x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vpx_highbd_sad32x16_avg/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_sad32x16_avg sse2/;
add_proto qw/unsigned int vpx_highbd_sad16x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vpx_highbd_sad16x32_avg/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_sad16x32_avg sse2/;
add_proto qw/unsigned int vpx_highbd_sad16x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vpx_highbd_sad16x16_avg/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_sad16x16_avg sse2/;
add_proto qw/unsigned int vpx_highbd_sad16x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vpx_highbd_sad16x8_avg/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_sad16x8_avg sse2/;
add_proto qw/unsigned int vpx_highbd_sad8x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vpx_highbd_sad8x16_avg/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_sad8x16_avg sse2/;
add_proto qw/unsigned int vpx_highbd_sad8x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vpx_highbd_sad8x8_avg/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_sad8x8_avg sse2/;
add_proto qw/unsigned int vpx_highbd_sad8x4_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/vpx_highbd_sad8x4_avg/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_sad8x4_avg sse2/;
add_proto qw/unsigned int vpx_highbd_sad4x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vpx_highbd_sad4x8_avg/;
@@ -1335,43 +1312,43 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
# Multi-block SAD, comparing a reference to N independent blocks
#
add_proto qw/void vpx_highbd_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad64x64x4d/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_sad64x64x4d sse2/;
add_proto qw/void vpx_highbd_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad64x32x4d/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_sad64x32x4d sse2/;
add_proto qw/void vpx_highbd_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad32x64x4d/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_sad32x64x4d sse2/;
add_proto qw/void vpx_highbd_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad32x32x4d/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_sad32x32x4d sse2/;
add_proto qw/void vpx_highbd_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad32x16x4d/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_sad32x16x4d sse2/;
add_proto qw/void vpx_highbd_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad16x32x4d/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_sad16x32x4d sse2/;
add_proto qw/void vpx_highbd_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad16x16x4d/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_sad16x16x4d sse2/;
add_proto qw/void vpx_highbd_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad16x8x4d/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_sad16x8x4d sse2/;
add_proto qw/void vpx_highbd_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad8x16x4d/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_sad8x16x4d sse2/;
add_proto qw/void vpx_highbd_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad8x8x4d/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_sad8x8x4d sse2/;
add_proto qw/void vpx_highbd_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad8x4x4d/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_sad8x4x4d sse2/;
add_proto qw/void vpx_highbd_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad4x8x4d/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_sad4x8x4d sse2/;
add_proto qw/void vpx_highbd_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/vpx_highbd_sad4x4x4d/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_sad4x4x4d sse2/;
#
# Structured Similarity (SSIM)
@@ -1460,82 +1437,82 @@ add_proto qw/void vpx_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred,
# Subpixel Variance
#
add_proto qw/uint32_t vpx_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance64x64 avx2 neon msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_variance64x64 avx2 neon msa sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance64x32 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_variance64x32 msa sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance32x64 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_variance32x64 msa sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance32x32 avx2 neon msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_variance32x32 avx2 neon msa sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance32x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_variance32x16 msa sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance16x32 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_variance16x32 msa sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance16x16 media neon msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_variance16x16 media neon msa sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance16x8 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_variance16x8 msa sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance8x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_variance8x16 msa sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance8x8 media neon msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_variance8x8 media neon msa sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance8x4 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_variance8x4 msa sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance4x8 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_variance4x8 msa sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance4x4 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_variance4x4 msa sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance64x64 avx2 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_avg_variance64x64 avx2 msa sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance64x32 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_avg_variance64x32 msa sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance32x64 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_avg_variance32x64 msa sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance32x32 avx2 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_avg_variance32x32 avx2 msa sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance32x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_avg_variance32x16 msa sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance16x32 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_avg_variance16x32 msa sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance16x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_avg_variance16x16 msa sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance16x8 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_avg_variance16x8 msa sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance8x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_avg_variance8x16 msa sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance8x8 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_avg_variance8x8 msa sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance8x4 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_avg_variance8x4 msa sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance4x8 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_avg_variance4x8 msa sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance4x4 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_avg_variance4x4 msa sse2 ssse3/;
#
# Specialty Subpixel
@@ -1691,217 +1668,217 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
# Subpixel Variance
#
add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_12_sub_pixel_variance64x64/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_12_sub_pixel_variance64x64 sse2/;
add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_12_sub_pixel_variance64x32/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_12_sub_pixel_variance64x32 sse2/;
add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_12_sub_pixel_variance32x64/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_12_sub_pixel_variance32x64 sse2/;
add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_12_sub_pixel_variance32x32/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_12_sub_pixel_variance32x32 sse2/;
add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_12_sub_pixel_variance32x16/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_12_sub_pixel_variance32x16 sse2/;
add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_12_sub_pixel_variance16x32/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_12_sub_pixel_variance16x32 sse2/;
add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_12_sub_pixel_variance16x16/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_12_sub_pixel_variance16x16 sse2/;
add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_12_sub_pixel_variance16x8/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_12_sub_pixel_variance16x8 sse2/;
add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_12_sub_pixel_variance8x16/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_12_sub_pixel_variance8x16 sse2/;
add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_12_sub_pixel_variance8x8/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_12_sub_pixel_variance8x8 sse2/;
add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_12_sub_pixel_variance8x4/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_12_sub_pixel_variance8x4 sse2/;
add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_10_sub_pixel_variance64x64/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_10_sub_pixel_variance64x64 sse2/;
add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_10_sub_pixel_variance64x32/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_10_sub_pixel_variance64x32 sse2/;
add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_10_sub_pixel_variance32x64/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_10_sub_pixel_variance32x64 sse2/;
add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_10_sub_pixel_variance32x32/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_10_sub_pixel_variance32x32 sse2/;
add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_10_sub_pixel_variance32x16/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_10_sub_pixel_variance32x16 sse2/;
add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_10_sub_pixel_variance16x32/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_10_sub_pixel_variance16x32 sse2/;
add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_10_sub_pixel_variance16x16/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_10_sub_pixel_variance16x16 sse2/;
add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_10_sub_pixel_variance16x8/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_10_sub_pixel_variance16x8 sse2/;
add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_10_sub_pixel_variance8x16/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_10_sub_pixel_variance8x16 sse2/;
add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_10_sub_pixel_variance8x8/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_10_sub_pixel_variance8x8 sse2/;
add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_10_sub_pixel_variance8x4/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_10_sub_pixel_variance8x4 sse2/;
add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_8_sub_pixel_variance64x64/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_8_sub_pixel_variance64x64 sse2/;
add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_8_sub_pixel_variance64x32/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_8_sub_pixel_variance64x32 sse2/;
add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_8_sub_pixel_variance32x64/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_8_sub_pixel_variance32x64 sse2/;
add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_8_sub_pixel_variance32x32/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_8_sub_pixel_variance32x32 sse2/;
add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_8_sub_pixel_variance32x16/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_8_sub_pixel_variance32x16 sse2/;
add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_8_sub_pixel_variance16x32/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_8_sub_pixel_variance16x32 sse2/;
add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_8_sub_pixel_variance16x16/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_8_sub_pixel_variance16x16 sse2/;
add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_8_sub_pixel_variance16x8/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_8_sub_pixel_variance16x8 sse2/;
add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_8_sub_pixel_variance8x16/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_8_sub_pixel_variance8x16 sse2/;
add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_8_sub_pixel_variance8x8/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_8_sub_pixel_variance8x8 sse2/;
add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_highbd_8_sub_pixel_variance8x4/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_8_sub_pixel_variance8x4 sse2/;
add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_12_sub_pixel_avg_variance64x64/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_12_sub_pixel_avg_variance64x64 sse2/;
add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_12_sub_pixel_avg_variance64x32/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_12_sub_pixel_avg_variance64x32 sse2/;
add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_12_sub_pixel_avg_variance32x64/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_12_sub_pixel_avg_variance32x64 sse2/;
add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_12_sub_pixel_avg_variance32x32/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_12_sub_pixel_avg_variance32x32 sse2/;
add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_12_sub_pixel_avg_variance32x16/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_12_sub_pixel_avg_variance32x16 sse2/;
add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_12_sub_pixel_avg_variance16x32/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_12_sub_pixel_avg_variance16x32 sse2/;
add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_12_sub_pixel_avg_variance16x16/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_12_sub_pixel_avg_variance16x16 sse2/;
add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_12_sub_pixel_avg_variance16x8/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_12_sub_pixel_avg_variance16x8 sse2/;
add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_12_sub_pixel_avg_variance8x16/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_12_sub_pixel_avg_variance8x16 sse2/;
add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_12_sub_pixel_avg_variance8x8/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_12_sub_pixel_avg_variance8x8 sse2/;
add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_12_sub_pixel_avg_variance8x4/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_12_sub_pixel_avg_variance8x4 sse2/;
add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_10_sub_pixel_avg_variance64x64/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_10_sub_pixel_avg_variance64x64 sse2/;
add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_10_sub_pixel_avg_variance64x32/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_10_sub_pixel_avg_variance64x32 sse2/;
add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_10_sub_pixel_avg_variance32x64/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_10_sub_pixel_avg_variance32x64 sse2/;
add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_10_sub_pixel_avg_variance32x32/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_10_sub_pixel_avg_variance32x32 sse2/;
add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_10_sub_pixel_avg_variance32x16/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_10_sub_pixel_avg_variance32x16 sse2/;
add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_10_sub_pixel_avg_variance16x32/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_10_sub_pixel_avg_variance16x32 sse2/;
add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_10_sub_pixel_avg_variance16x16/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_10_sub_pixel_avg_variance16x16 sse2/;
add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_10_sub_pixel_avg_variance16x8/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_10_sub_pixel_avg_variance16x8 sse2/;
add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_10_sub_pixel_avg_variance8x16/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_10_sub_pixel_avg_variance8x16 sse2/;
add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_10_sub_pixel_avg_variance8x8/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_10_sub_pixel_avg_variance8x8 sse2/;
add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_10_sub_pixel_avg_variance8x4/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_10_sub_pixel_avg_variance8x4 sse2/;
add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_8_sub_pixel_avg_variance64x64/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_8_sub_pixel_avg_variance64x64 sse2/;
add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_8_sub_pixel_avg_variance64x32/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_8_sub_pixel_avg_variance64x32 sse2/;
add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_8_sub_pixel_avg_variance32x64/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_8_sub_pixel_avg_variance32x64 sse2/;
add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_8_sub_pixel_avg_variance32x32/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_8_sub_pixel_avg_variance32x32 sse2/;
add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_8_sub_pixel_avg_variance32x16/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_8_sub_pixel_avg_variance32x16 sse2/;
add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_8_sub_pixel_avg_variance16x32/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_8_sub_pixel_avg_variance16x32 sse2/;
add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_8_sub_pixel_avg_variance16x16/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_8_sub_pixel_avg_variance16x16 sse2/;
add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_8_sub_pixel_avg_variance16x8/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_8_sub_pixel_avg_variance16x8 sse2/;
add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_8_sub_pixel_avg_variance8x16/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_8_sub_pixel_avg_variance8x16 sse2/;
add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_8_sub_pixel_avg_variance8x8/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_8_sub_pixel_avg_variance8x8 sse2/;
add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_highbd_8_sub_pixel_avg_variance8x4/, "$sse2_x86inc";
+ specialize qw/vpx_highbd_8_sub_pixel_avg_variance8x4 sse2/;
add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
diff --git a/vpx_dsp/x86/highbd_variance_sse2.c b/vpx_dsp/x86/highbd_variance_sse2.c
index 14d029c9a..0d62adf8b 100644
--- a/vpx_dsp/x86/highbd_variance_sse2.c
+++ b/vpx_dsp/x86/highbd_variance_sse2.c
@@ -143,24 +143,28 @@ uint32_t vpx_highbd_10_variance##w##x##h##_sse2( \
const uint8_t *src8, int src_stride, \
const uint8_t *ref8, int ref_stride, uint32_t *sse) { \
int sum; \
+ int64_t var; \
uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
highbd_10_variance_sse2( \
src, src_stride, ref, ref_stride, w, h, sse, &sum, \
vpx_highbd_calc##block_size##x##block_size##var_sse2, block_size); \
- return *sse - (((int64_t)sum * sum) >> shift); \
+ var = (int64_t)(*sse) - (((int64_t)sum * sum) >> shift); \
+ return (var >= 0) ? (uint32_t)var : 0; \
} \
\
uint32_t vpx_highbd_12_variance##w##x##h##_sse2( \
const uint8_t *src8, int src_stride, \
const uint8_t *ref8, int ref_stride, uint32_t *sse) { \
int sum; \
+ int64_t var; \
uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
highbd_12_variance_sse2( \
src, src_stride, ref, ref_stride, w, h, sse, &sum, \
vpx_highbd_calc##block_size##x##block_size##var_sse2, block_size); \
- return *sse - (((int64_t)sum * sum) >> shift); \
+ var = (int64_t)(*sse) - (((int64_t)sum * sum) >> shift); \
+ return (var >= 0) ? (uint32_t)var : 0; \
}
VAR_FN(64, 64, 16, 12);
@@ -242,7 +246,6 @@ unsigned int vpx_highbd_12_mse8x8_sse2(const uint8_t *src8, int src_stride,
return *sse;
}
-#if CONFIG_USE_X86INC
// The 2 unused parameters are place holders for PIC enabled build.
// These definitions are for functions defined in
// highbd_subpel_variance_impl_sse2.asm
@@ -589,4 +592,3 @@ FNS(sse2);
#undef FNS
#undef FN
-#endif // CONFIG_USE_X86INC
diff --git a/vpx_dsp/x86/variance_sse2.c b/vpx_dsp/x86/variance_sse2.c
index 6987c2e24..92dc752c0 100644
--- a/vpx_dsp/x86/variance_sse2.c
+++ b/vpx_dsp/x86/variance_sse2.c
@@ -308,7 +308,6 @@ unsigned int vpx_mse16x16_sse2(const uint8_t *src, int src_stride,
return *sse;
}
-#if CONFIG_USE_X86INC
// The 2 unused parameters are place holders for PIC enabled build.
// These definitions are for functions defined in subpel_variance.asm
#define DECL(w, opt) \
@@ -474,4 +473,3 @@ FNS(ssse3, ssse3);
#undef FNS
#undef FN
-#endif // CONFIG_USE_X86INC
diff --git a/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c b/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c
index b71867853..cbd22dcd0 100644
--- a/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c
+++ b/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c
@@ -8,10 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-// Due to a header conflict between math.h and intrinsics includes with ceil()
-// in certain configurations under vs9 this include needs to precede
-// immintrin.h.
-
#include <immintrin.h>
#include "./vpx_dsp_rtcd.h"
diff --git a/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c b/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c
index 6fd52087c..2bb0f4a24 100644
--- a/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c
+++ b/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c
@@ -8,10 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-// Due to a header conflict between math.h and intrinsics includes with ceil()
-// in certain configurations under vs9 this include needs to precede
-// tmmintrin.h.
-
#include <tmmintrin.h>
#include "./vpx_dsp_rtcd.h"
diff --git a/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm b/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm
index 9cc4f9d7d..c1a6f23ab 100644
--- a/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm
+++ b/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm
@@ -23,11 +23,7 @@ pw_64: times 8 dw 64
; z = signed SAT(x + y)
SECTION .text
-%if ARCH_X86_64
- %define LOCAL_VARS_SIZE 16*4
-%else
- %define LOCAL_VARS_SIZE 16*6
-%endif
+%define LOCAL_VARS_SIZE 16*6
%macro SETUP_LOCAL_VARS 0
; TODO(slavarnway): using xmm registers for these on ARCH_X86_64 +
@@ -54,11 +50,11 @@ SECTION .text
mova k6k7, m3
%if ARCH_X86_64
%define krd m12
- %define tmp m13
+ %define tmp0 [rsp + 16*4]
+ %define tmp1 [rsp + 16*5]
mova krd, [GLOBAL(pw_64)]
%else
- %define tmp [rsp + 16*4]
- %define krd [rsp + 16*5]
+ %define krd [rsp + 16*4]
%if CONFIG_PIC=0
mova m6, [GLOBAL(pw_64)]
%else
@@ -71,50 +67,31 @@ SECTION .text
%endif
%endm
-%macro HORIZx4_ROW 2
- mova %2, %1
- punpcklbw %1, %1
- punpckhbw %2, %2
-
- mova m3, %2
- palignr %2, %1, 1
- palignr m3, %1, 5
-
- pmaddubsw %2, k0k1k4k5
- pmaddubsw m3, k2k3k6k7
- mova m4, %2 ;k0k1
- mova m5, m3 ;k2k3
- psrldq %2, 8 ;k4k5
- psrldq m3, 8 ;k6k7
- paddsw %2, m4
- paddsw m5, m3
- paddsw %2, m5
- paddsw %2, krd
- psraw %2, 7
- packuswb %2, %2
-%endm
-
;-------------------------------------------------------------------------------
+%if ARCH_X86_64
+ %define LOCAL_VARS_SIZE_H4 0
+%else
+ %define LOCAL_VARS_SIZE_H4 16*4
+%endif
+
%macro SUBPIX_HFILTER4 1
-cglobal filter_block1d4_%1, 6, 6+(ARCH_X86_64*2), 11, LOCAL_VARS_SIZE, \
+cglobal filter_block1d4_%1, 6, 6, 11, LOCAL_VARS_SIZE_H4, \
src, sstride, dst, dstride, height, filter
mova m4, [filterq]
packsswb m4, m4
%if ARCH_X86_64
- %define k0k1k4k5 m8
- %define k2k3k6k7 m9
- %define krd m10
- %define orig_height r7d
+ %define k0k1k4k5 m8
+ %define k2k3k6k7 m9
+ %define krd m10
mova krd, [GLOBAL(pw_64)]
pshuflw k0k1k4k5, m4, 0b ;k0_k1
pshufhw k0k1k4k5, k0k1k4k5, 10101010b ;k0_k1_k4_k5
pshuflw k2k3k6k7, m4, 01010101b ;k2_k3
pshufhw k2k3k6k7, k2k3k6k7, 11111111b ;k2_k3_k6_k7
%else
- %define k0k1k4k5 [rsp + 16*0]
- %define k2k3k6k7 [rsp + 16*1]
- %define krd [rsp + 16*2]
- %define orig_height [rsp + 16*3]
+ %define k0k1k4k5 [rsp + 16*0]
+ %define k2k3k6k7 [rsp + 16*1]
+ %define krd [rsp + 16*2]
pshuflw m6, m4, 0b ;k0_k1
pshufhw m6, m6, 10101010b ;k0_k1_k4_k5
pshuflw m7, m4, 01010101b ;k2_k3
@@ -131,61 +108,46 @@ cglobal filter_block1d4_%1, 6, 6+(ARCH_X86_64*2), 11, LOCAL_VARS_SIZE, \
mova k2k3k6k7, m7
mova krd, m1
%endif
- mov orig_height, heightd
- shr heightd, 1
+ dec heightd
+
.loop:
;Do two rows at once
- movh m0, [srcq - 3]
- movh m1, [srcq + 5]
- punpcklqdq m0, m1
- mova m1, m0
- movh m2, [srcq + sstrideq - 3]
- movh m3, [srcq + sstrideq + 5]
- punpcklqdq m2, m3
- mova m3, m2
- punpcklbw m0, m0
- punpckhbw m1, m1
- punpcklbw m2, m2
- punpckhbw m3, m3
- mova m4, m1
- palignr m4, m0, 1
- pmaddubsw m4, k0k1k4k5
- palignr m1, m0, 5
+ movu m4, [srcq - 3]
+ movu m5, [srcq + sstrideq - 3]
+ punpckhbw m1, m4, m4
+ punpcklbw m4, m4
+ punpckhbw m3, m5, m5
+ punpcklbw m5, m5
+ palignr m0, m1, m4, 1
+ pmaddubsw m0, k0k1k4k5
+ palignr m1, m4, 5
pmaddubsw m1, k2k3k6k7
- mova m7, m3
- palignr m7, m2, 1
- pmaddubsw m7, k0k1k4k5
- palignr m3, m2, 5
+ palignr m2, m3, m5, 1
+ pmaddubsw m2, k0k1k4k5
+ palignr m3, m5, 5
pmaddubsw m3, k2k3k6k7
- mova m0, m4 ;k0k1
- mova m5, m1 ;k2k3
- mova m2, m7 ;k0k1 upper
- psrldq m4, 8 ;k4k5
- psrldq m1, 8 ;k6k7
- paddsw m4, m0
- paddsw m5, m1
- mova m1, m3 ;k2k3 upper
- psrldq m7, 8 ;k4k5 upper
- psrldq m3, 8 ;k6k7 upper
- paddsw m7, m2
- paddsw m4, m5
- paddsw m1, m3
- paddsw m7, m1
- paddsw m4, krd
- psraw m4, 7
- packuswb m4, m4
- paddsw m7, krd
- psraw m7, 7
- packuswb m7, m7
+ punpckhqdq m4, m0, m2
+ punpcklqdq m0, m2
+ punpckhqdq m5, m1, m3
+ punpcklqdq m1, m3
+ paddsw m0, m4
+ paddsw m1, m5
+%ifidn %1, h8_avg
+ movd m4, [dstq]
+ movd m5, [dstq + dstrideq]
+%endif
+ paddsw m0, m1
+ paddsw m0, krd
+ psraw m0, 7
+ packuswb m0, m0
+ psrldq m1, m0, 4
%ifidn %1, h8_avg
- movd m0, [dstq]
- pavgb m4, m0
- movd m2, [dstq + dstrideq]
- pavgb m7, m2
+ pavgb m0, m4
+ pavgb m1, m5
%endif
- movd [dstq], m4
- movd [dstq + dstrideq], m7
+ movd [dstq], m0
+ movd [dstq + dstrideq], m1
lea srcq, [srcq + sstrideq ]
prefetcht0 [srcq + 4 * sstrideq - 3]
@@ -193,205 +155,156 @@ cglobal filter_block1d4_%1, 6, 6+(ARCH_X86_64*2), 11, LOCAL_VARS_SIZE, \
lea dstq, [dstq + 2 * dstrideq ]
prefetcht0 [srcq + 2 * sstrideq - 3]
- dec heightd
- jnz .loop
+ sub heightd, 2
+ jg .loop
; Do last row if output_height is odd
- mov heightd, orig_height
- and heightd, 1
- je .done
-
- movh m0, [srcq - 3] ; load src
- movh m1, [srcq + 5]
- punpcklqdq m0, m1
-
- HORIZx4_ROW m0, m1
+ jne .done
+
+ movu m4, [srcq - 3]
+ punpckhbw m1, m4, m4
+ punpcklbw m4, m4
+ palignr m0, m1, m4, 1
+ palignr m1, m4, 5
+ pmaddubsw m0, k0k1k4k5
+ pmaddubsw m1, k2k3k6k7
+ psrldq m2, m0, 8
+ psrldq m3, m1, 8
+ paddsw m0, m2
+ paddsw m1, m3
+ paddsw m0, m1
+ paddsw m0, krd
+ psraw m0, 7
+ packuswb m0, m0
%ifidn %1, h8_avg
- movd m0, [dstq]
- pavgb m1, m0
+ movd m4, [dstq]
+ pavgb m0, m4
%endif
- movd [dstq], m1
+ movd [dstq], m0
.done:
- RET
-%endm
-
-%macro HORIZx8_ROW 5
- mova %2, %1
- punpcklbw %1, %1
- punpckhbw %2, %2
-
- mova %3, %2
- mova %4, %2
- mova %5, %2
-
- palignr %2, %1, 1
- palignr %3, %1, 5
- palignr %4, %1, 9
- palignr %5, %1, 13
-
- pmaddubsw %2, k0k1
- pmaddubsw %3, k2k3
- pmaddubsw %4, k4k5
- pmaddubsw %5, k6k7
- paddsw %2, %4
- paddsw %5, %3
- paddsw %2, %5
- paddsw %2, krd
- psraw %2, 7
- packuswb %2, %2
- SWAP %1, %2
+ REP_RET
%endm
;-------------------------------------------------------------------------------
%macro SUBPIX_HFILTER8 1
-cglobal filter_block1d8_%1, 6, 6+(ARCH_X86_64*1), 14, LOCAL_VARS_SIZE, \
+cglobal filter_block1d8_%1, 6, 6, 14, LOCAL_VARS_SIZE, \
src, sstride, dst, dstride, height, filter
mova m4, [filterq]
SETUP_LOCAL_VARS
-%if ARCH_X86_64
- %define orig_height r7d
-%else
- %define orig_height heightmp
-%endif
- mov orig_height, heightd
- shr heightd, 1
+ dec heightd
.loop:
- movh m0, [srcq - 3]
- movh m3, [srcq + 5]
- movh m4, [srcq + sstrideq - 3]
- movh m7, [srcq + sstrideq + 5]
- punpcklqdq m0, m3
- mova m1, m0
+ ;Do two rows at once
+ movu m0, [srcq - 3]
+ movu m4, [srcq + sstrideq - 3]
+ punpckhbw m1, m0, m0
punpcklbw m0, m0
- punpckhbw m1, m1
- mova m5, m1
- palignr m5, m0, 13
+ palignr m5, m1, m0, 13
pmaddubsw m5, k6k7
- mova m2, m1
- mova m3, m1
+ palignr m2, m1, m0, 5
+ palignr m3, m1, m0, 9
palignr m1, m0, 1
pmaddubsw m1, k0k1
- punpcklqdq m4, m7
- mova m6, m4
+ punpckhbw m6, m4, m4
punpcklbw m4, m4
- palignr m2, m0, 5
- punpckhbw m6, m6
- palignr m3, m0, 9
- mova m7, m6
pmaddubsw m2, k2k3
pmaddubsw m3, k4k5
- palignr m7, m4, 13
- mova m0, m6
- palignr m0, m4, 5
+ palignr m7, m6, m4, 13
+ palignr m0, m6, m4, 5
pmaddubsw m7, k6k7
paddsw m1, m3
paddsw m2, m5
paddsw m1, m2
- mova m5, m6
+%ifidn %1, h8_avg
+ movh m2, [dstq]
+ movhps m2, [dstq + dstrideq]
+%endif
+ palignr m5, m6, m4, 9
palignr m6, m4, 1
pmaddubsw m0, k2k3
pmaddubsw m6, k0k1
- palignr m5, m4, 9
paddsw m1, krd
pmaddubsw m5, k4k5
psraw m1, 7
paddsw m0, m7
-%ifidn %1, h8_avg
- movh m7, [dstq]
- movh m2, [dstq + dstrideq]
-%endif
- packuswb m1, m1
paddsw m6, m5
paddsw m6, m0
paddsw m6, krd
psraw m6, 7
- packuswb m6, m6
+ packuswb m1, m6
%ifidn %1, h8_avg
- pavgb m1, m7
- pavgb m6, m2
+ pavgb m1, m2
%endif
- movh [dstq], m1
- movh [dstq + dstrideq], m6
+ movh [dstq], m1
+ movhps [dstq + dstrideq], m1
lea srcq, [srcq + sstrideq ]
prefetcht0 [srcq + 4 * sstrideq - 3]
lea srcq, [srcq + sstrideq ]
lea dstq, [dstq + 2 * dstrideq ]
prefetcht0 [srcq + 2 * sstrideq - 3]
- dec heightd
- jnz .loop
-
- ;Do last row if output_height is odd
- mov heightd, orig_height
- and heightd, 1
- je .done
+ sub heightd, 2
+ jg .loop
- movh m0, [srcq - 3]
- movh m3, [srcq + 5]
- punpcklqdq m0, m3
-
- HORIZx8_ROW m0, m1, m2, m3, m4
+ ; Do last row if output_height is odd
+ jne .done
+ movu m0, [srcq - 3]
+ punpckhbw m3, m0, m0
+ punpcklbw m0, m0
+ palignr m1, m3, m0, 1
+ palignr m2, m3, m0, 5
+ palignr m4, m3, m0, 13
+ palignr m3, m0, 9
+ pmaddubsw m1, k0k1
+ pmaddubsw m2, k2k3
+ pmaddubsw m3, k4k5
+ pmaddubsw m4, k6k7
+ paddsw m1, m3
+ paddsw m4, m2
+ paddsw m1, m4
+ paddsw m1, krd
+ psraw m1, 7
+ packuswb m1, m1
%ifidn %1, h8_avg
- movh m1, [dstq]
- pavgb m0, m1
+ movh m0, [dstq]
+ pavgb m1, m0
%endif
- movh [dstq], m0
+ movh [dstq], m1
.done:
- RET
+ REP_RET
%endm
;-------------------------------------------------------------------------------
%macro SUBPIX_HFILTER16 1
-cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*0), 14, LOCAL_VARS_SIZE, \
+cglobal filter_block1d16_%1, 6, 6, 14, LOCAL_VARS_SIZE, \
src, sstride, dst, dstride, height, filter
mova m4, [filterq]
SETUP_LOCAL_VARS
+
.loop:
prefetcht0 [srcq + 2 * sstrideq -3]
- movh m0, [srcq - 3]
- movh m4, [srcq + 5]
- movh m6, [srcq + 13]
- punpcklqdq m0, m4
- mova m7, m0
- punpckhbw m0, m0
- mova m1, m0
- punpcklqdq m4, m6
- mova m3, m0
- punpcklbw m7, m7
-
- palignr m3, m7, 13
- mova m2, m0
- pmaddubsw m3, k6k7
- palignr m0, m7, 1
+ movu m0, [srcq - 3]
+ movu m4, [srcq - 2]
pmaddubsw m0, k0k1
- palignr m1, m7, 5
- pmaddubsw m1, k2k3
- palignr m2, m7, 9
- pmaddubsw m2, k4k5
- paddsw m1, m3
- mova m3, m4
- punpckhbw m4, m4
- mova m5, m4
- punpcklbw m3, m3
- mova m7, m4
- palignr m5, m3, 5
- mova m6, m4
- palignr m4, m3, 1
pmaddubsw m4, k0k1
+ movu m1, [srcq - 1]
+ movu m5, [srcq + 0]
+ pmaddubsw m1, k2k3
pmaddubsw m5, k2k3
- palignr m6, m3, 9
+ movu m2, [srcq + 1]
+ movu m6, [srcq + 2]
+ pmaddubsw m2, k4k5
pmaddubsw m6, k4k5
- palignr m7, m3, 13
+ movu m3, [srcq + 3]
+ movu m7, [srcq + 4]
+ pmaddubsw m3, k6k7
pmaddubsw m7, k6k7
paddsw m0, m2
+ paddsw m1, m3
paddsw m0, m1
-%ifidn %1, h8_avg
- mova m1, [dstq]
-%endif
paddsw m4, m6
paddsw m5, m7
paddsw m4, m5
@@ -399,16 +312,18 @@ cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*0), 14, LOCAL_VARS_SIZE, \
paddsw m4, krd
psraw m0, 7
psraw m4, 7
- packuswb m0, m4
+ packuswb m0, m0
+ packuswb m4, m4
+ punpcklbw m0, m4
%ifidn %1, h8_avg
- pavgb m0, m1
+ pavgb m0, [dstq]
%endif
lea srcq, [srcq + sstrideq]
mova [dstq], m0
lea dstq, [dstq + dstrideq]
dec heightd
jnz .loop
- RET
+ REP_RET
%endm
INIT_XMM ssse3
@@ -420,204 +335,463 @@ SUBPIX_HFILTER4 h8
SUBPIX_HFILTER4 h8_avg
;-------------------------------------------------------------------------------
+
+; TODO(Linfeng): Detect cpu type and choose the code with better performance.
+%define X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON 1
+
+%if ARCH_X86_64 && X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON
+ %define NUM_GENERAL_REG_USED 9
+%else
+ %define NUM_GENERAL_REG_USED 6
+%endif
+
%macro SUBPIX_VFILTER 2
-cglobal filter_block1d%2_%1, 6, 6+(ARCH_X86_64*3), 14, LOCAL_VARS_SIZE, \
+cglobal filter_block1d%2_%1, 6, NUM_GENERAL_REG_USED, 15, LOCAL_VARS_SIZE, \
src, sstride, dst, dstride, height, filter
mova m4, [filterq]
SETUP_LOCAL_VARS
-%if ARCH_X86_64
- %define src1q r7
- %define sstride6q r8
- %define dst_stride dstrideq
+
+%ifidn %2, 8
+ %define movx movh
%else
- %define src1q filterq
- %define sstride6q dstrideq
- %define dst_stride dstridemp
+ %define movx movd
%endif
- mov src1q, srcq
- add src1q, sstrideq
- lea sstride6q, [sstrideq + sstrideq * 4]
- add sstride6q, sstrideq ;pitch * 6
-%ifidn %2, 8
- %define movx movh
+ dec heightd
+
+%if ARCH_X86 || X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON
+
+%if ARCH_X86_64
+ %define src1q r7
+ %define sstride6q r8
+ %define dst_stride dstrideq
%else
- %define movx movd
+ %define src1q filterq
+ %define sstride6q dstrideq
+ %define dst_stride dstridemp
%endif
+ mov src1q, srcq
+ add src1q, sstrideq
+ lea sstride6q, [sstrideq + sstrideq * 4]
+ add sstride6q, sstrideq ;pitch * 6
+
.loop:
- movx m0, [srcq ] ;A
- movx m1, [srcq + sstrideq ] ;B
- punpcklbw m0, m1 ;A B
- movx m2, [srcq + sstrideq * 2 ] ;C
- pmaddubsw m0, k0k1
- mova m6, m2
- movx m3, [src1q + sstrideq * 2] ;D
- punpcklbw m2, m3 ;C D
- pmaddubsw m2, k2k3
- movx m4, [srcq + sstrideq * 4 ] ;E
- mova m7, m4
- movx m5, [src1q + sstrideq * 4] ;F
- punpcklbw m4, m5 ;E F
- pmaddubsw m4, k4k5
- punpcklbw m1, m6 ;A B next iter
- movx m6, [srcq + sstride6q ] ;G
- punpcklbw m5, m6 ;E F next iter
- punpcklbw m3, m7 ;C D next iter
- pmaddubsw m5, k4k5
- movx m7, [src1q + sstride6q ] ;H
- punpcklbw m6, m7 ;G H
- pmaddubsw m6, k6k7
- pmaddubsw m3, k2k3
- pmaddubsw m1, k0k1
- paddsw m0, m4
- paddsw m2, m6
- movx m6, [srcq + sstrideq * 8 ] ;H next iter
- punpcklbw m7, m6
- pmaddubsw m7, k6k7
- paddsw m0, m2
- paddsw m0, krd
- psraw m0, 7
- paddsw m1, m5
- packuswb m0, m0
-
- paddsw m3, m7
- paddsw m1, m3
- paddsw m1, krd
- psraw m1, 7
- lea srcq, [srcq + sstrideq * 2 ]
- lea src1q, [src1q + sstrideq * 2]
- packuswb m1, m1
+ ;Do two rows at once
+ movx m0, [srcq ] ;A
+ movx m1, [src1q ] ;B
+ punpcklbw m0, m1 ;A B
+ movx m2, [srcq + sstrideq * 2 ] ;C
+ pmaddubsw m0, k0k1
+ mova m6, m2
+ movx m3, [src1q + sstrideq * 2] ;D
+ punpcklbw m2, m3 ;C D
+ pmaddubsw m2, k2k3
+ movx m4, [srcq + sstrideq * 4 ] ;E
+ mova m7, m4
+ movx m5, [src1q + sstrideq * 4] ;F
+ punpcklbw m4, m5 ;E F
+ pmaddubsw m4, k4k5
+ punpcklbw m1, m6 ;A B next iter
+ movx m6, [srcq + sstride6q ] ;G
+ punpcklbw m5, m6 ;E F next iter
+ punpcklbw m3, m7 ;C D next iter
+ pmaddubsw m5, k4k5
+ movx m7, [src1q + sstride6q ] ;H
+ punpcklbw m6, m7 ;G H
+ pmaddubsw m6, k6k7
+ pmaddubsw m3, k2k3
+ pmaddubsw m1, k0k1
+ paddsw m0, m4
+ paddsw m2, m6
+ movx m6, [srcq + sstrideq * 8 ] ;H next iter
+ punpcklbw m7, m6
+ pmaddubsw m7, k6k7
+ paddsw m0, m2
+ paddsw m0, krd
+ psraw m0, 7
+ paddsw m1, m5
+ packuswb m0, m0
+
+ paddsw m3, m7
+ paddsw m1, m3
+ paddsw m1, krd
+ psraw m1, 7
+ lea srcq, [srcq + sstrideq * 2 ]
+ lea src1q, [src1q + sstrideq * 2]
+ packuswb m1, m1
+
+%ifidn %1, v8_avg
+ movx m2, [dstq]
+ pavgb m0, m2
+%endif
+ movx [dstq], m0
+ add dstq, dst_stride
+%ifidn %1, v8_avg
+ movx m3, [dstq]
+ pavgb m1, m3
+%endif
+ movx [dstq], m1
+ add dstq, dst_stride
+ sub heightd, 2
+ jg .loop
+ ; Do last row if output_height is odd
+ jne .done
+
+ movx m0, [srcq ] ;A
+ movx m1, [srcq + sstrideq ] ;B
+ movx m6, [srcq + sstride6q ] ;G
+ punpcklbw m0, m1 ;A B
+ movx m7, [src1q + sstride6q ] ;H
+ pmaddubsw m0, k0k1
+ movx m2, [srcq + sstrideq * 2 ] ;C
+ punpcklbw m6, m7 ;G H
+ movx m3, [src1q + sstrideq * 2] ;D
+ pmaddubsw m6, k6k7
+ movx m4, [srcq + sstrideq * 4 ] ;E
+ punpcklbw m2, m3 ;C D
+ movx m5, [src1q + sstrideq * 4] ;F
+ punpcklbw m4, m5 ;E F
+ pmaddubsw m2, k2k3
+ pmaddubsw m4, k4k5
+ paddsw m2, m6
+ paddsw m0, m4
+ paddsw m0, m2
+ paddsw m0, krd
+ psraw m0, 7
+ packuswb m0, m0
%ifidn %1, v8_avg
- movx m2, [dstq]
- pavgb m0, m2
+ movx m1, [dstq]
+ pavgb m0, m1
%endif
- movx [dstq], m0
- add dstq, dst_stride
+ movx [dstq], m0
+
+%else
+ ; ARCH_X86_64
+
+ movx m0, [srcq ] ;A
+ movx m1, [srcq + sstrideq ] ;B
+ lea srcq, [srcq + sstrideq * 2 ]
+ movx m2, [srcq] ;C
+ movx m3, [srcq + sstrideq] ;D
+ lea srcq, [srcq + sstrideq * 2 ]
+ movx m4, [srcq] ;E
+ movx m5, [srcq + sstrideq] ;F
+ lea srcq, [srcq + sstrideq * 2 ]
+ movx m6, [srcq] ;G
+ punpcklbw m0, m1 ;A B
+ punpcklbw m1, m2 ;A B next iter
+ punpcklbw m2, m3 ;C D
+ punpcklbw m3, m4 ;C D next iter
+ punpcklbw m4, m5 ;E F
+ punpcklbw m5, m6 ;E F next iter
+
+.loop:
+ ;Do two rows at once
+ movx m7, [srcq + sstrideq] ;H
+ lea srcq, [srcq + sstrideq * 2 ]
+ movx m14, [srcq] ;H next iter
+ punpcklbw m6, m7 ;G H
+ punpcklbw m7, m14 ;G H next iter
+ pmaddubsw m8, m0, k0k1
+ pmaddubsw m9, m1, k0k1
+ mova m0, m2
+ mova m1, m3
+ pmaddubsw m10, m2, k2k3
+ pmaddubsw m11, m3, k2k3
+ mova m2, m4
+ mova m3, m5
+ pmaddubsw m4, k4k5
+ pmaddubsw m5, k4k5
+ paddsw m8, m4
+ paddsw m9, m5
+ mova m4, m6
+ mova m5, m7
+ pmaddubsw m6, k6k7
+ pmaddubsw m7, k6k7
+ paddsw m10, m6
+ paddsw m11, m7
+ paddsw m8, m10
+ paddsw m9, m11
+ mova m6, m14
+ paddsw m8, krd
+ paddsw m9, krd
+ psraw m8, 7
+ psraw m9, 7
+%ifidn %2, 4
+ packuswb m8, m8
+ packuswb m9, m9
+%else
+ packuswb m8, m9
+%endif
+
%ifidn %1, v8_avg
- movx m3, [dstq]
- pavgb m1, m3
+ movx m7, [dstq]
+%ifidn %2, 4
+ movx m10, [dstq + dstrideq]
+ pavgb m9, m10
+%else
+ movhpd m7, [dstq + dstrideq]
+%endif
+ pavgb m8, m7
%endif
- movx [dstq], m1
- add dstq, dst_stride
- sub heightd, 2
- cmp heightd, 1
- jg .loop
-
- cmp heightd, 0
- je .done
-
- movx m0, [srcq ] ;A
- movx m1, [srcq + sstrideq ] ;B
- movx m6, [srcq + sstride6q ] ;G
- punpcklbw m0, m1 ;A B
- movx m7, [src1q + sstride6q ] ;H
- pmaddubsw m0, k0k1
- movx m2, [srcq + sstrideq * 2 ] ;C
- punpcklbw m6, m7 ;G H
- movx m3, [src1q + sstrideq * 2] ;D
- pmaddubsw m6, k6k7
- movx m4, [srcq + sstrideq * 4 ] ;E
- punpcklbw m2, m3 ;C D
- movx m5, [src1q + sstrideq * 4] ;F
- punpcklbw m4, m5 ;E F
- pmaddubsw m2, k2k3
- pmaddubsw m4, k4k5
- paddsw m2, m6
- paddsw m0, m4
- paddsw m0, m2
- paddsw m0, krd
- psraw m0, 7
- packuswb m0, m0
+ movx [dstq], m8
+%ifidn %2, 4
+ movx [dstq + dstrideq], m9
+%else
+ movhpd [dstq + dstrideq], m8
+%endif
+
+ lea dstq, [dstq + dstrideq * 2 ]
+ sub heightd, 2
+ jg .loop
+
+ ; Do last row if output_height is odd
+ jne .done
+
+ movx m7, [srcq + sstrideq] ;H
+ punpcklbw m6, m7 ;G H
+ pmaddubsw m0, k0k1
+ pmaddubsw m2, k2k3
+ pmaddubsw m4, k4k5
+ pmaddubsw m6, k6k7
+ paddsw m0, m4
+ paddsw m2, m6
+ paddsw m0, m2
+ paddsw m0, krd
+ psraw m0, 7
+ packuswb m0, m0
%ifidn %1, v8_avg
- movx m1, [dstq]
- pavgb m0, m1
+ movx m1, [dstq]
+ pavgb m0, m1
%endif
- movx [dstq], m0
+ movx [dstq], m0
+
+%endif ; ARCH_X86_64
+
.done:
- RET
+ REP_RET
+
%endm
;-------------------------------------------------------------------------------
%macro SUBPIX_VFILTER16 1
-cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*3), 14, LOCAL_VARS_SIZE, \
+cglobal filter_block1d16_%1, 6, NUM_GENERAL_REG_USED, 16, LOCAL_VARS_SIZE, \
src, sstride, dst, dstride, height, filter
- mova m4, [filterq]
+ mova m4, [filterq]
SETUP_LOCAL_VARS
+
+%if ARCH_X86 || X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON
+
%if ARCH_X86_64
- %define src1q r7
- %define sstride6q r8
- %define dst_stride dstrideq
+ %define src1q r7
+ %define sstride6q r8
+ %define dst_stride dstrideq
%else
- %define src1q filterq
- %define sstride6q dstrideq
- %define dst_stride dstridemp
+ %define src1q filterq
+ %define sstride6q dstrideq
+ %define dst_stride dstridemp
%endif
- mov src1q, srcq
- add src1q, sstrideq
- lea sstride6q, [sstrideq + sstrideq * 4]
- add sstride6q, sstrideq ;pitch * 6
+ lea src1q, [srcq + sstrideq]
+ lea sstride6q, [sstrideq + sstrideq * 4]
+ add sstride6q, sstrideq ;pitch * 6
.loop:
- movh m0, [srcq ] ;A
- movh m1, [srcq + sstrideq ] ;B
- movh m2, [srcq + sstrideq * 2 ] ;C
- movh m3, [src1q + sstrideq * 2] ;D
- movh m4, [srcq + sstrideq * 4 ] ;E
- movh m5, [src1q + sstrideq * 4] ;F
-
- punpcklbw m0, m1 ;A B
- movh m6, [srcq + sstride6q] ;G
- punpcklbw m2, m3 ;C D
- movh m7, [src1q + sstride6q] ;H
- punpcklbw m4, m5 ;E F
- pmaddubsw m0, k0k1
- movh m3, [srcq + 8] ;A
- pmaddubsw m2, k2k3
- punpcklbw m6, m7 ;G H
- movh m5, [srcq + sstrideq + 8] ;B
- pmaddubsw m4, k4k5
- punpcklbw m3, m5 ;A B
- movh m7, [srcq + sstrideq * 2 + 8] ;C
- pmaddubsw m6, k6k7
- movh m5, [src1q + sstrideq * 2 + 8] ;D
- punpcklbw m7, m5 ;C D
- paddsw m2, m6
- pmaddubsw m3, k0k1
- movh m1, [srcq + sstrideq * 4 + 8] ;E
- paddsw m0, m4
- pmaddubsw m7, k2k3
- movh m6, [src1q + sstrideq * 4 + 8] ;F
- punpcklbw m1, m6 ;E F
- paddsw m0, m2
- paddsw m0, krd
- movh m2, [srcq + sstride6q + 8] ;G
- pmaddubsw m1, k4k5
- movh m5, [src1q + sstride6q + 8] ;H
- psraw m0, 7
- punpcklbw m2, m5 ;G H
- pmaddubsw m2, k6k7
+ movh m0, [srcq ] ;A
+ movh m1, [src1q ] ;B
+ movh m2, [srcq + sstrideq * 2 ] ;C
+ movh m3, [src1q + sstrideq * 2] ;D
+ movh m4, [srcq + sstrideq * 4 ] ;E
+ movh m5, [src1q + sstrideq * 4] ;F
+
+ punpcklbw m0, m1 ;A B
+ movh m6, [srcq + sstride6q] ;G
+ punpcklbw m2, m3 ;C D
+ movh m7, [src1q + sstride6q] ;H
+ punpcklbw m4, m5 ;E F
+ pmaddubsw m0, k0k1
+ movh m3, [srcq + 8] ;A
+ pmaddubsw m2, k2k3
+ punpcklbw m6, m7 ;G H
+ movh m5, [srcq + sstrideq + 8] ;B
+ pmaddubsw m4, k4k5
+ punpcklbw m3, m5 ;A B
+ movh m7, [srcq + sstrideq * 2 + 8] ;C
+ pmaddubsw m6, k6k7
+ movh m5, [src1q + sstrideq * 2 + 8] ;D
+ punpcklbw m7, m5 ;C D
+ paddsw m2, m6
+ pmaddubsw m3, k0k1
+ movh m1, [srcq + sstrideq * 4 + 8] ;E
+ paddsw m0, m4
+ pmaddubsw m7, k2k3
+ movh m6, [src1q + sstrideq * 4 + 8] ;F
+ punpcklbw m1, m6 ;E F
+ paddsw m0, m2
+ paddsw m0, krd
+ movh m2, [srcq + sstride6q + 8] ;G
+ pmaddubsw m1, k4k5
+ movh m5, [src1q + sstride6q + 8] ;H
+ psraw m0, 7
+ punpcklbw m2, m5 ;G H
+ pmaddubsw m2, k6k7
+ paddsw m7, m2
+ paddsw m3, m1
+ paddsw m3, m7
+ paddsw m3, krd
+ psraw m3, 7
+ packuswb m0, m3
+
+ add srcq, sstrideq
+ add src1q, sstrideq
+%ifidn %1, v8_avg
+ pavgb m0, [dstq]
+%endif
+ mova [dstq], m0
+ add dstq, dst_stride
+ dec heightd
+ jnz .loop
+ REP_RET
+
+%else
+ ; ARCH_X86_64
+ dec heightd
+
+ movu m1, [srcq ] ;A
+ movu m3, [srcq + sstrideq ] ;B
+ lea srcq, [srcq + sstrideq * 2]
+ punpcklbw m0, m1, m3 ;A B
+ punpckhbw m1, m3 ;A B
+ movu m5, [srcq] ;C
+ punpcklbw m2, m3, m5 ;A B next iter
+ punpckhbw m3, m5 ;A B next iter
+ mova tmp0, m2 ;store to stack
+ mova tmp1, m3 ;store to stack
+ movu m7, [srcq + sstrideq] ;D
+ lea srcq, [srcq + sstrideq * 2]
+ punpcklbw m4, m5, m7 ;C D
+ punpckhbw m5, m7 ;C D
+ movu m9, [srcq] ;E
+ punpcklbw m6, m7, m9 ;C D next iter
+ punpckhbw m7, m9 ;C D next iter
+ movu m11, [srcq + sstrideq] ;F
+ lea srcq, [srcq + sstrideq * 2]
+ punpcklbw m8, m9, m11 ;E F
+ punpckhbw m9, m11 ;E F
+ movu m2, [srcq] ;G
+ punpcklbw m10, m11, m2 ;E F next iter
+ punpckhbw m11, m2 ;E F next iter
+
+.loop:
+ ;Do two rows at once
+ pmaddubsw m13, m0, k0k1
+ mova m0, m4
+ pmaddubsw m14, m8, k4k5
+ pmaddubsw m15, m4, k2k3
+ mova m4, m8
+ paddsw m13, m14
+ movu m3, [srcq + sstrideq] ;H
+ lea srcq, [srcq + sstrideq * 2]
+ punpcklbw m14, m2, m3 ;G H
+ mova m8, m14
+ pmaddubsw m14, k6k7
+ paddsw m15, m14
+ paddsw m13, m15
+ paddsw m13, krd
+ psraw m13, 7
+
+ pmaddubsw m14, m1, k0k1
+ pmaddubsw m1, m9, k4k5
+ pmaddubsw m15, m5, k2k3
+ paddsw m14, m1
+ mova m1, m5
+ mova m5, m9
+ punpckhbw m2, m3 ;G H
+ mova m9, m2
+ pmaddubsw m2, k6k7
+ paddsw m15, m2
+ paddsw m14, m15
+ paddsw m14, krd
+ psraw m14, 7
+ packuswb m13, m14
%ifidn %1, v8_avg
- mova m4, [dstq]
+ pavgb m13, [dstq]
%endif
- movh [dstq], m0
- paddsw m7, m2
- paddsw m3, m1
- paddsw m3, m7
- paddsw m3, krd
- psraw m3, 7
- packuswb m0, m3
-
- add srcq, sstrideq
- add src1q, sstrideq
+ mova [dstq], m13
+
+ ; next iter
+ pmaddubsw m15, tmp0, k0k1
+ pmaddubsw m14, m10, k4k5
+ pmaddubsw m13, m6, k2k3
+ paddsw m15, m14
+ mova tmp0, m6
+ mova m6, m10
+ movu m2, [srcq] ;G next iter
+ punpcklbw m14, m3, m2 ;G H next iter
+ mova m10, m14
+ pmaddubsw m14, k6k7
+ paddsw m13, m14
+ paddsw m15, m13
+ paddsw m15, krd
+ psraw m15, 7
+
+ pmaddubsw m14, tmp1, k0k1
+ mova tmp1, m7
+ pmaddubsw m13, m7, k2k3
+ mova m7, m11
+ pmaddubsw m11, k4k5
+ paddsw m14, m11
+ punpckhbw m3, m2 ;G H next iter
+ mova m11, m3
+ pmaddubsw m3, k6k7
+ paddsw m13, m3
+ paddsw m14, m13
+ paddsw m14, krd
+ psraw m14, 7
+ packuswb m15, m14
%ifidn %1, v8_avg
- pavgb m0, m4
+ pavgb m15, [dstq + dstrideq]
%endif
- mova [dstq], m0
- add dstq, dst_stride
- dec heightd
- jnz .loop
- RET
+ mova [dstq + dstrideq], m15
+ lea dstq, [dstq + dstrideq * 2]
+ sub heightd, 2
+ jg .loop
+
+ ; Do last row if output_height is odd
+ jne .done
+
+ movu m3, [srcq + sstrideq] ;H
+ punpcklbw m6, m2, m3 ;G H
+ punpckhbw m2, m3 ;G H
+ pmaddubsw m0, k0k1
+ pmaddubsw m1, k0k1
+ pmaddubsw m4, k2k3
+ pmaddubsw m5, k2k3
+ pmaddubsw m8, k4k5
+ pmaddubsw m9, k4k5
+ pmaddubsw m6, k6k7
+ pmaddubsw m2, k6k7
+ paddsw m0, m8
+ paddsw m1, m9
+ paddsw m4, m6
+ paddsw m5, m2
+ paddsw m0, m4
+ paddsw m1, m5
+ paddsw m0, krd
+ paddsw m1, krd
+ psraw m0, 7
+ psraw m1, 7
+ packuswb m0, m1
+%ifidn %1, v8_avg
+ pavgb m0, [dstq]
+%endif
+ mova [dstq], m0
+
+.done:
+ REP_RET
+
+%endif ; ARCH_X86_64
+
%endm
INIT_XMM ssse3
diff --git a/vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm b/vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm
index 3c8cfd225..538b2129d 100644
--- a/vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm
+++ b/vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm
@@ -14,14 +14,14 @@
mov rdx, arg(5) ;filter ptr
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;output_ptr
- mov rcx, 0x0400040
+ mov ecx, 0x01000100
movdqa xmm3, [rdx] ;load filters
psrldq xmm3, 6
packsswb xmm3, xmm3
pshuflw xmm3, xmm3, 0b ;k3_k4
- movq xmm2, rcx ;rounding
+ movd xmm2, ecx ;rounding_shift
pshufd xmm2, xmm2, 0
movsxd rax, DWORD PTR arg(1) ;pixels_per_line
@@ -33,8 +33,7 @@
punpcklbw xmm0, xmm1
pmaddubsw xmm0, xmm3
- paddsw xmm0, xmm2 ;rounding
- psraw xmm0, 7 ;shift
+ pmulhrsw xmm0, xmm2 ;rounding(+64)+shift(>>7)
packuswb xmm0, xmm0 ;pack to byte
%if %1
@@ -51,7 +50,7 @@
mov rdx, arg(5) ;filter ptr
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;output_ptr
- mov rcx, 0x0400040
+ mov ecx, 0x01000100
movdqa xmm7, [rdx] ;load filters
psrldq xmm7, 6
@@ -59,7 +58,7 @@
pshuflw xmm7, xmm7, 0b ;k3_k4
punpcklwd xmm7, xmm7
- movq xmm6, rcx ;rounding
+ movd xmm6, ecx ;rounding_shift
pshufd xmm6, xmm6, 0
movsxd rax, DWORD PTR arg(1) ;pixels_per_line
@@ -71,8 +70,7 @@
punpcklbw xmm0, xmm1
pmaddubsw xmm0, xmm7
- paddsw xmm0, xmm6 ;rounding
- psraw xmm0, 7 ;shift
+ pmulhrsw xmm0, xmm6 ;rounding(+64)+shift(>>7)
packuswb xmm0, xmm0 ;pack back to byte
%if %1
@@ -92,10 +90,8 @@
pmaddubsw xmm0, xmm7
pmaddubsw xmm2, xmm7
- paddsw xmm0, xmm6 ;rounding
- paddsw xmm2, xmm6
- psraw xmm0, 7 ;shift
- psraw xmm2, 7
+ pmulhrsw xmm0, xmm6 ;rounding(+64)+shift(>>7)
+ pmulhrsw xmm2, xmm6
packuswb xmm0, xmm2 ;pack back to byte
%if %1
diff --git a/vpx_ports/bitops.h b/vpx_ports/bitops.h
index 84ff3659f..19426faf0 100644
--- a/vpx_ports/bitops.h
+++ b/vpx_ports/bitops.h
@@ -16,8 +16,7 @@
#include "vpx_ports/msvc.h"
#ifdef _MSC_VER
-# include <math.h> // the ceil() definition must precede intrin.h
-# if _MSC_VER > 1310 && (defined(_M_X64) || defined(_M_IX86))
+# if defined(_M_X64) || defined(_M_IX86)
# include <intrin.h>
# define USE_MSC_INTRINSICS
# endif