diff options
53 files changed, 1049 insertions, 1277 deletions
diff --git a/build/make/Makefile b/build/make/Makefile index 03dacce5e..c4d53f160 100644 --- a/build/make/Makefile +++ b/build/make/Makefile @@ -411,6 +411,7 @@ ifneq ($(call enabled,DIST-SRCS),) DIST-SRCS-$(CONFIG_MSVS) += build/make/gen_msvs_proj.sh DIST-SRCS-$(CONFIG_MSVS) += build/make/gen_msvs_sln.sh DIST-SRCS-$(CONFIG_MSVS) += build/make/gen_msvs_vcxproj.sh + DIST-SRCS-$(CONFIG_MSVS) += build/make/msvs_common.sh DIST-SRCS-$(CONFIG_MSVS) += build/x86-msvs/obj_int_extract.bat DIST-SRCS-$(CONFIG_MSVS) += build/arm-msvs/obj_int_extract.bat DIST-SRCS-$(CONFIG_RVCT) += build/make/armlink_adapter.sh diff --git a/build/make/gen_msvs_proj.sh b/build/make/gen_msvs_proj.sh index d0cbf3e54..4e803b81e 100755 --- a/build/make/gen_msvs_proj.sh +++ b/build/make/gen_msvs_proj.sh @@ -9,17 +9,11 @@ ## be found in the AUTHORS file in the root of the source tree. ## - self=$0 self_basename=${self##*/} self_dirname=$(dirname "$0") -EOL=$'\n' -if [ "$(uname -o 2>/dev/null)" = "Cygwin" ] \ - && cygpath --help >/dev/null 2>&1; then - FIXPATH='cygpath -m' -else - FIXPATH='echo' -fi + +. "$self_dirname/msvs_common.sh"|| exit 127 show_help() { cat <<EOF @@ -49,86 +43,6 @@ EOF exit 1 } -die() { - echo "${self_basename}: $@" >&2 - exit 1 -} - -die_unknown(){ - echo "Unknown option \"$1\"." >&2 - echo "See ${self_basename} --help for available options." 
>&2 - exit 1 -} - -fix_path() { - $FIXPATH "$1" -} - -generate_uuid() { - local hex="0123456789ABCDEF" - local i - local uuid="" - local j - #93995380-89BD-4b04-88EB-625FBE52EBFB - for ((i=0; i<32; i++)); do - (( j = $RANDOM % 16 )) - uuid="${uuid}${hex:$j:1}" - done - echo "${uuid:0:8}-${uuid:8:4}-${uuid:12:4}-${uuid:16:4}-${uuid:20:12}" -} - -indent1=" " -indent="" -indent_push() { - indent="${indent}${indent1}" -} -indent_pop() { - indent="${indent%${indent1}}" -} - -tag_attributes() { - for opt in "$@"; do - optval="${opt#*=}" - [ -n "${optval}" ] || - die "Missing attribute value in '$opt' while generating $tag tag" - echo "${indent}${opt%%=*}=\"${optval}\"" - done -} - -open_tag() { - local tag=$1 - shift - if [ $# -ne 0 ]; then - echo "${indent}<${tag}" - indent_push - tag_attributes "$@" - echo "${indent}>" - else - echo "${indent}<${tag}>" - indent_push - fi -} - -close_tag() { - local tag=$1 - indent_pop - echo "${indent}</${tag}>" -} - -tag() { - local tag=$1 - shift - if [ $# -ne 0 ]; then - echo "${indent}<${tag}" - indent_push - tag_attributes "$@" - indent_pop - echo "${indent}/>" - else - echo "${indent}<${tag}/>" - fi -} - generate_filter() { local var=$1 local name=$2 diff --git a/build/make/gen_msvs_vcxproj.sh b/build/make/gen_msvs_vcxproj.sh index a64e129b2..9dc790629 100755 --- a/build/make/gen_msvs_vcxproj.sh +++ b/build/make/gen_msvs_vcxproj.sh @@ -9,17 +9,11 @@ ## be found in the AUTHORS file in the root of the source tree. ## - self=$0 self_basename=${self##*/} self_dirname=$(dirname "$0") -EOL=$'\n' -if [ "$(uname -o 2>/dev/null)" = "Cygwin" ] \ - && cygpath --help >/dev/null 2>&1; then - FIXPATH='cygpath -m' -else - FIXPATH='echo' -fi + +. "$self_dirname/msvs_common.sh"|| exit 127 show_help() { cat <<EOF @@ -50,86 +44,6 @@ EOF exit 1 } -die() { - echo "${self_basename}: $@" >&2 - exit 1 -} - -die_unknown(){ - echo "Unknown option \"$1\"." >&2 - echo "See ${self_basename} --help for available options." 
>&2 - exit 1 -} - -fix_path() { - $FIXPATH "$1" -} - -generate_uuid() { - local hex="0123456789ABCDEF" - local i - local uuid="" - local j - #93995380-89BD-4b04-88EB-625FBE52EBFB - for ((i=0; i<32; i++)); do - (( j = $RANDOM % 16 )) - uuid="${uuid}${hex:$j:1}" - done - echo "${uuid:0:8}-${uuid:8:4}-${uuid:12:4}-${uuid:16:4}-${uuid:20:12}" -} - -indent1=" " -indent="" -indent_push() { - indent="${indent}${indent1}" -} -indent_pop() { - indent="${indent%${indent1}}" -} - -tag_attributes() { - for opt in "$@"; do - optval="${opt#*=}" - [ -n "${optval}" ] || - die "Missing attribute value in '$opt' while generating $tag tag" - echo "${indent}${opt%%=*}=\"${optval}\"" - done -} - -open_tag() { - local tag=$1 - shift - if [ $# -ne 0 ]; then - echo "${indent}<${tag}" - indent_push - tag_attributes "$@" - echo "${indent}>" - else - echo "${indent}<${tag}>" - indent_push - fi -} - -close_tag() { - local tag=$1 - indent_pop - echo "${indent}</${tag}>" -} - -tag() { - local tag=$1 - shift - if [ $# -ne 0 ]; then - echo "${indent}<${tag}" - indent_push - tag_attributes "$@" - indent_pop - echo "${indent}/>" - else - echo "${indent}<${tag}/>" - fi -} - tag_content() { local tag=$1 local content=$2 diff --git a/build/make/msvs_common.sh b/build/make/msvs_common.sh new file mode 100644 index 000000000..eb2eb7bcf --- /dev/null +++ b/build/make/msvs_common.sh @@ -0,0 +1,98 @@ +#!/bin/bash +## +## Copyright (c) 2014 The WebM project authors. All Rights Reserved. +## +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. 
+## + +if [ "$(uname -o 2>/dev/null)" = "Cygwin" ] \ + && cygpath --help >/dev/null 2>&1; then + FIXPATH='cygpath -m' +else + FIXPATH='echo' +fi + +die() { + echo "${self_basename}: $@" >&2 + exit 1 +} + +die_unknown(){ + echo "Unknown option \"$1\"." >&2 + echo "See ${self_basename} --help for available options." >&2 + exit 1 +} + +fix_path() { + $FIXPATH "$1" +} + +generate_uuid() { + local hex="0123456789ABCDEF" + local i + local uuid="" + local j + #93995380-89BD-4b04-88EB-625FBE52EBFB + for ((i=0; i<32; i++)); do + (( j = $RANDOM % 16 )) + uuid="${uuid}${hex:$j:1}" + done + echo "${uuid:0:8}-${uuid:8:4}-${uuid:12:4}-${uuid:16:4}-${uuid:20:12}" +} + +indent1=" " +indent="" +indent_push() { + indent="${indent}${indent1}" +} +indent_pop() { + indent="${indent%${indent1}}" +} + +tag_attributes() { + for opt in "$@"; do + optval="${opt#*=}" + [ -n "${optval}" ] || + die "Missing attribute value in '$opt' while generating $tag tag" + echo "${indent}${opt%%=*}=\"${optval}\"" + done +} + +open_tag() { + local tag=$1 + shift + if [ $# -ne 0 ]; then + echo "${indent}<${tag}" + indent_push + tag_attributes "$@" + echo "${indent}>" + else + echo "${indent}<${tag}>" + indent_push + fi +} + +close_tag() { + local tag=$1 + indent_pop + echo "${indent}</${tag}>" +} + +tag() { + local tag=$1 + shift + if [ $# -ne 0 ]; then + echo "${indent}<${tag}" + indent_push + tag_attributes "$@" + indent_pop + echo "${indent}/>" + else + echo "${indent}<${tag}/>" + fi +} + diff --git a/examples.mk b/examples.mk index 91b980168..f6e7c0062 100644 --- a/examples.mk +++ b/examples.mk @@ -25,6 +25,11 @@ LIBWEBM_MUXER_SRCS += third_party/libwebm/mkvmuxer.cpp \ third_party/libwebm/mkvwriter.hpp \ third_party/libwebm/webmids.hpp +LIBWEBM_PARSER_SRCS = third_party/libwebm/mkvparser.cpp \ + third_party/libwebm/mkvreader.cpp \ + third_party/libwebm/mkvparser.hpp \ + third_party/libwebm/mkvreader.hpp + # List of examples to build. 
UTILS are tools meant for distribution # while EXAMPLES demonstrate specific portions of the API. UTILS-$(CONFIG_DECODERS) += vpxdec.c @@ -39,14 +44,8 @@ vpxdec.SRCS += tools_common.c tools_common.h vpxdec.SRCS += y4menc.c y4menc.h vpxdec.SRCS += $(LIBYUV_SRCS) ifeq ($(CONFIG_WEBM_IO),yes) - vpxdec.SRCS += third_party/nestegg/halloc/halloc.h - vpxdec.SRCS += third_party/nestegg/halloc/src/align.h - vpxdec.SRCS += third_party/nestegg/halloc/src/halloc.c - vpxdec.SRCS += third_party/nestegg/halloc/src/hlist.h - vpxdec.SRCS += third_party/nestegg/halloc/src/macros.h - vpxdec.SRCS += third_party/nestegg/include/nestegg/nestegg.h - vpxdec.SRCS += third_party/nestegg/src/nestegg.c - vpxdec.SRCS += webmdec.c webmdec.h + vpxdec.SRCS += $(LIBWEBM_PARSER_SRCS) + vpxdec.SRCS += webmdec.cc webmdec.h endif vpxdec.GUID = BA5FE66F-38DD-E034-F542-B1578C5FB950 vpxdec.DESCRIPTION = Full featured decoder diff --git a/test/decode_to_md5.sh b/test/decode_to_md5.sh new file mode 100755 index 000000000..da1a87062 --- /dev/null +++ b/test/decode_to_md5.sh @@ -0,0 +1,68 @@ +#!/bin/sh +## +## Copyright (c) 2014 The WebM project authors. All Rights Reserved. +## +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. +## +## This file tests the libvpx decode_to_md5 example. To add new tests to this +## file, do the following: +## 1. Write a shell function (this is your test). +## 2. Add the function to decode_to_md5_tests (on a new line). +## +. $(dirname $0)/tools_common.sh + +# Environment check: Make sure input is available: +# $VP8_IVF_FILE and $VP9_IVF_FILE are required. +decode_to_md5_verify_environment() { + if [ ! -e "${VP8_IVF_FILE}" ] || [ ! 
-e "${VP9_IVF_FILE}" ]; then + echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH." + return 1 + fi +} + +# Runs decode_to_md5 on $1 and echoes the MD5 sum for the final frame. $2 is +# interpreted as codec name and used solely to name the output file. +decode_to_md5() { + local decoder="${LIBVPX_BIN_PATH}/decode_to_md5${VPX_TEST_EXE_SUFFIX}" + local input_file="$1" + local codec="$2" + local output_file="${VPX_TEST_OUTPUT_DIR}/decode_to_md5_${codec}" + + [ -x "${decoder}" ] || return 1 + + "${decoder}" "${input_file}" "${output_file}" > /dev/null 2>&1 + + [ -e "${output_file}" ] || return 1 + + local md5_last_frame=$(tail -n1 "${output_file}") + echo "${md5_last_frame% *}" | tr -d [:space:] +} + +decode_to_md5_vp8() { + # expected MD5 sum for the last frame. + local expected_md5="56794d911b02190212bca92f88ad60c6" + + if [ "$(vp8_decode_available)" = "yes" ]; then + local actual_md5="$(decode_to_md5 "${VP8_IVF_FILE}" vp8)" || return 1 + [ "${actual_md5}" = "${expected_md5}" ] || return 1 + fi +} + +decode_to_md5_vp9() { + # expected MD5 sum for the last frame. + local expected_md5="2952c0eae93f3dadd1aa84c50d3fd6d2" + + if [ "$(vp9_decode_available)" = "yes" ]; then + local actual_md5="$(decode_to_md5 "${VP9_IVF_FILE}" vp9)" || return 1 + [ "${actual_md5}" = "${expected_md5}" ] || return 1 + fi +} + +decode_to_md5_tests="decode_to_md5_vp8 + decode_to_md5_vp9" + +run_tests decode_to_md5_verify_environment "${decode_to_md5_tests}" diff --git a/test/decode_with_drops.sh b/test/decode_with_drops.sh new file mode 100755 index 000000000..d0321bfb2 --- /dev/null +++ b/test/decode_with_drops.sh @@ -0,0 +1,75 @@ +#!/bin/sh +## +## Copyright (c) 2014 The WebM project authors. All Rights Reserved. +## +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. 
All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. +## +## This file tests the libvpx decode_with_drops example. To add new tests to +## this file, do the following: +## 1. Write a shell function (this is your test). +## 2. Add the function to decode_with_drops_tests (on a new line). +## +. $(dirname $0)/tools_common.sh + +# Environment check: Make sure input is available: +# $VP8_IVF_FILE and $VP9_IVF_FILE are required. +decode_with_drops_verify_environment() { + if [ ! -e "${VP8_IVF_FILE}" ] || [ ! -e "${VP9_IVF_FILE}" ]; then + echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH." + return 1 + fi +} + +# Runs decode_with_drops on $1, $2 is interpreted as codec name and used solely +# to name the output file. $3 is the drop mode, and is passed directly to +# decode_with_drops. +decode_with_drops() { + local decoder="${LIBVPX_BIN_PATH}/decode_with_drops${VPX_TEST_EXE_SUFFIX}" + local input_file="$1" + local codec="$2" + local output_file="${VPX_TEST_OUTPUT_DIR}/decode_with_drops_${codec}" + local drop_mode="$3" + + [ -x "${decoder}" ] || return 1 + + "${decoder}" "${input_file}" "${output_file}" "${drop_mode}" > /dev/null 2>&1 + + [ -e "${output_file}" ] || return 1 +} + +# Decodes $VP8_IVF_FILE while dropping frames, twice: once in sequence mode, +# and once in pattern mode. +# Note: This test assumes that $VP8_IVF_FILE has exactly 29 frames, and could +# break if the file is modified. +decode_with_drops_vp8() { + if [ "$(vp8_decode_available)" = "yes" ]; then + # Test sequence mode: Drop frames 2-28. + decode_with_drops "${VP8_IVF_FILE}" "vp8" "2-28" + + # Test pattern mode: Drop 3 of every 4 frames. + decode_with_drops "${VP8_IVF_FILE}" "vp8" "3/4" + fi +} + +# Decodes $VP9_IVF_FILE while dropping frames, twice: once in sequence mode, +# and once in pattern mode. +# Note: This test assumes that $VP9_IVF_FILE has exactly 20 frames, and could +# break if the file is modified. 
+decode_with_drops_vp9() { + if [ "$(vp9_decode_available)" = "yes" ]; then + # Test sequence mode: Drop frames 2-19. + decode_with_drops "${VP9_IVF_FILE}" "vp9" "2-19" + + # Test pattern mode: Drop 3 of every 4 frames. + decode_with_drops "${VP9_IVF_FILE}" "vp9" "3/4" + fi +} + +decode_with_drops_tests="decode_with_drops_vp8 + decode_with_drops_vp9" + +run_tests decode_with_drops_verify_environment "${decode_with_drops_tests}" diff --git a/test/examples.sh b/test/examples.sh new file mode 100755 index 000000000..ac2a18c03 --- /dev/null +++ b/test/examples.sh @@ -0,0 +1,28 @@ +#!/bin/sh +## +## Copyright (c) 2014 The WebM project authors. All Rights Reserved. +## +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. +## +## This file runs all of the tests for the libvpx examples. +## +. $(dirname $0)/tools_common.sh + +example_tests=$(ls $(dirname $0)/*.sh) + +# List of script names to exclude. +exclude_list="examples vpxdec vpxenc tools_common" + +# Filter out the scripts in $exclude_list. +for word in ${exclude_list}; do + example_tests=$(filter_strings "${example_tests}" "${word}" exclude) +done + +for test in ${example_tests}; do + # Source each test script so that exporting variables can be avoided. + . "${test}" +done diff --git a/test/simple_decoder.sh b/test/simple_decoder.sh new file mode 100755 index 000000000..a0db58ff8 --- /dev/null +++ b/test/simple_decoder.sh @@ -0,0 +1,57 @@ +#!/bin/sh +## +## Copyright (c) 2014 The WebM project authors. All Rights Reserved. +## +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. 
An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. +## +## This file tests the libvpx simple_decoder example code. To add new tests to +## this file, do the following: +## 1. Write a shell function (this is your test). +## 2. Add the function to simple_decoder_tests (on a new line). +## +. $(dirname $0)/tools_common.sh + +# Environment check: Make sure input is available: +# $VP8_IVF_FILE and $VP9_IVF_FILE are required. +simple_decoder_verify_environment() { + if [ ! -e "${VP8_IVF_FILE}" ] || [ ! -e "${VP9_IVF_FILE}" ]; then + echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH." + return 1 + fi +} + +# Runs simple_decoder using $1 as input file. $2 is the codec name, and is used +# solely to name the output file. +simple_decoder() { + local decoder="${LIBVPX_BIN_PATH}/simple_decoder${VPX_TEST_EXE_SUFFIX}" + local input_file="$1" + local codec="$2" + local output_file="${VPX_TEST_OUTPUT_DIR}/simple_decoder_${codec}.raw" + + [ -x "${decoder}" ] || return 1 + + "${decoder}" "${input_file}" "${output_file}" > /dev/null 2>&1 + + [ -e "${output_file}" ] || return 1 +} + +simple_decoder_vp8() { + if [ "$(vp8_decode_available)" = "yes" ]; then + simple_decoder "${VP8_IVF_FILE}" vp8 || return 1 + fi +} + +simple_decoder_vp9() { + if [ "$(vp9_decode_available)" = "yes" ]; then + simple_decoder "${VP9_IVF_FILE}" vp9 || return 1 + fi +} + +simple_decoder_tests="simple_decoder_vp8 + simple_decoder_vp9" + +run_tests simple_decoder_verify_environment "${simple_decoder_tests}" diff --git a/test/simple_encoder.sh b/test/simple_encoder.sh new file mode 100755 index 000000000..13f5e298b --- /dev/null +++ b/test/simple_encoder.sh @@ -0,0 +1,58 @@ +#!/bin/sh +## +## Copyright (c) 2014 The WebM project authors. All Rights Reserved. 
+## +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. +## +## This file tests the libvpx simple_encoder example. To add new tests to this +## file, do the following: +## 1. Write a shell function (this is your test). +## 2. Add the function to simple_encoder_tests (on a new line). +## +. $(dirname $0)/tools_common.sh + +# Environment check: $YUV_RAW_INPUT is required. +simple_encoder_verify_environment() { + if [ ! -e "${YUV_RAW_INPUT}" ]; then + echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH." + return 1 + fi +} + +# Runs simple_encoder using the codec specified by $1. +simple_encoder() { + local encoder="${LIBVPX_BIN_PATH}/simple_encoder${VPX_TEST_EXE_SUFFIX}" + local codec="$1" + local output_file="${VPX_TEST_OUTPUT_DIR}/simple_encoder_${codec}.ivf" + + [ -x "${encoder}" ] || return 1 + + "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" "${YUV_RAW_INPUT_HEIGHT}" \ + "${YUV_RAW_INPUT}" "${output_file}" 9999 > /dev/null 2>&1 + + [ -e "${output_file}" ] || return 1 +} + +simple_encoder_vp8() { + if [ "$(vp8_encode_available)" = "yes" ]; then + simple_encoder vp8 || return 1 + fi +} + +# TODO(tomfinegan): Add a frame limit param to simple_encoder and enable this +# test. VP9 is just too slow right now: This test takes 4m30s+ on a fast +# machine. 
+DISABLED_simple_encoder_vp9() { + if [ "$(vp9_encode_available)" = "yes" ]; then + simple_encoder vp9 || return 1 + fi +} + +simple_encoder_tests="simple_encoder_vp8 + DISABLED_simple_encoder_vp9" + +run_tests simple_encoder_verify_environment "${simple_encoder_tests}" diff --git a/test/test.mk b/test/test.mk index da56b00ec..0dcb6c86e 100644 --- a/test/test.mk +++ b/test/test.mk @@ -43,15 +43,13 @@ LIBVPX_TEST_SRCS-yes += encode_test_driver.h ## WebM Parsing ifeq ($(CONFIG_WEBM_IO), yes) -NESTEGG_SRCS += ../third_party/nestegg/halloc/halloc.h -NESTEGG_SRCS += ../third_party/nestegg/halloc/src/align.h -NESTEGG_SRCS += ../third_party/nestegg/halloc/src/halloc.c -NESTEGG_SRCS += ../third_party/nestegg/halloc/src/hlist.h -NESTEGG_SRCS += ../third_party/nestegg/include/nestegg/nestegg.h -NESTEGG_SRCS += ../third_party/nestegg/src/nestegg.c -LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += $(NESTEGG_SRCS) +LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser.cpp +LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvreader.cpp +LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser.hpp +LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvreader.hpp +LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += $(LIBWEBM_PARSER_SRCS) LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../tools_common.h -LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../webmdec.c +LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../webmdec.cc LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../webmdec.h LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += webm_video_source.h endif diff --git a/test/tools_common.sh b/test/tools_common.sh index 45b777178..0f5cbb9e0 100755 --- a/test/tools_common.sh +++ b/test/tools_common.sh @@ -9,6 +9,11 @@ ## be found in the AUTHORS file in the root of the source tree. ## ## This file contains shell code shared by test scripts for libvpx tools. + +# Use $VPX_TEST_TOOLS_COMMON_SH as a pseudo include guard. 
+if [ -z "${VPX_TEST_TOOLS_COMMON_SH}" ]; then +VPX_TEST_TOOLS_COMMON_SH=included + set -e # Sets $VPX_TOOL_TEST to the name specified by positional parameter one. @@ -441,3 +446,5 @@ $(basename "${0%.*}") test configuration: VPX_TEST_RUN_DISABLED_TESTS=${VPX_TEST_RUN_DISABLED_TESTS} EOF fi + +fi # End $VPX_TEST_TOOLS_COMMON_SH pseudo include guard. diff --git a/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm b/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm index e3ea91fe6..a8730aa04 100644 --- a/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm +++ b/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm @@ -26,6 +26,7 @@ |vp8_build_intra_predictors_mby_neon_func| PROC push {r4-r8, lr} + vpush {d8-d15} cmp r3, #0 beq case_dc_pred @@ -37,8 +38,8 @@ beq case_tm_pred case_dc_pred - ldr r4, [sp, #24] ; Up - ldr r5, [sp, #28] ; Left + ldr r4, [sp, #88] ; Up + ldr r5, [sp, #92] ; Left ; Default the DC average to 128 mov r12, #128 @@ -143,6 +144,7 @@ skip_dc_pred_up_left vst1.u8 {q0}, [r1]! vst1.u8 {q0}, [r1]! + vpop {d8-d15} pop {r4-r8,pc} case_v_pred ; Copy down above row @@ -165,6 +167,7 @@ case_v_pred vst1.u8 {q0}, [r1]! vst1.u8 {q0}, [r1]! vst1.u8 {q0}, [r1]! + vpop {d8-d15} pop {r4-r8,pc} case_h_pred @@ -224,6 +227,7 @@ case_h_pred vst1.u8 {q2}, [r1]! vst1.u8 {q3}, [r1]! 
+ vpop {d8-d15} pop {r4-r8,pc} case_tm_pred @@ -293,6 +297,7 @@ case_tm_pred_loop subs r12, r12, #1 bne case_tm_pred_loop + vpop {d8-d15} pop {r4-r8,pc} ENDP @@ -307,6 +312,7 @@ case_tm_pred_loop |vp8_build_intra_predictors_mby_s_neon_func| PROC push {r4-r8, lr} + vpush {d8-d15} mov r1, r0 ; unsigned char *ypred_ptr = x->dst.y_buffer; //x->Predictor; @@ -320,8 +326,8 @@ case_tm_pred_loop beq case_tm_pred_s case_dc_pred_s - ldr r4, [sp, #24] ; Up - ldr r5, [sp, #28] ; Left + ldr r4, [sp, #88] ; Up + ldr r5, [sp, #92] ; Left ; Default the DC average to 128 mov r12, #128 @@ -426,6 +432,7 @@ skip_dc_pred_up_left_s vst1.u8 {q0}, [r1], r2 vst1.u8 {q0}, [r1], r2 + vpop {d8-d15} pop {r4-r8,pc} case_v_pred_s ; Copy down above row @@ -448,6 +455,8 @@ case_v_pred_s vst1.u8 {q0}, [r1], r2 vst1.u8 {q0}, [r1], r2 vst1.u8 {q0}, [r1], r2 + + vpop {d8-d15} pop {r4-r8,pc} case_h_pred_s @@ -507,6 +516,7 @@ case_h_pred_s vst1.u8 {q2}, [r1], r2 vst1.u8 {q3}, [r1], r2 + vpop {d8-d15} pop {r4-r8,pc} case_tm_pred_s @@ -576,6 +586,7 @@ case_tm_pred_loop_s subs r12, r12, #1 bne case_tm_pred_loop_s + vpop {d8-d15} pop {r4-r8,pc} ENDP diff --git a/vp8/common/arm/neon/idct_dequant_0_2x_neon.asm b/vp8/common/arm/neon/idct_dequant_0_2x_neon.asm index 6c29c5586..3a3921081 100644 --- a/vp8/common/arm/neon/idct_dequant_0_2x_neon.asm +++ b/vp8/common/arm/neon/idct_dequant_0_2x_neon.asm @@ -22,6 +22,7 @@ ; r3 stride |idct_dequant_0_2x_neon| PROC push {r4, r5} + vpush {d8-d15} add r12, r2, #4 vld1.32 {d2[0]}, [r2], r3 @@ -72,6 +73,7 @@ vst1.32 {d4[1]}, [r2] vst1.32 {d10[1]}, [r0] + vpop {d8-d15} pop {r4, r5} bx lr diff --git a/vp8/common/arm/neon/idct_dequant_full_2x_neon.asm b/vp8/common/arm/neon/idct_dequant_full_2x_neon.asm index d5dce63f6..8da0fa0b7 100644 --- a/vp8/common/arm/neon/idct_dequant_full_2x_neon.asm +++ b/vp8/common/arm/neon/idct_dequant_full_2x_neon.asm @@ -22,6 +22,8 @@ ; r2 *dst ; r3 stride |idct_dequant_full_2x_neon| PROC + vpush {d8-d15} + vld1.16 {q0, q1}, [r1] ; dq (same l/r) 
vld1.16 {q2, q3}, [r0] ; l q add r0, r0, #32 @@ -184,6 +186,7 @@ vst1.32 {d3[0]}, [r2] vst1.32 {d3[1]}, [r1] + vpop {d8-d15} bx lr ENDP ; |idct_dequant_full_2x_neon| diff --git a/vp8/common/arm/neon/loopfilter_neon.asm b/vp8/common/arm/neon/loopfilter_neon.asm index e44be0a1e..c4f09c775 100644 --- a/vp8/common/arm/neon/loopfilter_neon.asm +++ b/vp8/common/arm/neon/loopfilter_neon.asm @@ -24,10 +24,12 @@ ; sp unsigned char thresh, |vp8_loop_filter_horizontal_edge_y_neon| PROC push {lr} + vpush {d8-d15} + vdup.u8 q0, r2 ; duplicate blimit vdup.u8 q1, r3 ; duplicate limit sub r2, r0, r1, lsl #2 ; move src pointer down by 4 lines - ldr r3, [sp, #4] ; load thresh + ldr r3, [sp, #68] ; load thresh add r12, r2, r1 add r1, r1, r1 @@ -52,6 +54,7 @@ vst1.u8 {q7}, [r2@128], r1 ; store oq0 vst1.u8 {q8}, [r12@128], r1 ; store oq1 + vpop {d8-d15} pop {pc} ENDP ; |vp8_loop_filter_horizontal_edge_y_neon| @@ -64,10 +67,12 @@ ; sp+4 unsigned char *v |vp8_loop_filter_horizontal_edge_uv_neon| PROC push {lr} + vpush {d8-d15} + vdup.u8 q0, r2 ; duplicate blimit vdup.u8 q1, r3 ; duplicate limit - ldr r12, [sp, #4] ; load thresh - ldr r2, [sp, #8] ; load v ptr + ldr r12, [sp, #68] ; load thresh + ldr r2, [sp, #72] ; load v ptr vdup.u8 q2, r12 ; duplicate thresh sub r3, r0, r1, lsl #2 ; move u pointer down by 4 lines @@ -104,6 +109,7 @@ vst1.u8 {d16}, [r0@64] ; store u oq1 vst1.u8 {d17}, [r2@64] ; store v oq1 + vpop {d8-d15} pop {pc} ENDP ; |vp8_loop_filter_horizontal_edge_uv_neon| @@ -120,11 +126,13 @@ |vp8_loop_filter_vertical_edge_y_neon| PROC push {lr} + vpush {d8-d15} + vdup.u8 q0, r2 ; duplicate blimit vdup.u8 q1, r3 ; duplicate limit sub r2, r0, #4 ; src ptr down by 4 columns add r1, r1, r1 - ldr r3, [sp, #4] ; load thresh + ldr r3, [sp, #68] ; load thresh add r12, r2, r1, asr #1 vld1.u8 {d6}, [r2], r1 @@ -194,6 +202,7 @@ vst4.8 {d14[6], d15[6], d16[6], d17[6]}, [r0] vst4.8 {d14[7], d15[7], d16[7], d17[7]}, [r12] + vpop {d8-d15} pop {pc} ENDP ; |vp8_loop_filter_vertical_edge_y_neon| 
@@ -210,9 +219,11 @@ ; sp+4 unsigned char *v |vp8_loop_filter_vertical_edge_uv_neon| PROC push {lr} + vpush {d8-d15} + vdup.u8 q0, r2 ; duplicate blimit sub r12, r0, #4 ; move u pointer down by 4 columns - ldr r2, [sp, #8] ; load v ptr + ldr r2, [sp, #72] ; load v ptr vdup.u8 q1, r3 ; duplicate limit sub r3, r2, #4 ; move v pointer down by 4 columns @@ -233,7 +244,7 @@ vld1.u8 {d20}, [r12] vld1.u8 {d21}, [r3] - ldr r12, [sp, #4] ; load thresh + ldr r12, [sp, #68] ; load thresh ;transpose to 8x16 matrix vtrn.32 q3, q7 @@ -281,6 +292,7 @@ vst4.8 {d10[7], d11[7], d12[7], d13[7]}, [r0] vst4.8 {d14[7], d15[7], d16[7], d17[7]}, [r2] + vpop {d8-d15} pop {pc} ENDP ; |vp8_loop_filter_vertical_edge_uv_neon| diff --git a/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm b/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm index adf848b9c..6eb06516d 100644 --- a/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm +++ b/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm @@ -9,7 +9,6 @@ ; - ;EXPORT |vp8_loop_filter_simple_horizontal_edge_neon| EXPORT |vp8_loop_filter_bhs_neon| EXPORT |vp8_loop_filter_mbhs_neon| ARM @@ -22,7 +21,7 @@ ; q1 limit, PRESERVE |vp8_loop_filter_simple_horizontal_edge_neon| PROC - + vpush {d8-d15} sub r3, r0, r1, lsl #1 ; move src pointer down by 2 lines vld1.u8 {q7}, [r0@128], r1 ; q0 @@ -82,6 +81,7 @@ vst1.u8 {q6}, [r3@128] ; store op0 vst1.u8 {q7}, [r0@128] ; store oq0 + vpop {d8-d15} bx lr ENDP ; |vp8_loop_filter_simple_horizontal_edge_neon| diff --git a/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm b/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm index e690df2f7..78d13c895 100644 --- a/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm +++ b/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm @@ -9,7 +9,6 @@ ; - ;EXPORT |vp8_loop_filter_simple_vertical_edge_neon| EXPORT |vp8_loop_filter_bvs_neon| EXPORT |vp8_loop_filter_mbvs_neon| ARM @@ -22,6 +21,8 @@ ; q1 limit, PRESERVE 
|vp8_loop_filter_simple_vertical_edge_neon| PROC + vpush {d8-d15} + sub r0, r0, #2 ; move src pointer down by 2 columns add r12, r1, r1 add r3, r0, r1 @@ -120,6 +121,7 @@ vst2.8 {d14[6], d15[6]}, [r0], r12 vst2.8 {d14[7], d15[7]}, [r3] + vpop {d8-d15} bx lr ENDP ; |vp8_loop_filter_simple_vertical_edge_neon| diff --git a/vp8/common/arm/neon/mbloopfilter_neon.asm b/vp8/common/arm/neon/mbloopfilter_neon.asm index f41c156df..d200c3090 100644 --- a/vp8/common/arm/neon/mbloopfilter_neon.asm +++ b/vp8/common/arm/neon/mbloopfilter_neon.asm @@ -28,8 +28,10 @@ ; sp unsigned char thresh, |vp8_mbloop_filter_horizontal_edge_y_neon| PROC push {lr} + vpush {d8-d15} + add r1, r1, r1 ; double stride - ldr r12, [sp, #4] ; load thresh + ldr r12, [sp, #68] ; load thresh sub r0, r0, r1, lsl #1 ; move src pointer down by 4 lines vdup.u8 q2, r12 ; thresh add r12, r0, r1, lsr #1 ; move src pointer up by 1 line @@ -55,6 +57,7 @@ vst1.u8 {q8}, [r12@128] ; store oq1 vst1.u8 {q9}, [r0@128] ; store oq2 + vpop {d8-d15} pop {pc} ENDP ; |vp8_mbloop_filter_horizontal_edge_y_neon| @@ -72,10 +75,12 @@ |vp8_mbloop_filter_horizontal_edge_uv_neon| PROC push {lr} - ldr r12, [sp, #4] ; load thresh + vpush {d8-d15} + + ldr r12, [sp, #68] ; load thresh sub r0, r0, r1, lsl #2 ; move u pointer down by 4 lines vdup.u8 q2, r12 ; thresh - ldr r12, [sp, #8] ; load v ptr + ldr r12, [sp, #72] ; load v ptr sub r12, r12, r1, lsl #2 ; move v pointer down by 4 lines vld1.u8 {d6}, [r0@64], r1 ; p3 @@ -116,6 +121,7 @@ vst1.u8 {d18}, [r0@64], r1 ; store u oq2 vst1.u8 {d19}, [r12@64], r1 ; store v oq2 + vpop {d8-d15} pop {pc} ENDP ; |vp8_mbloop_filter_horizontal_edge_uv_neon| @@ -130,7 +136,9 @@ ; sp unsigned char thresh, |vp8_mbloop_filter_vertical_edge_y_neon| PROC push {lr} - ldr r12, [sp, #4] ; load thresh + vpush {d8-d15} + + ldr r12, [sp, #68] ; load thresh sub r0, r0, #4 ; move src pointer down by 4 columns vdup.s8 q2, r12 ; thresh add r12, r0, r1, lsl #3 ; move src pointer down by 8 lines @@ -208,6 +216,7 @@ 
vst1.8 {d20}, [r0] vst1.8 {d21}, [r12] + vpop {d8-d15} pop {pc} ENDP ; |vp8_mbloop_filter_vertical_edge_y_neon| @@ -224,10 +233,12 @@ ; sp+4 unsigned char *v |vp8_mbloop_filter_vertical_edge_uv_neon| PROC push {lr} - ldr r12, [sp, #4] ; load thresh + vpush {d8-d15} + + ldr r12, [sp, #68] ; load thresh sub r0, r0, #4 ; move u pointer down by 4 columns vdup.u8 q2, r12 ; thresh - ldr r12, [sp, #8] ; load v ptr + ldr r12, [sp, #72] ; load v ptr sub r12, r12, #4 ; move v pointer down by 4 columns vld1.u8 {d6}, [r0], r1 ;load u data @@ -303,6 +314,7 @@ vst1.8 {d20}, [r0] vst1.8 {d21}, [r12] + vpop {d8-d15} pop {pc} ENDP ; |vp8_mbloop_filter_vertical_edge_uv_neon| diff --git a/vp8/common/arm/neon/sad16_neon.asm b/vp8/common/arm/neon/sad16_neon.asm index d7c590e15..7197e5655 100644 --- a/vp8/common/arm/neon/sad16_neon.asm +++ b/vp8/common/arm/neon/sad16_neon.asm @@ -24,6 +24,7 @@ ; r3 int ref_stride |vp8_sad16x16_neon| PROC ;; + vpush {d8-d15} vld1.8 {q0}, [r0], r1 vld1.8 {q4}, [r2], r3 @@ -132,6 +133,7 @@ vmov.32 r0, d0[0] + vpop {d8-d15} bx lr ENDP @@ -143,6 +145,8 @@ ; unsigned char *ref_ptr, ; int ref_stride) |vp8_sad16x8_neon| PROC + vpush {d8-d15} + vld1.8 {q0}, [r0], r1 vld1.8 {q4}, [r2], r3 @@ -200,6 +204,7 @@ vmov.32 r0, d0[0] + vpop {d8-d15} bx lr ENDP diff --git a/vp8/common/arm/neon/sad8_neon.asm b/vp8/common/arm/neon/sad8_neon.asm index 23ba6df93..6b849d933 100644 --- a/vp8/common/arm/neon/sad8_neon.asm +++ b/vp8/common/arm/neon/sad8_neon.asm @@ -25,6 +25,7 @@ ; int ref_stride) |vp8_sad8x8_neon| PROC + vpush {d8-d15} vld1.8 {d0}, [r0], r1 vld1.8 {d8}, [r2], r3 @@ -70,6 +71,7 @@ vmov.32 r0, d0[0] + vpop {d8-d15} bx lr ENDP @@ -82,6 +84,7 @@ ; int ref_stride) |vp8_sad8x16_neon| PROC + vpush {d8-d15} vld1.8 {d0}, [r0], r1 vld1.8 {d8}, [r2], r3 @@ -167,6 +170,7 @@ vmov.32 r0, d0[0] + vpop {d8-d15} bx lr ENDP @@ -179,6 +183,7 @@ ; int ref_stride) |vp8_sad4x4_neon| PROC + vpush {d8-d15} vld1.8 {d0}, [r0], r1 vld1.8 {d8}, [r2], r3 @@ -202,6 +207,7 @@ vpaddl.u32 d0, 
d1 vmov.32 r0, d0[0] + vpop {d8-d15} bx lr ENDP diff --git a/vp8/common/arm/neon/save_reg_neon.asm b/vp8/common/arm/neon/save_reg_neon.asm deleted file mode 100644 index fd7002e7a..000000000 --- a/vp8/common/arm/neon/save_reg_neon.asm +++ /dev/null @@ -1,36 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_push_neon| - EXPORT |vp8_pop_neon| - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -|vp8_push_neon| PROC - vst1.i64 {d8, d9, d10, d11}, [r0]! - vst1.i64 {d12, d13, d14, d15}, [r0]! - bx lr - - ENDP - -|vp8_pop_neon| PROC - vld1.i64 {d8, d9, d10, d11}, [r0]! - vld1.i64 {d12, d13, d14, d15}, [r0]! - bx lr - - ENDP - - END - diff --git a/vp8/common/arm/neon/shortidct4x4llm_neon.asm b/vp8/common/arm/neon/shortidct4x4llm_neon.asm index 67d2ab015..87ca887be 100644 --- a/vp8/common/arm/neon/shortidct4x4llm_neon.asm +++ b/vp8/common/arm/neon/shortidct4x4llm_neon.asm @@ -37,12 +37,14 @@ ; result of the multiplication that is needed in IDCT. 
|vp8_short_idct4x4llm_neon| PROC + vpush {d8-d15} + adr r12, idct_coeff vld1.16 {q1, q2}, [r0] vld1.16 {d0}, [r12] vswp d3, d4 ;q2(vp[4] vp[12]) - ldr r0, [sp] ; stride + ldr r0, [sp, #64] ; stride vqdmulh.s16 q3, q2, d0[2] vqdmulh.s16 q4, q2, d0[0] @@ -125,6 +127,7 @@ vst1.32 d2[0], [r3], r0 vst1.32 d2[1], [r3], r0 + vpop {d8-d15} bx lr ENDP diff --git a/vp8/common/arm/neon/sixtappredict16x16_neon.asm b/vp8/common/arm/neon/sixtappredict16x16_neon.asm index 9fdafd360..dd27719bf 100644 --- a/vp8/common/arm/neon/sixtappredict16x16_neon.asm +++ b/vp8/common/arm/neon/sixtappredict16x16_neon.asm @@ -43,10 +43,11 @@ filter16_coeff |vp8_sixtap_predict16x16_neon| PROC push {r4-r5, lr} + vpush {d8-d15} adr r12, filter16_coeff - ldr r4, [sp, #12] ;load parameters from stack - ldr r5, [sp, #16] ;load parameters from stack + ldr r4, [sp, #76] ;load parameters from stack + ldr r5, [sp, #80] ;load parameters from stack cmp r2, #0 ;skip first_pass filter if xoffset=0 beq secondpass_filter16x16_only @@ -291,6 +292,8 @@ secondpass_inner_loop_neon bne filt_blk2d_sp16x16_outloop_neon add sp, sp, #336 + + vpop {d8-d15} pop {r4-r5,pc} ;-------------------- @@ -384,6 +387,7 @@ filt_blk2d_fpo16x16_loop_neon bne filt_blk2d_fpo16x16_loop_neon + vpop {d8-d15} pop {r4-r5,pc} ;-------------------- @@ -482,6 +486,7 @@ secondpass_only_inner_loop_neon bne filt_blk2d_spo16x16_outloop_neon + vpop {d8-d15} pop {r4-r5,pc} ENDP diff --git a/vp8/common/arm/neon/sixtappredict4x4_neon.asm b/vp8/common/arm/neon/sixtappredict4x4_neon.asm index a4222bc62..e32e71305 100644 --- a/vp8/common/arm/neon/sixtappredict4x4_neon.asm +++ b/vp8/common/arm/neon/sixtappredict4x4_neon.asm @@ -35,10 +35,11 @@ filter4_coeff |vp8_sixtap_predict4x4_neon| PROC push {r4, lr} + vpush {d8-d15} adr r12, filter4_coeff - ldr r4, [sp, #8] ;load parameters from stack - ldr lr, [sp, #12] ;load parameters from stack + ldr r4, [sp, #72] ;load parameters from stack + ldr lr, [sp, #76] ;load parameters from stack cmp r2, #0 ;skip 
first_pass filter if xoffset=0 beq secondpass_filter4x4_only @@ -261,6 +262,7 @@ filter4_coeff vst1.32 {d4[0]}, [r1] vst1.32 {d4[1]}, [r2] + vpop {d8-d15} pop {r4, pc} @@ -348,6 +350,7 @@ firstpass_filter4x4_only vst1.32 {d28[0]}, [r1] vst1.32 {d28[1]}, [r2] + vpop {d8-d15} pop {r4, pc} @@ -413,6 +416,7 @@ secondpass_filter4x4_only vst1.32 {d4[0]}, [r1] vst1.32 {d4[1]}, [r2] + vpop {d8-d15} pop {r4, pc} ENDP diff --git a/vp8/common/arm/neon/sixtappredict8x4_neon.asm b/vp8/common/arm/neon/sixtappredict8x4_neon.asm index a57ec015f..d19bf8920 100644 --- a/vp8/common/arm/neon/sixtappredict8x4_neon.asm +++ b/vp8/common/arm/neon/sixtappredict8x4_neon.asm @@ -35,10 +35,11 @@ filter8_coeff |vp8_sixtap_predict8x4_neon| PROC push {r4-r5, lr} + vpush {d8-d15} adr r12, filter8_coeff - ldr r4, [sp, #12] ;load parameters from stack - ldr r5, [sp, #16] ;load parameters from stack + ldr r4, [sp, #76] ;load parameters from stack + ldr r5, [sp, #80] ;load parameters from stack cmp r2, #0 ;skip first_pass filter if xoffset=0 beq secondpass_filter8x4_only @@ -297,6 +298,8 @@ filter8_coeff vst1.u8 {d9}, [r4], r5 add sp, sp, #32 + + vpop {d8-d15} pop {r4-r5,pc} ;-------------------- @@ -392,6 +395,7 @@ firstpass_filter8x4_only vst1.u8 {d24}, [r4], r5 vst1.u8 {d25}, [r4], r5 + vpop {d8-d15} pop {r4-r5,pc} ;--------------------- @@ -464,6 +468,7 @@ secondpass_filter8x4_only vst1.u8 {d8}, [r4], r5 vst1.u8 {d9}, [r4], r5 + vpop {d8-d15} pop {r4-r5,pc} ENDP diff --git a/vp8/common/arm/neon/sixtappredict8x8_neon.asm b/vp8/common/arm/neon/sixtappredict8x8_neon.asm index 00ed5aeef..4b049252c 100644 --- a/vp8/common/arm/neon/sixtappredict8x8_neon.asm +++ b/vp8/common/arm/neon/sixtappredict8x8_neon.asm @@ -35,11 +35,11 @@ filter8_coeff |vp8_sixtap_predict8x8_neon| PROC push {r4-r5, lr} - + vpush {d8-d15} adr r12, filter8_coeff - ldr r4, [sp, #12] ;load parameters from stack - ldr r5, [sp, #16] ;load parameters from stack + ldr r4, [sp, #76] ;load parameters from stack + ldr r5, [sp, #80] ;load 
parameters from stack cmp r2, #0 ;skip first_pass filter if xoffset=0 beq secondpass_filter8x8_only @@ -324,6 +324,8 @@ filt_blk2d_sp8x8_loop_neon bne filt_blk2d_sp8x8_loop_neon add sp, sp, #64 + + vpop {d8-d15} pop {r4-r5,pc} ;--------------------- @@ -428,6 +430,7 @@ filt_blk2d_fpo8x8_loop_neon bne filt_blk2d_fpo8x8_loop_neon + vpop {d8-d15} pop {r4-r5,pc} ;--------------------- @@ -515,6 +518,7 @@ filt_blk2d_spo8x8_loop_neon bne filt_blk2d_spo8x8_loop_neon + vpop {d8-d15} pop {r4-r5,pc} ENDP diff --git a/vp8/common/arm/neon/variance_neon.asm b/vp8/common/arm/neon/variance_neon.asm index e3b48327d..8ecad72b9 100644 --- a/vp8/common/arm/neon/variance_neon.asm +++ b/vp8/common/arm/neon/variance_neon.asm @@ -26,6 +26,7 @@ ; r3 int recon_stride ; stack unsigned int *sse |vp8_variance16x16_neon| PROC + vpush {q5} vmov.i8 q8, #0 ;q8 - sum vmov.i8 q9, #0 ;q9, q10 - sse vmov.i8 q10, #0 @@ -67,7 +68,7 @@ variance16x16_neon_loop vadd.u32 q10, q9, q10 ;accumulate sse vpaddl.s32 q0, q8 ;accumulate sum - ldr r12, [sp] ;load *sse from stack + ldr r12, [sp, #16] ;load *sse from stack vpaddl.u32 q1, q10 vadd.s64 d0, d0, d1 @@ -87,6 +88,8 @@ variance16x16_neon_loop vsub.u32 d0, d1, d10 vmov.32 r0, d0[0] ;return + + vpop {q5} bx lr ENDP @@ -99,6 +102,8 @@ variance16x16_neon_loop ; int recon_stride, ; unsigned int *sse) |vp8_variance16x8_neon| PROC + vpush {q5} + vmov.i8 q8, #0 ;q8 - sum vmov.i8 q9, #0 ;q9, q10 - sse vmov.i8 q10, #0 @@ -137,7 +142,7 @@ variance16x8_neon_loop vadd.u32 q10, q9, q10 ;accumulate sse vpaddl.s32 q0, q8 ;accumulate sum - ldr r12, [sp] ;load *sse from stack + ldr r12, [sp, #16] ;load *sse from stack vpaddl.u32 q1, q10 vadd.s64 d0, d0, d1 @@ -149,6 +154,8 @@ variance16x8_neon_loop vsub.u32 d0, d1, d10 vmov.32 r0, d0[0] ;return + + vpop {q5} bx lr ENDP @@ -162,6 +169,8 @@ variance16x8_neon_loop ; unsigned int *sse) |vp8_variance8x16_neon| PROC + vpush {q5} + vmov.i8 q8, #0 ;q8 - sum vmov.i8 q9, #0 ;q9, q10 - sse vmov.i8 q10, #0 @@ -192,7 +201,7 @@ 
variance8x16_neon_loop vadd.u32 q10, q9, q10 ;accumulate sse vpaddl.s32 q0, q8 ;accumulate sum - ldr r12, [sp] ;load *sse from stack + ldr r12, [sp, #16] ;load *sse from stack vpaddl.u32 q1, q10 vadd.s64 d0, d0, d1 @@ -204,6 +213,8 @@ variance8x16_neon_loop vsub.u32 d0, d1, d10 vmov.32 r0, d0[0] ;return + + vpop {q5} bx lr ENDP @@ -215,6 +226,8 @@ variance8x16_neon_loop ; r3 int recon_stride ; stack unsigned int *sse |vp8_variance8x8_neon| PROC + vpush {q5} + vmov.i8 q8, #0 ;q8 - sum vmov.i8 q9, #0 ;q9, q10 - sse vmov.i8 q10, #0 @@ -257,7 +270,7 @@ variance8x8_neon_loop vadd.u32 q10, q9, q10 ;accumulate sse vpaddl.s32 q0, q8 ;accumulate sum - ldr r12, [sp] ;load *sse from stack + ldr r12, [sp, #16] ;load *sse from stack vpaddl.u32 q1, q10 vadd.s64 d0, d0, d1 @@ -269,6 +282,8 @@ variance8x8_neon_loop vsub.u32 d0, d1, d10 vmov.32 r0, d0[0] ;return + + vpop {q5} bx lr ENDP diff --git a/vp8/common/arm/neon/vp8_subpixelvariance16x16_neon.asm b/vp8/common/arm/neon/vp8_subpixelvariance16x16_neon.asm index 9d22c5252..adc5b7e3a 100644 --- a/vp8/common/arm/neon/vp8_subpixelvariance16x16_neon.asm +++ b/vp8/common/arm/neon/vp8_subpixelvariance16x16_neon.asm @@ -31,11 +31,12 @@ bilinear_taps_coeff |vp8_sub_pixel_variance16x16_neon_func| PROC push {r4-r6, lr} + vpush {d8-d15} adr r12, bilinear_taps_coeff - ldr r4, [sp, #16] ;load *dst_ptr from stack - ldr r5, [sp, #20] ;load dst_pixels_per_line from stack - ldr r6, [sp, #24] ;load *sse from stack + ldr r4, [sp, #80] ;load *dst_ptr from stack + ldr r5, [sp, #84] ;load dst_pixels_per_line from stack + ldr r6, [sp, #88] ;load *sse from stack cmp r2, #0 ;skip first_pass filter if xoffset=0 beq secondpass_bfilter16x16_only @@ -416,6 +417,7 @@ sub_pixel_variance16x16_neon_loop add sp, sp, #528 vmov.32 r0, d0[0] ;return + vpop {d8-d15} pop {r4-r6,pc} ENDP diff --git a/vp8/common/arm/neon/vp8_subpixelvariance16x16s_neon.asm b/vp8/common/arm/neon/vp8_subpixelvariance16x16s_neon.asm index 155be4fc5..b0829af75 100644 --- 
a/vp8/common/arm/neon/vp8_subpixelvariance16x16s_neon.asm +++ b/vp8/common/arm/neon/vp8_subpixelvariance16x16s_neon.asm @@ -31,9 +31,10 @@ ;================================================ |vp8_variance_halfpixvar16x16_h_neon| PROC push {lr} + vpush {d8-d15} mov r12, #4 ;loop counter - ldr lr, [sp, #4] ;load *sse from stack + ldr lr, [sp, #68] ;load *sse from stack vmov.i8 q8, #0 ;q8 - sum vmov.i8 q9, #0 ;q9, q10 - sse vmov.i8 q10, #0 @@ -116,6 +117,8 @@ vp8_filt_fpo16x16s_4_0_loop_neon vsub.u32 d0, d1, d10 vmov.32 r0, d0[0] ;return + + vpop {d8-d15} pop {pc} ENDP @@ -131,11 +134,12 @@ vp8_filt_fpo16x16s_4_0_loop_neon ;================================================ |vp8_variance_halfpixvar16x16_v_neon| PROC push {lr} + vpush {d8-d15} mov r12, #4 ;loop counter vld1.u8 {q0}, [r0], r1 ;load src data - ldr lr, [sp, #4] ;load *sse from stack + ldr lr, [sp, #68] ;load *sse from stack vmov.i8 q8, #0 ;q8 - sum vmov.i8 q9, #0 ;q9, q10 - sse @@ -212,6 +216,8 @@ vp8_filt_spo16x16s_0_4_loop_neon vsub.u32 d0, d1, d10 vmov.32 r0, d0[0] ;return + + vpop {d8-d15} pop {pc} ENDP @@ -227,10 +233,11 @@ vp8_filt_spo16x16s_0_4_loop_neon ;================================================ |vp8_variance_halfpixvar16x16_hv_neon| PROC push {lr} + vpush {d8-d15} vld1.u8 {d0, d1, d2, d3}, [r0], r1 ;load src data - ldr lr, [sp, #4] ;load *sse from stack + ldr lr, [sp, #68] ;load *sse from stack vmov.i8 q13, #0 ;q8 - sum vext.8 q1, q0, q1, #1 ;construct src_ptr[1] @@ -331,6 +338,8 @@ vp8_filt16x16s_4_4_loop_neon vsub.u32 d0, d1, d10 vmov.32 r0, d0[0] ;return + + vpop {d8-d15} pop {pc} ENDP @@ -349,10 +358,11 @@ vp8_filt16x16s_4_4_loop_neon |vp8_sub_pixel_variance16x16s_neon| PROC push {r4, lr} + vpush {d8-d15} - ldr r4, [sp, #8] ;load *dst_ptr from stack - ldr r12, [sp, #12] ;load dst_pixels_per_line from stack - ldr lr, [sp, #16] ;load *sse from stack + ldr r4, [sp, #72] ;load *dst_ptr from stack + ldr r12, [sp, #76] ;load dst_pixels_per_line from stack + ldr lr, [sp, #80] ;load *sse from 
stack cmp r2, #0 ;skip first_pass filter if xoffset=0 beq secondpass_bfilter16x16s_only @@ -566,6 +576,7 @@ sub_pixel_variance16x16s_neon_loop add sp, sp, #256 vmov.32 r0, d0[0] ;return + vpop {d8-d15} pop {r4, pc} ENDP diff --git a/vp8/common/arm/neon/vp8_subpixelvariance8x8_neon.asm b/vp8/common/arm/neon/vp8_subpixelvariance8x8_neon.asm index f6b684753..9d9f9e077 100644 --- a/vp8/common/arm/neon/vp8_subpixelvariance8x8_neon.asm +++ b/vp8/common/arm/neon/vp8_subpixelvariance8x8_neon.asm @@ -26,11 +26,12 @@ |vp8_sub_pixel_variance8x8_neon| PROC push {r4-r5, lr} + vpush {d8-d15} adr r12, bilinear_taps_coeff - ldr r4, [sp, #12] ;load *dst_ptr from stack - ldr r5, [sp, #16] ;load dst_pixels_per_line from stack - ldr lr, [sp, #20] ;load *sse from stack + ldr r4, [sp, #76] ;load *dst_ptr from stack + ldr r5, [sp, #80] ;load dst_pixels_per_line from stack + ldr lr, [sp, #84] ;load *sse from stack cmp r2, #0 ;skip first_pass filter if xoffset=0 beq skip_firstpass_filter @@ -210,6 +211,8 @@ sub_pixel_variance8x8_neon_loop vsub.u32 d0, d1, d10 vmov.32 r0, d0[0] ;return + + vpop {d8-d15} pop {r4-r5, pc} ENDP diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c index 2d9e343bc..29fea616b 100644 --- a/vp8/decoder/onyxd_if.c +++ b/vp8/decoder/onyxd_if.c @@ -178,12 +178,6 @@ vpx_codec_err_t vp8dx_set_reference(VP8D_COMP *pbi, enum vpx_ref_frame_type ref_ return pbi->common.error.error_code; } -/*For ARM NEON, d8-d15 are callee-saved registers, and need to be saved by us.*/ -#if HAVE_NEON -extern void vp8_push_neon(int64_t *store); -extern void vp8_pop_neon(int64_t *store); -#endif - static int get_free_fb (VP8_COMMON *cm) { int i; @@ -307,9 +301,6 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, size_t size, const uint8_t *source, int64_t time_stamp) { -#if HAVE_NEON - int64_t dx_store_reg[8]; -#endif VP8_COMMON *cm = &pbi->common; int retcode = -1; @@ -319,15 +310,6 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, size_t size, if(retcode <= 0) return retcode; 
-#if HAVE_NEON -#if CONFIG_RUNTIME_CPU_DETECT - if (cm->cpu_caps & HAS_NEON) -#endif - { - vp8_push_neon(dx_store_reg); - } -#endif - cm->new_fb_idx = get_free_fb (cm); /* setup reference frames for vp8_decode_frame */ @@ -403,15 +385,6 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, size_t size, pbi->last_time_stamp = time_stamp; decode_exit: -#if HAVE_NEON -#if CONFIG_RUNTIME_CPU_DETECT - if (cm->cpu_caps & HAS_NEON) -#endif - { - vp8_pop_neon(dx_store_reg); - } -#endif - pbi->common.error.setjmp = 0; return retcode; } diff --git a/vp8/encoder/arm/neon/subtract_neon.asm b/vp8/encoder/arm/neon/subtract_neon.asm index 5bda78678..840cb33d9 100644 --- a/vp8/encoder/arm/neon/subtract_neon.asm +++ b/vp8/encoder/arm/neon/subtract_neon.asm @@ -65,8 +65,10 @@ ; unsigned char *pred, int pred_stride) |vp8_subtract_mby_neon| PROC push {r4-r7} + vpush {d8-d15} + mov r12, #4 - ldr r4, [sp, #16] ; pred_stride + ldr r4, [sp, #80] ; pred_stride mov r6, #32 ; "diff" stride x2 add r5, r0, #16 ; second diff pointer @@ -101,6 +103,7 @@ subtract_mby_loop subs r12, r12, #1 bne subtract_mby_loop + vpop {d8-d15} pop {r4-r7} bx lr ENDP @@ -112,9 +115,11 @@ subtract_mby_loop |vp8_subtract_mbuv_neon| PROC push {r4-r7} - ldr r4, [sp, #16] ; upred - ldr r5, [sp, #20] ; vpred - ldr r6, [sp, #24] ; pred_stride + vpush {d8-d15} + + ldr r4, [sp, #80] ; upred + ldr r5, [sp, #84] ; vpred + ldr r6, [sp, #88] ; pred_stride add r0, r0, #512 ; short *udiff = diff + 256; mov r12, #32 ; "diff" stride x2 add r7, r0, #16 ; second diff pointer @@ -191,6 +196,7 @@ subtract_mby_loop vst1.16 {q14}, [r0], r12 vst1.16 {q15}, [r7], r12 + vpop {d8-d15} pop {r4-r7} bx lr diff --git a/vp8/encoder/arm/neon/vp8_memcpy_neon.asm b/vp8/encoder/arm/neon/vp8_memcpy_neon.asm index 5b9f11e59..d219e2d14 100644 --- a/vp8/encoder/arm/neon/vp8_memcpy_neon.asm +++ b/vp8/encoder/arm/neon/vp8_memcpy_neon.asm @@ -21,6 +21,7 @@ ;void vp8_memcpy_partial_neon(unsigned char *dst_ptr, unsigned char *src_ptr, ; int sz); 
|vp8_memcpy_partial_neon| PROC + vpush {d8-d15} ;pld [r1] ;preload pred data ;pld [r1, #128] ;pld [r1, #256] @@ -64,6 +65,7 @@ extra_copy_neon_loop bne extra_copy_neon_loop done_copy_neon_loop + vpop {d8-d15} bx lr ENDP diff --git a/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm b/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm index 55edbf512..f82af3ee3 100644 --- a/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm +++ b/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm @@ -27,6 +27,8 @@ ;from vp8_variance(). |vp8_mse16x16_neon| PROC + vpush {q7} + vmov.i8 q7, #0 ;q7, q8, q9, q10 - sse vmov.i8 q8, #0 vmov.i8 q9, #0 @@ -62,7 +64,7 @@ mse16x16_neon_loop vadd.u32 q7, q7, q8 vadd.u32 q9, q9, q10 - ldr r12, [sp] ;load *sse from stack + ldr r12, [sp, #16] ;load *sse from stack vadd.u32 q10, q7, q9 vpaddl.u32 q1, q10 @@ -71,6 +73,7 @@ mse16x16_neon_loop vst1.32 {d0[0]}, [r12] vmov.32 r0, d0[0] + vpop {q7} bx lr ENDP @@ -82,6 +85,8 @@ mse16x16_neon_loop ; r2 unsigned char *ref_ptr, ; r3 int recon_stride |vp8_get4x4sse_cs_neon| PROC + vpush {q7} + vld1.8 {d0}, [r0], r1 ;Load up source and reference vld1.8 {d4}, [r2], r3 vld1.8 {d1}, [r0], r1 @@ -109,6 +114,8 @@ mse16x16_neon_loop vadd.u64 d0, d2, d3 vmov.32 r0, d0[0] + + vpop {q7} bx lr ENDP diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index 32c599791..560134ee5 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -4820,33 +4820,11 @@ static void Pass2Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest, } #endif -/* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. 
*/ -#if HAVE_NEON -extern void vp8_push_neon(int64_t *store); -extern void vp8_pop_neon(int64_t *store); -#endif - - int vp8_receive_raw_frame(VP8_COMP *cpi, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time) { -#if HAVE_NEON - int64_t store_reg[8]; -#if CONFIG_RUNTIME_CPU_DETECT - VP8_COMMON *cm = &cpi->common; -#endif -#endif struct vpx_usec_timer timer; int res = 0; -#if HAVE_NEON -#if CONFIG_RUNTIME_CPU_DETECT - if (cm->cpu_caps & HAS_NEON) -#endif - { - vp8_push_neon(store_reg); - } -#endif - vpx_usec_timer_start(&timer); /* Reinit the lookahead buffer if the frame size changes */ @@ -4863,15 +4841,6 @@ int vp8_receive_raw_frame(VP8_COMP *cpi, unsigned int frame_flags, YV12_BUFFER_C vpx_usec_timer_mark(&timer); cpi->time_receive_data += vpx_usec_timer_elapsed(&timer); -#if HAVE_NEON -#if CONFIG_RUNTIME_CPU_DETECT - if (cm->cpu_caps & HAS_NEON) -#endif - { - vp8_pop_neon(store_reg); - } -#endif - return res; } @@ -4892,9 +4861,6 @@ static int frame_is_reference(const VP8_COMP *cpi) int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, unsigned char *dest_end, int64_t *time_stamp, int64_t *time_end, int flush) { -#if HAVE_NEON - int64_t store_reg[8]; -#endif VP8_COMMON *cm; struct vpx_usec_timer tsctimer; struct vpx_usec_timer ticktimer; @@ -4914,15 +4880,6 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l cpi->common.error.setjmp = 1; -#if HAVE_NEON -#if CONFIG_RUNTIME_CPU_DETECT - if (cm->cpu_caps & HAS_NEON) -#endif - { - vp8_push_neon(store_reg); - } -#endif - vpx_usec_timer_start(&cmptimer); cpi->source = NULL; @@ -5005,14 +4962,6 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l #endif -#if HAVE_NEON -#if CONFIG_RUNTIME_CPU_DETECT - if (cm->cpu_caps & HAS_NEON) -#endif - { - vp8_pop_neon(store_reg); - } -#endif return -1; } @@ -5416,15 +5365,6 @@ int vp8_get_compressed_data(VP8_COMP *cpi, 
unsigned int *frame_flags, unsigned l #endif #endif -#if HAVE_NEON -#if CONFIG_RUNTIME_CPU_DETECT - if (cm->cpu_caps & HAS_NEON) -#endif - { - vp8_pop_neon(store_reg); - } -#endif - cpi->common.error.setjmp = 0; return 0; diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk index dfb54a52c..3568b34e1 100644 --- a/vp8/vp8_common.mk +++ b/vp8/vp8_common.mk @@ -172,7 +172,6 @@ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sixtappredict8x4_neon$(ASM) VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sixtappredict8x8_neon$(ASM) VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sixtappredict16x16_neon$(ASM) VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/buildintrapredictorsmby_neon$(ASM) -VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/save_reg_neon$(ASM) VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/idct_dequant_full_2x_neon$(ASM) VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/idct_dequant_0_2x_neon$(ASM) VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/idct_blk_neon.c diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl index 8a8155410..99fd6ca10 100644 --- a/vp9/common/vp9_rtcd_defs.pl +++ b/vp9/common/vp9_rtcd_defs.pl @@ -12,7 +12,7 @@ struct macroblockd; /* Encoder forward decls */ struct macroblock; struct vp9_variance_vtable; - +struct search_site_config; struct mv; union int_mv; struct yv12_buffer_config; @@ -563,33 +563,6 @@ specialize qw/vp9_sad4x8_avg/, "$sse_x86inc"; add_proto qw/unsigned int vp9_sad4x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; specialize qw/vp9_sad4x4_avg/, "$sse_x86inc"; -add_proto qw/unsigned int vp9_variance_halfpixvar16x16_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_variance_halfpixvar16x16_h/, "$sse2_x86inc"; - -add_proto qw/unsigned int vp9_variance_halfpixvar16x16_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int 
ref_stride, unsigned int *sse"; -specialize qw/vp9_variance_halfpixvar16x16_v/, "$sse2_x86inc"; - -add_proto qw/unsigned int vp9_variance_halfpixvar16x16_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_variance_halfpixvar16x16_hv/, "$sse2_x86inc"; - -add_proto qw/unsigned int vp9_variance_halfpixvar64x64_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_variance_halfpixvar64x64_h/; - -add_proto qw/unsigned int vp9_variance_halfpixvar64x64_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_variance_halfpixvar64x64_v/; - -add_proto qw/unsigned int vp9_variance_halfpixvar64x64_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_variance_halfpixvar64x64_hv/; - -add_proto qw/unsigned int vp9_variance_halfpixvar32x32_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_variance_halfpixvar32x32_h/; - -add_proto qw/unsigned int vp9_variance_halfpixvar32x32_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_variance_halfpixvar32x32_v/; - -add_proto qw/unsigned int vp9_variance_halfpixvar32x32_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_variance_halfpixvar32x32_hv/; - add_proto qw/void vp9_sad64x64x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"; specialize qw/vp9_sad64x64x3/; @@ -678,9 +651,6 @@ specialize qw/vp9_sad4x8x4d sse/; add_proto qw/void vp9_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; 
specialize qw/vp9_sad4x4x4d sse/; -#add_proto qw/unsigned int vp9_sub_pixel_mse16x16/, "const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse"; -#specialize qw/vp9_sub_pixel_mse16x16 sse2 mmx/; - add_proto qw/unsigned int vp9_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; specialize qw/vp9_mse16x16 mmx/, "$sse2_x86inc", "$avx2_x86inc"; @@ -693,12 +663,6 @@ specialize qw/vp9_mse16x8/; add_proto qw/unsigned int vp9_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; specialize qw/vp9_mse8x8/; -add_proto qw/unsigned int vp9_sub_pixel_mse64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_sub_pixel_mse64x64/; - -add_proto qw/unsigned int vp9_sub_pixel_mse32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_sub_pixel_mse32x32/; - add_proto qw/unsigned int vp9_get_mb_ss/, "const int16_t *"; specialize qw/vp9_get_mb_ss mmx sse2/; # ENCODEMB INVOKE @@ -766,11 +730,11 @@ add_proto qw/int vp9_refining_search_sad/, "const struct macroblock *x, struct m specialize qw/vp9_refining_search_sad sse3/; $vp9_refining_search_sad_sse3=vp9_refining_search_sadx4; -add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv"; +add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv"; specialize 
qw/vp9_diamond_search_sad sse3/; $vp9_diamond_search_sad_sse3=vp9_diamond_search_sadx4; -add_proto qw/int vp9_full_range_search/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv"; +add_proto qw/int vp9_full_range_search/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv"; specialize qw/vp9_full_range_search/; add_proto qw/void vp9_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count"; diff --git a/vp9/common/vp9_tapify.py b/vp9/common/vp9_tapify.py deleted file mode 100644 index 99529cff0..000000000 --- a/vp9/common/vp9_tapify.py +++ /dev/null @@ -1,106 +0,0 @@ -""" - * Copyright (c) 2012 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
-""" -#!/usr/bin/env python -import sys,string,os,re,math,numpy -scale = 2**16 -def dist(p1,p2): - x1,y1 = p1 - x2,y2 = p2 - if x1==x2 and y1==y2 : - return 1.0 - return 1/ math.sqrt((x1-x2)*(x1-x2)+(y1-y2)*(y1-y2)) - -def gettaps(p): - def l(b): - return int(math.floor(b)) - def h(b): - return int(math.ceil(b)) - def t(b,p,s): - return int((scale*dist(b,p)+s/2)/s) - r,c = p - ul=[l(r),l(c)] - ur=[l(r),h(c)] - ll=[h(r),l(c)] - lr=[h(r),h(c)] - sum = dist(ul,p)+dist(ur,p)+dist(ll,p)+dist(lr,p) - t4 = scale - t(ul,p,sum) - t(ur,p,sum) - t(ll,p,sum); - return [[ul,t(ul,p,sum)],[ur,t(ur,p,sum)], - [ll,t(ll,p,sum)],[lr,t4]] - -def print_mb_taps(angle,blocksize): - theta = angle / 57.2957795; - affine = [[math.cos(theta),-math.sin(theta)], - [math.sin(theta),math.cos(theta)]] - radius = (float(blocksize)-1)/2 - print " // angle of",angle,"degrees" - for y in range(blocksize) : - for x in range(blocksize) : - r,c = numpy.dot(affine,[y-radius, x-radius]) - tps = gettaps([r+radius,c+radius]) - for t in tps : - p,t = t - tr,tc = p - print " %2d, %2d, %5d, " % (tr,tc,t,), - print " // %2d,%2d " % (y,x) - -i=float(sys.argv[1]) -while i <= float(sys.argv[2]) : - print_mb_taps(i,float(sys.argv[4])) - i=i+float(sys.argv[3]) -""" - -taps = [] -pt=dict() -ptr=dict() -for y in range(16) : - for x in range(16) : - r,c = numpy.dot(affine,[y-7.5, x-7.5]) - tps = gettaps([r+7.5,c+7.5]) - j=0 - for tp in tps : - p,i = tp - r,c = p - pt[y,x,j]= [p,i] - try: - ptr[r,j,c].append([y,x]) - except: - ptr[r,j,c]=[[y,x]] - j = j+1 - -for key in sorted(pt.keys()) : - print key,pt[key] - -lr = -99 -lj = -99 -lc = 0 - -shuf="" -mask="" -for r,j,c in sorted(ptr.keys()) : - for y,x in ptr[r,j,c] : - if lr != r or lj != j : - print "shuf_"+str(lr)+"_"+str(lj)+"_"+shuf.ljust(16,"0"), lc - shuf="" - lc = 0 - for i in range(lc,c-1) : - shuf = shuf +"0" - shuf = shuf + hex(x)[2] - lc =c - break - lr = r - lj = j -# print r,j,c,ptr[r,j,c] -# print - -for r,j,c in sorted(ptr.keys()) : - for y,x in 
ptr[r,j,c] : - print r,j,c,y,x - break -""" diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c index faf710c24..385b2ebe5 100644 --- a/vp9/decoder/vp9_decoder.c +++ b/vp9/decoder/vp9_decoder.c @@ -32,74 +32,6 @@ #include "vp9/decoder/vp9_detokenize.h" #include "vp9/decoder/vp9_dthread.h" -#define WRITE_RECON_BUFFER 0 -#if WRITE_RECON_BUFFER == 1 -static void recon_write_yuv_frame(const char *name, - const YV12_BUFFER_CONFIG *s, - int w, int _h) { - FILE *yuv_file = fopen(name, "ab"); - const uint8_t *src = s->y_buffer; - int h = _h; - - do { - fwrite(src, w, 1, yuv_file); - src += s->y_stride; - } while (--h); - - src = s->u_buffer; - h = (_h + 1) >> 1; - w = (w + 1) >> 1; - - do { - fwrite(src, w, 1, yuv_file); - src += s->uv_stride; - } while (--h); - - src = s->v_buffer; - h = (_h + 1) >> 1; - - do { - fwrite(src, w, 1, yuv_file); - src += s->uv_stride; - } while (--h); - - fclose(yuv_file); -} -#endif -#if WRITE_RECON_BUFFER == 2 -void write_dx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) { - // write the frame - FILE *yframe; - int i; - char filename[255]; - - snprintf(filename, sizeof(filename)-1, "dx\\y%04d.raw", this_frame); - yframe = fopen(filename, "wb"); - - for (i = 0; i < frame->y_height; i++) - fwrite(frame->y_buffer + i * frame->y_stride, - frame->y_width, 1, yframe); - - fclose(yframe); - snprintf(filename, sizeof(filename)-1, "dx\\u%04d.raw", this_frame); - yframe = fopen(filename, "wb"); - - for (i = 0; i < frame->uv_height; i++) - fwrite(frame->u_buffer + i * frame->uv_stride, - frame->uv_width, 1, yframe); - - fclose(yframe); - snprintf(filename, sizeof(filename)-1, "dx\\v%04d.raw", this_frame); - yframe = fopen(filename, "wb"); - - for (i = 0; i < frame->uv_height; i++) - fwrite(frame->v_buffer + i * frame->uv_stride, - frame->uv_width, 1, yframe); - - fclose(yframe); -} -#endif - void vp9_initialize_dec() { static int init_done = 0; @@ -348,15 +280,6 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, 
swap_frame_buffers(pbi); -#if WRITE_RECON_BUFFER == 2 - if (cm->show_frame) - write_dx_frame_to_file(cm->frame_to_show, - cm->current_video_frame); - else - write_dx_frame_to_file(cm->frame_to_show, - cm->current_video_frame + 1000); -#endif - if (!pbi->do_loopfilter_inline) { // If multiple threads are used to decode tiles, then we use those threads // to do parallel loopfiltering. @@ -367,21 +290,6 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, } } -#if WRITE_RECON_BUFFER == 2 - if (cm->show_frame) - write_dx_frame_to_file(cm->frame_to_show, - cm->current_video_frame + 2000); - else - write_dx_frame_to_file(cm->frame_to_show, - cm->current_video_frame + 3000); -#endif - -#if WRITE_RECON_BUFFER == 1 - if (cm->show_frame) - recon_write_yuv_frame("recon.yuv", cm->frame_to_show, - cm->width, cm->height); -#endif - vp9_clear_system_state(); cm->last_width = cm->width; diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index fcf2a0420..f35a85fba 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -20,12 +20,6 @@ extern "C" { #endif -// motion search site -typedef struct { - MV mv; - int offset; -} search_site; - // Structure to hold snapshot of coding context during the mode picking process typedef struct { MODE_INFO mic; @@ -108,10 +102,6 @@ struct macroblock { int skip_optimize; int q_index; - search_site *ss; - int ss_count; - int searches_per_step; - int errorperbit; int sadperbit16; int sadperbit4; diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 6816f555e..395d26aef 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -501,9 +501,9 @@ static void update_frame_size(VP9_COMP *cpi) { int y_stride = cpi->scaled_source.y_stride; if (cpi->sf.search_method == NSTEP) { - vp9_init3smotion_compensation(&cpi->mb, y_stride); + vp9_init3smotion_compensation(&cpi->ss_cfg, y_stride); } else if (cpi->sf.search_method == DIAMOND) { - vp9_init_dsmotion_compensation(&cpi->mb, y_stride); + 
vp9_init_dsmotion_compensation(&cpi->ss_cfg, y_stride); } } @@ -782,9 +782,6 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) { cm->error.setjmp = 1; - CHECK_MEM_ERROR(cm, cpi->mb.ss, vpx_calloc(sizeof(search_site), - (MAX_MVSEARCH_STEPS * 8) + 1)); - vp9_rtcd(); cpi->use_svc = 0; @@ -973,95 +970,73 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) { cpi->rd.thresh_freq_fact[i][j] = 32; } -#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SVFHH, SVFHV, SVFHHV, \ - SDX3F, SDX8F, SDX4DF)\ +#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX3F, SDX8F, SDX4DF)\ cpi->fn_ptr[BT].sdf = SDF; \ cpi->fn_ptr[BT].sdaf = SDAF; \ cpi->fn_ptr[BT].vf = VF; \ cpi->fn_ptr[BT].svf = SVF; \ cpi->fn_ptr[BT].svaf = SVAF; \ - cpi->fn_ptr[BT].svf_halfpix_h = SVFHH; \ - cpi->fn_ptr[BT].svf_halfpix_v = SVFHV; \ - cpi->fn_ptr[BT].svf_halfpix_hv = SVFHHV; \ cpi->fn_ptr[BT].sdx3f = SDX3F; \ cpi->fn_ptr[BT].sdx8f = SDX8F; \ cpi->fn_ptr[BT].sdx4df = SDX4DF; BFP(BLOCK_32X16, vp9_sad32x16, vp9_sad32x16_avg, vp9_variance32x16, vp9_sub_pixel_variance32x16, - vp9_sub_pixel_avg_variance32x16, NULL, NULL, - NULL, NULL, NULL, - vp9_sad32x16x4d) + vp9_sub_pixel_avg_variance32x16, NULL, NULL, vp9_sad32x16x4d) BFP(BLOCK_16X32, vp9_sad16x32, vp9_sad16x32_avg, vp9_variance16x32, vp9_sub_pixel_variance16x32, - vp9_sub_pixel_avg_variance16x32, NULL, NULL, - NULL, NULL, NULL, - vp9_sad16x32x4d) + vp9_sub_pixel_avg_variance16x32, NULL, NULL, vp9_sad16x32x4d) BFP(BLOCK_64X32, vp9_sad64x32, vp9_sad64x32_avg, vp9_variance64x32, vp9_sub_pixel_variance64x32, - vp9_sub_pixel_avg_variance64x32, NULL, NULL, - NULL, NULL, NULL, - vp9_sad64x32x4d) + vp9_sub_pixel_avg_variance64x32, NULL, NULL, vp9_sad64x32x4d) BFP(BLOCK_32X64, vp9_sad32x64, vp9_sad32x64_avg, vp9_variance32x64, vp9_sub_pixel_variance32x64, - vp9_sub_pixel_avg_variance32x64, NULL, NULL, - NULL, NULL, NULL, - vp9_sad32x64x4d) + vp9_sub_pixel_avg_variance32x64, NULL, NULL, vp9_sad32x64x4d) BFP(BLOCK_32X32, vp9_sad32x32, vp9_sad32x32_avg, 
vp9_variance32x32, vp9_sub_pixel_variance32x32, - vp9_sub_pixel_avg_variance32x32, vp9_variance_halfpixvar32x32_h, - vp9_variance_halfpixvar32x32_v, - vp9_variance_halfpixvar32x32_hv, vp9_sad32x32x3, vp9_sad32x32x8, + vp9_sub_pixel_avg_variance32x32, vp9_sad32x32x3, vp9_sad32x32x8, vp9_sad32x32x4d) BFP(BLOCK_64X64, vp9_sad64x64, vp9_sad64x64_avg, vp9_variance64x64, vp9_sub_pixel_variance64x64, - vp9_sub_pixel_avg_variance64x64, vp9_variance_halfpixvar64x64_h, - vp9_variance_halfpixvar64x64_v, - vp9_variance_halfpixvar64x64_hv, vp9_sad64x64x3, vp9_sad64x64x8, + vp9_sub_pixel_avg_variance64x64, vp9_sad64x64x3, vp9_sad64x64x8, vp9_sad64x64x4d) BFP(BLOCK_16X16, vp9_sad16x16, vp9_sad16x16_avg, vp9_variance16x16, vp9_sub_pixel_variance16x16, - vp9_sub_pixel_avg_variance16x16, vp9_variance_halfpixvar16x16_h, - vp9_variance_halfpixvar16x16_v, - vp9_variance_halfpixvar16x16_hv, vp9_sad16x16x3, vp9_sad16x16x8, + vp9_sub_pixel_avg_variance16x16, vp9_sad16x16x3, vp9_sad16x16x8, vp9_sad16x16x4d) BFP(BLOCK_16X8, vp9_sad16x8, vp9_sad16x8_avg, vp9_variance16x8, vp9_sub_pixel_variance16x8, - vp9_sub_pixel_avg_variance16x8, NULL, NULL, NULL, + vp9_sub_pixel_avg_variance16x8, vp9_sad16x8x3, vp9_sad16x8x8, vp9_sad16x8x4d) BFP(BLOCK_8X16, vp9_sad8x16, vp9_sad8x16_avg, vp9_variance8x16, vp9_sub_pixel_variance8x16, - vp9_sub_pixel_avg_variance8x16, NULL, NULL, NULL, + vp9_sub_pixel_avg_variance8x16, vp9_sad8x16x3, vp9_sad8x16x8, vp9_sad8x16x4d) BFP(BLOCK_8X8, vp9_sad8x8, vp9_sad8x8_avg, vp9_variance8x8, vp9_sub_pixel_variance8x8, - vp9_sub_pixel_avg_variance8x8, NULL, NULL, NULL, + vp9_sub_pixel_avg_variance8x8, vp9_sad8x8x3, vp9_sad8x8x8, vp9_sad8x8x4d) BFP(BLOCK_8X4, vp9_sad8x4, vp9_sad8x4_avg, vp9_variance8x4, vp9_sub_pixel_variance8x4, - vp9_sub_pixel_avg_variance8x4, NULL, NULL, - NULL, NULL, vp9_sad8x4x8, - vp9_sad8x4x4d) + vp9_sub_pixel_avg_variance8x4, NULL, vp9_sad8x4x8, vp9_sad8x4x4d) BFP(BLOCK_4X8, vp9_sad4x8, vp9_sad4x8_avg, vp9_variance4x8, vp9_sub_pixel_variance4x8, - 
vp9_sub_pixel_avg_variance4x8, NULL, NULL, - NULL, NULL, vp9_sad4x8x8, - vp9_sad4x8x4d) + vp9_sub_pixel_avg_variance4x8, NULL, vp9_sad4x8x8, vp9_sad4x8x4d) BFP(BLOCK_4X4, vp9_sad4x4, vp9_sad4x4_avg, vp9_variance4x4, vp9_sub_pixel_variance4x4, - vp9_sub_pixel_avg_variance4x4, NULL, NULL, NULL, + vp9_sub_pixel_avg_variance4x4, vp9_sad4x4x3, vp9_sad4x4x8, vp9_sad4x4x4d) cpi->full_search_sad = vp9_full_search_sad; @@ -1182,7 +1157,6 @@ void vp9_remove_compressor(VP9_COMP *cpi) { } dealloc_compressor_data(cpi); - vpx_free(cpi->mb.ss); vpx_free(cpi->tok); for (i = 0; i < sizeof(cpi->mbgraph_stats) / @@ -1444,77 +1418,67 @@ void vp9_write_yuv_rec_frame(VP9_COMMON *cm) { } #endif -static void scale_and_extend_frame_nonnormative(YV12_BUFFER_CONFIG *src_fb, - YV12_BUFFER_CONFIG *dst_fb) { - const int in_w = src_fb->y_crop_width; - const int in_h = src_fb->y_crop_height; - const int out_w = dst_fb->y_crop_width; - const int out_h = dst_fb->y_crop_height; - const int in_w_uv = src_fb->uv_crop_width; - const int in_h_uv = src_fb->uv_crop_height; - const int out_w_uv = dst_fb->uv_crop_width; - const int out_h_uv = dst_fb->uv_crop_height; +static void scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src, + YV12_BUFFER_CONFIG *dst) { + // TODO(dkovalev): replace YV12_BUFFER_CONFIG with vpx_image_t int i; + const uint8_t *const srcs[4] = {src->y_buffer, src->u_buffer, src->v_buffer, + src->alpha_buffer}; + const int src_strides[4] = {src->y_stride, src->uv_stride, src->uv_stride, + src->alpha_stride}; + const int src_widths[4] = {src->y_crop_width, src->uv_crop_width, + src->uv_crop_width, src->y_crop_width}; + const int src_heights[4] = {src->y_crop_height, src->uv_crop_height, + src->uv_crop_height, src->y_crop_height}; + uint8_t *const dsts[4] = {dst->y_buffer, dst->u_buffer, dst->v_buffer, + dst->alpha_buffer}; + const int dst_strides[4] = {dst->y_stride, dst->uv_stride, dst->uv_stride, + dst->alpha_stride}; + const int dst_widths[4] = {dst->y_crop_width, 
dst->uv_crop_width, + dst->uv_crop_width, dst->y_crop_width}; + const int dst_heights[4] = {dst->y_crop_height, dst->uv_crop_height, + dst->uv_crop_height, dst->y_crop_height}; + + for (i = 0; i < MAX_MB_PLANE; ++i) + vp9_resize_plane(srcs[i], src_heights[i], src_widths[i], src_strides[i], + dsts[i], dst_heights[i], dst_widths[i], dst_strides[i]); - uint8_t *srcs[4] = {src_fb->y_buffer, src_fb->u_buffer, src_fb->v_buffer, - src_fb->alpha_buffer}; - int src_strides[4] = {src_fb->y_stride, src_fb->uv_stride, src_fb->uv_stride, - src_fb->alpha_stride}; - - uint8_t *dsts[4] = {dst_fb->y_buffer, dst_fb->u_buffer, dst_fb->v_buffer, - dst_fb->alpha_buffer}; - int dst_strides[4] = {dst_fb->y_stride, dst_fb->uv_stride, dst_fb->uv_stride, - dst_fb->alpha_stride}; - - for (i = 0; i < MAX_MB_PLANE; ++i) { - if (i == 0 || i == 3) { - // Y and alpha planes - vp9_resize_plane(srcs[i], in_h, in_w, src_strides[i], - dsts[i], out_h, out_w, dst_strides[i]); - } else { - // Chroma planes - vp9_resize_plane(srcs[i], in_h_uv, in_w_uv, src_strides[i], - dsts[i], out_h_uv, out_w_uv, dst_strides[i]); - } - } // TODO(hkuang): Call C version explicitly // as neon version only expand border size 32. 
- vp8_yv12_extend_frame_borders_c(dst_fb); -} - -static void scale_and_extend_frame(YV12_BUFFER_CONFIG *src_fb, - YV12_BUFFER_CONFIG *dst_fb) { - const int in_w = src_fb->y_crop_width; - const int in_h = src_fb->y_crop_height; - const int out_w = dst_fb->y_crop_width; - const int out_h = dst_fb->y_crop_height; + vp8_yv12_extend_frame_borders_c(dst); +} + +static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src, + YV12_BUFFER_CONFIG *dst) { + const int src_w = src->y_crop_width; + const int src_h = src->y_crop_height; + const int dst_w = dst->y_crop_width; + const int dst_h = dst->y_crop_height; + const uint8_t *const srcs[4] = {src->y_buffer, src->u_buffer, src->v_buffer, + src->alpha_buffer}; + const int src_strides[4] = {src->y_stride, src->uv_stride, src->uv_stride, + src->alpha_stride}; + uint8_t *const dsts[4] = {dst->y_buffer, dst->u_buffer, dst->v_buffer, + dst->alpha_buffer}; + const int dst_strides[4] = {dst->y_stride, dst->uv_stride, dst->uv_stride, + dst->alpha_stride}; int x, y, i; - uint8_t *srcs[4] = {src_fb->y_buffer, src_fb->u_buffer, src_fb->v_buffer, - src_fb->alpha_buffer}; - int src_strides[4] = {src_fb->y_stride, src_fb->uv_stride, src_fb->uv_stride, - src_fb->alpha_stride}; - - uint8_t *dsts[4] = {dst_fb->y_buffer, dst_fb->u_buffer, dst_fb->v_buffer, - dst_fb->alpha_buffer}; - int dst_strides[4] = {dst_fb->y_stride, dst_fb->uv_stride, dst_fb->uv_stride, - dst_fb->alpha_stride}; - - for (y = 0; y < out_h; y += 16) { - for (x = 0; x < out_w; x += 16) { + for (y = 0; y < dst_h; y += 16) { + for (x = 0; x < dst_w; x += 16) { for (i = 0; i < MAX_MB_PLANE; ++i) { const int factor = (i == 0 || i == 3 ? 
1 : 2); - const int x_q4 = x * (16 / factor) * in_w / out_w; - const int y_q4 = y * (16 / factor) * in_h / out_h; + const int x_q4 = x * (16 / factor) * src_w / dst_w; + const int y_q4 = y * (16 / factor) * src_h / dst_h; const int src_stride = src_strides[i]; const int dst_stride = dst_strides[i]; - uint8_t *src = srcs[i] + y / factor * in_h / out_h * src_stride + - x / factor * in_w / out_w; - uint8_t *dst = dsts[i] + y / factor * dst_stride + x / factor; + const uint8_t *src_ptr = srcs[i] + (y / factor) * src_h / dst_h * + src_stride + (x / factor) * src_w / dst_w; + uint8_t *dst_ptr = dsts[i] + (y / factor) * dst_stride + (x / factor); - vp9_convolve8(src, src_stride, dst, dst_stride, - vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * in_w / out_w, - vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * in_h / out_h, + vp9_convolve8(src_ptr, src_stride, dst_ptr, dst_stride, + vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * src_w / dst_w, + vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * src_h / dst_h, 16 / factor, 16 / factor); } } @@ -1522,7 +1486,7 @@ static void scale_and_extend_frame(YV12_BUFFER_CONFIG *src_fb, // TODO(hkuang): Call C version explicitly // as neon version only expand border size 32. 
- vp8_yv12_extend_frame_borders_c(dst_fb); + vp8_yv12_extend_frame_borders_c(dst); } static int find_fp_qindex() { @@ -1701,7 +1665,7 @@ void vp9_scale_references(VP9_COMP *cpi) { for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)]; - YV12_BUFFER_CONFIG *const ref = &cm->frame_bufs[idx].buf; + const YV12_BUFFER_CONFIG *const ref = &cm->frame_bufs[idx].buf; if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) { diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index 8f3249407..132b479e2 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -497,6 +497,8 @@ typedef struct VP9_COMP { int frame_flags; + search_site_config ss_cfg; + #if CONFIG_MULTIPLE_ARF // ARF tracking variables. int multi_arf_enabled; diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index 34506f2bd..b408ced0e 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -418,7 +418,7 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x, v_fn_ptr.vf = get_block_variance_fn(bsize); // Center the initial step/diamond search on best mv. 
- tmp_err = cpi->diamond_search_sad(x, &ref_mv_full, &tmp_mv, + tmp_err = cpi->diamond_search_sad(x, &cpi->ss_cfg, &ref_mv_full, &tmp_mv, step_param, x->sadperbit16, &num00, &v_fn_ptr, ref_mv); if (tmp_err < INT_MAX) @@ -441,7 +441,7 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x, if (num00) { --num00; } else { - tmp_err = cpi->diamond_search_sad(x, &ref_mv_full, &tmp_mv, + tmp_err = cpi->diamond_search_sad(x, &cpi->ss_cfg, &ref_mv_full, &tmp_mv, step_param + n, x->sadperbit16, &num00, &v_fn_ptr, ref_mv); if (tmp_err < INT_MAX) diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 89937f5a6..bbec4da76 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -101,32 +101,32 @@ static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref, return 0; } -void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride) { +void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) { int len, ss_count = 1; - x->ss[0].mv.col = x->ss[0].mv.row = 0; - x->ss[0].offset = 0; + cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0; + cfg->ss[0].offset = 0; for (len = MAX_FIRST_STEP; len > 0; len /= 2) { // Generate offsets for 4 search sites per step. const MV ss_mvs[] = {{-len, 0}, {len, 0}, {0, -len}, {0, len}}; int i; for (i = 0; i < 4; ++i) { - search_site *const ss = &x->ss[ss_count++]; + search_site *const ss = &cfg->ss[ss_count++]; ss->mv = ss_mvs[i]; ss->offset = ss->mv.row * stride + ss->mv.col; } } - x->ss_count = ss_count; - x->searches_per_step = 4; + cfg->ss_count = ss_count; + cfg->searches_per_step = 4; } -void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) { +void vp9_init3smotion_compensation(search_site_config *cfg, int stride) { int len, ss_count = 1; - x->ss[0].mv.col = x->ss[0].mv.row = 0; - x->ss[0].offset = 0; + cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0; + cfg->ss[0].offset = 0; for (len = MAX_FIRST_STEP; len > 0; len /= 2) { // Generate offsets for 8 search sites per step. 
@@ -136,14 +136,14 @@ void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) { }; int i; for (i = 0; i < 8; ++i) { - search_site *const ss = &x->ss[ss_count++]; + search_site *const ss = &cfg->ss[ss_count++]; ss->mv = ss_mvs[i]; ss->offset = ss->mv.row * stride + ss->mv.col; } } - x->ss_count = ss_count; - x->searches_per_step = 8; + cfg->ss_count = ss_count; + cfg->searches_per_step = 8; } /* @@ -871,7 +871,9 @@ int vp9_fast_dia_search(const MACROBLOCK *x, #undef CHECK_BETTER -int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, +int vp9_full_range_search_c(const MACROBLOCK *x, + const search_site_config *cfg, + MV *ref_mv, MV *best_mv, int search_param, int sad_per_bit, int *num00, const vp9_variance_fn_ptr_t *fn_ptr, const MV *center_mv) { @@ -962,6 +964,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, } int vp9_diamond_search_sad_c(const MACROBLOCK *x, + const search_site_config *cfg, MV *ref_mv, MV *best_mv, int search_param, int sad_per_bit, int *num00, const vp9_variance_fn_ptr_t *fn_ptr, @@ -973,8 +976,8 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, // of iterations // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = // (MAX_FIRST_STEP/4) pel... etc. 
- const search_site *const ss = &x->ss[search_param * x->searches_per_step]; - const int tot_steps = (x->ss_count / x->searches_per_step) - search_param; + const search_site *const ss = &cfg->ss[search_param * cfg->searches_per_step]; + const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; const uint8_t *best_address, *in_what_ref; int best_sad = INT_MAX; @@ -996,7 +999,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, i = 1; for (step = 0; step < tot_steps; step++) { - for (j = 0; j < x->searches_per_step; j++) { + for (j = 0; j < cfg->searches_per_step; j++) { const MV mv = {best_mv->row + ss[i].mv.row, best_mv->col + ss[i].mv.col}; if (is_mv_in(x, &mv)) { @@ -1050,6 +1053,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, } int vp9_diamond_search_sadx4(const MACROBLOCK *x, + const search_site_config *cfg, MV *ref_mv, MV *best_mv, int search_param, int sad_per_bit, int *num00, const vp9_variance_fn_ptr_t *fn_ptr, @@ -1075,8 +1079,8 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x, // 0 = initial step (MAX_FIRST_STEP) pel // 1 = (MAX_FIRST_STEP/2) pel, // 2 = (MAX_FIRST_STEP/4) pel... 
- const search_site *ss = &x->ss[search_param * x->searches_per_step]; - const int tot_steps = (x->ss_count / x->searches_per_step) - search_param; + const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step]; + const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); @@ -1112,7 +1116,7 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x, if (all_in) { unsigned int sad_array[4]; - for (j = 0; j < x->searches_per_step; j += 4) { + for (j = 0; j < cfg->searches_per_step; j += 4) { unsigned char const *block_offset[4]; for (t = 0; t < 4; t++) @@ -1135,7 +1139,7 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x, } } } else { - for (j = 0; j < x->searches_per_step; j++) { + for (j = 0; j < cfg->searches_per_step; j++) { // Trap illegal vectors const MV this_mv = {best_mv->row + ss[i].mv.row, best_mv->col + ss[i].mv.col}; @@ -1202,7 +1206,7 @@ int vp9_full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x, const MV *ref_mv, MV *dst_mv) { MV temp_mv; int thissme, n, num00 = 0; - int bestsme = cpi->diamond_search_sad(x, mvp_full, &temp_mv, + int bestsme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv, step_param, sadpb, &n, fn_ptr, ref_mv); if (bestsme < INT_MAX) @@ -1220,7 +1224,7 @@ int vp9_full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x, if (num00) { num00--; } else { - thissme = cpi->diamond_search_sad(x, mvp_full, &temp_mv, + thissme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv, step_param + n, sadpb, &num00, fn_ptr, ref_mv); if (thissme < INT_MAX) @@ -1290,192 +1294,154 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv, int sad_per_bit, int distance, const vp9_variance_fn_ptr_t *fn_ptr, const MV *center_mv, MV *best_mv) { + int r; const MACROBLOCKD *const xd = &x->e_mbd; - const uint8_t *const what = x->plane[0].src.buf; - const int 
what_stride = x->plane[0].src.stride; - const uint8_t *const in_what = xd->plane[0].pre[0].buf; - const int in_what_stride = xd->plane[0].pre[0].stride; - MV this_mv; - unsigned int bestsad = INT_MAX; - int r, c; - unsigned int thissad; - int ref_row = ref_mv->row; - int ref_col = ref_mv->col; - - // Apply further limits to prevent us looking using vectors that stretch - // beyond the UMV border - const int row_min = MAX(ref_row - distance, x->mv_row_min); - const int row_max = MIN(ref_row + distance, x->mv_row_max); - const int col_min = MAX(ref_col - distance, x->mv_col_min); - const int col_max = MIN(ref_col + distance, x->mv_col_max); - unsigned int sad_array[3]; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; + const int row_min = MAX(ref_mv->row - distance, x->mv_row_min); + const int row_max = MIN(ref_mv->row + distance, x->mv_row_max); + const int col_min = MAX(ref_mv->col - distance, x->mv_col_min); + const int col_max = MIN(ref_mv->col + distance, x->mv_col_max); const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; + unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) + + mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); + *best_mv = *ref_mv; - // Work out the mid point for the search - const uint8_t *bestaddress = &in_what[ref_row * in_what_stride + ref_col]; - - best_mv->row = ref_row; - best_mv->col = ref_col; - - // Baseline value at the centre - bestsad = fn_ptr->sdf(what, what_stride, - bestaddress, in_what_stride, 0x7fffffff) - + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit); - - for (r = row_min; r < row_max; r++) { - const uint8_t *check_here = &in_what[r * in_what_stride + col_min]; - this_mv.row = r; - c = col_min; - - while ((c + 2) < col_max && fn_ptr->sdx3f != NULL) { - int i; + for (r = row_min; r < row_max; ++r) { + int c = col_min; + const uint8_t *check_here = &in_what->buf[r 
* in_what->stride + c]; - fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array); + if (fn_ptr->sdx3f != NULL) { + while ((c + 2) < col_max) { + int i; + unsigned int sads[3]; - for (i = 0; i < 3; i++) { - thissad = sad_array[i]; + fn_ptr->sdx3f(what->buf, what->stride, check_here, in_what->stride, + sads); - if (thissad < bestsad) { - this_mv.col = c; - thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); - if (thissad < bestsad) { - bestsad = thissad; - best_mv->row = r; - best_mv->col = c; + for (i = 0; i < 3; ++i) { + unsigned int sad = sads[i]; + if (sad < best_sad) { + const MV mv = {r, c}; + sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); + if (sad < best_sad) { + best_sad = sad; + *best_mv = mv; + } } + ++check_here; + ++c; } - check_here++; - c++; } } while (c < col_max) { - thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, - bestsad); - - if (thissad < bestsad) { - this_mv.col = c; - thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); - if (thissad < bestsad) { - bestsad = thissad; - best_mv->row = r; - best_mv->col = c; + unsigned int sad = fn_ptr->sdf(what->buf, what->stride, + check_here, in_what->stride, best_sad); + if (sad < best_sad) { + const MV mv = {r, c}; + sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); + if (sad < best_sad) { + best_sad = sad; + *best_mv = mv; } } - - check_here++; - c++; + ++check_here; + ++c; } } - return bestsad; + + return best_sad; } int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, int sad_per_bit, int distance, const vp9_variance_fn_ptr_t *fn_ptr, const MV *center_mv, MV *best_mv) { + int r; const MACROBLOCKD *const xd = &x->e_mbd; - const uint8_t *const what = x->plane[0].src.buf; - const int what_stride = x->plane[0].src.stride; - const uint8_t *const in_what = xd->plane[0].pre[0].buf; - const int in_what_stride = xd->plane[0].pre[0].stride; - MV this_mv; - unsigned int bestsad = INT_MAX; - int r, c; - int ref_row = 
ref_mv->row; - int ref_col = ref_mv->col; - - // Apply further limits to prevent us looking using vectors that stretch - // beyond the UMV border - const int row_min = MAX(ref_row - distance, x->mv_row_min); - const int row_max = MIN(ref_row + distance, x->mv_row_max); - const int col_min = MAX(ref_col - distance, x->mv_col_min); - const int col_max = MIN(ref_col + distance, x->mv_col_max); - DECLARE_ALIGNED_ARRAY(16, uint32_t, sad_array8, 8); - unsigned int sad_array[3]; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; + const int row_min = MAX(ref_mv->row - distance, x->mv_row_min); + const int row_max = MIN(ref_mv->row + distance, x->mv_row_max); + const int col_min = MAX(ref_mv->col - distance, x->mv_col_min); + const int col_max = MIN(ref_mv->col + distance, x->mv_col_max); const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; + unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) + + mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); + *best_mv = *ref_mv; - // Work out the mid point for the search - const uint8_t *bestaddress = &in_what[ref_row * in_what_stride + ref_col]; - - best_mv->row = ref_row; - best_mv->col = ref_col; - - // Baseline value at the center - bestsad = fn_ptr->sdf(what, what_stride, - bestaddress, in_what_stride, 0x7fffffff) - + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit); - - for (r = row_min; r < row_max; r++) { - const uint8_t *check_here = &in_what[r * in_what_stride + col_min]; - this_mv.row = r; - c = col_min; - - while ((c + 7) < col_max) { - int i; + for (r = row_min; r < row_max; ++r) { + int c = col_min; + const uint8_t *check_here = &in_what->buf[r * in_what->stride + c]; - fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8); + if (fn_ptr->sdx8f != NULL) { + while ((c + 7) < col_max) { + int i; + unsigned int sads[8]; - for (i = 0; i < 8; i++) { - 
unsigned int thissad = (unsigned int)sad_array8[i]; + fn_ptr->sdx8f(what->buf, what->stride, check_here, in_what->stride, + sads); - if (thissad < bestsad) { - this_mv.col = c; - thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); - if (thissad < bestsad) { - bestsad = thissad; - best_mv->row = r; - best_mv->col = c; + for (i = 0; i < 8; ++i) { + unsigned int sad = sads[i]; + if (sad < best_sad) { + const MV mv = {r, c}; + sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); + if (sad < best_sad) { + best_sad = sad; + *best_mv = mv; + } } + ++check_here; + ++c; } - - check_here++; - c++; } } - while ((c + 2) < col_max && fn_ptr->sdx3f != NULL) { - int i; + if (fn_ptr->sdx3f != NULL) { + while ((c + 2) < col_max) { + int i; + unsigned int sads[3]; - fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array); + fn_ptr->sdx3f(what->buf, what->stride, check_here, in_what->stride, + sads); - for (i = 0; i < 3; i++) { - unsigned int thissad = sad_array[i]; - - if (thissad < bestsad) { - this_mv.col = c; - thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); - if (thissad < bestsad) { - bestsad = thissad; - best_mv->row = r; - best_mv->col = c; + for (i = 0; i < 3; ++i) { + unsigned int sad = sads[i]; + if (sad < best_sad) { + const MV mv = {r, c}; + sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); + if (sad < best_sad) { + best_sad = sad; + *best_mv = mv; + } } + ++check_here; + ++c; } - - check_here++; - c++; } } while (c < col_max) { - unsigned int thissad = fn_ptr->sdf(what, what_stride, - check_here, in_what_stride, bestsad); - - if (thissad < bestsad) { - this_mv.col = c; - thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); - if (thissad < bestsad) { - bestsad = thissad; - best_mv->row = r; - best_mv->col = c; + unsigned int sad = fn_ptr->sdf(what->buf, what->stride, + check_here, in_what->stride, best_sad); + if (sad < best_sad) { + const MV mv = {r, c}; + sad += mvsad_err_cost(x, &mv, &fcenter_mv, 
sad_per_bit); + if (sad < best_sad) { + best_sad = sad; + *best_mv = mv; } } - - check_here++; - c++; + ++check_here; + ++c; } } - return bestsad; + + return best_sad; } int vp9_refining_search_sad_c(const MACROBLOCK *x, diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h index 70d7985e4..1f524f1f6 100644 --- a/vp9/encoder/vp9_mcomp.h +++ b/vp9/encoder/vp9_mcomp.h @@ -31,6 +31,20 @@ extern "C" { // for Block_16x16 #define BORDER_MV_PIXELS_B16 (16 + VP9_INTERP_EXTEND) +// motion search site +typedef struct search_site { + MV mv; + int offset; +} search_site; + +typedef struct search_site_config { + search_site ss[8 * MAX_MVSEARCH_STEPS + 1]; + int ss_count; + int searches_per_step; +} search_site_config; + +void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride); +void vp9_init3smotion_compensation(search_site_config *cfg, int stride); void vp9_set_mv_search_range(MACROBLOCK *x, const MV *mv); int vp9_mv_bit_cost(const MV *mv, const MV *ref, @@ -46,8 +60,6 @@ int vp9_get_mvpred_av_var(const MACROBLOCK *x, const uint8_t *second_pred, const vp9_variance_fn_ptr_t *vfp, int use_mvcost); -void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride); -void vp9_init3smotion_compensation(MACROBLOCK *x, int stride); struct VP9_COMP; int vp9_init_search_range(struct VP9_COMP *cpi, int size); @@ -119,6 +131,7 @@ typedef int (*vp9_refining_search_fn_t)(const MACROBLOCK *x, const MV *center_mv); typedef int (*vp9_diamond_search_fn_t)(const MACROBLOCK *x, + const search_site_config *cfg, MV *ref_mv, MV *best_mv, int search_param, int sad_per_bit, int *num00, diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index c1493e719..56eb9440c 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -418,7 +418,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // Perform intra prediction search, if the best SAD is above a certain // threshold. 
- if (best_rd > inter_mode_thresh) { + if (best_rd > inter_mode_thresh && bsize < cpi->sf.max_intra_bsize) { for (this_mode = DC_PRED; this_mode <= DC_PRED; ++this_mode) { vp9_predict_intra_block(xd, 0, b_width_log2(bsize), mbmi->tx_size, this_mode, diff --git a/vp9/encoder/vp9_variance.c b/vp9/encoder/vp9_variance.c index 1399bfb7e..ae3c86aee 100644 --- a/vp9/encoder/vp9_variance.c +++ b/vp9/encoder/vp9_variance.c @@ -276,126 +276,6 @@ VAR(64, 64) SUBPIX_VAR(64, 64) SUBPIX_AVG_VAR(64, 64) -unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, - unsigned int *sse) { - return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 0, - ref_ptr, recon_stride, sse); -} - -unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, - unsigned int *sse) { - return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 0, - ref_ptr, recon_stride, sse); -} - -unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, - unsigned int *sse) { - return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 0, - ref_ptr, recon_stride, sse); -} - -unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, - unsigned int *sse) { - return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 8, - ref_ptr, recon_stride, sse); -} - -unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, - unsigned int *sse) { - return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 0, 8, - ref_ptr, recon_stride, sse); -} - -unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, - unsigned int *sse) { - return 
vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 0, 8, - ref_ptr, recon_stride, sse); -} - -unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, - unsigned int *sse) { - return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 8, - ref_ptr, recon_stride, sse); -} - -unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, - unsigned int *sse) { - return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 8, - ref_ptr, recon_stride, sse); -} - -unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, - unsigned int *sse) { - return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 8, - ref_ptr, recon_stride, sse); -} - -unsigned int vp9_sub_pixel_mse16x16_c(const uint8_t *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const uint8_t *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse) { - vp9_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line, - xoffset, yoffset, dst_ptr, - dst_pixels_per_line, sse); - return *sse; -} - -unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const uint8_t *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse) { - vp9_sub_pixel_variance32x32_c(src_ptr, src_pixels_per_line, - xoffset, yoffset, dst_ptr, - dst_pixels_per_line, sse); - return *sse; -} - -unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const uint8_t *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse) { - vp9_sub_pixel_variance64x64_c(src_ptr, src_pixels_per_line, - xoffset, yoffset, dst_ptr, - dst_pixels_per_line, sse); - return *sse; -} - void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const 
uint8_t *ref, int ref_stride) { int i, j; diff --git a/vp9/encoder/vp9_variance.h b/vp9/encoder/vp9_variance.h index 4c8be71cd..152c3d962 100644 --- a/vp9/encoder/vp9_variance.h +++ b/vp9/encoder/vp9_variance.h @@ -82,9 +82,6 @@ typedef struct vp9_variance_vtable { vp9_variance_fn_t vf; vp9_subpixvariance_fn_t svf; vp9_subp_avg_variance_fn_t svaf; - vp9_variance_fn_t svf_halfpix_h; - vp9_variance_fn_t svf_halfpix_v; - vp9_variance_fn_t svf_halfpix_hv; vp9_sad_multi_fn_t sdx3f; vp9_sad_multi_fn_t sdx8f; vp9_sad_multi_d_fn_t sdx4df; diff --git a/vp9/encoder/x86/vp9_variance_sse2.c b/vp9/encoder/x86/vp9_variance_sse2.c index 9e65694a8..25d594632 100644 --- a/vp9/encoder/x86/vp9_variance_sse2.c +++ b/vp9/encoder/x86/vp9_variance_sse2.c @@ -494,58 +494,3 @@ FNS(ssse3, ssse3); #undef FNS #undef FN - -unsigned int vp9_variance_halfpixvar16x16_h_sse2( - const unsigned char *src_ptr, - int src_pixels_per_line, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse) { - int xsum0; - unsigned int xxsum0; - - vp9_half_horiz_variance16x_h_sse2( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 16, - &xsum0, &xxsum0); - - *sse = xxsum0; - return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8)); -} - - -unsigned int vp9_variance_halfpixvar16x16_v_sse2( - const unsigned char *src_ptr, - int src_pixels_per_line, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse) { - int xsum0; - unsigned int xxsum0; - vp9_half_vert_variance16x_h_sse2( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 16, - &xsum0, &xxsum0); - - *sse = xxsum0; - return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8)); -} - - -unsigned int vp9_variance_halfpixvar16x16_hv_sse2( - const unsigned char *src_ptr, - int src_pixels_per_line, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse) { - int xsum0; - unsigned int xxsum0; - - vp9_half_horiz_vert_variance16x_h_sse2( - src_ptr, src_pixels_per_line, - dst_ptr, 
dst_pixels_per_line, 16, - &xsum0, &xxsum0); - - *sse = xxsum0; - return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8)); -} diff --git a/webmdec.c b/webmdec.c deleted file mode 100644 index 93a8d9fb5..000000000 --- a/webmdec.c +++ /dev/null @@ -1,202 +0,0 @@ -/* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "./webmdec.h" - -#include <stdarg.h> - -#include "third_party/nestegg/include/nestegg/nestegg.h" - -static int nestegg_read_cb(void *buffer, size_t length, void *userdata) { - FILE *f = userdata; - - if (fread(buffer, 1, length, f) < length) { - if (ferror(f)) - return -1; - if (feof(f)) - return 0; - } - return 1; -} - -static int nestegg_seek_cb(int64_t offset, int whence, void *userdata) { - switch (whence) { - case NESTEGG_SEEK_SET: - whence = SEEK_SET; - break; - case NESTEGG_SEEK_CUR: - whence = SEEK_CUR; - break; - case NESTEGG_SEEK_END: - whence = SEEK_END; - break; - }; - return fseek(userdata, (int32_t)offset, whence) ? -1 : 0; -} - -static int64_t nestegg_tell_cb(void *userdata) { - return ftell(userdata); -} - -static void nestegg_log_cb(nestegg *context, - unsigned int severity, - char const *format, ...) 
{ - va_list ap; - va_start(ap, format); - vfprintf(stderr, format, ap); - fprintf(stderr, "\n"); - va_end(ap); -} - -int file_is_webm(struct WebmInputContext *webm_ctx, - struct VpxInputContext *vpx_ctx) { - uint32_t i, n; - int track_type = -1; - int codec_id; - - nestegg_io io = {nestegg_read_cb, nestegg_seek_cb, nestegg_tell_cb, 0}; - nestegg_video_params params; - - io.userdata = vpx_ctx->file; - if (nestegg_init(&webm_ctx->nestegg_ctx, io, NULL, -1)) - goto fail; - - if (nestegg_track_count(webm_ctx->nestegg_ctx, &n)) - goto fail; - - for (i = 0; i < n; i++) { - track_type = nestegg_track_type(webm_ctx->nestegg_ctx, i); - - if (track_type == NESTEGG_TRACK_VIDEO) - break; - else if (track_type < 0) - goto fail; - } - - codec_id = nestegg_track_codec_id(webm_ctx->nestegg_ctx, i); - if (codec_id == NESTEGG_CODEC_VP8) { - vpx_ctx->fourcc = VP8_FOURCC; - } else if (codec_id == NESTEGG_CODEC_VP9) { - vpx_ctx->fourcc = VP9_FOURCC; - } else { - fprintf(stderr, "Not VPx video, quitting.\n"); - goto fail; - } - - webm_ctx->video_track = i; - - if (nestegg_track_video_params(webm_ctx->nestegg_ctx, i, &params)) - goto fail; - - vpx_ctx->framerate.denominator = 0; - vpx_ctx->framerate.numerator = 0; - vpx_ctx->width = params.width; - vpx_ctx->height = params.height; - - return 1; - - fail: - webm_ctx->nestegg_ctx = NULL; - rewind(vpx_ctx->file); - - return 0; -} - -int webm_read_frame(struct WebmInputContext *webm_ctx, - uint8_t **buffer, - size_t *bytes_in_buffer, - size_t *buffer_size) { - if (webm_ctx->chunk >= webm_ctx->chunks) { - uint32_t track; - int status; - - do { - /* End of this packet, get another. */ - if (webm_ctx->pkt) { - nestegg_free_packet(webm_ctx->pkt); - webm_ctx->pkt = NULL; - } - - status = nestegg_read_packet(webm_ctx->nestegg_ctx, &webm_ctx->pkt); - if (status <= 0) - return status ? 
status : 1; - - if (nestegg_packet_track(webm_ctx->pkt, &track)) - return -1; - } while (track != webm_ctx->video_track); - - if (nestegg_packet_count(webm_ctx->pkt, &webm_ctx->chunks)) - return -1; - - webm_ctx->chunk = 0; - } - - if (nestegg_packet_data(webm_ctx->pkt, webm_ctx->chunk, - buffer, bytes_in_buffer)) { - return -1; - } - - webm_ctx->chunk++; - return 0; -} - -int webm_guess_framerate(struct WebmInputContext *webm_ctx, - struct VpxInputContext *vpx_ctx) { - uint32_t i; - uint64_t tstamp = 0; - - /* Check to see if we can seek before we parse any data. */ - if (nestegg_track_seek(webm_ctx->nestegg_ctx, webm_ctx->video_track, 0)) { - fprintf(stderr, "Failed to guess framerate (no Cues), set to 30fps.\n"); - vpx_ctx->framerate.numerator = 30; - vpx_ctx->framerate.denominator = 1; - return 0; - } - - /* Guess the framerate. Read up to 1 second, or 50 video packets, - * whichever comes first. - */ - for (i = 0; tstamp < 1000000000 && i < 50;) { - nestegg_packet *pkt; - uint32_t track; - - if (nestegg_read_packet(webm_ctx->nestegg_ctx, &pkt) <= 0) - break; - - nestegg_packet_track(pkt, &track); - if (track == webm_ctx->video_track) { - nestegg_packet_tstamp(pkt, &tstamp); - ++i; - } - - nestegg_free_packet(pkt); - } - - if (nestegg_track_seek(webm_ctx->nestegg_ctx, webm_ctx->video_track, 0)) - goto fail; - - vpx_ctx->framerate.numerator = (i - 1) * 1000000; - vpx_ctx->framerate.denominator = (int)(tstamp / 1000); - return 0; - - fail: - nestegg_destroy(webm_ctx->nestegg_ctx); - webm_ctx->nestegg_ctx = NULL; - rewind(vpx_ctx->file); - return 1; -} - -void webm_free(struct WebmInputContext *webm_ctx) { - if (webm_ctx && webm_ctx->nestegg_ctx) { - if (webm_ctx->pkt) - nestegg_free_packet(webm_ctx->pkt); - nestegg_destroy(webm_ctx->nestegg_ctx); - } -} diff --git a/webmdec.cc b/webmdec.cc new file mode 100644 index 000000000..eb89befd8 --- /dev/null +++ b/webmdec.cc @@ -0,0 +1,219 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./webmdec.h" + +#include <cstring> +#include <cstdio> + +#include "third_party/libwebm/mkvparser.hpp" +#include "third_party/libwebm/mkvreader.hpp" + +namespace { + +void reset(struct WebmInputContext *const webm_ctx) { + if (webm_ctx->reader != NULL) { + mkvparser::MkvReader *const reader = + reinterpret_cast<mkvparser::MkvReader*>(webm_ctx->reader); + delete reader; + } + if (webm_ctx->segment != NULL) { + mkvparser::Segment *const segment = + reinterpret_cast<mkvparser::Segment*>(webm_ctx->segment); + delete segment; + } + if (webm_ctx->buffer != NULL) { + delete[] webm_ctx->buffer; + } + webm_ctx->reader = NULL; + webm_ctx->segment = NULL; + webm_ctx->buffer = NULL; + webm_ctx->cluster = NULL; + webm_ctx->block_entry = NULL; + webm_ctx->block = NULL; + webm_ctx->block_frame_index = 0; + webm_ctx->video_track_index = 0; + webm_ctx->timestamp_ns = 0; +} + +void get_first_cluster(struct WebmInputContext *const webm_ctx) { + mkvparser::Segment *const segment = + reinterpret_cast<mkvparser::Segment*>(webm_ctx->segment); + const mkvparser::Cluster *const cluster = segment->GetFirst(); + webm_ctx->cluster = cluster; +} + +void rewind_and_reset(struct WebmInputContext *const webm_ctx, + struct VpxInputContext *const vpx_ctx) { + rewind(vpx_ctx->file); + reset(webm_ctx); +} + +} // namespace + +int file_is_webm(struct WebmInputContext *webm_ctx, + struct VpxInputContext *vpx_ctx) { + mkvparser::MkvReader *const reader = new mkvparser::MkvReader(vpx_ctx->file); + webm_ctx->reader = reader; + + mkvparser::EBMLHeader header; + long long pos = 0; + if (header.Parse(reader, pos) < 0) { + rewind_and_reset(webm_ctx, vpx_ctx); + 
return 0; + } + + mkvparser::Segment* segment; + if (mkvparser::Segment::CreateInstance(reader, pos, segment)) { + rewind_and_reset(webm_ctx, vpx_ctx); + return 0; + } + webm_ctx->segment = segment; + if (segment->Load() < 0) { + rewind_and_reset(webm_ctx, vpx_ctx); + return 0; + } + + const mkvparser::Tracks *const tracks = segment->GetTracks(); + const mkvparser::VideoTrack* video_track = NULL; + for (unsigned long i = 0; i < tracks->GetTracksCount(); ++i) { + const mkvparser::Track* const track = tracks->GetTrackByIndex(i); + if (track->GetType() == mkvparser::Track::kVideo) { + video_track = static_cast<const mkvparser::VideoTrack*>(track); + webm_ctx->video_track_index = track->GetNumber(); + break; + } + } + + if (video_track == NULL) { + rewind_and_reset(webm_ctx, vpx_ctx); + return 0; + } + + if (!strncmp(video_track->GetCodecId(), "V_VP8", 5)) { + vpx_ctx->fourcc = VP8_FOURCC; + } else if (!strncmp(video_track->GetCodecId(), "V_VP9", 5)) { + vpx_ctx->fourcc = VP9_FOURCC; + } else { + rewind_and_reset(webm_ctx, vpx_ctx); + return 0; + } + + vpx_ctx->framerate.denominator = 0; + vpx_ctx->framerate.numerator = 0; + vpx_ctx->width = video_track->GetWidth(); + vpx_ctx->height = video_track->GetHeight(); + + get_first_cluster(webm_ctx); + + return 1; +} + +int webm_read_frame(struct WebmInputContext *webm_ctx, + uint8_t **buffer, + size_t *bytes_in_buffer, + size_t *buffer_size) { + mkvparser::Segment *const segment = + reinterpret_cast<mkvparser::Segment*>(webm_ctx->segment); + const mkvparser::Cluster* cluster = + reinterpret_cast<const mkvparser::Cluster*>(webm_ctx->cluster); + const mkvparser::Block *block = + reinterpret_cast<const mkvparser::Block*>(webm_ctx->block); + const mkvparser::BlockEntry *block_entry = + reinterpret_cast<const mkvparser::BlockEntry*>(webm_ctx->block_entry); + bool block_entry_eos = false; + do { + long status = 0; + bool get_new_block = false; + if (block_entry == NULL && !block_entry_eos) { + status = 
cluster->GetFirst(block_entry); + get_new_block = true; + } else if (block_entry_eos || block_entry->EOS()) { + cluster = segment->GetNext(cluster); + if (cluster == NULL || cluster->EOS()) { + *bytes_in_buffer = 0; + return 1; + } + status = cluster->GetFirst(block_entry); + block_entry_eos = false; + get_new_block = true; + } else if (block == NULL || + webm_ctx->block_frame_index == block->GetFrameCount() || + block->GetTrackNumber() != webm_ctx->video_track_index) { + status = cluster->GetNext(block_entry, block_entry); + if (block_entry == NULL || block_entry->EOS()) { + block_entry_eos = true; + continue; + } + get_new_block = true; + } + if (status) { + return -1; + } + if (get_new_block) { + block = block_entry->GetBlock(); + webm_ctx->block_frame_index = 0; + } + } while (block->GetTrackNumber() != webm_ctx->video_track_index || + block_entry_eos); + + webm_ctx->cluster = cluster; + webm_ctx->block_entry = block_entry; + webm_ctx->block = block; + + const mkvparser::Block::Frame& frame = + block->GetFrame(webm_ctx->block_frame_index); + ++webm_ctx->block_frame_index; + if (frame.len > static_cast<long>(*buffer_size)) { + delete[] *buffer; + *buffer = new uint8_t[frame.len]; + if (*buffer == NULL) { + return -1; + } + *buffer_size = frame.len; + webm_ctx->buffer = *buffer; + } + *bytes_in_buffer = frame.len; + webm_ctx->timestamp_ns = block->GetTime(cluster); + + mkvparser::MkvReader *const reader = + reinterpret_cast<mkvparser::MkvReader*>(webm_ctx->reader); + return frame.Read(reader, *buffer) ? 
-1 : 0; +} + +int webm_guess_framerate(struct WebmInputContext *webm_ctx, + struct VpxInputContext *vpx_ctx) { + uint32_t i = 0; + uint8_t *buffer = NULL; + size_t bytes_in_buffer = 0; + size_t buffer_size = 0; + while (webm_ctx->timestamp_ns < 1000000000 && i < 50) { + if (webm_read_frame(webm_ctx, &buffer, &bytes_in_buffer, &buffer_size)) { + break; + } + ++i; + } + vpx_ctx->framerate.numerator = (i - 1) * 1000000; + vpx_ctx->framerate.denominator = + static_cast<int>(webm_ctx->timestamp_ns / 1000); + delete[] buffer; + + get_first_cluster(webm_ctx); + webm_ctx->block = NULL; + webm_ctx->block_entry = NULL; + webm_ctx->block_frame_index = 0; + webm_ctx->timestamp_ns = 0; + + return 0; +} + +void webm_free(struct WebmInputContext *webm_ctx) { + reset(webm_ctx); +} @@ -16,34 +16,53 @@ extern "C" { #endif -struct nestegg; -struct nestegg_packet; struct VpxInputContext; struct WebmInputContext { - uint32_t chunk; - uint32_t chunks; - uint32_t video_track; - struct nestegg *nestegg_ctx; - struct nestegg_packet *pkt; + void *reader; + void *segment; + uint8_t *buffer; + const void *cluster; + const void *block_entry; + const void *block; + int block_frame_index; + int video_track_index; + uint64_t timestamp_ns; }; +// Checks if the input is a WebM file. If so, initializes WebMInputContext so +// that webm_read_frame can be called to retrieve a video frame. +// Returns 1 on success and 0 on failure or input is not WebM file. +// TODO(vigneshv): Refactor this function into two smaller functions specific +// to their task. int file_is_webm(struct WebmInputContext *webm_ctx, struct VpxInputContext *vpx_ctx); -/* Reads a WebM video frame. Return values: - * 0 - Success - * 1 - End of File - * -1 - Error - */ +// Reads a WebM Video Frame. Memory for the buffer is created, owned and managed +// by this function. For the first call, |buffer| should be NULL and +// |*bytes_in_buffer| should be 0. 
Once all the frames are read and used, +// webm_free() should be called, otherwise there will be a leak. +// Parameters: +// webm_ctx - WebmInputContext object +// buffer - pointer where the frame data will be filled. +// bytes_in_buffer - pointer to buffer size. +// buffer_size - unused TODO(vigneshv): remove this +// Return values: +// 0 - Success +// 1 - End of Stream +// -1 - Error +// TODO(vigneshv): Make the return values consistent across all functions in +// this file. int webm_read_frame(struct WebmInputContext *webm_ctx, uint8_t **buffer, size_t *bytes_in_buffer, size_t *buffer_size); +// Guesses the frame rate of the input file based on the container timestamps. int webm_guess_framerate(struct WebmInputContext *webm_ctx, struct VpxInputContext *vpx_ctx); +// Resets the WebMInputContext. void webm_free(struct WebmInputContext *webm_ctx); #ifdef __cplusplus |