summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--build/make/Makefile1
-rwxr-xr-xbuild/make/gen_msvs_proj.sh90
-rwxr-xr-xbuild/make/gen_msvs_vcxproj.sh90
-rw-r--r--build/make/msvs_common.sh98
-rw-r--r--examples.mk15
-rwxr-xr-xtest/decode_to_md5.sh68
-rwxr-xr-xtest/decode_with_drops.sh75
-rwxr-xr-xtest/examples.sh28
-rwxr-xr-xtest/simple_decoder.sh57
-rwxr-xr-xtest/simple_encoder.sh58
-rw-r--r--test/test.mk14
-rwxr-xr-xtest/tools_common.sh7
-rw-r--r--vp8/common/arm/neon/buildintrapredictorsmby_neon.asm19
-rw-r--r--vp8/common/arm/neon/idct_dequant_0_2x_neon.asm2
-rw-r--r--vp8/common/arm/neon/idct_dequant_full_2x_neon.asm3
-rw-r--r--vp8/common/arm/neon/loopfilter_neon.asm24
-rw-r--r--vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm4
-rw-r--r--vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm4
-rw-r--r--vp8/common/arm/neon/mbloopfilter_neon.asm24
-rw-r--r--vp8/common/arm/neon/sad16_neon.asm5
-rw-r--r--vp8/common/arm/neon/sad8_neon.asm6
-rw-r--r--vp8/common/arm/neon/save_reg_neon.asm36
-rw-r--r--vp8/common/arm/neon/shortidct4x4llm_neon.asm5
-rw-r--r--vp8/common/arm/neon/sixtappredict16x16_neon.asm9
-rw-r--r--vp8/common/arm/neon/sixtappredict4x4_neon.asm8
-rw-r--r--vp8/common/arm/neon/sixtappredict8x4_neon.asm9
-rw-r--r--vp8/common/arm/neon/sixtappredict8x8_neon.asm10
-rw-r--r--vp8/common/arm/neon/variance_neon.asm23
-rw-r--r--vp8/common/arm/neon/vp8_subpixelvariance16x16_neon.asm8
-rw-r--r--vp8/common/arm/neon/vp8_subpixelvariance16x16s_neon.asm23
-rw-r--r--vp8/common/arm/neon/vp8_subpixelvariance8x8_neon.asm9
-rw-r--r--vp8/decoder/onyxd_if.c27
-rw-r--r--vp8/encoder/arm/neon/subtract_neon.asm14
-rw-r--r--vp8/encoder/arm/neon/vp8_memcpy_neon.asm2
-rw-r--r--vp8/encoder/arm/neon/vp8_mse16x16_neon.asm9
-rw-r--r--vp8/encoder/onyx_if.c60
-rw-r--r--vp8/vp8_common.mk1
-rw-r--r--vp9/common/vp9_rtcd_defs.pl42
-rw-r--r--vp9/common/vp9_tapify.py106
-rw-r--r--vp9/decoder/vp9_decoder.c92
-rw-r--r--vp9/encoder/vp9_block.h10
-rw-r--r--vp9/encoder/vp9_encoder.c172
-rw-r--r--vp9/encoder/vp9_encoder.h2
-rw-r--r--vp9/encoder/vp9_firstpass.c4
-rw-r--r--vp9/encoder/vp9_mcomp.c292
-rw-r--r--vp9/encoder/vp9_mcomp.h17
-rw-r--r--vp9/encoder/vp9_pickmode.c2
-rw-r--r--vp9/encoder/vp9_variance.c120
-rw-r--r--vp9/encoder/vp9_variance.h3
-rw-r--r--vp9/encoder/x86/vp9_variance_sse2.c55
-rw-r--r--webmdec.c202
-rw-r--r--webmdec.cc219
-rw-r--r--webmdec.h43
53 files changed, 1049 insertions, 1277 deletions
diff --git a/build/make/Makefile b/build/make/Makefile
index 03dacce5e..c4d53f160 100644
--- a/build/make/Makefile
+++ b/build/make/Makefile
@@ -411,6 +411,7 @@ ifneq ($(call enabled,DIST-SRCS),)
DIST-SRCS-$(CONFIG_MSVS) += build/make/gen_msvs_proj.sh
DIST-SRCS-$(CONFIG_MSVS) += build/make/gen_msvs_sln.sh
DIST-SRCS-$(CONFIG_MSVS) += build/make/gen_msvs_vcxproj.sh
+ DIST-SRCS-$(CONFIG_MSVS) += build/make/msvs_common.sh
DIST-SRCS-$(CONFIG_MSVS) += build/x86-msvs/obj_int_extract.bat
DIST-SRCS-$(CONFIG_MSVS) += build/arm-msvs/obj_int_extract.bat
DIST-SRCS-$(CONFIG_RVCT) += build/make/armlink_adapter.sh
diff --git a/build/make/gen_msvs_proj.sh b/build/make/gen_msvs_proj.sh
index d0cbf3e54..4e803b81e 100755
--- a/build/make/gen_msvs_proj.sh
+++ b/build/make/gen_msvs_proj.sh
@@ -9,17 +9,11 @@
## be found in the AUTHORS file in the root of the source tree.
##
-
self=$0
self_basename=${self##*/}
self_dirname=$(dirname "$0")
-EOL=$'\n'
-if [ "$(uname -o 2>/dev/null)" = "Cygwin" ] \
- && cygpath --help >/dev/null 2>&1; then
- FIXPATH='cygpath -m'
-else
- FIXPATH='echo'
-fi
+
+. "$self_dirname/msvs_common.sh"|| exit 127
show_help() {
cat <<EOF
@@ -49,86 +43,6 @@ EOF
exit 1
}
-die() {
- echo "${self_basename}: $@" >&2
- exit 1
-}
-
-die_unknown(){
- echo "Unknown option \"$1\"." >&2
- echo "See ${self_basename} --help for available options." >&2
- exit 1
-}
-
-fix_path() {
- $FIXPATH "$1"
-}
-
-generate_uuid() {
- local hex="0123456789ABCDEF"
- local i
- local uuid=""
- local j
- #93995380-89BD-4b04-88EB-625FBE52EBFB
- for ((i=0; i<32; i++)); do
- (( j = $RANDOM % 16 ))
- uuid="${uuid}${hex:$j:1}"
- done
- echo "${uuid:0:8}-${uuid:8:4}-${uuid:12:4}-${uuid:16:4}-${uuid:20:12}"
-}
-
-indent1=" "
-indent=""
-indent_push() {
- indent="${indent}${indent1}"
-}
-indent_pop() {
- indent="${indent%${indent1}}"
-}
-
-tag_attributes() {
- for opt in "$@"; do
- optval="${opt#*=}"
- [ -n "${optval}" ] ||
- die "Missing attribute value in '$opt' while generating $tag tag"
- echo "${indent}${opt%%=*}=\"${optval}\""
- done
-}
-
-open_tag() {
- local tag=$1
- shift
- if [ $# -ne 0 ]; then
- echo "${indent}<${tag}"
- indent_push
- tag_attributes "$@"
- echo "${indent}>"
- else
- echo "${indent}<${tag}>"
- indent_push
- fi
-}
-
-close_tag() {
- local tag=$1
- indent_pop
- echo "${indent}</${tag}>"
-}
-
-tag() {
- local tag=$1
- shift
- if [ $# -ne 0 ]; then
- echo "${indent}<${tag}"
- indent_push
- tag_attributes "$@"
- indent_pop
- echo "${indent}/>"
- else
- echo "${indent}<${tag}/>"
- fi
-}
-
generate_filter() {
local var=$1
local name=$2
diff --git a/build/make/gen_msvs_vcxproj.sh b/build/make/gen_msvs_vcxproj.sh
index a64e129b2..9dc790629 100755
--- a/build/make/gen_msvs_vcxproj.sh
+++ b/build/make/gen_msvs_vcxproj.sh
@@ -9,17 +9,11 @@
## be found in the AUTHORS file in the root of the source tree.
##
-
self=$0
self_basename=${self##*/}
self_dirname=$(dirname "$0")
-EOL=$'\n'
-if [ "$(uname -o 2>/dev/null)" = "Cygwin" ] \
- && cygpath --help >/dev/null 2>&1; then
- FIXPATH='cygpath -m'
-else
- FIXPATH='echo'
-fi
+
+. "$self_dirname/msvs_common.sh"|| exit 127
show_help() {
cat <<EOF
@@ -50,86 +44,6 @@ EOF
exit 1
}
-die() {
- echo "${self_basename}: $@" >&2
- exit 1
-}
-
-die_unknown(){
- echo "Unknown option \"$1\"." >&2
- echo "See ${self_basename} --help for available options." >&2
- exit 1
-}
-
-fix_path() {
- $FIXPATH "$1"
-}
-
-generate_uuid() {
- local hex="0123456789ABCDEF"
- local i
- local uuid=""
- local j
- #93995380-89BD-4b04-88EB-625FBE52EBFB
- for ((i=0; i<32; i++)); do
- (( j = $RANDOM % 16 ))
- uuid="${uuid}${hex:$j:1}"
- done
- echo "${uuid:0:8}-${uuid:8:4}-${uuid:12:4}-${uuid:16:4}-${uuid:20:12}"
-}
-
-indent1=" "
-indent=""
-indent_push() {
- indent="${indent}${indent1}"
-}
-indent_pop() {
- indent="${indent%${indent1}}"
-}
-
-tag_attributes() {
- for opt in "$@"; do
- optval="${opt#*=}"
- [ -n "${optval}" ] ||
- die "Missing attribute value in '$opt' while generating $tag tag"
- echo "${indent}${opt%%=*}=\"${optval}\""
- done
-}
-
-open_tag() {
- local tag=$1
- shift
- if [ $# -ne 0 ]; then
- echo "${indent}<${tag}"
- indent_push
- tag_attributes "$@"
- echo "${indent}>"
- else
- echo "${indent}<${tag}>"
- indent_push
- fi
-}
-
-close_tag() {
- local tag=$1
- indent_pop
- echo "${indent}</${tag}>"
-}
-
-tag() {
- local tag=$1
- shift
- if [ $# -ne 0 ]; then
- echo "${indent}<${tag}"
- indent_push
- tag_attributes "$@"
- indent_pop
- echo "${indent}/>"
- else
- echo "${indent}<${tag}/>"
- fi
-}
-
tag_content() {
local tag=$1
local content=$2
diff --git a/build/make/msvs_common.sh b/build/make/msvs_common.sh
new file mode 100644
index 000000000..eb2eb7bcf
--- /dev/null
+++ b/build/make/msvs_common.sh
@@ -0,0 +1,98 @@
+#!/bin/bash
+##
+## Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+##
+## Use of this source code is governed by a BSD-style license
+## that can be found in the LICENSE file in the root of the source
+## tree. An additional intellectual property rights grant can be found
+## in the file PATENTS. All contributing project authors may
+## be found in the AUTHORS file in the root of the source tree.
+##
+
+if [ "$(uname -o 2>/dev/null)" = "Cygwin" ] \
+ && cygpath --help >/dev/null 2>&1; then
+ FIXPATH='cygpath -m'
+else
+ FIXPATH='echo'
+fi
+
+# Prints "<script name>: <message>" to stderr and exits with status 1.
+# All arguments are joined into the message with $*; embedding $@ inside a
+# longer quoted string would expand to several separate words instead.
+die() {
+    echo "${self_basename}: $*" >&2
+    exit 1
+}
+
+die_unknown(){
+ echo "Unknown option \"$1\"." >&2
+ echo "See ${self_basename} --help for available options." >&2
+ exit 1
+}
+
+fix_path() {
+ $FIXPATH "$1"
+}
+
+generate_uuid() {
+ local hex="0123456789ABCDEF"
+ local i
+ local uuid=""
+ local j
+ #93995380-89BD-4b04-88EB-625FBE52EBFB
+ for ((i=0; i<32; i++)); do
+ (( j = $RANDOM % 16 ))
+ uuid="${uuid}${hex:$j:1}"
+ done
+ echo "${uuid:0:8}-${uuid:8:4}-${uuid:12:4}-${uuid:16:4}-${uuid:20:12}"
+}
+
+indent1=" "
+indent=""
+indent_push() {
+ indent="${indent}${indent1}"
+}
+indent_pop() {
+ indent="${indent%${indent1}}"
+}
+
+# Emits one indented name="value" line for each "name=value" argument.
+# Aborts through die() when the text following the first '=' is empty; the
+# error message reads $tag, which open_tag() and tag() set before calling.
+tag_attributes() {
+    # Keep the loop variables local so they cannot overwrite same-named
+    # globals in the script that sources this file.
+    local opt optval
+    for opt in "$@"; do
+        optval="${opt#*=}"
+        [ -n "${optval}" ] ||
+            die "Missing attribute value in '$opt' while generating $tag tag"
+        echo "${indent}${opt%%=*}=\"${optval}\""
+    done
+}
+
+open_tag() {
+ local tag=$1
+ shift
+ if [ $# -ne 0 ]; then
+ echo "${indent}<${tag}"
+ indent_push
+ tag_attributes "$@"
+ echo "${indent}>"
+ else
+ echo "${indent}<${tag}>"
+ indent_push
+ fi
+}
+
+close_tag() {
+ local tag=$1
+ indent_pop
+ echo "${indent}</${tag}>"
+}
+
+tag() {
+ local tag=$1
+ shift
+ if [ $# -ne 0 ]; then
+ echo "${indent}<${tag}"
+ indent_push
+ tag_attributes "$@"
+ indent_pop
+ echo "${indent}/>"
+ else
+ echo "${indent}<${tag}/>"
+ fi
+}
+
diff --git a/examples.mk b/examples.mk
index 91b980168..f6e7c0062 100644
--- a/examples.mk
+++ b/examples.mk
@@ -25,6 +25,11 @@ LIBWEBM_MUXER_SRCS += third_party/libwebm/mkvmuxer.cpp \
third_party/libwebm/mkvwriter.hpp \
third_party/libwebm/webmids.hpp
+LIBWEBM_PARSER_SRCS = third_party/libwebm/mkvparser.cpp \
+ third_party/libwebm/mkvreader.cpp \
+ third_party/libwebm/mkvparser.hpp \
+ third_party/libwebm/mkvreader.hpp
+
# List of examples to build. UTILS are tools meant for distribution
# while EXAMPLES demonstrate specific portions of the API.
UTILS-$(CONFIG_DECODERS) += vpxdec.c
@@ -39,14 +44,8 @@ vpxdec.SRCS += tools_common.c tools_common.h
vpxdec.SRCS += y4menc.c y4menc.h
vpxdec.SRCS += $(LIBYUV_SRCS)
ifeq ($(CONFIG_WEBM_IO),yes)
- vpxdec.SRCS += third_party/nestegg/halloc/halloc.h
- vpxdec.SRCS += third_party/nestegg/halloc/src/align.h
- vpxdec.SRCS += third_party/nestegg/halloc/src/halloc.c
- vpxdec.SRCS += third_party/nestegg/halloc/src/hlist.h
- vpxdec.SRCS += third_party/nestegg/halloc/src/macros.h
- vpxdec.SRCS += third_party/nestegg/include/nestegg/nestegg.h
- vpxdec.SRCS += third_party/nestegg/src/nestegg.c
- vpxdec.SRCS += webmdec.c webmdec.h
+ vpxdec.SRCS += $(LIBWEBM_PARSER_SRCS)
+ vpxdec.SRCS += webmdec.cc webmdec.h
endif
vpxdec.GUID = BA5FE66F-38DD-E034-F542-B1578C5FB950
vpxdec.DESCRIPTION = Full featured decoder
diff --git a/test/decode_to_md5.sh b/test/decode_to_md5.sh
new file mode 100755
index 000000000..da1a87062
--- /dev/null
+++ b/test/decode_to_md5.sh
@@ -0,0 +1,68 @@
+#!/bin/sh
+##
+## Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+##
+## Use of this source code is governed by a BSD-style license
+## that can be found in the LICENSE file in the root of the source
+## tree. An additional intellectual property rights grant can be found
+## in the file PATENTS. All contributing project authors may
+## be found in the AUTHORS file in the root of the source tree.
+##
+## This file tests the libvpx decode_to_md5 example. To add new tests to this
+## file, do the following:
+## 1. Write a shell function (this is your test).
+## 2. Add the function to decode_to_md5_tests (on a new line).
+##
+# Quote the path so a checkout directory containing spaces still resolves.
+. "$(dirname "$0")/tools_common.sh"
+
+# Environment check: Make sure input is available:
+# $VP8_IVF_FILE and $VP9_IVF_FILE are required.
+decode_to_md5_verify_environment() {
+ if [ ! -e "${VP8_IVF_FILE}" ] || [ ! -e "${VP9_IVF_FILE}" ]; then
+ echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH."
+ return 1
+ fi
+}
+
+# Runs decode_to_md5 on $1 and echoes the MD5 sum for the final frame. $2 is
+# interpreted as codec name and used solely to name the output file.
+# Returns 1 when the decoder binary is not executable or when no output file
+# exists after the run.
+decode_to_md5() {
+  local decoder="${LIBVPX_BIN_PATH}/decode_to_md5${VPX_TEST_EXE_SUFFIX}"
+  local input_file="$1"
+  local codec="$2"
+  local output_file="${VPX_TEST_OUTPUT_DIR}/decode_to_md5_${codec}"
+
+  [ -x "${decoder}" ] || return 1
+
+  "${decoder}" "${input_file}" "${output_file}" > /dev/null 2>&1
+
+  [ -e "${output_file}" ] || return 1
+
+  # Quote the substitution: some /bin/sh implementations word-split an
+  # unquoted value in a `local` assignment, and the `% *` below implies the
+  # line holds space-separated fields.
+  local md5_last_frame="$(tail -n1 "${output_file}")"
+  # Drop everything from the last space onward, then strip whitespace. The
+  # character class is quoted so the shell cannot expand it as a filename glob.
+  echo "${md5_last_frame% *}" | tr -d '[:space:]'
+}
+
+# Decodes $VP8_IVF_FILE and compares the MD5 sum of its last frame with the
+# expected value. The check is skipped unless vp8_decode_available prints
+# "yes". Returns 1 on decode failure or MD5 mismatch.
+decode_to_md5_vp8() {
+  # expected MD5 sum for the last frame.
+  local expected_md5="56794d911b02190212bca92f88ad60c6"
+
+  if [ "$(vp8_decode_available)" = "yes" ]; then
+    # Declare first: `local var="$(cmd)"` yields the status of `local`, which
+    # would leave the `|| return 1` unreachable.
+    local actual_md5
+    actual_md5="$(decode_to_md5 "${VP8_IVF_FILE}" vp8)" || return 1
+    [ "${actual_md5}" = "${expected_md5}" ] || return 1
+  fi
+}
+
+# Decodes $VP9_IVF_FILE and compares the MD5 sum of its last frame with the
+# expected value. The check is skipped unless vp9_decode_available prints
+# "yes". Returns 1 on decode failure or MD5 mismatch.
+decode_to_md5_vp9() {
+  # expected MD5 sum for the last frame.
+  local expected_md5="2952c0eae93f3dadd1aa84c50d3fd6d2"
+
+  if [ "$(vp9_decode_available)" = "yes" ]; then
+    # Declare first: `local var="$(cmd)"` yields the status of `local`, which
+    # would leave the `|| return 1` unreachable.
+    local actual_md5
+    actual_md5="$(decode_to_md5 "${VP9_IVF_FILE}" vp9)" || return 1
+    [ "${actual_md5}" = "${expected_md5}" ] || return 1
+  fi
+}
+
+decode_to_md5_tests="decode_to_md5_vp8
+ decode_to_md5_vp9"
+
+run_tests decode_to_md5_verify_environment "${decode_to_md5_tests}"
diff --git a/test/decode_with_drops.sh b/test/decode_with_drops.sh
new file mode 100755
index 000000000..d0321bfb2
--- /dev/null
+++ b/test/decode_with_drops.sh
@@ -0,0 +1,75 @@
+#!/bin/sh
+##
+## Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+##
+## Use of this source code is governed by a BSD-style license
+## that can be found in the LICENSE file in the root of the source
+## tree. An additional intellectual property rights grant can be found
+## in the file PATENTS. All contributing project authors may
+## be found in the AUTHORS file in the root of the source tree.
+##
+## This file tests the libvpx decode_with_drops example. To add new tests to
+## this file, do the following:
+## 1. Write a shell function (this is your test).
+## 2. Add the function to decode_with_drops_tests (on a new line).
+##
+# Quote the path so a checkout directory containing spaces still resolves.
+. "$(dirname "$0")/tools_common.sh"
+
+# Environment check: Make sure input is available:
+# $VP8_IVF_FILE and $VP9_IVF_FILE are required.
+decode_with_drops_verify_environment() {
+ if [ ! -e "${VP8_IVF_FILE}" ] || [ ! -e "${VP9_IVF_FILE}" ]; then
+ echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH."
+ return 1
+ fi
+}
+
+# Runs decode_with_drops on $1, $2 is interpreted as codec name and used solely
+# to name the output file. $3 is the drop mode, and is passed directly to
+# decode_with_drops.
+decode_with_drops() {
+ local decoder="${LIBVPX_BIN_PATH}/decode_with_drops${VPX_TEST_EXE_SUFFIX}"
+ local input_file="$1"
+ local codec="$2"
+ local output_file="${VPX_TEST_OUTPUT_DIR}/decode_with_drops_${codec}"
+ local drop_mode="$3"
+
+ [ -x "${decoder}" ] || return 1
+
+ "${decoder}" "${input_file}" "${output_file}" "${drop_mode}" > /dev/null 2>&1
+
+ [ -e "${output_file}" ] || return 1
+}
+
+# Decodes $VP8_IVF_FILE while dropping frames, twice: once in sequence mode,
+# and once in pattern mode. Returns 1 as soon as either run fails.
+# Note: This test assumes that $VP8_IVF_FILE has exactly 29 frames, and could
+# break if the file is modified.
+decode_with_drops_vp8() {
+  if [ "$(vp8_decode_available)" = "yes" ]; then
+    # Test sequence mode: Drop frames 2-28.
+    decode_with_drops "${VP8_IVF_FILE}" "vp8" "2-28" || return 1
+
+    # Test pattern mode: Drop 3 of every 4 frames.
+    decode_with_drops "${VP8_IVF_FILE}" "vp8" "3/4" || return 1
+  fi
+}
+
+# Decodes $VP9_IVF_FILE while dropping frames, twice: once in sequence mode,
+# and once in pattern mode. Returns 1 as soon as either run fails.
+# Note: This test assumes that $VP9_IVF_FILE has exactly 20 frames, and could
+# break if the file is modified.
+decode_with_drops_vp9() {
+  if [ "$(vp9_decode_available)" = "yes" ]; then
+    # Test sequence mode: Drop frames 2-19.
+    decode_with_drops "${VP9_IVF_FILE}" "vp9" "2-19" || return 1
+
+    # Test pattern mode: Drop 3 of every 4 frames.
+    decode_with_drops "${VP9_IVF_FILE}" "vp9" "3/4" || return 1
+  fi
+}
+
+decode_with_drops_tests="decode_with_drops_vp8
+ decode_with_drops_vp9"
+
+run_tests decode_with_drops_verify_environment "${decode_with_drops_tests}"
diff --git a/test/examples.sh b/test/examples.sh
new file mode 100755
index 000000000..ac2a18c03
--- /dev/null
+++ b/test/examples.sh
@@ -0,0 +1,28 @@
+#!/bin/sh
+##
+## Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+##
+## Use of this source code is governed by a BSD-style license
+## that can be found in the LICENSE file in the root of the source
+## tree. An additional intellectual property rights grant can be found
+## in the file PATENTS. All contributing project authors may
+## be found in the AUTHORS file in the root of the source tree.
+##
+## This file runs all of the tests for the libvpx examples.
+##
+. $(dirname $0)/tools_common.sh
+
+example_tests=$(ls $(dirname $0)/*.sh)
+
+# List of script names to exclude.
+exclude_list="examples vpxdec vpxenc tools_common"
+
+# Filter out the scripts in $exclude_list.
+for word in ${exclude_list}; do
+ example_tests=$(filter_strings "${example_tests}" "${word}" exclude)
+done
+
+for test in ${example_tests}; do
+ # Source each test script so that exporting variables can be avoided.
+ . "${test}"
+done
diff --git a/test/simple_decoder.sh b/test/simple_decoder.sh
new file mode 100755
index 000000000..a0db58ff8
--- /dev/null
+++ b/test/simple_decoder.sh
@@ -0,0 +1,57 @@
+#!/bin/sh
+##
+## Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+##
+## Use of this source code is governed by a BSD-style license
+## that can be found in the LICENSE file in the root of the source
+## tree. An additional intellectual property rights grant can be found
+## in the file PATENTS. All contributing project authors may
+## be found in the AUTHORS file in the root of the source tree.
+##
+## This file tests the libvpx simple_decoder example code. To add new tests to
+## this file, do the following:
+## 1. Write a shell function (this is your test).
+## 2. Add the function to simple_decoder_tests (on a new line).
+##
+# Quote the path so a checkout directory containing spaces still resolves.
+. "$(dirname "$0")/tools_common.sh"
+
+# Environment check: Make sure input is available:
+# $VP8_IVF_FILE and $VP9_IVF_FILE are required.
+simple_decoder_verify_environment() {
+ if [ ! -e "${VP8_IVF_FILE}" ] || [ ! -e "${VP9_IVF_FILE}" ]; then
+ echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH."
+ return 1
+ fi
+}
+
+# Runs simple_decoder using $1 as input file. $2 is the codec name, and is used
+# solely to name the output file.
+simple_decoder() {
+ local decoder="${LIBVPX_BIN_PATH}/simple_decoder${VPX_TEST_EXE_SUFFIX}"
+ local input_file="$1"
+ local codec="$2"
+ local output_file="${VPX_TEST_OUTPUT_DIR}/simple_decoder_${codec}.raw"
+
+ [ -x "${decoder}" ] || return 1
+
+ "${decoder}" "${input_file}" "${output_file}" > /dev/null 2>&1
+
+ [ -e "${output_file}" ] || return 1
+}
+
+simple_decoder_vp8() {
+ if [ "$(vp8_decode_available)" = "yes" ]; then
+ simple_decoder "${VP8_IVF_FILE}" vp8 || return 1
+ fi
+}
+
+simple_decoder_vp9() {
+ if [ "$(vp9_decode_available)" = "yes" ]; then
+ simple_decoder "${VP9_IVF_FILE}" vp9 || return 1
+ fi
+}
+
+simple_decoder_tests="simple_decoder_vp8
+ simple_decoder_vp9"
+
+run_tests simple_decoder_verify_environment "${simple_decoder_tests}"
diff --git a/test/simple_encoder.sh b/test/simple_encoder.sh
new file mode 100755
index 000000000..13f5e298b
--- /dev/null
+++ b/test/simple_encoder.sh
@@ -0,0 +1,58 @@
+#!/bin/sh
+##
+## Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+##
+## Use of this source code is governed by a BSD-style license
+## that can be found in the LICENSE file in the root of the source
+## tree. An additional intellectual property rights grant can be found
+## in the file PATENTS. All contributing project authors may
+## be found in the AUTHORS file in the root of the source tree.
+##
+## This file tests the libvpx simple_encoder example. To add new tests to this
+## file, do the following:
+## 1. Write a shell function (this is your test).
+## 2. Add the function to simple_encoder_tests (on a new line).
+##
+# Quote the path so a checkout directory containing spaces still resolves.
+. "$(dirname "$0")/tools_common.sh"
+
+# Environment check: $YUV_RAW_INPUT is required.
+simple_encoder_verify_environment() {
+ if [ ! -e "${YUV_RAW_INPUT}" ]; then
+ echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH."
+ return 1
+ fi
+}
+
+# Runs simple_encoder using the codec specified by $1.
+simple_encoder() {
+ local encoder="${LIBVPX_BIN_PATH}/simple_encoder${VPX_TEST_EXE_SUFFIX}"
+ local codec="$1"
+ local output_file="${VPX_TEST_OUTPUT_DIR}/simple_encoder_${codec}.ivf"
+
+ [ -x "${encoder}" ] || return 1
+
+ "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" "${YUV_RAW_INPUT_HEIGHT}" \
+ "${YUV_RAW_INPUT}" "${output_file}" 9999 > /dev/null 2>&1
+
+ [ -e "${output_file}" ] || return 1
+}
+
+simple_encoder_vp8() {
+ if [ "$(vp8_encode_available)" = "yes" ]; then
+ simple_encoder vp8 || return 1
+ fi
+}
+
+# TODO(tomfinegan): Add a frame limit param to simple_encoder and enable this
+# test. VP9 is just too slow right now: This test takes 4m30s+ on a fast
+# machine.
+DISABLED_simple_encoder_vp9() {
+ if [ "$(vp9_encode_available)" = "yes" ]; then
+ simple_encoder vp9 || return 1
+ fi
+}
+
+simple_encoder_tests="simple_encoder_vp8
+ DISABLED_simple_encoder_vp9"
+
+run_tests simple_encoder_verify_environment "${simple_encoder_tests}"
diff --git a/test/test.mk b/test/test.mk
index da56b00ec..0dcb6c86e 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -43,15 +43,13 @@ LIBVPX_TEST_SRCS-yes += encode_test_driver.h
## WebM Parsing
ifeq ($(CONFIG_WEBM_IO), yes)
-NESTEGG_SRCS += ../third_party/nestegg/halloc/halloc.h
-NESTEGG_SRCS += ../third_party/nestegg/halloc/src/align.h
-NESTEGG_SRCS += ../third_party/nestegg/halloc/src/halloc.c
-NESTEGG_SRCS += ../third_party/nestegg/halloc/src/hlist.h
-NESTEGG_SRCS += ../third_party/nestegg/include/nestegg/nestegg.h
-NESTEGG_SRCS += ../third_party/nestegg/src/nestegg.c
-LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += $(NESTEGG_SRCS)
+LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser.cpp
+LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvreader.cpp
+LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser.hpp
+LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvreader.hpp
+LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += $(LIBWEBM_PARSER_SRCS)
LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../tools_common.h
-LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../webmdec.c
+LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../webmdec.cc
LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../webmdec.h
LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += webm_video_source.h
endif
diff --git a/test/tools_common.sh b/test/tools_common.sh
index 45b777178..0f5cbb9e0 100755
--- a/test/tools_common.sh
+++ b/test/tools_common.sh
@@ -9,6 +9,11 @@
## be found in the AUTHORS file in the root of the source tree.
##
## This file contains shell code shared by test scripts for libvpx tools.
+
+# Use $VPX_TEST_TOOLS_COMMON_SH as a pseudo include guard.
+if [ -z "${VPX_TEST_TOOLS_COMMON_SH}" ]; then
+VPX_TEST_TOOLS_COMMON_SH=included
+
set -e
# Sets $VPX_TOOL_TEST to the name specified by positional parameter one.
@@ -441,3 +446,5 @@ $(basename "${0%.*}") test configuration:
VPX_TEST_RUN_DISABLED_TESTS=${VPX_TEST_RUN_DISABLED_TESTS}
EOF
fi
+
+fi # End $VPX_TEST_TOOLS_COMMON_SH pseudo include guard.
diff --git a/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm b/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm
index e3ea91fe6..a8730aa04 100644
--- a/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm
+++ b/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm
@@ -26,6 +26,7 @@
|vp8_build_intra_predictors_mby_neon_func| PROC
push {r4-r8, lr}
+ vpush {d8-d15}
cmp r3, #0
beq case_dc_pred
@@ -37,8 +38,8 @@
beq case_tm_pred
case_dc_pred
- ldr r4, [sp, #24] ; Up
- ldr r5, [sp, #28] ; Left
+ ldr r4, [sp, #88] ; Up
+ ldr r5, [sp, #92] ; Left
; Default the DC average to 128
mov r12, #128
@@ -143,6 +144,7 @@ skip_dc_pred_up_left
vst1.u8 {q0}, [r1]!
vst1.u8 {q0}, [r1]!
+ vpop {d8-d15}
pop {r4-r8,pc}
case_v_pred
; Copy down above row
@@ -165,6 +167,7 @@ case_v_pred
vst1.u8 {q0}, [r1]!
vst1.u8 {q0}, [r1]!
vst1.u8 {q0}, [r1]!
+ vpop {d8-d15}
pop {r4-r8,pc}
case_h_pred
@@ -224,6 +227,7 @@ case_h_pred
vst1.u8 {q2}, [r1]!
vst1.u8 {q3}, [r1]!
+ vpop {d8-d15}
pop {r4-r8,pc}
case_tm_pred
@@ -293,6 +297,7 @@ case_tm_pred_loop
subs r12, r12, #1
bne case_tm_pred_loop
+ vpop {d8-d15}
pop {r4-r8,pc}
ENDP
@@ -307,6 +312,7 @@ case_tm_pred_loop
|vp8_build_intra_predictors_mby_s_neon_func| PROC
push {r4-r8, lr}
+ vpush {d8-d15}
mov r1, r0 ; unsigned char *ypred_ptr = x->dst.y_buffer; //x->Predictor;
@@ -320,8 +326,8 @@ case_tm_pred_loop
beq case_tm_pred_s
case_dc_pred_s
- ldr r4, [sp, #24] ; Up
- ldr r5, [sp, #28] ; Left
+ ldr r4, [sp, #88] ; Up
+ ldr r5, [sp, #92] ; Left
; Default the DC average to 128
mov r12, #128
@@ -426,6 +432,7 @@ skip_dc_pred_up_left_s
vst1.u8 {q0}, [r1], r2
vst1.u8 {q0}, [r1], r2
+ vpop {d8-d15}
pop {r4-r8,pc}
case_v_pred_s
; Copy down above row
@@ -448,6 +455,8 @@ case_v_pred_s
vst1.u8 {q0}, [r1], r2
vst1.u8 {q0}, [r1], r2
vst1.u8 {q0}, [r1], r2
+
+ vpop {d8-d15}
pop {r4-r8,pc}
case_h_pred_s
@@ -507,6 +516,7 @@ case_h_pred_s
vst1.u8 {q2}, [r1], r2
vst1.u8 {q3}, [r1], r2
+ vpop {d8-d15}
pop {r4-r8,pc}
case_tm_pred_s
@@ -576,6 +586,7 @@ case_tm_pred_loop_s
subs r12, r12, #1
bne case_tm_pred_loop_s
+ vpop {d8-d15}
pop {r4-r8,pc}
ENDP
diff --git a/vp8/common/arm/neon/idct_dequant_0_2x_neon.asm b/vp8/common/arm/neon/idct_dequant_0_2x_neon.asm
index 6c29c5586..3a3921081 100644
--- a/vp8/common/arm/neon/idct_dequant_0_2x_neon.asm
+++ b/vp8/common/arm/neon/idct_dequant_0_2x_neon.asm
@@ -22,6 +22,7 @@
; r3 stride
|idct_dequant_0_2x_neon| PROC
push {r4, r5}
+ vpush {d8-d15}
add r12, r2, #4
vld1.32 {d2[0]}, [r2], r3
@@ -72,6 +73,7 @@
vst1.32 {d4[1]}, [r2]
vst1.32 {d10[1]}, [r0]
+ vpop {d8-d15}
pop {r4, r5}
bx lr
diff --git a/vp8/common/arm/neon/idct_dequant_full_2x_neon.asm b/vp8/common/arm/neon/idct_dequant_full_2x_neon.asm
index d5dce63f6..8da0fa0b7 100644
--- a/vp8/common/arm/neon/idct_dequant_full_2x_neon.asm
+++ b/vp8/common/arm/neon/idct_dequant_full_2x_neon.asm
@@ -22,6 +22,8 @@
; r2 *dst
; r3 stride
|idct_dequant_full_2x_neon| PROC
+ vpush {d8-d15}
+
vld1.16 {q0, q1}, [r1] ; dq (same l/r)
vld1.16 {q2, q3}, [r0] ; l q
add r0, r0, #32
@@ -184,6 +186,7 @@
vst1.32 {d3[0]}, [r2]
vst1.32 {d3[1]}, [r1]
+ vpop {d8-d15}
bx lr
ENDP ; |idct_dequant_full_2x_neon|
diff --git a/vp8/common/arm/neon/loopfilter_neon.asm b/vp8/common/arm/neon/loopfilter_neon.asm
index e44be0a1e..c4f09c775 100644
--- a/vp8/common/arm/neon/loopfilter_neon.asm
+++ b/vp8/common/arm/neon/loopfilter_neon.asm
@@ -24,10 +24,12 @@
; sp unsigned char thresh,
|vp8_loop_filter_horizontal_edge_y_neon| PROC
push {lr}
+ vpush {d8-d15}
+
vdup.u8 q0, r2 ; duplicate blimit
vdup.u8 q1, r3 ; duplicate limit
sub r2, r0, r1, lsl #2 ; move src pointer down by 4 lines
- ldr r3, [sp, #4] ; load thresh
+ ldr r3, [sp, #68] ; load thresh
add r12, r2, r1
add r1, r1, r1
@@ -52,6 +54,7 @@
vst1.u8 {q7}, [r2@128], r1 ; store oq0
vst1.u8 {q8}, [r12@128], r1 ; store oq1
+ vpop {d8-d15}
pop {pc}
ENDP ; |vp8_loop_filter_horizontal_edge_y_neon|
@@ -64,10 +67,12 @@
; sp+4 unsigned char *v
|vp8_loop_filter_horizontal_edge_uv_neon| PROC
push {lr}
+ vpush {d8-d15}
+
vdup.u8 q0, r2 ; duplicate blimit
vdup.u8 q1, r3 ; duplicate limit
- ldr r12, [sp, #4] ; load thresh
- ldr r2, [sp, #8] ; load v ptr
+ ldr r12, [sp, #68] ; load thresh
+ ldr r2, [sp, #72] ; load v ptr
vdup.u8 q2, r12 ; duplicate thresh
sub r3, r0, r1, lsl #2 ; move u pointer down by 4 lines
@@ -104,6 +109,7 @@
vst1.u8 {d16}, [r0@64] ; store u oq1
vst1.u8 {d17}, [r2@64] ; store v oq1
+ vpop {d8-d15}
pop {pc}
ENDP ; |vp8_loop_filter_horizontal_edge_uv_neon|
@@ -120,11 +126,13 @@
|vp8_loop_filter_vertical_edge_y_neon| PROC
push {lr}
+ vpush {d8-d15}
+
vdup.u8 q0, r2 ; duplicate blimit
vdup.u8 q1, r3 ; duplicate limit
sub r2, r0, #4 ; src ptr down by 4 columns
add r1, r1, r1
- ldr r3, [sp, #4] ; load thresh
+ ldr r3, [sp, #68] ; load thresh
add r12, r2, r1, asr #1
vld1.u8 {d6}, [r2], r1
@@ -194,6 +202,7 @@
vst4.8 {d14[6], d15[6], d16[6], d17[6]}, [r0]
vst4.8 {d14[7], d15[7], d16[7], d17[7]}, [r12]
+ vpop {d8-d15}
pop {pc}
ENDP ; |vp8_loop_filter_vertical_edge_y_neon|
@@ -210,9 +219,11 @@
; sp+4 unsigned char *v
|vp8_loop_filter_vertical_edge_uv_neon| PROC
push {lr}
+ vpush {d8-d15}
+
vdup.u8 q0, r2 ; duplicate blimit
sub r12, r0, #4 ; move u pointer down by 4 columns
- ldr r2, [sp, #8] ; load v ptr
+ ldr r2, [sp, #72] ; load v ptr
vdup.u8 q1, r3 ; duplicate limit
sub r3, r2, #4 ; move v pointer down by 4 columns
@@ -233,7 +244,7 @@
vld1.u8 {d20}, [r12]
vld1.u8 {d21}, [r3]
- ldr r12, [sp, #4] ; load thresh
+ ldr r12, [sp, #68] ; load thresh
;transpose to 8x16 matrix
vtrn.32 q3, q7
@@ -281,6 +292,7 @@
vst4.8 {d10[7], d11[7], d12[7], d13[7]}, [r0]
vst4.8 {d14[7], d15[7], d16[7], d17[7]}, [r2]
+ vpop {d8-d15}
pop {pc}
ENDP ; |vp8_loop_filter_vertical_edge_uv_neon|
diff --git a/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm b/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm
index adf848b9c..6eb06516d 100644
--- a/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm
+++ b/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm
@@ -9,7 +9,6 @@
;
- ;EXPORT |vp8_loop_filter_simple_horizontal_edge_neon|
EXPORT |vp8_loop_filter_bhs_neon|
EXPORT |vp8_loop_filter_mbhs_neon|
ARM
@@ -22,7 +21,7 @@
; q1 limit, PRESERVE
|vp8_loop_filter_simple_horizontal_edge_neon| PROC
-
+ vpush {d8-d15}
sub r3, r0, r1, lsl #1 ; move src pointer down by 2 lines
vld1.u8 {q7}, [r0@128], r1 ; q0
@@ -82,6 +81,7 @@
vst1.u8 {q6}, [r3@128] ; store op0
vst1.u8 {q7}, [r0@128] ; store oq0
+ vpop {d8-d15}
bx lr
ENDP ; |vp8_loop_filter_simple_horizontal_edge_neon|
diff --git a/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm b/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm
index e690df2f7..78d13c895 100644
--- a/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm
+++ b/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm
@@ -9,7 +9,6 @@
;
- ;EXPORT |vp8_loop_filter_simple_vertical_edge_neon|
EXPORT |vp8_loop_filter_bvs_neon|
EXPORT |vp8_loop_filter_mbvs_neon|
ARM
@@ -22,6 +21,8 @@
; q1 limit, PRESERVE
|vp8_loop_filter_simple_vertical_edge_neon| PROC
+ vpush {d8-d15}
+
sub r0, r0, #2 ; move src pointer down by 2 columns
add r12, r1, r1
add r3, r0, r1
@@ -120,6 +121,7 @@
vst2.8 {d14[6], d15[6]}, [r0], r12
vst2.8 {d14[7], d15[7]}, [r3]
+ vpop {d8-d15}
bx lr
ENDP ; |vp8_loop_filter_simple_vertical_edge_neon|
diff --git a/vp8/common/arm/neon/mbloopfilter_neon.asm b/vp8/common/arm/neon/mbloopfilter_neon.asm
index f41c156df..d200c3090 100644
--- a/vp8/common/arm/neon/mbloopfilter_neon.asm
+++ b/vp8/common/arm/neon/mbloopfilter_neon.asm
@@ -28,8 +28,10 @@
; sp unsigned char thresh,
|vp8_mbloop_filter_horizontal_edge_y_neon| PROC
push {lr}
+ vpush {d8-d15}
+
add r1, r1, r1 ; double stride
- ldr r12, [sp, #4] ; load thresh
+ ldr r12, [sp, #68] ; load thresh
sub r0, r0, r1, lsl #1 ; move src pointer down by 4 lines
vdup.u8 q2, r12 ; thresh
add r12, r0, r1, lsr #1 ; move src pointer up by 1 line
@@ -55,6 +57,7 @@
vst1.u8 {q8}, [r12@128] ; store oq1
vst1.u8 {q9}, [r0@128] ; store oq2
+ vpop {d8-d15}
pop {pc}
ENDP ; |vp8_mbloop_filter_horizontal_edge_y_neon|
@@ -72,10 +75,12 @@
|vp8_mbloop_filter_horizontal_edge_uv_neon| PROC
push {lr}
- ldr r12, [sp, #4] ; load thresh
+ vpush {d8-d15}
+
+ ldr r12, [sp, #68] ; load thresh
sub r0, r0, r1, lsl #2 ; move u pointer down by 4 lines
vdup.u8 q2, r12 ; thresh
- ldr r12, [sp, #8] ; load v ptr
+ ldr r12, [sp, #72] ; load v ptr
sub r12, r12, r1, lsl #2 ; move v pointer down by 4 lines
vld1.u8 {d6}, [r0@64], r1 ; p3
@@ -116,6 +121,7 @@
vst1.u8 {d18}, [r0@64], r1 ; store u oq2
vst1.u8 {d19}, [r12@64], r1 ; store v oq2
+ vpop {d8-d15}
pop {pc}
ENDP ; |vp8_mbloop_filter_horizontal_edge_uv_neon|
@@ -130,7 +136,9 @@
; sp unsigned char thresh,
|vp8_mbloop_filter_vertical_edge_y_neon| PROC
push {lr}
- ldr r12, [sp, #4] ; load thresh
+ vpush {d8-d15}
+
+ ldr r12, [sp, #68] ; load thresh
sub r0, r0, #4 ; move src pointer down by 4 columns
vdup.s8 q2, r12 ; thresh
add r12, r0, r1, lsl #3 ; move src pointer down by 8 lines
@@ -208,6 +216,7 @@
vst1.8 {d20}, [r0]
vst1.8 {d21}, [r12]
+ vpop {d8-d15}
pop {pc}
ENDP ; |vp8_mbloop_filter_vertical_edge_y_neon|
@@ -224,10 +233,12 @@
; sp+4 unsigned char *v
|vp8_mbloop_filter_vertical_edge_uv_neon| PROC
push {lr}
- ldr r12, [sp, #4] ; load thresh
+ vpush {d8-d15}
+
+ ldr r12, [sp, #68] ; load thresh
sub r0, r0, #4 ; move u pointer down by 4 columns
vdup.u8 q2, r12 ; thresh
- ldr r12, [sp, #8] ; load v ptr
+ ldr r12, [sp, #72] ; load v ptr
sub r12, r12, #4 ; move v pointer down by 4 columns
vld1.u8 {d6}, [r0], r1 ;load u data
@@ -303,6 +314,7 @@
vst1.8 {d20}, [r0]
vst1.8 {d21}, [r12]
+ vpop {d8-d15}
pop {pc}
ENDP ; |vp8_mbloop_filter_vertical_edge_uv_neon|
diff --git a/vp8/common/arm/neon/sad16_neon.asm b/vp8/common/arm/neon/sad16_neon.asm
index d7c590e15..7197e5655 100644
--- a/vp8/common/arm/neon/sad16_neon.asm
+++ b/vp8/common/arm/neon/sad16_neon.asm
@@ -24,6 +24,7 @@
; r3 int ref_stride
|vp8_sad16x16_neon| PROC
;;
+ vpush {d8-d15}
vld1.8 {q0}, [r0], r1
vld1.8 {q4}, [r2], r3
@@ -132,6 +133,7 @@
vmov.32 r0, d0[0]
+ vpop {d8-d15}
bx lr
ENDP
@@ -143,6 +145,8 @@
; unsigned char *ref_ptr,
; int ref_stride)
|vp8_sad16x8_neon| PROC
+ vpush {d8-d15}
+
vld1.8 {q0}, [r0], r1
vld1.8 {q4}, [r2], r3
@@ -200,6 +204,7 @@
vmov.32 r0, d0[0]
+ vpop {d8-d15}
bx lr
ENDP
diff --git a/vp8/common/arm/neon/sad8_neon.asm b/vp8/common/arm/neon/sad8_neon.asm
index 23ba6df93..6b849d933 100644
--- a/vp8/common/arm/neon/sad8_neon.asm
+++ b/vp8/common/arm/neon/sad8_neon.asm
@@ -25,6 +25,7 @@
; int ref_stride)
|vp8_sad8x8_neon| PROC
+ vpush {d8-d15}
vld1.8 {d0}, [r0], r1
vld1.8 {d8}, [r2], r3
@@ -70,6 +71,7 @@
vmov.32 r0, d0[0]
+ vpop {d8-d15}
bx lr
ENDP
@@ -82,6 +84,7 @@
; int ref_stride)
|vp8_sad8x16_neon| PROC
+ vpush {d8-d15}
vld1.8 {d0}, [r0], r1
vld1.8 {d8}, [r2], r3
@@ -167,6 +170,7 @@
vmov.32 r0, d0[0]
+ vpop {d8-d15}
bx lr
ENDP
@@ -179,6 +183,7 @@
; int ref_stride)
|vp8_sad4x4_neon| PROC
+ vpush {d8-d15}
vld1.8 {d0}, [r0], r1
vld1.8 {d8}, [r2], r3
@@ -202,6 +207,7 @@
vpaddl.u32 d0, d1
vmov.32 r0, d0[0]
+ vpop {d8-d15}
bx lr
ENDP
diff --git a/vp8/common/arm/neon/save_reg_neon.asm b/vp8/common/arm/neon/save_reg_neon.asm
deleted file mode 100644
index fd7002e7a..000000000
--- a/vp8/common/arm/neon/save_reg_neon.asm
+++ /dev/null
@@ -1,36 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_push_neon|
- EXPORT |vp8_pop_neon|
-
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-
-|vp8_push_neon| PROC
- vst1.i64 {d8, d9, d10, d11}, [r0]!
- vst1.i64 {d12, d13, d14, d15}, [r0]!
- bx lr
-
- ENDP
-
-|vp8_pop_neon| PROC
- vld1.i64 {d8, d9, d10, d11}, [r0]!
- vld1.i64 {d12, d13, d14, d15}, [r0]!
- bx lr
-
- ENDP
-
- END
-
diff --git a/vp8/common/arm/neon/shortidct4x4llm_neon.asm b/vp8/common/arm/neon/shortidct4x4llm_neon.asm
index 67d2ab015..87ca887be 100644
--- a/vp8/common/arm/neon/shortidct4x4llm_neon.asm
+++ b/vp8/common/arm/neon/shortidct4x4llm_neon.asm
@@ -37,12 +37,14 @@
; result of the multiplication that is needed in IDCT.
|vp8_short_idct4x4llm_neon| PROC
+ vpush {d8-d15}
+
adr r12, idct_coeff
vld1.16 {q1, q2}, [r0]
vld1.16 {d0}, [r12]
vswp d3, d4 ;q2(vp[4] vp[12])
- ldr r0, [sp] ; stride
+ ldr r0, [sp, #64] ; stride
vqdmulh.s16 q3, q2, d0[2]
vqdmulh.s16 q4, q2, d0[0]
@@ -125,6 +127,7 @@
vst1.32 d2[0], [r3], r0
vst1.32 d2[1], [r3], r0
+ vpop {d8-d15}
bx lr
ENDP
diff --git a/vp8/common/arm/neon/sixtappredict16x16_neon.asm b/vp8/common/arm/neon/sixtappredict16x16_neon.asm
index 9fdafd360..dd27719bf 100644
--- a/vp8/common/arm/neon/sixtappredict16x16_neon.asm
+++ b/vp8/common/arm/neon/sixtappredict16x16_neon.asm
@@ -43,10 +43,11 @@ filter16_coeff
|vp8_sixtap_predict16x16_neon| PROC
push {r4-r5, lr}
+ vpush {d8-d15}
adr r12, filter16_coeff
- ldr r4, [sp, #12] ;load parameters from stack
- ldr r5, [sp, #16] ;load parameters from stack
+ ldr r4, [sp, #76] ;load parameters from stack
+ ldr r5, [sp, #80] ;load parameters from stack
cmp r2, #0 ;skip first_pass filter if xoffset=0
beq secondpass_filter16x16_only
@@ -291,6 +292,8 @@ secondpass_inner_loop_neon
bne filt_blk2d_sp16x16_outloop_neon
add sp, sp, #336
+
+ vpop {d8-d15}
pop {r4-r5,pc}
;--------------------
@@ -384,6 +387,7 @@ filt_blk2d_fpo16x16_loop_neon
bne filt_blk2d_fpo16x16_loop_neon
+ vpop {d8-d15}
pop {r4-r5,pc}
;--------------------
@@ -482,6 +486,7 @@ secondpass_only_inner_loop_neon
bne filt_blk2d_spo16x16_outloop_neon
+ vpop {d8-d15}
pop {r4-r5,pc}
ENDP
diff --git a/vp8/common/arm/neon/sixtappredict4x4_neon.asm b/vp8/common/arm/neon/sixtappredict4x4_neon.asm
index a4222bc62..e32e71305 100644
--- a/vp8/common/arm/neon/sixtappredict4x4_neon.asm
+++ b/vp8/common/arm/neon/sixtappredict4x4_neon.asm
@@ -35,10 +35,11 @@ filter4_coeff
|vp8_sixtap_predict4x4_neon| PROC
push {r4, lr}
+ vpush {d8-d15}
adr r12, filter4_coeff
- ldr r4, [sp, #8] ;load parameters from stack
- ldr lr, [sp, #12] ;load parameters from stack
+ ldr r4, [sp, #72] ;load parameters from stack
+ ldr lr, [sp, #76] ;load parameters from stack
cmp r2, #0 ;skip first_pass filter if xoffset=0
beq secondpass_filter4x4_only
@@ -261,6 +262,7 @@ filter4_coeff
vst1.32 {d4[0]}, [r1]
vst1.32 {d4[1]}, [r2]
+ vpop {d8-d15}
pop {r4, pc}
@@ -348,6 +350,7 @@ firstpass_filter4x4_only
vst1.32 {d28[0]}, [r1]
vst1.32 {d28[1]}, [r2]
+ vpop {d8-d15}
pop {r4, pc}
@@ -413,6 +416,7 @@ secondpass_filter4x4_only
vst1.32 {d4[0]}, [r1]
vst1.32 {d4[1]}, [r2]
+ vpop {d8-d15}
pop {r4, pc}
ENDP
diff --git a/vp8/common/arm/neon/sixtappredict8x4_neon.asm b/vp8/common/arm/neon/sixtappredict8x4_neon.asm
index a57ec015f..d19bf8920 100644
--- a/vp8/common/arm/neon/sixtappredict8x4_neon.asm
+++ b/vp8/common/arm/neon/sixtappredict8x4_neon.asm
@@ -35,10 +35,11 @@ filter8_coeff
|vp8_sixtap_predict8x4_neon| PROC
push {r4-r5, lr}
+ vpush {d8-d15}
adr r12, filter8_coeff
- ldr r4, [sp, #12] ;load parameters from stack
- ldr r5, [sp, #16] ;load parameters from stack
+ ldr r4, [sp, #76] ;load parameters from stack
+ ldr r5, [sp, #80] ;load parameters from stack
cmp r2, #0 ;skip first_pass filter if xoffset=0
beq secondpass_filter8x4_only
@@ -297,6 +298,8 @@ filter8_coeff
vst1.u8 {d9}, [r4], r5
add sp, sp, #32
+
+ vpop {d8-d15}
pop {r4-r5,pc}
;--------------------
@@ -392,6 +395,7 @@ firstpass_filter8x4_only
vst1.u8 {d24}, [r4], r5
vst1.u8 {d25}, [r4], r5
+ vpop {d8-d15}
pop {r4-r5,pc}
;---------------------
@@ -464,6 +468,7 @@ secondpass_filter8x4_only
vst1.u8 {d8}, [r4], r5
vst1.u8 {d9}, [r4], r5
+ vpop {d8-d15}
pop {r4-r5,pc}
ENDP
diff --git a/vp8/common/arm/neon/sixtappredict8x8_neon.asm b/vp8/common/arm/neon/sixtappredict8x8_neon.asm
index 00ed5aeef..4b049252c 100644
--- a/vp8/common/arm/neon/sixtappredict8x8_neon.asm
+++ b/vp8/common/arm/neon/sixtappredict8x8_neon.asm
@@ -35,11 +35,11 @@ filter8_coeff
|vp8_sixtap_predict8x8_neon| PROC
push {r4-r5, lr}
-
+ vpush {d8-d15}
adr r12, filter8_coeff
- ldr r4, [sp, #12] ;load parameters from stack
- ldr r5, [sp, #16] ;load parameters from stack
+ ldr r4, [sp, #76] ;load parameters from stack
+ ldr r5, [sp, #80] ;load parameters from stack
cmp r2, #0 ;skip first_pass filter if xoffset=0
beq secondpass_filter8x8_only
@@ -324,6 +324,8 @@ filt_blk2d_sp8x8_loop_neon
bne filt_blk2d_sp8x8_loop_neon
add sp, sp, #64
+
+ vpop {d8-d15}
pop {r4-r5,pc}
;---------------------
@@ -428,6 +430,7 @@ filt_blk2d_fpo8x8_loop_neon
bne filt_blk2d_fpo8x8_loop_neon
+ vpop {d8-d15}
pop {r4-r5,pc}
;---------------------
@@ -515,6 +518,7 @@ filt_blk2d_spo8x8_loop_neon
bne filt_blk2d_spo8x8_loop_neon
+ vpop {d8-d15}
pop {r4-r5,pc}
ENDP
diff --git a/vp8/common/arm/neon/variance_neon.asm b/vp8/common/arm/neon/variance_neon.asm
index e3b48327d..8ecad72b9 100644
--- a/vp8/common/arm/neon/variance_neon.asm
+++ b/vp8/common/arm/neon/variance_neon.asm
@@ -26,6 +26,7 @@
; r3 int recon_stride
; stack unsigned int *sse
|vp8_variance16x16_neon| PROC
+ vpush {q5}
vmov.i8 q8, #0 ;q8 - sum
vmov.i8 q9, #0 ;q9, q10 - sse
vmov.i8 q10, #0
@@ -67,7 +68,7 @@ variance16x16_neon_loop
vadd.u32 q10, q9, q10 ;accumulate sse
vpaddl.s32 q0, q8 ;accumulate sum
- ldr r12, [sp] ;load *sse from stack
+ ldr r12, [sp, #16] ;load *sse from stack
vpaddl.u32 q1, q10
vadd.s64 d0, d0, d1
@@ -87,6 +88,8 @@ variance16x16_neon_loop
vsub.u32 d0, d1, d10
vmov.32 r0, d0[0] ;return
+
+ vpop {q5}
bx lr
ENDP
@@ -99,6 +102,8 @@ variance16x16_neon_loop
; int recon_stride,
; unsigned int *sse)
|vp8_variance16x8_neon| PROC
+ vpush {q5}
+
vmov.i8 q8, #0 ;q8 - sum
vmov.i8 q9, #0 ;q9, q10 - sse
vmov.i8 q10, #0
@@ -137,7 +142,7 @@ variance16x8_neon_loop
vadd.u32 q10, q9, q10 ;accumulate sse
vpaddl.s32 q0, q8 ;accumulate sum
- ldr r12, [sp] ;load *sse from stack
+ ldr r12, [sp, #16] ;load *sse from stack
vpaddl.u32 q1, q10
vadd.s64 d0, d0, d1
@@ -149,6 +154,8 @@ variance16x8_neon_loop
vsub.u32 d0, d1, d10
vmov.32 r0, d0[0] ;return
+
+ vpop {q5}
bx lr
ENDP
@@ -162,6 +169,8 @@ variance16x8_neon_loop
; unsigned int *sse)
|vp8_variance8x16_neon| PROC
+ vpush {q5}
+
vmov.i8 q8, #0 ;q8 - sum
vmov.i8 q9, #0 ;q9, q10 - sse
vmov.i8 q10, #0
@@ -192,7 +201,7 @@ variance8x16_neon_loop
vadd.u32 q10, q9, q10 ;accumulate sse
vpaddl.s32 q0, q8 ;accumulate sum
- ldr r12, [sp] ;load *sse from stack
+ ldr r12, [sp, #16] ;load *sse from stack
vpaddl.u32 q1, q10
vadd.s64 d0, d0, d1
@@ -204,6 +213,8 @@ variance8x16_neon_loop
vsub.u32 d0, d1, d10
vmov.32 r0, d0[0] ;return
+
+ vpop {q5}
bx lr
ENDP
@@ -215,6 +226,8 @@ variance8x16_neon_loop
; r3 int recon_stride
; stack unsigned int *sse
|vp8_variance8x8_neon| PROC
+ vpush {q5}
+
vmov.i8 q8, #0 ;q8 - sum
vmov.i8 q9, #0 ;q9, q10 - sse
vmov.i8 q10, #0
@@ -257,7 +270,7 @@ variance8x8_neon_loop
vadd.u32 q10, q9, q10 ;accumulate sse
vpaddl.s32 q0, q8 ;accumulate sum
- ldr r12, [sp] ;load *sse from stack
+ ldr r12, [sp, #16] ;load *sse from stack
vpaddl.u32 q1, q10
vadd.s64 d0, d0, d1
@@ -269,6 +282,8 @@ variance8x8_neon_loop
vsub.u32 d0, d1, d10
vmov.32 r0, d0[0] ;return
+
+ vpop {q5}
bx lr
ENDP
diff --git a/vp8/common/arm/neon/vp8_subpixelvariance16x16_neon.asm b/vp8/common/arm/neon/vp8_subpixelvariance16x16_neon.asm
index 9d22c5252..adc5b7e3a 100644
--- a/vp8/common/arm/neon/vp8_subpixelvariance16x16_neon.asm
+++ b/vp8/common/arm/neon/vp8_subpixelvariance16x16_neon.asm
@@ -31,11 +31,12 @@ bilinear_taps_coeff
|vp8_sub_pixel_variance16x16_neon_func| PROC
push {r4-r6, lr}
+ vpush {d8-d15}
adr r12, bilinear_taps_coeff
- ldr r4, [sp, #16] ;load *dst_ptr from stack
- ldr r5, [sp, #20] ;load dst_pixels_per_line from stack
- ldr r6, [sp, #24] ;load *sse from stack
+ ldr r4, [sp, #80] ;load *dst_ptr from stack
+ ldr r5, [sp, #84] ;load dst_pixels_per_line from stack
+ ldr r6, [sp, #88] ;load *sse from stack
cmp r2, #0 ;skip first_pass filter if xoffset=0
beq secondpass_bfilter16x16_only
@@ -416,6 +417,7 @@ sub_pixel_variance16x16_neon_loop
add sp, sp, #528
vmov.32 r0, d0[0] ;return
+ vpop {d8-d15}
pop {r4-r6,pc}
ENDP
diff --git a/vp8/common/arm/neon/vp8_subpixelvariance16x16s_neon.asm b/vp8/common/arm/neon/vp8_subpixelvariance16x16s_neon.asm
index 155be4fc5..b0829af75 100644
--- a/vp8/common/arm/neon/vp8_subpixelvariance16x16s_neon.asm
+++ b/vp8/common/arm/neon/vp8_subpixelvariance16x16s_neon.asm
@@ -31,9 +31,10 @@
;================================================
|vp8_variance_halfpixvar16x16_h_neon| PROC
push {lr}
+ vpush {d8-d15}
mov r12, #4 ;loop counter
- ldr lr, [sp, #4] ;load *sse from stack
+ ldr lr, [sp, #68] ;load *sse from stack
vmov.i8 q8, #0 ;q8 - sum
vmov.i8 q9, #0 ;q9, q10 - sse
vmov.i8 q10, #0
@@ -116,6 +117,8 @@ vp8_filt_fpo16x16s_4_0_loop_neon
vsub.u32 d0, d1, d10
vmov.32 r0, d0[0] ;return
+
+ vpop {d8-d15}
pop {pc}
ENDP
@@ -131,11 +134,12 @@ vp8_filt_fpo16x16s_4_0_loop_neon
;================================================
|vp8_variance_halfpixvar16x16_v_neon| PROC
push {lr}
+ vpush {d8-d15}
mov r12, #4 ;loop counter
vld1.u8 {q0}, [r0], r1 ;load src data
- ldr lr, [sp, #4] ;load *sse from stack
+ ldr lr, [sp, #68] ;load *sse from stack
vmov.i8 q8, #0 ;q8 - sum
vmov.i8 q9, #0 ;q9, q10 - sse
@@ -212,6 +216,8 @@ vp8_filt_spo16x16s_0_4_loop_neon
vsub.u32 d0, d1, d10
vmov.32 r0, d0[0] ;return
+
+ vpop {d8-d15}
pop {pc}
ENDP
@@ -227,10 +233,11 @@ vp8_filt_spo16x16s_0_4_loop_neon
;================================================
|vp8_variance_halfpixvar16x16_hv_neon| PROC
push {lr}
+ vpush {d8-d15}
vld1.u8 {d0, d1, d2, d3}, [r0], r1 ;load src data
- ldr lr, [sp, #4] ;load *sse from stack
+ ldr lr, [sp, #68] ;load *sse from stack
vmov.i8 q13, #0 ;q8 - sum
vext.8 q1, q0, q1, #1 ;construct src_ptr[1]
@@ -331,6 +338,8 @@ vp8_filt16x16s_4_4_loop_neon
vsub.u32 d0, d1, d10
vmov.32 r0, d0[0] ;return
+
+ vpop {d8-d15}
pop {pc}
ENDP
@@ -349,10 +358,11 @@ vp8_filt16x16s_4_4_loop_neon
|vp8_sub_pixel_variance16x16s_neon| PROC
push {r4, lr}
+ vpush {d8-d15}
- ldr r4, [sp, #8] ;load *dst_ptr from stack
- ldr r12, [sp, #12] ;load dst_pixels_per_line from stack
- ldr lr, [sp, #16] ;load *sse from stack
+ ldr r4, [sp, #72] ;load *dst_ptr from stack
+ ldr r12, [sp, #76] ;load dst_pixels_per_line from stack
+ ldr lr, [sp, #80] ;load *sse from stack
cmp r2, #0 ;skip first_pass filter if xoffset=0
beq secondpass_bfilter16x16s_only
@@ -566,6 +576,7 @@ sub_pixel_variance16x16s_neon_loop
add sp, sp, #256
vmov.32 r0, d0[0] ;return
+ vpop {d8-d15}
pop {r4, pc}
ENDP
diff --git a/vp8/common/arm/neon/vp8_subpixelvariance8x8_neon.asm b/vp8/common/arm/neon/vp8_subpixelvariance8x8_neon.asm
index f6b684753..9d9f9e077 100644
--- a/vp8/common/arm/neon/vp8_subpixelvariance8x8_neon.asm
+++ b/vp8/common/arm/neon/vp8_subpixelvariance8x8_neon.asm
@@ -26,11 +26,12 @@
|vp8_sub_pixel_variance8x8_neon| PROC
push {r4-r5, lr}
+ vpush {d8-d15}
adr r12, bilinear_taps_coeff
- ldr r4, [sp, #12] ;load *dst_ptr from stack
- ldr r5, [sp, #16] ;load dst_pixels_per_line from stack
- ldr lr, [sp, #20] ;load *sse from stack
+ ldr r4, [sp, #76] ;load *dst_ptr from stack
+ ldr r5, [sp, #80] ;load dst_pixels_per_line from stack
+ ldr lr, [sp, #84] ;load *sse from stack
cmp r2, #0 ;skip first_pass filter if xoffset=0
beq skip_firstpass_filter
@@ -210,6 +211,8 @@ sub_pixel_variance8x8_neon_loop
vsub.u32 d0, d1, d10
vmov.32 r0, d0[0] ;return
+
+ vpop {d8-d15}
pop {r4-r5, pc}
ENDP
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index 2d9e343bc..29fea616b 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -178,12 +178,6 @@ vpx_codec_err_t vp8dx_set_reference(VP8D_COMP *pbi, enum vpx_ref_frame_type ref_
return pbi->common.error.error_code;
}
-/*For ARM NEON, d8-d15 are callee-saved registers, and need to be saved by us.*/
-#if HAVE_NEON
-extern void vp8_push_neon(int64_t *store);
-extern void vp8_pop_neon(int64_t *store);
-#endif
-
static int get_free_fb (VP8_COMMON *cm)
{
int i;
@@ -307,9 +301,6 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, size_t size,
const uint8_t *source,
int64_t time_stamp)
{
-#if HAVE_NEON
- int64_t dx_store_reg[8];
-#endif
VP8_COMMON *cm = &pbi->common;
int retcode = -1;
@@ -319,15 +310,6 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, size_t size,
if(retcode <= 0)
return retcode;
-#if HAVE_NEON
-#if CONFIG_RUNTIME_CPU_DETECT
- if (cm->cpu_caps & HAS_NEON)
-#endif
- {
- vp8_push_neon(dx_store_reg);
- }
-#endif
-
cm->new_fb_idx = get_free_fb (cm);
/* setup reference frames for vp8_decode_frame */
@@ -403,15 +385,6 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, size_t size,
pbi->last_time_stamp = time_stamp;
decode_exit:
-#if HAVE_NEON
-#if CONFIG_RUNTIME_CPU_DETECT
- if (cm->cpu_caps & HAS_NEON)
-#endif
- {
- vp8_pop_neon(dx_store_reg);
- }
-#endif
-
pbi->common.error.setjmp = 0;
return retcode;
}
diff --git a/vp8/encoder/arm/neon/subtract_neon.asm b/vp8/encoder/arm/neon/subtract_neon.asm
index 5bda78678..840cb33d9 100644
--- a/vp8/encoder/arm/neon/subtract_neon.asm
+++ b/vp8/encoder/arm/neon/subtract_neon.asm
@@ -65,8 +65,10 @@
; unsigned char *pred, int pred_stride)
|vp8_subtract_mby_neon| PROC
push {r4-r7}
+ vpush {d8-d15}
+
mov r12, #4
- ldr r4, [sp, #16] ; pred_stride
+ ldr r4, [sp, #80] ; pred_stride
mov r6, #32 ; "diff" stride x2
add r5, r0, #16 ; second diff pointer
@@ -101,6 +103,7 @@ subtract_mby_loop
subs r12, r12, #1
bne subtract_mby_loop
+ vpop {d8-d15}
pop {r4-r7}
bx lr
ENDP
@@ -112,9 +115,11 @@ subtract_mby_loop
|vp8_subtract_mbuv_neon| PROC
push {r4-r7}
- ldr r4, [sp, #16] ; upred
- ldr r5, [sp, #20] ; vpred
- ldr r6, [sp, #24] ; pred_stride
+ vpush {d8-d15}
+
+ ldr r4, [sp, #80] ; upred
+ ldr r5, [sp, #84] ; vpred
+ ldr r6, [sp, #88] ; pred_stride
add r0, r0, #512 ; short *udiff = diff + 256;
mov r12, #32 ; "diff" stride x2
add r7, r0, #16 ; second diff pointer
@@ -191,6 +196,7 @@ subtract_mby_loop
vst1.16 {q14}, [r0], r12
vst1.16 {q15}, [r7], r12
+ vpop {d8-d15}
pop {r4-r7}
bx lr
diff --git a/vp8/encoder/arm/neon/vp8_memcpy_neon.asm b/vp8/encoder/arm/neon/vp8_memcpy_neon.asm
index 5b9f11e59..d219e2d14 100644
--- a/vp8/encoder/arm/neon/vp8_memcpy_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_memcpy_neon.asm
@@ -21,6 +21,7 @@
;void vp8_memcpy_partial_neon(unsigned char *dst_ptr, unsigned char *src_ptr,
; int sz);
|vp8_memcpy_partial_neon| PROC
+ vpush {d8-d15}
;pld [r1] ;preload pred data
;pld [r1, #128]
;pld [r1, #256]
@@ -64,6 +65,7 @@ extra_copy_neon_loop
bne extra_copy_neon_loop
done_copy_neon_loop
+ vpop {d8-d15}
bx lr
ENDP
diff --git a/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm b/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm
index 55edbf512..f82af3ee3 100644
--- a/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm
@@ -27,6 +27,8 @@
;from vp8_variance().
|vp8_mse16x16_neon| PROC
+ vpush {q7}
+
vmov.i8 q7, #0 ;q7, q8, q9, q10 - sse
vmov.i8 q8, #0
vmov.i8 q9, #0
@@ -62,7 +64,7 @@ mse16x16_neon_loop
vadd.u32 q7, q7, q8
vadd.u32 q9, q9, q10
- ldr r12, [sp] ;load *sse from stack
+ ldr r12, [sp, #16] ;load *sse from stack
vadd.u32 q10, q7, q9
vpaddl.u32 q1, q10
@@ -71,6 +73,7 @@ mse16x16_neon_loop
vst1.32 {d0[0]}, [r12]
vmov.32 r0, d0[0]
+ vpop {q7}
bx lr
ENDP
@@ -82,6 +85,8 @@ mse16x16_neon_loop
; r2 unsigned char *ref_ptr,
; r3 int recon_stride
|vp8_get4x4sse_cs_neon| PROC
+ vpush {q7}
+
vld1.8 {d0}, [r0], r1 ;Load up source and reference
vld1.8 {d4}, [r2], r3
vld1.8 {d1}, [r0], r1
@@ -109,6 +114,8 @@ mse16x16_neon_loop
vadd.u64 d0, d2, d3
vmov.32 r0, d0[0]
+
+ vpop {q7}
bx lr
ENDP
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 32c599791..560134ee5 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -4820,33 +4820,11 @@ static void Pass2Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest,
}
#endif
-/* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */
-#if HAVE_NEON
-extern void vp8_push_neon(int64_t *store);
-extern void vp8_pop_neon(int64_t *store);
-#endif
-
-
int vp8_receive_raw_frame(VP8_COMP *cpi, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time)
{
-#if HAVE_NEON
- int64_t store_reg[8];
-#if CONFIG_RUNTIME_CPU_DETECT
- VP8_COMMON *cm = &cpi->common;
-#endif
-#endif
struct vpx_usec_timer timer;
int res = 0;
-#if HAVE_NEON
-#if CONFIG_RUNTIME_CPU_DETECT
- if (cm->cpu_caps & HAS_NEON)
-#endif
- {
- vp8_push_neon(store_reg);
- }
-#endif
-
vpx_usec_timer_start(&timer);
/* Reinit the lookahead buffer if the frame size changes */
@@ -4863,15 +4841,6 @@ int vp8_receive_raw_frame(VP8_COMP *cpi, unsigned int frame_flags, YV12_BUFFER_C
vpx_usec_timer_mark(&timer);
cpi->time_receive_data += vpx_usec_timer_elapsed(&timer);
-#if HAVE_NEON
-#if CONFIG_RUNTIME_CPU_DETECT
- if (cm->cpu_caps & HAS_NEON)
-#endif
- {
- vp8_pop_neon(store_reg);
- }
-#endif
-
return res;
}
@@ -4892,9 +4861,6 @@ static int frame_is_reference(const VP8_COMP *cpi)
int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, unsigned char *dest_end, int64_t *time_stamp, int64_t *time_end, int flush)
{
-#if HAVE_NEON
- int64_t store_reg[8];
-#endif
VP8_COMMON *cm;
struct vpx_usec_timer tsctimer;
struct vpx_usec_timer ticktimer;
@@ -4914,15 +4880,6 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
cpi->common.error.setjmp = 1;
-#if HAVE_NEON
-#if CONFIG_RUNTIME_CPU_DETECT
- if (cm->cpu_caps & HAS_NEON)
-#endif
- {
- vp8_push_neon(store_reg);
- }
-#endif
-
vpx_usec_timer_start(&cmptimer);
cpi->source = NULL;
@@ -5005,14 +4962,6 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
#endif
-#if HAVE_NEON
-#if CONFIG_RUNTIME_CPU_DETECT
- if (cm->cpu_caps & HAS_NEON)
-#endif
- {
- vp8_pop_neon(store_reg);
- }
-#endif
return -1;
}
@@ -5416,15 +5365,6 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
#endif
#endif
-#if HAVE_NEON
-#if CONFIG_RUNTIME_CPU_DETECT
- if (cm->cpu_caps & HAS_NEON)
-#endif
- {
- vp8_pop_neon(store_reg);
- }
-#endif
-
cpi->common.error.setjmp = 0;
return 0;
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index dfb54a52c..3568b34e1 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -172,7 +172,6 @@ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sixtappredict8x4_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sixtappredict8x8_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sixtappredict16x16_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/buildintrapredictorsmby_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/save_reg_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/idct_dequant_full_2x_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/idct_dequant_0_2x_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/idct_blk_neon.c
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index 8a8155410..99fd6ca10 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -12,7 +12,7 @@ struct macroblockd;
/* Encoder forward decls */
struct macroblock;
struct vp9_variance_vtable;
-
+struct search_site_config;
struct mv;
union int_mv;
struct yv12_buffer_config;
@@ -563,33 +563,6 @@ specialize qw/vp9_sad4x8_avg/, "$sse_x86inc";
add_proto qw/unsigned int vp9_sad4x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
specialize qw/vp9_sad4x4_avg/, "$sse_x86inc";
-add_proto qw/unsigned int vp9_variance_halfpixvar16x16_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance_halfpixvar16x16_h/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_variance_halfpixvar16x16_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance_halfpixvar16x16_v/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_variance_halfpixvar16x16_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance_halfpixvar16x16_hv/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_variance_halfpixvar64x64_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance_halfpixvar64x64_h/;
-
-add_proto qw/unsigned int vp9_variance_halfpixvar64x64_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance_halfpixvar64x64_v/;
-
-add_proto qw/unsigned int vp9_variance_halfpixvar64x64_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance_halfpixvar64x64_hv/;
-
-add_proto qw/unsigned int vp9_variance_halfpixvar32x32_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance_halfpixvar32x32_h/;
-
-add_proto qw/unsigned int vp9_variance_halfpixvar32x32_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance_halfpixvar32x32_v/;
-
-add_proto qw/unsigned int vp9_variance_halfpixvar32x32_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance_halfpixvar32x32_hv/;
-
add_proto qw/void vp9_sad64x64x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array";
specialize qw/vp9_sad64x64x3/;
@@ -678,9 +651,6 @@ specialize qw/vp9_sad4x8x4d sse/;
add_proto qw/void vp9_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
specialize qw/vp9_sad4x4x4d sse/;
-#add_proto qw/unsigned int vp9_sub_pixel_mse16x16/, "const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse";
-#specialize qw/vp9_sub_pixel_mse16x16 sse2 mmx/;
-
add_proto qw/unsigned int vp9_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
specialize qw/vp9_mse16x16 mmx/, "$sse2_x86inc", "$avx2_x86inc";
@@ -693,12 +663,6 @@ specialize qw/vp9_mse16x8/;
add_proto qw/unsigned int vp9_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
specialize qw/vp9_mse8x8/;
-add_proto qw/unsigned int vp9_sub_pixel_mse64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_sub_pixel_mse64x64/;
-
-add_proto qw/unsigned int vp9_sub_pixel_mse32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_sub_pixel_mse32x32/;
-
add_proto qw/unsigned int vp9_get_mb_ss/, "const int16_t *";
specialize qw/vp9_get_mb_ss mmx sse2/;
# ENCODEMB INVOKE
@@ -766,11 +730,11 @@ add_proto qw/int vp9_refining_search_sad/, "const struct macroblock *x, struct m
specialize qw/vp9_refining_search_sad sse3/;
$vp9_refining_search_sad_sse3=vp9_refining_search_sadx4;
-add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
+add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
specialize qw/vp9_diamond_search_sad sse3/;
$vp9_diamond_search_sad_sse3=vp9_diamond_search_sadx4;
-add_proto qw/int vp9_full_range_search/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
+add_proto qw/int vp9_full_range_search/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
specialize qw/vp9_full_range_search/;
add_proto qw/void vp9_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
diff --git a/vp9/common/vp9_tapify.py b/vp9/common/vp9_tapify.py
deleted file mode 100644
index 99529cff0..000000000
--- a/vp9/common/vp9_tapify.py
+++ /dev/null
@@ -1,106 +0,0 @@
-"""
- * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
-"""
-#!/usr/bin/env python
-import sys,string,os,re,math,numpy
-scale = 2**16
-def dist(p1,p2):
- x1,y1 = p1
- x2,y2 = p2
- if x1==x2 and y1==y2 :
- return 1.0
- return 1/ math.sqrt((x1-x2)*(x1-x2)+(y1-y2)*(y1-y2))
-
-def gettaps(p):
- def l(b):
- return int(math.floor(b))
- def h(b):
- return int(math.ceil(b))
- def t(b,p,s):
- return int((scale*dist(b,p)+s/2)/s)
- r,c = p
- ul=[l(r),l(c)]
- ur=[l(r),h(c)]
- ll=[h(r),l(c)]
- lr=[h(r),h(c)]
- sum = dist(ul,p)+dist(ur,p)+dist(ll,p)+dist(lr,p)
- t4 = scale - t(ul,p,sum) - t(ur,p,sum) - t(ll,p,sum);
- return [[ul,t(ul,p,sum)],[ur,t(ur,p,sum)],
- [ll,t(ll,p,sum)],[lr,t4]]
-
-def print_mb_taps(angle,blocksize):
- theta = angle / 57.2957795;
- affine = [[math.cos(theta),-math.sin(theta)],
- [math.sin(theta),math.cos(theta)]]
- radius = (float(blocksize)-1)/2
- print " // angle of",angle,"degrees"
- for y in range(blocksize) :
- for x in range(blocksize) :
- r,c = numpy.dot(affine,[y-radius, x-radius])
- tps = gettaps([r+radius,c+radius])
- for t in tps :
- p,t = t
- tr,tc = p
- print " %2d, %2d, %5d, " % (tr,tc,t,),
- print " // %2d,%2d " % (y,x)
-
-i=float(sys.argv[1])
-while i <= float(sys.argv[2]) :
- print_mb_taps(i,float(sys.argv[4]))
- i=i+float(sys.argv[3])
-"""
-
-taps = []
-pt=dict()
-ptr=dict()
-for y in range(16) :
- for x in range(16) :
- r,c = numpy.dot(affine,[y-7.5, x-7.5])
- tps = gettaps([r+7.5,c+7.5])
- j=0
- for tp in tps :
- p,i = tp
- r,c = p
- pt[y,x,j]= [p,i]
- try:
- ptr[r,j,c].append([y,x])
- except:
- ptr[r,j,c]=[[y,x]]
- j = j+1
-
-for key in sorted(pt.keys()) :
- print key,pt[key]
-
-lr = -99
-lj = -99
-lc = 0
-
-shuf=""
-mask=""
-for r,j,c in sorted(ptr.keys()) :
- for y,x in ptr[r,j,c] :
- if lr != r or lj != j :
- print "shuf_"+str(lr)+"_"+str(lj)+"_"+shuf.ljust(16,"0"), lc
- shuf=""
- lc = 0
- for i in range(lc,c-1) :
- shuf = shuf +"0"
- shuf = shuf + hex(x)[2]
- lc =c
- break
- lr = r
- lj = j
-# print r,j,c,ptr[r,j,c]
-# print
-
-for r,j,c in sorted(ptr.keys()) :
- for y,x in ptr[r,j,c] :
- print r,j,c,y,x
- break
-"""
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index faf710c24..385b2ebe5 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -32,74 +32,6 @@
#include "vp9/decoder/vp9_detokenize.h"
#include "vp9/decoder/vp9_dthread.h"
-#define WRITE_RECON_BUFFER 0
-#if WRITE_RECON_BUFFER == 1
-static void recon_write_yuv_frame(const char *name,
- const YV12_BUFFER_CONFIG *s,
- int w, int _h) {
- FILE *yuv_file = fopen(name, "ab");
- const uint8_t *src = s->y_buffer;
- int h = _h;
-
- do {
- fwrite(src, w, 1, yuv_file);
- src += s->y_stride;
- } while (--h);
-
- src = s->u_buffer;
- h = (_h + 1) >> 1;
- w = (w + 1) >> 1;
-
- do {
- fwrite(src, w, 1, yuv_file);
- src += s->uv_stride;
- } while (--h);
-
- src = s->v_buffer;
- h = (_h + 1) >> 1;
-
- do {
- fwrite(src, w, 1, yuv_file);
- src += s->uv_stride;
- } while (--h);
-
- fclose(yuv_file);
-}
-#endif
-#if WRITE_RECON_BUFFER == 2
-void write_dx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) {
- // write the frame
- FILE *yframe;
- int i;
- char filename[255];
-
- snprintf(filename, sizeof(filename)-1, "dx\\y%04d.raw", this_frame);
- yframe = fopen(filename, "wb");
-
- for (i = 0; i < frame->y_height; i++)
- fwrite(frame->y_buffer + i * frame->y_stride,
- frame->y_width, 1, yframe);
-
- fclose(yframe);
- snprintf(filename, sizeof(filename)-1, "dx\\u%04d.raw", this_frame);
- yframe = fopen(filename, "wb");
-
- for (i = 0; i < frame->uv_height; i++)
- fwrite(frame->u_buffer + i * frame->uv_stride,
- frame->uv_width, 1, yframe);
-
- fclose(yframe);
- snprintf(filename, sizeof(filename)-1, "dx\\v%04d.raw", this_frame);
- yframe = fopen(filename, "wb");
-
- for (i = 0; i < frame->uv_height; i++)
- fwrite(frame->v_buffer + i * frame->uv_stride,
- frame->uv_width, 1, yframe);
-
- fclose(yframe);
-}
-#endif
-
void vp9_initialize_dec() {
static int init_done = 0;
@@ -348,15 +280,6 @@ int vp9_receive_compressed_data(VP9Decoder *pbi,
swap_frame_buffers(pbi);
-#if WRITE_RECON_BUFFER == 2
- if (cm->show_frame)
- write_dx_frame_to_file(cm->frame_to_show,
- cm->current_video_frame);
- else
- write_dx_frame_to_file(cm->frame_to_show,
- cm->current_video_frame + 1000);
-#endif
-
if (!pbi->do_loopfilter_inline) {
// If multiple threads are used to decode tiles, then we use those threads
// to do parallel loopfiltering.
@@ -367,21 +290,6 @@ int vp9_receive_compressed_data(VP9Decoder *pbi,
}
}
-#if WRITE_RECON_BUFFER == 2
- if (cm->show_frame)
- write_dx_frame_to_file(cm->frame_to_show,
- cm->current_video_frame + 2000);
- else
- write_dx_frame_to_file(cm->frame_to_show,
- cm->current_video_frame + 3000);
-#endif
-
-#if WRITE_RECON_BUFFER == 1
- if (cm->show_frame)
- recon_write_yuv_frame("recon.yuv", cm->frame_to_show,
- cm->width, cm->height);
-#endif
-
vp9_clear_system_state();
cm->last_width = cm->width;
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index fcf2a0420..f35a85fba 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -20,12 +20,6 @@
extern "C" {
#endif
-// motion search site
-typedef struct {
- MV mv;
- int offset;
-} search_site;
-
// Structure to hold snapshot of coding context during the mode picking process
typedef struct {
MODE_INFO mic;
@@ -108,10 +102,6 @@ struct macroblock {
int skip_optimize;
int q_index;
- search_site *ss;
- int ss_count;
- int searches_per_step;
-
int errorperbit;
int sadperbit16;
int sadperbit4;
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 6816f555e..395d26aef 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -501,9 +501,9 @@ static void update_frame_size(VP9_COMP *cpi) {
int y_stride = cpi->scaled_source.y_stride;
if (cpi->sf.search_method == NSTEP) {
- vp9_init3smotion_compensation(&cpi->mb, y_stride);
+ vp9_init3smotion_compensation(&cpi->ss_cfg, y_stride);
} else if (cpi->sf.search_method == DIAMOND) {
- vp9_init_dsmotion_compensation(&cpi->mb, y_stride);
+ vp9_init_dsmotion_compensation(&cpi->ss_cfg, y_stride);
}
}
@@ -782,9 +782,6 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) {
cm->error.setjmp = 1;
- CHECK_MEM_ERROR(cm, cpi->mb.ss, vpx_calloc(sizeof(search_site),
- (MAX_MVSEARCH_STEPS * 8) + 1));
-
vp9_rtcd();
cpi->use_svc = 0;
@@ -973,95 +970,73 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) {
cpi->rd.thresh_freq_fact[i][j] = 32;
}
-#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SVFHH, SVFHV, SVFHHV, \
- SDX3F, SDX8F, SDX4DF)\
+#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX3F, SDX8F, SDX4DF)\
cpi->fn_ptr[BT].sdf = SDF; \
cpi->fn_ptr[BT].sdaf = SDAF; \
cpi->fn_ptr[BT].vf = VF; \
cpi->fn_ptr[BT].svf = SVF; \
cpi->fn_ptr[BT].svaf = SVAF; \
- cpi->fn_ptr[BT].svf_halfpix_h = SVFHH; \
- cpi->fn_ptr[BT].svf_halfpix_v = SVFHV; \
- cpi->fn_ptr[BT].svf_halfpix_hv = SVFHHV; \
cpi->fn_ptr[BT].sdx3f = SDX3F; \
cpi->fn_ptr[BT].sdx8f = SDX8F; \
cpi->fn_ptr[BT].sdx4df = SDX4DF;
BFP(BLOCK_32X16, vp9_sad32x16, vp9_sad32x16_avg,
vp9_variance32x16, vp9_sub_pixel_variance32x16,
- vp9_sub_pixel_avg_variance32x16, NULL, NULL,
- NULL, NULL, NULL,
- vp9_sad32x16x4d)
+ vp9_sub_pixel_avg_variance32x16, NULL, NULL, vp9_sad32x16x4d)
BFP(BLOCK_16X32, vp9_sad16x32, vp9_sad16x32_avg,
vp9_variance16x32, vp9_sub_pixel_variance16x32,
- vp9_sub_pixel_avg_variance16x32, NULL, NULL,
- NULL, NULL, NULL,
- vp9_sad16x32x4d)
+ vp9_sub_pixel_avg_variance16x32, NULL, NULL, vp9_sad16x32x4d)
BFP(BLOCK_64X32, vp9_sad64x32, vp9_sad64x32_avg,
vp9_variance64x32, vp9_sub_pixel_variance64x32,
- vp9_sub_pixel_avg_variance64x32, NULL, NULL,
- NULL, NULL, NULL,
- vp9_sad64x32x4d)
+ vp9_sub_pixel_avg_variance64x32, NULL, NULL, vp9_sad64x32x4d)
BFP(BLOCK_32X64, vp9_sad32x64, vp9_sad32x64_avg,
vp9_variance32x64, vp9_sub_pixel_variance32x64,
- vp9_sub_pixel_avg_variance32x64, NULL, NULL,
- NULL, NULL, NULL,
- vp9_sad32x64x4d)
+ vp9_sub_pixel_avg_variance32x64, NULL, NULL, vp9_sad32x64x4d)
BFP(BLOCK_32X32, vp9_sad32x32, vp9_sad32x32_avg,
vp9_variance32x32, vp9_sub_pixel_variance32x32,
- vp9_sub_pixel_avg_variance32x32, vp9_variance_halfpixvar32x32_h,
- vp9_variance_halfpixvar32x32_v,
- vp9_variance_halfpixvar32x32_hv, vp9_sad32x32x3, vp9_sad32x32x8,
+ vp9_sub_pixel_avg_variance32x32, vp9_sad32x32x3, vp9_sad32x32x8,
vp9_sad32x32x4d)
BFP(BLOCK_64X64, vp9_sad64x64, vp9_sad64x64_avg,
vp9_variance64x64, vp9_sub_pixel_variance64x64,
- vp9_sub_pixel_avg_variance64x64, vp9_variance_halfpixvar64x64_h,
- vp9_variance_halfpixvar64x64_v,
- vp9_variance_halfpixvar64x64_hv, vp9_sad64x64x3, vp9_sad64x64x8,
+ vp9_sub_pixel_avg_variance64x64, vp9_sad64x64x3, vp9_sad64x64x8,
vp9_sad64x64x4d)
BFP(BLOCK_16X16, vp9_sad16x16, vp9_sad16x16_avg,
vp9_variance16x16, vp9_sub_pixel_variance16x16,
- vp9_sub_pixel_avg_variance16x16, vp9_variance_halfpixvar16x16_h,
- vp9_variance_halfpixvar16x16_v,
- vp9_variance_halfpixvar16x16_hv, vp9_sad16x16x3, vp9_sad16x16x8,
+ vp9_sub_pixel_avg_variance16x16, vp9_sad16x16x3, vp9_sad16x16x8,
vp9_sad16x16x4d)
BFP(BLOCK_16X8, vp9_sad16x8, vp9_sad16x8_avg,
vp9_variance16x8, vp9_sub_pixel_variance16x8,
- vp9_sub_pixel_avg_variance16x8, NULL, NULL, NULL,
+ vp9_sub_pixel_avg_variance16x8,
vp9_sad16x8x3, vp9_sad16x8x8, vp9_sad16x8x4d)
BFP(BLOCK_8X16, vp9_sad8x16, vp9_sad8x16_avg,
vp9_variance8x16, vp9_sub_pixel_variance8x16,
- vp9_sub_pixel_avg_variance8x16, NULL, NULL, NULL,
+ vp9_sub_pixel_avg_variance8x16,
vp9_sad8x16x3, vp9_sad8x16x8, vp9_sad8x16x4d)
BFP(BLOCK_8X8, vp9_sad8x8, vp9_sad8x8_avg,
vp9_variance8x8, vp9_sub_pixel_variance8x8,
- vp9_sub_pixel_avg_variance8x8, NULL, NULL, NULL,
+ vp9_sub_pixel_avg_variance8x8,
vp9_sad8x8x3, vp9_sad8x8x8, vp9_sad8x8x4d)
BFP(BLOCK_8X4, vp9_sad8x4, vp9_sad8x4_avg,
vp9_variance8x4, vp9_sub_pixel_variance8x4,
- vp9_sub_pixel_avg_variance8x4, NULL, NULL,
- NULL, NULL, vp9_sad8x4x8,
- vp9_sad8x4x4d)
+ vp9_sub_pixel_avg_variance8x4, NULL, vp9_sad8x4x8, vp9_sad8x4x4d)
BFP(BLOCK_4X8, vp9_sad4x8, vp9_sad4x8_avg,
vp9_variance4x8, vp9_sub_pixel_variance4x8,
- vp9_sub_pixel_avg_variance4x8, NULL, NULL,
- NULL, NULL, vp9_sad4x8x8,
- vp9_sad4x8x4d)
+ vp9_sub_pixel_avg_variance4x8, NULL, vp9_sad4x8x8, vp9_sad4x8x4d)
BFP(BLOCK_4X4, vp9_sad4x4, vp9_sad4x4_avg,
vp9_variance4x4, vp9_sub_pixel_variance4x4,
- vp9_sub_pixel_avg_variance4x4, NULL, NULL, NULL,
+ vp9_sub_pixel_avg_variance4x4,
vp9_sad4x4x3, vp9_sad4x4x8, vp9_sad4x4x4d)
cpi->full_search_sad = vp9_full_search_sad;
@@ -1182,7 +1157,6 @@ void vp9_remove_compressor(VP9_COMP *cpi) {
}
dealloc_compressor_data(cpi);
- vpx_free(cpi->mb.ss);
vpx_free(cpi->tok);
for (i = 0; i < sizeof(cpi->mbgraph_stats) /
@@ -1444,77 +1418,67 @@ void vp9_write_yuv_rec_frame(VP9_COMMON *cm) {
}
#endif
-static void scale_and_extend_frame_nonnormative(YV12_BUFFER_CONFIG *src_fb,
- YV12_BUFFER_CONFIG *dst_fb) {
- const int in_w = src_fb->y_crop_width;
- const int in_h = src_fb->y_crop_height;
- const int out_w = dst_fb->y_crop_width;
- const int out_h = dst_fb->y_crop_height;
- const int in_w_uv = src_fb->uv_crop_width;
- const int in_h_uv = src_fb->uv_crop_height;
- const int out_w_uv = dst_fb->uv_crop_width;
- const int out_h_uv = dst_fb->uv_crop_height;
+static void scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src,
+ YV12_BUFFER_CONFIG *dst) {
+ // TODO(dkovalev): replace YV12_BUFFER_CONFIG with vpx_image_t
int i;
+ const uint8_t *const srcs[4] = {src->y_buffer, src->u_buffer, src->v_buffer,
+ src->alpha_buffer};
+ const int src_strides[4] = {src->y_stride, src->uv_stride, src->uv_stride,
+ src->alpha_stride};
+ const int src_widths[4] = {src->y_crop_width, src->uv_crop_width,
+ src->uv_crop_width, src->y_crop_width};
+ const int src_heights[4] = {src->y_crop_height, src->uv_crop_height,
+ src->uv_crop_height, src->y_crop_height};
+ uint8_t *const dsts[4] = {dst->y_buffer, dst->u_buffer, dst->v_buffer,
+ dst->alpha_buffer};
+ const int dst_strides[4] = {dst->y_stride, dst->uv_stride, dst->uv_stride,
+ dst->alpha_stride};
+ const int dst_widths[4] = {dst->y_crop_width, dst->uv_crop_width,
+ dst->uv_crop_width, dst->y_crop_width};
+ const int dst_heights[4] = {dst->y_crop_height, dst->uv_crop_height,
+ dst->uv_crop_height, dst->y_crop_height};
+
+ for (i = 0; i < MAX_MB_PLANE; ++i)
+ vp9_resize_plane(srcs[i], src_heights[i], src_widths[i], src_strides[i],
+ dsts[i], dst_heights[i], dst_widths[i], dst_strides[i]);
- uint8_t *srcs[4] = {src_fb->y_buffer, src_fb->u_buffer, src_fb->v_buffer,
- src_fb->alpha_buffer};
- int src_strides[4] = {src_fb->y_stride, src_fb->uv_stride, src_fb->uv_stride,
- src_fb->alpha_stride};
-
- uint8_t *dsts[4] = {dst_fb->y_buffer, dst_fb->u_buffer, dst_fb->v_buffer,
- dst_fb->alpha_buffer};
- int dst_strides[4] = {dst_fb->y_stride, dst_fb->uv_stride, dst_fb->uv_stride,
- dst_fb->alpha_stride};
-
- for (i = 0; i < MAX_MB_PLANE; ++i) {
- if (i == 0 || i == 3) {
- // Y and alpha planes
- vp9_resize_plane(srcs[i], in_h, in_w, src_strides[i],
- dsts[i], out_h, out_w, dst_strides[i]);
- } else {
- // Chroma planes
- vp9_resize_plane(srcs[i], in_h_uv, in_w_uv, src_strides[i],
- dsts[i], out_h_uv, out_w_uv, dst_strides[i]);
- }
- }
// TODO(hkuang): Call C version explicitly
// as neon version only expand border size 32.
- vp8_yv12_extend_frame_borders_c(dst_fb);
-}
-
-static void scale_and_extend_frame(YV12_BUFFER_CONFIG *src_fb,
- YV12_BUFFER_CONFIG *dst_fb) {
- const int in_w = src_fb->y_crop_width;
- const int in_h = src_fb->y_crop_height;
- const int out_w = dst_fb->y_crop_width;
- const int out_h = dst_fb->y_crop_height;
+ vp8_yv12_extend_frame_borders_c(dst);
+}
+
+static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
+ YV12_BUFFER_CONFIG *dst) {
+ const int src_w = src->y_crop_width;
+ const int src_h = src->y_crop_height;
+ const int dst_w = dst->y_crop_width;
+ const int dst_h = dst->y_crop_height;
+ const uint8_t *const srcs[4] = {src->y_buffer, src->u_buffer, src->v_buffer,
+ src->alpha_buffer};
+ const int src_strides[4] = {src->y_stride, src->uv_stride, src->uv_stride,
+ src->alpha_stride};
+ uint8_t *const dsts[4] = {dst->y_buffer, dst->u_buffer, dst->v_buffer,
+ dst->alpha_buffer};
+ const int dst_strides[4] = {dst->y_stride, dst->uv_stride, dst->uv_stride,
+ dst->alpha_stride};
int x, y, i;
- uint8_t *srcs[4] = {src_fb->y_buffer, src_fb->u_buffer, src_fb->v_buffer,
- src_fb->alpha_buffer};
- int src_strides[4] = {src_fb->y_stride, src_fb->uv_stride, src_fb->uv_stride,
- src_fb->alpha_stride};
-
- uint8_t *dsts[4] = {dst_fb->y_buffer, dst_fb->u_buffer, dst_fb->v_buffer,
- dst_fb->alpha_buffer};
- int dst_strides[4] = {dst_fb->y_stride, dst_fb->uv_stride, dst_fb->uv_stride,
- dst_fb->alpha_stride};
-
- for (y = 0; y < out_h; y += 16) {
- for (x = 0; x < out_w; x += 16) {
+ for (y = 0; y < dst_h; y += 16) {
+ for (x = 0; x < dst_w; x += 16) {
for (i = 0; i < MAX_MB_PLANE; ++i) {
const int factor = (i == 0 || i == 3 ? 1 : 2);
- const int x_q4 = x * (16 / factor) * in_w / out_w;
- const int y_q4 = y * (16 / factor) * in_h / out_h;
+ const int x_q4 = x * (16 / factor) * src_w / dst_w;
+ const int y_q4 = y * (16 / factor) * src_h / dst_h;
const int src_stride = src_strides[i];
const int dst_stride = dst_strides[i];
- uint8_t *src = srcs[i] + y / factor * in_h / out_h * src_stride +
- x / factor * in_w / out_w;
- uint8_t *dst = dsts[i] + y / factor * dst_stride + x / factor;
+ const uint8_t *src_ptr = srcs[i] + (y / factor) * src_h / dst_h *
+ src_stride + (x / factor) * src_w / dst_w;
+ uint8_t *dst_ptr = dsts[i] + (y / factor) * dst_stride + (x / factor);
- vp9_convolve8(src, src_stride, dst, dst_stride,
- vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * in_w / out_w,
- vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * in_h / out_h,
+ vp9_convolve8(src_ptr, src_stride, dst_ptr, dst_stride,
+ vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * src_w / dst_w,
+ vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * src_h / dst_h,
16 / factor, 16 / factor);
}
}
@@ -1522,7 +1486,7 @@ static void scale_and_extend_frame(YV12_BUFFER_CONFIG *src_fb,
// TODO(hkuang): Call C version explicitly
// as neon version only expand border size 32.
- vp8_yv12_extend_frame_borders_c(dst_fb);
+ vp8_yv12_extend_frame_borders_c(dst);
}
static int find_fp_qindex() {
@@ -1701,7 +1665,7 @@ void vp9_scale_references(VP9_COMP *cpi) {
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
- YV12_BUFFER_CONFIG *const ref = &cm->frame_bufs[idx].buf;
+ const YV12_BUFFER_CONFIG *const ref = &cm->frame_bufs[idx].buf;
if (ref->y_crop_width != cm->width ||
ref->y_crop_height != cm->height) {
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 8f3249407..132b479e2 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -497,6 +497,8 @@ typedef struct VP9_COMP {
int frame_flags;
+ search_site_config ss_cfg;
+
#if CONFIG_MULTIPLE_ARF
// ARF tracking variables.
int multi_arf_enabled;
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 34506f2bd..b408ced0e 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -418,7 +418,7 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
v_fn_ptr.vf = get_block_variance_fn(bsize);
// Center the initial step/diamond search on best mv.
- tmp_err = cpi->diamond_search_sad(x, &ref_mv_full, &tmp_mv,
+ tmp_err = cpi->diamond_search_sad(x, &cpi->ss_cfg, &ref_mv_full, &tmp_mv,
step_param,
x->sadperbit16, &num00, &v_fn_ptr, ref_mv);
if (tmp_err < INT_MAX)
@@ -441,7 +441,7 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
if (num00) {
--num00;
} else {
- tmp_err = cpi->diamond_search_sad(x, &ref_mv_full, &tmp_mv,
+ tmp_err = cpi->diamond_search_sad(x, &cpi->ss_cfg, &ref_mv_full, &tmp_mv,
step_param + n, x->sadperbit16,
&num00, &v_fn_ptr, ref_mv);
if (tmp_err < INT_MAX)
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 89937f5a6..bbec4da76 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -101,32 +101,32 @@ static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
return 0;
}
-void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
+void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) {
int len, ss_count = 1;
- x->ss[0].mv.col = x->ss[0].mv.row = 0;
- x->ss[0].offset = 0;
+ cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
+ cfg->ss[0].offset = 0;
for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
// Generate offsets for 4 search sites per step.
const MV ss_mvs[] = {{-len, 0}, {len, 0}, {0, -len}, {0, len}};
int i;
for (i = 0; i < 4; ++i) {
- search_site *const ss = &x->ss[ss_count++];
+ search_site *const ss = &cfg->ss[ss_count++];
ss->mv = ss_mvs[i];
ss->offset = ss->mv.row * stride + ss->mv.col;
}
}
- x->ss_count = ss_count;
- x->searches_per_step = 4;
+ cfg->ss_count = ss_count;
+ cfg->searches_per_step = 4;
}
-void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) {
+void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
int len, ss_count = 1;
- x->ss[0].mv.col = x->ss[0].mv.row = 0;
- x->ss[0].offset = 0;
+ cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
+ cfg->ss[0].offset = 0;
for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
// Generate offsets for 8 search sites per step.
@@ -136,14 +136,14 @@ void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) {
};
int i;
for (i = 0; i < 8; ++i) {
- search_site *const ss = &x->ss[ss_count++];
+ search_site *const ss = &cfg->ss[ss_count++];
ss->mv = ss_mvs[i];
ss->offset = ss->mv.row * stride + ss->mv.col;
}
}
- x->ss_count = ss_count;
- x->searches_per_step = 8;
+ cfg->ss_count = ss_count;
+ cfg->searches_per_step = 8;
}
/*
@@ -871,7 +871,9 @@ int vp9_fast_dia_search(const MACROBLOCK *x,
#undef CHECK_BETTER
-int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
+int vp9_full_range_search_c(const MACROBLOCK *x,
+ const search_site_config *cfg,
+ MV *ref_mv, MV *best_mv,
int search_param, int sad_per_bit, int *num00,
const vp9_variance_fn_ptr_t *fn_ptr,
const MV *center_mv) {
@@ -962,6 +964,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
}
int vp9_diamond_search_sad_c(const MACROBLOCK *x,
+ const search_site_config *cfg,
MV *ref_mv, MV *best_mv,
int search_param, int sad_per_bit, int *num00,
const vp9_variance_fn_ptr_t *fn_ptr,
@@ -973,8 +976,8 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
// of iterations
// 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 =
// (MAX_FIRST_STEP/4) pel... etc.
- const search_site *const ss = &x->ss[search_param * x->searches_per_step];
- const int tot_steps = (x->ss_count / x->searches_per_step) - search_param;
+ const search_site *const ss = &cfg->ss[search_param * cfg->searches_per_step];
+ const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
const uint8_t *best_address, *in_what_ref;
int best_sad = INT_MAX;
@@ -996,7 +999,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
i = 1;
for (step = 0; step < tot_steps; step++) {
- for (j = 0; j < x->searches_per_step; j++) {
+ for (j = 0; j < cfg->searches_per_step; j++) {
const MV mv = {best_mv->row + ss[i].mv.row,
best_mv->col + ss[i].mv.col};
if (is_mv_in(x, &mv)) {
@@ -1050,6 +1053,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
}
int vp9_diamond_search_sadx4(const MACROBLOCK *x,
+ const search_site_config *cfg,
MV *ref_mv, MV *best_mv, int search_param,
int sad_per_bit, int *num00,
const vp9_variance_fn_ptr_t *fn_ptr,
@@ -1075,8 +1079,8 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x,
// 0 = initial step (MAX_FIRST_STEP) pel
// 1 = (MAX_FIRST_STEP/2) pel,
// 2 = (MAX_FIRST_STEP/4) pel...
- const search_site *ss = &x->ss[search_param * x->searches_per_step];
- const int tot_steps = (x->ss_count / x->searches_per_step) - search_param;
+ const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
+ const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
@@ -1112,7 +1116,7 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x,
if (all_in) {
unsigned int sad_array[4];
- for (j = 0; j < x->searches_per_step; j += 4) {
+ for (j = 0; j < cfg->searches_per_step; j += 4) {
unsigned char const *block_offset[4];
for (t = 0; t < 4; t++)
@@ -1135,7 +1139,7 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x,
}
}
} else {
- for (j = 0; j < x->searches_per_step; j++) {
+ for (j = 0; j < cfg->searches_per_step; j++) {
// Trap illegal vectors
const MV this_mv = {best_mv->row + ss[i].mv.row,
best_mv->col + ss[i].mv.col};
@@ -1202,7 +1206,7 @@ int vp9_full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x,
const MV *ref_mv, MV *dst_mv) {
MV temp_mv;
int thissme, n, num00 = 0;
- int bestsme = cpi->diamond_search_sad(x, mvp_full, &temp_mv,
+ int bestsme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
step_param, sadpb, &n,
fn_ptr, ref_mv);
if (bestsme < INT_MAX)
@@ -1220,7 +1224,7 @@ int vp9_full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x,
if (num00) {
num00--;
} else {
- thissme = cpi->diamond_search_sad(x, mvp_full, &temp_mv,
+ thissme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
step_param + n, sadpb, &num00,
fn_ptr, ref_mv);
if (thissme < INT_MAX)
@@ -1290,192 +1294,154 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
int sad_per_bit, int distance,
const vp9_variance_fn_ptr_t *fn_ptr,
const MV *center_mv, MV *best_mv) {
+ int r;
const MACROBLOCKD *const xd = &x->e_mbd;
- const uint8_t *const what = x->plane[0].src.buf;
- const int what_stride = x->plane[0].src.stride;
- const uint8_t *const in_what = xd->plane[0].pre[0].buf;
- const int in_what_stride = xd->plane[0].pre[0].stride;
- MV this_mv;
- unsigned int bestsad = INT_MAX;
- int r, c;
- unsigned int thissad;
- int ref_row = ref_mv->row;
- int ref_col = ref_mv->col;
-
- // Apply further limits to prevent us looking using vectors that stretch
- // beyond the UMV border
- const int row_min = MAX(ref_row - distance, x->mv_row_min);
- const int row_max = MIN(ref_row + distance, x->mv_row_max);
- const int col_min = MAX(ref_col - distance, x->mv_col_min);
- const int col_max = MIN(ref_col + distance, x->mv_col_max);
- unsigned int sad_array[3];
+ const struct buf_2d *const what = &x->plane[0].src;
+ const struct buf_2d *const in_what = &xd->plane[0].pre[0];
+ const int row_min = MAX(ref_mv->row - distance, x->mv_row_min);
+ const int row_max = MIN(ref_mv->row + distance, x->mv_row_max);
+ const int col_min = MAX(ref_mv->col - distance, x->mv_col_min);
+ const int col_max = MIN(ref_mv->col + distance, x->mv_col_max);
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
+ unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride,
+ get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) +
+ mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
+ *best_mv = *ref_mv;
- // Work out the mid point for the search
- const uint8_t *bestaddress = &in_what[ref_row * in_what_stride + ref_col];
-
- best_mv->row = ref_row;
- best_mv->col = ref_col;
-
- // Baseline value at the centre
- bestsad = fn_ptr->sdf(what, what_stride,
- bestaddress, in_what_stride, 0x7fffffff)
- + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
-
- for (r = row_min; r < row_max; r++) {
- const uint8_t *check_here = &in_what[r * in_what_stride + col_min];
- this_mv.row = r;
- c = col_min;
-
- while ((c + 2) < col_max && fn_ptr->sdx3f != NULL) {
- int i;
+ for (r = row_min; r < row_max; ++r) {
+ int c = col_min;
+ const uint8_t *check_here = &in_what->buf[r * in_what->stride + c];
- fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
+ if (fn_ptr->sdx3f != NULL) {
+ while ((c + 2) < col_max) {
+ int i;
+ unsigned int sads[3];
- for (i = 0; i < 3; i++) {
- thissad = sad_array[i];
+ fn_ptr->sdx3f(what->buf, what->stride, check_here, in_what->stride,
+ sads);
- if (thissad < bestsad) {
- this_mv.col = c;
- thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
- if (thissad < bestsad) {
- bestsad = thissad;
- best_mv->row = r;
- best_mv->col = c;
+ for (i = 0; i < 3; ++i) {
+ unsigned int sad = sads[i];
+ if (sad < best_sad) {
+ const MV mv = {r, c};
+ sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
+ if (sad < best_sad) {
+ best_sad = sad;
+ *best_mv = mv;
+ }
}
+ ++check_here;
+ ++c;
}
- check_here++;
- c++;
}
}
while (c < col_max) {
- thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
- bestsad);
-
- if (thissad < bestsad) {
- this_mv.col = c;
- thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
- if (thissad < bestsad) {
- bestsad = thissad;
- best_mv->row = r;
- best_mv->col = c;
+ unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
+ check_here, in_what->stride, best_sad);
+ if (sad < best_sad) {
+ const MV mv = {r, c};
+ sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
+ if (sad < best_sad) {
+ best_sad = sad;
+ *best_mv = mv;
}
}
-
- check_here++;
- c++;
+ ++check_here;
+ ++c;
}
}
- return bestsad;
+
+ return best_sad;
}
int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
int sad_per_bit, int distance,
const vp9_variance_fn_ptr_t *fn_ptr,
const MV *center_mv, MV *best_mv) {
+ int r;
const MACROBLOCKD *const xd = &x->e_mbd;
- const uint8_t *const what = x->plane[0].src.buf;
- const int what_stride = x->plane[0].src.stride;
- const uint8_t *const in_what = xd->plane[0].pre[0].buf;
- const int in_what_stride = xd->plane[0].pre[0].stride;
- MV this_mv;
- unsigned int bestsad = INT_MAX;
- int r, c;
- int ref_row = ref_mv->row;
- int ref_col = ref_mv->col;
-
- // Apply further limits to prevent us looking using vectors that stretch
- // beyond the UMV border
- const int row_min = MAX(ref_row - distance, x->mv_row_min);
- const int row_max = MIN(ref_row + distance, x->mv_row_max);
- const int col_min = MAX(ref_col - distance, x->mv_col_min);
- const int col_max = MIN(ref_col + distance, x->mv_col_max);
- DECLARE_ALIGNED_ARRAY(16, uint32_t, sad_array8, 8);
- unsigned int sad_array[3];
+ const struct buf_2d *const what = &x->plane[0].src;
+ const struct buf_2d *const in_what = &xd->plane[0].pre[0];
+ const int row_min = MAX(ref_mv->row - distance, x->mv_row_min);
+ const int row_max = MIN(ref_mv->row + distance, x->mv_row_max);
+ const int col_min = MAX(ref_mv->col - distance, x->mv_col_min);
+ const int col_max = MIN(ref_mv->col + distance, x->mv_col_max);
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
+ unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride,
+ get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) +
+ mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
+ *best_mv = *ref_mv;
- // Work out the mid point for the search
- const uint8_t *bestaddress = &in_what[ref_row * in_what_stride + ref_col];
-
- best_mv->row = ref_row;
- best_mv->col = ref_col;
-
- // Baseline value at the center
- bestsad = fn_ptr->sdf(what, what_stride,
- bestaddress, in_what_stride, 0x7fffffff)
- + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
-
- for (r = row_min; r < row_max; r++) {
- const uint8_t *check_here = &in_what[r * in_what_stride + col_min];
- this_mv.row = r;
- c = col_min;
-
- while ((c + 7) < col_max) {
- int i;
+ for (r = row_min; r < row_max; ++r) {
+ int c = col_min;
+ const uint8_t *check_here = &in_what->buf[r * in_what->stride + c];
- fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8);
+ if (fn_ptr->sdx8f != NULL) {
+ while ((c + 7) < col_max) {
+ int i;
+ unsigned int sads[8];
- for (i = 0; i < 8; i++) {
- unsigned int thissad = (unsigned int)sad_array8[i];
+ fn_ptr->sdx8f(what->buf, what->stride, check_here, in_what->stride,
+ sads);
- if (thissad < bestsad) {
- this_mv.col = c;
- thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
- if (thissad < bestsad) {
- bestsad = thissad;
- best_mv->row = r;
- best_mv->col = c;
+ for (i = 0; i < 8; ++i) {
+ unsigned int sad = sads[i];
+ if (sad < best_sad) {
+ const MV mv = {r, c};
+ sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
+ if (sad < best_sad) {
+ best_sad = sad;
+ *best_mv = mv;
+ }
}
+ ++check_here;
+ ++c;
}
-
- check_here++;
- c++;
}
}
- while ((c + 2) < col_max && fn_ptr->sdx3f != NULL) {
- int i;
+ if (fn_ptr->sdx3f != NULL) {
+ while ((c + 2) < col_max) {
+ int i;
+ unsigned int sads[3];
- fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
+ fn_ptr->sdx3f(what->buf, what->stride, check_here, in_what->stride,
+ sads);
- for (i = 0; i < 3; i++) {
- unsigned int thissad = sad_array[i];
-
- if (thissad < bestsad) {
- this_mv.col = c;
- thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
- if (thissad < bestsad) {
- bestsad = thissad;
- best_mv->row = r;
- best_mv->col = c;
+ for (i = 0; i < 3; ++i) {
+ unsigned int sad = sads[i];
+ if (sad < best_sad) {
+ const MV mv = {r, c};
+ sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
+ if (sad < best_sad) {
+ best_sad = sad;
+ *best_mv = mv;
+ }
}
+ ++check_here;
+ ++c;
}
-
- check_here++;
- c++;
}
}
while (c < col_max) {
- unsigned int thissad = fn_ptr->sdf(what, what_stride,
- check_here, in_what_stride, bestsad);
-
- if (thissad < bestsad) {
- this_mv.col = c;
- thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
- if (thissad < bestsad) {
- bestsad = thissad;
- best_mv->row = r;
- best_mv->col = c;
+ unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
+ check_here, in_what->stride, best_sad);
+ if (sad < best_sad) {
+ const MV mv = {r, c};
+ sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
+ if (sad < best_sad) {
+ best_sad = sad;
+ *best_mv = mv;
}
}
-
- check_here++;
- c++;
+ ++check_here;
+ ++c;
}
}
- return bestsad;
+
+ return best_sad;
}
int vp9_refining_search_sad_c(const MACROBLOCK *x,
diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h
index 70d7985e4..1f524f1f6 100644
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -31,6 +31,20 @@ extern "C" {
// for Block_16x16
#define BORDER_MV_PIXELS_B16 (16 + VP9_INTERP_EXTEND)
+// motion search site
+typedef struct search_site {
+ MV mv;
+ int offset;
+} search_site;
+
+typedef struct search_site_config {
+ search_site ss[8 * MAX_MVSEARCH_STEPS + 1];
+ int ss_count;
+ int searches_per_step;
+} search_site_config;
+
+void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride);
+void vp9_init3smotion_compensation(search_site_config *cfg, int stride);
void vp9_set_mv_search_range(MACROBLOCK *x, const MV *mv);
int vp9_mv_bit_cost(const MV *mv, const MV *ref,
@@ -46,8 +60,6 @@ int vp9_get_mvpred_av_var(const MACROBLOCK *x,
const uint8_t *second_pred,
const vp9_variance_fn_ptr_t *vfp,
int use_mvcost);
-void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride);
-void vp9_init3smotion_compensation(MACROBLOCK *x, int stride);
struct VP9_COMP;
int vp9_init_search_range(struct VP9_COMP *cpi, int size);
@@ -119,6 +131,7 @@ typedef int (*vp9_refining_search_fn_t)(const MACROBLOCK *x,
const MV *center_mv);
typedef int (*vp9_diamond_search_fn_t)(const MACROBLOCK *x,
+ const search_site_config *cfg,
MV *ref_mv, MV *best_mv,
int search_param, int sad_per_bit,
int *num00,
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index c1493e719..56eb9440c 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -418,7 +418,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
// Perform intra prediction search, if the best SAD is above a certain
// threshold.
- if (best_rd > inter_mode_thresh) {
+ if (best_rd > inter_mode_thresh && bsize < cpi->sf.max_intra_bsize) {
for (this_mode = DC_PRED; this_mode <= DC_PRED; ++this_mode) {
vp9_predict_intra_block(xd, 0, b_width_log2(bsize),
mbmi->tx_size, this_mode,
diff --git a/vp9/encoder/vp9_variance.c b/vp9/encoder/vp9_variance.c
index 1399bfb7e..ae3c86aee 100644
--- a/vp9/encoder/vp9_variance.c
+++ b/vp9/encoder/vp9_variance.c
@@ -276,126 +276,6 @@ VAR(64, 64)
SUBPIX_VAR(64, 64)
SUBPIX_AVG_VAR(64, 64)
-unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 0,
- ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 0,
- ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 0,
- ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 8,
- ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 0, 8,
- ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 0, 8,
- ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 8,
- ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 8,
- ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 8,
- ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_sub_pixel_mse16x16_c(const uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- const uint8_t *dst_ptr,
- int dst_pixels_per_line,
- unsigned int *sse) {
- vp9_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line,
- xoffset, yoffset, dst_ptr,
- dst_pixels_per_line, sse);
- return *sse;
-}
-
-unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- const uint8_t *dst_ptr,
- int dst_pixels_per_line,
- unsigned int *sse) {
- vp9_sub_pixel_variance32x32_c(src_ptr, src_pixels_per_line,
- xoffset, yoffset, dst_ptr,
- dst_pixels_per_line, sse);
- return *sse;
-}
-
-unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- const uint8_t *dst_ptr,
- int dst_pixels_per_line,
- unsigned int *sse) {
- vp9_sub_pixel_variance64x64_c(src_ptr, src_pixels_per_line,
- xoffset, yoffset, dst_ptr,
- dst_pixels_per_line, sse);
- return *sse;
-}
-
void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width,
int height, const uint8_t *ref, int ref_stride) {
int i, j;
diff --git a/vp9/encoder/vp9_variance.h b/vp9/encoder/vp9_variance.h
index 4c8be71cd..152c3d962 100644
--- a/vp9/encoder/vp9_variance.h
+++ b/vp9/encoder/vp9_variance.h
@@ -82,9 +82,6 @@ typedef struct vp9_variance_vtable {
vp9_variance_fn_t vf;
vp9_subpixvariance_fn_t svf;
vp9_subp_avg_variance_fn_t svaf;
- vp9_variance_fn_t svf_halfpix_h;
- vp9_variance_fn_t svf_halfpix_v;
- vp9_variance_fn_t svf_halfpix_hv;
vp9_sad_multi_fn_t sdx3f;
vp9_sad_multi_fn_t sdx8f;
vp9_sad_multi_d_fn_t sdx4df;
diff --git a/vp9/encoder/x86/vp9_variance_sse2.c b/vp9/encoder/x86/vp9_variance_sse2.c
index 9e65694a8..25d594632 100644
--- a/vp9/encoder/x86/vp9_variance_sse2.c
+++ b/vp9/encoder/x86/vp9_variance_sse2.c
@@ -494,58 +494,3 @@ FNS(ssse3, ssse3);
#undef FNS
#undef FN
-
-unsigned int vp9_variance_halfpixvar16x16_h_sse2(
- const unsigned char *src_ptr,
- int src_pixels_per_line,
- const unsigned char *dst_ptr,
- int dst_pixels_per_line,
- unsigned int *sse) {
- int xsum0;
- unsigned int xxsum0;
-
- vp9_half_horiz_variance16x_h_sse2(
- src_ptr, src_pixels_per_line,
- dst_ptr, dst_pixels_per_line, 16,
- &xsum0, &xxsum0);
-
- *sse = xxsum0;
- return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
-}
-
-
-unsigned int vp9_variance_halfpixvar16x16_v_sse2(
- const unsigned char *src_ptr,
- int src_pixels_per_line,
- const unsigned char *dst_ptr,
- int dst_pixels_per_line,
- unsigned int *sse) {
- int xsum0;
- unsigned int xxsum0;
- vp9_half_vert_variance16x_h_sse2(
- src_ptr, src_pixels_per_line,
- dst_ptr, dst_pixels_per_line, 16,
- &xsum0, &xxsum0);
-
- *sse = xxsum0;
- return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
-}
-
-
-unsigned int vp9_variance_halfpixvar16x16_hv_sse2(
- const unsigned char *src_ptr,
- int src_pixels_per_line,
- const unsigned char *dst_ptr,
- int dst_pixels_per_line,
- unsigned int *sse) {
- int xsum0;
- unsigned int xxsum0;
-
- vp9_half_horiz_vert_variance16x_h_sse2(
- src_ptr, src_pixels_per_line,
- dst_ptr, dst_pixels_per_line, 16,
- &xsum0, &xxsum0);
-
- *sse = xxsum0;
- return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
-}
diff --git a/webmdec.c b/webmdec.c
deleted file mode 100644
index 93a8d9fb5..000000000
--- a/webmdec.c
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "./webmdec.h"
-
-#include <stdarg.h>
-
-#include "third_party/nestegg/include/nestegg/nestegg.h"
-
-static int nestegg_read_cb(void *buffer, size_t length, void *userdata) {
- FILE *f = userdata;
-
- if (fread(buffer, 1, length, f) < length) {
- if (ferror(f))
- return -1;
- if (feof(f))
- return 0;
- }
- return 1;
-}
-
-static int nestegg_seek_cb(int64_t offset, int whence, void *userdata) {
- switch (whence) {
- case NESTEGG_SEEK_SET:
- whence = SEEK_SET;
- break;
- case NESTEGG_SEEK_CUR:
- whence = SEEK_CUR;
- break;
- case NESTEGG_SEEK_END:
- whence = SEEK_END;
- break;
- };
- return fseek(userdata, (int32_t)offset, whence) ? -1 : 0;
-}
-
-static int64_t nestegg_tell_cb(void *userdata) {
- return ftell(userdata);
-}
-
-static void nestegg_log_cb(nestegg *context,
- unsigned int severity,
- char const *format, ...) {
- va_list ap;
- va_start(ap, format);
- vfprintf(stderr, format, ap);
- fprintf(stderr, "\n");
- va_end(ap);
-}
-
-int file_is_webm(struct WebmInputContext *webm_ctx,
- struct VpxInputContext *vpx_ctx) {
- uint32_t i, n;
- int track_type = -1;
- int codec_id;
-
- nestegg_io io = {nestegg_read_cb, nestegg_seek_cb, nestegg_tell_cb, 0};
- nestegg_video_params params;
-
- io.userdata = vpx_ctx->file;
- if (nestegg_init(&webm_ctx->nestegg_ctx, io, NULL, -1))
- goto fail;
-
- if (nestegg_track_count(webm_ctx->nestegg_ctx, &n))
- goto fail;
-
- for (i = 0; i < n; i++) {
- track_type = nestegg_track_type(webm_ctx->nestegg_ctx, i);
-
- if (track_type == NESTEGG_TRACK_VIDEO)
- break;
- else if (track_type < 0)
- goto fail;
- }
-
- codec_id = nestegg_track_codec_id(webm_ctx->nestegg_ctx, i);
- if (codec_id == NESTEGG_CODEC_VP8) {
- vpx_ctx->fourcc = VP8_FOURCC;
- } else if (codec_id == NESTEGG_CODEC_VP9) {
- vpx_ctx->fourcc = VP9_FOURCC;
- } else {
- fprintf(stderr, "Not VPx video, quitting.\n");
- goto fail;
- }
-
- webm_ctx->video_track = i;
-
- if (nestegg_track_video_params(webm_ctx->nestegg_ctx, i, &params))
- goto fail;
-
- vpx_ctx->framerate.denominator = 0;
- vpx_ctx->framerate.numerator = 0;
- vpx_ctx->width = params.width;
- vpx_ctx->height = params.height;
-
- return 1;
-
- fail:
- webm_ctx->nestegg_ctx = NULL;
- rewind(vpx_ctx->file);
-
- return 0;
-}
-
-int webm_read_frame(struct WebmInputContext *webm_ctx,
- uint8_t **buffer,
- size_t *bytes_in_buffer,
- size_t *buffer_size) {
- if (webm_ctx->chunk >= webm_ctx->chunks) {
- uint32_t track;
- int status;
-
- do {
- /* End of this packet, get another. */
- if (webm_ctx->pkt) {
- nestegg_free_packet(webm_ctx->pkt);
- webm_ctx->pkt = NULL;
- }
-
- status = nestegg_read_packet(webm_ctx->nestegg_ctx, &webm_ctx->pkt);
- if (status <= 0)
- return status ? status : 1;
-
- if (nestegg_packet_track(webm_ctx->pkt, &track))
- return -1;
- } while (track != webm_ctx->video_track);
-
- if (nestegg_packet_count(webm_ctx->pkt, &webm_ctx->chunks))
- return -1;
-
- webm_ctx->chunk = 0;
- }
-
- if (nestegg_packet_data(webm_ctx->pkt, webm_ctx->chunk,
- buffer, bytes_in_buffer)) {
- return -1;
- }
-
- webm_ctx->chunk++;
- return 0;
-}
-
-int webm_guess_framerate(struct WebmInputContext *webm_ctx,
- struct VpxInputContext *vpx_ctx) {
- uint32_t i;
- uint64_t tstamp = 0;
-
- /* Check to see if we can seek before we parse any data. */
- if (nestegg_track_seek(webm_ctx->nestegg_ctx, webm_ctx->video_track, 0)) {
- fprintf(stderr, "Failed to guess framerate (no Cues), set to 30fps.\n");
- vpx_ctx->framerate.numerator = 30;
- vpx_ctx->framerate.denominator = 1;
- return 0;
- }
-
- /* Guess the framerate. Read up to 1 second, or 50 video packets,
- * whichever comes first.
- */
- for (i = 0; tstamp < 1000000000 && i < 50;) {
- nestegg_packet *pkt;
- uint32_t track;
-
- if (nestegg_read_packet(webm_ctx->nestegg_ctx, &pkt) <= 0)
- break;
-
- nestegg_packet_track(pkt, &track);
- if (track == webm_ctx->video_track) {
- nestegg_packet_tstamp(pkt, &tstamp);
- ++i;
- }
-
- nestegg_free_packet(pkt);
- }
-
- if (nestegg_track_seek(webm_ctx->nestegg_ctx, webm_ctx->video_track, 0))
- goto fail;
-
- vpx_ctx->framerate.numerator = (i - 1) * 1000000;
- vpx_ctx->framerate.denominator = (int)(tstamp / 1000);
- return 0;
-
- fail:
- nestegg_destroy(webm_ctx->nestegg_ctx);
- webm_ctx->nestegg_ctx = NULL;
- rewind(vpx_ctx->file);
- return 1;
-}
-
-void webm_free(struct WebmInputContext *webm_ctx) {
- if (webm_ctx && webm_ctx->nestegg_ctx) {
- if (webm_ctx->pkt)
- nestegg_free_packet(webm_ctx->pkt);
- nestegg_destroy(webm_ctx->nestegg_ctx);
- }
-}
diff --git a/webmdec.cc b/webmdec.cc
new file mode 100644
index 000000000..eb89befd8
--- /dev/null
+++ b/webmdec.cc
@@ -0,0 +1,219 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./webmdec.h"
+
+#include <cstdio>
+#include <cstring>
+#include <new>
+
+#include "third_party/libwebm/mkvparser.hpp"
+#include "third_party/libwebm/mkvreader.hpp"
+
+namespace {
+
+// Destroys the reader, segment and frame buffer held by |webm_ctx| and puts
+// every field of the context back to NULL / 0.
+void reset(struct WebmInputContext *const webm_ctx) {
+  // delete and delete[] accept NULL, so the pointers need no guarding.
+  delete static_cast<mkvparser::MkvReader*>(webm_ctx->reader);
+  delete static_cast<mkvparser::Segment*>(webm_ctx->segment);
+  delete[] webm_ctx->buffer;
+
+  // Owned objects.
+  webm_ctx->reader = NULL;
+  webm_ctx->segment = NULL;
+  webm_ctx->buffer = NULL;
+
+  // Read cursor.
+  webm_ctx->cluster = NULL;
+  webm_ctx->block_entry = NULL;
+  webm_ctx->block = NULL;
+  webm_ctx->block_frame_index = 0;
+
+  // Stream bookkeeping.
+  webm_ctx->video_track_index = 0;
+  webm_ctx->timestamp_ns = 0;
+}
+
+// Stores the first cluster of the context's segment in |webm_ctx->cluster|.
+void get_first_cluster(struct WebmInputContext *const webm_ctx) {
+  mkvparser::Segment *const seg =
+      static_cast<mkvparser::Segment*>(webm_ctx->segment);
+  webm_ctx->cluster = seg->GetFirst();
+}
+
+// Rewinds the input file, then resets |webm_ctx|.
+void rewind_and_reset(struct WebmInputContext *const webm_ctx,
+                      struct VpxInputContext *const vpx_ctx) {
+  rewind(vpx_ctx->file);
+  reset(webm_ctx);
+}
+
+}  // namespace
+
+// Probes |vpx_ctx->file| for a WebM stream that carries a VP8 or VP9 video
+// track. On success, sets |vpx_ctx|'s fourcc, width and height (the framerate
+// is zeroed), records the video track number in |webm_ctx|, positions
+// |webm_ctx| at the first cluster and returns 1. On any failure the file is
+// rewound, |webm_ctx| is reset and 0 is returned.
+int file_is_webm(struct WebmInputContext *webm_ctx,
+                 struct VpxInputContext *vpx_ctx) {
+  mkvparser::MkvReader *const reader = new mkvparser::MkvReader(vpx_ctx->file);
+  webm_ctx->reader = reader;
+
+  mkvparser::EBMLHeader header;
+  long long pos = 0;
+  if (header.Parse(reader, pos) < 0) {
+    rewind_and_reset(webm_ctx, vpx_ctx);
+    return 0;
+  }
+
+  mkvparser::Segment *segment = NULL;
+  if (mkvparser::Segment::CreateInstance(reader, pos, segment)) {
+    rewind_and_reset(webm_ctx, vpx_ctx);
+    return 0;
+  }
+  webm_ctx->segment = segment;
+  if (segment->Load() < 0) {
+    rewind_and_reset(webm_ctx, vpx_ctx);
+    return 0;
+  }
+
+  // Defensive: never dereference a missing track list.
+  const mkvparser::Tracks *const tracks = segment->GetTracks();
+  if (tracks == NULL) {
+    rewind_and_reset(webm_ctx, vpx_ctx);
+    return 0;
+  }
+
+  // Pick the first video track; NULL entries are skipped.
+  const mkvparser::VideoTrack *video_track = NULL;
+  for (unsigned long i = 0; i < tracks->GetTracksCount(); ++i) {
+    const mkvparser::Track *const track = tracks->GetTrackByIndex(i);
+    if (track != NULL && track->GetType() == mkvparser::Track::kVideo) {
+      video_track = static_cast<const mkvparser::VideoTrack*>(track);
+      webm_ctx->video_track_index = static_cast<int>(track->GetNumber());
+      break;
+    }
+  }
+
+  // A track without a codec id must not reach strncmp(): passing NULL to it
+  // is undefined behavior.
+  const char *const codec_id =
+      (video_track != NULL) ? video_track->GetCodecId() : NULL;
+  if (codec_id == NULL) {
+    rewind_and_reset(webm_ctx, vpx_ctx);
+    return 0;
+  }
+
+  if (!strncmp(codec_id, "V_VP8", 5)) {
+    vpx_ctx->fourcc = VP8_FOURCC;
+  } else if (!strncmp(codec_id, "V_VP9", 5)) {
+    vpx_ctx->fourcc = VP9_FOURCC;
+  } else {
+    rewind_and_reset(webm_ctx, vpx_ctx);
+    return 0;
+  }
+
+  vpx_ctx->framerate.denominator = 0;
+  vpx_ctx->framerate.numerator = 0;
+  vpx_ctx->width = video_track->GetWidth();
+  vpx_ctx->height = video_track->GetHeight();
+
+  get_first_cluster(webm_ctx);
+
+  return 1;
+}
+
+// Reads the next frame of the video track into |*buffer|. When the frame does
+// not fit in |*buffer_size| bytes the buffer is replaced by a larger one and
+// |*buffer_size| / |webm_ctx->buffer| are updated to match. The frame length
+// is stored in |*bytes_in_buffer| and the block time in
+// |webm_ctx->timestamp_ns|.
+// Returns 0 on success, 1 at end of stream and -1 on error.
+int webm_read_frame(struct WebmInputContext *webm_ctx,
+                    uint8_t **buffer,
+                    size_t *bytes_in_buffer,
+                    size_t *buffer_size) {
+  mkvparser::Segment *const segment =
+      reinterpret_cast<mkvparser::Segment*>(webm_ctx->segment);
+  const mkvparser::Cluster* cluster =
+      reinterpret_cast<const mkvparser::Cluster*>(webm_ctx->cluster);
+  const mkvparser::Block *block =
+      reinterpret_cast<const mkvparser::Block*>(webm_ctx->block);
+  const mkvparser::BlockEntry *block_entry =
+      reinterpret_cast<const mkvparser::BlockEntry*>(webm_ctx->block_entry);
+  bool block_entry_eos = false;
+  do {
+    long status = 0;
+    bool get_new_block = false;
+    if (block_entry == NULL && !block_entry_eos) {
+      // Nothing read from this cluster yet: start at its first entry.
+      status = cluster->GetFirst(block_entry);
+      get_new_block = true;
+    } else if (block_entry_eos || block_entry->EOS()) {
+      // Current cluster exhausted: advance to the next one.
+      cluster = segment->GetNext(cluster);
+      if (cluster == NULL || cluster->EOS()) {
+        *bytes_in_buffer = 0;
+        return 1;
+      }
+      status = cluster->GetFirst(block_entry);
+      block_entry_eos = false;
+      get_new_block = true;
+    } else if (block == NULL ||
+               webm_ctx->block_frame_index == block->GetFrameCount() ||
+               block->GetTrackNumber() != webm_ctx->video_track_index) {
+      // Current block is used up or belongs to another track: next entry.
+      status = cluster->GetNext(block_entry, block_entry);
+      if (block_entry == NULL || block_entry->EOS()) {
+        block_entry_eos = true;
+        continue;
+      }
+      get_new_block = true;
+    }
+    // A successful status with no entry would be dereferenced below.
+    if (status || block_entry == NULL) {
+      return -1;
+    }
+    if (get_new_block) {
+      block = block_entry->GetBlock();
+      if (block == NULL) {
+        return -1;
+      }
+      webm_ctx->block_frame_index = 0;
+    }
+    // |block_entry_eos| is tested first so that the `continue` above never
+    // dereferences a NULL |block|.
+  } while (block_entry_eos ||
+           block->GetTrackNumber() != webm_ctx->video_track_index);
+
+  webm_ctx->cluster = cluster;
+  webm_ctx->block_entry = block_entry;
+  webm_ctx->block = block;
+
+  const mkvparser::Block::Frame& frame =
+      block->GetFrame(webm_ctx->block_frame_index);
+  ++webm_ctx->block_frame_index;
+  if (frame.len > static_cast<long>(*buffer_size)) {
+    // Plain new[] throws instead of returning NULL; the nothrow form makes
+    // the failure check reachable. Allocating before releasing the old
+    // buffer keeps |*buffer|, |*buffer_size| and |webm_ctx->buffer| valid
+    // when the allocation fails.
+    uint8_t *const new_buffer = new (std::nothrow) uint8_t[frame.len];
+    if (new_buffer == NULL) {
+      return -1;
+    }
+    delete[] *buffer;
+    *buffer = new_buffer;
+    *buffer_size = frame.len;
+    webm_ctx->buffer = new_buffer;
+  }
+  *bytes_in_buffer = frame.len;
+  webm_ctx->timestamp_ns = block->GetTime(cluster);
+
+  mkvparser::MkvReader *const reader =
+      reinterpret_cast<mkvparser::MkvReader*>(webm_ctx->reader);
+  return frame.Read(reader, *buffer) ? -1 : 0;
+}
+
+// Estimates the frame rate from container timestamps: reads video frames
+// until one second of stream time has elapsed or 50 frames have been read,
+// stores the result in |vpx_ctx->framerate|, then moves |webm_ctx| back to
+// the first cluster. Always returns 0.
+int webm_guess_framerate(struct WebmInputContext *webm_ctx,
+                         struct VpxInputContext *vpx_ctx) {
+  uint32_t i = 0;
+  uint8_t *buffer = NULL;
+  size_t bytes_in_buffer = 0;
+  size_t buffer_size = 0;
+  while (webm_ctx->timestamp_ns < 1000000000 && i < 50) {
+    if (webm_read_frame(webm_ctx, &buffer, &bytes_in_buffer, &buffer_size)) {
+      break;
+    }
+    ++i;
+  }
+  // NOTE(review): when no frame could be read (i == 0) the subtraction wraps
+  // and the denominator is 0 -- confirm how callers treat that result.
+  vpx_ctx->framerate.numerator = (i - 1) * 1000000;
+  vpx_ctx->framerate.denominator =
+      static_cast<int>(webm_ctx->timestamp_ns / 1000);
+
+  // webm_read_frame() stores the buffer it allocates in |webm_ctx->buffer|.
+  // Drop that alias before freeing the scratch buffer, otherwise a later
+  // webm_free() would delete the same memory a second time.
+  if (webm_ctx->buffer == buffer) {
+    webm_ctx->buffer = NULL;
+  }
+  delete[] buffer;
+
+  // Rewind the read cursor so decoding starts from the first frame again.
+  get_first_cluster(webm_ctx);
+  webm_ctx->block = NULL;
+  webm_ctx->block_entry = NULL;
+  webm_ctx->block_frame_index = 0;
+  webm_ctx->timestamp_ns = 0;
+
+  return 0;
+}
+
+// Public teardown entry point: hands |webm_ctx| to reset(), which deletes the
+// reader, segment and buffer it holds and clears every field.
+void webm_free(struct WebmInputContext *webm_ctx) {
+  reset(webm_ctx);
+}
diff --git a/webmdec.h b/webmdec.h
index 108c6ade9..29b815da1 100644
--- a/webmdec.h
+++ b/webmdec.h
@@ -16,34 +16,53 @@
extern "C" {
#endif
-struct nestegg;
-struct nestegg_packet;
struct VpxInputContext;
struct WebmInputContext {
- uint32_t chunk;
- uint32_t chunks;
- uint32_t video_track;
- struct nestegg *nestegg_ctx;
- struct nestegg_packet *pkt;
+ void *reader;  // mkvparser::MkvReader created by file_is_webm(); deleted by webm_free().
+ void *segment;  // mkvparser::Segment created by file_is_webm(); deleted by webm_free().
+ uint8_t *buffer;  // Frame buffer last allocated by webm_read_frame(); deleted by webm_free().
+ const void *cluster;  // mkvparser::Cluster currently being read.
+ const void *block_entry;  // mkvparser::BlockEntry currently being read.
+ const void *block;  // mkvparser::Block currently being read.
+ int block_frame_index;  // Index within |block| of the next frame to return.
+ int video_track_index;  // Track number of the selected video track.
+ uint64_t timestamp_ns;  // Time of the block the last frame was read from.
};
+// Checks if the input is a WebM file. If so, initializes WebmInputContext so
+// that webm_read_frame can be called to retrieve a video frame.
+// Returns 1 on success and 0 on failure or input is not WebM file.
+// TODO(vigneshv): Refactor this function into two smaller functions specific
+// to their task.
int file_is_webm(struct WebmInputContext *webm_ctx,
struct VpxInputContext *vpx_ctx);
-/* Reads a WebM video frame. Return values:
- * 0 - Success
- * 1 - End of File
- * -1 - Error
- */
+// Reads a WebM Video Frame. Memory for the buffer is created, owned and managed
+// by this function. For the first call, |*buffer| should be NULL and
+// |*buffer_size| should be 0. Once all the frames are read and used,
+// webm_free() should be called, otherwise there will be a leak.
+// Parameters:
+// webm_ctx - WebmInputContext object
+// buffer - pointer where the frame data will be filled.
+// bytes_in_buffer - receives the size in bytes of the frame that was read.
+// buffer_size - capacity of |*buffer|; updated whenever the buffer is
+// reallocated. TODO(vigneshv): remove this
+// Return values:
+// 0 - Success
+// 1 - End of Stream
+// -1 - Error
+// TODO(vigneshv): Make the return values consistent across all functions in
+// this file.
int webm_read_frame(struct WebmInputContext *webm_ctx,
uint8_t **buffer,
size_t *bytes_in_buffer,
size_t *buffer_size);
+// Guesses the frame rate of the input file based on the container timestamps.
int webm_guess_framerate(struct WebmInputContext *webm_ctx,
struct VpxInputContext *vpx_ctx);
+// Resets the WebmInputContext.
void webm_free(struct WebmInputContext *webm_ctx);
#ifdef __cplusplus