summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--libs.mk1
-rw-r--r--vp8/common/arm/arm_systemdependent.c134
-rw-r--r--vp8/common/arm/idct_arm.h4
-rw-r--r--vp8/common/arm/loopfilter_arm.h4
-rw-r--r--vp8/common/arm/recon_arm.h4
-rw-r--r--vp8/common/arm/subpixel_arm.h4
-rw-r--r--vp8/common/arm/systemdependent.c149
-rw-r--r--vp8/common/generic/systemdependent.c5
-rw-r--r--vp8/common/onyxc_int.h1
-rw-r--r--vp8/decoder/arm/arm_dsystemdependent.c66
-rw-r--r--vp8/decoder/arm/dequantize_arm.h4
-rw-r--r--vp8/decoder/arm/dsystemdependent.c39
-rw-r--r--vp8/decoder/generic/dsystemdependent.c5
-rw-r--r--vp8/decoder/onyxd_if.c58
-rw-r--r--vp8/encoder/arm/arm_csystemdependent.c136
-rw-r--r--vp8/encoder/arm/armv5te/boolhuff_armv5te.asm (renamed from vp8/encoder/arm/neon/boolhuff_armv7.asm)11
-rw-r--r--vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm (renamed from vp8/encoder/arm/neon/vp8_packtokens_armv7.asm)20
-rw-r--r--vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm (renamed from vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm)20
-rw-r--r--vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm (renamed from vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm)20
-rw-r--r--vp8/encoder/arm/csystemdependent.c164
-rw-r--r--vp8/encoder/arm/dct_arm.h4
-rw-r--r--vp8/encoder/arm/encodemb_arm.h2
-rw-r--r--vp8/encoder/arm/variance_arm.h2
-rw-r--r--vp8/encoder/bitstream.h14
-rw-r--r--vp8/encoder/generic/csystemdependent.c5
-rw-r--r--vp8/encoder/onyx_if.c92
-rw-r--r--vp8/encoder/picklpf.c71
-rw-r--r--vp8/vp8_common.mk9
-rw-r--r--vp8/vp8cx_arm.mk19
-rw-r--r--vp8/vp8dx_arm.mk6
-rw-r--r--vpx_ports/arm.h27
-rw-r--r--vpx_ports/arm_cpudetect.c190
-rw-r--r--vpx_scale/arm/scalesystemdependant.c22
33 files changed, 841 insertions, 471 deletions
diff --git a/libs.mk b/libs.mk
index 4beaa50cb..9ded3945a 100644
--- a/libs.mk
+++ b/libs.mk
@@ -93,6 +93,7 @@ CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86.h
CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86_abi_support.asm
CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86_cpuid.c
endif
+CODEC_SRCS-$(ARCH_ARM) += vpx_ports/arm_cpudetect.c
CODEC_SRCS-$(ARCH_ARM) += $(BUILD_PFX)vpx_config.asm
CODEC_EXPORTS-$(BUILD_LIBVPX) += vpx/exports_com
CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_enc
diff --git a/vp8/common/arm/arm_systemdependent.c b/vp8/common/arm/arm_systemdependent.c
new file mode 100644
index 000000000..fe62fae13
--- /dev/null
+++ b/vp8/common/arm/arm_systemdependent.c
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_ports/config.h"
+#include "vpx_ports/arm.h"
+#include "g_common.h"
+#include "pragmas.h"
+#include "subpixel.h"
+#include "loopfilter.h"
+#include "recon.h"
+#include "idct.h"
+#include "onyxc_int.h"
+
+extern void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x);
+extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x);
+extern void vp8_build_intra_predictors_mby_neon(MACROBLOCKD *x);
+
+extern void (*vp8_build_intra_predictors_mby_s_ptr)(MACROBLOCKD *x);
+extern void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x);
+extern void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x);
+/* Install ARMv6/NEON versions of the common VP8 routines; chosen per CPU when RTCD is enabled. */
+void vp8_arch_arm_common_init(VP8_COMMON *ctx)
+{
+#if CONFIG_RUNTIME_CPU_DETECT
+ VP8_COMMON_RTCD *rtcd = &ctx->rtcd;
+ int flags = arm_cpu_caps();
+ int has_edsp = flags & HAS_EDSP; /* not referenced in this function */
+ int has_media = flags & HAS_MEDIA;
+ int has_neon = flags & HAS_NEON;
+ rtcd->flags = flags; /* saved so the decoder/encoder init code can reuse the result */
+
+ /* Override default functions with fastest ones for this CPU. */
+#if HAVE_ARMV6
+ if (has_media)
+ {
+ rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_armv6;
+ rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_armv6;
+ rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_armv6;
+ rtcd->subpix.sixtap4x4 = vp8_sixtap_predict_armv6;
+ rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_armv6;
+ rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_armv6;
+ rtcd->subpix.bilinear8x4 = vp8_bilinear_predict8x4_armv6;
+ rtcd->subpix.bilinear4x4 = vp8_bilinear_predict4x4_armv6;
+
+ rtcd->idct.idct1 = vp8_short_idct4x4llm_1_v6;
+ rtcd->idct.idct16 = vp8_short_idct4x4llm_v6_dual;
+ rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_v6;
+ rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_v6;
+
+ rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_armv6;
+ rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_armv6;
+ rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_armv6;
+ rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_armv6;
+ rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_armv6;
+ rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_armv6;
+ rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_armv6;
+ rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_armv6;
+
+ rtcd->recon.copy16x16 = vp8_copy_mem16x16_v6;
+ rtcd->recon.copy8x8 = vp8_copy_mem8x8_v6;
+ rtcd->recon.copy8x4 = vp8_copy_mem8x4_v6;
+ rtcd->recon.recon = vp8_recon_b_armv6;
+ rtcd->recon.recon2 = vp8_recon2b_armv6;
+ rtcd->recon.recon4 = vp8_recon4b_armv6;
+ }
+#endif /* HAVE_ARMV6 */
+
+#if HAVE_ARMV7
+ if (has_neon) /* runs after the ARMv6 block, so NEON entries win when both are set */
+ {
+ rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_neon;
+ rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_neon;
+ rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_neon;
+ rtcd->subpix.sixtap4x4 = vp8_sixtap_predict_neon;
+ rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_neon;
+ rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_neon;
+ rtcd->subpix.bilinear8x4 = vp8_bilinear_predict8x4_neon;
+ rtcd->subpix.bilinear4x4 = vp8_bilinear_predict4x4_neon;
+
+ rtcd->idct.idct1 = vp8_short_idct4x4llm_1_neon;
+ rtcd->idct.idct16 = vp8_short_idct4x4llm_neon;
+ rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_neon;
+ rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_neon;
+
+ rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_neon;
+ rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_neon;
+ rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_neon;
+ rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_neon;
+ rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_neon;
+ rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_neon;
+ rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_neon;
+ rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_neon;
+
+ rtcd->recon.copy16x16 = vp8_copy_mem16x16_neon;
+ rtcd->recon.copy8x8 = vp8_copy_mem8x8_neon;
+ rtcd->recon.copy8x4 = vp8_copy_mem8x4_neon;
+ rtcd->recon.recon = vp8_recon_b_neon;
+ rtcd->recon.recon2 = vp8_recon2b_neon;
+ rtcd->recon.recon4 = vp8_recon4b_neon;
+ }
+#endif /* HAVE_ARMV7 */
+
+#endif /* CONFIG_RUNTIME_CPU_DETECT */
+
+#if HAVE_ARMV6 /* from here on the code is also compiled in non-RTCD builds */
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (has_media)
+#endif /* CONFIG_RUNTIME_CPU_DETECT */
+ {
+ vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby;
+ vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s;
+ }
+#endif /* HAVE_ARMV6 */
+
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (has_neon)
+#endif /* CONFIG_RUNTIME_CPU_DETECT */
+ { /* without RTCD this block runs unconditionally */
+ vp8_build_intra_predictors_mby_ptr =
+ vp8_build_intra_predictors_mby_neon;
+ vp8_build_intra_predictors_mby_s_ptr =
+ vp8_build_intra_predictors_mby_s_neon;
+ }
+#endif /* HAVE_ARMV7 */
+}
diff --git a/vp8/common/arm/idct_arm.h b/vp8/common/arm/idct_arm.h
index f28d7f649..8b8d17917 100644
--- a/vp8/common/arm/idct_arm.h
+++ b/vp8/common/arm/idct_arm.h
@@ -19,6 +19,7 @@ extern prototype_idct_scalar_add(vp8_dc_only_idct_add_v6);
extern prototype_second_order(vp8_short_inv_walsh4x4_1_v6);
extern prototype_second_order(vp8_short_inv_walsh4x4_v6);
+#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_idct_idct1
#define vp8_idct_idct1 vp8_short_idct4x4llm_1_v6
@@ -34,6 +35,7 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_v6);
#undef vp8_idct_iwalsh16
#define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_v6
#endif
+#endif
#if HAVE_ARMV7
extern prototype_idct(vp8_short_idct4x4llm_1_neon);
@@ -42,6 +44,7 @@ extern prototype_idct_scalar_add(vp8_dc_only_idct_add_neon);
extern prototype_second_order(vp8_short_inv_walsh4x4_1_neon);
extern prototype_second_order(vp8_short_inv_walsh4x4_neon);
+#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_idct_idct1
#define vp8_idct_idct1 vp8_short_idct4x4llm_1_neon
@@ -57,5 +60,6 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_neon);
#undef vp8_idct_iwalsh16
#define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_neon
#endif
+#endif
#endif
diff --git a/vp8/common/arm/loopfilter_arm.h b/vp8/common/arm/loopfilter_arm.h
index 6c3628ae9..cd62207d7 100644
--- a/vp8/common/arm/loopfilter_arm.h
+++ b/vp8/common/arm/loopfilter_arm.h
@@ -22,6 +22,7 @@ extern prototype_loopfilter_block(vp8_loop_filter_bvs_armv6);
extern prototype_loopfilter_block(vp8_loop_filter_mbhs_armv6);
extern prototype_loopfilter_block(vp8_loop_filter_bhs_armv6);
+#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_lf_normal_mb_v
#define vp8_lf_normal_mb_v vp8_loop_filter_mbv_armv6
@@ -46,6 +47,7 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_armv6);
#undef vp8_lf_simple_b_h
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_armv6
#endif
+#endif
#if HAVE_ARMV7
extern prototype_loopfilter_block(vp8_loop_filter_mbv_neon);
@@ -57,6 +59,7 @@ extern prototype_loopfilter_block(vp8_loop_filter_bvs_neon);
extern prototype_loopfilter_block(vp8_loop_filter_mbhs_neon);
extern prototype_loopfilter_block(vp8_loop_filter_bhs_neon);
+#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_lf_normal_mb_v
#define vp8_lf_normal_mb_v vp8_loop_filter_mbv_neon
@@ -81,5 +84,6 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_neon);
#undef vp8_lf_simple_b_h
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_neon
#endif
+#endif
#endif
diff --git a/vp8/common/arm/recon_arm.h b/vp8/common/arm/recon_arm.h
index 18855a3c0..c30f6dc2d 100644
--- a/vp8/common/arm/recon_arm.h
+++ b/vp8/common/arm/recon_arm.h
@@ -21,6 +21,7 @@ extern prototype_copy_block(vp8_copy_mem8x8_v6);
extern prototype_copy_block(vp8_copy_mem8x4_v6);
extern prototype_copy_block(vp8_copy_mem16x16_v6);
+#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_recon_recon
#define vp8_recon_recon vp8_recon_b_armv6
@@ -39,6 +40,7 @@ extern prototype_copy_block(vp8_copy_mem16x16_v6);
#undef vp8_recon_copy16x16
#define vp8_recon_copy16x16 vp8_copy_mem16x16_v6
#endif
+#endif
#if HAVE_ARMV7
extern prototype_recon_block(vp8_recon_b_neon);
@@ -49,6 +51,7 @@ extern prototype_copy_block(vp8_copy_mem8x8_neon);
extern prototype_copy_block(vp8_copy_mem8x4_neon);
extern prototype_copy_block(vp8_copy_mem16x16_neon);
+#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_recon_recon
#define vp8_recon_recon vp8_recon_b_neon
@@ -67,5 +70,6 @@ extern prototype_copy_block(vp8_copy_mem16x16_neon);
#undef vp8_recon_copy16x16
#define vp8_recon_copy16x16 vp8_copy_mem16x16_neon
#endif
+#endif
#endif
diff --git a/vp8/common/arm/subpixel_arm.h b/vp8/common/arm/subpixel_arm.h
index 53600e547..6288538d0 100644
--- a/vp8/common/arm/subpixel_arm.h
+++ b/vp8/common/arm/subpixel_arm.h
@@ -22,6 +22,7 @@ extern prototype_subpixel_predict(vp8_bilinear_predict8x8_armv6);
extern prototype_subpixel_predict(vp8_bilinear_predict8x4_armv6);
extern prototype_subpixel_predict(vp8_bilinear_predict4x4_armv6);
+#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_subpix_sixtap16x16
#define vp8_subpix_sixtap16x16 vp8_sixtap_predict16x16_armv6
@@ -46,6 +47,7 @@ extern prototype_subpixel_predict(vp8_bilinear_predict4x4_armv6);
#undef vp8_subpix_bilinear4x4
#define vp8_subpix_bilinear4x4 vp8_bilinear_predict4x4_armv6
#endif
+#endif
#if HAVE_ARMV7
extern prototype_subpixel_predict(vp8_sixtap_predict16x16_neon);
@@ -57,6 +59,7 @@ extern prototype_subpixel_predict(vp8_bilinear_predict8x8_neon);
extern prototype_subpixel_predict(vp8_bilinear_predict8x4_neon);
extern prototype_subpixel_predict(vp8_bilinear_predict4x4_neon);
+#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_subpix_sixtap16x16
#define vp8_subpix_sixtap16x16 vp8_sixtap_predict16x16_neon
@@ -81,5 +84,6 @@ extern prototype_subpixel_predict(vp8_bilinear_predict4x4_neon);
#undef vp8_subpix_bilinear4x4
#define vp8_subpix_bilinear4x4 vp8_bilinear_predict4x4_neon
#endif
+#endif
#endif
diff --git a/vp8/common/arm/systemdependent.c b/vp8/common/arm/systemdependent.c
deleted file mode 100644
index 1eed97e02..000000000
--- a/vp8/common/arm/systemdependent.c
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "vpx_ports/config.h"
-#include "g_common.h"
-#include "pragmas.h"
-#include "subpixel.h"
-#include "loopfilter.h"
-#include "recon.h"
-#include "idct.h"
-#include "onyxc_int.h"
-
-void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x);
-extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x);
-extern void vp8_build_intra_predictors_mby_neon(MACROBLOCKD *x);
-
-void (*vp8_build_intra_predictors_mby_s_ptr)(MACROBLOCKD *x);
-extern void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x);
-extern void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x);
-
-void vp8_machine_specific_config(VP8_COMMON *ctx)
-{
-#if CONFIG_RUNTIME_CPU_DETECT
- VP8_COMMON_RTCD *rtcd = &ctx->rtcd;
-
-#if HAVE_ARMV7
- rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_neon;
- rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_neon;
- rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_neon;
- rtcd->subpix.sixtap4x4 = vp8_sixtap_predict_neon;
- rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_neon;
- rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_neon;
- rtcd->subpix.bilinear8x4 = vp8_bilinear_predict8x4_neon;
- rtcd->subpix.bilinear4x4 = vp8_bilinear_predict4x4_neon;
-
- rtcd->idct.idct1 = vp8_short_idct4x4llm_1_neon;
- rtcd->idct.idct16 = vp8_short_idct4x4llm_neon;
- rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_neon;
- rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_neon;
-
- rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_neon;
- rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_neon;
- rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_neon;
- rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_neon;
- rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_neon;
- rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_neon;
- rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_neon;
- rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_neon;
-
- rtcd->recon.copy16x16 = vp8_copy_mem16x16_neon;
- rtcd->recon.copy8x8 = vp8_copy_mem8x8_neon;
- rtcd->recon.copy8x4 = vp8_copy_mem8x4_neon;
- rtcd->recon.recon = vp8_recon_b_neon;
- rtcd->recon.recon2 = vp8_recon2b_neon;
- rtcd->recon.recon4 = vp8_recon4b_neon;
-#elif HAVE_ARMV6
-
- rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_armv6;
- rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_armv6;
- rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_armv6;
- rtcd->subpix.sixtap4x4 = vp8_sixtap_predict_armv6;
- rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_armv6;
- rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_armv6;
- rtcd->subpix.bilinear8x4 = vp8_bilinear_predict8x4_armv6;
- rtcd->subpix.bilinear4x4 = vp8_bilinear_predict4x4_armv6;
-
- rtcd->idct.idct1 = vp8_short_idct4x4llm_1_v6;
- rtcd->idct.idct16 = vp8_short_idct4x4llm_v6_dual;
- rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_armv6;
- rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_armv6;
-
- rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_armv6;
- rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_armv6;
- rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_armv6;
- rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_armv6;
- rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_armv6;
- rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_armv6;
- rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_armv6;
- rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_armv6;
-
- rtcd->recon.copy16x16 = vp8_copy_mem16x16_v6;
- rtcd->recon.copy8x8 = vp8_copy_mem8x8_v6;
- rtcd->recon.copy8x4 = vp8_copy_mem8x4_v6;
- rtcd->recon.recon = vp8_recon_b_armv6;
- rtcd->recon.recon2 = vp8_recon2b_armv6;
- rtcd->recon.recon4 = vp8_recon4b_armv6;
-#else
-//pure c
- rtcd->idct.idct1 = vp8_short_idct4x4llm_1_c;
- rtcd->idct.idct16 = vp8_short_idct4x4llm_c;
- rtcd->idct.idct1_scalar = vp8_dc_only_idct_c;
- rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_c;
- rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_c;
-
- rtcd->recon.copy16x16 = vp8_copy_mem16x16_c;
- rtcd->recon.copy8x8 = vp8_copy_mem8x8_c;
- rtcd->recon.copy8x4 = vp8_copy_mem8x4_c;
- rtcd->recon.recon = vp8_recon_b_c;
- rtcd->recon.recon2 = vp8_recon2b_c;
- rtcd->recon.recon4 = vp8_recon4b_c;
-
- rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_c;
- rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_c;
- rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_c;
- rtcd->subpix.sixtap4x4 = vp8_sixtap_predict_c;
- rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_c;
- rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_c;
- rtcd->subpix.bilinear8x4 = vp8_bilinear_predict8x4_c;
- rtcd->subpix.bilinear4x4 = vp8_bilinear_predict4x4_c;
-
- rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_c;
- rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_c;
- rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_c;
- rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_c;
- rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_c;
- rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_c;
- rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_c;
- rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_c;
-#endif
-
-#if CONFIG_POSTPROC || (CONFIG_VP8_ENCODER && CONFIG_PSNR)
- rtcd->postproc.down = vp8_mbpost_proc_down_c;
- rtcd->postproc.across = vp8_mbpost_proc_across_ip_c;
- rtcd->postproc.downacross = vp8_post_proc_down_and_across_c;
- rtcd->postproc.addnoise = vp8_plane_add_noise_c;
-#endif
-#endif
-
-#if HAVE_ARMV7
- vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby_neon;
- vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s_neon;
-#elif HAVE_ARMV6
- vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby;
- vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s;
-#else
- vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby;
- vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s;
-
-#endif
-
-}
diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c
index c04e31ffe..0ef375e33 100644
--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c
@@ -18,6 +18,7 @@
#include "onyxc_int.h"
extern void vp8_arch_x86_common_init(VP8_COMMON *ctx);
+extern void vp8_arch_arm_common_init(VP8_COMMON *ctx);
void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x);
extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x);
@@ -77,4 +78,8 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
vp8_arch_x86_common_init(ctx);
#endif
+#if ARCH_ARM
+ vp8_arch_arm_common_init(ctx);
+#endif
+
}
diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h
index 4966002f5..d12143d4d 100644
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@@ -74,6 +74,7 @@ typedef struct VP8_COMMON_RTCD
vp8_subpix_rtcd_vtable_t subpix;
vp8_loopfilter_rtcd_vtable_t loopfilter;
vp8_postproc_rtcd_vtable_t postproc;
+ int flags;
#else
int unused;
#endif
diff --git a/vp8/decoder/arm/arm_dsystemdependent.c b/vp8/decoder/arm/arm_dsystemdependent.c
new file mode 100644
index 000000000..77cff47db
--- /dev/null
+++ b/vp8/decoder/arm/arm_dsystemdependent.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_ports/config.h"
+#include "vpx_ports/arm.h"
+#include "blockd.h"
+#include "pragmas.h"
+#include "postproc.h"
+#include "dboolhuff.h"
+#include "dequantize.h"
+#include "onyxd_int.h"
+/* Select ARMv6/NEON dequantizers for the decoder from the cached CPU flags (RTCD builds only). */
+void vp8_arch_arm_decode_init(VP8D_COMP *pbi)
+{
+#if CONFIG_RUNTIME_CPU_DETECT
+ int flags = pbi->common.rtcd.flags; /* capability bits stored in the common RTCD struct */
+ int has_edsp = flags & HAS_EDSP; /* not referenced in this function */
+ int has_media = flags & HAS_MEDIA;
+ int has_neon = flags & HAS_NEON;
+
+#if HAVE_ARMV6
+ if (has_media)
+ {
+ pbi->dequant.block = vp8_dequantize_b_v6;
+ pbi->dequant.idct_add = vp8_dequant_idct_add_v6;
+ pbi->dequant.dc_idct_add = vp8_dequant_dc_idct_add_v6;
+ pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_v6;
+ pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_v6;
+ pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_v6;
+#if 0 //For use with RTCD, when implemented
+ pbi->dboolhuff.start = vp8dx_start_decode_c;
+ pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c;
+ pbi->dboolhuff.debool = vp8dx_decode_bool_c;
+ pbi->dboolhuff.devalue = vp8dx_decode_value_c;
+#endif /* 0 */
+ }
+#endif /* HAVE_ARMV6 */
+
+#if HAVE_ARMV7
+ if (has_neon) /* runs after the ARMv6 block, so NEON entries win when both are set */
+ {
+ pbi->dequant.block = vp8_dequantize_b_neon;
+ pbi->dequant.idct_add = vp8_dequant_idct_add_neon;
+ /*This is not used: NEON always dequants two blocks at once.
+ pbi->dequant.dc_idct_add = vp8_dequant_dc_idct_add_neon;*/
+ pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_neon;
+ pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_neon;
+ pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_neon;
+#if 0 //For use with RTCD, when implemented
+ pbi->dboolhuff.start = vp8dx_start_decode_c;
+ pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c;
+ pbi->dboolhuff.debool = vp8dx_decode_bool_c;
+ pbi->dboolhuff.devalue = vp8dx_decode_value_c;
+#endif /* 0 */
+ }
+#endif /* HAVE_ARMV7 */
+#endif /* CONFIG_RUNTIME_CPU_DETECT */
+}
diff --git a/vp8/decoder/arm/dequantize_arm.h b/vp8/decoder/arm/dequantize_arm.h
index 40151e01a..b7d800d26 100644
--- a/vp8/decoder/arm/dequantize_arm.h
+++ b/vp8/decoder/arm/dequantize_arm.h
@@ -20,6 +20,7 @@ extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block_v6)
extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_v6);
extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_v6);
+#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_dequant_block
#define vp8_dequant_block vp8_dequantize_b_v6
@@ -38,6 +39,7 @@ extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_v6);
#undef vp8_dequant_idct_add_uv_block
#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_v6
#endif
+#endif
#if HAVE_ARMV7
extern prototype_dequant_block(vp8_dequantize_b_neon);
@@ -47,6 +49,7 @@ extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block_neo
extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_neon);
extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_neon);
+#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_dequant_block
#define vp8_dequant_block vp8_dequantize_b_neon
@@ -65,5 +68,6 @@ extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_neon);
#undef vp8_dequant_idct_add_uv_block
#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_neon
#endif
+#endif
#endif
diff --git a/vp8/decoder/arm/dsystemdependent.c b/vp8/decoder/arm/dsystemdependent.c
deleted file mode 100644
index 9dcf7b657..000000000
--- a/vp8/decoder/arm/dsystemdependent.c
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "vpx_ports/config.h"
-#include "blockd.h"
-#include "pragmas.h"
-#include "postproc.h"
-#include "dboolhuff.h"
-#include "dequantize.h"
-#include "onyxd_int.h"
-
-void vp8_dmachine_specific_config(VP8D_COMP *pbi)
-{
-#if CONFIG_RUNTIME_CPU_DETECT
- pbi->mb.rtcd = &pbi->common.rtcd;
-#if HAVE_ARMV7
- pbi->dequant.block = vp8_dequantize_b_neon;
- pbi->dboolhuff.start = vp8dx_start_decode_c;
- pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c;
- pbi->dboolhuff.debool = vp8dx_decode_bool_c;
- pbi->dboolhuff.devalue = vp8dx_decode_value_c;
-
-#elif HAVE_ARMV6
- pbi->dequant.block = vp8_dequantize_b_v6;
- pbi->dboolhuff.start = vp8dx_start_decode_c;
- pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c;
- pbi->dboolhuff.debool = vp8dx_decode_bool_c;
- pbi->dboolhuff.devalue = vp8dx_decode_value_c;
-#endif
-#endif
-}
diff --git a/vp8/decoder/generic/dsystemdependent.c b/vp8/decoder/generic/dsystemdependent.c
index 60f2af5b8..84de7af43 100644
--- a/vp8/decoder/generic/dsystemdependent.c
+++ b/vp8/decoder/generic/dsystemdependent.c
@@ -14,6 +14,7 @@
#include "onyxd_int.h"
extern void vp8_arch_x86_decode_init(VP8D_COMP *pbi);
+extern void vp8_arch_arm_decode_init(VP8D_COMP *pbi);
void vp8_dmachine_specific_config(VP8D_COMP *pbi)
{
@@ -37,4 +38,8 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi)
#if ARCH_X86 || ARCH_X86_64
vp8_arch_x86_decode_init(pbi);
#endif
+
+#if ARCH_ARM
+ vp8_arch_arm_decode_init(pbi);
+#endif
}
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index 884c38da0..b5a6e3e85 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -30,6 +30,9 @@
#include "systemdependent.h"
#include "vpx_ports/vpx_timer.h"
#include "detokenize.h"
+#if ARCH_ARM
+#include "vpx_ports/arm.h"
+#endif
extern void vp8_init_loop_filter(VP8_COMMON *cm);
extern void vp8cx_init_de_quantizer(VP8D_COMP *pbi);
@@ -224,7 +227,6 @@ int vp8dx_set_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_C
#if HAVE_ARMV7
extern void vp8_push_neon(INT64 *store);
extern void vp8_pop_neon(INT64 *store);
-static INT64 dx_store_reg[8];
#endif
static int get_free_fb (VP8_COMMON *cm)
@@ -312,6 +314,9 @@ static int swap_frame_buffers (VP8_COMMON *cm)
int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsigned char *source, INT64 time_stamp)
{
+#if HAVE_ARMV7
+ INT64 dx_store_reg[8];
+#endif
VP8D_COMP *pbi = (VP8D_COMP *) ptr;
VP8_COMMON *cm = &pbi->common;
int retcode = 0;
@@ -327,10 +332,27 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
pbi->common.error.error_code = VPX_CODEC_OK;
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_push_neon(dx_store_reg);
+ }
+#endif
+
cm->new_fb_idx = get_free_fb (cm);
if (setjmp(pbi->common.error.jmp))
{
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_pop_neon(dx_store_reg);
+ }
+#endif
pbi->common.error.setjmp = 0;
if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0)
cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
@@ -339,10 +361,6 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
pbi->common.error.setjmp = 1;
-#if HAVE_ARMV7
- vp8_push_neon(dx_store_reg);
-#endif
-
vpx_usec_timer_start(&timer);
//cm->current_video_frame++;
@@ -354,7 +372,12 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
if (retcode < 0)
{
#if HAVE_ARMV7
- vp8_pop_neon(dx_store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_pop_neon(dx_store_reg);
+ }
#endif
pbi->common.error.error_code = VPX_CODEC_ERROR;
pbi->common.error.setjmp = 0;
@@ -367,6 +390,14 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
{
if (swap_frame_buffers (cm))
{
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_pop_neon(dx_store_reg);
+ }
+#endif
pbi->common.error.error_code = VPX_CODEC_ERROR;
pbi->common.error.setjmp = 0;
return -1;
@@ -375,6 +406,14 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
{
if (swap_frame_buffers (cm))
{
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_pop_neon(dx_store_reg);
+ }
+#endif
pbi->common.error.error_code = VPX_CODEC_ERROR;
pbi->common.error.setjmp = 0;
return -1;
@@ -455,7 +494,12 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
#endif
#if HAVE_ARMV7
- vp8_pop_neon(dx_store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_pop_neon(dx_store_reg);
+ }
#endif
pbi->common.error.setjmp = 0;
return retcode;
diff --git a/vp8/encoder/arm/arm_csystemdependent.c b/vp8/encoder/arm/arm_csystemdependent.c
new file mode 100644
index 000000000..8736fcf1d
--- /dev/null
+++ b/vp8/encoder/arm/arm_csystemdependent.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_ports/config.h"
+#include "vpx_ports/arm.h"
+#include "variance.h"
+#include "onyx_int.h"
+
+extern void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
+extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
+extern void vpxyv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
+/* Install ARM-optimized encoder routines: RTCD table entries and the partial-frame copy hook. */
+void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
+{
+#if CONFIG_RUNTIME_CPU_DETECT
+ int flags = cpi->common.rtcd.flags; /* capability bits stored in the common RTCD struct */
+ int has_edsp = flags & HAS_EDSP; /* not referenced in this function */
+ int has_media = flags & HAS_MEDIA;
+ int has_neon = flags & HAS_NEON;
+
+#if HAVE_ARMV6
+ if (has_media)
+ {
+ /*cpi->rtcd.variance.sad16x16 = vp8_sad16x16_c;
+ cpi->rtcd.variance.sad16x8 = vp8_sad16x8_c;
+ cpi->rtcd.variance.sad8x16 = vp8_sad8x16_c;
+ cpi->rtcd.variance.sad8x8 = vp8_sad8x8_c;
+ cpi->rtcd.variance.sad4x4 = vp8_sad4x4_c;*/
+
+ /*cpi->rtcd.variance.var4x4 = vp8_variance4x4_c;
+ cpi->rtcd.variance.var8x8 = vp8_variance8x8_c;
+ cpi->rtcd.variance.var8x16 = vp8_variance8x16_c;
+ cpi->rtcd.variance.var16x8 = vp8_variance16x8_c;
+ cpi->rtcd.variance.var16x16 = vp8_variance16x16_c;*/
+
+ /*cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c;
+ cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_c;
+ cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c;
+ cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c;
+ cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_c;*/
+
+ /*cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c;
+ cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;*/
+
+ /*cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_c;
+ cpi->rtcd.variance.get8x8var = vp8_get8x8var_c;
+ cpi->rtcd.variance.get16x16var = vp8_get16x16var_c;;
+ cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_c;*/
+
+ /*cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c;
+ cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_c;
+ cpi->rtcd.fdct.fast4x4 = vp8_fast_fdct4x4_c;
+ cpi->rtcd.fdct.fast8x4 = vp8_fast_fdct8x4_c;*/
+ cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_armv6;
+
+ /*cpi->rtcd.encodemb.berr = vp8_block_error_c;
+ cpi->rtcd.encodemb.mberr = vp8_mbblock_error_c;
+ cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_c;
+ cpi->rtcd.encodemb.subb = vp8_subtract_b_c;
+ cpi->rtcd.encodemb.submby = vp8_subtract_mby_c;
+ cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_c;*/
+
+ /*cpi->rtcd.quantize.quantb = vp8_regular_quantize_b;
+ cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_c;*/
+ }
+#endif /* HAVE_ARMV6 */
+
+#if HAVE_ARMV7
+ if (has_neon) /* runs after the ARMv6 block, so NEON entries win when both are set */
+ {
+ cpi->rtcd.variance.sad16x16 = vp8_sad16x16_neon;
+ cpi->rtcd.variance.sad16x8 = vp8_sad16x8_neon;
+ cpi->rtcd.variance.sad8x16 = vp8_sad8x16_neon;
+ cpi->rtcd.variance.sad8x8 = vp8_sad8x8_neon;
+ cpi->rtcd.variance.sad4x4 = vp8_sad4x4_neon;
+
+ /*cpi->rtcd.variance.var4x4 = vp8_variance4x4_c;*/
+ cpi->rtcd.variance.var8x8 = vp8_variance8x8_neon;
+ cpi->rtcd.variance.var8x16 = vp8_variance8x16_neon;
+ cpi->rtcd.variance.var16x8 = vp8_variance16x8_neon;
+ cpi->rtcd.variance.var16x16 = vp8_variance16x16_neon;
+
+ /*cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c;*/
+ cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_neon;
+ /*cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c;
+ cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c;*/
+ cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_neon;
+
+ cpi->rtcd.variance.mse16x16 = vp8_mse16x16_neon;
+ /*cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;*/
+
+ cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_neon;
+ /*cpi->rtcd.variance.get8x8var = vp8_get8x8var_c;
+ cpi->rtcd.variance.get16x16var = vp8_get16x16var_c;*/
+ cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_neon;
+
+ cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_neon;
+ cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_neon;
+ cpi->rtcd.fdct.fast4x4 = vp8_fast_fdct4x4_neon;
+ cpi->rtcd.fdct.fast8x4 = vp8_fast_fdct8x4_neon;
+ cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_neon;
+
+ /*cpi->rtcd.encodemb.berr = vp8_block_error_c;
+ cpi->rtcd.encodemb.mberr = vp8_mbblock_error_c;
+ cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_c;*/
+ cpi->rtcd.encodemb.subb = vp8_subtract_b_neon;
+ cpi->rtcd.encodemb.submby = vp8_subtract_mby_neon;
+ cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_neon;
+
+ /*cpi->rtcd.quantize.quantb = vp8_regular_quantize_b;
+ cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_c;*/
+ /* The neon quantizer has not been updated to match the new exact
+ * quantizer introduced in commit e04e2935
+ */
+ /*cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_neon;*/
+ }
+#endif /* HAVE_ARMV7 */
+#endif /* CONFIG_RUNTIME_CPU_DETECT: closed here so the hook below is set in static builds too */
+
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (has_neon)
+#endif /* CONFIG_RUNTIME_CPU_DETECT */
+ { /* without RTCD this block runs unconditionally */
+ vp8_yv12_copy_partial_frame_ptr = vpxyv12_copy_partial_frame_neon;
+ }
+#endif /* HAVE_ARMV7 */
+}
diff --git a/vp8/encoder/arm/neon/boolhuff_armv7.asm b/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
index 9c4823c51..e78dc3322 100644
--- a/vp8/encoder/arm/neon/boolhuff_armv7.asm
+++ b/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
@@ -205,17 +205,10 @@ token_count_lt_zero_se
ldr r5, [r0, #vp8_writer_range]
ldr r3, [r0, #vp8_writer_count]
- ; reverse the stream of bits to be packed. Normally
- ; the most significant bit is peeled off and compared
- ; in the form of (v >> --n) & 1. ARM architecture has
- ; the ability to set a flag based on the value of the
- ; bit shifted off the bottom of the register. To make
- ; that happen the bitstream is reversed.
- rbit r11, r1
rsb r4, r10, #32 ; 32-n
; v is kept in r1 during the token pack loop
- lsr r1, r11, r4 ; v >>= 32 - n
+ lsl r1, r1, r4 ; r1 = v << 32 - n
encode_value_loop
sub r7, r5, #1 ; range-1
@@ -223,7 +216,7 @@ encode_value_loop
; Decisions are made based on the bit value shifted
; off of v, so set a flag here based on this.
; This value is refered to as "bb"
- lsrs r1, r1, #1 ; bit = v >> n
+ lsls r1, r1, #1 ; bit = v >> n
mov r4, r7, lsl #7 ; ((range-1) * 128)
mov r7, #1
diff --git a/vp8/encoder/arm/neon/vp8_packtokens_armv7.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm
index c19ac8250..3233d2a96 100644
--- a/vp8/encoder/arm/neon/vp8_packtokens_armv7.asm
+++ b/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm
@@ -9,7 +9,7 @@
;
- EXPORT |vp8cx_pack_tokens_armv7|
+ EXPORT |vp8cx_pack_tokens_armv5|
INCLUDE vpx_vp8_enc_asm_offsets.asm
@@ -25,7 +25,7 @@
; r3 vp8_coef_encodings
; s0 vp8_extra_bits
; s1 vp8_coef_tree
-|vp8cx_pack_tokens_armv7| PROC
+|vp8cx_pack_tokens_armv5| PROC
push {r4-r11, lr}
; Add size of xcount * sizeof (TOKENEXTRA) to get stop
@@ -57,18 +57,11 @@ while_p_lt_stop
movne lr, #2 ; i = 2
subne r8, r8, #1 ; --n
- ; reverse the stream of bits to be packed. Normally
- ; the most significant bit is peeled off and compared
- ; in the form of (v >> --n) & 1. ARM architecture has
- ; the ability to set a flag based on the value of the
- ; bit shifted off the bottom of the register. To make
- ; that happen the bitstream is reversed.
- rbit r12, r6
rsb r4, r8, #32 ; 32-n
ldr r10, [sp, #52] ; vp8_coef_tree
; v is kept in r12 during the token pack loop
- lsr r12, r12, r4 ; v >>= 32 - n
+ lsl r12, r6, r4 ; r12 = v << 32 - n
; loop start
token_loop
@@ -78,7 +71,7 @@ token_loop
; Decisions are made based on the bit value shifted
; off of v, so set a flag here based on this.
; This value is refered to as "bb"
- lsrs r12, r12, #1 ; bb = v >> n
+ lsls r12, r12, #1 ; bb = v >> n
mul r4, r4, r7 ; ((range-1) * pp[i>>1]))
; bb can only be 0 or 1. So only execute this statement
@@ -172,16 +165,15 @@ token_count_lt_zero
ldr r10, [r12, #vp8_extra_bit_struct_tree]
str r10, [sp, #4] ; b->tree
- rbit r12, r7 ; reverse v
rsb r4, r8, #32
- lsr r12, r12, r4
+ lsl r12, r7, r4
mov lr, #0 ; i = 0
extra_bits_loop
ldrb r4, [r9, lr, asr #1] ; pp[i>>1]
sub r7, r5, #1 ; range-1
- lsrs r12, r12, #1 ; v >> n
+ lsls r12, r12, #1 ; v >> n
mul r4, r4, r7 ; (range-1) * pp[i>>1]
addcs lr, lr, #1 ; i + bb
diff --git a/vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm
index 075645586..a9b552ae1 100644
--- a/vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm
+++ b/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm
@@ -9,7 +9,7 @@
;
- EXPORT |vp8cx_pack_mb_row_tokens_armv7|
+ EXPORT |vp8cx_pack_mb_row_tokens_armv5|
INCLUDE vpx_vp8_enc_asm_offsets.asm
@@ -25,7 +25,7 @@
; r3 vp8_extra_bits
; s0 vp8_coef_tree
-|vp8cx_pack_mb_row_tokens_armv7| PROC
+|vp8cx_pack_mb_row_tokens_armv5| PROC
push {r4-r11, lr}
sub sp, sp, #24
@@ -78,18 +78,11 @@ while_p_lt_stop
movne lr, #2 ; i = 2
subne r8, r8, #1 ; --n
- ; reverse the stream of bits to be packed. Normally
- ; the most significant bit is peeled off and compared
- ; in the form of (v >> --n) & 1. ARM architecture has
- ; the ability to set a flag based on the value of the
- ; bit shifted off the bottom of the register. To make
- ; that happen the bitstream is reversed.
- rbit r12, r6
rsb r4, r8, #32 ; 32-n
ldr r10, [sp, #60] ; vp8_coef_tree
; v is kept in r12 during the token pack loop
- lsr r12, r12, r4 ; v >>= 32 - n
+ lsl r12, r6, r4 ; r12 = v << 32 - n
; loop start
token_loop
@@ -99,7 +92,7 @@ token_loop
; Decisions are made based on the bit value shifted
; off of v, so set a flag here based on this.
; This value is refered to as "bb"
- lsrs r12, r12, #1 ; bb = v >> n
+ lsls r12, r12, #1 ; bb = v >> n
mul r4, r4, r7 ; ((range-1) * pp[i>>1]))
; bb can only be 0 or 1. So only execute this statement
@@ -193,16 +186,15 @@ token_count_lt_zero
ldr r10, [r12, #vp8_extra_bit_struct_tree]
str r10, [sp, #4] ; b->tree
- rbit r12, r7 ; reverse v
rsb r4, r8, #32
- lsr r12, r12, r4
+ lsl r12, r7, r4
mov lr, #0 ; i = 0
extra_bits_loop
ldrb r4, [r9, lr, asr #1] ; pp[i>>1]
sub r7, r5, #1 ; range-1
- lsrs r12, r12, #1 ; v >> n
+ lsls r12, r12, #1 ; v >> n
mul r4, r4, r7 ; (range-1) * pp[i>>1]
addcs lr, lr, #1 ; i + bb
diff --git a/vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm
index 10a3d9851..0835164e5 100644
--- a/vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm
+++ b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm
@@ -9,7 +9,7 @@
;
- EXPORT |vp8cx_pack_tokens_into_partitions_armv7|
+ EXPORT |vp8cx_pack_tokens_into_partitions_armv5|
INCLUDE vpx_vp8_enc_asm_offsets.asm
@@ -27,7 +27,7 @@
; s1 vp8_extra_bits,
; s2 const vp8_tree_index *,
-|vp8cx_pack_tokens_into_partitions_armv7| PROC
+|vp8cx_pack_tokens_into_partitions_armv5| PROC
push {r4-r11, lr}
sub sp, sp, #44
@@ -106,18 +106,11 @@ while_p_lt_stop
movne lr, #2 ; i = 2
subne r8, r8, #1 ; --n
- ; reverse the stream of bits to be packed. Normally
- ; the most significant bit is peeled off and compared
- ; in the form of (v >> --n) & 1. ARM architecture has
- ; the ability to set a flag based on the value of the
- ; bit shifted off the bottom of the register. To make
- ; that happen the bitstream is reversed.
- rbit r12, r6
rsb r4, r8, #32 ; 32-n
ldr r10, [sp, #88] ; vp8_coef_tree
; v is kept in r12 during the token pack loop
- lsr r12, r12, r4 ; v >>= 32 - n
+ lsl r12, r6, r4 ; r12 = v << 32 - n
; loop start
token_loop
@@ -127,7 +120,7 @@ token_loop
; Decisions are made based on the bit value shifted
; off of v, so set a flag here based on this.
; This value is refered to as "bb"
- lsrs r12, r12, #1 ; bb = v >> n
+ lsls r12, r12, #1 ; bb = v >> n
mul r4, r4, r7 ; ((range-1) * pp[i>>1]))
; bb can only be 0 or 1. So only execute this statement
@@ -221,16 +214,15 @@ token_count_lt_zero
ldr r10, [r12, #vp8_extra_bit_struct_tree]
str r10, [sp, #4] ; b->tree
- rbit r12, r7 ; reverse v
rsb r4, r8, #32
- lsr r12, r12, r4
+ lsl r12, r7, r4
mov lr, #0 ; i = 0
extra_bits_loop
ldrb r4, [r9, lr, asr #1] ; pp[i>>1]
sub r7, r5, #1 ; range-1
- lsrs r12, r12, #1 ; v >> n
+ lsls r12, r12, #1 ; v >> n
mul r4, r4, r7 ; (range-1) * pp[i>>1]
addcs lr, lr, #1 ; i + bb
diff --git a/vp8/encoder/arm/csystemdependent.c b/vp8/encoder/arm/csystemdependent.c
deleted file mode 100644
index 8d70d635a..000000000
--- a/vp8/encoder/arm/csystemdependent.c
+++ /dev/null
@@ -1,164 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "vpx_ports/config.h"
-#include "variance.h"
-#include "onyx_int.h"
-
-void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
-extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
-extern void vpxyv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
-
-void vp8_cmachine_specific_config(VP8_COMP *cpi)
-{
-#if CONFIG_RUNTIME_CPU_DETECT
- cpi->rtcd.common = &cpi->common.rtcd;
-
-#if HAVE_ARMV7
- cpi->rtcd.variance.sad16x16 = vp8_sad16x16_neon;
- cpi->rtcd.variance.sad16x8 = vp8_sad16x8_neon;
- cpi->rtcd.variance.sad8x16 = vp8_sad8x16_neon;
- cpi->rtcd.variance.sad8x8 = vp8_sad8x8_neon;
- cpi->rtcd.variance.sad4x4 = vp8_sad4x4_neon;
-
- cpi->rtcd.variance.var4x4 = vp8_variance4x4_c;
- cpi->rtcd.variance.var8x8 = vp8_variance8x8_neon;
- cpi->rtcd.variance.var8x16 = vp8_variance8x16_neon;
- cpi->rtcd.variance.var16x8 = vp8_variance16x8_neon;
- cpi->rtcd.variance.var16x16 = vp8_variance16x16_neon;
-
- cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c;
- cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_neon;
- cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c;
- cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c;
- cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_neon;
-
- cpi->rtcd.variance.mse16x16 = vp8_mse16x16_neon;
- cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;
-
- cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_neon;
- cpi->rtcd.variance.get8x8var = vp8_get8x8var_c;
- cpi->rtcd.variance.get16x16var = vp8_get16x16var_c;;
- cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_neon;
-
- cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_neon;
- cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_neon;
- cpi->rtcd.fdct.fast4x4 = vp8_fast_fdct4x4_neon;
- cpi->rtcd.fdct.fast8x4 = vp8_fast_fdct8x4_neon;
- cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_neon;
-
- cpi->rtcd.encodemb.berr = vp8_block_error_c;
- cpi->rtcd.encodemb.mberr = vp8_mbblock_error_c;
- cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_c;
- cpi->rtcd.encodemb.subb = vp8_subtract_b_neon;
- cpi->rtcd.encodemb.submby = vp8_subtract_mby_neon;
- cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_neon;
-
- cpi->rtcd.quantize.quantb = vp8_regular_quantize_b;
- cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_c;
- /* The neon quantizer has not been updated to match the new exact
- * quantizer introduced in commit e04e2935
- */
- /*cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_neon;*/
-#elif HAVE_ARMV6
- cpi->rtcd.variance.sad16x16 = vp8_sad16x16_c;
- cpi->rtcd.variance.sad16x8 = vp8_sad16x8_c;
- cpi->rtcd.variance.sad8x16 = vp8_sad8x16_c;
- cpi->rtcd.variance.sad8x8 = vp8_sad8x8_c;
- cpi->rtcd.variance.sad4x4 = vp8_sad4x4_c;
-
- cpi->rtcd.variance.var4x4 = vp8_variance4x4_c;
- cpi->rtcd.variance.var8x8 = vp8_variance8x8_c;
- cpi->rtcd.variance.var8x16 = vp8_variance8x16_c;
- cpi->rtcd.variance.var16x8 = vp8_variance16x8_c;
- cpi->rtcd.variance.var16x16 = vp8_variance16x16_c;
-
- cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c;
- cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_c;
- cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c;
- cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c;
- cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_c;
-
- cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c;
- cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;
-
- cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_c;
- cpi->rtcd.variance.get8x8var = vp8_get8x8var_c;
- cpi->rtcd.variance.get16x16var = vp8_get16x16var_c;;
- cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_c;
-
- cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c;
- cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_c;
- cpi->rtcd.fdct.fast4x4 = vp8_fast_fdct4x4_c;
- cpi->rtcd.fdct.fast8x4 = vp8_fast_fdct8x4_c;
- cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_armv6;
-
- cpi->rtcd.encodemb.berr = vp8_block_error_c;
- cpi->rtcd.encodemb.mberr = vp8_mbblock_error_c;
- cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_c;
- cpi->rtcd.encodemb.subb = vp8_subtract_b_c;
- cpi->rtcd.encodemb.submby = vp8_subtract_mby_c;
- cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_c;
-
- cpi->rtcd.quantize.quantb = vp8_regular_quantize_b;
- cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_c;
-#else
- //pure c
- cpi->rtcd.variance.sad16x16 = vp8_sad16x16_c;
- cpi->rtcd.variance.sad16x8 = vp8_sad16x8_c;
- cpi->rtcd.variance.sad8x16 = vp8_sad8x16_c;
- cpi->rtcd.variance.sad8x8 = vp8_sad8x8_c;
- cpi->rtcd.variance.sad4x4 = vp8_sad4x4_c;
-
- cpi->rtcd.variance.var4x4 = vp8_variance4x4_c;
- cpi->rtcd.variance.var8x8 = vp8_variance8x8_c;
- cpi->rtcd.variance.var8x16 = vp8_variance8x16_c;
- cpi->rtcd.variance.var16x8 = vp8_variance16x8_c;
- cpi->rtcd.variance.var16x16 = vp8_variance16x16_c;
-
- cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c;
- cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_c;
- cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c;
- cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c;
- cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_c;
-
- cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c;
- cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;
-
- cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_c;
- cpi->rtcd.variance.get8x8var = vp8_get8x8var_c;
- cpi->rtcd.variance.get16x16var = vp8_get16x16var_c;;
- cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_c;
-
- cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c;
- cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_c;
- cpi->rtcd.fdct.fast4x4 = vp8_fast_fdct4x4_c;
- cpi->rtcd.fdct.fast8x4 = vp8_fast_fdct8x4_c;
- cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_c;
-
- cpi->rtcd.encodemb.berr = vp8_block_error_c;
- cpi->rtcd.encodemb.mberr = vp8_mbblock_error_c;
- cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_c;
- cpi->rtcd.encodemb.subb = vp8_subtract_b_c;
- cpi->rtcd.encodemb.submby = vp8_subtract_mby_c;
- cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_c;
-
- cpi->rtcd.quantize.quantb = vp8_regular_quantize_b;
- cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_c;
-#endif
-#endif
-
-#if HAVE_ARMV7
- vp8_yv12_copy_partial_frame_ptr = vpxyv12_copy_partial_frame_neon;
-#else
- vp8_yv12_copy_partial_frame_ptr = vp8_yv12_copy_partial_frame;
-#endif
-}
diff --git a/vp8/encoder/arm/dct_arm.h b/vp8/encoder/arm/dct_arm.h
index 774599bf0..41fa5d192 100644
--- a/vp8/encoder/arm/dct_arm.h
+++ b/vp8/encoder/arm/dct_arm.h
@@ -15,9 +15,11 @@
#if HAVE_ARMV6
extern prototype_fdct(vp8_short_walsh4x4_armv6);
+#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_fdct_walsh_short4x4
#define vp8_fdct_walsh_short4x4 vp8_short_walsh4x4_armv6
#endif
+#endif
#if HAVE_ARMV7
extern prototype_fdct(vp8_short_fdct4x4_neon);
@@ -26,6 +28,7 @@ extern prototype_fdct(vp8_fast_fdct4x4_neon);
extern prototype_fdct(vp8_fast_fdct8x4_neon);
extern prototype_fdct(vp8_short_walsh4x4_neon);
+#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_fdct_short4x4
#define vp8_fdct_short4x4 vp8_short_fdct4x4_neon
@@ -40,6 +43,7 @@ extern prototype_fdct(vp8_short_walsh4x4_neon);
#undef vp8_fdct_walsh_short4x4
#define vp8_fdct_walsh_short4x4 vp8_short_walsh4x4_neon
+#endif
#endif
diff --git a/vp8/encoder/arm/encodemb_arm.h b/vp8/encoder/arm/encodemb_arm.h
index eb699433f..8fe453735 100644
--- a/vp8/encoder/arm/encodemb_arm.h
+++ b/vp8/encoder/arm/encodemb_arm.h
@@ -30,6 +30,7 @@ extern prototype_submbuv(vp8_subtract_mbuv_neon);
//#undef vp8_encodemb_mbuverr
//#define vp8_encodemb_mbuverr vp8_mbuverror_c
+#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_encodemb_subb
#define vp8_encodemb_subb vp8_subtract_b_neon
@@ -38,6 +39,7 @@ extern prototype_submbuv(vp8_subtract_mbuv_neon);
#undef vp8_encodemb_submbuv
#define vp8_encodemb_submbuv vp8_subtract_mbuv_neon
+#endif
#endif
diff --git a/vp8/encoder/arm/variance_arm.h b/vp8/encoder/arm/variance_arm.h
index 859e43f51..fb9dd5a5b 100644
--- a/vp8/encoder/arm/variance_arm.h
+++ b/vp8/encoder/arm/variance_arm.h
@@ -38,6 +38,7 @@ extern prototype_sad(vp8_get16x16pred_error_neon);
//extern prototype_variance2(vp8_get16x16var_c);
extern prototype_sad(vp8_get4x4sse_cs_neon);
+#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_variance_sad4x4
#define vp8_variance_sad4x4 vp8_sad4x4_neon
@@ -100,6 +101,7 @@ extern prototype_sad(vp8_get4x4sse_cs_neon);
#undef vp8_variance_get4x4sse_cs
#define vp8_variance_get4x4sse_cs vp8_get4x4sse_cs_neon
+#endif
#endif
diff --git a/vp8/encoder/bitstream.h b/vp8/encoder/bitstream.h
index 559631338..f5d148ea4 100644
--- a/vp8/encoder/bitstream.h
+++ b/vp8/encoder/bitstream.h
@@ -12,25 +12,25 @@
#ifndef __INC_BITSTREAM_H
#define __INC_BITSTREAM_H
-#if HAVE_ARMV7
-void vp8cx_pack_tokens_armv7(vp8_writer *w, const TOKENEXTRA *p, int xcount,
+#if HAVE_ARMV5TE
+void vp8cx_pack_tokens_armv5(vp8_writer *w, const TOKENEXTRA *p, int xcount,
vp8_token *,
vp8_extra_bit_struct *,
const vp8_tree_index *);
-void vp8cx_pack_tokens_into_partitions_armv7(VP8_COMP *, unsigned char *, int , int *,
+void vp8cx_pack_tokens_into_partitions_armv5(VP8_COMP *, unsigned char *, int , int *,
vp8_token *,
vp8_extra_bit_struct *,
const vp8_tree_index *);
-void vp8cx_pack_mb_row_tokens_armv7(VP8_COMP *cpi, vp8_writer *w,
+void vp8cx_pack_mb_row_tokens_armv5(VP8_COMP *cpi, vp8_writer *w,
vp8_token *,
vp8_extra_bit_struct *,
const vp8_tree_index *);
# define pack_tokens(a,b,c) \
- vp8cx_pack_tokens_armv7(a,b,c,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
+ vp8cx_pack_tokens_armv5(a,b,c,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
# define pack_tokens_into_partitions(a,b,c,d) \
- vp8cx_pack_tokens_into_partitions_armv7(a,b,c,d,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
+ vp8cx_pack_tokens_into_partitions_armv5(a,b,c,d,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
# define pack_mb_row_tokens(a,b) \
- vp8cx_pack_mb_row_tokens_armv7(a,b,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
+ vp8cx_pack_mb_row_tokens_armv5(a,b,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
#else
# define pack_tokens(a,b,c) pack_tokens_c(a,b,c)
# define pack_tokens_into_partitions(a,b,c,d) pack_tokens_into_partitions_c(a,b,c,d)
diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c
index 1acb73d9c..520b08f51 100644
--- a/vp8/encoder/generic/csystemdependent.c
+++ b/vp8/encoder/generic/csystemdependent.c
@@ -15,6 +15,7 @@
void vp8_arch_x86_encoder_init(VP8_COMP *cpi);
+void vp8_arch_arm_encoder_init(VP8_COMP *cpi);
void (*vp8_fast_quantize_b)(BLOCK *b, BLOCKD *d);
@@ -94,4 +95,8 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
vp8_arch_x86_encoder_init(cpi);
#endif
+#if ARCH_ARM
+ vp8_arch_arm_encoder_init(cpi);
+#endif
+
}
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 53d68be52..7e1583dd9 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -31,6 +31,9 @@
#include "vpx_ports/vpx_timer.h"
#include "vpxerrors.h"
#include "temporal_filter.h"
+#if ARCH_ARM
+#include "vpx_ports/arm.h"
+#endif
#include <math.h>
#include <stdio.h>
@@ -2106,8 +2109,8 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
CHECK_MEM_ERROR(cpi->rdtok, vpx_calloc(256 * 3 / 2, sizeof(TOKENEXTRA)));
CHECK_MEM_ERROR(cpi->mb.ss, vpx_calloc(sizeof(search_site), (MAX_MVSEARCH_STEPS * 8) + 1));
- vp8_cmachine_specific_config(cpi);
vp8_create_common(&cpi->common);
+ vp8_cmachine_specific_config(cpi);
vp8_init_config((VP8_PTR)cpi, oxcf);
@@ -2852,9 +2855,20 @@ static void scale_and_extend_source(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
{
//vp8_yv12_copy_frame_ptr(sd, &cpi->scaled_source);
#if HAVE_ARMV7
- vp8_yv12_copy_src_frame_func_neon(sd, &cpi->scaled_source);
-#else
- vp8_yv12_copy_frame_ptr(sd, &cpi->scaled_source);
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_yv12_copy_src_frame_func_neon(sd, &cpi->scaled_source);
+ }
+#if CONFIG_RUNTIME_CPU_DETECT
+ else
+#endif
+#endif
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
+ {
+ vp8_yv12_copy_frame_ptr(sd, &cpi->scaled_source);
+ }
#endif
cpi->Source = &cpi->scaled_source;
@@ -4624,10 +4638,10 @@ static void Pass2Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest,
#if HAVE_ARMV7
extern void vp8_push_neon(INT64 *store);
extern void vp8_pop_neon(INT64 *store);
-static INT64 store_reg[8];
#endif
int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, INT64 time_stamp, INT64 end_time)
{
+ INT64 store_reg[8];
VP8_COMP *cpi = (VP8_COMP *) ptr;
VP8_COMMON *cm = &cpi->common;
struct vpx_usec_timer timer;
@@ -4636,7 +4650,12 @@ int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CON
return -1;
#if HAVE_ARMV7
- vp8_push_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_push_neon(store_reg);
+ }
#endif
vpx_usec_timer_start(&timer);
@@ -4645,7 +4664,12 @@ int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CON
if (cpi->source_buffer_count != 0 && cpi->source_buffer_count >= cpi->oxcf.lag_in_frames)
{
#if HAVE_ARMV7
- vp8_pop_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_pop_neon(store_reg);
+ }
#endif
return -1;
}
@@ -4686,9 +4710,20 @@ int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CON
s->source_time_stamp = time_stamp;
s->source_frame_flags = frame_flags;
#if HAVE_ARMV7
- vp8_yv12_copy_src_frame_func_neon(sd, &s->source_buffer);
-#else
- vp8_yv12_copy_frame_ptr(sd, &s->source_buffer);
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_yv12_copy_src_frame_func_neon(sd, &s->source_buffer);
+ }
+#if CONFIG_RUNTIME_CPU_DETECT
+ else
+#endif
+#endif
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
+ {
+ vp8_yv12_copy_frame_ptr(sd, &s->source_buffer);
+ }
#endif
cpi->source_buffer_count = 1;
}
@@ -4697,14 +4732,19 @@ int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CON
cpi->time_receive_data += vpx_usec_timer_elapsed(&timer);
#if HAVE_ARMV7
- vp8_pop_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_pop_neon(store_reg);
+ }
#endif
return 0;
}
int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, INT64 *time_stamp, INT64 *time_end, int flush)
{
-
+ INT64 store_reg[8];
VP8_COMP *cpi = (VP8_COMP *) ptr;
VP8_COMMON *cm = &cpi->common;
struct vpx_usec_timer tsctimer;
@@ -4715,7 +4755,12 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
return -1;
#if HAVE_ARMV7
- vp8_push_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_push_neon(store_reg);
+ }
#endif
vpx_usec_timer_start(&cmptimer);
@@ -4867,7 +4912,12 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
#endif
#if HAVE_ARMV7
- vp8_pop_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_pop_neon(store_reg);
+ }
#endif
return -1;
}
@@ -4910,7 +4960,12 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
if (!cpi)
{
#if HAVE_ARMV7
- vp8_pop_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_pop_neon(store_reg);
+ }
#endif
return 0;
}
@@ -5099,7 +5154,12 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
#endif
#if HAVE_ARMV7
- vp8_pop_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_pop_neon(store_reg);
+ }
#endif
return 0;
diff --git a/vp8/encoder/picklpf.c b/vp8/encoder/picklpf.c
index 79e07dbc0..09e8b5412 100644
--- a/vp8/encoder/picklpf.c
+++ b/vp8/encoder/picklpf.c
@@ -16,6 +16,9 @@
#include "vpx_scale/yv12extend.h"
#include "vpx_scale/vpxscale.h"
#include "alloccommon.h"
+#if ARCH_ARM
+#include "vpx_ports/arm.h"
+#endif
extern void vp8_loop_filter_frame(VP8_COMMON *cm, MACROBLOCKD *mbd, int filt_val);
extern void vp8_loop_filter_frame_yonly(VP8_COMMON *cm, MACROBLOCKD *mbd, int filt_val, int sharpness_lvl);
@@ -306,9 +309,20 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
// Make a copy of the unfiltered / processed recon buffer
#if HAVE_ARMV7
- vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(cm->frame_to_show, &cpi->last_frame_uf);
-#else
- vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cpi->last_frame_uf);
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(cm->frame_to_show, &cpi->last_frame_uf);
+ }
+#if CONFIG_RUNTIME_CPU_DETECT
+ else
+#endif
+#endif
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
+ {
+ vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cpi->last_frame_uf);
+ }
#endif
if (cm->frame_type == KEY_FRAME)
@@ -343,9 +357,20 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
// Re-instate the unfiltered frame
#if HAVE_ARMV7
- vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show);
-#else
- vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show);
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show);
+ }
+#if CONFIG_RUNTIME_CPU_DETECT
+ else
+#endif
+#endif
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
+ {
+ vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show);
+ }
#endif
while (filter_step > 0)
@@ -372,9 +397,20 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
// Re-instate the unfiltered frame
#if HAVE_ARMV7
- vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show);
-#else
- vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show);
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show);
+ }
+#if CONFIG_RUNTIME_CPU_DETECT
+ else
+#endif
+#endif
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
+ {
+ vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show);
+ }
#endif
// If value is close to the best so far then bias towards a lower loop filter value.
@@ -401,9 +437,20 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
// Re-instate the unfiltered frame
#if HAVE_ARMV7
- vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show);
-#else
- vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show);
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show);
+ }
+#if CONFIG_RUNTIME_CPU_DETECT
+ else
+#endif
+#endif
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
+ {
+ vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show);
+ }
#endif
// Was it better than the previous best?
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index ecca18a0a..3b5aaa548 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -112,6 +112,8 @@ VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/postproc_mmx.asm
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/postproc_sse2.asm
endif
+VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/arm_systemdependent.c
+
# common (c)
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/bilinearfilter_arm.c
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/filter_arm.c
@@ -119,15 +121,8 @@ VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/loopfilter_arm.c
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/recon_arm.c
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/reconintra4x4_arm.c
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/reconintra_arm.c
-VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/systemdependent.c
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/vpx_asm_offsets.c
-VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6) += common/filter_c.c
-VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6) += common/idctllm.c
-VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6) += common/recon.c
-VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6) += common/reconintra4x4.c
-VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6) += common/generic/systemdependent.c
-
# common (armv6)
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/bilinearfilter_v6$(ASM)
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/copymem8x4_v6$(ASM)
diff --git a/vp8/vp8cx_arm.mk b/vp8/vp8cx_arm.mk
index 1424bd15a..d126faf32 100644
--- a/vp8/vp8cx_arm.mk
+++ b/vp8/vp8cx_arm.mk
@@ -13,17 +13,22 @@
#File list for arm
# encoder
-VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/csystemdependent.c
+VP8_CX_SRCS-$(ARCH_ARM) += encoder/arm/arm_csystemdependent.c
VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/encodemb_arm.c
VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/quantize_arm.c
VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/picklpf_arm.c
-VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/boolhuff_arm.c
+VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/boolhuff_arm.c
VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/mcomp_arm.c
-VP8_CX_SRCS_REMOVE-$(HAVE_ARMV6) += encoder/generic/csystemdependent.c
-VP8_CX_SRCS_REMOVE-$(HAVE_ARMV7) += encoder/boolhuff.c
-VP8_CX_SRCS_REMOVE-$(HAVE_ARMV7) += encoder/mcomp.c
+VP8_CX_SRCS_REMOVE-$(HAVE_ARMV5TE) += encoder/boolhuff.c
+
+#File list for armv5te
+# encoder
+VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/armv5te/boolhuff_armv5te$(ASM)
+VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/armv5te/vp8_packtokens_armv5$(ASM)
+VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/armv5te/vp8_packtokens_mbrow_armv5$(ASM)
+VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/armv5te/vp8_packtokens_partitions_armv5$(ASM)
#File list for armv6
# encoder
@@ -44,10 +49,6 @@ VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_subpixelvariance8x8_neon$(ASM
VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_subpixelvariance16x16_neon$(ASM)
VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_subpixelvariance16x16s_neon$(ASM)
VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_memcpy_neon$(ASM)
-VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_packtokens_armv7$(ASM)
-VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_packtokens_mbrow_armv7$(ASM)
-VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_packtokens_partitions_armv7$(ASM)
-VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/boolhuff_armv7$(ASM)
VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_shortwalsh4x4_neon$(ASM)
VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/vpx_vp8_enc_asm_offsets.c
diff --git a/vp8/vp8dx_arm.mk b/vp8/vp8dx_arm.mk
index ae0610cda..0803a9cb0 100644
--- a/vp8/vp8dx_arm.mk
+++ b/vp8/vp8dx_arm.mk
@@ -11,11 +11,9 @@
#VP8_DX_SRCS list is modified according to different platforms.
+VP8_DX_SRCS-$(ARCH_ARM) += decoder/arm/arm_dsystemdependent.c
+
VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/dequantize_arm.c
-VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/dsystemdependent.c
-VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6) += decoder/generic/dsystemdependent.c
-VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6) += decoder/dequantize.c
-VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6) += decoder/idct_blk.c
VP8_DX_SRCS-$(CONFIG_ARM_ASM_DETOK) += decoder/arm/detokenize$(ASM)
#File list for armv6
diff --git a/vpx_ports/arm.h b/vpx_ports/arm.h
new file mode 100644
index 000000000..81af1f11f
--- /dev/null
+++ b/vpx_ports/arm.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VPX_PORTS_ARM_H
+#define VPX_PORTS_ARM_H
+#include <stdlib.h>
+#include "config.h"
+
+/*ARMv5TE "Enhanced DSP" instructions.*/
+#define HAS_EDSP 0x01
+/*ARMv6 "Parallel" or "Media" instructions.*/
+#define HAS_MEDIA 0x02
+/*ARMv7 optional NEON instructions.*/
+#define HAS_NEON 0x04
+
+/*Returns the ARM capabilities to use as an int built from the HAS_* bits
+ * defined above.
+ * NOTE(review): the visible MSVC implementation returns the value of the
+ * VPX_SIMD_CAPS environment variable verbatim when it is set and non-empty,
+ * and otherwise only probes features allowed by VPX_SIMD_CAPS_MASK; confirm
+ * the other toolchain variants behave the same way.
+ */
+int arm_cpu_caps(void);
+
+#endif
+
diff --git a/vpx_ports/arm_cpudetect.c b/vpx_ports/arm_cpudetect.c
new file mode 100644
index 000000000..4109924cf
--- /dev/null
+++ b/vpx_ports/arm_cpudetect.c
@@ -0,0 +1,190 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include "arm.h"
+
+/* Reads an explicit capability override from the VPX_SIMD_CAPS environment
+ * variable.
+ *
+ * flags: receives the parsed value (strtol with base 0, so decimal, octal and
+ *        hex spellings are accepted), or 0 when there is no override.
+ *
+ * Returns 0 when the variable is set to a non-empty string, -1 otherwise.
+ */
+static int arm_cpu_env_flags(int *flags)
+{
+    const char *caps = getenv("VPX_SIMD_CAPS");
+
+    if (caps == NULL || *caps == '\0')
+    {
+        /* No override present: hand back an empty flag set. */
+        *flags = 0;
+        return -1;
+    }
+
+    *flags = (int)strtol(caps, NULL, 0);
+    return 0;
+}
+
+/* Reads the capability mask from the VPX_SIMD_CAPS_MASK environment variable.
+ *
+ * Returns the parsed value (strtol with base 0) when the variable is set to a
+ * non-empty string, or ~0 (every bit set) otherwise.
+ */
+static int arm_cpu_env_mask(void)
+{
+    const char *mask_str = getenv("VPX_SIMD_CAPS_MASK");
+
+    if (mask_str == NULL || *mask_str == '\0')
+    {
+        return ~0;
+    }
+
+    return (int)strtol(mask_str, NULL, 0);
+}
+
+
+#if defined(_MSC_VER)
+/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
+#define WIN32_LEAN_AND_MEAN
+#define WIN32_EXTRA_LEAN
+#include <windows.h>
+
+/* Detects ARM instruction set extensions at run time (MSVC builds).
+ *
+ * If VPX_SIMD_CAPS is set, its value is returned unmodified. Otherwise every
+ * extension that is enabled in the build configuration and allowed by
+ * VPX_SIMD_CAPS_MASK is probed by executing one representative instruction
+ * inside a structured exception handler: an illegal-instruction exception
+ * leaves the corresponding flag clear.
+ *
+ * Returns a bitmask of HAS_EDSP, HAS_MEDIA and HAS_NEON.
+ */
+int arm_cpu_caps(void)
+{
+    int flags;
+    int mask;
+    /* On failure arm_cpu_env_flags() leaves flags at 0, which is the starting
+     * point for the probes below.
+     */
+    if (!arm_cpu_env_flags(&flags))
+    {
+        return flags;
+    }
+    mask = arm_cpu_env_mask();
+    /* MSVC has no inline __asm support for ARM, but it does let you __emit
+     * instructions via their assembled hex code.
+     * All of these instructions should be essentially nops.
+     */
+    /* The HAVE_* macros are tested by value (as the rest of the tree does with
+     * "#if HAVE_ARMV7") rather than with defined(), and each probe is guarded
+     * independently.
+     */
+#if HAVE_ARMV5TE
+    if (mask & HAS_EDSP)
+    {
+        __try
+        {
+            /*PLD [r13]*/
+            __emit(0xF5DDF000);
+            flags |= HAS_EDSP;
+        }
+        __except(GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION)
+        {
+            /*Ignore exception.*/
+        }
+    }
+#endif
+#if HAVE_ARMV6
+    if (mask & HAS_MEDIA)
+    {
+        __try
+        {
+            /*SHADD8 r3,r3,r3*/
+            __emit(0xE6333F93);
+            flags |= HAS_MEDIA;
+        }
+        __except(GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION)
+        {
+            /*Ignore exception.*/
+        }
+    }
+#endif
+#if HAVE_ARMV7
+    if (mask & HAS_NEON)
+    {
+        __try
+        {
+            /*VORR q0,q0,q0*/
+            __emit(0xF2200150);
+            flags |= HAS_NEON;
+        }
+        __except(GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION)
+        {
+            /*Ignore exception.*/
+        }
+    }
+#endif
+    return flags & mask;
+}
+
+#elif defined(__linux__)
+#include <stdio.h>
+
+/* Detects ARM instruction set extensions at run time (Linux builds) by
+ * parsing /proc/cpuinfo.
+ *
+ * If VPX_SIMD_CAPS is set, its value is returned unmodified. Otherwise:
+ *   - HAS_EDSP / HAS_NEON are set when the "Features" line lists "edsp" /
+ *     "neon" as a whole word;
+ *   - HAS_MEDIA is set when the "CPU architecture:" line reports 6 or higher.
+ * Only extensions enabled in the build configuration are looked for, and the
+ * result is AND-ed with VPX_SIMD_CAPS_MASK. If /proc/cpuinfo cannot be opened
+ * no flags are reported.
+ *
+ * Returns a bitmask of HAS_EDSP, HAS_MEDIA and HAS_NEON.
+ */
+int arm_cpu_caps(void)
+{
+    FILE *fin;
+    int flags;
+    int mask;
+    /* On failure arm_cpu_env_flags() leaves flags at 0. */
+    if (!arm_cpu_env_flags(&flags))
+    {
+        return flags;
+    }
+    mask = arm_cpu_env_mask();
+    /* Reading /proc/self/auxv would be easier, but that doesn't work reliably
+     * on Android.
+     * This also means that detection will fail in Scratchbox.
+     */
+    fin = fopen("/proc/cpuinfo", "r");
+    if (fin != NULL)
+    {
+        /* 512 should be enough for anybody (it's even enough for all the flags
+         * that x86 has accumulated... so far).
+         */
+        char buf[512];
+        while (fgets(buf, sizeof(buf), fin) != NULL)
+        {
+            /* strncmp() stops at the terminating NUL, so a short line never
+             * causes stale bytes from a previous line to be compared.
+             */
+#if HAVE_ARMV5TE || HAVE_ARMV7
+            if (strncmp(buf, "Features", 8) == 0)
+            {
+                char *p;
+                /* A match only counts when the name is followed by a
+                 * separator or the end of the line, so that a longer feature
+                 * name sharing the same prefix is not mistaken for it.
+                 */
+#if HAVE_ARMV5TE
+                p = strstr(buf, " edsp");
+                if (p != NULL &&
+                    (p[5] == ' ' || p[5] == '\n' || p[5] == '\0'))
+                {
+                    flags |= HAS_EDSP;
+                }
+#endif
+                /* NEON is checked independently of the ARMv5TE guard. */
+#if HAVE_ARMV7
+                p = strstr(buf, " neon");
+                if (p != NULL &&
+                    (p[5] == ' ' || p[5] == '\n' || p[5] == '\0'))
+                {
+                    flags |= HAS_NEON;
+                }
+#endif
+            }
+#endif
+#if HAVE_ARMV6
+            if (strncmp(buf, "CPU architecture:", 17) == 0)
+            {
+                int version;
+                /* atoi() skips the blanks after the colon and ignores any
+                 * trailing letters such as "TEJ".
+                 */
+                version = atoi(buf + 17);
+                if (version >= 6)
+                {
+                    flags |= HAS_MEDIA;
+                }
+            }
+#endif
+        }
+        fclose(fin);
+    }
+    return flags & mask;
+}
+
+#elif !CONFIG_RUNTIME_CPU_DETECT
+
+/* Reports ARM instruction set extensions without probing the CPU (used when
+ * runtime CPU detection is disabled).
+ *
+ * If VPX_SIMD_CAPS is set, its value is returned unmodified. Otherwise every
+ * extension enabled in the build configuration is reported, AND-ed with
+ * VPX_SIMD_CAPS_MASK.
+ *
+ * Returns a bitmask of HAS_EDSP, HAS_MEDIA and HAS_NEON.
+ */
+int arm_cpu_caps(void)
+{
+    int flags;
+    int mask;
+    /* On failure arm_cpu_env_flags() leaves flags at 0. */
+    if (!arm_cpu_env_flags(&flags))
+    {
+        return flags;
+    }
+    mask = arm_cpu_env_mask();
+    /* The HAVE_* macros are tested by value (as the rest of the tree does with
+     * "#if HAVE_ARMV7"), so a feature whose macro is defined as 0 is not
+     * reported.
+     */
+#if HAVE_ARMV5TE
+    flags |= HAS_EDSP;
+#endif
+#if HAVE_ARMV6
+    flags |= HAS_MEDIA;
+#endif
+#if HAVE_ARMV7
+    flags |= HAS_NEON;
+#endif
+    return flags & mask;
+}
+
+#else
+#error "--enable-runtime-cpu-detect selected, but no CPU detection method " \
+ "available for your platform. Reconfigure without --enable-runtime-cpu-detect."
+#endif
diff --git a/vpx_scale/arm/scalesystemdependant.c b/vpx_scale/arm/scalesystemdependant.c
index 1e8bcb89d..fee76fff7 100644
--- a/vpx_scale/arm/scalesystemdependant.c
+++ b/vpx_scale/arm/scalesystemdependant.c
@@ -10,6 +10,7 @@
#include "vpx_ports/config.h"
+#include "vpx_ports/arm.h"
#include "vpx_scale/vpxscale.h"
@@ -47,6 +48,9 @@ extern void vp8_yv12_copy_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CO
****************************************************************************/
void vp8_scale_machine_specific_config()
{
+#if HAVE_ARMV7 && CONFIG_RUNTIME_CPU_DETECT
+ int flags;
+#endif
/*
vp8_horizontal_line_1_2_scale = horizontal_line_1_2_scale_armv4;
vp8_vertical_band_1_2_scale = vertical_band_1_2_scale_armv4;
@@ -73,14 +77,20 @@ void vp8_scale_machine_specific_config()
vp8_horizontal_line_5_4_scale = vp8cx_horizontal_line_5_4_scale_c;
*/
-#if HAVE_ARMV7
- vp8_yv12_extend_frame_borders_ptr = vp8_yv12_extend_frame_borders_neon;
- vp8_yv12_copy_frame_yonly_ptr = vp8_yv12_copy_frame_yonly_neon;
- vp8_yv12_copy_frame_ptr = vp8_yv12_copy_frame_neon;
-#else
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
vp8_yv12_extend_frame_borders_ptr = vp8_yv12_extend_frame_borders;
vp8_yv12_copy_frame_yonly_ptr = vp8_yv12_copy_frame_yonly;
vp8_yv12_copy_frame_ptr = vp8_yv12_copy_frame;
#endif
-
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+ flags = arm_cpu_caps();
+ if (flags & HAS_NEON)
+#endif
+ {
+ vp8_yv12_extend_frame_borders_ptr = vp8_yv12_extend_frame_borders_neon;
+ vp8_yv12_copy_frame_yonly_ptr = vp8_yv12_copy_frame_yonly_neon;
+ vp8_yv12_copy_frame_ptr = vp8_yv12_copy_frame_neon;
+ }
+#endif
}