summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xbuild/make/configure.sh4
-rw-r--r--docs.mk3
-rw-r--r--vp8/encoder/arm/arm_csystemdependent.c3
-rw-r--r--vp8/encoder/arm/variance_arm.c36
-rw-r--r--vp8/encoder/arm/variance_arm.h13
-rw-r--r--vp8/encoder/generic/csystemdependent.c3
-rw-r--r--vp8/encoder/onyx_if.c75
-rw-r--r--vp8/encoder/onyx_int.h6
-rw-r--r--vp8/encoder/x86/variance_impl_sse2.asm32
-rw-r--r--vp8/encoder/x86/x86_csystemdependent.c2
-rw-r--r--vp8/vp8_cx_iface.c88
-rw-r--r--vp8/vp8cx.mk6
12 files changed, 178 insertions, 93 deletions
diff --git a/build/make/configure.sh b/build/make/configure.sh
index 7234b7974..ac99cf561 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -624,6 +624,10 @@ process_common_toolchain() {
# Handle Solaris variants. Solaris 10 needs -lposix4
case ${toolchain} in
+ sparc-solaris-*)
+ add_extralibs -lposix4
+ add_cflags "-DMUST_BE_ALIGNED"
+ ;;
*-solaris-*)
add_extralibs -lposix4
;;
diff --git a/docs.mk b/docs.mk
index 28df9d262..98332a244 100644
--- a/docs.mk
+++ b/docs.mk
@@ -34,7 +34,8 @@ TXT_DOX = $(call enabled,TXT_DOX)
EXAMPLE_PATH += $(SRC_PATH_BARE) #for CHANGELOG, README, etc
-doxyfile: libs.doxy_template libs.doxy examples.doxy
+doxyfile: $(if $(findstring examples, $(ALL_TARGETS)),examples.doxy)
+doxyfile: libs.doxy_template libs.doxy
@echo " [CREATE] $@"
@cat $^ > $@
@echo "STRIP_FROM_PATH += $(SRC_PATH_BARE) $(BUILD_ROOT)" >> $@
diff --git a/vp8/encoder/arm/arm_csystemdependent.c b/vp8/encoder/arm/arm_csystemdependent.c
index 5852afddb..73007d414 100644
--- a/vp8/encoder/arm/arm_csystemdependent.c
+++ b/vp8/encoder/arm/arm_csystemdependent.c
@@ -46,6 +46,9 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c;
cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c;*/
cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_armv6;
+ cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_armv6;
+ cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_armv6;
+ cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_armv6;
/*cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c;
cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;*/
diff --git a/vp8/encoder/arm/variance_arm.c b/vp8/encoder/arm/variance_arm.c
index 1cf9fd8f0..64d76bcf8 100644
--- a/vp8/encoder/arm/variance_arm.c
+++ b/vp8/encoder/arm/variance_arm.c
@@ -43,7 +43,41 @@ unsigned int vp8_sub_pixel_variance16x16_armv6
dst_pixels_per_line, sse);
}
-#endif
+unsigned int vp8_variance_halfpixvar16x16_h_armv6(
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse)
+{
+ return vp8_sub_pixel_variance16x16_armv6(src_ptr, source_stride, 4, 0,
+ ref_ptr, recon_stride, sse);
+}
+
+unsigned int vp8_variance_halfpixvar16x16_v_armv6(
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse)
+{
+ return vp8_sub_pixel_variance16x16_armv6(src_ptr, source_stride, 0, 4,
+ ref_ptr, recon_stride, sse);
+}
+
+unsigned int vp8_variance_halfpixvar16x16_hv_armv6(
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse)
+{
+ return vp8_sub_pixel_variance16x16_armv6(src_ptr, source_stride, 4, 4,
+ ref_ptr, recon_stride, sse);
+}
+
+#endif /* HAVE_ARMV6 */
+
#if HAVE_ARMV7
diff --git a/vp8/encoder/arm/variance_arm.h b/vp8/encoder/arm/variance_arm.h
index c807e29c0..7ac0ac08e 100644
--- a/vp8/encoder/arm/variance_arm.h
+++ b/vp8/encoder/arm/variance_arm.h
@@ -17,6 +17,9 @@
extern prototype_sad(vp8_sad16x16_armv6);
extern prototype_variance(vp8_variance16x16_armv6);
extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_armv6);
+extern prototype_variance(vp8_variance_halfpixvar16x16_h_armv6);
+extern prototype_variance(vp8_variance_halfpixvar16x16_v_armv6);
+extern prototype_variance(vp8_variance_halfpixvar16x16_hv_armv6);
#if !CONFIG_RUNTIME_CPU_DETECT
@@ -29,10 +32,20 @@ extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_armv6);
#undef vp8_variance_var16x16
#define vp8_variance_var16x16 vp8_variance16x16_armv6
+#undef vp8_variance_halfpixvar16x16_h
+#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_armv6
+
+#undef vp8_variance_halfpixvar16x16_v
+#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_armv6
+
+#undef vp8_variance_halfpixvar16x16_hv
+#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_armv6
+
#endif /* !CONFIG_RUNTIME_CPU_DETECT */
#endif /* HAVE_ARMV6 */
+
#if HAVE_ARMV7
extern prototype_sad(vp8_sad4x4_neon);
extern prototype_sad(vp8_sad8x8_neon);
diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c
index f067b3937..07151fc4c 100644
--- a/vp8/encoder/generic/csystemdependent.c
+++ b/vp8/encoder/generic/csystemdependent.c
@@ -95,9 +95,10 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
cpi->rtcd.search.full_search = vp8_full_search_sad;
#endif
cpi->rtcd.search.diamond_search = vp8_diamond_search_sad;
-
+#if !(CONFIG_REALTIME_ONLY)
cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_c;
#endif
+#endif
// Pure C:
vp8_yv12_copy_partial_frame_ptr = vp8_yv12_copy_partial_frame;
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 65296886f..0a4a9a099 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -330,35 +330,31 @@ static void setup_features(VP8_COMP *cpi)
void vp8_dealloc_compressor_data(VP8_COMP *cpi)
{
- vpx_free(cpi->tplist);
+ vpx_free(cpi->tplist);
cpi->tplist = NULL;
// Delete last frame MV storage buffers
- vpx_free(cpi->lfmv);
-
+ vpx_free(cpi->lfmv);
cpi->lfmv = 0;
- vpx_free(cpi->lf_ref_frame_sign_bias);
-
+ vpx_free(cpi->lf_ref_frame_sign_bias);
cpi->lf_ref_frame_sign_bias = 0;
- vpx_free(cpi->lf_ref_frame);
-
+ vpx_free(cpi->lf_ref_frame);
cpi->lf_ref_frame = 0;
// Delete sementation map
- vpx_free(cpi->segmentation_map);
-
+ vpx_free(cpi->segmentation_map);
cpi->segmentation_map = 0;
- vpx_free(cpi->active_map);
-
+ vpx_free(cpi->active_map);
cpi->active_map = 0;
+#if !(CONFIG_REALTIME_ONLY)
// Delete first pass motion map
- vpx_free(cpi->fp_motion_map);
-
+ vpx_free(cpi->fp_motion_map);
cpi->fp_motion_map = 0;
+#endif
vp8_de_alloc_frame_buffers(&cpi->common);
@@ -380,21 +376,17 @@ void vp8_dealloc_compressor_data(VP8_COMP *cpi)
cpi->tok = 0;
// Structure used to monitor GF usage
- vpx_free(cpi->gf_active_flags);
-
+ vpx_free(cpi->gf_active_flags);
cpi->gf_active_flags = 0;
- vpx_free(cpi->mb.pip);
-
+ vpx_free(cpi->mb.pip);
cpi->mb.pip = 0;
#if !(CONFIG_REALTIME_ONLY)
- vpx_free(cpi->total_stats);
-
+ vpx_free(cpi->total_stats);
cpi->total_stats = 0;
- vpx_free(cpi->this_frame_stats);
-
+ vpx_free(cpi->this_frame_stats);
cpi->this_frame_stats = 0;
#endif
}
@@ -2230,9 +2222,11 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
vpx_memset(cpi->active_map , 1, (cpi->common.mb_rows * cpi->common.mb_cols));
cpi->active_map_enabled = 0;
+#if !(CONFIG_REALTIME_ONLY)
// Create the first pass motion map structure and set to 0
// Allocate space for maximum of 15 buffers
CHECK_MEM_ERROR(cpi->fp_motion_map, vpx_calloc(15*cpi->common.MBs, 1));
+#endif
#if 0
// Experimental code for lagged and one pass
@@ -3199,8 +3193,11 @@ static void update_golden_frame_and_stats(VP8_COMP *cpi)
// Update the Golden frame reconstruction buffer if signalled and the GF usage counts.
if (cm->refresh_golden_frame)
{
- // Update the golden frame buffer
- vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->gld_fb_idx]);
+ if (cm->frame_type != KEY_FRAME)
+ {
+ // Update the golden frame buffer
+ vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->gld_fb_idx]);
+ }
// Select an interval before next GF
if (!cpi->auto_gold)
@@ -4834,16 +4831,19 @@ static void encode_frame_to_data_rate
if (cpi->oxcf.error_resilient_mode)
{
- // Is this an alternate reference update
- if (cpi->common.refresh_alt_ref_frame)
- vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->alt_fb_idx]);
+ if (cm->frame_type != KEY_FRAME)
+ {
+ // Is this an alternate reference update
+ if (cm->refresh_alt_ref_frame)
+ vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->alt_fb_idx]);
- if (cpi->common.refresh_golden_frame)
- vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->gld_fb_idx]);
+ if (cm->refresh_golden_frame)
+ vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->gld_fb_idx]);
+ }
}
else
{
- if (cpi->oxcf.play_alternate && cpi->common.refresh_alt_ref_frame && (cpi->common.frame_type != KEY_FRAME))
+ if (cpi->oxcf.play_alternate && cm->refresh_alt_ref_frame && (cm->frame_type != KEY_FRAME))
// Update the alternate reference frame and stats as appropriate.
update_alt_ref_frame_and_stats(cpi);
else
@@ -5318,23 +5318,6 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
if (cpi->compressor_speed == 2)
{
vp8_check_gf_quality(cpi);
- }
-
- if (!cpi)
- {
-#if HAVE_ARMV7
-#if CONFIG_RUNTIME_CPU_DETECT
- if (cm->rtcd.flags & HAS_NEON)
-#endif
- {
- vp8_pop_neon(store_reg);
- }
-#endif
- return 0;
- }
-
- if (cpi->compressor_speed == 2)
- {
vpx_usec_timer_start(&tsctimer);
vpx_usec_timer_start(&ticktimer);
}
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index a9ccc89d7..b66131d15 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -51,7 +51,9 @@
#define MV_ZBIN_BOOST 4
#define ZBIN_OQ_MAX 192
+#if !(CONFIG_REALTIME_ONLY)
#define VP8_TEMPORAL_ALT_REF 1
+#endif
typedef struct
{
@@ -492,9 +494,11 @@ typedef struct
FIRSTPASS_STATS *stats_in, *stats_in_end;
struct vpx_codec_pkt_list *output_pkt_list;
int first_pass_done;
- unsigned char *fp_motion_map;
+#if !(CONFIG_REALTIME_ONLY)
+ unsigned char *fp_motion_map;
unsigned char *fp_motion_map_stats, *fp_motion_map_stats_save;
+#endif
#if 0
// Experimental code for lagged and one pass
diff --git a/vp8/encoder/x86/variance_impl_sse2.asm b/vp8/encoder/x86/variance_impl_sse2.asm
index 7178e7e31..6cdc47bc9 100644
--- a/vp8/encoder/x86/variance_impl_sse2.asm
+++ b/vp8/encoder/x86/variance_impl_sse2.asm
@@ -85,10 +85,9 @@ sym(vp8_get16x16var_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
- GET_GOT rbx
+ push rbx
push rsi
push rdi
- sub rsp, 16
; end prolog
mov rsi, arg(0) ;[src_ptr]
@@ -97,6 +96,29 @@ sym(vp8_get16x16var_sse2):
movsxd rax, DWORD PTR arg(1) ;[source_stride]
movsxd rdx, DWORD PTR arg(3) ;[recon_stride]
+ ; Prefetch data
+ lea rcx, [rax+rax*2]
+ prefetcht0 [rsi]
+ prefetcht0 [rsi+rax]
+ prefetcht0 [rsi+rax*2]
+ prefetcht0 [rsi+rcx]
+ lea rbx, [rsi+rax*4]
+ prefetcht0 [rbx]
+ prefetcht0 [rbx+rax]
+ prefetcht0 [rbx+rax*2]
+ prefetcht0 [rbx+rcx]
+
+ lea rcx, [rdx+rdx*2]
+ prefetcht0 [rdi]
+ prefetcht0 [rdi+rdx]
+ prefetcht0 [rdi+rdx*2]
+ prefetcht0 [rdi+rcx]
+ lea rbx, [rdi+rdx*4]
+ prefetcht0 [rbx]
+ prefetcht0 [rbx+rdx]
+ prefetcht0 [rbx+rdx*2]
+ prefetcht0 [rbx+rcx]
+
pxor xmm0, xmm0 ; clear xmm0 for unpack
pxor xmm7, xmm7 ; clear xmm7 for accumulating diffs
@@ -107,6 +129,9 @@ var16loop:
movdqu xmm1, XMMWORD PTR [rsi]
movdqu xmm2, XMMWORD PTR [rdi]
+ prefetcht0 [rsi+rax*8]
+ prefetcht0 [rdi+rdx*8]
+
movdqa xmm3, xmm1
movdqa xmm4, xmm2
@@ -178,10 +203,9 @@ var16loop:
; begin epilog
- add rsp, 16
pop rdi
pop rsi
- RESTORE_GOT
+ pop rbx
UNSHADOW_ARGS
pop rbp
ret
diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c
index 4e3ce070d..61c603229 100644
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -301,7 +301,9 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
#endif
cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_sse2;
+#if !(CONFIG_REALTIME_ONLY)
cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_sse2;
+#endif
}
#endif
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index eb568e18b..83667337d 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -498,58 +498,68 @@ static vpx_codec_err_t vp8e_common_init(vpx_codec_ctx_t *ctx,
{
priv = calloc(1, sizeof(struct vpx_codec_alg_priv));
- if (priv)
+ if (!priv)
{
- ctx->priv = &priv->base;
- ctx->priv->sz = sizeof(*ctx->priv);
- ctx->priv->iface = ctx->iface;
- ctx->priv->alg_priv = priv;
- ctx->priv->init_flags = ctx->init_flags;
-
- if (ctx->config.enc)
- {
- /* Update the reference to the config structure to an
- * internal copy.
- */
- ctx->priv->alg_priv->cfg = *ctx->config.enc;
- ctx->config.enc = &ctx->priv->alg_priv->cfg;
- }
+ return VPX_CODEC_MEM_ERROR;
+ }
- cfg = &ctx->priv->alg_priv->cfg;
+ ctx->priv = &priv->base;
+ ctx->priv->sz = sizeof(*ctx->priv);
+ ctx->priv->iface = ctx->iface;
+ ctx->priv->alg_priv = priv;
+ ctx->priv->init_flags = ctx->init_flags;
- /* Select the extra vp6 configuration table based on the current
- * usage value. If the current usage value isn't found, use the
- * values for usage case 0.
+ if (ctx->config.enc)
+ {
+ /* Update the reference to the config structure to an
+ * internal copy.
*/
- for (i = 0;
- extracfg_map[i].usage && extracfg_map[i].usage != cfg->g_usage;
- i++);
+ ctx->priv->alg_priv->cfg = *ctx->config.enc;
+ ctx->config.enc = &ctx->priv->alg_priv->cfg;
+ }
+
+ cfg = &ctx->priv->alg_priv->cfg;
+
+ /* Select the extra vp6 configuration table based on the current
+ * usage value. If the current usage value isn't found, use the
+ * values for usage case 0.
+ */
+ for (i = 0;
+ extracfg_map[i].usage && extracfg_map[i].usage != cfg->g_usage;
+ i++);
- priv->vp8_cfg = extracfg_map[i].cfg;
- priv->vp8_cfg.pkt_list = &priv->pkt_list.head;
+ priv->vp8_cfg = extracfg_map[i].cfg;
+ priv->vp8_cfg.pkt_list = &priv->pkt_list.head;
priv->vp8_cfg.experimental = experimental;
- priv->cx_data_sz = priv->cfg.g_w * priv->cfg.g_h * 3 / 2 * 2;
+ priv->cx_data_sz = priv->cfg.g_w * priv->cfg.g_h * 3 / 2 * 2;
- if (priv->cx_data_sz < 4096) priv->cx_data_sz = 4096;
+ if (priv->cx_data_sz < 4096) priv->cx_data_sz = 4096;
- priv->cx_data = malloc(priv->cx_data_sz);
- priv->deprecated_mode = NO_MODE_SET;
+ priv->cx_data = malloc(priv->cx_data_sz);
- vp8_initialize();
+ if (!priv->cx_data)
+ {
+ return VPX_CODEC_MEM_ERROR;
+ }
- res = validate_config(priv, &priv->cfg, &priv->vp8_cfg);
+ priv->deprecated_mode = NO_MODE_SET;
- if (!res)
- {
- set_vp8e_config(&ctx->priv->alg_priv->oxcf, ctx->priv->alg_priv->cfg, ctx->priv->alg_priv->vp8_cfg);
- optr = vp8_create_compressor(&ctx->priv->alg_priv->oxcf);
+ vp8_initialize();
- if (!optr)
- res = VPX_CODEC_MEM_ERROR;
- else
- ctx->priv->alg_priv->cpi = optr;
- }
+ res = validate_config(priv, &priv->cfg, &priv->vp8_cfg);
+
+ if (!res)
+ {
+ set_vp8e_config(&ctx->priv->alg_priv->oxcf,
+ ctx->priv->alg_priv->cfg,
+ ctx->priv->alg_priv->vp8_cfg);
+ optr = vp8_create_compressor(&ctx->priv->alg_priv->oxcf);
+
+ if (!optr)
+ res = VPX_CODEC_MEM_ERROR;
+ else
+ ctx->priv->alg_priv->cpi = optr;
}
}
diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk
index a84188f0c..ed2feddae 100644
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -85,6 +85,7 @@ VP8_CX_SRCS-yes += encoder/temporal_filter.h
ifeq ($(CONFIG_REALTIME_ONLY),yes)
VP8_CX_SRCS_REMOVE-yes += encoder/firstpass.c
+VP8_CX_SRCS_REMOVE-yes += encoder/temporal_filter.c
endif
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodemb_x86.h
@@ -114,4 +115,9 @@ VP8_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/sad_sse4.asm
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_mmx.asm
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodeopt.asm
+ifeq ($(CONFIG_REALTIME_ONLY),yes)
+VP8_CX_SRCS_REMOVE-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm
+endif
+
+
VP8_CX_SRCS-yes := $(filter-out $(VP8_CX_SRCS_REMOVE-yes),$(VP8_CX_SRCS-yes))