diff options
-rwxr-xr-x | build/make/configure.sh | 4 | ||||
-rw-r--r-- | docs.mk | 3 | ||||
-rw-r--r-- | vp8/encoder/arm/arm_csystemdependent.c | 3 | ||||
-rw-r--r-- | vp8/encoder/arm/variance_arm.c | 36 | ||||
-rw-r--r-- | vp8/encoder/arm/variance_arm.h | 13 | ||||
-rw-r--r-- | vp8/encoder/generic/csystemdependent.c | 3 | ||||
-rw-r--r-- | vp8/encoder/onyx_if.c | 75 | ||||
-rw-r--r-- | vp8/encoder/onyx_int.h | 6 | ||||
-rw-r--r-- | vp8/encoder/x86/variance_impl_sse2.asm | 32 | ||||
-rw-r--r-- | vp8/encoder/x86/x86_csystemdependent.c | 2 | ||||
-rw-r--r-- | vp8/vp8_cx_iface.c | 88 | ||||
-rw-r--r-- | vp8/vp8cx.mk | 6 |
12 files changed, 178 insertions, 93 deletions
diff --git a/build/make/configure.sh b/build/make/configure.sh index 7234b7974..ac99cf561 100755 --- a/build/make/configure.sh +++ b/build/make/configure.sh @@ -624,6 +624,10 @@ process_common_toolchain() { # Handle Solaris variants. Solaris 10 needs -lposix4 case ${toolchain} in + sparc-solaris-*) + add_extralibs -lposix4 + add_cflags "-DMUST_BE_ALIGNED" + ;; *-solaris-*) add_extralibs -lposix4 ;; @@ -34,7 +34,8 @@ TXT_DOX = $(call enabled,TXT_DOX) EXAMPLE_PATH += $(SRC_PATH_BARE) #for CHANGELOG, README, etc -doxyfile: libs.doxy_template libs.doxy examples.doxy +doxyfile: $(if $(findstring examples, $(ALL_TARGETS)),examples.doxy) +doxyfile: libs.doxy_template libs.doxy @echo " [CREATE] $@" @cat $^ > $@ @echo "STRIP_FROM_PATH += $(SRC_PATH_BARE) $(BUILD_ROOT)" >> $@ diff --git a/vp8/encoder/arm/arm_csystemdependent.c b/vp8/encoder/arm/arm_csystemdependent.c index 5852afddb..73007d414 100644 --- a/vp8/encoder/arm/arm_csystemdependent.c +++ b/vp8/encoder/arm/arm_csystemdependent.c @@ -46,6 +46,9 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi) cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c; cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c;*/ cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_armv6; + cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_armv6; + cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_armv6; + cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_armv6; /*cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c; cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;*/ diff --git a/vp8/encoder/arm/variance_arm.c b/vp8/encoder/arm/variance_arm.c index 1cf9fd8f0..64d76bcf8 100644 --- a/vp8/encoder/arm/variance_arm.c +++ b/vp8/encoder/arm/variance_arm.c @@ -43,7 +43,41 @@ unsigned int vp8_sub_pixel_variance16x16_armv6 dst_pixels_per_line, sse); } -#endif +unsigned int vp8_variance_halfpixvar16x16_h_armv6( + const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int recon_stride, + unsigned int *sse) +{ + return vp8_sub_pixel_variance16x16_armv6(src_ptr, source_stride, 4, 0, + ref_ptr, recon_stride, sse); +} + +unsigned int vp8_variance_halfpixvar16x16_v_armv6( + const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int recon_stride, + unsigned int *sse) +{ + return vp8_sub_pixel_variance16x16_armv6(src_ptr, source_stride, 0, 4, + ref_ptr, recon_stride, sse); +} + +unsigned int vp8_variance_halfpixvar16x16_hv_armv6( + const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int recon_stride, + unsigned int *sse) +{ + return vp8_sub_pixel_variance16x16_armv6(src_ptr, source_stride, 4, 4, + ref_ptr, recon_stride, sse); +} + +#endif /* HAVE_ARMV6 */ + #if HAVE_ARMV7 diff --git a/vp8/encoder/arm/variance_arm.h b/vp8/encoder/arm/variance_arm.h index c807e29c0..7ac0ac08e 100644 --- a/vp8/encoder/arm/variance_arm.h +++ b/vp8/encoder/arm/variance_arm.h @@ -17,6 +17,9 @@ extern prototype_sad(vp8_sad16x16_armv6); extern prototype_variance(vp8_variance16x16_armv6); extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_armv6); +extern prototype_variance(vp8_variance_halfpixvar16x16_h_armv6); +extern prototype_variance(vp8_variance_halfpixvar16x16_v_armv6); +extern prototype_variance(vp8_variance_halfpixvar16x16_hv_armv6); #if !CONFIG_RUNTIME_CPU_DETECT @@ -29,10 +32,20 @@ extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_armv6); #undef vp8_variance_var16x16 #define vp8_variance_var16x16 vp8_variance16x16_armv6 +#undef vp8_variance_halfpixvar16x16_h +#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_armv6 + +#undef vp8_variance_halfpixvar16x16_v +#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_armv6 + +#undef vp8_variance_halfpixvar16x16_hv +#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_armv6 + #endif /* !CONFIG_RUNTIME_CPU_DETECT */ #endif /* HAVE_ARMV6 */ + #if HAVE_ARMV7 extern prototype_sad(vp8_sad4x4_neon); extern prototype_sad(vp8_sad8x8_neon); diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c index f067b3937..07151fc4c 100644 --- a/vp8/encoder/generic/csystemdependent.c +++ b/vp8/encoder/generic/csystemdependent.c @@ -95,9 +95,10 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi) cpi->rtcd.search.full_search = vp8_full_search_sad; #endif cpi->rtcd.search.diamond_search = vp8_diamond_search_sad; - +#if !(CONFIG_REALTIME_ONLY) cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_c; #endif +#endif // Pure C: vp8_yv12_copy_partial_frame_ptr = vp8_yv12_copy_partial_frame; diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index 65296886f..0a4a9a099 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -330,35 +330,31 @@ static void setup_features(VP8_COMP *cpi) void vp8_dealloc_compressor_data(VP8_COMP *cpi) { - vpx_free(cpi->tplist); + vpx_free(cpi->tplist); cpi->tplist = NULL; // Delete last frame MV storage buffers - vpx_free(cpi->lfmv); - + vpx_free(cpi->lfmv); cpi->lfmv = 0; - vpx_free(cpi->lf_ref_frame_sign_bias); - + vpx_free(cpi->lf_ref_frame_sign_bias); cpi->lf_ref_frame_sign_bias = 0; - vpx_free(cpi->lf_ref_frame); - + vpx_free(cpi->lf_ref_frame); cpi->lf_ref_frame = 0; // Delete sementation map - vpx_free(cpi->segmentation_map); - + vpx_free(cpi->segmentation_map); cpi->segmentation_map = 0; - vpx_free(cpi->active_map); - + vpx_free(cpi->active_map); cpi->active_map = 0; +#if !(CONFIG_REALTIME_ONLY) // Delete first pass motion map - vpx_free(cpi->fp_motion_map); - + vpx_free(cpi->fp_motion_map); cpi->fp_motion_map = 0; +#endif vp8_de_alloc_frame_buffers(&cpi->common); @@ -380,21 +376,17 @@ void vp8_dealloc_compressor_data(VP8_COMP *cpi) cpi->tok = 0; // Structure used to monitor GF usage - vpx_free(cpi->gf_active_flags); - + vpx_free(cpi->gf_active_flags); cpi->gf_active_flags = 0; - vpx_free(cpi->mb.pip); - + vpx_free(cpi->mb.pip); cpi->mb.pip = 0; #if !(CONFIG_REALTIME_ONLY) - vpx_free(cpi->total_stats); - + vpx_free(cpi->total_stats); cpi->total_stats = 0; - vpx_free(cpi->this_frame_stats); - + vpx_free(cpi->this_frame_stats); cpi->this_frame_stats = 0; #endif } @@ -2230,9 +2222,11 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) vpx_memset(cpi->active_map , 1, (cpi->common.mb_rows * cpi->common.mb_cols)); cpi->active_map_enabled = 0; +#if !(CONFIG_REALTIME_ONLY) // Create the first pass motion map structure and set to 0 // Allocate space for maximum of 15 buffers CHECK_MEM_ERROR(cpi->fp_motion_map, vpx_calloc(15*cpi->common.MBs, 1)); +#endif #if 0 // Experimental code for lagged and one pass @@ -3199,8 +3193,11 @@ static void update_golden_frame_and_stats(VP8_COMP *cpi) // Update the Golden frame reconstruction buffer if signalled and the GF usage counts. if (cm->refresh_golden_frame) { - // Update the golden frame buffer - vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->gld_fb_idx]); + if (cm->frame_type != KEY_FRAME) + { + // Update the golden frame buffer + vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->gld_fb_idx]); + } // Select an interval before next GF if (!cpi->auto_gold) @@ -4834,16 +4831,19 @@ static void encode_frame_to_data_rate if (cpi->oxcf.error_resilient_mode) { - // Is this an alternate reference update - if (cpi->common.refresh_alt_ref_frame) - vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->alt_fb_idx]); + if (cm->frame_type != KEY_FRAME) + { + // Is this an alternate reference update + if (cm->refresh_alt_ref_frame) + vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->alt_fb_idx]); - if (cpi->common.refresh_golden_frame) - vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->gld_fb_idx]); + if (cm->refresh_golden_frame) + vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->gld_fb_idx]); + } } else { - if (cpi->oxcf.play_alternate && cpi->common.refresh_alt_ref_frame && (cpi->common.frame_type != KEY_FRAME)) + if (cpi->oxcf.play_alternate && cm->refresh_alt_ref_frame && (cm->frame_type != KEY_FRAME)) // Update the alternate reference frame and stats as appropriate. update_alt_ref_frame_and_stats(cpi); else @@ -5318,23 +5318,6 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon if (cpi->compressor_speed == 2) { vp8_check_gf_quality(cpi); - } - - if (!cpi) - { -#if HAVE_ARMV7 -#if CONFIG_RUNTIME_CPU_DETECT - if (cm->rtcd.flags & HAS_NEON) -#endif - { - vp8_pop_neon(store_reg); - } -#endif - return 0; - } - - if (cpi->compressor_speed == 2) - { vpx_usec_timer_start(&tsctimer); vpx_usec_timer_start(&ticktimer); } diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h index a9ccc89d7..b66131d15 100644 --- a/vp8/encoder/onyx_int.h +++ b/vp8/encoder/onyx_int.h @@ -51,7 +51,9 @@ #define MV_ZBIN_BOOST 4 #define ZBIN_OQ_MAX 192 +#if !(CONFIG_REALTIME_ONLY) #define VP8_TEMPORAL_ALT_REF 1 +#endif typedef struct { @@ -492,9 +494,11 @@ typedef struct FIRSTPASS_STATS *stats_in, *stats_in_end; struct vpx_codec_pkt_list *output_pkt_list; int first_pass_done; - unsigned char *fp_motion_map; +#if !(CONFIG_REALTIME_ONLY) + unsigned char *fp_motion_map; unsigned char *fp_motion_map_stats, *fp_motion_map_stats_save; +#endif #if 0 // Experimental code for lagged and one pass diff --git a/vp8/encoder/x86/variance_impl_sse2.asm b/vp8/encoder/x86/variance_impl_sse2.asm index 7178e7e31..6cdc47bc9 100644 --- a/vp8/encoder/x86/variance_impl_sse2.asm +++ b/vp8/encoder/x86/variance_impl_sse2.asm @@ -85,10 +85,9 @@ sym(vp8_get16x16var_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - GET_GOT rbx + push rbx push rsi push rdi - sub rsp, 16 ; end prolog mov rsi, arg(0) ;[src_ptr] @@ -97,6 +96,29 @@ sym(vp8_get16x16var_sse2): movsxd rax, DWORD PTR arg(1) ;[source_stride] movsxd rdx, DWORD PTR arg(3) ;[recon_stride] + ; Prefetch data + lea rcx, [rax+rax*2] + prefetcht0 [rsi] + prefetcht0 [rsi+rax] + prefetcht0 [rsi+rax*2] + prefetcht0 [rsi+rcx] + lea rbx, [rsi+rax*4] + prefetcht0 [rbx] + prefetcht0 [rbx+rax] + prefetcht0 [rbx+rax*2] + prefetcht0 [rbx+rcx] + + lea rcx, [rdx+rdx*2] + prefetcht0 [rdi] + prefetcht0 [rdi+rdx] + prefetcht0 [rdi+rdx*2] + prefetcht0 [rdi+rcx] + lea rbx, [rdi+rdx*4] + prefetcht0 [rbx] + prefetcht0 [rbx+rdx] + prefetcht0 [rbx+rdx*2] + prefetcht0 [rbx+rcx] + pxor xmm0, xmm0 ; clear xmm0 for unpack pxor xmm7, xmm7 ; clear xmm7 for accumulating diffs @@ -107,6 +129,9 @@ var16loop: movdqu xmm1, XMMWORD PTR [rsi] movdqu xmm2, XMMWORD PTR [rdi] + prefetcht0 [rsi+rax*8] + prefetcht0 [rdi+rdx*8] + movdqa xmm3, xmm1 movdqa xmm4, xmm2 @@ -178,10 +203,9 @@ var16loop: ; begin epilog - add rsp, 16 pop rdi pop rsi - RESTORE_GOT + pop rbx UNSHADOW_ARGS pop rbp ret diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c index 4e3ce070d..61c603229 100644 --- a/vp8/encoder/x86/x86_csystemdependent.c +++ b/vp8/encoder/x86/x86_csystemdependent.c @@ -301,7 +301,9 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) #endif cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_sse2; +#if !(CONFIG_REALTIME_ONLY) cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_sse2; +#endif } #endif diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c index eb568e18b..83667337d 100644 --- a/vp8/vp8_cx_iface.c +++ b/vp8/vp8_cx_iface.c @@ -498,58 +498,68 @@ static vpx_codec_err_t vp8e_common_init(vpx_codec_ctx_t *ctx, { priv = calloc(1, sizeof(struct vpx_codec_alg_priv)); - if (priv) + if (!priv) { - ctx->priv = &priv->base; - ctx->priv->sz = sizeof(*ctx->priv); - ctx->priv->iface = ctx->iface; - ctx->priv->alg_priv = priv; - ctx->priv->init_flags = ctx->init_flags; - - if (ctx->config.enc) - { - /* Update the reference to the config structure to an - * internal copy. - */ - ctx->priv->alg_priv->cfg = *ctx->config.enc; - ctx->config.enc = &ctx->priv->alg_priv->cfg; - } + return VPX_CODEC_MEM_ERROR; + } - cfg = &ctx->priv->alg_priv->cfg; + ctx->priv = &priv->base; + ctx->priv->sz = sizeof(*ctx->priv); + ctx->priv->iface = ctx->iface; + ctx->priv->alg_priv = priv; + ctx->priv->init_flags = ctx->init_flags; - /* Select the extra vp6 configuration table based on the current - * usage value. If the current usage value isn't found, use the - * values for usage case 0. + if (ctx->config.enc) + { + /* Update the reference to the config structure to an + * internal copy. */ - for (i = 0; - extracfg_map[i].usage && extracfg_map[i].usage != cfg->g_usage; - i++); + ctx->priv->alg_priv->cfg = *ctx->config.enc; + ctx->config.enc = &ctx->priv->alg_priv->cfg; + } + + cfg = &ctx->priv->alg_priv->cfg; + + /* Select the extra vp6 configuration table based on the current + * usage value. If the current usage value isn't found, use the + * values for usage case 0. + */ + for (i = 0; + extracfg_map[i].usage && extracfg_map[i].usage != cfg->g_usage; + i++); - priv->vp8_cfg = extracfg_map[i].cfg; - priv->vp8_cfg.pkt_list = &priv->pkt_list.head; + priv->vp8_cfg = extracfg_map[i].cfg; + priv->vp8_cfg.pkt_list = &priv->pkt_list.head; priv->vp8_cfg.experimental = experimental; - priv->cx_data_sz = priv->cfg.g_w * priv->cfg.g_h * 3 / 2 * 2; + priv->cx_data_sz = priv->cfg.g_w * priv->cfg.g_h * 3 / 2 * 2; - if (priv->cx_data_sz < 4096) priv->cx_data_sz = 4096; + if (priv->cx_data_sz < 4096) priv->cx_data_sz = 4096; - priv->cx_data = malloc(priv->cx_data_sz); - priv->deprecated_mode = NO_MODE_SET; + priv->cx_data = malloc(priv->cx_data_sz); - vp8_initialize(); + if (!priv->cx_data) + { + return VPX_CODEC_MEM_ERROR; + } - res = validate_config(priv, &priv->cfg, &priv->vp8_cfg); + priv->deprecated_mode = NO_MODE_SET; - if (!res) - { - set_vp8e_config(&ctx->priv->alg_priv->oxcf, ctx->priv->alg_priv->cfg, ctx->priv->alg_priv->vp8_cfg); - optr = vp8_create_compressor(&ctx->priv->alg_priv->oxcf); + vp8_initialize(); - if (!optr) - res = VPX_CODEC_MEM_ERROR; - else - ctx->priv->alg_priv->cpi = optr; - } + res = validate_config(priv, &priv->cfg, &priv->vp8_cfg); + + if (!res) + { + set_vp8e_config(&ctx->priv->alg_priv->oxcf, + ctx->priv->alg_priv->cfg, + ctx->priv->alg_priv->vp8_cfg); + optr = vp8_create_compressor(&ctx->priv->alg_priv->oxcf); + + if (!optr) + res = VPX_CODEC_MEM_ERROR; + else + ctx->priv->alg_priv->cpi = optr; } } diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk index a84188f0c..ed2feddae 100644 --- a/vp8/vp8cx.mk +++ b/vp8/vp8cx.mk @@ -85,6 +85,7 @@ VP8_CX_SRCS-yes += encoder/temporal_filter.h ifeq ($(CONFIG_REALTIME_ONLY),yes) VP8_CX_SRCS_REMOVE-yes += encoder/firstpass.c +VP8_CX_SRCS_REMOVE-yes += encoder/temporal_filter.c endif VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodemb_x86.h @@ -114,4 +115,9 @@ VP8_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/sad_sse4.asm VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_mmx.asm VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodeopt.asm +ifeq ($(CONFIG_REALTIME_ONLY),yes) +VP8_CX_SRCS_REMOVE-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm +endif + + VP8_CX_SRCS-yes := $(filter-out $(VP8_CX_SRCS_REMOVE-yes),$(VP8_CX_SRCS-yes)) |