summary | refs | log | tree | commit | diff
path: root/vp8/encoder
diff options
context:
space:
mode:
author: John Koleszar <jkoleszar@google.com>, 2011-03-03 08:59:34 -0500
committer: John Koleszar <jkoleszar@google.com>, 2011-03-03 08:59:34 -0500
commit: ca29f6a7c47471621da52e96cb2d5c5acb81b41b (patch)
tree: 57b2392bc6fcb2f7b49f90cb05043da8edcbf6e2 /vp8/encoder
parent: 738a79191725f5af4a08451312f5a48fb386bc4b (diff)
parent: 6f5189c044cf52171822f6df1ad6ae4a3bcaf2c4 (diff)
download: libvpx-ca29f6a7c47471621da52e96cb2d5c5acb81b41b.tar
libvpx-ca29f6a7c47471621da52e96cb2d5c5acb81b41b.tar.gz
libvpx-ca29f6a7c47471621da52e96cb2d5c5acb81b41b.tar.bz2
libvpx-ca29f6a7c47471621da52e96cb2d5c5acb81b41b.zip
Merge remote branch 'internal/upstream' into HEAD

Conflicts:
    vp8/vp8_cx_iface.c

Change-Id: Iecfd4532ab1c722d10ecce8a5ec473e96093cf3b
Diffstat (limited to 'vp8/encoder')
-rw-r--r-- vp8/encoder/arm/arm_csystemdependent.c | 3
-rw-r--r-- vp8/encoder/arm/variance_arm.c | 36
-rw-r--r-- vp8/encoder/arm/variance_arm.h | 13
-rw-r--r-- vp8/encoder/generic/csystemdependent.c | 3
-rw-r--r-- vp8/encoder/onyx_if.c | 75
-rw-r--r-- vp8/encoder/onyx_int.h | 6
-rw-r--r-- vp8/encoder/x86/variance_impl_sse2.asm | 32
-rw-r--r-- vp8/encoder/x86/x86_csystemdependent.c | 2
8 files changed, 117 insertions, 53 deletions
diff --git a/vp8/encoder/arm/arm_csystemdependent.c b/vp8/encoder/arm/arm_csystemdependent.c
index 5852afddb..73007d414 100644
--- a/vp8/encoder/arm/arm_csystemdependent.c
+++ b/vp8/encoder/arm/arm_csystemdependent.c
@@ -46,6 +46,9 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c;
cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c;*/
cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_armv6;
+ cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_armv6;
+ cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_armv6;
+ cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_armv6;
/*cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c;
cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;*/
diff --git a/vp8/encoder/arm/variance_arm.c b/vp8/encoder/arm/variance_arm.c
index 1cf9fd8f0..64d76bcf8 100644
--- a/vp8/encoder/arm/variance_arm.c
+++ b/vp8/encoder/arm/variance_arm.c
@@ -43,7 +43,41 @@ unsigned int vp8_sub_pixel_variance16x16_armv6
dst_pixels_per_line, sse);
}
-#endif
+unsigned int vp8_variance_halfpixvar16x16_h_armv6(
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse)
+{
+ return vp8_sub_pixel_variance16x16_armv6(src_ptr, source_stride, 4, 0,
+ ref_ptr, recon_stride, sse);
+}
+
+unsigned int vp8_variance_halfpixvar16x16_v_armv6(
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse)
+{
+ return vp8_sub_pixel_variance16x16_armv6(src_ptr, source_stride, 0, 4,
+ ref_ptr, recon_stride, sse);
+}
+
+unsigned int vp8_variance_halfpixvar16x16_hv_armv6(
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse)
+{
+ return vp8_sub_pixel_variance16x16_armv6(src_ptr, source_stride, 4, 4,
+ ref_ptr, recon_stride, sse);
+}
+
+#endif /* HAVE_ARMV6 */
+
#if HAVE_ARMV7
diff --git a/vp8/encoder/arm/variance_arm.h b/vp8/encoder/arm/variance_arm.h
index c807e29c0..7ac0ac08e 100644
--- a/vp8/encoder/arm/variance_arm.h
+++ b/vp8/encoder/arm/variance_arm.h
@@ -17,6 +17,9 @@
extern prototype_sad(vp8_sad16x16_armv6);
extern prototype_variance(vp8_variance16x16_armv6);
extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_armv6);
+extern prototype_variance(vp8_variance_halfpixvar16x16_h_armv6);
+extern prototype_variance(vp8_variance_halfpixvar16x16_v_armv6);
+extern prototype_variance(vp8_variance_halfpixvar16x16_hv_armv6);
#if !CONFIG_RUNTIME_CPU_DETECT
@@ -29,10 +32,20 @@ extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_armv6);
#undef vp8_variance_var16x16
#define vp8_variance_var16x16 vp8_variance16x16_armv6
+#undef vp8_variance_halfpixvar16x16_h
+#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_armv6
+
+#undef vp8_variance_halfpixvar16x16_v
+#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_armv6
+
+#undef vp8_variance_halfpixvar16x16_hv
+#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_armv6
+
#endif /* !CONFIG_RUNTIME_CPU_DETECT */
#endif /* HAVE_ARMV6 */
+
#if HAVE_ARMV7
extern prototype_sad(vp8_sad4x4_neon);
extern prototype_sad(vp8_sad8x8_neon);
diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c
index f067b3937..07151fc4c 100644
--- a/vp8/encoder/generic/csystemdependent.c
+++ b/vp8/encoder/generic/csystemdependent.c
@@ -95,9 +95,10 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
cpi->rtcd.search.full_search = vp8_full_search_sad;
#endif
cpi->rtcd.search.diamond_search = vp8_diamond_search_sad;
-
+#if !(CONFIG_REALTIME_ONLY)
cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_c;
#endif
+#endif
// Pure C:
vp8_yv12_copy_partial_frame_ptr = vp8_yv12_copy_partial_frame;
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 3f0ee8e91..0d471cd28 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -330,35 +330,31 @@ static void setup_features(VP8_COMP *cpi)
void vp8_dealloc_compressor_data(VP8_COMP *cpi)
{
- vpx_free(cpi->tplist);
+ vpx_free(cpi->tplist);
cpi->tplist = NULL;
// Delete last frame MV storage buffers
- vpx_free(cpi->lfmv);
-
+ vpx_free(cpi->lfmv);
cpi->lfmv = 0;
- vpx_free(cpi->lf_ref_frame_sign_bias);
-
+ vpx_free(cpi->lf_ref_frame_sign_bias);
cpi->lf_ref_frame_sign_bias = 0;
- vpx_free(cpi->lf_ref_frame);
-
+ vpx_free(cpi->lf_ref_frame);
cpi->lf_ref_frame = 0;
// Delete sementation map
- vpx_free(cpi->segmentation_map);
-
+ vpx_free(cpi->segmentation_map);
cpi->segmentation_map = 0;
- vpx_free(cpi->active_map);
-
+ vpx_free(cpi->active_map);
cpi->active_map = 0;
+#if !(CONFIG_REALTIME_ONLY)
// Delete first pass motion map
- vpx_free(cpi->fp_motion_map);
-
+ vpx_free(cpi->fp_motion_map);
cpi->fp_motion_map = 0;
+#endif
vp8_de_alloc_frame_buffers(&cpi->common);
@@ -380,21 +376,17 @@ void vp8_dealloc_compressor_data(VP8_COMP *cpi)
cpi->tok = 0;
// Structure used to monitor GF usage
- vpx_free(cpi->gf_active_flags);
-
+ vpx_free(cpi->gf_active_flags);
cpi->gf_active_flags = 0;
- vpx_free(cpi->mb.pip);
-
+ vpx_free(cpi->mb.pip);
cpi->mb.pip = 0;
#if !(CONFIG_REALTIME_ONLY)
- vpx_free(cpi->total_stats);
-
+ vpx_free(cpi->total_stats);
cpi->total_stats = 0;
- vpx_free(cpi->this_frame_stats);
-
+ vpx_free(cpi->this_frame_stats);
cpi->this_frame_stats = 0;
#endif
}
@@ -2227,9 +2219,11 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
vpx_memset(cpi->active_map , 1, (cpi->common.mb_rows * cpi->common.mb_cols));
cpi->active_map_enabled = 0;
+#if !(CONFIG_REALTIME_ONLY)
// Create the first pass motion map structure and set to 0
// Allocate space for maximum of 15 buffers
CHECK_MEM_ERROR(cpi->fp_motion_map, vpx_calloc(15*cpi->common.MBs, 1));
+#endif
#if 0
// Experimental code for lagged and one pass
@@ -3195,8 +3189,11 @@ static void update_golden_frame_and_stats(VP8_COMP *cpi)
// Update the Golden frame reconstruction buffer if signalled and the GF usage counts.
if (cm->refresh_golden_frame)
{
- // Update the golden frame buffer
- vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->gld_fb_idx]);
+ if (cm->frame_type != KEY_FRAME)
+ {
+ // Update the golden frame buffer
+ vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->gld_fb_idx]);
+ }
// Select an interval before next GF
if (!cpi->auto_gold)
@@ -4833,16 +4830,19 @@ static void encode_frame_to_data_rate
if (cpi->oxcf.error_resilient_mode)
{
- // Is this an alternate reference update
- if (cpi->common.refresh_alt_ref_frame)
- vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->alt_fb_idx]);
+ if (cm->frame_type != KEY_FRAME)
+ {
+ // Is this an alternate reference update
+ if (cm->refresh_alt_ref_frame)
+ vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->alt_fb_idx]);
- if (cpi->common.refresh_golden_frame)
- vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->gld_fb_idx]);
+ if (cm->refresh_golden_frame)
+ vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->gld_fb_idx]);
+ }
}
else
{
- if (cpi->oxcf.play_alternate && cpi->common.refresh_alt_ref_frame && (cpi->common.frame_type != KEY_FRAME))
+ if (cpi->oxcf.play_alternate && cm->refresh_alt_ref_frame && (cm->frame_type != KEY_FRAME))
// Update the alternate reference frame and stats as appropriate.
update_alt_ref_frame_and_stats(cpi);
else
@@ -5317,23 +5317,6 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
if (cpi->compressor_speed == 2)
{
vp8_check_gf_quality(cpi);
- }
-
- if (!cpi)
- {
-#if HAVE_ARMV7
-#if CONFIG_RUNTIME_CPU_DETECT
- if (cm->rtcd.flags & HAS_NEON)
-#endif
- {
- vp8_pop_neon(store_reg);
- }
-#endif
- return 0;
- }
-
- if (cpi->compressor_speed == 2)
- {
vpx_usec_timer_start(&tsctimer);
vpx_usec_timer_start(&ticktimer);
}
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index 0d33b42a6..5a4b3c185 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -51,7 +51,9 @@
#define MV_ZBIN_BOOST 4
#define ZBIN_OQ_MAX 192
+#if !(CONFIG_REALTIME_ONLY)
#define VP8_TEMPORAL_ALT_REF 1
+#endif
typedef struct
{
@@ -496,9 +498,11 @@ typedef struct
FIRSTPASS_STATS *stats_in, *stats_in_end;
struct vpx_codec_pkt_list *output_pkt_list;
int first_pass_done;
- unsigned char *fp_motion_map;
+#if !(CONFIG_REALTIME_ONLY)
+ unsigned char *fp_motion_map;
unsigned char *fp_motion_map_stats, *fp_motion_map_stats_save;
+#endif
#if 0
// Experimental code for lagged and one pass
diff --git a/vp8/encoder/x86/variance_impl_sse2.asm b/vp8/encoder/x86/variance_impl_sse2.asm
index 7178e7e31..6cdc47bc9 100644
--- a/vp8/encoder/x86/variance_impl_sse2.asm
+++ b/vp8/encoder/x86/variance_impl_sse2.asm
@@ -85,10 +85,9 @@ sym(vp8_get16x16var_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
- GET_GOT rbx
+ push rbx
push rsi
push rdi
- sub rsp, 16
; end prolog
mov rsi, arg(0) ;[src_ptr]
@@ -97,6 +96,29 @@ sym(vp8_get16x16var_sse2):
movsxd rax, DWORD PTR arg(1) ;[source_stride]
movsxd rdx, DWORD PTR arg(3) ;[recon_stride]
+ ; Prefetch data
+ lea rcx, [rax+rax*2]
+ prefetcht0 [rsi]
+ prefetcht0 [rsi+rax]
+ prefetcht0 [rsi+rax*2]
+ prefetcht0 [rsi+rcx]
+ lea rbx, [rsi+rax*4]
+ prefetcht0 [rbx]
+ prefetcht0 [rbx+rax]
+ prefetcht0 [rbx+rax*2]
+ prefetcht0 [rbx+rcx]
+
+ lea rcx, [rdx+rdx*2]
+ prefetcht0 [rdi]
+ prefetcht0 [rdi+rdx]
+ prefetcht0 [rdi+rdx*2]
+ prefetcht0 [rdi+rcx]
+ lea rbx, [rdi+rdx*4]
+ prefetcht0 [rbx]
+ prefetcht0 [rbx+rdx]
+ prefetcht0 [rbx+rdx*2]
+ prefetcht0 [rbx+rcx]
+
pxor xmm0, xmm0 ; clear xmm0 for unpack
pxor xmm7, xmm7 ; clear xmm7 for accumulating diffs
@@ -107,6 +129,9 @@ var16loop:
movdqu xmm1, XMMWORD PTR [rsi]
movdqu xmm2, XMMWORD PTR [rdi]
+ prefetcht0 [rsi+rax*8]
+ prefetcht0 [rdi+rdx*8]
+
movdqa xmm3, xmm1
movdqa xmm4, xmm2
@@ -178,10 +203,9 @@ var16loop:
; begin epilog
- add rsp, 16
pop rdi
pop rsi
- RESTORE_GOT
+ pop rbx
UNSHADOW_ARGS
pop rbp
ret
diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c
index 4e3ce070d..61c603229 100644
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -301,7 +301,9 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
#endif
cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_sse2;
+#if !(CONFIG_REALTIME_ONLY)
cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_sse2;
+#endif
}
#endif