diff options
Diffstat (limited to 'vp9')
-rw-r--r-- | vp9/encoder/vp9_encoder.c | 6 | ||||
-rw-r--r-- | vp9/encoder/vp9_encoder.h | 1 | ||||
-rw-r--r-- | vp9/encoder/vp9_lookahead.c | 3 | ||||
-rw-r--r-- | vp9/encoder/vp9_lookahead.h | 3 | ||||
-rw-r--r-- | vp9/encoder/vp9_ratectrl.c | 2 | ||||
-rw-r--r-- | vp9/encoder/vp9_ratectrl.h | 2 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 26 | ||||
-rw-r--r-- | vp9/encoder/vp9_svc_layercontext.c | 16 | ||||
-rw-r--r-- | vp9/encoder/vp9_temporal_filter.c | 16 | ||||
-rw-r--r-- | vp9/encoder/vp9_temporal_filter.h | 1 | ||||
-rw-r--r-- | vp9/encoder/x86/vp9_subpel_variance_impl_sse2.asm | 337 | ||||
-rw-r--r-- | vp9/encoder/x86/vp9_variance_impl_sse2.asm | 333 | ||||
-rw-r--r-- | vp9/encoder/x86/vp9_variance_sse2.c | 60 | ||||
-rw-r--r-- | vp9/vp9cx.mk | 1 |
14 files changed, 38 insertions, 769 deletions
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 395d26aef..f3ab4ede3 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -162,6 +162,7 @@ void vp9_initialize_enc() { vp9_rc_init_minq_luts(); vp9_entropy_mv_init(); vp9_entropy_mode_init(); + vp9_temporal_filter_init(); init_done = 1; } } @@ -536,7 +537,6 @@ static void set_tile_limits(VP9_COMP *cpi) { static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) { VP9_COMMON *const cm = &cpi->common; - int i; cpi->oxcf = *oxcf; @@ -571,10 +571,6 @@ static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) { cpi->alt_fb_idx = 2; set_tile_limits(cpi); - - cpi->fixed_divide[0] = 0; - for (i = 1; i < 512; i++) - cpi->fixed_divide[i] = 0x80000 / i; } static int get_pass(MODE mode) { diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index edd45948d..6b9737076 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -443,7 +443,6 @@ typedef struct VP9_COMP { YV12_BUFFER_CONFIG alt_ref_buffer; YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS]; - int fixed_divide[512]; #if CONFIG_INTERNAL_STATS unsigned int mode_chosen_counts[MAX_MODES]; diff --git a/vp9/encoder/vp9_lookahead.c b/vp9/encoder/vp9_lookahead.c index a298f1c20..abe71e681 100644 --- a/vp9/encoder/vp9_lookahead.c +++ b/vp9/encoder/vp9_lookahead.c @@ -18,6 +18,9 @@ #include "vp9/encoder/vp9_extend.h" #include "vp9/encoder/vp9_lookahead.h" +// The max of past frames we want to keep in the queue. +#define MAX_PRE_FRAMES 1 + struct lookahead_ctx { unsigned int max_sz; /* Absolute size of the queue */ unsigned int sz; /* Number of buffers currently in the queue */ diff --git a/vp9/encoder/vp9_lookahead.h b/vp9/encoder/vp9_lookahead.h index 046c533cc..ff63c0d0d 100644 --- a/vp9/encoder/vp9_lookahead.h +++ b/vp9/encoder/vp9_lookahead.h @@ -20,9 +20,6 @@ extern "C" { #define MAX_LAG_BUFFERS 25 -// The max of past frames we want to keep in the queue. -#define MAX_PRE_FRAMES 1 - struct lookahead_entry { YV12_BUFFER_CONFIG img; int64_t ts_start; diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index b123bfdcb..24e75aedb 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -40,6 +40,8 @@ #define MIN_BPB_FACTOR 0.005 #define MAX_BPB_FACTOR 50 +#define FRAME_OVERHEAD_BITS 200 + // Tables relating active max Q to active min Q static int kf_low_motion_minq[QINDEX_RANGE]; static int kf_high_motion_minq[QINDEX_RANGE]; diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h index 820366119..b1cc67609 100644 --- a/vp9/encoder/vp9_ratectrl.h +++ b/vp9/encoder/vp9_ratectrl.h @@ -20,8 +20,6 @@ extern "C" { #endif -#define FRAME_OVERHEAD_BITS 200 - // Bits Per MB at different Q (Multiplied by 512) #define BPER_MB_NORMBITS 9 diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index f309aac96..a2fc1bb77 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -1806,7 +1806,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, // motion search for newmv (single predictor case only) if (!has_second_rf && this_mode == NEWMV && seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) { - int_mv *const new_mv = &mode_mv[NEWMV][0]; + MV *const new_mv = &mode_mv[NEWMV][0].as_mv; int step_param = 0; int further_steps; int thissme, bestsme = INT_MAX; @@ -1862,9 +1862,9 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, step_param, sadpb, 1, v_fn_ptr, 1, &bsi->ref_mv[0]->as_mv, - &new_mv->as_mv); + new_mv); if (bestsme < INT_MAX) - bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv, + bestsme = vp9_get_mvpred_var(x, new_mv, &bsi->ref_mv[0]->as_mv, v_fn_ptr, 1); } else if (cpi->sf.search_method == SQUARE) { @@ -1872,9 +1872,9 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, step_param, sadpb, 1, v_fn_ptr, 1, &bsi->ref_mv[0]->as_mv, - &new_mv->as_mv); + new_mv); if (bestsme < INT_MAX) - bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv, + bestsme = vp9_get_mvpred_var(x, new_mv, &bsi->ref_mv[0]->as_mv, v_fn_ptr, 1); } else if (cpi->sf.search_method == BIGDIA) { @@ -1882,16 +1882,16 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, step_param, sadpb, 1, v_fn_ptr, 1, &bsi->ref_mv[0]->as_mv, - &new_mv->as_mv); + new_mv); if (bestsme < INT_MAX) - bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv, + bestsme = vp9_get_mvpred_var(x, new_mv, &bsi->ref_mv[0]->as_mv, v_fn_ptr, 1); } else { bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param, sadpb, further_steps, 0, v_fn_ptr, &bsi->ref_mv[0]->as_mv, - &new_mv->as_mv); + new_mv); } // Should we do a full search (best quality only) @@ -1906,18 +1906,18 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, &best_mv->as_mv); if (thissme < bestsme) { bestsme = thissme; - new_mv->as_int = best_mv->as_int; + *new_mv = best_mv->as_mv; } else { // The full search result is actually worse so re-instate the // previous best vector - best_mv->as_int = new_mv->as_int; + best_mv->as_mv = *new_mv; } } if (bestsme < INT_MAX) { int distortion; cpi->find_fractional_mv_step(x, - &new_mv->as_mv, + new_mv, &bsi->ref_mv[0]->as_mv, cm->allow_high_precision_mv, x->errorperbit, v_fn_ptr, @@ -1928,11 +1928,11 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, &x->pred_sse[mbmi->ref_frame[0]]); // save motion search result for use in compound prediction - seg_mvs[i][mbmi->ref_frame[0]].as_int = new_mv->as_int; + seg_mvs[i][mbmi->ref_frame[0]].as_mv = *new_mv; } if (cpi->sf.adaptive_motion_search) - x->pred_mv[mbmi->ref_frame[0]].as_int = new_mv->as_int; + x->pred_mv[mbmi->ref_frame[0]].as_mv = *new_mv; // restore src pointers mi_buf_restore(x, orig_src, orig_pre); diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c index 5342447d6..792e8d21b 100644 --- a/vp9/encoder/vp9_svc_layercontext.c +++ b/vp9/encoder/vp9_svc_layercontext.c @@ -221,15 +221,9 @@ void vp9_inc_frame_in_layer(SVC *svc) { ++lc->current_video_frame_in_layer; } -int vp9_is_upper_layer_key_frame(const struct VP9_COMP *const cpi) { - int is_upper_layer_key_frame = 0; - - if (cpi->use_svc && cpi->svc.number_temporal_layers == 1 && - cpi->svc.spatial_layer_id > 0) { - if (cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame) { - is_upper_layer_key_frame = 1; - } - } - - return is_upper_layer_key_frame; +int vp9_is_upper_layer_key_frame(const VP9_COMP *cpi) { + return cpi->use_svc && + cpi->svc.number_temporal_layers == 1 && + cpi->svc.spatial_layer_id > 0 && + cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame; } diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c index ca9339155..a176bbf91 100644 --- a/vp9/encoder/vp9_temporal_filter.c +++ b/vp9/encoder/vp9_temporal_filter.c @@ -27,6 +27,8 @@ #include "vpx_ports/vpx_timer.h" #include "vpx_scale/vpx_scale.h" +static int fixed_divide[512]; + static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, uint8_t *y_mb_ptr, uint8_t *u_mb_ptr, @@ -78,6 +80,14 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, kernel, mv_precision_uv, x, y); } +void vp9_temporal_filter_init() { + int i; + + fixed_divide[0] = 0; + for (i = 1; i < 512; ++i) + fixed_divide[i] = 0x80000 / i; +} + void vp9_temporal_filter_apply_c(uint8_t *frame1, unsigned int stride, uint8_t *frame2, @@ -294,7 +304,7 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, for (i = 0, k = 0; i < 16; i++) { for (j = 0; j < 16; j++, k++) { unsigned int pval = accumulator[k] + (count[k] >> 1); - pval *= cpi->fixed_divide[count[k]]; + pval *= fixed_divide[count[k]]; pval >>= 19; dst1[byte] = (uint8_t)pval; @@ -315,13 +325,13 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, // U unsigned int pval = accumulator[k] + (count[k] >> 1); - pval *= cpi->fixed_divide[count[k]]; + pval *= fixed_divide[count[k]]; pval >>= 19; dst1[byte] = (uint8_t)pval; // V pval = accumulator[m] + (count[m] >> 1); - pval *= cpi->fixed_divide[count[m]]; + pval *= fixed_divide[count[m]]; pval >>= 19; dst2[byte] = (uint8_t)pval; diff --git a/vp9/encoder/vp9_temporal_filter.h b/vp9/encoder/vp9_temporal_filter.h index 3028d7884..9453dc16a 100644 --- a/vp9/encoder/vp9_temporal_filter.h +++ b/vp9/encoder/vp9_temporal_filter.h @@ -15,6 +15,7 @@ extern "C" { #endif +void vp9_temporal_filter_init(); void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance); void vp9_configure_arnr_filter(VP9_COMP *cpi, const unsigned int frames_to_arnr, diff --git a/vp9/encoder/x86/vp9_subpel_variance_impl_sse2.asm b/vp9/encoder/x86/vp9_subpel_variance_impl_sse2.asm deleted file mode 100644 index 2ecc23e55..000000000 --- a/vp9/encoder/x86/vp9_subpel_variance_impl_sse2.asm +++ /dev/null @@ -1,337 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - -%include "vpx_ports/x86_abi_support.asm" - -;void vp9_half_horiz_vert_variance16x_h_sse2 -;( -; unsigned char *ref_ptr, -; int ref_pixels_per_line, -; unsigned char *src_ptr, -; int src_pixels_per_line, -; unsigned int Height, -; int *sum, -; unsigned int *sumsquared -;) -global sym(vp9_half_horiz_vert_variance16x_h_sse2) PRIVATE -sym(vp9_half_horiz_vert_variance16x_h_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - pxor xmm6, xmm6 ; error accumulator - pxor xmm7, xmm7 ; sse eaccumulator - mov rsi, arg(0) ;ref_ptr ; - - mov rdi, arg(2) ;src_ptr ; - movsxd rcx, dword ptr arg(4) ;Height ; - movsxd rax, dword ptr arg(1) ;ref_pixels_per_line - movsxd rdx, dword ptr arg(3) ;src_pixels_per_line - - pxor xmm0, xmm0 ; - - movdqu xmm5, XMMWORD PTR [rsi] - movdqu xmm3, XMMWORD PTR [rsi+1] - pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3) horizontal line 1 - - lea rsi, [rsi + rax] - -.half_horiz_vert_variance16x_h_1: - movdqu xmm1, XMMWORD PTR [rsi] ; - movdqu xmm2, XMMWORD PTR [rsi+1] ; - pavgb xmm1, xmm2 ; xmm1 = avg(xmm1,xmm3) horizontal line i+1 - - pavgb xmm5, xmm1 ; xmm = vertical average of the above - - movdqa xmm4, xmm5 - punpcklbw xmm5, xmm0 ; xmm5 = words of above - punpckhbw xmm4, xmm0 - - movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d7 - punpcklbw xmm3, xmm0 ; xmm3 = words of above - psubw xmm5, xmm3 ; xmm5 -= xmm3 - - movq xmm3, QWORD PTR [rdi+8] - punpcklbw xmm3, xmm0 - psubw xmm4, xmm3 - - paddw xmm6, xmm5 ; xmm6 += accumulated column differences - paddw xmm6, xmm4 - pmaddwd xmm5, xmm5 ; xmm5 *= xmm5 - pmaddwd xmm4, xmm4 - paddd xmm7, xmm5 ; xmm7 += accumulated square column differences - paddd xmm7, xmm4 - - movdqa xmm5, xmm1 ; save xmm1 for use on the next row - - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - - sub rcx, 1 ; - jnz .half_horiz_vert_variance16x_h_1 ; - - pxor xmm1, xmm1 - pxor xmm5, xmm5 - - punpcklwd xmm0, xmm6 - punpckhwd xmm1, xmm6 - psrad xmm0, 16 - psrad xmm1, 16 - paddd xmm0, xmm1 - movdqa xmm1, xmm0 - - movdqa xmm6, xmm7 - punpckldq xmm6, xmm5 - punpckhdq xmm7, xmm5 - paddd xmm6, xmm7 - - punpckldq xmm0, xmm5 - punpckhdq xmm1, xmm5 - paddd xmm0, xmm1 - - movdqa xmm7, xmm6 - movdqa xmm1, xmm0 - - psrldq xmm7, 8 - psrldq xmm1, 8 - - paddd xmm6, xmm7 - paddd xmm0, xmm1 - - mov rsi, arg(5) ;[Sum] - mov rdi, arg(6) ;[SSE] - - movd [rsi], xmm0 - movd [rdi], xmm6 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void vp9_half_vert_variance16x_h_sse2 -;( -; unsigned char *ref_ptr, -; int ref_pixels_per_line, -; unsigned char *src_ptr, -; int src_pixels_per_line, -; unsigned int Height, -; int *sum, -; unsigned int *sumsquared -;) -global sym(vp9_half_vert_variance16x_h_sse2) PRIVATE -sym(vp9_half_vert_variance16x_h_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - pxor xmm6, xmm6 ; error accumulator - pxor xmm7, xmm7 ; sse eaccumulator - mov rsi, arg(0) ;ref_ptr - - mov rdi, arg(2) ;src_ptr - movsxd rcx, dword ptr arg(4) ;Height - movsxd rax, dword ptr arg(1) ;ref_pixels_per_line - movsxd rdx, dword ptr arg(3) ;src_pixels_per_line - - movdqu xmm5, XMMWORD PTR [rsi] - lea rsi, [rsi + rax ] - pxor xmm0, xmm0 - -.half_vert_variance16x_h_1: - movdqu xmm3, XMMWORD PTR [rsi] - - pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3) - movdqa xmm4, xmm5 - punpcklbw xmm5, xmm0 - punpckhbw xmm4, xmm0 - - movq xmm2, QWORD PTR [rdi] - punpcklbw xmm2, xmm0 - psubw xmm5, xmm2 - movq xmm2, QWORD PTR [rdi+8] - punpcklbw xmm2, xmm0 - psubw xmm4, xmm2 - - paddw xmm6, xmm5 ; xmm6 += accumulated column differences - paddw xmm6, xmm4 - pmaddwd xmm5, xmm5 ; xmm5 *= xmm5 - pmaddwd xmm4, xmm4 - paddd xmm7, xmm5 ; xmm7 += accumulated square column differences - paddd xmm7, xmm4 - - movdqa xmm5, xmm3 - - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - - sub rcx, 1 - jnz .half_vert_variance16x_h_1 - - pxor xmm1, xmm1 - pxor xmm5, xmm5 - - punpcklwd xmm0, xmm6 - punpckhwd xmm1, xmm6 - psrad xmm0, 16 - psrad xmm1, 16 - paddd xmm0, xmm1 - movdqa xmm1, xmm0 - - movdqa xmm6, xmm7 - punpckldq xmm6, xmm5 - punpckhdq xmm7, xmm5 - paddd xmm6, xmm7 - - punpckldq xmm0, xmm5 - punpckhdq xmm1, xmm5 - paddd xmm0, xmm1 - - movdqa xmm7, xmm6 - movdqa xmm1, xmm0 - - psrldq xmm7, 8 - psrldq xmm1, 8 - - paddd xmm6, xmm7 - paddd xmm0, xmm1 - - mov rsi, arg(5) ;[Sum] - mov rdi, arg(6) ;[SSE] - - movd [rsi], xmm0 - movd [rdi], xmm6 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void vp9_half_horiz_variance16x_h_sse2 -;( -; unsigned char *ref_ptr, -; int ref_pixels_per_line, -; unsigned char *src_ptr, -; int src_pixels_per_line, -; unsigned int Height, -; int *sum, -; unsigned int *sumsquared -;) -global sym(vp9_half_horiz_variance16x_h_sse2) PRIVATE -sym(vp9_half_horiz_variance16x_h_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - pxor xmm6, xmm6 ; error accumulator - pxor xmm7, xmm7 ; sse eaccumulator - mov rsi, arg(0) ;ref_ptr ; - - mov rdi, arg(2) ;src_ptr ; - movsxd rcx, dword ptr arg(4) ;Height ; - movsxd rax, dword ptr arg(1) ;ref_pixels_per_line - movsxd rdx, dword ptr arg(3) ;src_pixels_per_line - - pxor xmm0, xmm0 ; - -.half_horiz_variance16x_h_1: - movdqu xmm5, XMMWORD PTR [rsi] ; xmm5 = s0,s1,s2..s15 - movdqu xmm3, XMMWORD PTR [rsi+1] ; xmm3 = s1,s2,s3..s16 - - pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3) - movdqa xmm1, xmm5 - punpcklbw xmm5, xmm0 ; xmm5 = words of above - punpckhbw xmm1, xmm0 - - movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d7 - punpcklbw xmm3, xmm0 ; xmm3 = words of above - movq xmm2, QWORD PTR [rdi+8] - punpcklbw xmm2, xmm0 - - psubw xmm5, xmm3 ; xmm5 -= xmm3 - psubw xmm1, xmm2 - paddw xmm6, xmm5 ; xmm6 += accumulated column differences - paddw xmm6, xmm1 - pmaddwd xmm5, xmm5 ; xmm5 *= xmm5 - pmaddwd xmm1, xmm1 - paddd xmm7, xmm5 ; xmm7 += accumulated square column differences - paddd xmm7, xmm1 - - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - - sub rcx, 1 ; - jnz .half_horiz_variance16x_h_1 ; - - pxor xmm1, xmm1 - pxor xmm5, xmm5 - - punpcklwd xmm0, xmm6 - punpckhwd xmm1, xmm6 - psrad xmm0, 16 - psrad xmm1, 16 - paddd xmm0, xmm1 - movdqa xmm1, xmm0 - - movdqa xmm6, xmm7 - punpckldq xmm6, xmm5 - punpckhdq xmm7, xmm5 - paddd xmm6, xmm7 - - punpckldq xmm0, xmm5 - punpckhdq xmm1, xmm5 - paddd xmm0, xmm1 - - movdqa xmm7, xmm6 - movdqa xmm1, xmm0 - - psrldq xmm7, 8 - psrldq xmm1, 8 - - paddd xmm6, xmm7 - paddd xmm0, xmm1 - - mov rsi, arg(5) ;[Sum] - mov rdi, arg(6) ;[SSE] - - movd [rsi], xmm0 - movd [rdi], xmm6 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret diff --git a/vp9/encoder/x86/vp9_variance_impl_sse2.asm b/vp9/encoder/x86/vp9_variance_impl_sse2.asm index 2c5088134..483041278 100644 --- a/vp9/encoder/x86/vp9_variance_impl_sse2.asm +++ b/vp9/encoder/x86/vp9_variance_impl_sse2.asm @@ -398,337 +398,4 @@ sym(vp9_get8x8var_sse2): pop rbp ret -;void vp9_half_horiz_vert_variance8x_h_sse2 -;( -; unsigned char *ref_ptr, -; int ref_pixels_per_line, -; unsigned char *src_ptr, -; int src_pixels_per_line, -; unsigned int Height, -; int *sum, -; unsigned int *sumsquared -;) -global sym(vp9_half_horiz_vert_variance8x_h_sse2) PRIVATE -sym(vp9_half_horiz_vert_variance8x_h_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - -%if ABI_IS_32BIT=0 - movsxd r8, dword ptr arg(1) ;ref_pixels_per_line - movsxd r9, dword ptr arg(3) ;src_pixels_per_line -%endif - - pxor xmm6, xmm6 ; error accumulator - pxor xmm7, xmm7 ; sse eaccumulator - mov rsi, arg(0) ;ref_ptr ; - - mov rdi, arg(2) ;src_ptr ; - movsxd rcx, dword ptr arg(4) ;Height ; - movsxd rax, dword ptr arg(1) ;ref_pixels_per_line - - pxor xmm0, xmm0 ; - - movq xmm5, QWORD PTR [rsi] ; xmm5 = s0,s1,s2..s8 - movq xmm3, QWORD PTR [rsi+1] ; xmm3 = s1,s2,s3..s9 - pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3) horizontal line 1 - -%if ABI_IS_32BIT - add rsi, dword ptr arg(1) ;ref_pixels_per_line ; next source -%else - add rsi, r8 -%endif - -.half_horiz_vert_variance8x_h_1: - - movq xmm1, QWORD PTR [rsi] ; - movq xmm2, QWORD PTR [rsi+1] ; - pavgb xmm1, xmm2 ; xmm1 = avg(xmm1,xmm3) horizontal line i+1 - - pavgb xmm5, xmm1 ; xmm = vertical average of the above - punpcklbw xmm5, xmm0 ; xmm5 = words of above - - movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d8 - punpcklbw xmm3, xmm0 ; xmm3 = words of above - - psubw xmm5, xmm3 ; xmm5 -= xmm3 - paddw xmm6, xmm5 ; xmm6 += accumulated column differences - pmaddwd xmm5, xmm5 ; xmm5 *= xmm5 - paddd xmm7, xmm5 ; xmm7 += accumulated square column differences - - movdqa xmm5, xmm1 ; save xmm1 for use on the next row - -%if ABI_IS_32BIT - add esi, dword ptr arg(1) ;ref_pixels_per_line ; next source - add edi, dword ptr arg(3) ;src_pixels_per_line ; next destination -%else - add rsi, r8 - add rdi, r9 -%endif - - sub rcx, 1 ; - jnz .half_horiz_vert_variance8x_h_1 ; - - movdq2q mm6, xmm6 ; - movdq2q mm7, xmm7 ; - - psrldq xmm6, 8 - psrldq xmm7, 8 - - movdq2q mm2, xmm6 - movdq2q mm3, xmm7 - - paddw mm6, mm2 - paddd mm7, mm3 - - pxor mm3, mm3 ; - pxor mm2, mm2 ; - - punpcklwd mm2, mm6 ; - punpckhwd mm3, mm6 ; - - paddd mm2, mm3 ; - movq mm6, mm2 ; - - psrlq mm6, 32 ; - paddd mm2, mm6 ; - - psrad mm2, 16 ; - movq mm4, mm7 ; - - psrlq mm4, 32 ; - paddd mm4, mm7 ; - - mov rsi, arg(5) ; sum - mov rdi, arg(6) ; sumsquared - - movd [rsi], mm2 ; - movd [rdi], mm4 ; - - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void vp9_half_vert_variance8x_h_sse2 -;( -; unsigned char *ref_ptr, -; int ref_pixels_per_line, -; unsigned char *src_ptr, -; int src_pixels_per_line, -; unsigned int Height, -; int *sum, -; unsigned int *sumsquared -;) -global sym(vp9_half_vert_variance8x_h_sse2) PRIVATE -sym(vp9_half_vert_variance8x_h_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - -%if ABI_IS_32BIT=0 - movsxd r8, dword ptr arg(1) ;ref_pixels_per_line - movsxd r9, dword ptr arg(3) ;src_pixels_per_line -%endif - - pxor xmm6, xmm6 ; error accumulator - pxor xmm7, xmm7 ; sse eaccumulator - mov rsi, arg(0) ;ref_ptr ; - - mov rdi, arg(2) ;src_ptr ; - movsxd rcx, dword ptr arg(4) ;Height ; - movsxd rax, dword ptr arg(1) ;ref_pixels_per_line - - pxor xmm0, xmm0 ; -.half_vert_variance8x_h_1: - movq xmm5, QWORD PTR [rsi] ; xmm5 = s0,s1,s2..s8 - movq xmm3, QWORD PTR [rsi+rax] ; xmm3 = s1,s2,s3..s9 - - pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3) - punpcklbw xmm5, xmm0 ; xmm5 = words of above - - movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d8 - punpcklbw xmm3, xmm0 ; xmm3 = words of above - - psubw xmm5, xmm3 ; xmm5 -= xmm3 - paddw xmm6, xmm5 ; xmm6 += accumulated column differences - pmaddwd xmm5, xmm5 ; xmm5 *= xmm5 - paddd xmm7, xmm5 ; xmm7 += accumulated square column differences - -%if ABI_IS_32BIT - add esi, dword ptr arg(1) ;ref_pixels_per_line ; next source - add edi, dword ptr arg(3) ;src_pixels_per_line ; next destination -%else - add rsi, r8 - add rdi, r9 -%endif - sub rcx, 1 ; - jnz .half_vert_variance8x_h_1 ; - - movdq2q mm6, xmm6 ; - movdq2q mm7, xmm7 ; - - psrldq xmm6, 8 - psrldq xmm7, 8 - - movdq2q mm2, xmm6 - movdq2q mm3, xmm7 - - paddw mm6, mm2 - paddd mm7, mm3 - - pxor mm3, mm3 ; - pxor mm2, mm2 ; - - punpcklwd mm2, mm6 ; - punpckhwd mm3, mm6 ; - - paddd mm2, mm3 ; - movq mm6, mm2 ; - - psrlq mm6, 32 ; - paddd mm2, mm6 ; - - psrad mm2, 16 ; - movq mm4, mm7 ; - - psrlq mm4, 32 ; - paddd mm4, mm7 ; - - mov rsi, arg(5) ; sum - mov rdi, arg(6) ; sumsquared - - movd [rsi], mm2 ; - movd [rdi], mm4 ; - - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -;void vp9_half_horiz_variance8x_h_sse2 -;( -; unsigned char *ref_ptr, -; int ref_pixels_per_line, -; unsigned char *src_ptr, -; int src_pixels_per_line, -; unsigned int Height, -; int *sum, -; unsigned int *sumsquared -;) -global sym(vp9_half_horiz_variance8x_h_sse2) PRIVATE -sym(vp9_half_horiz_variance8x_h_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - -%if ABI_IS_32BIT=0 - movsxd r8, dword ptr arg(1) ;ref_pixels_per_line - movsxd r9, dword ptr arg(3) ;src_pixels_per_line -%endif - - pxor xmm6, xmm6 ; error accumulator - pxor xmm7, xmm7 ; sse eaccumulator - mov rsi, arg(0) ;ref_ptr ; - - mov rdi, arg(2) ;src_ptr ; - movsxd rcx, dword ptr arg(4) ;Height ; - - pxor xmm0, xmm0 ; -.half_horiz_variance8x_h_1: - movq xmm5, QWORD PTR [rsi] ; xmm5 = s0,s1,s2..s8 - movq xmm3, QWORD PTR [rsi+1] ; xmm3 = s1,s2,s3..s9 - - pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3) - punpcklbw xmm5, xmm0 ; xmm5 = words of above - - movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d8 - punpcklbw xmm3, xmm0 ; xmm3 = words of above - - psubw xmm5, xmm3 ; xmm5 -= xmm3 - paddw xmm6, xmm5 ; xmm6 += accumulated column differences - pmaddwd xmm5, xmm5 ; xmm5 *= xmm5 - paddd xmm7, xmm5 ; xmm7 += accumulated square column differences - -%if ABI_IS_32BIT - add esi, dword ptr arg(1) ;ref_pixels_per_line ; next source - add edi, dword ptr arg(3) ;src_pixels_per_line ; next destination -%else - add rsi, r8 - add rdi, r9 -%endif - sub rcx, 1 ; - jnz .half_horiz_variance8x_h_1 ; - - movdq2q mm6, xmm6 ; - movdq2q mm7, xmm7 ; - - psrldq xmm6, 8 - psrldq xmm7, 8 - - movdq2q mm2, xmm6 - movdq2q mm3, xmm7 - - paddw mm6, mm2 - paddd mm7, mm3 - - pxor mm3, mm3 ; - pxor mm2, mm2 ; - - punpcklwd mm2, mm6 ; - punpckhwd mm3, mm6 ; - - paddd mm2, mm3 ; - movq mm6, mm2 ; - - psrlq mm6, 32 ; - paddd mm2, mm6 ; - - psrad mm2, 16 ; - movq mm4, mm7 ; - - psrlq mm4, 32 ; - paddd mm4, mm7 ; - - mov rsi, arg(5) ; sum - mov rdi, arg(6) ; sumsquared - - movd [rsi], mm2 ; - movd [rdi], mm4 ; - - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret diff --git a/vp9/encoder/x86/vp9_variance_sse2.c b/vp9/encoder/x86/vp9_variance_sse2.c index 25d594632..41f225922 100644 --- a/vp9/encoder/x86/vp9_variance_sse2.c +++ b/vp9/encoder/x86/vp9_variance_sse2.c @@ -42,66 +42,6 @@ unsigned int vp9_get8x8var_sse2 unsigned int *SSE, int *Sum ); -void vp9_half_horiz_vert_variance8x_h_sse2 -( - const unsigned char *ref_ptr, - int ref_pixels_per_line, - const unsigned char *src_ptr, - int src_pixels_per_line, - unsigned int Height, - int *sum, - unsigned int *sumsquared -); -void vp9_half_horiz_vert_variance16x_h_sse2 -( - const unsigned char *ref_ptr, - int ref_pixels_per_line, - const unsigned char *src_ptr, - int src_pixels_per_line, - unsigned int Height, - int *sum, - unsigned int *sumsquared -); -void vp9_half_horiz_variance8x_h_sse2 -( - const unsigned char *ref_ptr, - int ref_pixels_per_line, - const unsigned char *src_ptr, - int src_pixels_per_line, - unsigned int Height, - int *sum, - unsigned int *sumsquared -); -void vp9_half_horiz_variance16x_h_sse2 -( - const unsigned char *ref_ptr, - int ref_pixels_per_line, - const unsigned char *src_ptr, - int src_pixels_per_line, - unsigned int Height, - int *sum, - unsigned int *sumsquared -); -void vp9_half_vert_variance8x_h_sse2 -( - const unsigned char *ref_ptr, - int ref_pixels_per_line, - const unsigned char *src_ptr, - int src_pixels_per_line, - unsigned int Height, - int *sum, - unsigned int *sumsquared -); -void vp9_half_vert_variance16x_h_sse2 -( - const unsigned char *ref_ptr, - int ref_pixels_per_line, - const unsigned char *src_ptr, - int src_pixels_per_line, - unsigned int Height, - int *sum, - unsigned int *sumsquared -); typedef unsigned int (*get_var_sse2) ( const unsigned char *src_ptr, diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk index fab7f18ee..5e88793c8 100644 --- a/vp9/vp9cx.mk +++ b/vp9/vp9cx.mk @@ -96,7 +96,6 @@ VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_impl_sse2.asm VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_variance_impl_intrin_avx2.c VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad4d_sse2.asm VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_sad4d_intrin_avx2.c -VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subpel_variance_impl_sse2.asm VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm VP9_CX_SRCS-$(HAVE_SSE3) += encoder/x86/vp9_sad_sse3.asm |