summary | refs | log | tree | commit | diff
path: root/vp9/encoder
diff options
context:
space:
mode:
Diffstat (limited to 'vp9/encoder')
-rw-r--r-- vp9/encoder/vp9_rdopt.c 26
-rw-r--r-- vp9/encoder/vp9_svc_layercontext.c 16
-rw-r--r-- vp9/encoder/x86/vp9_subpel_variance_impl_sse2.asm 337
-rw-r--r-- vp9/encoder/x86/vp9_variance_impl_sse2.asm 333
-rw-r--r-- vp9/encoder/x86/vp9_variance_sse2.c 60
5 files changed, 18 insertions, 754 deletions
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index f309aac96..a2fc1bb77 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1806,7 +1806,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
// motion search for newmv (single predictor case only)
if (!has_second_rf && this_mode == NEWMV &&
seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
- int_mv *const new_mv = &mode_mv[NEWMV][0];
+ MV *const new_mv = &mode_mv[NEWMV][0].as_mv;
int step_param = 0;
int further_steps;
int thissme, bestsme = INT_MAX;
@@ -1862,9 +1862,9 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
step_param,
sadpb, 1, v_fn_ptr, 1,
&bsi->ref_mv[0]->as_mv,
- &new_mv->as_mv);
+ new_mv);
if (bestsme < INT_MAX)
- bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv,
+ bestsme = vp9_get_mvpred_var(x, new_mv,
&bsi->ref_mv[0]->as_mv,
v_fn_ptr, 1);
} else if (cpi->sf.search_method == SQUARE) {
@@ -1872,9 +1872,9 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
step_param,
sadpb, 1, v_fn_ptr, 1,
&bsi->ref_mv[0]->as_mv,
- &new_mv->as_mv);
+ new_mv);
if (bestsme < INT_MAX)
- bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv,
+ bestsme = vp9_get_mvpred_var(x, new_mv,
&bsi->ref_mv[0]->as_mv,
v_fn_ptr, 1);
} else if (cpi->sf.search_method == BIGDIA) {
@@ -1882,16 +1882,16 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
step_param,
sadpb, 1, v_fn_ptr, 1,
&bsi->ref_mv[0]->as_mv,
- &new_mv->as_mv);
+ new_mv);
if (bestsme < INT_MAX)
- bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv,
+ bestsme = vp9_get_mvpred_var(x, new_mv,
&bsi->ref_mv[0]->as_mv,
v_fn_ptr, 1);
} else {
bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
sadpb, further_steps, 0, v_fn_ptr,
&bsi->ref_mv[0]->as_mv,
- &new_mv->as_mv);
+ new_mv);
}
// Should we do a full search (best quality only)
@@ -1906,18 +1906,18 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
&best_mv->as_mv);
if (thissme < bestsme) {
bestsme = thissme;
- new_mv->as_int = best_mv->as_int;
+ *new_mv = best_mv->as_mv;
} else {
// The full search result is actually worse so re-instate the
// previous best vector
- best_mv->as_int = new_mv->as_int;
+ best_mv->as_mv = *new_mv;
}
}
if (bestsme < INT_MAX) {
int distortion;
cpi->find_fractional_mv_step(x,
- &new_mv->as_mv,
+ new_mv,
&bsi->ref_mv[0]->as_mv,
cm->allow_high_precision_mv,
x->errorperbit, v_fn_ptr,
@@ -1928,11 +1928,11 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
&x->pred_sse[mbmi->ref_frame[0]]);
// save motion search result for use in compound prediction
- seg_mvs[i][mbmi->ref_frame[0]].as_int = new_mv->as_int;
+ seg_mvs[i][mbmi->ref_frame[0]].as_mv = *new_mv;
}
if (cpi->sf.adaptive_motion_search)
- x->pred_mv[mbmi->ref_frame[0]].as_int = new_mv->as_int;
+ x->pred_mv[mbmi->ref_frame[0]].as_mv = *new_mv;
// restore src pointers
mi_buf_restore(x, orig_src, orig_pre);
diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c
index 5342447d6..792e8d21b 100644
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -221,15 +221,9 @@ void vp9_inc_frame_in_layer(SVC *svc) {
++lc->current_video_frame_in_layer;
}
-int vp9_is_upper_layer_key_frame(const struct VP9_COMP *const cpi) {
- int is_upper_layer_key_frame = 0;
-
- if (cpi->use_svc && cpi->svc.number_temporal_layers == 1 &&
- cpi->svc.spatial_layer_id > 0) {
- if (cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame) {
- is_upper_layer_key_frame = 1;
- }
- }
-
- return is_upper_layer_key_frame;
+int vp9_is_upper_layer_key_frame(const VP9_COMP *cpi) {
+ return cpi->use_svc &&
+ cpi->svc.number_temporal_layers == 1 &&
+ cpi->svc.spatial_layer_id > 0 &&
+ cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame;
}
diff --git a/vp9/encoder/x86/vp9_subpel_variance_impl_sse2.asm b/vp9/encoder/x86/vp9_subpel_variance_impl_sse2.asm
deleted file mode 100644
index 2ecc23e55..000000000
--- a/vp9/encoder/x86/vp9_subpel_variance_impl_sse2.asm
+++ /dev/null
@@ -1,337 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-%include "vpx_ports/x86_abi_support.asm"
-
-;void vp9_half_horiz_vert_variance16x_h_sse2
-;(
-; unsigned char *ref_ptr,
-; int ref_pixels_per_line,
-; unsigned char *src_ptr,
-; int src_pixels_per_line,
-; unsigned int Height,
-; int *sum,
-; unsigned int *sumsquared
-;)
-global sym(vp9_half_horiz_vert_variance16x_h_sse2) PRIVATE
-sym(vp9_half_horiz_vert_variance16x_h_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- pxor xmm6, xmm6 ; error accumulator
- pxor xmm7, xmm7 ; sse eaccumulator
- mov rsi, arg(0) ;ref_ptr ;
-
- mov rdi, arg(2) ;src_ptr ;
- movsxd rcx, dword ptr arg(4) ;Height ;
- movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
- movsxd rdx, dword ptr arg(3) ;src_pixels_per_line
-
- pxor xmm0, xmm0 ;
-
- movdqu xmm5, XMMWORD PTR [rsi]
- movdqu xmm3, XMMWORD PTR [rsi+1]
- pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3) horizontal line 1
-
- lea rsi, [rsi + rax]
-
-.half_horiz_vert_variance16x_h_1:
- movdqu xmm1, XMMWORD PTR [rsi] ;
- movdqu xmm2, XMMWORD PTR [rsi+1] ;
- pavgb xmm1, xmm2 ; xmm1 = avg(xmm1,xmm3) horizontal line i+1
-
- pavgb xmm5, xmm1 ; xmm = vertical average of the above
-
- movdqa xmm4, xmm5
- punpcklbw xmm5, xmm0 ; xmm5 = words of above
- punpckhbw xmm4, xmm0
-
- movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d7
- punpcklbw xmm3, xmm0 ; xmm3 = words of above
- psubw xmm5, xmm3 ; xmm5 -= xmm3
-
- movq xmm3, QWORD PTR [rdi+8]
- punpcklbw xmm3, xmm0
- psubw xmm4, xmm3
-
- paddw xmm6, xmm5 ; xmm6 += accumulated column differences
- paddw xmm6, xmm4
- pmaddwd xmm5, xmm5 ; xmm5 *= xmm5
- pmaddwd xmm4, xmm4
- paddd xmm7, xmm5 ; xmm7 += accumulated square column differences
- paddd xmm7, xmm4
-
- movdqa xmm5, xmm1 ; save xmm1 for use on the next row
-
- lea rsi, [rsi + rax]
- lea rdi, [rdi + rdx]
-
- sub rcx, 1 ;
- jnz .half_horiz_vert_variance16x_h_1 ;
-
- pxor xmm1, xmm1
- pxor xmm5, xmm5
-
- punpcklwd xmm0, xmm6
- punpckhwd xmm1, xmm6
- psrad xmm0, 16
- psrad xmm1, 16
- paddd xmm0, xmm1
- movdqa xmm1, xmm0
-
- movdqa xmm6, xmm7
- punpckldq xmm6, xmm5
- punpckhdq xmm7, xmm5
- paddd xmm6, xmm7
-
- punpckldq xmm0, xmm5
- punpckhdq xmm1, xmm5
- paddd xmm0, xmm1
-
- movdqa xmm7, xmm6
- movdqa xmm1, xmm0
-
- psrldq xmm7, 8
- psrldq xmm1, 8
-
- paddd xmm6, xmm7
- paddd xmm0, xmm1
-
- mov rsi, arg(5) ;[Sum]
- mov rdi, arg(6) ;[SSE]
-
- movd [rsi], xmm0
- movd [rdi], xmm6
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void vp9_half_vert_variance16x_h_sse2
-;(
-; unsigned char *ref_ptr,
-; int ref_pixels_per_line,
-; unsigned char *src_ptr,
-; int src_pixels_per_line,
-; unsigned int Height,
-; int *sum,
-; unsigned int *sumsquared
-;)
-global sym(vp9_half_vert_variance16x_h_sse2) PRIVATE
-sym(vp9_half_vert_variance16x_h_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- pxor xmm6, xmm6 ; error accumulator
- pxor xmm7, xmm7 ; sse eaccumulator
- mov rsi, arg(0) ;ref_ptr
-
- mov rdi, arg(2) ;src_ptr
- movsxd rcx, dword ptr arg(4) ;Height
- movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
- movsxd rdx, dword ptr arg(3) ;src_pixels_per_line
-
- movdqu xmm5, XMMWORD PTR [rsi]
- lea rsi, [rsi + rax ]
- pxor xmm0, xmm0
-
-.half_vert_variance16x_h_1:
- movdqu xmm3, XMMWORD PTR [rsi]
-
- pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3)
- movdqa xmm4, xmm5
- punpcklbw xmm5, xmm0
- punpckhbw xmm4, xmm0
-
- movq xmm2, QWORD PTR [rdi]
- punpcklbw xmm2, xmm0
- psubw xmm5, xmm2
- movq xmm2, QWORD PTR [rdi+8]
- punpcklbw xmm2, xmm0
- psubw xmm4, xmm2
-
- paddw xmm6, xmm5 ; xmm6 += accumulated column differences
- paddw xmm6, xmm4
- pmaddwd xmm5, xmm5 ; xmm5 *= xmm5
- pmaddwd xmm4, xmm4
- paddd xmm7, xmm5 ; xmm7 += accumulated square column differences
- paddd xmm7, xmm4
-
- movdqa xmm5, xmm3
-
- lea rsi, [rsi + rax]
- lea rdi, [rdi + rdx]
-
- sub rcx, 1
- jnz .half_vert_variance16x_h_1
-
- pxor xmm1, xmm1
- pxor xmm5, xmm5
-
- punpcklwd xmm0, xmm6
- punpckhwd xmm1, xmm6
- psrad xmm0, 16
- psrad xmm1, 16
- paddd xmm0, xmm1
- movdqa xmm1, xmm0
-
- movdqa xmm6, xmm7
- punpckldq xmm6, xmm5
- punpckhdq xmm7, xmm5
- paddd xmm6, xmm7
-
- punpckldq xmm0, xmm5
- punpckhdq xmm1, xmm5
- paddd xmm0, xmm1
-
- movdqa xmm7, xmm6
- movdqa xmm1, xmm0
-
- psrldq xmm7, 8
- psrldq xmm1, 8
-
- paddd xmm6, xmm7
- paddd xmm0, xmm1
-
- mov rsi, arg(5) ;[Sum]
- mov rdi, arg(6) ;[SSE]
-
- movd [rsi], xmm0
- movd [rdi], xmm6
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void vp9_half_horiz_variance16x_h_sse2
-;(
-; unsigned char *ref_ptr,
-; int ref_pixels_per_line,
-; unsigned char *src_ptr,
-; int src_pixels_per_line,
-; unsigned int Height,
-; int *sum,
-; unsigned int *sumsquared
-;)
-global sym(vp9_half_horiz_variance16x_h_sse2) PRIVATE
-sym(vp9_half_horiz_variance16x_h_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- pxor xmm6, xmm6 ; error accumulator
- pxor xmm7, xmm7 ; sse eaccumulator
- mov rsi, arg(0) ;ref_ptr ;
-
- mov rdi, arg(2) ;src_ptr ;
- movsxd rcx, dword ptr arg(4) ;Height ;
- movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
- movsxd rdx, dword ptr arg(3) ;src_pixels_per_line
-
- pxor xmm0, xmm0 ;
-
-.half_horiz_variance16x_h_1:
- movdqu xmm5, XMMWORD PTR [rsi] ; xmm5 = s0,s1,s2..s15
- movdqu xmm3, XMMWORD PTR [rsi+1] ; xmm3 = s1,s2,s3..s16
-
- pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3)
- movdqa xmm1, xmm5
- punpcklbw xmm5, xmm0 ; xmm5 = words of above
- punpckhbw xmm1, xmm0
-
- movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d7
- punpcklbw xmm3, xmm0 ; xmm3 = words of above
- movq xmm2, QWORD PTR [rdi+8]
- punpcklbw xmm2, xmm0
-
- psubw xmm5, xmm3 ; xmm5 -= xmm3
- psubw xmm1, xmm2
- paddw xmm6, xmm5 ; xmm6 += accumulated column differences
- paddw xmm6, xmm1
- pmaddwd xmm5, xmm5 ; xmm5 *= xmm5
- pmaddwd xmm1, xmm1
- paddd xmm7, xmm5 ; xmm7 += accumulated square column differences
- paddd xmm7, xmm1
-
- lea rsi, [rsi + rax]
- lea rdi, [rdi + rdx]
-
- sub rcx, 1 ;
- jnz .half_horiz_variance16x_h_1 ;
-
- pxor xmm1, xmm1
- pxor xmm5, xmm5
-
- punpcklwd xmm0, xmm6
- punpckhwd xmm1, xmm6
- psrad xmm0, 16
- psrad xmm1, 16
- paddd xmm0, xmm1
- movdqa xmm1, xmm0
-
- movdqa xmm6, xmm7
- punpckldq xmm6, xmm5
- punpckhdq xmm7, xmm5
- paddd xmm6, xmm7
-
- punpckldq xmm0, xmm5
- punpckhdq xmm1, xmm5
- paddd xmm0, xmm1
-
- movdqa xmm7, xmm6
- movdqa xmm1, xmm0
-
- psrldq xmm7, 8
- psrldq xmm1, 8
-
- paddd xmm6, xmm7
- paddd xmm0, xmm1
-
- mov rsi, arg(5) ;[Sum]
- mov rdi, arg(6) ;[SSE]
-
- movd [rsi], xmm0
- movd [rdi], xmm6
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
diff --git a/vp9/encoder/x86/vp9_variance_impl_sse2.asm b/vp9/encoder/x86/vp9_variance_impl_sse2.asm
index 2c5088134..483041278 100644
--- a/vp9/encoder/x86/vp9_variance_impl_sse2.asm
+++ b/vp9/encoder/x86/vp9_variance_impl_sse2.asm
@@ -398,337 +398,4 @@ sym(vp9_get8x8var_sse2):
pop rbp
ret
-;void vp9_half_horiz_vert_variance8x_h_sse2
-;(
-; unsigned char *ref_ptr,
-; int ref_pixels_per_line,
-; unsigned char *src_ptr,
-; int src_pixels_per_line,
-; unsigned int Height,
-; int *sum,
-; unsigned int *sumsquared
-;)
-global sym(vp9_half_horiz_vert_variance8x_h_sse2) PRIVATE
-sym(vp9_half_horiz_vert_variance8x_h_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
-%if ABI_IS_32BIT=0
- movsxd r8, dword ptr arg(1) ;ref_pixels_per_line
- movsxd r9, dword ptr arg(3) ;src_pixels_per_line
-%endif
-
- pxor xmm6, xmm6 ; error accumulator
- pxor xmm7, xmm7 ; sse eaccumulator
- mov rsi, arg(0) ;ref_ptr ;
-
- mov rdi, arg(2) ;src_ptr ;
- movsxd rcx, dword ptr arg(4) ;Height ;
- movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
-
- pxor xmm0, xmm0 ;
-
- movq xmm5, QWORD PTR [rsi] ; xmm5 = s0,s1,s2..s8
- movq xmm3, QWORD PTR [rsi+1] ; xmm3 = s1,s2,s3..s9
- pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3) horizontal line 1
-
-%if ABI_IS_32BIT
- add rsi, dword ptr arg(1) ;ref_pixels_per_line ; next source
-%else
- add rsi, r8
-%endif
-
-.half_horiz_vert_variance8x_h_1:
-
- movq xmm1, QWORD PTR [rsi] ;
- movq xmm2, QWORD PTR [rsi+1] ;
- pavgb xmm1, xmm2 ; xmm1 = avg(xmm1,xmm3) horizontal line i+1
-
- pavgb xmm5, xmm1 ; xmm = vertical average of the above
- punpcklbw xmm5, xmm0 ; xmm5 = words of above
-
- movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d8
- punpcklbw xmm3, xmm0 ; xmm3 = words of above
-
- psubw xmm5, xmm3 ; xmm5 -= xmm3
- paddw xmm6, xmm5 ; xmm6 += accumulated column differences
- pmaddwd xmm5, xmm5 ; xmm5 *= xmm5
- paddd xmm7, xmm5 ; xmm7 += accumulated square column differences
-
- movdqa xmm5, xmm1 ; save xmm1 for use on the next row
-
-%if ABI_IS_32BIT
- add esi, dword ptr arg(1) ;ref_pixels_per_line ; next source
- add edi, dword ptr arg(3) ;src_pixels_per_line ; next destination
-%else
- add rsi, r8
- add rdi, r9
-%endif
-
- sub rcx, 1 ;
- jnz .half_horiz_vert_variance8x_h_1 ;
-
- movdq2q mm6, xmm6 ;
- movdq2q mm7, xmm7 ;
-
- psrldq xmm6, 8
- psrldq xmm7, 8
-
- movdq2q mm2, xmm6
- movdq2q mm3, xmm7
-
- paddw mm6, mm2
- paddd mm7, mm3
-
- pxor mm3, mm3 ;
- pxor mm2, mm2 ;
-
- punpcklwd mm2, mm6 ;
- punpckhwd mm3, mm6 ;
-
- paddd mm2, mm3 ;
- movq mm6, mm2 ;
-
- psrlq mm6, 32 ;
- paddd mm2, mm6 ;
-
- psrad mm2, 16 ;
- movq mm4, mm7 ;
-
- psrlq mm4, 32 ;
- paddd mm4, mm7 ;
-
- mov rsi, arg(5) ; sum
- mov rdi, arg(6) ; sumsquared
-
- movd [rsi], mm2 ;
- movd [rdi], mm4 ;
-
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void vp9_half_vert_variance8x_h_sse2
-;(
-; unsigned char *ref_ptr,
-; int ref_pixels_per_line,
-; unsigned char *src_ptr,
-; int src_pixels_per_line,
-; unsigned int Height,
-; int *sum,
-; unsigned int *sumsquared
-;)
-global sym(vp9_half_vert_variance8x_h_sse2) PRIVATE
-sym(vp9_half_vert_variance8x_h_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
-%if ABI_IS_32BIT=0
- movsxd r8, dword ptr arg(1) ;ref_pixels_per_line
- movsxd r9, dword ptr arg(3) ;src_pixels_per_line
-%endif
-
- pxor xmm6, xmm6 ; error accumulator
- pxor xmm7, xmm7 ; sse eaccumulator
- mov rsi, arg(0) ;ref_ptr ;
-
- mov rdi, arg(2) ;src_ptr ;
- movsxd rcx, dword ptr arg(4) ;Height ;
- movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
-
- pxor xmm0, xmm0 ;
-.half_vert_variance8x_h_1:
- movq xmm5, QWORD PTR [rsi] ; xmm5 = s0,s1,s2..s8
- movq xmm3, QWORD PTR [rsi+rax] ; xmm3 = s1,s2,s3..s9
-
- pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3)
- punpcklbw xmm5, xmm0 ; xmm5 = words of above
-
- movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d8
- punpcklbw xmm3, xmm0 ; xmm3 = words of above
-
- psubw xmm5, xmm3 ; xmm5 -= xmm3
- paddw xmm6, xmm5 ; xmm6 += accumulated column differences
- pmaddwd xmm5, xmm5 ; xmm5 *= xmm5
- paddd xmm7, xmm5 ; xmm7 += accumulated square column differences
-
-%if ABI_IS_32BIT
- add esi, dword ptr arg(1) ;ref_pixels_per_line ; next source
- add edi, dword ptr arg(3) ;src_pixels_per_line ; next destination
-%else
- add rsi, r8
- add rdi, r9
-%endif
- sub rcx, 1 ;
- jnz .half_vert_variance8x_h_1 ;
-
- movdq2q mm6, xmm6 ;
- movdq2q mm7, xmm7 ;
-
- psrldq xmm6, 8
- psrldq xmm7, 8
-
- movdq2q mm2, xmm6
- movdq2q mm3, xmm7
-
- paddw mm6, mm2
- paddd mm7, mm3
-
- pxor mm3, mm3 ;
- pxor mm2, mm2 ;
-
- punpcklwd mm2, mm6 ;
- punpckhwd mm3, mm6 ;
-
- paddd mm2, mm3 ;
- movq mm6, mm2 ;
-
- psrlq mm6, 32 ;
- paddd mm2, mm6 ;
-
- psrad mm2, 16 ;
- movq mm4, mm7 ;
-
- psrlq mm4, 32 ;
- paddd mm4, mm7 ;
-
- mov rsi, arg(5) ; sum
- mov rdi, arg(6) ; sumsquared
-
- movd [rsi], mm2 ;
- movd [rdi], mm4 ;
-
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;void vp9_half_horiz_variance8x_h_sse2
-;(
-; unsigned char *ref_ptr,
-; int ref_pixels_per_line,
-; unsigned char *src_ptr,
-; int src_pixels_per_line,
-; unsigned int Height,
-; int *sum,
-; unsigned int *sumsquared
-;)
-global sym(vp9_half_horiz_variance8x_h_sse2) PRIVATE
-sym(vp9_half_horiz_variance8x_h_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
-%if ABI_IS_32BIT=0
- movsxd r8, dword ptr arg(1) ;ref_pixels_per_line
- movsxd r9, dword ptr arg(3) ;src_pixels_per_line
-%endif
-
- pxor xmm6, xmm6 ; error accumulator
- pxor xmm7, xmm7 ; sse eaccumulator
- mov rsi, arg(0) ;ref_ptr ;
-
- mov rdi, arg(2) ;src_ptr ;
- movsxd rcx, dword ptr arg(4) ;Height ;
-
- pxor xmm0, xmm0 ;
-.half_horiz_variance8x_h_1:
- movq xmm5, QWORD PTR [rsi] ; xmm5 = s0,s1,s2..s8
- movq xmm3, QWORD PTR [rsi+1] ; xmm3 = s1,s2,s3..s9
-
- pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3)
- punpcklbw xmm5, xmm0 ; xmm5 = words of above
-
- movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d8
- punpcklbw xmm3, xmm0 ; xmm3 = words of above
-
- psubw xmm5, xmm3 ; xmm5 -= xmm3
- paddw xmm6, xmm5 ; xmm6 += accumulated column differences
- pmaddwd xmm5, xmm5 ; xmm5 *= xmm5
- paddd xmm7, xmm5 ; xmm7 += accumulated square column differences
-
-%if ABI_IS_32BIT
- add esi, dword ptr arg(1) ;ref_pixels_per_line ; next source
- add edi, dword ptr arg(3) ;src_pixels_per_line ; next destination
-%else
- add rsi, r8
- add rdi, r9
-%endif
- sub rcx, 1 ;
- jnz .half_horiz_variance8x_h_1 ;
-
- movdq2q mm6, xmm6 ;
- movdq2q mm7, xmm7 ;
-
- psrldq xmm6, 8
- psrldq xmm7, 8
-
- movdq2q mm2, xmm6
- movdq2q mm3, xmm7
-
- paddw mm6, mm2
- paddd mm7, mm3
-
- pxor mm3, mm3 ;
- pxor mm2, mm2 ;
-
- punpcklwd mm2, mm6 ;
- punpckhwd mm3, mm6 ;
-
- paddd mm2, mm3 ;
- movq mm6, mm2 ;
-
- psrlq mm6, 32 ;
- paddd mm2, mm6 ;
-
- psrad mm2, 16 ;
- movq mm4, mm7 ;
-
- psrlq mm4, 32 ;
- paddd mm4, mm7 ;
-
- mov rsi, arg(5) ; sum
- mov rdi, arg(6) ; sumsquared
-
- movd [rsi], mm2 ;
- movd [rdi], mm4 ;
-
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
diff --git a/vp9/encoder/x86/vp9_variance_sse2.c b/vp9/encoder/x86/vp9_variance_sse2.c
index 25d594632..41f225922 100644
--- a/vp9/encoder/x86/vp9_variance_sse2.c
+++ b/vp9/encoder/x86/vp9_variance_sse2.c
@@ -42,66 +42,6 @@ unsigned int vp9_get8x8var_sse2
unsigned int *SSE,
int *Sum
);
-void vp9_half_horiz_vert_variance8x_h_sse2
-(
- const unsigned char *ref_ptr,
- int ref_pixels_per_line,
- const unsigned char *src_ptr,
- int src_pixels_per_line,
- unsigned int Height,
- int *sum,
- unsigned int *sumsquared
-);
-void vp9_half_horiz_vert_variance16x_h_sse2
-(
- const unsigned char *ref_ptr,
- int ref_pixels_per_line,
- const unsigned char *src_ptr,
- int src_pixels_per_line,
- unsigned int Height,
- int *sum,
- unsigned int *sumsquared
-);
-void vp9_half_horiz_variance8x_h_sse2
-(
- const unsigned char *ref_ptr,
- int ref_pixels_per_line,
- const unsigned char *src_ptr,
- int src_pixels_per_line,
- unsigned int Height,
- int *sum,
- unsigned int *sumsquared
-);
-void vp9_half_horiz_variance16x_h_sse2
-(
- const unsigned char *ref_ptr,
- int ref_pixels_per_line,
- const unsigned char *src_ptr,
- int src_pixels_per_line,
- unsigned int Height,
- int *sum,
- unsigned int *sumsquared
-);
-void vp9_half_vert_variance8x_h_sse2
-(
- const unsigned char *ref_ptr,
- int ref_pixels_per_line,
- const unsigned char *src_ptr,
- int src_pixels_per_line,
- unsigned int Height,
- int *sum,
- unsigned int *sumsquared
-);
-void vp9_half_vert_variance16x_h_sse2
-(
- const unsigned char *ref_ptr,
- int ref_pixels_per_line,
- const unsigned char *src_ptr,
- int src_pixels_per_line,
- unsigned int Height,
- int *sum,
- unsigned int *sumsquared
-);
typedef unsigned int (*get_var_sse2) (
const unsigned char *src_ptr,