summary refs log tree commit diff
path: root/vp8/common/x86
diff options
context:
space:
mode:
Diffstat (limited to 'vp8/common/x86')
-rw-r--r-- vp8/common/x86/loopfilter_mmx.asm 78
-rw-r--r-- vp8/common/x86/loopfilter_sse2.asm 63
-rw-r--r-- vp8/common/x86/loopfilter_x86.c 170
-rw-r--r-- vp8/common/x86/loopfilter_x86.h 24
-rw-r--r-- vp8/common/x86/x86_systemdependent.c 10
5 files changed, 111 insertions, 234 deletions
diff --git a/vp8/common/x86/loopfilter_mmx.asm b/vp8/common/x86/loopfilter_mmx.asm
index c6c215c3c..ad47284cf 100644
--- a/vp8/common/x86/loopfilter_mmx.asm
+++ b/vp8/common/x86/loopfilter_mmx.asm
@@ -16,7 +16,7 @@
;(
; unsigned char *src_ptr,
; int src_pixel_step,
-; const char *flimit,
+; const char *blimit,
; const char *limit,
; const char *thresh,
; int count
@@ -122,12 +122,10 @@ next8_h:
paddusb mm5, mm5 ; abs(p0-q0)*2
paddusb mm5, mm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2
- mov rdx, arg(2) ;flimit ; get flimit
- movq mm2, [rdx] ; flimit mm2
- paddb mm2, mm2 ; flimit*2 (less than 255)
- paddb mm7, mm2 ; flimit * 2 + limit (less than 255)
+ mov rdx, arg(2) ;blimit ; get blimit
+ movq mm7, [rdx] ; blimit
- psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
+ psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
por mm1, mm5
pxor mm5, mm5
pcmpeqb mm1, mm5 ; mask mm1
@@ -230,7 +228,7 @@ next8_h:
;(
; unsigned char *src_ptr,
; int src_pixel_step,
-; const char *flimit,
+; const char *blimit,
; const char *limit,
; const char *thresh,
; int count
@@ -406,9 +404,9 @@ next8_v:
pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero
psrlw mm5, 1 ; abs(p1-q1)/2
- mov rdx, arg(2) ;flimit ;
+ mov rdx, arg(2) ;blimit ;
- movq mm2, [rdx] ;flimit mm2
+ movq mm4, [rdx] ;blimit
movq mm1, mm3 ; mm1=mm3=p0
movq mm7, mm6 ; mm7=mm6=q0
@@ -419,10 +417,7 @@ next8_v:
paddusb mm1, mm1 ; abs(q0-p0)*2
paddusb mm1, mm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2
- paddb mm2, mm2 ; flimit*2 (less than 255)
- paddb mm4, mm2 ; flimit * 2 + limit (less than 255)
-
- psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
+ psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
por mm1, mm0; ; mask
pxor mm0, mm0
@@ -603,7 +598,7 @@ next8_v:
;(
; unsigned char *src_ptr,
; int src_pixel_step,
-; const char *flimit,
+; const char *blimit,
; const char *limit,
; const char *thresh,
; int count
@@ -719,17 +714,15 @@ next8_mbh:
paddusb mm5, mm5 ; abs(p0-q0)*2
paddusb mm5, mm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2
- mov rdx, arg(2) ;flimit ; get flimit
- movq mm2, [rdx] ; flimit mm2
- paddb mm2, mm2 ; flimit*2 (less than 255)
- paddb mm7, mm2 ; flimit * 2 + limit (less than 255)
+ mov rdx, arg(2) ;blimit ; get blimit
+ movq mm7, [rdx] ; blimit
- psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
+ psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
por mm1, mm5
pxor mm5, mm5
pcmpeqb mm1, mm5 ; mask mm1
- ; mm1 = mask, mm0=q0, mm7 = flimit, t0 = abs(q0-q1) t1 = abs(p1-p0)
+ ; mm1 = mask, mm0=q0, mm7 = blimit, t0 = abs(q0-q1) t1 = abs(p1-p0)
; mm6 = p0,
; calculate high edge variance
@@ -922,7 +915,7 @@ next8_mbh:
;(
; unsigned char *src_ptr,
; int src_pixel_step,
-; const char *flimit,
+; const char *blimit,
; const char *limit,
; const char *thresh,
; int count
@@ -1108,9 +1101,9 @@ next8_mbv:
pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero
psrlw mm5, 1 ; abs(p1-q1)/2
- mov rdx, arg(2) ;flimit ;
+ mov rdx, arg(2) ;blimit ;
- movq mm2, [rdx] ;flimit mm2
+ movq mm4, [rdx] ;blimit
movq mm1, mm3 ; mm1=mm3=p0
movq mm7, mm6 ; mm7=mm6=q0
@@ -1121,10 +1114,7 @@ next8_mbv:
paddusb mm1, mm1 ; abs(q0-p0)*2
paddusb mm1, mm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2
- paddb mm2, mm2 ; flimit*2 (less than 255)
- paddb mm4, mm2 ; flimit * 2 + limit (less than 255)
-
- psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
+ psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
por mm1, mm0; ; mask
pxor mm0, mm0
@@ -1392,16 +1382,13 @@ next8_mbv:
;(
; unsigned char *src_ptr,
; int src_pixel_step,
-; const char *flimit,
-; const char *limit,
-; const char *thresh,
-; int count
+; const char *blimit
;)
global sym(vp8_loop_filter_simple_horizontal_edge_mmx)
sym(vp8_loop_filter_simple_horizontal_edge_mmx):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
+ SHADOW_ARGS_TO_STACK 3
GET_GOT rbx
push rsi
push rdi
@@ -1410,14 +1397,10 @@ sym(vp8_loop_filter_simple_horizontal_edge_mmx):
mov rsi, arg(0) ;src_ptr
movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
- movsxd rcx, dword ptr arg(5) ;count
+ mov rcx, 2 ; count: fixed at 2 now that the argument is gone (the removed callers all passed 2)
nexts8_h:
- mov rdx, arg(3) ;limit
- movq mm7, [rdx]
- mov rdx, arg(2) ;flimit ; get flimit
+ mov rdx, arg(2) ;blimit ; get blimit
movq mm3, [rdx] ;
- paddb mm3, mm3 ; flimit*2 (less than 255)
- paddb mm3, mm7 ; flimit * 2 + limit (less than 255)
mov rdi, rsi ; rdi points to row +1 for indirect addressing
add rdi, rax
@@ -1445,7 +1428,7 @@ nexts8_h:
paddusb mm5, mm5 ; abs(p0-q0)*2
paddusb mm5, mm1 ; abs (p0 - q0) *2 + abs(p1-q1)/2
- psubusb mm5, mm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
+ psubusb mm5, mm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
pxor mm3, mm3
pcmpeqb mm5, mm3
@@ -1515,16 +1498,13 @@ nexts8_h:
;(
; unsigned char *src_ptr,
; int src_pixel_step,
-; const char *flimit,
-; const char *limit,
-; const char *thresh,
-; int count
+; const char *blimit
;)
global sym(vp8_loop_filter_simple_vertical_edge_mmx)
sym(vp8_loop_filter_simple_vertical_edge_mmx):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
+ SHADOW_ARGS_TO_STACK 3
GET_GOT rbx
push rsi
push rdi
@@ -1539,7 +1519,7 @@ sym(vp8_loop_filter_simple_vertical_edge_mmx):
movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
lea rsi, [rsi + rax*4- 2]; ;
- movsxd rcx, dword ptr arg(5) ;count
+ mov rcx, 2 ; count: fixed at 2 now that the argument is gone (the removed callers all passed 2)
nexts8_v:
lea rdi, [rsi + rax];
@@ -1602,14 +1582,10 @@ nexts8_v:
paddusb mm5, mm5 ; abs(p0-q0)*2
paddusb mm5, mm6 ; abs (p0 - q0) *2 + abs(p1-q1)/2
- mov rdx, arg(2) ;flimit ; get flimit
+ mov rdx, arg(2) ;blimit ; get blimit
movq mm7, [rdx]
- mov rdx, arg(3) ; get limit
- movq mm6, [rdx]
- paddb mm7, mm7 ; flimit*2 (less than 255)
- paddb mm7, mm6 ; flimit * 2 + limit (less than 255)
- psubusb mm5, mm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
+ psubusb mm5, mm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
pxor mm7, mm7
pcmpeqb mm5, mm7 ; mm5 = mask
diff --git a/vp8/common/x86/loopfilter_sse2.asm b/vp8/common/x86/loopfilter_sse2.asm
index c2ce1a106..4efff7eb5 100644
--- a/vp8/common/x86/loopfilter_sse2.asm
+++ b/vp8/common/x86/loopfilter_sse2.asm
@@ -110,7 +110,7 @@
psubusb xmm6, xmm5 ; p1-=p0
por xmm6, xmm4 ; abs(p1 - p0)
- mov rdx, arg(2) ; get flimit
+ mov rdx, arg(2) ; get blimit
movdqa t1, xmm6 ; save to t1
@@ -123,7 +123,7 @@
psubusb xmm1, xmm7
por xmm2, xmm3 ; abs(p1-q1)
- movdqa xmm4, XMMWORD PTR [rdx] ; flimit
+ movdqa xmm7, XMMWORD PTR [rdx] ; blimit
movdqa xmm3, xmm0 ; q0
pand xmm2, [GLOBAL(tfe)] ; set lsb of each byte to zero
@@ -134,13 +134,11 @@
psrlw xmm2, 1 ; abs(p1-q1)/2
psubusb xmm5, xmm3 ; p0-=q0
- paddb xmm4, xmm4 ; flimit*2 (less than 255)
psubusb xmm3, xmm6 ; q0-=p0
por xmm5, xmm3 ; abs(p0 - q0)
paddusb xmm5, xmm5 ; abs(p0-q0)*2
- paddb xmm7, xmm4 ; flimit * 2 + limit (less than 255)
movdqa xmm4, t0 ; hev get abs (q1 - q0)
@@ -150,7 +148,7 @@
movdqa xmm2, XMMWORD PTR [rdx] ; hev
- psubusb xmm5, xmm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
+ psubusb xmm5, xmm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
psubusb xmm4, xmm2 ; hev
psubusb xmm3, xmm2 ; hev
@@ -278,7 +276,7 @@
;(
; unsigned char *src_ptr,
; int src_pixel_step,
-; const char *flimit,
+; const char *blimit,
; const char *limit,
; const char *thresh,
; int count
@@ -328,7 +326,7 @@ sym(vp8_loop_filter_horizontal_edge_sse2):
;(
; unsigned char *src_ptr,
; int src_pixel_step,
-; const char *flimit,
+; const char *blimit,
; const char *limit,
; const char *thresh,
; int count
@@ -574,7 +572,7 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2):
;(
; unsigned char *src_ptr,
; int src_pixel_step,
-; const char *flimit,
+; const char *blimit,
; const char *limit,
; const char *thresh,
; int count
@@ -624,7 +622,7 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2):
;(
; unsigned char *u,
; int src_pixel_step,
-; const char *flimit,
+; const char *blimit,
; const char *limit,
; const char *thresh,
; unsigned char *v
@@ -904,7 +902,7 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
movdqa xmm4, XMMWORD PTR [rdx]; limit
pmaxub xmm0, xmm7
- mov rdx, arg(2) ; flimit
+ mov rdx, arg(2) ; blimit
psubusb xmm0, xmm4
movdqa xmm5, xmm2 ; q1
@@ -921,12 +919,11 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
psrlw xmm5, 1 ; abs(p1-q1)/2
psubusb xmm6, xmm3 ; q0-p0
- movdqa xmm2, XMMWORD PTR [rdx]; flimit
+ movdqa xmm4, XMMWORD PTR [rdx]; blimit
mov rdx, arg(4) ; get thresh
por xmm1, xmm6 ; abs(q0-p0)
- paddb xmm2, xmm2 ; flimit*2 (less than 255)
movdqa xmm6, t0 ; get abs (q1 - q0)
@@ -939,10 +936,9 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
paddusb xmm1, xmm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2
psubusb xmm6, xmm7 ; abs(q1 - q0) > thresh
- paddb xmm4, xmm2 ; flimit * 2 + limit (less than 255)
psubusb xmm3, xmm7 ; abs(p1 - p0)> thresh
- psubusb xmm1, xmm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
+ psubusb xmm1, xmm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
por xmm6, xmm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
por xmm1, xmm0 ; mask
@@ -1014,7 +1010,7 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
;(
; unsigned char *src_ptr,
; int src_pixel_step,
-; const char *flimit,
+; const char *blimit,
; const char *limit,
; const char *thresh,
; int count
@@ -1081,7 +1077,7 @@ sym(vp8_loop_filter_vertical_edge_sse2):
;(
; unsigned char *u,
; int src_pixel_step,
-; const char *flimit,
+; const char *blimit,
; const char *limit,
; const char *thresh,
; unsigned char *v
@@ -1239,7 +1235,7 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2):
;(
; unsigned char *src_ptr,
; int src_pixel_step,
-; const char *flimit,
+; const char *blimit,
; const char *limit,
; const char *thresh,
; int count
@@ -1308,7 +1304,7 @@ sym(vp8_mbloop_filter_vertical_edge_sse2):
;(
; unsigned char *u,
; int src_pixel_step,
-; const char *flimit,
+; const char *blimit,
; const char *limit,
; const char *thresh,
; unsigned char *v
@@ -1376,16 +1372,13 @@ sym(vp8_mbloop_filter_vertical_edge_uv_sse2):
;(
; unsigned char *src_ptr,
; int src_pixel_step,
-; const char *flimit,
-; const char *limit,
-; const char *thresh,
-; int count
+; const char *blimit
;)
global sym(vp8_loop_filter_simple_horizontal_edge_sse2)
sym(vp8_loop_filter_simple_horizontal_edge_sse2):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
+ SHADOW_ARGS_TO_STACK 3
SAVE_XMM 7
GET_GOT rbx
push rsi
@@ -1394,13 +1387,8 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
mov rsi, arg(0) ;src_ptr
movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
- mov rdx, arg(2) ;flimit ; get flimit
+ mov rdx, arg(2) ;blimit
movdqa xmm3, XMMWORD PTR [rdx]
- mov rdx, arg(3) ;limit
- movdqa xmm7, XMMWORD PTR [rdx]
-
- paddb xmm3, xmm3 ; flimit*2 (less than 255)
- paddb xmm3, xmm7 ; flimit * 2 + limit (less than 255)
mov rdi, rsi ; rdi points to row +1 for indirect addressing
add rdi, rax
@@ -1428,7 +1416,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
paddusb xmm5, xmm5 ; abs(p0-q0)*2
paddusb xmm5, xmm1 ; abs (p0 - q0) *2 + abs(p1-q1)/2
- psubusb xmm5, xmm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
+ psubusb xmm5, xmm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
pxor xmm3, xmm3
pcmpeqb xmm5, xmm3
@@ -1493,16 +1481,13 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
;(
; unsigned char *src_ptr,
; int src_pixel_step,
-; const char *flimit,
-; const char *limit,
-; const char *thresh,
-; int count
+; const char *blimit
;)
global sym(vp8_loop_filter_simple_vertical_edge_sse2)
sym(vp8_loop_filter_simple_vertical_edge_sse2):
push rbp ; save old base pointer value.
mov rbp, rsp ; set new base pointer value.
- SHADOW_ARGS_TO_STACK 6
+ SHADOW_ARGS_TO_STACK 3
SAVE_XMM 7
GET_GOT rbx ; save callee-saved reg
push rsi
@@ -1607,14 +1592,10 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
paddusb xmm5, xmm5 ; abs(p0-q0)*2
paddusb xmm5, xmm6 ; abs (p0 - q0) *2 + abs(p1-q1)/2
- mov rdx, arg(2) ;flimit
+ mov rdx, arg(2) ;blimit
movdqa xmm7, XMMWORD PTR [rdx]
- mov rdx, arg(3) ; get limit
- movdqa xmm6, XMMWORD PTR [rdx]
- paddb xmm7, xmm7 ; flimit*2 (less than 255)
- paddb xmm7, xmm6 ; flimit * 2 + limit (less than 255)
- psubusb xmm5, xmm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
+ psubusb xmm5, xmm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
pxor xmm7, xmm7
pcmpeqb xmm5, xmm7 ; mm5 = mask
diff --git a/vp8/common/x86/loopfilter_x86.c b/vp8/common/x86/loopfilter_x86.c
index a52420c98..9360ac17c 100644
--- a/vp8/common/x86/loopfilter_x86.c
+++ b/vp8/common/x86/loopfilter_x86.c
@@ -9,30 +9,18 @@
*/
-#include "vpx_ports/config.h"
+#include "vpx_config.h"
#include "vp8/common/loopfilter.h"
-prototype_loopfilter(vp8_loop_filter_horizontal_edge_c);
-prototype_loopfilter(vp8_loop_filter_vertical_edge_c);
-prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_c);
-prototype_loopfilter(vp8_mbloop_filter_vertical_edge_c);
-prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_c);
-prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_c);
-
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_mmx);
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_mmx);
prototype_loopfilter(vp8_loop_filter_vertical_edge_mmx);
prototype_loopfilter(vp8_loop_filter_horizontal_edge_mmx);
-prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_mmx);
-prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_mmx);
prototype_loopfilter(vp8_loop_filter_vertical_edge_sse2);
prototype_loopfilter(vp8_loop_filter_horizontal_edge_sse2);
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_sse2);
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_sse2);
-prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_sse2);
-prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_sse2);
-prototype_loopfilter(vp8_fast_loop_filter_vertical_edges_sse2);
extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_sse2;
extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_sse2;
@@ -44,23 +32,13 @@ extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_sse2;
void vp8_loop_filter_mbh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
- vp8_mbloop_filter_horizontal_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
+ vp8_mbloop_filter_horizontal_edge_mmx(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
- vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
+ vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
- vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
-}
-
-
-void vp8_loop_filter_mbhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
- int y_stride, int uv_stride, loop_filter_info *lfi)
-{
- (void) u_ptr;
- (void) v_ptr;
- (void) uv_stride;
- vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
+ vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
}
@@ -68,23 +46,13 @@ void vp8_loop_filter_mbhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsign
void vp8_loop_filter_mbv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
- vp8_mbloop_filter_vertical_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
+ vp8_mbloop_filter_vertical_edge_mmx(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
- vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
+ vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
- vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
-}
-
-
-void vp8_loop_filter_mbvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
- int y_stride, int uv_stride, loop_filter_info *lfi)
-{
- (void) u_ptr;
- (void) v_ptr;
- (void) uv_stride;
- vp8_loop_filter_simple_vertical_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
+ vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
}
@@ -92,27 +60,23 @@ void vp8_loop_filter_mbvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsign
void vp8_loop_filter_bh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
- vp8_loop_filter_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
+ vp8_loop_filter_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
+ vp8_loop_filter_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
+ vp8_loop_filter_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
- vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
+ vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
- vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
+ vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
}
-void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
- int y_stride, int uv_stride, loop_filter_info *lfi)
+void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
{
- (void) u_ptr;
- (void) v_ptr;
- (void) uv_stride;
- vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
+ vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, blimit);
+ vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, blimit);
+ vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, blimit);
}
@@ -120,27 +84,23 @@ void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
void vp8_loop_filter_bv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
- vp8_loop_filter_vertical_edge_mmx(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_vertical_edge_mmx(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
+ vp8_loop_filter_vertical_edge_mmx(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
+ vp8_loop_filter_vertical_edge_mmx(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
+ vp8_loop_filter_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
- vp8_loop_filter_vertical_edge_mmx(u_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
+ vp8_loop_filter_vertical_edge_mmx(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
- vp8_loop_filter_vertical_edge_mmx(v_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
+ vp8_loop_filter_vertical_edge_mmx(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
}
-void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
- int y_stride, int uv_stride, loop_filter_info *lfi)
+void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
{
- (void) u_ptr;
- (void) v_ptr;
- (void) uv_stride;
- vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
+ vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 4, y_stride, blimit);
+ vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 8, y_stride, blimit);
+ vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 12, y_stride, blimit);
}
#endif
@@ -150,20 +110,10 @@ void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
- vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
+ vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
- vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, v_ptr);
-}
-
-
-void vp8_loop_filter_mbhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
- int y_stride, int uv_stride, loop_filter_info *lfi)
-{
- (void) u_ptr;
- (void) v_ptr;
- (void) uv_stride;
- vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
+ vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr);
}
@@ -171,20 +121,10 @@ void vp8_loop_filter_mbhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsig
void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
- vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
+ vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
- vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, v_ptr);
-}
-
-
-void vp8_loop_filter_mbvs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
- int y_stride, int uv_stride, loop_filter_info *lfi)
-{
- (void) u_ptr;
- (void) v_ptr;
- (void) uv_stride;
- vp8_loop_filter_simple_vertical_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
+ vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr);
}
@@ -192,24 +132,20 @@ void vp8_loop_filter_mbvs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsig
void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
- vp8_loop_filter_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
+ vp8_loop_filter_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
+ vp8_loop_filter_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
+ vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
- vp8_loop_filter_horizontal_edge_uv_sse2(u_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, v_ptr + 4 * uv_stride);
+ vp8_loop_filter_horizontal_edge_uv_sse2(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, v_ptr + 4 * uv_stride);
}
-void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
- int y_stride, int uv_stride, loop_filter_info *lfi)
+void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
{
- (void) u_ptr;
- (void) v_ptr;
- (void) uv_stride;
- vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
+ vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, blimit);
+ vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, blimit);
+ vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, blimit);
}
@@ -217,36 +153,20 @@ void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsign
void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
- vp8_loop_filter_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
+ vp8_loop_filter_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
+ vp8_loop_filter_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
+ vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
- vp8_loop_filter_vertical_edge_uv_sse2(u_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, v_ptr + 4);
+ vp8_loop_filter_vertical_edge_uv_sse2(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, v_ptr + 4);
}
-void vp8_loop_filter_bvs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
- int y_stride, int uv_stride, loop_filter_info *lfi)
+void vp8_loop_filter_bvs_sse2(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
{
- (void) u_ptr;
- (void) v_ptr;
- (void) uv_stride;
- vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
+ vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 4, y_stride, blimit);
+ vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 8, y_stride, blimit);
+ vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 12, y_stride, blimit);
}
#endif
-
-#if 0
-void vp8_fast_loop_filter_vertical_edges_sse(unsigned char *y_ptr,
- int y_stride,
- loop_filter_info *lfi)
-{
-
- vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
-}
-#endif
diff --git a/vp8/common/x86/loopfilter_x86.h b/vp8/common/x86/loopfilter_x86.h
index 80dbebc8d..1ed6c213f 100644
--- a/vp8/common/x86/loopfilter_x86.h
+++ b/vp8/common/x86/loopfilter_x86.h
@@ -24,10 +24,10 @@ extern prototype_loopfilter_block(vp8_loop_filter_mbv_mmx);
extern prototype_loopfilter_block(vp8_loop_filter_bv_mmx);
extern prototype_loopfilter_block(vp8_loop_filter_mbh_mmx);
extern prototype_loopfilter_block(vp8_loop_filter_bh_mmx);
-extern prototype_loopfilter_block(vp8_loop_filter_mbvs_mmx);
-extern prototype_loopfilter_block(vp8_loop_filter_bvs_mmx);
-extern prototype_loopfilter_block(vp8_loop_filter_mbhs_mmx);
-extern prototype_loopfilter_block(vp8_loop_filter_bhs_mmx);
+extern prototype_simple_loopfilter(vp8_loop_filter_simple_vertical_edge_mmx);
+extern prototype_simple_loopfilter(vp8_loop_filter_bvs_mmx);
+extern prototype_simple_loopfilter(vp8_loop_filter_simple_horizontal_edge_mmx);
+extern prototype_simple_loopfilter(vp8_loop_filter_bhs_mmx);
#if !CONFIG_RUNTIME_CPU_DETECT
@@ -44,13 +44,13 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_mmx);
#define vp8_lf_normal_b_h vp8_loop_filter_bh_mmx
#undef vp8_lf_simple_mb_v
-#define vp8_lf_simple_mb_v vp8_loop_filter_mbvs_mmx
+#define vp8_lf_simple_mb_v vp8_loop_filter_simple_vertical_edge_mmx
#undef vp8_lf_simple_b_v
#define vp8_lf_simple_b_v vp8_loop_filter_bvs_mmx
#undef vp8_lf_simple_mb_h
-#define vp8_lf_simple_mb_h vp8_loop_filter_mbhs_mmx
+#define vp8_lf_simple_mb_h vp8_loop_filter_simple_horizontal_edge_mmx
#undef vp8_lf_simple_b_h
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_mmx
@@ -63,10 +63,10 @@ extern prototype_loopfilter_block(vp8_loop_filter_mbv_sse2);
extern prototype_loopfilter_block(vp8_loop_filter_bv_sse2);
extern prototype_loopfilter_block(vp8_loop_filter_mbh_sse2);
extern prototype_loopfilter_block(vp8_loop_filter_bh_sse2);
-extern prototype_loopfilter_block(vp8_loop_filter_mbvs_sse2);
-extern prototype_loopfilter_block(vp8_loop_filter_bvs_sse2);
-extern prototype_loopfilter_block(vp8_loop_filter_mbhs_sse2);
-extern prototype_loopfilter_block(vp8_loop_filter_bhs_sse2);
+extern prototype_simple_loopfilter(vp8_loop_filter_simple_vertical_edge_sse2);
+extern prototype_simple_loopfilter(vp8_loop_filter_bvs_sse2);
+extern prototype_simple_loopfilter(vp8_loop_filter_simple_horizontal_edge_sse2);
+extern prototype_simple_loopfilter(vp8_loop_filter_bhs_sse2);
#if !CONFIG_RUNTIME_CPU_DETECT
@@ -83,13 +83,13 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_sse2);
#define vp8_lf_normal_b_h vp8_loop_filter_bh_sse2
#undef vp8_lf_simple_mb_v
-#define vp8_lf_simple_mb_v vp8_loop_filter_mbvs_sse2
+#define vp8_lf_simple_mb_v vp8_loop_filter_simple_vertical_edge_sse2
#undef vp8_lf_simple_b_v
#define vp8_lf_simple_b_v vp8_loop_filter_bvs_sse2
#undef vp8_lf_simple_mb_h
-#define vp8_lf_simple_mb_h vp8_loop_filter_mbhs_sse2
+#define vp8_lf_simple_mb_h vp8_loop_filter_simple_horizontal_edge_sse2
#undef vp8_lf_simple_b_h
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_sse2
diff --git a/vp8/common/x86/x86_systemdependent.c b/vp8/common/x86/x86_systemdependent.c
index 87374f3c6..33a984b79 100644
--- a/vp8/common/x86/x86_systemdependent.c
+++ b/vp8/common/x86/x86_systemdependent.c
@@ -9,7 +9,7 @@
*/
-#include "vpx_ports/config.h"
+#include "vpx_config.h"
#include "vpx_ports/x86.h"
#include "vp8/common/g_common.h"
#include "vp8/common/subpixel.h"
@@ -63,9 +63,9 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_mmx;
rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_mmx;
rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_mmx;
- rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_mmx;
+ rtcd->loopfilter.simple_mb_v = vp8_loop_filter_simple_vertical_edge_mmx;
rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_mmx;
- rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_mmx;
+ rtcd->loopfilter.simple_mb_h = vp8_loop_filter_simple_horizontal_edge_mmx;
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_mmx;
#if CONFIG_POSTPROC
@@ -101,9 +101,9 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_sse2;
rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_sse2;
rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_sse2;
- rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_sse2;
+ rtcd->loopfilter.simple_mb_v = vp8_loop_filter_simple_vertical_edge_sse2;
rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_sse2;
- rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_sse2;
+ rtcd->loopfilter.simple_mb_h = vp8_loop_filter_simple_horizontal_edge_sse2;
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_sse2;
#if CONFIG_POSTPROC