-rw-r--r--  libmkv/EbmlWriter.c | 30
-rw-r--r--  libmkv/EbmlWriter.h | 2
-rw-r--r--  libmkv/WebMElement.c | 4
-rw-r--r--  vp8/common/arm/arm_systemdependent.c | 6
-rw-r--r--  vp8/common/arm/armv6/loopfilter_v6.asm | 64
-rw-r--r--  vp8/common/arm/armv6/simpleloopfilter_v6.asm | 29
-rw-r--r--  vp8/common/arm/loopfilter_arm.c | 191
-rw-r--r--  vp8/common/arm/loopfilter_arm.h | 34
-rw-r--r--  vp8/common/arm/neon/loopfilter_neon.asm | 294
-rw-r--r--  vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm | 100
-rw-r--r--  vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm | 178
-rw-r--r--  vp8/common/arm/neon/mbloopfilter_neon.asm | 422
-rw-r--r--  vp8/common/extend.c | 8
-rw-r--r--  vp8/common/generic/systemdependent.c | 4
-rw-r--r--  vp8/common/loopfilter.c | 696
-rw-r--r--  vp8/common/loopfilter.h | 67
-rw-r--r--  vp8/common/loopfilter_filters.c | 80
-rw-r--r--  vp8/common/onyxc_int.h | 21
-rw-r--r--  vp8/common/x86/loopfilter_mmx.asm | 78
-rw-r--r--  vp8/common/x86/loopfilter_sse2.asm | 63
-rw-r--r--  vp8/common/x86/loopfilter_x86.c | 170
-rw-r--r--  vp8/common/x86/loopfilter_x86.h | 24
-rw-r--r--  vp8/common/x86/x86_systemdependent.c | 10
-rw-r--r--  vp8/decoder/decodemv.c | 6
-rw-r--r--  vp8/decoder/onyxd_if.c | 2
-rw-r--r--  vp8/decoder/threading.c | 230
-rw-r--r--  vp8/encoder/onyx_if.c | 24
-rw-r--r--  vp8/encoder/onyx_int.h | 3
-rw-r--r--  vp8/encoder/pickinter.c | 23
-rw-r--r--  vp8/encoder/rdopt.c | 21
-rw-r--r--  vp8/encoder/temporal_filter.c | 20
-rw-r--r--  vp8/encoder/tokenize.c | 220
-rw-r--r--  vpx_mem/include/nds/vpx_mem_nds.h | 30
-rw-r--r--  vpx_mem/vpx_mem_tracker.c | 25
-rw-r--r--  vpx_scale/arm/nds/yv12extend.c | 221
-rw-r--r--  vpx_scale/generic/yv12config.c | 36
-rw-r--r--  vpxenc.c | 56
37 files changed, 1592 insertions(+), 1900 deletions(-)
diff --git a/libmkv/EbmlWriter.c b/libmkv/EbmlWriter.c
index ac70d097d..fbf2c66e9 100644
--- a/libmkv/EbmlWriter.c
+++ b/libmkv/EbmlWriter.c
@@ -11,6 +11,7 @@
#include <stdlib.h>
#include <wchar.h>
#include <string.h>
+#include <limits.h>
#if defined(_MSC_VER)
#define LITERALU64(n) n
#else
@@ -33,7 +34,7 @@ void Ebml_WriteLen(EbmlGlobal *glob, long long val)
val |= (LITERALU64(0x000000000000080) << ((size - 1) * 7));
- Ebml_Serialize(glob, (void *) &val, size);
+ Ebml_Serialize(glob, (void *) &val, sizeof(val), size);
}
void Ebml_WriteString(EbmlGlobal *glob, const char *str)
@@ -60,21 +61,26 @@ void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr)
void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id)
{
+ int len;
+
if (class_id >= 0x01000000)
- Ebml_Serialize(glob, (void *)&class_id, 4);
+ len = 4;
else if (class_id >= 0x00010000)
- Ebml_Serialize(glob, (void *)&class_id, 3);
+ len = 3;
else if (class_id >= 0x00000100)
- Ebml_Serialize(glob, (void *)&class_id, 2);
+ len = 2;
else
- Ebml_Serialize(glob, (void *)&class_id, 1);
+ len = 1;
+
+ Ebml_Serialize(glob, (void *)&class_id, sizeof(class_id), len);
}
+
void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, uint64_t ui)
{
unsigned char sizeSerialized = 8 | 0x80;
Ebml_WriteID(glob, class_id);
- Ebml_Serialize(glob, &sizeSerialized, 1);
- Ebml_Serialize(glob, &ui, 8);
+ Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1);
+ Ebml_Serialize(glob, &ui, sizeof(ui), 8);
}
void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui)
@@ -97,8 +103,8 @@ void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned l
}
sizeSerialized = 0x80 | size;
- Ebml_Serialize(glob, &sizeSerialized, 1);
- Ebml_Serialize(glob, &ui, size);
+ Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1);
+ Ebml_Serialize(glob, &ui, sizeof(ui), size);
}
//TODO: perhaps this is a poor name for this id serializer helper function
void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long bin)
@@ -119,14 +125,14 @@ void Ebml_SerializeFloat(EbmlGlobal *glob, unsigned long class_id, double d)
unsigned char len = 0x88;
Ebml_WriteID(glob, class_id);
- Ebml_Serialize(glob, &len, 1);
- Ebml_Serialize(glob, &d, 8);
+ Ebml_Serialize(glob, &len, sizeof(len), 1);
+ Ebml_Serialize(glob, &d, sizeof(d), 8);
}
void Ebml_WriteSigned16(EbmlGlobal *glob, short val)
{
signed long out = ((val & 0x003FFFFF) | 0x00200000) << 8;
- Ebml_Serialize(glob, &out, 3);
+ Ebml_Serialize(glob, &out, sizeof(out), 3);
}
void Ebml_SerializeString(EbmlGlobal *glob, unsigned long class_id, const char *s)
diff --git a/libmkv/EbmlWriter.h b/libmkv/EbmlWriter.h
index 8c7fe7c66..324c9bca0 100644
--- a/libmkv/EbmlWriter.h
+++ b/libmkv/EbmlWriter.h
@@ -15,7 +15,7 @@
#include "vpx/vpx_integer.h"
typedef struct EbmlGlobal EbmlGlobal;
-void Ebml_Serialize(EbmlGlobal *glob, const void *, unsigned long);
+void Ebml_Serialize(EbmlGlobal *glob, const void *, int, unsigned long);
void Ebml_Write(EbmlGlobal *glob, const void *, unsigned long);
/////
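Note: the hunks above thread a new buffer-size argument (sizeof the in-memory object) through every Ebml_Serialize() call, alongside the number of bytes actually emitted. A minimal sketch of one way the new four-argument contract could be satisfied is below; it is illustrative only, not the implementation in this tree (the body is supplied by the application, since EbmlGlobal is opaque), and assumes only the Ebml_Write() callback declared in this header. The buffer size lets the serializer pick the low len bytes and write them most-significant-byte first, as EBML is big-endian, regardless of host byte order.

    /* Hedged sketch, not the actual libvpx implementation. */
    #include <string.h>
    #include <stdint.h>
    #include "EbmlWriter.h"   /* assumed include path for the header above */

    void Ebml_Serialize(EbmlGlobal *glob, const void *buffer_in,
                        int buffer_size, unsigned long len)
    {
        uint64_t v = 0;

        switch (buffer_size)    /* widen to 64 bits without aliasing issues */
        {
        case 1: { uint8_t  t; memcpy(&t, buffer_in, 1); v = t; } break;
        case 2: { uint16_t t; memcpy(&t, buffer_in, 2); v = t; } break;
        case 4: { uint32_t t; memcpy(&t, buffer_in, 4); v = t; } break;
        case 8: { uint64_t t; memcpy(&t, buffer_in, 8); v = t; } break;
        default: return;        /* unsupported object size */
        }

        while (len--)
        {
            unsigned char b = (unsigned char)(v >> (len * 8));
            Ebml_Write(glob, &b, 1);   /* most significant byte first */
        }
    }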
diff --git a/libmkv/WebMElement.c b/libmkv/WebMElement.c
index 25a90249a..0ef5100bb 100644
--- a/libmkv/WebMElement.c
+++ b/libmkv/WebMElement.c
@@ -35,11 +35,11 @@ void writeSimpleBlock(EbmlGlobal *glob, unsigned char trackNumber, short timeCod
Ebml_WriteID(glob, SimpleBlock);
unsigned long blockLength = 4 + dataLength;
blockLength |= 0x10000000; //TODO check length < 0x0FFFFFFFF
- Ebml_Serialize(glob, &blockLength, 4);
+ Ebml_Serialize(glob, &blockLength, sizeof(blockLength), 4);
trackNumber |= 0x80; //TODO check track nubmer < 128
Ebml_Write(glob, &trackNumber, 1);
//Ebml_WriteSigned16(glob, timeCode,2); //this is 3 bytes
- Ebml_Serialize(glob, &timeCode, 2);
+ Ebml_Serialize(glob, &timeCode, sizeof(timeCode), 2);
unsigned char flags = 0x00 | (isKeyframe ? 0x80 : 0x00) | (lacingFlag << 1) | discardable;
Ebml_Write(glob, &flags, 1);
Ebml_Write(glob, data, dataLength);
diff --git a/vp8/common/arm/arm_systemdependent.c b/vp8/common/arm/arm_systemdependent.c
index 8aab0ff03..c0467cd84 100644
--- a/vp8/common/arm/arm_systemdependent.c
+++ b/vp8/common/arm/arm_systemdependent.c
@@ -54,9 +54,11 @@ void vp8_arch_arm_common_init(VP8_COMMON *ctx)
rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_armv6;
rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_armv6;
rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_armv6;
- rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_armv6;
+ rtcd->loopfilter.simple_mb_v =
+ vp8_loop_filter_simple_vertical_edge_armv6;
rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_armv6;
- rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_armv6;
+ rtcd->loopfilter.simple_mb_h =
+ vp8_loop_filter_simple_horizontal_edge_armv6;
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_armv6;
rtcd->recon.copy16x16 = vp8_copy_mem16x16_v6;
diff --git a/vp8/common/arm/armv6/loopfilter_v6.asm b/vp8/common/arm/armv6/loopfilter_v6.asm
index c7441b055..1cbbbcdef 100644
--- a/vp8/common/arm/armv6/loopfilter_v6.asm
+++ b/vp8/common/arm/armv6/loopfilter_v6.asm
@@ -53,14 +53,11 @@ count RN r5
;r0 unsigned char *src_ptr,
;r1 int src_pixel_step,
-;r2 const char *flimit,
+;r2 const char *blimit,
;r3 const char *limit,
;stack const char *thresh,
;stack int count
-;Note: All 16 elements in flimit are equal. So, in the code, only one load is needed
-;for flimit. Same way applies to limit and thresh.
-
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|vp8_loop_filter_horizontal_edge_armv6| PROC
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
@@ -72,14 +69,18 @@ count RN r5
sub sp, sp, #16 ; create temp buffer
ldr r9, [src], pstep ; p3
- ldr r4, [r2], #4 ; flimit
+ ldrb r4, [r2] ; blimit
ldr r10, [src], pstep ; p2
- ldr r2, [r3], #4 ; limit
+ ldrb r2, [r3] ; limit
ldr r11, [src], pstep ; p1
- uadd8 r4, r4, r4 ; flimit * 2
- ldr r3, [r6], #4 ; thresh
+ orr r4, r4, r4, lsl #8
+ ldrb r3, [r6] ; thresh
+ orr r2, r2, r2, lsl #8
mov count, count, lsl #1 ; 4-in-parallel
- uadd8 r4, r4, r2 ; flimit * 2 + limit
+ orr r4, r4, r4, lsl #16
+ orr r3, r3, r3, lsl #8
+ orr r2, r2, r2, lsl #16
+ orr r3, r3, r3, lsl #16
|Hnext8|
; vp8_filter_mask() function
@@ -275,14 +276,18 @@ count RN r5
sub sp, sp, #16 ; create temp buffer
ldr r9, [src], pstep ; p3
- ldr r4, [r2], #4 ; flimit
+ ldrb r4, [r2] ; blimit
ldr r10, [src], pstep ; p2
- ldr r2, [r3], #4 ; limit
+ ldrb r2, [r3] ; limit
ldr r11, [src], pstep ; p1
- uadd8 r4, r4, r4 ; flimit * 2
- ldr r3, [r6], #4 ; thresh
+ orr r4, r4, r4, lsl #8
+ ldrb r3, [r6] ; thresh
+ orr r2, r2, r2, lsl #8
mov count, count, lsl #1 ; 4-in-parallel
- uadd8 r4, r4, r2 ; flimit * 2 + limit
+ orr r4, r4, r4, lsl #16
+ orr r3, r3, r3, lsl #8
+ orr r2, r2, r2, lsl #16
+ orr r3, r3, r3, lsl #16
|MBHnext8|
@@ -584,15 +589,19 @@ count RN r5
sub sp, sp, #16 ; create temp buffer
ldr r6, [src], pstep ; load source data
- ldr r4, [r2], #4 ; flimit
+ ldrb r4, [r2] ; blimit
ldr r7, [src], pstep
- ldr r2, [r3], #4 ; limit
+ ldrb r2, [r3] ; limit
ldr r8, [src], pstep
- uadd8 r4, r4, r4 ; flimit * 2
- ldr r3, [r12], #4 ; thresh
+ orr r4, r4, r4, lsl #8
+ ldrb r3, [r12] ; thresh
+ orr r2, r2, r2, lsl #8
ldr lr, [src], pstep
mov count, count, lsl #1 ; 4-in-parallel
- uadd8 r4, r4, r2 ; flimit * 2 + limit
+ orr r4, r4, r4, lsl #16
+ orr r3, r3, r3, lsl #8
+ orr r2, r2, r2, lsl #16
+ orr r3, r3, r3, lsl #16
|Vnext8|
@@ -855,18 +864,22 @@ count RN r5
sub sp, sp, #16 ; create temp buffer
ldr r6, [src], pstep ; load source data
- ldr r4, [r2], #4 ; flimit
+ ldrb r4, [r2] ; blimit
pld [src, #23]
ldr r7, [src], pstep
- ldr r2, [r3], #4 ; limit
+ ldrb r2, [r3] ; limit
pld [src, #23]
ldr r8, [src], pstep
- uadd8 r4, r4, r4 ; flimit * 2
- ldr r3, [r12], #4 ; thresh
+ orr r4, r4, r4, lsl #8
+ ldrb r3, [r12] ; thresh
+ orr r2, r2, r2, lsl #8
pld [src, #23]
ldr lr, [src], pstep
mov count, count, lsl #1 ; 4-in-parallel
- uadd8 r4, r4, r2 ; flimit * 2 + limit
+ orr r4, r4, r4, lsl #16
+ orr r3, r3, r3, lsl #8
+ orr r2, r2, r2, lsl #16
+ orr r3, r3, r3, lsl #16
|MBVnext8|
; vp8_filter_mask() function
@@ -906,6 +919,7 @@ count RN r5
str lr, [sp, #8]
ldr lr, [src], pstep
+
TRANSPOSE_MATRIX r6, r7, r8, lr, r9, r10, r11, r12
ldr lr, [sp, #8] ; load back (f)limit accumulator
@@ -954,6 +968,7 @@ count RN r5
beq mbvskip_filter ; skip filtering
+
;vp8_hevmask() function
;calculate high edge variance
@@ -1121,6 +1136,7 @@ count RN r5
smlabb r8, r6, lr, r7
smlatb r6, r6, lr, r7
smlabb r9, r10, lr, r7
+
smlatb r10, r10, lr, r7
ssat r8, #8, r8, asr #7
ssat r6, #8, r6, asr #7
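Note: throughout the ARMv6 hunks above, the old 32-bit loads of 16-byte flimit/limit/thresh arrays are replaced by a single ldrb of the new scalar value followed by two orr-with-shift instructions. A tiny C equivalent of that splat idiom, shown only for illustration:

    #include <stdint.h>

    /* Equivalent of:  ldrb rN, [ptr]
     *                 orr  rN, rN, rN, lsl #8
     *                 orr  rN, rN, rN, lsl #16
     * i.e. replicate one threshold byte into all four lanes of a word so the
     * SIMD-within-register compares operate on four pixels at once. */
    static uint32_t splat_byte(uint8_t b)
    {
        uint32_t x = b;
        x |= x << 8;    /* 0x0000bbbb */
        x |= x << 16;   /* 0xbbbbbbbb */
        return x;
    }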
diff --git a/vp8/common/arm/armv6/simpleloopfilter_v6.asm b/vp8/common/arm/armv6/simpleloopfilter_v6.asm
index 40a71f49d..5e00cf01b 100644
--- a/vp8/common/arm/armv6/simpleloopfilter_v6.asm
+++ b/vp8/common/arm/armv6/simpleloopfilter_v6.asm
@@ -45,35 +45,28 @@
MEND
+
src RN r0
pstep RN r1
;r0 unsigned char *src_ptr,
;r1 int src_pixel_step,
-;r2 const char *flimit,
-;r3 const char *limit,
-;stack const char *thresh,
-;stack int count
-
-; All 16 elements in flimit are equal. So, in the code, only one load is needed
-; for flimit. Same applies to limit. thresh is not used in simple looopfilter
+;r2 const char *blimit
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|vp8_loop_filter_simple_horizontal_edge_armv6| PROC
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
stmdb sp!, {r4 - r11, lr}
- ldr r12, [r3] ; limit
+ ldrb r12, [r2] ; blimit
ldr r3, [src, -pstep, lsl #1] ; p1
ldr r4, [src, -pstep] ; p0
ldr r5, [src] ; q0
ldr r6, [src, pstep] ; q1
- ldr r7, [r2] ; flimit
+ orr r12, r12, r12, lsl #8 ; blimit
ldr r2, c0x80808080
- ldr r9, [sp, #40] ; count for 8-in-parallel
- uadd8 r7, r7, r7 ; flimit * 2
- mov r9, r9, lsl #1 ; double the count. we're doing 4 at a time
- uadd8 r12, r7, r12 ; flimit * 2 + limit
+ orr r12, r12, r12, lsl #16 ; blimit
+ mov r9, #4 ; double the count. we're doing 4 at a time
mov lr, #0 ; need 0 in a couple places
|simple_hnext8|
@@ -148,34 +141,32 @@ pstep RN r1
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
stmdb sp!, {r4 - r11, lr}
- ldr r12, [r2] ; r12: flimit
+ ldrb r12, [r2] ; r12: blimit
ldr r2, c0x80808080
- ldr r7, [r3] ; limit
+ orr r12, r12, r12, lsl #8
; load soure data to r7, r8, r9, r10
ldrh r3, [src, #-2]
pld [src, #23] ; preload for next block
ldrh r4, [src], pstep
- uadd8 r12, r12, r12 ; flimit * 2
+ orr r12, r12, r12, lsl #16
ldrh r5, [src, #-2]
pld [src, #23]
ldrh r6, [src], pstep
- uadd8 r12, r12, r7 ; flimit * 2 + limit
pkhbt r7, r3, r4, lsl #16
ldrh r3, [src, #-2]
pld [src, #23]
ldrh r4, [src], pstep
- ldr r11, [sp, #40] ; count (r11) for 8-in-parallel
pkhbt r8, r5, r6, lsl #16
ldrh r5, [src, #-2]
pld [src, #23]
ldrh r6, [src], pstep
- mov r11, r11, lsl #1 ; 4-in-parallel
+ mov r11, #4 ; double the count. we're doing 4 at a time
|simple_vnext8|
; vp8_simple_filter_mask() function
diff --git a/vp8/common/arm/loopfilter_arm.c b/vp8/common/arm/loopfilter_arm.c
index 6d1caa485..c841d455a 100644
--- a/vp8/common/arm/loopfilter_arm.c
+++ b/vp8/common/arm/loopfilter_arm.c
@@ -9,30 +9,34 @@
*/
-#include "vpx_ports/config.h"
-#include <math.h>
+#include "vpx_config.h"
#include "vp8/common/loopfilter.h"
#include "vp8/common/onyxc_int.h"
+#if HAVE_ARMV6
extern prototype_loopfilter(vp8_loop_filter_horizontal_edge_armv6);
extern prototype_loopfilter(vp8_loop_filter_vertical_edge_armv6);
extern prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_armv6);
extern prototype_loopfilter(vp8_mbloop_filter_vertical_edge_armv6);
-extern prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_armv6);
-extern prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_armv6);
-
-extern prototype_loopfilter(vp8_loop_filter_horizontal_edge_y_neon);
-extern prototype_loopfilter(vp8_loop_filter_vertical_edge_y_neon);
-extern prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_y_neon);
-extern prototype_loopfilter(vp8_mbloop_filter_vertical_edge_y_neon);
-extern prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_neon);
-extern prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_neon);
-
-extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_neon;
-extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_neon;
-extern loop_filter_uvfunction vp8_mbloop_filter_horizontal_edge_uv_neon;
-extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_neon;
+#endif
+#if HAVE_ARMV7
+typedef void loopfilter_y_neon(unsigned char *src, int pitch,
+ unsigned char blimit, unsigned char limit, unsigned char thresh);
+typedef void loopfilter_uv_neon(unsigned char *u, int pitch,
+ unsigned char blimit, unsigned char limit, unsigned char thresh,
+ unsigned char *v);
+
+extern loopfilter_y_neon vp8_loop_filter_horizontal_edge_y_neon;
+extern loopfilter_y_neon vp8_loop_filter_vertical_edge_y_neon;
+extern loopfilter_y_neon vp8_mbloop_filter_horizontal_edge_y_neon;
+extern loopfilter_y_neon vp8_mbloop_filter_vertical_edge_y_neon;
+
+extern loopfilter_uv_neon vp8_loop_filter_horizontal_edge_uv_neon;
+extern loopfilter_uv_neon vp8_loop_filter_vertical_edge_uv_neon;
+extern loopfilter_uv_neon vp8_mbloop_filter_horizontal_edge_uv_neon;
+extern loopfilter_uv_neon vp8_mbloop_filter_vertical_edge_uv_neon;
+#endif
#if HAVE_ARMV6
/*ARMV6 loopfilter functions*/
@@ -40,96 +44,72 @@ extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_neon;
void vp8_loop_filter_mbh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
- vp8_mbloop_filter_horizontal_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
+ vp8_mbloop_filter_horizontal_edge_armv6(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
- vp8_mbloop_filter_horizontal_edge_armv6(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
+ vp8_mbloop_filter_horizontal_edge_armv6(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
- vp8_mbloop_filter_horizontal_edge_armv6(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
-}
-
-void vp8_loop_filter_mbhs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
- int y_stride, int uv_stride, loop_filter_info *lfi)
-{
- (void) u_ptr;
- (void) v_ptr;
- (void) uv_stride;
- vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
+ vp8_mbloop_filter_horizontal_edge_armv6(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
}
/* Vertical MB Filtering */
void vp8_loop_filter_mbv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
- vp8_mbloop_filter_vertical_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
+ vp8_mbloop_filter_vertical_edge_armv6(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
- vp8_mbloop_filter_vertical_edge_armv6(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
+ vp8_mbloop_filter_vertical_edge_armv6(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
- vp8_mbloop_filter_vertical_edge_armv6(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
-}
-
-void vp8_loop_filter_mbvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
- int y_stride, int uv_stride, loop_filter_info *lfi)
-{
- (void) u_ptr;
- (void) v_ptr;
- (void) uv_stride;
- vp8_loop_filter_simple_vertical_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
+ vp8_mbloop_filter_vertical_edge_armv6(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
}
/* Horizontal B Filtering */
void vp8_loop_filter_bh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
- vp8_loop_filter_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
+ vp8_loop_filter_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
+ vp8_loop_filter_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
+ vp8_loop_filter_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
- vp8_loop_filter_horizontal_edge_armv6(u_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
+ vp8_loop_filter_horizontal_edge_armv6(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
- vp8_loop_filter_horizontal_edge_armv6(v_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
+ vp8_loop_filter_horizontal_edge_armv6(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
}
-void vp8_loop_filter_bhs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
- int y_stride, int uv_stride, loop_filter_info *lfi)
+void vp8_loop_filter_bhs_armv6(unsigned char *y_ptr, int y_stride,
+ const unsigned char *blimit)
{
- (void) u_ptr;
- (void) v_ptr;
- (void) uv_stride;
- vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
+ vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, blimit);
+ vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, blimit);
+ vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, blimit);
}
/* Vertical B Filtering */
void vp8_loop_filter_bv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
- vp8_loop_filter_vertical_edge_armv6(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_vertical_edge_armv6(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_vertical_edge_armv6(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
+ vp8_loop_filter_vertical_edge_armv6(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
+ vp8_loop_filter_vertical_edge_armv6(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
+ vp8_loop_filter_vertical_edge_armv6(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
- vp8_loop_filter_vertical_edge_armv6(u_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
+ vp8_loop_filter_vertical_edge_armv6(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
- vp8_loop_filter_vertical_edge_armv6(v_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
+ vp8_loop_filter_vertical_edge_armv6(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
}
-void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
- int y_stride, int uv_stride, loop_filter_info *lfi)
+void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, int y_stride,
+ const unsigned char *blimit)
{
- (void) u_ptr;
- (void) v_ptr;
- (void) uv_stride;
- vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
+ vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 4, y_stride, blimit);
+ vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 8, y_stride, blimit);
+ vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 12, y_stride, blimit);
}
#endif
@@ -139,83 +119,58 @@ void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsig
void vp8_loop_filter_mbh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
- vp8_mbloop_filter_horizontal_edge_y_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
+ unsigned char mblim = *lfi->mblim;
+ unsigned char lim = *lfi->lim;
+ unsigned char hev_thr = *lfi->hev_thr;
+ vp8_mbloop_filter_horizontal_edge_y_neon(y_ptr, y_stride, mblim, lim, hev_thr);
if (u_ptr)
- vp8_mbloop_filter_horizontal_edge_uv_neon(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, v_ptr);
-}
-
-void vp8_loop_filter_mbhs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
- int y_stride, int uv_stride, loop_filter_info *lfi)
-{
- (void) u_ptr;
- (void) v_ptr;
- (void) uv_stride;
- vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
+ vp8_mbloop_filter_horizontal_edge_uv_neon(u_ptr, uv_stride, mblim, lim, hev_thr, v_ptr);
}
/* Vertical MB Filtering */
void vp8_loop_filter_mbv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
- vp8_mbloop_filter_vertical_edge_y_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
+ unsigned char mblim = *lfi->mblim;
+ unsigned char lim = *lfi->lim;
+ unsigned char hev_thr = *lfi->hev_thr;
- if (u_ptr)
- vp8_mbloop_filter_vertical_edge_uv_neon(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, v_ptr);
-}
+ vp8_mbloop_filter_vertical_edge_y_neon(y_ptr, y_stride, mblim, lim, hev_thr);
-void vp8_loop_filter_mbvs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
- int y_stride, int uv_stride, loop_filter_info *lfi)
-{
- (void) u_ptr;
- (void) v_ptr;
- (void) uv_stride;
- vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
+ if (u_ptr)
+ vp8_mbloop_filter_vertical_edge_uv_neon(u_ptr, uv_stride, mblim, lim, hev_thr, v_ptr);
}
/* Horizontal B Filtering */
void vp8_loop_filter_bh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
- vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
+ unsigned char blim = *lfi->blim;
+ unsigned char lim = *lfi->lim;
+ unsigned char hev_thr = *lfi->hev_thr;
- if (u_ptr)
- vp8_loop_filter_horizontal_edge_uv_neon(u_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, v_ptr + 4 * uv_stride);
-}
+ vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 4 * y_stride, y_stride, blim, lim, hev_thr);
+ vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 8 * y_stride, y_stride, blim, lim, hev_thr);
+ vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 12 * y_stride, y_stride, blim, lim, hev_thr);
-void vp8_loop_filter_bhs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
- int y_stride, int uv_stride, loop_filter_info *lfi)
-{
- (void) u_ptr;
- (void) v_ptr;
- (void) uv_stride;
- vp8_loop_filter_simple_horizontal_edge_neon(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_simple_horizontal_edge_neon(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_simple_horizontal_edge_neon(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
+ if (u_ptr)
+ vp8_loop_filter_horizontal_edge_uv_neon(u_ptr + 4 * uv_stride, uv_stride, blim, lim, hev_thr, v_ptr + 4 * uv_stride);
}
/* Vertical B Filtering */
void vp8_loop_filter_bv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
- vp8_loop_filter_vertical_edge_y_neon(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_vertical_edge_y_neon(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_vertical_edge_y_neon(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
+ unsigned char blim = *lfi->blim;
+ unsigned char lim = *lfi->lim;
+ unsigned char hev_thr = *lfi->hev_thr;
- if (u_ptr)
- vp8_loop_filter_vertical_edge_uv_neon(u_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, v_ptr + 4);
-}
+ vp8_loop_filter_vertical_edge_y_neon(y_ptr + 4, y_stride, blim, lim, hev_thr);
+ vp8_loop_filter_vertical_edge_y_neon(y_ptr + 8, y_stride, blim, lim, hev_thr);
+ vp8_loop_filter_vertical_edge_y_neon(y_ptr + 12, y_stride, blim, lim, hev_thr);
-void vp8_loop_filter_bvs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
- int y_stride, int uv_stride, loop_filter_info *lfi)
-{
- (void) u_ptr;
- (void) v_ptr;
- (void) uv_stride;
- vp8_loop_filter_simple_vertical_edge_neon(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_simple_vertical_edge_neon(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_simple_vertical_edge_neon(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
+ if (u_ptr)
+ vp8_loop_filter_vertical_edge_uv_neon(u_ptr + 4, uv_stride, blim, lim, hev_thr, v_ptr + 4);
}
#endif
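Note: the rewritten wrappers above read lfi->mblim, lfi->blim, lfi->lim and lfi->hev_thr instead of the old mbflim/flim/lim/thr fields, and the NEON variants dereference each pointer once and pass the byte by value. The loop_filter_info layout itself is defined in vp8/common/loopfilter.h (also changed in this commit but not shown here); reconstructed from the accesses above, it presumably looks roughly like:

    /* Assumed shape only -- inferred from the field accesses in
     * loopfilter_arm.c above, not copied from loopfilter.h. */
    typedef struct
    {
        const unsigned char *mblim;    /* macroblock-edge filter limit */
        const unsigned char *blim;     /* block-edge filter limit      */
        const unsigned char *lim;      /* interior difference limit    */
        const unsigned char *hev_thr;  /* high edge variance threshold */
    } loop_filter_info;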
diff --git a/vp8/common/arm/loopfilter_arm.h b/vp8/common/arm/loopfilter_arm.h
index cd62207d7..390a547b0 100644
--- a/vp8/common/arm/loopfilter_arm.h
+++ b/vp8/common/arm/loopfilter_arm.h
@@ -12,15 +12,17 @@
#ifndef LOOPFILTER_ARM_H
#define LOOPFILTER_ARM_H
+#include "vpx_config.h"
+
#if HAVE_ARMV6
extern prototype_loopfilter_block(vp8_loop_filter_mbv_armv6);
extern prototype_loopfilter_block(vp8_loop_filter_bv_armv6);
extern prototype_loopfilter_block(vp8_loop_filter_mbh_armv6);
extern prototype_loopfilter_block(vp8_loop_filter_bh_armv6);
-extern prototype_loopfilter_block(vp8_loop_filter_mbvs_armv6);
-extern prototype_loopfilter_block(vp8_loop_filter_bvs_armv6);
-extern prototype_loopfilter_block(vp8_loop_filter_mbhs_armv6);
-extern prototype_loopfilter_block(vp8_loop_filter_bhs_armv6);
+extern prototype_simple_loopfilter(vp8_loop_filter_bvs_armv6);
+extern prototype_simple_loopfilter(vp8_loop_filter_bhs_armv6);
+extern prototype_simple_loopfilter(vp8_loop_filter_simple_horizontal_edge_armv6);
+extern prototype_simple_loopfilter(vp8_loop_filter_simple_vertical_edge_armv6);
#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_lf_normal_mb_v
@@ -36,28 +38,29 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_armv6);
#define vp8_lf_normal_b_h vp8_loop_filter_bh_armv6
#undef vp8_lf_simple_mb_v
-#define vp8_lf_simple_mb_v vp8_loop_filter_mbvs_armv6
+#define vp8_lf_simple_mb_v vp8_loop_filter_simple_vertical_edge_armv6
#undef vp8_lf_simple_b_v
#define vp8_lf_simple_b_v vp8_loop_filter_bvs_armv6
#undef vp8_lf_simple_mb_h
-#define vp8_lf_simple_mb_h vp8_loop_filter_mbhs_armv6
+#define vp8_lf_simple_mb_h vp8_loop_filter_simple_horizontal_edge_armv6
#undef vp8_lf_simple_b_h
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_armv6
-#endif
-#endif
+#endif /* !CONFIG_RUNTIME_CPU_DETECT */
+
+#endif /* HAVE_ARMV6 */
#if HAVE_ARMV7
extern prototype_loopfilter_block(vp8_loop_filter_mbv_neon);
extern prototype_loopfilter_block(vp8_loop_filter_bv_neon);
extern prototype_loopfilter_block(vp8_loop_filter_mbh_neon);
extern prototype_loopfilter_block(vp8_loop_filter_bh_neon);
-extern prototype_loopfilter_block(vp8_loop_filter_mbvs_neon);
-extern prototype_loopfilter_block(vp8_loop_filter_bvs_neon);
-extern prototype_loopfilter_block(vp8_loop_filter_mbhs_neon);
-extern prototype_loopfilter_block(vp8_loop_filter_bhs_neon);
+extern prototype_simple_loopfilter(vp8_loop_filter_mbvs_neon);
+extern prototype_simple_loopfilter(vp8_loop_filter_bvs_neon);
+extern prototype_simple_loopfilter(vp8_loop_filter_mbhs_neon);
+extern prototype_simple_loopfilter(vp8_loop_filter_bhs_neon);
#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_lf_normal_mb_v
@@ -83,7 +86,8 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_neon);
#undef vp8_lf_simple_b_h
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_neon
-#endif
-#endif
+#endif /* !CONFIG_RUNTIME_CPU_DETECT */
+
+#endif /* HAVE_ARMV7 */
-#endif
+#endif /* LOOPFILTER_ARM_H */
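Note: the header now distinguishes two prototypes. The normal block filters keep prototype_loopfilter_block (y/u/v pointers, strides and a loop_filter_info), while the simple filters take only the luma pointer, stride and a blimit pointer. Matching the armv6 wrappers defined in loopfilter_arm.c above, the simple prototype is presumably along the lines of:

    /* Assumed macro shape, inferred from vp8_loop_filter_bhs_armv6() and
     * vp8_loop_filter_bvs_armv6(); the real definition is in
     * vp8/common/loopfilter.h. */
    #define prototype_simple_loopfilter(sym) \
        void sym(unsigned char *y, int ystride, const unsigned char *blimit)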
diff --git a/vp8/common/arm/neon/loopfilter_neon.asm b/vp8/common/arm/neon/loopfilter_neon.asm
index e73dd6401..e44be0a1e 100644
--- a/vp8/common/arm/neon/loopfilter_neon.asm
+++ b/vp8/common/arm/neon/loopfilter_neon.asm
@@ -14,109 +14,97 @@
EXPORT |vp8_loop_filter_vertical_edge_y_neon|
EXPORT |vp8_loop_filter_vertical_edge_uv_neon|
ARM
- REQUIRE8
- PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
-; flimit, limit, and thresh should be positive numbers.
-; All 16 elements in these variables are equal.
-
-; void vp8_loop_filter_horizontal_edge_y_neon(unsigned char *src, int pitch,
-; const signed char *flimit,
-; const signed char *limit,
-; const signed char *thresh,
-; int count)
; r0 unsigned char *src
; r1 int pitch
-; r2 const signed char *flimit
-; r3 const signed char *limit
-; sp const signed char *thresh,
-; sp+4 int count (unused)
+; r2 unsigned char blimit
+; r3 unsigned char limit
+; sp unsigned char thresh,
|vp8_loop_filter_horizontal_edge_y_neon| PROC
- stmdb sp!, {lr}
- vld1.s8 {d0[], d1[]}, [r2] ; flimit
- vld1.s8 {d2[], d3[]}, [r3] ; limit
- sub r2, r0, r1, lsl #2 ; move src pointer down by 4 lines
- ldr r12, [sp, #4] ; load thresh pointer
-
- vld1.u8 {q3}, [r2], r1 ; p3
- vld1.u8 {q4}, [r2], r1 ; p2
- vld1.u8 {q5}, [r2], r1 ; p1
- vld1.u8 {q6}, [r2], r1 ; p0
- vld1.u8 {q7}, [r2], r1 ; q0
- vld1.u8 {q8}, [r2], r1 ; q1
- vld1.u8 {q9}, [r2], r1 ; q2
- vld1.u8 {q10}, [r2] ; q3
- vld1.s8 {d4[], d5[]}, [r12] ; thresh
- sub r0, r0, r1, lsl #1
+ push {lr}
+ vdup.u8 q0, r2 ; duplicate blimit
+ vdup.u8 q1, r3 ; duplicate limit
+ sub r2, r0, r1, lsl #2 ; move src pointer down by 4 lines
+ ldr r3, [sp, #4] ; load thresh
+ add r12, r2, r1
+ add r1, r1, r1
+
+ vdup.u8 q2, r3 ; duplicate thresh
+
+ vld1.u8 {q3}, [r2@128], r1 ; p3
+ vld1.u8 {q4}, [r12@128], r1 ; p2
+ vld1.u8 {q5}, [r2@128], r1 ; p1
+ vld1.u8 {q6}, [r12@128], r1 ; p0
+ vld1.u8 {q7}, [r2@128], r1 ; q0
+ vld1.u8 {q8}, [r12@128], r1 ; q1
+ vld1.u8 {q9}, [r2@128] ; q2
+ vld1.u8 {q10}, [r12@128] ; q3
+
+ sub r2, r2, r1, lsl #1
+ sub r12, r12, r1, lsl #1
bl vp8_loop_filter_neon
- vst1.u8 {q5}, [r0], r1 ; store op1
- vst1.u8 {q6}, [r0], r1 ; store op0
- vst1.u8 {q7}, [r0], r1 ; store oq0
- vst1.u8 {q8}, [r0], r1 ; store oq1
+ vst1.u8 {q5}, [r2@128], r1 ; store op1
+ vst1.u8 {q6}, [r12@128], r1 ; store op0
+ vst1.u8 {q7}, [r2@128], r1 ; store oq0
+ vst1.u8 {q8}, [r12@128], r1 ; store oq1
- ldmia sp!, {pc}
+ pop {pc}
ENDP ; |vp8_loop_filter_horizontal_edge_y_neon|
-; void vp8_loop_filter_horizontal_edge_uv_neon(unsigned char *u, int pitch
-; const signed char *flimit,
-; const signed char *limit,
-; const signed char *thresh,
-; unsigned char *v)
+
; r0 unsigned char *u,
; r1 int pitch,
-; r2 const signed char *flimit,
-; r3 const signed char *limit,
-; sp const signed char *thresh,
+; r2 unsigned char blimit
+; r3 unsigned char limit
+; sp unsigned char thresh,
; sp+4 unsigned char *v
|vp8_loop_filter_horizontal_edge_uv_neon| PROC
- stmdb sp!, {lr}
- vld1.s8 {d0[], d1[]}, [r2] ; flimit
- vld1.s8 {d2[], d3[]}, [r3] ; limit
+ push {lr}
+ vdup.u8 q0, r2 ; duplicate blimit
+ vdup.u8 q1, r3 ; duplicate limit
+ ldr r12, [sp, #4] ; load thresh
ldr r2, [sp, #8] ; load v ptr
+ vdup.u8 q2, r12 ; duplicate thresh
sub r3, r0, r1, lsl #2 ; move u pointer down by 4 lines
- vld1.u8 {d6}, [r3], r1 ; p3
- vld1.u8 {d8}, [r3], r1 ; p2
- vld1.u8 {d10}, [r3], r1 ; p1
- vld1.u8 {d12}, [r3], r1 ; p0
- vld1.u8 {d14}, [r3], r1 ; q0
- vld1.u8 {d16}, [r3], r1 ; q1
- vld1.u8 {d18}, [r3], r1 ; q2
- vld1.u8 {d20}, [r3] ; q3
-
- ldr r3, [sp, #4] ; load thresh pointer
-
sub r12, r2, r1, lsl #2 ; move v pointer down by 4 lines
- vld1.u8 {d7}, [r12], r1 ; p3
- vld1.u8 {d9}, [r12], r1 ; p2
- vld1.u8 {d11}, [r12], r1 ; p1
- vld1.u8 {d13}, [r12], r1 ; p0
- vld1.u8 {d15}, [r12], r1 ; q0
- vld1.u8 {d17}, [r12], r1 ; q1
- vld1.u8 {d19}, [r12], r1 ; q2
- vld1.u8 {d21}, [r12] ; q3
- vld1.s8 {d4[], d5[]}, [r3] ; thresh
+ vld1.u8 {d6}, [r3@64], r1 ; p3
+ vld1.u8 {d7}, [r12@64], r1 ; p3
+ vld1.u8 {d8}, [r3@64], r1 ; p2
+ vld1.u8 {d9}, [r12@64], r1 ; p2
+ vld1.u8 {d10}, [r3@64], r1 ; p1
+ vld1.u8 {d11}, [r12@64], r1 ; p1
+ vld1.u8 {d12}, [r3@64], r1 ; p0
+ vld1.u8 {d13}, [r12@64], r1 ; p0
+ vld1.u8 {d14}, [r3@64], r1 ; q0
+ vld1.u8 {d15}, [r12@64], r1 ; q0
+ vld1.u8 {d16}, [r3@64], r1 ; q1
+ vld1.u8 {d17}, [r12@64], r1 ; q1
+ vld1.u8 {d18}, [r3@64], r1 ; q2
+ vld1.u8 {d19}, [r12@64], r1 ; q2
+ vld1.u8 {d20}, [r3@64] ; q3
+ vld1.u8 {d21}, [r12@64] ; q3
bl vp8_loop_filter_neon
sub r0, r0, r1, lsl #1
sub r2, r2, r1, lsl #1
- vst1.u8 {d10}, [r0], r1 ; store u op1
- vst1.u8 {d11}, [r2], r1 ; store v op1
- vst1.u8 {d12}, [r0], r1 ; store u op0
- vst1.u8 {d13}, [r2], r1 ; store v op0
- vst1.u8 {d14}, [r0], r1 ; store u oq0
- vst1.u8 {d15}, [r2], r1 ; store v oq0
- vst1.u8 {d16}, [r0] ; store u oq1
- vst1.u8 {d17}, [r2] ; store v oq1
+ vst1.u8 {d10}, [r0@64], r1 ; store u op1
+ vst1.u8 {d11}, [r2@64], r1 ; store v op1
+ vst1.u8 {d12}, [r0@64], r1 ; store u op0
+ vst1.u8 {d13}, [r2@64], r1 ; store v op0
+ vst1.u8 {d14}, [r0@64], r1 ; store u oq0
+ vst1.u8 {d15}, [r2@64], r1 ; store v oq0
+ vst1.u8 {d16}, [r0@64] ; store u oq1
+ vst1.u8 {d17}, [r2@64] ; store v oq1
- ldmia sp!, {pc}
+ pop {pc}
ENDP ; |vp8_loop_filter_horizontal_edge_uv_neon|
; void vp8_loop_filter_vertical_edge_y_neon(unsigned char *src, int pitch,
@@ -124,39 +112,38 @@
; const signed char *limit,
; const signed char *thresh,
; int count)
-; r0 unsigned char *src,
-; r1 int pitch,
-; r2 const signed char *flimit,
-; r3 const signed char *limit,
-; sp const signed char *thresh,
-; sp+4 int count (unused)
+; r0 unsigned char *src
+; r1 int pitch
+; r2 unsigned char blimit
+; r3 unsigned char limit
+; sp unsigned char thresh,
+
|vp8_loop_filter_vertical_edge_y_neon| PROC
- stmdb sp!, {lr}
- vld1.s8 {d0[], d1[]}, [r2] ; flimit
- vld1.s8 {d2[], d3[]}, [r3] ; limit
- sub r2, r0, #4 ; src ptr down by 4 columns
- sub r0, r0, #2 ; dst ptr
- ldr r12, [sp, #4] ; load thresh pointer
-
- vld1.u8 {d6}, [r2], r1 ; load first 8-line src data
- vld1.u8 {d8}, [r2], r1
+ push {lr}
+ vdup.u8 q0, r2 ; duplicate blimit
+ vdup.u8 q1, r3 ; duplicate limit
+ sub r2, r0, #4 ; src ptr down by 4 columns
+ add r1, r1, r1
+ ldr r3, [sp, #4] ; load thresh
+ add r12, r2, r1, asr #1
+
+ vld1.u8 {d6}, [r2], r1
+ vld1.u8 {d8}, [r12], r1
vld1.u8 {d10}, [r2], r1
- vld1.u8 {d12}, [r2], r1
+ vld1.u8 {d12}, [r12], r1
vld1.u8 {d14}, [r2], r1
- vld1.u8 {d16}, [r2], r1
+ vld1.u8 {d16}, [r12], r1
vld1.u8 {d18}, [r2], r1
- vld1.u8 {d20}, [r2], r1
-
- vld1.s8 {d4[], d5[]}, [r12] ; thresh
+ vld1.u8 {d20}, [r12], r1
vld1.u8 {d7}, [r2], r1 ; load second 8-line src data
- vld1.u8 {d9}, [r2], r1
+ vld1.u8 {d9}, [r12], r1
vld1.u8 {d11}, [r2], r1
- vld1.u8 {d13}, [r2], r1
+ vld1.u8 {d13}, [r12], r1
vld1.u8 {d15}, [r2], r1
- vld1.u8 {d17}, [r2], r1
- vld1.u8 {d19}, [r2], r1
- vld1.u8 {d21}, [r2]
+ vld1.u8 {d17}, [r12], r1
+ vld1.u8 {d19}, [r2]
+ vld1.u8 {d21}, [r12]
;transpose to 8x16 matrix
vtrn.32 q3, q7
@@ -164,6 +151,8 @@
vtrn.32 q5, q9
vtrn.32 q6, q10
+ vdup.u8 q2, r3 ; duplicate thresh
+
vtrn.16 q3, q5
vtrn.16 q4, q6
vtrn.16 q7, q9
@@ -178,28 +167,34 @@
vswp d12, d11
vswp d16, d13
+
+ sub r0, r0, #2 ; dst ptr
+
vswp d14, d12
vswp d16, d15
+ add r12, r0, r1, asr #1
+
;store op1, op0, oq0, oq1
vst4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
- vst4.8 {d10[1], d11[1], d12[1], d13[1]}, [r0], r1
+ vst4.8 {d10[1], d11[1], d12[1], d13[1]}, [r12], r1
vst4.8 {d10[2], d11[2], d12[2], d13[2]}, [r0], r1
- vst4.8 {d10[3], d11[3], d12[3], d13[3]}, [r0], r1
+ vst4.8 {d10[3], d11[3], d12[3], d13[3]}, [r12], r1
vst4.8 {d10[4], d11[4], d12[4], d13[4]}, [r0], r1
- vst4.8 {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
+ vst4.8 {d10[5], d11[5], d12[5], d13[5]}, [r12], r1
vst4.8 {d10[6], d11[6], d12[6], d13[6]}, [r0], r1
- vst4.8 {d10[7], d11[7], d12[7], d13[7]}, [r0], r1
+ vst4.8 {d10[7], d11[7], d12[7], d13[7]}, [r12], r1
+
vst4.8 {d14[0], d15[0], d16[0], d17[0]}, [r0], r1
- vst4.8 {d14[1], d15[1], d16[1], d17[1]}, [r0], r1
+ vst4.8 {d14[1], d15[1], d16[1], d17[1]}, [r12], r1
vst4.8 {d14[2], d15[2], d16[2], d17[2]}, [r0], r1
- vst4.8 {d14[3], d15[3], d16[3], d17[3]}, [r0], r1
+ vst4.8 {d14[3], d15[3], d16[3], d17[3]}, [r12], r1
vst4.8 {d14[4], d15[4], d16[4], d17[4]}, [r0], r1
- vst4.8 {d14[5], d15[5], d16[5], d17[5]}, [r0], r1
- vst4.8 {d14[6], d15[6], d16[6], d17[6]}, [r0], r1
- vst4.8 {d14[7], d15[7], d16[7], d17[7]}, [r0]
+ vst4.8 {d14[5], d15[5], d16[5], d17[5]}, [r12], r1
+ vst4.8 {d14[6], d15[6], d16[6], d17[6]}, [r0]
+ vst4.8 {d14[7], d15[7], d16[7], d17[7]}, [r12]
- ldmia sp!, {pc}
+ pop {pc}
ENDP ; |vp8_loop_filter_vertical_edge_y_neon|
; void vp8_loop_filter_vertical_edge_uv_neon(unsigned char *u, int pitch
@@ -209,38 +204,36 @@
; unsigned char *v)
; r0 unsigned char *u,
; r1 int pitch,
-; r2 const signed char *flimit,
-; r3 const signed char *limit,
-; sp const signed char *thresh,
+; r2 unsigned char blimit
+; r3 unsigned char limit
+; sp unsigned char thresh,
; sp+4 unsigned char *v
|vp8_loop_filter_vertical_edge_uv_neon| PROC
- stmdb sp!, {lr}
- sub r12, r0, #4 ; move u pointer down by 4 columns
- vld1.s8 {d0[], d1[]}, [r2] ; flimit
- vld1.s8 {d2[], d3[]}, [r3] ; limit
-
+ push {lr}
+ vdup.u8 q0, r2 ; duplicate blimit
+ sub r12, r0, #4 ; move u pointer down by 4 columns
ldr r2, [sp, #8] ; load v ptr
-
- vld1.u8 {d6}, [r12], r1 ;load u data
- vld1.u8 {d8}, [r12], r1
- vld1.u8 {d10}, [r12], r1
- vld1.u8 {d12}, [r12], r1
- vld1.u8 {d14}, [r12], r1
- vld1.u8 {d16}, [r12], r1
- vld1.u8 {d18}, [r12], r1
- vld1.u8 {d20}, [r12]
-
+ vdup.u8 q1, r3 ; duplicate limit
sub r3, r2, #4 ; move v pointer down by 4 columns
+
+ vld1.u8 {d6}, [r12], r1 ;load u data
vld1.u8 {d7}, [r3], r1 ;load v data
+ vld1.u8 {d8}, [r12], r1
vld1.u8 {d9}, [r3], r1
+ vld1.u8 {d10}, [r12], r1
vld1.u8 {d11}, [r3], r1
+ vld1.u8 {d12}, [r12], r1
vld1.u8 {d13}, [r3], r1
+ vld1.u8 {d14}, [r12], r1
vld1.u8 {d15}, [r3], r1
+ vld1.u8 {d16}, [r12], r1
vld1.u8 {d17}, [r3], r1
+ vld1.u8 {d18}, [r12], r1
vld1.u8 {d19}, [r3], r1
+ vld1.u8 {d20}, [r12]
vld1.u8 {d21}, [r3]
- ldr r12, [sp, #4] ; load thresh pointer
+ ldr r12, [sp, #4] ; load thresh
;transpose to 8x16 matrix
vtrn.32 q3, q7
@@ -248,6 +241,8 @@
vtrn.32 q5, q9
vtrn.32 q6, q10
+ vdup.u8 q2, r12 ; duplicate thresh
+
vtrn.16 q3, q5
vtrn.16 q4, q6
vtrn.16 q7, q9
@@ -258,18 +253,16 @@
vtrn.8 q7, q8
vtrn.8 q9, q10
- vld1.s8 {d4[], d5[]}, [r12] ; thresh
-
bl vp8_loop_filter_neon
- sub r0, r0, #2
- sub r2, r2, #2
-
vswp d12, d11
vswp d16, d13
vswp d14, d12
vswp d16, d15
+ sub r0, r0, #2
+ sub r2, r2, #2
+
;store op1, op0, oq0, oq1
vst4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
vst4.8 {d14[0], d15[0], d16[0], d17[0]}, [r2], r1
@@ -288,7 +281,7 @@
vst4.8 {d10[7], d11[7], d12[7], d13[7]}, [r0]
vst4.8 {d14[7], d15[7], d16[7], d17[7]}, [r2]
- ldmia sp!, {pc}
+ pop {pc}
ENDP ; |vp8_loop_filter_vertical_edge_uv_neon|
; void vp8_loop_filter_neon();
@@ -316,42 +309,44 @@
vabd.u8 q14, q8, q7 ; abs(q1 - q0)
vabd.u8 q3, q9, q8 ; abs(q2 - q1)
vabd.u8 q4, q10, q9 ; abs(q3 - q2)
- vabd.u8 q9, q6, q7 ; abs(p0 - q0)
vmax.u8 q11, q11, q12
vmax.u8 q12, q13, q14
vmax.u8 q3, q3, q4
vmax.u8 q15, q11, q12
+ vabd.u8 q9, q6, q7 ; abs(p0 - q0)
+
; vp8_hevmask
vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh)*-1
vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh)*-1
vmax.u8 q15, q15, q3
- vadd.u8 q0, q0, q0 ; flimit * 2
- vadd.u8 q0, q0, q1 ; flimit * 2 + limit
- vcge.u8 q15, q1, q15
+ vmov.u8 q10, #0x80 ; 0x80
vabd.u8 q2, q5, q8 ; a = abs(p1 - q1)
vqadd.u8 q9, q9, q9 ; b = abs(p0 - q0) * 2
- vshr.u8 q2, q2, #1 ; a = a / 2
- vqadd.u8 q9, q9, q2 ; a = b + a
- vcge.u8 q9, q0, q9 ; (a > flimit * 2 + limit) * -1
- vmov.u8 q0, #0x80 ; 0x80
+ vcge.u8 q15, q1, q15
; vp8_filter() function
; convert to signed
- veor q7, q7, q0 ; qs0
- veor q6, q6, q0 ; ps0
- veor q5, q5, q0 ; ps1
- veor q8, q8, q0 ; qs1
+ veor q7, q7, q10 ; qs0
+ vshr.u8 q2, q2, #1 ; a = a / 2
+ veor q6, q6, q10 ; ps0
+
+ veor q5, q5, q10 ; ps1
+ vqadd.u8 q9, q9, q2 ; a = b + a
+
+ veor q8, q8, q10 ; qs1
vmov.u8 q10, #3 ; #3
vsubl.s8 q2, d14, d12 ; ( qs0 - ps0)
vsubl.s8 q11, d15, d13
+ vcge.u8 q9, q0, q9 ; (a > flimit * 2 + limit) * -1
+
vmovl.u8 q4, d20
vqsub.s8 q1, q5, q8 ; vp8_filter = clamp(ps1-qs1)
@@ -378,19 +373,20 @@
vshr.s8 q2, q2, #3 ; Filter2 >>= 3
vshr.s8 q1, q1, #3 ; Filter1 >>= 3
+
vqadd.s8 q11, q6, q2 ; u = clamp(ps0 + Filter2)
vqsub.s8 q10, q7, q1 ; u = clamp(qs0 - Filter1)
; outer tap adjustments: ++vp8_filter >> 1
vrshr.s8 q1, q1, #1
vbic q1, q1, q14 ; vp8_filter &= ~hev
-
+ vmov.u8 q0, #0x80 ; 0x80
vqadd.s8 q13, q5, q1 ; u = clamp(ps1 + vp8_filter)
vqsub.s8 q12, q8, q1 ; u = clamp(qs1 - vp8_filter)
- veor q5, q13, q0 ; *op1 = u^0x80
veor q6, q11, q0 ; *op0 = u^0x80
veor q7, q10, q0 ; *oq0 = u^0x80
+ veor q5, q13, q0 ; *op1 = u^0x80
veor q8, q12, q0 ; *oq1 = u^0x80
bx lr
diff --git a/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm b/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm
index 7c5ea3644..adf848b9c 100644
--- a/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm
+++ b/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm
@@ -9,99 +9,109 @@
;
- EXPORT |vp8_loop_filter_simple_horizontal_edge_neon|
+ ;EXPORT |vp8_loop_filter_simple_horizontal_edge_neon|
+ EXPORT |vp8_loop_filter_bhs_neon|
+ EXPORT |vp8_loop_filter_mbhs_neon|
ARM
- REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
-;Note: flimit, limit, and thresh shpuld be positive numbers. All 16 elements in flimit
-;are equal. So, in the code, only one load is needed
-;for flimit. Same way applies to limit and thresh.
-; r0 unsigned char *s,
-; r1 int p, //pitch
-; r2 const signed char *flimit,
-; r3 const signed char *limit,
-; stack(r4) const signed char *thresh (unused)
-; //stack(r5) int count --unused
+
+; r0 unsigned char *s, PRESERVE
+; r1 int p, PRESERVE
+; q1 limit, PRESERVE
|vp8_loop_filter_simple_horizontal_edge_neon| PROC
- sub r0, r0, r1, lsl #1 ; move src pointer down by 2 lines
- vld1.u8 {q5}, [r0], r1 ; p1
- vld1.s8 {d2[], d3[]}, [r2] ; flimit
- vld1.s8 {d26[], d27[]}, [r3] ; limit -> q13
- vld1.u8 {q6}, [r0], r1 ; p0
- vmov.u8 q0, #0x80 ; 0x80
- vld1.u8 {q7}, [r0], r1 ; q0
- vmov.u8 q10, #0x03 ; 0x03
- vld1.u8 {q8}, [r0] ; q1
+ sub r3, r0, r1, lsl #1 ; move src pointer down by 2 lines
+
+ vld1.u8 {q7}, [r0@128], r1 ; q0
+ vld1.u8 {q5}, [r3@128], r1 ; p0
+ vld1.u8 {q8}, [r0@128] ; q1
+ vld1.u8 {q6}, [r3@128] ; p1
- ;vp8_filter_mask() function
vabd.u8 q15, q6, q7 ; abs(p0 - q0)
vabd.u8 q14, q5, q8 ; abs(p1 - q1)
+
vqadd.u8 q15, q15, q15 ; abs(p0 - q0) * 2
vshr.u8 q14, q14, #1 ; abs(p1 - q1) / 2
+ vmov.u8 q0, #0x80 ; 0x80
+ vmov.s16 q13, #3
vqadd.u8 q15, q15, q14 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
- ;vp8_filter() function
veor q7, q7, q0 ; qs0: q0 offset to convert to a signed value
veor q6, q6, q0 ; ps0: p0 offset to convert to a signed value
veor q5, q5, q0 ; ps1: p1 offset to convert to a signed value
veor q8, q8, q0 ; qs1: q1 offset to convert to a signed value
- vadd.u8 q1, q1, q1 ; flimit * 2
- vadd.u8 q1, q1, q13 ; flimit * 2 + limit
- vcge.u8 q15, q1, q15 ; (abs(p0 - q0)*2 + abs(p1-q1)/2 > flimit*2 + limit)*-1
+ vcge.u8 q15, q1, q15 ; (abs(p0 - q0)*2 + abs(p1-q1)/2 > limit)*-1
-;;;;;;;;;;
- ;vqsub.s8 q2, q7, q6 ; ( qs0 - ps0)
vsubl.s8 q2, d14, d12 ; ( qs0 - ps0)
vsubl.s8 q3, d15, d13
vqsub.s8 q4, q5, q8 ; q4: vp8_filter = vp8_signed_char_clamp(ps1-qs1)
- ;vmul.i8 q2, q2, q10 ; 3 * ( qs0 - ps0)
- vadd.s16 q11, q2, q2 ; 3 * ( qs0 - ps0)
- vadd.s16 q12, q3, q3
+ vmul.s16 q2, q2, q13 ; 3 * ( qs0 - ps0)
+ vmul.s16 q3, q3, q13
+ vmov.u8 q10, #0x03 ; 0x03
vmov.u8 q9, #0x04 ; 0x04
- vadd.s16 q2, q2, q11
- vadd.s16 q3, q3, q12
-
vaddw.s8 q2, q2, d8 ; vp8_filter + 3 * ( qs0 - ps0)
vaddw.s8 q3, q3, d9
- ;vqadd.s8 q4, q4, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
vqmovn.s16 d8, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
vqmovn.s16 d9, q3
-;;;;;;;;;;;;;
- vand q4, q4, q15 ; vp8_filter &= mask
+ vand q14, q4, q15 ; vp8_filter &= mask
- vqadd.s8 q2, q4, q10 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
- vqadd.s8 q4, q4, q9 ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
+ vqadd.s8 q2, q14, q10 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
+ vqadd.s8 q3, q14, q9 ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
vshr.s8 q2, q2, #3 ; Filter2 >>= 3
- vshr.s8 q4, q4, #3 ; Filter1 >>= 3
+ vshr.s8 q4, q3, #3 ; Filter1 >>= 3
- sub r0, r0, r1, lsl #1
+ sub r0, r0, r1
;calculate output
vqadd.s8 q11, q6, q2 ; u = vp8_signed_char_clamp(ps0 + Filter2)
vqsub.s8 q10, q7, q4 ; u = vp8_signed_char_clamp(qs0 - Filter1)
- add r3, r0, r1
-
veor q6, q11, q0 ; *op0 = u^0x80
veor q7, q10, q0 ; *oq0 = u^0x80
- vst1.u8 {q6}, [r0] ; store op0
- vst1.u8 {q7}, [r3] ; store oq0
+ vst1.u8 {q6}, [r3@128] ; store op0
+ vst1.u8 {q7}, [r0@128] ; store oq0
bx lr
ENDP ; |vp8_loop_filter_simple_horizontal_edge_neon|
-;-----------------
+; r0 unsigned char *y
+; r1 int ystride
+; r2 const unsigned char *blimit
+
+|vp8_loop_filter_bhs_neon| PROC
+ push {r4, lr}
+ ldrb r3, [r2] ; load blim from mem
+ vdup.s8 q1, r3 ; duplicate blim
+
+ add r0, r0, r1, lsl #2 ; src = y_ptr + 4 * y_stride
+ bl vp8_loop_filter_simple_horizontal_edge_neon
+ ; vp8_loop_filter_simple_horizontal_edge_neon preserves r0, r1 and q1
+ add r0, r0, r1, lsl #2 ; src = y_ptr + 8* y_stride
+ bl vp8_loop_filter_simple_horizontal_edge_neon
+ add r0, r0, r1, lsl #2 ; src = y_ptr + 12 * y_stride
+ pop {r4, lr}
+ b vp8_loop_filter_simple_horizontal_edge_neon
+ ENDP ;|vp8_loop_filter_bhs_neon|
+
+; r0 unsigned char *y
+; r1 int ystride
+; r2 const unsigned char *blimit
+
+|vp8_loop_filter_mbhs_neon| PROC
+ ldrb r3, [r2] ; load blim from mem
+ vdup.s8 q1, r3 ; duplicate mblim
+ b vp8_loop_filter_simple_horizontal_edge_neon
+ ENDP ;|vp8_loop_filter_bhs_neon|
END
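Note: for reference, the per-pixel arithmetic that the NEON code above vectorizes 16-wide is the VP8 simple filter. A scalar sketch, mirroring the comments in the assembly rather than the exact C in loopfilter_filters.c:

    #include <stdlib.h>

    static signed char clamp_s8(int t)           /* vp8_signed_char_clamp */
    {
        if (t < -128) t = -128;
        if (t >  127) t =  127;
        return (signed char)t;
    }

    /* One column across the edge: p1 p0 | q0 q1. */
    static void simple_filter(unsigned char blimit,
                              unsigned char *op1, unsigned char *op0,
                              unsigned char *oq0, unsigned char *oq1)
    {
        /* mask: abs(p0-q0)*2 + abs(p1-q1)/2 <= blimit */
        int mask = (abs(*op0 - *oq0) * 2 + abs(*op1 - *oq1) / 2) <= blimit
                   ? -1 : 0;

        /* offset by 0x80 to work on signed values centred on zero */
        signed char ps1 = (signed char)(*op1 ^ 0x80);
        signed char ps0 = (signed char)(*op0 ^ 0x80);
        signed char qs0 = (signed char)(*oq0 ^ 0x80);
        signed char qs1 = (signed char)(*oq1 ^ 0x80);

        signed char filter = clamp_s8(ps1 - qs1);
        filter = clamp_s8(filter + 3 * (qs0 - ps0));
        filter &= mask;

        /* the +4/+3 split rounds filter/8 symmetrically across the edge */
        signed char Filter1 = clamp_s8(filter + 4) >> 3;
        signed char Filter2 = clamp_s8(filter + 3) >> 3;

        *oq0 = (unsigned char)(clamp_s8(qs0 - Filter1) ^ 0x80);
        *op0 = (unsigned char)(clamp_s8(ps0 + Filter2) ^ 0x80);
    }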
diff --git a/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm b/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm
index a7f7b690e..e690df2f7 100644
--- a/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm
+++ b/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm
@@ -9,59 +9,54 @@
;
- EXPORT |vp8_loop_filter_simple_vertical_edge_neon|
+ ;EXPORT |vp8_loop_filter_simple_vertical_edge_neon|
+ EXPORT |vp8_loop_filter_bvs_neon|
+ EXPORT |vp8_loop_filter_mbvs_neon|
ARM
- REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
-;Note: flimit, limit, and thresh should be positive numbers. All 16 elements in flimit
-;are equal. So, in the code, only one load is needed
-;for flimit. Same way applies to limit and thresh.
-; r0 unsigned char *s,
-; r1 int p, //pitch
-; r2 const signed char *flimit,
-; r3 const signed char *limit,
-; stack(r4) const signed char *thresh (unused)
-; //stack(r5) int count --unused
+
+; r0 unsigned char *s, PRESERVE
+; r1 int p, PRESERVE
+; q1 limit, PRESERVE
|vp8_loop_filter_simple_vertical_edge_neon| PROC
sub r0, r0, #2 ; move src pointer down by 2 columns
-
- vld4.8 {d6[0], d7[0], d8[0], d9[0]}, [r0], r1
- vld1.s8 {d2[], d3[]}, [r2] ; flimit
- vld1.s8 {d26[], d27[]}, [r3] ; limit -> q13
- vld4.8 {d6[1], d7[1], d8[1], d9[1]}, [r0], r1
- vld4.8 {d6[2], d7[2], d8[2], d9[2]}, [r0], r1
- vld4.8 {d6[3], d7[3], d8[3], d9[3]}, [r0], r1
- vld4.8 {d6[4], d7[4], d8[4], d9[4]}, [r0], r1
- vld4.8 {d6[5], d7[5], d8[5], d9[5]}, [r0], r1
- vld4.8 {d6[6], d7[6], d8[6], d9[6]}, [r0], r1
- vld4.8 {d6[7], d7[7], d8[7], d9[7]}, [r0], r1
-
- vld4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
- vmov.u8 q0, #0x80 ; 0x80
- vld4.8 {d10[1], d11[1], d12[1], d13[1]}, [r0], r1
- vmov.u8 q11, #0x03 ; 0x03
- vld4.8 {d10[2], d11[2], d12[2], d13[2]}, [r0], r1
- vmov.u8 q12, #0x04 ; 0x04
- vld4.8 {d10[3], d11[3], d12[3], d13[3]}, [r0], r1
- vld4.8 {d10[4], d11[4], d12[4], d13[4]}, [r0], r1
- vld4.8 {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
- vld4.8 {d10[6], d11[6], d12[6], d13[6]}, [r0], r1
- vld4.8 {d10[7], d11[7], d12[7], d13[7]}, [r0], r1
+ add r12, r1, r1
+ add r3, r0, r1
+
+ vld4.8 {d6[0], d7[0], d8[0], d9[0]}, [r0], r12
+ vld4.8 {d6[1], d7[1], d8[1], d9[1]}, [r3], r12
+ vld4.8 {d6[2], d7[2], d8[2], d9[2]}, [r0], r12
+ vld4.8 {d6[3], d7[3], d8[3], d9[3]}, [r3], r12
+ vld4.8 {d6[4], d7[4], d8[4], d9[4]}, [r0], r12
+ vld4.8 {d6[5], d7[5], d8[5], d9[5]}, [r3], r12
+ vld4.8 {d6[6], d7[6], d8[6], d9[6]}, [r0], r12
+ vld4.8 {d6[7], d7[7], d8[7], d9[7]}, [r3], r12
+
+ vld4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r12
+ vld4.8 {d10[1], d11[1], d12[1], d13[1]}, [r3], r12
+ vld4.8 {d10[2], d11[2], d12[2], d13[2]}, [r0], r12
+ vld4.8 {d10[3], d11[3], d12[3], d13[3]}, [r3], r12
+ vld4.8 {d10[4], d11[4], d12[4], d13[4]}, [r0], r12
+ vld4.8 {d10[5], d11[5], d12[5], d13[5]}, [r3], r12
+ vld4.8 {d10[6], d11[6], d12[6], d13[6]}, [r0], r12
+ vld4.8 {d10[7], d11[7], d12[7], d13[7]}, [r3]
vswp d7, d10
vswp d12, d9
- ;vswp q4, q5 ; p1:q3, p0:q5, q0:q4, q1:q6
;vp8_filter_mask() function
;vp8_hevmask() function
sub r0, r0, r1, lsl #4
vabd.u8 q15, q5, q4 ; abs(p0 - q0)
vabd.u8 q14, q3, q6 ; abs(p1 - q1)
+
vqadd.u8 q15, q15, q15 ; abs(p0 - q0) * 2
vshr.u8 q14, q14, #1 ; abs(p1 - q1) / 2
+ vmov.u8 q0, #0x80 ; 0x80
+ vmov.s16 q11, #3
vqadd.u8 q15, q15, q14 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
veor q4, q4, q0 ; qs0: q0 offset to convert to a signed value
@@ -69,80 +64,91 @@
veor q3, q3, q0 ; ps1: p1 offset to convert to a signed value
veor q6, q6, q0 ; qs1: q1 offset to convert to a signed value
- vadd.u8 q1, q1, q1 ; flimit * 2
- vadd.u8 q1, q1, q13 ; flimit * 2 + limit
vcge.u8 q15, q1, q15 ; abs(p0 - q0)*2 + abs(p1-q1)/2 > flimit*2 + limit)*-1
- ;vp8_filter() function
-;;;;;;;;;;
- ;vqsub.s8 q2, q5, q4 ; ( qs0 - ps0)
vsubl.s8 q2, d8, d10 ; ( qs0 - ps0)
vsubl.s8 q13, d9, d11
- vqsub.s8 q1, q3, q6 ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
+ vqsub.s8 q14, q3, q6 ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
+
+ vmul.s16 q2, q2, q11 ; 3 * ( qs0 - ps0)
+ vmul.s16 q13, q13, q11
- ;vmul.i8 q2, q2, q11 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
- vadd.s16 q10, q2, q2 ; 3 * ( qs0 - ps0)
- vadd.s16 q14, q13, q13
- vadd.s16 q2, q2, q10
- vadd.s16 q13, q13, q14
+ vmov.u8 q11, #0x03 ; 0x03
+ vmov.u8 q12, #0x04 ; 0x04
- ;vqadd.s8 q1, q1, q2
- vaddw.s8 q2, q2, d2 ; vp8_filter + 3 * ( qs0 - ps0)
- vaddw.s8 q13, q13, d3
+ vaddw.s8 q2, q2, d28 ; vp8_filter + 3 * ( qs0 - ps0)
+ vaddw.s8 q13, q13, d29
- vqmovn.s16 d2, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
- vqmovn.s16 d3, q13
+ vqmovn.s16 d28, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
+ vqmovn.s16 d29, q13
add r0, r0, #1
- add r2, r0, r1
-;;;;;;;;;;;
+ add r3, r0, r1
- vand q1, q1, q15 ; vp8_filter &= mask
+ vand q14, q14, q15 ; vp8_filter &= mask
- vqadd.s8 q2, q1, q11 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
- vqadd.s8 q1, q1, q12 ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
+ vqadd.s8 q2, q14, q11 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
+ vqadd.s8 q3, q14, q12 ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
vshr.s8 q2, q2, #3 ; Filter2 >>= 3
- vshr.s8 q1, q1, #3 ; Filter1 >>= 3
+ vshr.s8 q14, q3, #3 ; Filter1 >>= 3
;calculate output
- vqsub.s8 q10, q4, q1 ; u = vp8_signed_char_clamp(qs0 - Filter1)
vqadd.s8 q11, q5, q2 ; u = vp8_signed_char_clamp(ps0 + Filter2)
+ vqsub.s8 q10, q4, q14 ; u = vp8_signed_char_clamp(qs0 - Filter1)
- veor q7, q10, q0 ; *oq0 = u^0x80
veor q6, q11, q0 ; *op0 = u^0x80
-
- add r3, r2, r1
+ veor q7, q10, q0 ; *oq0 = u^0x80
+ add r12, r1, r1
vswp d13, d14
- add r12, r3, r1
;store op1, op0, oq0, oq1
- vst2.8 {d12[0], d13[0]}, [r0]
- vst2.8 {d12[1], d13[1]}, [r2]
- vst2.8 {d12[2], d13[2]}, [r3]
- vst2.8 {d12[3], d13[3]}, [r12], r1
- add r0, r12, r1
- vst2.8 {d12[4], d13[4]}, [r12]
- vst2.8 {d12[5], d13[5]}, [r0], r1
- add r2, r0, r1
- vst2.8 {d12[6], d13[6]}, [r0]
- vst2.8 {d12[7], d13[7]}, [r2], r1
- add r3, r2, r1
- vst2.8 {d14[0], d15[0]}, [r2]
- vst2.8 {d14[1], d15[1]}, [r3], r1
- add r12, r3, r1
- vst2.8 {d14[2], d15[2]}, [r3]
- vst2.8 {d14[3], d15[3]}, [r12], r1
- add r0, r12, r1
- vst2.8 {d14[4], d15[4]}, [r12]
- vst2.8 {d14[5], d15[5]}, [r0], r1
- add r2, r0, r1
- vst2.8 {d14[6], d15[6]}, [r0]
- vst2.8 {d14[7], d15[7]}, [r2]
+ vst2.8 {d12[0], d13[0]}, [r0], r12
+ vst2.8 {d12[1], d13[1]}, [r3], r12
+ vst2.8 {d12[2], d13[2]}, [r0], r12
+ vst2.8 {d12[3], d13[3]}, [r3], r12
+ vst2.8 {d12[4], d13[4]}, [r0], r12
+ vst2.8 {d12[5], d13[5]}, [r3], r12
+ vst2.8 {d12[6], d13[6]}, [r0], r12
+ vst2.8 {d12[7], d13[7]}, [r3], r12
+ vst2.8 {d14[0], d15[0]}, [r0], r12
+ vst2.8 {d14[1], d15[1]}, [r3], r12
+ vst2.8 {d14[2], d15[2]}, [r0], r12
+ vst2.8 {d14[3], d15[3]}, [r3], r12
+ vst2.8 {d14[4], d15[4]}, [r0], r12
+ vst2.8 {d14[5], d15[5]}, [r3], r12
+ vst2.8 {d14[6], d15[6]}, [r0], r12
+ vst2.8 {d14[7], d15[7]}, [r3]
bx lr
ENDP ; |vp8_loop_filter_simple_vertical_edge_neon|
-;-----------------
-
+; r0 unsigned char *y
+; r1 int ystride
+; r2 const unsigned char *blimit
+
+|vp8_loop_filter_bvs_neon| PROC
+ push {r4, lr}
+ ldrb r3, [r2] ; load blim from mem
+ mov r4, r0
+ add r0, r0, #4
+ vdup.s8 q1, r3 ; duplicate blim
+ bl vp8_loop_filter_simple_vertical_edge_neon
+ ; vp8_loop_filter_simple_vertical_edge_neon preserves r1 and q1
+ add r0, r4, #8
+ bl vp8_loop_filter_simple_vertical_edge_neon
+ add r0, r4, #12
+ pop {r4, lr}
+ b vp8_loop_filter_simple_vertical_edge_neon
+ ENDP ;|vp8_loop_filter_bvs_neon|
+
+; r0 unsigned char *y
+; r1 int ystride
+; r2 const unsigned char *blimit
+
+|vp8_loop_filter_mbvs_neon| PROC
+ ldrb r3, [r2] ; load mblim from mem
+ vdup.s8 q1, r3 ; duplicate mblim
+ b vp8_loop_filter_simple_vertical_edge_neon
+ ENDP ;|vp8_loop_filter_bvs_neon|
END
diff --git a/vp8/common/arm/neon/mbloopfilter_neon.asm b/vp8/common/arm/neon/mbloopfilter_neon.asm
index 72f0f9271..f41c156df 100644
--- a/vp8/common/arm/neon/mbloopfilter_neon.asm
+++ b/vp8/common/arm/neon/mbloopfilter_neon.asm
@@ -14,155 +14,143 @@
EXPORT |vp8_mbloop_filter_vertical_edge_y_neon|
EXPORT |vp8_mbloop_filter_vertical_edge_uv_neon|
ARM
- REQUIRE8
- PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
-; flimit, limit, and thresh should be positive numbers.
-; All 16 elements in these variables are equal.
-
; void vp8_mbloop_filter_horizontal_edge_y_neon(unsigned char *src, int pitch,
-; const signed char *flimit,
-; const signed char *limit,
-; const signed char *thresh,
-; int count)
+; const unsigned char *blimit,
+; const unsigned char *limit,
+; const unsigned char *thresh)
; r0 unsigned char *src,
; r1 int pitch,
-; r2 const signed char *flimit,
-; r3 const signed char *limit,
-; sp const signed char *thresh,
-; sp+4 int count (unused)
+; r2 unsigned char blimit
+; r3 unsigned char limit
+; sp unsigned char thresh,
|vp8_mbloop_filter_horizontal_edge_y_neon| PROC
- stmdb sp!, {lr}
- sub r0, r0, r1, lsl #2 ; move src pointer down by 4 lines
- ldr r12, [sp, #4] ; load thresh pointer
-
- vld1.u8 {q3}, [r0], r1 ; p3
- vld1.s8 {d2[], d3[]}, [r3] ; limit
- vld1.u8 {q4}, [r0], r1 ; p2
- vld1.s8 {d4[], d5[]}, [r12] ; thresh
- vld1.u8 {q5}, [r0], r1 ; p1
- vld1.u8 {q6}, [r0], r1 ; p0
- vld1.u8 {q7}, [r0], r1 ; q0
- vld1.u8 {q8}, [r0], r1 ; q1
- vld1.u8 {q9}, [r0], r1 ; q2
- vld1.u8 {q10}, [r0], r1 ; q3
+ push {lr}
+ add r1, r1, r1 ; double stride
+ ldr r12, [sp, #4] ; load thresh
+ sub r0, r0, r1, lsl #1 ; move src pointer down by 4 lines
+ vdup.u8 q2, r12 ; thresh
+ add r12, r0, r1, lsr #1 ; move src pointer up by 1 line
+
+ vld1.u8 {q3}, [r0@128], r1 ; p3
+ vld1.u8 {q4}, [r12@128], r1 ; p2
+ vld1.u8 {q5}, [r0@128], r1 ; p1
+ vld1.u8 {q6}, [r12@128], r1 ; p0
+ vld1.u8 {q7}, [r0@128], r1 ; q0
+ vld1.u8 {q8}, [r12@128], r1 ; q1
+ vld1.u8 {q9}, [r0@128], r1 ; q2
+ vld1.u8 {q10}, [r12@128], r1 ; q3
bl vp8_mbloop_filter_neon
- sub r0, r0, r1, lsl #3
- add r0, r0, r1
- add r2, r0, r1
- add r3, r2, r1
-
- vst1.u8 {q4}, [r0] ; store op2
- vst1.u8 {q5}, [r2] ; store op1
- vst1.u8 {q6}, [r3], r1 ; store op0
- add r12, r3, r1
- vst1.u8 {q7}, [r3] ; store oq0
- vst1.u8 {q8}, [r12], r1 ; store oq1
- vst1.u8 {q9}, [r12] ; store oq2
-
- ldmia sp!, {pc}
+ sub r12, r12, r1, lsl #2
+ add r0, r12, r1, lsr #1
+
+    vst1.u8     {q4}, [r12@128], r1        ; store op2
+    vst1.u8     {q5}, [r0@128], r1         ; store op1
+    vst1.u8     {q6}, [r12@128], r1        ; store op0
+    vst1.u8     {q7}, [r0@128], r1         ; store oq0
+    vst1.u8     {q8}, [r12@128]            ; store oq1
+    vst1.u8     {q9}, [r0@128]             ; store oq2
+
+ pop {pc}
ENDP ; |vp8_mbloop_filter_horizontal_edge_y_neon|
; void vp8_mbloop_filter_horizontal_edge_uv_neon(unsigned char *u, int pitch,
-; const signed char *flimit,
-; const signed char *limit,
-; const signed char *thresh,
+; const unsigned char *blimit,
+; const unsigned char *limit,
+; const unsigned char *thresh,
; unsigned char *v)
; r0 unsigned char *u,
; r1 int pitch,
-; r2 const signed char *flimit,
-; r3 const signed char *limit,
-; sp const signed char *thresh,
+; r2 unsigned char blimit
+; r3 unsigned char limit
+; sp unsigned char thresh,
; sp+4 unsigned char *v
+
|vp8_mbloop_filter_horizontal_edge_uv_neon| PROC
- stmdb sp!, {lr}
- sub r0, r0, r1, lsl #2 ; move u pointer down by 4 lines
- vld1.s8 {d2[], d3[]}, [r3] ; limit
- ldr r3, [sp, #8] ; load v ptr
- ldr r12, [sp, #4] ; load thresh pointer
- sub r3, r3, r1, lsl #2 ; move v pointer down by 4 lines
-
- vld1.u8 {d6}, [r0], r1 ; p3
- vld1.u8 {d7}, [r3], r1 ; p3
- vld1.u8 {d8}, [r0], r1 ; p2
- vld1.u8 {d9}, [r3], r1 ; p2
- vld1.u8 {d10}, [r0], r1 ; p1
- vld1.u8 {d11}, [r3], r1 ; p1
- vld1.u8 {d12}, [r0], r1 ; p0
- vld1.u8 {d13}, [r3], r1 ; p0
- vld1.u8 {d14}, [r0], r1 ; q0
- vld1.u8 {d15}, [r3], r1 ; q0
- vld1.u8 {d16}, [r0], r1 ; q1
- vld1.u8 {d17}, [r3], r1 ; q1
- vld1.u8 {d18}, [r0], r1 ; q2
- vld1.u8 {d19}, [r3], r1 ; q2
- vld1.u8 {d20}, [r0], r1 ; q3
- vld1.u8 {d21}, [r3], r1 ; q3
-
- vld1.s8 {d4[], d5[]}, [r12] ; thresh
+ push {lr}
+ ldr r12, [sp, #4] ; load thresh
+ sub r0, r0, r1, lsl #2 ; move u pointer down by 4 lines
+ vdup.u8 q2, r12 ; thresh
+ ldr r12, [sp, #8] ; load v ptr
+ sub r12, r12, r1, lsl #2 ; move v pointer down by 4 lines
+
+ vld1.u8 {d6}, [r0@64], r1 ; p3
+ vld1.u8 {d7}, [r12@64], r1 ; p3
+ vld1.u8 {d8}, [r0@64], r1 ; p2
+ vld1.u8 {d9}, [r12@64], r1 ; p2
+ vld1.u8 {d10}, [r0@64], r1 ; p1
+ vld1.u8 {d11}, [r12@64], r1 ; p1
+ vld1.u8 {d12}, [r0@64], r1 ; p0
+ vld1.u8 {d13}, [r12@64], r1 ; p0
+ vld1.u8 {d14}, [r0@64], r1 ; q0
+ vld1.u8 {d15}, [r12@64], r1 ; q0
+ vld1.u8 {d16}, [r0@64], r1 ; q1
+ vld1.u8 {d17}, [r12@64], r1 ; q1
+ vld1.u8 {d18}, [r0@64], r1 ; q2
+ vld1.u8 {d19}, [r12@64], r1 ; q2
+ vld1.u8 {d20}, [r0@64], r1 ; q3
+ vld1.u8 {d21}, [r12@64], r1 ; q3
bl vp8_mbloop_filter_neon
sub r0, r0, r1, lsl #3
- sub r3, r3, r1, lsl #3
+ sub r12, r12, r1, lsl #3
add r0, r0, r1
- add r3, r3, r1
-
- vst1.u8 {d8}, [r0], r1 ; store u op2
- vst1.u8 {d9}, [r3], r1 ; store v op2
- vst1.u8 {d10}, [r0], r1 ; store u op1
- vst1.u8 {d11}, [r3], r1 ; store v op1
- vst1.u8 {d12}, [r0], r1 ; store u op0
- vst1.u8 {d13}, [r3], r1 ; store v op0
- vst1.u8 {d14}, [r0], r1 ; store u oq0
- vst1.u8 {d15}, [r3], r1 ; store v oq0
- vst1.u8 {d16}, [r0], r1 ; store u oq1
- vst1.u8 {d17}, [r3], r1 ; store v oq1
- vst1.u8 {d18}, [r0], r1 ; store u oq2
- vst1.u8 {d19}, [r3], r1 ; store v oq2
-
- ldmia sp!, {pc}
+ add r12, r12, r1
+
+ vst1.u8 {d8}, [r0@64], r1 ; store u op2
+ vst1.u8 {d9}, [r12@64], r1 ; store v op2
+ vst1.u8 {d10}, [r0@64], r1 ; store u op1
+ vst1.u8 {d11}, [r12@64], r1 ; store v op1
+ vst1.u8 {d12}, [r0@64], r1 ; store u op0
+ vst1.u8 {d13}, [r12@64], r1 ; store v op0
+ vst1.u8 {d14}, [r0@64], r1 ; store u oq0
+ vst1.u8 {d15}, [r12@64], r1 ; store v oq0
+ vst1.u8 {d16}, [r0@64], r1 ; store u oq1
+ vst1.u8 {d17}, [r12@64], r1 ; store v oq1
+ vst1.u8 {d18}, [r0@64], r1 ; store u oq2
+ vst1.u8 {d19}, [r12@64], r1 ; store v oq2
+
+ pop {pc}
ENDP ; |vp8_mbloop_filter_horizontal_edge_uv_neon|
; void vp8_mbloop_filter_vertical_edge_y_neon(unsigned char *src, int pitch,
-; const signed char *flimit,
-; const signed char *limit,
-; const signed char *thresh,
-; int count)
+; const unsigned char *blimit,
+; const unsigned char *limit,
+; const unsigned char *thresh)
; r0 unsigned char *src,
; r1 int pitch,
-; r2 const signed char *flimit,
-; r3 const signed char *limit,
-; sp const signed char *thresh,
-; sp+4 int count (unused)
+; r2 unsigned char blimit
+; r3 unsigned char limit
+; sp unsigned char thresh,
|vp8_mbloop_filter_vertical_edge_y_neon| PROC
- stmdb sp!, {lr}
+ push {lr}
+ ldr r12, [sp, #4] ; load thresh
sub r0, r0, #4 ; move src pointer down by 4 columns
+ vdup.s8 q2, r12 ; thresh
+ add r12, r0, r1, lsl #3 ; move src pointer down by 8 lines
vld1.u8 {d6}, [r0], r1 ; load first 8-line src data
- ldr r12, [sp, #4] ; load thresh pointer
+ vld1.u8 {d7}, [r12], r1 ; load second 8-line src data
vld1.u8 {d8}, [r0], r1
- sub sp, sp, #32
+ vld1.u8 {d9}, [r12], r1
vld1.u8 {d10}, [r0], r1
+ vld1.u8 {d11}, [r12], r1
vld1.u8 {d12}, [r0], r1
+ vld1.u8 {d13}, [r12], r1
vld1.u8 {d14}, [r0], r1
+ vld1.u8 {d15}, [r12], r1
vld1.u8 {d16}, [r0], r1
+ vld1.u8 {d17}, [r12], r1
vld1.u8 {d18}, [r0], r1
+ vld1.u8 {d19}, [r12], r1
vld1.u8 {d20}, [r0], r1
-
- vld1.u8 {d7}, [r0], r1 ; load second 8-line src data
- vld1.u8 {d9}, [r0], r1
- vld1.u8 {d11}, [r0], r1
- vld1.u8 {d13}, [r0], r1
- vld1.u8 {d15}, [r0], r1
- vld1.u8 {d17}, [r0], r1
- vld1.u8 {d19}, [r0], r1
- vld1.u8 {d21}, [r0], r1
+ vld1.u8 {d21}, [r12], r1
;transpose to 8x16 matrix
vtrn.32 q3, q7
@@ -180,29 +168,17 @@
vtrn.8 q7, q8
vtrn.8 q9, q10
- vld1.s8 {d4[], d5[]}, [r12] ; thresh
- vld1.s8 {d2[], d3[]}, [r3] ; limit
- mov r12, sp
- vst1.u8 {q3}, [r12]!
- vst1.u8 {q10}, [r12]!
+ sub r0, r0, r1, lsl #3
bl vp8_mbloop_filter_neon
- sub r0, r0, r1, lsl #4
-
- add r2, r0, r1
-
- add r3, r2, r1
-
- vld1.u8 {q3}, [sp]!
- vld1.u8 {q10}, [sp]!
+ sub r12, r12, r1, lsl #3
;transpose to 16x8 matrix
vtrn.32 q3, q7
vtrn.32 q4, q8
vtrn.32 q5, q9
vtrn.32 q6, q10
- add r12, r3, r1
vtrn.16 q3, q5
vtrn.16 q4, q6
@@ -215,36 +191,30 @@
vtrn.8 q9, q10
;store op2, op1, op0, oq0, oq1, oq2
- vst1.8 {d6}, [r0]
- vst1.8 {d8}, [r2]
- vst1.8 {d10}, [r3]
- vst1.8 {d12}, [r12], r1
- add r0, r12, r1
- vst1.8 {d14}, [r12]
- vst1.8 {d16}, [r0], r1
- add r2, r0, r1
- vst1.8 {d18}, [r0]
- vst1.8 {d20}, [r2], r1
- add r3, r2, r1
- vst1.8 {d7}, [r2]
- vst1.8 {d9}, [r3], r1
- add r12, r3, r1
- vst1.8 {d11}, [r3]
+ vst1.8 {d6}, [r0], r1
+ vst1.8 {d7}, [r12], r1
+ vst1.8 {d8}, [r0], r1
+ vst1.8 {d9}, [r12], r1
+ vst1.8 {d10}, [r0], r1
+ vst1.8 {d11}, [r12], r1
+ vst1.8 {d12}, [r0], r1
vst1.8 {d13}, [r12], r1
- add r0, r12, r1
- vst1.8 {d15}, [r12]
- vst1.8 {d17}, [r0], r1
- add r2, r0, r1
- vst1.8 {d19}, [r0]
- vst1.8 {d21}, [r2]
-
- ldmia sp!, {pc}
+ vst1.8 {d14}, [r0], r1
+ vst1.8 {d15}, [r12], r1
+ vst1.8 {d16}, [r0], r1
+ vst1.8 {d17}, [r12], r1
+ vst1.8 {d18}, [r0], r1
+ vst1.8 {d19}, [r12], r1
+ vst1.8 {d20}, [r0]
+ vst1.8 {d21}, [r12]
+
+ pop {pc}
ENDP ; |vp8_mbloop_filter_vertical_edge_y_neon|
; void vp8_mbloop_filter_vertical_edge_uv_neon(unsigned char *u, int pitch,
-; const signed char *flimit,
-; const signed char *limit,
-; const signed char *thresh,
+; const unsigned char *blimit,
+; const unsigned char *limit,
+; const unsigned char *thresh,
; unsigned char *v)
; r0 unsigned char *u,
; r1 int pitch,
@@ -253,30 +223,29 @@
; sp const signed char *thresh,
; sp+4 unsigned char *v
|vp8_mbloop_filter_vertical_edge_uv_neon| PROC
- stmdb sp!, {lr}
- sub r0, r0, #4 ; move src pointer down by 4 columns
- vld1.s8 {d2[], d3[]}, [r3] ; limit
- ldr r3, [sp, #8] ; load v ptr
- ldr r12, [sp, #4] ; load thresh pointer
-
- sub r3, r3, #4 ; move v pointer down by 4 columns
+ push {lr}
+ ldr r12, [sp, #4] ; load thresh
+ sub r0, r0, #4 ; move u pointer down by 4 columns
+ vdup.u8 q2, r12 ; thresh
+ ldr r12, [sp, #8] ; load v ptr
+ sub r12, r12, #4 ; move v pointer down by 4 columns
vld1.u8 {d6}, [r0], r1 ;load u data
- vld1.u8 {d7}, [r3], r1 ;load v data
+ vld1.u8 {d7}, [r12], r1 ;load v data
vld1.u8 {d8}, [r0], r1
- vld1.u8 {d9}, [r3], r1
+ vld1.u8 {d9}, [r12], r1
vld1.u8 {d10}, [r0], r1
- vld1.u8 {d11}, [r3], r1
+ vld1.u8 {d11}, [r12], r1
vld1.u8 {d12}, [r0], r1
- vld1.u8 {d13}, [r3], r1
+ vld1.u8 {d13}, [r12], r1
vld1.u8 {d14}, [r0], r1
- vld1.u8 {d15}, [r3], r1
+ vld1.u8 {d15}, [r12], r1
vld1.u8 {d16}, [r0], r1
- vld1.u8 {d17}, [r3], r1
+ vld1.u8 {d17}, [r12], r1
vld1.u8 {d18}, [r0], r1
- vld1.u8 {d19}, [r3], r1
+ vld1.u8 {d19}, [r12], r1
vld1.u8 {d20}, [r0], r1
- vld1.u8 {d21}, [r3], r1
+ vld1.u8 {d21}, [r12], r1
;transpose to 8x16 matrix
vtrn.32 q3, q7
@@ -294,19 +263,11 @@
vtrn.8 q7, q8
vtrn.8 q9, q10
- sub sp, sp, #32
- vld1.s8 {d4[], d5[]}, [r12] ; thresh
- mov r12, sp
- vst1.u8 {q3}, [r12]!
- vst1.u8 {q10}, [r12]!
+ sub r0, r0, r1, lsl #3
bl vp8_mbloop_filter_neon
- sub r0, r0, r1, lsl #3
- sub r3, r3, r1, lsl #3
-
- vld1.u8 {q3}, [sp]!
- vld1.u8 {q10}, [sp]!
+ sub r12, r12, r1, lsl #3
;transpose to 16x8 matrix
vtrn.32 q3, q7
@@ -326,23 +287,23 @@
;store op2, op1, op0, oq0, oq1, oq2
vst1.8 {d6}, [r0], r1
- vst1.8 {d7}, [r3], r1
+ vst1.8 {d7}, [r12], r1
vst1.8 {d8}, [r0], r1
- vst1.8 {d9}, [r3], r1
+ vst1.8 {d9}, [r12], r1
vst1.8 {d10}, [r0], r1
- vst1.8 {d11}, [r3], r1
+ vst1.8 {d11}, [r12], r1
vst1.8 {d12}, [r0], r1
- vst1.8 {d13}, [r3], r1
+ vst1.8 {d13}, [r12], r1
vst1.8 {d14}, [r0], r1
- vst1.8 {d15}, [r3], r1
+ vst1.8 {d15}, [r12], r1
vst1.8 {d16}, [r0], r1
- vst1.8 {d17}, [r3], r1
+ vst1.8 {d17}, [r12], r1
vst1.8 {d18}, [r0], r1
- vst1.8 {d19}, [r3], r1
- vst1.8 {d20}, [r0], r1
- vst1.8 {d21}, [r3], r1
+ vst1.8 {d19}, [r12], r1
+ vst1.8 {d20}, [r0]
+ vst1.8 {d21}, [r12]
- ldmia sp!, {pc}
+ pop {pc}
ENDP ; |vp8_mbloop_filter_vertical_edge_uv_neon|
; void vp8_mbloop_filter_neon()
@@ -350,26 +311,19 @@
; functions do the necessary load, transpose (if necessary), preserve (if
; necessary) and store.
-; TODO:
-; The vertical filter writes p3/q3 back out because two 4 element writes are
-; much simpler than ordering and writing two 3 element sets (or three 2 elements
-; sets, or whichever other combinations are possible).
-; If we can preserve q3 and q10, the vertical filter will be able to avoid
-; storing those values on the stack and reading them back after the filter.
-
; r0,r1 PRESERVE
-; r2 flimit
-; r3 PRESERVE
-; q1 limit
+; r2 mblimit
+; r3 limit
+
; q2 thresh
-; q3 p3
+; q3 p3 PRESERVE
; q4 p2
; q5 p1
; q6 p0
; q7 q0
; q8 q1
; q9 q2
-; q10 q3
+; q10 q3 PRESERVE
|vp8_mbloop_filter_neon| PROC
@@ -378,12 +332,12 @@
vabd.u8 q12, q4, q5 ; abs(p2 - p1)
vabd.u8 q13, q5, q6 ; abs(p1 - p0)
vabd.u8 q14, q8, q7 ; abs(q1 - q0)
- vabd.u8 q3, q9, q8 ; abs(q2 - q1)
+ vabd.u8 q1, q9, q8 ; abs(q2 - q1)
vabd.u8 q0, q10, q9 ; abs(q3 - q2)
vmax.u8 q11, q11, q12
vmax.u8 q12, q13, q14
- vmax.u8 q3, q3, q0
+ vmax.u8 q1, q1, q0
vmax.u8 q15, q11, q12
vabd.u8 q12, q6, q7 ; abs(p0 - q0)
@@ -391,44 +345,46 @@
; vp8_hevmask
vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh) * -1
vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh) * -1
- vmax.u8 q15, q15, q3
+ vmax.u8 q15, q15, q1
- vld1.s8 {d4[], d5[]}, [r2] ; flimit
+ vdup.u8 q1, r3 ; limit
+ vdup.u8 q2, r2 ; mblimit
vmov.u8 q0, #0x80 ; 0x80
- vadd.u8 q2, q2, q2 ; flimit * 2
- vadd.u8 q2, q2, q1 ; flimit * 2 + limit
vcge.u8 q15, q1, q15
vabd.u8 q1, q5, q8 ; a = abs(p1 - q1)
vqadd.u8 q12, q12, q12 ; b = abs(p0 - q0) * 2
- vshr.u8 q1, q1, #1 ; a = a / 2
- vqadd.u8 q12, q12, q1 ; a = b + a
- vcge.u8 q12, q2, q12 ; (a > flimit * 2 + limit) * -1
+ vmov.u16 q11, #3 ; #3
; vp8_filter
; convert to signed
veor q7, q7, q0 ; qs0
+ vshr.u8 q1, q1, #1 ; a = a / 2
veor q6, q6, q0 ; ps0
veor q5, q5, q0 ; ps1
+
+ vqadd.u8 q12, q12, q1 ; a = b + a
+
veor q8, q8, q0 ; qs1
veor q4, q4, q0 ; ps2
veor q9, q9, q0 ; qs2
vorr q14, q13, q14 ; vp8_hevmask
+ vcge.u8 q12, q2, q12 ; (a > flimit * 2 + limit) * -1
+
vsubl.s8 q2, d14, d12 ; qs0 - ps0
vsubl.s8 q13, d15, d13
vqsub.s8 q1, q5, q8 ; vp8_filter = clamp(ps1-qs1)
- vadd.s16 q10, q2, q2 ; 3 * (qs0 - ps0)
- vadd.s16 q11, q13, q13
+ vmul.i16 q2, q2, q11 ; 3 * ( qs0 - ps0)
+
vand q15, q15, q12 ; vp8_filter_mask
- vadd.s16 q2, q2, q10
- vadd.s16 q13, q13, q11
+ vmul.i16 q13, q13, q11
vmov.u8 q12, #3 ; #3
@@ -447,23 +403,19 @@
vand q13, q1, q14 ; Filter2 &= hev
- vmov.u8 d7, #9 ; #9
-
vqadd.s8 q2, q13, q11 ; Filter1 = clamp(Filter2+4)
vqadd.s8 q13, q13, q12 ; Filter2 = clamp(Filter2+3)
- vmov.u8 d6, #18 ; #18
+ vmov q0, q15
vshr.s8 q2, q2, #3 ; Filter1 >>= 3
vshr.s8 q13, q13, #3 ; Filter2 >>= 3
- vmov q10, q15
+ vmov q11, q15
vmov q12, q15
vqsub.s8 q7, q7, q2 ; qs0 = clamp(qs0 - Filter1)
- vmov.u8 d5, #27 ; #27
-
vqadd.s8 q6, q6, q13 ; ps0 = clamp(ps0 + Filter2)
vbic q1, q1, q14 ; vp8_filter &= ~hev
@@ -471,35 +423,43 @@
; roughly 1/7th difference across boundary
; roughly 2/7th difference across boundary
; roughly 3/7th difference across boundary
- vmov q11, q15
+
+ vmov.u8 d5, #9 ; #9
+ vmov.u8 d4, #18 ; #18
+
vmov q13, q15
vmov q14, q15
- vmlal.s8 q10, d2, d7 ; Filter2 * 9
- vmlal.s8 q11, d3, d7
- vmlal.s8 q12, d2, d6 ; Filter2 * 18
- vmlal.s8 q13, d3, d6
- vmlal.s8 q14, d2, d5 ; Filter2 * 27
+ vmlal.s8 q0, d2, d5 ; 63 + Filter2 * 9
+ vmlal.s8 q11, d3, d5
+ vmov.u8 d5, #27 ; #27
+ vmlal.s8 q12, d2, d4 ; 63 + Filter2 * 18
+ vmlal.s8 q13, d3, d4
+ vmlal.s8 q14, d2, d5 ; 63 + Filter2 * 27
vmlal.s8 q15, d3, d5
- vqshrn.s16 d20, q10, #7 ; u = clamp((63 + Filter2 * 9)>>7)
- vqshrn.s16 d21, q11, #7
+
+ vqshrn.s16 d0, q0, #7 ; u = clamp((63 + Filter2 * 9)>>7)
+ vqshrn.s16 d1, q11, #7
vqshrn.s16 d24, q12, #7 ; u = clamp((63 + Filter2 * 18)>>7)
vqshrn.s16 d25, q13, #7
vqshrn.s16 d28, q14, #7 ; u = clamp((63 + Filter2 * 27)>>7)
vqshrn.s16 d29, q15, #7
- vqsub.s8 q11, q9, q10 ; s = clamp(qs2 - u)
- vqadd.s8 q10, q4, q10 ; s = clamp(ps2 + u)
+ vmov.u8 q1, #0x80 ; 0x80
+
+ vqsub.s8 q11, q9, q0 ; s = clamp(qs2 - u)
+ vqadd.s8 q0, q4, q0 ; s = clamp(ps2 + u)
vqsub.s8 q13, q8, q12 ; s = clamp(qs1 - u)
vqadd.s8 q12, q5, q12 ; s = clamp(ps1 + u)
vqsub.s8 q15, q7, q14 ; s = clamp(qs0 - u)
vqadd.s8 q14, q6, q14 ; s = clamp(ps0 + u)
- veor q9, q11, q0 ; *oq2 = s^0x80
- veor q4, q10, q0 ; *op2 = s^0x80
- veor q8, q13, q0 ; *oq1 = s^0x80
- veor q5, q12, q0 ; *op2 = s^0x80
- veor q7, q15, q0 ; *oq0 = s^0x80
- veor q6, q14, q0 ; *op0 = s^0x80
+
+ veor q9, q11, q1 ; *oq2 = s^0x80
+ veor q4, q0, q1 ; *op2 = s^0x80
+ veor q8, q13, q1 ; *oq1 = s^0x80
+    veor        q5, q12, q1                ; *op1 = s^0x80
+ veor q7, q15, q1 ; *oq0 = s^0x80
+ veor q6, q14, q1 ; *op0 = s^0x80
bx lr
ENDP ; |vp8_mbloop_filter_neon|
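
The rewritten vp8_mbloop_filter_neon keeps the same tap arithmetic as before; only the register allocation and constant setup changed. A minimal scalar sketch of what the vmlal/vqshrn pairs compute, assuming 8-bit saturating semantics; clamp_s8() and the function name are illustrative, not part of the patch.

/* Illustrative sketch only: u_k = clamp((63 + Filter2 * k) >> 7) for
 * k = 27, 18, 9, i.e. roughly 3/7, 2/7 and 1/7 of Filter2, applied
 * symmetrically around the edge. */
static signed char clamp_s8(int v)
{
    return (signed char)(v < -128 ? -128 : (v > 127 ? 127 : v));
}

static void mbfilter_taps_sketch(signed char Filter2, signed char u[3])
{
    u[0] = clamp_s8((63 + Filter2 * 27) >> 7);   /* largest adjustment, p0/q0 */
    u[1] = clamp_s8((63 + Filter2 * 18) >> 7);   /* p1/q1 */
    u[2] = clamp_s8((63 + Filter2 * 9) >> 7);    /* p2/q2 */
}
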
diff --git a/vp8/common/extend.c b/vp8/common/extend.c
index 036bafc5d..a2d325332 100644
--- a/vp8/common/extend.c
+++ b/vp8/common/extend.c
@@ -85,10 +85,10 @@ void vp8_copy_and_extend_frame(YV12_BUFFER_CONFIG *src,
src->y_height, src->y_width,
et, el, eb, er);
- et = (et + 1) >> 1;
- el = (el + 1) >> 1;
- eb = (eb + 1) >> 1;
- er = (er + 1) >> 1;
+ et = dst->border >> 1;
+ el = dst->border >> 1;
+ eb = (dst->border >> 1) + dst->uv_height - src->uv_height;
+ er = (dst->border >> 1) + dst->uv_width - src->uv_width;
copy_and_extend_plane(src->u_buffer, src->uv_stride,
dst->u_buffer, dst->uv_stride,
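
With this change the chroma planes are extended by amounts derived from the destination border and the rounded-up chroma dimensions instead of halving the luma extension amounts. A small worked example of the left/right arithmetic; all sizes below are hypothetical.

/* Illustrative example only -- the sizes are hypothetical. */
#include <assert.h>

static void chroma_extend_example(void)
{
    int border       = 32;   /* dst->border */
    int src_uv_width = 89;   /* (177 + 1) >> 1 for a 177-pixel-wide luma plane */
    int dst_uv_width = 96;   /* allocated (padded) chroma width */

    int el = border >> 1;                                   /* 16 */
    int er = (border >> 1) + dst_uv_width - src_uv_width;   /* 16 + 7 = 23 */

    /* the extra right-hand extension absorbs the rounding difference */
    assert(el == 16 && er == 23);
}
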
diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c
index 133938097..c61629407 100644
--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c
@@ -108,9 +108,9 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_c;
rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_c;
rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_c;
- rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_c;
+ rtcd->loopfilter.simple_mb_v = vp8_loop_filter_simple_vertical_edge_c;
rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_c;
- rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_c;
+ rtcd->loopfilter.simple_mb_h = vp8_loop_filter_simple_horizontal_edge_c;
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_c;
#if CONFIG_POSTPROC || (CONFIG_VP8_ENCODER && CONFIG_INTERNAL_STATS)
diff --git a/vp8/common/loopfilter.c b/vp8/common/loopfilter.c
index a3242716f..be3f53593 100644
--- a/vp8/common/loopfilter.c
+++ b/vp8/common/loopfilter.c
@@ -9,152 +9,149 @@
*/
-#include "vpx_ports/config.h"
+#include "vpx_config.h"
#include "loopfilter.h"
#include "onyxc_int.h"
+#include "vpx_mem/vpx_mem.h"
typedef unsigned char uc;
-
prototype_loopfilter(vp8_loop_filter_horizontal_edge_c);
prototype_loopfilter(vp8_loop_filter_vertical_edge_c);
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_c);
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_c);
-prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_c);
-prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_c);
+
+prototype_simple_loopfilter(vp8_loop_filter_simple_horizontal_edge_c);
+prototype_simple_loopfilter(vp8_loop_filter_simple_vertical_edge_c);
/* Horizontal MB filtering */
-void vp8_loop_filter_mbh_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
- int y_stride, int uv_stride, loop_filter_info *lfi)
+void vp8_loop_filter_mbh_c(unsigned char *y_ptr, unsigned char *u_ptr,
+ unsigned char *v_ptr, int y_stride, int uv_stride,
+ loop_filter_info *lfi)
{
- vp8_mbloop_filter_horizontal_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
+ vp8_mbloop_filter_horizontal_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
- vp8_mbloop_filter_horizontal_edge_c(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
+ vp8_mbloop_filter_horizontal_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
- vp8_mbloop_filter_horizontal_edge_c(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
-}
-
-void vp8_loop_filter_mbhs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
- int y_stride, int uv_stride, loop_filter_info *lfi)
-{
- (void) u_ptr;
- (void) v_ptr;
- (void) uv_stride;
- vp8_loop_filter_simple_horizontal_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
+ vp8_mbloop_filter_horizontal_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
}
/* Vertical MB Filtering */
-void vp8_loop_filter_mbv_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
- int y_stride, int uv_stride, loop_filter_info *lfi)
+void vp8_loop_filter_mbv_c(unsigned char *y_ptr, unsigned char *u_ptr,
+ unsigned char *v_ptr, int y_stride, int uv_stride,
+ loop_filter_info *lfi)
{
- vp8_mbloop_filter_vertical_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
+ vp8_mbloop_filter_vertical_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
- vp8_mbloop_filter_vertical_edge_c(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
+ vp8_mbloop_filter_vertical_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
- vp8_mbloop_filter_vertical_edge_c(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
-}
-
-void vp8_loop_filter_mbvs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
- int y_stride, int uv_stride, loop_filter_info *lfi)
-{
- (void) u_ptr;
- (void) v_ptr;
- (void) uv_stride;
- vp8_loop_filter_simple_vertical_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
+ vp8_mbloop_filter_vertical_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
}
/* Horizontal B Filtering */
-void vp8_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
- int y_stride, int uv_stride, loop_filter_info *lfi)
+void vp8_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr,
+ unsigned char *v_ptr, int y_stride, int uv_stride,
+ loop_filter_info *lfi)
{
- vp8_loop_filter_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
+ vp8_loop_filter_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
+ vp8_loop_filter_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
+ vp8_loop_filter_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
- vp8_loop_filter_horizontal_edge_c(u_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
+ vp8_loop_filter_horizontal_edge_c(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
- vp8_loop_filter_horizontal_edge_c(v_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
+ vp8_loop_filter_horizontal_edge_c(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
}
-void vp8_loop_filter_bhs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
- int y_stride, int uv_stride, loop_filter_info *lfi)
+void vp8_loop_filter_bhs_c(unsigned char *y_ptr, int y_stride,
+ const unsigned char *blimit)
{
- (void) u_ptr;
- (void) v_ptr;
- (void) uv_stride;
- vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
+ vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, blimit);
+ vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, blimit);
+ vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, blimit);
}
/* Vertical B Filtering */
-void vp8_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
- int y_stride, int uv_stride, loop_filter_info *lfi)
+void vp8_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr,
+ unsigned char *v_ptr, int y_stride, int uv_stride,
+ loop_filter_info *lfi)
{
- vp8_loop_filter_vertical_edge_c(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_vertical_edge_c(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_vertical_edge_c(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
+ vp8_loop_filter_vertical_edge_c(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
+ vp8_loop_filter_vertical_edge_c(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
+ vp8_loop_filter_vertical_edge_c(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
- vp8_loop_filter_vertical_edge_c(u_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
+ vp8_loop_filter_vertical_edge_c(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
- vp8_loop_filter_vertical_edge_c(v_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
+ vp8_loop_filter_vertical_edge_c(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
}
-void vp8_loop_filter_bvs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
- int y_stride, int uv_stride, loop_filter_info *lfi)
+void vp8_loop_filter_bvs_c(unsigned char *y_ptr, int y_stride,
+ const unsigned char *blimit)
{
- (void) u_ptr;
- (void) v_ptr;
- (void) uv_stride;
- vp8_loop_filter_simple_vertical_edge_c(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_simple_vertical_edge_c(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_simple_vertical_edge_c(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
+ vp8_loop_filter_simple_vertical_edge_c(y_ptr + 4, y_stride, blimit);
+ vp8_loop_filter_simple_vertical_edge_c(y_ptr + 8, y_stride, blimit);
+ vp8_loop_filter_simple_vertical_edge_c(y_ptr + 12, y_stride, blimit);
}
-void vp8_init_loop_filter(VP8_COMMON *cm)
+static void lf_init_lut(loop_filter_info_n *lfi)
{
- loop_filter_info *lfi = cm->lf_info;
- LOOPFILTERTYPE lft = cm->filter_type;
- int sharpness_lvl = cm->sharpness_level;
- int frame_type = cm->frame_type;
- int i, j;
+ int filt_lvl;
- int block_inside_limit = 0;
- int HEVThresh;
-
- /* For each possible value for the loop filter fill out a "loop_filter_info" entry. */
- for (i = 0; i <= MAX_LOOP_FILTER; i++)
+ for (filt_lvl = 0; filt_lvl <= MAX_LOOP_FILTER; filt_lvl++)
{
- int filt_lvl = i;
-
- if (frame_type == KEY_FRAME)
+ if (filt_lvl >= 40)
{
- if (filt_lvl >= 40)
- HEVThresh = 2;
- else if (filt_lvl >= 15)
- HEVThresh = 1;
- else
- HEVThresh = 0;
+ lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 2;
+ lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 3;
+ }
+ else if (filt_lvl >= 20)
+ {
+ lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 1;
+ lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 2;
+ }
+ else if (filt_lvl >= 15)
+ {
+ lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 1;
+ lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 1;
}
else
{
- if (filt_lvl >= 40)
- HEVThresh = 3;
- else if (filt_lvl >= 20)
- HEVThresh = 2;
- else if (filt_lvl >= 15)
- HEVThresh = 1;
- else
- HEVThresh = 0;
+ lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 0;
+ lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 0;
}
+ }
+
+ lfi->mode_lf_lut[DC_PRED] = 1;
+ lfi->mode_lf_lut[V_PRED] = 1;
+ lfi->mode_lf_lut[H_PRED] = 1;
+ lfi->mode_lf_lut[TM_PRED] = 1;
+ lfi->mode_lf_lut[B_PRED] = 0;
+
+ lfi->mode_lf_lut[ZEROMV] = 1;
+ lfi->mode_lf_lut[NEARESTMV] = 2;
+ lfi->mode_lf_lut[NEARMV] = 2;
+ lfi->mode_lf_lut[NEWMV] = 2;
+ lfi->mode_lf_lut[SPLITMV] = 3;
+
+}
+
+void vp8_loop_filter_update_sharpness(loop_filter_info_n *lfi,
+ int sharpness_lvl)
+{
+ int i;
+
+ /* For each possible value for the loop filter fill out limits */
+ for (i = 0; i <= MAX_LOOP_FILTER; i++)
+ {
+ int filt_lvl = i;
+ int block_inside_limit = 0;
     /* Set loop filter parameters that control sharpness. */
block_inside_limit = filt_lvl >> (sharpness_lvl > 0);
@@ -169,119 +166,120 @@ void vp8_init_loop_filter(VP8_COMMON *cm)
if (block_inside_limit < 1)
block_inside_limit = 1;
- for (j = 0; j < 16; j++)
- {
- lfi[i].lim[j] = block_inside_limit;
- lfi[i].mbflim[j] = filt_lvl + 2;
- lfi[i].flim[j] = filt_lvl;
- lfi[i].thr[j] = HEVThresh;
- }
-
+ vpx_memset(lfi->lim[i], block_inside_limit, SIMD_WIDTH);
+ vpx_memset(lfi->blim[i], (2 * filt_lvl + block_inside_limit),
+ SIMD_WIDTH);
+ vpx_memset(lfi->mblim[i], (2 * (filt_lvl + 2) + block_inside_limit),
+ SIMD_WIDTH);
}
+}
- /* Set up the function pointers depending on the type of loop filtering selected */
- if (lft == NORMAL_LOOPFILTER)
- {
- cm->lf_mbv = LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_v);
- cm->lf_bv = LF_INVOKE(&cm->rtcd.loopfilter, normal_b_v);
- cm->lf_mbh = LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_h);
- cm->lf_bh = LF_INVOKE(&cm->rtcd.loopfilter, normal_b_h);
- }
- else
+void vp8_loop_filter_init(VP8_COMMON *cm)
+{
+ loop_filter_info_n *lfi = &cm->lf_info;
+ int i;
+
+    /* init limits for given sharpness */
+ vp8_loop_filter_update_sharpness(lfi, cm->sharpness_level);
+ cm->last_sharpness_level = cm->sharpness_level;
+
+ /* init LUT for lvl and hev thr picking */
+ lf_init_lut(lfi);
+
+ /* init hev threshold const vectors */
+ for(i = 0; i < 4 ; i++)
{
- cm->lf_mbv = LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_v);
- cm->lf_bv = LF_INVOKE(&cm->rtcd.loopfilter, simple_b_v);
- cm->lf_mbh = LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_h);
- cm->lf_bh = LF_INVOKE(&cm->rtcd.loopfilter, simple_b_h);
+ vpx_memset(lfi->hev_thr[i], i, SIMD_WIDTH);
}
}
-/* Put vp8_init_loop_filter() in vp8dx_create_decompressor(). Only call vp8_frame_init_loop_filter() while decoding
- * each frame. Check last_frame_type to skip the function most of times.
- */
-void vp8_frame_init_loop_filter(loop_filter_info *lfi, int frame_type)
+void vp8_loop_filter_frame_init(VP8_COMMON *cm,
+ MACROBLOCKD *mbd,
+ int default_filt_lvl,
+ int sharpness_lvl)
{
- int HEVThresh;
- int i, j;
+ int seg, /* segment number */
+ ref, /* index in ref_lf_deltas */
+ mode; /* index in mode_lf_deltas */
- /* For each possible value for the loop filter fill out a "loop_filter_info" entry. */
- for (i = 0; i <= MAX_LOOP_FILTER; i++)
+ loop_filter_info_n *lfi = &cm->lf_info;
+
+ /* update limits if sharpness has changed */
+ if(cm->last_sharpness_level != sharpness_lvl)
{
- int filt_lvl = i;
+ vp8_loop_filter_update_sharpness(lfi, sharpness_lvl);
+ cm->last_sharpness_level = sharpness_lvl;
+ }
- if (frame_type == KEY_FRAME)
- {
- if (filt_lvl >= 40)
- HEVThresh = 2;
- else if (filt_lvl >= 15)
- HEVThresh = 1;
- else
- HEVThresh = 0;
- }
- else
+ for(seg = 0; seg < MAX_MB_SEGMENTS; seg++)
+ {
+ int lvl_seg = default_filt_lvl;
+ int lvl_ref, lvl_mode;
+
+ /* Note the baseline filter values for each segment */
+ if (mbd->segmentation_enabled)
{
- if (filt_lvl >= 40)
- HEVThresh = 3;
- else if (filt_lvl >= 20)
- HEVThresh = 2;
- else if (filt_lvl >= 15)
- HEVThresh = 1;
- else
- HEVThresh = 0;
+ /* Abs value */
+ if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
+ {
+ lvl_seg = mbd->segment_feature_data[MB_LVL_ALT_LF][seg];
+ }
+ else /* Delta Value */
+ {
+ lvl_seg += mbd->segment_feature_data[MB_LVL_ALT_LF][seg];
+ lvl_seg = (lvl_seg > 0) ? ((lvl_seg > 63) ? 63: lvl_seg) : 0;
+ }
}
- for (j = 0; j < 16; j++)
+ if (!mbd->mode_ref_lf_delta_enabled)
{
- /*lfi[i].lim[j] = block_inside_limit;
- lfi[i].mbflim[j] = filt_lvl+2;*/
- /*lfi[i].flim[j] = filt_lvl;*/
- lfi[i].thr[j] = HEVThresh;
+ /* we could get rid of this if we assume that deltas are set to
+ * zero when not in use; encoder always uses deltas
+ */
+ vpx_memset(lfi->lvl[seg][0], lvl_seg, 4 * 4 );
+ continue;
}
- }
-}
+ lvl_ref = lvl_seg;
-int vp8_adjust_mb_lf_value(MACROBLOCKD *mbd, int filter_level)
-{
- MB_MODE_INFO *mbmi = &mbd->mode_info_context->mbmi;
+ /* INTRA_FRAME */
+ ref = INTRA_FRAME;
- if (mbd->mode_ref_lf_delta_enabled)
- {
/* Apply delta for reference frame */
- filter_level += mbd->ref_lf_deltas[mbmi->ref_frame];
+ lvl_ref += mbd->ref_lf_deltas[ref];
- /* Apply delta for mode */
- if (mbmi->ref_frame == INTRA_FRAME)
- {
- /* Only the split mode BPRED has a further special case */
- if (mbmi->mode == B_PRED)
- filter_level += mbd->mode_lf_deltas[0];
- }
- else
+ /* Apply delta for Intra modes */
+ mode = 0; /* B_PRED */
+ /* Only the split mode BPRED has a further special case */
+ lvl_mode = lvl_ref + mbd->mode_lf_deltas[mode];
+ lvl_mode = (lvl_mode > 0) ? (lvl_mode > 63 ? 63 : lvl_mode) : 0; /* clamp */
+
+ lfi->lvl[seg][ref][mode] = lvl_mode;
+
+ mode = 1; /* all the rest of Intra modes */
+ lvl_mode = (lvl_ref > 0) ? (lvl_ref > 63 ? 63 : lvl_ref) : 0; /* clamp */
+ lfi->lvl[seg][ref][mode] = lvl_mode;
+
+ /* LAST, GOLDEN, ALT */
+ for(ref = 1; ref < MAX_REF_FRAMES; ref++)
{
- /* Zero motion mode */
- if (mbmi->mode == ZEROMV)
- filter_level += mbd->mode_lf_deltas[1];
+ int lvl_ref = lvl_seg;
- /* Split MB motion mode */
- else if (mbmi->mode == SPLITMV)
- filter_level += mbd->mode_lf_deltas[3];
+ /* Apply delta for reference frame */
+ lvl_ref += mbd->ref_lf_deltas[ref];
- /* All other inter motion modes (Nearest, Near, New) */
- else
- filter_level += mbd->mode_lf_deltas[2];
- }
+ /* Apply delta for Inter modes */
+ for (mode = 1; mode < 4; mode++)
+ {
+ lvl_mode = lvl_ref + mbd->mode_lf_deltas[mode];
+ lvl_mode = (lvl_mode > 0) ? (lvl_mode > 63 ? 63 : lvl_mode) : 0; /* clamp */
- /* Range check */
- if (filter_level > MAX_LOOP_FILTER)
- filter_level = MAX_LOOP_FILTER;
- else if (filter_level < 0)
- filter_level = 0;
+ lfi->lvl[seg][ref][mode] = lvl_mode;
+ }
+ }
}
- return filter_level;
}
-
void vp8_loop_filter_frame
(
VP8_COMMON *cm,
@@ -290,49 +288,23 @@ void vp8_loop_filter_frame
)
{
YV12_BUFFER_CONFIG *post = cm->frame_to_show;
- loop_filter_info *lfi = cm->lf_info;
+ loop_filter_info_n *lfi_n = &cm->lf_info;
+ loop_filter_info lfi;
+
FRAME_TYPE frame_type = cm->frame_type;
int mb_row;
int mb_col;
-
- int baseline_filter_level[MAX_MB_SEGMENTS];
int filter_level;
- int alt_flt_enabled = mbd->segmentation_enabled;
- int i;
unsigned char *y_ptr, *u_ptr, *v_ptr;
- mbd->mode_info_context = cm->mi; /* Point at base of Mb MODE_INFO list */
-
- /* Note the baseline filter values for each segment */
- if (alt_flt_enabled)
- {
- for (i = 0; i < MAX_MB_SEGMENTS; i++)
- {
- /* Abs value */
- if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
- baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
- /* Delta Value */
- else
- {
- baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
- baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; /* Clamp to valid range */
- }
- }
- }
- else
- {
- for (i = 0; i < MAX_MB_SEGMENTS; i++)
- baseline_filter_level[i] = default_filt_lvl;
- }
+ /* Point at base of Mb MODE_INFO list */
+ const MODE_INFO *mode_info_context = cm->mi;
/* Initialize the loop filter for this frame. */
- if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
- vp8_init_loop_filter(cm);
- else if (frame_type != cm->last_frame_type)
- vp8_frame_init_loop_filter(lfi, frame_type);
+ vp8_loop_filter_frame_init( cm, mbd, default_filt_lvl, cm->sharpness_level);
/* Set up the buffer pointers */
y_ptr = post->y_buffer;
@@ -344,51 +316,79 @@ void vp8_loop_filter_frame
{
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
{
- int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
- int skip_lf = (mbd->mode_info_context->mbmi.mode != B_PRED &&
- mbd->mode_info_context->mbmi.mode != SPLITMV &&
- mbd->mode_info_context->mbmi.mb_skip_coeff);
+ int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
+ mode_info_context->mbmi.mode != SPLITMV &&
+ mode_info_context->mbmi.mb_skip_coeff);
- filter_level = baseline_filter_level[Segment];
+ const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
+ const int seg = mode_info_context->mbmi.segment_id;
+ const int ref_frame = mode_info_context->mbmi.ref_frame;
- /* Distance of Mb to the various image edges.
- * These specified to 8th pel as they are always compared to values that are in 1/8th pel units
- * Apply any context driven MB level adjustment
- */
- filter_level = vp8_adjust_mb_lf_value(mbd, filter_level);
+ filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
if (filter_level)
{
- if (mb_col > 0)
- cm->lf_mbv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level]);
-
- if (!skip_lf)
- cm->lf_bv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level]);
-
- /* don't apply across umv border */
- if (mb_row > 0)
- cm->lf_mbh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level]);
-
- if (!skip_lf)
- cm->lf_bh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level]);
+ if (cm->filter_type == NORMAL_LOOPFILTER)
+ {
+ const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
+ lfi.mblim = lfi_n->mblim[filter_level];
+ lfi.blim = lfi_n->blim[filter_level];
+ lfi.lim = lfi_n->lim[filter_level];
+ lfi.hev_thr = lfi_n->hev_thr[hev_index];
+
+ if (mb_col > 0)
+ LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_v)
+ (y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);
+
+ if (!skip_lf)
+ LF_INVOKE(&cm->rtcd.loopfilter, normal_b_v)
+ (y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);
+
+ /* don't apply across umv border */
+ if (mb_row > 0)
+ LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_h)
+ (y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);
+
+ if (!skip_lf)
+ LF_INVOKE(&cm->rtcd.loopfilter, normal_b_h)
+ (y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);
+ }
+ else
+ {
+ if (mb_col > 0)
+ LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_v)
+ (y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
+
+ if (!skip_lf)
+ LF_INVOKE(&cm->rtcd.loopfilter, simple_b_v)
+ (y_ptr, post->y_stride, lfi_n->blim[filter_level]);
+
+ /* don't apply across umv border */
+ if (mb_row > 0)
+ LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_h)
+ (y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
+
+ if (!skip_lf)
+ LF_INVOKE(&cm->rtcd.loopfilter, simple_b_h)
+ (y_ptr, post->y_stride, lfi_n->blim[filter_level]);
+ }
}
y_ptr += 16;
u_ptr += 8;
v_ptr += 8;
- mbd->mode_info_context++; /* step to next MB */
+ mode_info_context++; /* step to next MB */
}
y_ptr += post->y_stride * 16 - post->y_width;
u_ptr += post->uv_stride * 8 - post->uv_width;
v_ptr += post->uv_stride * 8 - post->uv_width;
- mbd->mode_info_context++; /* Skip border mb */
+ mode_info_context++; /* Skip border mb */
}
}
-
void vp8_loop_filter_frame_yonly
(
VP8_COMMON *cm,
@@ -399,49 +399,28 @@ void vp8_loop_filter_frame_yonly
{
YV12_BUFFER_CONFIG *post = cm->frame_to_show;
- int i;
unsigned char *y_ptr;
int mb_row;
int mb_col;
- loop_filter_info *lfi = cm->lf_info;
- int baseline_filter_level[MAX_MB_SEGMENTS];
+ loop_filter_info_n *lfi_n = &cm->lf_info;
+ loop_filter_info lfi;
+
int filter_level;
- int alt_flt_enabled = mbd->segmentation_enabled;
FRAME_TYPE frame_type = cm->frame_type;
- (void) sharpness_lvl;
+ /* Point at base of Mb MODE_INFO list */
+ const MODE_INFO *mode_info_context = cm->mi;
- /*MODE_INFO * this_mb_mode_info = cm->mi;*/ /* Point at base of Mb MODE_INFO list */
- mbd->mode_info_context = cm->mi; /* Point at base of Mb MODE_INFO list */
+ sharpness_lvl = cm->sharpness_level;
- /* Note the baseline filter values for each segment */
- if (alt_flt_enabled)
- {
- for (i = 0; i < MAX_MB_SEGMENTS; i++)
- {
- /* Abs value */
- if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
- baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
- /* Delta Value */
- else
- {
- baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
- baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; /* Clamp to valid range */
- }
- }
- }
- else
- {
- for (i = 0; i < MAX_MB_SEGMENTS; i++)
- baseline_filter_level[i] = default_filt_lvl;
- }
+#if 0
+ if(default_filt_lvl == 0) /* no filter applied */
+ return;
+#endif
/* Initialize the loop filter for this frame. */
- if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
- vp8_init_loop_filter(cm);
- else if (frame_type != cm->last_frame_type)
- vp8_frame_init_loop_filter(lfi, frame_type);
+ vp8_loop_filter_frame_init( cm, mbd, default_filt_lvl, sharpness_lvl);
/* Set up the buffer pointers */
y_ptr = post->y_buffer;
@@ -451,44 +430,75 @@ void vp8_loop_filter_frame_yonly
{
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
{
- int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
- int skip_lf = (mbd->mode_info_context->mbmi.mode != B_PRED &&
- mbd->mode_info_context->mbmi.mode != SPLITMV &&
- mbd->mode_info_context->mbmi.mb_skip_coeff);
+ int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
+ mode_info_context->mbmi.mode != SPLITMV &&
+ mode_info_context->mbmi.mb_skip_coeff);
- filter_level = baseline_filter_level[Segment];
+ const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
+ const int seg = mode_info_context->mbmi.segment_id;
+ const int ref_frame = mode_info_context->mbmi.ref_frame;
- /* Apply any context driven MB level adjustment */
- filter_level = vp8_adjust_mb_lf_value(mbd, filter_level);
+ filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
if (filter_level)
{
- if (mb_col > 0)
- cm->lf_mbv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level]);
-
- if (!skip_lf)
- cm->lf_bv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level]);
-
- /* don't apply across umv border */
- if (mb_row > 0)
- cm->lf_mbh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level]);
-
- if (!skip_lf)
- cm->lf_bh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level]);
+ if (cm->filter_type == NORMAL_LOOPFILTER)
+ {
+ const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
+ lfi.mblim = lfi_n->mblim[filter_level];
+ lfi.blim = lfi_n->blim[filter_level];
+ lfi.lim = lfi_n->lim[filter_level];
+ lfi.hev_thr = lfi_n->hev_thr[hev_index];
+
+ if (mb_col > 0)
+ LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_v)
+ (y_ptr, 0, 0, post->y_stride, 0, &lfi);
+
+ if (!skip_lf)
+ LF_INVOKE(&cm->rtcd.loopfilter, normal_b_v)
+ (y_ptr, 0, 0, post->y_stride, 0, &lfi);
+
+ /* don't apply across umv border */
+ if (mb_row > 0)
+ LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_h)
+ (y_ptr, 0, 0, post->y_stride, 0, &lfi);
+
+ if (!skip_lf)
+ LF_INVOKE(&cm->rtcd.loopfilter, normal_b_h)
+ (y_ptr, 0, 0, post->y_stride, 0, &lfi);
+ }
+ else
+ {
+ if (mb_col > 0)
+ LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_v)
+ (y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
+
+ if (!skip_lf)
+ LF_INVOKE(&cm->rtcd.loopfilter, simple_b_v)
+ (y_ptr, post->y_stride, lfi_n->blim[filter_level]);
+
+ /* don't apply across umv border */
+ if (mb_row > 0)
+ LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_h)
+ (y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
+
+ if (!skip_lf)
+ LF_INVOKE(&cm->rtcd.loopfilter, simple_b_h)
+ (y_ptr, post->y_stride, lfi_n->blim[filter_level]);
+ }
}
y_ptr += 16;
- mbd->mode_info_context ++; /* step to next MB */
+ mode_info_context ++; /* step to next MB */
}
y_ptr += post->y_stride * 16 - post->y_width;
- mbd->mode_info_context ++; /* Skip border mb */
+ mode_info_context ++; /* Skip border mb */
}
}
-
void vp8_loop_filter_partial_frame
(
VP8_COMMON *cm,
@@ -500,25 +510,32 @@ void vp8_loop_filter_partial_frame
{
YV12_BUFFER_CONFIG *post = cm->frame_to_show;
- int i;
unsigned char *y_ptr;
int mb_row;
int mb_col;
- /*int mb_rows = post->y_height >> 4;*/
int mb_cols = post->y_width >> 4;
- int linestocopy;
+ int linestocopy, i;
+
+ loop_filter_info_n *lfi_n = &cm->lf_info;
+ loop_filter_info lfi;
- loop_filter_info *lfi = cm->lf_info;
- int baseline_filter_level[MAX_MB_SEGMENTS];
int filter_level;
int alt_flt_enabled = mbd->segmentation_enabled;
FRAME_TYPE frame_type = cm->frame_type;
- (void) sharpness_lvl;
+ const MODE_INFO *mode_info_context;
+
+ int lvl_seg[MAX_MB_SEGMENTS];
+
+ sharpness_lvl = cm->sharpness_level;
- /*MODE_INFO * this_mb_mode_info = cm->mi + (post->y_height>>5) * (mb_cols + 1);*/ /* Point at base of Mb MODE_INFO list */
- mbd->mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1); /* Point at base of Mb MODE_INFO list */
+#if 0
+ if(default_filt_lvl == 0) /* no filter applied */
+ return;
+#endif
+
+ mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1);
linestocopy = (post->y_height >> (4 + Fraction));
@@ -531,29 +548,24 @@ void vp8_loop_filter_partial_frame
if (alt_flt_enabled)
{
for (i = 0; i < MAX_MB_SEGMENTS; i++)
- {
- /* Abs value */
+ { /* Abs value */
if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
- baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
+ {
+ lvl_seg[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
+ }
/* Delta Value */
else
{
- baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
- baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; /* Clamp to valid range */
+ lvl_seg[i] = default_filt_lvl
+ + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
+ lvl_seg[i] = (lvl_seg[i] > 0) ?
+ ((lvl_seg[i] > 63) ? 63: lvl_seg[i]) : 0;
}
}
}
else
- {
- for (i = 0; i < MAX_MB_SEGMENTS; i++)
- baseline_filter_level[i] = default_filt_lvl;
- }
+ lvl_seg[0] = default_filt_lvl;
- /* Initialize the loop filter for this frame. */
- if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
- vp8_init_loop_filter(cm);
- else if (frame_type != cm->last_frame_type)
- vp8_frame_init_loop_filter(lfi, frame_type);
/* Set up the buffer pointers */
y_ptr = post->y_buffer + (post->y_height >> 5) * 16 * post->y_stride;
@@ -563,32 +575,64 @@ void vp8_loop_filter_partial_frame
{
for (mb_col = 0; mb_col < mb_cols; mb_col++)
{
- int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
- int skip_lf = (mbd->mode_info_context->mbmi.mode != B_PRED &&
- mbd->mode_info_context->mbmi.mode != SPLITMV &&
- mbd->mode_info_context->mbmi.mb_skip_coeff);
+ int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
+ mode_info_context->mbmi.mode != SPLITMV &&
+ mode_info_context->mbmi.mb_skip_coeff);
- filter_level = baseline_filter_level[Segment];
+ if (alt_flt_enabled)
+ filter_level = lvl_seg[mode_info_context->mbmi.segment_id];
+ else
+ filter_level = lvl_seg[0];
if (filter_level)
{
- if (mb_col > 0)
- cm->lf_mbv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level]);
-
- if (!skip_lf)
- cm->lf_bv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level]);
-
- cm->lf_mbh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level]);
-
- if (!skip_lf)
- cm->lf_bh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level]);
+ if (cm->filter_type == NORMAL_LOOPFILTER)
+ {
+ const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
+ lfi.mblim = lfi_n->mblim[filter_level];
+ lfi.blim = lfi_n->blim[filter_level];
+ lfi.lim = lfi_n->lim[filter_level];
+ lfi.hev_thr = lfi_n->hev_thr[hev_index];
+
+ if (mb_col > 0)
+ LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_v)
+ (y_ptr, 0, 0, post->y_stride, 0, &lfi);
+
+ if (!skip_lf)
+ LF_INVOKE(&cm->rtcd.loopfilter, normal_b_v)
+ (y_ptr, 0, 0, post->y_stride, 0, &lfi);
+
+ LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_h)
+ (y_ptr, 0, 0, post->y_stride, 0, &lfi);
+
+ if (!skip_lf)
+ LF_INVOKE(&cm->rtcd.loopfilter, normal_b_h)
+ (y_ptr, 0, 0, post->y_stride, 0, &lfi);
+ }
+ else
+ {
+ if (mb_col > 0)
+ LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_v)
+ (y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
+
+ if (!skip_lf)
+ LF_INVOKE(&cm->rtcd.loopfilter, simple_b_v)
+ (y_ptr, post->y_stride, lfi_n->blim[filter_level]);
+
+ LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_h)
+ (y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
+
+ if (!skip_lf)
+ LF_INVOKE(&cm->rtcd.loopfilter, simple_b_h)
+ (y_ptr, post->y_stride, lfi_n->blim[filter_level]);
+ }
}
y_ptr += 16;
- mbd->mode_info_context += 1; /* step to next MB */
+ mode_info_context += 1; /* step to next MB */
}
y_ptr += post->y_stride * 16 - post->y_width;
- mbd->mode_info_context += 1; /* Skip border mb */
+ mode_info_context += 1; /* Skip border mb */
}
}
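
vp8_loop_filter_update_sharpness() above replaces the old per-frame loop that stored flim/mbflim/lim separately: each level now gets three precomputed bytes, replicated across SIMD_WIDTH so the SIMD filters can load a full vector. A minimal scalar sketch of what one byte of each row holds; the intermediate sharpness-dependent clamping is elided and the function name is illustrative.

/* Illustrative sketch only: one byte of each SIMD_WIDTH-wide row. */
static void limits_for_level_sketch(int filt_lvl, int sharpness_lvl,
                                    unsigned char *lim,
                                    unsigned char *blim,
                                    unsigned char *mblim)
{
    int block_inside_limit = filt_lvl >> (sharpness_lvl > 0);
    /* ... additional sharpness-dependent clamping as in the function above ... */
    if (block_inside_limit < 1)
        block_inside_limit = 1;

    *lim   = (unsigned char)block_inside_limit;
    *blim  = (unsigned char)(2 * filt_lvl + block_inside_limit);        /* block edges */
    *mblim = (unsigned char)(2 * (filt_lvl + 2) + block_inside_limit);  /* MB edges */
}
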
diff --git a/vp8/common/loopfilter.h b/vp8/common/loopfilter.h
index ca136b3a4..2d6dad306 100644
--- a/vp8/common/loopfilter.h
+++ b/vp8/common/loopfilter.h
@@ -13,6 +13,7 @@
#define loopfilter_h
#include "vpx_ports/mem.h"
+#include "vpx_config.h"
#define MAX_LOOP_FILTER 63
@@ -22,27 +23,46 @@ typedef enum
SIMPLE_LOOPFILTER = 1
} LOOPFILTERTYPE;
-/* FRK
- * Need to align this structure so when it is declared and
+#if ARCH_ARM
+#define SIMD_WIDTH 1
+#else
+#define SIMD_WIDTH 16
+#endif
+
+/* Need to align this structure so when it is declared and
* passed it can be loaded into vector registers.
*/
typedef struct
{
- DECLARE_ALIGNED(16, signed char, lim[16]);
- DECLARE_ALIGNED(16, signed char, flim[16]);
- DECLARE_ALIGNED(16, signed char, thr[16]);
- DECLARE_ALIGNED(16, signed char, mbflim[16]);
+ DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, mblim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
+ DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, blim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
+ DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, lim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
+ DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, hev_thr[4][SIMD_WIDTH]);
+ unsigned char lvl[4][4][4];
+ unsigned char hev_thr_lut[2][MAX_LOOP_FILTER + 1];
+ unsigned char mode_lf_lut[10];
+} loop_filter_info_n;
+
+typedef struct
+{
+ const unsigned char * mblim;
+ const unsigned char * blim;
+ const unsigned char * lim;
+ const unsigned char * hev_thr;
} loop_filter_info;
#define prototype_loopfilter(sym) \
- void sym(unsigned char *src, int pitch, const signed char *flimit,\
- const signed char *limit, const signed char *thresh, int count)
+ void sym(unsigned char *src, int pitch, const unsigned char *blimit,\
+ const unsigned char *limit, const unsigned char *thresh, int count)
#define prototype_loopfilter_block(sym) \
- void sym(unsigned char *y, unsigned char *u, unsigned char *v,\
+ void sym(unsigned char *y, unsigned char *u, unsigned char *v, \
int ystride, int uv_stride, loop_filter_info *lfi)
+#define prototype_simple_loopfilter(sym) \
+ void sym(unsigned char *y, int ystride, const unsigned char *blimit)
+
#if ARCH_X86 || ARCH_X86_64
#include "x86/loopfilter_x86.h"
#endif
@@ -71,38 +91,39 @@ extern prototype_loopfilter_block(vp8_lf_normal_mb_h);
#endif
extern prototype_loopfilter_block(vp8_lf_normal_b_h);
-
#ifndef vp8_lf_simple_mb_v
-#define vp8_lf_simple_mb_v vp8_loop_filter_mbvs_c
+#define vp8_lf_simple_mb_v vp8_loop_filter_simple_vertical_edge_c
#endif
-extern prototype_loopfilter_block(vp8_lf_simple_mb_v);
+extern prototype_simple_loopfilter(vp8_lf_simple_mb_v);
#ifndef vp8_lf_simple_b_v
#define vp8_lf_simple_b_v vp8_loop_filter_bvs_c
#endif
-extern prototype_loopfilter_block(vp8_lf_simple_b_v);
+extern prototype_simple_loopfilter(vp8_lf_simple_b_v);
#ifndef vp8_lf_simple_mb_h
-#define vp8_lf_simple_mb_h vp8_loop_filter_mbhs_c
+#define vp8_lf_simple_mb_h vp8_loop_filter_simple_horizontal_edge_c
#endif
-extern prototype_loopfilter_block(vp8_lf_simple_mb_h);
+extern prototype_simple_loopfilter(vp8_lf_simple_mb_h);
#ifndef vp8_lf_simple_b_h
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_c
#endif
-extern prototype_loopfilter_block(vp8_lf_simple_b_h);
+extern prototype_simple_loopfilter(vp8_lf_simple_b_h);
typedef prototype_loopfilter_block((*vp8_lf_block_fn_t));
+typedef prototype_simple_loopfilter((*vp8_slf_block_fn_t));
+
typedef struct
{
vp8_lf_block_fn_t normal_mb_v;
vp8_lf_block_fn_t normal_b_v;
vp8_lf_block_fn_t normal_mb_h;
vp8_lf_block_fn_t normal_b_h;
- vp8_lf_block_fn_t simple_mb_v;
- vp8_lf_block_fn_t simple_b_v;
- vp8_lf_block_fn_t simple_mb_h;
- vp8_lf_block_fn_t simple_b_h;
+ vp8_slf_block_fn_t simple_mb_v;
+ vp8_slf_block_fn_t simple_b_v;
+ vp8_slf_block_fn_t simple_mb_h;
+ vp8_slf_block_fn_t simple_b_h;
} vp8_loopfilter_rtcd_vtable_t;
#if CONFIG_RUNTIME_CPU_DETECT
@@ -115,9 +136,9 @@ typedef void loop_filter_uvfunction
(
unsigned char *u, /* source pointer */
int p, /* pitch */
- const signed char *flimit,
- const signed char *limit,
- const signed char *thresh,
+ const unsigned char *blimit,
+ const unsigned char *limit,
+ const unsigned char *thresh,
unsigned char *v
);
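
The split between loop_filter_info_n (per-frame tables) and loop_filter_info (per-call pointers) keeps the per-macroblock work down to a handful of table lookups. A minimal sketch of that lookup, assuming the declarations from this header; the function name is illustrative and frame_type indexes hev_thr_lut as KEY_FRAME/INTER_FRAME.

/* Illustrative sketch only -- consolidates the lookup done in the frame loops. */
#include "loopfilter.h"

static int pick_lf_level_sketch(const loop_filter_info_n *lfi_n,
                                int segment_id, int ref_frame, int mode,
                                int frame_type, loop_filter_info *lfi)
{
    const int mode_index   = lfi_n->mode_lf_lut[mode];
    const int filter_level = lfi_n->lvl[segment_id][ref_frame][mode_index];

    if (filter_level)
    {
        const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];

        lfi->mblim   = lfi_n->mblim[filter_level];
        lfi->blim    = lfi_n->blim[filter_level];
        lfi->lim     = lfi_n->lim[filter_level];
        lfi->hev_thr = lfi_n->hev_thr[hev_index];
    }
    return filter_level;
}
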
diff --git a/vp8/common/loopfilter_filters.c b/vp8/common/loopfilter_filters.c
index 694052924..10228ae09 100644
--- a/vp8/common/loopfilter_filters.c
+++ b/vp8/common/loopfilter_filters.c
@@ -24,8 +24,9 @@ static __inline signed char vp8_signed_char_clamp(int t)
/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
-static __inline signed char vp8_filter_mask(signed char limit, signed char flimit,
- uc p3, uc p2, uc p1, uc p0, uc q0, uc q1, uc q2, uc q3)
+static __inline signed char vp8_filter_mask(uc limit, uc blimit,
+ uc p3, uc p2, uc p1, uc p0,
+ uc q0, uc q1, uc q2, uc q3)
{
signed char mask = 0;
mask |= (abs(p3 - p2) > limit) * -1;
@@ -34,13 +35,13 @@ static __inline signed char vp8_filter_mask(signed char limit, signed char flimi
mask |= (abs(q1 - q0) > limit) * -1;
mask |= (abs(q2 - q1) > limit) * -1;
mask |= (abs(q3 - q2) > limit) * -1;
- mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > flimit * 2 + limit) * -1;
+ mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1;
mask = ~mask;
return mask;
}
/* is there high variance internal edge ( 11111111 yes, 00000000 no) */
-static __inline signed char vp8_hevmask(signed char thresh, uc p1, uc p0, uc q0, uc q1)
+static __inline signed char vp8_hevmask(uc thresh, uc p1, uc p0, uc q0, uc q1)
{
signed char hev = 0;
hev |= (abs(p1 - p0) > thresh) * -1;
@@ -48,7 +49,8 @@ static __inline signed char vp8_hevmask(signed char thresh, uc p1, uc p0, uc q0,
return hev;
}
-static __inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *op0, uc *oq0, uc *oq1)
+static __inline void vp8_filter(signed char mask, uc hev, uc *op1,
+ uc *op0, uc *oq0, uc *oq1)
{
signed char ps0, qs0;
@@ -98,9 +100,9 @@ void vp8_loop_filter_horizontal_edge_c
(
unsigned char *s,
int p, /* pitch */
- const signed char *flimit,
- const signed char *limit,
- const signed char *thresh,
+ const unsigned char *blimit,
+ const unsigned char *limit,
+ const unsigned char *thresh,
int count
)
{
@@ -113,11 +115,11 @@ void vp8_loop_filter_horizontal_edge_c
*/
do
{
- mask = vp8_filter_mask(limit[i], flimit[i],
+ mask = vp8_filter_mask(limit[0], blimit[0],
s[-4*p], s[-3*p], s[-2*p], s[-1*p],
s[0*p], s[1*p], s[2*p], s[3*p]);
- hev = vp8_hevmask(thresh[i], s[-2*p], s[-1*p], s[0*p], s[1*p]);
+ hev = vp8_hevmask(thresh[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
vp8_filter(mask, hev, s - 2 * p, s - 1 * p, s, s + 1 * p);
@@ -130,9 +132,9 @@ void vp8_loop_filter_vertical_edge_c
(
unsigned char *s,
int p,
- const signed char *flimit,
- const signed char *limit,
- const signed char *thresh,
+ const unsigned char *blimit,
+ const unsigned char *limit,
+ const unsigned char *thresh,
int count
)
{
@@ -145,10 +147,10 @@ void vp8_loop_filter_vertical_edge_c
*/
do
{
- mask = vp8_filter_mask(limit[i], flimit[i],
+ mask = vp8_filter_mask(limit[0], blimit[0],
s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]);
- hev = vp8_hevmask(thresh[i], s[-2], s[-1], s[0], s[1]);
+ hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
vp8_filter(mask, hev, s - 2, s - 1, s, s + 1);
@@ -157,7 +159,7 @@ void vp8_loop_filter_vertical_edge_c
while (++i < count * 8);
}
-static __inline void vp8_mbfilter(signed char mask, signed char hev,
+static __inline void vp8_mbfilter(signed char mask, uc hev,
uc *op2, uc *op1, uc *op0, uc *oq0, uc *oq1, uc *oq2)
{
signed char s, u;
@@ -216,9 +218,9 @@ void vp8_mbloop_filter_horizontal_edge_c
(
unsigned char *s,
int p,
- const signed char *flimit,
- const signed char *limit,
- const signed char *thresh,
+ const unsigned char *blimit,
+ const unsigned char *limit,
+ const unsigned char *thresh,
int count
)
{
@@ -232,11 +234,11 @@ void vp8_mbloop_filter_horizontal_edge_c
do
{
- mask = vp8_filter_mask(limit[i], flimit[i],
+ mask = vp8_filter_mask(limit[0], blimit[0],
s[-4*p], s[-3*p], s[-2*p], s[-1*p],
s[0*p], s[1*p], s[2*p], s[3*p]);
- hev = vp8_hevmask(thresh[i], s[-2*p], s[-1*p], s[0*p], s[1*p]);
+ hev = vp8_hevmask(thresh[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
vp8_mbfilter(mask, hev, s - 3 * p, s - 2 * p, s - 1 * p, s, s + 1 * p, s + 2 * p);
@@ -251,9 +253,9 @@ void vp8_mbloop_filter_vertical_edge_c
(
unsigned char *s,
int p,
- const signed char *flimit,
- const signed char *limit,
- const signed char *thresh,
+ const unsigned char *blimit,
+ const unsigned char *limit,
+ const unsigned char *thresh,
int count
)
{
@@ -264,10 +266,10 @@ void vp8_mbloop_filter_vertical_edge_c
do
{
- mask = vp8_filter_mask(limit[i], flimit[i],
+ mask = vp8_filter_mask(limit[0], blimit[0],
s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]);
- hev = vp8_hevmask(thresh[i], s[-2], s[-1], s[0], s[1]);
+ hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
vp8_mbfilter(mask, hev, s - 3, s - 2, s - 1, s, s + 1, s + 2);
@@ -278,13 +280,13 @@ void vp8_mbloop_filter_vertical_edge_c
}
/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
-static __inline signed char vp8_simple_filter_mask(signed char limit, signed char flimit, uc p1, uc p0, uc q0, uc q1)
+static __inline signed char vp8_simple_filter_mask(uc blimit, uc p1, uc p0, uc q0, uc q1)
{
/* Why does this cause problems for win32?
* error C2143: syntax error : missing ';' before 'type'
* (void) limit;
*/
- signed char mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= flimit * 2 + limit) * -1;
+ signed char mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= blimit) * -1;
return mask;
}
@@ -317,47 +319,37 @@ void vp8_loop_filter_simple_horizontal_edge_c
(
unsigned char *s,
int p,
- const signed char *flimit,
- const signed char *limit,
- const signed char *thresh,
- int count
+ const unsigned char *blimit
)
{
signed char mask = 0;
int i = 0;
- (void) thresh;
do
{
- /*mask = vp8_simple_filter_mask( limit[i], flimit[i],s[-1*p],s[0*p]);*/
- mask = vp8_simple_filter_mask(limit[i], flimit[i], s[-2*p], s[-1*p], s[0*p], s[1*p]);
+ mask = vp8_simple_filter_mask(blimit[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
vp8_simple_filter(mask, s - 2 * p, s - 1 * p, s, s + 1 * p);
++s;
}
- while (++i < count * 8);
+ while (++i < 16);
}
void vp8_loop_filter_simple_vertical_edge_c
(
unsigned char *s,
int p,
- const signed char *flimit,
- const signed char *limit,
- const signed char *thresh,
- int count
+ const unsigned char *blimit
)
{
signed char mask = 0;
int i = 0;
- (void) thresh;
do
{
- /*mask = vp8_simple_filter_mask( limit[i], flimit[i],s[-1],s[0]);*/
- mask = vp8_simple_filter_mask(limit[i], flimit[i], s[-2], s[-1], s[0], s[1]);
+ mask = vp8_simple_filter_mask(blimit[0], s[-2], s[-1], s[0], s[1]);
vp8_simple_filter(mask, s - 2, s - 1, s, s + 1);
s += p;
}
- while (++i < count * 8);
+ while (++i < 16);
}
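With count removed, the simple filters above always walk a fixed 16-sample macroblock edge and take only the blimit pointer. A small usage sketch; filter_mb_left_edge is a hypothetical helper, and blimit is assumed to point at the per-level threshold bytes this patch sets up elsewhere:

/* Run the simple vertical filter across the left edge of one 16x16 luma
 * macroblock, using the C edge function defined above. */
static void filter_mb_left_edge(unsigned char *y_ptr, int y_stride,
                                const unsigned char *blimit)
{
    /* One call now covers all 16 rows; the old count * 8 loop bound is gone. */
    vp8_loop_filter_simple_vertical_edge_c(y_ptr, y_stride, blimit);
}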
diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h
index a381dfe87..4356b5133 100644
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@@ -83,6 +83,7 @@ typedef struct VP8_COMMON_RTCD
} VP8_COMMON_RTCD;
typedef struct VP8Common
+
{
struct vpx_internal_error_info error;
@@ -107,7 +108,8 @@ typedef struct VP8Common
YV12_BUFFER_CONFIG post_proc_buffer;
YV12_BUFFER_CONFIG temp_scale_frame;
- FRAME_TYPE last_frame_type; /* Save last frame's frame type for loopfilter init checking and motion search. */
+
+ FRAME_TYPE last_frame_type; /* Save last frame's frame type for motion search. */
FRAME_TYPE frame_type;
int show_frame;
@@ -148,11 +150,9 @@ typedef struct VP8Common
INTERPOLATIONFILTERTYPE mcomp_filter_type;
LOOPFILTERTYPE last_filter_type;
LOOPFILTERTYPE filter_type;
- loop_filter_info lf_info[MAX_LOOP_FILTER+1];
- prototype_loopfilter_block((*lf_mbv));
- prototype_loopfilter_block((*lf_mbh));
- prototype_loopfilter_block((*lf_bv));
- prototype_loopfilter_block((*lf_bh));
+
+ loop_filter_info_n lf_info;
+
int filter_level;
int last_sharpness_level;
int sharpness_level;
@@ -205,10 +205,9 @@ typedef struct VP8Common
struct postproc_state postproc_state;
} VP8_COMMON;
-
-int vp8_adjust_mb_lf_value(MACROBLOCKD *mbd, int filter_level);
-void vp8_init_loop_filter(VP8_COMMON *cm);
-void vp8_frame_init_loop_filter(loop_filter_info *lfi, int frame_type);
-extern void vp8_loop_filter_frame(VP8_COMMON *cm, MACROBLOCKD *mbd, int filt_val);
+void vp8_loop_filter_init(VP8_COMMON *cm);
+void vp8_loop_filter_frame_init(VP8_COMMON *cm, MACROBLOCKD *mbd,
+ int default_filt_lvl, int sharpness_lvl);
+void vp8_loop_filter_frame(VP8_COMMON *cm, MACROBLOCKD *mbd, int filt_val);
#endif
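The header now exposes a three-call interface in place of the removed init/adjust helpers. One plausible calling sequence, pieced together from the decoder hunks later in this patch (whether vp8_loop_filter_frame re-runs the per-frame init internally is not visible here):

/* Once, when the encoder or decoder instance is created (see onyxd_if.c
 * and onyx_if.c below). */
vp8_loop_filter_init(cm);

/* Once per frame, before any macroblock is filtered (see threading.c below). */
vp8_loop_filter_frame_init(cm, mbd, cm->filter_level, cm->sharpness_level);

/* Whole-frame filtering for the non-threaded path. */
vp8_loop_filter_frame(cm, mbd, cm->filter_level);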
diff --git a/vp8/common/x86/loopfilter_mmx.asm b/vp8/common/x86/loopfilter_mmx.asm
index c6c215c3c..ad47284cf 100644
--- a/vp8/common/x86/loopfilter_mmx.asm
+++ b/vp8/common/x86/loopfilter_mmx.asm
@@ -16,7 +16,7 @@
;(
; unsigned char *src_ptr,
; int src_pixel_step,
-; const char *flimit,
+; const char *blimit,
; const char *limit,
; const char *thresh,
; int count
@@ -122,12 +122,10 @@ next8_h:
paddusb mm5, mm5 ; abs(p0-q0)*2
paddusb mm5, mm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2
- mov rdx, arg(2) ;flimit ; get flimit
- movq mm2, [rdx] ; flimit mm2
- paddb mm2, mm2 ; flimit*2 (less than 255)
- paddb mm7, mm2 ; flimit * 2 + limit (less than 255)
+ mov rdx, arg(2) ;blimit ; get blimit
+ movq mm7, [rdx] ; blimit
- psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
+ psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
por mm1, mm5
pxor mm5, mm5
pcmpeqb mm1, mm5 ; mask mm1
@@ -230,7 +228,7 @@ next8_h:
;(
; unsigned char *src_ptr,
; int src_pixel_step,
-; const char *flimit,
+; const char *blimit,
; const char *limit,
; const char *thresh,
; int count
@@ -406,9 +404,9 @@ next8_v:
pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero
psrlw mm5, 1 ; abs(p1-q1)/2
- mov rdx, arg(2) ;flimit ;
+ mov rdx, arg(2) ;blimit ;
- movq mm2, [rdx] ;flimit mm2
+ movq mm4, [rdx] ;blimit
movq mm1, mm3 ; mm1=mm3=p0
movq mm7, mm6 ; mm7=mm6=q0
@@ -419,10 +417,7 @@ next8_v:
paddusb mm1, mm1 ; abs(q0-p0)*2
paddusb mm1, mm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2
- paddb mm2, mm2 ; flimit*2 (less than 255)
- paddb mm4, mm2 ; flimit * 2 + limit (less than 255)
-
- psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
+ psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
por mm1, mm0; ; mask
pxor mm0, mm0
@@ -603,7 +598,7 @@ next8_v:
;(
; unsigned char *src_ptr,
; int src_pixel_step,
-; const char *flimit,
+; const char *blimit,
; const char *limit,
; const char *thresh,
; int count
@@ -719,17 +714,15 @@ next8_mbh:
paddusb mm5, mm5 ; abs(p0-q0)*2
paddusb mm5, mm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2
- mov rdx, arg(2) ;flimit ; get flimit
- movq mm2, [rdx] ; flimit mm2
- paddb mm2, mm2 ; flimit*2 (less than 255)
- paddb mm7, mm2 ; flimit * 2 + limit (less than 255)
+ mov rdx, arg(2) ;blimit ; get blimit
+ movq mm7, [rdx] ; blimit
- psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
+ psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
por mm1, mm5
pxor mm5, mm5
pcmpeqb mm1, mm5 ; mask mm1
- ; mm1 = mask, mm0=q0, mm7 = flimit, t0 = abs(q0-q1) t1 = abs(p1-p0)
+ ; mm1 = mask, mm0=q0, mm7 = blimit, t0 = abs(q0-q1) t1 = abs(p1-p0)
; mm6 = p0,
; calculate high edge variance
@@ -922,7 +915,7 @@ next8_mbh:
;(
; unsigned char *src_ptr,
; int src_pixel_step,
-; const char *flimit,
+; const char *blimit,
; const char *limit,
; const char *thresh,
; int count
@@ -1108,9 +1101,9 @@ next8_mbv:
pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero
psrlw mm5, 1 ; abs(p1-q1)/2
- mov rdx, arg(2) ;flimit ;
+ mov rdx, arg(2) ;blimit ;
- movq mm2, [rdx] ;flimit mm2
+ movq mm4, [rdx] ;blimit
movq mm1, mm3 ; mm1=mm3=p0
movq mm7, mm6 ; mm7=mm6=q0
@@ -1121,10 +1114,7 @@ next8_mbv:
paddusb mm1, mm1 ; abs(q0-p0)*2
paddusb mm1, mm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2
- paddb mm2, mm2 ; flimit*2 (less than 255)
- paddb mm4, mm2 ; flimit * 2 + limit (less than 255)
-
- psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
+ psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
por mm1, mm0; ; mask
pxor mm0, mm0
@@ -1392,16 +1382,13 @@ next8_mbv:
;(
; unsigned char *src_ptr,
; int src_pixel_step,
-; const char *flimit,
-; const char *limit,
-; const char *thresh,
-; int count
+; const char *blimit
;)
global sym(vp8_loop_filter_simple_horizontal_edge_mmx)
sym(vp8_loop_filter_simple_horizontal_edge_mmx):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
+ SHADOW_ARGS_TO_STACK 3
GET_GOT rbx
push rsi
push rdi
@@ -1410,14 +1397,10 @@ sym(vp8_loop_filter_simple_horizontal_edge_mmx):
mov rsi, arg(0) ;src_ptr
movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
- movsxd rcx, dword ptr arg(5) ;count
+ mov rcx, 2 ; count
nexts8_h:
- mov rdx, arg(3) ;limit
- movq mm7, [rdx]
- mov rdx, arg(2) ;flimit ; get flimit
+ mov rdx, arg(2) ;blimit ; get blimit
movq mm3, [rdx] ;
- paddb mm3, mm3 ; flimit*2 (less than 255)
- paddb mm3, mm7 ; flimit * 2 + limit (less than 255)
mov rdi, rsi ; rdi points to row +1 for indirect addressing
add rdi, rax
@@ -1445,7 +1428,7 @@ nexts8_h:
paddusb mm5, mm5 ; abs(p0-q0)*2
paddusb mm5, mm1 ; abs (p0 - q0) *2 + abs(p1-q1)/2
- psubusb mm5, mm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
+ psubusb mm5, mm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
pxor mm3, mm3
pcmpeqb mm5, mm3
@@ -1515,16 +1498,13 @@ nexts8_h:
;(
; unsigned char *src_ptr,
; int src_pixel_step,
-; const char *flimit,
-; const char *limit,
-; const char *thresh,
-; int count
+; const char *blimit
;)
global sym(vp8_loop_filter_simple_vertical_edge_mmx)
sym(vp8_loop_filter_simple_vertical_edge_mmx):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
+ SHADOW_ARGS_TO_STACK 3
GET_GOT rbx
push rsi
push rdi
@@ -1539,7 +1519,7 @@ sym(vp8_loop_filter_simple_vertical_edge_mmx):
movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
lea rsi, [rsi + rax*4- 2]; ;
- movsxd rcx, dword ptr arg(5) ;count
+ mov rcx, 2 ; count
nexts8_v:
lea rdi, [rsi + rax];
@@ -1602,14 +1582,10 @@ nexts8_v:
paddusb mm5, mm5 ; abs(p0-q0)*2
paddusb mm5, mm6 ; abs (p0 - q0) *2 + abs(p1-q1)/2
- mov rdx, arg(2) ;flimit ; get flimit
+ mov rdx, arg(2) ;blimit ; get blimit
movq mm7, [rdx]
- mov rdx, arg(3) ; get limit
- movq mm6, [rdx]
- paddb mm7, mm7 ; flimit*2 (less than 255)
- paddb mm7, mm6 ; flimit * 2 + limit (less than 255)
- psubusb mm5, mm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
+ psubusb mm5, mm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
pxor mm7, mm7
pcmpeqb mm5, mm7 ; mm5 = mask
diff --git a/vp8/common/x86/loopfilter_sse2.asm b/vp8/common/x86/loopfilter_sse2.asm
index c2ce1a106..4efff7eb5 100644
--- a/vp8/common/x86/loopfilter_sse2.asm
+++ b/vp8/common/x86/loopfilter_sse2.asm
@@ -110,7 +110,7 @@
psubusb xmm6, xmm5 ; p1-=p0
por xmm6, xmm4 ; abs(p1 - p0)
- mov rdx, arg(2) ; get flimit
+ mov rdx, arg(2) ; get blimit
movdqa t1, xmm6 ; save to t1
@@ -123,7 +123,7 @@
psubusb xmm1, xmm7
por xmm2, xmm3 ; abs(p1-q1)
- movdqa xmm4, XMMWORD PTR [rdx] ; flimit
+ movdqa xmm7, XMMWORD PTR [rdx] ; blimit
movdqa xmm3, xmm0 ; q0
pand xmm2, [GLOBAL(tfe)] ; set lsb of each byte to zero
@@ -134,13 +134,11 @@
psrlw xmm2, 1 ; abs(p1-q1)/2
psubusb xmm5, xmm3 ; p0-=q0
- paddb xmm4, xmm4 ; flimit*2 (less than 255)
psubusb xmm3, xmm6 ; q0-=p0
por xmm5, xmm3 ; abs(p0 - q0)
paddusb xmm5, xmm5 ; abs(p0-q0)*2
- paddb xmm7, xmm4 ; flimit * 2 + limit (less than 255)
movdqa xmm4, t0 ; hev get abs (q1 - q0)
@@ -150,7 +148,7 @@
movdqa xmm2, XMMWORD PTR [rdx] ; hev
- psubusb xmm5, xmm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
+ psubusb xmm5, xmm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
psubusb xmm4, xmm2 ; hev
psubusb xmm3, xmm2 ; hev
@@ -278,7 +276,7 @@
;(
; unsigned char *src_ptr,
; int src_pixel_step,
-; const char *flimit,
+; const char *blimit,
; const char *limit,
; const char *thresh,
; int count
@@ -328,7 +326,7 @@ sym(vp8_loop_filter_horizontal_edge_sse2):
;(
; unsigned char *src_ptr,
; int src_pixel_step,
-; const char *flimit,
+; const char *blimit,
; const char *limit,
; const char *thresh,
; int count
@@ -574,7 +572,7 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2):
;(
; unsigned char *src_ptr,
; int src_pixel_step,
-; const char *flimit,
+; const char *blimit,
; const char *limit,
; const char *thresh,
; int count
@@ -624,7 +622,7 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2):
;(
; unsigned char *u,
; int src_pixel_step,
-; const char *flimit,
+; const char *blimit,
; const char *limit,
; const char *thresh,
; unsigned char *v
@@ -904,7 +902,7 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
movdqa xmm4, XMMWORD PTR [rdx]; limit
pmaxub xmm0, xmm7
- mov rdx, arg(2) ; flimit
+ mov rdx, arg(2) ; blimit
psubusb xmm0, xmm4
movdqa xmm5, xmm2 ; q1
@@ -921,12 +919,11 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
psrlw xmm5, 1 ; abs(p1-q1)/2
psubusb xmm6, xmm3 ; q0-p0
- movdqa xmm2, XMMWORD PTR [rdx]; flimit
+ movdqa xmm4, XMMWORD PTR [rdx]; blimit
mov rdx, arg(4) ; get thresh
por xmm1, xmm6 ; abs(q0-p0)
- paddb xmm2, xmm2 ; flimit*2 (less than 255)
movdqa xmm6, t0 ; get abs (q1 - q0)
@@ -939,10 +936,9 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
paddusb xmm1, xmm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2
psubusb xmm6, xmm7 ; abs(q1 - q0) > thresh
- paddb xmm4, xmm2 ; flimit * 2 + limit (less than 255)
psubusb xmm3, xmm7 ; abs(p1 - p0)> thresh
- psubusb xmm1, xmm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
+ psubusb xmm1, xmm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
por xmm6, xmm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
por xmm1, xmm0 ; mask
@@ -1014,7 +1010,7 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
;(
; unsigned char *src_ptr,
; int src_pixel_step,
-; const char *flimit,
+; const char *blimit,
; const char *limit,
; const char *thresh,
; int count
@@ -1081,7 +1077,7 @@ sym(vp8_loop_filter_vertical_edge_sse2):
;(
; unsigned char *u,
; int src_pixel_step,
-; const char *flimit,
+; const char *blimit,
; const char *limit,
; const char *thresh,
; unsigned char *v
@@ -1239,7 +1235,7 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2):
;(
; unsigned char *src_ptr,
; int src_pixel_step,
-; const char *flimit,
+; const char *blimit,
; const char *limit,
; const char *thresh,
; int count
@@ -1308,7 +1304,7 @@ sym(vp8_mbloop_filter_vertical_edge_sse2):
;(
; unsigned char *u,
; int src_pixel_step,
-; const char *flimit,
+; const char *blimit,
; const char *limit,
; const char *thresh,
; unsigned char *v
@@ -1376,16 +1372,13 @@ sym(vp8_mbloop_filter_vertical_edge_uv_sse2):
;(
; unsigned char *src_ptr,
; int src_pixel_step,
-; const char *flimit,
-; const char *limit,
-; const char *thresh,
-; int count
+; const char *blimit
;)
global sym(vp8_loop_filter_simple_horizontal_edge_sse2)
sym(vp8_loop_filter_simple_horizontal_edge_sse2):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
+ SHADOW_ARGS_TO_STACK 3
SAVE_XMM 7
GET_GOT rbx
push rsi
@@ -1394,13 +1387,8 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
mov rsi, arg(0) ;src_ptr
movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
- mov rdx, arg(2) ;flimit ; get flimit
+ mov rdx, arg(2) ;blimit
movdqa xmm3, XMMWORD PTR [rdx]
- mov rdx, arg(3) ;limit
- movdqa xmm7, XMMWORD PTR [rdx]
-
- paddb xmm3, xmm3 ; flimit*2 (less than 255)
- paddb xmm3, xmm7 ; flimit * 2 + limit (less than 255)
mov rdi, rsi ; rdi points to row +1 for indirect addressing
add rdi, rax
@@ -1428,7 +1416,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
paddusb xmm5, xmm5 ; abs(p0-q0)*2
paddusb xmm5, xmm1 ; abs (p0 - q0) *2 + abs(p1-q1)/2
- psubusb xmm5, xmm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
+ psubusb xmm5, xmm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
pxor xmm3, xmm3
pcmpeqb xmm5, xmm3
@@ -1493,16 +1481,13 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
;(
; unsigned char *src_ptr,
; int src_pixel_step,
-; const char *flimit,
-; const char *limit,
-; const char *thresh,
-; int count
+; const char *blimit
;)
global sym(vp8_loop_filter_simple_vertical_edge_sse2)
sym(vp8_loop_filter_simple_vertical_edge_sse2):
push rbp ; save old base pointer value.
mov rbp, rsp ; set new base pointer value.
- SHADOW_ARGS_TO_STACK 6
+ SHADOW_ARGS_TO_STACK 3
SAVE_XMM 7
GET_GOT rbx ; save callee-saved reg
push rsi
@@ -1607,14 +1592,10 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
paddusb xmm5, xmm5 ; abs(p0-q0)*2
paddusb xmm5, xmm6 ; abs (p0 - q0) *2 + abs(p1-q1)/2
- mov rdx, arg(2) ;flimit
+ mov rdx, arg(2) ;blimit
movdqa xmm7, XMMWORD PTR [rdx]
- mov rdx, arg(3) ; get limit
- movdqa xmm6, XMMWORD PTR [rdx]
- paddb xmm7, xmm7 ; flimit*2 (less than 255)
- paddb xmm7, xmm6 ; flimit * 2 + limit (less than 255)
- psubusb xmm5, xmm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
+ psubusb xmm5, xmm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
pxor xmm7, xmm7
pcmpeqb xmm5, xmm7 ; mm5 = mask
diff --git a/vp8/common/x86/loopfilter_x86.c b/vp8/common/x86/loopfilter_x86.c
index a52420c98..9360ac17c 100644
--- a/vp8/common/x86/loopfilter_x86.c
+++ b/vp8/common/x86/loopfilter_x86.c
@@ -9,30 +9,18 @@
*/
-#include "vpx_ports/config.h"
+#include "vpx_config.h"
#include "vp8/common/loopfilter.h"
-prototype_loopfilter(vp8_loop_filter_horizontal_edge_c);
-prototype_loopfilter(vp8_loop_filter_vertical_edge_c);
-prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_c);
-prototype_loopfilter(vp8_mbloop_filter_vertical_edge_c);
-prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_c);
-prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_c);
-
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_mmx);
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_mmx);
prototype_loopfilter(vp8_loop_filter_vertical_edge_mmx);
prototype_loopfilter(vp8_loop_filter_horizontal_edge_mmx);
-prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_mmx);
-prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_mmx);
prototype_loopfilter(vp8_loop_filter_vertical_edge_sse2);
prototype_loopfilter(vp8_loop_filter_horizontal_edge_sse2);
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_sse2);
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_sse2);
-prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_sse2);
-prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_sse2);
-prototype_loopfilter(vp8_fast_loop_filter_vertical_edges_sse2);
extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_sse2;
extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_sse2;
@@ -44,23 +32,13 @@ extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_sse2;
void vp8_loop_filter_mbh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
- vp8_mbloop_filter_horizontal_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
+ vp8_mbloop_filter_horizontal_edge_mmx(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
- vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
+ vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
- vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
-}
-
-
-void vp8_loop_filter_mbhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
- int y_stride, int uv_stride, loop_filter_info *lfi)
-{
- (void) u_ptr;
- (void) v_ptr;
- (void) uv_stride;
- vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
+ vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
}
@@ -68,23 +46,13 @@ void vp8_loop_filter_mbhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsign
void vp8_loop_filter_mbv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
- vp8_mbloop_filter_vertical_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
+ vp8_mbloop_filter_vertical_edge_mmx(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
- vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
+ vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
- vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
-}
-
-
-void vp8_loop_filter_mbvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
- int y_stride, int uv_stride, loop_filter_info *lfi)
-{
- (void) u_ptr;
- (void) v_ptr;
- (void) uv_stride;
- vp8_loop_filter_simple_vertical_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
+ vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
}
@@ -92,27 +60,23 @@ void vp8_loop_filter_mbvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsign
void vp8_loop_filter_bh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
- vp8_loop_filter_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
+ vp8_loop_filter_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
+ vp8_loop_filter_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
+ vp8_loop_filter_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
- vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
+ vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
- vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
+ vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
}
-void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
- int y_stride, int uv_stride, loop_filter_info *lfi)
+void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
{
- (void) u_ptr;
- (void) v_ptr;
- (void) uv_stride;
- vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
+ vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, blimit);
+ vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, blimit);
+ vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, blimit);
}
@@ -120,27 +84,23 @@ void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
void vp8_loop_filter_bv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
- vp8_loop_filter_vertical_edge_mmx(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_vertical_edge_mmx(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
+ vp8_loop_filter_vertical_edge_mmx(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
+ vp8_loop_filter_vertical_edge_mmx(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
+ vp8_loop_filter_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
- vp8_loop_filter_vertical_edge_mmx(u_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
+ vp8_loop_filter_vertical_edge_mmx(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
- vp8_loop_filter_vertical_edge_mmx(v_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
+ vp8_loop_filter_vertical_edge_mmx(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
}
-void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
- int y_stride, int uv_stride, loop_filter_info *lfi)
+void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
{
- (void) u_ptr;
- (void) v_ptr;
- (void) uv_stride;
- vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
+ vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 4, y_stride, blimit);
+ vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 8, y_stride, blimit);
+ vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 12, y_stride, blimit);
}
#endif
@@ -150,20 +110,10 @@ void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
- vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
+ vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
- vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, v_ptr);
-}
-
-
-void vp8_loop_filter_mbhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
- int y_stride, int uv_stride, loop_filter_info *lfi)
-{
- (void) u_ptr;
- (void) v_ptr;
- (void) uv_stride;
- vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
+ vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr);
}
@@ -171,20 +121,10 @@ void vp8_loop_filter_mbhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsig
void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
- vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
+ vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
- vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, v_ptr);
-}
-
-
-void vp8_loop_filter_mbvs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
- int y_stride, int uv_stride, loop_filter_info *lfi)
-{
- (void) u_ptr;
- (void) v_ptr;
- (void) uv_stride;
- vp8_loop_filter_simple_vertical_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
+ vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr);
}
@@ -192,24 +132,20 @@ void vp8_loop_filter_mbvs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsig
void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
- vp8_loop_filter_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
+ vp8_loop_filter_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
+ vp8_loop_filter_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
+ vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
- vp8_loop_filter_horizontal_edge_uv_sse2(u_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, v_ptr + 4 * uv_stride);
+ vp8_loop_filter_horizontal_edge_uv_sse2(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, v_ptr + 4 * uv_stride);
}
-void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
- int y_stride, int uv_stride, loop_filter_info *lfi)
+void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
{
- (void) u_ptr;
- (void) v_ptr;
- (void) uv_stride;
- vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
+ vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, blimit);
+ vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, blimit);
+ vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, blimit);
}
@@ -217,36 +153,20 @@ void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsign
void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
- vp8_loop_filter_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
+ vp8_loop_filter_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
+ vp8_loop_filter_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
+ vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
- vp8_loop_filter_vertical_edge_uv_sse2(u_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, v_ptr + 4);
+ vp8_loop_filter_vertical_edge_uv_sse2(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, v_ptr + 4);
}
-void vp8_loop_filter_bvs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
- int y_stride, int uv_stride, loop_filter_info *lfi)
+void vp8_loop_filter_bvs_sse2(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
{
- (void) u_ptr;
- (void) v_ptr;
- (void) uv_stride;
- vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
+ vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 4, y_stride, blimit);
+ vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 8, y_stride, blimit);
+ vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 12, y_stride, blimit);
}
#endif
-
-#if 0
-void vp8_fast_loop_filter_vertical_edges_sse(unsigned char *y_ptr,
- int y_stride,
- loop_filter_info *lfi)
-{
-
- vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
- vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
-}
-#endif
diff --git a/vp8/common/x86/loopfilter_x86.h b/vp8/common/x86/loopfilter_x86.h
index 80dbebc8d..1ed6c213f 100644
--- a/vp8/common/x86/loopfilter_x86.h
+++ b/vp8/common/x86/loopfilter_x86.h
@@ -24,10 +24,10 @@ extern prototype_loopfilter_block(vp8_loop_filter_mbv_mmx);
extern prototype_loopfilter_block(vp8_loop_filter_bv_mmx);
extern prototype_loopfilter_block(vp8_loop_filter_mbh_mmx);
extern prototype_loopfilter_block(vp8_loop_filter_bh_mmx);
-extern prototype_loopfilter_block(vp8_loop_filter_mbvs_mmx);
-extern prototype_loopfilter_block(vp8_loop_filter_bvs_mmx);
-extern prototype_loopfilter_block(vp8_loop_filter_mbhs_mmx);
-extern prototype_loopfilter_block(vp8_loop_filter_bhs_mmx);
+extern prototype_simple_loopfilter(vp8_loop_filter_simple_vertical_edge_mmx);
+extern prototype_simple_loopfilter(vp8_loop_filter_bvs_mmx);
+extern prototype_simple_loopfilter(vp8_loop_filter_simple_horizontal_edge_mmx);
+extern prototype_simple_loopfilter(vp8_loop_filter_bhs_mmx);
#if !CONFIG_RUNTIME_CPU_DETECT
@@ -44,13 +44,13 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_mmx);
#define vp8_lf_normal_b_h vp8_loop_filter_bh_mmx
#undef vp8_lf_simple_mb_v
-#define vp8_lf_simple_mb_v vp8_loop_filter_mbvs_mmx
+#define vp8_lf_simple_mb_v vp8_loop_filter_simple_vertical_edge_mmx
#undef vp8_lf_simple_b_v
#define vp8_lf_simple_b_v vp8_loop_filter_bvs_mmx
#undef vp8_lf_simple_mb_h
-#define vp8_lf_simple_mb_h vp8_loop_filter_mbhs_mmx
+#define vp8_lf_simple_mb_h vp8_loop_filter_simple_horizontal_edge_mmx
#undef vp8_lf_simple_b_h
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_mmx
@@ -63,10 +63,10 @@ extern prototype_loopfilter_block(vp8_loop_filter_mbv_sse2);
extern prototype_loopfilter_block(vp8_loop_filter_bv_sse2);
extern prototype_loopfilter_block(vp8_loop_filter_mbh_sse2);
extern prototype_loopfilter_block(vp8_loop_filter_bh_sse2);
-extern prototype_loopfilter_block(vp8_loop_filter_mbvs_sse2);
-extern prototype_loopfilter_block(vp8_loop_filter_bvs_sse2);
-extern prototype_loopfilter_block(vp8_loop_filter_mbhs_sse2);
-extern prototype_loopfilter_block(vp8_loop_filter_bhs_sse2);
+extern prototype_simple_loopfilter(vp8_loop_filter_simple_vertical_edge_sse2);
+extern prototype_simple_loopfilter(vp8_loop_filter_bvs_sse2);
+extern prototype_simple_loopfilter(vp8_loop_filter_simple_horizontal_edge_sse2);
+extern prototype_simple_loopfilter(vp8_loop_filter_bhs_sse2);
#if !CONFIG_RUNTIME_CPU_DETECT
@@ -83,13 +83,13 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_sse2);
#define vp8_lf_normal_b_h vp8_loop_filter_bh_sse2
#undef vp8_lf_simple_mb_v
-#define vp8_lf_simple_mb_v vp8_loop_filter_mbvs_sse2
+#define vp8_lf_simple_mb_v vp8_loop_filter_simple_vertical_edge_sse2
#undef vp8_lf_simple_b_v
#define vp8_lf_simple_b_v vp8_loop_filter_bvs_sse2
#undef vp8_lf_simple_mb_h
-#define vp8_lf_simple_mb_h vp8_loop_filter_mbhs_sse2
+#define vp8_lf_simple_mb_h vp8_loop_filter_simple_horizontal_edge_sse2
#undef vp8_lf_simple_b_h
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_sse2
diff --git a/vp8/common/x86/x86_systemdependent.c b/vp8/common/x86/x86_systemdependent.c
index 87374f3c6..33a984b79 100644
--- a/vp8/common/x86/x86_systemdependent.c
+++ b/vp8/common/x86/x86_systemdependent.c
@@ -9,7 +9,7 @@
*/
-#include "vpx_ports/config.h"
+#include "vpx_config.h"
#include "vpx_ports/x86.h"
#include "vp8/common/g_common.h"
#include "vp8/common/subpixel.h"
@@ -63,9 +63,9 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_mmx;
rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_mmx;
rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_mmx;
- rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_mmx;
+ rtcd->loopfilter.simple_mb_v = vp8_loop_filter_simple_vertical_edge_mmx;
rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_mmx;
- rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_mmx;
+ rtcd->loopfilter.simple_mb_h = vp8_loop_filter_simple_horizontal_edge_mmx;
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_mmx;
#if CONFIG_POSTPROC
@@ -101,9 +101,9 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_sse2;
rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_sse2;
rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_sse2;
- rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_sse2;
+ rtcd->loopfilter.simple_mb_v = vp8_loop_filter_simple_vertical_edge_sse2;
rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_sse2;
- rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_sse2;
+ rtcd->loopfilter.simple_mb_h = vp8_loop_filter_simple_horizontal_edge_sse2;
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_sse2;
#if CONFIG_POSTPROC
diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c
index 01d940233..0a7942d89 100644
--- a/vp8/decoder/decodemv.c
+++ b/vp8/decoder/decodemv.c
@@ -180,11 +180,11 @@ static MB_PREDICTION_MODE read_mv_ref(vp8_reader *bc, const vp8_prob *p)
return (MB_PREDICTION_MODE)i;
}
-static MB_PREDICTION_MODE sub_mv_ref(vp8_reader *bc, const vp8_prob *p)
+static B_PREDICTION_MODE sub_mv_ref(vp8_reader *bc, const vp8_prob *p)
{
const int i = vp8_treed_read(bc, vp8_sub_mv_ref_tree, p);
- return (MB_PREDICTION_MODE)i;
+ return (B_PREDICTION_MODE)i;
}
#ifdef VPX_MODE_COUNT
@@ -334,7 +334,7 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
abovemv.as_int = above_block_mv(mi, k, mis);
mv_contz = vp8_mv_cont(&leftmv, &abovemv);
- switch ((B_PREDICTION_MODE) sub_mv_ref(bc, vp8_sub_mv_ref_prob2 [mv_contz])) /*pc->fc.sub_mv_ref_prob))*/
+ switch (sub_mv_ref(bc, vp8_sub_mv_ref_prob2 [mv_contz])) /*pc->fc.sub_mv_ref_prob))*/
{
case NEW4X4:
read_mv(bc, &blockmv.as_mv, (const MV_CONTEXT *) mvc);
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index 5f81ee638..aeb1607b5 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -95,7 +95,7 @@ VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf)
{
VP8_COMMON *cm = &pbi->common;
- vp8_init_loop_filter(cm);
+ vp8_loop_filter_init(cm);
cm->last_frame_type = KEY_FRAME;
cm->last_filter_type = cm->filter_type;
cm->last_sharpness_level = cm->sharpness_level;
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index a7af9acfb..0c21689c0 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -274,9 +274,7 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data)
int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
int filter_level;
- loop_filter_info *lfi = pc->lf_info;
- int alt_flt_enabled = xd->segmentation_enabled;
- int Segment;
+ loop_filter_info_n *lfi_n = &pc->lf_info;
pbi->mb_row_di[ithread].mb_row = mb_row;
pbi->mb_row_di[ithread].mbd.current_bc = &pbi->mbc[mb_row%num_part];
@@ -362,7 +360,16 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data)
if (pbi->common.filter_level)
{
- int skip_lf;
+ int skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED &&
+ xd->mode_info_context->mbmi.mode != SPLITMV &&
+ xd->mode_info_context->mbmi.mb_skip_coeff);
+
+ const int mode_index = lfi_n->mode_lf_lut[xd->mode_info_context->mbmi.mode];
+ const int seg = xd->mode_info_context->mbmi.segment_id;
+ const int ref_frame = xd->mode_info_context->mbmi.ref_frame;
+
+ filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
+
if( mb_row != pc->mb_rows-1 )
{
/* Save decoded MB last row data for next-row decoding */
@@ -388,35 +395,57 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data)
}
}
- /* update loopfilter info */
- Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
- skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED &&
- xd->mode_info_context->mbmi.mode != SPLITMV &&
- xd->mode_info_context->mbmi.mb_skip_coeff);
-
- filter_level = pbi->mt_baseline_filter_level[Segment];
- /* Distance of Mb to the various image edges.
- * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
- * Apply any context driven MB level adjustment
- */
- filter_level = vp8_adjust_mb_lf_value(xd, filter_level);
-
/* loopfilter on this macroblock. */
if (filter_level)
{
- if (mb_col > 0)
- pc->lf_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
-
- if (!skip_lf)
- pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
-
- /* don't apply across umv border */
- if (mb_row > 0)
- pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
-
- if (!skip_lf)
- pc->lf_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
+ if(pc->filter_type == NORMAL_LOOPFILTER)
+ {
+ loop_filter_info lfi;
+ FRAME_TYPE frame_type = pc->frame_type;
+ const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
+ lfi.mblim = lfi_n->mblim[filter_level];
+ lfi.blim = lfi_n->blim[filter_level];
+ lfi.lim = lfi_n->lim[filter_level];
+ lfi.hev_thr = lfi_n->hev_thr[hev_index];
+
+ if (mb_col > 0)
+ LF_INVOKE(&pc->rtcd.loopfilter, normal_mb_v)
+ (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);
+
+ if (!skip_lf)
+ LF_INVOKE(&pc->rtcd.loopfilter, normal_b_v)
+ (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);
+
+ /* don't apply across umv border */
+ if (mb_row > 0)
+ LF_INVOKE(&pc->rtcd.loopfilter, normal_mb_h)
+ (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);
+
+ if (!skip_lf)
+ LF_INVOKE(&pc->rtcd.loopfilter, normal_b_h)
+ (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);
+ }
+ else
+ {
+ if (mb_col > 0)
+ LF_INVOKE(&pc->rtcd.loopfilter, simple_mb_v)
+ (xd->dst.y_buffer, recon_y_stride, lfi_n->mblim[filter_level]);
+
+ if (!skip_lf)
+ LF_INVOKE(&pc->rtcd.loopfilter, simple_b_v)
+ (xd->dst.y_buffer, recon_y_stride, lfi_n->blim[filter_level]);
+
+ /* don't apply across umv border */
+ if (mb_row > 0)
+ LF_INVOKE(&pc->rtcd.loopfilter, simple_mb_h)
+ (xd->dst.y_buffer, recon_y_stride, lfi_n->mblim[filter_level]);
+
+ if (!skip_lf)
+ LF_INVOKE(&pc->rtcd.loopfilter, simple_b_h)
+ (xd->dst.y_buffer, recon_y_stride, lfi_n->blim[filter_level]);
+ }
}
+
}
recon_yoffset += 16;
@@ -681,53 +710,6 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi)
}
}
-
-static void lpf_init( VP8D_COMP *pbi, int default_filt_lvl)
-{
- VP8_COMMON *cm = &pbi->common;
- MACROBLOCKD *mbd = &pbi->mb;
- /*YV12_BUFFER_CONFIG *post = &cm->new_frame;*/ /*frame_to_show;*/
- loop_filter_info *lfi = cm->lf_info;
- FRAME_TYPE frame_type = cm->frame_type;
-
- /*int mb_row;
- int mb_col;
- int baseline_filter_level[MAX_MB_SEGMENTS];*/
- int alt_flt_enabled = mbd->segmentation_enabled;
-
- int i;
- /*unsigned char *y_ptr, *u_ptr, *v_ptr;*/
-
- /* Note the baseline filter values for each segment */
- if (alt_flt_enabled)
- {
- for (i = 0; i < MAX_MB_SEGMENTS; i++)
- {
- /* Abs value */
- if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
- pbi->mt_baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
- /* Delta Value */
- else
- {
- pbi->mt_baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
- pbi->mt_baseline_filter_level[i] = (pbi->mt_baseline_filter_level[i] >= 0) ? ((pbi->mt_baseline_filter_level[i] <= MAX_LOOP_FILTER) ? pbi->mt_baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; /* Clamp to valid range */
- }
- }
- }
- else
- {
- for (i = 0; i < MAX_MB_SEGMENTS; i++)
- pbi->mt_baseline_filter_level[i] = default_filt_lvl;
- }
-
- /* Initialize the loop filter for this frame. */
- if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
- vp8_init_loop_filter(cm);
- else if (frame_type != cm->last_frame_type)
- vp8_frame_init_loop_filter(lfi, frame_type);
-}
-
-
void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
{
int mb_row;
@@ -738,12 +720,10 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
volatile int *last_row_current_mb_col = NULL;
int nsync = pbi->sync_range;
- int filter_level;
- loop_filter_info *lfi = pc->lf_info;
- int alt_flt_enabled = xd->segmentation_enabled;
- int Segment;
+ int filter_level = pc->filter_level;
+ loop_filter_info_n *lfi_n = &pc->lf_info;
- if(pbi->common.filter_level)
+ if (filter_level)
{
/* Set above_row buffer to 127 for decoding first MB row */
vpx_memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, pc->yv12_fb[pc->lst_fb_idx].y_width + 5);
@@ -764,7 +744,9 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
vpx_memset(pbi->mt_uleft_col[i], (unsigned char)129, 8);
vpx_memset(pbi->mt_vleft_col[i], (unsigned char)129, 8);
}
- lpf_init(pbi, pc->filter_level);
+
+ /* Initialize the loop filter for this frame. */
+ vp8_loop_filter_frame_init(pc, &pbi->mb, filter_level, pc->sharpness_level);
}
setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count);
@@ -774,7 +756,6 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
for (mb_row = 0; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
{
-
xd->current_bc = &pbi->mbc[mb_row%num_part];
/* vp8_decode_mb_row(pbi, pc, mb_row, xd); */
@@ -875,7 +856,16 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
if (pbi->common.filter_level)
{
- int skip_lf;
+ int skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED &&
+ xd->mode_info_context->mbmi.mode != SPLITMV &&
+ xd->mode_info_context->mbmi.mb_skip_coeff);
+
+ const int mode_index = lfi_n->mode_lf_lut[xd->mode_info_context->mbmi.mode];
+ const int seg = xd->mode_info_context->mbmi.segment_id;
+ const int ref_frame = xd->mode_info_context->mbmi.ref_frame;
+
+ filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
+
/* Save decoded MB last row data for next-row decoding */
if(mb_row != pc->mb_rows-1)
{
@@ -901,36 +891,58 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
}
}
- /* update loopfilter info */
- Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
- skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED &&
- xd->mode_info_context->mbmi.mode != SPLITMV &&
- xd->mode_info_context->mbmi.mb_skip_coeff);
- filter_level = pbi->mt_baseline_filter_level[Segment];
- /* Distance of Mb to the various image edges.
- * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
- * Apply any context driven MB level adjustment
- */
- filter_level = vp8_adjust_mb_lf_value(xd, filter_level);
-
/* loopfilter on this macroblock. */
if (filter_level)
{
- if (mb_col > 0)
- pc->lf_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
-
- if (!skip_lf)
- pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
-
- /* don't apply across umv border */
- if (mb_row > 0)
- pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
-
- if (!skip_lf)
- pc->lf_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
+ if(pc->filter_type == NORMAL_LOOPFILTER)
+ {
+ loop_filter_info lfi;
+ FRAME_TYPE frame_type = pc->frame_type;
+ const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
+ lfi.mblim = lfi_n->mblim[filter_level];
+ lfi.blim = lfi_n->blim[filter_level];
+ lfi.lim = lfi_n->lim[filter_level];
+ lfi.hev_thr = lfi_n->hev_thr[hev_index];
+
+ if (mb_col > 0)
+ LF_INVOKE(&pc->rtcd.loopfilter, normal_mb_v)
+ (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);
+
+ if (!skip_lf)
+ LF_INVOKE(&pc->rtcd.loopfilter, normal_b_v)
+ (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);
+
+ /* don't apply across umv border */
+ if (mb_row > 0)
+ LF_INVOKE(&pc->rtcd.loopfilter, normal_mb_h)
+ (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);
+
+ if (!skip_lf)
+ LF_INVOKE(&pc->rtcd.loopfilter, normal_b_h)
+ (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);
+ }
+ else
+ {
+ if (mb_col > 0)
+ LF_INVOKE(&pc->rtcd.loopfilter, simple_mb_v)
+ (xd->dst.y_buffer, recon_y_stride, lfi_n->mblim[filter_level]);
+
+ if (!skip_lf)
+ LF_INVOKE(&pc->rtcd.loopfilter, simple_b_v)
+ (xd->dst.y_buffer, recon_y_stride, lfi_n->blim[filter_level]);
+
+ /* don't apply across umv border */
+ if (mb_row > 0)
+ LF_INVOKE(&pc->rtcd.loopfilter, simple_mb_h)
+ (xd->dst.y_buffer, recon_y_stride, lfi_n->mblim[filter_level]);
+
+ if (!skip_lf)
+ LF_INVOKE(&pc->rtcd.loopfilter, simple_b_h)
+ (xd->dst.y_buffer, recon_y_stride, lfi_n->blim[filter_level]);
+ }
}
- }
+ }
recon_yoffset += 16;
recon_uvoffset += 8;
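The threaded decoder above no longer adjusts a per-segment baseline at run time; each macroblock reads a precomputed level and, for the normal filter, builds a small loop_filter_info view of the threshold tables. A condensed restatement of the level lookup, using only names that appear in the hunks:

/* Per-macroblock filter level: segment id, reference frame and a prediction
 * mode LUT index the precomputed lvl[][][] table. */
static int mb_filter_level(const loop_filter_info_n *lfi_n,
                           const MB_MODE_INFO *mbmi)
{
    const int mode_index = lfi_n->mode_lf_lut[mbmi->mode];
    return lfi_n->lvl[mbmi->segment_id][mbmi->ref_frame][mode_index];
}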
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 73b4c7dcd..d719f36d9 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -561,10 +561,6 @@ void vp8_set_speed_features(VP8_COMP *cpi)
sf->recode_loop = 1;
sf->quarter_pixel_search = 1;
sf->half_pixel_search = 1;
- sf->full_freq[0] = 7;
- sf->full_freq[1] = 7;
- sf->min_fs_radius = 8;
- sf->max_fs_radius = 32;
sf->iterative_sub_pixel = 1;
sf->optimize_coefficients = 1;
sf->use_fastquant_for_pick = 0;
@@ -607,8 +603,6 @@ void vp8_set_speed_features(VP8_COMP *cpi)
sf->thresh_mult[THR_SPLITG ] = 5000;
sf->thresh_mult[THR_SPLITA ] = 5000;
- sf->full_freq[0] = 7;
- sf->full_freq[1] = 15;
sf->first_step = 0;
sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
@@ -666,8 +660,6 @@ void vp8_set_speed_features(VP8_COMP *cpi)
sf->thresh_mult[THR_SPLITG ] = 10000;
sf->thresh_mult[THR_SPLITA ] = 10000;
#endif
- sf->full_freq[0] = 15;
- sf->full_freq[1] = 31;
if (Speed > 0)
{
@@ -761,8 +753,6 @@ void vp8_set_speed_features(VP8_COMP *cpi)
// alt ref frames
sf->recode_loop = 2;
- sf->full_freq[0] = 31;
- sf->full_freq[1] = 63;
}
if (Speed > 3)
@@ -783,15 +773,11 @@ void vp8_set_speed_features(VP8_COMP *cpi)
sf->recode_loop = 0; // recode loop off
sf->RD = 0; // Turn rd off
- sf->full_freq[0] = 63;
- sf->full_freq[1] = 127;
}
if (Speed > 4)
{
sf->auto_filter = 0; // Faster selection of loop filter
- sf->full_freq[0] = INT_MAX;
- sf->full_freq[1] = INT_MAX;
cpi->mode_check_freq[THR_V_PRED] = 2;
cpi->mode_check_freq[THR_H_PRED] = 2;
@@ -853,8 +839,6 @@ void vp8_set_speed_features(VP8_COMP *cpi)
sf->thresh_mult[THR_SPLITMV ] = 5000;
sf->thresh_mult[THR_SPLITG ] = 10000;
sf->thresh_mult[THR_SPLITA ] = 10000;
- sf->full_freq[0] = 15;
- sf->full_freq[1] = 31;
sf->search_method = NSTEP;
if (Speed > 0)
@@ -935,8 +919,6 @@ void vp8_set_speed_features(VP8_COMP *cpi)
sf->thresh_mult[THR_SPLITA ] = 50000;
}
- sf->full_freq[0] = 31;
- sf->full_freq[1] = 63;
}
if (Speed > 2)
@@ -963,15 +945,11 @@ void vp8_set_speed_features(VP8_COMP *cpi)
sf->thresh_mult[THR_SPLITG ] = INT_MAX;
sf->thresh_mult[THR_SPLITA ] = INT_MAX;
- sf->full_freq[0] = 63;
- sf->full_freq[1] = 127;
}
if (Speed > 3)
{
sf->RD = 0;
- sf->full_freq[0] = INT_MAX;
- sf->full_freq[1] = INT_MAX;
sf->auto_filter = 1;
}
@@ -2105,7 +2083,7 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
//when needed. This will avoid unnecessary calls of vp8cx_init_quantizer() for every frame.
vp8cx_init_quantizer(cpi);
{
- vp8_init_loop_filter(cm);
+ vp8_loop_filter_init(cm);
cm->last_frame_type = KEY_FRAME;
cm->last_filter_type = cm->filter_type;
cm->last_sharpness_level = cm->sharpness_level;
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index 107a681be..be79cb0a5 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -180,9 +180,6 @@ typedef struct
int half_pixel_search;
int quarter_pixel_search;
int thresh_mult[MAX_MODES];
- int full_freq[2];
- int min_fs_radius;
- int max_fs_radius;
int max_step_search_steps;
int first_step;
int optimize_coefficients;
diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index 8f87611e2..725e44e62 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -42,9 +42,7 @@ extern unsigned int cnt_pm;
extern const MV_REFERENCE_FRAME vp8_ref_frame_order[MAX_MODES];
extern const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES];
-
extern unsigned int (*vp8_get4x4sse_cs)(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride);
-extern int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *best_ref_mv, int best_rd, int *, int *, int *, int, int *mvcost[2], int, int fullpixel);
extern int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]);
@@ -575,18 +573,6 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
continue;
}
- if(cpi->sf.improved_mv_pred && x->e_mbd.mode_info_context->mbmi.mode == NEWMV)
- {
- if(!saddone)
- {
- vp8_cal_sad(cpi,xd,x, recon_yoffset ,&near_sadidx[0] );
- saddone = 1;
- }
-
- vp8_mv_pred(cpi, &x->e_mbd, x->e_mbd.mode_info_context, &mvp,
- x->e_mbd.mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias, &sr, &near_sadidx[0]);
- }
-
switch (this_mode)
{
case B_PRED:
@@ -666,6 +652,15 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
if(cpi->sf.improved_mv_pred)
{
+ if(!saddone)
+ {
+ vp8_cal_sad(cpi,xd,x, recon_yoffset ,&near_sadidx[0] );
+ saddone = 1;
+ }
+
+ vp8_mv_pred(cpi, &x->e_mbd, x->e_mbd.mode_info_context, &mvp,
+ x->e_mbd.mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias, &sr, &near_sadidx[0]);
+
sr += speed_adjust;
//adjust search range according to sr from mv prediction
if(sr > step_param)
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 451a81845..8b1854161 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -1864,18 +1864,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
lf_or_gf = frame_lf_or_gf[x->e_mbd.mode_info_context->mbmi.ref_frame];
}
- if(x->e_mbd.mode_info_context->mbmi.mode == NEWMV)
- {
- if(!saddone)
- {
- vp8_cal_sad(cpi,xd,x, recon_yoffset ,&near_sadidx[0] );
- saddone = 1;
- }
-
- vp8_mv_pred(cpi, &x->e_mbd, x->e_mbd.mode_info_context, &mvp,
- x->e_mbd.mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias, &sr, &near_sadidx[0]);
- }
-
// Check to see if the testing frequency for this mode is at its max
// If so then prevent it from being tested and increase the threshold for its testing
if (cpi->mode_test_hit_counts[mode_index] && (cpi->mode_check_freq[mode_index] > 1))
@@ -2016,6 +2004,15 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
int tmp_row_min = x->mv_row_min;
int tmp_row_max = x->mv_row_max;
+ if(!saddone)
+ {
+ vp8_cal_sad(cpi,xd,x, recon_yoffset ,&near_sadidx[0] );
+ saddone = 1;
+ }
+
+ vp8_mv_pred(cpi, &x->e_mbd, x->e_mbd.mode_info_context, &mvp,
+ x->e_mbd.mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias, &sr, &near_sadidx[0]);
+
mvp_full.as_mv.col = mvp.as_mv.col>>3;
mvp_full.as_mv.row = mvp.as_mv.row>>3;
diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c
index 1c2656e0a..19913a9b1 100644
--- a/vp8/encoder/temporal_filter.c
+++ b/vp8/encoder/temporal_filter.c
@@ -262,10 +262,19 @@ static void vp8_temporal_filter_iterate_c
for (mb_row = 0; mb_row < mb_rows; mb_row++)
{
#if ALT_REF_MC_ENABLED
- // Reduced search extent by 3 for 6-tap filter & smaller UMV border
- cpi->mb.mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 19));
+ // Source frames are extended to 16 pixels. This differs from the
+ // L/A/G reference frames, which have a border of 32 (VP8BORDERINPIXELS).
+ // A 6-tap filter is used for motion search, which requires 2 pixels
+ // before and 3 pixels after, so the largest Y mv on a border would
+ // be 16 - 3. The UV blocks are half the size of the Y and are
+ // therefore only extended by 8, so the largest mv a UV block
+ // can support is 8 - 3. A UV mv is half of a Y mv.
+ // (16 - 3) >> 1 == 6, which is greater than 8 - 3.
+ // To keep the mv usable for both the Y and UV planes, the most it
+ // can be on a border is therefore 16 - 5.
+ cpi->mb.mv_row_min = -((mb_row * 16) + (16 - 5));
cpi->mb.mv_row_max = ((cpi->common.mb_rows - 1 - mb_row) * 16)
- + (VP8BORDERINPIXELS - 19);
+ + (16 - 5);
#endif
for (mb_col = 0; mb_col < mb_cols; mb_col++)
@@ -277,10 +286,9 @@ static void vp8_temporal_filter_iterate_c
vpx_memset(count, 0, 384*sizeof(unsigned short));
#if ALT_REF_MC_ENABLED
- // Reduced search extent by 3 for 6-tap filter & smaller UMV border
- cpi->mb.mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 19));
+ cpi->mb.mv_col_min = -((mb_col * 16) + (16 - 5));
cpi->mb.mv_col_max = ((cpi->common.mb_cols - 1 - mb_col) * 16)
- + (VP8BORDERINPIXELS - 19);
+ + (16 - 5);
#endif
for (frame = 0; frame < frame_count; frame++)
diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c
index e14e6fc92..15e7336b1 100644
--- a/vp8/encoder/tokenize.c
+++ b/vp8/encoder/tokenize.c
@@ -95,101 +95,183 @@ static void fill_value_tokens()
static void tokenize2nd_order_b
(
- const BLOCKD *const b,
+ MACROBLOCKD *x,
TOKENEXTRA **tp,
- const int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */
- ENTROPY_CONTEXT *a,
- ENTROPY_CONTEXT *l,
VP8_COMP *cpi
)
{
- int pt; /* near block/prev token context index */
- int c = 0; /* start at DC */
- const int eob = b->eob; /* one beyond last nonzero coeff */
- TOKENEXTRA *t = *tp; /* store tokens starting here */
- int x;
- const short *qcoeff_ptr = b->qcoeff;
+ int pt; /* near block/prev token context index */
+ int c; /* start at DC */
+ TOKENEXTRA *t = *tp;/* store tokens starting here */
+ const BLOCKD *b;
+ const short *qcoeff_ptr;
+ ENTROPY_CONTEXT * a;
+ ENTROPY_CONTEXT * l;
+ int band, rc, v, token;
+
+ b = x->block + 24;
+ qcoeff_ptr = b->qcoeff;
+ a = (ENTROPY_CONTEXT *)x->above_context + 8;
+ l = (ENTROPY_CONTEXT *)x->left_context + 8;
+
VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
- do
+ for (c = 0; c < b->eob; c++)
{
- const int band = vp8_coef_bands[c];
+ rc = vp8_default_zig_zag1d[c];
+ band = vp8_coef_bands[c];
+ v = qcoeff_ptr[rc];
- if (c < eob)
- {
- int rc = vp8_default_zig_zag1d[c];
- const int v = qcoeff_ptr[rc];
-#if CONFIG_DEBUG
- assert(-DCT_MAX_VALUE <= v && v < (DCT_MAX_VALUE));
-#endif
- t->Extra = vp8_dct_value_tokens_ptr[v].Extra;
- x = vp8_dct_value_tokens_ptr[v].Token;
- }
- else
- x = DCT_EOB_TOKEN;
+ t->Extra = vp8_dct_value_tokens_ptr[v].Extra;
+ token = vp8_dct_value_tokens_ptr[v].Token;
+
+ t->Token = token;
+ t->context_tree = cpi->common.fc.coef_probs [1] [band] [pt];
+
+ t->skip_eob_node = ((pt == 0) && (band > 0));
+
+ ++cpi->coef_counts [1] [band] [pt] [token];
+
+ pt = vp8_prev_token_class[token];
+ t++;
+ }
+ if (c < 16)
+ {
+ band = vp8_coef_bands[c];
+ t->Token = DCT_EOB_TOKEN;
+ t->context_tree = cpi->common.fc.coef_probs [1] [band] [pt];
- t->Token = x;
- t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt];
+ t->skip_eob_node = ((pt == 0) && (band > 0));
- t->skip_eob_node = pt == 0 && ((band > 0 && type > 0) || (band > 1 && type == 0));
+ ++cpi->coef_counts [1] [band] [pt] [DCT_EOB_TOKEN];
- ++cpi->coef_counts [type] [band] [pt] [x];
+ t++;
}
- while (pt = vp8_prev_token_class[x], ++t, c < eob && ++c < 16);
*tp = t;
- pt = (c != !type); /* 0 <-> all coeff data is zero */
+ pt = (c != 0); /* 0 <-> all coeff data is zero */
*a = *l = pt;
}
static void tokenize1st_order_b
(
- const BLOCKD *const b,
+ MACROBLOCKD *x,
TOKENEXTRA **tp,
- const int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */
- ENTROPY_CONTEXT *a,
- ENTROPY_CONTEXT *l,
+ int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */
VP8_COMP *cpi
)
{
- int pt; /* near block/prev token context index */
- int c = type ? 0 : 1; /* start at DC unless type 0 */
- const int eob = b->eob; /* one beyond last nonzero coeff */
- TOKENEXTRA *t = *tp; /* store tokens starting here */
- int x;
- const short *qcoeff_ptr = b->qcoeff;
- VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
-
- do
+ unsigned int block;
+ const BLOCKD *b;
+ int pt; /* near block/prev token context index */
+ int c;
+ int token;
+ TOKENEXTRA *t = *tp;/* store tokens starting here */
+ const short *qcoeff_ptr;
+ ENTROPY_CONTEXT * a;
+ ENTROPY_CONTEXT * l;
+ int band, rc, v;
+ int tmp1, tmp2;
+
+ b = x->block;
+ /* Luma */
+ for (block = 0; block < 16; block++, b++)
{
- const int band = vp8_coef_bands[c];
+ tmp1 = vp8_block2above[block];
+ tmp2 = vp8_block2left[block];
+ qcoeff_ptr = b->qcoeff;
+ a = (ENTROPY_CONTEXT *)x->above_context + tmp1;
+ l = (ENTROPY_CONTEXT *)x->left_context + tmp2;
+
+ VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
- x = DCT_EOB_TOKEN;
+ c = type ? 0 : 1;
- if (c < eob)
+ for (; c < b->eob; c++)
{
- int rc = vp8_default_zig_zag1d[c];
- const int v = qcoeff_ptr[rc];
-#if CONFIG_DEBUG
- assert(-DCT_MAX_VALUE <= v && v < (DCT_MAX_VALUE));
-#endif
+ rc = vp8_default_zig_zag1d[c];
+ band = vp8_coef_bands[c];
+ v = qcoeff_ptr[rc];
+
t->Extra = vp8_dct_value_tokens_ptr[v].Extra;
- x = vp8_dct_value_tokens_ptr[v].Token;
+ token = vp8_dct_value_tokens_ptr[v].Token;
+
+ t->Token = token;
+ t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt];
+
+ t->skip_eob_node = pt == 0 &&
+ ((band > 0 && type > 0) || (band > 1 && type == 0));
+
+ ++cpi->coef_counts [type] [band] [pt] [token];
+
+ pt = vp8_prev_token_class[token];
+ t++;
}
+ if (c < 16)
+ {
+ band = vp8_coef_bands[c];
+ t->Token = DCT_EOB_TOKEN;
+ t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt];
- t->Token = x;
- t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt];
+ t->skip_eob_node = pt == 0 &&
+ ((band > 0 && type > 0) || (band > 1 && type == 0));
- t->skip_eob_node = pt == 0 && ((band > 0 && type > 0) || (band > 1 && type == 0));
+ ++cpi->coef_counts [type] [band] [pt] [DCT_EOB_TOKEN];
+
+ t++;
+ }
+ *tp = t;
+ pt = (c != !type); /* 0 <-> all coeff data is zero */
+ *a = *l = pt;
- ++cpi->coef_counts [type] [band] [pt] [x];
}
- while (pt = vp8_prev_token_class[x], ++t, c < eob && ++c < 16);
+ /* Chroma */
+ for (block = 16; block < 24; block++, b++)
+ {
+ tmp1 = vp8_block2above[block];
+ tmp2 = vp8_block2left[block];
+ qcoeff_ptr = b->qcoeff;
+ a = (ENTROPY_CONTEXT *)x->above_context + tmp1;
+ l = (ENTROPY_CONTEXT *)x->left_context + tmp2;
- *tp = t;
- pt = (c != !type); /* 0 <-> all coeff data is zero */
- *a = *l = pt;
+ VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
+
+ for (c = 0; c < b->eob; c++)
+ {
+ rc = vp8_default_zig_zag1d[c];
+ band = vp8_coef_bands[c];
+ v = qcoeff_ptr[rc];
+
+ t->Extra = vp8_dct_value_tokens_ptr[v].Extra;
+ token = vp8_dct_value_tokens_ptr[v].Token;
+
+ t->Token = token;
+ t->context_tree = cpi->common.fc.coef_probs [2] [band] [pt];
+
+ t->skip_eob_node = ((pt == 0) && (band > 0));
+
+ ++cpi->coef_counts [2] [band] [pt] [token];
+
+ pt = vp8_prev_token_class[token];
+ t++;
+ }
+ if (c < 16)
+ {
+ band = vp8_coef_bands[c];
+ t->Token = DCT_EOB_TOKEN;
+ t->context_tree = cpi->common.fc.coef_probs [2] [band] [pt];
+
+ t->skip_eob_node = ((pt == 0) && (band > 0));
+
+ ++cpi->coef_counts [2] [band] [pt] [DCT_EOB_TOKEN];
+
+ t++;
+ }
+ *tp = t;
+ pt = (c != 0); /* 0 <-> all coeff data is zero */
+ *a = *l = pt;
+ }
}
@@ -214,10 +296,7 @@ static int mb_is_skippable(MACROBLOCKD *x, int has_y2_block)
void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
{
- ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)x->above_context;
- ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)x->left_context;
int plane_type;
- int b;
int has_y2_block;
has_y2_block = (x->mode_info_context->mbmi.mode != B_PRED
@@ -240,26 +319,15 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
cpi->skip_false_count++;
-
-
plane_type = 3;
if(has_y2_block)
{
- tokenize2nd_order_b(x->block + 24, t, 1,
- A + vp8_block2above[24], L + vp8_block2left[24], cpi);
+ tokenize2nd_order_b(x, t, cpi);
plane_type = 0;
}
- for (b = 0; b < 16; b++)
- tokenize1st_order_b(x->block + b, t, plane_type,
- A + vp8_block2above[b],
- L + vp8_block2left[b], cpi);
-
- for (b = 16; b < 24; b++)
- tokenize1st_order_b(x->block + b, t, 2,
- A + vp8_block2above[b],
- L + vp8_block2left[b], cpi);
+ tokenize1st_order_b(x, t, plane_type, cpi);
}
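
The refactor above unrolls the per-block loop into dedicated Y2, luma and chroma passes, but every pass follows the same shape: emit one token per coefficient up to b->eob, then close the block with a single DCT_EOB_TOKEN if fewer than 16 coefficients were coded. A stripped-down sketch of that shape, with the entropy-table bookkeeping elided and the helper name invented for illustration:

    /* Simplified sketch of one tokenization pass. The context_tree,
     * skip_eob_node and coef_counts updates from the real code are omitted. */
    static TOKENEXTRA *tokenize_block_sketch(const short *qcoeff, int eob,
                                             int first_coeff, int *pt,
                                             TOKENEXTRA *t)
    {
        int c;

        for (c = first_coeff; c < eob; c++)
        {
            const int rc    = vp8_default_zig_zag1d[c];
            const int v     = qcoeff[rc];
            const int token = vp8_dct_value_tokens_ptr[v].Token;

            t->Token = token;
            t->Extra = vp8_dct_value_tokens_ptr[v].Extra;
            /* probability lookup and count update indexed by [plane][band][*pt] */
            *pt = vp8_prev_token_class[token];
            t++;
        }

        if (c < 16)    /* block ended early: terminate it with an EOB token */
        {
            t->Token = DCT_EOB_TOKEN;
            t++;
        }

        return t;
    }
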
diff --git a/vpx_mem/include/nds/vpx_mem_nds.h b/vpx_mem/include/nds/vpx_mem_nds.h
deleted file mode 100644
index e54f54d9b..000000000
--- a/vpx_mem/include/nds/vpx_mem_nds.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef __VPX_MEM_NDS_H__
-#define __VPX_MEM_NDS_H__
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
-#include <nitro.h>
-#include <nitro/os.h>
-
- void *vpx_mem_nds_alloc(osarena_id id, osheap_handle handle, size_t size, size_t align);
- void vpx_mem_nds_free(osarena_id id, osheap_handle handle, void *mem);
- int vpx_nds_alloc_heap(osarena_id id, u32 size);
-
-#if defined(__cplusplus)
-}
-#endif
-
-#endif /*__VPX_MEM_NDS_H__*/
diff --git a/vpx_mem/vpx_mem_tracker.c b/vpx_mem/vpx_mem_tracker.c
index 938ad0716..9e8623a9a 100644
--- a/vpx_mem/vpx_mem_tracker.c
+++ b/vpx_mem/vpx_mem_tracker.c
@@ -36,9 +36,6 @@
# include <winbase.h>
#elif defined(VXWORKS)
# include <sem_lib.h>
-#elif defined(NDS_NITRO)
-# include <nitro.h>
-# include <nitro/os.h>
#endif
#include <stdio.h>
@@ -112,8 +109,6 @@ struct memory_tracker
HANDLE mutex;
#elif defined(VXWORKS)
SEM_ID mutex;
-#elif defined(NDS_NITRO)
- OSMutex mutex;
#elif defined(NO_MUTEX)
#else
#error "No mutex type defined for this platform!"
@@ -193,9 +188,6 @@ int vpx_memory_tracker_init(int padding_size, int pad_value)
memtrack.mutex = sem_bcreate(SEM_Q_FIFO, /*SEM_Q_FIFO non-priority based mutex*/
SEM_FULL); /*SEM_FULL initial state is unlocked*/
ret = !memtrack.mutex;
-#elif defined(NDS_NITRO)
- os_init_mutex(&memtrack.mutex);
- ret = 0;
#elif defined(NO_MUTEX)
ret = 0;
#endif
@@ -251,9 +243,7 @@ void vpx_memory_tracker_destroy()
if (!g_logging.type && g_logging.file && g_logging.file != stderr)
{
-#if !defined(NDS_NITRO)
fclose(g_logging.file);
-#endif
g_logging.file = NULL;
}
@@ -368,15 +358,12 @@ int vpx_memory_tracker_set_log_type(int type, char *option)
g_logging.file = stderr;
ret = 0;
}
-
-#if !defined(NDS_NITRO)
else
{
if ((g_logging.file = fopen((char *)option, "w")))
ret = 0;
}
-#endif
break;
#if defined(WIN32) && !defined(_WIN32_WCE)
case 1:
@@ -506,12 +493,6 @@ static void memory_tracker_dump()
p->addr, i, p->size,
p->file, p->line);
-#ifdef NDS_NITRO
-
- if (!(i % 20)) os_sleep(500);
-
-#endif
-
p = p->next;
++i;
}
@@ -719,9 +700,6 @@ static int memory_tracker_lock_mutex()
ret = WaitForSingleObject(memtrack.mutex, INFINITE);
#elif defined(VXWORKS)
ret = sem_take(memtrack.mutex, WAIT_FOREVER);
-#elif defined(NDS_NITRO)
- os_lock_mutex(&memtrack.mutex);
- ret = 0;
#endif
if (ret)
@@ -754,9 +732,6 @@ static int memory_tracker_unlock_mutex()
ret = !ReleaseMutex(memtrack.mutex);
#elif defined(VXWORKS)
ret = sem_give(memtrack.mutex);
-#elif defined(NDS_NITRO)
- os_unlock_mutex(&memtrack.mutex);
- ret = 0;
#endif
if (ret)
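
Each mutex site in the tracker is a per-platform #elif chain, and dropping NDS support simply removes one arm from each chain. As a point of comparison only, the same lock/unlock pattern with a POSIX mutex; this branch is illustrative and is not taken from the file, which dispatches on WIN32/VXWORKS as shown above:

    #include <pthread.h>

    /* Illustrative lock/unlock pair matching the structure of
     * memory_tracker_lock_mutex()/memory_tracker_unlock_mutex(). */
    static pthread_mutex_t tracker_mutex = PTHREAD_MUTEX_INITIALIZER;

    static int tracker_lock(void)
    {
        return pthread_mutex_lock(&tracker_mutex);    /* 0 on success */
    }

    static int tracker_unlock(void)
    {
        return pthread_mutex_unlock(&tracker_mutex);  /* 0 on success */
    }
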
diff --git a/vpx_scale/arm/nds/yv12extend.c b/vpx_scale/arm/nds/yv12extend.c
deleted file mode 100644
index 48c0dfb33..000000000
--- a/vpx_scale/arm/nds/yv12extend.c
+++ /dev/null
@@ -1,221 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/****************************************************************************
-*
-* Module Title : yv12extend.c
-*
-* Description :
-*
-***************************************************************************/
-
-/****************************************************************************
-* Header Files
-****************************************************************************/
-#include "vpx_scale/yv12config.h"
-#include "vpx_mem/vpx_mem.h"
-#include <nitro.h>
-#include <nitro/mi.h>
-#include <nitro/itcm_begin.h>
-
-//---- DMA Number
-#define DMA_NO 3
-
-/****************************************************************************
-* Exports
-****************************************************************************/
-
-/****************************************************************************
-*
-****************************************************************************/
-void
-vp8_yv12_extend_frame_borders(YV12_BUFFER_CONFIG *ybf)
-{
- int i;
- unsigned char *src_ptr1, *src_ptr2;
- unsigned char *dest_ptr1, *dest_ptr2;
-
- unsigned int Border;
- int plane_stride;
- int plane_height;
- int plane_width;
-
- /***********/
- /* Y Plane */
- /***********/
- Border = ybf->border;
- plane_stride = ybf->y_stride;
- plane_height = ybf->y_height;
- plane_width = ybf->y_width;
-
- // copy the left and right most columns out
- src_ptr1 = ybf->y_buffer;
- src_ptr2 = src_ptr1 + plane_width - 1;
- dest_ptr1 = src_ptr1 - Border;
- dest_ptr2 = src_ptr2 + 1;
-
- for (i = 0; i < plane_height; i++)
- {
- mi_cpu_fill8(dest_ptr1, src_ptr1[0], Border);
- mi_cpu_fill8(dest_ptr2, src_ptr2[0], Border);
- src_ptr1 += plane_stride;
- src_ptr2 += plane_stride;
- dest_ptr1 += plane_stride;
- dest_ptr2 += plane_stride;
- }
-
- // Now copy the top and bottom source lines into each line of the respective borders
- src_ptr1 = ybf->y_buffer - Border;
- src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
- dest_ptr1 = src_ptr1 - (Border * plane_stride);
- dest_ptr2 = src_ptr2 + plane_stride;
-
- for (i = 0; i < (int)Border; i++)
- {
- mi_cpu_copy_fast(src_ptr1, dest_ptr1, plane_stride);
- mi_cpu_copy_fast(src_ptr2, dest_ptr2, plane_stride);
- dest_ptr1 += plane_stride;
- dest_ptr2 += plane_stride;
- }
-
- plane_stride /= 2;
- plane_height /= 2;
- plane_width /= 2;
- Border /= 2;
-
- /***********/
- /* U Plane */
- /***********/
-
- // copy the left and right most columns out
- src_ptr1 = ybf->u_buffer;
- src_ptr2 = src_ptr1 + plane_width - 1;
- dest_ptr1 = src_ptr1 - Border;
- dest_ptr2 = src_ptr2 + 1;
-
- for (i = 0; i < plane_height; i++)
- {
- mi_cpu_fill8(dest_ptr1, src_ptr1[0], Border);
- mi_cpu_fill8(dest_ptr2, src_ptr2[0], Border);
- src_ptr1 += plane_stride;
- src_ptr2 += plane_stride;
- dest_ptr1 += plane_stride;
- dest_ptr2 += plane_stride;
- }
-
- // Now copy the top and bottom source lines into each line of the respective borders
- src_ptr1 = ybf->u_buffer - Border;
- src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
- dest_ptr1 = src_ptr1 - (Border * plane_stride);
- dest_ptr2 = src_ptr2 + plane_stride;
-
- for (i = 0; i < (int)(Border); i++)
- {
- mi_cpu_copy_fast(src_ptr1, dest_ptr1, plane_stride);
- mi_cpu_copy_fast(src_ptr2, dest_ptr2, plane_stride);
- dest_ptr1 += plane_stride;
- dest_ptr2 += plane_stride;
- }
-
- /***********/
- /* V Plane */
- /***********/
-
- // copy the left and right most columns out
- src_ptr1 = ybf->v_buffer;
- src_ptr2 = src_ptr1 + plane_width - 1;
- dest_ptr1 = src_ptr1 - Border;
- dest_ptr2 = src_ptr2 + 1;
-
- for (i = 0; i < plane_height; i++)
- {
- mi_cpu_fill8(dest_ptr1, src_ptr1[0], Border);
- mi_cpu_fill8(dest_ptr2, src_ptr2[0], Border);
- src_ptr1 += plane_stride;
- src_ptr2 += plane_stride;
- dest_ptr1 += plane_stride;
- dest_ptr2 += plane_stride;
- }
-
- // Now copy the top and bottom source lines into each line of the respective borders
- src_ptr1 = ybf->v_buffer - Border;
- src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
- dest_ptr1 = src_ptr1 - (Border * plane_stride);
- dest_ptr2 = src_ptr2 + plane_stride;
-
- for (i = 0; i < (int)(Border); i++)
- {
- mi_cpu_copy_fast(src_ptr1, dest_ptr1, plane_stride);
- mi_cpu_copy_fast(src_ptr2, dest_ptr2, plane_stride);
- dest_ptr1 += plane_stride;
- dest_ptr2 += plane_stride;
- }
-}
-
-
-
-/****************************************************************************
-*
-* ROUTINE : vp8_yv12_copy_frame
-*
-* INPUTS :
-*
-* OUTPUTS : None.
-*
-* RETURNS : void
-*
-* FUNCTION : Copies the source image into the destination image and
-* updates the destination's UMV borders.
-*
-* SPECIAL NOTES : The frames are assumed to be identical in size.
-*
-****************************************************************************/
-void
-vp8_yv12_copy_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc)
-{
- int yplane_size = (src_ybc->y_height + 2 * src_ybc->border) * (src_ybc->y_stride);
- int mem_size = (yplane_size * 3 / 2) + (src_ybc->y_stride * 2);
-
- mi_cpu_copy_fast(src_ybc->buffer_alloc, dst_ybc->buffer_alloc, mem_size);
-
- /* unsigned char *src_y, *dst_y;
- unsigned char *src_u, *dst_u;
- unsigned char *src_v, *dst_v;
-
- int yheight, uv_height;
- int ystride, uv_stride;
- int border;
- int yoffset, uvoffset;
-
- border = src_ybc->border;
- yheight = src_ybc->y_height;
- uv_height = src_ybc->uv_height;
-
- ystride = src_ybc->y_stride;
- uv_stride = src_ybc->uv_stride;
-
- yoffset = border * (ystride + 1);
- uvoffset = border/2 * (uv_stride + 1);
-
- src_y = src_ybc->y_buffer - yoffset;
- dst_y = dst_ybc->y_buffer - yoffset;
- src_u = src_ybc->u_buffer - uvoffset;
- dst_u = dst_ybc->u_buffer - uvoffset;
- src_v = src_ybc->v_buffer - uvoffset;
- dst_v = dst_ybc->v_buffer - uvoffset;
-
- mi_cpu_copy_fast (src_y, dst_y, ystride * (yheight + 2 * border));
- mi_cpu_copy_fast (src_u, dst_u, uv_stride * (uv_height + border));
- mi_cpu_copy_fast (src_v, dst_v, uv_stride * (uv_height + border));
- */
-}
-
-#include <nitro/itcm_end.h>
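
The deleted NDS routine above implements the usual border-extension pattern with Nitro's mi_cpu_fill8/mi_cpu_copy_fast: replicate the left/right edge pixels sideways, then copy the top and bottom rows outward. The same per-plane pattern in portable C, shown as a sketch for one plane only; the helper name is invented and the stride is assumed to equal width + 2*border:

    #include <string.h>

    /* Replicate edge pixels into the left/right border columns, then copy the
     * top and bottom rows (borders included) into the border rows above and
     * below. The caller repeats this for the Y, U and V planes. */
    static void extend_plane_borders(unsigned char *plane, int width,
                                     int height, int stride, int border)
    {
        unsigned char *left   = plane;
        unsigned char *right  = plane + width - 1;
        unsigned char *top    = plane - border;
        unsigned char *bottom = plane + (height - 1) * stride - border;
        int i;

        for (i = 0; i < height; i++)
        {
            memset(left - border, left[0], border);   /* left border  */
            memset(right + 1, right[0], border);      /* right border */
            left  += stride;
            right += stride;
        }

        for (i = 1; i <= border; i++)
        {
            memcpy(top - i * stride, top, stride);        /* rows above */
            memcpy(bottom + i * stride, bottom, stride);  /* rows below */
        }
    }
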
diff --git a/vpx_scale/generic/yv12config.c b/vpx_scale/generic/yv12config.c
index cb0ab9466..d02cde28f 100644
--- a/vpx_scale/generic/yv12config.c
+++ b/vpx_scale/generic/yv12config.c
@@ -24,9 +24,12 @@ vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf)
{
if (ybf)
{
- duck_free(ybf->buffer_alloc);
+ vpx_free(ybf->buffer_alloc);
- ybf->buffer_alloc = 0;
+ /* buffer_alloc isn't accessed by most functions. Rather y_buffer,
+ u_buffer and v_buffer point to buffer_alloc and are used. Clear out
+ all of this so that a freed pointer isn't inadvertently used */
+ vpx_memset (ybf, 0, sizeof (YV12_BUFFER_CONFIG));
}
else
{
@@ -44,38 +47,37 @@ vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int
{
/*NOTE:*/
- int yplane_size = (height + 2 * border) * (width + 2 * border);
- int uvplane_size = ((1 + height) / 2 + border) * ((1 + width) / 2 + border);
-
if (ybf)
{
+ int uv_width = width >> 1;
+ int uv_height = height >> 1;
+ int yplane_size = (height + 2 * border) * (width + 2 * border);
+ int uvplane_size = (uv_height + border) * (uv_width + border);
+
vp8_yv12_de_alloc_frame_buffer(ybf);
+ /* only support allocating buffers that have
+ a height and width that are multiples of 16 */
+ if ((width & 0xf) | (height & 0xf))
+ return -3;
+
ybf->y_width = width;
ybf->y_height = height;
ybf->y_stride = width + 2 * border;
- ybf->uv_width = (1 + width) / 2;
- ybf->uv_height = (1 + height) / 2;
- ybf->uv_stride = ybf->uv_width + border;
+ ybf->uv_width = uv_width;
+ ybf->uv_height = uv_height;
+ ybf->uv_stride = uv_width + border;
ybf->border = border;
ybf->frame_size = yplane_size + 2 * uvplane_size;
- /* Added 2 extra lines to framebuffer so that copy12x12 doesn't fail
- * when we have a large motion vector in V on the last v block.
- * Note : We never use these pixels anyway so this doesn't hurt.
- */
- ybf->buffer_alloc = (unsigned char *) duck_memalign(32, ybf->frame_size + (ybf->y_stride * 2) + 32, 0);
+ ybf->buffer_alloc = (unsigned char *) vpx_memalign(32, ybf->frame_size);
if (ybf->buffer_alloc == NULL)
return -1;
ybf->y_buffer = ybf->buffer_alloc + (border * ybf->y_stride) + border;
-
- if (yplane_size & 0xf)
- yplane_size += 16 - (yplane_size & 0xf);
-
ybf->u_buffer = ybf->buffer_alloc + yplane_size + (border / 2 * ybf->uv_stride) + border / 2;
ybf->v_buffer = ybf->buffer_alloc + yplane_size + uvplane_size + (border / 2 * ybf->uv_stride) + border / 2;
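
With the allocation above, the frame is one contiguous block: a bordered Y plane of (height + 2*border) * (width + 2*border) bytes followed by two half-resolution chroma planes. For example, a 320x240 frame with a 32-pixel border gives yplane_size = 304 * 384 = 116736 and uvplane_size = 152 * 192 = 29184, so frame_size = 116736 + 2 * 29184 = 175104 bytes. A small sketch of the offset arithmetic, mirroring the expressions in the diff; the helper itself is illustrative:

    /* Compute plane sizes and the byte offsets of y_buffer, u_buffer and
     * v_buffer within buffer_alloc. Returns the total frame_size. */
    static int yv12_layout_sketch(int width, int height, int border,
                                  int *y_off, int *u_off, int *v_off)
    {
        const int uv_width     = width >> 1;
        const int uv_height    = height >> 1;
        const int y_stride     = width + 2 * border;
        const int uv_stride    = uv_width + border;
        const int yplane_size  = (height + 2 * border) * y_stride;
        const int uvplane_size = (uv_height + border) * uv_stride;

        *y_off = (border * y_stride) + border;
        *u_off = yplane_size + (border / 2 * uv_stride) + border / 2;
        *v_off = yplane_size + uvplane_size
                 + (border / 2 * uv_stride) + border / 2;

        return yplane_size + 2 * uvplane_size;   /* frame_size */
    }
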
diff --git a/vpxenc.c b/vpxenc.c
index 042f07b81..d82a97fad 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -501,15 +501,42 @@ void Ebml_Write(EbmlGlobal *glob, const void *buffer_in, unsigned long len)
if(fwrite(buffer_in, 1, len, glob->stream));
}
-
-void Ebml_Serialize(EbmlGlobal *glob, const void *buffer_in, unsigned long len)
+#define WRITE_BUFFER(s) \
+for(i = len-1; i>=0; i--)\
+{ \
+ x = *(const s *)buffer_in >> (i * CHAR_BIT); \
+ Ebml_Write(glob, &x, 1); \
+}
+void Ebml_Serialize(EbmlGlobal *glob, const void *buffer_in, int buffer_size, unsigned long len)
{
- const unsigned char *q = (const unsigned char *)buffer_in + len - 1;
+ char x;
+ int i;
- for(; len; len--)
- Ebml_Write(glob, q--, 1);
+ /* buffer_size:
+ * 1 - int8_t;
+ * 2 - int16_t;
+ * 4 - int32_t;
+ * 8 - int64_t;
+ */
+ switch (buffer_size)
+ {
+ case 1:
+ WRITE_BUFFER(int8_t)
+ break;
+ case 2:
+ WRITE_BUFFER(int16_t)
+ break;
+ case 4:
+ WRITE_BUFFER(int32_t)
+ break;
+ case 8:
+ WRITE_BUFFER(int64_t)
+ break;
+ default:
+ break;
+ }
}
-
+#undef WRITE_BUFFER
/* Need a fixed size serializer for the track ID. libmkv provides a 64 bit
* one, but not a 32 bit one.
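
The reworked Ebml_Serialize writes the value one byte at a time, most significant byte first, after picking an integer width from buffer_size. The same idea without the macro, as a sketch for an unsigned 64-bit value; the helper name is made up and fwrite stands in for Ebml_Write:

    #include <limits.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Write the low `len` bytes of `val` in big-endian order, one byte per
     * write, mirroring what WRITE_BUFFER expands to for each integer width. */
    static void write_be_bytes(FILE *stream, uint64_t val, int len)
    {
        int i;

        for (i = len - 1; i >= 0; i--)
        {
            unsigned char byte = (unsigned char)(val >> (i * CHAR_BIT));
            fwrite(&byte, 1, 1, stream);
        }
    }
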
@@ -518,8 +545,8 @@ static void Ebml_SerializeUnsigned32(EbmlGlobal *glob, unsigned long class_id, u
{
unsigned char sizeSerialized = 4 | 0x80;
Ebml_WriteID(glob, class_id);
- Ebml_Serialize(glob, &sizeSerialized, 1);
- Ebml_Serialize(glob, &ui, 4);
+ Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1);
+ Ebml_Serialize(glob, &ui, sizeof(ui), 4);
}
@@ -533,7 +560,7 @@ Ebml_StartSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc,
Ebml_WriteID(glob, class_id);
*ebmlLoc = ftello(glob->stream);
- Ebml_Serialize(glob, &unknownLen, 8);
+ Ebml_Serialize(glob, &unknownLen, sizeof(unknownLen), 8);
}
static void
@@ -551,7 +578,7 @@ Ebml_EndSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc)
/* Seek back to the beginning of the element and write the new size */
fseeko(glob->stream, *ebmlLoc, SEEK_SET);
- Ebml_Serialize(glob, &size, 8);
+ Ebml_Serialize(glob, &size, sizeof(size), 8);
/* Reset the stream pointer */
fseeko(glob->stream, pos, SEEK_SET);
@@ -741,13 +768,13 @@ write_webm_block(EbmlGlobal *glob,
block_length = pkt->data.frame.sz + 4;
block_length |= 0x10000000;
- Ebml_Serialize(glob, &block_length, 4);
+ Ebml_Serialize(glob, &block_length, sizeof(block_length), 4);
track_number = 1;
track_number |= 0x80;
Ebml_Write(glob, &track_number, 1);
- Ebml_Serialize(glob, &block_timecode, 2);
+ Ebml_Serialize(glob, &block_timecode, sizeof(block_timecode), 2);
flags = 0;
if(is_keyframe)
@@ -1312,6 +1339,11 @@ static void init_rate_histogram(struct rate_hist *hist,
* adjustment (5/4) to account for alt-refs
*/
hist->samples = cfg->rc_buf_sz * 5 / 4 * fps->num / fps->den / 1000;
+
+ // prevent division by zero
+ if (hist->samples == 0)
+ hist->samples = 1;
+
hist->pts = calloc(hist->samples, sizeof(*hist->pts));
hist->sz = calloc(hist->samples, sizeof(*hist->sz));
for(i=0; i<RATE_BINS; i++)