summaryrefslogtreecommitdiff
path: root/vp8/common
diff options
context:
space:
mode:
Diffstat (limited to 'vp8/common')
-rw-r--r--vp8/common/alloccommon.c18
-rw-r--r--vp8/common/blockd.h13
-rw-r--r--vp8/common/entropy.c6
-rw-r--r--vp8/common/entropy.h4
-rw-r--r--vp8/common/generic/systemdependent.c10
-rw-r--r--vp8/common/idctllm.c33
-rw-r--r--vp8/common/maskingmv.c855
-rw-r--r--vp8/common/onyxc_int.h1
-rw-r--r--vp8/common/quant_common.c45
-rw-r--r--vp8/common/reconintra4x4.c16
-rw-r--r--vp8/common/x86/mask_sse3.asm484
11 files changed, 1449 insertions, 36 deletions
diff --git a/vp8/common/alloccommon.c b/vp8/common/alloccommon.c
index 9dce8c8f6..5ab8e29ab 100644
--- a/vp8/common/alloccommon.c
+++ b/vp8/common/alloccommon.c
@@ -126,7 +126,16 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
}
void vp8_setup_version(VP8_COMMON *cm)
{
- switch (cm->version)
+ if (cm->version & 0x4)
+ {
+ if (!CONFIG_EXPERIMENTAL)
+ vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
+ "Bitstream was created by an experimental "
+ "encoder");
+ cm->experimental = 1;
+ }
+
+ switch (cm->version & 0x3)
{
case 0:
cm->no_lpf = 0;
@@ -152,13 +161,6 @@ void vp8_setup_version(VP8_COMMON *cm)
cm->use_bilinear_mc_filter = 1;
cm->full_pixel = 1;
break;
- default:
- /*4,5,6,7 are reserved for future use*/
- cm->no_lpf = 0;
- cm->simpler_lpf = 0;
- cm->use_bilinear_mc_filter = 0;
- cm->full_pixel = 0;
- break;
}
}
void vp8_create_common(VP8_COMMON *oci)
diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h
index 5a8991e65..3b4986f49 100644
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -32,8 +32,8 @@ void vpx_log(const char *format, ...);
#define UCONTEXT 1
#define VCONTEXT 2
#define Y2CONTEXT 3
-
#define MB_FEATURE_TREE_PROBS 3
+
#define MAX_MB_SEGMENTS 4
#define MAX_REF_LF_DELTAS 4
@@ -168,6 +168,7 @@ typedef struct
int as_int;
MV as_mv;
} mv;
+ unsigned char segment_flag;
unsigned char partitioning;
unsigned char mb_skip_coeff; /* does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens */
@@ -251,11 +252,15 @@ typedef struct
/* 0 (do not update) 1 (update) the macroblock segmentation feature data. */
unsigned char mb_segement_abs_delta;
+ unsigned char temporal_update;
/* Per frame flags that define which MB level features (such as quantizer or loop filter level) */
/* are enabled and when enabled the proabilities used to decode the per MB flags in MB_MODE_INFO */
- vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS]; /* Probability Tree used to code Segment number */
-
- signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; /* Segment parameters */
+#if CONFIG_SEGMENTATION
+ vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS + 3]; // Probability Tree used to code Segment number
+#else
+ vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS];
+#endif
+ signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; // Segment parameters
/* mode_based Loop filter adjustment */
unsigned char mode_ref_lf_delta_enabled;
diff --git a/vp8/common/entropy.c b/vp8/common/entropy.c
index a1fe4f4ab..219483289 100644
--- a/vp8/common/entropy.c
+++ b/vp8/common/entropy.c
@@ -76,7 +76,7 @@ static const Prob Pcat3[] = { 173, 148, 140};
static const Prob Pcat4[] = { 176, 155, 140, 135};
static const Prob Pcat5[] = { 180, 157, 141, 134, 130};
static const Prob Pcat6[] =
-{ 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129};
+{ 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129};
static vp8_tree_index cat1[2], cat2[4], cat3[6], cat4[8], cat5[10], cat6[22];
@@ -111,7 +111,7 @@ static void init_bit_trees()
init_bit_tree(cat3, 3);
init_bit_tree(cat4, 4);
init_bit_tree(cat5, 5);
- init_bit_tree(cat6, 11);
+ init_bit_tree(cat6, 13);
}
vp8_extra_bit_struct vp8_extra_bits[12] =
@@ -126,7 +126,7 @@ vp8_extra_bit_struct vp8_extra_bits[12] =
{ cat3, Pcat3, 3, 11},
{ cat4, Pcat4, 4, 19},
{ cat5, Pcat5, 5, 35},
- { cat6, Pcat6, 11, 67},
+ { cat6, Pcat6, 13, 67},
{ 0, 0, 0, 0}
};
#include "defaultcoefcounts.h"
diff --git a/vp8/common/entropy.h b/vp8/common/entropy.h
index d174e45b9..77f2673aa 100644
--- a/vp8/common/entropy.h
+++ b/vp8/common/entropy.h
@@ -27,7 +27,7 @@
#define DCT_VAL_CATEGORY3 7 /* 11-18 Extra Bits 3+1 */
#define DCT_VAL_CATEGORY4 8 /* 19-34 Extra Bits 4+1 */
#define DCT_VAL_CATEGORY5 9 /* 35-66 Extra Bits 5+1 */
-#define DCT_VAL_CATEGORY6 10 /* 67+ Extra Bits 11+1 */
+#define DCT_VAL_CATEGORY6 10 /* 67+ Extra Bits 13+1 */
#define DCT_EOB_TOKEN 11 /* EOB Extra Bits 0+0 */
#define vp8_coef_tokens 12
@@ -51,7 +51,7 @@ extern vp8_extra_bit_struct vp8_extra_bits[12]; /* indexed by token value */
#define PROB_UPDATE_BASELINE_COST 7
#define MAX_PROB 255
-#define DCT_MAX_VALUE 2048
+#define DCT_MAX_VALUE 8192
/* Coefficients are predicted via a 3-dimensional probability table. */
diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c
index c843d86fe..b2eca3e26 100644
--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c
@@ -83,8 +83,18 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
vp8_arch_x86_common_init(ctx);
#endif
+
#if ARCH_ARM
vp8_arch_arm_common_init(ctx);
#endif
+#if CONFIG_EXTEND_QRANGE
+ rtcd->idct.idct1 = vp8_short_idct4x4llm_1_c;
+ rtcd->idct.idct16 = vp8_short_idct4x4llm_c;
+ rtcd->idct.idct1_scalar_add = vp8_dc_only_idct_add_c;
+ rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_c;
+ rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_c;
+
+#endif
+
}
diff --git a/vp8/common/idctllm.c b/vp8/common/idctllm.c
index 196062df6..c65d35adc 100644
--- a/vp8/common/idctllm.c
+++ b/vp8/common/idctllm.c
@@ -22,6 +22,8 @@
* so
* x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1).
**************************************************************************/
+#include "vpx_ports/config.h"
+
static const int cospi8sqrt2minus1 = 20091;
static const int sinpi8sqrt2 = 35468;
static const int rounding = 0;
@@ -75,11 +77,19 @@ void vp8_short_idct4x4llm_c(short *input, short *output, int pitch)
d1 = temp1 + temp2;
+#if !CONFIG_EXTEND_QRANGE
op[0] = (a1 + d1 + 4) >> 3;
op[3] = (a1 - d1 + 4) >> 3;
op[1] = (b1 + c1 + 4) >> 3;
op[2] = (b1 - c1 + 4) >> 3;
+#else
+ op[0] = (a1 + d1 + 16) >> 5;
+ op[3] = (a1 - d1 + 16) >> 5;
+
+ op[1] = (b1 + c1 + 16) >> 5;
+ op[2] = (b1 - c1 + 16) >> 5;
+#endif
ip += shortpitch;
op += shortpitch;
@@ -92,8 +102,11 @@ void vp8_short_idct4x4llm_1_c(short *input, short *output, int pitch)
int a1;
short *op = output;
int shortpitch = pitch >> 1;
+#if !CONFIG_EXTEND_QRANGE
a1 = ((input[0] + 4) >> 3);
-
+#else
+ a1 = ((input[0] + 16) >> 5);
+#endif
for (i = 0; i < 4; i++)
{
op[0] = a1;
@@ -106,7 +119,11 @@ void vp8_short_idct4x4llm_1_c(short *input, short *output, int pitch)
void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr, unsigned char *dst_ptr, int pitch, int stride)
{
+#if !CONFIG_EXTEND_QRANGE
int a1 = ((input_dc + 4) >> 3);
+#else
+ int a1 = ((input_dc + 16) >> 5);
+#endif
int r, c;
for (r = 0; r < 4; r++)
@@ -168,11 +185,17 @@ void vp8_short_inv_walsh4x4_c(short *input, short *output)
c2 = a1 - b1;
d2 = d1 - c1;
+#if !CONFIG_EXTEND_QRANGE
op[0] = (a2 + 3) >> 3;
op[1] = (b2 + 3) >> 3;
op[2] = (c2 + 3) >> 3;
op[3] = (d2 + 3) >> 3;
-
+#else
+ op[0] = (a2 + 1) >> 2;
+ op[1] = (b2 + 1) >> 2;
+ op[2] = (c2 + 1) >> 2;
+ op[3] = (d2 + 1) >> 2;
+#endif
ip += 4;
op += 4;
}
@@ -184,7 +207,11 @@ void vp8_short_inv_walsh4x4_1_c(short *input, short *output)
int a1;
short *op = output;
- a1 = ((input[0] + 3) >> 3);
+#if !CONFIG_EXTEND_QRANGE
+ a1 = (input[0] + 3 )>> 3;
+#else
+ a1 = (input[0] + 1 )>> 2;
+#endif
for (i = 0; i < 4; i++)
{
diff --git a/vp8/common/maskingmv.c b/vp8/common/maskingmv.c
new file mode 100644
index 000000000..d01a18fc8
--- /dev/null
+++ b/vp8/common/maskingmv.c
@@ -0,0 +1,855 @@
+/*
+ ============================================================================
+ Name : maskingmv.c
+ Author : jimbankoski
+ Version :
+ Copyright : Your copyright notice
+ Description : Hello World in C, Ansi-style
+ ============================================================================
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+extern unsigned int vp8_sad16x16_sse3(
+ unsigned char *src_ptr,
+ int src_stride,
+ unsigned char *ref_ptr,
+ int ref_stride,
+ int max_err);
+
+extern void vp8_sad16x16x3_sse3(
+ unsigned char *src_ptr,
+ int src_stride,
+ unsigned char *ref_ptr,
+ int ref_stride,
+ int *results);
+
+extern int vp8_growmaskmb_sse3(
+ unsigned char *om,
+ unsigned char *nm);
+
+extern void vp8_makemask_sse3(
+ unsigned char *y,
+ unsigned char *u,
+ unsigned char *v,
+ unsigned char *ym,
+ int yp,
+ int uvp,
+ int ys,
+ int us,
+ int vs,
+ int yt,
+ int ut,
+ int vt);
+
+unsigned int vp8_sad16x16_unmasked_wmt(
+ unsigned char *src_ptr,
+ int src_stride,
+ unsigned char *ref_ptr,
+ int ref_stride,
+ unsigned char *mask);
+
+unsigned int vp8_sad16x16_masked_wmt(
+ unsigned char *src_ptr,
+ int src_stride,
+ unsigned char *ref_ptr,
+ int ref_stride,
+ unsigned char *mask);
+
+unsigned int vp8_masked_predictor_wmt(
+ unsigned char *masked,
+ unsigned char *unmasked,
+ int src_stride,
+ unsigned char *dst_ptr,
+ int dst_stride,
+ unsigned char *mask);
+unsigned int vp8_masked_predictor_uv_wmt(
+ unsigned char *masked,
+ unsigned char *unmasked,
+ int src_stride,
+ unsigned char *dst_ptr,
+ int dst_stride,
+ unsigned char *mask);
+unsigned int vp8_uv_from_y_mask(
+ unsigned char *ymask,
+ unsigned char *uvmask);
+int yp=16;
+unsigned char sxy[]=
+{
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90
+};
+
+unsigned char sts[]=
+{
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+};
+unsigned char str[]=
+{
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
+};
+
+unsigned char y[]=
+{
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,
+40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,
+40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,
+40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,
+60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,40,
+60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,40,
+60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,40,
+40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,
+40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,
+40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,
+40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40
+};
+int uvp=8;
+unsigned char u[]=
+{
+90,80,70,70,90,90,90,17,
+90,80,70,70,90,90,90,17,
+84,70,70,90,90,90,17,17,
+84,70,70,90,90,90,17,17,
+80,70,70,90,90,90,17,17,
+90,80,70,70,90,90,90,17,
+90,80,70,70,90,90,90,17,
+90,80,70,70,90,90,90,17
+};
+
+unsigned char v[]=
+{
+80,80,80,80,80,80,80,80,
+80,80,80,80,80,80,80,80,
+80,80,80,80,80,80,80,80,
+80,80,80,80,80,80,80,80,
+80,80,80,80,80,80,80,80,
+80,80,80,80,80,80,80,80,
+80,80,80,80,80,80,80,80,
+80,80,80,80,80,80,80,80
+};
+
+unsigned char ym[256];
+unsigned char uvm[64];
+typedef struct
+{
+ unsigned char y;
+ unsigned char yt;
+ unsigned char u;
+ unsigned char ut;
+ unsigned char v;
+ unsigned char vt;
+ unsigned char use;
+} COLOR_SEG_ELEMENT;
+
+/*
+COLOR_SEG_ELEMENT segmentation[]=
+{
+ { 60,4,80,17,80,10, 1},
+ { 40,4,15,10,80,10, 1},
+};
+*/
+
+COLOR_SEG_ELEMENT segmentation[]=
+{
+ { 79,44,92,44, 237,60, 1},
+};
+
+unsigned char pixel_mask(unsigned char y,unsigned char u,unsigned char v,
+ COLOR_SEG_ELEMENT sgm[],
+ int c)
+{
+ COLOR_SEG_ELEMENT *s=sgm;
+ unsigned char m =0;
+ int i;
+ for(i=0;i<c;i++,s++)
+ m |= ( abs(y-s->y)< s->yt &&
+ abs(u-s->u)< s->ut &&
+ abs(v-s->v)< s->vt ? 255 : 0 );
+
+ return m;
+}
+int neighbors[256][8];
+int makeneighbors(void)
+{
+ int i,j;
+ for(i=0;i<256;i++)
+ {
+ int r=(i>>4),c=(i&15);
+ int ni=0;
+ for(j=0;j<8;j++)
+ neighbors[i][j]=i;
+ for(j=0;j<256;j++)
+ {
+ int nr=(j>>4),nc=(j&15);
+ if(abs(nr-r)<2&&abs(nc-c)<2)
+ neighbors[i][ni++]=j;
+ }
+ }
+ return 0;
+}
+void grow_ymask(unsigned char *ym)
+{
+ unsigned char nym[256];
+ int i,j;
+
+ for(i=0;i<256;i++)
+ {
+ nym[i]=ym[i];
+ for(j=0;j<8;j++)
+ {
+ nym[i]|=ym[neighbors[i][j]];
+ }
+ }
+ for(i=0;i<256;i++)
+ ym[i]=nym[i];
+}
+void make_mb_mask(unsigned char *y, unsigned char *u, unsigned char *v,
+ unsigned char *ym, unsigned char *uvm,
+ int yp, int uvp,
+ COLOR_SEG_ELEMENT sgm[],
+ int count)
+{
+ int r,c;
+ unsigned char *oym = ym;
+
+ memset(ym,20,256);
+ for(r=0;r<8;r++,uvm+=8,u+=uvp,v+=uvp,y+=(yp+yp),ym+=32)
+ for(c=0;c<8;c++)
+ {
+ int y1=y[c<<1];
+ int u1=u[c];
+ int v1=v[c];
+ int m = pixel_mask(y1,u1,v1,sgm,count);
+ uvm[c] = m;
+ ym[c<<1] = uvm[c];// = pixel_mask(y[c<<1],u[c],v[c],sgm,count);
+ ym[(c<<1)+1] = pixel_mask(y[1+(c<<1)],u[c],v[c],sgm,count);
+ ym[(c<<1)+16] = pixel_mask(y[yp+(c<<1)],u[c],v[c],sgm,count);
+ ym[(c<<1)+17] = pixel_mask(y[1+yp+(c<<1)],u[c],v[c],sgm,count);
+ }
+ grow_ymask(oym);
+}
+
+int masked_sad(unsigned char *src, int p, unsigned char *dst, int dp,
+ unsigned char *ym )
+{
+ int i,j;
+ unsigned sad = 0;
+ for(i=0;i<16;i++,src+=p,dst+=dp,ym+=16)
+ for(j=0;j<16;j++)
+ if(ym[j])
+ sad+= abs(src[j]-dst[j]);
+
+ return sad;
+}
+
+int compare_masks(unsigned char *sym, unsigned char *ym)
+{
+ int i,j;
+ unsigned sad = 0;
+ for(i=0;i<16;i++,sym += 16,ym+=16)
+ for(j=0;j<16;j++)
+ sad+= (sym[j]!=ym[j]?1:0);
+
+ return sad;
+}
+int unmasked_sad(unsigned char *src, int p, unsigned char *dst, int dp,
+ unsigned char *ym)
+{
+ int i,j;
+ unsigned sad = 0;
+ for(i=0;i<16;i++,src+=p,dst+=dp,ym+=16)
+ for(j=0;j<16;j++)
+ if(!ym[j])
+ sad+= abs(src[j]-dst[j]);
+
+ return sad;
+}
+int masked_motion_search( unsigned char *y, unsigned char *u, unsigned char *v,
+ int yp, int uvp,
+ unsigned char *dy, unsigned char *du, unsigned char *dv,
+ int dyp, int duvp,
+ COLOR_SEG_ELEMENT sgm[],
+ int count,
+ int *mi,
+ int *mj,
+ int *ui,
+ int *uj,
+ int *wm)
+{
+ int i,j;
+
+ unsigned char ym[256];
+ unsigned char uvm[64];
+ unsigned char dym[256];
+ unsigned char duvm[64];
+ unsigned int e = 0 ;
+ int beste=256;
+ int bmi=-32,bmj=-32;
+ int bui=-32,buj=-32;
+ int beste1=256;
+ int bmi1=-32,bmj1=-32;
+ int bui1=-32,buj1=-32;
+ int obeste;
+
+ // first try finding best mask and then unmasked
+ beste = 0xffffffff;
+
+ // find best unmasked mv
+ for(i=-32;i<32;i++)
+ {
+ unsigned char *dyz = i*dyp + dy;
+ unsigned char *duz = i/2*duvp + du;
+ unsigned char *dvz = i/2*duvp + dv;
+ for(j=-32;j<32;j++)
+ {
+ // 0,0 masked destination
+ make_mb_mask(dyz+j,duz+j/2, dvz+j/2, dym, duvm, dyp, duvp,sgm,count);
+
+ e = unmasked_sad(y, yp, dyz+j, dyp, dym );
+
+ if(e<beste)
+ {
+ bui=i;
+ buj=j;
+ beste=e;
+ }
+ }
+ }
+ //bui=0;buj=0;
+ // best mv masked destination
+ make_mb_mask(dy+bui*dyp+buj,du+bui/2*duvp+buj/2, dv+bui/2*duvp+buj/2,
+ dym, duvm, dyp, duvp,sgm,count);
+
+ obeste = beste;
+ beste = 0xffffffff;
+
+ // find best masked
+ for(i=-32;i<32;i++)
+ {
+ unsigned char *dyz = i*dyp + dy;
+ for(j=-32;j<32;j++)
+ {
+ e = masked_sad(y, yp, dyz+j, dyp, dym );
+
+ if(e<beste)
+ {
+ bmi=i;
+ bmj=j;
+ beste=e;
+ }
+ }
+ }
+ beste1=beste+obeste;
+ bmi1=bmi;bmj1=bmj;
+ bui1=bui;buj1=buj;
+
+ beste = 0xffffffff;
+ // source mask
+ make_mb_mask(y,u, v, ym, uvm, yp, uvp,sgm,count);
+
+ // find best mask
+ for(i=-32;i<32;i++)
+ {
+ unsigned char *dyz = i*dyp + dy;
+ unsigned char *duz = i/2*duvp + du;
+ unsigned char *dvz = i/2*duvp + dv;
+ for(j=-32;j<32;j++)
+ {
+ // 0,0 masked destination
+ make_mb_mask(dyz+j,duz+j/2, dvz+j/2, dym, duvm, dyp, duvp,sgm,count);
+
+ e = compare_masks(ym, dym);
+
+ if(e<beste)
+ {
+ bmi=i;
+ bmj=j;
+ beste=e;
+ }
+ }
+ }
+
+
+ // best mv masked destination
+ make_mb_mask(dy+bmi*dyp+bmj,du+bmi/2*duvp+bmj/2, dv+bmi/2*duvp+bmj/2,
+ dym, duvm, dyp, duvp,sgm,count);
+
+ obeste = masked_sad(y, yp, dy+bmi*dyp+bmj, dyp, dym );
+
+ beste = 0xffffffff;
+
+ // find best unmasked mv
+ for(i=-32;i<32;i++)
+ {
+ unsigned char *dyz = i*dyp + dy;
+ for(j=-32;j<32;j++)
+ {
+ e = unmasked_sad(y, yp, dyz+j, dyp, dym );
+
+ if(e<beste)
+ {
+ bui=i;
+ buj=j;
+ beste=e;
+ }
+ }
+ }
+ beste += obeste;
+
+
+ if(beste<beste1)
+ {
+ *mi = bmi;
+ *mj = bmj;
+ *ui = bui;
+ *uj = buj;
+ *wm = 1;
+ }
+ else
+ {
+ *mi = bmi1;
+ *mj = bmj1;
+ *ui = bui1;
+ *uj = buj1;
+ *wm = 0;
+
+ }
+ return 0;
+}
+
+int predict(unsigned char *src, int p, unsigned char *dst, int dp,
+ unsigned char *ym, unsigned char *prd )
+{
+ int i,j;
+ for(i=0;i<16;i++,src+=p,dst+=dp,ym+=16, prd+=16)
+ for(j=0;j<16;j++)
+ prd[j]=(ym[j] ? src[j]:dst[j]);
+ return 0;
+}
+
+int fast_masked_motion_search( unsigned char *y, unsigned char *u, unsigned char *v,
+ int yp, int uvp,
+ unsigned char *dy, unsigned char *du, unsigned char *dv,
+ int dyp, int duvp,
+ COLOR_SEG_ELEMENT sgm[],
+ int count,
+ int *mi,
+ int *mj,
+ int *ui,
+ int *uj,
+ int *wm)
+{
+ int i,j;
+
+ unsigned char ym[256];
+ unsigned char ym2[256];
+ unsigned char uvm[64];
+ unsigned char dym2[256];
+ unsigned char dym[256];
+ unsigned char duvm[64];
+ unsigned int e = 0 ;
+ int beste=256;
+ int bmi=-32,bmj=-32;
+ int bui=-32,buj=-32;
+ int beste1=256;
+ int bmi1=-32,bmj1=-32;
+ int bui1=-32,buj1=-32;
+ int obeste;
+
+ // first try finding best mask and then unmasked
+ beste = 0xffffffff;
+
+#if 0
+ for(i=0;i<16;i++)
+ {
+ unsigned char *dy = i*yp + y;
+ for(j=0;j<16;j++)
+ printf("%2x",dy[j]);
+ printf("\n");
+ }
+ printf("\n");
+
+ for(i=-32;i<48;i++)
+ {
+ unsigned char *dyz = i*dyp + dy;
+ for(j=-32;j<48;j++)
+ printf("%2x",dyz[j]);
+ printf("\n");
+ }
+#endif
+
+ // find best unmasked mv
+ for(i=-32;i<32;i++)
+ {
+ unsigned char *dyz = i*dyp + dy;
+ unsigned char *duz = i/2*duvp + du;
+ unsigned char *dvz = i/2*duvp + dv;
+ for(j=-32;j<32;j++)
+ {
+ // 0,0 masked destination
+ vp8_makemask_sse3(dyz+j,duz+j/2, dvz+j/2, dym, dyp, duvp,
+ sgm[0].y,sgm[0].u,sgm[0].v,
+ sgm[0].yt,sgm[0].ut,sgm[0].vt);
+
+ vp8_growmaskmb_sse3(dym,dym2);
+
+ e = vp8_sad16x16_unmasked_wmt(y, yp, dyz+j, dyp, dym2 );
+
+ if(e<beste)
+ {
+ bui=i;
+ buj=j;
+ beste=e;
+ }
+ }
+ }
+ //bui=0;buj=0;
+ // best mv masked destination
+
+ vp8_makemask_sse3(dy+bui*dyp+buj,du+bui/2*duvp+buj/2, dv+bui/2*duvp+buj/2,
+ dym, dyp, duvp,
+ sgm[0].y,sgm[0].u,sgm[0].v,
+ sgm[0].yt,sgm[0].ut,sgm[0].vt);
+
+ vp8_growmaskmb_sse3(dym,dym2);
+
+ obeste = beste;
+ beste = 0xffffffff;
+
+ // find best masked
+ for(i=-32;i<32;i++)
+ {
+ unsigned char *dyz = i*dyp + dy;
+ for(j=-32;j<32;j++)
+ {
+ e = vp8_sad16x16_masked_wmt(y, yp, dyz+j, dyp, dym2 );
+ if(e<beste)
+ {
+ bmi=i;
+ bmj=j;
+ beste=e;
+ }
+ }
+ }
+ beste1=beste+obeste;
+ bmi1=bmi;bmj1=bmj;
+ bui1=bui;buj1=buj;
+
+ // source mask
+ vp8_makemask_sse3(y,u, v,
+ ym, yp, uvp,
+ sgm[0].y,sgm[0].u,sgm[0].v,
+ sgm[0].yt,sgm[0].ut,sgm[0].vt);
+
+ vp8_growmaskmb_sse3(ym,ym2);
+
+ // find best mask
+ for(i=-32;i<32;i++)
+ {
+ unsigned char *dyz = i*dyp + dy;
+ unsigned char *duz = i/2*duvp + du;
+ unsigned char *dvz = i/2*duvp + dv;
+ for(j=-32;j<32;j++)
+ {
+ // 0,0 masked destination
+ vp8_makemask_sse3(dyz+j,duz+j/2, dvz+j/2, dym, dyp, duvp,
+ sgm[0].y,sgm[0].u,sgm[0].v,
+ sgm[0].yt,sgm[0].ut,sgm[0].vt);
+
+ vp8_growmaskmb_sse3(dym,dym2);
+
+ e = compare_masks(ym2, dym2);
+
+ if(e<beste)
+ {
+ bmi=i;
+ bmj=j;
+ beste=e;
+ }
+ }
+ }
+
+ vp8_makemask_sse3(dy+bmi*dyp+bmj,du+bmi/2*duvp+bmj/2, dv+bmi/2*duvp+bmj/2,
+ dym, dyp, duvp,
+ sgm[0].y,sgm[0].u,sgm[0].v,
+ sgm[0].yt,sgm[0].ut,sgm[0].vt);
+
+ vp8_growmaskmb_sse3(dym,dym2);
+
+ obeste = vp8_sad16x16_masked_wmt(y, yp, dy+bmi*dyp+bmj, dyp, dym2 );
+
+ beste = 0xffffffff;
+
+ // find best unmasked mv
+ for(i=-32;i<32;i++)
+ {
+ unsigned char *dyz = i*dyp + dy;
+ for(j=-32;j<32;j++)
+ {
+ e = vp8_sad16x16_unmasked_wmt(y, yp, dyz+j, dyp, dym2 );
+
+ if(e<beste)
+ {
+ bui=i;
+ buj=j;
+ beste=e;
+ }
+ }
+ }
+ beste += obeste;
+
+ if(beste<beste1)
+ {
+ *mi = bmi;
+ *mj = bmj;
+ *ui = bui;
+ *uj = buj;
+ *wm = 1;
+ }
+ else
+ {
+ *mi = bmi1;
+ *mj = bmj1;
+ *ui = bui1;
+ *uj = buj1;
+ *wm = 0;
+ beste=beste1;
+
+ }
+ return beste;
+}
+
+int predict_all(unsigned char *ym, unsigned char *um, unsigned char *vm,
+ int ymp, int uvmp,
+ unsigned char *yp, unsigned char *up, unsigned char *vp,
+ int ypp, int uvpp,
+ COLOR_SEG_ELEMENT sgm[],
+ int count,
+ int mi,
+ int mj,
+ int ui,
+ int uj,
+ int wm)
+{
+ int i,j;
+ unsigned char dym[256];
+ unsigned char dym2[256];
+ unsigned char duvm[64];
+ unsigned char *yu=ym,*uu=um, *vu=vm;
+
+ unsigned char *dym3=dym2;
+
+ ym+=mi*ymp+mj;
+ um+=mi/2*uvmp+mj/2;
+ vm+=mi/2*uvmp+mj/2;
+
+ yu+=ui*ymp+uj;
+ uu+=ui/2*uvmp+uj/2;
+ vu+=ui/2*uvmp+uj/2;
+
+ // best mv masked destination
+ if(wm)
+ vp8_makemask_sse3(ym,um, vm, dym, ymp, uvmp,
+ sgm[0].y,sgm[0].u,sgm[0].v,
+ sgm[0].yt,sgm[0].ut,sgm[0].vt);
+ else
+ vp8_makemask_sse3(yu,uu, vu, dym, ymp, uvmp,
+ sgm[0].y,sgm[0].u,sgm[0].v,
+ sgm[0].yt,sgm[0].ut,sgm[0].vt);
+
+ vp8_growmaskmb_sse3(dym,dym2);
+ vp8_masked_predictor_wmt(ym,yu,ymp,yp,ypp,dym3);
+ vp8_uv_from_y_mask(dym3,duvm);
+ vp8_masked_predictor_uv_wmt(um,uu,uvmp,up,uvpp,duvm);
+ vp8_masked_predictor_uv_wmt(vm,vu,uvmp,vp,uvpp,duvm);
+
+ return 0;
+}
+
+unsigned char f0p[1280*720*3/2];
+unsigned char f1p[1280*720*3/2];
+unsigned char prd[1280*720*3/2];
+unsigned char msk[1280*720*3/2];
+
+
+int mainz(int argc, char *argv[]) {
+
+ FILE *f=fopen(argv[1],"rb");
+ FILE *g=fopen(argv[2],"wb");
+ int w=atoi(argv[3]),h=atoi(argv[4]);
+ int y_stride=w,uv_stride=w/2;
+ int r,c;
+ unsigned char *f0=f0p,*f1=f1p,*t;
+ unsigned char ym[256],uvm[64];
+ unsigned char ym2[256],uvm2[64];
+ unsigned char ym3[256],uvm3[64];
+ int a,b;
+
+ COLOR_SEG_ELEMENT last={ 20,20,20,20, 230,20, 1},best;
+#if 0
+ makeneighbors();
+ COLOR_SEG_ELEMENT segmentation[]=
+ {
+ { 60,4,80,17,80,10, 1},
+ { 40,4,15,10,80,10, 1},
+ };
+ make_mb_mask(y, u, v,ym2,uvm2,16,8,segmentation,1);
+
+ vp8_makemask_sse3(y,u,v,ym, (int) 16,(int) 8,
+ (int) segmentation[0].y,(int) segmentation[0].u,(int) segmentation[0].v,
+ segmentation[0].yt,segmentation[0].ut,segmentation[0].vt);
+
+ vp8_growmaskmb_sse3(ym,ym3);
+
+ a = vp8_sad16x16_masked_wmt(str,16,sts,16,ym3);
+ b = vp8_sad16x16_unmasked_wmt(str,16,sts,16,ym3);
+
+ vp8_masked_predictor_wmt(str,sts,16,ym,16,ym3);
+
+ vp8_uv_from_y_mask(ym3,uvm3);
+
+ return 4;
+#endif
+ makeneighbors();
+
+
+ memset(prd,128,w*h*3/2);
+
+ fread(f0,w*h*3/2,1,f);
+
+ while(!feof(f))
+ {
+ unsigned char *ys=f1,*yd=f0,*yp=prd;
+ unsigned char *us=f1+w*h,*ud=f0+w*h,*up=prd+w*h;
+ unsigned char *vs=f1+w*h*5/4,*vd=f0+w*h*5/4,*vp=prd+w*h*5/4;
+ fread(f1,w*h*3/2,1,f);
+
+ ys+=32*y_stride;yd+=32*y_stride;yp+=32*y_stride;
+ us+=16*uv_stride;ud+=16*uv_stride;up+=16*uv_stride;
+ vs+=16*uv_stride;vd+=16*uv_stride;vp+=16*uv_stride;
+ for(r=32;r<h-32;r+=16,
+ ys+=16*w,yd+=16*w,yp+=16*w,
+ us+=8*uv_stride,ud+=8*uv_stride,up+=8*uv_stride,
+ vs+=8*uv_stride,vd+=8*uv_stride,vp+=8*uv_stride)
+ {
+ for(c=32;c<w-32;c+=16)
+ {
+ int mi,mj,ui,uj,wm;
+ int bmi,bmj,bui,buj,bwm;
+ unsigned char ym[256];
+
+ if(vp8_sad16x16_sse3( ys+c,y_stride, yd+c,y_stride,0xffff) == 0)
+ bmi=bmj=bui=buj=bwm=0;
+ else
+ {
+ COLOR_SEG_ELEMENT cs[5];
+ int j;
+ unsigned int beste=0xfffffff;
+ unsigned int bestj=0;
+
+ // try color from last mb segmentation
+ cs[0] = last;
+
+ // try color segs from 4 pixels in mb recon as segmentation
+ cs[1].y = yd[c + y_stride + 1];cs[1].u = ud[c/2 + uv_stride];
+ cs[1].v = vd[c/2 + uv_stride];
+ cs[1].yt = cs[1].ut = cs[1].vt = 20;
+ cs[2].y = yd[c + w + 14];
+ cs[2].u = ud[c/2 + uv_stride+7];
+ cs[2].v = vd[c/2 + uv_stride+7];
+ cs[2].yt = cs[2].ut = cs[2].vt = 20;
+ cs[3].y = yd[c + w*14 + 1];
+ cs[3].u = ud[c/2 + uv_stride*7];
+ cs[3].v = vd[c/2 + uv_stride*7];
+ cs[3].yt = cs[3].ut = cs[3].vt = 20;
+ cs[4].y = yd[c + w*14 + 14];
+ cs[4].u = ud[c/2 + uv_stride*7+7];
+ cs[4].v = vd[c/2 + uv_stride*7+7];
+ cs[4].yt = cs[4].ut = cs[4].vt = 20;
+
+ for(j=0;j<5;j++)
+ {
+ int e;
+
+ e = fast_masked_motion_search(
+ ys+c, us+c/2, vs+c/2, y_stride, uv_stride,
+ yd+c, ud+c/2, vd+c/2, y_stride, uv_stride,
+ &cs[j], 1, &mi,&mj,&ui,&uj,&wm);
+
+ if(e<beste)
+ {
+ bmi=mi;bmj=mj;bui=ui;buj=uj,bwm=wm;
+ bestj=j;
+ beste=e;
+ }
+ }
+ best = cs[bestj];
+ //best = segmentation[0];
+ last = best;
+ }
+ predict_all(yd+c, ud+c/2, vd+c/2, w, uv_stride,
+ yp+c, up+c/2, vp+c/2, w, uv_stride,
+ &best, 1, bmi,bmj,bui,buj,bwm);
+
+ }
+ }
+ fwrite(prd,w*h*3/2,1,g);
+ t=f0;
+ f0=f1;
+ f1=t;
+
+ }
+ fclose(f);
+ fclose(g);
+ return;
+}
diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h
index c8c227787..a91cb337b 100644
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@@ -117,6 +117,7 @@ typedef struct VP8Common
int mode_info_stride;
/* profile settings */
+ int experimental;
int mb_no_coeff_skip;
int no_lpf;
int simpler_lpf;
diff --git a/vp8/common/quant_common.c b/vp8/common/quant_common.c
index e9833fe33..b8e6e2972 100644
--- a/vp8/common/quant_common.c
+++ b/vp8/common/quant_common.c
@@ -11,6 +11,8 @@
#include "quant_common.h"
+
+#if !CONFIG_EXTEND_QRANGE
static const int dc_qlookup[QINDEX_RANGE] =
{
4, 5, 6, 7, 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 17,
@@ -34,7 +36,32 @@ static const int ac_qlookup[QINDEX_RANGE] =
155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
};
+#else
+
+static const int dc_qlookup[QINDEX_RANGE] =
+{
+ 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20, 21, 22, 23, 24, 25, 26, 27, 28, 30, 32, 34, 36, 38, 40, 42,
+ 44, 46, 49, 52, 55, 58, 61, 64, 67, 70, 73, 76, 79, 82, 85, 88,
+ 92, 96, 100, 104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 148, 152,
+ 156, 160, 164, 168, 172, 176, 180, 184, 188, 192, 196, 200, 205, 210, 215, 220,
+ 225, 230, 235, 240, 245, 250, 255, 260, 265, 270, 275, 280, 285, 290, 295, 300,
+ 310, 320, 330, 340, 350, 360, 370, 380, 390, 400, 410, 420, 430, 440, 450, 460,
+ 472, 484, 496, 508, 520, 532, 544, 556, 572, 588, 608, 628, 648, 668, 692, 720,
+};
+static const int ac_qlookup[QINDEX_RANGE] =
+{
+ 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 51,
+ 54, 57, 60, 63, 66, 69, 72, 76, 80, 84, 88, 92, 96, 100, 105, 110,
+ 115, 120, 125, 130, 135, 140, 146, 152, 158, 164, 170, 176, 182, 188, 194, 200,
+ 206, 212, 218, 224, 232, 240, 248, 256, 264, 272, 280, 288, 296, 304, 312, 320,
+ 330, 340, 350, 360, 370, 380, 392, 404, 416, 428, 440, 454, 468, 482, 496, 510,
+ 524, 540, 556, 572, 588, 604, 622, 640, 658, 676, 696, 716, 736, 756, 776, 796,
+ 820, 844, 868, 892, 916, 944, 972, 1000, 1032, 1064, 1096, 1128, 1168, 1208, 1252, 1300
+};
+#endif
int vp8_dc_quant(int QIndex, int Delta)
{
@@ -62,7 +89,11 @@ int vp8_dc2quant(int QIndex, int Delta)
else if (QIndex < 0)
QIndex = 0;
+#if !CONFIG_EXTEND_QRANGE
retval = dc_qlookup[ QIndex ] * 2;
+#else
+ retval = dc_qlookup[ QIndex ];
+#endif
return retval;
}
@@ -72,16 +103,13 @@ int vp8_dc_uv_quant(int QIndex, int Delta)
QIndex = QIndex + Delta;
- if (QIndex > 127)
- QIndex = 127;
+ if (QIndex > 117)
+ QIndex = 117;
else if (QIndex < 0)
QIndex = 0;
retval = dc_qlookup[ QIndex ];
- if (retval > 132)
- retval = 132;
-
return retval;
}
@@ -108,12 +136,13 @@ int vp8_ac2quant(int QIndex, int Delta)
QIndex = 127;
else if (QIndex < 0)
QIndex = 0;
-
+#if !CONFIG_EXTEND_QRANGE
retval = (ac_qlookup[ QIndex ] * 155) / 100;
-
if (retval < 8)
retval = 8;
-
+#else
+ retval = ac_qlookup[ QIndex ];
+#endif
return retval;
}
int vp8_ac_uv_quant(int QIndex, int Delta)
diff --git a/vp8/common/reconintra4x4.c b/vp8/common/reconintra4x4.c
index cd70dca73..8ddae0059 100644
--- a/vp8/common/reconintra4x4.c
+++ b/vp8/common/reconintra4x4.c
@@ -81,10 +81,10 @@ void vp8_predict_intra4x4(BLOCKD *x,
{
unsigned int ap[4];
- ap[0] = (top_left + 2 * Above[0] + Above[1] + 2) >> 2;
- ap[1] = (Above[0] + 2 * Above[1] + Above[2] + 2) >> 2;
- ap[2] = (Above[1] + 2 * Above[2] + Above[3] + 2) >> 2;
- ap[3] = (Above[2] + 2 * Above[3] + Above[4] + 2) >> 2;
+ ap[0] = Above[0];
+ ap[1] = Above[1];
+ ap[2] = Above[2];
+ ap[3] = Above[3];
for (r = 0; r < 4; r++)
{
@@ -105,10 +105,10 @@ void vp8_predict_intra4x4(BLOCKD *x,
{
unsigned int lp[4];
- lp[0] = (top_left + 2 * Left[0] + Left[1] + 2) >> 2;
- lp[1] = (Left[0] + 2 * Left[1] + Left[2] + 2) >> 2;
- lp[2] = (Left[1] + 2 * Left[2] + Left[3] + 2) >> 2;
- lp[3] = (Left[2] + 2 * Left[3] + Left[3] + 2) >> 2;
+ lp[0] = Left[0];
+ lp[1] = Left[1];
+ lp[2] = Left[2];
+ lp[3] = Left[3];
for (r = 0; r < 4; r++)
{
diff --git a/vp8/common/x86/mask_sse3.asm b/vp8/common/x86/mask_sse3.asm
new file mode 100644
index 000000000..0d90cfa86
--- /dev/null
+++ b/vp8/common/x86/mask_sse3.asm
@@ -0,0 +1,484 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+;void int vp8_makemask_sse3(
+; unsigned char *y,
+; unsigned char *u,
+; unsigned char *v,
+; unsigned char *ym,
+; unsigned char *uvm,
+; int yp,
+; int uvp,
+; int ys,
+; int us,
+; int vs,
+; int yt,
+; int ut,
+; int vt)
+global sym(vp8_makemask_sse3)
+sym(vp8_makemask_sse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 14
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0) ;y
+ mov rdi, arg(1) ;u
+ mov rcx, arg(2) ;v
+ mov rax, arg(3) ;ym
+ movsxd rbx, dword arg(4) ;yp
+ movsxd rdx, dword arg(5) ;uvp
+
+ pxor xmm0,xmm0
+
+ ;make 16 copies of the center y value
+ movd xmm1, arg(6)
+ pshufb xmm1, xmm0
+
+ ; make 16 copies of the center u value
+ movd xmm2, arg(7)
+ pshufb xmm2, xmm0
+
+ ; make 16 copies of the center v value
+ movd xmm3, arg(8)
+ pshufb xmm3, xmm0
+ unpcklpd xmm2, xmm3
+
+ ;make 16 copies of the y tolerance
+ movd xmm3, arg(9)
+ pshufb xmm3, xmm0
+
+ ;make 16 copies of the u tolerance
+ movd xmm4, arg(10)
+ pshufb xmm4, xmm0
+
+ ;make 16 copies of the v tolerance
+ movd xmm5, arg(11)
+ pshufb xmm5, xmm0
+ unpckhpd xmm4, xmm5
+
+ mov r8,8
+
+NextPairOfRows:
+
+ ;grab the y source values
+ movdqu xmm0, [rsi]
+
+ ;compute abs difference between source and y target
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm0
+ psubusb xmm0, xmm1
+ psubusb xmm6, xmm7
+ por xmm0, xmm6
+
+ ;compute abs difference between
+ movdqa xmm6, xmm3
+ pcmpgtb xmm6, xmm0
+
+ ;grab the y source values
+ add rsi, rbx
+ movdqu xmm0, [rsi]
+
+ ;compute abs difference between source and y target
+ movdqa xmm11, xmm1
+ movdqa xmm7, xmm0
+ psubusb xmm0, xmm1
+ psubusb xmm11, xmm7
+ por xmm0, xmm11
+
+ ;compute abs difference between
+ movdqa xmm11, xmm3
+ pcmpgtb xmm11, xmm0
+
+
+ ;grab the u and v source values
+ movdqu xmm7, [rdi]
+ movdqu xmm8, [rcx]
+ unpcklpd xmm7, xmm8
+
+ ;compute abs difference between source and uv targets
+ movdqa xmm9, xmm2
+ movdqa xmm10, xmm7
+ psubusb xmm7, xmm2
+ psubusb xmm9, xmm10
+ por xmm7, xmm9
+
+ ;check whether the number is < tolerance
+ movdqa xmm0, xmm4
+ pcmpgtb xmm0, xmm7
+
+ ;double u and v masks
+ movdqa xmm8, xmm0
+ punpckhbw xmm0, xmm0
+ punpcklbw xmm8, xmm8
+
+ ;mask row 0 and output
+ pand xmm6, xmm8
+ pand xmm6, xmm0
+ movdqa [rax],xmm6
+
+ ;mask row 1 and output
+ pand xmm11, xmm8
+ pand xmm11, xmm0
+ movdqa [rax+16],xmm11
+
+
+ ; to the next row or set of rows
+ add rsi, rbx
+ add rdi, rdx
+ add rcx, rdx
+ add rax,32
+ dec r8
+ jnz NextPairOfRows
+
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;GROW_HORIZ (register for result, source register or mem local)
+; takes source and shifts left and ors with source
+; then shifts right and ors with source
+%macro GROW_HORIZ 2
+ movdqa %1, %2
+ movdqa xmm14, %1
+ movdqa xmm15, %1
+ pslldq xmm14, 1
+ psrldq xmm15, 1
+ por %1,xmm14
+ por %1,xmm15
+%endmacro
+;GROW_VERT (result, center row, above row, below row)
+%macro GROW_VERT 4
+ movdqa %1,%2
+ por %1,%3
+ por %1,%4
+%endmacro
+
+;GROW_NEXTLINE (new line to grow, new source, line to write)
+%macro GROW_NEXTLINE 3
+ GROW_HORIZ %1, %2
+ GROW_VERT xmm3, xmm0, xmm1, xmm2
+ movdqa %3,xmm3
+%endmacro
+
+
+;void int vp8_growmaskmb_sse3(
+; unsigned char *om,
+; unsigned char *nm,
+global sym(vp8_growmaskmb_sse3)
+sym(vp8_growmaskmb_sse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 2
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0) ;src
+ mov rdi, arg(1) ;rst
+
+ GROW_HORIZ xmm0, [rsi]
+ GROW_HORIZ xmm1, [rsi+16]
+ GROW_HORIZ xmm2, [rsi+32]
+
+ GROW_VERT xmm3, xmm0, xmm1, xmm2
+ por xmm0,xmm1
+ movdqa [rdi], xmm0
+ movdqa [rdi+16],xmm3
+
+ GROW_NEXTLINE xmm0,[rsi+48],[rdi+32]
+ GROW_NEXTLINE xmm1,[rsi+64],[rdi+48]
+ GROW_NEXTLINE xmm2,[rsi+80],[rdi+64]
+ GROW_NEXTLINE xmm0,[rsi+96],[rdi+80]
+ GROW_NEXTLINE xmm1,[rsi+112],[rdi+96]
+ GROW_NEXTLINE xmm2,[rsi+128],[rdi+112]
+ GROW_NEXTLINE xmm0,[rsi+144],[rdi+128]
+ GROW_NEXTLINE xmm1,[rsi+160],[rdi+144]
+ GROW_NEXTLINE xmm2,[rsi+176],[rdi+160]
+ GROW_NEXTLINE xmm0,[rsi+192],[rdi+176]
+ GROW_NEXTLINE xmm1,[rsi+208],[rdi+192]
+ GROW_NEXTLINE xmm2,[rsi+224],[rdi+208]
+ GROW_NEXTLINE xmm0,[rsi+240],[rdi+224]
+
+ por xmm0,xmm2
+ movdqa [rdi+240], xmm0
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+
+;unsigned int vp8_sad16x16_masked_wmt(
+; unsigned char *src_ptr,
+; int src_stride,
+; unsigned char *ref_ptr,
+; int ref_stride,
+; unsigned char *mask)
+global sym(vp8_sad16x16_masked_wmt)
+sym(vp8_sad16x16_masked_wmt):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 5
+ push rsi
+ push rdi
+ ; end prolog
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;ref_ptr
+
+ mov rbx, arg(4) ;mask
+ movsxd rax, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;ref_stride
+
+ mov rcx, 16
+
+ pxor xmm3, xmm3
+
+NextSadRow:
+ movdqu xmm0, [rsi]
+ movdqu xmm1, [rdi]
+ movdqu xmm2, [rbx]
+ pand xmm0, xmm2
+ pand xmm1, xmm2
+
+ psadbw xmm0, xmm1
+ paddw xmm3, xmm0
+
+ add rsi, rax
+ add rdi, rdx
+ add rbx, 16
+
+ dec rcx
+ jnz NextSadRow
+
+ movdqa xmm4 , xmm3
+ psrldq xmm4, 8
+ paddw xmm3, xmm4
+ movq rax, xmm3
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;unsigned int vp8_sad16x16_unmasked_wmt(
+; unsigned char *src_ptr,
+; int src_stride,
+; unsigned char *ref_ptr,
+; int ref_stride,
+; unsigned char *mask)
+global sym(vp8_sad16x16_unmasked_wmt)
+sym(vp8_sad16x16_unmasked_wmt):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 5
+ push rsi
+ push rdi
+ ; end prolog
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;ref_ptr
+
+ mov rbx, arg(4) ;mask
+ movsxd rax, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;ref_stride
+
+ mov rcx, 16
+
+ pxor xmm3, xmm3
+
+next_vp8_sad16x16_unmasked_wmt:
+ movdqu xmm0, [rsi]
+ movdqu xmm1, [rdi]
+ movdqu xmm2, [rbx]
+ por xmm0, xmm2
+ por xmm1, xmm2
+
+ psadbw xmm0, xmm1
+ paddw xmm3, xmm0
+
+ add rsi, rax
+ add rdi, rdx
+ add rbx, 16
+
+ dec rcx
+ jnz next_vp8_sad16x16_unmasked_wmt
+
+ movdqa xmm4 , xmm3
+ psrldq xmm4, 8
+ paddw xmm3, xmm4
+ movq rax, xmm3
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;unsigned int vp8_masked_predictor_wmt(
+; unsigned char *masked,
+; unsigned char *unmasked,
+; int src_stride,
+; unsigned char *dst_ptr,
+; int dst_stride,
+; unsigned char *mask)
+global sym(vp8_masked_predictor_wmt)
+sym(vp8_masked_predictor_wmt):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ push rsi
+ push rdi
+ ; end prolog
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(1) ;ref_ptr
+
+ mov rbx, arg(5) ;mask
+ movsxd rax, dword ptr arg(2) ;src_stride
+ mov r11, arg(3) ; destination
+ movsxd rdx, dword ptr arg(4) ;dst_stride
+
+ mov rcx, 16
+
+ pxor xmm3, xmm3
+
+next_vp8_masked_predictor_wmt:
+ movdqu xmm0, [rsi]
+ movdqu xmm1, [rdi]
+ movdqu xmm2, [rbx]
+
+ pand xmm0, xmm2
+ pandn xmm2, xmm1
+ por xmm0, xmm2
+ movdqu [r11], xmm0
+
+ add r11, rdx
+ add rsi, rax
+ add rdi, rdx
+ add rbx, 16
+
+ dec rcx
+ jnz next_vp8_masked_predictor_wmt
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;unsigned int vp8_masked_predictor_uv_wmt(
+; unsigned char *masked,
+; unsigned char *unmasked,
+; int src_stride,
+; unsigned char *dst_ptr,
+; int dst_stride,
+; unsigned char *mask)
+global sym(vp8_masked_predictor_uv_wmt)
+sym(vp8_masked_predictor_uv_wmt):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ push rsi
+ push rdi
+ ; end prolog
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(1) ;ref_ptr
+
+ mov rbx, arg(5) ;mask
+ movsxd rax, dword ptr arg(2) ;src_stride
+ mov r11, arg(3) ; destination
+ movsxd rdx, dword ptr arg(4) ;dst_stride
+
+ mov rcx, 8
+
+ pxor xmm3, xmm3
+
+next_vp8_masked_predictor_uv_wmt:
+ movq xmm0, [rsi]
+ movq xmm1, [rdi]
+ movq xmm2, [rbx]
+
+ pand xmm0, xmm2
+ pandn xmm2, xmm1
+ por xmm0, xmm2
+ movq [r11], xmm0
+
+ add r11, rdx
+ add rsi, rax
+ add rdi, rax
+ add rbx, 8
+
+ dec rcx
+ jnz next_vp8_masked_predictor_uv_wmt
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;unsigned int vp8_uv_from_y_mask(
+; unsigned char *ymask,
+; unsigned char *uvmask)
+global sym(vp8_uv_from_y_mask)
+sym(vp8_uv_from_y_mask):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ push rsi
+ push rdi
+ ; end prolog
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(1) ;dst_ptr
+
+
+ mov rcx, 8
+
+ pxor xmm3, xmm3
+
+next_p8_uv_from_y_mask:
+ movdqu xmm0, [rsi]
+ pshufb xmm0, [shuf1b] ;[GLOBAL(shuf1b)]
+ movq [rdi],xmm0
+ add rdi, 8
+ add rsi,32
+
+ dec rcx
+ jnz next_p8_uv_from_y_mask
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+SECTION_RODATA
+align 16
+shuf1b:
+ db 0, 2, 4, 6, 8, 10, 12, 14, 0, 0, 0, 0, 0, 0, 0, 0
+