summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xconfigure5
-rw-r--r--vp8/common/blockd.h12
-rw-r--r--vp8/common/maskingmv.c855
-rw-r--r--vp8/common/x86/mask_sse3.asm484
-rw-r--r--vp8/decoder/decodemv.c50
-rw-r--r--vp8/decoder/decodframe.c13
-rw-r--r--vp8/decoder/onyxd_if.c9
-rw-r--r--vp8/decoder/onyxd_int.h3
-rw-r--r--vp8/encoder/bitstream.c98
-rw-r--r--vp8/encoder/encodeframe.c178
-rw-r--r--vp8/encoder/ethreading.c2
-rw-r--r--vp8/encoder/onyx_if.c16
-rw-r--r--vp8/encoder/onyx_int.h4
-rw-r--r--vp8/vp8_common.mk5
-rwxr-xr-xvpxenc.c10
15 files changed, 1690 insertions, 54 deletions
diff --git a/configure b/configure
index 0f30df2cb..338922bcf 100755
--- a/configure
+++ b/configure
@@ -214,6 +214,8 @@ HAVE_LIST="
"
EXPERIMENT_LIST="
extend_qrange
+ segmentation
+ csm
"
CONFIG_LIST="
external_build
@@ -294,10 +296,11 @@ CMDLINE_SELECT="
mem_tracker
spatial_resampling
realtime_only
- experimental
shared
small
postproc_visualizer
+
+ experimental
"
process_cmdline() {
diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h
index fc8e0722c..906e05520 100644
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -29,6 +29,7 @@ void vpx_log(const char *format, ...);
#define DCPREDCNTTHRESH 3
#define MB_FEATURE_TREE_PROBS 3
+
#define MAX_MB_SEGMENTS 4
#define MAX_REF_LF_DELTAS 4
@@ -166,6 +167,7 @@ typedef struct
int as_int;
MV as_mv;
} mv;
+ unsigned char segment_flag;
unsigned char partitioning;
unsigned char mb_skip_coeff; /* does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens */
@@ -249,11 +251,15 @@ typedef struct
/* 0 (do not update) 1 (update) the macroblock segmentation feature data. */
unsigned char mb_segement_abs_delta;
+ unsigned char temporal_update;
/* Per frame flags that define which MB level features (such as quantizer or loop filter level) */
/* are enabled and when enabled the proabilities used to decode the per MB flags in MB_MODE_INFO */
- vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS]; /* Probability Tree used to code Segment number */
-
- signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; /* Segment parameters */
+#if CONFIG_SEGMENTATION
+ vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS + 3]; // Probability Tree used to code Segment number
+#else
+ vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS];
+#endif
+ signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; // Segment parameters
/* mode_based Loop filter adjustment */
unsigned char mode_ref_lf_delta_enabled;
diff --git a/vp8/common/maskingmv.c b/vp8/common/maskingmv.c
new file mode 100644
index 000000000..d01a18fc8
--- /dev/null
+++ b/vp8/common/maskingmv.c
@@ -0,0 +1,855 @@
+/*
+ ============================================================================
+ Name : maskingmv.c
+ Author : jimbankoski
+ Version :
+ Copyright : Your copyright notice
+ Description : Hello World in C, Ansi-style
+ ============================================================================
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+extern unsigned int vp8_sad16x16_sse3(
+ unsigned char *src_ptr,
+ int src_stride,
+ unsigned char *ref_ptr,
+ int ref_stride,
+ int max_err);
+
+extern void vp8_sad16x16x3_sse3(
+ unsigned char *src_ptr,
+ int src_stride,
+ unsigned char *ref_ptr,
+ int ref_stride,
+ int *results);
+
+extern int vp8_growmaskmb_sse3(
+ unsigned char *om,
+ unsigned char *nm);
+
+extern void vp8_makemask_sse3(
+ unsigned char *y,
+ unsigned char *u,
+ unsigned char *v,
+ unsigned char *ym,
+ int yp,
+ int uvp,
+ int ys,
+ int us,
+ int vs,
+ int yt,
+ int ut,
+ int vt);
+
+unsigned int vp8_sad16x16_unmasked_wmt(
+ unsigned char *src_ptr,
+ int src_stride,
+ unsigned char *ref_ptr,
+ int ref_stride,
+ unsigned char *mask);
+
+unsigned int vp8_sad16x16_masked_wmt(
+ unsigned char *src_ptr,
+ int src_stride,
+ unsigned char *ref_ptr,
+ int ref_stride,
+ unsigned char *mask);
+
+unsigned int vp8_masked_predictor_wmt(
+ unsigned char *masked,
+ unsigned char *unmasked,
+ int src_stride,
+ unsigned char *dst_ptr,
+ int dst_stride,
+ unsigned char *mask);
+unsigned int vp8_masked_predictor_uv_wmt(
+ unsigned char *masked,
+ unsigned char *unmasked,
+ int src_stride,
+ unsigned char *dst_ptr,
+ int dst_stride,
+ unsigned char *mask);
+unsigned int vp8_uv_from_y_mask(
+ unsigned char *ymask,
+ unsigned char *uvmask);
+int yp=16;
+unsigned char sxy[]=
+{
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,80,120,120,90,90,90,90,90,80,120,120,90,90,90,90,90
+};
+
+unsigned char sts[]=
+{
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+};
+unsigned char str[]=
+{
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
+};
+
+unsigned char y[]=
+{
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,
+40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,
+40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,
+40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,
+60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,40,
+60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,40,
+60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,40,
+40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,
+40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,40,
+40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,
+40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,40,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40,
+40,40,40,60,60,60,60,40,40,40,40,60,60,60,60,40
+};
+int uvp=8;
+unsigned char u[]=
+{
+90,80,70,70,90,90,90,17,
+90,80,70,70,90,90,90,17,
+84,70,70,90,90,90,17,17,
+84,70,70,90,90,90,17,17,
+80,70,70,90,90,90,17,17,
+90,80,70,70,90,90,90,17,
+90,80,70,70,90,90,90,17,
+90,80,70,70,90,90,90,17
+};
+
+unsigned char v[]=
+{
+80,80,80,80,80,80,80,80,
+80,80,80,80,80,80,80,80,
+80,80,80,80,80,80,80,80,
+80,80,80,80,80,80,80,80,
+80,80,80,80,80,80,80,80,
+80,80,80,80,80,80,80,80,
+80,80,80,80,80,80,80,80,
+80,80,80,80,80,80,80,80
+};
+
+unsigned char ym[256];
+unsigned char uvm[64];
+typedef struct
+{
+ unsigned char y;
+ unsigned char yt;
+ unsigned char u;
+ unsigned char ut;
+ unsigned char v;
+ unsigned char vt;
+ unsigned char use;
+} COLOR_SEG_ELEMENT;
+
+/*
+COLOR_SEG_ELEMENT segmentation[]=
+{
+ { 60,4,80,17,80,10, 1},
+ { 40,4,15,10,80,10, 1},
+};
+*/
+
+COLOR_SEG_ELEMENT segmentation[]=
+{
+ { 79,44,92,44, 237,60, 1},
+};
+
+/* Returns 255 when the (y,u,v) sample lies strictly within the per-channel
+ * tolerances (yt/ut/vt) of at least one of the first c entries of sgm,
+ * otherwise 0.  The `use` field of the elements is not consulted. */
+unsigned char pixel_mask(unsigned char y,unsigned char u,unsigned char v,
+                         COLOR_SEG_ELEMENT sgm[],
+                         int c)
+{
+    unsigned char result = 0;
+    int k;
+
+    for (k = 0; k < c; k++)
+    {
+        const COLOR_SEG_ELEMENT *seg = &sgm[k];
+        const int near_y = abs(y - seg->y) < seg->yt;
+        const int near_u = abs(u - seg->u) < seg->ut;
+        const int near_v = abs(v - seg->v) < seg->vt;
+
+        if (near_y && near_u && near_v)
+            result |= 255;
+    }
+
+    return result;
+}
+/* neighbors[i] lists the indices of the (up to 8) pixels adjacent to pixel i
+ * of a 16x16 block stored row-major; unused slots hold i itself. */
+int neighbors[256][8];
+
+/* Fills the neighbors table for a 16x16 block.  Always returns 0. */
+int makeneighbors(void)
+{
+    int i,j;
+    for(i=0;i<256;i++)
+    {
+        int r=(i>>4),c=(i&15);
+        int ni=0;
+        /* default every slot to the pixel itself so unused slots are
+         * harmless when the table is used to OR mask bytes together */
+        for(j=0;j<8;j++)
+            neighbors[i][j]=i;
+        for(j=0;j<256;j++)
+        {
+            int nr=(j>>4),nc=(j&15);
+            /* Skip j==i: counting the pixel itself gave interior pixels nine
+             * entries, so the ninth was written past the 8-slot row and then
+             * overwritten when the following row was initialised. */
+            if(j!=i&&abs(nr-r)<2&&abs(nc-c)<2)
+                neighbors[i][ni++]=j;
+        }
+    }
+    return 0;
+}
+/* Dilates the 256-byte mask in place: each output byte is the OR of the input
+ * byte with the input bytes at the eight indices stored in neighbors[] for
+ * that position.  The neighbors table must have been filled in beforehand. */
+void grow_ymask(unsigned char *ym)
+{
+    unsigned char grown[256];
+    int pos;
+
+    for (pos = 0; pos < 256; pos++)
+    {
+        unsigned char acc = ym[pos];
+        int k = 0;
+
+        while (k < 8)
+        {
+            acc |= ym[neighbors[pos][k]];
+            k++;
+        }
+        grown[pos] = acc;
+    }
+
+    /* results are staged in a scratch copy so every OR above reads original data */
+    memcpy(ym, grown, sizeof(grown));
+}
+/* Builds a 16x16 luma mask (ym, 256 bytes) and an 8x8 chroma mask (uvm,
+ * 64 bytes) for one macroblock.  Each chroma sample is paired with the 2x2
+ * luma samples it covers; pixel_mask() decides each luma pixel, and the chroma
+ * mask takes the decision of the top-left luma pixel of the 2x2 group.
+ * yp / uvp are the luma / chroma strides of the source planes.  The finished
+ * luma mask is then dilated with grow_ymask(). */
+void make_mb_mask(unsigned char *y, unsigned char *u, unsigned char *v,
+                  unsigned char *ym, unsigned char *uvm,
+                  int yp, int uvp,
+                  COLOR_SEG_ELEMENT sgm[],
+                  int count)
+{
+    unsigned char *const mask_base = ym;
+    int row, col;
+
+    memset(mask_base, 20, 256);
+
+    for (row = 0; row < 8; row++)
+    {
+        for (col = 0; col < 8; col++)
+        {
+            const int x = col << 1;
+
+            uvm[col]   = pixel_mask(y[x], u[col], v[col], sgm, count);
+            ym[x]      = uvm[col];
+            ym[x + 1]  = pixel_mask(y[1 + x], u[col], v[col], sgm, count);
+            ym[x + 16] = pixel_mask(y[yp + x], u[col], v[col], sgm, count);
+            ym[x + 17] = pixel_mask(y[1 + yp + x], u[col], v[col], sgm, count);
+        }
+
+        /* one chroma row corresponds to two luma rows */
+        uvm += 8;
+        u += uvp;
+        v += uvp;
+        y += yp + yp;
+        ym += 32;
+    }
+
+    grow_ymask(mask_base);
+}
+
+/* Sum of absolute differences between two 16x16 blocks (strides p and dp),
+ * counting only the pixels whose byte in the 16x16 mask ym is non-zero. */
+int masked_sad(unsigned char *src, int p, unsigned char *dst, int dp,
+               unsigned char *ym )
+{
+    unsigned total = 0;
+    int row, col;
+
+    for (row = 0; row < 16; row++)
+    {
+        const unsigned char *s = src + row * p;
+        const unsigned char *d = dst + row * dp;
+        const unsigned char *m = ym + row * 16;
+
+        for (col = 0; col < 16; col++)
+        {
+            if (m[col] != 0)
+                total += abs(s[col] - d[col]);
+        }
+    }
+
+    return total;
+}
+
+/* Counts the positions at which two 256-byte (16x16) masks differ. */
+int compare_masks(unsigned char *sym, unsigned char *ym)
+{
+    unsigned mismatches = 0;
+    int idx;
+
+    for (idx = 0; idx < 16 * 16; idx++)
+    {
+        if (sym[idx] != ym[idx])
+            mismatches++;
+    }
+
+    return mismatches;
+}
+/* Sum of absolute differences between two 16x16 blocks (strides p and dp),
+ * counting only the pixels whose byte in the 16x16 mask ym is zero. */
+int unmasked_sad(unsigned char *src, int p, unsigned char *dst, int dp,
+                 unsigned char *ym)
+{
+    unsigned total = 0;
+    int row, col;
+
+    for (row = 0; row < 16; row++)
+    {
+        const unsigned char *s = src + row * p;
+        const unsigned char *d = dst + row * dp;
+        const unsigned char *m = ym + row * 16;
+
+        for (col = 0; col < 16; col++)
+        {
+            if (m[col] == 0)
+                total += abs(s[col] - d[col]);
+        }
+    }
+
+    return total;
+}
+/*
+ * Exhaustive search over displacements i,j in [-32,32) for a pair of motion
+ * vectors: one scored only on pixels where the mask is set ("masked") and one
+ * scored only where it is clear ("unmasked").  Two strategies are tried:
+ *   1. find the best unmasked vector (the mask is rebuilt from the displaced
+ *      destination at every candidate), then the best masked vector using the
+ *      mask built at that best position;
+ *   2. find the displacement whose destination mask best matches the source
+ *      mask, then the best unmasked vector using the mask at that position.
+ * The pair with the lower combined error is stored through mi/mj (masked) and
+ * ui/uj (unmasked); *wm is set to 1 when strategy 2 won and 0 otherwise.
+ * y/u/v (strides yp/uvp) are the source block, dy/du/dv (strides dyp/duvp)
+ * the destination planes, which are read up to 32 pixels before the given
+ * pointers in both directions.  Always returns 0.
+ */
+int masked_motion_search( unsigned char *y, unsigned char *u, unsigned char *v,
+ int yp, int uvp,
+ unsigned char *dy, unsigned char *du, unsigned char *dv,
+ int dyp, int duvp,
+ COLOR_SEG_ELEMENT sgm[],
+ int count,
+ int *mi,
+ int *mj,
+ int *ui,
+ int *uj,
+ int *wm)
+{
+ int i,j;
+
+ unsigned char ym[256];
+ unsigned char uvm[64];
+ unsigned char dym[256];
+ unsigned char duvm[64];
+ unsigned int e = 0 ;
+ int beste=256;
+ int bmi=-32,bmj=-32;
+ int bui=-32,buj=-32;
+ int beste1=256;
+ int bmi1=-32,bmj1=-32;
+ int bui1=-32,buj1=-32;
+ int obeste;
+
+ // first try finding best mask and then unmasked
+ // NOTE(review): beste is a signed int; the e<beste tests below compare it
+ // against the unsigned e, so this sentinel acts as "largest possible error".
+ beste = 0xffffffff;
+
+ // find best unmasked mv
+ for(i=-32;i<32;i++)
+ {
+ unsigned char *dyz = i*dyp + dy;
+ unsigned char *duz = i/2*duvp + du;
+ unsigned char *dvz = i/2*duvp + dv;
+ for(j=-32;j<32;j++)
+ {
+ // 0,0 masked destination
+ make_mb_mask(dyz+j,duz+j/2, dvz+j/2, dym, duvm, dyp, duvp,sgm,count);
+
+ e = unmasked_sad(y, yp, dyz+j, dyp, dym );
+
+ if(e<beste)
+ {
+ bui=i;
+ buj=j;
+ beste=e;
+ }
+ }
+ }
+ //bui=0;buj=0;
+ // best mv masked destination
+ make_mb_mask(dy+bui*dyp+buj,du+bui/2*duvp+buj/2, dv+bui/2*duvp+buj/2,
+ dym, duvm, dyp, duvp,sgm,count);
+
+ // keep the unmasked error so it can be added to the masked error below
+ obeste = beste;
+ beste = 0xffffffff;
+
+ // find best masked
+ for(i=-32;i<32;i++)
+ {
+ unsigned char *dyz = i*dyp + dy;
+ for(j=-32;j<32;j++)
+ {
+ e = masked_sad(y, yp, dyz+j, dyp, dym );
+
+ if(e<beste)
+ {
+ bmi=i;
+ bmj=j;
+ beste=e;
+ }
+ }
+ }
+ // remember the result of strategy 1 (combined error and both vectors)
+ beste1=beste+obeste;
+ bmi1=bmi;bmj1=bmj;
+ bui1=bui;buj1=buj;
+
+ beste = 0xffffffff;
+ // source mask
+ make_mb_mask(y,u, v, ym, uvm, yp, uvp,sgm,count);
+
+ // find best mask
+ for(i=-32;i<32;i++)
+ {
+ unsigned char *dyz = i*dyp + dy;
+ unsigned char *duz = i/2*duvp + du;
+ unsigned char *dvz = i/2*duvp + dv;
+ for(j=-32;j<32;j++)
+ {
+ // 0,0 masked destination
+ make_mb_mask(dyz+j,duz+j/2, dvz+j/2, dym, duvm, dyp, duvp,sgm,count);
+
+ // here e is a count of differing mask bytes, not a SAD
+ e = compare_masks(ym, dym);
+
+ if(e<beste)
+ {
+ bmi=i;
+ bmj=j;
+ beste=e;
+ }
+ }
+ }
+
+
+ // best mv masked destination
+ make_mb_mask(dy+bmi*dyp+bmj,du+bmi/2*duvp+bmj/2, dv+bmi/2*duvp+bmj/2,
+ dym, duvm, dyp, duvp,sgm,count);
+
+ // masked error at the position whose mask matched the source mask best
+ obeste = masked_sad(y, yp, dy+bmi*dyp+bmj, dyp, dym );
+
+ beste = 0xffffffff;
+
+ // find best unmasked mv
+ for(i=-32;i<32;i++)
+ {
+ unsigned char *dyz = i*dyp + dy;
+ for(j=-32;j<32;j++)
+ {
+ e = unmasked_sad(y, yp, dyz+j, dyp, dym );
+
+ if(e<beste)
+ {
+ bui=i;
+ buj=j;
+ beste=e;
+ }
+ }
+ }
+ beste += obeste;
+
+
+ // report whichever strategy produced the lower combined error
+ if(beste<beste1)
+ {
+ *mi = bmi;
+ *mj = bmj;
+ *ui = bui;
+ *uj = buj;
+ *wm = 1;
+ }
+ else
+ {
+ *mi = bmi1;
+ *mj = bmj1;
+ *ui = bui1;
+ *uj = buj1;
+ *wm = 0;
+
+ }
+ return 0;
+}
+
+/* Composes a 16x16 prediction into prd (stride 16): where the 16x16 mask ym
+ * is non-zero the pixel comes from src (stride p), elsewhere from dst
+ * (stride dp).  Always returns 0. */
+int predict(unsigned char *src, int p, unsigned char *dst, int dp,
+            unsigned char *ym, unsigned char *prd )
+{
+    int row, col;
+
+    for (row = 0; row < 16; row++)
+    {
+        const unsigned char *s = src + row * p;
+        const unsigned char *d = dst + row * dp;
+        const unsigned char *m = ym + row * 16;
+        unsigned char *out = prd + row * 16;
+
+        for (col = 0; col < 16; col++)
+        {
+            if (m[col])
+                out[col] = s[col];
+            else
+                out[col] = d[col];
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Assembly-assisted counterpart of masked_motion_search(): the same two
+ * search strategies over displacements i,j in [-32,32), but masks are built
+ * with vp8_makemask_sse3()/vp8_growmaskmb_sse3() and errors are measured with
+ * the vp8_sad16x16_{masked,unmasked}_wmt() routines.
+ *
+ * Only sgm[0] is used to build masks; `count` is accepted for signature
+ * compatibility but not read.
+ *
+ * On return mi/mj hold the masked vector, ui/uj the unmasked vector, and *wm
+ * is 1 when the "match the source mask first" strategy won, 0 otherwise.
+ * Returns the combined error of the winning pair.
+ */
+int fast_masked_motion_search( unsigned char *y, unsigned char *u, unsigned char *v,
+                               int yp, int uvp,
+                               unsigned char *dy, unsigned char *du, unsigned char *dv,
+                               int dyp, int duvp,
+                               COLOR_SEG_ELEMENT sgm[],
+                               int count,
+                               int *mi,
+                               int *mj,
+                               int *ui,
+                               int *uj,
+                               int *wm)
+{
+    int i,j;
+
+    unsigned char ym[256];
+    unsigned char ym2[256];
+    unsigned char dym2[256];
+    unsigned char dym[256];
+    unsigned int e = 0 ;
+    int beste=256;
+    int bmi=-32,bmj=-32;
+    int bui=-32,buj=-32;
+    int beste1=256;
+    int bmi1=-32,bmj1=-32;
+    int bui1=-32,buj1=-32;
+    int obeste;
+
+    (void)count;
+
+    // first try finding best mask and then unmasked
+    beste = 0xffffffff;
+
+#if 0
+    for(i=0;i<16;i++)
+    {
+        unsigned char *dy = i*yp + y;
+        for(j=0;j<16;j++)
+            printf("%2x",dy[j]);
+        printf("\n");
+    }
+    printf("\n");
+
+    for(i=-32;i<48;i++)
+    {
+        unsigned char *dyz = i*dyp + dy;
+        for(j=-32;j<48;j++)
+            printf("%2x",dyz[j]);
+        printf("\n");
+    }
+#endif
+
+    // find best unmasked mv
+    for(i=-32;i<32;i++)
+    {
+        unsigned char *dyz = i*dyp + dy;
+        unsigned char *duz = i/2*duvp + du;
+        unsigned char *dvz = i/2*duvp + dv;
+        for(j=-32;j<32;j++)
+        {
+            // 0,0 masked destination
+            vp8_makemask_sse3(dyz+j,duz+j/2, dvz+j/2, dym, dyp, duvp,
+                              sgm[0].y,sgm[0].u,sgm[0].v,
+                              sgm[0].yt,sgm[0].ut,sgm[0].vt);
+
+            vp8_growmaskmb_sse3(dym,dym2);
+
+            e = vp8_sad16x16_unmasked_wmt(y, yp, dyz+j, dyp, dym2 );
+
+            if(e<beste)
+            {
+                bui=i;
+                buj=j;
+                beste=e;
+            }
+        }
+    }
+    //bui=0;buj=0;
+    // best mv masked destination
+
+    vp8_makemask_sse3(dy+bui*dyp+buj,du+bui/2*duvp+buj/2, dv+bui/2*duvp+buj/2,
+                      dym, dyp, duvp,
+                      sgm[0].y,sgm[0].u,sgm[0].v,
+                      sgm[0].yt,sgm[0].ut,sgm[0].vt);
+
+    vp8_growmaskmb_sse3(dym,dym2);
+
+    // keep the unmasked error so it can be added to the masked error below
+    obeste = beste;
+    beste = 0xffffffff;
+
+    // find best masked
+    for(i=-32;i<32;i++)
+    {
+        unsigned char *dyz = i*dyp + dy;
+        for(j=-32;j<32;j++)
+        {
+            e = vp8_sad16x16_masked_wmt(y, yp, dyz+j, dyp, dym2 );
+            if(e<beste)
+            {
+                bmi=i;
+                bmj=j;
+                beste=e;
+            }
+        }
+    }
+    // remember the result of the first strategy
+    beste1=beste+obeste;
+    bmi1=bmi;bmj1=bmj;
+    bui1=bui;buj1=buj;
+
+    // Restart the running minimum: the next search scores candidates by the
+    // number of differing mask bytes, which must not be compared against the
+    // SAD left over from the search above (masked_motion_search resets here too).
+    beste = 0xffffffff;
+
+    // source mask
+    vp8_makemask_sse3(y,u, v,
+                      ym, yp, uvp,
+                      sgm[0].y,sgm[0].u,sgm[0].v,
+                      sgm[0].yt,sgm[0].ut,sgm[0].vt);
+
+    vp8_growmaskmb_sse3(ym,ym2);
+
+    // find best mask
+    for(i=-32;i<32;i++)
+    {
+        unsigned char *dyz = i*dyp + dy;
+        unsigned char *duz = i/2*duvp + du;
+        unsigned char *dvz = i/2*duvp + dv;
+        for(j=-32;j<32;j++)
+        {
+            // 0,0 masked destination
+            vp8_makemask_sse3(dyz+j,duz+j/2, dvz+j/2, dym, dyp, duvp,
+                              sgm[0].y,sgm[0].u,sgm[0].v,
+                              sgm[0].yt,sgm[0].ut,sgm[0].vt);
+
+            vp8_growmaskmb_sse3(dym,dym2);
+
+            e = compare_masks(ym2, dym2);
+
+            if(e<beste)
+            {
+                bmi=i;
+                bmj=j;
+                beste=e;
+            }
+        }
+    }
+
+    vp8_makemask_sse3(dy+bmi*dyp+bmj,du+bmi/2*duvp+bmj/2, dv+bmi/2*duvp+bmj/2,
+                      dym, dyp, duvp,
+                      sgm[0].y,sgm[0].u,sgm[0].v,
+                      sgm[0].yt,sgm[0].ut,sgm[0].vt);
+
+    vp8_growmaskmb_sse3(dym,dym2);
+
+    // masked error at the position whose mask matched the source mask best
+    obeste = vp8_sad16x16_masked_wmt(y, yp, dy+bmi*dyp+bmj, dyp, dym2 );
+
+    beste = 0xffffffff;
+
+    // find best unmasked mv
+    for(i=-32;i<32;i++)
+    {
+        unsigned char *dyz = i*dyp + dy;
+        for(j=-32;j<32;j++)
+        {
+            e = vp8_sad16x16_unmasked_wmt(y, yp, dyz+j, dyp, dym2 );
+
+            if(e<beste)
+            {
+                bui=i;
+                buj=j;
+                beste=e;
+            }
+        }
+    }
+    beste += obeste;
+
+    // report whichever strategy produced the lower combined error
+    if(beste<beste1)
+    {
+        *mi = bmi;
+        *mj = bmj;
+        *ui = bui;
+        *uj = buj;
+        *wm = 1;
+    }
+    else
+    {
+        *mi = bmi1;
+        *mj = bmj1;
+        *ui = bui1;
+        *uj = buj1;
+        *wm = 0;
+        beste=beste1;
+
+    }
+    return beste;
+}
+
+/*
+ * Builds the Y, U and V prediction for one macroblock from two displaced
+ * copies of the reference planes ym/um/vm (strides ymp/uvmp): the copy at
+ * (mi,mj) supplies pixels where the mask is set, the copy at (ui,uj) the
+ * rest.  The mask is generated from the (mi,mj) copy when wm is non-zero and
+ * from the (ui,uj) copy otherwise, using sgm[0] only (`count` is not read).
+ * Output goes to yp/up/vp (strides ypp/uvpp).  Always returns 0.
+ */
+int predict_all(unsigned char *ym, unsigned char *um, unsigned char *vm,
+                int ymp, int uvmp,
+                unsigned char *yp, unsigned char *up, unsigned char *vp,
+                int ypp, int uvpp,
+                COLOR_SEG_ELEMENT sgm[],
+                int count,
+                int mi,
+                int mj,
+                int ui,
+                int uj,
+                int wm)
+{
+    unsigned char raw_mask[256];
+    unsigned char grown_mask[256];
+    unsigned char chroma_mask[64];
+
+    /* reference block displaced by the masked vector */
+    unsigned char *masked_y = ym + (mi*ymp+mj);
+    unsigned char *masked_u = um + (mi/2*uvmp+mj/2);
+    unsigned char *masked_v = vm + (mi/2*uvmp+mj/2);
+
+    /* reference block displaced by the unmasked vector */
+    unsigned char *plain_y = ym + (ui*ymp+uj);
+    unsigned char *plain_u = um + (ui/2*uvmp+uj/2);
+    unsigned char *plain_v = vm + (ui/2*uvmp+uj/2);
+
+    /* wm selects which of the two copies the mask is derived from */
+    unsigned char *mask_src_y = wm ? masked_y : plain_y;
+    unsigned char *mask_src_u = wm ? masked_u : plain_u;
+    unsigned char *mask_src_v = wm ? masked_v : plain_v;
+
+    // best mv masked destination
+    vp8_makemask_sse3(mask_src_y, mask_src_u, mask_src_v, raw_mask, ymp, uvmp,
+                      sgm[0].y,sgm[0].u,sgm[0].v,
+                      sgm[0].yt,sgm[0].ut,sgm[0].vt);
+
+    vp8_growmaskmb_sse3(raw_mask, grown_mask);
+
+    vp8_masked_predictor_wmt(masked_y, plain_y, ymp, yp, ypp, grown_mask);
+
+    vp8_uv_from_y_mask(grown_mask, chroma_mask);
+    vp8_masked_predictor_uv_wmt(masked_u, plain_u, uvmp, up, uvpp, chroma_mask);
+    vp8_masked_predictor_uv_wmt(masked_v, plain_v, uvmp, vp, uvpp, chroma_mask);
+
+    return 0;
+}
+
+unsigned char f0p[1280*720*3/2];
+unsigned char f1p[1280*720*3/2];
+unsigned char prd[1280*720*3/2];
+unsigned char msk[1280*720*3/2];
+
+
+/*
+ * Stand-alone driver: reads raw planar frames (Y plane, then U, then V, with
+ * half-size chroma) from argv[1], predicts every interior 16x16 macroblock of
+ * each frame from the previous frame with the masked motion search, and
+ * writes the predicted frames to argv[2].  argv[3]/argv[4] are the frame
+ * width and height; a frame must fit in the static 1280x720 buffers.
+ *
+ * Returns 0 on success and 1 on bad arguments or an I/O failure.
+ */
+int mainz(int argc, char *argv[]) {
+
+    FILE *f = NULL;
+    FILE *g = NULL;
+    int w,h;
+    int y_stride,uv_stride;
+    size_t frame_size;
+    int status = 0;
+    int r,c;
+    unsigned char *f0=f0p,*f1=f1p,*t;
+    unsigned char ym[256],uvm[64];
+    unsigned char ym2[256],uvm2[64];
+    unsigned char ym3[256],uvm3[64];
+    int a,b;
+
+    COLOR_SEG_ELEMENT last={ 20,20,20,20, 230,20, 1},best;
+
+    if(argc < 5)
+    {
+        fprintf(stderr, "usage: %s <in.yuv> <out.yuv> <width> <height>\n",
+                argc > 0 ? argv[0] : "maskingmv");
+        return 1;
+    }
+
+    w=atoi(argv[3]);
+    h=atoi(argv[4]);
+
+    /* the frame buffers are static; refuse sizes that would overrun them */
+    if(w <= 0 || h <= 0 || (size_t)w > (sizeof(f0p) / 3 * 2) / (size_t)h)
+    {
+        fprintf(stderr, "unsupported frame size %sx%s\n", argv[3], argv[4]);
+        return 1;
+    }
+    y_stride=w;
+    uv_stride=w/2;
+    frame_size=(size_t)w*h*3/2;
+
+    f=fopen(argv[1],"rb");
+    if(!f)
+    {
+        perror(argv[1]);
+        return 1;
+    }
+    g=fopen(argv[2],"wb");
+    if(!g)
+    {
+        perror(argv[2]);
+        fclose(f);
+        return 1;
+    }
+
+    /* predict_all() reads `best` even for macroblocks that skip the search,
+     * so it must hold a valid segment from the start */
+    best = last;
+
+#if 0
+    makeneighbors();
+    COLOR_SEG_ELEMENT segmentation[]=
+    {
+        { 60,4,80,17,80,10, 1},
+        { 40,4,15,10,80,10, 1},
+    };
+    make_mb_mask(y, u, v,ym2,uvm2,16,8,segmentation,1);
+
+    vp8_makemask_sse3(y,u,v,ym, (int) 16,(int) 8,
+                      (int) segmentation[0].y,(int) segmentation[0].u,(int) segmentation[0].v,
+                      segmentation[0].yt,segmentation[0].ut,segmentation[0].vt);
+
+    vp8_growmaskmb_sse3(ym,ym3);
+
+    a = vp8_sad16x16_masked_wmt(str,16,sts,16,ym3);
+    b = vp8_sad16x16_unmasked_wmt(str,16,sts,16,ym3);
+
+    vp8_masked_predictor_wmt(str,sts,16,ym,16,ym3);
+
+    vp8_uv_from_y_mask(ym3,uvm3);
+
+    return 4;
+#endif
+    makeneighbors();
+
+
+    memset(prd,128,frame_size);
+
+    /* Loop on successful reads rather than feof(): feof() only turns true
+     * after a read has already failed, which made the old loop process and
+     * write one extra frame built from stale data. */
+    if(fread(f0,frame_size,1,f) == 1)
+    {
+        while(fread(f1,frame_size,1,f) == 1)
+        {
+            unsigned char *ys=f1,*yd=f0,*yp=prd;
+            unsigned char *us=f1+w*h,*ud=f0+w*h,*up=prd+w*h;
+            unsigned char *vs=f1+w*h*5/4,*vd=f0+w*h*5/4,*vp=prd+w*h*5/4;
+
+            /* skip a 32-pixel border so the +/-32 search stays inside the frame */
+            ys+=32*y_stride;yd+=32*y_stride;yp+=32*y_stride;
+            us+=16*uv_stride;ud+=16*uv_stride;up+=16*uv_stride;
+            vs+=16*uv_stride;vd+=16*uv_stride;vp+=16*uv_stride;
+            for(r=32;r<h-32;r+=16,
+                ys+=16*w,yd+=16*w,yp+=16*w,
+                us+=8*uv_stride,ud+=8*uv_stride,up+=8*uv_stride,
+                vs+=8*uv_stride,vd+=8*uv_stride,vp+=8*uv_stride)
+            {
+                for(c=32;c<w-32;c+=16)
+                {
+                    int mi,mj,ui,uj,wm;
+                    int bmi=0,bmj=0,bui=0,buj=0,bwm=0;
+
+                    /* identical blocks keep the zero vectors and previous segment */
+                    if(vp8_sad16x16_sse3( ys+c,y_stride, yd+c,y_stride,0xffff) != 0)
+                    {
+                        COLOR_SEG_ELEMENT cs[5];
+                        int j;
+                        unsigned int beste=0xfffffff;
+                        unsigned int bestj=0;
+
+                        // try color from last mb segmentation
+                        cs[0] = last;
+
+                        // try color segs from 4 pixels in mb recon as segmentation
+                        cs[1].y = yd[c + y_stride + 1];cs[1].u = ud[c/2 + uv_stride];
+                        cs[1].v = vd[c/2 + uv_stride];
+                        cs[1].yt = cs[1].ut = cs[1].vt = 20;
+                        cs[2].y = yd[c + w + 14];
+                        cs[2].u = ud[c/2 + uv_stride+7];
+                        cs[2].v = vd[c/2 + uv_stride+7];
+                        cs[2].yt = cs[2].ut = cs[2].vt = 20;
+                        cs[3].y = yd[c + w*14 + 1];
+                        cs[3].u = ud[c/2 + uv_stride*7];
+                        cs[3].v = vd[c/2 + uv_stride*7];
+                        cs[3].yt = cs[3].ut = cs[3].vt = 20;
+                        cs[4].y = yd[c + w*14 + 14];
+                        cs[4].u = ud[c/2 + uv_stride*7+7];
+                        cs[4].v = vd[c/2 + uv_stride*7+7];
+                        cs[4].yt = cs[4].ut = cs[4].vt = 20;
+
+                        for(j=0;j<5;j++)
+                        {
+                            int e;
+
+                            e = fast_masked_motion_search(
+                                    ys+c, us+c/2, vs+c/2, y_stride, uv_stride,
+                                    yd+c, ud+c/2, vd+c/2, y_stride, uv_stride,
+                                    &cs[j], 1, &mi,&mj,&ui,&uj,&wm);
+
+                            if(e<beste)
+                            {
+                                bmi=mi;bmj=mj;bui=ui;buj=uj,bwm=wm;
+                                bestj=j;
+                                beste=e;
+                            }
+                        }
+                        best = cs[bestj];
+                        //best = segmentation[0];
+                        last = best;
+                    }
+                    predict_all(yd+c, ud+c/2, vd+c/2, w, uv_stride,
+                                yp+c, up+c/2, vp+c/2, w, uv_stride,
+                                &best, 1, bmi,bmj,bui,buj,bwm);
+
+                }
+            }
+            if(fwrite(prd,frame_size,1,g) != 1)
+            {
+                perror(argv[2]);
+                status = 1;
+                break;
+            }
+            /* the frame just read becomes the reference for the next one */
+            t=f0;
+            f0=f1;
+            f1=t;
+
+        }
+    }
+    fclose(f);
+    if(fclose(g) != 0)
+        status = 1;
+    return status;
+}
diff --git a/vp8/common/x86/mask_sse3.asm b/vp8/common/x86/mask_sse3.asm
new file mode 100644
index 000000000..0d90cfa86
--- /dev/null
+++ b/vp8/common/x86/mask_sse3.asm
@@ -0,0 +1,484 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+;void vp8_makemask_sse3(
+;    unsigned char *y,
+;    unsigned char *u,
+;    unsigned char *v,
+;    unsigned char *ym,
+;    int  yp,
+;    int  uvp,
+;    int  ys,
+;    int  us,
+;    int  vs,
+;    int  yt,
+;    int  ut,
+;    int  vt)
+;
+; Writes a 16x16 byte mask to ym (two 16-byte rows per loop iteration, eight
+; iterations).  A mask byte is all-ones when |y - ys| compares below yt AND
+; the matching chroma samples satisfy |u - us| below ut and |v - vs| below vt;
+; each chroma result is duplicated to cover two luma columns and two rows.
+;
+; Notes:
+;  * the "below tolerance" tests use pcmpgtb, which compares SIGNED bytes, so
+;    tolerances or differences of 128 and above do not act as unsigned limits;
+;  * ym is stored with movdqa and must therefore be 16-byte aligned;
+;  * 16 bytes are loaded from u and from v per iteration although only the
+;    low 8 bytes of each are used;
+;  * r8 and xmm8-xmm11 are used, so this routine is 64-bit only.
+global sym(vp8_makemask_sse3)
+sym(vp8_makemask_sse3):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 14
+    push        rsi
+    push        rdi
+    push        rbx                     ; callee-saved; holds the y stride below
+    ; end prolog
+
+    mov         rsi,        arg(0)      ;y
+    mov         rdi,        arg(1)      ;u
+    mov         rcx,        arg(2)      ;v
+    mov         rax,        arg(3)      ;ym
+    movsxd      rbx,        dword arg(4) ;yp
+    movsxd      rdx,        dword arg(5) ;uvp
+
+    pxor        xmm0,xmm0               ; all-zero shuffle control = broadcast byte 0
+
+    ;make 16 copies of the center y value
+    movd        xmm1, arg(6)
+    pshufb      xmm1, xmm0
+
+    ; make 16 copies of the center u value
+    movd        xmm2, arg(7)
+    pshufb      xmm2, xmm0
+
+    ; make 16 copies of the center v value
+    movd        xmm3, arg(8)
+    pshufb      xmm3, xmm0
+    unpcklpd    xmm2, xmm3              ; xmm2 = 8 x u center | 8 x v center
+
+    ;make 16 copies of the y tolerance
+    movd        xmm3, arg(9)
+    pshufb      xmm3, xmm0
+
+    ;make 16 copies of the u tolerance
+    movd        xmm4, arg(10)
+    pshufb      xmm4, xmm0
+
+    ;make 16 copies of the v tolerance
+    movd        xmm5, arg(11)
+    pshufb      xmm5, xmm0
+    unpckhpd    xmm4, xmm5              ; xmm4 = 8 x u tolerance | 8 x v tolerance
+
+    mov         r8,8                    ; 8 iterations x 2 rows = 16 rows
+
+NextPairOfRows:
+
+    ;grab the y source values
+    movdqu      xmm0, [rsi]
+
+    ;compute abs difference between source and y target
+    movdqa      xmm6, xmm1
+    movdqa      xmm7, xmm0
+    psubusb     xmm0, xmm1
+    psubusb     xmm6, xmm7
+    por         xmm0, xmm6
+
+    ;xmm6 = row 0 bytes whose y difference is below the y tolerance
+    movdqa      xmm6, xmm3
+    pcmpgtb     xmm6, xmm0
+
+    ;grab the y source values of the second row
+    add         rsi, rbx
+    movdqu      xmm0, [rsi]
+
+    ;compute abs difference between source and y target
+    movdqa      xmm11, xmm1
+    movdqa      xmm7, xmm0
+    psubusb     xmm0, xmm1
+    psubusb     xmm11, xmm7
+    por         xmm0, xmm11
+
+    ;xmm11 = row 1 bytes whose y difference is below the y tolerance
+    movdqa      xmm11, xmm3
+    pcmpgtb     xmm11, xmm0
+
+
+    ;grab the u and v source values
+    movdqu      xmm7, [rdi]
+    movdqu      xmm8, [rcx]
+    unpcklpd    xmm7, xmm8              ; xmm7 = 8 u samples | 8 v samples
+
+    ;compute abs difference between source and uv targets
+    movdqa      xmm9, xmm2
+    movdqa      xmm10, xmm7
+    psubusb     xmm7, xmm2
+    psubusb     xmm9, xmm10
+    por         xmm7, xmm9
+
+    ;check whether the number is < tolerance
+    movdqa      xmm0, xmm4
+    pcmpgtb     xmm0, xmm7
+
+    ;double u and v masks
+    movdqa      xmm8, xmm0
+    punpckhbw   xmm0, xmm0              ; v mask, each byte duplicated
+    punpcklbw   xmm8, xmm8              ; u mask, each byte duplicated
+
+    ;mask row 0 and output
+    pand        xmm6, xmm8
+    pand        xmm6, xmm0
+    movdqa      [rax],xmm6
+
+    ;mask row 1 and output
+    pand        xmm11, xmm8
+    pand        xmm11, xmm0
+    movdqa      [rax+16],xmm11
+
+
+    ; to the next row or set of rows
+    add         rsi, rbx
+    add         rdi, rdx
+    add         rcx, rdx
+    add         rax,32
+    dec         r8
+    jnz         NextPairOfRows
+
+
+    ; begin epilog
+    pop         rbx
+    pop         rdi
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+;GROW_HORIZ (register for result, source register or mem local)
+; takes source and shifts left and ors with source
+; then shifts right and ors with source
+; The shifts are whole-register byte shifts by one position (zeros shifted
+; in), so every byte of the result is the OR of itself and its two
+; horizontal neighbours.  Clobbers xmm14 and xmm15.  A memory source is read
+; with movdqa and must be 16-byte aligned.
+%macro GROW_HORIZ 2
+ movdqa %1, %2
+ movdqa xmm14, %1
+ movdqa xmm15, %1
+ pslldq xmm14, 1
+ psrldq xmm15, 1
+ por %1,xmm14
+ por %1,xmm15
+%endmacro
+;GROW_VERT (result, center row, above row, below row)
+; result = OR of the three row registers; the three sources are left intact
+; as long as the result register is not one of them.
+%macro GROW_VERT 4
+ movdqa %1,%2
+ por %1,%3
+ por %1,%4
+%endmacro
+
+;GROW_NEXTLINE (new line to grow, new source, line to write)
+; Loads and horizontally grows the next source row into %1, then writes the
+; OR of xmm0, xmm1 and xmm2 to %3.  The caller rotates %1 through
+; xmm0/xmm1/xmm2 so those registers always hold the three most recent rows.
+; Clobbers xmm3 (plus xmm14/xmm15 via GROW_HORIZ); %3 is written with movdqa
+; and must be 16-byte aligned.
+%macro GROW_NEXTLINE 3
+ GROW_HORIZ %1, %2
+ GROW_VERT xmm3, xmm0, xmm1, xmm2
+ movdqa %3,xmm3
+%endmacro
+
+
+;int vp8_growmaskmb_sse3(
+; unsigned char *om,
+; unsigned char *nm)
+;
+; Dilates the 16x16 byte mask at om by one pixel in every direction and
+; writes the result to nm: each output row is the OR of the horizontally
+; grown source row with the grown rows above and below it.  The first and
+; last rows combine only the two rows that exist.
+; Both buffers are accessed with movdqa and must be 16-byte aligned.
+; NOTE(review): rax is never written here, so the int return value declared
+; on the C side is indeterminate.
+global sym(vp8_growmaskmb_sse3)
+sym(vp8_growmaskmb_sse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 2
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0) ;src
+ mov rdi, arg(1) ;rst
+
+ ; prime the three-row window with source rows 0, 1 and 2
+ GROW_HORIZ xmm0, [rsi]
+ GROW_HORIZ xmm1, [rsi+16]
+ GROW_HORIZ xmm2, [rsi+32]
+
+ ; output row 1 needs rows 0..2; output row 0 only has rows 0 and 1
+ GROW_VERT xmm3, xmm0, xmm1, xmm2
+ por xmm0,xmm1
+ movdqa [rdi], xmm0
+ movdqa [rdi+16],xmm3
+
+ ; each step loads source row k+1 over the oldest register and writes output row k
+ GROW_NEXTLINE xmm0,[rsi+48],[rdi+32]
+ GROW_NEXTLINE xmm1,[rsi+64],[rdi+48]
+ GROW_NEXTLINE xmm2,[rsi+80],[rdi+64]
+ GROW_NEXTLINE xmm0,[rsi+96],[rdi+80]
+ GROW_NEXTLINE xmm1,[rsi+112],[rdi+96]
+ GROW_NEXTLINE xmm2,[rsi+128],[rdi+112]
+ GROW_NEXTLINE xmm0,[rsi+144],[rdi+128]
+ GROW_NEXTLINE xmm1,[rsi+160],[rdi+144]
+ GROW_NEXTLINE xmm2,[rsi+176],[rdi+160]
+ GROW_NEXTLINE xmm0,[rsi+192],[rdi+176]
+ GROW_NEXTLINE xmm1,[rsi+208],[rdi+192]
+ GROW_NEXTLINE xmm2,[rsi+224],[rdi+208]
+ GROW_NEXTLINE xmm0,[rsi+240],[rdi+224]
+
+ ; last output row: xmm0 holds row 15 and xmm2 row 14
+ por xmm0,xmm2
+ movdqa [rdi+240], xmm0
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+
+;unsigned int vp8_sad16x16_masked_wmt(
+;    unsigned char *src_ptr,
+;    int  src_stride,
+;    unsigned char *ref_ptr,
+;    int  ref_stride,
+;    unsigned char *mask)
+;
+; Sum of absolute differences over a 16x16 block.  Both inputs are ANDed with
+; the mask row (16 mask bytes per row, rows contiguous) before the SAD, so
+; positions whose mask byte is 0 contribute nothing.  The mask bytes are
+; presumably always 0x00 or 0xff; other values would mask individual bits.
+global sym(vp8_sad16x16_masked_wmt)
+sym(vp8_sad16x16_masked_wmt):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 5
+    push        rsi
+    push        rdi
+    push        rbx                     ; callee-saved; used as the mask pointer
+    ; end prolog
+    mov         rsi,        arg(0)      ;src_ptr
+    mov         rdi,        arg(2)      ;ref_ptr
+
+    mov         rbx,        arg(4)      ;mask
+    movsxd      rax,        dword ptr arg(1) ;src_stride
+    movsxd      rdx,        dword ptr arg(3) ;ref_stride
+
+    mov         rcx,        16
+
+    pxor        xmm3,       xmm3        ; running sum (one partial sum per 64-bit half)
+
+NextSadRow:
+    movdqu      xmm0,       [rsi]
+    movdqu      xmm1,       [rdi]
+    movdqu      xmm2,       [rbx]
+    pand        xmm0,       xmm2
+    pand        xmm1,       xmm2
+
+    psadbw      xmm0,       xmm1
+    paddw       xmm3,       xmm0
+
+    add         rsi,        rax
+    add         rdi,        rdx
+    add         rbx,        16
+
+    dec         rcx
+    jnz         NextSadRow
+
+    ; fold the high-half partial sum into the low half and return it
+    movdqa      xmm4,       xmm3
+    psrldq      xmm4,       8
+    paddw       xmm3,       xmm4
+    movq        rax,        xmm3
+    ; begin epilog
+    pop         rbx
+    pop         rdi
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+;unsigned int vp8_sad16x16_unmasked_wmt(
+;    unsigned char *src_ptr,
+;    int  src_stride,
+;    unsigned char *ref_ptr,
+;    int  ref_stride,
+;    unsigned char *mask)
+;
+; Sum of absolute differences over a 16x16 block.  Both inputs are ORed with
+; the mask row (16 mask bytes per row, rows contiguous) before the SAD, so
+; positions whose mask byte is 0xff become equal and contribute nothing.  The
+; mask bytes are presumably always 0x00 or 0xff.
+global sym(vp8_sad16x16_unmasked_wmt)
+sym(vp8_sad16x16_unmasked_wmt):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 5
+    push        rsi
+    push        rdi
+    push        rbx                     ; callee-saved; used as the mask pointer
+    ; end prolog
+    mov         rsi,        arg(0)      ;src_ptr
+    mov         rdi,        arg(2)      ;ref_ptr
+
+    mov         rbx,        arg(4)      ;mask
+    movsxd      rax,        dword ptr arg(1) ;src_stride
+    movsxd      rdx,        dword ptr arg(3) ;ref_stride
+
+    mov         rcx,        16
+
+    pxor        xmm3,       xmm3        ; running sum (one partial sum per 64-bit half)
+
+next_vp8_sad16x16_unmasked_wmt:
+    movdqu      xmm0,       [rsi]
+    movdqu      xmm1,       [rdi]
+    movdqu      xmm2,       [rbx]
+    por         xmm0,       xmm2
+    por         xmm1,       xmm2
+
+    psadbw      xmm0,       xmm1
+    paddw       xmm3,       xmm0
+
+    add         rsi,        rax
+    add         rdi,        rdx
+    add         rbx,        16
+
+    dec         rcx
+    jnz         next_vp8_sad16x16_unmasked_wmt
+
+    ; fold the high-half partial sum into the low half and return it
+    movdqa      xmm4,       xmm3
+    psrldq      xmm4,       8
+    paddw       xmm3,       xmm4
+    movq        rax,        xmm3
+    ; begin epilog
+    pop         rbx
+    pop         rdi
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+;unsigned int vp8_masked_predictor_wmt(
+;    unsigned char *masked,
+;    unsigned char *unmasked,
+;    int  src_stride,
+;    unsigned char *dst_ptr,
+;    int  dst_stride,
+;    unsigned char *mask)
+;
+; Writes a 16x16 block to dst_ptr: bits set in the mask select the `masked`
+; source, clear bits select the `unmasked` source.  Both sources share
+; src_stride; the mask is 16 bytes per row, rows contiguous.
+; rax is not set to a result, so the declared return value is not meaningful.
+; Uses r11, so this routine is 64-bit only.
+global sym(vp8_masked_predictor_wmt)
+sym(vp8_masked_predictor_wmt):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    push        rsi
+    push        rdi
+    push        rbx                     ; callee-saved; used as the mask pointer
+    ; end prolog
+    mov         rsi,        arg(0)      ;masked source
+    mov         rdi,        arg(1)      ;unmasked source
+
+    mov         rbx,        arg(5)      ;mask
+    movsxd      rax,        dword ptr arg(2) ;src_stride
+    mov         r11,        arg(3)      ; destination
+    movsxd      rdx,        dword ptr arg(4) ;dst_stride
+
+    mov         rcx,        16
+
+    pxor        xmm3,       xmm3
+
+next_vp8_masked_predictor_wmt:
+    movdqu      xmm0,       [rsi]
+    movdqu      xmm1,       [rdi]
+    movdqu      xmm2,       [rbx]
+
+    pand        xmm0,       xmm2        ; masked source where mask is set
+    pandn       xmm2,       xmm1        ; unmasked source where mask is clear
+    por         xmm0,       xmm2
+    movdqu      [r11],      xmm0
+
+    add         r11,        rdx
+    add         rsi,        rax
+    add         rdi,        rax         ; both sources advance by src_stride
+                                        ; (was dst_stride; the uv variant
+                                        ; already used src_stride here)
+    add         rbx,        16
+
+    dec         rcx
+    jnz         next_vp8_masked_predictor_wmt
+
+    ; begin epilog
+    pop         rbx
+    pop         rdi
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+;unsigned int vp8_masked_predictor_uv_wmt(
+;    unsigned char *masked,
+;    unsigned char *unmasked,
+;    int  src_stride,
+;    unsigned char *dst_ptr,
+;    int  dst_stride,
+;    unsigned char *mask)
+;
+; 8x8 (chroma) counterpart of vp8_masked_predictor_wmt: bits set in the mask
+; select the `masked` source, clear bits the `unmasked` source.  Both sources
+; share src_stride; the mask is 8 bytes per row, rows contiguous.
+; rax is not set to a result, so the declared return value is not meaningful.
+; Uses r11, so this routine is 64-bit only.
+global sym(vp8_masked_predictor_uv_wmt)
+sym(vp8_masked_predictor_uv_wmt):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    push        rsi
+    push        rdi
+    push        rbx                     ; callee-saved; used as the mask pointer
+    ; end prolog
+    mov         rsi,        arg(0)      ;masked source
+    mov         rdi,        arg(1)      ;unmasked source
+
+    mov         rbx,        arg(5)      ;mask
+    movsxd      rax,        dword ptr arg(2) ;src_stride
+    mov         r11,        arg(3)      ; destination
+    movsxd      rdx,        dword ptr arg(4) ;dst_stride
+
+    mov         rcx,        8
+
+    pxor        xmm3,       xmm3
+
+next_vp8_masked_predictor_uv_wmt:
+    movq        xmm0,       [rsi]
+    movq        xmm1,       [rdi]
+    movq        xmm2,       [rbx]
+
+    pand        xmm0,       xmm2        ; masked source where mask is set
+    pandn       xmm2,       xmm1        ; unmasked source where mask is clear
+    por         xmm0,       xmm2
+    movq        [r11],      xmm0
+
+    add         r11,        rdx
+    add         rsi,        rax
+    add         rdi,        rax
+    add         rbx,        8
+
+    dec         rcx
+    jnz         next_vp8_masked_predictor_uv_wmt
+
+    ; begin epilog
+    pop         rbx
+    pop         rdi
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+;unsigned int vp8_uv_from_y_mask(
+; unsigned char *ymask,
+; unsigned char *uvmask)
+;
+; Subsamples a 16x16 byte mask to 8x8: for every second source row (the source
+; pointer advances 32 bytes per iteration) the bytes at even positions are
+; gathered with the shuf1b shuffle table and stored as one 8-byte output row.
+; rax is not set to a result, so the declared return value is not meaningful.
+; NOTE(review): only two arguments are read although SHADOW_ARGS_TO_STACK is
+; given 6, and shuf1b is addressed directly rather than through GLOBAL().
+global sym(vp8_uv_from_y_mask)
+sym(vp8_uv_from_y_mask):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ push rsi
+ push rdi
+ ; end prolog
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(1) ;dst_ptr
+
+
+ mov rcx, 8
+
+ ; xmm3 is cleared but not used afterwards
+ pxor xmm3, xmm3
+
+next_p8_uv_from_y_mask:
+ movdqu xmm0, [rsi]
+ pshufb xmm0, [shuf1b] ;[GLOBAL(shuf1b)]
+ movq [rdi],xmm0
+ add rdi, 8
+ add rsi,32
+
+ dec rcx
+ jnz next_p8_uv_from_y_mask
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+SECTION_RODATA
+align 16
+shuf1b:
+ db 0, 2, 4, 6, 8, 10, 12, 14, 0, 0, 0, 0, 0, 0, 0, 0
+
diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c
index e5830e88b..286659536 100644
--- a/vp8/decoder/decodemv.c
+++ b/vp8/decoder/decodemv.c
@@ -238,11 +238,13 @@ static const unsigned char mbsplit_fill_offset[4][16] = {
-
static void mb_mode_mv_init(VP8D_COMP *pbi)
{
vp8_reader *const bc = & pbi->bc;
MV_CONTEXT *const mvc = pbi->common.fc.mvc;
+#if CONFIG_SEGMENTATION
+ MACROBLOCKD *const xd = & pbi->mb;
+#endif
pbi->prob_skip_false = 0;
if (pbi->common.mb_no_coeff_skip)
@@ -277,6 +279,9 @@ static void mb_mode_mv_init(VP8D_COMP *pbi)
}
read_mvcontexts(bc, mvc);
+#if CONFIG_SEGMENTATION
+ xd->temporal_update = vp8_read_bit(bc);
+#endif
}
}
@@ -287,7 +292,11 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
vp8_reader *const bc = & pbi->bc;
MV_CONTEXT *const mvc = pbi->common.fc.mvc;
const int mis = pbi->common.mode_info_stride;
-
+#if CONFIG_SEGMENTATION
+ MACROBLOCKD *const xd = & pbi->mb;
+ int sum;
+ int index = mb_row * pbi->common.mb_cols + mb_col;
+#endif
MV *const mv = & mbmi->mv.as_mv;
int mb_to_left_edge;
int mb_to_right_edge;
@@ -298,7 +307,6 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
mb_to_bottom_edge = pbi->mb.mb_to_bottom_edge;
mb_to_top_edge -= LEFT_TOP_MARGIN;
mb_to_bottom_edge += RIGHT_BOTTOM_MARGIN;
-
mbmi->need_to_clamp_mvs = 0;
/* Distance of Mb to the various image edges.
* These specified to 8th pel as they are always compared to MV values that are in 1/8th pel units
@@ -313,7 +321,41 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
/* If required read in new segmentation data for this MB */
if (pbi->mb.update_mb_segmentation_map)
- vp8_read_mb_features(bc, mbmi, &pbi->mb);
+ {
+#if CONFIG_SEGMENTATION
+ if (xd->temporal_update)
+ {
+ sum = 0;
+
+ if (mb_col != 0)
+ sum += (mi-1)->mbmi.segment_flag;
+ if (mb_row != 0)
+ sum += (mi-pbi->common.mb_cols)->mbmi.segment_flag;
+
+ if (vp8_read(bc, xd->mb_segment_tree_probs[3+sum]) == 0)
+ {
+ mbmi->segment_id = pbi->segmentation_map[index];
+ mbmi->segment_flag = 0;
+ }
+ else
+ {
+ vp8_read_mb_features(bc, &mi->mbmi, &pbi->mb);
+ mbmi->segment_flag = 1;
+ pbi->segmentation_map[index] = mbmi->segment_id;
+ }
+
+ }
+ else
+ {
+ vp8_read_mb_features(bc, &mi->mbmi, &pbi->mb);
+ pbi->segmentation_map[index] = mbmi->segment_id;
+ }
+ index++;
+#else
+ vp8_read_mb_features(bc, &mi->mbmi, &pbi->mb);
+#endif
+ }
+
/* Read the macroblock coeff skip flag if this feature is in use, else default to 0 */
if (pbi->common.mb_no_coeff_skip)
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 82841e8b8..20de9101a 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -573,7 +573,6 @@ int vp8_decode_frame(VP8D_COMP *pbi)
const unsigned char *data = (const unsigned char *)pbi->Source;
const unsigned char *const data_end = data + pbi->source_sz;
ptrdiff_t first_partition_length_in_bytes;
-
int mb_row;
int i, j, k, l;
const int *const mb_feature_data_bits = vp8_mb_feature_data_bits;
@@ -660,7 +659,6 @@ int vp8_decode_frame(VP8D_COMP *pbi)
/* Is segmentation enabled */
xd->segmentation_enabled = (unsigned char)vp8_read_bit(bc);
-
if (xd->segmentation_enabled)
{
/* Signal whether or not the segmentation map is being explicitly updated this frame. */
@@ -696,9 +694,12 @@ int vp8_decode_frame(VP8D_COMP *pbi)
{
/* Which macro block level features are enabled */
vpx_memset(xd->mb_segment_tree_probs, 255, sizeof(xd->mb_segment_tree_probs));
-
+#if CONFIG_SEGMENTATION
/* Read the probs used to decode the segment id for each macro block. */
+ for (i = 0; i < MB_FEATURE_TREE_PROBS+3; i++)
+#else
for (i = 0; i < MB_FEATURE_TREE_PROBS; i++)
+#endif
{
/* If not explicitly set value is defaulted to 255 by memset above */
if (vp8_read_bit(bc))
@@ -820,7 +821,6 @@ int vp8_decode_frame(VP8D_COMP *pbi)
fclose(z);
}
-
{
/* read coef probability tree */
@@ -843,6 +843,11 @@ int vp8_decode_frame(VP8D_COMP *pbi)
vpx_memcpy(&xd->pre, &pc->yv12_fb[pc->lst_fb_idx], sizeof(YV12_BUFFER_CONFIG));
vpx_memcpy(&xd->dst, &pc->yv12_fb[pc->new_fb_idx], sizeof(YV12_BUFFER_CONFIG));
+#if CONFIG_SEGMENTATION
+ // Create the decoder segmentation map (all entries 0) only once.
+ // Allocating it on every frame leaked the previous map and wiped the
+ // segment ids that the temporal-update path reads back from the map.
+ // NOTE(review): assumes pbi is zero-initialised when it is created and that
+ // the frame dimensions do not change; a resize must free and reallocate.
+ if (pbi->segmentation_map == 0)
+ {
+ CHECK_MEM_ERROR(pbi->segmentation_map, vpx_calloc((pc->mb_rows * pc->mb_cols), 1));
+ }
+#endif
+
/* set up frame new frame for intra coded blocks */
#if CONFIG_MULTITHREAD
if (!(pbi->b_multithreaded_rd) || pc->multi_token_partition == ONE_PARTITION || !(pc->filter_level))
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index ef2e00d61..4225798f4 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -50,7 +50,6 @@ void vp8dx_initialize()
}
}
-
VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf)
{
VP8D_COMP *pbi = vpx_memalign(32, sizeof(VP8D_COMP));
@@ -100,13 +99,17 @@ VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf)
return (VP8D_PTR) pbi;
}
-
void vp8dx_remove_decompressor(VP8D_PTR ptr)
{
VP8D_COMP *pbi = (VP8D_COMP *) ptr;
if (!pbi)
return;
+#if CONFIG_SEGMENTATION
+ // Delete segmentation map
+ if (pbi->segmentation_map != 0)
+ vpx_free(pbi->segmentation_map);
+#endif
#if CONFIG_MULTITHREAD
if (pbi->b_multithreaded_rd)
@@ -395,6 +398,8 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show);
}
+#if 0 /* Debug aid only: presumably writes each reconstructed frame to "recon.yuv"; must not run in normal decoding. */
+ vp8_recon_write_yuv_frame("recon.yuv", cm->frame_to_show);
+#endif
+
vp8_clear_system_state();
diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h
index 512f1fc0f..70cb0fe3c 100644
--- a/vp8/decoder/onyxd_int.h
+++ b/vp8/decoder/onyxd_int.h
@@ -81,7 +81,7 @@ typedef struct VP8Decompressor
const unsigned char *Source;
unsigned int source_sz;
-
+ unsigned char *segmentation_map;
unsigned int CPUFreq;
unsigned int decode_microseconds;
unsigned int time_decoding;
@@ -95,7 +95,6 @@ typedef struct VP8Decompressor
int current_mb_col_main;
int decoding_thread_count;
int allocated_decoding_thread_count;
-
int mt_baseline_filter_level[MAX_MB_SEGMENTS];
int sync_range;
int *mt_current_mb_col; /* Each row remembers its already decoded column. */
diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c
index adbd10698..db883c7d6 100644
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -20,7 +20,9 @@
#include "vp8/common/pragmas.h"
#include "vpx_mem/vpx_mem.h"
#include "bitstream.h"
-
+#if CONFIG_SEGMENTATION
+static int segment_cost = 0;
+#endif
const int vp8cx_base_skip_false_prob[128] =
{
255, 255, 255, 255, 255, 255, 255, 255,
@@ -819,24 +821,39 @@ static void write_mb_features(vp8_writer *w, const MB_MODE_INFO *mi, const MACRO
case 0:
vp8_write(w, 0, x->mb_segment_tree_probs[0]);
vp8_write(w, 0, x->mb_segment_tree_probs[1]);
+#if CONFIG_SEGMENTATION
+ segment_cost += vp8_cost_zero(x->mb_segment_tree_probs[0]) + vp8_cost_zero(x->mb_segment_tree_probs[1]);
+#endif
break;
case 1:
vp8_write(w, 0, x->mb_segment_tree_probs[0]);
vp8_write(w, 1, x->mb_segment_tree_probs[1]);
+#if CONFIG_SEGMENTATION
+ segment_cost += vp8_cost_zero(x->mb_segment_tree_probs[0]) + vp8_cost_one(x->mb_segment_tree_probs[1]);
+#endif
break;
case 2:
vp8_write(w, 1, x->mb_segment_tree_probs[0]);
vp8_write(w, 0, x->mb_segment_tree_probs[2]);
+#if CONFIG_SEGMENTATION
+ segment_cost += vp8_cost_one(x->mb_segment_tree_probs[0]) + vp8_cost_zero(x->mb_segment_tree_probs[2]);
+#endif
break;
case 3:
vp8_write(w, 1, x->mb_segment_tree_probs[0]);
vp8_write(w, 1, x->mb_segment_tree_probs[2]);
+#if CONFIG_SEGMENTATION
+ segment_cost += vp8_cost_one(x->mb_segment_tree_probs[0]) + vp8_cost_one(x->mb_segment_tree_probs[2]);
+#endif
break;
// TRAP.. This should not happen
default:
vp8_write(w, 0, x->mb_segment_tree_probs[0]);
vp8_write(w, 0, x->mb_segment_tree_probs[1]);
+#if CONFIG_SEGMENTATION
+ segment_cost += vp8_cost_zero(x->mb_segment_tree_probs[0]) + vp8_cost_zero(x->mb_segment_tree_probs[1]);
+#endif
break;
}
}
@@ -848,7 +865,13 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi)
VP8_COMMON *const pc = & cpi->common;
vp8_writer *const w = & cpi->bc;
const MV_CONTEXT *mvc = pc->fc.mvc;
-
+ MACROBLOCKD *xd = &cpi->mb.e_mbd;
+#if CONFIG_SEGMENTATION
+ int left_id, above_id;
+ int i;
+ int sum;
+ int index = 0;
+#endif
const int *const rfct = cpi->count_mb_ref_frame_usage;
const int rf_intra = rfct[INTRA_FRAME];
const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME];
@@ -905,7 +928,9 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi)
update_mbintra_mode_probs(cpi);
vp8_write_mvprobs(cpi);
-
+#if CONFIG_SEGMENTATION
+ vp8_write_bit(w, (xd->temporal_update) ? 1:0);
+#endif
while (++mb_row < pc->mb_rows)
{
int mb_col = -1;
@@ -916,7 +941,7 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi)
const MV_REFERENCE_FRAME rf = mi->ref_frame;
const MB_PREDICTION_MODE mode = mi->mode;
- MACROBLOCKD *xd = &cpi->mb.e_mbd;
+ //MACROBLOCKD *xd = &cpi->mb.e_mbd;
// Distance of Mb to the various image edges.
// These specified to 8th pel as they are always compared to MV values that are in 1/8th pel units
@@ -924,13 +949,46 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi)
xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
xd->mb_to_top_edge = -((mb_row * 16)) << 3;
xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
-
+ xd->up_available = (mb_row != 0);
+ xd->left_available = (mb_col != 0);
#ifdef ENTROPY_STATS
active_section = 9;
#endif
if (cpi->mb.e_mbd.update_mb_segmentation_map)
+ {
+#if CONFIG_SEGMENTATION
+ if (xd->temporal_update)
+ {
+ sum = 0;
+ if (mb_col != 0)
+ sum += (m-1)->mbmi.segment_flag;
+ if (mb_row != 0)
+ sum += (m-pc->mb_cols)->mbmi.segment_flag;
+
+ if (m->mbmi.segment_flag == 0)
+ {
+ vp8_write(w,0,xd->mb_segment_tree_probs[3+sum]);
+ segment_cost += vp8_cost_zero(xd->mb_segment_tree_probs[3+sum]);
+ }
+ else
+ {
+ vp8_write(w,1,xd->mb_segment_tree_probs[3+sum]);
+ segment_cost += vp8_cost_one(xd->mb_segment_tree_probs[3+sum]);
+ write_mb_features(w, mi, &cpi->mb.e_mbd);
+ cpi->segmentation_map[index] = mi->segment_id;
+ }
+ }
+ else
+ {
+ write_mb_features(w, mi, &cpi->mb.e_mbd);
+ cpi->segmentation_map[index] = mi->segment_id;
+ }
+ index++;
+#else
write_mb_features(w, mi, &cpi->mb.e_mbd);
+#endif
+ }
if (pc->mb_no_coeff_skip)
vp8_encode_bool(w, m->mbmi.mb_skip_coeff, prob_skip_false);
@@ -1058,7 +1116,11 @@ static void write_kfmodes(VP8_COMP *cpi)
const VP8_COMMON *const c = & cpi->common;
/* const */
MODE_INFO *m = c->mi;
-
+#if CONFIG_SEGMENTATION
+ int left_id, above_id;
+ int i;
+ int index = 0;
+#endif
int mb_row = -1;
int prob_skip_false = 0;
@@ -1083,9 +1145,22 @@ static void write_kfmodes(VP8_COMP *cpi)
while (++mb_col < c->mb_cols)
{
const int ym = m->mbmi.mode;
-
+#if CONFIG_SEGMENTATION
+ MACROBLOCKD *xd = &cpi->mb.e_mbd;
+ xd->up_available = (mb_row != 0);
+ xd->left_available = (mb_col != 0);
+#endif
if (cpi->mb.e_mbd.update_mb_segmentation_map)
+ {
+#if CONFIG_SEGMENTATION
+
+ write_mb_features(bc, &m->mbmi, &cpi->mb.e_mbd);
+ cpi->segmentation_map[index] = m->mbmi.segment_id;
+ index++;
+#else
write_mb_features(bc, &m->mbmi, &cpi->mb.e_mbd);
+#endif
+ }
if (c->mb_no_coeff_skip)
vp8_encode_bool(bc, m->mbmi.mb_skip_coeff, prob_skip_false);
@@ -1411,6 +1486,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size)
else
vp8_start_encode(bc, cx_data);
+#if CONFIG_SEGMENTATION
+ // Experiment only: force the segment map to be re-sent on every frame.
+ // Builds without the experiment keep whatever the encoder decided.
+ xd->update_mb_segmentation_map = 1;
+#endif
// Signal whether or not Segmentation is enabled
vp8_write_bit(bc, (xd->segmentation_enabled) ? 1 : 0);
@@ -1461,8 +1537,12 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size)
if (xd->update_mb_segmentation_map)
{
+ #if CONFIG_SEGMENTATION
// Write the probs used to decode the segment id for each macro block.
+ for (i = 0; i < MB_FEATURE_TREE_PROBS+3; i++)
+#else
for (i = 0; i < MB_FEATURE_TREE_PROBS; i++)
+#endif
{
int Data = xd->mb_segment_tree_probs[i];
@@ -1632,7 +1712,9 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size)
active_section = 1;
#endif
}
-
+#if CONFIG_SEGMENTATION
+ //printf("%d\n",segment_cost);
+#endif
vp8_stop_encode(bc);
oh.first_partition_length_in_bytes = cpi->bc.pos;
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index 3a0ee5c31..61c273c7a 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -30,6 +30,7 @@
#include "vp8/common/subpixel.h"
#include "vpx_ports/vpx_timer.h"
+
#if CONFIG_RUNTIME_CPU_DETECT
#define RTCD(x) &cpi->common.rtcd.x
#define IF_RTCD(x) (x)
@@ -37,6 +38,13 @@
#define RTCD(x) NULL
#define IF_RTCD(x) NULL
#endif
+
+#if CONFIG_SEGMENTATION
+#define SEEK_SEGID 12
+#define SEEK_SAMEID 4
+#define SEEK_DIFFID 7
+#endif
+
extern void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ;
extern void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex);
@@ -479,7 +487,10 @@ void encode_mb_row(VP8_COMP *cpi,
int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
int seg_map_index = (mb_row * cpi->common.mb_cols);
-
+#if CONFIG_SEGMENTATION
+ int left_id, above_id;
+ int sum;
+#endif
#if CONFIG_MULTITHREAD
const int nsync = cpi->mt_sync_range;
const int rightmost_col = cm->mb_cols - 1;
@@ -490,7 +501,6 @@ void encode_mb_row(VP8_COMP *cpi,
else
last_row_current_mb_col = &rightmost_col;
#endif
-
// reset above block coeffs
xd->above_context = cm->above_context;
@@ -564,6 +574,7 @@ void encode_mb_row(VP8_COMP *cpi,
xd->mode_info_context->mbmi.segment_id = 0;
vp8cx_mb_init_quantizer(cpi, x);
+
}
else
xd->mode_info_context->mbmi.segment_id = 0; // Set to Segment 0 by default
@@ -627,6 +638,11 @@ void encode_mb_row(VP8_COMP *cpi,
x->gf_active_ptr++; // Increment pointer into gf useage flags structure for next mb
+#if CONFIG_SEGMENTATION
+ // Experiment only: derive a synthetic segment id from the coding mode
+ // (ZEROMV predicted from LAST_FRAME -> segment 0, everything else -> 1).
+ // Guarded so builds without the experiment keep the segment id chosen above.
+ if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
+ xd->mode_info_context->mbmi.segment_id = 0;
+ else
+ xd->mode_info_context->mbmi.segment_id = 1;
+#endif
+
for (i = 0; i < 16; i++)
vpx_memcpy(&xd->mode_info_context->bmi[i], &xd->block[i].bmi, sizeof(xd->block[i].bmi));
@@ -638,9 +654,42 @@ void encode_mb_row(VP8_COMP *cpi,
recon_yoffset += 16;
recon_uvoffset += 8;
- // Keep track of segment useage
- segment_counts[xd->mode_info_context->mbmi.segment_id] ++;
+#if CONFIG_SEGMENTATION
+ //cpi->segmentation_map[mb_row * cm->mb_cols + mb_col] = xd->mbmi.segment_id;
+ if (cm->frame_type == KEY_FRAME)
+ {
+ segment_counts[xd->mode_info_context->mbmi.segment_id] ++;
+ }
+ else
+ {
+ sum = 0;
+ if (mb_col != 0)
+ sum += (xd->mode_info_context-1)->mbmi.segment_flag;
+ if (mb_row != 0)
+ sum += (xd->mode_info_context-cm->mb_cols)->mbmi.segment_flag;
+
+ if (xd->mode_info_context->mbmi.segment_id == cpi->segmentation_map[(mb_row*cm->mb_cols) + mb_col])
+ xd->mode_info_context->mbmi.segment_flag = 0;
+ else
+ xd->mode_info_context->mbmi.segment_flag = 1;
+ if (xd->mode_info_context->mbmi.segment_flag == 0)
+ {
+ segment_counts[SEEK_SAMEID + sum]++;
+ segment_counts[10]++;
+ }
+ else
+ {
+ segment_counts[SEEK_DIFFID + sum]++;
+ segment_counts[11]++;
+ //calculate individual segment ids
+ segment_counts[xd->mode_info_context->mbmi.segment_id] ++;
+ }
+ }
+ segment_counts[SEEK_SEGID + xd->mode_info_context->mbmi.segment_id] ++;
+#else
+ segment_counts[xd->mode_info_context->mbmi.segment_id] ++;
+#endif
// skip to next mb
xd->mode_info_context++;
x->partition_info++;
@@ -665,7 +714,6 @@ void encode_mb_row(VP8_COMP *cpi,
xd->mode_info_context++;
x->partition_info++;
x->activity_sum += activity_sum;
-
#if CONFIG_MULTITHREAD
if ((cpi->b_multi_threaded != 0) && (mb_row == cm->mb_rows - 1))
{
@@ -673,7 +721,6 @@ void encode_mb_row(VP8_COMP *cpi,
}
#endif
}
-
void vp8_encode_frame(VP8_COMP *cpi)
{
int mb_row;
@@ -682,7 +729,13 @@ void vp8_encode_frame(VP8_COMP *cpi)
MACROBLOCKD *const xd = & x->e_mbd;
TOKENEXTRA *tp = cpi->tok;
+#if CONFIG_SEGMENTATION
+ int segment_counts[MAX_MB_SEGMENTS + SEEK_SEGID];
+ int prob[3] = {255, 255, 255}; // same default the tree probs are memset to; entries with zero counts are otherwise read uninitialised
+ int new_cost, original_cost;
+#else
int segment_counts[MAX_MB_SEGMENTS];
+#endif
int totalrate;
// Functions setup for all frame types so we can use MC in AltRef
@@ -894,41 +947,126 @@ void vp8_encode_frame(VP8_COMP *cpi)
}
-
// Work out the segment probabilites if segmentation is enabled
if (xd->segmentation_enabled)
{
int tot_count;
- int i;
+ int i,j;
+ int count1,count2,count3,count4;
// Set to defaults
vpx_memset(xd->mb_segment_tree_probs, 255 , sizeof(xd->mb_segment_tree_probs));
+#if CONFIG_SEGMENTATION
+
+ tot_count = segment_counts[12] + segment_counts[13] + segment_counts[14] + segment_counts[15];
+ count1 = segment_counts[12] + segment_counts[13];
+ count2 = segment_counts[14] + segment_counts[15];
+
+ if (tot_count)
+ prob[0] = (count1 * 255) / tot_count;
+
+ if (count1 > 0)
+ prob[1] = (segment_counts[12] * 255) /count1;
+
+ if (count2 > 0)
+ prob[2] = (segment_counts[14] * 255) /count2;
+
+ if (cm->frame_type != KEY_FRAME)
+ {
+ tot_count = segment_counts[4] + segment_counts[7];
+ if (tot_count)
+ xd->mb_segment_tree_probs[3] = (segment_counts[4] * 255)/tot_count;
+
+ tot_count = segment_counts[5] + segment_counts[8];
+ if (tot_count)
+ xd->mb_segment_tree_probs[4] = (segment_counts[5] * 255)/tot_count;
+
+ tot_count = segment_counts[6] + segment_counts[9];
+ if (tot_count)
+ xd->mb_segment_tree_probs[5] = (segment_counts[6] * 255)/tot_count;
+ }
tot_count = segment_counts[0] + segment_counts[1] + segment_counts[2] + segment_counts[3];
+ count3 = segment_counts[0] + segment_counts[1];
+ count4 = segment_counts[2] + segment_counts[3];
if (tot_count)
+ xd->mb_segment_tree_probs[0] = (count3 * 255) / tot_count;
+
+ if (count3 > 0)
+ xd->mb_segment_tree_probs[1] = (segment_counts[0] * 255) /count3;
+
+ if (count4 > 0)
+ xd->mb_segment_tree_probs[2] = (segment_counts[2] * 255) /count4;
+
+ for (i = 0; i < MB_FEATURE_TREE_PROBS+3; i++)
{
- xd->mb_segment_tree_probs[0] = ((segment_counts[0] + segment_counts[1]) * 255) / tot_count;
+ if (xd->mb_segment_tree_probs[i] == 0)
+ xd->mb_segment_tree_probs[i] = 1;
+ }
- tot_count = segment_counts[0] + segment_counts[1];
+ original_cost = count1 * vp8_cost_zero(prob[0]) + count2 * vp8_cost_one(prob[0]);
- if (tot_count > 0)
- {
- xd->mb_segment_tree_probs[1] = (segment_counts[0] * 255) / tot_count;
- }
+ if (count1 > 0)
+ original_cost += segment_counts[12] * vp8_cost_zero(prob[1]) + segment_counts[13] * vp8_cost_one(prob[1]);
+
+ if (count2 > 0)
+ original_cost += segment_counts[14] * vp8_cost_zero(prob[2]) + segment_counts[15] * vp8_cost_one(prob[2]) ;
- tot_count = segment_counts[2] + segment_counts[3];
+ new_cost = 0;
- if (tot_count > 0)
- xd->mb_segment_tree_probs[2] = (segment_counts[2] * 255) / tot_count;
+ if (cm->frame_type != KEY_FRAME)
+ {
+ new_cost = segment_counts[4] * vp8_cost_zero(xd->mb_segment_tree_probs[3]) + segment_counts[7] * vp8_cost_one(xd->mb_segment_tree_probs[3]);
+
+ new_cost += segment_counts[5] * vp8_cost_zero(xd->mb_segment_tree_probs[4]) + segment_counts[8] * vp8_cost_one(xd->mb_segment_tree_probs[4]);
+
+ new_cost += segment_counts[6] * vp8_cost_zero(xd->mb_segment_tree_probs[5]) + segment_counts[9] * vp8_cost_one (xd->mb_segment_tree_probs[5]);
+ }
- // Zero probabilities not allowed
- for (i = 0; i < MB_FEATURE_TREE_PROBS; i ++)
+ if (tot_count > 0)
+ new_cost += count3 * vp8_cost_zero(xd->mb_segment_tree_probs[0]) + count4 * vp8_cost_one(xd->mb_segment_tree_probs[0]);
+
+ if (count3 > 0)
+ new_cost += segment_counts[0] * vp8_cost_zero(xd->mb_segment_tree_probs[1]) + segment_counts[1] * vp8_cost_one(xd->mb_segment_tree_probs[1]);
+
+ if (count4 > 0)
+ new_cost += segment_counts[2] * vp8_cost_zero(xd->mb_segment_tree_probs[2]) + segment_counts[3] * vp8_cost_one(xd->mb_segment_tree_probs[2]) ;
+
+ if (new_cost < original_cost)
+ xd->temporal_update = 1;
+ else
+ {
+ xd->temporal_update = 0;
+ xd->mb_segment_tree_probs[0] = prob[0];
+ xd->mb_segment_tree_probs[1] = prob[1];
+ xd->mb_segment_tree_probs[2] = prob[2];
+ }
+#else
+ tot_count = segment_counts[0] + segment_counts[1] + segment_counts[2] + segment_counts[3];
+ count1 = segment_counts[0] + segment_counts[1];
+ count2 = segment_counts[2] + segment_counts[3];
+
+ if (tot_count)
+ xd->mb_segment_tree_probs[0] = (count1 * 255) / tot_count;
+
+ if (count1 > 0)
+ xd->mb_segment_tree_probs[1] = (segment_counts[0] * 255) /count1;
+
+ if (count2 > 0)
+ xd->mb_segment_tree_probs[2] = (segment_counts[2] * 255) /count2;
+
+#endif
+ // Zero probabilities not allowed
+#if CONFIG_SEGMENTATION
+ for (i = 0; i < MB_FEATURE_TREE_PROBS+3; i++)
+#else
+ for (i = 0; i < MB_FEATURE_TREE_PROBS; i++)
+#endif
{
if (xd->mb_segment_tree_probs[i] == 0)
xd->mb_segment_tree_probs[i] = 1;
}
- }
}
// 256 rate units to the bit
diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c
index f5006ddab..8aef915b8 100644
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -116,7 +116,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
{
int seg_map_index = (mb_row * cm->mb_cols);
-
+
if ((mb_col & (nsync - 1)) == 0)
{
while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != cm->mb_cols - 1)
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index e30c291f7..87f0f1853 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -411,7 +411,6 @@ static void set_segmentation_map(VP8_PTR ptr, unsigned char *segmentation_map)
// Copy in the new segmentation map
vpx_memcpy(cpi->segmentation_map, segmentation_map, (cpi->common.mb_rows * cpi->common.mb_cols));
-
// Signal that the map should be updated.
cpi->mb.e_mbd.update_mb_segmentation_map = 1;
cpi->mb.e_mbd.update_mb_segmentation_data = 1;
@@ -437,12 +436,10 @@ static void set_segment_data(VP8_PTR ptr, signed char *feature_data, unsigned ch
static void segmentation_test_function(VP8_PTR ptr)
{
VP8_COMP *cpi = (VP8_COMP *)(ptr);
-
unsigned char *seg_map;
signed char feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS];
-
+ CHECK_MEM_ERROR(seg_map, vpx_calloc((cpi->common.mb_rows * cpi->common.mb_cols), 1));
// Create a temporary map for segmentation data.
- CHECK_MEM_ERROR(seg_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1));
// MB loop to set local segmentation map
/*for ( i = 0; i < cpi->common.mb_rows; i++ )
@@ -502,7 +499,7 @@ static void cyclic_background_refresh(VP8_COMP *cpi, int Q, int lf_adjustment)
int mbs_in_frame = cpi->common.mb_rows * cpi->common.mb_cols;
// Create a temporary map for segmentation data.
- CHECK_MEM_ERROR(seg_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1));
+ CHECK_MEM_ERROR(seg_map, vpx_calloc((cpi->common.mb_rows * cpi->common.mb_cols), 1));
cpi->cyclic_refresh_q = Q;
@@ -1948,7 +1945,7 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
CHECK_MEM_ERROR(cpi->lf_ref_frame, vpx_calloc((cpi->common.mb_rows+2) * (cpi->common.mb_cols+2), sizeof(int)));
// Create the encoder segmentation map and set all entries to 0
- CHECK_MEM_ERROR(cpi->segmentation_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1));
+ CHECK_MEM_ERROR(cpi->segmentation_map, vpx_calloc((cpi->common.mb_rows * cpi->common.mb_cols), 1));
CHECK_MEM_ERROR(cpi->active_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1));
vpx_memset(cpi->active_map , 1, (cpi->common.mb_rows * cpi->common.mb_cols));
cpi->active_map_enabled = 0;
@@ -1984,13 +1981,12 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
cpi->cyclic_refresh_q = 32;
if (cpi->cyclic_refresh_mode_enabled)
- {
CHECK_MEM_ERROR(cpi->cyclic_refresh_map, vpx_calloc((cpi->common.mb_rows * cpi->common.mb_cols), 1));
- }
else
cpi->cyclic_refresh_map = (signed char *) NULL;
// Test function for segmentation
+
//segmentation_test_function((VP8_PTR) cpi);
#ifdef ENTROPY_STATS
@@ -3432,6 +3428,10 @@ static void encode_frame_to_data_rate
// Test code for segmentation of gf/arf (0,0)
//segmentation_test_function((VP8_PTR) cpi);
+#if CONFIG_SEGMENTATION
+ cpi->mb.e_mbd.segmentation_enabled = 1;
+ cpi->mb.e_mbd.update_mb_segmentation_map = 1;
+#endif
#if CONFIG_REALTIME_ONLY
if(cpi->oxcf.auto_key && cm->frame_type != KEY_FRAME)
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index 0e53f6803..9c363598a 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -195,7 +195,11 @@ typedef struct
typedef struct
{
MACROBLOCK mb;
+#if CONFIG_SEGMENTATION
+ int segment_counts[MAX_MB_SEGMENTS + 12]; /* 16 entries: same layout as the MAX_MB_SEGMENTS + SEEK_SEGID array in vp8_encode_frame (SEEK_SEGID is 12); encode_mb_row indexes up to SEEK_SEGID + segment_id */
+#else
int segment_counts[MAX_MB_SEGMENTS];
+#endif
int totalrate;
} MB_ROW_COMP;
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index 2a2f0cfad..54a7eacab 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -99,6 +99,11 @@ VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/postproc_sse2.asm
endif
VP8_COMMON_SRCS-$(ARCH_ARM) += common/asm_com_offsets.c
+ifeq ($(CONFIG_CSM),yes)
+VP8_COMMON_SRCS-yes += common/maskingmv.c
+VP8_COMMON_SRCS-$(HAVE_SSE3) += common/x86/mask_sse3.asm
+endif
+
VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/arm_systemdependent.c
# common (c)
diff --git a/vpxenc.c b/vpxenc.c
index 39256b665..93ad6f647 100755
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -31,6 +31,7 @@
#include <fcntl.h>
#include <unistd.h>
#endif
+#include "vpx_config.h"
#include "vpx_version.h"
#include "vpx/vp8cx.h"
#include "vpx_ports/mem_ops.h"
@@ -75,6 +76,9 @@ static const struct codec_item
unsigned int fourcc;
} codecs[] =
{
+#if CONFIG_EXPERIMENTAL && CONFIG_VP8_ENCODER
+ {"vp8x", &vpx_codec_vp8x_cx_algo, 0x78385056},
+#endif
#if CONFIG_VP8_ENCODER
{"vp8", &vpx_codec_vp8_cx_algo, 0x30385056},
#endif
@@ -1333,7 +1337,11 @@ int main(int argc, const char **argv_)
/* Handle codec specific options */
#if CONFIG_VP8_ENCODER
- if (codec->iface == &vpx_codec_vp8_cx_algo)
+ if (codec->iface == &vpx_codec_vp8_cx_algo
+#if CONFIG_EXPERIMENTAL
+ || codec->iface == &vpx_codec_vp8x_cx_algo
+#endif
+ )
{
ctrl_args = vp8_args;
ctrl_args_map = vp8_arg_ctrl_map;