author    Deb Mukherjee <debargha@google.com>  2012-02-16 09:29:54 -0800
committer Deb Mukherjee <debargha@google.com>  2012-02-23 09:25:21 -0800
commit    18e90d744eba2d28ad96a566565bbf5642d24b59
tree      8d0859ca6973ad522f4b1e30566ba67ea6ed886a
parent    3c872b6c27a5f03156b973fb359c9293049d6e84
Supporting high precision 1/8-pel motion vectors
This is the initial patch for supporting 1/8th-pel motion. Currently, if we configure with enable-high-precision-mv, all motion vectors default to 1/8 pel. Encode and decode sync fine with the current code. In the next phase the code will be refactored so that the 1/8-pel mode can be chosen adaptively at a frame/segment/mb level.

Derf results: http://www.corp.google.com/~debargha/vp8_results/enhinterp_hpmv.html (about 0.83% better than 8-tap interpolation)

Patch 3: Rebased. Also adds 1/16th-pel interpolation for U and V.

Patch 4: HD results: http://www.corp.google.com/~debargha/vp8_results/enhinterp_hd_hpmv.html Seems impressive (unless I am doing something wrong).

Patch 5: Added mmx/sse for bilinear filtering, and enforced use of the C versions of the subpel filters with 8 taps and 1/16th pel; also redesigned the 8-tap filters to reduce the cut-off frequency and so introduce a denoising effect. There is a new configure option, sixteenth-subpel-uv, which uses 1/16th-pel interpolation for U and V when the motion vectors have 1/8-pel accuracy. With the fixes the results are promising on the derf set. The enhanced interpolation option with 8 taps alone gives a 3% improvement over the derf set: http://www.corp.google.com/~debargha/vp8_results/enhinterpn.html Results on high-precision mv and on the hd set are to follow.

Patch 6: Added a missing condition for CONFIG_SIXTEENTH_SUBPEL_UV in vp8/common/x86/x86_systemdependent.c.

Patch 7: Cleaned up various debug messages.

Patch 8: Resolved a merge conflict.

Change-Id: I5b1d844457aefd7414a9e4e0e06c6ed38fd8cc04
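A note on units may help when reading the diff below. Motion vectors here are stored in 1/8-pel units; without CONFIG_HIGH_PRECISION_MV they are coded and costed in 1/4-pel units, hence the old `>> 1` shifts that become `>> MV_SHIFT`. The following is a hedged sketch, not code from the patch; MV_SHIFT is defined outside this directory and is assumed here to be 1 in the legacy mode and 0 in high-precision mode:

    /* Hedged sketch: splitting a motion-vector component (1/8-pel units)
     * into a full-pel offset and a subpel phase.  MV_SHIFT is an assumed
     * value: 1 in legacy 1/4-pel coding, 0 when 1/8-pel MVs are coded. */
    #include <stdio.h>

    #define MV_SHIFT 0  /* assumed: high-precision mode */

    int main(void)
    {
        int mv      = 37;             /* 4 full pels + 5/8 pel             */
        int coded   = mv >> MV_SHIFT; /* value handed to the entropy coder */
        int fullpel = mv >> 3;        /* integer pixel offset              */
        int phase8  = mv & 7;         /* 1/8-pel phase, 0..7               */
        int phase16 = phase8 << 1;    /* same phase in 1/16-pel steps, as  */
                                      /* the SP() macros in mcomp.c do     */
        printf("%d %d %d %d\n", coded, fullpel, phase8, phase16);
        return 0;
    }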
Diffstat (limited to 'vp8/encoder')
-rw-r--r--vp8/encoder/arm/variance_arm.c18
-rw-r--r--vp8/encoder/bitstream.c5
-rw-r--r--vp8/encoder/encodemv.c53
-rw-r--r--vp8/encoder/mcomp.c368
-rw-r--r--vp8/encoder/onyx_if.c6
-rw-r--r--vp8/encoder/rdopt.c22
-rw-r--r--vp8/encoder/temporal_filter.c20
-rw-r--r--vp8/encoder/variance_c.c15
-rw-r--r--vp8/encoder/x86/variance_impl_sse2.asm20
-rw-r--r--vp8/encoder/x86/variance_impl_ssse3.asm20
-rw-r--r--vp8/encoder/x86/variance_mmx.c39
-rw-r--r--vp8/encoder/x86/variance_sse2.c34
-rw-r--r--vp8/encoder/x86/variance_ssse3.c18
13 files changed, 548 insertions, 90 deletions
diff --git a/vp8/encoder/arm/variance_arm.c b/vp8/encoder/arm/variance_arm.c
index e77be9f73..6e83c6e7b 100644
--- a/vp8/encoder/arm/variance_arm.c
+++ b/vp8/encoder/arm/variance_arm.c
@@ -13,6 +13,12 @@
#include "vp8/common/filter.h"
#include "vp8/common/arm/bilinearfilter_arm.h"
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+#define HALFNDX 8
+#else
+#define HALFNDX 4
+#endif
+
#if HAVE_ARMV6
unsigned int vp8_sub_pixel_variance8x8_armv6
@@ -59,17 +65,17 @@ unsigned int vp8_sub_pixel_variance16x16_armv6
const short *HFilter, *VFilter;
unsigned int var;
- if (xoffset == 4 && yoffset == 0)
+ if (xoffset == HALFNDX && yoffset == 0)
{
var = vp8_variance_halfpixvar16x16_h_armv6(src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, sse);
}
- else if (xoffset == 0 && yoffset == 4)
+ else if (xoffset == 0 && yoffset == HALFNDX)
{
var = vp8_variance_halfpixvar16x16_v_armv6(src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, sse);
}
- else if (xoffset == 4 && yoffset == 4)
+ else if (xoffset == HALFNDX && yoffset == HALFNDX)
{
var = vp8_variance_halfpixvar16x16_hv_armv6(src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, sse);
@@ -107,11 +113,11 @@ unsigned int vp8_sub_pixel_variance16x16_neon
unsigned int *sse
)
{
- if (xoffset == 4 && yoffset == 0)
+ if (xoffset == HALFNDX && yoffset == 0)
return vp8_variance_halfpixvar16x16_h_neon(src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse);
- else if (xoffset == 0 && yoffset == 4)
+ else if (xoffset == 0 && yoffset == HALFNDX)
return vp8_variance_halfpixvar16x16_v_neon(src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse);
- else if (xoffset == 4 && yoffset == 4)
+ else if (xoffset == HALFNDX && yoffset == HALFNDX)
return vp8_variance_halfpixvar16x16_hv_neon(src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse);
else
return vp8_sub_pixel_variance16x16_neon_func(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
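The HALFNDX constant above (and its twins in the x86 files further down) records where the half-pel phase falls in the subpel index space. A trivial, purely illustrative check of that invariant:

    /* Illustrative only: HALFNDX is half the number of subpel phases. */
    #include <assert.h>

    int main(void)
    {
        int phases  = 16;          /* 8 without CONFIG_SIXTEENTH_SUBPEL_UV */
        int halfndx = phases / 2;  /* the index meaning "exactly half pel" */
        assert(halfndx == 8);      /* 4 in the 1/8-pel indexing            */
        return 0;
    }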
diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c
index c2613bfbd..72bc3d41d 100644
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -2945,6 +2945,11 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size)
// Indicate reference frame sign bias for Golden and ARF frames (always 0 for last frame buffer)
vp8_write_bit(bc, pc->ref_frame_sign_bias[GOLDEN_FRAME]);
vp8_write_bit(bc, pc->ref_frame_sign_bias[ALTREF_FRAME]);
+
+#if CONFIG_HIGH_PRECISION_MV
+ // Signal whether to allow high MV precision
+ vp8_write_bit(bc, (xd->allow_high_precision_mv) ? 1 : 0);
+#endif
}
if (cpi->oxcf.error_resilient_mode & VPX_ERROR_RESILIENT_PARTITIONS)
diff --git a/vp8/encoder/encodemv.c b/vp8/encoder/encodemv.c
index a4849c654..4d5d8cb81 100644
--- a/vp8/encoder/encodemv.c
+++ b/vp8/encoder/encodemv.c
@@ -20,6 +20,11 @@
extern unsigned int active_section;
#endif
+//#define DEBUG_ENC_MV
+#ifdef DEBUG_ENC_MV
+int enc_mvcount = 0;
+#endif
+
static void encode_mvcomponent(
vp8_writer *const w,
const int v,
@@ -32,8 +37,7 @@ static void encode_mvcomponent(
if (x < mvnum_short) // Small
{
vp8_write(w, 0, p [mvpis_short]);
- vp8_treed_write(w, vp8_small_mvtree, p + MVPshort, x, 3);
-
+ vp8_treed_write(w, vp8_small_mvtree, p + MVPshort, x, mvnum_short_bits);
if (!x)
return; // no sign bit
}
@@ -46,17 +50,17 @@ static void encode_mvcomponent(
do
vp8_write(w, (x >> i) & 1, p [MVPbits + i]);
- while (++i < 3);
+ while (++i < mvnum_short_bits);
i = mvlong_width - 1; /* Skip bit 3, which is sometimes implicit */
do
vp8_write(w, (x >> i) & 1, p [MVPbits + i]);
- while (--i > 3);
+ while (--i > mvnum_short_bits);
- if (x & 0xFFF0)
- vp8_write(w, (x >> 3) & 1, p [MVPbits + 3]);
+ if (x & ~((2<<mvnum_short_bits)-1))
+ vp8_write(w, (x >> mvnum_short_bits) & 1, p [MVPbits + mvnum_short_bits]);
}
vp8_write(w, v < 0, p [MVPsign]);
@@ -91,9 +95,17 @@ void vp8_encode_motion_vector(vp8_writer *w, const MV *mv, const MV_CONTEXT *mvc
}
}
#endif
-
- encode_mvcomponent(w, mv->row >> 1, &mvc[0]);
- encode_mvcomponent(w, mv->col >> 1, &mvc[1]);
+ encode_mvcomponent(w, mv->row >> MV_SHIFT, &mvc[0]);
+ encode_mvcomponent(w, mv->col >> MV_SHIFT, &mvc[1]);
+#ifdef DEBUG_ENC_MV
+ {
+ int i;
+ printf("%d: %d %d\n", enc_mvcount++, mv->row, mv->col);
+ for (i=0; i<MVPcount;++i) printf(" %d", (&mvc[0])->prob[i]); printf("\n");
+ for (i=0; i<MVPcount;++i) printf(" %d", (&mvc[1])->prob[i]); printf("\n");
+ fflush(stdout);
+ }
+#endif
}
@@ -106,7 +118,7 @@ static unsigned int cost_mvcomponent(const int v, const struct mv_context *mvc)
if (x < mvnum_short)
{
cost = vp8_cost_zero(p [mvpis_short])
- + vp8_treed_cost(vp8_small_mvtree, p + MVPshort, x, 3);
+ + vp8_treed_cost(vp8_small_mvtree, p + MVPshort, x, mvnum_short_bits);
if (!x)
return cost;
@@ -119,17 +131,17 @@ static unsigned int cost_mvcomponent(const int v, const struct mv_context *mvc)
do
cost += vp8_cost_bit(p [MVPbits + i], (x >> i) & 1);
- while (++i < 3);
+ while (++i < mvnum_short_bits);
i = mvlong_width - 1; /* Skip bit 3, which is sometimes implicit */
do
cost += vp8_cost_bit(p [MVPbits + i], (x >> i) & 1);
- while (--i > 3);
+ while (--i > mvnum_short_bits);
- if (x & 0xFFF0)
- cost += vp8_cost_bit(p [MVPbits + 3], (x >> 3) & 1);
+ if (x & ~((2<<mvnum_short_bits)-1))
+ cost += vp8_cost_bit(p [MVPbits + mvnum_short_bits], (x >> mvnum_short_bits) & 1);
}
return cost; // + vp8_cost_bit( p [MVPsign], v < 0);
@@ -258,7 +270,7 @@ static void write_component_probs(
{
const int c = events [mv_max];
- is_short_ct [0] += c; // Short vector
+ is_short_ct [0] += c; // Short vector
short_ct [0] += c; // Magnitude distribution
}
@@ -342,7 +354,7 @@ static void write_component_probs(
int j = 0;
vp8_tree_probs_from_distribution(
- 8, vp8_small_mvencodings, vp8_small_mvtree,
+ mvnum_short, vp8_small_mvencodings, vp8_small_mvtree,
p, short_bct, short_ct,
256, 1
);
@@ -398,6 +410,15 @@ void vp8_write_mvprobs(VP8_COMP *cpi)
vp8_writer *const w = & cpi->bc;
MV_CONTEXT *mvc = cpi->common.fc.mvc;
int flags[2] = {0, 0};
+#ifdef DEBUG_ENC_MV
+ {
+ int i;
+ printf("Writing probs\n");
+ for (i=0; i<MVPcount;++i) printf(" %d", vp8_default_mv_context[0].prob[i]); printf("\n");
+ for (i=0; i<MVPcount;++i) printf(" %d", vp8_default_mv_context[1].prob[i]); printf("\n");
+ fflush(stdout);
+ }
+#endif
#ifdef ENTROPY_STATS
active_section = 4;
#endif
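The rewritten guard in encode_mvcomponent/cost_mvcomponent generalizes the old literal: with the legacy value mvnum_short_bits == 3, ~((2 << 3) - 1) masks off the low four bits, matching the old `x & 0xFFF0` test over the magnitude range in use. A hedged check of that equivalence:

    /* Hedged check, not part of the patch: the generalized mask reduces
     * to the old 0xFFF0 test when mvnum_short_bits == 3 (magnitudes are
     * small enough that the wider high bits are irrelevant). */
    #include <assert.h>

    int main(void)
    {
        int mvnum_short_bits = 3;  /* legacy value */
        unsigned mask = ~((2u << mvnum_short_bits) - 1);
        for (unsigned x = 0; x < 0x1000; x++)
            assert(((x & mask) != 0) == ((x & 0xFFF0) != 0));
        return 0;
    }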
diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c
index b23abffad..093ac3354 100644
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -28,13 +28,13 @@ int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight)
// over state the cost of vectors. In addition coding a new vector can have a knock on effect on the
// cost of subsequent vectors and the quality of prediction from NEAR and NEAREST for subsequent blocks.
// The "Weight" parameter allows, to a limited extent, for some account to be taken of these factors.
- return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] + mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) * Weight) >> 7;
+ return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> MV_SHIFT] + mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> MV_SHIFT]) * Weight) >> 7;
}
static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int error_per_bit)
{
- return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] +
- mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1])
+ return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> MV_SHIFT] +
+ mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> MV_SHIFT])
* error_per_bit + 128) >> 8;
}
@@ -175,13 +175,33 @@ void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
* 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
* could reduce the area.
*/
-#define MVC(r,c) (((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
+
+#if CONFIG_HIGH_PRECISION_MV
+
+#define PRE(r,c) (y + (((r)>>3) * y_stride + ((c)>>3) -(offset))) // pointer to predictor base of a motionvector
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+#define SP(x) (((x)&7)<<1) // convert motion vector component to offset for svf calc
+#else
+#define SP(x) ((x)&7) // convert motion vector component to offset for svf calc
+#endif /* CONFIG_SIXTEENTH_SUBPEL_UV */
+
+#else /* CONFIG_HIGH_PRECISION_MV */
+
#define PRE(r,c) (y + (((r)>>2) * y_stride + ((c)>>2) -(offset))) // pointer to predictor base of a motionvector
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+#define SP(x) (((x)&3)<<2) // convert motion vector component to offset for svf calc
+#else
#define SP(x) (((x)&3)<<1) // convert motion vector component to offset for svf calc
+#endif /* CONFIG_SIXTEENTH_SUBPEL_UV */
+
+#endif /* CONFIG_HIGH_PRECISION_MV */
+
+#define MVC(r,c) (((mvcost[0][(r)-rr] + mvcost[1][(c)-rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
#define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function.
#define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
#define ERR(r,c) (MVC(r,c)+DIST(r,c)) // returns distortion + motion vector cost
#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=INT_MAX;)// checks if (r,c) has better score than previous best
+
#define MIN(x,y) (((x)<(y))?(x):(y))
#define MAX(x,y) (((x)>(y))?(x):(y))
@@ -194,8 +214,15 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
{
unsigned char *z = (*(b->base_src) + b->src);
+#if CONFIG_HIGH_PRECISION_MV
+ int rr = ref_mv->as_mv.row, rc = ref_mv->as_mv.col;
+ int br = bestmv->as_mv.row << 3, bc = bestmv->as_mv.col << 3;
+ int hstep = 4;
+#else
int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1;
int br = bestmv->as_mv.row << 2, bc = bestmv->as_mv.col << 2;
+ int hstep = 2;
+#endif
int tr = br, tc = bc;
unsigned int besterr = INT_MAX;
unsigned int left, right, up, down, diag;
@@ -203,12 +230,22 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
unsigned int whichdir;
unsigned int halfiters = 4;
unsigned int quarteriters = 4;
+#if CONFIG_HIGH_PRECISION_MV
+ unsigned int eighthiters = 4;
+#endif
int thismse;
+#if CONFIG_HIGH_PRECISION_MV
+ int minc = MAX(x->mv_col_min << 3, (ref_mv->as_mv.col) - ((1 << mvlong_width) - 1));
+ int maxc = MIN(x->mv_col_max << 3, (ref_mv->as_mv.col) + ((1 << mvlong_width) - 1));
+ int minr = MAX(x->mv_row_min << 3, (ref_mv->as_mv.row) - ((1 << mvlong_width) - 1));
+ int maxr = MIN(x->mv_row_max << 3, (ref_mv->as_mv.row) + ((1 << mvlong_width) - 1));
+#else
int minc = MAX(x->mv_col_min << 2, (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
int maxc = MIN(x->mv_col_max << 2, (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
int minr = MAX(x->mv_row_min << 2, (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
int maxr = MIN(x->mv_row_max << 2, (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));
+#endif
int y_stride;
int offset;
@@ -220,10 +257,10 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
int buf_r1, buf_r2, buf_c1, buf_c2;
// Clamping to avoid out-of-range data access
- buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)?(bestmv->as_mv.row - x->mv_row_min):3;
- buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)?(x->mv_row_max - bestmv->as_mv.row):3;
- buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)?(bestmv->as_mv.col - x->mv_col_min):3;
- buf_c2 = ((bestmv->as_mv.col + 3) > x->mv_col_max)?(x->mv_col_max - bestmv->as_mv.col):3;
+ buf_r1 = ((bestmv->as_mv.row - INTERP_EXTEND) < x->mv_row_min)?(bestmv->as_mv.row - x->mv_row_min):INTERP_EXTEND;
+ buf_r2 = ((bestmv->as_mv.row + INTERP_EXTEND) > x->mv_row_max)?(x->mv_row_max - bestmv->as_mv.row):INTERP_EXTEND;
+ buf_c1 = ((bestmv->as_mv.col - INTERP_EXTEND) < x->mv_col_min)?(bestmv->as_mv.col - x->mv_col_min):INTERP_EXTEND;
+ buf_c2 = ((bestmv->as_mv.col + INTERP_EXTEND) > x->mv_col_max)?(x->mv_col_max - bestmv->as_mv.col):INTERP_EXTEND;
y_stride = 32;
/* Copy to intermediate buffer before searching. */
@@ -249,26 +286,26 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
while (--halfiters)
{
// 1/2 pel
- CHECK_BETTER(left, tr, tc - 2);
- CHECK_BETTER(right, tr, tc + 2);
- CHECK_BETTER(up, tr - 2, tc);
- CHECK_BETTER(down, tr + 2, tc);
+ CHECK_BETTER(left, tr, tc - hstep);
+ CHECK_BETTER(right, tr, tc + hstep);
+ CHECK_BETTER(up, tr - hstep, tc);
+ CHECK_BETTER(down, tr + hstep, tc);
whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
switch (whichdir)
{
case 0:
- CHECK_BETTER(diag, tr - 2, tc - 2);
+ CHECK_BETTER(diag, tr - hstep, tc - hstep);
break;
case 1:
- CHECK_BETTER(diag, tr - 2, tc + 2);
+ CHECK_BETTER(diag, tr - hstep, tc + hstep);
break;
case 2:
- CHECK_BETTER(diag, tr + 2, tc - 2);
+ CHECK_BETTER(diag, tr + hstep, tc - hstep);
break;
case 3:
- CHECK_BETTER(diag, tr + 2, tc + 2);
+ CHECK_BETTER(diag, tr + hstep, tc + hstep);
break;
}
@@ -282,28 +319,29 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
// TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected)
// 1/4 pel
+ hstep >>= 1;
while (--quarteriters)
{
- CHECK_BETTER(left, tr, tc - 1);
- CHECK_BETTER(right, tr, tc + 1);
- CHECK_BETTER(up, tr - 1, tc);
- CHECK_BETTER(down, tr + 1, tc);
+ CHECK_BETTER(left, tr, tc - hstep);
+ CHECK_BETTER(right, tr, tc + hstep);
+ CHECK_BETTER(up, tr - hstep, tc);
+ CHECK_BETTER(down, tr + hstep, tc);
whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
switch (whichdir)
{
case 0:
- CHECK_BETTER(diag, tr - 1, tc - 1);
+ CHECK_BETTER(diag, tr - hstep, tc - hstep);
break;
case 1:
- CHECK_BETTER(diag, tr - 1, tc + 1);
+ CHECK_BETTER(diag, tr - hstep, tc + hstep);
break;
case 2:
- CHECK_BETTER(diag, tr + 1, tc - 1);
+ CHECK_BETTER(diag, tr + hstep, tc - hstep);
break;
case 3:
- CHECK_BETTER(diag, tr + 1, tc + 1);
+ CHECK_BETTER(diag, tr + hstep, tc + hstep);
break;
}
@@ -315,8 +353,49 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
tc = bc;
}
+#if CONFIG_HIGH_PRECISION_MV
+ if (x->e_mbd.allow_high_precision_mv)
+ {
+ hstep >>= 1;
+ while (--eighthiters)
+ {
+ CHECK_BETTER(left, tr, tc - hstep);
+ CHECK_BETTER(right, tr, tc + hstep);
+ CHECK_BETTER(up, tr - hstep, tc);
+ CHECK_BETTER(down, tr + hstep, tc);
+
+ whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
+
+ switch (whichdir)
+ {
+ case 0:
+ CHECK_BETTER(diag, tr - hstep, tc - hstep);
+ break;
+ case 1:
+ CHECK_BETTER(diag, tr - hstep, tc + hstep);
+ break;
+ case 2:
+ CHECK_BETTER(diag, tr + hstep, tc - hstep);
+ break;
+ case 3:
+ CHECK_BETTER(diag, tr + hstep, tc + hstep);
+ break;
+ }
+
+ // no reason to check the same one again.
+ if (tr == br && tc == bc)
+ break;
+
+ tr = br;
+ tc = bc;
+ }
+ }
+ bestmv->as_mv.row = br;
+ bestmv->as_mv.col = bc;
+#else
bestmv->as_mv.row = br << 1;
bestmv->as_mv.col = bc << 1;
+#endif /* CONFIG_HIGH_PRECISION_MV */
if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL<<3)) ||
(abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL<<3)))
@@ -333,6 +412,12 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
#undef CHECK_BETTER
#undef MIN
#undef MAX
+
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+#define SP(x) (((x)&7)<<1) // convert motion vector component to offset for svf calc
+#else
+#define SP(x) ((x)&7) // convert motion vector component to offset for svf calc
+#endif /* CONFIG_SIXTEENTH_SUBPEL_UV */
int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
int_mv *bestmv, int_mv *ref_mv,
int error_per_bit,
@@ -343,6 +428,10 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
int bestmse = INT_MAX;
int_mv startmv;
int_mv this_mv;
+#if CONFIG_HIGH_PRECISION_MV
+ int_mv orig_mv;
+ int yrow_movedback=0, ycol_movedback=0;
+#endif
unsigned char *z = (*(b->base_src) + b->src);
int left, right, up, down, diag;
unsigned int sse;
@@ -368,6 +457,9 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
bestmv->as_mv.row <<= 3;
bestmv->as_mv.col <<= 3;
startmv = *bestmv;
+#if CONFIG_HIGH_PRECISION_MV
+ orig_mv = *bestmv;
+#endif
// calculate central point error
bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
@@ -473,10 +565,20 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
// time to check quarter pels.
if (bestmv->as_mv.row < startmv.as_mv.row)
+ {
y -= y_stride;
+#if CONFIG_HIGH_PRECISION_MV
+ yrow_movedback = 1;
+#endif
+ }
if (bestmv->as_mv.col < startmv.as_mv.col)
+ {
y--;
+#if CONFIG_HIGH_PRECISION_MV
+ ycol_movedback = 1;
+#endif
+ }
startmv = *bestmv;
@@ -488,12 +590,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
if (startmv.as_mv.col & 7)
{
this_mv.as_mv.col = startmv.as_mv.col - 2;
- thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
}
else
{
this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
- thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
+ thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
}
left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
@@ -507,7 +609,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
}
this_mv.as_mv.col += 4;
- thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (right < bestmse)
@@ -524,12 +626,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
if (startmv.as_mv.row & 7)
{
this_mv.as_mv.row = startmv.as_mv.row - 2;
- thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
}
else
{
this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
- thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
+ thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6), z, b->src_stride, &sse);
}
up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
@@ -543,7 +645,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
}
this_mv.as_mv.row += 4;
- thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (down < bestmse)
@@ -573,12 +675,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
if (startmv.as_mv.col & 7)
{
this_mv.as_mv.col -= 2;
- thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
}
else
{
this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
- thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);;
+ thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
}
}
else
@@ -588,12 +690,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
if (startmv.as_mv.col & 7)
{
this_mv.as_mv.col -= 2;
- thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
+ thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6), z, b->src_stride, &sse);
}
else
{
this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
- thismse = vfp->svf(y - y_stride - 1, y_stride, 6, 6, z, b->src_stride, &sse);
+ thismse = vfp->svf(y - y_stride - 1, y_stride, SP(6), SP(6), z, b->src_stride, &sse);
}
}
@@ -604,12 +706,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
if (startmv.as_mv.row & 7)
{
this_mv.as_mv.row -= 2;
- thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
}
else
{
this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
- thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
+ thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6), z, b->src_stride, &sse);
}
break;
@@ -619,19 +721,19 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
if (startmv.as_mv.col & 7)
{
this_mv.as_mv.col -= 2;
- thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
}
else
{
this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
- thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
+ thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
}
break;
case 3:
this_mv.as_mv.col += 2;
this_mv.as_mv.row += 2;
- thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
break;
}
@@ -645,9 +747,195 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
*sse1 = sse;
}
+#if CONFIG_HIGH_PRECISION_MV
+ if (!x->e_mbd.allow_high_precision_mv)
+ return bestmse;
+
+ /* Now do 1/8th pixel */
+ if (bestmv->as_mv.row < orig_mv.as_mv.row && !yrow_movedback)
+ {
+ y -= y_stride;
+ yrow_movedback = 1;
+ }
+
+ if (bestmv->as_mv.col < orig_mv.as_mv.col && !ycol_movedback)
+ {
+ y--;
+ ycol_movedback = 1;
+ }
+
+ startmv = *bestmv;
+
+ // go left then right and check error
+ this_mv.as_mv.row = startmv.as_mv.row;
+
+ if (startmv.as_mv.col & 7)
+ {
+ this_mv.as_mv.col = startmv.as_mv.col - 1;
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
+ }
+ else
+ {
+ this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
+ thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
+ }
+
+ left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+
+ if (left < bestmse)
+ {
+ *bestmv = this_mv;
+ bestmse = left;
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+
+ this_mv.as_mv.col += 2;
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
+ right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+
+ if (right < bestmse)
+ {
+ *bestmv = this_mv;
+ bestmse = right;
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+
+ // go up then down and check error
+ this_mv.as_mv.col = startmv.as_mv.col;
+
+ if (startmv.as_mv.row & 7)
+ {
+ this_mv.as_mv.row = startmv.as_mv.row - 1;
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
+ }
+ else
+ {
+ this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7;
+ thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, b->src_stride, &sse);
+ }
+
+ up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+
+ if (up < bestmse)
+ {
+ *bestmv = this_mv;
+ bestmse = up;
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+
+ this_mv.as_mv.row += 2;
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
+ down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+
+ if (down < bestmse)
+ {
+ *bestmv = this_mv;
+ bestmse = down;
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+
+
+ // now check 1 more diagonal
+ whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
+
+// for(whichdir=0;whichdir<4;whichdir++)
+// {
+ this_mv = startmv;
+
+ switch (whichdir)
+ {
+ case 0:
+
+ if (startmv.as_mv.row & 7)
+ {
+ this_mv.as_mv.row -= 1;
+
+ if (startmv.as_mv.col & 7)
+ {
+ this_mv.as_mv.col -= 1;
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
+ }
+ else
+ {
+ this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
+ thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
+ }
+ }
+ else
+ {
+ this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7;
+
+ if (startmv.as_mv.col & 7)
+ {
+ this_mv.as_mv.col -= 1;
+ thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, b->src_stride, &sse);
+ }
+ else
+ {
+ this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
+ thismse = vfp->svf(y - y_stride - 1, y_stride, SP(7), SP(7), z, b->src_stride, &sse);
+ }
+ }
+
+ break;
+ case 1:
+ this_mv.as_mv.col += 1;
+
+ if (startmv.as_mv.row & 7)
+ {
+ this_mv.as_mv.row -= 1;
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
+ }
+ else
+ {
+ this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7;
+ thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, b->src_stride, &sse);
+ }
+
+ break;
+ case 2:
+ this_mv.as_mv.row += 1;
+
+ if (startmv.as_mv.col & 7)
+ {
+ this_mv.as_mv.col -= 1;
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
+ }
+ else
+ {
+ this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
+ thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
+ }
+
+ break;
+ case 3:
+ this_mv.as_mv.col += 1;
+ this_mv.as_mv.row += 1;
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
+ break;
+ }
+
+ diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+
+ if (diag < bestmse)
+ {
+ *bestmv = this_mv;
+ bestmse = diag;
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+
+#endif /* CONFIG_HIGH_PRECISION_MV */
+
return bestmse;
}
+#undef SP
+
int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
int_mv *bestmv, int_mv *ref_mv,
int error_per_bit,
@@ -1945,5 +2233,3 @@ void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4])
}
#endif/* END MV ref count ENTROPY_STATS stats code */
-
-
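The shape of the change to vp8_find_best_sub_pixel_step_iteratively above is easier to see stripped down: the hard-coded half-pel and quarter-pel step sizes become one step variable halved between passes, so the eighth-pel pass is just one more halving. An illustrative sketch, not code from the patch (coordinates are in 1/8-pel units throughout, and check_point() stands in for the CHECK_BETTER machinery):

    static void check_point(int r, int c, int *br, int *bc)
    {
        /* stub: CHECK_BETTER evaluates MVC() + DIST() at (r, c) and keeps
         * the best score seen so far in (*br, *bc) */
        (void)r; (void)c; (void)br; (void)bc;
    }

    static void refine_subpel(int *br, int *bc, int allow_high_precision_mv)
    {
        int hstep = 4;                                /* 1/2 pel          */
        int passes = allow_high_precision_mv ? 3 : 2; /* half, quarter[, eighth] */

        while (passes--)
        {
            int iters = 4;
            while (--iters)
            {
                check_point(*br, *bc - hstep, br, bc);  /* left  */
                check_point(*br, *bc + hstep, br, bc);  /* right */
                check_point(*br - hstep, *bc, br, bc);  /* up    */
                check_point(*br + hstep, *bc, br, bc);  /* down  */
                /* best-diagonal probe and early exit elided */
            }
            hstep >>= 1;                          /* 1/2 -> 1/4 -> 1/8 pel */
        }
    }

    int main(void)
    {
        int br = 0, bc = 0;
        refine_subpel(&br, &bc, 1);
        return 0;
    }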
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 8a2fb8448..2b4418a0a 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -1365,6 +1365,9 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
(TOKEN_PARTITION) cpi->oxcf.token_partitions;
setup_features(cpi);
+#if CONFIG_HIGH_PRECISION_MV
+ cpi->mb.e_mbd.allow_high_precision_mv = 1; // Default mv precision adaptation
+#endif
{
int i;
@@ -2994,6 +2997,9 @@ static void encode_frame_to_data_rate
// Reset the loop filter deltas and segmentation map
setup_features(cpi);
+#if CONFIG_HIGH_PRECISION_MV
+ xd->allow_high_precision_mv = 1; // Default mv precision adaptation
+#endif
// If segmentation is enabled force a map update for key frames
if (xd->segmentation_enabled)
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 4f97e3c63..1491e645b 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -530,10 +530,17 @@ int VP8_UVSSE(MACROBLOCK *x, const vp8_variance_rtcd_vtable_t *rtcd)
if ((mv_row | mv_col) & 7)
{
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+ VARIANCE_INVOKE(rtcd, subpixvar8x8)(uptr, pre_stride,
+ (mv_col & 7)<<1, (mv_row & 7)<<1, upred_ptr, uv_stride, &sse2);
+ VARIANCE_INVOKE(rtcd, subpixvar8x8)(vptr, pre_stride,
+ (mv_col & 7)<<1, (mv_row & 7)<<1, vpred_ptr, uv_stride, &sse1);
+#else
VARIANCE_INVOKE(rtcd, subpixvar8x8)(uptr, pre_stride,
mv_col & 7, mv_row & 7, upred_ptr, uv_stride, &sse2);
VARIANCE_INVOKE(rtcd, subpixvar8x8)(vptr, pre_stride,
mv_col & 7, mv_row & 7, vpred_ptr, uv_stride, &sse1);
+#endif
sse2 += sse1;
}
else
@@ -1654,7 +1661,6 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4],
bsi->ref_mv, x->errorperbit, v_fn_ptr, x->mvcost,
&distortion, &sse);
-
}
} /* NEW4X4 */
@@ -1700,8 +1706,10 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
segmentyrate += bestlabelyrate;
this_segment_rd += best_label_rd;
- if (this_segment_rd >= bsi->segment_rd)
+ if (this_segment_rd >= bsi->segment_rd) {
break;
+ }
+
} /* for each label */
@@ -1776,6 +1784,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
+
if (bsi.segment_rd < best_rd)
{
int col_min = (best_ref_mv->as_mv.col>>3) - MAX_FULL_PEL_VAL + ((best_ref_mv->as_mv.col & 7)?1:0);
@@ -2146,18 +2155,18 @@ static void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv)
if (x->partition_info->bmi[i].mode == NEW4X4)
{
cpi->MVcount[0][mv_max+((x->partition_info->bmi[i].mv.as_mv.row
- - best_ref_mv->as_mv.row) >> 1)]++;
+ - best_ref_mv->as_mv.row) >> MV_SHIFT)]++;
cpi->MVcount[1][mv_max+((x->partition_info->bmi[i].mv.as_mv.col
- - best_ref_mv->as_mv.col) >> 1)]++;
+ - best_ref_mv->as_mv.col) >> MV_SHIFT)]++;
}
}
}
else if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV)
{
cpi->MVcount[0][mv_max+((x->e_mbd.mode_info_context->mbmi.mv.as_mv.row
- - best_ref_mv->as_mv.row) >> 1)]++;
+ - best_ref_mv->as_mv.row) >> MV_SHIFT)]++;
cpi->MVcount[1][mv_max+((x->e_mbd.mode_info_context->mbmi.mv.as_mv.col
- - best_ref_mv->as_mv.col) >> 1)]++;
+ - best_ref_mv->as_mv.col) >> MV_SHIFT)]++;
}
}
@@ -2473,6 +2482,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
vp8_update_zbin_extra(cpi, x);
}
+
if (!x->e_mbd.mode_info_context->mbmi.second_ref_frame)
switch (this_mode)
{
diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c
index 8455bb877..ede65d669 100644
--- a/vp8/encoder/temporal_filter.c
+++ b/vp8/encoder/temporal_filter.c
@@ -50,14 +50,20 @@ static void vp8_temporal_filter_predictors_mb_c
{
int offset;
unsigned char *yptr, *uptr, *vptr;
+ int omv_row, omv_col;
// Y
yptr = y_mb_ptr + (mv_row >> 3) * stride + (mv_col >> 3);
if ((mv_row | mv_col) & 7)
{
+#if CONFIG_SIXTEENTH_SUBPEL_UV
x->subpixel_predict16x16(yptr, stride,
- mv_col & 7, mv_row & 7, &pred[0], 16);
+ (mv_col & 7)<<1, (mv_row & 7)<<1, &pred[0], 16);
+#else
+ x->subpixel_predict16x16(yptr, stride,
+ mv_col & 7, mv_row & 7, &pred[0], 16);
+#endif
}
else
{
@@ -65,6 +71,8 @@ static void vp8_temporal_filter_predictors_mb_c
}
// U & V
+ omv_row = mv_row;
+ omv_col = mv_col;
mv_row >>= 1;
mv_col >>= 1;
stride = (stride + 1) >> 1;
@@ -72,6 +80,15 @@ static void vp8_temporal_filter_predictors_mb_c
uptr = u_mb_ptr + offset;
vptr = v_mb_ptr + offset;
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+ if ((omv_row | omv_col) & 15)
+ {
+ x->subpixel_predict8x8(uptr, stride,
+ (omv_col & 15), (omv_row & 15), &pred[256], 8);
+ x->subpixel_predict8x8(vptr, stride,
+ (omv_col & 15), (omv_row & 15), &pred[320], 8);
+ }
+#else
if ((mv_row | mv_col) & 7)
{
x->subpixel_predict8x8(uptr, stride,
@@ -79,6 +96,7 @@ static void vp8_temporal_filter_predictors_mb_c
x->subpixel_predict8x8(vptr, stride,
mv_col & 7, mv_row & 7, &pred[320], 8);
}
+#endif
else
{
RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, stride, &pred[256], 8);
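The temporal-filter change above leans on a unit identity worth spelling out: halving a 1/8-pel luma MV to get the chroma displacement and then measuring it in 1/16-pel steps is the same as reading the original luma value directly in 1/16-chroma-pel units, which is why the U/V phase is (omv & 15). A hedged check:

    /* Hedged check, not part of the patch: a luma MV in 1/8-pel units is,
     * read as-is, a chroma displacement in 1/16-chroma-pel units (the
     * chroma plane is half resolution). */
    #include <assert.h>

    int main(void)
    {
        int luma_mv      = 37;                  /* 4 + 5/8 luma pels      */
        int chroma_full  = (luma_mv >> 1) >> 3; /* what the code computes */
        int chroma_phase = luma_mv & 15;        /* (omv & 15) in the code */
        assert(chroma_full == luma_mv >> 4);    /* same full-pel offset   */
        assert(chroma_full == 2 && chroma_phase == 5); /* 2 + 5/16 chroma pels */
        return 0;
    }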
diff --git a/vp8/encoder/variance_c.c b/vp8/encoder/variance_c.c
index c7b9c2209..402ff0450 100644
--- a/vp8/encoder/variance_c.c
+++ b/vp8/encoder/variance_c.c
@@ -363,8 +363,13 @@ unsigned int vp8_variance_halfpixvar16x16_h_c(
int recon_stride,
unsigned int *sse)
{
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+ return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 0,
+ ref_ptr, recon_stride, sse);
+#else
return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 4, 0,
ref_ptr, recon_stride, sse);
+#endif
}
@@ -375,8 +380,13 @@ unsigned int vp8_variance_halfpixvar16x16_v_c(
int recon_stride,
unsigned int *sse)
{
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+ return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 8,
+ ref_ptr, recon_stride, sse);
+#else
return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 4,
ref_ptr, recon_stride, sse);
+#endif
}
@@ -387,8 +397,13 @@ unsigned int vp8_variance_halfpixvar16x16_hv_c(
int recon_stride,
unsigned int *sse)
{
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+ return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 8,
+ ref_ptr, recon_stride, sse);
+#else
return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 4, 4,
ref_ptr, recon_stride, sse);
+#endif
}
diff --git a/vp8/encoder/x86/variance_impl_sse2.asm b/vp8/encoder/x86/variance_impl_sse2.asm
index 762922091..b13beee6e 100644
--- a/vp8/encoder/x86/variance_impl_sse2.asm
+++ b/vp8/encoder/x86/variance_impl_sse2.asm
@@ -1348,12 +1348,32 @@ align 16
xmm_bi_rd:
times 8 dw 64
align 16
+%if CONFIG_SIXTEENTH_SUBPEL_UV
vp8_bilinear_filters_sse2:
dw 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0
+ dw 120, 120, 120, 120, 120, 120, 120, 120, 8, 8, 8, 8, 8, 8, 8, 8
dw 112, 112, 112, 112, 112, 112, 112, 112, 16, 16, 16, 16, 16, 16, 16, 16
+ dw 104, 104, 104, 104, 104, 104, 104, 104, 24, 24, 24, 24, 24, 24, 24, 24
dw 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32
+ dw 88, 88, 88, 88, 88, 88, 88, 88, 40, 40, 40, 40, 40, 40, 40, 40
dw 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48
+ dw 72, 72, 72, 72, 72, 72, 72, 72, 56, 56, 56, 56, 56, 56, 56, 56
dw 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
+ dw 56, 56, 56, 56, 56, 56, 56, 56, 72, 72, 72, 72, 72, 72, 72, 72
dw 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80
+ dw 40, 40, 40, 40, 40, 40, 40, 40, 88, 88, 88, 88, 88, 88, 88, 88
dw 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96
+ dw 24, 24, 24, 24, 24, 24, 24, 24, 104, 104, 104, 104, 104, 104, 104, 104
dw 16, 16, 16, 16, 16, 16, 16, 16, 112, 112, 112, 112, 112, 112, 112, 112
+ dw 8, 8, 8, 8, 8, 8, 8, 8, 120, 120, 120, 120, 120, 120, 120, 120
+%else
+vp8_bilinear_filters_sse2:
+ dw 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0
+ dw 112, 112, 112, 112, 112, 112, 112, 112, 16, 16, 16, 16, 16, 16, 16, 16
+ dw 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32
+ dw 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48
+ dw 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
+ dw 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80
+ dw 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96
+ dw 16, 16, 16, 16, 16, 16, 16, 16, 112, 112, 112, 112, 112, 112, 112, 112
+%endif
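The expanded bilinear table above (and the ssse3/mmx copies that follow) all come from one rule: for 1/16-pel phase k, the two taps are 128 - 8k and 8k, summing to 128, and a sample is filtered as (a*t0 + b*t1 + 64) >> 7 (the rounding constant is the `times 8 dw 64` row). A hedged generator that reproduces the 16 rows:

    /* Hedged sketch: regenerate the 16-phase bilinear tap pairs above. */
    #include <stdio.h>

    int main(void)
    {
        for (int k = 0; k < 16; k++)
            printf("phase %2d/16: taps {%3d, %3d}\n", k, 128 - 8 * k, 8 * k);
        return 0;
    }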
diff --git a/vp8/encoder/x86/variance_impl_ssse3.asm b/vp8/encoder/x86/variance_impl_ssse3.asm
index 97e8b0e2e..d60d53daa 100644
--- a/vp8/encoder/x86/variance_impl_ssse3.asm
+++ b/vp8/encoder/x86/variance_impl_ssse3.asm
@@ -353,6 +353,25 @@ align 16
xmm_bi_rd:
times 8 dw 64
align 16
+%if CONFIG_SIXTEENTH_SUBPEL_UV
+vp8_bilinear_filters_ssse3:
+ times 8 db 128, 0
+ times 8 db 120, 8
+ times 8 db 112, 16
+ times 8 db 104, 24
+ times 8 db 96, 32
+ times 8 db 88, 40
+ times 8 db 80, 48
+ times 8 db 72, 56
+ times 8 db 64, 64
+ times 8 db 56, 72
+ times 8 db 48, 80
+ times 8 db 40, 88
+ times 8 db 32, 96
+ times 8 db 24, 104
+ times 8 db 16, 112
+ times 8 db 8, 120
+%else
vp8_bilinear_filters_ssse3:
times 8 db 128, 0
times 8 db 112, 16
@@ -362,3 +381,4 @@ vp8_bilinear_filters_ssse3:
times 8 db 48, 80
times 8 db 32, 96
times 8 db 16, 112
+%endif
diff --git a/vp8/encoder/x86/variance_mmx.c b/vp8/encoder/x86/variance_mmx.c
index 92b695f17..b84d00034 100644
--- a/vp8/encoder/x86/variance_mmx.c
+++ b/vp8/encoder/x86/variance_mmx.c
@@ -204,6 +204,27 @@ unsigned int vp8_variance8x16_mmx(
// the mmx function that does the bilinear filtering and var calculation //
// int one pass //
///////////////////////////////////////////////////////////////////////////
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+DECLARE_ALIGNED(16, const short, vp8_vp7_bilinear_filters_mmx[16][8]) =
+{
+ { 128, 128, 128, 128, 0, 0, 0, 0 },
+ { 120, 120, 120, 120, 8, 8, 8, 8 },
+ { 112, 112, 112, 112, 16, 16, 16, 16 },
+ { 104, 104, 104, 104, 24, 24, 24, 24 },
+ { 96, 96, 96, 96, 32, 32, 32, 32 },
+ { 88, 88, 88, 88, 40, 40, 40, 40 },
+ { 80, 80, 80, 80, 48, 48, 48, 48 },
+ { 72, 72, 72, 72, 56, 56, 56, 56 },
+ { 64, 64, 64, 64, 64, 64, 64, 64 },
+ { 56, 56, 56, 56, 72, 72, 72, 72 },
+ { 48, 48, 48, 48, 80, 80, 80, 80 },
+ { 40, 40, 40, 40, 88, 88, 88, 88 },
+ { 32, 32, 32, 32, 96, 96, 96, 96 },
+ { 24, 24, 24, 24, 104, 104, 104, 104 },
+ { 16, 16, 16, 16, 112, 112, 112, 112 },
+ { 8, 8, 8, 8, 120, 120, 120, 120 }
+};
+#else
DECLARE_ALIGNED(16, const short, vp8_vp7_bilinear_filters_mmx[8][8]) =
{
{ 128, 128, 128, 128, 0, 0, 0, 0 },
@@ -215,6 +236,7 @@ DECLARE_ALIGNED(16, const short, vp8_vp7_bilinear_filters_mmx[8][8]) =
{ 32, 32, 32, 32, 96, 96, 96, 96 },
{ 16, 16, 16, 16, 112, 112, 112, 112 }
};
+#endif
unsigned int vp8_sub_pixel_variance4x4_mmx
(
@@ -279,7 +301,6 @@ unsigned int vp8_sub_pixel_variance16x16_mmx
int xsum0, xsum1;
unsigned int xxsum0, xxsum1;
-
vp8_filter_block2d_bil_var_mmx(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 16,
@@ -287,7 +308,6 @@ unsigned int vp8_sub_pixel_variance16x16_mmx
&xsum0, &xxsum0
);
-
vp8_filter_block2d_bil_var_mmx(
src_ptr + 8, src_pixels_per_line,
dst_ptr + 8, dst_pixels_per_line, 16,
@@ -386,8 +406,13 @@ unsigned int vp8_variance_halfpixvar16x16_h_mmx(
int recon_stride,
unsigned int *sse)
{
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+ return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 8, 0,
+ ref_ptr, recon_stride, sse);
+#else
return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 0,
ref_ptr, recon_stride, sse);
+#endif
}
@@ -398,8 +423,13 @@ unsigned int vp8_variance_halfpixvar16x16_v_mmx(
int recon_stride,
unsigned int *sse)
{
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+ return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 0, 8,
+ ref_ptr, recon_stride, sse);
+#else
return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 0, 4,
ref_ptr, recon_stride, sse);
+#endif
}
@@ -410,6 +440,11 @@ unsigned int vp8_variance_halfpixvar16x16_hv_mmx(
int recon_stride,
unsigned int *sse)
{
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+ return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 8, 8,
+ ref_ptr, recon_stride, sse);
+#else
return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 4,
ref_ptr, recon_stride, sse);
+#endif
}
diff --git a/vp8/encoder/x86/variance_sse2.c b/vp8/encoder/x86/variance_sse2.c
index 24062eb9b..e3c6268ea 100644
--- a/vp8/encoder/x86/variance_sse2.c
+++ b/vp8/encoder/x86/variance_sse2.c
@@ -13,6 +13,12 @@
#include "vp8/common/pragmas.h"
#include "vpx_ports/mem.h"
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+#define HALFNDX 8
+#else
+#define HALFNDX 4
+#endif
+
extern void filter_block1d_h6_mmx(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
extern void filter_block1d_v6_mmx(const short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
extern void filter_block1d8_h6_sse2(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
@@ -135,7 +141,11 @@ void vp8_half_vert_variance16x_h_sse2
unsigned int *sumsquared
);
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+DECLARE_ALIGNED(16, extern short, vp8_vp7_bilinear_filters_mmx[16][8]);
+#else
DECLARE_ALIGNED(16, extern short, vp8_vp7_bilinear_filters_mmx[8][8]);
+#endif
unsigned int vp8_variance4x4_wmt(
const unsigned char *src_ptr,
@@ -284,21 +294,21 @@ unsigned int vp8_sub_pixel_variance8x8_wmt
int xsum;
unsigned int xxsum;
- if (xoffset == 4 && yoffset == 0)
+ if (xoffset == HALFNDX && yoffset == 0)
{
vp8_half_horiz_variance8x_h_sse2(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 8,
&xsum, &xxsum);
}
- else if (xoffset == 0 && yoffset == 4)
+ else if (xoffset == 0 && yoffset == HALFNDX)
{
vp8_half_vert_variance8x_h_sse2(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 8,
&xsum, &xxsum);
}
- else if (xoffset == 4 && yoffset == 4)
+ else if (xoffset == HALFNDX && yoffset == HALFNDX)
{
vp8_half_horiz_vert_variance8x_h_sse2(
src_ptr, src_pixels_per_line,
@@ -335,21 +345,21 @@ unsigned int vp8_sub_pixel_variance16x16_wmt
// note we could avoid these if statements if the calling function
// just called the appropriate functions inside.
- if (xoffset == 4 && yoffset == 0)
+ if (xoffset == HALFNDX && yoffset == 0)
{
vp8_half_horiz_variance16x_h_sse2(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 16,
&xsum0, &xxsum0);
}
- else if (xoffset == 0 && yoffset == 4)
+ else if (xoffset == 0 && yoffset == HALFNDX)
{
vp8_half_vert_variance16x_h_sse2(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 16,
&xsum0, &xxsum0);
}
- else if (xoffset == 4 && yoffset == 4)
+ else if (xoffset == HALFNDX && yoffset == HALFNDX)
{
vp8_half_horiz_vert_variance16x_h_sse2(
src_ptr, src_pixels_per_line,
@@ -408,21 +418,21 @@ unsigned int vp8_sub_pixel_variance16x8_wmt
int xsum0, xsum1;
unsigned int xxsum0, xxsum1;
- if (xoffset == 4 && yoffset == 0)
+ if (xoffset == HALFNDX && yoffset == 0)
{
vp8_half_horiz_variance16x_h_sse2(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 8,
&xsum0, &xxsum0);
}
- else if (xoffset == 0 && yoffset == 4)
+ else if (xoffset == 0 && yoffset == HALFNDX)
{
vp8_half_vert_variance16x_h_sse2(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 8,
&xsum0, &xxsum0);
}
- else if (xoffset == 4 && yoffset == 4)
+ else if (xoffset == HALFNDX && yoffset == HALFNDX)
{
vp8_half_horiz_vert_variance16x_h_sse2(
src_ptr, src_pixels_per_line,
@@ -464,21 +474,21 @@ unsigned int vp8_sub_pixel_variance8x16_wmt
int xsum;
unsigned int xxsum;
- if (xoffset == 4 && yoffset == 0)
+ if (xoffset == HALFNDX && yoffset == 0)
{
vp8_half_horiz_variance8x_h_sse2(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 16,
&xsum, &xxsum);
}
- else if (xoffset == 0 && yoffset == 4)
+ else if (xoffset == 0 && yoffset == HALFNDX)
{
vp8_half_vert_variance8x_h_sse2(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 16,
&xsum, &xxsum);
}
- else if (xoffset == 4 && yoffset == 4)
+ else if (xoffset == HALFNDX && yoffset == HALFNDX)
{
vp8_half_horiz_vert_variance8x_h_sse2(
src_ptr, src_pixels_per_line,
diff --git a/vp8/encoder/x86/variance_ssse3.c b/vp8/encoder/x86/variance_ssse3.c
index 73f2e01a2..59e14971a 100644
--- a/vp8/encoder/x86/variance_ssse3.c
+++ b/vp8/encoder/x86/variance_ssse3.c
@@ -13,6 +13,12 @@
#include "vp8/common/pragmas.h"
#include "vpx_ports/mem.h"
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+#define HALFNDX 8
+#else
+#define HALFNDX 4
+#endif
+
extern unsigned int vp8_get16x16var_sse2
(
const unsigned char *src_ptr,
@@ -81,21 +87,21 @@ unsigned int vp8_sub_pixel_variance16x16_ssse3
// note we could avoid these if statements if the calling function
// just called the appropriate functions inside.
- if (xoffset == 4 && yoffset == 0)
+ if (xoffset == HALFNDX && yoffset == 0)
{
vp8_half_horiz_variance16x_h_sse2(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 16,
&xsum0, &xxsum0);
}
- else if (xoffset == 0 && yoffset == 4)
+ else if (xoffset == 0 && yoffset == HALFNDX)
{
vp8_half_vert_variance16x_h_sse2(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 16,
&xsum0, &xxsum0);
}
- else if (xoffset == 4 && yoffset == 4)
+ else if (xoffset == HALFNDX && yoffset == HALFNDX)
{
vp8_half_horiz_vert_variance16x_h_sse2(
src_ptr, src_pixels_per_line,
@@ -130,21 +136,21 @@ unsigned int vp8_sub_pixel_variance16x8_ssse3
int xsum0;
unsigned int xxsum0;
- if (xoffset == 4 && yoffset == 0)
+ if (xoffset == HALFNDX && yoffset == 0)
{
vp8_half_horiz_variance16x_h_sse2(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 8,
&xsum0, &xxsum0);
}
- else if (xoffset == 0 && yoffset == 4)
+ else if (xoffset == 0 && yoffset == HALFNDX)
{
vp8_half_vert_variance16x_h_sse2(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 8,
&xsum0, &xxsum0);
}
- else if (xoffset == 4 && yoffset == 4)
+ else if (xoffset == HALFNDX && yoffset == HALFNDX)
{
vp8_half_horiz_vert_variance16x_h_sse2(
src_ptr, src_pixels_per_line,