author    Deb Mukherjee <debargha@google.com>  2012-02-16 09:29:54 -0800
committer Deb Mukherjee <debargha@google.com>  2012-02-23 09:25:21 -0800
commit    18e90d744eba2d28ad96a566565bbf5642d24b59
tree      8d0859ca6973ad522f4b1e30566ba67ea6ed886a
parent    3c872b6c27a5f03156b973fb359c9293049d6e84
Supporting high precision 1/8-pel motion vectors
This is the initial patch for supporting 1/8th-pel motion. Currently, if we configure with enable-high-precision-mv, all motion vectors default to 1/8 pel. Encode and decode sync fine with the current code. In the next phase the code will be refactored so that the 1/8-pel mode can be chosen adaptively at a frame/segment/mb level.

Derf results: http://www.corp.google.com/~debargha/vp8_results/enhinterp_hpmv.html (about 0.83% better than 8-tap interpolation)

Patch 3: Rebased. Also adds 1/16th-pel interpolation for U and V.

Patch 4: HD results: http://www.corp.google.com/~debargha/vp8_results/enhinterp_hd_hpmv.html Seems impressive (unless I am doing something wrong).

Patch 5: Added mmx/sse for bilinear filtering, and enforced use of the C versions of the subpel filters with 8 taps and 1/16th pel; also redesigned the 8-tap filters to reduce the cut-off frequency and so introduce a denoising effect. There is a new configure option, sixteenth-subpel-uv, which uses 1/16th-pel interpolation for U and V when the motion vectors have 1/8-pel accuracy. With the fixes the results are promising on the derf set. The enhanced interpolation option with 8 taps alone gives a 3% improvement over the derf set: http://www.corp.google.com/~debargha/vp8_results/enhinterpn.html Results on high-precision mv and on the hd set are to follow.

Patch 6: Added a missing condition for CONFIG_SIXTEENTH_SUBPEL_UV in vp8/common/x86/x86_systemdependent.c.

Patch 7: Cleaned up various debug messages.

Patch 8: Resolved a merge conflict.

Change-Id: I5b1d844457aefd7414a9e4e0e06c6ed38fd8cc04
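A note on units may help when reading the diff below. Motion vectors here are stored in 1/8-pel units; without CONFIG_HIGH_PRECISION_MV they are coded and costed in 1/4-pel units, hence the old `>> 1` shifts that become `>> MV_SHIFT`. The following is a hedged sketch, not code from the patch; MV_SHIFT is defined outside this directory and is assumed here to be 1 in the legacy mode and 0 in high-precision mode:

    /* Hedged sketch: splitting a motion-vector component (1/8-pel units)
     * into a full-pel offset and a subpel phase.  MV_SHIFT is an assumed
     * value: 1 in legacy 1/4-pel coding, 0 when 1/8-pel MVs are coded. */
    #include <stdio.h>

    #define MV_SHIFT 0  /* assumed: high-precision mode */

    int main(void)
    {
        int mv      = 37;             /* 4 full pels + 5/8 pel             */
        int coded   = mv >> MV_SHIFT; /* value handed to the entropy coder */
        int fullpel = mv >> 3;        /* integer pixel offset              */
        int phase8  = mv & 7;         /* 1/8-pel phase, 0..7               */
        int phase16 = phase8 << 1;    /* same phase in 1/16-pel steps, as  */
                                      /* the SP() macros in mcomp.c do     */
        printf("%d %d %d %d\n", coded, fullpel, phase8, phase16);
        return 0;
    }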
Diffstat (limited to 'vp8/encoder')
-rw-r--r--vp8/encoder/arm/variance_arm.c18
-rw-r--r--vp8/encoder/bitstream.c5
-rw-r--r--vp8/encoder/encodemv.c53
-rw-r--r--vp8/encoder/mcomp.c368
-rw-r--r--vp8/encoder/onyx_if.c6
-rw-r--r--vp8/encoder/rdopt.c22
-rw-r--r--vp8/encoder/temporal_filter.c20
-rw-r--r--vp8/encoder/variance_c.c15
-rw-r--r--vp8/encoder/x86/variance_impl_sse2.asm20
-rw-r--r--vp8/encoder/x86/variance_impl_ssse3.asm20
-rw-r--r--vp8/encoder/x86/variance_mmx.c39
-rw-r--r--vp8/encoder/x86/variance_sse2.c34
-rw-r--r--vp8/encoder/x86/variance_ssse3.c18
13 files changed, 548 insertions, 90 deletions
diff --git a/vp8/encoder/arm/variance_arm.c b/vp8/encoder/arm/variance_arm.c
index e77be9f73..6e83c6e7b 100644
--- a/vp8/encoder/arm/variance_arm.c
+++ b/vp8/encoder/arm/variance_arm.c
@@ -13,6 +13,12 @@
#include "vp8/common/filter.h"
#include "vp8/common/arm/bilinearfilter_arm.h"
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+#define HALFNDX 8
+#else
+#define HALFNDX 4
+#endif
+
#if HAVE_ARMV6
unsigned int vp8_sub_pixel_variance8x8_armv6
@@ -59,17 +65,17 @@ unsigned int vp8_sub_pixel_variance16x16_armv6
const short *HFilter, *VFilter;
unsigned int var;
- if (xoffset == 4 && yoffset == 0)
+ if (xoffset == HALFNDX && yoffset == 0)
{
var = vp8_variance_halfpixvar16x16_h_armv6(src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, sse);
}
- else if (xoffset == 0 && yoffset == 4)
+ else if (xoffset == 0 && yoffset == HALFNDX)
{
var = vp8_variance_halfpixvar16x16_v_armv6(src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, sse);
}
- else if (xoffset == 4 && yoffset == 4)
+ else if (xoffset == HALFNDX && yoffset == HALFNDX)
{
var = vp8_variance_halfpixvar16x16_hv_armv6(src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, sse);
@@ -107,11 +113,11 @@ unsigned int vp8_sub_pixel_variance16x16_neon
unsigned int *sse
)
{
- if (xoffset == 4 && yoffset == 0)
+ if (xoffset == HALFNDX && yoffset == 0)
return vp8_variance_halfpixvar16x16_h_neon(src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse);
- else if (xoffset == 0 && yoffset == 4)
+ else if (xoffset == 0 && yoffset == HALFNDX)
return vp8_variance_halfpixvar16x16_v_neon(src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse);
- else if (xoffset == 4 && yoffset == 4)
+ else if (xoffset == HALFNDX && yoffset == HALFNDX)
return vp8_variance_halfpixvar16x16_hv_neon(src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse);
else
return vp8_sub_pixel_variance16x16_neon_func(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
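The HALFNDX constant above (and its twins in the x86 files further down) records where the half-pel phase falls in the subpel index space. A trivial, purely illustrative check of that invariant:

    /* Illustrative only: HALFNDX is half the number of subpel phases. */
    #include <assert.h>

    int main(void)
    {
        int phases  = 16;          /* 8 without CONFIG_SIXTEENTH_SUBPEL_UV */
        int halfndx = phases / 2;  /* the index meaning "exactly half pel" */
        assert(halfndx == 8);      /* 4 in the 1/8-pel indexing            */
        return 0;
    }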
diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c
index c2613bfbd..72bc3d41d 100644
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -2945,6 +2945,11 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size)
// Indicate reference frame sign bias for Golden and ARF frames (always 0 for last frame buffer)
vp8_write_bit(bc, pc->ref_frame_sign_bias[GOLDEN_FRAME]);
vp8_write_bit(bc, pc->ref_frame_sign_bias[ALTREF_FRAME]);
+
+#if CONFIG_HIGH_PRECISION_MV
+ // Signal whether to allow high MV precision
+ vp8_write_bit(bc, (xd->allow_high_precision_mv) ? 1 : 0);
+#endif
}
if (cpi->oxcf.error_resilient_mode & VPX_ERROR_RESILIENT_PARTITIONS)
diff --git a/vp8/encoder/encodemv.c b/vp8/encoder/encodemv.c
index a4849c654..4d5d8cb81 100644
--- a/vp8/encoder/encodemv.c
+++ b/vp8/encoder/encodemv.c
@@ -20,6 +20,11 @@
extern unsigned int active_section;
#endif
+//#define DEBUG_ENC_MV
+#ifdef DEBUG_ENC_MV
+int enc_mvcount = 0;
+#endif
+
static void encode_mvcomponent(
vp8_writer *const w,
const int v,
@@ -32,8 +37,7 @@ static void encode_mvcomponent(
if (x < mvnum_short) // Small
{
vp8_write(w, 0, p [mvpis_short]);
- vp8_treed_write(w, vp8_small_mvtree, p + MVPshort, x, 3);
-
+ vp8_treed_write(w, vp8_small_mvtree, p + MVPshort, x, mvnum_short_bits);
if (!x)
return; // no sign bit
}
@@ -46,17 +50,17 @@ static void encode_mvcomponent(
do
vp8_write(w, (x >> i) & 1, p [MVPbits + i]);
- while (++i < 3);
+ while (++i < mvnum_short_bits);
i = mvlong_width - 1; /* Skip bit 3, which is sometimes implicit */
do
vp8_write(w, (x >> i) & 1, p [MVPbits + i]);
- while (--i > 3);
+ while (--i > mvnum_short_bits);
- if (x & 0xFFF0)
- vp8_write(w, (x >> 3) & 1, p [MVPbits + 3]);
+ if (x & ~((2<<mvnum_short_bits)-1))
+ vp8_write(w, (x >> mvnum_short_bits) & 1, p [MVPbits + mvnum_short_bits]);
}
vp8_write(w, v < 0, p [MVPsign]);
@@ -91,9 +95,17 @@ void vp8_encode_motion_vector(vp8_writer *w, const MV *mv, const MV_CONTEXT *mvc
}
}
#endif
-
- encode_mvcomponent(w, mv->row >> 1, &mvc[0]);
- encode_mvcomponent(w, mv->col >> 1, &mvc[1]);
+ encode_mvcomponent(w, mv->row >> MV_SHIFT, &mvc[0]);
+ encode_mvcomponent(w, mv->col >> MV_SHIFT, &mvc[1]);
+#ifdef DEBUG_ENC_MV
+ {
+ int i;
+ printf("%d: %d %d\n", enc_mvcount++, mv->row, mv->col);
+ for (i=0; i<MVPcount;++i) printf(" %d", (&mvc[0])->prob[i]); printf("\n");
+ for (i=0; i<MVPcount;++i) printf(" %d", (&mvc[1])->prob[i]); printf("\n");
+ fflush(stdout);
+ }
+#endif
}
@@ -106,7 +118,7 @@ static unsigned int cost_mvcomponent(const int v, const struct mv_context *mvc)
if (x < mvnum_short)
{
cost = vp8_cost_zero(p [mvpis_short])
- + vp8_treed_cost(vp8_small_mvtree, p + MVPshort, x, 3);
+ + vp8_treed_cost(vp8_small_mvtree, p + MVPshort, x, mvnum_short_bits);
if (!x)
return cost;
@@ -119,17 +131,17 @@ static unsigned int cost_mvcomponent(const int v, const struct mv_context *mvc)
do
cost += vp8_cost_bit(p [MVPbits + i], (x >> i) & 1);
- while (++i < 3);
+ while (++i < mvnum_short_bits);
i = mvlong_width - 1; /* Skip bit 3, which is sometimes implicit */
do
cost += vp8_cost_bit(p [MVPbits + i], (x >> i) & 1);
- while (--i > 3);
+ while (--i > mvnum_short_bits);
- if (x & 0xFFF0)
- cost += vp8_cost_bit(p [MVPbits + 3], (x >> 3) & 1);
+ if (x & ~((2<<mvnum_short_bits)-1))
+ cost += vp8_cost_bit(p [MVPbits + mvnum_short_bits], (x >> mvnum_short_bits) & 1);
}
return cost; // + vp8_cost_bit( p [MVPsign], v < 0);
@@ -258,7 +270,7 @@ static void write_component_probs(
{
const int c = events [mv_max];
- is_short_ct [0] += c; // Short vector
+ is_short_ct [0] += c; // Short vector
short_ct [0] += c; // Magnitude distribution
}
@@ -342,7 +354,7 @@ static void write_component_probs(
int j = 0;
vp8_tree_probs_from_distribution(
- 8, vp8_small_mvencodings, vp8_small_mvtree,
+ mvnum_short, vp8_small_mvencodings, vp8_small_mvtree,
p, short_bct, short_ct,
256, 1
);
@@ -398,6 +410,15 @@ void vp8_write_mvprobs(VP8_COMP *cpi)
vp8_writer *const w = & cpi->bc;
MV_CONTEXT *mvc = cpi->common.fc.mvc;
int flags[2] = {0, 0};
+#ifdef DEBUG_ENC_MV
+ {
+ int i;
+ printf("Writing probs\n");
+ for (i=0; i<MVPcount;++i) printf(" %d", vp8_default_mv_context[0].prob[i]); printf("\n");
+ for (i=0; i<MVPcount;++i) printf(" %d", vp8_default_mv_context[1].prob[i]); printf("\n");
+ fflush(stdout);
+ }
+#endif
#ifdef ENTROPY_STATS
active_section = 4;
#endif
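The rewritten guard in encode_mvcomponent/cost_mvcomponent generalizes the old literal: with the legacy value mvnum_short_bits == 3, ~((2 << 3) - 1) masks off the low four bits, matching the old `x & 0xFFF0` test over the magnitude range in use. A hedged check of that equivalence:

    /* Hedged check, not part of the patch: the generalized mask reduces
     * to the old 0xFFF0 test when mvnum_short_bits == 3 (magnitudes are
     * small enough that the wider high bits are irrelevant). */
    #include <assert.h>

    int main(void)
    {
        int mvnum_short_bits = 3;  /* legacy value */
        unsigned mask = ~((2u << mvnum_short_bits) - 1);
        for (unsigned x = 0; x < 0x1000; x++)
            assert(((x & mask) != 0) == ((x & 0xFFF0) != 0));
        return 0;
    }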
diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c
index b23abffad..093ac3354 100644
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -28,13 +28,13 @@ int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight)
// over state the cost of vectors. In addition coding a new vector can have a knock on effect on the
// cost of subsequent vectors and the quality of prediction from NEAR and NEAREST for subsequent blocks.
// The "Weight" parameter allows, to a limited extent, for some account to be taken of these factors.
- return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] + mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) * Weight) >> 7;
+ return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> MV_SHIFT] + mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> MV_SHIFT]) * Weight) >> 7;
}
static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int error_per_bit)
{
- return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] +
- mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1])
+ return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> MV_SHIFT] +
+ mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> MV_SHIFT])
* error_per_bit + 128) >> 8;
}
@@ -175,13 +175,33 @@ void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
* 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
* could reduce the area.
*/
-#define MVC(r,c) (((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
+
+#if CONFIG_HIGH_PRECISION_MV
+
+#define PRE(r,c) (y + (((r)>>3) * y_stride + ((c)>>3) -(offset))) // pointer to predictor base of a motionvector
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+#define SP(x) (((x)&7)<<1) // convert motion vector component to offset for svf calc
+#else
+#define SP(x) ((x)&7) // convert motion vector component to offset for svf calc
+#endif /* CONFIG_SIXTEENTH_SUBPEL_UV */
+
+#else /* CONFIG_HIGH_PRECISION_MV */
+
#define PRE(r,c) (y + (((r)>>2) * y_stride + ((c)>>2) -(offset))) // pointer to predictor base of a motionvector
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+#define SP(x) (((x)&3)<<2) // convert motion vector component to offset for svf calc
+#else
#define SP(x) (((x)&3)<<1) // convert motion vector component to offset for svf calc
+#endif /* CONFIG_SIXTEENTH_SUBPEL_UV */
+
+#endif /* CONFIG_HIGH_PRECISION_MV */
+
+#define MVC(r,c) (((mvcost[0][(r)-rr] + mvcost[1][(c)-rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
#define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function.
#define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
#define ERR(r,c) (MVC(r,c)+DIST(r,c)) // returns distortion + motion vector cost
#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=INT_MAX;)// checks if (r,c) has better score than previous best
+
#define MIN(x,y) (((x)<(y))?(x):(y))
#define MAX(x,y) (((x)>(y))?(x):(y))
@@ -194,8 +214,15 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
{
unsigned char *z = (*(b->base_src) + b->src);
+#if CONFIG_HIGH_PRECISION_MV
+ int rr = ref_mv->as_mv.row, rc = ref_mv->as_mv.col;
+ int br = bestmv->as_mv.row << 3, bc = bestmv->as_mv.col << 3;
+ int hstep = 4;
+#else
int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1;
int br = bestmv->as_mv.row << 2, bc = bestmv->as_mv.col << 2;
+ int hstep = 2;
+#endif
int tr = br, tc = bc;
unsigned int besterr = INT_MAX;
unsigned int left, right, up, down, diag;
@@ -203,12 +230,22 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
unsigned int whichdir;
unsigned int halfiters = 4;
unsigned int quarteriters = 4;
+#if CONFIG_HIGH_PRECISION_MV
+ unsigned int eighthiters = 4;
+#endif
int thismse;
+#if CONFIG_HIGH_PRECISION_MV
+ int minc = MAX(x->mv_col_min << 3, (ref_mv->as_mv.col) - ((1 << mvlong_width) - 1));
+ int maxc = MIN(x->mv_col_max << 3, (ref_mv->as_mv.col) + ((1 << mvlong_width) - 1));
+ int minr = MAX(x->mv_row_min << 3, (ref_mv->as_mv.row) - ((1 << mvlong_width) - 1));
+ int maxr = MIN(x->mv_row_max << 3, (ref_mv->as_mv.row) + ((1 << mvlong_width) - 1));
+#else
int minc = MAX(x->mv_col_min << 2, (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
int maxc = MIN(x->mv_col_max << 2, (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
int minr = MAX(x->mv_row_min << 2, (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
int maxr = MIN(x->mv_row_max << 2, (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));
+#endif
int y_stride;
int offset;
@@ -220,10 +257,10 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
int buf_r1, buf_r2, buf_c1, buf_c2;
// Clamping to avoid out-of-range data access
- buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)?(bestmv->as_mv.row - x->mv_row_min):3;
- buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)?(x->mv_row_max - bestmv->as_mv.row):3;
- buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)?(bestmv->as_mv.col - x->mv_col_min):3;
- buf_c2 = ((bestmv->as_mv.col + 3) > x->mv_col_max)?(x->mv_col_max - bestmv->as_mv.col):3;
+ buf_r1 = ((bestmv->as_mv.row - INTERP_EXTEND) < x->mv_row_min)?(bestmv->as_mv.row - x->mv_row_min):INTERP_EXTEND;
+ buf_r2 = ((bestmv->as_mv.row + INTERP_EXTEND) > x->mv_row_max)?(x->mv_row_max - bestmv->as_mv.row):INTERP_EXTEND;
+ buf_c1 = ((bestmv->as_mv.col - INTERP_EXTEND) < x->mv_col_min)?(bestmv->as_mv.col - x->mv_col_min):INTERP_EXTEND;
+ buf_c2 = ((bestmv->as_mv.col + INTERP_EXTEND) > x->mv_col_max)?(x->mv_col_max - bestmv->as_mv.col):INTERP_EXTEND;
y_stride = 32;
/* Copy to intermediate buffer before searching. */
@@ -249,26 +286,26 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
while (--halfiters)
{
// 1/2 pel
- CHECK_BETTER(left, tr, tc - 2);
- CHECK_BETTER(right, tr, tc + 2);
- CHECK_BETTER(up, tr - 2, tc);
- CHECK_BETTER(down, tr + 2, tc);
+ CHECK_BETTER(left, tr, tc - hstep);
+ CHECK_BETTER(right, tr, tc + hstep);
+ CHECK_BETTER(up, tr - hstep, tc);
+ CHECK_BETTER(down, tr + hstep, tc);
whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
switch (whichdir)
{
case 0:
- CHECK_BETTER(diag, tr - 2, tc - 2);
+ CHECK_BETTER(diag, tr - hstep, tc - hstep);
break;
case 1:
- CHECK_BETTER(diag, tr - 2, tc + 2);
+ CHECK_BETTER(diag, tr - hstep, tc + hstep);
break;
case 2:
- CHECK_BETTER(diag, tr + 2, tc - 2);
+ CHECK_BETTER(diag, tr + hstep, tc - hstep);
break;
case 3:
- CHECK_BETTER(diag, tr + 2, tc + 2);
+ CHECK_BETTER(diag, tr + hstep, tc + hstep);
break;
}
@@ -282,28 +319,29 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
// TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected)
// 1/4 pel
+ hstep >>= 1;
while (--quarteriters)
{
- CHECK_BETTER(left, tr, tc - 1);
- CHECK_BETTER(right, tr, tc + 1);
- CHECK_BETTER(up, tr - 1, tc);
- CHECK_BETTER(down, tr + 1, tc);
+ CHECK_BETTER(left, tr, tc - hstep);
+ CHECK_BETTER(right, tr, tc + hstep);
+ CHECK_BETTER(up, tr - hstep, tc);
+ CHECK_BETTER(down, tr + hstep, tc);
whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
switch (whichdir)
{
case 0:
- CHECK_BETTER(diag, tr - 1, tc - 1);
+ CHECK_BETTER(diag, tr - hstep, tc - hstep);
break;
case 1:
- CHECK_BETTER(diag, tr - 1, tc + 1);
+ CHECK_BETTER(diag, tr - hstep, tc + hstep);
break;
case 2:
- CHECK_BETTER(diag, tr + 1, tc - 1);
+ CHECK_BETTER(diag, tr + hstep, tc - hstep);
break;
case 3:
- CHECK_BETTER(diag, tr + 1, tc + 1);
+ CHECK_BETTER(diag, tr + hstep, tc + hstep);
break;
}
@@ -315,8 +353,49 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
tc = bc;
}
+#if CONFIG_HIGH_PRECISION_MV
+ if (x->e_mbd.allow_high_precision_mv)
+ {
+ hstep >>= 1;
+ while (--eighthiters)
+ {
+ CHECK_BETTER(left, tr, tc - hstep);
+ CHECK_BETTER(right, tr, tc + hstep);
+ CHECK_BETTER(up, tr - hstep, tc);
+ CHECK_BETTER(down, tr + hstep, tc);
+
+ whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
+
+ switch (whichdir)
+ {
+ case 0:
+ CHECK_BETTER(diag, tr - hstep, tc - hstep);
+ break;
+ case 1:
+ CHECK_BETTER(diag, tr - hstep, tc + hstep);
+ break;
+ case 2:
+ CHECK_BETTER(diag, tr + hstep, tc - hstep);
+ break;
+ case 3:
+ CHECK_BETTER(diag, tr + hstep, tc + hstep);
+ break;
+ }
+
+ // no reason to check the same one again.
+ if (tr == br && tc == bc)
+ break;
+
+ tr = br;
+ tc = bc;
+ }
+ }
+ bestmv->as_mv.row = br;
+ bestmv->as_mv.col = bc;
+#else
bestmv->as_mv.row = br << 1;
bestmv->as_mv.col = bc << 1;
+#endif /* CONFIG_HIGH_PRECISION_MV */
if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL<<3)) ||
(abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL<<3)))
@@ -333,6 +412,12 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
#undef CHECK_BETTER
#undef MIN
#undef MAX
+
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+#define SP(x) (((x)&7)<<1) // convert motion vector component to offset for svf calc
+#else
+#define SP(x) ((x)&7) // convert motion vector component to offset for svf calc
+#endif /* CONFIG_SIXTEENTH_SUBPEL_UV */
int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
int_mv *bestmv, int_mv *ref_mv,
int error_per_bit,
@@ -343,6 +428,10 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
int bestmse = INT_MAX;
int_mv startmv;
int_mv this_mv;
+#if CONFIG_HIGH_PRECISION_MV
+ int_mv orig_mv;
+ int yrow_movedback=0, ycol_movedback=0;
+#endif
unsigned char *z = (*(b->base_src) + b->src);
int left, right, up, down, diag;
unsigned int sse;
@@ -368,6 +457,9 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
bestmv->as_mv.row <<= 3;
bestmv->as_mv.col <<= 3;
startmv = *bestmv;
+#if CONFIG_HIGH_PRECISION_MV
+ orig_mv = *bestmv;
+#endif
// calculate central point error
bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
@@ -473,10 +565,20 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
// time to check quarter pels.
if (bestmv->as_mv.row < startmv.as_mv.row)
+ {
y -= y_stride;
+#if CONFIG_HIGH_PRECISION_MV
+ yrow_movedback = 1;
+#endif
+ }
if (bestmv->as_mv.col < startmv.as_mv.col)
+ {
y--;
+#if CONFIG_HIGH_PRECISION_MV
+ ycol_movedback = 1;
+#endif
+ }
startmv = *bestmv;
@@ -488,12 +590,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
if (startmv.as_mv.col & 7)
{
this_mv.as_mv.col = startmv.as_mv.col - 2;
- thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
}
else
{
this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
- thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
+ thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
}
left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
@@ -507,7 +609,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
}
this_mv.as_mv.col += 4;
- thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (right < bestmse)
@@ -524,12 +626,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
if (startmv.as_mv.row & 7)
{
this_mv.as_mv.row = startmv.as_mv.row - 2;
- thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
}
else
{
this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
- thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
+ thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6), z, b->src_stride, &sse);
}
up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
@@ -543,7 +645,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
}
this_mv.as_mv.row += 4;
- thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (down < bestmse)
@@ -573,12 +675,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
if (startmv.as_mv.col & 7)
{
this_mv.as_mv.col -= 2;
- thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
}
else
{
this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
- thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);;
+ thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
}
}
else
@@ -588,12 +690,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
if (startmv.as_mv.col & 7)
{
this_mv.as_mv.col -= 2;
- thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
+ thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6), z, b->src_stride, &sse);
}
else
{
this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
- thismse = vfp->svf(y - y_stride - 1, y_stride, 6, 6, z, b->src_stride, &sse);
+ thismse = vfp->svf(y - y_stride - 1, y_stride, SP(6), SP(6), z, b->src_stride, &sse);
}
}
@@ -604,12 +706,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
if (startmv.as_mv.row & 7)
{
this_mv.as_mv.row -= 2;
- thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
}
else
{
this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
- thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
+ thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6), z, b->src_stride, &sse);
}
break;
@@ -619,19 +721,19 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
if (startmv.as_mv.col & 7)
{
this_mv.as_mv.col -= 2;
- thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
}
else
{
this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
- thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
+ thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
}
break;
case 3:
this_mv.as_mv.col += 2;
this_mv.as_mv.row += 2;
- thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
break;
}
@@ -645,9 +747,195 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
*sse1 = sse;
}
+#if CONFIG_HIGH_PRECISION_MV
+ if (!x->e_mbd.allow_high_precision_mv)
+ return bestmse;
+
+ /* Now do 1/8th pixel */
+ if (bestmv->as_mv.row < orig_mv.as_mv.row && !yrow_movedback)
+ {
+ y -= y_stride;
+ yrow_movedback = 1;
+ }
+
+ if (bestmv->as_mv.col < orig_mv.as_mv.col && !ycol_movedback)
+ {
+ y--;
+ ycol_movedback = 1;
+ }
+
+ startmv = *bestmv;
+
+ // go left then right and check error
+ this_mv.as_mv.row = startmv.as_mv.row;
+
+ if (startmv.as_mv.col & 7)
+ {
+ this_mv.as_mv.col = startmv.as_mv.col - 1;
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
+ }
+ else
+ {
+ this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
+ thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
+ }
+
+ left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+
+ if (left < bestmse)
+ {
+ *bestmv = this_mv;
+ bestmse = left;
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+
+ this_mv.as_mv.col += 2;
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
+ right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+
+ if (right < bestmse)
+ {
+ *bestmv = this_mv;
+ bestmse = right;
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+
+ // go up then down and check error
+ this_mv.as_mv.col = startmv.as_mv.col;
+
+ if (startmv.as_mv.row & 7)
+ {
+ this_mv.as_mv.row = startmv.as_mv.row - 1;
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
+ }
+ else
+ {
+ this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7;
+ thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, b->src_stride, &sse);
+ }
+
+ up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+
+ if (up < bestmse)
+ {
+ *bestmv = this_mv;
+ bestmse = up;
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+
+ this_mv.as_mv.row += 2;
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
+ down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+
+ if (down < bestmse)
+ {
+ *bestmv = this_mv;
+ bestmse = down;
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+
+
+ // now check 1 more diagonal
+ whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
+
+// for(whichdir=0;whichdir<4;whichdir++)
+// {
+ this_mv = startmv;
+
+ switch (whichdir)
+ {
+ case 0:
+
+ if (startmv.as_mv.row & 7)
+ {
+ this_mv.as_mv.row -= 1;
+
+ if (startmv.as_mv.col & 7)
+ {
+ this_mv.as_mv.col -= 1;
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
+ }
+ else
+ {
+ this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
+ thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
+ }
+ }
+ else
+ {
+ this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7;
+
+ if (startmv.as_mv.col & 7)
+ {
+ this_mv.as_mv.col -= 1;
+ thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, b->src_stride, &sse);
+ }
+ else
+ {
+ this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
+ thismse = vfp->svf(y - y_stride - 1, y_stride, SP(7), SP(7), z, b->src_stride, &sse);
+ }
+ }
+
+ break;
+ case 1:
+ this_mv.as_mv.col += 1;
+
+ if (startmv.as_mv.row & 7)
+ {
+ this_mv.as_mv.row -= 1;
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
+ }
+ else
+ {
+ this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7;
+ thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, b->src_stride, &sse);
+ }
+
+ break;
+ case 2:
+ this_mv.as_mv.row += 1;
+
+ if (startmv.as_mv.col & 7)
+ {
+ this_mv.as_mv.col -= 1;
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
+ }
+ else
+ {
+ this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
+ thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
+ }
+
+ break;
+ case 3:
+ this_mv.as_mv.col += 1;
+ this_mv.as_mv.row += 1;
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
+ break;
+ }
+
+ diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+
+ if (diag < bestmse)
+ {
+ *bestmv = this_mv;
+ bestmse = diag;
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+
+#endif /* CONFIG_HIGH_PRECISION_MV */
+
return bestmse;
}
+#undef SP
+
int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
int_mv *bestmv, int_mv *ref_mv,
int error_per_bit,
@@ -1945,5 +2233,3 @@ void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4])
}
#endif/* END MV ref count ENTROPY_STATS stats code */
-
-
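The shape of the change to vp8_find_best_sub_pixel_step_iteratively above is easier to see stripped down: the hard-coded half-pel and quarter-pel step sizes become one step variable halved between passes, so the eighth-pel pass is just one more halving. An illustrative sketch, not code from the patch (coordinates are in 1/8-pel units throughout, and check_point() stands in for the CHECK_BETTER machinery):

    static void check_point(int r, int c, int *br, int *bc)
    {
        /* stub: CHECK_BETTER evaluates MVC() + DIST() at (r, c) and keeps
         * the best score seen so far in (*br, *bc) */
        (void)r; (void)c; (void)br; (void)bc;
    }

    static void refine_subpel(int *br, int *bc, int allow_high_precision_mv)
    {
        int hstep = 4;                                /* 1/2 pel          */
        int passes = allow_high_precision_mv ? 3 : 2; /* half, quarter[, eighth] */

        while (passes--)
        {
            int iters = 4;
            while (--iters)
            {
                check_point(*br, *bc - hstep, br, bc);  /* left  */
                check_point(*br, *bc + hstep, br, bc);  /* right */
                check_point(*br - hstep, *bc, br, bc);  /* up    */
                check_point(*br + hstep, *bc, br, bc);  /* down  */
                /* best-diagonal probe and early exit elided */
            }
            hstep >>= 1;                          /* 1/2 -> 1/4 -> 1/8 pel */
        }
    }

    int main(void)
    {
        int br = 0, bc = 0;
        refine_subpel(&br, &bc, 1);
        return 0;
    }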
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 8a2fb8448..2b4418a0a 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -1365,6 +1365,9 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
(TOKEN_PARTITION) cpi->oxcf.token_partitions;
setup_features(cpi);
+#if CONFIG_HIGH_PRECISION_MV
+ cpi->mb.e_mbd.allow_high_precision_mv = 1; // Default mv precision adaptation
+#endif
{
int i;
@@ -2994,6 +2997,9 @@ static void encode_frame_to_data_rate
// Reset the loop filter deltas and segmentation map
setup_features(cpi);
+#if CONFIG_HIGH_PRECISION_MV
+ xd->allow_high_precision_mv = 1; // Default mv precision adaptation
+#endif
// If segmentation is enabled force a map update for key frames
if (xd->segmentation_enabled)
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 4f97e3c63..1491e645b 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -530,10 +530,17 @@ int VP8_UVSSE(MACROBLOCK *x, const vp8_variance_rtcd_vtable_t *rtcd)
if ((mv_row | mv_col) & 7)
{
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+ VARIANCE_INVOKE(rtcd, subpixvar8x8)(uptr, pre_stride,
+ (mv_col & 7)<<1, (mv_row & 7)<<1, upred_ptr, uv_stride, &sse2);
+ VARIANCE_INVOKE(rtcd, subpixvar8x8)(vptr, pre_stride,
+ (mv_col & 7)<<1, (mv_row & 7)<<1, vpred_ptr, uv_stride, &sse1);
+#else
VARIANCE_INVOKE(rtcd, subpixvar8x8)(uptr, pre_stride,
mv_col & 7, mv_row & 7, upred_ptr, uv_stride, &sse2);
VARIANCE_INVOKE(rtcd, subpixvar8x8)(vptr, pre_stride,
mv_col & 7, mv_row & 7, vpred_ptr, uv_stride, &sse1);
+#endif
sse2 += sse1;
}
else
@@ -1654,7 +1661,6 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4],
bsi->ref_mv, x->errorperbit, v_fn_ptr, x->mvcost,
&distortion, &sse);
-
}
} /* NEW4X4 */
@@ -1700,8 +1706,10 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
segmentyrate += bestlabelyrate;
this_segment_rd += best_label_rd;
- if (this_segment_rd >= bsi->segment_rd)
+ if (this_segment_rd >= bsi->segment_rd) {
break;
+ }
+
} /* for each label */
@@ -1776,6 +1784,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
+
if (bsi.segment_rd < best_rd)
{
int col_min = (best_ref_mv->as_mv.col>>3) - MAX_FULL_PEL_VAL + ((best_ref_mv->as_mv.col & 7)?1:0);
@@ -2146,18 +2155,18 @@ static void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv)
if (x->partition_info->bmi[i].mode == NEW4X4)
{
cpi->MVcount[0][mv_max+((x->partition_info->bmi[i].mv.as_mv.row
- - best_ref_mv->as_mv.row) >> 1)]++;
+ - best_ref_mv->as_mv.row) >> MV_SHIFT)]++;
cpi->MVcount[1][mv_max+((x->partition_info->bmi[i].mv.as_mv.col
- - best_ref_mv->as_mv.col) >> 1)]++;
+ - best_ref_mv->as_mv.col) >> MV_SHIFT)]++;
}
}
}
else if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV)
{
cpi->MVcount[0][mv_max+((x->e_mbd.mode_info_context->mbmi.mv.as_mv.row
- - best_ref_mv->as_mv.row) >> 1)]++;
+ - best_ref_mv->as_mv.row) >> MV_SHIFT)]++;
cpi->MVcount[1][mv_max+((x->e_mbd.mode_info_context->mbmi.mv.as_mv.col
- - best_ref_mv->as_mv.col) >> 1)]++;
+ - best_ref_mv->as_mv.col) >> MV_SHIFT)]++;
}
}
@@ -2473,6 +2482,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
vp8_update_zbin_extra(cpi, x);
}
+
if (!x->e_mbd.mode_info_context->mbmi.second_ref_frame)
switch (this_mode)
{
diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c
index 8455bb877..ede65d669 100644
--- a/vp8/encoder/temporal_filter.c
+++ b/vp8/encoder/temporal_filter.c
@@ -50,14 +50,20 @@ static void vp8_temporal_filter_predictors_mb_c
{
int offset;
unsigned char *yptr, *uptr, *vptr;
+ int omv_row, omv_col;
// Y
yptr = y_mb_ptr + (mv_row >> 3) * stride + (mv_col >> 3);
if ((mv_row | mv_col) & 7)
{
+#if CONFIG_SIXTEENTH_SUBPEL_UV
x->subpixel_predict16x16(yptr, stride,
- mv_col & 7, mv_row & 7, &pred[0], 16);
+ (mv_col & 7)<<1, (mv_row & 7)<<1, &pred[0], 16);
+#else
+ x->subpixel_predict16x16(yptr, stride,
+ mv_col & 7, mv_row & 7, &pred[0], 16);
+#endif
}
else
{
@@ -65,6 +71,8 @@ static void vp8_temporal_filter_predictors_mb_c
}
// U & V
+ omv_row = mv_row;
+ omv_col = mv_col;
mv_row >>= 1;
mv_col >>= 1;
stride = (stride + 1) >> 1;
@@ -72,6 +80,15 @@ static void vp8_temporal_filter_predictors_mb_c
uptr = u_mb_ptr + offset;
vptr = v_mb_ptr + offset;
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+ if ((omv_row | omv_col) & 15)
+ {
+ x->subpixel_predict8x8(uptr, stride,
+ (omv_col & 15), (omv_row & 15), &pred[256], 8);
+ x->subpixel_predict8x8(vptr, stride,
+ (omv_col & 15), (omv_row & 15), &pred[320], 8);
+ }
+#else
if ((mv_row | mv_col) & 7)
{
x->subpixel_predict8x8(uptr, stride,
@@ -79,6 +96,7 @@ static void vp8_temporal_filter_predictors_mb_c
x->subpixel_predict8x8(vptr, stride,
mv_col & 7, mv_row & 7, &pred[320], 8);
}
+#endif
else
{
RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, stride, &pred[256], 8);
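The temporal-filter change above leans on a unit identity worth spelling out: halving a 1/8-pel luma MV to get the chroma displacement and then measuring it in 1/16-pel steps is the same as reading the original luma value directly in 1/16-chroma-pel units, which is why the U/V phase is (omv & 15). A hedged check:

    /* Hedged check, not part of the patch: a luma MV in 1/8-pel units is,
     * read as-is, a chroma displacement in 1/16-chroma-pel units (the
     * chroma plane is half resolution). */
    #include <assert.h>

    int main(void)
    {
        int luma_mv      = 37;                  /* 4 + 5/8 luma pels      */
        int chroma_full  = (luma_mv >> 1) >> 3; /* what the code computes */
        int chroma_phase = luma_mv & 15;        /* (omv & 15) in the code */
        assert(chroma_full == luma_mv >> 4);    /* same full-pel offset   */
        assert(chroma_full == 2 && chroma_phase == 5); /* 2 + 5/16 chroma pels */
        return 0;
    }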
diff --git a/vp8/encoder/variance_c.c b/vp8/encoder/variance_c.c
index c7b9c2209..402ff0450 100644
--- a/vp8/encoder/variance_c.c
+++ b/vp8/encoder/variance_c.c
@@ -363,8 +363,13 @@ unsigned int vp8_variance_halfpixvar16x16_h_c(
int recon_stride,
unsigned int *sse)
{
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+ return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 0,
+ ref_ptr, recon_stride, sse);
+#else
return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 4, 0,
ref_ptr, recon_stride, sse);
+#endif
}
@@ -375,8 +380,13 @@ unsigned int vp8_variance_halfpixvar16x16_v_c(
int recon_stride,
unsigned int *sse)
{
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+ return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 8,
+ ref_ptr, recon_stride, sse);
+#else
return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 4,
ref_ptr, recon_stride, sse);
+#endif
}
@@ -387,8 +397,13 @@ unsigned int vp8_variance_halfpixvar16x16_hv_c(
int recon_stride,
unsigned int *sse)
{
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+ return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 8,
+ ref_ptr, recon_stride, sse);
+#else
return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 4, 4,
ref_ptr, recon_stride, sse);
+#endif
}
diff --git a/vp8/encoder/x86/variance_impl_sse2.asm b/vp8/encoder/x86/variance_impl_sse2.asm
index 762922091..b13beee6e 100644
--- a/vp8/encoder/x86/variance_impl_sse2.asm
+++ b/vp8/encoder/x86/variance_impl_sse2.asm
@@ -1348,12 +1348,32 @@ align 16
xmm_bi_rd:
times 8 dw 64
align 16
+%if CONFIG_SIXTEENTH_SUBPEL_UV
vp8_bilinear_filters_sse2:
dw 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0
+ dw 120, 120, 120, 120, 120, 120, 120, 120, 8, 8, 8, 8, 8, 8, 8, 8
dw 112, 112, 112, 112, 112, 112, 112, 112, 16, 16, 16, 16, 16, 16, 16, 16
+ dw 104, 104, 104, 104, 104, 104, 104, 104, 24, 24, 24, 24, 24, 24, 24, 24
dw 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32
+ dw 88, 88, 88, 88, 88, 88, 88, 88, 40, 40, 40, 40, 40, 40, 40, 40
dw 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48
+ dw 72, 72, 72, 72, 72, 72, 72, 72, 56, 56, 56, 56, 56, 56, 56, 56
dw 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
+ dw 56, 56, 56, 56, 56, 56, 56, 56, 72, 72, 72, 72, 72, 72, 72, 72
dw 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80
+ dw 40, 40, 40, 40, 40, 40, 40, 40, 88, 88, 88, 88, 88, 88, 88, 88
dw 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96
+ dw 24, 24, 24, 24, 24, 24, 24, 24, 104, 104, 104, 104, 104, 104, 104, 104
dw 16, 16, 16, 16, 16, 16, 16, 16, 112, 112, 112, 112, 112, 112, 112, 112
+ dw 8, 8, 8, 8, 8, 8, 8, 8, 120, 120, 120, 120, 120, 120, 120, 120
+%else
+vp8_bilinear_filters_sse2:
+ dw 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0
+ dw 112, 112, 112, 112, 112, 112, 112, 112, 16, 16, 16, 16, 16, 16, 16, 16
+ dw 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32
+ dw 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48
+ dw 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
+ dw 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80
+ dw 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96
+ dw 16, 16, 16, 16, 16, 16, 16, 16, 112, 112, 112, 112, 112, 112, 112, 112
+%endif
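The expanded bilinear table above (and the ssse3/mmx copies that follow) all come from one rule: for 1/16-pel phase k, the two taps are 128 - 8k and 8k, summing to 128, and a sample is filtered as (a*t0 + b*t1 + 64) >> 7 (the rounding constant is the `times 8 dw 64` row). A hedged generator that reproduces the 16 rows:

    /* Hedged sketch: regenerate the 16-phase bilinear tap pairs above. */
    #include <stdio.h>

    int main(void)
    {
        for (int k = 0; k < 16; k++)
            printf("phase %2d/16: taps {%3d, %3d}\n", k, 128 - 8 * k, 8 * k);
        return 0;
    }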
diff --git a/vp8/encoder/x86/variance_impl_ssse3.asm b/vp8/encoder/x86/variance_impl_ssse3.asm
index 97e8b0e2e..d60d53daa 100644
--- a/vp8/encoder/x86/variance_impl_ssse3.asm
+++ b/vp8/encoder/x86/variance_impl_ssse3.asm
@@ -353,6 +353,25 @@ align 16
xmm_bi_rd:
times 8 dw 64
align 16
+%if CONFIG_SIXTEENTH_SUBPEL_UV
+vp8_bilinear_filters_ssse3:
+ times 8 db 128, 0
+ times 8 db 120, 8
+ times 8 db 112, 16
+ times 8 db 104, 24
+ times 8 db 96, 32
+ times 8 db 88, 40
+ times 8 db 80, 48
+ times 8 db 72, 56
+ times 8 db 64, 64
+ times 8 db 56, 72
+ times 8 db 48, 80
+ times 8 db 40, 88
+ times 8 db 32, 96
+ times 8 db 24, 104
+ times 8 db 16, 112
+ times 8 db 8, 120
+%else
vp8_bilinear_filters_ssse3:
times 8 db 128, 0
times 8 db 112, 16
@@ -362,3 +381,4 @@ vp8_bilinear_filters_ssse3:
times 8 db 48, 80
times 8 db 32, 96
times 8 db 16, 112
+%endif
diff --git a/vp8/encoder/x86/variance_mmx.c b/vp8/encoder/x86/variance_mmx.c
index 92b695f17..b84d00034 100644
--- a/vp8/encoder/x86/variance_mmx.c
+++ b/vp8/encoder/x86/variance_mmx.c
@@ -204,6 +204,27 @@ unsigned int vp8_variance8x16_mmx(
// the mmx function that does the bilinear filtering and var calculation //
// int one pass //
///////////////////////////////////////////////////////////////////////////
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+DECLARE_ALIGNED(16, const short, vp8_vp7_bilinear_filters_mmx[16][8]) =
+{
+ { 128, 128, 128, 128, 0, 0, 0, 0 },
+ { 120, 120, 120, 120, 8, 8, 8, 8 },
+ { 112, 112, 112, 112, 16, 16, 16, 16 },
+ { 104, 104, 104, 104, 24, 24, 24, 24 },
+ { 96, 96, 96, 96, 32, 32, 32, 32 },
+ { 88, 88, 88, 88, 40, 40, 40, 40 },
+ { 80, 80, 80, 80, 48, 48, 48, 48 },
+ { 72, 72, 72, 72, 56, 56, 56, 56 },
+ { 64, 64, 64, 64, 64, 64, 64, 64 },
+ { 56, 56, 56, 56, 72, 72, 72, 72 },
+ { 48, 48, 48, 48, 80, 80, 80, 80 },
+ { 40, 40, 40, 40, 88, 88, 88, 88 },
+ { 32, 32, 32, 32, 96, 96, 96, 96 },
+ { 24, 24, 24, 24, 104, 104, 104, 104 },
+ { 16, 16, 16, 16, 112, 112, 112, 112 },
+ { 8, 8, 8, 8, 120, 120, 120, 120 }
+};
+#else
DECLARE_ALIGNED(16, const short, vp8_vp7_bilinear_filters_mmx[8][8]) =
{
{ 128, 128, 128, 128, 0, 0, 0, 0 },
@@ -215,6 +236,7 @@ DECLARE_ALIGNED(16, const short, vp8_vp7_bilinear_filters_mmx[8][8]) =
{ 32, 32, 32, 32, 96, 96, 96, 96 },
{ 16, 16, 16, 16, 112, 112, 112, 112 }
};
+#endif
unsigned int vp8_sub_pixel_variance4x4_mmx
(
@@ -279,7 +301,6 @@ unsigned int vp8_sub_pixel_variance16x16_mmx
int xsum0, xsum1;
unsigned int xxsum0, xxsum1;
-
vp8_filter_block2d_bil_var_mmx(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 16,
@@ -287,7 +308,6 @@ unsigned int vp8_sub_pixel_variance16x16_mmx
&xsum0, &xxsum0
);
-
vp8_filter_block2d_bil_var_mmx(
src_ptr + 8, src_pixels_per_line,
dst_ptr + 8, dst_pixels_per_line, 16,
@@ -386,8 +406,13 @@ unsigned int vp8_variance_halfpixvar16x16_h_mmx(
int recon_stride,
unsigned int *sse)
{
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+ return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 8, 0,
+ ref_ptr, recon_stride, sse);
+#else
return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 0,
ref_ptr, recon_stride, sse);
+#endif
}
@@ -398,8 +423,13 @@ unsigned int vp8_variance_halfpixvar16x16_v_mmx(
int recon_stride,
unsigned int *sse)
{
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+ return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 0, 8,
+ ref_ptr, recon_stride, sse);
+#else
return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 0, 4,
ref_ptr, recon_stride, sse);
+#endif
}
@@ -410,6 +440,11 @@ unsigned int vp8_variance_halfpixvar16x16_hv_mmx(
int recon_stride,
unsigned int *sse)
{
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+ return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 8, 8,
+ ref_ptr, recon_stride, sse);
+#else
return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 4,
ref_ptr, recon_stride, sse);
+#endif
}
diff --git a/vp8/encoder/x86/variance_sse2.c b/vp8/encoder/x86/variance_sse2.c
index 24062eb9b..e3c6268ea 100644
--- a/vp8/encoder/x86/variance_sse2.c
+++ b/vp8/encoder/x86/variance_sse2.c
@@ -13,6 +13,12 @@
#include "vp8/common/pragmas.h"
#include "vpx_ports/mem.h"
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+#define HALFNDX 8
+#else
+#define HALFNDX 4
+#endif
+
extern void filter_block1d_h6_mmx(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
extern void filter_block1d_v6_mmx(const short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
extern void filter_block1d8_h6_sse2(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
@@ -135,7 +141,11 @@ void vp8_half_vert_variance16x_h_sse2
unsigned int *sumsquared
);
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+DECLARE_ALIGNED(16, extern short, vp8_vp7_bilinear_filters_mmx[16][8]);
+#else
DECLARE_ALIGNED(16, extern short, vp8_vp7_bilinear_filters_mmx[8][8]);
+#endif
unsigned int vp8_variance4x4_wmt(
const unsigned char *src_ptr,
@@ -284,21 +294,21 @@ unsigned int vp8_sub_pixel_variance8x8_wmt
int xsum;
unsigned int xxsum;
- if (xoffset == 4 && yoffset == 0)
+ if (xoffset == HALFNDX && yoffset == 0)
{
vp8_half_horiz_variance8x_h_sse2(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 8,
&xsum, &xxsum);
}
- else if (xoffset == 0 && yoffset == 4)
+ else if (xoffset == 0 && yoffset == HALFNDX)
{
vp8_half_vert_variance8x_h_sse2(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 8,
&xsum, &xxsum);
}
- else if (xoffset == 4 && yoffset == 4)
+ else if (xoffset == HALFNDX && yoffset == HALFNDX)
{
vp8_half_horiz_vert_variance8x_h_sse2(
src_ptr, src_pixels_per_line,
@@ -335,21 +345,21 @@ unsigned int vp8_sub_pixel_variance16x16_wmt
// note we could avoid these if statements if the calling function
// just called the appropriate functions inside.
- if (xoffset == 4 && yoffset == 0)
+ if (xoffset == HALFNDX && yoffset == 0)
{
vp8_half_horiz_variance16x_h_sse2(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 16,
&xsum0, &xxsum0);
}
- else if (xoffset == 0 && yoffset == 4)
+ else if (xoffset == 0 && yoffset == HALFNDX)
{
vp8_half_vert_variance16x_h_sse2(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 16,
&xsum0, &xxsum0);
}
- else if (xoffset == 4 && yoffset == 4)
+ else if (xoffset == HALFNDX && yoffset == HALFNDX)
{
vp8_half_horiz_vert_variance16x_h_sse2(
src_ptr, src_pixels_per_line,
@@ -408,21 +418,21 @@ unsigned int vp8_sub_pixel_variance16x8_wmt
int xsum0, xsum1;
unsigned int xxsum0, xxsum1;
- if (xoffset == 4 && yoffset == 0)
+ if (xoffset == HALFNDX && yoffset == 0)
{
vp8_half_horiz_variance16x_h_sse2(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 8,
&xsum0, &xxsum0);
}
- else if (xoffset == 0 && yoffset == 4)
+ else if (xoffset == 0 && yoffset == HALFNDX)
{
vp8_half_vert_variance16x_h_sse2(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 8,
&xsum0, &xxsum0);
}
- else if (xoffset == 4 && yoffset == 4)
+ else if (xoffset == HALFNDX && yoffset == HALFNDX)
{
vp8_half_horiz_vert_variance16x_h_sse2(
src_ptr, src_pixels_per_line,
@@ -464,21 +474,21 @@ unsigned int vp8_sub_pixel_variance8x16_wmt
int xsum;
unsigned int xxsum;
- if (xoffset == 4 && yoffset == 0)
+ if (xoffset == HALFNDX && yoffset == 0)
{
vp8_half_horiz_variance8x_h_sse2(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 16,
&xsum, &xxsum);
}
- else if (xoffset == 0 && yoffset == 4)
+ else if (xoffset == 0 && yoffset == HALFNDX)
{
vp8_half_vert_variance8x_h_sse2(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 16,
&xsum, &xxsum);
}
- else if (xoffset == 4 && yoffset == 4)
+ else if (xoffset == HALFNDX && yoffset == HALFNDX)
{
vp8_half_horiz_vert_variance8x_h_sse2(
src_ptr, src_pixels_per_line,
diff --git a/vp8/encoder/x86/variance_ssse3.c b/vp8/encoder/x86/variance_ssse3.c
index 73f2e01a2..59e14971a 100644
--- a/vp8/encoder/x86/variance_ssse3.c
+++ b/vp8/encoder/x86/variance_ssse3.c
@@ -13,6 +13,12 @@
#include "vp8/common/pragmas.h"
#include "vpx_ports/mem.h"
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+#define HALFNDX 8
+#else
+#define HALFNDX 4
+#endif
+
extern unsigned int vp8_get16x16var_sse2
(
const unsigned char *src_ptr,
@@ -81,21 +87,21 @@ unsigned int vp8_sub_pixel_variance16x16_ssse3
// note we could avoid these if statements if the calling function
// just called the appropriate functions inside.
- if (xoffset == 4 && yoffset == 0)
+ if (xoffset == HALFNDX && yoffset == 0)
{
vp8_half_horiz_variance16x_h_sse2(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 16,
&xsum0, &xxsum0);
}
- else if (xoffset == 0 && yoffset == 4)
+ else if (xoffset == 0 && yoffset == HALFNDX)
{
vp8_half_vert_variance16x_h_sse2(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 16,
&xsum0, &xxsum0);
}
- else if (xoffset == 4 && yoffset == 4)
+ else if (xoffset == HALFNDX && yoffset == HALFNDX)
{
vp8_half_horiz_vert_variance16x_h_sse2(
src_ptr, src_pixels_per_line,
@@ -130,21 +136,21 @@ unsigned int vp8_sub_pixel_variance16x8_ssse3
int xsum0;
unsigned int xxsum0;
- if (xoffset == 4 && yoffset == 0)
+ if (xoffset == HALFNDX && yoffset == 0)
{
vp8_half_horiz_variance16x_h_sse2(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 8,
&xsum0, &xxsum0);
}
- else if (xoffset == 0 && yoffset == 4)
+ else if (xoffset == 0 && yoffset == HALFNDX)
{
vp8_half_vert_variance16x_h_sse2(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 8,
&xsum0, &xxsum0);
}
- else if (xoffset == 4 && yoffset == 4)
+ else if (xoffset == HALFNDX && yoffset == HALFNDX)
{
vp8_half_horiz_vert_variance16x_h_sse2(
src_ptr, src_pixels_per_line,