path: root/vp8/encoder/x86/variance_mmx.c
author     Deb Mukherjee <debargha@google.com>  2012-02-16 09:29:54 -0800
committer  Deb Mukherjee <debargha@google.com>  2012-02-23 09:25:21 -0800
commit     18e90d744eba2d28ad96a566565bbf5642d24b59 (patch)
tree       8d0859ca6973ad522f4b1e30566ba67ea6ed886a /vp8/encoder/x86/variance_mmx.c
parent     3c872b6c27a5f03156b973fb359c9293049d6e84 (diff)
Supporting high precision 1/8-pel motion vectors
This is the initial patch for supporting 1/8th-pel motion. Currently, if we configure with enable-high-precision-mv, all motion vectors default to 1/8 pel. Encode and decode sync fine with the current code. In the next phase the code will be refactored so that the 1/8-pel mode can be chosen adaptively at a frame/segment/mb level.

Derf results: http://www.corp.google.com/~debargha/vp8_results/enhinterp_hpmv.html (about 0.83% better than 8-tap interpolation)

Patch 3: Rebased. Also adds 1/16th-pel interpolation for U and V.

Patch 4: HD results: http://www.corp.google.com/~debargha/vp8_results/enhinterp_hd_hpmv.html Seems impressive (unless I am doing something wrong).

Patch 5: Added mmx/sse for bilinear filtering, and enforced use of the C versions of the subpel filters with 8 taps and 1/16th pel. Also redesigned the 8-tap filters to reduce the cut-off, in order to introduce a denoising effect. There is a new configure option, sixteenth-subpel-uv, which uses 1/16th-pel interpolation for UV if the motion vectors have 1/8-pel accuracy. With the fixes, the results are promising on the derf set. The enhanced interpolation option with 8 taps alone gives a 3% improvement over the derf set: http://www.corp.google.com/~debargha/vp8_results/enhinterpn.html Results on high-precision MV and on the HD set are to follow.

Patch 6: Added a missing condition for CONFIG_SIXTEENTH_SUBPEL_UV in vp8/common/x86/x86_systemdependent.c.

Patch 7: Cleaned up various debug messages.

Patch 8: Resolved a merge conflict.

Change-Id: I5b1d844457aefd7414a9e4e0e06c6ed38fd8cc04
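For reference, the 16-entry filter table added in the first hunk below follows a simple closed form: for 1/16-pel offset i (0..15), the two bilinear taps are 128 - 8*i and 8*i, a Q7 fixed-point kernel whose taps always sum to 128, with each tap replicated across four 16-bit lanes to fill a 64-bit MMX register. A minimal sketch that regenerates the same values (a standalone helper written for illustration only; the patch hard-codes the table instead):

    /* Regenerate the 1/16-pel entries of vp8_vp7_bilinear_filters_mmx.
     * Illustration only, not part of the patch. */
    #include <stdio.h>

    int main(void) {
        int i;
        for (i = 0; i < 16; i++) {
            short tap0 = (short)(128 - 8 * i);  /* weight of the first pixel  */
            short tap1 = (short)(8 * i);        /* weight of the second pixel */
            /* Each tap appears four times: one 64-bit MMX register = 4 shorts. */
            printf("{ %3d, %3d, %3d, %3d, %3d, %3d, %3d, %3d },\n",
                   tap0, tap0, tap0, tap0, tap1, tap1, tap1, tap1);
        }
        return 0;
    }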
Diffstat (limited to 'vp8/encoder/x86/variance_mmx.c')
-rw-r--r--  vp8/encoder/x86/variance_mmx.c  39
1 file changed, 37 insertions(+), 2 deletions(-)
diff --git a/vp8/encoder/x86/variance_mmx.c b/vp8/encoder/x86/variance_mmx.c
index 92b695f17..b84d00034 100644
--- a/vp8/encoder/x86/variance_mmx.c
+++ b/vp8/encoder/x86/variance_mmx.c
@@ -204,6 +204,27 @@ unsigned int vp8_variance8x16_mmx(
// the mmx function that does the bilinear filtering and var calculation //
// in one pass //
///////////////////////////////////////////////////////////////////////////
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+DECLARE_ALIGNED(16, const short, vp8_vp7_bilinear_filters_mmx[16][8]) =
+{
+ { 128, 128, 128, 128, 0, 0, 0, 0 },
+ { 120, 120, 120, 120, 8, 8, 8, 8 },
+ { 112, 112, 112, 112, 16, 16, 16, 16 },
+ { 104, 104, 104, 104, 24, 24, 24, 24 },
+ { 96, 96, 96, 96, 32, 32, 32, 32 },
+ { 88, 88, 88, 88, 40, 40, 40, 40 },
+ { 80, 80, 80, 80, 48, 48, 48, 48 },
+ { 72, 72, 72, 72, 56, 56, 56, 56 },
+ { 64, 64, 64, 64, 64, 64, 64, 64 },
+ { 56, 56, 56, 56, 72, 72, 72, 72 },
+ { 48, 48, 48, 48, 80, 80, 80, 80 },
+ { 40, 40, 40, 40, 88, 88, 88, 88 },
+ { 32, 32, 32, 32, 96, 96, 96, 96 },
+ { 24, 24, 24, 24, 104, 104, 104, 104 },
+ { 16, 16, 16, 16, 112, 112, 112, 112 },
+ { 8, 8, 8, 8, 120, 120, 120, 120 }
+};
+#else
DECLARE_ALIGNED(16, const short, vp8_vp7_bilinear_filters_mmx[8][8]) =
{
{ 128, 128, 128, 128, 0, 0, 0, 0 },
@@ -215,6 +236,7 @@ DECLARE_ALIGNED(16, const short, vp8_vp7_bilinear_filters_mmx[8][8]) =
{ 32, 32, 32, 32, 96, 96, 96, 96 },
{ 16, 16, 16, 16, 112, 112, 112, 112 }
};
+#endif
unsigned int vp8_sub_pixel_variance4x4_mmx
(
@@ -279,7 +301,6 @@ unsigned int vp8_sub_pixel_variance16x16_mmx
int xsum0, xsum1;
unsigned int xxsum0, xxsum1;
-
vp8_filter_block2d_bil_var_mmx(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 16,
@@ -287,7 +308,6 @@ unsigned int vp8_sub_pixel_variance16x16_mmx
&xsum0, &xxsum0
);
-
vp8_filter_block2d_bil_var_mmx(
src_ptr + 8, src_pixels_per_line,
dst_ptr + 8, dst_pixels_per_line, 16,
@@ -386,8 +406,13 @@ unsigned int vp8_variance_halfpixvar16x16_h_mmx(
int recon_stride,
unsigned int *sse)
{
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+ return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 8, 0,
+ ref_ptr, recon_stride, sse);
+#else
return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 0,
ref_ptr, recon_stride, sse);
+#endif
}
@@ -398,8 +423,13 @@ unsigned int vp8_variance_halfpixvar16x16_v_mmx(
int recon_stride,
unsigned int *sse)
{
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+ return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 0, 8,
+ ref_ptr, recon_stride, sse);
+#else
return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 0, 4,
ref_ptr, recon_stride, sse);
+#endif
}
@@ -410,6 +440,11 @@ unsigned int vp8_variance_halfpixvar16x16_hv_mmx(
int recon_stride,
unsigned int *sse)
{
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+ return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 8, 8,
+ ref_ptr, recon_stride, sse);
+#else
return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 4,
ref_ptr, recon_stride, sse);
+#endif
}
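The three half-pixel helpers above change their subpel offset argument from 4 to 8 because that argument indexes the bilinear table: with the 16-entry 1/16-pel table, half a pel lands at entry 8 (8/16), whereas the old 8-entry table put it at entry 4 (4/8). Either way the selected taps are { 64, 64 }, an even average of the two pixels. A hedged illustration of the relationship (HALF_PEL_OFFSET is a hypothetical name, not in the patch):

    /* Half-pel expressed as a bilinear-table index; name is illustrative only. */
    #if CONFIG_SIXTEENTH_SUBPEL_UV
    #define HALF_PEL_OFFSET 8   /* entry 8 of 16 -> taps { 64, 64 } */
    #else
    #define HALF_PEL_OFFSET 4   /* entry 4 of 8  -> taps { 64, 64 } */
    #endif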