author | Deb Mukherjee <debargha@google.com> | 2012-02-16 09:29:54 -0800 |
---|---|---|
committer | Deb Mukherjee <debargha@google.com> | 2012-02-23 09:25:21 -0800 |
commit | 18e90d744eba2d28ad96a566565bbf5642d24b59 (patch) | |
tree | 8d0859ca6973ad522f4b1e30566ba67ea6ed886a /vp8/encoder/x86/variance_mmx.c | |
parent | 3c872b6c27a5f03156b973fb359c9293049d6e84 (diff) | |
Supporting high precision 1/8-pel motion vectors
This is the initial patch for supporting 1/8th pel
motion. Currently, if we configure with enable-high-precision-mv,
all motion vectors default to 1/8 pel. Encode and
decode sync fine with the current code. In the next phase
the code will be refactored so that we can choose the 1/8
pel mode adaptively at a frame/segment/mb level.
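As a rough illustration (not part of the patch; split_mv_q3 is a made-up helper), 1/8-pel precision simply means a motion-vector component is stored in eighths of a pixel, so the low three bits select the sub-pel phase and the remaining bits give the full-pel displacement:

```c
/* Illustrative sketch only -- not libvpx code. */
#include <stdio.h>

static void split_mv_q3(int mv_q3, int *full_pel, int *phase)
{
    *full_pel = mv_q3 >> 3;   /* integer pixel displacement          */
    *phase    = mv_q3 & 7;    /* 0..7: which 1/8-pel phase to filter */
}

int main(void)
{
    int full, phase;
    split_mv_q3(21, &full, &phase);               /* 21/8 = 2 + 5/8 pel */
    printf("full=%d, phase=%d/8\n", full, phase); /* full=2, phase=5/8  */
    return 0;
}
```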
Derf results:
http://www.corp.google.com/~debargha/vp8_results/enhinterp_hpmv.html
(about 0.83% better than 8-tap interpolation)
Patch 3: Rebased. Also adding 1/16th pel interpolation for U and V
Patch 4: HD results.
http://www.corp.google.com/~debargha/vp8_results/enhinterp_hd_hpmv.html
Seems impressive (unless I am doing something wrong).
Patch 5: Added mmx/sse for bilinear filtering, as well as enforced
use of the C versions of the subpel filters with 8 taps and 1/16th pel.
Also redesigned the 8-tap filters to reduce the cut-off in order to
introduce a denoising effect. There is a new configure option,
sixteenth-subpel-uv, which will use 1/16th pel interpolation for
UV if the motion vectors have 1/8 pel accuracy.
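Since the chroma planes are at half the luma resolution, a 1/8-pel luma motion vector lands on 1/16-pel chroma positions, and each of the 16 phases selects a pair of bilinear weights summing to 128 (7-bit fixed point). A minimal scalar sketch of what the 16-entry weight table encodes (the names bilinear_16ths and apply_h_bilinear are illustrative, not libvpx functions):

```c
#include <stdint.h>

/* Weight pairs for the 16 sixteenth-pel phases; each pair sums to 128. */
static const short bilinear_16ths[16][2] = {
    {128,   0}, {120,   8}, {112,  16}, {104,  24},
    { 96,  32}, { 88,  40}, { 80,  48}, { 72,  56},
    { 64,  64}, { 56,  72}, { 48,  80}, { 40,  88},
    { 32,  96}, { 24, 104}, { 16, 112}, {  8, 120}
};

/* First-pass horizontal bilinear filter: each output pixel is a weighted
 * average of two neighbouring source pixels, renormalised by the rounding
 * shift >> 7.  src must have width + 1 readable columns per row. */
static void apply_h_bilinear(const uint8_t *src, int src_stride,
                             uint8_t *dst, int width, int height,
                             int phase /* 0..15 */)
{
    const int w0 = bilinear_16ths[phase][0];
    const int w1 = bilinear_16ths[phase][1];
    for (int r = 0; r < height; r++)
        for (int c = 0; c < width; c++)
            dst[r * width + c] = (uint8_t)((src[r * src_stride + c] * w0 +
                                            src[r * src_stride + c + 1] * w1 +
                                            64) >> 7);
}
```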
With the fixes, the results are promising on the derf set. The enhanced
interpolation option with 8 taps alone gives a 3% improvement over the
derf set:
http://www.corp.google.com/~debargha/vp8_results/enhinterpn.html
Results on high precision mv and on the hd set are to follow.
Patch 6: Adding a missing condition for CONFIG_SIXTEENTH_SUBPEL_UV in
vp8/common/x86/x86_systemdependent.c
Patch 7: Cleaning up various debug messages.
Patch 8: Merge conflict
Change-Id: I5b1d844457aefd7414a9e4e0e06c6ed38fd8cc04
Diffstat (limited to 'vp8/encoder/x86/variance_mmx.c')
-rw-r--r-- | vp8/encoder/x86/variance_mmx.c | 39 |
1 file changed, 37 insertions, 2 deletions
```diff
diff --git a/vp8/encoder/x86/variance_mmx.c b/vp8/encoder/x86/variance_mmx.c
index 92b695f17..b84d00034 100644
--- a/vp8/encoder/x86/variance_mmx.c
+++ b/vp8/encoder/x86/variance_mmx.c
@@ -204,6 +204,27 @@ unsigned int vp8_variance8x16_mmx(
 // the mmx function that does the bilinear filtering and var calculation //
 // int one pass                                                          //
 ///////////////////////////////////////////////////////////////////////////
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+DECLARE_ALIGNED(16, const short, vp8_vp7_bilinear_filters_mmx[16][8]) =
+{
+    { 128, 128, 128, 128,   0,   0,   0,   0 },
+    { 120, 120, 120, 120,   8,   8,   8,   8 },
+    { 112, 112, 112, 112,  16,  16,  16,  16 },
+    { 104, 104, 104, 104,  24,  24,  24,  24 },
+    {  96,  96,  96,  96,  32,  32,  32,  32 },
+    {  88,  88,  88,  88,  40,  40,  40,  40 },
+    {  80,  80,  80,  80,  48,  48,  48,  48 },
+    {  72,  72,  72,  72,  56,  56,  56,  56 },
+    {  64,  64,  64,  64,  64,  64,  64,  64 },
+    {  56,  56,  56,  56,  72,  72,  72,  72 },
+    {  48,  48,  48,  48,  80,  80,  80,  80 },
+    {  40,  40,  40,  40,  88,  88,  88,  88 },
+    {  32,  32,  32,  32,  96,  96,  96,  96 },
+    {  24,  24,  24,  24, 104, 104, 104, 104 },
+    {  16,  16,  16,  16, 112, 112, 112, 112 },
+    {   8,   8,   8,   8, 120, 120, 120, 120 }
+};
+#else
 DECLARE_ALIGNED(16, const short, vp8_vp7_bilinear_filters_mmx[8][8]) =
 {
     { 128, 128, 128, 128,   0,   0,   0,   0 },
@@ -215,6 +236,7 @@ DECLARE_ALIGNED(16, const short, vp8_vp7_bilinear_filters_mmx[8][8]) =
     {  32,  32,  32,  32,  96,  96,  96,  96 },
     {  16,  16,  16,  16, 112, 112, 112, 112 }
 };
+#endif
 
 unsigned int vp8_sub_pixel_variance4x4_mmx
 (
@@ -279,7 +301,6 @@ unsigned int vp8_sub_pixel_variance16x16_mmx
     int xsum0, xsum1;
     unsigned int xxsum0, xxsum1;
 
-
     vp8_filter_block2d_bil_var_mmx(
         src_ptr, src_pixels_per_line,
         dst_ptr, dst_pixels_per_line, 16,
@@ -287,7 +308,6 @@ unsigned int vp8_sub_pixel_variance16x16_mmx
         &xsum0, &xxsum0
     );
 
-
     vp8_filter_block2d_bil_var_mmx(
         src_ptr + 8, src_pixels_per_line,
         dst_ptr + 8, dst_pixels_per_line, 16,
@@ -386,8 +406,13 @@ unsigned int vp8_variance_halfpixvar16x16_h_mmx(
     int recon_stride,
     unsigned int *sse)
 {
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 8, 0,
+                                           ref_ptr, recon_stride, sse);
+#else
     return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 0,
                                            ref_ptr, recon_stride, sse);
+#endif
 }
 
 
@@ -398,8 +423,13 @@ unsigned int vp8_variance_halfpixvar16x16_v_mmx(
     int recon_stride,
     unsigned int *sse)
 {
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 0, 8,
+                                           ref_ptr, recon_stride, sse);
+#else
     return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 0, 4,
                                            ref_ptr, recon_stride, sse);
+#endif
 }
 
 
@@ -410,6 +440,11 @@ unsigned int vp8_variance_halfpixvar16x16_hv_mmx(
     int recon_stride,
     unsigned int *sse)
 {
+#if CONFIG_SIXTEENTH_SUBPEL_UV
+    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 8, 8,
+                                           ref_ptr, recon_stride, sse);
+#else
     return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 4,
                                            ref_ptr, recon_stride, sse);
+#endif
 }
```
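One way to read the halfpixvar wrappers in the diff above: the x/y offsets passed to vp8_sub_pixel_variance16x16_mmx index the bilinear weight table, so a half-pixel offset is phase 4 when the table has 8 entries (1/8-pel steps) and phase 8 when CONFIG_SIXTEENTH_SUBPEL_UV selects the 16-entry (1/16-pel) table. A hypothetical helper (to_table_phase is not a libvpx function) makes the mapping explicit:

```c
/* Hypothetical helper, for illustration only: map a sub-pel offset given
 * in pixels to an index into a bilinear weight table with `entries`
 * phases per pixel. */
static int to_table_phase(double subpel_offset, int entries)
{
    return (int)(subpel_offset * entries + 0.5);
}

/* to_table_phase(0.5,  8) == 4  -- offset used without the new option    */
/* to_table_phase(0.5, 16) == 8  -- offset used with SIXTEENTH_SUBPEL_UV  */
```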