diff options
author | John Koleszar <jkoleszar@google.com> | 2010-10-27 11:28:43 -0400 |
---|---|---|
committer | John Koleszar <jkoleszar@google.com> | 2010-10-27 13:00:30 -0400 |
commit | a0ae3682aa67f882006c604196f7ee83eff88d84 (patch) | |
tree | 7535a276dd78b46b229df9b2615b09f189e48489 /vp8/encoder/x86/variance_sse2.c | |
parent | 24c86055c386a90158cc24cbaed2f72773120acb (diff) | |
download | libvpx-a0ae3682aa67f882006c604196f7ee83eff88d84.tar libvpx-a0ae3682aa67f882006c604196f7ee83eff88d84.tar.gz libvpx-a0ae3682aa67f882006c604196f7ee83eff88d84.tar.bz2 libvpx-a0ae3682aa67f882006c604196f7ee83eff88d84.zip |
Fix half-pixel variance RTCD functions
This patch fixes the system dependent entries for the half-pixel
variance functions in both the RTCD and non-RTCD cases:
- The generic C versions of these functions are now correct.
Before all three cases called the hv code.
- Wire up the ARM functions in RTCD mode
- Created stubs for x86 to call the optimized subpixel functions
with the correct parameters, rather than falling back to C
code.
Change-Id: I1d937d074d929e0eb93aacb1232cc5e0ad1c6184
Diffstat (limited to 'vp8/encoder/x86/variance_sse2.c')
-rw-r--r-- | vp8/encoder/x86/variance_sse2.c | 81 |
1 files changed, 81 insertions, 0 deletions
diff --git a/vp8/encoder/x86/variance_sse2.c b/vp8/encoder/x86/variance_sse2.c index fb0bac1cb..006e0a24a 100644 --- a/vp8/encoder/x86/variance_sse2.c +++ b/vp8/encoder/x86/variance_sse2.c @@ -513,3 +513,84 @@ unsigned int vp8_i_sub_pixel_variance8x16_wmt return vp8_sub_pixel_variance8x16_wmt(src_ptr, (src_pixels_per_line >> 1), xoffset, yoffset, dst_ptr, (dst_pixels_per_line >> 1), sse); } + + +unsigned int vp8_variance_halfpixvar16x16_h_wmt( + const unsigned char *src_ptr, + int src_pixels_per_line, + const unsigned char *dst_ptr, + int dst_pixels_per_line, + unsigned int *sse) +{ + int xsum0, xsum1; + unsigned int xxsum0, xxsum1; + + vp8_half_horiz_variance16x_h_sse2( + src_ptr, src_pixels_per_line, + dst_ptr, dst_pixels_per_line, 16, + &xsum0, &xxsum0); + + vp8_half_horiz_variance16x_h_sse2( + src_ptr + 8, src_pixels_per_line, + dst_ptr + 8, dst_pixels_per_line, 16, + &xsum1, &xxsum1); + + xsum0 += xsum1; + xxsum0 += xxsum1; + *sse = xxsum0; + return (xxsum0 - ((xsum0 * xsum0) >> 8)); +} + + +unsigned int vp8_variance_halfpixvar16x16_v_wmt( + const unsigned char *src_ptr, + int src_pixels_per_line, + const unsigned char *dst_ptr, + int dst_pixels_per_line, + unsigned int *sse) +{ + int xsum0, xsum1; + unsigned int xxsum0, xxsum1; + + vp8_half_vert_variance16x_h_sse2( + src_ptr, src_pixels_per_line, + dst_ptr, dst_pixels_per_line, 16, + &xsum0, &xxsum0); + + vp8_half_vert_variance16x_h_sse2( + src_ptr + 8, src_pixels_per_line, + dst_ptr + 8, dst_pixels_per_line, 16, + &xsum1, &xxsum1); + + xsum0 += xsum1; + xxsum0 += xxsum1; + *sse = xxsum0; + return (xxsum0 - ((xsum0 * xsum0) >> 8)); +} + + +unsigned int vp8_variance_halfpixvar16x16_hv_wmt( + const unsigned char *src_ptr, + int src_pixels_per_line, + const unsigned char *dst_ptr, + int dst_pixels_per_line, + unsigned int *sse) +{ + int xsum0, xsum1; + unsigned int xxsum0, xxsum1; + + vp8_half_horiz_vert_variance16x_h_sse2( + src_ptr, src_pixels_per_line, + dst_ptr, dst_pixels_per_line, 16, + &xsum0, &xxsum0); + + vp8_half_horiz_vert_variance16x_h_sse2( + src_ptr + 8, src_pixels_per_line, + dst_ptr + 8, dst_pixels_per_line, 16, + &xsum1, &xxsum1); + + xsum0 += xsum1; + xxsum0 += xxsum1; + *sse = xxsum0; + return (xxsum0 - ((xsum0 * xsum0) >> 8)); +} |