summaryrefslogtreecommitdiff
path: root/vp8/encoder/x86/variance_sse2.c
diff options
context:
space:
mode:
authorJohn Koleszar <jkoleszar@google.com>2010-10-27 11:28:43 -0400
committerJohn Koleszar <jkoleszar@google.com>2010-10-27 13:00:30 -0400
commita0ae3682aa67f882006c604196f7ee83eff88d84 (patch)
tree7535a276dd78b46b229df9b2615b09f189e48489 /vp8/encoder/x86/variance_sse2.c
parent24c86055c386a90158cc24cbaed2f72773120acb (diff)
downloadlibvpx-a0ae3682aa67f882006c604196f7ee83eff88d84.tar
libvpx-a0ae3682aa67f882006c604196f7ee83eff88d84.tar.gz
libvpx-a0ae3682aa67f882006c604196f7ee83eff88d84.tar.bz2
libvpx-a0ae3682aa67f882006c604196f7ee83eff88d84.zip
Fix half-pixel variance RTCD functions
This patch fixes the system-dependent entries for the half-pixel variance functions in both the RTCD and non-RTCD cases:
- The generic C versions of these functions are now correct. Previously, all three cases called the hv code.
- Wired up the ARM functions in RTCD mode.
- Created stubs for x86 that call the optimized subpixel functions with the correct parameters, rather than falling back to C code.
Change-Id: I1d937d074d929e0eb93aacb1232cc5e0ad1c6184
Diffstat (limited to 'vp8/encoder/x86/variance_sse2.c')
-rw-r--r--vp8/encoder/x86/variance_sse2.c81
1 files changed, 81 insertions, 0 deletions
diff --git a/vp8/encoder/x86/variance_sse2.c b/vp8/encoder/x86/variance_sse2.c
index fb0bac1cb..006e0a24a 100644
--- a/vp8/encoder/x86/variance_sse2.c
+++ b/vp8/encoder/x86/variance_sse2.c
@@ -513,3 +513,84 @@ unsigned int vp8_i_sub_pixel_variance8x16_wmt
return vp8_sub_pixel_variance8x16_wmt(src_ptr, (src_pixels_per_line >> 1), xoffset, yoffset, dst_ptr, (dst_pixels_per_line >> 1), sse);
}
+
+
+/*
+ * Half-pixel variance of a 16x16 block, horizontal case (SSE2 stub).
+ *
+ * Delegates to vp8_half_horiz_variance16x_h_sse2() twice -- once at byte
+ * offset 0 and once at byte offset 8 of both buffers, each time with a
+ * count of 16 -- and merges the two partial results.
+ * NOTE(review): presumably each helper call covers an 8-pixel-wide,
+ * 16-row strip of the block; confirm against the assembly.
+ *
+ * src_ptr / src_pixels_per_line: source buffer and its stride, forwarded
+ *                                unchanged to the helper.
+ * dst_ptr / dst_pixels_per_line: reference buffer and its stride,
+ *                                forwarded unchanged to the helper.
+ * sse: out-parameter; receives the combined second-order accumulator
+ *      (xxsum) reported by the two helper calls.
+ *
+ * Returns xxsum - (xsum * xsum) / 256, where xsum is the combined
+ * first-order accumulator and 256 == 16 * 16.
+ */
+unsigned int vp8_variance_halfpixvar16x16_h_wmt(
+    const unsigned char *src_ptr,
+    int  src_pixels_per_line,
+    const unsigned char *dst_ptr,
+    int  dst_pixels_per_line,
+    unsigned int *sse)
+{
+    int xsum0, xsum1;
+    unsigned int xxsum0, xxsum1;
+
+    /* First strip: columns starting at byte offset 0. */
+    vp8_half_horiz_variance16x_h_sse2(
+        src_ptr, src_pixels_per_line,
+        dst_ptr, dst_pixels_per_line, 16,
+        &xsum0, &xxsum0);
+
+    /* Second strip: columns starting at byte offset 8. */
+    vp8_half_horiz_variance16x_h_sse2(
+        src_ptr + 8, src_pixels_per_line,
+        dst_ptr + 8, dst_pixels_per_line, 16,
+        &xsum1, &xxsum1);
+
+    xsum0 += xsum1;
+    xxsum0 += xxsum1;
+    *sse = xxsum0;
+
+    /*
+     * Square the sum in unsigned arithmetic. With 8-bit samples over 256
+     * pixels the sum's magnitude can reach 255 * 256 = 65280, and
+     * 65280^2 exceeds INT_MAX, so an int * int multiply would be signed
+     * overflow (undefined behaviour). The square still fits in 32 unsigned
+     * bits, and modular arithmetic gives the right value for negative
+     * sums as well.
+     */
+    return (xxsum0 - (((unsigned int)xsum0 * (unsigned int)xsum0) >> 8));
+}
+
+
+/*
+ * Half-pixel variance of a 16x16 block, vertical case (SSE2 stub).
+ *
+ * Delegates to vp8_half_vert_variance16x_h_sse2() twice -- once at byte
+ * offset 0 and once at byte offset 8 of both buffers, each time with a
+ * count of 16 -- and merges the two partial results.
+ * NOTE(review): presumably each helper call covers an 8-pixel-wide,
+ * 16-row strip of the block; confirm against the assembly.
+ *
+ * src_ptr / src_pixels_per_line: source buffer and its stride, forwarded
+ *                                unchanged to the helper.
+ * dst_ptr / dst_pixels_per_line: reference buffer and its stride,
+ *                                forwarded unchanged to the helper.
+ * sse: out-parameter; receives the combined second-order accumulator
+ *      (xxsum) reported by the two helper calls.
+ *
+ * Returns xxsum - (xsum * xsum) / 256, where xsum is the combined
+ * first-order accumulator and 256 == 16 * 16.
+ */
+unsigned int vp8_variance_halfpixvar16x16_v_wmt(
+    const unsigned char *src_ptr,
+    int  src_pixels_per_line,
+    const unsigned char *dst_ptr,
+    int  dst_pixels_per_line,
+    unsigned int *sse)
+{
+    int xsum0, xsum1;
+    unsigned int xxsum0, xxsum1;
+
+    /* First strip: columns starting at byte offset 0. */
+    vp8_half_vert_variance16x_h_sse2(
+        src_ptr, src_pixels_per_line,
+        dst_ptr, dst_pixels_per_line, 16,
+        &xsum0, &xxsum0);
+
+    /* Second strip: columns starting at byte offset 8. */
+    vp8_half_vert_variance16x_h_sse2(
+        src_ptr + 8, src_pixels_per_line,
+        dst_ptr + 8, dst_pixels_per_line, 16,
+        &xsum1, &xxsum1);
+
+    xsum0 += xsum1;
+    xxsum0 += xxsum1;
+    *sse = xxsum0;
+
+    /*
+     * Square the sum in unsigned arithmetic. With 8-bit samples over 256
+     * pixels the sum's magnitude can reach 255 * 256 = 65280, and
+     * 65280^2 exceeds INT_MAX, so an int * int multiply would be signed
+     * overflow (undefined behaviour). The square still fits in 32 unsigned
+     * bits, and modular arithmetic gives the right value for negative
+     * sums as well.
+     */
+    return (xxsum0 - (((unsigned int)xsum0 * (unsigned int)xsum0) >> 8));
+}
+
+
+/*
+ * Half-pixel variance of a 16x16 block, combined horizontal+vertical
+ * case (SSE2 stub).
+ *
+ * Delegates to vp8_half_horiz_vert_variance16x_h_sse2() twice -- once at
+ * byte offset 0 and once at byte offset 8 of both buffers, each time with
+ * a count of 16 -- and merges the two partial results.
+ * NOTE(review): presumably each helper call covers an 8-pixel-wide,
+ * 16-row strip of the block; confirm against the assembly.
+ *
+ * src_ptr / src_pixels_per_line: source buffer and its stride, forwarded
+ *                                unchanged to the helper.
+ * dst_ptr / dst_pixels_per_line: reference buffer and its stride,
+ *                                forwarded unchanged to the helper.
+ * sse: out-parameter; receives the combined second-order accumulator
+ *      (xxsum) reported by the two helper calls.
+ *
+ * Returns xxsum - (xsum * xsum) / 256, where xsum is the combined
+ * first-order accumulator and 256 == 16 * 16.
+ */
+unsigned int vp8_variance_halfpixvar16x16_hv_wmt(
+    const unsigned char *src_ptr,
+    int  src_pixels_per_line,
+    const unsigned char *dst_ptr,
+    int  dst_pixels_per_line,
+    unsigned int *sse)
+{
+    int xsum0, xsum1;
+    unsigned int xxsum0, xxsum1;
+
+    /* First strip: columns starting at byte offset 0. */
+    vp8_half_horiz_vert_variance16x_h_sse2(
+        src_ptr, src_pixels_per_line,
+        dst_ptr, dst_pixels_per_line, 16,
+        &xsum0, &xxsum0);
+
+    /* Second strip: columns starting at byte offset 8. */
+    vp8_half_horiz_vert_variance16x_h_sse2(
+        src_ptr + 8, src_pixels_per_line,
+        dst_ptr + 8, dst_pixels_per_line, 16,
+        &xsum1, &xxsum1);
+
+    xsum0 += xsum1;
+    xxsum0 += xxsum1;
+    *sse = xxsum0;
+
+    /*
+     * Square the sum in unsigned arithmetic. With 8-bit samples over 256
+     * pixels the sum's magnitude can reach 255 * 256 = 65280, and
+     * 65280^2 exceeds INT_MAX, so an int * int multiply would be signed
+     * overflow (undefined behaviour). The square still fits in 32 unsigned
+     * bits, and modular arithmetic gives the right value for negative
+     * sums as well.
+     */
+    return (xxsum0 - (((unsigned int)xsum0 * (unsigned int)xsum0) >> 8));
+}