summary | refs | log | tree | commit | diff
path: root/vp8/encoder/x86/variance_ssse3.c
diff options
context:
space:
mode:
author: John Koleszar <jkoleszar@google.com> 2011-03-10 00:05:04 -0500
committer: John Koleszar <jkoleszar@google.com> 2011-03-10 00:05:04 -0500
commit: 820b2b927f0909891f2dd6dabce98c63b4b64fe7 (patch)
tree: 60bb2645c0feeb99a8a3d34dd5f4904995999826 /vp8/encoder/x86/variance_ssse3.c
parent: 0c8bb2f168d1db91dce8c8149f9bda7bc3e0c249 (diff)
parent: a0306ea660b2a35d09645e6d3e98d786614a874d (diff)
download: libvpx-820b2b927f0909891f2dd6dabce98c63b4b64fe7.tar
libvpx-820b2b927f0909891f2dd6dabce98c63b4b64fe7.tar.gz
libvpx-820b2b927f0909891f2dd6dabce98c63b4b64fe7.tar.bz2
libvpx-820b2b927f0909891f2dd6dabce98c63b4b64fe7.zip
Merge remote branch 'internal/upstream' into HEAD
Diffstat (limited to 'vp8/encoder/x86/variance_ssse3.c')
-rw-r--r-- vp8/encoder/x86/variance_ssse3.c 85
1 files changed, 55 insertions, 30 deletions
diff --git a/vp8/encoder/x86/variance_ssse3.c b/vp8/encoder/x86/variance_ssse3.c
index 750ae8b86..eb5d486bf 100644
--- a/vp8/encoder/x86/variance_ssse3.c
+++ b/vp8/encoder/x86/variance_ssse3.c
@@ -76,8 +76,8 @@ unsigned int vp8_sub_pixel_variance16x16_ssse3
unsigned int *sse
)
{
- int xsum0, xsum1;
- unsigned int xxsum0, xxsum1;
+ int xsum0;
+ unsigned int xxsum0;
// note we could avoid these if statements if the calling function
// just called the appropriate functions inside.
@@ -87,14 +87,6 @@ unsigned int vp8_sub_pixel_variance16x16_ssse3
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 16,
&xsum0, &xxsum0);
-
- vp8_half_horiz_variance16x_h_sse2(
- src_ptr + 8, src_pixels_per_line,
- dst_ptr + 8, dst_pixels_per_line, 16,
- &xsum1, &xxsum1);
-
- xsum0 += xsum1;
- xxsum0 += xxsum1;
}
else if (xoffset == 0 && yoffset == 4)
{
@@ -102,14 +94,6 @@ unsigned int vp8_sub_pixel_variance16x16_ssse3
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 16,
&xsum0, &xxsum0);
-
- vp8_half_vert_variance16x_h_sse2(
- src_ptr + 8, src_pixels_per_line,
- dst_ptr + 8, dst_pixels_per_line, 16,
- &xsum1, &xxsum1);
-
- xsum0 += xsum1;
- xxsum0 += xxsum1;
}
else if (xoffset == 4 && yoffset == 4)
{
@@ -117,24 +101,65 @@ unsigned int vp8_sub_pixel_variance16x16_ssse3
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 16,
&xsum0, &xxsum0);
+ }
+ else
+ {
+ vp8_filter_block2d_bil_var_ssse3(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 16,
+ xoffset, yoffset,
+ &xsum0, &xxsum0);
+ }
- vp8_half_horiz_vert_variance16x_h_sse2(
- src_ptr + 8, src_pixels_per_line,
- dst_ptr + 8, dst_pixels_per_line, 16,
- &xsum1, &xxsum1);
+ *sse = xxsum0;
+ return (xxsum0 - ((xsum0 * xsum0) >> 8));
+}
- xsum0 += xsum1;
- xxsum0 += xxsum1;
+unsigned int vp8_sub_pixel_variance16x8_ssse3
+(
+ const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const unsigned char *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse
+
+)
+{
+ int xsum0;
+ unsigned int xxsum0;
+
+ if (xoffset == 4 && yoffset == 0)
+ {
+ vp8_half_horiz_variance16x_h_sse2(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 8,
+ &xsum0, &xxsum0);
+ }
+ else if (xoffset == 0 && yoffset == 4)
+ {
+ vp8_half_vert_variance16x_h_sse2(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 8,
+ &xsum0, &xxsum0);
+ }
+ else if (xoffset == 4 && yoffset == 4)
+ {
+ vp8_half_horiz_vert_variance16x_h_sse2(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 8,
+ &xsum0, &xxsum0);
}
else
{
- vp8_filter_block2d_bil_var_ssse3(
- src_ptr, src_pixels_per_line,
- dst_ptr, dst_pixels_per_line, 16,
- xoffset, yoffset,
- &xsum0, &xxsum0);
+ vp8_filter_block2d_bil_var_ssse3(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 8,
+ xoffset, yoffset,
+ &xsum0, &xxsum0);
}
*sse = xxsum0;
- return (xxsum0 - ((xsum0 * xsum0) >> 8));
+ return (xxsum0 - ((xsum0 * xsum0) >> 7));
}