diff options
Diffstat (limited to 'vp8/encoder/x86/variance_ssse3.c')
-rw-r--r-- | vp8/encoder/x86/variance_ssse3.c | 85 |
1 files changed, 55 insertions, 30 deletions
diff --git a/vp8/encoder/x86/variance_ssse3.c b/vp8/encoder/x86/variance_ssse3.c
index 750ae8b86..eb5d486bf 100644
--- a/vp8/encoder/x86/variance_ssse3.c
+++ b/vp8/encoder/x86/variance_ssse3.c
@@ -76,8 +76,8 @@ unsigned int vp8_sub_pixel_variance16x16_ssse3
     unsigned int *sse
 )
 {
-    int xsum0, xsum1;
-    unsigned int xxsum0, xxsum1;
+    int xsum0;
+    unsigned int xxsum0;
 
     // note we could avoid these if statements if the calling function
     // just called the appropriate functions inside.
@@ -87,14 +87,6 @@ unsigned int vp8_sub_pixel_variance16x16_ssse3
             src_ptr, src_pixels_per_line,
             dst_ptr, dst_pixels_per_line, 16,
             &xsum0, &xxsum0);
-
-        vp8_half_horiz_variance16x_h_sse2(
-            src_ptr + 8, src_pixels_per_line,
-            dst_ptr + 8, dst_pixels_per_line, 16,
-            &xsum1, &xxsum1);
-
-        xsum0 += xsum1;
-        xxsum0 += xxsum1;
     }
     else if (xoffset == 0 && yoffset == 4)
     {
@@ -102,14 +94,6 @@ unsigned int vp8_sub_pixel_variance16x16_ssse3
             src_ptr, src_pixels_per_line,
             dst_ptr, dst_pixels_per_line, 16,
             &xsum0, &xxsum0);
-
-        vp8_half_vert_variance16x_h_sse2(
-            src_ptr + 8, src_pixels_per_line,
-            dst_ptr + 8, dst_pixels_per_line, 16,
-            &xsum1, &xxsum1);
-
-        xsum0 += xsum1;
-        xxsum0 += xxsum1;
     }
     else if (xoffset == 4 && yoffset == 4)
     {
@@ -117,24 +101,65 @@ unsigned int vp8_sub_pixel_variance16x16_ssse3
             src_ptr, src_pixels_per_line,
             dst_ptr, dst_pixels_per_line, 16,
             &xsum0, &xxsum0);
+    }
+    else
+    {
+        vp8_filter_block2d_bil_var_ssse3(
+            src_ptr, src_pixels_per_line,
+            dst_ptr, dst_pixels_per_line, 16,
+            xoffset, yoffset,
+            &xsum0, &xxsum0);
+    }
 
-        vp8_half_horiz_vert_variance16x_h_sse2(
-            src_ptr + 8, src_pixels_per_line,
-            dst_ptr + 8, dst_pixels_per_line, 16,
-            &xsum1, &xxsum1);
+    *sse = xxsum0;
+    return (xxsum0 - ((xsum0 * xsum0) >> 8));
+}
 
-        xsum0 += xsum1;
-        xxsum0 += xxsum1;
+unsigned int vp8_sub_pixel_variance16x8_ssse3
+(
+    const unsigned char *src_ptr,
+    int src_pixels_per_line,
+    int xoffset,
+    int yoffset,
+    const unsigned char *dst_ptr,
+    int dst_pixels_per_line,
+    unsigned int *sse
+
+)
+{
+    int xsum0;
+    unsigned int xxsum0;
+
+    if (xoffset == 4 && yoffset == 0)
+    {
+        vp8_half_horiz_variance16x_h_sse2(
+            src_ptr, src_pixels_per_line,
+            dst_ptr, dst_pixels_per_line, 8,
+            &xsum0, &xxsum0);
+    }
+    else if (xoffset == 0 && yoffset == 4)
+    {
+        vp8_half_vert_variance16x_h_sse2(
+            src_ptr, src_pixels_per_line,
+            dst_ptr, dst_pixels_per_line, 8,
+            &xsum0, &xxsum0);
+    }
+    else if (xoffset == 4 && yoffset == 4)
+    {
+        vp8_half_horiz_vert_variance16x_h_sse2(
+            src_ptr, src_pixels_per_line,
+            dst_ptr, dst_pixels_per_line, 8,
+            &xsum0, &xxsum0);
     }
     else
     {
-        vp8_filter_block2d_bil_var_ssse3(
-            src_ptr, src_pixels_per_line,
-            dst_ptr, dst_pixels_per_line, 16,
-            xoffset, yoffset,
-            &xsum0, &xxsum0);
+        vp8_filter_block2d_bil_var_ssse3(
+            src_ptr, src_pixels_per_line,
+            dst_ptr, dst_pixels_per_line, 8,
+            xoffset, yoffset,
+            &xsum0, &xxsum0);
     }
 
     *sse = xxsum0;
-    return (xxsum0 - ((xsum0 * xsum0) >> 8));
+    return (xxsum0 - ((xsum0 * xsum0) >> 7));
 }