diff options
Diffstat (limited to 'vp9/encoder/vp9_variance.c')
-rw-r--r-- | vp9/encoder/vp9_variance.c | 308 |
1 files changed, 13 insertions, 295 deletions
diff --git a/vp9/encoder/vp9_variance.c b/vp9/encoder/vp9_variance.c index f38f96d6c..1f6b083c4 100644 --- a/vp9/encoder/vp9_variance.c +++ b/vp9/encoder/vp9_variance.c @@ -9,6 +9,7 @@ */ #include "./vp9_rtcd.h" +#include "./vpx_dsp_rtcd.h" #include "vpx_ports/mem.h" #include "vpx/vpx_integer.h" @@ -18,26 +19,6 @@ #include "vp9/encoder/vp9_variance.h" -void variance(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - int w, int h, unsigned int *sse, int *sum) { - int i, j; - - *sum = 0; - *sse = 0; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - const int diff = a[j] - b[j]; - *sum += diff; - *sse += diff * diff; - } - - a += a_stride; - b += b_stride; - } -} - // Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal // or vertical direction to produce the filtered output block. Used to implement // first-pass of 2-D separable filter. @@ -100,25 +81,6 @@ static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr, } } -unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) { - unsigned int i, sum = 0; - - for (i = 0; i < 256; ++i) { - sum += src_ptr[i] * src_ptr[i]; - } - - return sum; -} - -#define VAR(W, H) \ -unsigned int vp9_variance##W##x##H##_c(const uint8_t *a, int a_stride, \ - const uint8_t *b, int b_stride, \ - unsigned int *sse) { \ - int sum; \ - variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ - return *sse - (((int64_t)sum * sum) / (W * H)); \ -} - #define SUBPIX_VAR(W, H) \ unsigned int vp9_sub_pixel_variance##W##x##H##_c( \ const uint8_t *src, int src_stride, \ @@ -133,7 +95,7 @@ unsigned int vp9_sub_pixel_variance##W##x##H##_c( \ var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ BILINEAR_FILTERS_2TAP(yoffset)); \ \ - return vp9_variance##W##x##H##_c(temp2, W, dst, dst_stride, sse); \ + return vpx_variance##W##x##H##_c(temp2, W, dst, dst_stride, sse); \ } #define SUBPIX_AVG_VAR(W, H) \ @@ -152,178 +114,51 @@ unsigned int vp9_sub_pixel_avg_variance##W##x##H##_c( \ var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ BILINEAR_FILTERS_2TAP(yoffset)); \ \ - vp9_comp_avg_pred(temp3, second_pred, W, H, temp2, W); \ + vpx_comp_avg_pred(temp3, second_pred, W, H, temp2, W); \ \ - return vp9_variance##W##x##H##_c(temp3, W, dst, dst_stride, sse); \ -} - -void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, - const uint8_t *ref_ptr, int ref_stride, - unsigned int *sse, int *sum) { - variance(src_ptr, source_stride, ref_ptr, ref_stride, 16, 16, sse, sum); + return vpx_variance##W##x##H##_c(temp3, W, dst, dst_stride, sse); \ } -void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, - const uint8_t *ref_ptr, int ref_stride, - unsigned int *sse, int *sum) { - variance(src_ptr, source_stride, ref_ptr, ref_stride, 8, 8, sse, sum); -} - -unsigned int vp9_mse16x16_c(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance(src, src_stride, ref, ref_stride, 16, 16, sse, &sum); - return *sse; -} - -unsigned int vp9_mse16x8_c(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance(src, src_stride, ref, ref_stride, 16, 8, sse, &sum); - return *sse; -} - -unsigned int vp9_mse8x16_c(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance(src, src_stride, ref, ref_stride, 8, 16, sse, &sum); - return *sse; -} - -unsigned int vp9_mse8x8_c(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance(src, src_stride, ref, ref_stride, 8, 8, sse, &sum); - return *sse; -} - -VAR(4, 4) SUBPIX_VAR(4, 4) SUBPIX_AVG_VAR(4, 4) -VAR(4, 8) SUBPIX_VAR(4, 8) SUBPIX_AVG_VAR(4, 8) -VAR(8, 4) SUBPIX_VAR(8, 4) SUBPIX_AVG_VAR(8, 4) -VAR(8, 8) SUBPIX_VAR(8, 8) SUBPIX_AVG_VAR(8, 8) -VAR(8, 16) SUBPIX_VAR(8, 16) SUBPIX_AVG_VAR(8, 16) -VAR(16, 8) SUBPIX_VAR(16, 8) SUBPIX_AVG_VAR(16, 8) -VAR(16, 16) SUBPIX_VAR(16, 16) SUBPIX_AVG_VAR(16, 16) -VAR(16, 32) SUBPIX_VAR(16, 32) SUBPIX_AVG_VAR(16, 32) -VAR(32, 16) SUBPIX_VAR(32, 16) SUBPIX_AVG_VAR(32, 16) -VAR(32, 32) SUBPIX_VAR(32, 32) SUBPIX_AVG_VAR(32, 32) -VAR(32, 64) SUBPIX_VAR(32, 64) SUBPIX_AVG_VAR(32, 64) -VAR(64, 32) SUBPIX_VAR(64, 32) SUBPIX_AVG_VAR(64, 32) -VAR(64, 64) SUBPIX_VAR(64, 64) SUBPIX_AVG_VAR(64, 64) -void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width, - int height, const uint8_t *ref, int ref_stride) { - int i, j; - - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) { - const int tmp = pred[j] + ref[j]; - comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1); - } - comp_pred += width; - pred += width; - ref += ref_stride; - } -} - #if CONFIG_VP9_HIGHBITDEPTH -void highbd_variance64(const uint8_t *a8, int a_stride, - const uint8_t *b8, int b_stride, - int w, int h, uint64_t *sse, - uint64_t *sum) { - int i, j; - - uint16_t *a = CONVERT_TO_SHORTPTR(a8); - uint16_t *b = CONVERT_TO_SHORTPTR(b8); - *sum = 0; - *sse = 0; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - const int diff = a[j] - b[j]; - *sum += diff; - *sse += diff * diff; - } - a += a_stride; - b += b_stride; - } -} - -void highbd_variance(const uint8_t *a8, int a_stride, - const uint8_t *b8, int b_stride, - int w, int h, unsigned int *sse, - int *sum) { - uint64_t sse_long = 0; - uint64_t sum_long = 0; - highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); - *sse = (unsigned int)sse_long; - *sum = (int)sum_long; -} - -void highbd_10_variance(const uint8_t *a8, int a_stride, - const uint8_t *b8, int b_stride, - int w, int h, unsigned int *sse, - int *sum) { - uint64_t sse_long = 0; - uint64_t sum_long = 0; - highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); - *sum = (int)ROUND_POWER_OF_TWO(sum_long, 2); - *sse = (unsigned int)ROUND_POWER_OF_TWO(sse_long, 4); -} - -void highbd_12_variance(const uint8_t *a8, int a_stride, - const uint8_t *b8, int b_stride, - int w, int h, unsigned int *sse, - int *sum) { - uint64_t sse_long = 0; - uint64_t sum_long = 0; - highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); - *sum = (int)ROUND_POWER_OF_TWO(sum_long, 4); - *sse = (unsigned int)ROUND_POWER_OF_TWO(sse_long, 8); -} - static void highbd_var_filter_block2d_bil_first_pass( const uint8_t *src_ptr8, uint16_t *output_ptr, @@ -374,35 +209,6 @@ static void highbd_var_filter_block2d_bil_second_pass( } } -#define HIGHBD_VAR(W, H) \ -unsigned int vp9_highbd_variance##W##x##H##_c(const uint8_t *a, int a_stride, \ - const uint8_t *b, int b_stride, \ - unsigned int *sse) { \ - int sum; \ - highbd_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ - return *sse - (((int64_t)sum * sum) / (W * H)); \ -} \ -\ -unsigned int vp9_highbd_10_variance##W##x##H##_c(const uint8_t *a, \ - int a_stride, \ - const uint8_t *b, \ - int b_stride, \ - unsigned int *sse) { \ - int sum; \ - highbd_10_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ - return *sse - (((int64_t)sum * sum) / (W * H)); \ -} \ -\ -unsigned int vp9_highbd_12_variance##W##x##H##_c(const uint8_t *a, \ - int a_stride, \ - const uint8_t *b, \ - int b_stride, \ - unsigned int *sse) { \ - int sum; \ - highbd_12_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ - return *sse - (((int64_t)sum * sum) / (W * H)); \ -} - #define HIGHBD_SUBPIX_VAR(W, H) \ unsigned int vp9_highbd_sub_pixel_variance##W##x##H##_c( \ const uint8_t *src, int src_stride, \ @@ -417,7 +223,7 @@ unsigned int vp9_highbd_sub_pixel_variance##W##x##H##_c( \ highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ BILINEAR_FILTERS_2TAP(yoffset)); \ \ - return vp9_highbd_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \ + return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \ dst_stride, sse); \ } \ \ @@ -434,7 +240,7 @@ unsigned int vp9_highbd_10_sub_pixel_variance##W##x##H##_c( \ highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ BILINEAR_FILTERS_2TAP(yoffset)); \ \ - return vp9_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \ + return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \ W, dst, dst_stride, sse); \ } \ \ @@ -451,7 +257,7 @@ unsigned int vp9_highbd_12_sub_pixel_variance##W##x##H##_c( \ highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ BILINEAR_FILTERS_2TAP(yoffset)); \ \ - return vp9_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \ + return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \ W, dst, dst_stride, sse); \ } @@ -471,10 +277,10 @@ unsigned int vp9_highbd_sub_pixel_avg_variance##W##x##H##_c( \ highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ BILINEAR_FILTERS_2TAP(yoffset)); \ \ - vp9_highbd_comp_avg_pred(temp3, second_pred, W, H, \ + vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \ CONVERT_TO_BYTEPTR(temp2), W); \ \ - return vp9_highbd_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, dst, \ + return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, dst, \ dst_stride, sse); \ } \ \ @@ -493,10 +299,10 @@ unsigned int vp9_highbd_10_sub_pixel_avg_variance##W##x##H##_c( \ highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ BILINEAR_FILTERS_2TAP(yoffset)); \ \ - vp9_highbd_comp_avg_pred(temp3, second_pred, W, H, \ + vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \ CONVERT_TO_BYTEPTR(temp2), W); \ \ - return vp9_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), \ + return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), \ W, dst, dst_stride, sse); \ } \ \ @@ -515,137 +321,49 @@ unsigned int vp9_highbd_12_sub_pixel_avg_variance##W##x##H##_c( \ highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ BILINEAR_FILTERS_2TAP(yoffset)); \ \ - vp9_highbd_comp_avg_pred(temp3, second_pred, W, H, \ + vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \ CONVERT_TO_BYTEPTR(temp2), W); \ \ - return vp9_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), \ + return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), \ W, dst, dst_stride, sse); \ } -#define HIGHBD_GET_VAR(S) \ -void vp9_highbd_get##S##x##S##var_c(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride, \ - unsigned int *sse, int *sum) { \ - highbd_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \ -} \ -\ -void vp9_highbd_10_get##S##x##S##var_c(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride, \ - unsigned int *sse, int *sum) { \ - highbd_10_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \ -} \ -\ -void vp9_highbd_12_get##S##x##S##var_c(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride, \ - unsigned int *sse, int *sum) { \ - highbd_12_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \ -} - -#define HIGHBD_MSE(W, H) \ -unsigned int vp9_highbd_mse##W##x##H##_c(const uint8_t *src, \ - int src_stride, \ - const uint8_t *ref, \ - int ref_stride, \ - unsigned int *sse) { \ - int sum; \ - highbd_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \ - return *sse; \ -} \ -\ -unsigned int vp9_highbd_10_mse##W##x##H##_c(const uint8_t *src, \ - int src_stride, \ - const uint8_t *ref, \ - int ref_stride, \ - unsigned int *sse) { \ - int sum; \ - highbd_10_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \ - return *sse; \ -} \ -\ -unsigned int vp9_highbd_12_mse##W##x##H##_c(const uint8_t *src, \ - int src_stride, \ - const uint8_t *ref, \ - int ref_stride, \ - unsigned int *sse) { \ - int sum; \ - highbd_12_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \ - return *sse; \ -} - -HIGHBD_GET_VAR(8) -HIGHBD_GET_VAR(16) - -HIGHBD_MSE(16, 16) -HIGHBD_MSE(16, 8) -HIGHBD_MSE(8, 16) -HIGHBD_MSE(8, 8) - -HIGHBD_VAR(4, 4) HIGHBD_SUBPIX_VAR(4, 4) HIGHBD_SUBPIX_AVG_VAR(4, 4) -HIGHBD_VAR(4, 8) HIGHBD_SUBPIX_VAR(4, 8) HIGHBD_SUBPIX_AVG_VAR(4, 8) -HIGHBD_VAR(8, 4) HIGHBD_SUBPIX_VAR(8, 4) HIGHBD_SUBPIX_AVG_VAR(8, 4) -HIGHBD_VAR(8, 8) HIGHBD_SUBPIX_VAR(8, 8) HIGHBD_SUBPIX_AVG_VAR(8, 8) -HIGHBD_VAR(8, 16) HIGHBD_SUBPIX_VAR(8, 16) HIGHBD_SUBPIX_AVG_VAR(8, 16) -HIGHBD_VAR(16, 8) HIGHBD_SUBPIX_VAR(16, 8) HIGHBD_SUBPIX_AVG_VAR(16, 8) -HIGHBD_VAR(16, 16) HIGHBD_SUBPIX_VAR(16, 16) HIGHBD_SUBPIX_AVG_VAR(16, 16) -HIGHBD_VAR(16, 32) HIGHBD_SUBPIX_VAR(16, 32) HIGHBD_SUBPIX_AVG_VAR(16, 32) -HIGHBD_VAR(32, 16) HIGHBD_SUBPIX_VAR(32, 16) HIGHBD_SUBPIX_AVG_VAR(32, 16) -HIGHBD_VAR(32, 32) HIGHBD_SUBPIX_VAR(32, 32) HIGHBD_SUBPIX_AVG_VAR(32, 32) -HIGHBD_VAR(32, 64) HIGHBD_SUBPIX_VAR(32, 64) HIGHBD_SUBPIX_AVG_VAR(32, 64) -HIGHBD_VAR(64, 32) HIGHBD_SUBPIX_VAR(64, 32) HIGHBD_SUBPIX_AVG_VAR(64, 32) -HIGHBD_VAR(64, 64) HIGHBD_SUBPIX_VAR(64, 64) HIGHBD_SUBPIX_AVG_VAR(64, 64) - -void vp9_highbd_comp_avg_pred(uint16_t *comp_pred, const uint8_t *pred8, - int width, int height, const uint8_t *ref8, - int ref_stride) { - int i, j; - uint16_t *pred = CONVERT_TO_SHORTPTR(pred8); - uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) { - const int tmp = pred[j] + ref[j]; - comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1); - } - comp_pred += width; - pred += width; - ref += ref_stride; - } -} #endif // CONFIG_VP9_HIGHBITDEPTH |