author     Johann <johannkoenig@google.com>    2015-05-15 11:52:03 -0700
committer  Johann <johannkoenig@google.com>    2015-05-26 12:01:52 -0700
commit     c3bdffb0a508ad08d5dfa613c029f368d4293d4c (patch)
tree       4c087783da1d12bfbe09311ebb33f200e789ebf3 /vp8/encoder
parent     976f7f42c1ad1ff3cc0792572f9c4f41f05bb375 (diff)
Move variance functions to vpx_dsp
subpel functions will be moved in another patch.
Change-Id: Idb2e049bad0b9b32ac42cc7731cd6903de2826ce
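For context, the kernels this patch moves compute block variance and MSE between a source block and a reference block. The two are closely related: both accumulate the sum of squared pixel differences (SSE), and variance additionally subtracts a mean correction — the deleted ARM implementation below even notes it is "Based on vp8_variance16x16_armv6" with the unused sum computation stripped out. The following is a minimal C sketch of that relationship, illustrative only and not the vpx_dsp reference code; the function name and the want_mse flag are invented for this example:

#include <stdint.h>

/* Illustrative sketch (not the vpx_dsp source): both kernels accumulate the
 * sum of squared differences; variance additionally applies a mean
 * correction, while MSE returns the raw SSE. */
static unsigned int block16x16_var_or_mse(const unsigned char *src, int src_stride,
                                          const unsigned char *ref, int ref_stride,
                                          unsigned int *sse, int want_mse) {
    int sum = 0;          /* sum of differences; only variance needs this */
    unsigned int sq = 0;  /* sum of squared differences (SSE) */
    int r, c;
    for (r = 0; r < 16; ++r) {
        for (c = 0; c < 16; ++c) {
            const int diff = src[c] - ref[c];
            sum += diff;
            sq += (unsigned int)(diff * diff);
        }
        src += src_stride;
        ref += ref_stride;
    }
    *sse = sq;
    /* MSE is the raw SSE; variance subtracts sum^2 / (16 * 16). */
    return want_mse ? sq : sq - (unsigned int)(((int64_t)sum * sum) >> 8);
}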
Diffstat (limited to 'vp8/encoder')
-rw-r--r--  vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm | 138
-rw-r--r--  vp8/encoder/arm/neon/vp8_mse16x16_neon.c     | 131
-rw-r--r--  vp8/encoder/encodeframe.c                    |   3
-rw-r--r--  vp8/encoder/encodeintra.c                    |   3
-rw-r--r--  vp8/encoder/firstpass.c                      |  11
-rw-r--r--  vp8/encoder/onyx_if.c                        |  21
-rw-r--r--  vp8/encoder/pickinter.c                      |  36
-rw-r--r--  vp8/encoder/picklpf.c                        |   3
-rw-r--r--  vp8/encoder/rdopt.c                          |   7
9 files changed, 32 insertions(+), 321 deletions(-)
diff --git a/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm b/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm
deleted file mode 100644
index 000805d4f..000000000
--- a/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm
+++ /dev/null
@@ -1,138 +0,0 @@
-;
-;  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS. All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_mse16x16_armv6|
-
-    ARM
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-
-; r0    unsigned char *src_ptr
-; r1    int source_stride
-; r2    unsigned char *ref_ptr
-; r3    int recon_stride
-; stack unsigned int *sse
-;
-;note: Based on vp8_variance16x16_armv6. In this function, sum is never used.
-;      So, we can remove this part of calculation.
-
-|vp8_mse16x16_armv6| PROC
-
-    push    {r4-r9, lr}
-
-    pld     [r0, r1, lsl #0]
-    pld     [r2, r3, lsl #0]
-
-    mov     r12, #16            ; set loop counter to 16 (=block height)
-    mov     r4, #0              ; initialize sse = 0
-
-loop
-    ; 1st 4 pixels
-    ldr     r5, [r0, #0x0]      ; load 4 src pixels
-    ldr     r6, [r2, #0x0]      ; load 4 ref pixels
-
-    mov     lr, #0              ; constant zero
-
-    usub8   r8, r5, r6          ; calculate difference
-    pld     [r0, r1, lsl #1]
-    sel     r7, r8, lr          ; select bytes with positive difference
-    usub8   r9, r6, r5          ; calculate difference with reversed operands
-    pld     [r2, r3, lsl #1]
-    sel     r8, r9, lr          ; select bytes with negative difference
-
-    ; calculate partial sums
-    usad8   r5, r7, lr          ; calculate sum of positive differences
-    usad8   r6, r8, lr          ; calculate sum of negative differences
-    orr     r8, r8, r7          ; differences of all 4 pixels
-
-    ldr     r5, [r0, #0x4]      ; load 4 src pixels
-
-    ; calculate sse
-    uxtb16  r6, r8              ; byte (two pixels) to halfwords
-    uxtb16  r7, r8, ror #8      ; another two pixels to halfwords
-    smlad   r4, r6, r6, r4      ; dual signed multiply, add and accumulate (1)
-
-    ; 2nd 4 pixels
-    ldr     r6, [r2, #0x4]      ; load 4 ref pixels
-    smlad   r4, r7, r7, r4      ; dual signed multiply, add and accumulate (2)
-
-    usub8   r8, r5, r6          ; calculate difference
-    sel     r7, r8, lr          ; select bytes with positive difference
-    usub8   r9, r6, r5          ; calculate difference with reversed operands
-    sel     r8, r9, lr          ; select bytes with negative difference
-
-    ; calculate partial sums
-    usad8   r5, r7, lr          ; calculate sum of positive differences
-    usad8   r6, r8, lr          ; calculate sum of negative differences
-    orr     r8, r8, r7          ; differences of all 4 pixels
-    ldr     r5, [r0, #0x8]      ; load 4 src pixels
-    ; calculate sse
-    uxtb16  r6, r8              ; byte (two pixels) to halfwords
-    uxtb16  r7, r8, ror #8      ; another two pixels to halfwords
-    smlad   r4, r6, r6, r4      ; dual signed multiply, add and accumulate (1)
-
-    ; 3rd 4 pixels
-    ldr     r6, [r2, #0x8]      ; load 4 ref pixels
-    smlad   r4, r7, r7, r4      ; dual signed multiply, add and accumulate (2)
-
-    usub8   r8, r5, r6          ; calculate difference
-    sel     r7, r8, lr          ; select bytes with positive difference
-    usub8   r9, r6, r5          ; calculate difference with reversed operands
-    sel     r8, r9, lr          ; select bytes with negative difference
-
-    ; calculate partial sums
-    usad8   r5, r7, lr          ; calculate sum of positive differences
-    usad8   r6, r8, lr          ; calculate sum of negative differences
-    orr     r8, r8, r7          ; differences of all 4 pixels
-
-    ldr     r5, [r0, #0xc]      ; load 4 src pixels
-
-    ; calculate sse
-    uxtb16  r6, r8              ; byte (two pixels) to halfwords
-    uxtb16  r7, r8, ror #8      ; another two pixels to halfwords
-    smlad   r4, r6, r6, r4      ; dual signed multiply, add and accumulate (1)
-
-    ; 4th 4 pixels
-    ldr     r6, [r2, #0xc]      ; load 4 ref pixels
-    smlad   r4, r7, r7, r4      ; dual signed multiply, add and accumulate (2)
-
-    usub8   r8, r5, r6          ; calculate difference
-    add     r0, r0, r1          ; set src_ptr to next row
-    sel     r7, r8, lr          ; select bytes with positive difference
-    usub8   r9, r6, r5          ; calculate difference with reversed operands
-    add     r2, r2, r3          ; set dst_ptr to next row
-    sel     r8, r9, lr          ; select bytes with negative difference
-
-    ; calculate partial sums
-    usad8   r5, r7, lr          ; calculate sum of positive differences
-    usad8   r6, r8, lr          ; calculate sum of negative differences
-    orr     r8, r8, r7          ; differences of all 4 pixels
-
-    subs    r12, r12, #1        ; next row
-
-    ; calculate sse
-    uxtb16  r6, r8              ; byte (two pixels) to halfwords
-    uxtb16  r7, r8, ror #8      ; another two pixels to halfwords
-    smlad   r4, r6, r6, r4      ; dual signed multiply, add and accumulate (1)
-    smlad   r4, r7, r7, r4      ; dual signed multiply, add and accumulate (2)
-
-    bne     loop
-
-    ; return stuff
-    ldr     r1, [sp, #28]       ; get address of sse
-    mov     r0, r4              ; return sse
-    str     r4, [r1]            ; store sse
-
-    pop     {r4-r9, pc}
-
-    ENDP
-
-    END
diff --git a/vp8/encoder/arm/neon/vp8_mse16x16_neon.c b/vp8/encoder/arm/neon/vp8_mse16x16_neon.c
deleted file mode 100644
index f806809df..000000000
--- a/vp8/encoder/arm/neon/vp8_mse16x16_neon.c
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <arm_neon.h>
-
-unsigned int vp8_mse16x16_neon(
-        const unsigned char *src_ptr,
-        int source_stride,
-        const unsigned char *ref_ptr,
-        int recon_stride,
-        unsigned int *sse) {
-    int i;
-    int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
-    int64x1_t d0s64;
-    uint8x16_t q0u8, q1u8, q2u8, q3u8;
-    int32x4_t q7s32, q8s32, q9s32, q10s32;
-    uint16x8_t q11u16, q12u16, q13u16, q14u16;
-    int64x2_t q1s64;
-
-    q7s32 = vdupq_n_s32(0);
-    q8s32 = vdupq_n_s32(0);
-    q9s32 = vdupq_n_s32(0);
-    q10s32 = vdupq_n_s32(0);
-
-    for (i = 0; i < 8; i++) {  // mse16x16_neon_loop
-        q0u8 = vld1q_u8(src_ptr);
-        src_ptr += source_stride;
-        q1u8 = vld1q_u8(src_ptr);
-        src_ptr += source_stride;
-        q2u8 = vld1q_u8(ref_ptr);
-        ref_ptr += recon_stride;
-        q3u8 = vld1q_u8(ref_ptr);
-        ref_ptr += recon_stride;
-
-        q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8));
-        q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8));
-        q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8));
-        q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8));
-
-        d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
-        d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
-        q7s32 = vmlal_s16(q7s32, d22s16, d22s16);
-        q8s32 = vmlal_s16(q8s32, d23s16, d23s16);
-
-        d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
-        d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
-        q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
-        q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
-
-        d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
-        d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
-        q7s32 = vmlal_s16(q7s32, d26s16, d26s16);
-        q8s32 = vmlal_s16(q8s32, d27s16, d27s16);
-
-        d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16));
-        d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16));
-        q9s32 = vmlal_s16(q9s32, d28s16, d28s16);
-        q10s32 = vmlal_s16(q10s32, d29s16, d29s16);
-    }
-
-    q7s32 = vaddq_s32(q7s32, q8s32);
-    q9s32 = vaddq_s32(q9s32, q10s32);
-    q10s32 = vaddq_s32(q7s32, q9s32);
-
-    q1s64 = vpaddlq_s32(q10s32);
-    d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
-
-    vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d0s64), 0);
-    return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0);
-}
-
-unsigned int vp8_get4x4sse_cs_neon(
-        const unsigned char *src_ptr,
-        int source_stride,
-        const unsigned char *ref_ptr,
-        int recon_stride) {
-    int16x4_t d22s16, d24s16, d26s16, d28s16;
-    int64x1_t d0s64;
-    uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
-    int32x4_t q7s32, q8s32, q9s32, q10s32;
-    uint16x8_t q11u16, q12u16, q13u16, q14u16;
-    int64x2_t q1s64;
-
-    d0u8 = vld1_u8(src_ptr);
-    src_ptr += source_stride;
-    d4u8 = vld1_u8(ref_ptr);
-    ref_ptr += recon_stride;
-    d1u8 = vld1_u8(src_ptr);
-    src_ptr += source_stride;
-    d5u8 = vld1_u8(ref_ptr);
-    ref_ptr += recon_stride;
-    d2u8 = vld1_u8(src_ptr);
-    src_ptr += source_stride;
-    d6u8 = vld1_u8(ref_ptr);
-    ref_ptr += recon_stride;
-    d3u8 = vld1_u8(src_ptr);
-    src_ptr += source_stride;
-    d7u8 = vld1_u8(ref_ptr);
-    ref_ptr += recon_stride;
-
-    q11u16 = vsubl_u8(d0u8, d4u8);
-    q12u16 = vsubl_u8(d1u8, d5u8);
-    q13u16 = vsubl_u8(d2u8, d6u8);
-    q14u16 = vsubl_u8(d3u8, d7u8);
-
-    d22s16 = vget_low_s16(vreinterpretq_s16_u16(q11u16));
-    d24s16 = vget_low_s16(vreinterpretq_s16_u16(q12u16));
-    d26s16 = vget_low_s16(vreinterpretq_s16_u16(q13u16));
-    d28s16 = vget_low_s16(vreinterpretq_s16_u16(q14u16));
-
-    q7s32 = vmull_s16(d22s16, d22s16);
-    q8s32 = vmull_s16(d24s16, d24s16);
-    q9s32 = vmull_s16(d26s16, d26s16);
-    q10s32 = vmull_s16(d28s16, d28s16);
-
-    q7s32 = vaddq_s32(q7s32, q8s32);
-    q9s32 = vaddq_s32(q9s32, q10s32);
-    q9s32 = vaddq_s32(q7s32, q9s32);
-
-    q1s64 = vpaddlq_s32(q9s32);
-    d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
-
-    return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0);
-}
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index 378e902c6..d381d8ddf 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -11,6 +11,7 @@
 #include "vpx_config.h"
 #include "vp8_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
 #include "encodemb.h"
 #include "encodemv.h"
 #include "vp8/common/common.h"
@@ -90,7 +91,7 @@ static unsigned int tt_activity_measure( VP8_COMP *cpi, MACROBLOCK *x )
      * lambda using a non-linear combination (e.g., the smallest, or second
      * smallest, etc.).
      */
-    act = vp8_variance16x16(x->src.y_buffer,
+    act = vpx_variance16x16(x->src.y_buffer,
                     x->src.y_stride, VP8_VAR_OFFS, 0, &sse);
     act = act<<4;
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index cfa4cb927..e2de5eecb 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -11,6 +11,7 @@
 #include "vpx_config.h"
 #include "vp8_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
 #include "quantize.h"
 #include "vp8/common/reconintra4x4.h"
 #include "encodemb.h"
@@ -44,7 +45,7 @@ int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_dc_pred)
         }
     }
 
-    intra_pred_var = vp8_get_mb_ss(x->src_diff);
+    intra_pred_var = vpx_get_mb_ss(x->src_diff);
 
     return intra_pred_var;
 }
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index a6ff0e7a0..3deb4abb3 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -12,6 +12,7 @@
 #include <limits.h>
 #include <stdio.h>
 
+#include "./vpx_dsp_rtcd.h"
 #include "./vpx_scale_rtcd.h"
 #include "block.h"
 #include "onyx_int.h"
@@ -422,14 +423,14 @@ static void zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x,
     /* Set up pointers for this macro block raw buffer */
     raw_ptr = (unsigned char *)(raw_buffer->y_buffer + recon_yoffset + d->offset);
-    vp8_mse16x16 ( src_ptr, src_stride, raw_ptr, raw_stride,
-                   (unsigned int *)(raw_motion_err));
+    vpx_mse16x16(src_ptr, src_stride, raw_ptr, raw_stride,
+                 (unsigned int *)(raw_motion_err));
 
     /* Set up pointers for this macro block recon buffer */
     xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset;
     ref_ptr = (unsigned char *)(xd->pre.y_buffer + d->offset );
-    vp8_mse16x16 ( src_ptr, src_stride, ref_ptr, ref_stride,
-                   (unsigned int *)(best_motion_err));
+    vpx_mse16x16(src_ptr, src_stride, ref_ptr, ref_stride,
+                 (unsigned int *)(best_motion_err));
 }
 
 static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x,
@@ -453,7 +454,7 @@ static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x,
     int new_mv_mode_penalty = 256;
 
     /* override the default variance function to use MSE */
-    v_fn_ptr.vf = vp8_mse16x16;
+    v_fn_ptr.vf = vpx_mse16x16;
 
     /* Set up pointers for this macro block recon buffer */
     xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset;
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index c2bb23295..40e29e191 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -2131,7 +2131,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
 #endif
 
     cpi->fn_ptr[BLOCK_16X16].sdf            = vpx_sad16x16;
-    cpi->fn_ptr[BLOCK_16X16].vf             = vp8_variance16x16;
+    cpi->fn_ptr[BLOCK_16X16].vf             = vpx_variance16x16;
     cpi->fn_ptr[BLOCK_16X16].svf            = vp8_sub_pixel_variance16x16;
     cpi->fn_ptr[BLOCK_16X16].svf_halfpix_h  = vp8_variance_halfpixvar16x16_h;
     cpi->fn_ptr[BLOCK_16X16].svf_halfpix_v  = vp8_variance_halfpixvar16x16_v;
@@ -2141,7 +2141,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
     cpi->fn_ptr[BLOCK_16X16].sdx4df         = vpx_sad16x16x4d;
 
     cpi->fn_ptr[BLOCK_16X8].sdf            = vpx_sad16x8;
-    cpi->fn_ptr[BLOCK_16X8].vf             = vp8_variance16x8;
+    cpi->fn_ptr[BLOCK_16X8].vf             = vpx_variance16x8;
     cpi->fn_ptr[BLOCK_16X8].svf            = vp8_sub_pixel_variance16x8;
     cpi->fn_ptr[BLOCK_16X8].svf_halfpix_h  = NULL;
     cpi->fn_ptr[BLOCK_16X8].svf_halfpix_v  = NULL;
@@ -2151,7 +2151,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
     cpi->fn_ptr[BLOCK_16X8].sdx4df         = vpx_sad16x8x4d;
 
     cpi->fn_ptr[BLOCK_8X16].sdf            = vpx_sad8x16;
-    cpi->fn_ptr[BLOCK_8X16].vf             = vp8_variance8x16;
+    cpi->fn_ptr[BLOCK_8X16].vf             = vpx_variance8x16;
     cpi->fn_ptr[BLOCK_8X16].svf            = vp8_sub_pixel_variance8x16;
     cpi->fn_ptr[BLOCK_8X16].svf_halfpix_h  = NULL;
     cpi->fn_ptr[BLOCK_8X16].svf_halfpix_v  = NULL;
@@ -2161,7 +2161,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
     cpi->fn_ptr[BLOCK_8X16].sdx4df         = vpx_sad8x16x4d;
 
     cpi->fn_ptr[BLOCK_8X8].sdf             = vpx_sad8x8;
-    cpi->fn_ptr[BLOCK_8X8].vf              = vp8_variance8x8;
+    cpi->fn_ptr[BLOCK_8X8].vf              = vpx_variance8x8;
     cpi->fn_ptr[BLOCK_8X8].svf             = vp8_sub_pixel_variance8x8;
     cpi->fn_ptr[BLOCK_8X8].svf_halfpix_h   = NULL;
     cpi->fn_ptr[BLOCK_8X8].svf_halfpix_v   = NULL;
@@ -2171,7 +2171,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
     cpi->fn_ptr[BLOCK_8X8].sdx4df          = vpx_sad8x8x4d;
 
     cpi->fn_ptr[BLOCK_4X4].sdf             = vpx_sad4x4;
-    cpi->fn_ptr[BLOCK_4X4].vf              = vp8_variance4x4;
+    cpi->fn_ptr[BLOCK_4X4].vf              = vpx_variance4x4;
     cpi->fn_ptr[BLOCK_4X4].svf             = vp8_sub_pixel_variance4x4;
     cpi->fn_ptr[BLOCK_4X4].svf_halfpix_h   = NULL;
     cpi->fn_ptr[BLOCK_4X4].svf_halfpix_v   = NULL;
@@ -2558,7 +2558,7 @@ static uint64_t calc_plane_error(unsigned char *orig, int orig_stride,
         {
             unsigned int sse;
 
-            vp8_mse16x16(orig + col, orig_stride,
+            vpx_mse16x16(orig + col, orig_stride,
                          recon + col, recon_stride,
                          &sse);
             total_sse += sse;
@@ -3384,7 +3384,7 @@ static int measure_square_diff_partial(YV12_BUFFER_CONFIG *source,
             int index = block_index_row + (j >> 4);
             if (cpi->consec_zero_last[index] >= min_consec_zero_last) {
               unsigned int sse;
-              Total += vp8_mse16x16(src + j,
+              Total += vpx_mse16x16(src + j,
                                     source->y_stride,
                                     dst + j,
                                     dest->y_stride,
                                     &sse);
@@ -3448,7 +3448,7 @@ static void process_denoiser_mode_change(VP8_COMP *cpi) {
           int index = block_index_row + (j >> 4);
           if (cpi->consec_zero_last[index] >= min_consec_zero_last) {
             unsigned int sse;
-            const unsigned int var = vp8_variance16x16(src + j,
+            const unsigned int var = vpx_variance16x16(src + j,
                                                        ystride,
                                                        dst + j,
                                                        ystride,
@@ -3458,7 +3458,7 @@ static void process_denoiser_mode_change(VP8_COMP *cpi) {
             // is small (to avoid effects from lighting change).
             if ((sse - var) < 128) {
              unsigned int sse2;
-              const unsigned int act = vp8_variance16x16(src + j,
+              const unsigned int act = vpx_variance16x16(src + j,
                                                          ystride,
                                                          const_source,
                                                          0,
@@ -5993,7 +5993,8 @@ int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest)
         for (j = 0; j < source->y_width; j += 16)
        {
             unsigned int sse;
-            Total += vp8_mse16x16(src + j, source->y_stride, dst + j, dest->y_stride, &sse);
+            Total += vpx_mse16x16(src + j, source->y_stride,
+                                  dst + j, dest->y_stride, &sse);
         }
 
         src += 16 * source->y_stride;
diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index 98ea5a040..053bf119a 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -11,6 +11,7 @@
 #include <limits.h>
 #include "vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
 #include "onyx_int.h"
 #include "modecosts.h"
 #include "encodeintra.h"
@@ -215,33 +216,6 @@ int vp8_get_inter_mbpred_error(MACROBLOCK *mb,
 
 }
 
-
-unsigned int vp8_get4x4sse_cs_c
-(
-    const unsigned char *src_ptr,
-    int source_stride,
-    const unsigned char *ref_ptr,
-    int recon_stride
-)
-{
-    int distortion = 0;
-    int r, c;
-
-    for (r = 0; r < 4; r++)
-    {
-        for (c = 0; c < 4; c++)
-        {
-            int diff = src_ptr[c] - ref_ptr[c];
-            distortion += diff * diff;
-        }
-
-        src_ptr += source_stride;
-        ref_ptr += recon_stride;
-    }
-
-    return distortion;
-}
-
 static int get_prediction_error(BLOCK *be, BLOCKD *b)
 {
     unsigned char *sptr;
@@ -249,7 +223,7 @@ static int get_prediction_error(BLOCK *be, BLOCKD *b)
     sptr = (*(be->base_src) + be->src);
     dptr = b->predictor;
 
-    return vp8_get4x4sse_cs(sptr, be->src_stride, dptr, 16);
+    return vpx_get4x4sse_cs(sptr, be->src_stride, dptr, 16);
 
 }
@@ -1037,7 +1011,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
             else
             {
                 rate2 += rate;
-                distortion2 = vp8_variance16x16(
+                distortion2 = vpx_variance16x16(
                                     *(b->base_src), b->src_stride,
                                     x->e_mbd.predictor, 16, &sse);
                 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
@@ -1066,7 +1040,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
                                            xd->dst.y_stride,
                                            xd->predictor,
                                            16);
-            distortion2 = vp8_variance16x16
+            distortion2 = vpx_variance16x16
                                 (*(b->base_src), b->src_stride,
                                  x->e_mbd.predictor, 16, &sse);
             rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
@@ -1547,7 +1521,7 @@ void vp8_pick_intra_mode(MACROBLOCK *x, int *rate_)
                                          xd->dst.y_stride,
                                          xd->predictor,
                                          16);
-        distortion = vp8_variance16x16
+        distortion = vpx_variance16x16
                          (*(b->base_src), b->src_stride, xd->predictor, 16, &sse);
         rate = x->mbmode_cost[xd->frame_type][mode];
         this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
diff --git a/vp8/encoder/picklpf.c b/vp8/encoder/picklpf.c
index 890053dcf..875b37f68 100644
--- a/vp8/encoder/picklpf.c
+++ b/vp8/encoder/picklpf.c
@@ -9,6 +9,7 @@
  */
 
+#include "./vpx_dsp_rtcd.h"
 #include "./vpx_scale_rtcd.h"
 #include "vp8/common/onyxc_int.h"
 #include "onyx_int.h"
@@ -83,7 +84,7 @@ static int calc_partial_ssl_err(YV12_BUFFER_CONFIG *source,
         for (j = 0; j < source->y_width; j += 16)
         {
             unsigned int sse;
-            Total += vp8_mse16x16(src + j, source->y_stride,
+            Total += vpx_mse16x16(src + j, source->y_stride,
                                   dst + j, dest->y_stride,
                                   &sse);
         }
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 9ccd85eb9..17194f0d4 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -15,6 +15,7 @@
 #include <assert.h>
 #include "vpx_config.h"
 #include "vp8_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
 #include "tokenize.h"
 #include "treewriter.h"
 #include "onyx_int.h"
@@ -507,9 +508,9 @@ int VP8_UVSSE(MACROBLOCK *x)
     }
     else
     {
-        vp8_variance8x8(uptr, pre_stride,
+        vpx_variance8x8(uptr, pre_stride,
                         upred_ptr, uv_stride, &sse2);
-        vp8_variance8x8(vptr, pre_stride,
+        vpx_variance8x8(vptr, pre_stride,
                         vpred_ptr, uv_stride, &sse1);
         sse2 += sse1;
     }
@@ -1783,7 +1784,7 @@ static int evaluate_inter_mode_rd(int mdcounts[4],
         if(threshold < x->encode_breakout)
            threshold = x->encode_breakout;
 
-        var = vp8_variance16x16
+        var = vpx_variance16x16
                 (*(b->base_src), b->src_stride,
                  x->e_mbd.predictor, 16, &sse);
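The caller-side pattern visible throughout the hunks above is mechanical: each file gains #include "./vpx_dsp_rtcd.h" and the affected calls swap the vp8_ prefix for vpx_ with identical argument lists. A hedged sketch of that pattern follows; the wrapper function luma_mse16x16 is hypothetical and exists only to frame the rename, while the include and the vpx_mse16x16 call mirror this patch:

#include "./vpx_dsp_rtcd.h"  /* new header added by this patch */

/* Hypothetical helper, for illustration only; the arguments mirror the
 * vpx_mse16x16 calls updated in this patch. */
static unsigned int luma_mse16x16(const unsigned char *src, int src_stride,
                                  const unsigned char *recon, int recon_stride) {
    unsigned int sse;
    /* before this patch: vp8_mse16x16(src, src_stride, recon, recon_stride, &sse); */
    vpx_mse16x16(src, src_stride, recon, recon_stride, &sse);
    return sse;
}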