author     Johann <johannkoenig@google.com>    2015-05-15 11:52:03 -0700
committer  Johann <johannkoenig@google.com>    2015-05-26 12:01:52 -0700
commit     c3bdffb0a508ad08d5dfa613c029f368d4293d4c (patch)
tree       4c087783da1d12bfbe09311ebb33f200e789ebf3 /vp8/encoder
parent     976f7f42c1ad1ff3cc0792572f9c4f41f05bb375 (diff)
Move variance functions to vpx_dsp
subpel functions will be moved in another patch.
Change-Id: Idb2e049bad0b9b32ac42cc7731cd6903de2826ce
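For context, the kernels this patch moves compute block variance and MSE between a source block and a reference block. The two are closely related: both accumulate the sum of squared pixel differences (SSE), and variance additionally subtracts a mean correction — the deleted ARM implementation below even notes it is "Based on vp8_variance16x16_armv6" with the unused sum computation stripped out. The following is a minimal C sketch of that relationship, illustrative only and not the vpx_dsp reference code; the function name and the want_mse flag are invented for this example:

#include <stdint.h>

/* Illustrative sketch (not the vpx_dsp source): both kernels accumulate the
 * sum of squared differences; variance additionally applies a mean
 * correction, while MSE returns the raw SSE. */
static unsigned int block16x16_var_or_mse(const unsigned char *src, int src_stride,
                                          const unsigned char *ref, int ref_stride,
                                          unsigned int *sse, int want_mse) {
    int sum = 0;          /* sum of differences; only variance needs this */
    unsigned int sq = 0;  /* sum of squared differences (SSE) */
    int r, c;
    for (r = 0; r < 16; ++r) {
        for (c = 0; c < 16; ++c) {
            const int diff = src[c] - ref[c];
            sum += diff;
            sq += (unsigned int)(diff * diff);
        }
        src += src_stride;
        ref += ref_stride;
    }
    *sse = sq;
    /* MSE is the raw SSE; variance subtracts sum^2 / (16 * 16). */
    return want_mse ? sq : sq - (unsigned int)(((int64_t)sum * sum) >> 8);
}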
Diffstat (limited to 'vp8/encoder')
-rw-r--r--  vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm | 138
-rw-r--r--  vp8/encoder/arm/neon/vp8_mse16x16_neon.c     | 131
-rw-r--r--  vp8/encoder/encodeframe.c                    |   3
-rw-r--r--  vp8/encoder/encodeintra.c                    |   3
-rw-r--r--  vp8/encoder/firstpass.c                      |  11
-rw-r--r--  vp8/encoder/onyx_if.c                        |  21
-rw-r--r--  vp8/encoder/pickinter.c                      |  36
-rw-r--r--  vp8/encoder/picklpf.c                        |   3
-rw-r--r--  vp8/encoder/rdopt.c                          |   7
9 files changed, 32 insertions(+), 321 deletions(-)
diff --git a/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm b/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm
deleted file mode 100644
index 000805d4f..000000000
--- a/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm
+++ /dev/null
@@ -1,138 +0,0 @@
-;
-;  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS. All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_mse16x16_armv6|
-
-    ARM
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-
-; r0    unsigned char *src_ptr
-; r1    int source_stride
-; r2    unsigned char *ref_ptr
-; r3    int recon_stride
-; stack unsigned int *sse
-;
-;note: Based on vp8_variance16x16_armv6. In this function, sum is never used.
-;      So, we can remove this part of calculation.
-
-|vp8_mse16x16_armv6| PROC
-
-    push    {r4-r9, lr}
-
-    pld     [r0, r1, lsl #0]
-    pld     [r2, r3, lsl #0]
-
-    mov     r12, #16            ; set loop counter to 16 (=block height)
-    mov     r4, #0              ; initialize sse = 0
-
-loop
-    ; 1st 4 pixels
-    ldr     r5, [r0, #0x0]      ; load 4 src pixels
-    ldr     r6, [r2, #0x0]      ; load 4 ref pixels
-
-    mov     lr, #0              ; constant zero
-
-    usub8   r8, r5, r6          ; calculate difference
-    pld     [r0, r1, lsl #1]
-    sel     r7, r8, lr          ; select bytes with positive difference
-    usub8   r9, r6, r5          ; calculate difference with reversed operands
-    pld     [r2, r3, lsl #1]
-    sel     r8, r9, lr          ; select bytes with negative difference
-
-    ; calculate partial sums
-    usad8   r5, r7, lr          ; calculate sum of positive differences
-    usad8   r6, r8, lr          ; calculate sum of negative differences
-    orr     r8, r8, r7          ; differences of all 4 pixels
-
-    ldr     r5, [r0, #0x4]      ; load 4 src pixels
-
-    ; calculate sse
-    uxtb16  r6, r8              ; byte (two pixels) to halfwords
-    uxtb16  r7, r8, ror #8      ; another two pixels to halfwords
-    smlad   r4, r6, r6, r4      ; dual signed multiply, add and accumulate (1)
-
-    ; 2nd 4 pixels
-    ldr     r6, [r2, #0x4]      ; load 4 ref pixels
-    smlad   r4, r7, r7, r4      ; dual signed multiply, add and accumulate (2)
-
-    usub8   r8, r5, r6          ; calculate difference
-    sel     r7, r8, lr          ; select bytes with positive difference
-    usub8   r9, r6, r5          ; calculate difference with reversed operands
-    sel     r8, r9, lr          ; select bytes with negative difference
-
-    ; calculate partial sums
-    usad8   r5, r7, lr          ; calculate sum of positive differences
-    usad8   r6, r8, lr          ; calculate sum of negative differences
-    orr     r8, r8, r7          ; differences of all 4 pixels
-    ldr     r5, [r0, #0x8]      ; load 4 src pixels
-    ; calculate sse
-    uxtb16  r6, r8              ; byte (two pixels) to halfwords
-    uxtb16  r7, r8, ror #8      ; another two pixels to halfwords
-    smlad   r4, r6, r6, r4      ; dual signed multiply, add and accumulate (1)
-
-    ; 3rd 4 pixels
-    ldr     r6, [r2, #0x8]      ; load 4 ref pixels
-    smlad   r4, r7, r7, r4      ; dual signed multiply, add and accumulate (2)
-
-    usub8   r8, r5, r6          ; calculate difference
-    sel     r7, r8, lr          ; select bytes with positive difference
-    usub8   r9, r6, r5          ; calculate difference with reversed operands
-    sel     r8, r9, lr          ; select bytes with negative difference
-
-    ; calculate partial sums
-    usad8   r5, r7, lr          ; calculate sum of positive differences
-    usad8   r6, r8, lr          ; calculate sum of negative differences
-    orr     r8, r8, r7          ; differences of all 4 pixels
-
-    ldr     r5, [r0, #0xc]      ; load 4 src pixels
-
-    ; calculate sse
-    uxtb16  r6, r8              ; byte (two pixels) to halfwords
-    uxtb16  r7, r8, ror #8      ; another two pixels to halfwords
-    smlad   r4, r6, r6, r4      ; dual signed multiply, add and accumulate (1)
-
-    ; 4th 4 pixels
-    ldr     r6, [r2, #0xc]      ; load 4 ref pixels
-    smlad   r4, r7, r7, r4      ; dual signed multiply, add and accumulate (2)
-
-    usub8   r8, r5, r6          ; calculate difference
-    add     r0, r0, r1          ; set src_ptr to next row
-    sel     r7, r8, lr          ; select bytes with positive difference
-    usub8   r9, r6, r5          ; calculate difference with reversed operands
-    add     r2, r2, r3          ; set dst_ptr to next row
-    sel     r8, r9, lr          ; select bytes with negative difference
-
-    ; calculate partial sums
-    usad8   r5, r7, lr          ; calculate sum of positive differences
-    usad8   r6, r8, lr          ; calculate sum of negative differences
-    orr     r8, r8, r7          ; differences of all 4 pixels
-
-    subs    r12, r12, #1        ; next row
-
-    ; calculate sse
-    uxtb16  r6, r8              ; byte (two pixels) to halfwords
-    uxtb16  r7, r8, ror #8      ; another two pixels to halfwords
-    smlad   r4, r6, r6, r4      ; dual signed multiply, add and accumulate (1)
-    smlad   r4, r7, r7, r4      ; dual signed multiply, add and accumulate (2)
-
-    bne     loop
-
-    ; return stuff
-    ldr     r1, [sp, #28]       ; get address of sse
-    mov     r0, r4              ; return sse
-    str     r4, [r1]            ; store sse
-
-    pop     {r4-r9, pc}
-
-    ENDP
-
-    END
diff --git a/vp8/encoder/arm/neon/vp8_mse16x16_neon.c b/vp8/encoder/arm/neon/vp8_mse16x16_neon.c
deleted file mode 100644
index f806809df..000000000
--- a/vp8/encoder/arm/neon/vp8_mse16x16_neon.c
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <arm_neon.h>
-
-unsigned int vp8_mse16x16_neon(
-        const unsigned char *src_ptr,
-        int source_stride,
-        const unsigned char *ref_ptr,
-        int recon_stride,
-        unsigned int *sse) {
-    int i;
-    int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
-    int64x1_t d0s64;
-    uint8x16_t q0u8, q1u8, q2u8, q3u8;
-    int32x4_t q7s32, q8s32, q9s32, q10s32;
-    uint16x8_t q11u16, q12u16, q13u16, q14u16;
-    int64x2_t q1s64;
-
-    q7s32 = vdupq_n_s32(0);
-    q8s32 = vdupq_n_s32(0);
-    q9s32 = vdupq_n_s32(0);
-    q10s32 = vdupq_n_s32(0);
-
-    for (i = 0; i < 8; i++) {  // mse16x16_neon_loop
-        q0u8 = vld1q_u8(src_ptr);
-        src_ptr += source_stride;
-        q1u8 = vld1q_u8(src_ptr);
-        src_ptr += source_stride;
-        q2u8 = vld1q_u8(ref_ptr);
-        ref_ptr += recon_stride;
-        q3u8 = vld1q_u8(ref_ptr);
-        ref_ptr += recon_stride;
-
-        q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8));
-        q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8));
-        q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8));
-        q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8));
-
-        d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
-        d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
-        q7s32 = vmlal_s16(q7s32, d22s16, d22s16);
-        q8s32 = vmlal_s16(q8s32, d23s16, d23s16);
-
-        d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
-        d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
-        q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
-        q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
-
-        d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
-        d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
-        q7s32 = vmlal_s16(q7s32, d26s16, d26s16);
-        q8s32 = vmlal_s16(q8s32, d27s16, d27s16);
-
-        d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16));
-        d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16));
-        q9s32 = vmlal_s16(q9s32, d28s16, d28s16);
-        q10s32 = vmlal_s16(q10s32, d29s16, d29s16);
-    }
-
-    q7s32 = vaddq_s32(q7s32, q8s32);
-    q9s32 = vaddq_s32(q9s32, q10s32);
-    q10s32 = vaddq_s32(q7s32, q9s32);
-
-    q1s64 = vpaddlq_s32(q10s32);
-    d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
-
-    vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d0s64), 0);
-    return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0);
-}
-
-unsigned int vp8_get4x4sse_cs_neon(
-        const unsigned char *src_ptr,
-        int source_stride,
-        const unsigned char *ref_ptr,
-        int recon_stride) {
-    int16x4_t d22s16, d24s16, d26s16, d28s16;
-    int64x1_t d0s64;
-    uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
-    int32x4_t q7s32, q8s32, q9s32, q10s32;
-    uint16x8_t q11u16, q12u16, q13u16, q14u16;
-    int64x2_t q1s64;
-
-    d0u8 = vld1_u8(src_ptr);
-    src_ptr += source_stride;
-    d4u8 = vld1_u8(ref_ptr);
-    ref_ptr += recon_stride;
-    d1u8 = vld1_u8(src_ptr);
-    src_ptr += source_stride;
-    d5u8 = vld1_u8(ref_ptr);
-    ref_ptr += recon_stride;
-    d2u8 = vld1_u8(src_ptr);
-    src_ptr += source_stride;
-    d6u8 = vld1_u8(ref_ptr);
-    ref_ptr += recon_stride;
-    d3u8 = vld1_u8(src_ptr);
-    src_ptr += source_stride;
-    d7u8 = vld1_u8(ref_ptr);
-    ref_ptr += recon_stride;
-
-    q11u16 = vsubl_u8(d0u8, d4u8);
-    q12u16 = vsubl_u8(d1u8, d5u8);
-    q13u16 = vsubl_u8(d2u8, d6u8);
-    q14u16 = vsubl_u8(d3u8, d7u8);
-
-    d22s16 = vget_low_s16(vreinterpretq_s16_u16(q11u16));
-    d24s16 = vget_low_s16(vreinterpretq_s16_u16(q12u16));
-    d26s16 = vget_low_s16(vreinterpretq_s16_u16(q13u16));
-    d28s16 = vget_low_s16(vreinterpretq_s16_u16(q14u16));
-
-    q7s32 = vmull_s16(d22s16, d22s16);
-    q8s32 = vmull_s16(d24s16, d24s16);
-    q9s32 = vmull_s16(d26s16, d26s16);
-    q10s32 = vmull_s16(d28s16, d28s16);
-
-    q7s32 = vaddq_s32(q7s32, q8s32);
-    q9s32 = vaddq_s32(q9s32, q10s32);
-    q9s32 = vaddq_s32(q7s32, q9s32);
-
-    q1s64 = vpaddlq_s32(q9s32);
-    d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
-
-    return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0);
-}
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index 378e902c6..d381d8ddf 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -11,6 +11,7 @@
 #include "vpx_config.h"
 #include "vp8_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
 #include "encodemb.h"
 #include "encodemv.h"
 #include "vp8/common/common.h"
@@ -90,7 +91,7 @@ static unsigned int tt_activity_measure( VP8_COMP *cpi, MACROBLOCK *x )
      * lambda using a non-linear combination (e.g., the smallest, or second
      * smallest, etc.).
      */
-    act = vp8_variance16x16(x->src.y_buffer,
+    act = vpx_variance16x16(x->src.y_buffer,
                     x->src.y_stride, VP8_VAR_OFFS, 0, &sse);
     act = act<<4;
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index cfa4cb927..e2de5eecb 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -11,6 +11,7 @@
 #include "vpx_config.h"
 #include "vp8_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
 #include "quantize.h"
 #include "vp8/common/reconintra4x4.h"
 #include "encodemb.h"
@@ -44,7 +45,7 @@ int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_dc_pred)
         }
     }
 
-    intra_pred_var = vp8_get_mb_ss(x->src_diff);
+    intra_pred_var = vpx_get_mb_ss(x->src_diff);
 
     return intra_pred_var;
 }
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index a6ff0e7a0..3deb4abb3 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -12,6 +12,7 @@
 #include <limits.h>
 #include <stdio.h>
 
+#include "./vpx_dsp_rtcd.h"
 #include "./vpx_scale_rtcd.h"
 #include "block.h"
 #include "onyx_int.h"
@@ -422,14 +423,14 @@ static void zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x,
     /* Set up pointers for this macro block raw buffer */
     raw_ptr = (unsigned char *)(raw_buffer->y_buffer + recon_yoffset + d->offset);
-    vp8_mse16x16 ( src_ptr, src_stride, raw_ptr, raw_stride,
-                   (unsigned int *)(raw_motion_err));
+    vpx_mse16x16(src_ptr, src_stride, raw_ptr, raw_stride,
+                 (unsigned int *)(raw_motion_err));
 
     /* Set up pointers for this macro block recon buffer */
     xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset;
     ref_ptr = (unsigned char *)(xd->pre.y_buffer + d->offset );
-    vp8_mse16x16 ( src_ptr, src_stride, ref_ptr, ref_stride,
-                   (unsigned int *)(best_motion_err));
+    vpx_mse16x16(src_ptr, src_stride, ref_ptr, ref_stride,
+                 (unsigned int *)(best_motion_err));
 }
 
 static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x,
@@ -453,7 +454,7 @@ static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x,
     int new_mv_mode_penalty = 256;
 
     /* override the default variance function to use MSE */
-    v_fn_ptr.vf = vp8_mse16x16;
+    v_fn_ptr.vf = vpx_mse16x16;
 
     /* Set up pointers for this macro block recon buffer */
     xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset;
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index c2bb23295..40e29e191 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -2131,7 +2131,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
 #endif
 
     cpi->fn_ptr[BLOCK_16X16].sdf            = vpx_sad16x16;
-    cpi->fn_ptr[BLOCK_16X16].vf             = vp8_variance16x16;
+    cpi->fn_ptr[BLOCK_16X16].vf             = vpx_variance16x16;
     cpi->fn_ptr[BLOCK_16X16].svf            = vp8_sub_pixel_variance16x16;
     cpi->fn_ptr[BLOCK_16X16].svf_halfpix_h  = vp8_variance_halfpixvar16x16_h;
     cpi->fn_ptr[BLOCK_16X16].svf_halfpix_v  = vp8_variance_halfpixvar16x16_v;
@@ -2141,7 +2141,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
     cpi->fn_ptr[BLOCK_16X16].sdx4df         = vpx_sad16x16x4d;
 
     cpi->fn_ptr[BLOCK_16X8].sdf            = vpx_sad16x8;
-    cpi->fn_ptr[BLOCK_16X8].vf             = vp8_variance16x8;
+    cpi->fn_ptr[BLOCK_16X8].vf             = vpx_variance16x8;
     cpi->fn_ptr[BLOCK_16X8].svf            = vp8_sub_pixel_variance16x8;
     cpi->fn_ptr[BLOCK_16X8].svf_halfpix_h  = NULL;
     cpi->fn_ptr[BLOCK_16X8].svf_halfpix_v  = NULL;
@@ -2151,7 +2151,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
     cpi->fn_ptr[BLOCK_16X8].sdx4df         = vpx_sad16x8x4d;
 
     cpi->fn_ptr[BLOCK_8X16].sdf            = vpx_sad8x16;
-    cpi->fn_ptr[BLOCK_8X16].vf             = vp8_variance8x16;
+    cpi->fn_ptr[BLOCK_8X16].vf             = vpx_variance8x16;
     cpi->fn_ptr[BLOCK_8X16].svf            = vp8_sub_pixel_variance8x16;
     cpi->fn_ptr[BLOCK_8X16].svf_halfpix_h  = NULL;
     cpi->fn_ptr[BLOCK_8X16].svf_halfpix_v  = NULL;
@@ -2161,7 +2161,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
     cpi->fn_ptr[BLOCK_8X16].sdx4df         = vpx_sad8x16x4d;
 
     cpi->fn_ptr[BLOCK_8X8].sdf             = vpx_sad8x8;
-    cpi->fn_ptr[BLOCK_8X8].vf              = vp8_variance8x8;
+    cpi->fn_ptr[BLOCK_8X8].vf              = vpx_variance8x8;
     cpi->fn_ptr[BLOCK_8X8].svf             = vp8_sub_pixel_variance8x8;
     cpi->fn_ptr[BLOCK_8X8].svf_halfpix_h   = NULL;
     cpi->fn_ptr[BLOCK_8X8].svf_halfpix_v   = NULL;
@@ -2171,7 +2171,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
     cpi->fn_ptr[BLOCK_8X8].sdx4df          = vpx_sad8x8x4d;
 
     cpi->fn_ptr[BLOCK_4X4].sdf             = vpx_sad4x4;
-    cpi->fn_ptr[BLOCK_4X4].vf              = vp8_variance4x4;
+    cpi->fn_ptr[BLOCK_4X4].vf              = vpx_variance4x4;
     cpi->fn_ptr[BLOCK_4X4].svf             = vp8_sub_pixel_variance4x4;
     cpi->fn_ptr[BLOCK_4X4].svf_halfpix_h   = NULL;
     cpi->fn_ptr[BLOCK_4X4].svf_halfpix_v   = NULL;
@@ -2558,7 +2558,7 @@ static uint64_t calc_plane_error(unsigned char *orig, int orig_stride,
         {
             unsigned int sse;
 
-            vp8_mse16x16(orig + col, orig_stride,
+            vpx_mse16x16(orig + col, orig_stride,
                          recon + col, recon_stride,
                          &sse);
             total_sse += sse;
@@ -3384,7 +3384,7 @@ static int measure_square_diff_partial(YV12_BUFFER_CONFIG *source,
             int index = block_index_row + (j >> 4);
             if (cpi->consec_zero_last[index] >= min_consec_zero_last) {
               unsigned int sse;
-              Total += vp8_mse16x16(src + j,
+              Total += vpx_mse16x16(src + j,
                                     source->y_stride,
                                     dst + j,
                                     dest->y_stride,
                                     &sse);
@@ -3448,7 +3448,7 @@ static void process_denoiser_mode_change(VP8_COMP *cpi) {
           int index = block_index_row + (j >> 4);
           if (cpi->consec_zero_last[index] >= min_consec_zero_last) {
             unsigned int sse;
-            const unsigned int var = vp8_variance16x16(src + j,
+            const unsigned int var = vpx_variance16x16(src + j,
                                                        ystride,
                                                        dst + j,
                                                        ystride,
@@ -3458,7 +3458,7 @@ static void process_denoiser_mode_change(VP8_COMP *cpi) {
             // is small (to avoid effects from lighting change).
             if ((sse - var) < 128) {
              unsigned int sse2;
-              const unsigned int act = vp8_variance16x16(src + j,
+              const unsigned int act = vpx_variance16x16(src + j,
                                                          ystride,
                                                          const_source,
                                                          0,
@@ -5993,7 +5993,8 @@ int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest)
         for (j = 0; j < source->y_width; j += 16)
        {
             unsigned int sse;
-            Total += vp8_mse16x16(src + j, source->y_stride, dst + j, dest->y_stride, &sse);
+            Total += vpx_mse16x16(src + j, source->y_stride,
+                                  dst + j, dest->y_stride, &sse);
         }
 
         src += 16 * source->y_stride;
diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index 98ea5a040..053bf119a 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -11,6 +11,7 @@
 #include <limits.h>
 #include "vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
 #include "onyx_int.h"
 #include "modecosts.h"
 #include "encodeintra.h"
@@ -215,33 +216,6 @@ int vp8_get_inter_mbpred_error(MACROBLOCK *mb,
 
 }
 
-
-unsigned int vp8_get4x4sse_cs_c
-(
-    const unsigned char *src_ptr,
-    int source_stride,
-    const unsigned char *ref_ptr,
-    int recon_stride
-)
-{
-    int distortion = 0;
-    int r, c;
-
-    for (r = 0; r < 4; r++)
-    {
-        for (c = 0; c < 4; c++)
-        {
-            int diff = src_ptr[c] - ref_ptr[c];
-            distortion += diff * diff;
-        }
-
-        src_ptr += source_stride;
-        ref_ptr += recon_stride;
-    }
-
-    return distortion;
-}
-
 static int get_prediction_error(BLOCK *be, BLOCKD *b)
 {
     unsigned char *sptr;
@@ -249,7 +223,7 @@ static int get_prediction_error(BLOCK *be, BLOCKD *b)
     sptr = (*(be->base_src) + be->src);
     dptr = b->predictor;
 
-    return vp8_get4x4sse_cs(sptr, be->src_stride, dptr, 16);
+    return vpx_get4x4sse_cs(sptr, be->src_stride, dptr, 16);
 
 }
@@ -1037,7 +1011,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
             else
             {
                 rate2 += rate;
-                distortion2 = vp8_variance16x16(
+                distortion2 = vpx_variance16x16(
                                     *(b->base_src), b->src_stride,
                                     x->e_mbd.predictor, 16, &sse);
                 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
@@ -1066,7 +1040,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
                                            xd->dst.y_stride,
                                            xd->predictor,
                                            16);
-            distortion2 = vp8_variance16x16
+            distortion2 = vpx_variance16x16
                                 (*(b->base_src), b->src_stride,
                                  x->e_mbd.predictor, 16, &sse);
             rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
@@ -1547,7 +1521,7 @@ void vp8_pick_intra_mode(MACROBLOCK *x, int *rate_)
                                          xd->dst.y_stride,
                                          xd->predictor,
                                          16);
-        distortion = vp8_variance16x16
+        distortion = vpx_variance16x16
                          (*(b->base_src), b->src_stride, xd->predictor, 16, &sse);
         rate = x->mbmode_cost[xd->frame_type][mode];
         this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
diff --git a/vp8/encoder/picklpf.c b/vp8/encoder/picklpf.c
index 890053dcf..875b37f68 100644
--- a/vp8/encoder/picklpf.c
+++ b/vp8/encoder/picklpf.c
@@ -9,6 +9,7 @@
  */
 
+#include "./vpx_dsp_rtcd.h"
 #include "./vpx_scale_rtcd.h"
 #include "vp8/common/onyxc_int.h"
 #include "onyx_int.h"
@@ -83,7 +84,7 @@ static int calc_partial_ssl_err(YV12_BUFFER_CONFIG *source,
         for (j = 0; j < source->y_width; j += 16)
         {
             unsigned int sse;
-            Total += vp8_mse16x16(src + j, source->y_stride,
+            Total += vpx_mse16x16(src + j, source->y_stride,
                                   dst + j, dest->y_stride,
                                   &sse);
         }
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 9ccd85eb9..17194f0d4 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -15,6 +15,7 @@
 #include <assert.h>
 #include "vpx_config.h"
 #include "vp8_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
 #include "tokenize.h"
 #include "treewriter.h"
 #include "onyx_int.h"
@@ -507,9 +508,9 @@ int VP8_UVSSE(MACROBLOCK *x)
     }
     else
     {
-        vp8_variance8x8(uptr, pre_stride,
+        vpx_variance8x8(uptr, pre_stride,
                         upred_ptr, uv_stride, &sse2);
-        vp8_variance8x8(vptr, pre_stride,
+        vpx_variance8x8(vptr, pre_stride,
                         vpred_ptr, uv_stride, &sse1);
         sse2 += sse1;
     }
@@ -1783,7 +1784,7 @@ static int evaluate_inter_mode_rd(int mdcounts[4],
         if(threshold < x->encode_breakout)
            threshold = x->encode_breakout;
 
-        var = vp8_variance16x16
+        var = vpx_variance16x16
                 (*(b->base_src), b->src_stride,
                  x->e_mbd.predictor, 16, &sse);
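The caller-side pattern visible throughout the hunks above is mechanical: each file gains #include "./vpx_dsp_rtcd.h" and the affected calls swap the vp8_ prefix for vpx_ with identical argument lists. A hedged sketch of that pattern follows; the wrapper function luma_mse16x16 is hypothetical and exists only to frame the rename, while the include and the vpx_mse16x16 call mirror this patch:

#include "./vpx_dsp_rtcd.h"  /* new header added by this patch */

/* Hypothetical helper, for illustration only; the arguments mirror the
 * vpx_mse16x16 calls updated in this patch. */
static unsigned int luma_mse16x16(const unsigned char *src, int src_stride,
                                  const unsigned char *recon, int recon_stride) {
    unsigned int sse;
    /* before this patch: vp8_mse16x16(src, src_stride, recon, recon_stride, &sse); */
    vpx_mse16x16(src, src_stride, recon, recon_stride, &sse);
    return sse;
}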