author    John Koleszar <jkoleszar@google.com>  2010-10-26 15:34:16 -0400
committer John Koleszar <jkoleszar@google.com>  2010-10-26 20:00:56 -0700
commit    209d82ad722bd9eb0de2d2cd1e73aec281f00e00 (patch)
tree      e15b2fdc602a4491ee449c9b71bd3336555d2dbd /vp8/encoder/arm
parent    d6c67f02c9aae706701d3b94c20830b056c57ded (diff)
Add half-pixel variance RTCD functions
NEON has optimized 16x16 half-pixel variance functions, but they were
not part of the RTCD framework. Add these functions to RTCD, so that
other platforms can make use of this optimization in the future and the
special-case ARM code can be removed.

A number of functions were taking two variance functions as parameters.
These functions were changed to take a single parameter: a pointer to a
struct containing all the variance functions for that block size. This
provides the flexibility to call additional variance functions (the
half-pixel special case, for example), and because the table is
initialized once for all block sizes, the function pointer table no
longer has to be constructed for each macroblock.

Change-Id: I78289ff36b2715f9a7aa04d5f6fbe3d23acdc29c
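A minimal sketch of the table refactor described above (the type and
field names here are illustrative assumptions, not libvpx's actual
identifiers):

    /* Sketch only: one struct of variance functions per block size
     * replaces passing individual function pointers (svf, vf). */
    typedef unsigned int (*variance_fn_t)(unsigned char *src_ptr, int src_stride,
                                          unsigned char *ref_ptr, int ref_stride,
                                          unsigned int *sse);

    typedef struct
    {
        variance_fn_t var;        /* full-pel variance                */
        variance_fn_t halfpix_h;  /* half-pel horizontal special case */
        variance_fn_t halfpix_v;  /* half-pel vertical special case   */
        variance_fn_t halfpix_hv; /* half-pel diagonal special case   */
    } variance_vtable_t;

    /* Callers now take one table pointer, so adding a special case such
     * as half-pel variance requires no signature change. */
    static unsigned int check_halfpix_h(const variance_vtable_t *vfp,
                                        unsigned char *y, int pre_stride,
                                        unsigned char *z, int src_stride,
                                        unsigned int *sse)
    {
        return vfp->halfpix_h(y, pre_stride, z, src_stride, sse);
    }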
Diffstat (limited to 'vp8/encoder/arm')
-rw-r--r--  vp8/encoder/arm/mcomp_arm.c                                615
-rw-r--r--  vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm    18
-rw-r--r--  vp8/encoder/arm/variance_arm.h                              12
3 files changed, 21 insertions, 624 deletions
diff --git a/vp8/encoder/arm/mcomp_arm.c b/vp8/encoder/arm/mcomp_arm.c
deleted file mode 100644
index 27146e23f..000000000
--- a/vp8/encoder/arm/mcomp_arm.c
+++ /dev/null
@@ -1,615 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "mcomp.h"
-#include "vpx_mem/vpx_mem.h"
-
-#include <stdio.h>
-#include <limits.h>
-#include <math.h>
-
-#ifdef ENTROPY_STATS
-static int mv_ref_ct [31] [4] [2];
-static int mv_mode_cts [4] [2];
-#endif
-
-extern unsigned int vp8_sub_pixel_variance16x16s_neon
-(
- unsigned char *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- unsigned char *dst_ptr,
- int dst_pixels_per_line,
- unsigned int *sse
-);
-extern unsigned int vp8_sub_pixel_variance16x16s_4_0_neon
-(
- unsigned char *src_ptr,
- int src_pixels_per_line,
- unsigned char *dst_ptr,
- int dst_pixels_per_line,
- unsigned int *sse
-);
-extern unsigned int vp8_sub_pixel_variance16x16s_0_4_neon
-(
- unsigned char *src_ptr,
- int src_pixels_per_line,
- unsigned char *dst_ptr,
- int dst_pixels_per_line,
- unsigned int *sse
-);
-extern unsigned int vp8_sub_pixel_variance16x16s_4_4_neon
-(
- unsigned char *src_ptr,
- int src_pixels_per_line,
- unsigned char *dst_ptr,
- int dst_pixels_per_line,
- unsigned int *sse
-);
-
-
-int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])
-{
- int bestmse = INT_MAX;
- MV startmv;
- //MV this_mv;
- MV this_mv;
- unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col;
- unsigned char *z = (*(b->base_src) + b->src);
- int left, right, up, down, diag;
- unsigned int sse;
- int whichdir ;
-
-
- // Trap uncodable vectors
- if ((abs((bestmv->col << 3) - ref_mv->col) > MAX_FULL_PEL_VAL) || (abs((bestmv->row << 3) - ref_mv->row) > MAX_FULL_PEL_VAL))
- {
- bestmv->row <<= 3;
- bestmv->col <<= 3;
- return INT_MAX;
- }
-
- // central mv
- bestmv->row <<= 3;
- bestmv->col <<= 3;
- startmv = *bestmv;
-
- // calculate central point error
- bestmse = vf(y, d->pre_stride, z, b->src_stride, &sse);
- bestmse += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
-
- // go left then right and check error
- this_mv.row = startmv.row;
- this_mv.col = ((startmv.col - 8) | 4);
- left = vp8_sub_pixel_variance16x16s_4_0_neon(y - 1, d->pre_stride, z, b->src_stride, &sse);
- left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
- if (left < bestmse)
- {
- *bestmv = this_mv;
- bestmse = left;
- }
-
- this_mv.col += 8;
- right = vp8_sub_pixel_variance16x16s_4_0_neon(y, d->pre_stride, z, b->src_stride, &sse);
- right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
- if (right < bestmse)
- {
- *bestmv = this_mv;
- bestmse = right;
- }
-
- // go up then down and check error
- this_mv.col = startmv.col;
- this_mv.row = ((startmv.row - 8) | 4);
- up = vp8_sub_pixel_variance16x16s_0_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
- up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
- if (up < bestmse)
- {
- *bestmv = this_mv;
- bestmse = up;
- }
-
- this_mv.row += 8;
- down = vp8_sub_pixel_variance16x16s_0_4_neon(y, d->pre_stride, z, b->src_stride, &sse);
- down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
- if (down < bestmse)
- {
- *bestmv = this_mv;
- bestmse = down;
- }
-
-
- // now check 1 more diagonal
- whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
- //for(whichdir =0;whichdir<4;whichdir++)
- //{
- this_mv = startmv;
-
- switch (whichdir)
- {
- case 0:
- this_mv.col = (this_mv.col - 8) | 4;
- this_mv.row = (this_mv.row - 8) | 4;
- diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
- break;
- case 1:
- this_mv.col += 4;
- this_mv.row = (this_mv.row - 8) | 4;
- diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
- break;
- case 2:
- this_mv.col = (this_mv.col - 8) | 4;
- this_mv.row += 4;
- diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1, d->pre_stride, z, b->src_stride, &sse);
- break;
- case 3:
- this_mv.col += 4;
- this_mv.row += 4;
- diag = vp8_sub_pixel_variance16x16s_4_4_neon(y, d->pre_stride, z, b->src_stride, &sse);
- break;
- }
-
- diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
- if (diag < bestmse)
- {
- *bestmv = this_mv;
- bestmse = diag;
- }
-
-// }
-
-
- // time to check quarter pels.
- if (bestmv->row < startmv.row)
- y -= d->pre_stride;
-
- if (bestmv->col < startmv.col)
- y--;
-
- startmv = *bestmv;
-
-
-
- // go left then right and check error
- this_mv.row = startmv.row;
-
- if (startmv.col & 7)
- {
- this_mv.col = startmv.col - 2;
- left = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
- }
- else
- {
- this_mv.col = (startmv.col - 8) | 6;
- left = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);
- }
-
- left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
- if (left < bestmse)
- {
- *bestmv = this_mv;
- bestmse = left;
- }
-
- this_mv.col += 4;
- right = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
- right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
- if (right < bestmse)
- {
- *bestmv = this_mv;
- bestmse = right;
- }
-
- // go up then down and check error
- this_mv.col = startmv.col;
-
- if (startmv.row & 7)
- {
- this_mv.row = startmv.row - 2;
- up = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
- }
- else
- {
- this_mv.row = (startmv.row - 8) | 6;
- up = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
- }
-
- up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
- if (up < bestmse)
- {
- *bestmv = this_mv;
- bestmse = up;
- }
-
- this_mv.row += 4;
- down = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
- down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
- if (down < bestmse)
- {
- *bestmv = this_mv;
- bestmse = down;
- }
-
-
- // now check 1 more diagonal
- whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
-
-// for(whichdir=0;whichdir<4;whichdir++)
-// {
- this_mv = startmv;
-
- switch (whichdir)
- {
- case 0:
-
- if (startmv.row & 7)
- {
- this_mv.row -= 2;
-
- if (startmv.col & 7)
- {
- this_mv.col -= 2;
- diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
- }
- else
- {
- this_mv.col = (startmv.col - 8) | 6;
- diag = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;
- }
- }
- else
- {
- this_mv.row = (startmv.row - 8) | 6;
-
- if (startmv.col & 7)
- {
- this_mv.col -= 2;
- diag = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
- }
- else
- {
- this_mv.col = (startmv.col - 8) | 6;
- diag = svf(y - d->pre_stride - 1, d->pre_stride, 6, 6, z, b->src_stride, &sse);
- }
- }
-
- break;
- case 1:
- this_mv.col += 2;
-
- if (startmv.row & 7)
- {
- this_mv.row -= 2;
- diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
- }
- else
- {
- this_mv.row = (startmv.row - 8) | 6;
- diag = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
- }
-
- break;
- case 2:
- this_mv.row += 2;
-
- if (startmv.col & 7)
- {
- this_mv.col -= 2;
- diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
- }
- else
- {
- this_mv.col = (startmv.col - 8) | 6;
- diag = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;
- }
-
- break;
- case 3:
- this_mv.col += 2;
- this_mv.row += 2;
- diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
- break;
- }
-
- diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
- if (diag < bestmse)
- {
- *bestmv = this_mv;
- bestmse = diag;
- }
-
-// }
-
- return bestmse;
-}
-
-int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])
-{
- int bestmse = INT_MAX;
- MV startmv;
- //MV this_mv;
- MV this_mv;
- unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col;
- unsigned char *z = (*(b->base_src) + b->src);
- int left, right, up, down, diag;
- unsigned int sse;
-
- // Trap uncodable vectors
- if ((abs((bestmv->col << 3) - ref_mv->col) > MAX_FULL_PEL_VAL) || (abs((bestmv->row << 3) - ref_mv->row) > MAX_FULL_PEL_VAL))
- {
- bestmv->row <<= 3;
- bestmv->col <<= 3;
- return INT_MAX;
- }
-
- // central mv
- bestmv->row <<= 3;
- bestmv->col <<= 3;
- startmv = *bestmv;
-
- // calculate central point error
- bestmse = vf(y, d->pre_stride, z, b->src_stride, &sse);
- bestmse += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
-
- // go left then right and check error
- this_mv.row = startmv.row;
- this_mv.col = ((startmv.col - 8) | 4);
- left = vp8_sub_pixel_variance16x16s_4_0_neon(y - 1, d->pre_stride, z, b->src_stride, &sse);
- left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
- if (left < bestmse)
- {
- *bestmv = this_mv;
- bestmse = left;
- }
-
- this_mv.col += 8;
- right = vp8_sub_pixel_variance16x16s_4_0_neon(y, d->pre_stride, z, b->src_stride, &sse);
- right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
- if (right < bestmse)
- {
- *bestmv = this_mv;
- bestmse = right;
- }
-
- // go up then down and check error
- this_mv.col = startmv.col;
- this_mv.row = ((startmv.row - 8) | 4);
- up = vp8_sub_pixel_variance16x16s_0_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
- up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
- if (up < bestmse)
- {
- *bestmv = this_mv;
- bestmse = up;
- }
-
- this_mv.row += 8;
- down = vp8_sub_pixel_variance16x16s_0_4_neon(y, d->pre_stride, z, b->src_stride, &sse);
- down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
- if (down < bestmse)
- {
- *bestmv = this_mv;
- bestmse = down;
- }
-
- // somewhat strangely not doing all the diagonals for half pel is slower than doing them.
-#if 0
- // now check 1 more diagonal -
- whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
- this_mv = startmv;
-
- switch (whichdir)
- {
- case 0:
- this_mv.col = (this_mv.col - 8) | 4;
- this_mv.row = (this_mv.row - 8) | 4;
- diag = svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
- break;
- case 1:
- this_mv.col += 4;
- this_mv.row = (this_mv.row - 8) | 4;
- diag = svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
- break;
- case 2:
- this_mv.col = (this_mv.col - 8) | 4;
- this_mv.row += 4;
- diag = svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse);
- break;
- case 3:
- this_mv.col += 4;
- this_mv.row += 4;
- diag = svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse);
- break;
- }
-
- diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
- if (diag < bestmse)
- {
- *bestmv = this_mv;
- bestmse = diag;
- }
-
-#else
- this_mv.col = (this_mv.col - 8) | 4;
- this_mv.row = (this_mv.row - 8) | 4;
- diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
- diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
- if (diag < bestmse)
- {
- *bestmv = this_mv;
- bestmse = diag;
- }
-
- this_mv.col += 8;
- diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
- diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
- if (diag < bestmse)
- {
- *bestmv = this_mv;
- bestmse = diag;
- }
-
- this_mv.col = (this_mv.col - 8) | 4;
- this_mv.row = startmv.row + 4;
- diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1, d->pre_stride, z, b->src_stride, &sse);
- diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
- if (diag < bestmse)
- {
- *bestmv = this_mv;
- bestmse = diag;
- }
-
- this_mv.col += 8;
- diag = vp8_sub_pixel_variance16x16s_4_4_neon(y, d->pre_stride, z, b->src_stride, &sse);
- diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
- if (diag < bestmse)
- {
- *bestmv = this_mv;
- bestmse = diag;
- }
-
-#endif
- return bestmse;
-}
-
-
-#ifdef ENTROPY_STATS
-void print_mode_context(void)
-{
- FILE *f = fopen("modecont.c", "w");
- int i, j;
-
- fprintf(f, "#include \"entropy.h\"\n");
- fprintf(f, "const int vp8_mode_contexts[6][4] =\n");
- fprintf(f, "{\n");
-
- for (j = 0; j < 6; j++)
- {
- fprintf(f, " { // %d \n", j);
- fprintf(f, " ");
-
- for (i = 0; i < 4; i++)
- {
- int overal_prob;
- int this_prob;
- int count; // = mv_ref_ct[j][i][0]+mv_ref_ct[j][i][1];
-
- // Overall probs
- count = mv_mode_cts[i][0] + mv_mode_cts[i][1];
-
- if (count)
- overal_prob = 256 * mv_mode_cts[i][0] / count;
- else
- overal_prob = 128;
-
- if (overal_prob == 0)
- overal_prob = 1;
-
- // context probs
- count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1];
-
- if (count)
- this_prob = 256 * mv_ref_ct[j][i][0] / count;
- else
- this_prob = 128;
-
- if (this_prob == 0)
- this_prob = 1;
-
- fprintf(f, "%5d, ", this_prob);
- //fprintf(f,"%5d, %5d, %8d,", this_prob, overal_prob, (this_prob << 10)/overal_prob);
- //fprintf(f,"%8d, ", (this_prob << 10)/overal_prob);
- }
-
- fprintf(f, " },\n");
- }
-
- fprintf(f, "};\n");
- fclose(f);
-}
-
-/* MV ref count ENTROPY_STATS stats code */
-#ifdef ENTROPY_STATS
-void init_mv_ref_counts()
-{
- vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
- vpx_memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
-}
-
-void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4])
-{
- if (m == ZEROMV)
- {
- ++mv_ref_ct [ct[0]] [0] [0];
- ++mv_mode_cts[0][0];
- }
- else
- {
- ++mv_ref_ct [ct[0]] [0] [1];
- ++mv_mode_cts[0][1];
-
- if (m == NEARESTMV)
- {
- ++mv_ref_ct [ct[1]] [1] [0];
- ++mv_mode_cts[1][0];
- }
- else
- {
- ++mv_ref_ct [ct[1]] [1] [1];
- ++mv_mode_cts[1][1];
-
- if (m == NEARMV)
- {
- ++mv_ref_ct [ct[2]] [2] [0];
- ++mv_mode_cts[2][0];
- }
- else
- {
- ++mv_ref_ct [ct[2]] [2] [1];
- ++mv_mode_cts[2][1];
-
- if (m == NEWMV)
- {
- ++mv_ref_ct [ct[3]] [3] [0];
- ++mv_mode_cts[3][0];
- }
- else
- {
- ++mv_ref_ct [ct[3]] [3] [1];
- ++mv_mode_cts[3][1];
- }
- }
- }
- }
-}
-
-#endif/* END MV ref count ENTROPY_STATS stats code */
-
-#endif
diff --git a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
index 1c1441cc2..0a2b71c49 100644
--- a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
@@ -9,9 +9,9 @@
;
- EXPORT |vp8_sub_pixel_variance16x16s_4_0_neon|
- EXPORT |vp8_sub_pixel_variance16x16s_0_4_neon|
- EXPORT |vp8_sub_pixel_variance16x16s_4_4_neon|
+ EXPORT |vp8_variance_halfpixvar16x16_h_neon|
+ EXPORT |vp8_variance_halfpixvar16x16_v_neon|
+ EXPORT |vp8_variance_halfpixvar16x16_hv_neon|
EXPORT |vp8_sub_pixel_variance16x16s_neon|
ARM
REQUIRE8
@@ -20,7 +20,7 @@
AREA ||.text||, CODE, READONLY, ALIGN=2
;================================================
-;unsigned int vp8_sub_pixel_variance16x16s_4_0_neon
+;unsigned int vp8_variance_halfpixvar16x16_h_neon
;(
; unsigned char *src_ptr, r0
; int src_pixels_per_line, r1
@@ -29,7 +29,7 @@
; unsigned int *sse
;);
;================================================
-|vp8_sub_pixel_variance16x16s_4_0_neon| PROC
+|vp8_variance_halfpixvar16x16_h_neon| PROC
push {lr}
mov r12, #4 ;loop counter
@@ -120,7 +120,7 @@ vp8_filt_fpo16x16s_4_0_loop_neon
ENDP
;================================================
-;unsigned int vp8_sub_pixel_variance16x16s_0_4_neon
+;unsigned int vp8_variance_halfpixvar16x16_v_neon
;(
; unsigned char *src_ptr, r0
; int src_pixels_per_line, r1
@@ -129,7 +129,7 @@ vp8_filt_fpo16x16s_4_0_loop_neon
; unsigned int *sse
;);
;================================================
-|vp8_sub_pixel_variance16x16s_0_4_neon| PROC
+|vp8_variance_halfpixvar16x16_v_neon| PROC
push {lr}
mov r12, #4 ;loop counter
@@ -216,7 +216,7 @@ vp8_filt_spo16x16s_0_4_loop_neon
ENDP
;================================================
-;unsigned int vp8_sub_pixel_variance16x16s_4_4_neon
+;unsigned int vp8_variance_halfpixvar16x16_hv_neon
;(
; unsigned char *src_ptr, r0
; int src_pixels_per_line, r1
@@ -225,7 +225,7 @@ vp8_filt_spo16x16s_0_4_loop_neon
; unsigned int *sse
;);
;================================================
-|vp8_sub_pixel_variance16x16s_4_4_neon| PROC
+|vp8_variance_halfpixvar16x16_hv_neon| PROC
push {lr}
vld1.u8 {d0, d1, d2, d3}, [r0], r1 ;load src data
diff --git a/vp8/encoder/arm/variance_arm.h b/vp8/encoder/arm/variance_arm.h
index fb9dd5a5b..0e5f62fcf 100644
--- a/vp8/encoder/arm/variance_arm.h
+++ b/vp8/encoder/arm/variance_arm.h
@@ -30,6 +30,9 @@ extern prototype_subpixvariance(vp8_sub_pixel_variance8x8_neon);
//extern prototype_subpixvariance(vp8_sub_pixel_variance8x16_c);
//extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_c);
extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_neon);
+extern prototype_variance(vp8_variance_halfpixvar16x16_h_neon);
+extern prototype_variance(vp8_variance_halfpixvar16x16_v_neon);
+extern prototype_variance(vp8_variance_halfpixvar16x16_hv_neon);
//extern prototype_getmbss(vp8_get_mb_ss_c);
extern prototype_variance(vp8_mse16x16_neon);
@@ -84,6 +87,15 @@ extern prototype_sad(vp8_get4x4sse_cs_neon);
#undef vp8_variance_subpixvar16x16
#define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_neon
+#undef vp8_variance_halfpixvar16x16_h
+#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_neon
+
+#undef vp8_variance_halfpixvar16x16_v
+#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_neon
+
+#undef vp8_variance_halfpixvar16x16_hv
+#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_neon
+
//#undef vp8_variance_getmbss
//#define vp8_variance_getmbss vp8_get_mb_ss_c
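
The overrides above route the generic RTCD half-pixel names to the NEON
kernels on ARM builds. A hedged sketch of the one-time initialization
the commit message describes (the table type and init routine are
hypothetical; only the function names and signatures come from this
diff):

    /* Sketch only: fill a per-block-size table once at init, instead of
     * constructing function pointers for each macroblock. */
    typedef unsigned int (*halfpix_fn_t)(unsigned char *src_ptr, int src_stride,
                                         unsigned char *ref_ptr, int ref_stride,
                                         unsigned int *sse);

    extern unsigned int vp8_variance_halfpixvar16x16_h_neon(unsigned char *, int,
                                                            unsigned char *, int,
                                                            unsigned int *);
    extern unsigned int vp8_variance_halfpixvar16x16_v_neon(unsigned char *, int,
                                                            unsigned char *, int,
                                                            unsigned int *);
    extern unsigned int vp8_variance_halfpixvar16x16_hv_neon(unsigned char *, int,
                                                             unsigned char *, int,
                                                             unsigned int *);

    static struct
    {
        halfpix_fn_t h, v, hv;
    } halfpix_16x16; /* hypothetical table instance */

    static void init_halfpix_table(void) /* hypothetical init hook */
    {
        halfpix_16x16.h  = vp8_variance_halfpixvar16x16_h_neon;
        halfpix_16x16.v  = vp8_variance_halfpixvar16x16_v_neon;
        halfpix_16x16.hv = vp8_variance_halfpixvar16x16_hv_neon;
    }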