summary | refs | log | tree | commit | diff
path: root/vp9/encoder
diff options
context:
space:
mode:
Diffstat (limited to 'vp9/encoder')
-rw-r--r-- vp9/encoder/vp9_block.h | 10
-rw-r--r-- vp9/encoder/vp9_encoder.c | 172
-rw-r--r-- vp9/encoder/vp9_encoder.h | 2
-rw-r--r-- vp9/encoder/vp9_firstpass.c | 4
-rw-r--r-- vp9/encoder/vp9_mcomp.c | 292
-rw-r--r-- vp9/encoder/vp9_mcomp.h | 17
-rw-r--r-- vp9/encoder/vp9_pickmode.c | 2
-rw-r--r-- vp9/encoder/vp9_variance.c | 120
-rw-r--r-- vp9/encoder/vp9_variance.h | 3
-rw-r--r-- vp9/encoder/x86/vp9_variance_sse2.c | 55
10 files changed, 217 insertions, 460 deletions
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index fcf2a0420..f35a85fba 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -20,12 +20,6 @@
extern "C" {
#endif
-// motion search site
-typedef struct {
- MV mv;
- int offset;
-} search_site;
-
// Structure to hold snapshot of coding context during the mode picking process
typedef struct {
MODE_INFO mic;
@@ -108,10 +102,6 @@ struct macroblock {
int skip_optimize;
int q_index;
- search_site *ss;
- int ss_count;
- int searches_per_step;
-
int errorperbit;
int sadperbit16;
int sadperbit4;
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 6816f555e..395d26aef 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -501,9 +501,9 @@ static void update_frame_size(VP9_COMP *cpi) {
int y_stride = cpi->scaled_source.y_stride;
if (cpi->sf.search_method == NSTEP) {
- vp9_init3smotion_compensation(&cpi->mb, y_stride);
+ vp9_init3smotion_compensation(&cpi->ss_cfg, y_stride);
} else if (cpi->sf.search_method == DIAMOND) {
- vp9_init_dsmotion_compensation(&cpi->mb, y_stride);
+ vp9_init_dsmotion_compensation(&cpi->ss_cfg, y_stride);
}
}
@@ -782,9 +782,6 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) {
cm->error.setjmp = 1;
- CHECK_MEM_ERROR(cm, cpi->mb.ss, vpx_calloc(sizeof(search_site),
- (MAX_MVSEARCH_STEPS * 8) + 1));
-
vp9_rtcd();
cpi->use_svc = 0;
@@ -973,95 +970,73 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) {
cpi->rd.thresh_freq_fact[i][j] = 32;
}
-#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SVFHH, SVFHV, SVFHHV, \
- SDX3F, SDX8F, SDX4DF)\
+#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX3F, SDX8F, SDX4DF)\
cpi->fn_ptr[BT].sdf = SDF; \
cpi->fn_ptr[BT].sdaf = SDAF; \
cpi->fn_ptr[BT].vf = VF; \
cpi->fn_ptr[BT].svf = SVF; \
cpi->fn_ptr[BT].svaf = SVAF; \
- cpi->fn_ptr[BT].svf_halfpix_h = SVFHH; \
- cpi->fn_ptr[BT].svf_halfpix_v = SVFHV; \
- cpi->fn_ptr[BT].svf_halfpix_hv = SVFHHV; \
cpi->fn_ptr[BT].sdx3f = SDX3F; \
cpi->fn_ptr[BT].sdx8f = SDX8F; \
cpi->fn_ptr[BT].sdx4df = SDX4DF;
BFP(BLOCK_32X16, vp9_sad32x16, vp9_sad32x16_avg,
vp9_variance32x16, vp9_sub_pixel_variance32x16,
- vp9_sub_pixel_avg_variance32x16, NULL, NULL,
- NULL, NULL, NULL,
- vp9_sad32x16x4d)
+ vp9_sub_pixel_avg_variance32x16, NULL, NULL, vp9_sad32x16x4d)
BFP(BLOCK_16X32, vp9_sad16x32, vp9_sad16x32_avg,
vp9_variance16x32, vp9_sub_pixel_variance16x32,
- vp9_sub_pixel_avg_variance16x32, NULL, NULL,
- NULL, NULL, NULL,
- vp9_sad16x32x4d)
+ vp9_sub_pixel_avg_variance16x32, NULL, NULL, vp9_sad16x32x4d)
BFP(BLOCK_64X32, vp9_sad64x32, vp9_sad64x32_avg,
vp9_variance64x32, vp9_sub_pixel_variance64x32,
- vp9_sub_pixel_avg_variance64x32, NULL, NULL,
- NULL, NULL, NULL,
- vp9_sad64x32x4d)
+ vp9_sub_pixel_avg_variance64x32, NULL, NULL, vp9_sad64x32x4d)
BFP(BLOCK_32X64, vp9_sad32x64, vp9_sad32x64_avg,
vp9_variance32x64, vp9_sub_pixel_variance32x64,
- vp9_sub_pixel_avg_variance32x64, NULL, NULL,
- NULL, NULL, NULL,
- vp9_sad32x64x4d)
+ vp9_sub_pixel_avg_variance32x64, NULL, NULL, vp9_sad32x64x4d)
BFP(BLOCK_32X32, vp9_sad32x32, vp9_sad32x32_avg,
vp9_variance32x32, vp9_sub_pixel_variance32x32,
- vp9_sub_pixel_avg_variance32x32, vp9_variance_halfpixvar32x32_h,
- vp9_variance_halfpixvar32x32_v,
- vp9_variance_halfpixvar32x32_hv, vp9_sad32x32x3, vp9_sad32x32x8,
+ vp9_sub_pixel_avg_variance32x32, vp9_sad32x32x3, vp9_sad32x32x8,
vp9_sad32x32x4d)
BFP(BLOCK_64X64, vp9_sad64x64, vp9_sad64x64_avg,
vp9_variance64x64, vp9_sub_pixel_variance64x64,
- vp9_sub_pixel_avg_variance64x64, vp9_variance_halfpixvar64x64_h,
- vp9_variance_halfpixvar64x64_v,
- vp9_variance_halfpixvar64x64_hv, vp9_sad64x64x3, vp9_sad64x64x8,
+ vp9_sub_pixel_avg_variance64x64, vp9_sad64x64x3, vp9_sad64x64x8,
vp9_sad64x64x4d)
BFP(BLOCK_16X16, vp9_sad16x16, vp9_sad16x16_avg,
vp9_variance16x16, vp9_sub_pixel_variance16x16,
- vp9_sub_pixel_avg_variance16x16, vp9_variance_halfpixvar16x16_h,
- vp9_variance_halfpixvar16x16_v,
- vp9_variance_halfpixvar16x16_hv, vp9_sad16x16x3, vp9_sad16x16x8,
+ vp9_sub_pixel_avg_variance16x16, vp9_sad16x16x3, vp9_sad16x16x8,
vp9_sad16x16x4d)
BFP(BLOCK_16X8, vp9_sad16x8, vp9_sad16x8_avg,
vp9_variance16x8, vp9_sub_pixel_variance16x8,
- vp9_sub_pixel_avg_variance16x8, NULL, NULL, NULL,
+ vp9_sub_pixel_avg_variance16x8,
vp9_sad16x8x3, vp9_sad16x8x8, vp9_sad16x8x4d)
BFP(BLOCK_8X16, vp9_sad8x16, vp9_sad8x16_avg,
vp9_variance8x16, vp9_sub_pixel_variance8x16,
- vp9_sub_pixel_avg_variance8x16, NULL, NULL, NULL,
+ vp9_sub_pixel_avg_variance8x16,
vp9_sad8x16x3, vp9_sad8x16x8, vp9_sad8x16x4d)
BFP(BLOCK_8X8, vp9_sad8x8, vp9_sad8x8_avg,
vp9_variance8x8, vp9_sub_pixel_variance8x8,
- vp9_sub_pixel_avg_variance8x8, NULL, NULL, NULL,
+ vp9_sub_pixel_avg_variance8x8,
vp9_sad8x8x3, vp9_sad8x8x8, vp9_sad8x8x4d)
BFP(BLOCK_8X4, vp9_sad8x4, vp9_sad8x4_avg,
vp9_variance8x4, vp9_sub_pixel_variance8x4,
- vp9_sub_pixel_avg_variance8x4, NULL, NULL,
- NULL, NULL, vp9_sad8x4x8,
- vp9_sad8x4x4d)
+ vp9_sub_pixel_avg_variance8x4, NULL, vp9_sad8x4x8, vp9_sad8x4x4d)
BFP(BLOCK_4X8, vp9_sad4x8, vp9_sad4x8_avg,
vp9_variance4x8, vp9_sub_pixel_variance4x8,
- vp9_sub_pixel_avg_variance4x8, NULL, NULL,
- NULL, NULL, vp9_sad4x8x8,
- vp9_sad4x8x4d)
+ vp9_sub_pixel_avg_variance4x8, NULL, vp9_sad4x8x8, vp9_sad4x8x4d)
BFP(BLOCK_4X4, vp9_sad4x4, vp9_sad4x4_avg,
vp9_variance4x4, vp9_sub_pixel_variance4x4,
- vp9_sub_pixel_avg_variance4x4, NULL, NULL, NULL,
+ vp9_sub_pixel_avg_variance4x4,
vp9_sad4x4x3, vp9_sad4x4x8, vp9_sad4x4x4d)
cpi->full_search_sad = vp9_full_search_sad;
@@ -1182,7 +1157,6 @@ void vp9_remove_compressor(VP9_COMP *cpi) {
}
dealloc_compressor_data(cpi);
- vpx_free(cpi->mb.ss);
vpx_free(cpi->tok);
for (i = 0; i < sizeof(cpi->mbgraph_stats) /
@@ -1444,77 +1418,67 @@ void vp9_write_yuv_rec_frame(VP9_COMMON *cm) {
}
#endif
-static void scale_and_extend_frame_nonnormative(YV12_BUFFER_CONFIG *src_fb,
- YV12_BUFFER_CONFIG *dst_fb) {
- const int in_w = src_fb->y_crop_width;
- const int in_h = src_fb->y_crop_height;
- const int out_w = dst_fb->y_crop_width;
- const int out_h = dst_fb->y_crop_height;
- const int in_w_uv = src_fb->uv_crop_width;
- const int in_h_uv = src_fb->uv_crop_height;
- const int out_w_uv = dst_fb->uv_crop_width;
- const int out_h_uv = dst_fb->uv_crop_height;
+static void scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src,
+ YV12_BUFFER_CONFIG *dst) {
+ // TODO(dkovalev): replace YV12_BUFFER_CONFIG with vpx_image_t
int i;
+ const uint8_t *const srcs[4] = {src->y_buffer, src->u_buffer, src->v_buffer,
+ src->alpha_buffer};
+ const int src_strides[4] = {src->y_stride, src->uv_stride, src->uv_stride,
+ src->alpha_stride};
+ const int src_widths[4] = {src->y_crop_width, src->uv_crop_width,
+ src->uv_crop_width, src->y_crop_width};
+ const int src_heights[4] = {src->y_crop_height, src->uv_crop_height,
+ src->uv_crop_height, src->y_crop_height};
+ uint8_t *const dsts[4] = {dst->y_buffer, dst->u_buffer, dst->v_buffer,
+ dst->alpha_buffer};
+ const int dst_strides[4] = {dst->y_stride, dst->uv_stride, dst->uv_stride,
+ dst->alpha_stride};
+ const int dst_widths[4] = {dst->y_crop_width, dst->uv_crop_width,
+ dst->uv_crop_width, dst->y_crop_width};
+ const int dst_heights[4] = {dst->y_crop_height, dst->uv_crop_height,
+ dst->uv_crop_height, dst->y_crop_height};
+
+ for (i = 0; i < MAX_MB_PLANE; ++i)
+ vp9_resize_plane(srcs[i], src_heights[i], src_widths[i], src_strides[i],
+ dsts[i], dst_heights[i], dst_widths[i], dst_strides[i]);
- uint8_t *srcs[4] = {src_fb->y_buffer, src_fb->u_buffer, src_fb->v_buffer,
- src_fb->alpha_buffer};
- int src_strides[4] = {src_fb->y_stride, src_fb->uv_stride, src_fb->uv_stride,
- src_fb->alpha_stride};
-
- uint8_t *dsts[4] = {dst_fb->y_buffer, dst_fb->u_buffer, dst_fb->v_buffer,
- dst_fb->alpha_buffer};
- int dst_strides[4] = {dst_fb->y_stride, dst_fb->uv_stride, dst_fb->uv_stride,
- dst_fb->alpha_stride};
-
- for (i = 0; i < MAX_MB_PLANE; ++i) {
- if (i == 0 || i == 3) {
- // Y and alpha planes
- vp9_resize_plane(srcs[i], in_h, in_w, src_strides[i],
- dsts[i], out_h, out_w, dst_strides[i]);
- } else {
- // Chroma planes
- vp9_resize_plane(srcs[i], in_h_uv, in_w_uv, src_strides[i],
- dsts[i], out_h_uv, out_w_uv, dst_strides[i]);
- }
- }
// TODO(hkuang): Call C version explicitly
// as neon version only expand border size 32.
- vp8_yv12_extend_frame_borders_c(dst_fb);
-}
-
-static void scale_and_extend_frame(YV12_BUFFER_CONFIG *src_fb,
- YV12_BUFFER_CONFIG *dst_fb) {
- const int in_w = src_fb->y_crop_width;
- const int in_h = src_fb->y_crop_height;
- const int out_w = dst_fb->y_crop_width;
- const int out_h = dst_fb->y_crop_height;
+ vp8_yv12_extend_frame_borders_c(dst);
+}
+
+static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
+ YV12_BUFFER_CONFIG *dst) {
+ const int src_w = src->y_crop_width;
+ const int src_h = src->y_crop_height;
+ const int dst_w = dst->y_crop_width;
+ const int dst_h = dst->y_crop_height;
+ const uint8_t *const srcs[4] = {src->y_buffer, src->u_buffer, src->v_buffer,
+ src->alpha_buffer};
+ const int src_strides[4] = {src->y_stride, src->uv_stride, src->uv_stride,
+ src->alpha_stride};
+ uint8_t *const dsts[4] = {dst->y_buffer, dst->u_buffer, dst->v_buffer,
+ dst->alpha_buffer};
+ const int dst_strides[4] = {dst->y_stride, dst->uv_stride, dst->uv_stride,
+ dst->alpha_stride};
int x, y, i;
- uint8_t *srcs[4] = {src_fb->y_buffer, src_fb->u_buffer, src_fb->v_buffer,
- src_fb->alpha_buffer};
- int src_strides[4] = {src_fb->y_stride, src_fb->uv_stride, src_fb->uv_stride,
- src_fb->alpha_stride};
-
- uint8_t *dsts[4] = {dst_fb->y_buffer, dst_fb->u_buffer, dst_fb->v_buffer,
- dst_fb->alpha_buffer};
- int dst_strides[4] = {dst_fb->y_stride, dst_fb->uv_stride, dst_fb->uv_stride,
- dst_fb->alpha_stride};
-
- for (y = 0; y < out_h; y += 16) {
- for (x = 0; x < out_w; x += 16) {
+ for (y = 0; y < dst_h; y += 16) {
+ for (x = 0; x < dst_w; x += 16) {
for (i = 0; i < MAX_MB_PLANE; ++i) {
const int factor = (i == 0 || i == 3 ? 1 : 2);
- const int x_q4 = x * (16 / factor) * in_w / out_w;
- const int y_q4 = y * (16 / factor) * in_h / out_h;
+ const int x_q4 = x * (16 / factor) * src_w / dst_w;
+ const int y_q4 = y * (16 / factor) * src_h / dst_h;
const int src_stride = src_strides[i];
const int dst_stride = dst_strides[i];
- uint8_t *src = srcs[i] + y / factor * in_h / out_h * src_stride +
- x / factor * in_w / out_w;
- uint8_t *dst = dsts[i] + y / factor * dst_stride + x / factor;
+ const uint8_t *src_ptr = srcs[i] + (y / factor) * src_h / dst_h *
+ src_stride + (x / factor) * src_w / dst_w;
+ uint8_t *dst_ptr = dsts[i] + (y / factor) * dst_stride + (x / factor);
- vp9_convolve8(src, src_stride, dst, dst_stride,
- vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * in_w / out_w,
- vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * in_h / out_h,
+ vp9_convolve8(src_ptr, src_stride, dst_ptr, dst_stride,
+ vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * src_w / dst_w,
+ vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * src_h / dst_h,
16 / factor, 16 / factor);
}
}
@@ -1522,7 +1486,7 @@ static void scale_and_extend_frame(YV12_BUFFER_CONFIG *src_fb,
// TODO(hkuang): Call C version explicitly
// as neon version only expand border size 32.
- vp8_yv12_extend_frame_borders_c(dst_fb);
+ vp8_yv12_extend_frame_borders_c(dst);
}
static int find_fp_qindex() {
@@ -1701,7 +1665,7 @@ void vp9_scale_references(VP9_COMP *cpi) {
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
- YV12_BUFFER_CONFIG *const ref = &cm->frame_bufs[idx].buf;
+ const YV12_BUFFER_CONFIG *const ref = &cm->frame_bufs[idx].buf;
if (ref->y_crop_width != cm->width ||
ref->y_crop_height != cm->height) {
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 8f3249407..132b479e2 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -497,6 +497,8 @@ typedef struct VP9_COMP {
int frame_flags;
+ search_site_config ss_cfg;
+
#if CONFIG_MULTIPLE_ARF
// ARF tracking variables.
int multi_arf_enabled;
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 34506f2bd..b408ced0e 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -418,7 +418,7 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
v_fn_ptr.vf = get_block_variance_fn(bsize);
// Center the initial step/diamond search on best mv.
- tmp_err = cpi->diamond_search_sad(x, &ref_mv_full, &tmp_mv,
+ tmp_err = cpi->diamond_search_sad(x, &cpi->ss_cfg, &ref_mv_full, &tmp_mv,
step_param,
x->sadperbit16, &num00, &v_fn_ptr, ref_mv);
if (tmp_err < INT_MAX)
@@ -441,7 +441,7 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
if (num00) {
--num00;
} else {
- tmp_err = cpi->diamond_search_sad(x, &ref_mv_full, &tmp_mv,
+ tmp_err = cpi->diamond_search_sad(x, &cpi->ss_cfg, &ref_mv_full, &tmp_mv,
step_param + n, x->sadperbit16,
&num00, &v_fn_ptr, ref_mv);
if (tmp_err < INT_MAX)
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 89937f5a6..bbec4da76 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -101,32 +101,32 @@ static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
return 0;
}
-void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
+void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) {
int len, ss_count = 1;
- x->ss[0].mv.col = x->ss[0].mv.row = 0;
- x->ss[0].offset = 0;
+ cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
+ cfg->ss[0].offset = 0;
for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
// Generate offsets for 4 search sites per step.
const MV ss_mvs[] = {{-len, 0}, {len, 0}, {0, -len}, {0, len}};
int i;
for (i = 0; i < 4; ++i) {
- search_site *const ss = &x->ss[ss_count++];
+ search_site *const ss = &cfg->ss[ss_count++];
ss->mv = ss_mvs[i];
ss->offset = ss->mv.row * stride + ss->mv.col;
}
}
- x->ss_count = ss_count;
- x->searches_per_step = 4;
+ cfg->ss_count = ss_count;
+ cfg->searches_per_step = 4;
}
-void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) {
+void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
int len, ss_count = 1;
- x->ss[0].mv.col = x->ss[0].mv.row = 0;
- x->ss[0].offset = 0;
+ cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
+ cfg->ss[0].offset = 0;
for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
// Generate offsets for 8 search sites per step.
@@ -136,14 +136,14 @@ void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) {
};
int i;
for (i = 0; i < 8; ++i) {
- search_site *const ss = &x->ss[ss_count++];
+ search_site *const ss = &cfg->ss[ss_count++];
ss->mv = ss_mvs[i];
ss->offset = ss->mv.row * stride + ss->mv.col;
}
}
- x->ss_count = ss_count;
- x->searches_per_step = 8;
+ cfg->ss_count = ss_count;
+ cfg->searches_per_step = 8;
}
/*
@@ -871,7 +871,9 @@ int vp9_fast_dia_search(const MACROBLOCK *x,
#undef CHECK_BETTER
-int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
+int vp9_full_range_search_c(const MACROBLOCK *x,
+ const search_site_config *cfg,
+ MV *ref_mv, MV *best_mv,
int search_param, int sad_per_bit, int *num00,
const vp9_variance_fn_ptr_t *fn_ptr,
const MV *center_mv) {
@@ -962,6 +964,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
}
int vp9_diamond_search_sad_c(const MACROBLOCK *x,
+ const search_site_config *cfg,
MV *ref_mv, MV *best_mv,
int search_param, int sad_per_bit, int *num00,
const vp9_variance_fn_ptr_t *fn_ptr,
@@ -973,8 +976,8 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
// of iterations
// 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 =
// (MAX_FIRST_STEP/4) pel... etc.
- const search_site *const ss = &x->ss[search_param * x->searches_per_step];
- const int tot_steps = (x->ss_count / x->searches_per_step) - search_param;
+ const search_site *const ss = &cfg->ss[search_param * cfg->searches_per_step];
+ const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
const uint8_t *best_address, *in_what_ref;
int best_sad = INT_MAX;
@@ -996,7 +999,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
i = 1;
for (step = 0; step < tot_steps; step++) {
- for (j = 0; j < x->searches_per_step; j++) {
+ for (j = 0; j < cfg->searches_per_step; j++) {
const MV mv = {best_mv->row + ss[i].mv.row,
best_mv->col + ss[i].mv.col};
if (is_mv_in(x, &mv)) {
@@ -1050,6 +1053,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
}
int vp9_diamond_search_sadx4(const MACROBLOCK *x,
+ const search_site_config *cfg,
MV *ref_mv, MV *best_mv, int search_param,
int sad_per_bit, int *num00,
const vp9_variance_fn_ptr_t *fn_ptr,
@@ -1075,8 +1079,8 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x,
// 0 = initial step (MAX_FIRST_STEP) pel
// 1 = (MAX_FIRST_STEP/2) pel,
// 2 = (MAX_FIRST_STEP/4) pel...
- const search_site *ss = &x->ss[search_param * x->searches_per_step];
- const int tot_steps = (x->ss_count / x->searches_per_step) - search_param;
+ const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
+ const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
@@ -1112,7 +1116,7 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x,
if (all_in) {
unsigned int sad_array[4];
- for (j = 0; j < x->searches_per_step; j += 4) {
+ for (j = 0; j < cfg->searches_per_step; j += 4) {
unsigned char const *block_offset[4];
for (t = 0; t < 4; t++)
@@ -1135,7 +1139,7 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x,
}
}
} else {
- for (j = 0; j < x->searches_per_step; j++) {
+ for (j = 0; j < cfg->searches_per_step; j++) {
// Trap illegal vectors
const MV this_mv = {best_mv->row + ss[i].mv.row,
best_mv->col + ss[i].mv.col};
@@ -1202,7 +1206,7 @@ int vp9_full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x,
const MV *ref_mv, MV *dst_mv) {
MV temp_mv;
int thissme, n, num00 = 0;
- int bestsme = cpi->diamond_search_sad(x, mvp_full, &temp_mv,
+ int bestsme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
step_param, sadpb, &n,
fn_ptr, ref_mv);
if (bestsme < INT_MAX)
@@ -1220,7 +1224,7 @@ int vp9_full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x,
if (num00) {
num00--;
} else {
- thissme = cpi->diamond_search_sad(x, mvp_full, &temp_mv,
+ thissme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
step_param + n, sadpb, &num00,
fn_ptr, ref_mv);
if (thissme < INT_MAX)
@@ -1290,192 +1294,154 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
int sad_per_bit, int distance,
const vp9_variance_fn_ptr_t *fn_ptr,
const MV *center_mv, MV *best_mv) {
+ int r;
const MACROBLOCKD *const xd = &x->e_mbd;
- const uint8_t *const what = x->plane[0].src.buf;
- const int what_stride = x->plane[0].src.stride;
- const uint8_t *const in_what = xd->plane[0].pre[0].buf;
- const int in_what_stride = xd->plane[0].pre[0].stride;
- MV this_mv;
- unsigned int bestsad = INT_MAX;
- int r, c;
- unsigned int thissad;
- int ref_row = ref_mv->row;
- int ref_col = ref_mv->col;
-
- // Apply further limits to prevent us looking using vectors that stretch
- // beyond the UMV border
- const int row_min = MAX(ref_row - distance, x->mv_row_min);
- const int row_max = MIN(ref_row + distance, x->mv_row_max);
- const int col_min = MAX(ref_col - distance, x->mv_col_min);
- const int col_max = MIN(ref_col + distance, x->mv_col_max);
- unsigned int sad_array[3];
+ const struct buf_2d *const what = &x->plane[0].src;
+ const struct buf_2d *const in_what = &xd->plane[0].pre[0];
+ const int row_min = MAX(ref_mv->row - distance, x->mv_row_min);
+ const int row_max = MIN(ref_mv->row + distance, x->mv_row_max);
+ const int col_min = MAX(ref_mv->col - distance, x->mv_col_min);
+ const int col_max = MIN(ref_mv->col + distance, x->mv_col_max);
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
+ unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride,
+ get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) +
+ mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
+ *best_mv = *ref_mv;
- // Work out the mid point for the search
- const uint8_t *bestaddress = &in_what[ref_row * in_what_stride + ref_col];
-
- best_mv->row = ref_row;
- best_mv->col = ref_col;
-
- // Baseline value at the centre
- bestsad = fn_ptr->sdf(what, what_stride,
- bestaddress, in_what_stride, 0x7fffffff)
- + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
-
- for (r = row_min; r < row_max; r++) {
- const uint8_t *check_here = &in_what[r * in_what_stride + col_min];
- this_mv.row = r;
- c = col_min;
-
- while ((c + 2) < col_max && fn_ptr->sdx3f != NULL) {
- int i;
+ for (r = row_min; r < row_max; ++r) {
+ int c = col_min;
+ const uint8_t *check_here = &in_what->buf[r * in_what->stride + c];
- fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
+ if (fn_ptr->sdx3f != NULL) {
+ while ((c + 2) < col_max) {
+ int i;
+ unsigned int sads[3];
- for (i = 0; i < 3; i++) {
- thissad = sad_array[i];
+ fn_ptr->sdx3f(what->buf, what->stride, check_here, in_what->stride,
+ sads);
- if (thissad < bestsad) {
- this_mv.col = c;
- thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
- if (thissad < bestsad) {
- bestsad = thissad;
- best_mv->row = r;
- best_mv->col = c;
+ for (i = 0; i < 3; ++i) {
+ unsigned int sad = sads[i];
+ if (sad < best_sad) {
+ const MV mv = {r, c};
+ sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
+ if (sad < best_sad) {
+ best_sad = sad;
+ *best_mv = mv;
+ }
}
+ ++check_here;
+ ++c;
}
- check_here++;
- c++;
}
}
while (c < col_max) {
- thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
- bestsad);
-
- if (thissad < bestsad) {
- this_mv.col = c;
- thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
- if (thissad < bestsad) {
- bestsad = thissad;
- best_mv->row = r;
- best_mv->col = c;
+ unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
+ check_here, in_what->stride, best_sad);
+ if (sad < best_sad) {
+ const MV mv = {r, c};
+ sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
+ if (sad < best_sad) {
+ best_sad = sad;
+ *best_mv = mv;
}
}
-
- check_here++;
- c++;
+ ++check_here;
+ ++c;
}
}
- return bestsad;
+
+ return best_sad;
}
int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
int sad_per_bit, int distance,
const vp9_variance_fn_ptr_t *fn_ptr,
const MV *center_mv, MV *best_mv) {
+ int r;
const MACROBLOCKD *const xd = &x->e_mbd;
- const uint8_t *const what = x->plane[0].src.buf;
- const int what_stride = x->plane[0].src.stride;
- const uint8_t *const in_what = xd->plane[0].pre[0].buf;
- const int in_what_stride = xd->plane[0].pre[0].stride;
- MV this_mv;
- unsigned int bestsad = INT_MAX;
- int r, c;
- int ref_row = ref_mv->row;
- int ref_col = ref_mv->col;
-
- // Apply further limits to prevent us looking using vectors that stretch
- // beyond the UMV border
- const int row_min = MAX(ref_row - distance, x->mv_row_min);
- const int row_max = MIN(ref_row + distance, x->mv_row_max);
- const int col_min = MAX(ref_col - distance, x->mv_col_min);
- const int col_max = MIN(ref_col + distance, x->mv_col_max);
- DECLARE_ALIGNED_ARRAY(16, uint32_t, sad_array8, 8);
- unsigned int sad_array[3];
+ const struct buf_2d *const what = &x->plane[0].src;
+ const struct buf_2d *const in_what = &xd->plane[0].pre[0];
+ const int row_min = MAX(ref_mv->row - distance, x->mv_row_min);
+ const int row_max = MIN(ref_mv->row + distance, x->mv_row_max);
+ const int col_min = MAX(ref_mv->col - distance, x->mv_col_min);
+ const int col_max = MIN(ref_mv->col + distance, x->mv_col_max);
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
+ unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride,
+ get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) +
+ mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
+ *best_mv = *ref_mv;
- // Work out the mid point for the search
- const uint8_t *bestaddress = &in_what[ref_row * in_what_stride + ref_col];
-
- best_mv->row = ref_row;
- best_mv->col = ref_col;
-
- // Baseline value at the center
- bestsad = fn_ptr->sdf(what, what_stride,
- bestaddress, in_what_stride, 0x7fffffff)
- + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
-
- for (r = row_min; r < row_max; r++) {
- const uint8_t *check_here = &in_what[r * in_what_stride + col_min];
- this_mv.row = r;
- c = col_min;
-
- while ((c + 7) < col_max) {
- int i;
+ for (r = row_min; r < row_max; ++r) {
+ int c = col_min;
+ const uint8_t *check_here = &in_what->buf[r * in_what->stride + c];
- fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8);
+ if (fn_ptr->sdx8f != NULL) {
+ while ((c + 7) < col_max) {
+ int i;
+ unsigned int sads[8];
- for (i = 0; i < 8; i++) {
- unsigned int thissad = (unsigned int)sad_array8[i];
+ fn_ptr->sdx8f(what->buf, what->stride, check_here, in_what->stride,
+ sads);
- if (thissad < bestsad) {
- this_mv.col = c;
- thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
- if (thissad < bestsad) {
- bestsad = thissad;
- best_mv->row = r;
- best_mv->col = c;
+ for (i = 0; i < 8; ++i) {
+ unsigned int sad = sads[i];
+ if (sad < best_sad) {
+ const MV mv = {r, c};
+ sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
+ if (sad < best_sad) {
+ best_sad = sad;
+ *best_mv = mv;
+ }
}
+ ++check_here;
+ ++c;
}
-
- check_here++;
- c++;
}
}
- while ((c + 2) < col_max && fn_ptr->sdx3f != NULL) {
- int i;
+ if (fn_ptr->sdx3f != NULL) {
+ while ((c + 2) < col_max) {
+ int i;
+ unsigned int sads[3];
- fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
+ fn_ptr->sdx3f(what->buf, what->stride, check_here, in_what->stride,
+ sads);
- for (i = 0; i < 3; i++) {
- unsigned int thissad = sad_array[i];
-
- if (thissad < bestsad) {
- this_mv.col = c;
- thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
- if (thissad < bestsad) {
- bestsad = thissad;
- best_mv->row = r;
- best_mv->col = c;
+ for (i = 0; i < 3; ++i) {
+ unsigned int sad = sads[i];
+ if (sad < best_sad) {
+ const MV mv = {r, c};
+ sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
+ if (sad < best_sad) {
+ best_sad = sad;
+ *best_mv = mv;
+ }
}
+ ++check_here;
+ ++c;
}
-
- check_here++;
- c++;
}
}
while (c < col_max) {
- unsigned int thissad = fn_ptr->sdf(what, what_stride,
- check_here, in_what_stride, bestsad);
-
- if (thissad < bestsad) {
- this_mv.col = c;
- thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
- if (thissad < bestsad) {
- bestsad = thissad;
- best_mv->row = r;
- best_mv->col = c;
+ unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
+ check_here, in_what->stride, best_sad);
+ if (sad < best_sad) {
+ const MV mv = {r, c};
+ sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
+ if (sad < best_sad) {
+ best_sad = sad;
+ *best_mv = mv;
}
}
-
- check_here++;
- c++;
+ ++check_here;
+ ++c;
}
}
- return bestsad;
+
+ return best_sad;
}
int vp9_refining_search_sad_c(const MACROBLOCK *x,
diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h
index 70d7985e4..1f524f1f6 100644
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -31,6 +31,20 @@ extern "C" {
// for Block_16x16
#define BORDER_MV_PIXELS_B16 (16 + VP9_INTERP_EXTEND)
+// motion search site
+typedef struct search_site {
+ MV mv;
+ int offset;
+} search_site;
+
+typedef struct search_site_config {
+ search_site ss[8 * MAX_MVSEARCH_STEPS + 1];
+ int ss_count;
+ int searches_per_step;
+} search_site_config;
+
+void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride);
+void vp9_init3smotion_compensation(search_site_config *cfg, int stride);
void vp9_set_mv_search_range(MACROBLOCK *x, const MV *mv);
int vp9_mv_bit_cost(const MV *mv, const MV *ref,
@@ -46,8 +60,6 @@ int vp9_get_mvpred_av_var(const MACROBLOCK *x,
const uint8_t *second_pred,
const vp9_variance_fn_ptr_t *vfp,
int use_mvcost);
-void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride);
-void vp9_init3smotion_compensation(MACROBLOCK *x, int stride);
struct VP9_COMP;
int vp9_init_search_range(struct VP9_COMP *cpi, int size);
@@ -119,6 +131,7 @@ typedef int (*vp9_refining_search_fn_t)(const MACROBLOCK *x,
const MV *center_mv);
typedef int (*vp9_diamond_search_fn_t)(const MACROBLOCK *x,
+ const search_site_config *cfg,
MV *ref_mv, MV *best_mv,
int search_param, int sad_per_bit,
int *num00,
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index c1493e719..56eb9440c 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -418,7 +418,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
// Perform intra prediction search, if the best SAD is above a certain
// threshold.
- if (best_rd > inter_mode_thresh) {
+ if (best_rd > inter_mode_thresh && bsize < cpi->sf.max_intra_bsize) {
for (this_mode = DC_PRED; this_mode <= DC_PRED; ++this_mode) {
vp9_predict_intra_block(xd, 0, b_width_log2(bsize),
mbmi->tx_size, this_mode,
diff --git a/vp9/encoder/vp9_variance.c b/vp9/encoder/vp9_variance.c
index 1399bfb7e..ae3c86aee 100644
--- a/vp9/encoder/vp9_variance.c
+++ b/vp9/encoder/vp9_variance.c
@@ -276,126 +276,6 @@ VAR(64, 64)
SUBPIX_VAR(64, 64)
SUBPIX_AVG_VAR(64, 64)
-unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 0,
- ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 0,
- ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 0,
- ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 8,
- ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 0, 8,
- ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 0, 8,
- ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 8,
- ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 8,
- ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 8,
- ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_sub_pixel_mse16x16_c(const uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- const uint8_t *dst_ptr,
- int dst_pixels_per_line,
- unsigned int *sse) {
- vp9_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line,
- xoffset, yoffset, dst_ptr,
- dst_pixels_per_line, sse);
- return *sse;
-}
-
-unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- const uint8_t *dst_ptr,
- int dst_pixels_per_line,
- unsigned int *sse) {
- vp9_sub_pixel_variance32x32_c(src_ptr, src_pixels_per_line,
- xoffset, yoffset, dst_ptr,
- dst_pixels_per_line, sse);
- return *sse;
-}
-
-unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- const uint8_t *dst_ptr,
- int dst_pixels_per_line,
- unsigned int *sse) {
- vp9_sub_pixel_variance64x64_c(src_ptr, src_pixels_per_line,
- xoffset, yoffset, dst_ptr,
- dst_pixels_per_line, sse);
- return *sse;
-}
-
void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width,
int height, const uint8_t *ref, int ref_stride) {
int i, j;
diff --git a/vp9/encoder/vp9_variance.h b/vp9/encoder/vp9_variance.h
index 4c8be71cd..152c3d962 100644
--- a/vp9/encoder/vp9_variance.h
+++ b/vp9/encoder/vp9_variance.h
@@ -82,9 +82,6 @@ typedef struct vp9_variance_vtable {
vp9_variance_fn_t vf;
vp9_subpixvariance_fn_t svf;
vp9_subp_avg_variance_fn_t svaf;
- vp9_variance_fn_t svf_halfpix_h;
- vp9_variance_fn_t svf_halfpix_v;
- vp9_variance_fn_t svf_halfpix_hv;
vp9_sad_multi_fn_t sdx3f;
vp9_sad_multi_fn_t sdx8f;
vp9_sad_multi_d_fn_t sdx4df;
diff --git a/vp9/encoder/x86/vp9_variance_sse2.c b/vp9/encoder/x86/vp9_variance_sse2.c
index 9e65694a8..25d594632 100644
--- a/vp9/encoder/x86/vp9_variance_sse2.c
+++ b/vp9/encoder/x86/vp9_variance_sse2.c
@@ -494,58 +494,3 @@ FNS(ssse3, ssse3);
#undef FNS
#undef FN
-
-unsigned int vp9_variance_halfpixvar16x16_h_sse2(
- const unsigned char *src_ptr,
- int src_pixels_per_line,
- const unsigned char *dst_ptr,
- int dst_pixels_per_line,
- unsigned int *sse) {
- int xsum0;
- unsigned int xxsum0;
-
- vp9_half_horiz_variance16x_h_sse2(
- src_ptr, src_pixels_per_line,
- dst_ptr, dst_pixels_per_line, 16,
- &xsum0, &xxsum0);
-
- *sse = xxsum0;
- return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
-}
-
-
-unsigned int vp9_variance_halfpixvar16x16_v_sse2(
- const unsigned char *src_ptr,
- int src_pixels_per_line,
- const unsigned char *dst_ptr,
- int dst_pixels_per_line,
- unsigned int *sse) {
- int xsum0;
- unsigned int xxsum0;
- vp9_half_vert_variance16x_h_sse2(
- src_ptr, src_pixels_per_line,
- dst_ptr, dst_pixels_per_line, 16,
- &xsum0, &xxsum0);
-
- *sse = xxsum0;
- return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
-}
-
-
-unsigned int vp9_variance_halfpixvar16x16_hv_sse2(
- const unsigned char *src_ptr,
- int src_pixels_per_line,
- const unsigned char *dst_ptr,
- int dst_pixels_per_line,
- unsigned int *sse) {
- int xsum0;
- unsigned int xxsum0;
-
- vp9_half_horiz_vert_variance16x_h_sse2(
- src_ptr, src_pixels_per_line,
- dst_ptr, dst_pixels_per_line, 16,
- &xsum0, &xxsum0);
-
- *sse = xxsum0;
- return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
-}