Diffstat (limited to 'vp9')
-rw-r--r--  vp9/common/vp9_rtcd_defs.pl         |  22
-rw-r--r--  vp9/encoder/vp9_encodeframe.c       |  44
-rw-r--r--  vp9/encoder/vp9_firstpass.c         | 124
-rw-r--r--  vp9/encoder/vp9_rdopt.c             |  29
-rw-r--r--  vp9/encoder/vp9_speed_features.h    |   2
-rw-r--r--  vp9/encoder/x86/vp9_variance_mmx.c  | 189
6 files changed, 202 insertions(+), 208 deletions(-)
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index c2df83ed6..66a3956a1 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -305,15 +305,15 @@ specialize qw/vp9_convolve_avg neon_asm dspr2/, "$sse2_x86inc";
$vp9_convolve_avg_neon_asm=vp9_convolve_avg_neon;
add_proto qw/void vp9_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8 sse2 ssse3 avx2 neon_asm dspr2/;
+specialize qw/vp9_convolve8 sse2 ssse3 neon_asm dspr2/;
$vp9_convolve8_neon_asm=vp9_convolve8_neon;
add_proto qw/void vp9_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8_horiz sse2 ssse3 avx2 neon_asm dspr2/;
+specialize qw/vp9_convolve8_horiz sse2 ssse3 neon_asm dspr2/;
$vp9_convolve8_horiz_neon_asm=vp9_convolve8_horiz_neon;
add_proto qw/void vp9_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8_vert sse2 ssse3 avx2 neon_asm dspr2/;
+specialize qw/vp9_convolve8_vert sse2 ssse3 neon_asm dspr2/;
$vp9_convolve8_vert_neon_asm=vp9_convolve8_vert_neon;
add_proto qw/void vp9_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
@@ -447,10 +447,10 @@ add_proto qw/unsigned int vp9_variance4x4/, "const uint8_t *src_ptr, int source_
specialize qw/vp9_variance4x4 mmx/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_sub_pixel_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_variance64x64/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
-specialize qw/vp9_sub_pixel_avg_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_avg_variance64x64/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc";
@@ -477,10 +477,10 @@ add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x32/, "const uint8_t *src_
specialize qw/vp9_sub_pixel_avg_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_sub_pixel_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_variance32x32/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
-specialize qw/vp9_sub_pixel_avg_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_avg_variance32x32/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance16x16/, "$sse2_x86inc", "$ssse3_x86inc";
@@ -653,7 +653,7 @@ add_proto qw/void vp9_sad4x4x8/, "const uint8_t *src_ptr, int src_stride, const
specialize qw/vp9_sad4x4x8 sse4/;
add_proto qw/void vp9_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
-specialize qw/vp9_sad64x64x4d sse2 avx2/;
+specialize qw/vp9_sad64x64x4d sse2/;
add_proto qw/void vp9_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
specialize qw/vp9_sad32x64x4d sse2/;
@@ -668,7 +668,7 @@ add_proto qw/void vp9_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, co
specialize qw/vp9_sad16x32x4d sse2/;
add_proto qw/void vp9_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
-specialize qw/vp9_sad32x32x4d sse2 avx2/;
+specialize qw/vp9_sad32x32x4d sse2/;
add_proto qw/void vp9_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
specialize qw/vp9_sad16x16x4d sse2/;
@@ -739,7 +739,7 @@ add_proto qw/void vp9_fht8x8/, "const int16_t *input, int16_t *output, int strid
specialize qw/vp9_fht8x8 sse2 avx2/;
add_proto qw/void vp9_fht16x16/, "const int16_t *input, int16_t *output, int stride, int tx_type";
-specialize qw/vp9_fht16x16 sse2 avx2/;
+specialize qw/vp9_fht16x16 sse2/;
add_proto qw/void vp9_fwht4x4/, "const int16_t *input, int16_t *output, int stride";
specialize qw/vp9_fwht4x4/, "$mmx_x86inc";
@@ -760,7 +760,7 @@ add_proto qw/void vp9_fdct16x16_1/, "const int16_t *input, int16_t *output, int
specialize qw/vp9_fdct16x16_1 sse2/;
add_proto qw/void vp9_fdct16x16/, "const int16_t *input, int16_t *output, int stride";
-specialize qw/vp9_fdct16x16 sse2 avx2/;
+specialize qw/vp9_fdct16x16 sse2/;
add_proto qw/void vp9_fdct32x32_1/, "const int16_t *input, int16_t *output, int stride";
specialize qw/vp9_fdct32x32_1 sse2/;
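
Each "specialize" line above feeds libvpx's run-time CPU detection (RTCD)
generator, which turns every prototype into a function pointer plus an init
routine that installs the best implementation the host CPU supports. A minimal
sketch of the generated dispatch, assuming the usual vpx_ports/x86.h helpers
(illustrative, not the literal contents of the generated vp9_rtcd.h):

    /* Sketch of the dispatch generated from vp9_rtcd_defs.pl (illustrative). */
    void (*vp9_fdct16x16)(const int16_t *input, int16_t *output, int stride);

    static void setup_rtcd_internal(void) {
      const int flags = x86_simd_caps();  /* CPUID-based feature probe */
      vp9_fdct16x16 = vp9_fdct16x16_c;    /* portable C fallback */
      if (flags & HAS_SSE2)
        vp9_fdct16x16 = vp9_fdct16x16_sse2;
      /* With avx2 dropped from the specialize list, no HAS_AVX2 branch is
         emitted, so the AVX2 kernel can never be selected. */
    }

Removing "avx2" from these lists therefore disables the AVX2 variants at
dispatch time without deleting their source files.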
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 75416f2aa..537500294 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -697,6 +697,38 @@ void vp9_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
x->e_mbd.plane[i].subsampling_y);
}
+static void set_mode_info_seg_skip(MACROBLOCK *x, TX_MODE tx_mode, int *rate,
+ int64_t *dist, BLOCK_SIZE bsize) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ INTERP_FILTER filter_ref;
+
+ if (xd->up_available)
+ filter_ref = xd->mi[-xd->mi_stride]->mbmi.interp_filter;
+ else if (xd->left_available)
+ filter_ref = xd->mi[-1]->mbmi.interp_filter;
+ else
+ filter_ref = EIGHTTAP;
+
+ mbmi->sb_type = bsize;
+ mbmi->mode = ZEROMV;
+ mbmi->tx_size = MIN(max_txsize_lookup[bsize],
+ tx_mode_to_biggest_tx_size[tx_mode]);
+ mbmi->skip = 1;
+ mbmi->uv_mode = DC_PRED;
+ mbmi->ref_frame[0] = LAST_FRAME;
+ mbmi->ref_frame[1] = NONE;
+ mbmi->mv[0].as_int = 0;
+ mbmi->interp_filter = filter_ref;
+
+ xd->mi[0]->bmi[0].as_mv[0].as_int = 0;
+ x->skip = 1;
+ x->skip_encode = 1;
+
+ *rate = 0;
+ *dist = 0;
+}
+
static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
int mi_row, int mi_col,
int *totalrate, int64_t *totaldist,
@@ -2442,17 +2474,21 @@ static void nonrd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *mbmi;
set_offsets(cpi, tile, mi_row, mi_col, bsize);
- xd->mi[0]->mbmi.sb_type = bsize;
+ mbmi = &xd->mi[0]->mbmi;
+ mbmi->sb_type = bsize;
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) {
- if (xd->mi[0]->mbmi.segment_id && x->in_static_area)
+ if (mbmi->segment_id && x->in_static_area)
x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
}
if (!frame_is_intra_only(cm)) {
- vp9_pick_inter_mode(cpi, x, tile, mi_row, mi_col,
- rate, dist, bsize);
+ if (vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
+ set_mode_info_seg_skip(x, cm->tx_mode, rate, dist, bsize);
+ else
+ vp9_pick_inter_mode(cpi, x, tile, mi_row, mi_col, rate, dist, bsize);
} else {
set_mode_info(&xd->mi[0]->mbmi, bsize, DC_PRED);
}
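
The new fast path above fires only when the block's segment has SEG_LVL_SKIP
active. A hypothetical setup that would exercise it, using the segmentation
helper from vp9_seg_common.h with illustrative values:

    /* Mark segment 1 as "skip": its blocks bypass vp9_pick_inter_mode()
       and are stamped by set_mode_info_seg_skip() instead (illustrative). */
    struct segmentation *const seg = &cm->seg;
    vp9_enable_segfeature(seg, 1 /* segment_id */, SEG_LVL_SKIP);
    /* Per block this yields ZEROMV against LAST_FRAME, skip = 1, and zero
       rate/distortion reported back to the partition search. */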
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index a49fe3df5..d008c63d0 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -597,73 +597,91 @@ void vp9_first_pass(VP9_COMP *cpi) {
if (cm->current_video_frame > 0) {
int tmp_err, motion_error;
int_mv mv, tmp_mv;
+ int raw_motion_error;
+ struct buf_2d unscaled_last_source_buf_2d;
xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
motion_error = get_prediction_error(bsize, &x->plane[0].src,
&xd->plane[0].pre[0]);
- // Assume 0,0 motion with no mv overhead.
- mv.as_int = tmp_mv.as_int = 0;
-
- // Test last reference frame using the previous best mv as the
- // starting point (best reference) for the search.
- first_pass_motion_search(cpi, x, &best_ref_mv.as_mv, &mv.as_mv,
- &motion_error);
- if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
- vp9_clear_system_state();
- motion_error = (int)(motion_error * error_weight);
- }
- // If the current best reference mv is not centered on 0,0 then do a 0,0
- // based search as well.
- if (best_ref_mv.as_int) {
- tmp_err = INT_MAX;
- first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv.as_mv,
- &tmp_err);
+ // Compute the motion error of the zero motion vector using the last
+ // source frame as the reference. Skip the further motion search on
+ // the reconstructed frame if this error is small.
+ unscaled_last_source_buf_2d.buf = cpi->unscaled_last_source->y_buffer
+ + recon_yoffset;
+ unscaled_last_source_buf_2d.stride =
+ cpi->unscaled_last_source->y_stride;
+ raw_motion_error = get_prediction_error(bsize, &x->plane[0].src,
+ &unscaled_last_source_buf_2d);
+
+ // TODO(pengchong): Replace the hard-coded threshold
+ if (raw_motion_error > 25) {
+ // Assume 0,0 motion with no mv overhead.
+ mv.as_int = tmp_mv.as_int = 0;
+
+ // Test last reference frame using the previous best mv as the
+ // starting point (best reference) for the search.
+ first_pass_motion_search(cpi, x, &best_ref_mv.as_mv, &mv.as_mv,
+ &motion_error);
if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
vp9_clear_system_state();
- tmp_err = (int)(tmp_err * error_weight);
+ motion_error = (int)(motion_error * error_weight);
}
- if (tmp_err < motion_error) {
- motion_error = tmp_err;
- mv.as_int = tmp_mv.as_int;
+ // If the current best reference mv is not centered on 0,0 then do
+ // a 0,0 based search as well.
+ if (best_ref_mv.as_int) {
+ tmp_err = INT_MAX;
+ first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv.as_mv,
+ &tmp_err);
+ if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
+ vp9_clear_system_state();
+ tmp_err = (int)(tmp_err * error_weight);
+ }
+
+ if (tmp_err < motion_error) {
+ motion_error = tmp_err;
+ mv.as_int = tmp_mv.as_int;
+ }
}
- }
- // Search in an older reference frame.
- if (cm->current_video_frame > 1 && gld_yv12 != NULL) {
- // Assume 0,0 motion with no mv overhead.
- int gf_motion_error;
+ // Search in an older reference frame.
+ if (cm->current_video_frame > 1 && gld_yv12 != NULL) {
+ // Assume 0,0 motion with no mv overhead.
+ int gf_motion_error;
- xd->plane[0].pre[0].buf = gld_yv12->y_buffer + recon_yoffset;
- gf_motion_error = get_prediction_error(bsize, &x->plane[0].src,
- &xd->plane[0].pre[0]);
+ xd->plane[0].pre[0].buf = gld_yv12->y_buffer + recon_yoffset;
+ gf_motion_error = get_prediction_error(bsize, &x->plane[0].src,
+ &xd->plane[0].pre[0]);
- first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv.as_mv,
- &gf_motion_error);
- if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
- vp9_clear_system_state();
- gf_motion_error = (int)(gf_motion_error * error_weight);
- }
+ first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv.as_mv,
+ &gf_motion_error);
+ if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
+ vp9_clear_system_state();
+ gf_motion_error = (int)(gf_motion_error * error_weight);
+ }
- if (gf_motion_error < motion_error && gf_motion_error < this_error)
- ++second_ref_count;
-
- // Reset to last frame as reference buffer.
- xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
- xd->plane[1].pre[0].buf = first_ref_buf->u_buffer + recon_uvoffset;
- xd->plane[2].pre[0].buf = first_ref_buf->v_buffer + recon_uvoffset;
-
- // In accumulating a score for the older reference frame take the
- // best of the motion predicted score and the intra coded error
- // (just as will be done for) accumulation of "coded_error" for
- // the last frame.
- if (gf_motion_error < this_error)
- sr_coded_error += gf_motion_error;
- else
- sr_coded_error += this_error;
- } else {
- sr_coded_error += motion_error;
+ if (gf_motion_error < motion_error && gf_motion_error < this_error)
+ ++second_ref_count;
+
+ // Reset to last frame as reference buffer.
+ xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
+ xd->plane[1].pre[0].buf = first_ref_buf->u_buffer + recon_uvoffset;
+ xd->plane[2].pre[0].buf = first_ref_buf->v_buffer + recon_uvoffset;
+
+ // In accumulating a score for the older reference frame, take the
+ // best of the motion predicted score and the intra coded error
+ // (just as is done for the accumulation of "coded_error" for
+ // the last frame).
+ if (gf_motion_error < this_error)
+ sr_coded_error += gf_motion_error;
+ else
+ sr_coded_error += this_error;
+ } else {
+ sr_coded_error += motion_error;
+ }
}
// Start by assuming that intra mode is best.
best_ref_mv.as_int = 0;
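
The restructuring above places a cheap gate in front of the first-pass motion
searches: the 0,0 prediction error is measured against the unscaled last
source frame, and only blocks whose raw error exceeds the hard-coded threshold
run the full searches. A condensed view of the control flow (a sketch that
mirrors the patch rather than quoting it):

    raw_motion_error = get_prediction_error(bsize, &x->plane[0].src,
                                            &unscaled_last_source_buf_2d);
    if (raw_motion_error > 25) {
      /* Search against the last reference and, past frame 1, the golden
         frame, accumulating sr_coded_error as before. */
    } else {
      /* The zero mv already predicts well; skip the searches entirely. */
    }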
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index f68aa2738..f8acf5b7a 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -994,21 +994,13 @@ static void inter_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
return;
}
- if (cpi->sf.tx_size_search_method == USE_LARGESTINTRA_MODELINTER) {
- for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
- model_rd_for_sb_y_tx(cpi, bs, tx_size, x, xd,
- &r[tx_size][0], &d[tx_size], &s[tx_size]);
- choose_txfm_size_from_modelrd(cpi, x, r, rate, d, distortion, s,
- skip, sse, ref_best_rd, bs);
- } else {
- for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
- txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size],
- &s[tx_size], &sse[tx_size],
- ref_best_rd, 0, bs, tx_size,
- cpi->sf.use_fast_coef_costing);
- choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
- skip, txfm_cache, bs);
- }
+ for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
+ txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size], &s[tx_size],
+ &sse[tx_size], ref_best_rd, 0, bs, tx_size,
+ cpi->sf.use_fast_coef_costing);
+ choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
+ skip, txfm_cache, bs);
+
if (psse)
*psse = sse[mbmi->tx_size];
}
@@ -2810,7 +2802,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
*rate2 += vp9_get_switchable_rate(cpi);
if (!is_comp_pred) {
- if (!x->in_active_map) {
+ if (!x->in_active_map ||
+ vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
if (psse)
*psse = 0;
*distortion = 0;
@@ -3127,9 +3120,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
// If the segment skip feature is enabled...
// then do nothing if the current mode is not allowed.
if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)) {
- const int inter_non_zero_mode_mask = 0x1F7F7;
- mode_skip_mask |= inter_non_zero_mode_mask;
- mode_skip_mask &= ~(1 << THR_ZEROMV);
+ mode_skip_mask = ~(1 << THR_ZEROMV);
inter_mode_mask = (1 << ZEROMV);
}
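
The simplified mask sets every skip bit except THR_ZEROMV, where the old code
OR'd in a hand-maintained constant (0x1F7F7) and then cleared the ZEROMV bit.
A minimal before/after sketch, with the THR_ZEROMV bit index left as an
assumption (the real value follows the mode ordering in vp9_rdopt.c):

    #include <stdint.h>

    enum { THR_ZEROMV_BIT = 4 };  /* assumed bit index, for illustration */

    static uint64_t old_mask(uint64_t mask) {
      mask |= 0x1F7F7;                    /* skip a hand-listed mode set */
      mask &= ~(1ULL << THR_ZEROMV_BIT);  /* ...but re-allow ZEROMV */
      return mask;
    }

    static uint64_t new_mask(void) {
      return ~(1ULL << THR_ZEROMV_BIT);   /* skip everything but ZEROMV */
    }

The new form is stricter: any mode the old constant happened to leave enabled
is now skipped as well, matching the companion assignment
inter_mode_mask = (1 << ZEROMV).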
diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h
index 0a88499af..4a1a13bc0 100644
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -56,8 +56,6 @@ typedef enum {
typedef enum {
USE_FULL_RD = 0,
- USE_LARGESTINTRA,
- USE_LARGESTINTRA_MODELINTER,
USE_LARGESTALL,
USE_TX_8X8
} TX_SIZE_SEARCH_METHOD;
diff --git a/vp9/encoder/x86/vp9_variance_mmx.c b/vp9/encoder/x86/vp9_variance_mmx.c
index ae2f976af..ce1c83297 100644
--- a/vp9/encoder/x86/vp9_variance_mmx.c
+++ b/vp9/encoder/x86/vp9_variance_mmx.c
@@ -12,141 +12,92 @@
#include "vp9/encoder/vp9_variance.h"
#include "vpx_ports/mem.h"
-extern unsigned int vp9_get8x8var_mmx
-(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *SSE,
- int *Sum
-);
-extern unsigned int vp9_get4x4var_mmx
-(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *SSE,
- int *Sum
-);
-
-unsigned int vp9_variance4x4_mmx(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- unsigned int var;
- int avg;
-
- vp9_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg);
- *sse = var;
- return (var - (((unsigned int)avg * avg) >> 4));
+unsigned int vp9_get8x8var_mmx(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse, int *sum);
+
+unsigned int vp9_get4x4var_mmx(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse, int *sum);
+
+unsigned int vp9_variance4x4_mmx(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ vp9_get4x4var_mmx(src, src_stride, ref, ref_stride, sse, &sum);
+ return *sse - (((unsigned int)sum * sum) >> 4);
}
-unsigned int vp9_variance8x8_mmx(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- unsigned int var;
- int avg;
+unsigned int vp9_variance8x8_mmx(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ vp9_get8x8var_mmx(src, src_stride, ref, ref_stride, sse, &sum);
+ return *sse - (((unsigned int)sum * sum) >> 6);
+}
+
+unsigned int vp9_mse16x16_mmx(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse) {
+ unsigned int sse0, sse1, sse2, sse3;
+ int sum0, sum1, sum2, sum3;
- vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg);
- *sse = var;
+ vp9_get8x8var_mmx(src, src_stride, ref, ref_stride, &sse0, &sum0);
+ vp9_get8x8var_mmx(src + 8, src_stride, ref + 8, ref_stride, &sse1, &sum1);
+ vp9_get8x8var_mmx(src + 8 * src_stride, src_stride,
+ ref + 8 * ref_stride, ref_stride, &sse2, &sum2);
+ vp9_get8x8var_mmx(src + 8 * src_stride + 8, src_stride,
+ ref + 8 * ref_stride + 8, ref_stride, &sse3, &sum3);
- return (var - (((unsigned int)avg * avg) >> 6));
+ *sse = sse0 + sse1 + sse2 + sse3;
+ return *sse;
}
-unsigned int vp9_mse16x16_mmx(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- unsigned int sse0, sse1, sse2, sse3, var;
- int sum0, sum1, sum2, sum3;
+unsigned int vp9_variance16x16_mmx(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse) {
+ unsigned int sse0, sse1, sse2, sse3;
+ int sum0, sum1, sum2, sum3, sum;
- vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0,
- &sum0);
- vp9_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride,
- &sse1, &sum1);
- vp9_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride,
- ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2);
- vp9_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride,
- ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);
+ vp9_get8x8var_mmx(src, src_stride, ref, ref_stride, &sse0, &sum0);
+ vp9_get8x8var_mmx(src + 8, src_stride, ref + 8, ref_stride, &sse1, &sum1);
+ vp9_get8x8var_mmx(src + 8 * src_stride, src_stride,
+ ref + 8 * ref_stride, ref_stride, &sse2, &sum2);
+ vp9_get8x8var_mmx(src + 8 * src_stride + 8, src_stride,
+ ref + 8 * ref_stride + 8, ref_stride, &sse3, &sum3);
- var = sse0 + sse1 + sse2 + sse3;
- *sse = var;
- return var;
+ *sse = sse0 + sse1 + sse2 + sse3;
+ sum = sum0 + sum1 + sum2 + sum3;
+ return *sse - (((unsigned int)sum * sum) >> 8);
}
+unsigned int vp9_variance16x8_mmx(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse) {
+ unsigned int sse0, sse1;
+ int sum0, sum1, sum;
-unsigned int vp9_variance16x16_mmx(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- unsigned int sse0, sse1, sse2, sse3, var;
- int sum0, sum1, sum2, sum3, avg;
-
- vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0,
- &sum0);
- vp9_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride,
- &sse1, &sum1);
- vp9_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride,
- ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2);
- vp9_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride,
- ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);
-
- var = sse0 + sse1 + sse2 + sse3;
- avg = sum0 + sum1 + sum2 + sum3;
- *sse = var;
- return (var - (((unsigned int)avg * avg) >> 8));
-}
+ vp9_get8x8var_mmx(src, src_stride, ref, ref_stride, &sse0, &sum0);
+ vp9_get8x8var_mmx(src + 8, src_stride, ref + 8, ref_stride, &sse1, &sum1);
-unsigned int vp9_variance16x8_mmx(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- unsigned int sse0, sse1, var;
- int sum0, sum1, avg;
-
- vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0,
- &sum0);
- vp9_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride,
- &sse1, &sum1);
-
- var = sse0 + sse1;
- avg = sum0 + sum1;
- *sse = var;
- return (var - (((unsigned int)avg * avg) >> 7));
+ *sse = sse0 + sse1;
+ sum = sum0 + sum1;
+ return *sse - (((unsigned int)sum * sum) >> 7);
}
-unsigned int vp9_variance8x16_mmx(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- unsigned int sse0, sse1, var;
- int sum0, sum1, avg;
-
- vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0,
- &sum0);
- vp9_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride,
- ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1);
+unsigned int vp9_variance8x16_mmx(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse) {
+ unsigned int sse0, sse1;
+ int sum0, sum1, sum;
- var = sse0 + sse1;
- avg = sum0 + sum1;
- *sse = var;
+ vp9_get8x8var_mmx(src, src_stride, ref, ref_stride, &sse0, &sum0);
+ vp9_get8x8var_mmx(src + 8 * src_stride, src_stride,
+ ref + 8 * ref_stride, ref_stride, &sse1, &sum1);
- return (var - (((unsigned int)avg * avg) >> 7));
+ *sse = sse0 + sse1;
+ sum = sum0 + sum1;
+ return *sse - (((unsigned int)sum * sum) >> 7);
}
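
Throughout this file the return value follows the identity
variance = SSE - sum^2 / N, with the final shift encoding the pixel count N:
>> 4 for 4x4 (N = 16), >> 6 for 8x8 (N = 64), >> 7 for 16x8 and 8x16
(N = 128), and >> 8 for 16x16 (N = 256). A plain-C reference of the same
computation, handy for checking the MMX kernels (illustrative, not part of
the patch):

    #include <stdint.h>

    static unsigned int variance_ref(const uint8_t *src, int src_stride,
                                     const uint8_t *ref, int ref_stride,
                                     int w, int h, unsigned int *sse) {
      int i, j, sum = 0;
      unsigned int sse_acc = 0;
      for (i = 0; i < h; ++i) {
        for (j = 0; j < w; ++j) {
          const int diff = src[i * src_stride + j] - ref[i * ref_stride + j];
          sum += diff;
          sse_acc += diff * diff;
        }
      }
      *sse = sse_acc;
      /* sum^2 / N, matching the shifts above since N is a power of two. */
      return sse_acc - (((unsigned int)sum * sum) / (w * h));
    }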