diff options
Diffstat (limited to 'vp9')
-rw-r--r-- | vp9/common/vp9_rtcd_defs.pl | 22 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 44 | ||||
-rw-r--r-- | vp9/encoder/vp9_firstpass.c | 124 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 29 | ||||
-rw-r--r-- | vp9/encoder/vp9_speed_features.h | 2 | ||||
-rw-r--r-- | vp9/encoder/x86/vp9_variance_mmx.c | 189 |
6 files changed, 202 insertions, 208 deletions
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl index c2df83ed6..66a3956a1 100644 --- a/vp9/common/vp9_rtcd_defs.pl +++ b/vp9/common/vp9_rtcd_defs.pl @@ -305,15 +305,15 @@ specialize qw/vp9_convolve_avg neon_asm dspr2/, "$sse2_x86inc"; $vp9_convolve_avg_neon_asm=vp9_convolve_avg_neon; add_proto qw/void vp9_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; -specialize qw/vp9_convolve8 sse2 ssse3 avx2 neon_asm dspr2/; +specialize qw/vp9_convolve8 sse2 ssse3 neon_asm dspr2/; $vp9_convolve8_neon_asm=vp9_convolve8_neon; add_proto qw/void vp9_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; -specialize qw/vp9_convolve8_horiz sse2 ssse3 avx2 neon_asm dspr2/; +specialize qw/vp9_convolve8_horiz sse2 ssse3 neon_asm dspr2/; $vp9_convolve8_horiz_neon_asm=vp9_convolve8_horiz_neon; add_proto qw/void vp9_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; -specialize qw/vp9_convolve8_vert sse2 ssse3 avx2 neon_asm dspr2/; +specialize qw/vp9_convolve8_vert sse2 ssse3 neon_asm dspr2/; $vp9_convolve8_vert_neon_asm=vp9_convolve8_vert_neon; add_proto qw/void vp9_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; @@ -447,10 +447,10 @@ add_proto qw/unsigned int vp9_variance4x4/, "const uint8_t *src_ptr, int source_ specialize qw/vp9_variance4x4 mmx/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_sub_pixel_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc"; +specialize qw/vp9_sub_pixel_variance64x64/, "$sse2_x86inc", "$ssse3_x86inc"; add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; -specialize qw/vp9_sub_pixel_avg_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc"; +specialize qw/vp9_sub_pixel_avg_variance64x64/, "$sse2_x86inc", "$ssse3_x86inc"; add_proto qw/unsigned int vp9_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vp9_sub_pixel_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc"; @@ -477,10 +477,10 @@ add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x32/, "const uint8_t *src_ specialize qw/vp9_sub_pixel_avg_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc"; add_proto qw/unsigned int vp9_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_sub_pixel_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc"; +specialize qw/vp9_sub_pixel_variance32x32/, "$sse2_x86inc", "$ssse3_x86inc"; add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; -specialize qw/vp9_sub_pixel_avg_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc"; +specialize qw/vp9_sub_pixel_avg_variance32x32/, "$sse2_x86inc", "$ssse3_x86inc"; add_proto qw/unsigned int vp9_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vp9_sub_pixel_variance16x16/, "$sse2_x86inc", "$ssse3_x86inc"; @@ -653,7 +653,7 @@ add_proto qw/void vp9_sad4x4x8/, "const uint8_t *src_ptr, int src_stride, const specialize qw/vp9_sad4x4x8 sse4/; add_proto qw/void vp9_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; -specialize qw/vp9_sad64x64x4d sse2 avx2/; +specialize qw/vp9_sad64x64x4d sse2/; add_proto qw/void vp9_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; specialize qw/vp9_sad32x64x4d sse2/; @@ -668,7 +668,7 @@ add_proto qw/void vp9_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, co specialize qw/vp9_sad16x32x4d sse2/; add_proto qw/void vp9_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; -specialize qw/vp9_sad32x32x4d sse2 avx2/; +specialize qw/vp9_sad32x32x4d sse2/; add_proto qw/void vp9_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; specialize qw/vp9_sad16x16x4d sse2/; @@ -739,7 +739,7 @@ add_proto qw/void vp9_fht8x8/, "const int16_t *input, int16_t *output, int strid specialize qw/vp9_fht8x8 sse2 avx2/; add_proto qw/void vp9_fht16x16/, "const int16_t *input, int16_t *output, int stride, int tx_type"; -specialize qw/vp9_fht16x16 sse2 avx2/; +specialize qw/vp9_fht16x16 sse2/; add_proto qw/void vp9_fwht4x4/, "const int16_t *input, int16_t *output, int stride"; specialize qw/vp9_fwht4x4/, "$mmx_x86inc"; @@ -760,7 +760,7 @@ add_proto qw/void vp9_fdct16x16_1/, "const int16_t *input, int16_t *output, int specialize qw/vp9_fdct16x16_1 sse2/; add_proto qw/void vp9_fdct16x16/, "const int16_t *input, int16_t *output, int stride"; -specialize qw/vp9_fdct16x16 sse2 avx2/; +specialize qw/vp9_fdct16x16 sse2/; add_proto qw/void vp9_fdct32x32_1/, "const int16_t *input, int16_t *output, int stride"; specialize qw/vp9_fdct32x32_1 sse2/; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 75416f2aa..537500294 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -697,6 +697,38 @@ void vp9_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src, x->e_mbd.plane[i].subsampling_y); } +static void set_mode_info_seg_skip(MACROBLOCK *x, TX_MODE tx_mode, int *rate, + int64_t *dist, BLOCK_SIZE bsize) { + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + INTERP_FILTER filter_ref; + + if (xd->up_available) + filter_ref = xd->mi[-xd->mi_stride]->mbmi.interp_filter; + else if (xd->left_available) + filter_ref = xd->mi[-1]->mbmi.interp_filter; + else + filter_ref = EIGHTTAP; + + mbmi->sb_type = bsize; + mbmi->mode = ZEROMV; + mbmi->tx_size = MIN(max_txsize_lookup[bsize], + tx_mode_to_biggest_tx_size[tx_mode]); + mbmi->skip = 1; + mbmi->uv_mode = DC_PRED; + mbmi->ref_frame[0] = LAST_FRAME; + mbmi->ref_frame[1] = NONE; + mbmi->mv[0].as_int = 0; + mbmi->interp_filter = filter_ref; + + xd->mi[0]->bmi[0].as_mv[0].as_int = 0; + x->skip = 1; + x->skip_encode = 1; + + *rate = 0; + *dist = 0; +} + static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, int mi_row, int mi_col, int *totalrate, int64_t *totaldist, @@ -2442,17 +2474,21 @@ static void nonrd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *mbmi; set_offsets(cpi, tile, mi_row, mi_col, bsize); - xd->mi[0]->mbmi.sb_type = bsize; + mbmi = &xd->mi[0]->mbmi; + mbmi->sb_type = bsize; if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) { - if (xd->mi[0]->mbmi.segment_id && x->in_static_area) + if (mbmi->segment_id && x->in_static_area) x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh); } if (!frame_is_intra_only(cm)) { - vp9_pick_inter_mode(cpi, x, tile, mi_row, mi_col, - rate, dist, bsize); + if (vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) + set_mode_info_seg_skip(x, cm->tx_mode, rate, dist, bsize); + else + vp9_pick_inter_mode(cpi, x, tile, mi_row, mi_col, rate, dist, bsize); } else { set_mode_info(&xd->mi[0]->mbmi, bsize, DC_PRED); } diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index a49fe3df5..d008c63d0 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -597,73 +597,91 @@ void vp9_first_pass(VP9_COMP *cpi) { if (cm->current_video_frame > 0) { int tmp_err, motion_error; int_mv mv, tmp_mv; + int raw_motion_error; + struct buf_2d unscaled_last_source_buf_2d; xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset; motion_error = get_prediction_error(bsize, &x->plane[0].src, &xd->plane[0].pre[0]); - // Assume 0,0 motion with no mv overhead. - mv.as_int = tmp_mv.as_int = 0; - - // Test last reference frame using the previous best mv as the - // starting point (best reference) for the search. - first_pass_motion_search(cpi, x, &best_ref_mv.as_mv, &mv.as_mv, - &motion_error); - if (cpi->oxcf.aq_mode == VARIANCE_AQ) { - vp9_clear_system_state(); - motion_error = (int)(motion_error * error_weight); - } - // If the current best reference mv is not centered on 0,0 then do a 0,0 - // based search as well. - if (best_ref_mv.as_int) { - tmp_err = INT_MAX; - first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv.as_mv, - &tmp_err); + // compute the motion error of the zero motion vector using the last + // source frame as the reference + // skip the further motion search on reconstructed frame + // if this error is small + unscaled_last_source_buf_2d.buf = cpi->unscaled_last_source->y_buffer + + recon_yoffset; + unscaled_last_source_buf_2d.stride = + cpi->unscaled_last_source->y_stride; + raw_motion_error = get_prediction_error(bsize, &x->plane[0].src, + &unscaled_last_source_buf_2d); + + // TODO(pengchong): Replace the hard-coded threshold + if (raw_motion_error > 25) { + // Assume 0,0 motion with no mv overhead. + mv.as_int = tmp_mv.as_int = 0; + + // Test last reference frame using the previous best mv as the + // starting point (best reference) for the search. + first_pass_motion_search(cpi, x, &best_ref_mv.as_mv, &mv.as_mv, + &motion_error); if (cpi->oxcf.aq_mode == VARIANCE_AQ) { vp9_clear_system_state(); - tmp_err = (int)(tmp_err * error_weight); + motion_error = (int)(motion_error * error_weight); } - if (tmp_err < motion_error) { - motion_error = tmp_err; - mv.as_int = tmp_mv.as_int; + // If the current best reference mv is not centered on 0,0 + // then do a 0,0 + // based search as well. + if (best_ref_mv.as_int) { + tmp_err = INT_MAX; + first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv.as_mv, + &tmp_err); + if (cpi->oxcf.aq_mode == VARIANCE_AQ) { + vp9_clear_system_state(); + tmp_err = (int)(tmp_err * error_weight); + } + + if (tmp_err < motion_error) { + motion_error = tmp_err; + mv.as_int = tmp_mv.as_int; + } } - } - // Search in an older reference frame. - if (cm->current_video_frame > 1 && gld_yv12 != NULL) { - // Assume 0,0 motion with no mv overhead. - int gf_motion_error; + // Search in an older reference frame. + if (cm->current_video_frame > 1 && gld_yv12 != NULL) { + // Assume 0,0 motion with no mv overhead. + int gf_motion_error; - xd->plane[0].pre[0].buf = gld_yv12->y_buffer + recon_yoffset; - gf_motion_error = get_prediction_error(bsize, &x->plane[0].src, - &xd->plane[0].pre[0]); + xd->plane[0].pre[0].buf = gld_yv12->y_buffer + recon_yoffset; + gf_motion_error = get_prediction_error(bsize, &x->plane[0].src, + &xd->plane[0].pre[0]); - first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv.as_mv, - &gf_motion_error); - if (cpi->oxcf.aq_mode == VARIANCE_AQ) { - vp9_clear_system_state(); - gf_motion_error = (int)(gf_motion_error * error_weight); - } + first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv.as_mv, + &gf_motion_error); + if (cpi->oxcf.aq_mode == VARIANCE_AQ) { + vp9_clear_system_state(); + gf_motion_error = (int)(gf_motion_error * error_weight); + } - if (gf_motion_error < motion_error && gf_motion_error < this_error) - ++second_ref_count; - - // Reset to last frame as reference buffer. - xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset; - xd->plane[1].pre[0].buf = first_ref_buf->u_buffer + recon_uvoffset; - xd->plane[2].pre[0].buf = first_ref_buf->v_buffer + recon_uvoffset; - - // In accumulating a score for the older reference frame take the - // best of the motion predicted score and the intra coded error - // (just as will be done for) accumulation of "coded_error" for - // the last frame. - if (gf_motion_error < this_error) - sr_coded_error += gf_motion_error; - else - sr_coded_error += this_error; - } else { - sr_coded_error += motion_error; + if (gf_motion_error < motion_error && gf_motion_error < this_error) + ++second_ref_count; + + // Reset to last frame as reference buffer. + xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset; + xd->plane[1].pre[0].buf = first_ref_buf->u_buffer + recon_uvoffset; + xd->plane[2].pre[0].buf = first_ref_buf->v_buffer + recon_uvoffset; + + // In accumulating a score for the older reference frame take the + // best of the motion predicted score and the intra coded error + // (just as will be done for) accumulation of "coded_error" for + // the last frame. + if (gf_motion_error < this_error) + sr_coded_error += gf_motion_error; + else + sr_coded_error += this_error; + } else { + sr_coded_error += motion_error; + } } // Start by assuming that intra mode is best. best_ref_mv.as_int = 0; diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index f68aa2738..f8acf5b7a 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -994,21 +994,13 @@ static void inter_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, return; } - if (cpi->sf.tx_size_search_method == USE_LARGESTINTRA_MODELINTER) { - for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size) - model_rd_for_sb_y_tx(cpi, bs, tx_size, x, xd, - &r[tx_size][0], &d[tx_size], &s[tx_size]); - choose_txfm_size_from_modelrd(cpi, x, r, rate, d, distortion, s, - skip, sse, ref_best_rd, bs); - } else { - for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size) - txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size], - &s[tx_size], &sse[tx_size], - ref_best_rd, 0, bs, tx_size, - cpi->sf.use_fast_coef_costing); - choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, - skip, txfm_cache, bs); - } + for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size) + txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size], &s[tx_size], + &sse[tx_size], ref_best_rd, 0, bs, tx_size, + cpi->sf.use_fast_coef_costing); + choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, + skip, txfm_cache, bs); + if (psse) *psse = sse[mbmi->tx_size]; } @@ -2810,7 +2802,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, *rate2 += vp9_get_switchable_rate(cpi); if (!is_comp_pred) { - if (!x->in_active_map) { + if (!x->in_active_map || + vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { if (psse) *psse = 0; *distortion = 0; @@ -3127,9 +3120,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // If the segment skip feature is enabled.... // then do nothing if the current mode is not allowed.. if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)) { - const int inter_non_zero_mode_mask = 0x1F7F7; - mode_skip_mask |= inter_non_zero_mode_mask; - mode_skip_mask &= ~(1 << THR_ZEROMV); + mode_skip_mask = ~(1 << THR_ZEROMV); inter_mode_mask = (1 << ZEROMV); } diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h index 0a88499af..4a1a13bc0 100644 --- a/vp9/encoder/vp9_speed_features.h +++ b/vp9/encoder/vp9_speed_features.h @@ -56,8 +56,6 @@ typedef enum { typedef enum { USE_FULL_RD = 0, - USE_LARGESTINTRA, - USE_LARGESTINTRA_MODELINTER, USE_LARGESTALL, USE_TX_8X8 } TX_SIZE_SEARCH_METHOD; diff --git a/vp9/encoder/x86/vp9_variance_mmx.c b/vp9/encoder/x86/vp9_variance_mmx.c index ae2f976af..ce1c83297 100644 --- a/vp9/encoder/x86/vp9_variance_mmx.c +++ b/vp9/encoder/x86/vp9_variance_mmx.c @@ -12,141 +12,92 @@ #include "vp9/encoder/vp9_variance.h" #include "vpx_ports/mem.h" -extern unsigned int vp9_get8x8var_mmx -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *SSE, - int *Sum -); -extern unsigned int vp9_get4x4var_mmx -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *SSE, - int *Sum -); - -unsigned int vp9_variance4x4_mmx( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - unsigned int var; - int avg; - - vp9_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg); - *sse = var; - return (var - (((unsigned int)avg * avg) >> 4)); +unsigned int vp9_get8x8var_mmx(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse, int *sum); + +unsigned int vp9_get4x4var_mmx(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *SSE, int *sum); + +unsigned int vp9_variance4x4_mmx(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + int sum; + vp9_get4x4var_mmx(src, src_stride, ref, ref_stride, sse, &sum); + return *sse - (((unsigned int)sum * sum) >> 4); } -unsigned int vp9_variance8x8_mmx( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - unsigned int var; - int avg; +unsigned int vp9_variance8x8_mmx(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + int sum; + vp9_get8x8var_mmx(src, src_stride, ref, ref_stride, sse, &sum); + return *sse - (((unsigned int)sum * sum) >> 6); +} + +unsigned int vp9_mse16x16_mmx(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + unsigned int sse0, sse1, sse2, sse3; + int sum0, sum1, sum2, sum3; - vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg); - *sse = var; + vp9_get8x8var_mmx(src, src_stride, ref, ref_stride, &sse0, &sum0); + vp9_get8x8var_mmx(src + 8, src_stride, ref + 8, ref_stride, &sse1, &sum1); + vp9_get8x8var_mmx(src + 8 * src_stride, src_stride, + ref + 8 * ref_stride, ref_stride, &sse2, &sum2); + vp9_get8x8var_mmx(src + 8 * src_stride + 8, src_stride, + ref + 8 * ref_stride + 8, ref_stride, &sse3, &sum3); - return (var - (((unsigned int)avg * avg) >> 6)); + *sse = sse0 + sse1 + sse2 + sse3; + return *sse; } -unsigned int vp9_mse16x16_mmx( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - unsigned int sse0, sse1, sse2, sse3, var; - int sum0, sum1, sum2, sum3; +unsigned int vp9_variance16x16_mmx(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + unsigned int sse0, sse1, sse2, sse3; + int sum0, sum1, sum2, sum3, sum; - vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, - &sum0); - vp9_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, - &sse1, &sum1); - vp9_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, - ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2); - vp9_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, - ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3); + vp9_get8x8var_mmx(src, src_stride, ref, ref_stride, &sse0, &sum0); + vp9_get8x8var_mmx(src + 8, src_stride, ref + 8, ref_stride, &sse1, &sum1); + vp9_get8x8var_mmx(src + 8 * src_stride, src_stride, + ref + 8 * ref_stride, ref_stride, &sse2, &sum2); + vp9_get8x8var_mmx(src + 8 * src_stride + 8, src_stride, + ref + 8 * ref_stride + 8, ref_stride, &sse3, &sum3); - var = sse0 + sse1 + sse2 + sse3; - *sse = var; - return var; + *sse = sse0 + sse1 + sse2 + sse3; + sum = sum0 + sum1 + sum2 + sum3; + return *sse - (((unsigned int)sum * sum) >> 8); } +unsigned int vp9_variance16x8_mmx(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + unsigned int sse0, sse1; + int sum0, sum1, sum; -unsigned int vp9_variance16x16_mmx( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - unsigned int sse0, sse1, sse2, sse3, var; - int sum0, sum1, sum2, sum3, avg; - - vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, - &sum0); - vp9_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, - &sse1, &sum1); - vp9_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, - ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2); - vp9_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, - ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3); - - var = sse0 + sse1 + sse2 + sse3; - avg = sum0 + sum1 + sum2 + sum3; - *sse = var; - return (var - (((unsigned int)avg * avg) >> 8)); -} + vp9_get8x8var_mmx(src, src_stride, ref, ref_stride, &sse0, &sum0); + vp9_get8x8var_mmx(src + 8, src_stride, ref + 8, ref_stride, &sse1, &sum1); -unsigned int vp9_variance16x8_mmx( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - unsigned int sse0, sse1, var; - int sum0, sum1, avg; - - vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, - &sum0); - vp9_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, - &sse1, &sum1); - - var = sse0 + sse1; - avg = sum0 + sum1; - *sse = var; - return (var - (((unsigned int)avg * avg) >> 7)); + *sse = sse0 + sse1; + sum = sum0 + sum1; + return *sse - (((unsigned int)sum * sum) >> 7); } -unsigned int vp9_variance8x16_mmx( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - unsigned int sse0, sse1, var; - int sum0, sum1, avg; - - vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, - &sum0); - vp9_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, - ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1); +unsigned int vp9_variance8x16_mmx(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + unsigned int sse0, sse1; + int sum0, sum1, sum; - var = sse0 + sse1; - avg = sum0 + sum1; - *sse = var; + vp9_get8x8var_mmx(src, src_stride, ref, ref_stride, &sse0, &sum0); + vp9_get8x8var_mmx(src + 8 * src_stride, src_stride, + ref + 8 * ref_stride, ref_stride, &sse1, &sum1); - return (var - (((unsigned int)avg * avg) >> 7)); + *sse = sse0 + sse1; + sum = sum0 + sum1; + return *sse - (((unsigned int)sum * sum) >> 7); } |