diff options
Diffstat (limited to 'vp9/encoder')
-rw-r--r-- | vp9/encoder/vp9_avg.c | 14 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 89 | ||||
-rw-r--r-- | vp9/encoder/vp9_firstpass.c | 37 | ||||
-rw-r--r-- | vp9/encoder/vp9_rd.c | 12 | ||||
-rw-r--r-- | vp9/encoder/vp9_rd.h | 5 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 9 | ||||
-rw-r--r-- | vp9/encoder/vp9_speed_features.c | 6 | ||||
-rw-r--r-- | vp9/encoder/vp9_speed_features.h | 10 | ||||
-rw-r--r-- | vp9/encoder/x86/vp9_denoiser_sse2.c | 56 |
9 files changed, 135 insertions, 103 deletions
diff --git a/vp9/encoder/vp9_avg.c b/vp9/encoder/vp9_avg.c index 22c6cc4fc..e9810c894 100644 --- a/vp9/encoder/vp9_avg.c +++ b/vp9/encoder/vp9_avg.c @@ -7,6 +7,7 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ +#include "vp9/common/vp9_common.h" #include "vpx_ports/mem.h" unsigned int vp9_avg_8x8_c(const uint8_t *s, int p) { @@ -17,3 +18,16 @@ unsigned int vp9_avg_8x8_c(const uint8_t *s, int p) { return (sum + 32) >> 6; } + +#if CONFIG_VP9_HIGHBITDEPTH +unsigned int vp9_highbd_avg_8x8_c(const uint8_t *s8, int p) { + int i, j; + int sum = 0; + const uint16_t* s = CONVERT_TO_SHORTPTR(s8); + for (i = 0; i < 8; ++i, s+=p) + for (j = 0; j < 8; sum += s[j], ++j) {} + + return (sum + 32) >> 6; +} +#endif // CONFIG_VP9_HIGHBITDEPTH + diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 420ec0b51..6eff8c501 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -533,8 +533,19 @@ static void choose_partitioning(VP9_COMP *cpi, int sum = 0; if (x_idx < pixels_wide && y_idx < pixels_high) { - int s_avg = vp9_avg_8x8(s + y_idx * sp + x_idx, sp); - int d_avg = vp9_avg_8x8(d + y_idx * dp + x_idx, dp); + int s_avg, d_avg; +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + s_avg = vp9_highbd_avg_8x8(s + y_idx * sp + x_idx, sp); + d_avg = vp9_highbd_avg_8x8(d + y_idx * dp + x_idx, dp); + } else { + s_avg = vp9_avg_8x8(s + y_idx * sp + x_idx, sp); + d_avg = vp9_avg_8x8(d + y_idx * dp + x_idx, dp); + } +#else + s_avg = vp9_avg_8x8(s + y_idx * sp + x_idx, sp); + d_avg = vp9_avg_8x8(d + y_idx * dp + x_idx, dp); +#endif sum = s_avg - d_avg; sse = sum * sum; } @@ -1522,9 +1533,7 @@ static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile, BLOCK_SIZE subsize; ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; PARTITION_CONTEXT sl[8], sa[8]; - RD_COST last_part_rdc = {INT_MAX, INT64_MAX, INT64_MAX}; - RD_COST none_rdc = {INT_MAX, INT64_MAX, INT64_MAX}; - RD_COST chosen_rdc = {INT_MAX, INT64_MAX, INT64_MAX}; + RD_COST last_part_rdc, none_rdc, chosen_rdc; BLOCK_SIZE sub_subsize = BLOCK_4X4; int splits_below = 0; BLOCK_SIZE bs_type = mi_8x8[0].src_mi->mbmi.sb_type; @@ -1537,6 +1546,10 @@ static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile, assert(num_4x4_blocks_wide_lookup[bsize] == num_4x4_blocks_high_lookup[bsize]); + vp9_rd_cost_reset(&last_part_rdc); + vp9_rd_cost_reset(&none_rdc); + vp9_rd_cost_reset(&chosen_rdc); + partition = partition_lookup[bsl][bs_type]; subsize = get_subsize(bsize, partition); @@ -1598,16 +1611,15 @@ static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile, INT64_MAX); if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 && mi_row + (mi_step >> 1) < cm->mi_rows) { - RD_COST tmp_rdc = {0, 0, 0}; + RD_COST tmp_rdc; PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0]; + vp9_rd_cost_init(&tmp_rdc); update_state(cpi, ctx, mi_row, mi_col, subsize, 0); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize, ctx); rd_pick_sb_modes(cpi, tile, mi_row + (mi_step >> 1), mi_col, &tmp_rdc, subsize, &pc_tree->horizontal[1], INT64_MAX); if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { - last_part_rdc.rate = INT_MAX; - last_part_rdc.dist = INT64_MAX; - last_part_rdc.rdcost = INT64_MAX; + vp9_rd_cost_reset(&last_part_rdc); break; } last_part_rdc.rate += tmp_rdc.rate; @@ -1620,17 +1632,16 @@ static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile, subsize, &pc_tree->vertical[0], INT64_MAX); if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 && mi_col + (mi_step >> 1) < cm->mi_cols) { - RD_COST tmp_rdc = {0, 0, 0}; + RD_COST tmp_rdc; PICK_MODE_CONTEXT *ctx = &pc_tree->vertical[0]; + vp9_rd_cost_init(&tmp_rdc); update_state(cpi, ctx, mi_row, mi_col, subsize, 0); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize, ctx); rd_pick_sb_modes(cpi, tile, mi_row, mi_col + (mi_step >> 1), &tmp_rdc, subsize, &pc_tree->vertical[bsize > BLOCK_8X8], INT64_MAX); if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { - last_part_rdc.rate = INT_MAX; - last_part_rdc.dist = INT64_MAX; - last_part_rdc.rdcost = INT64_MAX; + vp9_rd_cost_reset(&last_part_rdc); break; } last_part_rdc.rate += tmp_rdc.rate; @@ -1651,19 +1662,17 @@ static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile, int x_idx = (i & 1) * (mi_step >> 1); int y_idx = (i >> 1) * (mi_step >> 1); int jj = i >> 1, ii = i & 0x01; - RD_COST tmp_rdc = {0, 0, 0}; - + RD_COST tmp_rdc; if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) continue; + vp9_rd_cost_init(&tmp_rdc); rd_use_partition(cpi, tile, mi_8x8 + jj * bss * mis + ii * bss, tp, mi_row + y_idx, mi_col + x_idx, subsize, &tmp_rdc.rate, &tmp_rdc.dist, i != 3, pc_tree->split[i]); if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { - last_part_rdc.rate = INT_MAX; - last_part_rdc.dist = INT64_MAX; - last_part_rdc.rdcost = INT64_MAX; + vp9_rd_cost_reset(&last_part_rdc); break; } last_part_rdc.rate += tmp_rdc.rate; @@ -1710,15 +1719,12 @@ static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile, save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); pc_tree->split[i]->partitioning = PARTITION_NONE; rd_pick_sb_modes(cpi, tile, mi_row + y_idx, mi_col + x_idx, &tmp_rdc, - split_subsize, &pc_tree->split[i]->none, - INT64_MAX); + split_subsize, &pc_tree->split[i]->none, INT64_MAX); restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { - chosen_rdc.rate = INT_MAX; - chosen_rdc.dist = INT64_MAX; - chosen_rdc.rdcost = INT64_MAX; + vp9_rd_cost_reset(&chosen_rdc); break; } @@ -2123,9 +2129,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, PICK_MODE_CONTEXT *ctx = &pc_tree->none; int i, pl; BLOCK_SIZE subsize; - RD_COST this_rdc = {0, 0, 0}; - RD_COST sum_rdc = {0, 0, 0}; - RD_COST best_rdc = {INT_MAX, INT64_MAX, best_rd}; + RD_COST this_rdc, sum_rdc, best_rdc; int do_split = bsize >= BLOCK_8X8; int do_rect = 1; @@ -2153,6 +2157,11 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, assert(num_8x8_blocks_wide_lookup[bsize] == num_8x8_blocks_high_lookup[bsize]); + vp9_rd_cost_init(&this_rdc); + vp9_rd_cost_init(&sum_rdc); + vp9_rd_cost_reset(&best_rdc); + best_rdc.rdcost = best_rd; + set_offsets(cpi, tile, mi_row, mi_col, bsize); if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode) @@ -2574,16 +2583,15 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, sf->always_this_block_size); rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1, cpi->pc_root); - } else if (cpi->partition_search_skippable_frame || - sf->partition_search_type == VAR_BASED_FIXED_PARTITION) { + } else if (cpi->partition_search_skippable_frame) { BLOCK_SIZE bsize; set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); bsize = get_rd_var_based_fixed_partition(cpi, mi_row, mi_col); set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, bsize); rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1, cpi->pc_root); - } else if (sf->partition_search_type == VAR_BASED_PARTITION && - cm->frame_type != KEY_FRAME ) { + } else if (sf->partition_search_type == VAR_BASED_PARTITION && + cm->frame_type != KEY_FRAME ) { choose_partitioning(cpi, tile, mi_row, mi_col); rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1, cpi->pc_root); @@ -2835,8 +2843,13 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, this_rate += cpi->partition_cost[pl][PARTITION_NONE]; sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist); if (sum_rd < best_rd) { - int64_t stop_thresh = 4096; - int64_t stop_thresh_rd; + int dist_breakout_thr = sf->partition_search_breakout_dist_thr; + int64_t rate_breakout_thr = sf->partition_search_breakout_rate_thr; + + dist_breakout_thr >>= 8 - (b_width_log2_lookup[bsize] + + b_height_log2_lookup[bsize]); + + rate_breakout_thr *= num_pels_log2_lookup[bsize]; best_rate = this_rate; best_dist = this_dist; @@ -2844,14 +2857,9 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE; - // Adjust threshold according to partition size. - stop_thresh >>= 8 - (b_width_log2_lookup[bsize] + - b_height_log2_lookup[bsize]); - - stop_thresh_rd = RDCOST(x->rdmult, x->rddiv, 0, stop_thresh); - // If obtained distortion is very small, choose current partition - // and stop splitting. - if (!x->e_mbd.lossless && best_rd < stop_thresh_rd) { + if (!x->e_mbd.lossless && + this_rate < rate_breakout_thr && + this_dist < dist_breakout_thr) { do_split = 0; do_rect = 0; } @@ -3176,7 +3184,6 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64, 1, &dummy_rate, &dummy_dist, cpi->pc_root); break; - case VAR_BASED_FIXED_PARTITION: case FIXED_PARTITION: bsize = sf->partition_search_type == FIXED_PARTITION ? sf->always_this_block_size : diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index 96c3e0aa4..f1baf8323 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -66,13 +66,6 @@ static void swap_yv12(YV12_BUFFER_CONFIG *a, YV12_BUFFER_CONFIG *b) { *b = temp; } -static int gfboost_qadjust(int qindex, vpx_bit_depth_t bit_depth) { - const double q = vp9_convert_qindex_to_q(qindex, bit_depth); - return (int)((0.00000828 * q * q * q) + - (-0.0055 * q * q) + - (1.32 * q) + 79.3); -} - // Resets the first pass file to the given position using a relative seek from // the current position. static void reset_fpf_position(TWO_PASS *p, @@ -1317,14 +1310,15 @@ static double calc_frame_boost(VP9_COMP *cpi, double this_frame_mv_in_out, double max_boost) { double frame_boost; - const double lq = vp9_convert_qindex_to_q(cpi->rc.last_q[INTER_FRAME], - cpi->common.bit_depth); - const double q_correction = MIN((0.8 + (lq * 0.001)), 1.0); + const double lq = + vp9_convert_qindex_to_q(cpi->rc.avg_frame_qindex[INTER_FRAME], + cpi->common.bit_depth); + const double boost_correction = MIN((0.5 + (lq * 0.015)), 1.5); // Underlying boost factor is based on inter error ratio. frame_boost = (BASELINE_ERR_PER_MB * cpi->common.MBs) / DOUBLE_DIVIDE_CHECK(this_frame->coded_error); - frame_boost = frame_boost * BOOST_FACTOR * q_correction; + frame_boost = frame_boost * BOOST_FACTOR * boost_correction; // Increase boost for frames where new data coming into frame (e.g. zoom out). // Slightly reduce boost if there is a net balance of motion out of the frame @@ -1335,7 +1329,7 @@ static double calc_frame_boost(VP9_COMP *cpi, else frame_boost += frame_boost * (this_frame_mv_in_out / 2.0); - return MIN(frame_boost, max_boost * q_correction); + return MIN(frame_boost, max_boost * boost_correction); } static int calc_arf_boost(VP9_COMP *cpi, int offset, @@ -1874,19 +1868,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { gf_group_bits = calculate_total_gf_group_bits(cpi, gf_group_err); // Calculate the extra bits to be used for boosted frame(s) - { - int q = rc->last_q[INTER_FRAME]; - int boost = - (rc->gfu_boost * gfboost_qadjust(q, cpi->common.bit_depth)) / 100; - - // Set max and minimum boost and hence minimum allocation. - boost = clamp(boost, MIN_ARF_GF_BOOST, - (rc->baseline_gf_interval + 1) * 200); - - // Calculate the extra bits to be used for boosted frame(s) - gf_arf_bits = calculate_boost_bits(rc->baseline_gf_interval, - boost, gf_group_bits); - } + gf_arf_bits = calculate_boost_bits(rc->baseline_gf_interval, + rc->gfu_boost, gf_group_bits); // Adjust KF group bits and error remaining. twopass->kf_group_error_left -= (int64_t)gf_group_err; @@ -2380,7 +2363,11 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { section_target_bandwidth); twopass->active_worst_quality = tmp_q; rc->ni_av_qi = tmp_q; + rc->last_q[INTER_FRAME] = tmp_q; rc->avg_q = vp9_convert_qindex_to_q(tmp_q, cm->bit_depth); + rc->avg_frame_qindex[INTER_FRAME] = tmp_q; + rc->last_q[KEY_FRAME] = (tmp_q + cpi->oxcf.best_allowed_q) / 2; + rc->avg_frame_qindex[KEY_FRAME] = rc->last_q[KEY_FRAME]; } vp9_zero(this_frame); if (EOF == input_stats(twopass, &this_frame)) diff --git a/vp9/encoder/vp9_rd.c b/vp9/encoder/vp9_rd.c index 75c396433..7f526fc42 100644 --- a/vp9/encoder/vp9_rd.c +++ b/vp9/encoder/vp9_rd.c @@ -44,6 +44,18 @@ // Factor to weigh the rate for switchable interp filters. #define SWITCHABLE_INTERP_RATE_FACTOR 1 +void vp9_rd_cost_reset(RD_COST *rd_cost) { + rd_cost->rate = INT_MAX; + rd_cost->dist = INT64_MAX; + rd_cost->rdcost = INT64_MAX; +} + +void vp9_rd_cost_init(RD_COST *rd_cost) { + rd_cost->rate = 0; + rd_cost->dist = 0; + rd_cost->rdcost = 0; +} + // The baseline rd thresholds for breaking out of the rd loop for // certain modes are assumed to be based on 8x8 blocks. // This table is used to correct for block size. diff --git a/vp9/encoder/vp9_rd.h b/vp9/encoder/vp9_rd.h index 33fb4ac94..1aa52663a 100644 --- a/vp9/encoder/vp9_rd.h +++ b/vp9/encoder/vp9_rd.h @@ -123,6 +123,11 @@ typedef struct RD_COST { int64_t rdcost; } RD_COST; +// Reset the rate distortion cost values to maximum (invalid) value. +void vp9_rd_cost_reset(RD_COST *rd_cost); +// Initialize the rate distortion cost values to zero. +void vp9_rd_cost_init(RD_COST *rd_cost); + struct TileInfo; struct VP9_COMP; struct macroblock; diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 7565cc5c9..eca8e5880 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -1140,12 +1140,14 @@ static int super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x, *sse = 0; *skippable = 1; - for (plane = 1; plane < MAX_MB_PLANE && is_cost_valid; ++plane) { + for (plane = 1; plane < MAX_MB_PLANE; ++plane) { txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse, ref_best_rd, plane, bsize, uv_tx_size, cpi->sf.use_fast_coef_costing); - if (pnrate == INT_MAX) + if (pnrate == INT_MAX) { is_cost_valid = 0; + break; + } *rate += pnrate; *distortion += pndist; *sse += pnsse; @@ -3392,6 +3394,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } if (best_mode_index < 0 || best_rd >= best_rd_so_far) { + rd_cost->rate = INT_MAX; rd_cost->rdcost = INT64_MAX; return; } @@ -3562,6 +3565,7 @@ void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, MACROBLOCK *x, rd_cost->rdcost = this_rd; if (this_rd >= best_rd_so_far) { + rd_cost->rate = INT_MAX; rd_cost->rdcost = INT64_MAX; return; } @@ -4113,6 +4117,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, } if (best_rd >= best_rd_so_far) { + rd_cost->rate = INT_MAX; rd_cost->rdcost = INT64_MAX; return; } diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index bec77d71f..9e3ee2c94 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -275,6 +275,12 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST_NEW_ZERO; sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST_NEW_ZERO; sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST_NEW_ZERO; + + if (MIN(cm->width, cm->height) >= 720) + sf->partition_search_breakout_dist_thr = (1 << 25); + else + sf->partition_search_breakout_dist_thr = (1 << 23); + sf->partition_search_breakout_rate_thr = 200; } if (speed >= 6) { diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h index cc6c2e52a..951b4af22 100644 --- a/vp9/encoder/vp9_speed_features.h +++ b/vp9/encoder/vp9_speed_features.h @@ -149,16 +149,12 @@ typedef enum { typedef enum { // Search partitions using RD/NONRD criterion - SEARCH_PARTITION = 0, + SEARCH_PARTITION, // Always use a fixed size partition - FIXED_PARTITION = 1, + FIXED_PARTITION, - // Use a fixed size partition in every 64X64 SB, where the size is - // determined based on source variance - VAR_BASED_FIXED_PARTITION = 2, - - REFERENCE_PARTITION = 3, + REFERENCE_PARTITION, // Use an arbitrary partitioning scheme based on source variance within // a 64X64 SB diff --git a/vp9/encoder/x86/vp9_denoiser_sse2.c b/vp9/encoder/x86/vp9_denoiser_sse2.c index bf400d38b..bf5fa889f 100644 --- a/vp9/encoder/x86/vp9_denoiser_sse2.c +++ b/vp9/encoder/x86/vp9_denoiser_sse2.c @@ -41,40 +41,40 @@ static INLINE int sum_diff_16x1(__m128i acc_diff) { static INLINE __m128i vp9_denoiser_16x1_sse2(const uint8_t *sig, const uint8_t *mc_running_avg_y, uint8_t *running_avg_y, - const __m128i k_0, - const __m128i k_4, - const __m128i k_8, - const __m128i k_16, - const __m128i l3, - const __m128i l32, - const __m128i l21, + const __m128i *k_0, + const __m128i *k_4, + const __m128i *k_8, + const __m128i *k_16, + const __m128i *l3, + const __m128i *l32, + const __m128i *l21, __m128i acc_diff) { // Calculate differences - const __m128i v_sig = _mm_loadu_si128((__m128i *)(&sig[0])); + const __m128i v_sig = _mm_loadu_si128((const __m128i *)(&sig[0])); const __m128i v_mc_running_avg_y = _mm_loadu_si128( - (__m128i *)(&mc_running_avg_y[0])); + (const __m128i *)(&mc_running_avg_y[0])); __m128i v_running_avg_y; const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg_y, v_sig); const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg_y); // Obtain the sign. FF if diff is negative. - const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, k_0); + const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, *k_0); // Clamp absolute difference to 16 to be used to get mask. Doing this // allows us to use _mm_cmpgt_epi8, which operates on signed byte. const __m128i clamped_absdiff = _mm_min_epu8( - _mm_or_si128(pdiff, ndiff), k_16); + _mm_or_si128(pdiff, ndiff), *k_16); // Get masks for l2 l1 and l0 adjustments. - const __m128i mask2 = _mm_cmpgt_epi8(k_16, clamped_absdiff); - const __m128i mask1 = _mm_cmpgt_epi8(k_8, clamped_absdiff); - const __m128i mask0 = _mm_cmpgt_epi8(k_4, clamped_absdiff); + const __m128i mask2 = _mm_cmpgt_epi8(*k_16, clamped_absdiff); + const __m128i mask1 = _mm_cmpgt_epi8(*k_8, clamped_absdiff); + const __m128i mask0 = _mm_cmpgt_epi8(*k_4, clamped_absdiff); // Get adjustments for l2, l1, and l0. - __m128i adj2 = _mm_and_si128(mask2, l32); - const __m128i adj1 = _mm_and_si128(mask1, l21); + __m128i adj2 = _mm_and_si128(mask2, *l32); + const __m128i adj1 = _mm_and_si128(mask1, *l21); const __m128i adj0 = _mm_and_si128(mask0, clamped_absdiff); __m128i adj, padj, nadj; // Combine the adjustments and get absolute adjustments. adj2 = _mm_add_epi8(adj2, adj1); - adj = _mm_sub_epi8(l3, adj2); + adj = _mm_sub_epi8(*l3, adj2); adj = _mm_andnot_si128(mask0, adj); adj = _mm_or_si128(adj, adj0); @@ -103,9 +103,9 @@ static INLINE __m128i vp9_denoiser_adj_16x1_sse2(const uint8_t *sig, __m128i acc_diff) { __m128i v_running_avg_y = _mm_loadu_si128((__m128i *)(&running_avg_y[0])); // Calculate differences. - const __m128i v_sig = _mm_loadu_si128((__m128i *)(&sig[0])); + const __m128i v_sig = _mm_loadu_si128((const __m128i *)(&sig[0])); const __m128i v_mc_running_avg_y = - _mm_loadu_si128((__m128i *)(&mc_running_avg_y[0])); + _mm_loadu_si128((const __m128i *)(&mc_running_avg_y[0])); const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg_y, v_sig); const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg_y); // Obtain the sign. FF if diff is negative. @@ -178,8 +178,8 @@ static int vp9_denoiser_4xM_sse2(const uint8_t *sig, int sig_stride, acc_diff = vp9_denoiser_16x1_sse2(sig_buffer[r], mc_running_buffer[r], running_buffer[r], - k_0, k_4, k_8, k_16, - l3, l32, l21, acc_diff); + &k_0, &k_4, &k_8, &k_16, + &l3, &l32, &l21, acc_diff); vpx_memcpy(running_avg_y, running_buffer[r], 4); vpx_memcpy(running_avg_y + avg_y_stride, running_buffer[r] + 4, 4); vpx_memcpy(running_avg_y + avg_y_stride * 2, @@ -279,8 +279,8 @@ static int vp9_denoiser_8xM_sse2(const uint8_t *sig, int sig_stride, acc_diff = vp9_denoiser_16x1_sse2(sig_buffer[r], mc_running_buffer[r], running_buffer[r], - k_0, k_4, k_8, k_16, - l3, l32, l21, acc_diff); + &k_0, &k_4, &k_8, &k_16, + &l3, &l32, &l21, acc_diff); vpx_memcpy(running_avg_y, running_buffer[r], 8); vpx_memcpy(running_avg_y + avg_y_stride, running_buffer[r] + 8, 8); // Update pointers for next iteration. @@ -357,9 +357,9 @@ static int vp9_denoiser_64_32_16xM_sse2(const uint8_t *sig, int sig_stride, const __m128i l21 = _mm_set1_epi8(1); int sum_diff = 0; - for (int i = 0; i < 4; i++) { - for (int j = 0; j < 4; j++) { - acc_diff[i][j] = _mm_setzero_si128(); + for (c = 0; c < 4; ++c) { + for (r = 0; r < 4; ++r) { + acc_diff[c][r] = _mm_setzero_si128(); } } @@ -368,8 +368,8 @@ static int vp9_denoiser_64_32_16xM_sse2(const uint8_t *sig, int sig_stride, acc_diff[c>>4][r>>4] = vp9_denoiser_16x1_sse2( sig, mc_running_avg_y, running_avg_y, - k_0, k_4, k_8, k_16, - l3, l32, l21, acc_diff[c>>4][r>>4]); + &k_0, &k_4, &k_8, &k_16, + &l3, &l32, &l21, acc_diff[c>>4][r>>4]); // Update pointers for next iteration. sig += 16; mc_running_avg_y += 16; |