diff options
-rw-r--r-- | test/yuv_temporal_filter_test.cc | 4 | ||||
-rw-r--r-- | vp9/encoder/vp9_encoder.c | 2 | ||||
-rw-r--r-- | vp9/encoder/vp9_rd.h | 3 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 4 | ||||
-rw-r--r-- | vp9/encoder/vp9_temporal_filter.c | 30 | ||||
-rw-r--r-- | vp9/encoder/x86/temporal_filter_sse4.c | 29 |
6 files changed, 44 insertions, 28 deletions
diff --git a/test/yuv_temporal_filter_test.cc b/test/yuv_temporal_filter_test.cc index e8d19672d..9fb170bc3 100644 --- a/test/yuv_temporal_filter_test.cc +++ b/test/yuv_temporal_filter_test.cc @@ -38,6 +38,10 @@ struct TemporalFilterWithBd { int bd; }; +std::ostream &operator<<(std::ostream &os, const TemporalFilterWithBd &tf) { + return os << "Bitdepth: " << tf.bd; +} + int GetFilterWeight(unsigned int row, unsigned int col, unsigned int block_height, unsigned int block_width, const int *const blk_fw, int use_32x32) { diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index b8c86ea43..362077a6c 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -6146,7 +6146,7 @@ static double get_mv_dist(int mv_mode, VP9_COMP *cpi, MACROBLOCKD *xd, // TODO(angiebird): Consider subpixel when computing the sse. cpi->fn_ptr[bsize].vf(src.buf, src.stride, get_buf_from_mv(&pre, &full_mv), pre.stride, &sse); - return (double)sse; + return (double)(sse << VP9_DIST_SCALE_LOG2); } else { assert(0); return 0; diff --git a/vp9/encoder/vp9_rd.h b/vp9/encoder/vp9_rd.h index fa85f2176..062ca3277 100644 --- a/vp9/encoder/vp9_rd.h +++ b/vp9/encoder/vp9_rd.h @@ -42,6 +42,9 @@ extern "C" { #define RD_THRESH_MAX_FACT 64 #define RD_THRESH_INC 1 +#define VP9_DIST_SCALE_LOG2 4 +#define VP9_DIST_SCALE (1 << VP9_DIST_SCALE_LOG2) + // This enumerator type needs to be kept aligned with the mode order in // const MODE_DEFINITION vp9_mode_order[MAX_MODES] used in the rd code. typedef enum { diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index c73b0ed87..6f07269d4 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -273,9 +273,9 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x, } *skip_txfm_sb = skip_flag; - *skip_sse_sb = total_sse << 4; + *skip_sse_sb = total_sse << VP9_DIST_SCALE_LOG2; *out_rate_sum = (int)rate_sum; - *out_dist_sum = dist_sum << 4; + *out_dist_sum = dist_sum << VP9_DIST_SCALE_LOG2; } #if CONFIG_VP9_HIGHBITDEPTH diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c index d02603615..0b636b85c 100644 --- a/vp9/encoder/vp9_temporal_filter.c +++ b/vp9/encoder/vp9_temporal_filter.c @@ -225,24 +225,24 @@ static INLINE int get_filter_weight(unsigned int i, unsigned int j, unsigned int block_height, unsigned int block_width, const int *const blk_fw, int use_32x32) { - int filter_weight = 0; - - if (use_32x32) - // blk_fw[0] ~ blk_fw[3] are the same. + // blk_fw[0] ~ blk_fw[3] are the same. + if (use_32x32) { return blk_fw[0]; + } if (i < block_height / 2) { - if (j < block_width / 2) - filter_weight = blk_fw[0]; - else - filter_weight = blk_fw[1]; - } else { - if (j < block_width / 2) - filter_weight = blk_fw[2]; - else - filter_weight = blk_fw[3]; + if (j < block_width / 2) { + return blk_fw[0]; + } + + return blk_fw[1]; + } + + if (j < block_width / 2) { + return blk_fw[2]; } - return filter_weight; + + return blk_fw[3]; } void vp9_apply_temporal_filter_c( @@ -296,7 +296,7 @@ void vp9_apply_temporal_filter_c( for (i = 0, k = 0, m = 0; i < block_height; i++) { for (j = 0; j < block_width; j++) { const int pixel_value = y_pred[i * y_buf_stride + j]; - int filter_weight = + const int filter_weight = get_filter_weight(i, j, block_height, block_width, blk_fw, use_32x32); // non-local mean approach diff --git a/vp9/encoder/x86/temporal_filter_sse4.c b/vp9/encoder/x86/temporal_filter_sse4.c index 9f9483a9b..a97c96dee 100644 --- a/vp9/encoder/x86/temporal_filter_sse4.c +++ b/vp9/encoder/x86/temporal_filter_sse4.c @@ -254,7 +254,7 @@ static INLINE void read_chroma_dist_row_16(int ss_x, const uint16_t *u_dist, __m128i *v_first, __m128i *v_second) { if (!ss_x) { - // If there is no chroma subsampling in the horizaontal direction, then we + // If there is no chroma subsampling in the horizontal direction, then we // need to load 16 entries from chroma. read_dist_16(u_dist, u_first, u_second); read_dist_16(v_dist, v_first, v_second); @@ -274,8 +274,8 @@ static INLINE void read_chroma_dist_row_16(int ss_x, const uint16_t *u_dist, } } -// Horizonta add unsigned 16-bit ints in src and store them as signed 32-bit int -// in dst. +// Horizontal add unsigned 16-bit ints in src and store them as signed 32-bit +// int in dst. static INLINE void hadd_epu16(__m128i *src, __m128i *dst) { const __m128i zero = _mm_setzero_si128(); const __m128i shift_right = _mm_srli_si128(*src, 2); @@ -323,7 +323,7 @@ static INLINE void add_luma_dist_to_8_chroma_mod(const uint16_t *y_dist, // Apply temporal filter to the luma components. This performs temporal // filtering on a luma block of 16 X block_height. Use blk_fw as an array of -// size 4for the weights for each of the 4 subblocks if blk_fw is not NULL, +// size 4 for the weights for each of the 4 subblocks if blk_fw is not NULL, // else use top_weight for top half, and bottom weight for bottom half. static void vp9_apply_temporal_filter_luma_16( const uint8_t *y_src, int y_src_stride, const uint8_t *y_pre, @@ -350,6 +350,9 @@ static void vp9_apply_temporal_filter_luma_16( __m128i sum_row_first; __m128i sum_row_second; + // Loop variables + unsigned int h; + assert(strength >= 0); assert(strength <= 6); @@ -408,7 +411,7 @@ static void vp9_apply_temporal_filter_luma_16( mul_first = _mm_loadu_si128((const __m128i *)neighbors_first[1]); mul_second = _mm_loadu_si128((const __m128i *)neighbors_second[1]); - for (unsigned int h = 1; h < block_height - 1; ++h) { + for (h = 1; h < block_height - 1; ++h) { // Move the weight to bottom half if (!use_whole_blk && h == block_height / 2) { if (blk_fw) { @@ -640,6 +643,9 @@ static void vp9_apply_temporal_filter_chroma_8( __m128i u_sum_row, v_sum_row; + // Loop variable + unsigned int h; + (void)uv_block_width; // First row @@ -690,7 +696,7 @@ static void vp9_apply_temporal_filter_chroma_8( // Then all the rows except the last one mul = _mm_loadu_si128((const __m128i *)neighbors[1]); - for (unsigned int h = 1; h < uv_block_height - 1; ++h) { + for (h = 1; h < uv_block_height - 1; ++h) { // Move the weight pointer to the bottom half of the blocks if (h == uv_block_height / 2) { if (blk_fw) { @@ -936,6 +942,9 @@ void vp9_apply_temporal_filter_sse4_1( const uint8_t *y_src_ptr = y_src, *u_src_ptr = u_src, *v_src_ptr = v_src; const uint8_t *y_pre_ptr = y_pre, *u_pre_ptr = u_pre, *v_pre_ptr = v_pre; + // Loop variables + unsigned int row, blk_col; + assert(block_width <= BW && "block width too large"); assert(block_height <= BH && "block height too large"); assert(block_width % 16 == 0 && "block width must be multiple of 16"); @@ -953,8 +962,8 @@ void vp9_apply_temporal_filter_sse4_1( "subblock filter weight must be less than 2"); // Precompute the difference sqaured - for (unsigned int row = 0; row < block_height; row++) { - for (unsigned int blk_col = 0; blk_col < block_width; blk_col += 16) { + for (row = 0; row < block_height; row++) { + for (blk_col = 0; blk_col < block_width; blk_col += 16) { store_dist_16(y_src_ptr + blk_col, y_pre_ptr + blk_col, y_dist_ptr + blk_col); } @@ -963,8 +972,8 @@ void vp9_apply_temporal_filter_sse4_1( y_dist_ptr += DIST_STRIDE; } - for (unsigned int row = 0; row < chroma_height; row++) { - for (unsigned int blk_col = 0; blk_col < chroma_width; blk_col += 8) { + for (row = 0; row < chroma_height; row++) { + for (blk_col = 0; blk_col < chroma_width; blk_col += 8) { store_dist_8(u_src_ptr + blk_col, u_pre_ptr + blk_col, u_dist_ptr + blk_col); store_dist_8(v_src_ptr + blk_col, v_pre_ptr + blk_col, |