summary | refs | log | tree | commit | diff
path: root/vp9
diff options
context:
space:
mode:
Diffstat (limited to 'vp9')
-rw-r--r-- vp9/encoder/vp9_encoder.c 2
-rw-r--r-- vp9/encoder/vp9_rd.h 3
-rw-r--r-- vp9/encoder/vp9_rdopt.c 4
-rw-r--r-- vp9/encoder/vp9_temporal_filter.c 30
-rw-r--r-- vp9/encoder/x86/temporal_filter_sse4.c 29
5 files changed, 40 insertions, 28 deletions
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index b8c86ea43..362077a6c 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -6146,7 +6146,7 @@ static double get_mv_dist(int mv_mode, VP9_COMP *cpi, MACROBLOCKD *xd,
// TODO(angiebird): Consider subpixel when computing the sse.
cpi->fn_ptr[bsize].vf(src.buf, src.stride, get_buf_from_mv(&pre, &full_mv),
pre.stride, &sse);
- return (double)sse;
+ return (double)(sse << VP9_DIST_SCALE_LOG2);
} else {
assert(0);
return 0;
diff --git a/vp9/encoder/vp9_rd.h b/vp9/encoder/vp9_rd.h
index fa85f2176..062ca3277 100644
--- a/vp9/encoder/vp9_rd.h
+++ b/vp9/encoder/vp9_rd.h
@@ -42,6 +42,9 @@ extern "C" {
#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC 1
+#define VP9_DIST_SCALE_LOG2 4
+#define VP9_DIST_SCALE (1 << VP9_DIST_SCALE_LOG2)
+
// This enumerator type needs to be kept aligned with the mode order in
// const MODE_DEFINITION vp9_mode_order[MAX_MODES] used in the rd code.
typedef enum {
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index c73b0ed87..6f07269d4 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -273,9 +273,9 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x,
}
*skip_txfm_sb = skip_flag;
- *skip_sse_sb = total_sse << 4;
+ *skip_sse_sb = total_sse << VP9_DIST_SCALE_LOG2;
*out_rate_sum = (int)rate_sum;
- *out_dist_sum = dist_sum << 4;
+ *out_dist_sum = dist_sum << VP9_DIST_SCALE_LOG2;
}
#if CONFIG_VP9_HIGHBITDEPTH
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index d02603615..0b636b85c 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -225,24 +225,24 @@ static INLINE int get_filter_weight(unsigned int i, unsigned int j,
unsigned int block_height,
unsigned int block_width,
const int *const blk_fw, int use_32x32) {
- int filter_weight = 0;
-
- if (use_32x32)
- // blk_fw[0] ~ blk_fw[3] are the same.
+ // blk_fw[0] ~ blk_fw[3] are the same.
+ if (use_32x32) {
return blk_fw[0];
+ }
if (i < block_height / 2) {
- if (j < block_width / 2)
- filter_weight = blk_fw[0];
- else
- filter_weight = blk_fw[1];
- } else {
- if (j < block_width / 2)
- filter_weight = blk_fw[2];
- else
- filter_weight = blk_fw[3];
+ if (j < block_width / 2) {
+ return blk_fw[0];
+ }
+
+ return blk_fw[1];
+ }
+
+ if (j < block_width / 2) {
+ return blk_fw[2];
}
- return filter_weight;
+
+ return blk_fw[3];
}
void vp9_apply_temporal_filter_c(
@@ -296,7 +296,7 @@ void vp9_apply_temporal_filter_c(
for (i = 0, k = 0, m = 0; i < block_height; i++) {
for (j = 0; j < block_width; j++) {
const int pixel_value = y_pred[i * y_buf_stride + j];
- int filter_weight =
+ const int filter_weight =
get_filter_weight(i, j, block_height, block_width, blk_fw, use_32x32);
// non-local mean approach
diff --git a/vp9/encoder/x86/temporal_filter_sse4.c b/vp9/encoder/x86/temporal_filter_sse4.c
index 9f9483a9b..a97c96dee 100644
--- a/vp9/encoder/x86/temporal_filter_sse4.c
+++ b/vp9/encoder/x86/temporal_filter_sse4.c
@@ -254,7 +254,7 @@ static INLINE void read_chroma_dist_row_16(int ss_x, const uint16_t *u_dist,
__m128i *v_first,
__m128i *v_second) {
if (!ss_x) {
- // If there is no chroma subsampling in the horizaontal direction, then we
+ // If there is no chroma subsampling in the horizontal direction, then we
// need to load 16 entries from chroma.
read_dist_16(u_dist, u_first, u_second);
read_dist_16(v_dist, v_first, v_second);
@@ -274,8 +274,8 @@ static INLINE void read_chroma_dist_row_16(int ss_x, const uint16_t *u_dist,
}
}
-// Horizonta add unsigned 16-bit ints in src and store them as signed 32-bit int
-// in dst.
+// Horizontal add unsigned 16-bit ints in src and store them as signed 32-bit
+// int in dst.
static INLINE void hadd_epu16(__m128i *src, __m128i *dst) {
const __m128i zero = _mm_setzero_si128();
const __m128i shift_right = _mm_srli_si128(*src, 2);
@@ -323,7 +323,7 @@ static INLINE void add_luma_dist_to_8_chroma_mod(const uint16_t *y_dist,
// Apply temporal filter to the luma components. This performs temporal
// filtering on a luma block of 16 X block_height. Use blk_fw as an array of
-// size 4for the weights for each of the 4 subblocks if blk_fw is not NULL,
+// size 4 for the weights for each of the 4 subblocks if blk_fw is not NULL,
// else use top_weight for top half, and bottom weight for bottom half.
static void vp9_apply_temporal_filter_luma_16(
const uint8_t *y_src, int y_src_stride, const uint8_t *y_pre,
@@ -350,6 +350,9 @@ static void vp9_apply_temporal_filter_luma_16(
__m128i sum_row_first;
__m128i sum_row_second;
+ // Loop variables
+ unsigned int h;
+
assert(strength >= 0);
assert(strength <= 6);
@@ -408,7 +411,7 @@ static void vp9_apply_temporal_filter_luma_16(
mul_first = _mm_loadu_si128((const __m128i *)neighbors_first[1]);
mul_second = _mm_loadu_si128((const __m128i *)neighbors_second[1]);
- for (unsigned int h = 1; h < block_height - 1; ++h) {
+ for (h = 1; h < block_height - 1; ++h) {
// Move the weight to bottom half
if (!use_whole_blk && h == block_height / 2) {
if (blk_fw) {
@@ -640,6 +643,9 @@ static void vp9_apply_temporal_filter_chroma_8(
__m128i u_sum_row, v_sum_row;
+ // Loop variable
+ unsigned int h;
+
(void)uv_block_width;
// First row
@@ -690,7 +696,7 @@ static void vp9_apply_temporal_filter_chroma_8(
// Then all the rows except the last one
mul = _mm_loadu_si128((const __m128i *)neighbors[1]);
- for (unsigned int h = 1; h < uv_block_height - 1; ++h) {
+ for (h = 1; h < uv_block_height - 1; ++h) {
// Move the weight pointer to the bottom half of the blocks
if (h == uv_block_height / 2) {
if (blk_fw) {
@@ -936,6 +942,9 @@ void vp9_apply_temporal_filter_sse4_1(
const uint8_t *y_src_ptr = y_src, *u_src_ptr = u_src, *v_src_ptr = v_src;
const uint8_t *y_pre_ptr = y_pre, *u_pre_ptr = u_pre, *v_pre_ptr = v_pre;
+ // Loop variables
+ unsigned int row, blk_col;
+
assert(block_width <= BW && "block width too large");
assert(block_height <= BH && "block height too large");
assert(block_width % 16 == 0 && "block width must be multiple of 16");
@@ -953,8 +962,8 @@ void vp9_apply_temporal_filter_sse4_1(
"subblock filter weight must be less than 2");
// Precompute the difference sqaured
- for (unsigned int row = 0; row < block_height; row++) {
- for (unsigned int blk_col = 0; blk_col < block_width; blk_col += 16) {
+ for (row = 0; row < block_height; row++) {
+ for (blk_col = 0; blk_col < block_width; blk_col += 16) {
store_dist_16(y_src_ptr + blk_col, y_pre_ptr + blk_col,
y_dist_ptr + blk_col);
}
@@ -963,8 +972,8 @@ void vp9_apply_temporal_filter_sse4_1(
y_dist_ptr += DIST_STRIDE;
}
- for (unsigned int row = 0; row < chroma_height; row++) {
- for (unsigned int blk_col = 0; blk_col < chroma_width; blk_col += 8) {
+ for (row = 0; row < chroma_height; row++) {
+ for (blk_col = 0; blk_col < chroma_width; blk_col += 8) {
store_dist_8(u_src_ptr + blk_col, u_pre_ptr + blk_col,
u_dist_ptr + blk_col);
store_dist_8(v_src_ptr + blk_col, v_pre_ptr + blk_col,