-rw-r--r--  tools_common.c                              2
-rw-r--r--  vp9/common/vp9_idct.h                       6
-rw-r--r--  vp9/encoder/x86/vp9_dct32x32_sse2_impl.h    5
-rw-r--r--  vp9/encoder/x86/vp9_dct_sse2_impl.h        20
-rw-r--r--  vpx_dsp/x86/highbd_variance_sse2.c         12
5 files changed, 25 insertions, 20 deletions
diff --git a/tools_common.c b/tools_common.c
index 901734e0f..8d356af3f 100644
--- a/tools_common.c
+++ b/tools_common.c
@@ -392,7 +392,7 @@ void vpx_img_truncate_16_to_8(vpx_image_t *dst, vpx_image_t *src) {
(uint16_t *)(src->planes[plane] + y * src->stride[plane]);
uint8_t *p_dst = dst->planes[plane] + y * dst->stride[plane];
for (x = 0; x < w; x++) {
- *p_dst++ = *p_src++;
+ *p_dst++ = (uint8_t)(*p_src++);
}
}
}
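The tools_common.c hunk only adds an explicit (uint8_t) cast: the 16-bit sample was already being truncated to its low 8 bits, the conversion is now just spelled out so implicit-conversion warnings stay quiet. A minimal self-contained sketch of the same pattern (hypothetical names, not libvpx code):

#include <stddef.h>
#include <stdint.h>

/* Copy 16-bit samples into an 8-bit buffer, keeping only the low byte. */
static void truncate_16_to_8(uint8_t *dst, const uint16_t *src, size_t n) {
  size_t i;
  for (i = 0; i < n; ++i) {
    /* Explicit cast: the stored value is src[i] & 0xff, same as before,
     * but the narrowing no longer triggers conversion warnings. */
    dst[i] = (uint8_t)src[i];
  }
}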
diff --git a/vp9/common/vp9_idct.h b/vp9/common/vp9_idct.h
index cee1682a6..cbce2dd89 100644
--- a/vp9/common/vp9_idct.h
+++ b/vp9/common/vp9_idct.h
@@ -37,6 +37,10 @@ extern "C" {
_mm_set_epi16((int16_t)(b), (int16_t)(b), (int16_t)(b), (int16_t)(b), \
(int16_t)(a), (int16_t)(a), (int16_t)(a), (int16_t)(a))
+#define octa_set_epi16(a, b, c, d, e, f, g, h) \
+ _mm_setr_epi16((int16_t)(a), (int16_t)(b), (int16_t)(c), (int16_t)(d), \
+ (int16_t)(e), (int16_t)(f), (int16_t)(g), (int16_t)(h))
+
// Constants:
// for (int i = 1; i< 32; ++i)
// printf("static const int cospi_%d_64 = %.0f;\n", i,
@@ -158,7 +162,7 @@ typedef struct {
// bd of x uses trans_low with 8+x bits, need to remove 24-x bits
#define WRAPLOW(x, bd) ((((int32_t)(x)) << (24 - bd)) >> (24 - bd))
#else
-#define WRAPLOW(x, bd) (x)
+#define WRAPLOW(x, bd) ((int32_t)(x))
#endif // CONFIG_EMULATE_HARDWARE
void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
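The vp9_idct.h hunks add a new octa_set_epi16 helper that wraps _mm_setr_epi16 with an explicit (int16_t) cast on each of the eight arguments, and make the non-CONFIG_EMULATE_HARDWARE WRAPLOW yield an int32_t instead of leaving the expression's original type. The point of the macro is that expressions such as -cospi_16_64 are wider than int16_t, so passing them straight to the intrinsic narrows implicitly. A small sketch of the intended use (the helper function name is hypothetical):

#include <emmintrin.h>
#include <stdint.h>

#define octa_set_epi16(a, b, c, d, e, f, g, h)                           \
  _mm_setr_epi16((int16_t)(a), (int16_t)(b), (int16_t)(c), (int16_t)(d), \
                 (int16_t)(e), (int16_t)(f), (int16_t)(g), (int16_t)(h))

/* Build a coefficient vector from two int-typed constants and their
 * negations; each argument is narrowed explicitly inside the macro. */
static __m128i make_coeff_vector(int c0, int c1) {
  return octa_set_epi16(c0, c1, c0, c1, -c0, -c1, -c0, -c1);
}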
diff --git a/vp9/encoder/x86/vp9_dct32x32_sse2_impl.h b/vp9/encoder/x86/vp9_dct32x32_sse2_impl.h
index 003ebd13f..5074d31a7 100644
--- a/vp9/encoder/x86/vp9_dct32x32_sse2_impl.h
+++ b/vp9/encoder/x86/vp9_dct32x32_sse2_impl.h
@@ -28,7 +28,8 @@ void vp9_fdct32x32_rows_c(const int16_t *intermediate, tran_low_t *out) {
temp_in[j] = intermediate[j * 32 + i];
vp9_fdct32(temp_in, temp_out, 0);
for (j = 0; j < 32; ++j)
- out[j + i * 32] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
+ out[j + i * 32] =
+ (tran_low_t)((temp_out[j] + 1 + (temp_out[j] < 0)) >> 2);
}
}
#define HIGH_FDCT32x32_2D_C vp9_highbd_fdct32x32_c
@@ -42,7 +43,7 @@ void vp9_fdct32x32_rd_rows_c(const int16_t *intermediate, tran_low_t *out) {
temp_in[j] = intermediate[j * 32 + i];
vp9_fdct32(temp_in, temp_out, 1);
for (j = 0; j < 32; ++j)
- out[j + i * 32] = temp_out[j];
+ out[j + i * 32] = (tran_low_t)temp_out[j];
}
}
#define HIGH_FDCT32x32_2D_C vp9_highbd_fdct32x32_rd_c
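Both 32x32 row transforms already computed the value being stored; the change only casts it to tran_low_t so the narrowing store is explicit. The first hunk also shows the rounding idiom used here: (x + 1 + (x < 0)) >> 2 divides by 4 and rounds to nearest, with ties broken toward zero for both signs. A sketch of that idiom in isolation (the typedef and function name are stand-ins, not libvpx code):

#include <stdint.h>

typedef int16_t tran_low_t_example;  /* stand-in for the narrower tran_low_t */

/* Divide by 4, rounding to nearest with ties toward zero:
 *   x =  6 ->  1,  x = -6 -> -1,  x =  2 -> 0,  x = -2 -> 0. */
static tran_low_t_example round_shift_2(int x) {
  return (tran_low_t_example)((x + 1 + (x < 0)) >> 2);
}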
diff --git a/vp9/encoder/x86/vp9_dct_sse2_impl.h b/vp9/encoder/x86/vp9_dct_sse2_impl.h
index 11bf5a25e..86e9ecf73 100644
--- a/vp9/encoder/x86/vp9_dct_sse2_impl.h
+++ b/vp9/encoder/x86/vp9_dct_sse2_impl.h
@@ -40,35 +40,35 @@ void FDCT4x4_2D(const int16_t *input, tran_low_t *output, int stride) {
// These are the coefficients used for the multiplies.
// In the comments, pN means cos(N pi /64) and mN is -cos(N pi /64),
// where cospi_N_64 = cos(N pi /64)
- const __m128i k__cospi_A = _mm_setr_epi16(cospi_16_64, cospi_16_64,
+ const __m128i k__cospi_A = octa_set_epi16(cospi_16_64, cospi_16_64,
cospi_16_64, cospi_16_64,
cospi_16_64, -cospi_16_64,
cospi_16_64, -cospi_16_64);
- const __m128i k__cospi_B = _mm_setr_epi16(cospi_16_64, -cospi_16_64,
+ const __m128i k__cospi_B = octa_set_epi16(cospi_16_64, -cospi_16_64,
cospi_16_64, -cospi_16_64,
cospi_16_64, cospi_16_64,
cospi_16_64, cospi_16_64);
- const __m128i k__cospi_C = _mm_setr_epi16(cospi_8_64, cospi_24_64,
+ const __m128i k__cospi_C = octa_set_epi16(cospi_8_64, cospi_24_64,
cospi_8_64, cospi_24_64,
cospi_24_64, -cospi_8_64,
cospi_24_64, -cospi_8_64);
- const __m128i k__cospi_D = _mm_setr_epi16(cospi_24_64, -cospi_8_64,
+ const __m128i k__cospi_D = octa_set_epi16(cospi_24_64, -cospi_8_64,
cospi_24_64, -cospi_8_64,
cospi_8_64, cospi_24_64,
cospi_8_64, cospi_24_64);
- const __m128i k__cospi_E = _mm_setr_epi16(cospi_16_64, cospi_16_64,
+ const __m128i k__cospi_E = octa_set_epi16(cospi_16_64, cospi_16_64,
cospi_16_64, cospi_16_64,
cospi_16_64, cospi_16_64,
cospi_16_64, cospi_16_64);
- const __m128i k__cospi_F = _mm_setr_epi16(cospi_16_64, -cospi_16_64,
+ const __m128i k__cospi_F = octa_set_epi16(cospi_16_64, -cospi_16_64,
cospi_16_64, -cospi_16_64,
cospi_16_64, -cospi_16_64,
cospi_16_64, -cospi_16_64);
- const __m128i k__cospi_G = _mm_setr_epi16(cospi_8_64, cospi_24_64,
+ const __m128i k__cospi_G = octa_set_epi16(cospi_8_64, cospi_24_64,
cospi_8_64, cospi_24_64,
-cospi_8_64, -cospi_24_64,
-cospi_8_64, -cospi_24_64);
- const __m128i k__cospi_H = _mm_setr_epi16(cospi_24_64, -cospi_8_64,
+ const __m128i k__cospi_H = octa_set_epi16(cospi_24_64, -cospi_8_64,
cospi_24_64, -cospi_8_64,
-cospi_24_64, cospi_8_64,
-cospi_24_64, cospi_8_64);
@@ -267,7 +267,7 @@ void FDCT8x8_2D(const int16_t *input, tran_low_t *output, int stride) {
// When we use them, in one case, they are all the same. In all others
// it's a pair of them that we need to repeat four times. This is done
// by constructing the 32 bit constant corresponding to that pair.
- const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
+ const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64);
const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64);
@@ -588,7 +588,7 @@ void FDCT16x16_2D(const int16_t *input, tran_low_t *output, int stride) {
// When we use them, in one case, they are all the same. In all others
// it's a pair of them that we need to repeat four times. This is done
// by constructing the 32 bit constant corresponding to that pair.
- const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
+ const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64);
const __m128i k__cospi_p08_m24 = pair_set_epi16(cospi_8_64, -cospi_24_64);
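In vp9_dct_sse2_impl.h the mixed-sign coefficient vectors switch from raw _mm_setr_epi16 calls to the new octa_set_epi16 wrapper, and the two splatted constants gain an (int16_t) cast for _mm_set1_epi16, since the cospi_*_64 constants are wider than int16_t. The splat pattern in isolation looks like this (illustrative helper, not the actual encoder code):

#include <emmintrin.h>
#include <stdint.h>

/* Broadcast an int-typed cosine constant into all eight int16_t lanes;
 * the cast makes the narrowing explicit. */
static __m128i splat_cospi(int cospi_value) {
  return _mm_set1_epi16((int16_t)cospi_value);
}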
diff --git a/vpx_dsp/x86/highbd_variance_sse2.c b/vpx_dsp/x86/highbd_variance_sse2.c
index fe35c1e86..b45331caa 100644
--- a/vpx_dsp/x86/highbd_variance_sse2.c
+++ b/vpx_dsp/x86/highbd_variance_sse2.c
@@ -50,7 +50,7 @@ static void highbd_10_variance_sse2(const uint16_t *src, int src_stride,
high_variance_fn_t var_fn, int block_size) {
int i, j;
uint64_t sse_long = 0;
- int64_t sum_long = 0;
+ int32_t sum_long = 0;
for (i = 0; i < h; i += block_size) {
for (j = 0; j < w; j += block_size) {
@@ -63,7 +63,7 @@ static void highbd_10_variance_sse2(const uint16_t *src, int src_stride,
}
}
*sum = ROUND_POWER_OF_TWO(sum_long, 2);
- *sse = ROUND_POWER_OF_TWO(sse_long, 4);
+ *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 4);
}
static void highbd_12_variance_sse2(const uint16_t *src, int src_stride,
@@ -72,7 +72,7 @@ static void highbd_12_variance_sse2(const uint16_t *src, int src_stride,
high_variance_fn_t var_fn, int block_size) {
int i, j;
uint64_t sse_long = 0;
- int64_t sum_long = 0;
+ int32_t sum_long = 0;
for (i = 0; i < h; i += block_size) {
for (j = 0; j < w; j += block_size) {
@@ -85,7 +85,7 @@ static void highbd_12_variance_sse2(const uint16_t *src, int src_stride,
}
}
*sum = ROUND_POWER_OF_TWO(sum_long, 4);
- *sse = ROUND_POWER_OF_TWO(sse_long, 8);
+ *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 8);
}
@@ -386,7 +386,7 @@ uint32_t vpx_highbd_12_sub_pixel_variance##w##x##h##_##opt( \
} \
} \
se = ROUND_POWER_OF_TWO(se, 4); \
- sse = ROUND_POWER_OF_TWO(long_sse, 8); \
+ sse = (uint32_t)ROUND_POWER_OF_TWO(long_sse, 8); \
*sse_ptr = sse; \
return sse - ((cast se * se) >> (wlog2 + hlog2)); \
}
@@ -555,7 +555,7 @@ uint32_t vpx_highbd_12_sub_pixel_avg_variance##w##x##h##_##opt( \
} \
} \
se = ROUND_POWER_OF_TWO(se, 4); \
- sse = ROUND_POWER_OF_TWO(long_sse, 8); \
+ sse = (uint32_t)ROUND_POWER_OF_TWO(long_sse, 8); \
*sse_ptr = sse; \
return sse - ((cast se * se) >> (wlog2 + hlog2)); \
}
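The highbd variance changes narrow sum_long to int32_t (presumably because the 10/12-bit block sums fit comfortably in 32 bits) and cast the rounded SSE accumulator down to the 32-bit *sse output, resolving 64-to-32 narrowing warnings. A sketch of the rounding/cast step, assuming ROUND_POWER_OF_TWO is the usual add-half-then-shift macro (the function name is hypothetical):

#include <stdint.h>

/* Assumed definition: divide value by 2^n, rounding to nearest. */
#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n))

/* Scale a 64-bit SSE accumulator back down and store it through the
 * caller's 32-bit output; the cast documents the narrowing. */
static uint32_t scale_sse(uint64_t sse_long, int shift) {
  return (uint32_t)ROUND_POWER_OF_TWO(sse_long, shift);
}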