summary | refs | log | tree | commit | diff
path: root/vpx_dsp
diff options
context:
space:
mode:
Diffstat (limited to 'vpx_dsp')
-rw-r--r-- vpx_dsp/arm/vpx_convolve8_avg_neon.c 8
-rw-r--r-- vpx_dsp/arm/vpx_convolve8_neon.c 8
-rw-r--r-- vpx_dsp/fastssim.c 2
-rw-r--r-- vpx_dsp/mips/add_noise_msa.c 2
-rw-r--r-- vpx_dsp/prob.h 19
-rw-r--r-- vpx_dsp/vpx_dsp_rtcd_defs.pl 12
-rw-r--r-- vpx_dsp/x86/convolve.h 4
7 files changed, 38 insertions, 17 deletions
diff --git a/vpx_dsp/arm/vpx_convolve8_avg_neon.c b/vpx_dsp/arm/vpx_convolve8_avg_neon.c
index 69cb28400..8e5373be0 100644
--- a/vpx_dsp/arm/vpx_convolve8_avg_neon.c
+++ b/vpx_dsp/arm/vpx_convolve8_avg_neon.c
@@ -64,6 +64,10 @@ void vpx_convolve8_avg_horiz_neon(const uint8_t *src, ptrdiff_t src_stride,
assert(x_step_q4 == 16);
+ (void)x_step_q4;
+ (void)y_step_q4;
+ (void)filter_y;
+
q0s16 = vld1q_s16(filter_x);
src -= 3; // adjust for taps
@@ -240,6 +244,10 @@ void vpx_convolve8_avg_vert_neon(const uint8_t *src, ptrdiff_t src_stride,
assert(y_step_q4 == 16);
+ (void)x_step_q4;
+ (void)y_step_q4;
+ (void)filter_x;
+
src -= src_stride * 3;
q0s16 = vld1q_s16(filter_y);
for (; w > 0; w -= 4, src += 4, dst += 4) { // loop_vert_h
diff --git a/vpx_dsp/arm/vpx_convolve8_neon.c b/vpx_dsp/arm/vpx_convolve8_neon.c
index 514525696..951c425e2 100644
--- a/vpx_dsp/arm/vpx_convolve8_neon.c
+++ b/vpx_dsp/arm/vpx_convolve8_neon.c
@@ -64,6 +64,10 @@ void vpx_convolve8_horiz_neon(const uint8_t *src, ptrdiff_t src_stride,
assert(x_step_q4 == 16);
+ (void)x_step_q4;
+ (void)y_step_q4;
+ (void)filter_y;
+
q0s16 = vld1q_s16(filter_x);
src -= 3; // adjust for taps
@@ -224,6 +228,10 @@ void vpx_convolve8_vert_neon(const uint8_t *src, ptrdiff_t src_stride,
assert(y_step_q4 == 16);
+ (void)x_step_q4;
+ (void)y_step_q4;
+ (void)filter_x;
+
src -= src_stride * 3;
q0s16 = vld1q_s16(filter_y);
for (; w > 0; w -= 4, src += 4, dst += 4) { // loop_vert_h
diff --git a/vpx_dsp/fastssim.c b/vpx_dsp/fastssim.c
index 4d5eb5a6f..0469071a1 100644
--- a/vpx_dsp/fastssim.c
+++ b/vpx_dsp/fastssim.c
@@ -202,6 +202,7 @@ static void fs_apply_luminance(fs_ctx *_ctx, int _l, int bit_depth) {
if (bit_depth == 12) ssim_c1 = SSIM_C1_12;
#else
assert(bit_depth == 8);
+ (void)bit_depth;
#endif
w = _ctx->level[_l].w;
h = _ctx->level[_l].h;
@@ -326,6 +327,7 @@ static void fs_calc_structure(fs_ctx *_ctx, int _l, int bit_depth) {
if (bit_depth == 12) ssim_c2 = SSIM_C2_12;
#else
assert(bit_depth == 8);
+ (void)bit_depth;
#endif
w = _ctx->level[_l].w;
diff --git a/vpx_dsp/mips/add_noise_msa.c b/vpx_dsp/mips/add_noise_msa.c
index e372b9d8c..48278d2ec 100644
--- a/vpx_dsp/mips/add_noise_msa.c
+++ b/vpx_dsp/mips/add_noise_msa.c
@@ -14,7 +14,7 @@
void vpx_plane_add_noise_msa(uint8_t *start_ptr, const int8_t *noise,
int blackclamp, int whiteclamp, int width,
int height, int32_t pitch) {
- uint32_t i, j;
+ int i, j;
for (i = 0; i < height / 2; ++i) {
uint8_t *pos0_ptr = start_ptr + (2 * i) * pitch;
diff --git a/vpx_dsp/prob.h b/vpx_dsp/prob.h
index 3127a00bb..5656ddbab 100644
--- a/vpx_dsp/prob.h
+++ b/vpx_dsp/prob.h
@@ -11,6 +11,8 @@
#ifndef VPX_DSP_PROB_H_
#define VPX_DSP_PROB_H_
+#include <assert.h>
+
#include "./vpx_config.h"
#include "./vpx_dsp_common.h"
@@ -43,17 +45,20 @@ typedef int8_t vpx_tree_index;
typedef const vpx_tree_index vpx_tree[];
-static INLINE vpx_prob clip_prob(int p) {
- return (p > 255) ? 255 : (p < 1) ? 1 : p;
-}
-
static INLINE vpx_prob get_prob(unsigned int num, unsigned int den) {
- if (den == 0) return 128u;
- return clip_prob((int)(((int64_t)num * 256 + (den >> 1)) / den));
+ assert(den != 0);
+ {
+ const int p = (int)(((int64_t)num * 256 + (den >> 1)) / den);
+ // (p > 255) ? 255 : (p < 1) ? 1 : p;
+ const int clipped_prob = p | ((255 - p) >> 23) | (p == 0);
+ return (vpx_prob)clipped_prob;
+ }
}
static INLINE vpx_prob get_binary_prob(unsigned int n0, unsigned int n1) {
- return get_prob(n0, n0 + n1);
+ const unsigned int den = n0 + n1;
+ if (den == 0) return 128u;
+ return get_prob(n0, den);
}
/* This function assumes prob1 and prob2 are already within [1,255] range. */
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 9fea2d1cf..46dd243f3 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -11,12 +11,6 @@ EOF
}
forward_decls qw/vpx_dsp_forward_decls/;
-# optimizations which depend on multiple features
-$avx2_ssse3 = '';
-if ((vpx_config("HAVE_AVX2") eq "yes") && (vpx_config("HAVE_SSSE3") eq "yes")) {
- $avx2_ssse3 = 'avx2';
-}
-
# functions that are 64 bit only.
$mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 = $avx_x86_64 = $avx2_x86_64 = '';
if ($opts{arch} eq "x86_64") {
@@ -437,13 +431,13 @@ add_proto qw/void vpx_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride,
specialize qw/vpx_convolve_avg neon dspr2 msa sse2/;
add_proto qw/void vpx_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vpx_convolve8 sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";
+specialize qw/vpx_convolve8 sse2 ssse3 avx2 neon dspr2 msa/;
add_proto qw/void vpx_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vpx_convolve8_horiz sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";
+specialize qw/vpx_convolve8_horiz sse2 ssse3 avx2 neon dspr2 msa/;
add_proto qw/void vpx_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vpx_convolve8_vert sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";
+specialize qw/vpx_convolve8_vert sse2 ssse3 avx2 neon dspr2 msa/;
add_proto qw/void vpx_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vpx_convolve8_avg sse2 ssse3 neon dspr2 msa/;
diff --git a/vpx_dsp/x86/convolve.h b/vpx_dsp/x86/convolve.h
index 2a0516cdc..d7468ad7c 100644
--- a/vpx_dsp/x86/convolve.h
+++ b/vpx_dsp/x86/convolve.h
@@ -25,6 +25,10 @@ typedef void filter8_1dfunction(const uint8_t *src_ptr, ptrdiff_t src_pitch,
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \
ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, \
const int16_t *filter_y, int y_step_q4, int w, int h) { \
+ (void)filter_x; \
+ (void)x_step_q4; \
+ (void)filter_y; \
+ (void)y_step_q4; \
assert(filter[3] != 128); \
assert(step_q4 == 16); \
if (filter[0] | filter[1] | filter[2]) { \