summary | refs | log | tree | commit | diff
path: root/vp9/encoder/x86/vp9_denoiser_sse2.c
diff options
context:
space:
mode:
author: clang-format <noreply@google.com> 2016-07-26 20:43:23 -0700
committer: James Zern <jzern@google.com> 2016-08-02 16:47:11 -0700
commit: e0cc52db3fc9b09c99d7bbee35153cf82964a860 (patch)
tree: 4988f1d3a21056339e2ffbd7a3b3d52fab54cb6b /vp9/encoder/x86/vp9_denoiser_sse2.c
parent: 3a04c9c9c4c4935925f4c00dcc70610100c5e9dd (diff)
download: libvpx-e0cc52db3fc9b09c99d7bbee35153cf82964a860.tar
libvpx-e0cc52db3fc9b09c99d7bbee35153cf82964a860.tar.gz
libvpx-e0cc52db3fc9b09c99d7bbee35153cf82964a860.tar.bz2
libvpx-e0cc52db3fc9b09c99d7bbee35153cf82964a860.zip
vp9/encoder: apply clang-format
Change-Id: I45d9fb4013f50766b24363a86365e8063e8954c2
Diffstat (limited to 'vp9/encoder/x86/vp9_denoiser_sse2.c')
-rw-r--r-- vp9/encoder/x86/vp9_denoiser_sse2.c 119
1 files changed, 53 insertions, 66 deletions
diff --git a/vp9/encoder/x86/vp9_denoiser_sse2.c b/vp9/encoder/x86/vp9_denoiser_sse2.c
index 883507af3..91d0602f9 100644
--- a/vp9/encoder/x86/vp9_denoiser_sse2.c
+++ b/vp9/encoder/x86/vp9_denoiser_sse2.c
@@ -37,17 +37,11 @@ static INLINE int sum_diff_16x1(__m128i acc_diff) {
}
// Denoise a 16x1 vector.
-static INLINE __m128i vp9_denoiser_16x1_sse2(const uint8_t *sig,
- const uint8_t *mc_running_avg_y,
- uint8_t *running_avg_y,
- const __m128i *k_0,
- const __m128i *k_4,
- const __m128i *k_8,
- const __m128i *k_16,
- const __m128i *l3,
- const __m128i *l32,
- const __m128i *l21,
- __m128i acc_diff) {
+static INLINE __m128i vp9_denoiser_16x1_sse2(
+ const uint8_t *sig, const uint8_t *mc_running_avg_y, uint8_t *running_avg_y,
+ const __m128i *k_0, const __m128i *k_4, const __m128i *k_8,
+ const __m128i *k_16, const __m128i *l3, const __m128i *l32,
+ const __m128i *l21, __m128i acc_diff) {
// Calculate differences
const __m128i v_sig = _mm_loadu_si128((const __m128i *)(&sig[0]));
const __m128i v_mc_running_avg_y =
@@ -69,7 +63,7 @@ static INLINE __m128i vp9_denoiser_16x1_sse2(const uint8_t *sig,
__m128i adj2 = _mm_and_si128(mask2, *l32);
const __m128i adj1 = _mm_and_si128(mask1, *l21);
const __m128i adj0 = _mm_and_si128(mask0, clamped_absdiff);
- __m128i adj, padj, nadj;
+ __m128i adj, padj, nadj;
// Combine the adjustments and get absolute adjustments.
adj2 = _mm_add_epi8(adj2, adj1);
@@ -95,9 +89,8 @@ static INLINE __m128i vp9_denoiser_16x1_sse2(const uint8_t *sig,
// Denoise a 16x1 vector with a weaker filter.
static INLINE __m128i vp9_denoiser_adj_16x1_sse2(
- const uint8_t *sig, const uint8_t *mc_running_avg_y,
- uint8_t *running_avg_y, const __m128i k_0,
- const __m128i k_delta, __m128i acc_diff) {
+ const uint8_t *sig, const uint8_t *mc_running_avg_y, uint8_t *running_avg_y,
+ const __m128i k_0, const __m128i k_delta, __m128i acc_diff) {
__m128i v_running_avg_y = _mm_loadu_si128((__m128i *)(&running_avg_y[0]));
// Calculate differences.
const __m128i v_sig = _mm_loadu_si128((const __m128i *)(&sig[0]));
@@ -108,8 +101,7 @@ static INLINE __m128i vp9_denoiser_adj_16x1_sse2(
// Obtain the sign. FF if diff is negative.
const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, k_0);
// Clamp absolute difference to delta to get the adjustment.
- const __m128i adj =
- _mm_min_epu8(_mm_or_si128(pdiff, ndiff), k_delta);
+ const __m128i adj = _mm_min_epu8(_mm_or_si128(pdiff, ndiff), k_delta);
// Restore the sign and get positive and negative adjustments.
__m128i padj, nadj;
padj = _mm_andnot_si128(diff_sign, adj);
@@ -126,14 +118,17 @@ static INLINE __m128i vp9_denoiser_adj_16x1_sse2(
}
// Denoise 8x8 and 8x16 blocks.
-static int vp9_denoiser_NxM_sse2_small(
- const uint8_t *sig, int sig_stride, const uint8_t *mc_running_avg_y,
- int mc_avg_y_stride, uint8_t *running_avg_y, int avg_y_stride,
- int increase_denoising, BLOCK_SIZE bs, int motion_magnitude, int width) {
+static int vp9_denoiser_NxM_sse2_small(const uint8_t *sig, int sig_stride,
+ const uint8_t *mc_running_avg_y,
+ int mc_avg_y_stride,
+ uint8_t *running_avg_y, int avg_y_stride,
+ int increase_denoising, BLOCK_SIZE bs,
+ int motion_magnitude, int width) {
int sum_diff_thresh, r, sum_diff = 0;
- const int shift_inc = (increase_denoising &&
- motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ?
- 1 : 0;
+ const int shift_inc =
+ (increase_denoising && motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD)
+ ? 1
+ : 0;
uint8_t sig_buffer[8][16], mc_running_buffer[8][16], running_buffer[8][16];
__m128i acc_diff = _mm_setzero_si128();
const __m128i k_0 = _mm_setzero_si128();
@@ -153,15 +148,13 @@ static int vp9_denoiser_NxM_sse2_small(
memcpy(sig_buffer[r], sig, width);
memcpy(sig_buffer[r] + width, sig + sig_stride, width);
memcpy(mc_running_buffer[r], mc_running_avg_y, width);
- memcpy(mc_running_buffer[r] + width,
- mc_running_avg_y + mc_avg_y_stride, width);
+ memcpy(mc_running_buffer[r] + width, mc_running_avg_y + mc_avg_y_stride,
+ width);
memcpy(running_buffer[r], running_avg_y, width);
memcpy(running_buffer[r] + width, running_avg_y + avg_y_stride, width);
- acc_diff = vp9_denoiser_16x1_sse2(sig_buffer[r],
- mc_running_buffer[r],
- running_buffer[r],
- &k_0, &k_4, &k_8, &k_16,
- &l3, &l32, &l21, acc_diff);
+ acc_diff = vp9_denoiser_16x1_sse2(sig_buffer[r], mc_running_buffer[r],
+ running_buffer[r], &k_0, &k_4, &k_8,
+ &k_16, &l3, &l32, &l21, acc_diff);
memcpy(running_avg_y, running_buffer[r], width);
memcpy(running_avg_y + avg_y_stride, running_buffer[r] + width, width);
// Update pointers for next iteration.
@@ -184,19 +177,19 @@ static int vp9_denoiser_NxM_sse2_small(
// The delta is set by the excess of absolute pixel diff over the
// threshold.
- const int delta = ((abs(sum_diff) - sum_diff_thresh) >>
- num_pels_log2_lookup[bs]) + 1;
+ const int delta =
+ ((abs(sum_diff) - sum_diff_thresh) >> num_pels_log2_lookup[bs]) + 1;
// Only apply the adjustment for max delta up to 3.
if (delta < 4) {
const __m128i k_delta = _mm_set1_epi8(delta);
running_avg_y -= avg_y_stride * (b_height << 1);
for (r = 0; r < b_height; ++r) {
acc_diff = vp9_denoiser_adj_16x1_sse2(
- sig_buffer[r], mc_running_buffer[r], running_buffer[r],
- k_0, k_delta, acc_diff);
+ sig_buffer[r], mc_running_buffer[r], running_buffer[r], k_0,
+ k_delta, acc_diff);
memcpy(running_avg_y, running_buffer[r], width);
- memcpy(running_avg_y + avg_y_stride,
- running_buffer[r] + width, width);
+ memcpy(running_avg_y + avg_y_stride, running_buffer[r] + width,
+ width);
// Update pointers for next iteration.
running_avg_y += (avg_y_stride << 1);
}
@@ -216,14 +209,14 @@ static int vp9_denoiser_NxM_sse2_small(
static int vp9_denoiser_NxM_sse2_big(const uint8_t *sig, int sig_stride,
const uint8_t *mc_running_avg_y,
int mc_avg_y_stride,
- uint8_t *running_avg_y,
- int avg_y_stride,
+ uint8_t *running_avg_y, int avg_y_stride,
int increase_denoising, BLOCK_SIZE bs,
int motion_magnitude) {
int sum_diff_thresh, r, c, sum_diff = 0;
- const int shift_inc = (increase_denoising &&
- motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ?
- 1 : 0;
+ const int shift_inc =
+ (increase_denoising && motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD)
+ ? 1
+ : 0;
__m128i acc_diff[4][4];
const __m128i k_0 = _mm_setzero_si128();
const __m128i k_4 = _mm_set1_epi8(4 + shift_inc);
@@ -248,9 +241,9 @@ static int vp9_denoiser_NxM_sse2_big(const uint8_t *sig, int sig_stride,
for (r = 0; r < b_height; ++r) {
for (c = 0; c < b_width_shift4; ++c) {
- acc_diff[c][r>>4] = vp9_denoiser_16x1_sse2(
- sig, mc_running_avg_y, running_avg_y, &k_0, &k_4,
- &k_8, &k_16, &l3, &l32, &l21, acc_diff[c][r>>4]);
+ acc_diff[c][r >> 4] = vp9_denoiser_16x1_sse2(
+ sig, mc_running_avg_y, running_avg_y, &k_0, &k_4, &k_8, &k_16, &l3,
+ &l32, &l21, acc_diff[c][r >> 4]);
// Update pointers for next iteration.
sig += 16;
mc_running_avg_y += 16;
@@ -259,7 +252,7 @@ static int vp9_denoiser_NxM_sse2_big(const uint8_t *sig, int sig_stride,
if ((r & 0xf) == 0xf || (bs == BLOCK_16X8 && r == 7)) {
for (c = 0; c < b_width_shift4; ++c) {
- sum_diff += sum_diff_16x1(acc_diff[c][r>>4]);
+ sum_diff += sum_diff_16x1(acc_diff[c][r >> 4]);
}
}
@@ -272,8 +265,8 @@ static int vp9_denoiser_NxM_sse2_big(const uint8_t *sig, int sig_stride,
{
sum_diff_thresh = total_adj_strong_thresh(bs, increase_denoising);
if (abs(sum_diff) > sum_diff_thresh) {
- const int delta = ((abs(sum_diff) - sum_diff_thresh) >>
- num_pels_log2_lookup[bs]) + 1;
+ const int delta =
+ ((abs(sum_diff) - sum_diff_thresh) >> num_pels_log2_lookup[bs]) + 1;
// Only apply the adjustment for max delta up to 3.
if (delta < 4) {
@@ -284,9 +277,9 @@ static int vp9_denoiser_NxM_sse2_big(const uint8_t *sig, int sig_stride,
sum_diff = 0;
for (r = 0; r < b_height; ++r) {
for (c = 0; c < b_width_shift4; ++c) {
- acc_diff[c][r>>4] = vp9_denoiser_adj_16x1_sse2(
- sig, mc_running_avg_y, running_avg_y, k_0,
- k_delta, acc_diff[c][r>>4]);
+ acc_diff[c][r >> 4] =
+ vp9_denoiser_adj_16x1_sse2(sig, mc_running_avg_y, running_avg_y,
+ k_0, k_delta, acc_diff[c][r >> 4]);
// Update pointers for next iteration.
sig += 16;
mc_running_avg_y += 16;
@@ -295,7 +288,7 @@ static int vp9_denoiser_NxM_sse2_big(const uint8_t *sig, int sig_stride,
if ((r & 0xf) == 0xf || (bs == BLOCK_16X8 && r == 7)) {
for (c = 0; c < b_width_shift4; ++c) {
- sum_diff += sum_diff_16x1(acc_diff[c][r>>4]);
+ sum_diff += sum_diff_16x1(acc_diff[c][r >> 4]);
}
}
sig = sig - b_width + sig_stride;
@@ -314,27 +307,21 @@ static int vp9_denoiser_NxM_sse2_big(const uint8_t *sig, int sig_stride,
}
int vp9_denoiser_filter_sse2(const uint8_t *sig, int sig_stride,
- const uint8_t *mc_avg,
- int mc_avg_stride,
+ const uint8_t *mc_avg, int mc_avg_stride,
uint8_t *avg, int avg_stride,
- int increase_denoising,
- BLOCK_SIZE bs,
+ int increase_denoising, BLOCK_SIZE bs,
int motion_magnitude) {
// Rank by frequency of the block type to have an early termination.
if (bs == BLOCK_16X16 || bs == BLOCK_32X32 || bs == BLOCK_64X64 ||
bs == BLOCK_16X32 || bs == BLOCK_16X8 || bs == BLOCK_32X16 ||
bs == BLOCK_32X64 || bs == BLOCK_64X32) {
- return vp9_denoiser_NxM_sse2_big(sig, sig_stride,
- mc_avg, mc_avg_stride,
- avg, avg_stride,
- increase_denoising,
- bs, motion_magnitude);
+ return vp9_denoiser_NxM_sse2_big(sig, sig_stride, mc_avg, mc_avg_stride,
+ avg, avg_stride, increase_denoising, bs,
+ motion_magnitude);
} else if (bs == BLOCK_8X8 || bs == BLOCK_8X16) {
- return vp9_denoiser_NxM_sse2_small(sig, sig_stride,
- mc_avg, mc_avg_stride,
- avg, avg_stride,
- increase_denoising,
- bs, motion_magnitude, 8);
+ return vp9_denoiser_NxM_sse2_small(sig, sig_stride, mc_avg, mc_avg_stride,
+ avg, avg_stride, increase_denoising, bs,
+ motion_magnitude, 8);
} else {
return COPY_BLOCK;
}