Diffstat:
-rw-r--r--  vp8/common/arm/neon/loopfilter_neon.c                     |   3
-rw-r--r--  vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c |   3
-rw-r--r--  vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c   |   3
-rw-r--r--  vp8/common/arm/neon/mbloopfilter_neon.c                   | 147
-rw-r--r--  vp8/common/rtcd_defs.pl                                   |   2
-rw-r--r--  vp8/encoder/arm/neon/denoising_neon.c                     |  18
-rw-r--r--  vp8/encoder/denoising.c                                   |  34
-rw-r--r--  vp8/encoder/x86/denoising_sse2.c                          |  20
8 files changed, 128 insertions, 102 deletions
diff --git a/vp8/common/arm/neon/loopfilter_neon.c b/vp8/common/arm/neon/loopfilter_neon.c index 47f522bc6..be77e6726 100644 --- a/vp8/common/arm/neon/loopfilter_neon.c +++ b/vp8/common/arm/neon/loopfilter_neon.c @@ -9,8 +9,9 @@ */ #include <arm_neon.h> +#include "./vpx_config.h" -static inline void vp8_loop_filter_neon( +static INLINE void vp8_loop_filter_neon( uint8x16_t qblimit, // flimit uint8x16_t qlimit, // limit uint8x16_t qthresh, // thresh diff --git a/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c b/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c index 3d024e068..b25686ffb 100644 --- a/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c +++ b/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c @@ -9,8 +9,9 @@ */ #include <arm_neon.h> +#include "./vpx_config.h" -static inline void vp8_loop_filter_simple_horizontal_edge_neon( +static INLINE void vp8_loop_filter_simple_horizontal_edge_neon( unsigned char *s, int p, const unsigned char *blimit) { diff --git a/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c b/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c index 68bcde412..b0952b582 100644 --- a/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c +++ b/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c @@ -9,8 +9,9 @@ */ #include <arm_neon.h> +#include "./vpx_config.h" -static inline void vp8_loop_filter_simple_vertical_edge_neon( +static INLINE void vp8_loop_filter_simple_vertical_edge_neon( unsigned char *s, int p, const unsigned char *blimit) { diff --git a/vp8/common/arm/neon/mbloopfilter_neon.c b/vp8/common/arm/neon/mbloopfilter_neon.c index 385ce36e7..5351f4be6 100644 --- a/vp8/common/arm/neon/mbloopfilter_neon.c +++ b/vp8/common/arm/neon/mbloopfilter_neon.c @@ -9,8 +9,9 @@ */ #include <arm_neon.h> +#include "./vpx_config.h" -static inline void vp8_mbloop_filter_neon( +static INLINE void vp8_mbloop_filter_neon( uint8x16_t qblimit, // mblimit uint8x16_t qlimit, // limit uint8x16_t qthresh, // 
thresh @@ -352,20 +353,28 @@ void vp8_mbloop_filter_vertical_edge_y_neon( q9 = vcombine_u8(d18, d19); q10 = vcombine_u8(d20, d21); - q2tmp0 = vtrnq_u32((uint32x4_t)q3, (uint32x4_t)q7); - q2tmp1 = vtrnq_u32((uint32x4_t)q4, (uint32x4_t)q8); - q2tmp2 = vtrnq_u32((uint32x4_t)q5, (uint32x4_t)q9); - q2tmp3 = vtrnq_u32((uint32x4_t)q6, (uint32x4_t)q10); - - q2tmp4 = vtrnq_u16((uint16x8_t)q2tmp0.val[0], (uint16x8_t)q2tmp2.val[0]); - q2tmp5 = vtrnq_u16((uint16x8_t)q2tmp1.val[0], (uint16x8_t)q2tmp3.val[0]); - q2tmp6 = vtrnq_u16((uint16x8_t)q2tmp0.val[1], (uint16x8_t)q2tmp2.val[1]); - q2tmp7 = vtrnq_u16((uint16x8_t)q2tmp1.val[1], (uint16x8_t)q2tmp3.val[1]); - - q2tmp8 = vtrnq_u8((uint8x16_t)q2tmp4.val[0], (uint8x16_t)q2tmp5.val[0]); - q2tmp9 = vtrnq_u8((uint8x16_t)q2tmp4.val[1], (uint8x16_t)q2tmp5.val[1]); - q2tmp10 = vtrnq_u8((uint8x16_t)q2tmp6.val[0], (uint8x16_t)q2tmp7.val[0]); - q2tmp11 = vtrnq_u8((uint8x16_t)q2tmp6.val[1], (uint8x16_t)q2tmp7.val[1]); + q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7)); + q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8)); + q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9)); + q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10)); + + q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]), + vreinterpretq_u16_u32(q2tmp2.val[0])); + q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]), + vreinterpretq_u16_u32(q2tmp3.val[0])); + q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]), + vreinterpretq_u16_u32(q2tmp2.val[1])); + q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]), + vreinterpretq_u16_u32(q2tmp3.val[1])); + + q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]), + vreinterpretq_u8_u16(q2tmp5.val[0])); + q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]), + vreinterpretq_u8_u16(q2tmp5.val[1])); + q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]), + vreinterpretq_u8_u16(q2tmp7.val[0])); + q2tmp11 = 
vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]), + vreinterpretq_u8_u16(q2tmp7.val[1])); q3 = q2tmp8.val[0]; q4 = q2tmp8.val[1]; @@ -380,20 +389,28 @@ void vp8_mbloop_filter_vertical_edge_y_neon( q5, q6, q7, q8, q9, q10, &q4, &q5, &q6, &q7, &q8, &q9); - q2tmp0 = vtrnq_u32((uint32x4_t)q3, (uint32x4_t)q7); - q2tmp1 = vtrnq_u32((uint32x4_t)q4, (uint32x4_t)q8); - q2tmp2 = vtrnq_u32((uint32x4_t)q5, (uint32x4_t)q9); - q2tmp3 = vtrnq_u32((uint32x4_t)q6, (uint32x4_t)q10); - - q2tmp4 = vtrnq_u16((uint16x8_t)q2tmp0.val[0], (uint16x8_t)q2tmp2.val[0]); - q2tmp5 = vtrnq_u16((uint16x8_t)q2tmp1.val[0], (uint16x8_t)q2tmp3.val[0]); - q2tmp6 = vtrnq_u16((uint16x8_t)q2tmp0.val[1], (uint16x8_t)q2tmp2.val[1]); - q2tmp7 = vtrnq_u16((uint16x8_t)q2tmp1.val[1], (uint16x8_t)q2tmp3.val[1]); - - q2tmp8 = vtrnq_u8((uint8x16_t)q2tmp4.val[0], (uint8x16_t)q2tmp5.val[0]); - q2tmp9 = vtrnq_u8((uint8x16_t)q2tmp4.val[1], (uint8x16_t)q2tmp5.val[1]); - q2tmp10 = vtrnq_u8((uint8x16_t)q2tmp6.val[0], (uint8x16_t)q2tmp7.val[0]); - q2tmp11 = vtrnq_u8((uint8x16_t)q2tmp6.val[1], (uint8x16_t)q2tmp7.val[1]); + q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7)); + q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8)); + q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9)); + q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10)); + + q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]), + vreinterpretq_u16_u32(q2tmp2.val[0])); + q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]), + vreinterpretq_u16_u32(q2tmp3.val[0])); + q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]), + vreinterpretq_u16_u32(q2tmp2.val[1])); + q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]), + vreinterpretq_u16_u32(q2tmp3.val[1])); + + q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]), + vreinterpretq_u8_u16(q2tmp5.val[0])); + q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]), + vreinterpretq_u8_u16(q2tmp5.val[1])); + q2tmp10 = 
vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]), + vreinterpretq_u8_u16(q2tmp7.val[0])); + q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]), + vreinterpretq_u8_u16(q2tmp7.val[1])); q3 = q2tmp8.val[0]; q4 = q2tmp8.val[1]; @@ -503,20 +520,28 @@ void vp8_mbloop_filter_vertical_edge_uv_neon( q9 = vcombine_u8(d18, d19); q10 = vcombine_u8(d20, d21); - q2tmp0 = vtrnq_u32((uint32x4_t)q3, (uint32x4_t)q7); - q2tmp1 = vtrnq_u32((uint32x4_t)q4, (uint32x4_t)q8); - q2tmp2 = vtrnq_u32((uint32x4_t)q5, (uint32x4_t)q9); - q2tmp3 = vtrnq_u32((uint32x4_t)q6, (uint32x4_t)q10); - - q2tmp4 = vtrnq_u16((uint16x8_t)q2tmp0.val[0], (uint16x8_t)q2tmp2.val[0]); - q2tmp5 = vtrnq_u16((uint16x8_t)q2tmp1.val[0], (uint16x8_t)q2tmp3.val[0]); - q2tmp6 = vtrnq_u16((uint16x8_t)q2tmp0.val[1], (uint16x8_t)q2tmp2.val[1]); - q2tmp7 = vtrnq_u16((uint16x8_t)q2tmp1.val[1], (uint16x8_t)q2tmp3.val[1]); - - q2tmp8 = vtrnq_u8((uint8x16_t)q2tmp4.val[0], (uint8x16_t)q2tmp5.val[0]); - q2tmp9 = vtrnq_u8((uint8x16_t)q2tmp4.val[1], (uint8x16_t)q2tmp5.val[1]); - q2tmp10 = vtrnq_u8((uint8x16_t)q2tmp6.val[0], (uint8x16_t)q2tmp7.val[0]); - q2tmp11 = vtrnq_u8((uint8x16_t)q2tmp6.val[1], (uint8x16_t)q2tmp7.val[1]); + q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7)); + q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8)); + q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9)); + q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10)); + + q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]), + vreinterpretq_u16_u32(q2tmp2.val[0])); + q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]), + vreinterpretq_u16_u32(q2tmp3.val[0])); + q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]), + vreinterpretq_u16_u32(q2tmp2.val[1])); + q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]), + vreinterpretq_u16_u32(q2tmp3.val[1])); + + q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]), + vreinterpretq_u8_u16(q2tmp5.val[0])); + q2tmp9 = 
vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]), + vreinterpretq_u8_u16(q2tmp5.val[1])); + q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]), + vreinterpretq_u8_u16(q2tmp7.val[0])); + q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]), + vreinterpretq_u8_u16(q2tmp7.val[1])); q3 = q2tmp8.val[0]; q4 = q2tmp8.val[1]; @@ -531,20 +556,28 @@ void vp8_mbloop_filter_vertical_edge_uv_neon( q5, q6, q7, q8, q9, q10, &q4, &q5, &q6, &q7, &q8, &q9); - q2tmp0 = vtrnq_u32((uint32x4_t)q3, (uint32x4_t)q7); - q2tmp1 = vtrnq_u32((uint32x4_t)q4, (uint32x4_t)q8); - q2tmp2 = vtrnq_u32((uint32x4_t)q5, (uint32x4_t)q9); - q2tmp3 = vtrnq_u32((uint32x4_t)q6, (uint32x4_t)q10); - - q2tmp4 = vtrnq_u16((uint16x8_t)q2tmp0.val[0], (uint16x8_t)q2tmp2.val[0]); - q2tmp5 = vtrnq_u16((uint16x8_t)q2tmp1.val[0], (uint16x8_t)q2tmp3.val[0]); - q2tmp6 = vtrnq_u16((uint16x8_t)q2tmp0.val[1], (uint16x8_t)q2tmp2.val[1]); - q2tmp7 = vtrnq_u16((uint16x8_t)q2tmp1.val[1], (uint16x8_t)q2tmp3.val[1]); - - q2tmp8 = vtrnq_u8((uint8x16_t)q2tmp4.val[0], (uint8x16_t)q2tmp5.val[0]); - q2tmp9 = vtrnq_u8((uint8x16_t)q2tmp4.val[1], (uint8x16_t)q2tmp5.val[1]); - q2tmp10 = vtrnq_u8((uint8x16_t)q2tmp6.val[0], (uint8x16_t)q2tmp7.val[0]); - q2tmp11 = vtrnq_u8((uint8x16_t)q2tmp6.val[1], (uint8x16_t)q2tmp7.val[1]); + q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7)); + q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8)); + q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9)); + q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10)); + + q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]), + vreinterpretq_u16_u32(q2tmp2.val[0])); + q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]), + vreinterpretq_u16_u32(q2tmp3.val[0])); + q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]), + vreinterpretq_u16_u32(q2tmp2.val[1])); + q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]), + vreinterpretq_u16_u32(q2tmp3.val[1])); + + q2tmp8 = 
vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]), + vreinterpretq_u8_u16(q2tmp5.val[0])); + q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]), + vreinterpretq_u8_u16(q2tmp5.val[1])); + q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]), + vreinterpretq_u8_u16(q2tmp7.val[0])); + q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]), + vreinterpretq_u8_u16(q2tmp7.val[1])); q3 = q2tmp8.val[0]; q4 = q2tmp8.val[1]; diff --git a/vp8/common/rtcd_defs.pl b/vp8/common/rtcd_defs.pl index 130d96535..789b0de54 100644 --- a/vp8/common/rtcd_defs.pl +++ b/vp8/common/rtcd_defs.pl @@ -532,7 +532,7 @@ specialize qw/vp8_yv12_copy_partial_frame neon/; # Denoiser filter # if (vpx_config("CONFIG_TEMPORAL_DENOISING") eq "yes") { - add_proto qw/int vp8_denoiser_filter/, "struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset"; + add_proto qw/int vp8_denoiser_filter/, "unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude"; specialize qw/vp8_denoiser_filter sse2 neon/; } diff --git a/vp8/encoder/arm/neon/denoising_neon.c b/vp8/encoder/arm/neon/denoising_neon.c index 23dc0a967..1bebe8fba 100644 --- a/vp8/encoder/arm/neon/denoising_neon.c +++ b/vp8/encoder/arm/neon/denoising_neon.c @@ -45,10 +45,12 @@ * [16, 255] 3 6 7 */ -int vp8_denoiser_filter_neon(YV12_BUFFER_CONFIG *mc_running_avg, - YV12_BUFFER_CONFIG *running_avg, - MACROBLOCK *signal, unsigned int motion_magnitude, - int y_offset, int uv_offset) { +int vp8_denoiser_filter_neon(unsigned char *mc_running_avg_y, + int mc_running_avg_y_stride, + unsigned char *running_avg_y, + int running_avg_y_stride, + unsigned char *sig, int sig_stride, + unsigned int motion_magnitude) { /* If motion_magnitude is small, making the denoiser more aggressive by * increasing the adjustment for each level, level1 adjustment is 
* increased, the deltas stay the same. @@ -60,14 +62,6 @@ int vp8_denoiser_filter_neon(YV12_BUFFER_CONFIG *mc_running_avg, const uint8x16_t v_level1_threshold = vdupq_n_u8(4); const uint8x16_t v_level2_threshold = vdupq_n_u8(8); const uint8x16_t v_level3_threshold = vdupq_n_u8(16); - - /* Local variables for array pointers and strides. */ - unsigned char *sig = signal->thismb; - int sig_stride = 16; - unsigned char *mc_running_avg_y = mc_running_avg->y_buffer + y_offset; - int mc_running_avg_y_stride = mc_running_avg->y_stride; - unsigned char *running_avg_y = running_avg->y_buffer + y_offset; - int running_avg_y_stride = running_avg->y_stride; int64x2_t v_sum_diff_total = vdupq_n_s64(0); /* Go over lines. */ diff --git a/vp8/encoder/denoising.c b/vp8/encoder/denoising.c index 781926547..bfce28052 100644 --- a/vp8/encoder/denoising.c +++ b/vp8/encoder/denoising.c @@ -51,17 +51,13 @@ static const unsigned int SSE_THRESHOLD = 16 * 16 * 40; * [16, 255] 6 7 */ -int vp8_denoiser_filter_c(YV12_BUFFER_CONFIG *mc_running_avg, - YV12_BUFFER_CONFIG *running_avg, MACROBLOCK *signal, - unsigned int motion_magnitude, int y_offset, - int uv_offset) +int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride, + unsigned char *running_avg_y, int avg_y_stride, + unsigned char *sig, int sig_stride, + unsigned int motion_magnitude) { - unsigned char *sig = signal->thismb; - int sig_stride = 16; - unsigned char *mc_running_avg_y = mc_running_avg->y_buffer + y_offset; - int mc_avg_y_stride = mc_running_avg->y_stride; - unsigned char *running_avg_y = running_avg->y_buffer + y_offset; - int avg_y_stride = running_avg->y_stride; + unsigned char *running_avg_y_start = running_avg_y; + unsigned char *sig_start = sig; int r, c, i; int sum_diff = 0; int adj_val[3] = {3, 4, 6}; @@ -130,8 +126,7 @@ int vp8_denoiser_filter_c(YV12_BUFFER_CONFIG *mc_running_avg, if (abs(sum_diff) > SUM_DIFF_THRESHOLD) return COPY_BLOCK; - vp8_copy_mem16x16(running_avg->y_buffer + y_offset, 
avg_y_stride, - signal->thismb, sig_stride); + vp8_copy_mem16x16(running_avg_y_start, avg_y_stride, sig_start, sig_stride); return FILTER_BLOCK; } @@ -285,12 +280,17 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser, if (decision == FILTER_BLOCK) { + unsigned char *mc_running_avg_y = + denoiser->yv12_mc_running_avg.y_buffer + recon_yoffset; + int mc_avg_y_stride = denoiser->yv12_mc_running_avg.y_stride; + unsigned char *running_avg_y = + denoiser->yv12_running_avg[INTRA_FRAME].y_buffer + recon_yoffset; + int avg_y_stride = denoiser->yv12_running_avg[INTRA_FRAME].y_stride; + /* Filter. */ - decision = vp8_denoiser_filter(&denoiser->yv12_mc_running_avg, - &denoiser->yv12_running_avg[INTRA_FRAME], - x, - motion_magnitude2, - recon_yoffset, recon_uvoffset); + decision = vp8_denoiser_filter(mc_running_avg_y, mc_avg_y_stride, + running_avg_y, avg_y_stride, + x->thismb, 16, motion_magnitude2); } if (decision == COPY_BLOCK) { diff --git a/vp8/encoder/x86/denoising_sse2.c b/vp8/encoder/x86/denoising_sse2.c index cceb8263f..d1f76b2cb 100644 --- a/vp8/encoder/x86/denoising_sse2.c +++ b/vp8/encoder/x86/denoising_sse2.c @@ -22,17 +22,14 @@ union sum_union { signed char e[16]; }; -int vp8_denoiser_filter_sse2(YV12_BUFFER_CONFIG *mc_running_avg, - YV12_BUFFER_CONFIG *running_avg, - MACROBLOCK *signal, unsigned int motion_magnitude, - int y_offset, int uv_offset) +int vp8_denoiser_filter_sse2(unsigned char *mc_running_avg_y, + int mc_avg_y_stride, + unsigned char *running_avg_y, int avg_y_stride, + unsigned char *sig, int sig_stride, + unsigned int motion_magnitude) { - unsigned char *sig = signal->thismb; - int sig_stride = 16; - unsigned char *mc_running_avg_y = mc_running_avg->y_buffer + y_offset; - int mc_avg_y_stride = mc_running_avg->y_stride; - unsigned char *running_avg_y = running_avg->y_buffer + y_offset; - int avg_y_stride = running_avg->y_stride; + unsigned char *running_avg_y_start = running_avg_y; + unsigned char *sig_start = sig; int r; __m128i acc_diff = 
_mm_setzero_si128(); const __m128i k_0 = _mm_setzero_si128(); @@ -114,7 +111,6 @@ int vp8_denoiser_filter_sse2(YV12_BUFFER_CONFIG *mc_running_avg, } } - vp8_copy_mem16x16(running_avg->y_buffer + y_offset, avg_y_stride, - signal->thismb, sig_stride); + vp8_copy_mem16x16(running_avg_y_start, avg_y_stride, sig_start, sig_stride); return FILTER_BLOCK; } |