diff options
Diffstat (limited to 'vpx_dsp')
-rw-r--r-- | vpx_dsp/mips/postproc_msa.c | 70 | ||||
-rw-r--r-- | vpx_dsp/postproc.c | 55 | ||||
-rw-r--r-- | vpx_dsp/vpx_dsp.mk | 7 | ||||
-rw-r--r-- | vpx_dsp/vpx_dsp_rtcd_defs.pl | 9 | ||||
-rw-r--r-- | vpx_dsp/x86/postproc_mmx.asm | 96 | ||||
-rw-r--r-- | vpx_dsp/x86/postproc_sse2.asm | 96 |
6 files changed, 333 insertions, 0 deletions
diff --git a/vpx_dsp/mips/postproc_msa.c b/vpx_dsp/mips/postproc_msa.c
new file mode 100644
index 000000000..366770c0d
--- /dev/null
+++ b/vpx_dsp/mips/postproc_msa.c
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+#include "./macros_msa.h"
+
+// MSA version of vpx_plane_add_noise.
+//
+// Pixels are clamped to [blackclamp[0], 255 - whiteclamp[0]] and then have
+// one byte of noise added with 8-bit wrap-around. Rows are handled in pairs
+// (height / 2 iterations) and columns in groups of 16 (width / 16
+// iterations), so an odd final row and the last width % 16 columns are left
+// untouched. Each row reads its noise starting at noise + (rand() & 0xff).
+// Only element 0 of blackclamp and whiteclamp is read; bothclamp is unused.
+void vpx_plane_add_noise_msa(uint8_t *start_ptr, char *noise,
+                             char blackclamp[16], char whiteclamp[16],
+                             char bothclamp[16], uint32_t width,
+                             uint32_t height, int32_t pitch) {
+  uint32_t i, j;
+
+  (void)bothclamp;
+  for (i = 0; i < height / 2; ++i) {
+    uint8_t *pos0_ptr = start_ptr + (2 * i) * pitch;
+    int8_t *ref0_ptr = (int8_t *)(noise + (rand() & 0xff));
+    uint8_t *pos1_ptr = start_ptr + (2 * i + 1) * pitch;
+    int8_t *ref1_ptr = (int8_t *)(noise + (rand() & 0xff));
+    for (j = width / 16; j--;) {
+      v16u8 temp00, temp01, black_clamp, white_limit;
+      v16u8 pos0, ref0, pos1, ref1;
+
+      pos0 = LD_UB(pos0_ptr);
+      ref0 = LD_UB(ref0_ptr);
+      pos1 = LD_UB(pos1_ptr);
+      ref1 = LD_UB(ref1_ptr);
+      black_clamp = (v16u8)__msa_fill_b(blackclamp[0]);
+      // Ceiling: leave whiteclamp[0] of headroom below 255, matching the
+      // saturating subtract of whiteclamp in the x86 versions.
+      white_limit = (v16u8)__msa_fill_b(255 - whiteclamp[0]);
+
+      // Raise pixels below the black level.
+      temp00 = (pos0 < black_clamp);
+      pos0 = __msa_bmnz_v(pos0, black_clamp, temp00);
+      temp01 = (pos1 < black_clamp);
+      pos1 = __msa_bmnz_v(pos1, black_clamp, temp01);
+
+      // Lower pixels above the white ceiling.
+      temp00 = (white_limit < pos0);
+      pos0 = __msa_bmnz_v(pos0, white_limit, temp00);
+      temp01 = (white_limit < pos1);
+      pos1 = __msa_bmnz_v(pos1, white_limit, temp01);
+
+      // Add the noise and write both rows back.
+      pos0 += ref0;
+      ST_UB(pos0, pos0_ptr);
+      pos1 += ref1;
+      ST_UB(pos1, pos1_ptr);
+      pos0_ptr += 16;
+      pos1_ptr += 16;
+      ref0_ptr += 16;
+      ref1_ptr += 16;
+    }
+  }
+}
diff --git a/vpx_dsp/postproc.c b/vpx_dsp/postproc.c
new file mode 100644
index 000000000..1fa0204f4
--- /dev/null
+++ b/vpx_dsp/postproc.c
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+
+#include "vpx/vpx_integer.h"
+#include "vpx_ports/mem.h"
+
+// Adds noise to a plane of 8-bit pixels, one row at a time.
+//
+// Each pixel is clamped to [blackclamp[0], 255 - whiteclamp[0]] and then has
+// one byte of noise added with 8-bit wrap-around. Every row reads its noise
+// starting at noise + (rand() & 0xff).
+//
+// Only element 0 of blackclamp and whiteclamp is read. bothclamp is unused:
+// the x86 versions subtract blackclamp, add bothclamp and subtract whiteclamp
+// with unsigned saturation, which gives the same range provided that
+// bothclamp == blackclamp + whiteclamp (assumed here, not checked).
+void vpx_plane_add_noise_c(uint8_t *start, char *noise,
+                           char blackclamp[16],
+                           char whiteclamp[16],
+                           char bothclamp[16],
+                           unsigned int width, unsigned int height, int pitch) {
+  unsigned int i, j;
+  const int black_limit = blackclamp[0];
+  // Leave whiteclamp[0] of headroom below 255, as the x86 versions do; this
+  // is presumably what keeps added noise from wrapping bright pixels.
+  const int white_limit = 255 - whiteclamp[0];
+
+  (void) bothclamp;
+  for (i = 0; i < height; i++) {
+    uint8_t *pos = start + i * pitch;
+    char *ref = (char *)(noise + (rand() & 0xff));  // NOLINT
+
+    for (j = 0; j < width; j++) {
+      if (pos[j] < black_limit) {
+        pos[j] = black_limit;
+      }
+      if (pos[j] > white_limit) {
+        pos[j] = white_limit;
+      }
+      pos[j] += ref[j];
+    }
+  }
+}
diff --git a/vpx_dsp/vpx_dsp.mk b/vpx_dsp/vpx_dsp.mk
index 581ec3a28..ef319a864 100644
--- a/vpx_dsp/vpx_dsp.mk
+++ b/vpx_dsp/vpx_dsp.mk
@@ -53,6 +53,13 @@ DSP_SRCS-$(HAVE_SSE2) += x86/highbd_intrapred_sse2.asm
 endif # CONFIG_USE_X86INC
 endif # CONFIG_VP9_HIGHBITDEPTH
 
+ifneq ($(filter yes,$(CONFIG_POSTPROC) $(CONFIG_VP9_POSTPROC)),)
+DSP_SRCS-yes += postproc.c
+DSP_SRCS-$(HAVE_MSA) += mips/postproc_msa.c
+DSP_SRCS-$(HAVE_MMX) += x86/postproc_mmx.asm
+DSP_SRCS-$(HAVE_SSE2) += x86/postproc_sse2.asm
+endif # CONFIG_POSTPROC
+
 DSP_SRCS-$(HAVE_NEON_ASM) += arm/intrapred_neon_asm$(ASM)
 DSP_SRCS-$(HAVE_NEON) += arm/intrapred_neon.c
 DSP_SRCS-$(HAVE_MSA) += mips/intrapred_msa.c
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 9ea80a098..f883ce553 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -1907,6 +1907,15 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
   add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
 
 } # CONFIG_VP9_HIGHBITDEPTH
+
+#
+# Post Processing
+#
+if (vpx_config("CONFIG_POSTPROC") eq "yes" || vpx_config("CONFIG_VP9_POSTPROC") eq "yes") {
+    add_proto qw/void vpx_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
+    specialize qw/vpx_plane_add_noise mmx sse2 msa/;
+}
+
 } # CONFIG_ENCODERS || CONFIG_POSTPROC || CONFIG_VP9_POSTPROC
 
 1;
diff --git a/vpx_dsp/x86/postproc_mmx.asm b/vpx_dsp/x86/postproc_mmx.asm
new file mode 100644
index 000000000..97039750d
--- /dev/null
+++ b/vpx_dsp/x86/postproc_mmx.asm
@@ -0,0 +1,96 @@
+;
+; Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+%include "vpx_ports/x86_abi_support.asm"
+
+;void vpx_plane_add_noise_mmx (unsigned char *Start, unsigned char *noise,
+;                              unsigned char blackclamp[16],
+;                              unsigned char whiteclamp[16],
+;                              unsigned char bothclamp[16],
+;                              unsigned int Width, unsigned int Height, int Pitch)
+;
+; For every row a noise offset is chosen with LIBVPX_RAND & 0xff. Each group
+; of 8 pixels is then clamped with a saturating subtract of blackclamp, add
+; of bothclamp and subtract of whiteclamp before the noise bytes are added
+; with wrap-around (paddb).
+; Both loops test their condition at the bottom, so one row and one group of
+; 8 pixels are always processed and a Width that is not a multiple of 8 is
+; rounded up to the next multiple.
+; whiteclamp and bothclamp are read from blackclamp+16 and blackclamp+32;
+; arg(3) and arg(4) themselves are never loaded.
+global sym(vpx_plane_add_noise_mmx) PRIVATE
+sym(vpx_plane_add_noise_mmx):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 8
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+.addnoise_loop:
+    call sym(LIBVPX_RAND) WRT_PLT
+    mov     rcx, arg(1) ;noise
+    and     rax, 0xff
+    add     rcx, rax
+
+    ; we rely on the fact that the clamping vectors are stored contiguously
+    ; in black/white/both order. Note that we have to reload this here because
+    ; rdx could be trashed by rand()
+    mov     rdx, arg(2) ; blackclamp
+
+
+            mov     rdi, rcx
+            movsxd  rcx, dword arg(5) ;[Width]
+            mov     rsi, arg(0) ;Pos
+            xor         rax,rax
+
+.addnoise_nextset:
+            movq        mm1,[rsi+rax] ; get the source
+
+            psubusb     mm1, [rdx] ;blackclamp ; clamp both sides so we don't outrange adding noise
+            paddusb     mm1, [rdx+32] ;bothclamp
+            psubusb     mm1, [rdx+16] ;whiteclamp
+
+            movq        mm2,[rdi+rax] ; get the noise for this line
+            paddb       mm1,mm2 ; add it in
+            movq        [rsi+rax],mm1 ; store the result
+
+            add         rax,8 ; move to the next 8 pixels
+
+            cmp         rax, rcx
+            jl          .addnoise_nextset
+
+    movsxd  rax, dword arg(7) ; Pitch
+    add     arg(0), rax ; Start += Pitch
+    sub     dword arg(6), 1 ; Height -= 1
+    jg      .addnoise_loop
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+SECTION_RODATA
+; NOTE(review): Blur and rd are not referenced by the code in this file;
+; confirm whether they are still needed.
+align 16
+Blur:
+    times 16 dw 16
+    times 8 dw 64
+    times 16 dw 16
+    times 8 dw 0
+
+rd:
+    times 4 dw 0x40
diff --git a/vpx_dsp/x86/postproc_sse2.asm b/vpx_dsp/x86/postproc_sse2.asm
new file mode 100644
index 000000000..f4bc8932a
--- /dev/null
+++ b/vpx_dsp/x86/postproc_sse2.asm
@@ -0,0 +1,96 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+;void vpx_plane_add_noise_sse2(unsigned char *start, unsigned char *noise,
+;                              unsigned char blackclamp[16],
+;                              unsigned char whiteclamp[16],
+;                              unsigned char bothclamp[16],
+;                              unsigned int width, unsigned int height,
+;                              int pitch)
+;
+; For every row a noise offset is chosen with LIBVPX_RAND & 0xff. Each group
+; of 16 pixels is then clamped with a saturating subtract of blackclamp, add
+; of bothclamp and subtract of whiteclamp before the noise bytes are added
+; with wrap-around (paddb).
+; Both loops test their condition at the bottom, so one row and one group of
+; 16 pixels are always processed and a width that is not a multiple of 16 is
+; rounded up to the next multiple.
+; whiteclamp and bothclamp are read from blackclamp+16 and blackclamp+32;
+; arg(3) and arg(4) themselves are never loaded.
+; psubusb/paddusb take 16-byte memory operands here, which SSE2 requires to
+; be 16-byte aligned - the clamp vectors are assumed to be (TODO confirm).
+global sym(vpx_plane_add_noise_sse2) PRIVATE
+sym(vpx_plane_add_noise_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 8
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+.addnoise_loop:
+    call sym(LIBVPX_RAND) WRT_PLT
+    mov     rcx, arg(1) ;noise
+    and     rax, 0xff
+    add     rcx, rax
+
+    ; we rely on the fact that the clamping vectors are stored contiguously
+    ; in black/white/both order. Note that we have to reload this here because
+    ; rdx could be trashed by rand()
+    mov     rdx, arg(2) ; blackclamp
+
+
+            mov     rdi, rcx
+            movsxd  rcx, dword arg(5) ;[Width]
+            mov     rsi, arg(0) ;Pos
+            xor         rax,rax
+
+.addnoise_nextset:
+            movdqu      xmm1,[rsi+rax] ; get the source
+
+            psubusb     xmm1, [rdx] ;blackclamp ; clamp both sides so we don't outrange adding noise
+            paddusb     xmm1, [rdx+32] ;bothclamp
+            psubusb     xmm1, [rdx+16] ;whiteclamp
+
+            movdqu      xmm2,[rdi+rax] ; get the noise for this line
+            paddb       xmm1,xmm2 ; add it in
+            movdqu      [rsi+rax],xmm1 ; store the result
+
+            add         rax,16 ; move to the next 16 pixels
+
+            cmp         rax, rcx
+            jl          .addnoise_nextset
+
+    movsxd  rax, dword arg(7) ; Pitch
+    add     arg(0), rax ; Start += Pitch
+    sub     dword arg(6), 1 ; Height -= 1
+    jg      .addnoise_loop
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+SECTION_RODATA
+; NOTE(review): rd42 and four8s are not referenced by the code in this file;
+; confirm whether they are still needed.
+align 16
+rd42:
+    times 8 dw 0x04
+four8s:
+    times 4 dd 8