diff options
author | Jim Bankoski <jimbankoski@google.com> | 2016-05-03 16:23:06 -0700 |
---|---|---|
committer | Jim Bankoski <jimbankoski@google.com> | 2016-05-03 16:23:06 -0700 |
commit | 34d5aff747b3c545de497675f372c6b599860cbd (patch) | |
tree | 0f0543be36d727d03919e9d05712142291908882 /vpx_dsp/x86 | |
parent | fce3cee8ddd8dda91553e4701c0a8081ff4bab52 (diff) | |
download | libvpx-34d5aff747b3c545de497675f372c6b599860cbd.tar libvpx-34d5aff747b3c545de497675f372c6b599860cbd.tar.gz libvpx-34d5aff747b3c545de497675f372c6b599860cbd.tar.bz2 libvpx-34d5aff747b3c545de497675f372c6b599860cbd.zip |
libvpx: add a unit test for plane_add_noise.
In so doing, this fixes a couple of bugs:
vpx_plane_add_noise.c needed to subtract a clamp instead of adding it.
And the assembly (mmx, sse2) assumed that the clamp parameters were
contiguous in memory, which was not true.
Change-Id: I76f2c43cf54bfc838eb2edf8a443eaaa7565d7b5
Diffstat (limited to 'vpx_dsp/x86')
-rw-r--r-- | vpx_dsp/x86/postproc_mmx.asm | 20 | ||||
-rw-r--r-- | vpx_dsp/x86/postproc_sse2.asm | 43 |
2 files changed, 33 insertions, 30 deletions
diff --git a/vpx_dsp/x86/postproc_mmx.asm b/vpx_dsp/x86/postproc_mmx.asm index 97039750d..8c2623db4 100644 --- a/vpx_dsp/x86/postproc_mmx.asm +++ b/vpx_dsp/x86/postproc_mmx.asm @@ -25,18 +25,20 @@ sym(vpx_plane_add_noise_mmx): push rdi ; end prolog + ; get the clamps in registers + mov rdx, arg(2) ; blackclamp + movq mm3, [rdx] + mov rdx, arg(3) ; whiteclamp + movq mm4, [rdx] + mov rdx, arg(4) ; bothclamp + movq mm5, [rdx] + .addnoise_loop: call sym(LIBVPX_RAND) WRT_PLT mov rcx, arg(1) ;noise and rax, 0xff add rcx, rax - ; we rely on the fact that the clamping vectors are stored contiguously - ; in black/white/both order. Note that we have to reload this here because - ; rdx could be trashed by rand() - mov rdx, arg(2) ; blackclamp - - mov rdi, rcx movsxd rcx, dword arg(5) ;[Width] mov rsi, arg(0) ;Pos @@ -45,9 +47,9 @@ sym(vpx_plane_add_noise_mmx): .addnoise_nextset: movq mm1,[rsi+rax] ; get the source - psubusb mm1, [rdx] ;blackclamp ; clamp both sides so we don't outrange adding noise - paddusb mm1, [rdx+32] ;bothclamp - psubusb mm1, [rdx+16] ;whiteclamp + psubusb mm1, mm3 ; subtract black clamp + paddusb mm1, mm5 ; add both clamp + psubusb mm1, mm4 ; subtract whiteclamp movq mm2,[rdi+rax] ; get the noise for this line paddb mm1,mm2 ; add it in diff --git a/vpx_dsp/x86/postproc_sse2.asm b/vpx_dsp/x86/postproc_sse2.asm index f4bc8932a..ff61b19ba 100644 --- a/vpx_dsp/x86/postproc_sse2.asm +++ b/vpx_dsp/x86/postproc_sse2.asm @@ -27,38 +27,40 @@ sym(vpx_plane_add_noise_sse2): push rdi ; end prolog + ; get the clamps in registers + mov rdx, arg(2) ; blackclamp + movdqu xmm3, [rdx] + mov rdx, arg(3) ; whiteclamp + movdqu xmm4, [rdx] + mov rdx, arg(4) ; bothclamp + movdqu xmm5, [rdx] + .addnoise_loop: call sym(LIBVPX_RAND) WRT_PLT mov rcx, arg(1) ;noise and rax, 0xff add rcx, rax - ; we rely on the fact that the clamping vectors are stored contiguously - ; in black/white/both order. 
Note that we have to reload this here because - ; rdx could be trashed by rand() - mov rdx, arg(2) ; blackclamp - - - mov rdi, rcx - movsxd rcx, dword arg(5) ;[Width] - mov rsi, arg(0) ;Pos - xor rax,rax + mov rdi, rcx + movsxd rcx, dword arg(5) ;[Width] + mov rsi, arg(0) ;Pos + xor rax,rax .addnoise_nextset: - movdqu xmm1,[rsi+rax] ; get the source + movdqu xmm1,[rsi+rax] ; get the source - psubusb xmm1, [rdx] ;blackclamp ; clamp both sides so we don't outrange adding noise - paddusb xmm1, [rdx+32] ;bothclamp - psubusb xmm1, [rdx+16] ;whiteclamp + psubusb xmm1, xmm3 ; subtract black clamp + paddusb xmm1, xmm5 ; add both clamp + psubusb xmm1, xmm4 ; subtract whiteclamp - movdqu xmm2,[rdi+rax] ; get the noise for this line - paddb xmm1,xmm2 ; add it in - movdqu [rsi+rax],xmm1 ; store the result + movdqu xmm2,[rdi+rax] ; get the noise for this line + paddb xmm1,xmm2 ; add it in + movdqu [rsi+rax],xmm1 ; store the result - add rax,16 ; move to the next line + add rax,16 ; move to the next line - cmp rax, rcx - jl .addnoise_nextset + cmp rax, rcx + jl .addnoise_nextset movsxd rax, dword arg(7) ; Pitch add arg(0), rax ; Start += Pitch @@ -73,7 +75,6 @@ sym(vpx_plane_add_noise_sse2): pop rbp ret - SECTION_RODATA align 16 rd42: |