From 79b44cf61115bd48006227bb325b709f294c56f9 Mon Sep 17 00:00:00 2001 From: Stefan Liebler Date: Tue, 18 Dec 2018 13:57:23 +0100 Subject: S390: Refactor wcscspn ifunc handling. The ifunc handling for wcscspn is adjusted in order to omit ifunc if the minimum architecture level already supports newer CPUs by default. Unfortunately the c ifunc variant can't be omitted at all as it is used by the z13 ifunc variant as fallback if the pointers are not 4-byte aligned. ChangeLog: * sysdeps/s390/multiarch/Makefile (sysdep_routines): Remove wcscspn variants. * sysdeps/s390/Makefile (sysdep_routines): Add wcscspn variants. * sysdeps/s390/multiarch/ifunc-impl-list.c (__libc_ifunc_impl_list): Refactor ifunc handling for wcscspn. * sysdeps/s390/multiarch/wcscspn-c.c: Move to ... * sysdeps/s390/wcscspn-c.c: ... here and adjust ifunc handling. * sysdeps/s390/multiarch/wcscspn-vx.S: Move to ... * sysdeps/s390/wcscspn-vx.S: ... here and adjust ifunc handling. * sysdeps/s390/multiarch/wcscspn.c: Move to ... * sysdeps/s390/wcscspn.c: ... here and adjust ifunc handling. * sysdeps/s390/ifunc-wcscspn.h: New file. 
--- ChangeLog | 15 ++ sysdeps/s390/Makefile | 3 +- sysdeps/s390/ifunc-wcscspn.h | 53 ++++++ sysdeps/s390/multiarch/Makefile | 3 +- sysdeps/s390/multiarch/ifunc-impl-list.c | 15 +- sysdeps/s390/multiarch/wcscspn-c.c | 26 --- sysdeps/s390/multiarch/wcscspn-vx.S | 293 ------------------------------ sysdeps/s390/multiarch/wcscspn.c | 27 --- sysdeps/s390/wcscspn-c.c | 27 +++ sysdeps/s390/wcscspn-vx.S | 298 +++++++++++++++++++++++++++++++ sysdeps/s390/wcscspn.c | 38 ++++ 11 files changed, 447 insertions(+), 351 deletions(-) create mode 100644 sysdeps/s390/ifunc-wcscspn.h delete mode 100644 sysdeps/s390/multiarch/wcscspn-c.c delete mode 100644 sysdeps/s390/multiarch/wcscspn-vx.S delete mode 100644 sysdeps/s390/multiarch/wcscspn.c create mode 100644 sysdeps/s390/wcscspn-c.c create mode 100644 sysdeps/s390/wcscspn-vx.S create mode 100644 sysdeps/s390/wcscspn.c diff --git a/ChangeLog b/ChangeLog index f199133f1e..a29fe018d3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +2018-12-18 Stefan Liebler + + * sysdeps/s390/multiarch/Makefile + (sysdep_routines): Remove wcscspn variants. + * sysdeps/s390/Makefile (sysdep_routines): Add wcscspn variants. + * sysdeps/s390/multiarch/ifunc-impl-list.c + (__libc_ifunc_impl_list): Refactor ifunc handling for wcscspn. + * sysdeps/s390/multiarch/wcscspn-c.c: Move to ... + * sysdeps/s390/wcscspn-c.c: ... here and adjust ifunc handling. + * sysdeps/s390/multiarch/wcscspn-vx.S: Move to ... + * sysdeps/s390/wcscspn-vx.S: ... here and adjust ifunc handling. + * sysdeps/s390/multiarch/wcscspn.c: Move to ... + * sysdeps/s390/wcscspn.c: ... here and adjust ifunc handling. + * sysdeps/s390/ifunc-wcscspn.h: New file. 
+ 2018-12-18 Stefan Liebler * sysdeps/s390/multiarch/Makefile diff --git a/sysdeps/s390/Makefile b/sysdeps/s390/Makefile index 4decbf8f0a..a9058047a9 100644 --- a/sysdeps/s390/Makefile +++ b/sysdeps/s390/Makefile @@ -97,5 +97,6 @@ sysdep_routines += wcslen wcslen-vx wcslen-c \ wcschrnul wcschrnul-vx wcschrnul-c \ wcsrchr wcsrchr-vx wcsrchr-c \ wcsspn wcsspn-vx wcsspn-c \ - wcspbrk wcspbrk-vx wcspbrk-c + wcspbrk wcspbrk-vx wcspbrk-c \ + wcscspn wcscspn-vx wcscspn-c endif diff --git a/sysdeps/s390/ifunc-wcscspn.h b/sysdeps/s390/ifunc-wcscspn.h new file mode 100644 index 0000000000..23f3667ba3 --- /dev/null +++ b/sysdeps/s390/ifunc-wcscspn.h @@ -0,0 +1,53 @@ +/* wcscspn variant information on S/390 version. + Copyright (C) 2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if defined USE_MULTIARCH && IS_IN (libc) \ + && ! defined HAVE_S390_MIN_Z13_ZARCH_ASM_SUPPORT +# define HAVE_WCSCSPN_IFUNC 1 +#else +# define HAVE_WCSCSPN_IFUNC 0 +#endif + +#ifdef HAVE_S390_VX_ASM_SUPPORT +# define HAVE_WCSCSPN_IFUNC_AND_VX_SUPPORT HAVE_WCSCSPN_IFUNC +#else +# define HAVE_WCSCSPN_IFUNC_AND_VX_SUPPORT 0 +#endif + +#if defined HAVE_S390_MIN_Z13_ZARCH_ASM_SUPPORT +# define WCSCSPN_DEFAULT WCSCSPN_Z13 +/* The z13 ifunc variant is using the common code variant as fallback! 
*/ +# define HAVE_WCSCSPN_C 1 +# define HAVE_WCSCSPN_Z13 1 +#else +# define WCSCSPN_DEFAULT WCSCSPN_C +# define HAVE_WCSCSPN_C 1 +# define HAVE_WCSCSPN_Z13 HAVE_WCSCSPN_IFUNC_AND_VX_SUPPORT +#endif + +#if HAVE_WCSCSPN_C +# define WCSCSPN_C __wcscspn_c +#else +# define WCSCSPN_C NULL +#endif + +#if HAVE_WCSCSPN_Z13 +# define WCSCSPN_Z13 __wcscspn_vx +#else +# define WCSCSPN_Z13 NULL +#endif diff --git a/sysdeps/s390/multiarch/Makefile b/sysdeps/s390/multiarch/Makefile index e1e2d9dc74..5be6355423 100644 --- a/sysdeps/s390/multiarch/Makefile +++ b/sysdeps/s390/multiarch/Makefile @@ -1,6 +1,5 @@ ifeq ($(subdir),wcsmbs) -sysdep_routines += wcscspn wcscspn-vx wcscspn-c \ - wmemchr wmemchr-vx wmemchr-c \ +sysdep_routines += wmemchr wmemchr-vx wmemchr-c \ wmemset wmemset-vx wmemset-c \ wmemcmp wmemcmp-vx wmemcmp-c endif diff --git a/sysdeps/s390/multiarch/ifunc-impl-list.c b/sysdeps/s390/multiarch/ifunc-impl-list.c index 89d6e8ad7e..7d8031a069 100644 --- a/sysdeps/s390/multiarch/ifunc-impl-list.c +++ b/sysdeps/s390/multiarch/ifunc-impl-list.c @@ -61,6 +61,7 @@ #include #include #include +#include /* Maximum number of IFUNC implementations. 
*/ #define MAX_IFUNC 3 @@ -619,6 +620,18 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, ) #endif /* HAVE_WCSPBRK_IFUNC */ +#if HAVE_WCSCSPN_IFUNC + IFUNC_IMPL (i, name, wcscspn, +# if HAVE_WCSCSPN_Z13 + IFUNC_IMPL_ADD (array, i, wcscspn, + dl_hwcap & HWCAP_S390_VX, WCSCSPN_Z13) +# endif +# if HAVE_WCSCSPN_C + IFUNC_IMPL_ADD (array, i, wcscspn, 1, WCSCSPN_C) +# endif + ) +#endif /* HAVE_WCSCSPN_IFUNC */ + #ifdef HAVE_S390_VX_ASM_SUPPORT # define IFUNC_VX_IMPL(FUNC) \ @@ -627,8 +640,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __##FUNC##_vx) \ IFUNC_IMPL_ADD (array, i, FUNC, 1, __##FUNC##_c)) - IFUNC_VX_IMPL (wcscspn); - IFUNC_VX_IMPL (wmemchr); IFUNC_VX_IMPL (wmemset); diff --git a/sysdeps/s390/multiarch/wcscspn-c.c b/sysdeps/s390/multiarch/wcscspn-c.c deleted file mode 100644 index 161e52e686..0000000000 --- a/sysdeps/s390/multiarch/wcscspn-c.c +++ /dev/null @@ -1,26 +0,0 @@ -/* Default wcscscpn implementation for S/390. - Copyright (C) 2015-2018 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . 
*/ - -#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) -# define WCSCSPN __wcscspn_c - -# include -extern __typeof (wcscspn) __wcscspn_c; - -# include -#endif diff --git a/sysdeps/s390/multiarch/wcscspn-vx.S b/sysdeps/s390/multiarch/wcscspn-vx.S deleted file mode 100644 index 06bc4e25d0..0000000000 --- a/sysdeps/s390/multiarch/wcscspn-vx.S +++ /dev/null @@ -1,293 +0,0 @@ -/* Vector optimized 32/64 bit S/390 version of wcscspn. - Copyright (C) 2015-2018 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) - -# include "sysdep.h" -# include "asm-syntax.h" - - .text - -/* size_t wcscspn (const wchar_t *s, const wchar_t * reject) - The wcscspn() function calculates the length of the initial segment - of s which consists entirely of characters not in reject. - - This method checks the length of reject string. If it fits entirely - in one vector register, a fast algorithm is used, which does not need - to check multiple parts of accept-string. Otherwise a slower full - check of accept-string is used. 
- - register overview: - r3: pointer to start of reject-string - r2: pointer to start of search-string - r0: loaded byte count of vlbb search-string - r4: found byte index - r1: current return len - v16: search-string - v17: reject-string - v18: temp-vreg - - ONLY FOR SLOW: - v19: first reject-string - v20: zero for preparing acc-vector - v21: global mask; 1 indicates a match between - search-string-vreg and any reject-character - v22: current mask; 1 indicates a match between - search-string-vreg and any reject-character in current acc-vreg - v30, v31: for re-/storing registers r6, r8, r9 - r5: current len of reject-string - r6: zero-index in search-string or 16 if no zero - or min(zero-index, loaded byte count) - r8: >0, if former reject-string-part contains a zero, - otherwise =0; - r9: loaded byte count of vlbb reject-string -*/ -ENTRY(__wcscspn_vx) - .machine "z13" - .machinemode "zarch_nohighgprs" - - tmll %r2,3 /* Test if s is 4-byte aligned? */ - jne .Lfallback /* And use common-code variant if not. */ - - /* - Check if reject-string fits in one vreg: - ---------------------------------------- - */ - vlbb %v17,0(%r3),0 /* Load reject. */ - lcbb %r0,0(%r3),0 - jo .Lcheck_onbb /* Special case if reject - lays on block-boundary. */ - -.Lcheck_notonbb: - lghi %r1,0 /* Zero out current len. */ - vistrfs %v17,%v17 /* Fill with zeros after first zero. */ - je .Lfast /* Zero found -> reject fits in one vreg. */ - j .Lslow /* No zero -> reject exceeds one vreg. */ - - -.Lcheck_onbb: - /* Reject lays on block-boundary. */ - nill %r0,65532 /* Recognize only fully loaded characters. */ - je .Lcheck_onbb2 /* Reload vr, if we loaded no full wchar_t. */ - vfenezf %v18,%v17,%v17 /* Search zero in loaded reject bytes. */ - vlgvb %r4,%v18,7 /* Get index of zero or 16 if not found. */ - clrjl %r4,%r0,.Lcheck_notonbb /* Zero index < loaded bytes count -> - Reject fits in one vreg; - Fill with zeros and proceed - with FAST. 
*/ -.Lcheck_onbb2: - vl %v17,0(%r3) /* Load reject, which exceeds loaded bytes. */ - j .Lcheck_notonbb /* Check if reject fits in one vreg. */ - - - /* - Search s for reject in one vreg - ------------------------------- - */ -.Lfast: - /* Complete reject-string in v17 and remaining bytes are zero. */ - - vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ - lcbb %r0,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ - - vfaezfs %v18,%v16,%v17,0 /* Find first element in v16 - unequal to any in v17 - or first zero element. */ - vlgvb %r4,%v18,7 /* Load byte index of found element. */ - clrjl %r4,%r0,.Lfast_loop_found2 /* If found index is within loaded - bytes, return with found element - index (=equal count). */ - - /* Align s to 16 byte. */ - risbgn %r4,%r2,60,128+63,0 /* %r3 = bits 60-63 of %r2 'and' 15. */ - lghi %r1,16 /* current_len = 16. */ - slr %r1,%r4 /* Compute bytes to 16bytes boundary. */ - - /* Process s in 16byte aligned loop. */ -.Lfast_loop: - vl %v16,0(%r1,%r2) /* Load search-string. */ - vfaezfs %v18,%v16,%v17,0 /* Find first element in v16 equal to any - in v17 or first zero element. */ - jno .Lfast_loop_found - - vl %v16,16(%r1,%r2) - vfaezfs %v18,%v16,%v17,0 - jno .Lfast_loop_found16 - - vl %v16,32(%r1,%r2) - vfaezfs %v18,%v16,%v17,0 - jno .Lfast_loop_found32 - - vl %v16,48(%r1,%r2) - vfaezfs %v18,%v16,%v17,0 - jno .Lfast_loop_found48 - - aghi %r1,64 - j .Lfast_loop /* Loop if no element was unequal to reject - and not zero. */ - - /* Found equal or zero element. */ -.Lfast_loop_found48: - aghi %r1,16 -.Lfast_loop_found32: - aghi %r1,16 -.Lfast_loop_found16: - aghi %r1,16 -.Lfast_loop_found: - vlgvb %r4,%v18,7 /* Load byte index of found element or zero. */ -.Lfast_loop_found2: - algrk %r2,%r1,%r4 /* Add found index to current len. */ - srlg %r2,%r2,2 /* Convert byte-count to character-count. */ - br %r14 - - - - /* - Search s for reject in multiple vregs - ------------------------------------- - */ -.Lslow: - /* Save registers. 
*/ - vlvgg %v30,%r6,0 - vlvgp %v31,%r8,%r9 - - /* Reject in v17 without zero. */ - vlr %v19,%v17 /* Save first acc-part for a fast reload. */ - vzero %v20 /* Zero for preparing acc-vector. */ - vone %v24 /* One for checking result of former - string-part. */ - - /* Align s to 16 byte. */ - risbg %r4,%r2,60,128+63,0 /* Test if s is aligned and - %r4 = bits 60-63 'and' 15. */ - je .Lslow_loop_str /* If s is aligned, loop aligned. */ - lghi %r0,15 - slr %r0,%r4 /* Compute highest index to load (15-x). */ - vll %v16,%r0,0(%r2) /* Load up to 16byte boundary (vll needs - highest index, remaining bytes are 0). */ - ahi %r0,1 /* Work with loaded byte count. */ - vzero %v21 /* Zero out global mask. */ - lghi %r5,0 /* Set current len of reject-string to zero. */ - vfenezf %v18,%v16,%v16 /* Find zero in current string-part. */ - lghi %r8,0 /* There is no zero in first reject-part. */ - vlgvb %r6,%v18,7 /* Load byte index of zero or 16 if no zero. */ - clije %r6,0,.Lslow_end /* If first element is zero -> return 0. */ - clr %r0,%r6 /* cc==1 if loaded byte count < zero-index. */ - locrl %r6,%r0 /* Load on cc==1; zero-index = lbc. */ - j .Lslow_loop_acc - - - /* Process s in 16byte aligned loop. */ -.Lslow_next_str: - /* Check results of former processed str-part. */ - vfeef %v18,%v21,%v24 /* Find first equal match in global mask - (ones in element). */ - vlgvb %r4,%v18,7 /* Get index of first one (=equal) or 16. */ - /* Equal-index < min(zero-index, loaded byte count) - -> Return pointer to equal element. */ - clrjl %r4,%r6,.Lslow_index_found - /* Zero-index < loaded byte count - -> Former str-part was last str-part - -> Return null */ - clrjl %r6,%r0,.Lslow_end_not_found - - /* All elements are zero (=no match) -> proceed with next str-part. */ - vlr %v17,%v19 /* Load first part of reject (no zero). */ - algfr %r1,%r0 /* Add loaded byte count to current len. */ - -.Lslow_loop_str: - vl %v16,0(%r1,%r2) /* Load search-string. */ - lghi %r0,16 /* Loaded byte count is 16. 
*/ - vzero %v21 /* Zero out global mask. */ - lghi %r5,0 /* Set current len of reject to zero. */ - vfenezf %v18,%v16,%v16 /* Find zero in current string-part. */ - lghi %r8,0 /* There is no zero in first reject-part. */ - vlgvb %r6,%v18,7 /* Load byte index of zero or 16 if no zero. */ - clije %r6,0,.Lslow_end /* If first element is zero (end of string) - -> Return current length. */ - -.Lslow_loop_acc: - vfaef %v22,%v16,%v17,4 /* Create matching-mask (1 in mask -> - Character matches any rejected character in - this reject-string-part) IN=0, RT=1. */ - vlgvf %r4,%v22,0 /* Get result of first element. */ - /* First element is equal to any rejected characters? - (All other parts of reject cannot lead to a match before this one) - -> Return current len, which is pointing to this element. */ - clijh %r4,0,.Lslow_end - vo %v21,%v21,%v22 /* Global-mask = global-|matching-mask. */ - /* Proceed with next acc until end of acc is reached. */ - - -.Lslow_next_acc: - clijh %r8,0,.Lslow_next_str /* There was a zero in last reject-part - -> Add found index to current len - and end. */ - vlbb %v17,16(%r5,%r3),6 /* Load next reject part. */ - aghi %r5,16 /* Increment current len of reject-string. */ - lcbb %r9,0(%r5,%r3),6 /* Get loaded byte count of reject-string. */ - jo .Lslow_next_acc_onbb /* Jump away if reject-string is - on block-boundary. */ -.Lslow_next_acc_notonbb: - vistrfs %v17,%v17 /* Fill with zeros after first zero. */ - jo .Lslow_loop_acc /* No zero found -> no preparation needed. */ - -.Lslow_next_acc_prepare_zero: - /* Zero in reject-part: fill zeros with first-reject-character. */ - vlgvf %r8,%v17,0 /* Load first element of reject-part. */ - clije %r8,0,.Lslow_next_str /* Process next str-part if first - character in this part of reject - is a zero. */ - /* r8>0 -> zero found in this acc-part. */ - vrepf %v18,%v17,0 /* Replicate first char accross all chars. */ - vceqf %v22,%v20,%v17 /* Create a mask (v22) of null chars - by comparing with 0 (v20). 
*/ - vsel %v17,%v18,%v17,%v22 /* Replace null chars with first char. */ - j .Lslow_loop_acc /* Reject-string part is prepared. */ - -.Lslow_next_acc_onbb: - nill %r9,65532 /* Recognize only fully loaded characters. */ - je .Lslow_next_acc_onbb2 /* Reload vr, if no full wchar_t - loaded. */ - vfenezf %v18,%v17,%v17 /* Find zero in loaded bytes of reject part. */ - vlgvb %r8,%v18,7 /* Load byte index of zero. */ - clrjl %r8,%r9,.Lslow_next_acc_notonbb /* Found a zero in loaded bytes - -> Prepare vreg. */ -.Lslow_next_acc_onbb2: - vl %v17,0(%r5,%r3) /* Load over boundary ... */ - lghi %r8,0 /* r8=0 -> no zero in this part of acc, - check for zero is in jump-target. */ - j .Lslow_next_acc_notonbb /* ... and search for zero in - fully loaded vreg again. */ - -.Lslow_end_not_found: - algfr %r1,%r6 /* Add zero-index to current len. */ - j .Lslow_end -.Lslow_index_found: - algfr %r1,%r4 /* Add found index of char to current len. */ -.Lslow_end: - srlg %r2,%r1,2 /* Convert byte-count to character-count. */ - /* Restore registers. */ - vlgvg %r6,%v30,0 - vlgvg %r8,%v31,0 - vlgvg %r9,%v31,1 - br %r14 -.Lfallback: - jg __wcscspn_c -END(__wcscspn_vx) -#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/wcscspn.c b/sysdeps/s390/multiarch/wcscspn.c deleted file mode 100644 index 707327522a..0000000000 --- a/sysdeps/s390/multiarch/wcscspn.c +++ /dev/null @@ -1,27 +0,0 @@ -/* Multiple versions of wcscspn. - Copyright (C) 2015-2018 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) -# include -# include - -s390_vx_libc_ifunc2 (__wcscspn, wcscspn) - -#else -# include -#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/wcscspn-c.c b/sysdeps/s390/wcscspn-c.c new file mode 100644 index 0000000000..d47cb6b75b --- /dev/null +++ b/sysdeps/s390/wcscspn-c.c @@ -0,0 +1,27 @@ +/* Default wcscspn implementation for S/390. + Copyright (C) 2015-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +#if HAVE_WCSCSPN_C +# if HAVE_WCSCSPN_IFUNC || HAVE_WCSCSPN_Z13 +# define WCSCSPN WCSCSPN_C +# endif + +# include +#endif diff --git a/sysdeps/s390/wcscspn-vx.S b/sysdeps/s390/wcscspn-vx.S new file mode 100644 index 0000000000..882cb93fb8 --- /dev/null +++ b/sysdeps/s390/wcscspn-vx.S @@ -0,0 +1,298 @@ +/* Vector optimized 32/64 bit S/390 version of wcscspn. + Copyright (C) 2015-2018 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#if HAVE_WCSCSPN_Z13 + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* size_t wcscspn (const wchar_t *s, const wchar_t * reject) + The wcscspn() function calculates the length of the initial segment + of s which consists entirely of characters not in reject. + + This method checks the length of reject string. If it fits entirely + in one vector register, a fast algorithm is used, which does not need + to check multiple parts of reject-string. Otherwise a slower full + check of reject-string is used. 
+ + register overview: + r3: pointer to start of reject-string + r2: pointer to start of search-string + r0: loaded byte count of vlbb search-string + r4: found byte index + r1: current return len + v16: search-string + v17: reject-string + v18: temp-vreg + + ONLY FOR SLOW: + v19: first reject-string + v20: zero for preparing acc-vector + v21: global mask; 1 indicates a match between + search-string-vreg and any reject-character + v22: current mask; 1 indicates a match between + search-string-vreg and any reject-character in current acc-vreg + v30, v31: for re-/storing registers r6, r8, r9 + r5: current len of reject-string + r6: zero-index in search-string or 16 if no zero + or min(zero-index, loaded byte count) + r8: >0, if former reject-string-part contains a zero, + otherwise =0; + r9: loaded byte count of vlbb reject-string +*/ +ENTRY(WCSCSPN_Z13) + .machine "z13" + .machinemode "zarch_nohighgprs" + + tmll %r2,3 /* Test if s is 4-byte aligned? */ + jne .Lfallback /* And use common-code variant if not. */ + + /* + Check if reject-string fits in one vreg: + ---------------------------------------- + */ + vlbb %v17,0(%r3),0 /* Load reject. */ + lcbb %r0,0(%r3),0 /* Get loaded byte count of reject. */ + jo .Lcheck_onbb /* Special case if reject + lies on block-boundary. */ + +.Lcheck_notonbb: + lghi %r1,0 /* Zero out current len. */ + vistrfs %v17,%v17 /* Fill with zeros after first zero. */ + je .Lfast /* Zero found -> reject fits in one vreg. */ + j .Lslow /* No zero -> reject exceeds one vreg. */ + + +.Lcheck_onbb: + /* Reject lies on block-boundary. */ + nill %r0,65532 /* Recognize only fully loaded characters. */ + je .Lcheck_onbb2 /* Reload vr, if we loaded no full wchar_t. */ + vfenezf %v18,%v17,%v17 /* Search zero in loaded reject bytes. */ + vlgvb %r4,%v18,7 /* Get index of zero or 16 if not found. */ + clrjl %r4,%r0,.Lcheck_notonbb /* Zero index < loaded bytes count -> + Reject fits in one vreg; + Fill with zeros and proceed + with FAST. 
*/ +.Lcheck_onbb2: + vl %v17,0(%r3) /* Load reject, which exceeds loaded bytes. */ + j .Lcheck_notonbb /* Check if reject fits in one vreg. */ + + + /* + Search s for reject in one vreg + ------------------------------- + */ +.Lfast: + /* Complete reject-string in v17 and remaining bytes are zero. */ + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r0,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + vfaezfs %v18,%v16,%v17,0 /* Find first element in v16 + equal to any in v17 + or first zero element. */ + vlgvb %r4,%v18,7 /* Load byte index of found element. */ + clrjl %r4,%r0,.Lfast_loop_found2 /* If found index is within loaded + bytes, return with found element + index (=byte length of segment). */ + + /* Align s to 16 byte. */ + risbgn %r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2 'and' 15. */ + lghi %r1,16 /* current_len = 16. */ + slr %r1,%r4 /* Compute bytes to 16bytes boundary. */ + + /* Process s in 16byte aligned loop. */ +.Lfast_loop: + vl %v16,0(%r1,%r2) /* Load search-string. */ + vfaezfs %v18,%v16,%v17,0 /* Find first element in v16 equal to any + in v17 or first zero element. */ + jno .Lfast_loop_found + + vl %v16,16(%r1,%r2) + vfaezfs %v18,%v16,%v17,0 + jno .Lfast_loop_found16 + + vl %v16,32(%r1,%r2) + vfaezfs %v18,%v16,%v17,0 + jno .Lfast_loop_found32 + + vl %v16,48(%r1,%r2) + vfaezfs %v18,%v16,%v17,0 + jno .Lfast_loop_found48 + + aghi %r1,64 + j .Lfast_loop /* Loop if no element was equal to reject + and none was zero. */ + + /* Found equal or zero element. */ +.Lfast_loop_found48: + aghi %r1,16 +.Lfast_loop_found32: + aghi %r1,16 +.Lfast_loop_found16: + aghi %r1,16 +.Lfast_loop_found: + vlgvb %r4,%v18,7 /* Load byte index of found element or zero. */ +.Lfast_loop_found2: + algrk %r2,%r1,%r4 /* Add found index to current len. */ + srlg %r2,%r2,2 /* Convert byte-count to character-count. */ + br %r14 + + + + /* + Search s for reject in multiple vregs + ------------------------------------- + */ +.Lslow: + /* Save registers. 
*/ + vlvgg %v30,%r6,0 + vlvgp %v31,%r8,%r9 + + /* Reject in v17 without zero. */ + vlr %v19,%v17 /* Save first acc-part for a fast reload. */ + vzero %v20 /* Zero for preparing acc-vector. */ + vone %v24 /* One for checking result of former + string-part. */ + + /* Align s to 16 byte. */ + risbg %r4,%r2,60,128+63,0 /* Test if s is aligned and + %r4 = bits 60-63 'and' 15. */ + je .Lslow_loop_str /* If s is aligned, loop aligned. */ + lghi %r0,15 + slr %r0,%r4 /* Compute highest index to load (15-x). */ + vll %v16,%r0,0(%r2) /* Load up to 16byte boundary (vll needs + highest index, remaining bytes are 0). */ + ahi %r0,1 /* Work with loaded byte count. */ + vzero %v21 /* Zero out global mask. */ + lghi %r5,0 /* Set current len of reject-string to zero. */ + vfenezf %v18,%v16,%v16 /* Find zero in current string-part. */ + lghi %r8,0 /* There is no zero in first reject-part. */ + vlgvb %r6,%v18,7 /* Load byte index of zero or 16 if no zero. */ + clije %r6,0,.Lslow_end /* If first element is zero -> return 0. */ + clr %r0,%r6 /* cc==1 if loaded byte count < zero-index. */ + locrl %r6,%r0 /* Load on cc==1; zero-index = lbc. */ + j .Lslow_loop_acc + + + /* Process s in 16byte aligned loop. */ +.Lslow_next_str: + /* Check results of former processed str-part. */ + vfeef %v18,%v21,%v24 /* Find first equal match in global mask + (ones in element). */ + vlgvb %r4,%v18,7 /* Get index of first one (=equal) or 16. */ + /* Equal-index < min(zero-index, loaded byte count) + -> Return length up to equal element. */ + clrjl %r4,%r6,.Lslow_index_found + /* Zero-index < loaded byte count + -> Former str-part was last str-part + -> Return length up to zero element. */ + clrjl %r6,%r0,.Lslow_end_not_found + + /* All elements are zero (=no match) -> proceed with next str-part. */ + vlr %v17,%v19 /* Load first part of reject (no zero). */ + algfr %r1,%r0 /* Add loaded byte count to current len. */ + +.Lslow_loop_str: + vl %v16,0(%r1,%r2) /* Load search-string. 
*/ + lghi %r0,16 /* Loaded byte count is 16. 
*/ + vzero %v21 /* Zero out global mask. */ + lghi %r5,0 /* Set current len of reject to zero. */ + vfenezf %v18,%v16,%v16 /* Find zero in current string-part. */ + lghi %r8,0 /* There is no zero in first reject-part. */ + vlgvb %r6,%v18,7 /* Load byte index of zero or 16 if no zero. */ + clije %r6,0,.Lslow_end /* If first element is zero (end of string) + -> Return current length. */ + +.Lslow_loop_acc: + vfaef %v22,%v16,%v17,4 /* Create matching-mask (1 in mask -> + Character matches any rejected character in + this reject-string-part) IN=0, RT=1. */ + vlgvf %r4,%v22,0 /* Get result of first element. */ + /* First element is equal to any rejected characters? + (All other parts of reject cannot lead to a match before this one) + -> Return current len, which is pointing to this element. */ + clijh %r4,0,.Lslow_end + vo %v21,%v21,%v22 /* Global-mask = global-|matching-mask. */ + /* Proceed with next acc until end of acc is reached. */ + + +.Lslow_next_acc: + clijh %r8,0,.Lslow_next_str /* There was a zero in last reject-part + -> Reject is fully processed; + check results of this str-part. */ + vlbb %v17,16(%r5,%r3),6 /* Load next reject part. */ + aghi %r5,16 /* Increment current len of reject-string. */ + lcbb %r9,0(%r5,%r3),6 /* Get loaded byte count of reject-string. */ + jo .Lslow_next_acc_onbb /* Jump away if reject-string is + on block-boundary. */ +.Lslow_next_acc_notonbb: + vistrfs %v17,%v17 /* Fill with zeros after first zero. */ + jo .Lslow_loop_acc /* No zero found -> no preparation needed. */ + +.Lslow_next_acc_prepare_zero: + /* Zero in reject-part: fill zeros with first-reject-character. */ + vlgvf %r8,%v17,0 /* Load first element of reject-part. */ + clije %r8,0,.Lslow_next_str /* Process next str-part if first + character in this part of reject + is a zero. */ + /* r8>0 -> zero found in this acc-part. */ + vrepf %v18,%v17,0 /* Replicate first char across all chars. */ + vceqf %v22,%v20,%v17 /* Create a mask (v22) of null chars + by comparing with 0 (v20). 
*/ + vsel %v17,%v18,%v17,%v22 /* Replace null chars with first char. */ + j .Lslow_loop_acc /* Reject-string part is prepared. */ + +.Lslow_next_acc_onbb: + nill %r9,65532 /* Recognize only fully loaded characters. */ + je .Lslow_next_acc_onbb2 /* Reload vr, if no full wchar_t + loaded. */ + vfenezf %v18,%v17,%v17 /* Find zero in loaded bytes of reject part. */ + vlgvb %r8,%v18,7 /* Load byte index of zero. */ + clrjl %r8,%r9,.Lslow_next_acc_notonbb /* Found a zero in loaded bytes + -> Prepare vreg. */ +.Lslow_next_acc_onbb2: + vl %v17,0(%r5,%r3) /* Load over boundary ... */ + lghi %r8,0 /* r8=0 -> no zero in this part of acc, + check for zero is in jump-target. */ + j .Lslow_next_acc_notonbb /* ... and search for zero in + fully loaded vreg again. */ + +.Lslow_end_not_found: + algfr %r1,%r6 /* Add zero-index to current len. */ + j .Lslow_end +.Lslow_index_found: + algfr %r1,%r4 /* Add found index of char to current len. */ +.Lslow_end: + srlg %r2,%r1,2 /* Convert byte-count to character-count. */ + /* Restore registers. */ + vlgvg %r6,%v30,0 + vlgvg %r8,%v31,0 + vlgvg %r9,%v31,1 + br %r14 +.Lfallback: + jg WCSCSPN_C /* Branch to common-code variant (s is not 4-byte aligned). */ +END(WCSCSPN_Z13) + +# if ! HAVE_WCSCSPN_IFUNC +strong_alias (WCSCSPN_Z13, wcscspn) +# endif +#endif diff --git a/sysdeps/s390/wcscspn.c b/sysdeps/s390/wcscspn.c new file mode 100644 index 0000000000..0ce31b8aab --- /dev/null +++ b/sysdeps/s390/wcscspn.c @@ -0,0 +1,38 @@ +/* Multiple versions of wcscspn. + Copyright (C) 2015-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +#if HAVE_WCSCSPN_IFUNC +# include +# include + +# if HAVE_WCSCSPN_C +extern __typeof (wcscspn) WCSCSPN_C attribute_hidden; +# endif + +# if HAVE_WCSCSPN_Z13 +extern __typeof (wcscspn) WCSCSPN_Z13 attribute_hidden; +# endif + +s390_libc_ifunc_expr (wcscspn, wcscspn, + (HAVE_WCSCSPN_Z13 && (hwcap & HWCAP_S390_VX)) + ? WCSCSPN_Z13 + : WCSCSPN_DEFAULT + ) +#endif -- cgit v1.2.3-70-g09d2