aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: David S. Miller <davem@davemloft.net> 2012-10-05 14:46:47 -0700
committer: David S. Miller <davem@davemloft.net> 2012-10-05 14:46:47 -0700
commit: 3baddb72a4181e05d0d279a6b345635641e13a18 (patch)
tree: 502af4b61251ea10b01aa227bc327c01accd281f
parent: d7e0dab96d1a855f7ff779195010c188c334669e (diff)
download: glibc-3baddb72a4181e05d0d279a6b345635641e13a18.tar
glibc-3baddb72a4181e05d0d279a6b345635641e13a18.tar.gz
glibc-3baddb72a4181e05d0d279a6b345635641e13a18.tar.bz2
glibc-3baddb72a4181e05d0d279a6b345635641e13a18.zip
Add Niagara-4 optimized memset/bzero implementation.
* sysdeps/sparc/sparc64/multiarch/memset-niagara4.S: New file.
* sysdeps/sparc/sparc32/sparcv9/multiarch/memset-niagara4.S: New file.
* sysdeps/sparc/sparc64/multiarch/Makefile: Add to sysdep_routines.
* sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile: Likewise.
* sysdeps/sparc/sparc64/multiarch/memset.S: Use Niagara-4 memset and bzero when HWCAP_SPARC_CRYPTO is present.
-rw-r--r-- ChangeLog 11
-rw-r--r-- sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile 2
-rw-r--r-- sysdeps/sparc/sparc32/sparcv9/multiarch/memset-niagara4.S 1
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/Makefile 2
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/memset-niagara4.S 124
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/memset.S 30
6 files changed, 164 insertions, 6 deletions
diff --git a/ChangeLog b/ChangeLog
index 916b851a20..1b2a455ce9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2012-10-05 David S. Miller <davem@davemloft.net>
+
+ * sysdeps/sparc/sparc64/multiarch/memset-niagara4.S: New file.
+ * sysdeps/sparc/sparc32/sparcv9/multiarch/memset-niagara4.S: New
+ file.
+ * sysdeps/sparc/sparc64/multiarch/Makefile: Add to
+ sysdep_routines.
+ * sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile: Likewise.
+ * sysdeps/sparc/sparc64/multiarch/memset.S: Use Niagara-4 memset
+ and bzero when HWCAP_SPARC_CRYPTO is present.
+
2012-10-05 H.J. Lu <hongjiu.lu@intel.com>
[BZ #14602]
diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile b/sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile
index 377dfc7330..7358bdb167 100644
--- a/sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile
+++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile
@@ -1,4 +1,4 @@
ifeq ($(subdir),string)
sysdep_routines += memcpy-ultra3 memcpy-niagara1 memcpy-niagara2 \
- memset-niagara1 memcpy-niagara4
+ memset-niagara1 memcpy-niagara4 memset-niagara4
endif
diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/memset-niagara4.S b/sysdeps/sparc/sparc32/sparcv9/multiarch/memset-niagara4.S
new file mode 100644
index 0000000000..6545019c46
--- /dev/null
+++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/memset-niagara4.S
@@ -0,0 +1 @@
+#include <sparc64/multiarch/memset-niagara4.S>
diff --git a/sysdeps/sparc/sparc64/multiarch/Makefile b/sysdeps/sparc/sparc64/multiarch/Makefile
index 377dfc7330..7358bdb167 100644
--- a/sysdeps/sparc/sparc64/multiarch/Makefile
+++ b/sysdeps/sparc/sparc64/multiarch/Makefile
@@ -1,4 +1,4 @@
ifeq ($(subdir),string)
sysdep_routines += memcpy-ultra3 memcpy-niagara1 memcpy-niagara2 \
- memset-niagara1 memcpy-niagara4
+ memset-niagara1 memcpy-niagara4 memset-niagara4
endif
diff --git a/sysdeps/sparc/sparc64/multiarch/memset-niagara4.S b/sysdeps/sparc/sparc64/multiarch/memset-niagara4.S
new file mode 100644
index 0000000000..c5a2f1befd
--- /dev/null
+++ b/sysdeps/sparc/sparc64/multiarch/memset-niagara4.S
@@ -0,0 +1,124 @@
+/* Set a block of memory to some byte value. For SUN4V Niagara-4.
+ Copyright (C) 2012 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by David S. Miller (davem@davemloft.net)
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+#define ASI_BLK_INIT_QUAD_LDD_P 0xe2 /* ASI given to every stxa in the 64-byte loops below; presumably the block-initializing store ASI for the primary address space -- confirm against the CPU manual.  */
+
+#if !defined NOT_IN_libc /* Assemble the routines only when NOT_IN_libc is not defined.  */
+
+ .register %g2, #scratch /* %g2 is used as a store offset in both 64-byte loops.  */
+ .register %g3, #scratch /* %g3 is used as a store offset in the non-zero fill loop.  */
+
+ .text
+ .align 32 /* Place __memset_niagara4 on a 32-byte boundary.  */
+
+ENTRY(__memset_niagara4) /* memset entry: %o0 = dst, %o1 = fill value, %o2 = length.  Builds the store pattern in %o4, moves the length to %o1, then joins the shared body at label 1 inside __bzero_niagara4.  */
+ andcc %o1, 0xff, %o4 /* %o4 = low 8 bits of the fill value; icc.Z set when that byte is zero.  */
+ be,pt %icc, 1f /* Zero fill byte: %o4 is already the full pattern, skip the replication.  */
+ mov %o2, %o1 /* Delay slot (runs on both paths): length -> %o1, where the shared body expects it.  */
+ sllx %o4, 8, %g1 /* Replicate the byte: %g1 = byte << 8 ...  */
+ or %g1, %o4, %o2 /* ... %o2 = 16-bit pattern (%o2 is free, length already copied).  */
+ sllx %o2, 16, %g1
+ or %g1, %o2, %o2 /* %o2 = 32-bit pattern.  */
+ sllx %o2, 32, %g1
+ ba,pt %icc, 1f /* Continue in the shared body with a non-zero pattern.  */
+ or %g1, %o2, %o4 /* Delay slot: %o4 = fill byte repeated in all 8 byte lanes.  */
+END(__memset_niagara4)
+
+ .align 32
+ENTRY(__bzero_niagara4) /* bzero entry: %o0 = dst, %o1 = length.  Label 1 below is also the shared body for __memset_niagara4, entered with %o4 = 64-bit store pattern.  Returns the original dst in %o0.  */
+ clr %o4 /* bzero: store pattern is zero.  */
+1: cmp %o1, 16 /* Shared body.  %o0 = cursor, %o1 = bytes left, %o4 = pattern.  */
+ ble %icc, .Ltiny /* <= 16 bytes (signed 32-bit compare): byte loop only.  NOTE(review): %icc looks only at the low 32 bits, so on sparc64 a length >= 4GiB with small low bits would land here -- confirm intended.  */
+ mov %o0, %o3 /* Delay slot (always runs): keep the original dst for the return value.  */
+ sub %g0, %o0, %g1 /* %g1 = -dst ...  */
+ and %g1, 0x7, %g1 /* ... & 7 = bytes needed to reach 8-byte alignment (0..7).  */
+ brz,pt %g1, .Laligned8 /* Already 8-byte aligned.  */
+ sub %o1, %g1, %o1 /* Delay slot: take the alignment bytes off the length (no-op when %g1 == 0).  */
+1: stb %o4, [%o0 + 0x00] /* Byte stores until the cursor is 8-byte aligned.  */
+ subcc %g1, 1, %g1
+ bne,pt %icc, 1b
+ add %o0, 1, %o0 /* Delay slot: advance the cursor.  */
+.Laligned8:
+ cmp %o1, 64 + (64 - 8) /* More than 120 left guarantees at least one whole 64-byte block after up to 56 bytes of 64-byte alignment.  */
+ ble .Lmedium /* Otherwise skip the 64-byte loops.  */
+ sub %g0, %o0, %g1 /* Delay slot: %g1 = -dst ...  */
+ andcc %g1, (64 - 1), %g1 /* ... & 63 = bytes needed to reach 64-byte alignment (a multiple of 8 here).  */
+ brz,pn %g1, .Laligned64
+ sub %o1, %g1, %o1 /* Delay slot: take the alignment bytes off the length.  */
+1: stx %o4, [%o0 + 0x00] /* 8-byte stores until the cursor is 64-byte aligned.  */
+ subcc %g1, 8, %g1
+ bne,pt %icc, 1b
+ add %o0, 0x8, %o0 /* Delay slot: advance the cursor.  */
+.Laligned64:
+ andn %o1, 64 - 1, %g1 /* %g1 = bytes covered by whole 64-byte blocks.  */
+ sub %o1, %g1, %o1 /* %o1 = tail left after those blocks (< 64).  */
+ brnz,pn %o4, .Lnon_bzero_loop /* Non-zero pattern: use the loop that stores all eight words.  */
+ mov 0x20, %g2 /* Delay slot: %g2 = offset 0x20, used by both 64-byte loops.  */
+1: stxa %o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P /* Zero path: one store at +0x00 ...  */
+ subcc %g1, 0x40, %g1
+ stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P /* ... and one at +0x20 per 64-byte block.  NOTE(review): only 16 of the 64 bytes are written explicitly; presumably this ASI zero-fills the rest -- confirm against the CPU manual.  */
+ bne,pt %icc, 1b
+ add %o0, 0x40, %o0 /* Delay slot: next 64-byte block.  */
+.Lpostloop:
+ cmp %o1, 8
+ bl,pn %icc, .Ltiny /* Fewer than 8 bytes left: finish with byte stores.  */
+ membar #StoreStore|#StoreLoad /* Delay slot (runs on both paths): barrier after the ASI stores, before any further stores or the return.  */
+.Lmedium:
+ andn %o1, 0x7, %g1 /* %g1 = bytes covered by whole 8-byte words; both paths into here arrive with %o1 >= 8, so %g1 != 0.  */
+ sub %o1, %g1, %o1 /* %o1 = tail (0..7).  */
+1: stx %o4, [%o0 + 0x00] /* 8-byte store loop.  */
+ subcc %g1, 0x8, %g1
+ bne,pt %icc, 1b
+ add %o0, 0x08, %o0 /* Delay slot: advance the cursor.  */
+ andcc %o1, 0x4, %g1 /* %g1 = 4 if a 4-byte chunk remains, else 0.  */
+ be,pt %icc, .Ltiny
+ sub %o1, %g1, %o1 /* Delay slot: take that chunk off the tail (no-op when %g1 == 0).  */
+ stw %o4, [%o0 + 0x00] /* One 4-byte store.  */
+ add %o0, 0x4, %o0
+.Ltiny:
+ cmp %o1, 0
+ be,pn %icc, .Lexit /* Nothing left to store.  */
+1: subcc %o1, 1, %o1 /* Loop head, and also the delay slot of the be above: on exit the extra decrement of %o1 is harmless.  */
+ stb %o4, [%o0 + 0x00] /* Byte store loop.  */
+ bne,pt %icc, 1b
+ add %o0, 1, %o0 /* Delay slot: advance the cursor.  */
+.Lexit:
+ retl
+ mov %o3, %o0 /* Delay slot: return the original dst.  */
+.Lnon_bzero_loop:
+ mov 0x08, %g3 /* Offsets for the unrolled stores: %g3 = 0x08, %o5 = 0x28 (%g2 = 0x20 was set before the branch here).  */
+ mov 0x28, %o5
+1: stxa %o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P /* Block offset 0x00.  */
+ subcc %g1, 0x40, %g1 /* One 64-byte block consumed; flags are tested by the bne at the bottom.  */
+ stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P /* Block offset 0x20.  */
+ stxa %o4, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P /* Block offset 0x08.  */
+ stxa %o4, [%o0 + %o5] ASI_BLK_INIT_QUAD_LDD_P /* Block offset 0x28.  */
+ add %o0, 0x10, %o0 /* Shift the base by 0x10 so the same four offsets reach the other four words.  */
+ stxa %o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P /* Block offset 0x10.  */
+ stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P /* Block offset 0x30.  */
+ stxa %o4, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P /* Block offset 0x18.  */
+ stxa %o4, [%o0 + %o5] ASI_BLK_INIT_QUAD_LDD_P /* Block offset 0x38.  */
+ bne,pt %icc, 1b
+ add %o0, 0x30, %o0 /* Delay slot: 0x10 + 0x30 = 0x40, cursor now at the next block.  */
+ ba,a,pt %icc, .Lpostloop /* Annulled branch (no delay slot executed): rejoin the common tail handling.  */
+END(__bzero_niagara4)
+
+#endif
diff --git a/sysdeps/sparc/sparc64/multiarch/memset.S b/sysdeps/sparc/sparc64/multiarch/memset.S
index 26cc669d6f..8b3faee915 100644
--- a/sysdeps/sparc/sparc64/multiarch/memset.S
+++ b/sysdeps/sparc/sparc64/multiarch/memset.S
@@ -26,8 +26,19 @@ ENTRY(memset)
# ifdef SHARED
SETUP_PIC_REG_LEAF(o3, o5)
# endif
- andcc %o0, HWCAP_SPARC_BLKINIT, %g0
- be 9f
+ set HWCAP_SPARC_CRYPTO, %o1
+ andcc %o0, %o1, %g0
+ be 1f
+ andcc %o0, HWCAP_SPARC_BLKINIT, %g0
+# ifdef SHARED
+ sethi %gdop_hix22(__memset_niagara4), %o1
+ xor %o1, %gdop_lox10(__memset_niagara4), %o1
+# else
+ set __memset_niagara4, %o1
+# endif
+ ba 10f
+ nop
+1: be 9f
nop
# ifdef SHARED
sethi %gdop_hix22(__memset_niagara1), %o1
@@ -57,8 +68,19 @@ ENTRY(__bzero)
# ifdef SHARED
SETUP_PIC_REG_LEAF(o3, o5)
# endif
- andcc %o0, HWCAP_SPARC_BLKINIT, %g0
- be 9f
+ set HWCAP_SPARC_CRYPTO, %o1
+ andcc %o0, %o1, %g0
+ be 1f
+ andcc %o0, HWCAP_SPARC_BLKINIT, %g0
+# ifdef SHARED
+ sethi %gdop_hix22(__bzero_niagara4), %o1
+ xor %o1, %gdop_lox10(__bzero_niagara4), %o1
+# else
+ set __bzero_niagara4, %o1
+# endif
+ ba 10f
+ nop
+1: be 9f
nop
# ifdef SHARED
sethi %gdop_hix22(__bzero_niagara1), %o1