aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>2001-07-16 06:29:42 +0000
committerUlrich Drepper <drepper@redhat.com>2001-07-16 06:29:42 +0000
commit3c204435e91ba20fbfef438e52fdc147e679ca58 (patch)
treef3cdbfd9405e6844b14d51b05974284ed902099d
parenta6d214835f18ce3232d44ed6f0aba679d090693d (diff)
downloadglibc-3c204435e91ba20fbfef438e52fdc147e679ca58.tar
glibc-3c204435e91ba20fbfef438e52fdc147e679ca58.tar.gz
glibc-3c204435e91ba20fbfef438e52fdc147e679ca58.tar.bz2
glibc-3c204435e91ba20fbfef438e52fdc147e679ca58.zip
Use mvcle for big blocks (> 64K) and a mvc loop for small blocks.
-rw-r--r--sysdeps/s390/s390-32/bcopy.S27
-rw-r--r--sysdeps/s390/s390-32/memcpy.S15
-rw-r--r--sysdeps/s390/s390-64/bcopy.S18
-rw-r--r--sysdeps/s390/s390-64/memcpy.S15
4 files changed, 61 insertions, 14 deletions
diff --git a/sysdeps/s390/s390-32/bcopy.S b/sysdeps/s390/s390-32/bcopy.S
index d3ecf1d5e7..ae90dc158e 100644
--- a/sysdeps/s390/s390-32/bcopy.S
+++ b/sysdeps/s390/s390-32/bcopy.S
@@ -34,12 +34,14 @@ ENTRY(__bcopy)
jnl .L0
alr %r1,%r2
clr %r1,%r3
- jh .L5
+ jh .L7
.L0: ahi %r4,-1 # length - 1
lr %r1,%r4
srl %r1,8
- ltr %r1,%r1
+ ltr %r1,%r1 # < 256 bytes to move ?
jz .L2
+ chi %r1,255 # > 64K to move ? (%r1 = (length - 1) >> 8)
+ jh .L5
.L1: mvc 0(256,%r3),0(%r2) # move in 256 byte chunks
la %r2,256(%r2)
la %r3,256(%r3)
@@ -49,22 +51,31 @@ ENTRY(__bcopy)
.L3: ex %r4,0(%r1) # execute mvc with length ((%r4)&255)+1
.L4: br %r14
-.L5: # destructive overlay, can not use mvcle
+ # data copies > 64K are faster with mvcle.
+.L5: ahi %r4,1 # length + 1
+ lr %r5,%r4 # source length
+ lr %r4,%r2 # source address
+ lr %r2,%r3 # set destination
+ lr %r3,%r5 # destination length = source length
+.L6: mvcle %r2,%r4,0 # copy %r4/%r5 (source) to %r2/%r3 (destination), pad byte 0
+ jo .L6 # cc 3: mvcle stopped early, resume the copy
+ br %r14
+.L7: # destructive overlay, can not use mvcle
lr %r1,%r2 # bcopy is called with source,dest
lr %r2,%r3 # memmove with dest,source! Oh, well...
lr %r3,%r1
basr %r1,0
-.L6:
+.L8:
#ifdef PIC
- al %r1,.L7-.L6(%r1) # get address of global offset table
+ al %r1,.L9-.L8(%r1) # get address of global offset table
# load address of memmove
l %r1,memmove@GOT12(%r1)
br %r1
-.L7: .long _GLOBAL_OFFSET_TABLE_-.L6
+.L9: .long _GLOBAL_OFFSET_TABLE_-.L8
#else
- al %r1,.L7-.L6(%r1) # load address of memmove
+ al %r1,.L9-.L8(%r1) # load address of memmove
br %r1 # jump to memmove
-.L7: .long memmove-.L6
+.L9: .long memmove-.L8
#endif
END(__bcopy)
diff --git a/sysdeps/s390/s390-32/memcpy.S b/sysdeps/s390/s390-32/memcpy.S
index c7045f28ea..ad82f8a340 100644
--- a/sysdeps/s390/s390-32/memcpy.S
+++ b/sysdeps/s390/s390-32/memcpy.S
@@ -33,8 +33,11 @@ ENTRY(memcpy)
ahi %r4,-1 # length - 1
lr %r1,%r2 # copy destination address
lr %r5,%r4
- sra %r5,8
+ srl %r5,8
+ ltr %r5,%r5 # < 256 bytes to move ?
jz .L1
+ chi %r5,255 # > 64K to move ? (%r5 = (length - 1) >> 8)
+ jh .L4
.L0: mvc 0(256,%r1),0(%r3) # move in 256 byte chunks
la %r1,256(%r1)
la %r3,256(%r3)
@@ -43,5 +46,15 @@ ENTRY(memcpy)
mvc 0(1,%r1),0(%r3) # instruction for execute
.L2: ex %r4,0(%r5) # execute mvc with length ((%r4)&255)+1
.L3: br %r14
+ # data copies > 64K are faster with mvcle.
+.L4: ahi %r4,1 # length + 1
+ lr %r5,%r4 # source length
+ lr %r4,%r3 # source address (memcpy: dest in %r2, source in %r3)
+ lr %r3,%r5 # destination length = source length
+ # %r2 still holds the destination address, nothing to set
+.L5: mvcle %r2,%r4,0 # copy %r4/%r5 (source) to %r2/%r3 (destination), pad byte 0
+ jo .L5 # cc 3: mvcle stopped early, resume the copy
+ lr %r2,%r1 # return destination address
+ br %r14
END(memcpy)
diff --git a/sysdeps/s390/s390-64/bcopy.S b/sysdeps/s390/s390-64/bcopy.S
index bb3acdb708..ff7966723b 100644
--- a/sysdeps/s390/s390-64/bcopy.S
+++ b/sysdeps/s390/s390-64/bcopy.S
@@ -34,11 +34,13 @@ ENTRY(__bcopy)
jnl .L0
algr %r1,%r2
clgr %r1,%r3
- jh .L5
+ jh .L7
.L0: aghi %r4,-1 # length - 1
srlg %r1,%r4,8
- ltgr %r1,%r1
+ ltgr %r1,%r1 # < 256 bytes to move ?
jz .L2
+ cghi %r1,255 # > 64K to move ? (%r1 = (length - 1) >> 8)
+ jh .L5
.L1: mvc 0(256,%r3),0(%r2) # move in 256 byte chunks
la %r2,256(%r2)
la %r3,256(%r3)
@@ -47,8 +49,16 @@ ENTRY(__bcopy)
mvc 0(1,%r3),0(%r2) # instruction for execute
.L3: ex %r4,0(%r1) # execute mvc with length ((%r4)&255)+1
.L4: br %r14
-
-.L5: # destructive overlay, can not use mvcle
+ # data copies > 64K are faster with mvcle.
+.L5: aghi %r4,1 # length + 1
+ lgr %r5,%r4 # source length
+ lgr %r4,%r2 # source address
+ lgr %r2,%r3 # set destination
+ lgr %r3,%r5 # destination length = source length
+.L6: mvcle %r2,%r4,0 # copy %r4/%r5 (source) to %r2/%r3 (destination), pad byte 0
+ jo .L6 # cc 3: mvcle stopped early, resume the copy
+ br %r14
+.L7: # destructive overlay, can not use mvcle
lgr %r1,%r2 # bcopy is called with source,dest
lgr %r2,%r3 # memmove with dest,source! Oh, well...
lgr %r3,%r1
diff --git a/sysdeps/s390/s390-64/memcpy.S b/sysdeps/s390/s390-64/memcpy.S
index 8212bd39d6..d13c0ad14e 100644
--- a/sysdeps/s390/s390-64/memcpy.S
+++ b/sysdeps/s390/s390-64/memcpy.S
@@ -32,8 +32,11 @@ ENTRY(memcpy)
jz .L3
aghi %r4,-1 # length - 1
lgr %r1,%r2 # copy destination address
- srag %r5,%r4,8
+ srlg %r5,%r4,8
+ ltgr %r5,%r5 # < 256 bytes to move ?
jz .L1
+ cghi %r5,255 # > 64K to move ? (%r5 = (length - 1) >> 8)
+ jh .L4
.L0: mvc 0(256,%r1),0(%r3) # move in 256 byte chunks
la %r1,256(%r1)
la %r3,256(%r3)
@@ -42,5 +45,15 @@ ENTRY(memcpy)
mvc 0(1,%r1),0(%r3) # instruction for execute
.L2: ex %r4,0(%r5) # execute mvc with length ((%r4)&255)+1
.L3: br %r14
+ # data copies > 64K are faster with mvcle.
+.L4: aghi %r4,1 # length + 1
+ lgr %r5,%r4 # source length
+ lgr %r4,%r3 # source address (memcpy: dest in %r2, source in %r3)
+ lgr %r3,%r5 # destination length = source length
+ # %r2 still holds the destination address, nothing to set
+.L5: mvcle %r2,%r4,0 # copy %r4/%r5 (source) to %r2/%r3 (destination), pad byte 0
+ jo .L5 # cc 3: mvcle stopped early, resume the copy
+ lgr %r2,%r1 # return destination address
+ br %r14
END(memcpy)