This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCH 2/2] S390: Extend structs La_s390_regs / La_s390_retval with vector-registers.
- From: Stefan Liebler <stli at linux dot vnet dot ibm dot com>
- To: libc-alpha at sourceware dot org
- Cc: Stefan Liebler <stli at linux dot vnet dot ibm dot com>
- Date: Tue, 22 Mar 2016 12:25:25 +0100
- Subject: [PATCH 2/2] S390: Extend structs La_s390_regs / La_s390_retval with vector-registers.
- Authentication-results: sourceware.org; auth=none
- References: <1458645925-28632-1-git-send-email-stli at linux dot vnet dot ibm dot com>
Starting with z13, vector registers can also occur as argument registers.
Thus the passed input/output register structs for
la_s390_[32|64]_gnu_plt[enter|exit] functions should reflect those new
registers. This patch extends these structs La_s390_regs and La_s390_retval
and adjusts _dl_runtime_profile() to handle those fields in case of
running on a z13 machine.
ChangeLog:
* sysdeps/s390/bits/link.h: (La_s390_vr) New typedef.
(La_s390_32_regs): Append vector register lr_v24-lr_v31.
(La_s390_64_regs): Likewise.
(La_s390_32_retval): Append vector register lrv_v24.
(La_s390_64_retval): Likeweise.
* sysdeps/s390/s390-32/dl-trampoline.h (_dl_runtime_profile):
Handle extended structs La_s390_32_regs and La_s390_32_retval.
* sysdeps/s390/s390-64/dl-trampoline.h (_dl_runtime_profile):
Handle extended structs La_s390_64_regs and La_s390_64_retval.
---
sysdeps/s390/bits/link.h | 29 +++++++++++++
sysdeps/s390/s390-32/dl-trampoline.h | 76 +++++++++++++++++++-------------
sysdeps/s390/s390-64/dl-trampoline.h | 84 +++++++++++++++++++++---------------
3 files changed, 124 insertions(+), 65 deletions(-)
diff --git a/sysdeps/s390/bits/link.h b/sysdeps/s390/bits/link.h
index 2ef7f44..e27ed67 100644
--- a/sysdeps/s390/bits/link.h
+++ b/sysdeps/s390/bits/link.h
@@ -19,6 +19,9 @@
# error "Never include <bits/link.h> directly; use <link.h> instead."
#endif
+#if defined HAVE_S390_VX_ASM_SUPPORT
+typedef char La_s390_vr[16];
+#endif
#if __ELF_NATIVE_CLASS == 32
@@ -32,6 +35,16 @@ typedef struct La_s390_32_regs
uint32_t lr_r6;
double lr_fp0;
double lr_fp2;
+# if defined HAVE_S390_VX_ASM_SUPPORT
+ La_s390_vr lr_v24;
+ La_s390_vr lr_v25;
+ La_s390_vr lr_v26;
+ La_s390_vr lr_v27;
+ La_s390_vr lr_v28;
+ La_s390_vr lr_v29;
+ La_s390_vr lr_v30;
+ La_s390_vr lr_v31;
+# endif
} La_s390_32_regs;
/* Return values for calls from PLT on s390-32. */
@@ -40,6 +53,9 @@ typedef struct La_s390_32_retval
uint32_t lrv_r2;
uint32_t lrv_r3;
double lrv_fp0;
+# if defined HAVE_S390_VX_ASM_SUPPORT
+ La_s390_vr lrv_v24;
+# endif
} La_s390_32_retval;
@@ -77,6 +93,16 @@ typedef struct La_s390_64_regs
double lr_fp2;
double lr_fp4;
double lr_fp6;
+# if defined HAVE_S390_VX_ASM_SUPPORT
+ La_s390_vr lr_v24;
+ La_s390_vr lr_v25;
+ La_s390_vr lr_v26;
+ La_s390_vr lr_v27;
+ La_s390_vr lr_v28;
+ La_s390_vr lr_v29;
+ La_s390_vr lr_v30;
+ La_s390_vr lr_v31;
+# endif
} La_s390_64_regs;
/* Return values for calls from PLT on s390-64. */
@@ -84,6 +110,9 @@ typedef struct La_s390_64_retval
{
uint64_t lrv_r2;
double lrv_fp0;
+# if defined HAVE_S390_VX_ASM_SUPPORT
+ La_s390_vr lrv_v24;
+# endif
} La_s390_64_retval;
diff --git a/sysdeps/s390/s390-32/dl-trampoline.h b/sysdeps/s390/s390-32/dl-trampoline.h
index a152a7b..bb74d27 100644
--- a/sysdeps/s390/s390-32/dl-trampoline.h
+++ b/sysdeps/s390/s390-32/dl-trampoline.h
@@ -112,28 +112,31 @@ _dl_runtime_resolve:
cfi_startproc
.align 16
_dl_runtime_profile:
- stm %r2,%r6,32(%r15) # save registers
- cfi_offset (r2, -64) # + r6 needed as arg for
- cfi_offset (r3, -60) # _dl_profile_fixup
- cfi_offset (r4, -56)
- cfi_offset (r5, -52)
- cfi_offset (r6, -48)
- std %f0,56(%r15)
- cfi_offset (f0, -40)
- std %f2,64(%r15)
- cfi_offset (f2, -32)
st %r12,12(%r15) # r12 is used as backup of r15
cfi_offset (r12, -84)
st %r14,16(%r15)
cfi_offset (r14, -80)
lr %r12,%r15 # backup stack pointer
cfi_def_cfa_register (12)
+ ahi %r15,-264 # create stack frame:
+ # 96 + sizeof(La_s390_32_regs)
+ st %r12,0(%r15) # save backchain
+
+ stm %r2,%r6,96(%r15) # save registers
+ cfi_offset (r2, -264) # + r6 needed as arg for
+ cfi_offset (r3, -260) # _dl_profile_fixup
+ cfi_offset (r4, -256)
+ cfi_offset (r5, -252)
+ cfi_offset (r6, -248)
+ std %f0,120(%r15)
+ cfi_offset (f0, -240)
+ std %f2,128(%r15)
+ cfi_offset (f2, -232)
#ifdef RESTORE_VRS
- ahi %r15,-224 # create stack frame
.machine push
.machine "z13"
.machinemode "zarch_nohighgprs"
- vstm %v24,%v31,96(%r15) # store call-clobbered vr arguments
+ vstm %v24,%v31,136(%r15) # store call-clobbered vr arguments
cfi_offset (v24, -224)
cfi_offset (v25, -208)
cfi_offset (v26, -192)
@@ -143,31 +146,31 @@ _dl_runtime_profile:
cfi_offset (v30, -128)
cfi_offset (v31, -112)
.machine pop
-#else
- ahi %r15,-96 # create stack frame
#endif
- st %r12,0(%r15) # save backchain
+
lm %r2,%r3,24(%r12) # load arguments saved by PLT
lr %r4,%r14 # return address as third parameter
basr %r1,0
0: l %r14,6f-0b(%r1)
- la %r5,32(%r12) # pointer to struct La_s390_32_regs
+ la %r5,96(%r15) # pointer to struct La_s390_32_regs
la %r6,20(%r12) # long int * framesize
bas %r14,0(%r14,%r1) # call resolver
lr %r1,%r2 # function addr returned in r2
- ld %f0,56(%r12) # restore call-clobbered arg fprs
- ld %f2,64(%r12)
+ ld %f0,120(%r15) # restore call-clobbered arg fprs
+ ld %f2,128(%r15)
#ifdef RESTORE_VRS
.machine push
.machine "z13"
.machinemode "zarch_nohighgprs"
- vlm %v24,%v31,96(%r15) # restore call-clobbered arg vrs
+ vlm %v24,%v31,136(%r15) # restore call-clobbered arg vrs
.machine pop
#endif
icm %r0,15,20(%r12) # load & test framesize
jnm 2f
- lm %r2,%r6,32(%r12)
+ lm %r2,%r6,96(%r15) # framesize < 0 means no pltexit call
+ # so we can do a tail call without
+ # copying the arg overflow area
lr %r15,%r12 # remove stack frame
cfi_def_cfa_register (15)
l %r14,16(%r15) # restore registers
@@ -175,7 +178,9 @@ _dl_runtime_profile:
br %r1 # tail-call to the resolved function
cfi_def_cfa_register (12)
-2: jz 4f # framesize == 0 ?
+2: la %r4,96(%r15) # pointer to struct La_s390_32_regs
+ st %r4,32(%r12)
+ jz 4f # framesize == 0 ?
ahi %r0,7 # align framesize to 8
lhi %r2,-8
nr %r0,%r2
@@ -188,24 +193,35 @@ _dl_runtime_profile:
la %r2,8(%r2)
la %r3,8(%r3)
brct %r0,3b
-4: lm %r2,%r6,32(%r12) # load register parameters
+4: lm %r2,%r6,0(%r4) # load register parameters
basr %r14,%r1 # call resolved function
- stm %r2,%r3,72(%r12) # store return values r2, r3, f0
- std %f0,80(%r12) # to struct La_s390_32_retval
- lm %r2,%r3,24(%r12) # load arguments saved by PLT
+ stm %r2,%r3,40(%r12) # store return values r2, r3, f0
+ std %f0,48(%r12) # to struct La_s390_32_retval
+#ifdef RESTORE_VRS
+ .machine push
+ .machine "z13"
+ vst %v24,56(%r12) # store return value v24
+ .machine pop
+#endif
+ lm %r2,%r4,24(%r12) # r2, r3: load arguments saved by PLT
+ # r4: pointer to struct La_s390_32_regs
basr %r1,0
5: l %r14,7f-5b(%r1)
- la %r4,32(%r12) # pointer to struct La_s390_32_regs
- la %r5,72(%r12) # pointer to struct La_s390_32_retval
+ la %r5,40(%r12) # pointer to struct La_s390_32_retval
bas %r14,0(%r14,%r1) # call _dl_call_pltexit
lr %r15,%r12 # remove stack frame
cfi_def_cfa_register (15)
l %r14,16(%r15) # restore registers
l %r12,12(%r15)
- l %r2,72(%r15) # restore return values
- l %r3,76(%r15)
- ld %f0,80(%r15)
+ lm %r2,%r3,40(%r15) # restore return values
+ ld %f0,48(%r15)
+#ifdef RESTORE_VRS
+ .machine push
+ .machine "z13"
+ vl %v24,56(%r15) # restore return value v24
+ .machine pop
+#endif
br %r14
6: .long _dl_profile_fixup - 0b
diff --git a/sysdeps/s390/s390-64/dl-trampoline.h b/sysdeps/s390/s390-64/dl-trampoline.h
index 658e3a3..33ea3de 100644
--- a/sysdeps/s390/s390-64/dl-trampoline.h
+++ b/sysdeps/s390/s390-64/dl-trampoline.h
@@ -109,31 +109,34 @@ _dl_runtime_resolve:
cfi_startproc
.align 16
_dl_runtime_profile:
- stmg %r2,%r6,64(%r15) # save call-clobbered arg regs
- cfi_offset (r2, -96) # + r6 needed as arg for
- cfi_offset (r3, -88) # _dl_profile_fixup
- cfi_offset (r4, -80)
- cfi_offset (r5, -72)
- cfi_offset (r6, -64)
- std %f0,104(%r15)
- cfi_offset (f0, -56)
- std %f2,112(%r15)
- cfi_offset (f2, -48)
- std %f4,120(%r15)
- cfi_offset (f4, -40)
- std %f6,128(%r15)
- cfi_offset (f6, -32)
stg %r12,24(%r15) # r12 is used as backup of r15
cfi_offset (r12, -136)
stg %r14,32(%r15)
cfi_offset (r14, -128)
lgr %r12,%r15 # backup stack pointer
cfi_def_cfa_register (12)
+ aghi %r15,-360 # create stack frame:
+ # 160 + sizeof(La_s390_64_regs)
+ stg %r12,0(%r15) # save backchain
+
+ stmg %r2,%r6,160(%r15) # save call-clobbered arg regs
+ cfi_offset (r2, -360) # + r6 needed as arg for
+ cfi_offset (r3, -352) # _dl_profile_fixup
+ cfi_offset (r4, -344)
+ cfi_offset (r5, -336)
+ cfi_offset (r6, -328)
+ std %f0,200(%r15)
+ cfi_offset (f0, -320)
+ std %f2,208(%r15)
+ cfi_offset (f2, -312)
+ std %f4,216(%r15)
+ cfi_offset (f4, -304)
+ std %f6,224(%r15)
+ cfi_offset (f6, -296)
#ifdef RESTORE_VRS
- aghi %r15,-288 # create stack frame
.machine push
.machine "z13"
- vstm %v24,%v31,160(%r15)# store call-clobbered vector argument registers
+ vstm %v24,%v31,232(%r15) # store call-clobbered vector arguments
cfi_offset (v24, -288)
cfi_offset (v25, -272)
cfi_offset (v26, -256)
@@ -143,31 +146,28 @@ _dl_runtime_profile:
cfi_offset (v30, -192)
cfi_offset (v31, -176)
.machine pop
-#else
- aghi %r15,-160 # create stack frame
#endif
- stg %r12,0(%r15) # save backchain
lmg %r2,%r3,48(%r12) # load arguments saved by PLT
lgr %r4,%r14 # return address as third parameter
- la %r5,64(%r12) # pointer to struct La_s390_64_regs
+ la %r5,160(%r15) # pointer to struct La_s390_64_regs
la %r6,40(%r12) # long int * framesize
brasl %r14,_dl_profile_fixup # call resolver
lgr %r1,%r2 # function addr returned in r2
- ld %f0,104(%r12) # restore call-clobbered arg fprs
- ld %f2,112(%r12)
- ld %f4,120(%r12)
- ld %f6,128(%r12)
+ ld %f0,200(%r15) # restore call-clobbered arg fprs
+ ld %f2,208(%r15)
+ ld %f4,216(%r15)
+ ld %f6,224(%r15)
#ifdef RESTORE_VRS
.machine push
.machine "z13"
- vlm %v24,%v31,160(%r15) # restore call-clobbered arg vrs
+ vlm %v24,%v31,232(%r15) # restore call-clobbered arg vrs
.machine pop
#endif
lg %r0,40(%r12) # load framesize
ltgr %r0,%r0
jnm 1f
- lmg %r2,%r6,64(%r12) # framesize < 0 means no pltexit call
+ lmg %r2,%r6,160(%r15) # framesize < 0 means no pltexit call
# so we can do a tail call without
# copying the arg overflow area
lgr %r15,%r12 # remove stack frame
@@ -177,7 +177,9 @@ _dl_runtime_profile:
br %r1 # tail-call to resolved function
cfi_def_cfa_register (12)
-1: jz 4f # framesize == 0 ?
+1: la %r4,160(%r15) # pointer to struct La_s390_64_regs
+ stg %r4,64(%r12)
+ jz 4f # framesize == 0 ?
aghi %r0,7 # align framesize to 8
nill %r0,0xfff8
slgr %r15,%r0 # make room for framesize bytes
@@ -189,21 +191,33 @@ _dl_runtime_profile:
la %r2,8(%r2) # depending on framesize
la %r3,8(%r3)
brctg %r0,3b
-4: lmg %r2,%r6,64(%r12) # restore call-clobbered arg gprs
+4: lmg %r2,%r6,0(%r4) # restore call-clobbered arg gprs
basr %r14,%r1 # call resolved function
- stg %r2,136(%r12) # store return values r2, f0
- std %f0,144(%r12) # to struct La_s390_64_retval
- lmg %r2,%r3,48(%r12) # load arguments saved by PLT
- la %r4,64(%r12) # pointer to struct La_s390_64_regs
- la %r5,136(%r12) # pointer to struct La_s390_64_retval
+ stg %r2,72(%r12) # store return values r2, f0
+ std %f0,80(%r12) # to struct La_s390_64_retval
+#ifdef RESTORE_VRS
+ .machine push
+ .machine "z13"
+ vst %v24,88(%r12) # store return value v24
+ .machine pop
+#endif
+ lmg %r2,%r4,48(%r12) # r2, r3: load arguments saved by PLT
+ # r4: pointer to struct La_s390_64_regs
+ la %r5,72(%r12) # pointer to struct La_s390_64_retval
brasl %r14,_dl_call_pltexit
lgr %r15,%r12 # remove stack frame
cfi_def_cfa_register (15)
lg %r14,32(%r15) # restore registers
lg %r12,24(%r15)
- lg %r2,136(%r15) # restore return values
- ld %f0,144(%r15)
+ lg %r2,72(%r15) # restore return values
+ ld %f0,80(%r15)
+#ifdef RESTORE_VRS
+ .machine push
+ .machine "z13"
+ vl %v24,88(%r15) # restore return value v24
+ .machine pop
+#endif
br %r14 # Jump back to caller
cfi_endproc
--
2.3.0