This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [RFC 2/3] Move trampolines to common header.
- From: Ondřej Bílka <neleai at seznam dot cz>
- To: libc-alpha at sourceware dot org
- Date: Sun, 20 Oct 2013 21:39:42 +0200
- Subject: Re: [RFC 2/3] Move trampolines to common header.
- Authentication-results: sourceware.org; auth=none
- References: <20131019082412 dot GA11703 at domone dot podge>
On Sat, Oct 19, 2013 at 10:24:12AM +0200, Ondřej Bílka wrote:
> Hi, I have returned to storing floating-point registers. The first step is to
> refactor the code so that it can be factored out in the second patch, with the
> SSE saving logic in the third patch. The code is currently inconsistent in several ways:
>
This is the second patch. There may be some modifications depending on
feedback from the first patch.
Comments?
* sysdeps/x86_64/dl-trampoline-real.h: New file.
* sysdeps/x86_64/dl-tlsdesc.S: Factor common code out.
* sysdeps/x86_64/dl-trampoline.S: Likewise.
diff --git a/sysdeps/x86_64/dl-tlsdesc.S b/sysdeps/x86_64/dl-tlsdesc.S
index c439c7e..f945a63 100644
--- a/sysdeps/x86_64/dl-tlsdesc.S
+++ b/sysdeps/x86_64/dl-tlsdesc.S
@@ -115,22 +115,17 @@ _dl_tlsdesc_dynamic:
.Lslow:
/* Besides rdi and rsi, saved above, save rdx, rcx, r8, r9,
Also, align the stack, that's off by 8 bytes. */
- subq $72, %rsp
- cfi_adjust_cfa_offset (72)
- movq %rdx, 8(%rsp)
- movq %rcx, 16(%rsp)
- movq %r8, 24(%rsp)
- movq %r9, 32(%rsp)
-
- movq %r11, %rdi
- call __tls_get_addr@PLT
-
- movq 8(%rsp), %rdx
- movq 16(%rsp), %rcx
- movq 24(%rsp), %r8
- movq 32(%rsp), %r9
- addq $72, %rsp
- cfi_adjust_cfa_offset (-72)
+
+
+#define PREFIX dynamic
+#define MIDDLE \
+ movq %r11, %rdi; \
+ call __tls_get_addr@PLT; \
+ movq %rax, %r11
+
+#include "dl-trampoline-real.h"
+
+ movq %r11, %rax
jmp .Lret
cfi_endproc
.size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
@@ -156,32 +151,18 @@ _dl_tlsdesc_dynamic:
.align 16
/* The PLT entry will have pushed the link_map pointer. */
_dl_tlsdesc_resolve_rela:
- cfi_adjust_cfa_offset (8)
/* Save all call-clobbered registers. */
movq (%rsp), %r11
- subq $72, %rsp
- cfi_adjust_cfa_offset (72)
- movq %rax, (%rsp)
- movq %rdi, 8(%rsp)
- movq %rsi, 16(%rsp)
- movq %r8, 24(%rsp)
- movq %r9, 32(%rsp)
- movq %rdx, 56(%rsp)
- movq %rcx, 64(%rsp)
-
- movq %rax, %rdi /* Pass tlsdesc* in %rdi. */
- movq %r11, %rsi /* Pass link_map* in %rsi. */
- call _dl_tlsdesc_resolve_rela_fixup
-
- movq (%rsp), %rax
- movq 8(%rsp), %rdi
- movq 16(%rsp), %rsi
- movq 24(%rsp), %r8
- movq 32(%rsp), %r9
- movq 56(%rsp), %rdx
- movq 64(%rsp), %rcx
- addq $80, %rsp
- cfi_adjust_cfa_offset (-80)
+ addq $8, %rsp
+
+#define PREFIX rela
+#define MIDDLE \
+ movq %rax, %rdi ; /* Pass tlsdesc* in %rdi. */ \
+ movq %r11, %rsi ; /* Pass link_map* in %rsi. */ \
+ call _dl_tlsdesc_resolve_rela_fixup \
+
+#include "dl-trampoline-real.h"
+
jmp *(%rax)
cfi_endproc
.size _dl_tlsdesc_resolve_rela, .-_dl_tlsdesc_resolve_rela
@@ -203,31 +184,14 @@ _dl_tlsdesc_resolve_rela:
cfi_startproc
.align 16
_dl_tlsdesc_resolve_hold:
- /* Save all call-clobbered registers. */
- subq $72, %rsp
- cfi_adjust_cfa_offset (72)
- movq %rax, (%rsp)
- movq %rdi, 8(%rsp)
- movq %rsi, 16(%rsp)
- /* Pass _dl_tlsdesc_resolve_hold's address in %rsi. */
- movq %r8, 24(%rsp)
- movq %r9, 32(%rsp)
- movq %rdx, 56(%rsp)
- movq %rcx, 64(%rsp)
-
- movq %rax, %rdi /* Pass tlsdesc* in %rdi. */
- leaq . - _dl_tlsdesc_resolve_hold(%rip), %rsi
- call _dl_tlsdesc_resolve_hold_fixup
-
- movq (%rsp), %rax
- movq 8(%rsp), %rdi
- movq 16(%rsp), %rsi
- movq 24(%rsp), %r8
- movq 32(%rsp), %r9
- movq 56(%rsp), %rdx
- movq 64(%rsp), %rcx
- addq $72, %rsp
- cfi_adjust_cfa_offset (-72)
+
+#define PREFIX hold
+#define MIDDLE \
+ movq %rax, %rdi; /* Pass tlsdesc* in %rdi. */ \
+ leaq . - _dl_tlsdesc_resolve_hold(%rip), %rsi; \
+ call _dl_tlsdesc_resolve_hold_fixup;
+#include "dl-trampoline-real.h"
+
jmp *(%rax)
cfi_endproc
.size _dl_tlsdesc_resolve_hold, .-_dl_tlsdesc_resolve_hold
diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
index 4212145..ae30aca 100644
--- a/sysdeps/x86_64/dl-trampoline.S
+++ b/sysdeps/x86_64/dl-trampoline.S
@@ -32,31 +32,18 @@
_dl_runtime_resolve:
movq (%rsp), %r10
movq 8(%rsp), %r11
- cfi_adjust_cfa_offset(16) # Incorporate PLT
- subq $56,%rsp
- cfi_adjust_cfa_offset(56)
- movq %rax,(%rsp) # Preserve registers otherwise clobbered.
- movq %rcx, 8(%rsp)
- movq %rdx, 16(%rsp)
- movq %rsi, 24(%rsp)
- movq %rdi, 32(%rsp)
- movq %r8, 40(%rsp)
- movq %r9, 48(%rsp)
-
- movq %r11, %rsi # Copy args pushed by PLT in register.
- movq %r10, %rdi # %rdi: link_map, %rsi: reloc_index
- call _dl_fixup # Call resolver.
- movq %rax, %r11 # Save return value
-
- movq 48(%rsp), %r9 # Get register content back.
- movq 40(%rsp), %r8
- movq 32(%rsp), %rdi
- movq 24(%rsp), %rsi
- movq 16(%rsp), %rdx
- movq 8(%rsp), %rcx
- movq (%rsp), %rax
- addq $72, %rsp # Adjust stack(PLT did 2 pushes)
- cfi_adjust_cfa_offset(-72)
+
+ addq $16, %rsp
+
+ #define PREFIX DL
+ #define MIDDLE \
+ movq %r11, %rsi; /* Copy args pushed by PLT in register. */ \
+ movq %r10, %rdi; /* %rdi: link_map, %rsi: reloc_index */ \
+ call _dl_fixup; /* Call resolver. */ \
+ movq %rax, %r11; /* Save return value. */
+
+#include "dl-trampoline-real.h"
+
jmp *%r11 # Jump to function address.
cfi_endproc
.size _dl_runtime_resolve, .-_dl_runtime_resolve