This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCH] Save fp registers on x64 function resolution.
- From: Ondřej Bílka <neleai at seznam dot cz>
- To: libc-alpha at sourceware dot org
- Date: Fri, 26 Jul 2013 11:15:01 +0200
- Subject: [PATCH] Save fp registers on x64 function resolution.
Hi, since having to manually save xmm registers has caused many issues recently
(memset issues, bug 15786...), this patch saves the xmm registers. If you
accept it for 2.19, further cleanups will follow.
We could also add register saving for other architectures.
As far as performance is concerned, not saving registers looks like economizing
in the wrong place. It prevents the dl_fixup code from using the SSE function
variants, which could cause a bigger slowdown than what is gained by not saving
registers.
I do not have measurements yet; that would require adding rdtsc both to _dl_fixup
as it is now and to _dl_fixup with the rtld-*.S, -mno-sse and other hacks removed.
Comments?
diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
index 5770c64..354b17c 100644
--- a/sysdeps/x86_64/dl-trampoline.S
+++ b/sysdeps/x86_64/dl-trampoline.S
@@ -42,7 +42,29 @@ _dl_runtime_resolve:
movq %r9, 48(%rsp)
movq 64(%rsp), %rsi # Copy args pushed by PLT in register.
movq 56(%rsp), %rdi # %rdi: link_map, %rsi: reloc_index
- call _dl_fixup # Call resolver.
+
+ subq $128, %rsp
+ cfi_adjust_cfa_offset(128)
+ movdqu %xmm0, (%rsp)
+ movdqu %xmm1, 16(%rsp)
+ movdqu %xmm2, 32(%rsp)
+ movdqu %xmm3, 48(%rsp)
+ movdqu %xmm4, 64(%rsp)
+ movdqu %xmm5, 80(%rsp)
+ movdqu %xmm6, 96(%rsp)
+ movdqu %xmm7, 112(%rsp)
+ call _dl_fixup # Call resolver.
+ movdqu (%rsp), %xmm0
+ movdqu 16(%rsp), %xmm1
+ movdqu 32(%rsp), %xmm2
+ movdqu 48(%rsp), %xmm3
+ movdqu 64(%rsp), %xmm4
+ movdqu 80(%rsp), %xmm5
+ movdqu 96(%rsp), %xmm6
+ movdqu 112(%rsp), %xmm7
+ addq $128, %rsp
+ cfi_adjust_cfa_offset(-128)
+
movq %rax, %r11 # Save return value
movq 48(%rsp), %r9 # Get register content back.
movq 40(%rsp), %r8