This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH] Optimize i386 syscall inlining for GCC 5
- From: "H.J. Lu" <hongjiu dot lu at intel dot com>
- To: GNU C Library <libc-alpha at sourceware dot org>
- Date: Wed, 12 Aug 2015 15:12:03 -0700
- Subject: Re: [PATCH] Optimize i386 syscall inlining for GCC 5
- Authentication-results: sourceware.org; auth=none
- References: <20150812192001 dot GA12730 at intel dot com>
- Reply-to: "H.J. Lu" <hjl dot tools at gmail dot com>
On Wed, Aug 12, 2015 at 12:20:01PM -0700, H.J. Lu wrote:
> Since GCC 5 and above can properly spill %ebx when needed, we can inline
> syscalls with 6 arguments if GCC 5 or above is used to compile glinc. We
> also skip __libc_do_syscall for GCC 5. Tested with -march=i486 and
> -march=i686.
>
> OK for master?
>
Here is the updated version. I optimized out one register move
instruction. OK for master?
H.J.
--
Since GCC 5 and above can properly spill %ebx when needed, we can inline
syscalls with 6 arguments if GCC 5 or above is used to compile glibc.
This patch rewrites INTERNAL_SYSCALL macros and skips __libc_do_syscall
for GCC 5.
For sysdeps/unix/sysv/linux/i386/brk.c, with -O2 -march=i686
-mtune=generic, GCC 5.2 now generates:
<__brk>:
0: push %ebx
1: mov $0x2d,%eax
6: mov 0x8(%esp),%ebx
a: call b <__brk+0xb> b: R_386_PC32 __x86.get_pc_thunk.cx
f: add $0x2,%ecx 11: R_386_GOTPC _GLOBAL_OFFSET_TABLE_
15: call *%gs:0x10
1c: mov %eax,%edx
1e: mov 0x0(%ecx),%eax 20: R_386_GOT32 __curbrk
24: mov %edx,(%eax)
26: xor %eax,%eax
28: cmp %edx,%ebx
2a: ja 30 <__brk+0x30>
2c: pop %ebx
2d: ret
instead of
<__brk>:
0: push %ebx
1: mov 0x8(%esp),%ecx
5: call 6 <__brk+0x6> 6: R_386_PC32 __x86.get_pc_thunk.bx
a: add $0x2,%ebx c: R_386_GOTPC _GLOBAL_OFFSET_TABLE_
10: xchg %ecx,%ebx
12: mov $0x2d,%eax
17: call *%gs:0x10
1e: xchg %ecx,%ebx
20: mov %eax,%edx
22: mov 0x0(%ebx),%eax 24: R_386_GOT32 __curbrk
28: mov %edx,(%eax)
2a: xor %eax,%eax
2c: cmp %edx,%ecx
2e: ja 38 <__brk+0x38>
30: pop %ebx
31: ret
The new one is shorter by 2 instructions.
* sysdeps/unix/sysv/linux/i386/libc-do-syscall.S
(__libc_do_syscall): Defined only if !__GNUC_PREREQ (5,0).
* sysdeps/unix/sysv/linux/i386/sysdep.h: Define assembler macros
only if !__GNUC_PREREQ (5,0).
(INTERNAL_SYSCALL_MAIN_6): Optimize for GCC 5.
(INTERNAL_SYSCALL_MAIN_INLINE): Likewise.
(INTERNAL_SYSCALL_NCS): Likewise.
(LOADREGS_0): New.
(ASMARGS_0): Likewise.
(LOADREGS_1): Likewise.
(ASMARGS_1): Likewise.
(LOADREGS_2): Likewise.
(ASMARGS_2): Likewise.
(LOADREGS_3): Likewise.
(ASMARGS_3): Likewise.
(LOADREGS_4): Likewise.
(ASMARGS_4): Likewise.
(LOADREGS_5): Likewise.
(ASMARGS_5): Likewise.
(LOADREGS_6): Likewise.
(ASMARGS_6): Likewise.
---
sysdeps/unix/sysv/linux/i386/libc-do-syscall.S | 3 +
sysdeps/unix/sysv/linux/i386/sysdep.h | 115 ++++++++++++++++++++++---
2 files changed, 107 insertions(+), 11 deletions(-)
diff --git a/sysdeps/unix/sysv/linux/i386/libc-do-syscall.S b/sysdeps/unix/sysv/linux/i386/libc-do-syscall.S
index af5c6f0..cdef3d5 100644
--- a/sysdeps/unix/sysv/linux/i386/libc-do-syscall.S
+++ b/sysdeps/unix/sysv/linux/i386/libc-do-syscall.S
@@ -18,6 +18,8 @@
#include <sysdep.h>
+#if !__GNUC_PREREQ (5,0)
+
/* %eax, %ecx, %edx and %esi contain the values expected by the kernel.
%edi points to a structure with the values of %ebx, %edi and %ebp. */
@@ -48,3 +50,4 @@ ENTRY (__libc_do_syscall)
cfi_restore (ebx)
ret
END (__libc_do_syscall)
+#endif
diff --git a/sysdeps/unix/sysv/linux/i386/sysdep.h b/sysdeps/unix/sysv/linux/i386/sysdep.h
index d76aca5..92e7e7a 100644
--- a/sysdeps/unix/sysv/linux/i386/sysdep.h
+++ b/sysdeps/unix/sysv/linux/i386/sysdep.h
@@ -275,6 +275,7 @@
#else /* !__ASSEMBLER__ */
+#if !__GNUC_PREREQ (5,0)
/* We need some help from the assembler to generate optimal code. We
define some macros here which later will be used. */
asm (".L__X'%ebx = 1\n\t"
@@ -314,6 +315,7 @@ struct libc_do_syscall_args
{
int ebx, edi, ebp;
};
+#endif
/* Define a macro which expands inline into the wrapper code for a system
call. */
@@ -354,8 +356,12 @@ struct libc_do_syscall_args
INTERNAL_SYSCALL_MAIN_INLINE(name, err, 5, args)
/* Each object using 6-argument inline syscalls must include a
definition of __libc_do_syscall. */
-#define INTERNAL_SYSCALL_MAIN_6(name, err, arg1, arg2, arg3, \
- arg4, arg5, arg6) \
+#if __GNUC_PREREQ (5,0)
+# define INTERNAL_SYSCALL_MAIN_6(name, err, args...) \
+ INTERNAL_SYSCALL_MAIN_INLINE(name, err, 6, args)
+#else /* GCC 5 */
+# define INTERNAL_SYSCALL_MAIN_6(name, err, arg1, arg2, arg3, \
+ arg4, arg5, arg6) \
struct libc_do_syscall_args _xv = \
{ \
(int) (arg1), \
@@ -368,14 +374,52 @@ struct libc_do_syscall_args
: "=a" (resultvar) \
: "i" (__NR_##name), "c" (arg2), "d" (arg3), "S" (arg4), "D" (&_xv) \
: "memory", "cc")
+#endif /* GCC 5 */
#define INTERNAL_SYSCALL(name, err, nr, args...) \
({ \
register unsigned int resultvar; \
INTERNAL_SYSCALL_MAIN_##nr (name, err, args); \
(int) resultvar; })
#ifdef I386_USE_SYSENTER
-# ifdef SHARED
-# define INTERNAL_SYSCALL_MAIN_INLINE(name, err, nr, args...) \
+# if __GNUC_PREREQ (5,0)
+# ifdef SHARED
+# define INTERNAL_SYSCALL_MAIN_INLINE(name, err, nr, args...) \
+ LOADREGS_##nr(args) \
+ asm volatile ( \
+ "call *%%gs:%P2" \
+ : "=a" (resultvar) \
+ : "a" (__NR_##name), "i" (offsetof (tcbhead_t, sysinfo)) \
+ ASMARGS_##nr(args) : "memory", "cc")
+# define INTERNAL_SYSCALL_NCS(name, err, nr, args...) \
+ ({ \
+ register unsigned int resultvar; \
+ LOADREGS_##nr(args) \
+ asm volatile ( \
+ "call *%%gs:%P2" \
+ : "=a" (resultvar) \
+ : "a" (name), "i" (offsetof (tcbhead_t, sysinfo)) \
+ ASMARGS_##nr(args) : "memory", "cc"); \
+ (int) resultvar; })
+# else
+# define INTERNAL_SYSCALL_MAIN_INLINE(name, err, nr, args...) \
+ LOADREGS_##nr(args) \
+ asm volatile ( \
+ "call *_dl_sysinfo" \
+ : "=a" (resultvar) \
+ : "a" (__NR_##name) ASMARGS_##nr(args) : "memory", "cc")
+# define INTERNAL_SYSCALL_NCS(name, err, nr, args...) \
+ ({ \
+ register unsigned int resultvar; \
+ LOADREGS_##nr(args) \
+ asm volatile ( \
+ "call *_dl_sysinfo" \
+ : "=a" (resultvar) \
+ : "a" (name) ASMARGS_##nr(args) : "memory", "cc"); \
+ (int) resultvar; })
+# endif
+# else /* GCC 5 */
+# ifdef SHARED
+# define INTERNAL_SYSCALL_MAIN_INLINE(name, err, nr, args...) \
EXTRAVAR_##nr \
asm volatile ( \
LOADARGS_##nr \
@@ -385,7 +429,7 @@ struct libc_do_syscall_args
: "=a" (resultvar) \
: "i" (__NR_##name), "i" (offsetof (tcbhead_t, sysinfo)) \
ASMFMT_##nr(args) : "memory", "cc")
-# define INTERNAL_SYSCALL_NCS(name, err, nr, args...) \
+# define INTERNAL_SYSCALL_NCS(name, err, nr, args...) \
({ \
register unsigned int resultvar; \
EXTRAVAR_##nr \
@@ -397,8 +441,8 @@ struct libc_do_syscall_args
: "0" (name), "i" (offsetof (tcbhead_t, sysinfo)) \
ASMFMT_##nr(args) : "memory", "cc"); \
(int) resultvar; })
-# else
-# define INTERNAL_SYSCALL_MAIN_INLINE(name, err, nr, args...) \
+# else
+# define INTERNAL_SYSCALL_MAIN_INLINE(name, err, nr, args...) \
EXTRAVAR_##nr \
asm volatile ( \
LOADARGS_##nr \
@@ -407,7 +451,7 @@ struct libc_do_syscall_args
RESTOREARGS_##nr \
: "=a" (resultvar) \
: "i" (__NR_##name) ASMFMT_##nr(args) : "memory", "cc")
-# define INTERNAL_SYSCALL_NCS(name, err, nr, args...) \
+# define INTERNAL_SYSCALL_NCS(name, err, nr, args...) \
({ \
register unsigned int resultvar; \
EXTRAVAR_##nr \
@@ -418,9 +462,27 @@ struct libc_do_syscall_args
: "=a" (resultvar) \
: "0" (name) ASMFMT_##nr(args) : "memory", "cc"); \
(int) resultvar; })
-# endif
+# endif
+# endif /* GCC 5 */
#else
-# define INTERNAL_SYSCALL_MAIN_INLINE(name, err, nr, args...) \
+# if __GNUC_PREREQ (5,0)
+# define INTERNAL_SYSCALL_MAIN_INLINE(name, err, nr, args...) \
+ LOADREGS_##nr(args) \
+ asm volatile ( \
+ "int $0x80" \
+ : "=a" (resultvar) \
+ : "a" (__NR_##name) ASMARGS_##nr(args) : "memory", "cc")
+# define INTERNAL_SYSCALL_NCS(name, err, nr, args...) \
+ ({ \
+ register unsigned int resultvar; \
+ LOADREGS_##nr(args) \
+ asm volatile ( \
+ "int $0x80" \
+ : "=a" (resultvar) \
+ : "a" (name) ASMARGS_##nr(args) : "memory", "cc"); \
+ (int) resultvar; })
+# else /* GCC 5 */
+# define INTERNAL_SYSCALL_MAIN_INLINE(name, err, nr, args...) \
EXTRAVAR_##nr \
asm volatile ( \
LOADARGS_##nr \
@@ -429,7 +491,7 @@ struct libc_do_syscall_args
RESTOREARGS_##nr \
: "=a" (resultvar) \
: "i" (__NR_##name) ASMFMT_##nr(args) : "memory", "cc")
-# define INTERNAL_SYSCALL_NCS(name, err, nr, args...) \
+# define INTERNAL_SYSCALL_NCS(name, err, nr, args...) \
({ \
register unsigned int resultvar; \
EXTRAVAR_##nr \
@@ -440,6 +502,7 @@ struct libc_do_syscall_args
: "=a" (resultvar) \
: "0" (name) ASMFMT_##nr(args) : "memory", "cc"); \
(int) resultvar; })
+# endif /* GCC 5 */
#endif
#undef INTERNAL_SYSCALL_DECL
@@ -504,6 +567,36 @@ struct libc_do_syscall_args
# define RESTOREARGS_5
#endif
+#if __GNUC_PREREQ (5,0)
+# define LOADREGS_0()
+# define ASMARGS_0()
+# define LOADREGS_1(arg1) \
+ LOADREGS_0 ()
+# define ASMARGS_1(arg1) \
+ ASMARGS_0 (), "b" ((unsigned int) (arg1))
+# define LOADREGS_2(arg1, arg2) \
+ LOADREGS_1 (arg1)
+# define ASMARGS_2(arg1, arg2) \
+ ASMARGS_1 (arg1), "c" ((unsigned int) (arg2))
+# define LOADREGS_3(arg1, arg2, arg3) \
+ LOADREGS_2 (arg1, arg2)
+# define ASMARGS_3(arg1, arg2, arg3) \
+ ASMARGS_2 (arg1, arg2), "d" ((unsigned int) (arg3))
+# define LOADREGS_4(arg1, arg2, arg3, arg4) \
+ LOADREGS_3 (arg1, arg2, arg3)
+# define ASMARGS_4(arg1, arg2, arg3, arg4) \
+ ASMARGS_3 (arg1, arg2, arg3), "S" ((unsigned int) (arg4))
+# define LOADREGS_5(arg1, arg2, arg3, arg4, arg5) \
+ LOADREGS_4 (arg1, arg2, arg3, arg4)
+# define ASMARGS_5(arg1, arg2, arg3, arg4, arg5) \
+ ASMARGS_4 (arg1, arg2, arg3, arg4), "D" ((unsigned int) (arg5))
+# define LOADREGS_6(arg1, arg2, arg3, arg4, arg5, arg6) \
+ register unsigned int _a6 asm ("ebp") = (unsigned int) (arg6); \
+ LOADREGS_5 (arg1, arg2, arg3, arg4, arg5)
+# define ASMARGS_6(arg1, arg2, arg3, arg4, arg5, arg6) \
+ ASMARGS_5 (arg1, arg2, arg3, arg4, arg5), "r" (_a6)
+#endif /* GCC 5 */
+
#define ASMFMT_0()
#ifdef __PIC__
# define ASMFMT_1(arg1) \
--
2.4.3