This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCH][AArch64] Single thread lowlevellock optimization
- From: Szabolcs Nagy <szabolcs dot nagy at arm dot com>
- To: GNU C Library <libc-alpha at sourceware dot org>
- Cc: nd at arm dot com, "triegel at redhat dot com" <triegel at redhat dot com>
- Date: Fri, 16 Jun 2017 17:26:01 +0100
- Subject: [PATCH][AArch64] Single thread lowlevellock optimization
- Authentication-results: sourceware.org; auth=none
- Authentication-results: redhat.com; dkim=none (message not signed) header.d=none;redhat.com; dmarc=none action=none header.from=arm.com;
- Nodisclaimer: True
- Spamdiagnosticmetadata: NSPM
- Spamdiagnosticoutput: 1:99
Do single thread lock optimization in aarch64 libc. Atomic operations
hurt the performance of some single-threaded programs using stdio
(usually getc/putc in a loop).
Ideally such optimization should be done at a higher level and in a
target independent way as in
https://sourceware.org/ml/libc-alpha/2017-05/msg00479.html
but that approach will need more discussion so do it in lowlevellocks,
similarly to x86, until there is consensus.
Differences compared to the current x86_64 behaviour:
- The optimization is not silently applied to shared locks; in that
case the build fails instead.
- Unlock assumes the futex value is 0 or 1, i.e. there are no waiters
to wake (waiting would not work in the single-threaded case, and libc
does not use such locks; to make sure, lll_cond* is undefed).
This speeds up a getchar loop by about 2-4x depending on the CPU,
while causing only around a 5-10% regression in the multi-threaded case
(other libc-internal locks are not expected to be performance
critical or significantly affected by this change).
2017-06-16 Szabolcs Nagy <szabolcs.nagy@arm.com>
* sysdeps/unix/sysv/linux/aarch64/lowlevellock.h: New file.
diff --git a/sysdeps/unix/sysv/linux/aarch64/lowlevellock.h b/sysdeps/unix/sysv/linux/aarch64/lowlevellock.h
new file mode 100644
index 0000000000..7561a454c9
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/aarch64/lowlevellock.h
@@ -0,0 +1,93 @@
+/* AArch64 low-level lock implementation.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _AARCH64_LOWLEVELLOCK_H
+#define _AARCH64_LOWLEVELLOCK_H 1
+
+#include <sysdeps/nptl/lowlevellock.h>
+
+#if IS_IN (libc)
+
+/* See sysdeps/nptl/lowlevellock.h for comments.  This implementation
+   avoids some atomic operations in the single-threaded case in libc,
+   so the lll operations are neither thread-safe nor async-signal-safe.
+
+   It is assumed that libc uses only private futex locks and that
+   there are never waiters on a lock that unlock would have to wake.  */
+
+/* The lll_cond_* operations keep waiter bookkeeping in the futex word,
+   which the single-thread fast paths below do not handle; undefine them
+   so any accidental use in libc fails at compile time.  */
+#undef lll_cond_trylock
+#undef lll_cond_lock
+/* Declared but never defined: referencing it from the lll_lock/lll_unlock
+   macros below turns any non-LLL_PRIVATE use into a link-time error.  */
+void __aarch64_lll_link_error (void);
+
+/* NOTE(review): presumably nonzero once the process has created a second
+   thread (nptl bookkeeping) -- confirm against the nptl definition.  */
+extern int __libc_multiple_threads attribute_hidden;
+
+#undef lll_trylock
+#define lll_trylock(futex) __lll_trylock (&(futex))
+/* Try to acquire the lock without blocking.  Returns 0 on success and
+   a nonzero value (the old futex value) if the lock was already held.  */
+static inline int
+__lll_trylock (int *futex)
+{
+  if (__libc_multiple_threads == 0)
+    {
+      /* Single-threaded: a plain load/store pair is sufficient -- no
+	 other thread can race, and these operations are documented above
+	 as not async-signal-safe, so no signal handler may take them.  */
+      int oldval = *futex;
+      if (__glibc_likely (oldval == 0))
+	*futex = 1;
+      return oldval;
+    }
+  /* Multi-threaded: the usual CAS with acquire ordering, as in the
+     generic sysdeps/nptl implementation.  */
+  return __glibc_unlikely (atomic_compare_and_exchange_bool_acq (futex, 1, 0));
+}
+
+#undef lll_lock
+/* Acquire the lock, blocking in the kernel if it is contended.  Only
+   LLL_PRIVATE locks are supported here: any other (or non-constant)
+   PRIVATE argument expands to a call to the undefined
+   __aarch64_lll_link_error and therefore fails at link time.  */
+#define lll_lock(futex, private) \
+  (__builtin_constant_p (private) && (private) == LLL_PRIVATE \
+   ? __lll_lock_private (&(futex)) : __aarch64_lll_link_error ())
+static inline void
+__lll_lock_private (int *futex)
+{
+  if (__libc_multiple_threads == 0)
+    {
+      /* Single-threaded: non-atomic test-and-set.  The wait path below
+	 should be unreachable here (nothing else can hold the lock);
+	 presumably kept as a safety net -- see the submission notes.  */
+      if (__glibc_likely (*futex == 0))
+	*futex = 1;
+      else
+	__lll_lock_wait_private (futex);
+    }
+  /* Multi-threaded: acquire-CAS fast path, futex wait on contention.  */
+  else if (__glibc_unlikely
+	   (atomic_compare_and_exchange_bool_acq (futex, 1, 0)))
+    __lll_lock_wait_private (futex);
+}
+
+#undef lll_unlock
+/* Release the lock.  As with lll_lock, only LLL_PRIVATE is accepted;
+   anything else becomes a link-time error.  */
+#define lll_unlock(futex, private) \
+  (__builtin_constant_p (private) && (private) == LLL_PRIVATE \
+   ? __lll_unlock_private (&(futex)) : __aarch64_lll_link_error ())
+/* Note: lll_futex_wake depends on macros defined later, so
+   __lll_unlock_private must be a macro rather than an inline function
+   at this point in the header.  */
+#define __lll_unlock_private(futex) \
+  ((void)({ \
+    /* Single-threaded: a plain store releases the lock.  This relies \
+       on the futex value being 0 or 1 (no waiters to wake); the \
+       lll_cond* undefs above keep that assumption valid.  */ \
+    if (__libc_multiple_threads == 0) \
+      *(futex) = 0; \
+    else \
+      { \
+	/* Multi-threaded: release-exchange, then wake one waiter if \
+	   the old value indicates contention (> 1).  */ \
+	int *__futex = (futex); \
+	int __oldval = atomic_exchange_rel (__futex, 0); \
+	if (__glibc_unlikely (__oldval > 1)) \
+	  lll_futex_wake (__futex, 1, LLL_PRIVATE); \
+      } \
+  }))
+
+#endif /* IS_IN (libc) */
+
+#endif