This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH] aarch64: Optimized implementation of pthread_spin_lock and unlock
- From: Andrew Pinski <pinskia at gmail dot com>
- To: Xuelei Zhang <zhangxuelei4 at huawei dot com>
- Cc: GNU C Library <libc-alpha at sourceware dot org>, Wilco Dijkstra <Wilco dot Dijkstra at arm dot com>, Siddhesh Poyarekar <siddhesh at gotplt dot org>, Szabolcs Nagy <Szabolcs dot Nagy at arm dot com>, jiangyikun at huawei dot com, yikunkero at gmail dot com
- Date: Mon, 28 Oct 2019 10:53:35 -0700
- Subject: Re: [PATCH] aarch64: Optimized implementation of pthread_spin_lock and unlock
- References: <20191028133911.8612-1-zhangxuelei4@huawei.com>
On Mon, Oct 28, 2019 at 6:39 AM Xuelei Zhang <zhangxuelei4@huawei.com> wrote:
>
> An optimized spin_lock algorithm is implemented that waits for a
> random time when multiple threads try to store to the spinlock
> at the same time. This desynchronizes the operations of different
> threads, thereby reducing bus conflicts and further
> improving the overall performance, which benefits aarch64 in
> particular because of its multi-core features.
>
> In addition, the assembly version of spin_unlock is also implemented.
> ---
> sysdeps/aarch64/nptl/pthread_spin_lock.S | 103 +++++++++++++++++++++++++++++
> sysdeps/aarch64/nptl/pthread_spin_unlock.S | 34 ++++++++++
> 2 files changed, 137 insertions(+)
> create mode 100644 sysdeps/aarch64/nptl/pthread_spin_lock.S
> create mode 100644 sysdeps/aarch64/nptl/pthread_spin_unlock.S
>
> diff --git a/sysdeps/aarch64/nptl/pthread_spin_lock.S b/sysdeps/aarch64/nptl/pthread_spin_lock.S
> new file mode 100644
> index 00000000000..662410c5fa3
> --- /dev/null
> +++ b/sysdeps/aarch64/nptl/pthread_spin_lock.S
> @@ -0,0 +1,103 @@
> +/* pthread_spin_lock -- lock a spin lock. Generic version.
> + Copyright (C) 2012-2019 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library. If not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#include <sysdep.h>
> +
> +/* Assumptions:
> + *
> + * ARMv8-a, AArch64
> + */
> +
> +ENTRY (pthread_spin_lock)
> + DELOUSE (0)
> +
> + mov w1, #0x1
> +
> +L(spin):
> + prfm pldl1strm, [x0]
> + /* A counter used to vary the waiting time.
> + The larger the value, the more intense the current lock
> + contention, and the longer the waiting time. */
> + add w3, w3, #0x1
> + ldaxr w2, [x0]
> + cbnz w2, L(spin)
> + stxr w2, w1, [x0]
> + cbnz w2, 1f
> + b L(end)
> +
> +L(end):
> + mov w0, #0x0
> + ret
> +
> + /* Set the iteration count of L(wait) from 1000 to 7000;
> + each 1000 iterations equals waiting about 1~2us. */
> + .p2align 4
> +7:
> + mov w6, #0x1b58
> + b L(wait_init)
> +6:
> + mov w6, #0x1770
> + b L(wait_init)
> +5:
> + mov w6, #0x1388
> + b L(wait_init)
> +4:
> + mov w6, #0x0fa0
> + b L(wait_init)
> +3:
> + mov w6, #0x0bb8
> + b L(wait_init)
> +2:
> + mov w6, #0x07d0
> + b L(wait_init)
> +1:
> + mov w6, #0x03e8
> + b L(wait_init)
> +
> +L(wait_init):
> + mov w5, #0x0
> +L(wait):
> + add w5, w5, #0x1
> + cmp w5, w6
> + b.lt L(wait) /* Wait ends when w5 equals w6. */
> +
> +L(stxr_try):
> + ldr w2, [x0]
> + cbz w2, L(spin)
> + and w3, w3, #0x07
> + /* 8 distinct waiting times, selected
> + by the lower three bits of w3. */
> + cmp w3, #0x01
> + beq 1b
> + cmp w3, #0x02
> + beq 2b
> + cmp w3, #0x03
> + beq 3b
> + cmp w3, #0x04
> + beq 4b
> + cmp w3, #0x05
> + beq 5b
> + cmp w3, #0x06
> + beq 6b
> + cmp w3, #0x07
> + beq 7b
> +
> + wfe /* w3=0x000: the most intense situation, so wait 50 us */
> + b L(stxr_try)
> +
> +
> +END (pthread_spin_lock)
> +libc_hidden_builtin_def (pthread_spin_lock)
> +weak_alias (pthread_spin_lock, index)
> diff --git a/sysdeps/aarch64/nptl/pthread_spin_unlock.S b/sysdeps/aarch64/nptl/pthread_spin_unlock.S
> new file mode 100644
> index 00000000000..57a6bca86fe
> --- /dev/null
> +++ b/sysdeps/aarch64/nptl/pthread_spin_unlock.S
> @@ -0,0 +1,34 @@
> +/* pthread_spin_unlock -- unlock a spin lock. Generic version.
> + Copyright (C) 2012-2019 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library. If not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#include <sysdep.h>
> +
> +/* Assumptions:
> + *
> + * ARMv8-a, AArch64
> + */
> +
> +ENTRY (pthread_spin_unlock)
> + DELOUSE (0)
> + stlr wzr, [x0]
> + mov w0, #0x0
Actually, the C version of pthread_spin_unlock is already implemented
this way and compiles to exactly the same assembly.
Why do you need an assembly version?
Thanks,
Andrew Pinski
PS for reference:
./nptl/pthread_spin_unlock.o: file format elf64-littleaarch64
Disassembly of section .text:
0000000000000000 <pthread_spin_unlock>:
0: 889ffc1f stlr wzr, [x0]
4: 52800000 mov w0, #0x0 // #0
8: d65f03c0 ret
> + ret
> +
> +END (pthread_spin_unlock)
> +libc_hidden_builtin_def (pthread_spin_unlock)
> +weak_alias (pthread_spin_unlock, index)
> --
> 2.14.1.windows.1
>
>