This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] aarch64: Optimized implementation of pthread_spin_lock and unlock


On Mon, Oct 28, 2019 at 6:39 AM Xuelei Zhang <zhangxuelei4@huawei.com> wrote:
>
> An optimized algorithm of spin_lock is implemented to wait for
> random time in the case that multi-threads want to store spinlock
> at the same time. This way can make the operation of different
> threads asynchronous, thereby reducing bus conflicts, and further
> improving the overall performance, which benefits more on aarch64
> for its multi-core features.
>
> In addition, the assembly version of spin_unlock is also implemented.
> ---
>  sysdeps/aarch64/nptl/pthread_spin_lock.S   | 103 +++++++++++++++++++++++++++++
>  sysdeps/aarch64/nptl/pthread_spin_unlock.S |  34 ++++++++++
>  2 files changed, 137 insertions(+)
>  create mode 100644 sysdeps/aarch64/nptl/pthread_spin_lock.S
>  create mode 100644 sysdeps/aarch64/nptl/pthread_spin_unlock.S
>
> diff --git a/sysdeps/aarch64/nptl/pthread_spin_lock.S b/sysdeps/aarch64/nptl/pthread_spin_lock.S
> new file mode 100644
> index 00000000000..662410c5fa3
> --- /dev/null
> +++ b/sysdeps/aarch64/nptl/pthread_spin_lock.S
> @@ -0,0 +1,103 @@
> +/* pthread_spin_lock -- lock a spin lock. Generic version.
> +   Copyright (C) 2012-2019 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library.  If not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <sysdep.h>
> +
> +/* Assumptions:
> + *
> + * ARMv8-a, AArch64
> + */
> +
> +ENTRY (pthread_spin_lock)
> +       DELOUSE (0)
> +
> +       mov     w1, #0x1
> +
> +L(spin):
> +       prfm    pldl1strm, [x0]
> +    /* A count to distinguish waiting time.
> +    The larger the value, the more intense the current lock
> +    conflicts, and the longer the waiting time. */
> +       add     w3, w3, #0x1
> +       ldaxr   w2, [x0]
> +       cbnz    w2, L(spin)
> +       stxr    w2, w1, [x0]
> +       cbnz    w2, 1f
> +       b       L(end)
> +
> +L(end):
> +       mov w0, #0x0
> +       ret
> +
> +    /* Set the loop times of L(wait) from 1000 to 7000 cycles,
> +    equals waiting 1~2us per 1000 cycles. */
> +       .p2align 4
> +7:
> +       mov     w6, #0x1b58
> +       b       L(wait_init)
> +6:
> +       mov     w6, #0x1770
> +       b       L(wait_init)
> +5:
> +       mov     w6, #0x1388
> +       b       L(wait_init)
> +4:
> +       mov     w6, #0x0fa0
> +       b       L(wait_init)
> +3:
> +       mov     w6, #0x0bb8
> +       b       L(wait_init)
> +2:
> +       mov     w6, #0x07d0
> +       b       L(wait_init)
> +1:
> +       mov     w6, #0x03e8
> +       b       L(wait_init)
> +
> +L(wait_init):
> +       mov     w5, #0x0
> +L(wait):
> +       add     w5, w5, #0x1
> +       cmp     w5, w6
> +       b.lt    L(wait) /* Wait ends when w5 equals w6. */
> +
> +L(stxr_try):
> +       ldr     w2, [x0]
> +       cbz     w2, L(spin)
> +       and     w3, w3, #0x07
> +    /* 8 different waiting times, selected
> +    by the lower three bits of w3. */
> +       cmp     w3, #0x01
> +       beq     1b
> +       cmp     w3, #0x02
> +       beq     2b
> +       cmp     w3, #0x03
> +       beq     3b
> +       cmp     w3, #0x04
> +       beq     4b
> +       cmp     w3, #0x05
> +       beq     5b
> +       cmp     w3, #0x06
> +       beq     6b
> +       cmp     w3, #0x07
> +       beq     7b
> +
> +       wfe /* w3=0x000: the most intense situation so to wait 50 us */
> +       b       L(stxr_try)
> +
> +
> +END (pthread_spin_lock)
> +libc_hidden_builtin_def (pthread_spin_lock)
> +weak_alias (pthread_spin_lock, index)
> diff --git a/sysdeps/aarch64/nptl/pthread_spin_unlock.S b/sysdeps/aarch64/nptl/pthread_spin_unlock.S
> new file mode 100644
> index 00000000000..57a6bca86fe
> --- /dev/null
> +++ b/sysdeps/aarch64/nptl/pthread_spin_unlock.S
> @@ -0,0 +1,34 @@
> +/* pthread_spin_unlock -- unlock a spin lock. Generic version.
> +   Copyright (C) 2012-2019 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library.  If not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <sysdep.h>
> +
> +/* Assumptions:
> + *
> + * ARMv8-a, AArch64
> + */
> +
> +ENTRY (pthread_spin_unlock)
> +       DELOUSE (0)
> +       stlr    wzr, [x0]
> +       mov     w0, #0x0

Actually the C version of pthread_spin_unlock is implemented this way
already and provides exactly this same assembly.
Why do you need an assembly version?

Thanks,
Andrew Pinski

PS for reference:
./nptl/pthread_spin_unlock.o:     file format elf64-littleaarch64


Disassembly of section .text:

0000000000000000 <pthread_spin_unlock>:
   0:   889ffc1f        stlr    wzr, [x0]
   4:   52800000        mov     w0, #0x0                        // #0
   8:   d65f03c0        ret




> +       ret
> +
> +END (pthread_spin_unlock)
> +libc_hidden_builtin_def (pthread_spin_unlock)
> +weak_alias (pthread_spin_unlock, index)
> --
> 2.14.1.windows.1
>
>


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]