This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] aarch64: Optimized implementation of pthread_spin_lock and unlock


On Mon, Oct 28, 2019 at 6:39 AM Xuelei Zhang <zhangxuelei4@huawei.com> wrote:
>
> An optimized algorithm of spin_lock is implemented to wait for
> random time in the case that multi-threads want to store spinlock
> at the same time. This way can make the operation of different
> threads asynchronous, thereby reducing bus conflicts, and futherly
> improving the overall performance, which benefits more on aarch64
> for its multi-core features.
>
> In addition, the assembly version of spin_unlock is also implemented.
> ---
>  sysdeps/aarch64/nptl/pthread_spin_lock.S   | 103 +++++++++++++++++++++++++++++
>  sysdeps/aarch64/nptl/pthread_spin_unlock.S |  34 ++++++++++
>  2 files changed, 137 insertions(+)
>  create mode 100644 sysdeps/aarch64/nptl/pthread_spin_lock.S
>  create mode 100644 sysdeps/aarch64/nptl/pthread_spin_unlock.S
>
> diff --git a/sysdeps/aarch64/nptl/pthread_spin_lock.S b/sysdeps/aarch64/nptl/pthread_spin_lock.S
> new file mode 100644
> index 00000000000..662410c5fa3
> --- /dev/null
> +++ b/sysdeps/aarch64/nptl/pthread_spin_lock.S
> @@ -0,0 +1,103 @@
> +/* pthread_spin_lock -- lock a spin lock. Generic version.
> +   Copyright (C) 2012-2019 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library.  If not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <sysdep.h>
> +
> +/* Assumptions:
> + *
> + * ARMv8-a, AArch64
> + */
> +
> +ENTRY (pthread_spin_lock)
> +       DELOUSE (0)
> +
> +       mov     w1, #0x1
> +
> +L(spin):
> +       prfm    pldl1strm, [x0]
> +    /* A count to distinguish wating time.
> +    The larger the value, the more intense the current lock
> +    conflicts, and the longer the waiting time. */
> +       add     w3, w3, #0x1
> +       ldaxr   w2, [x0]
> +       cbnz    w2, L(spin)
> +       stxr    w2, w1, [x0]
> +       cbnz    w2, 1f
> +       b       L(end)
> +
> +L(end):
> +       mov w0, #0x0
> +       ret
> +
> +    /* Set the loop times of L(wait) from 1000 to 7000 cycles,
> +    equals waiting 1~2us per 1000 cycles. */
> +       .p2align 4
> +7:
> +       mov     w6, #0x1b58
> +       b       L(wait_init)
> +6:
> +       mov     w6, #0x1770
> +       b       L(wait_init)
> +5:
> +       mov     w6, #0x1388
> +       b       L(wait_init)
> +4:
> +       mov     w6, #0x0fa0
> +       b       L(wait_init)
> +3:
> +       mov     w6, #0x0bb8
> +       b       L(wait_init)
> +2:
> +       mov     w6, #0x07d0
> +       b       L(wait_init)
> +1:
> +       mov     w6, #0x03e8
> +       b       L(wait_init)
> +
> +L(wait_init):
> +       mov     w5, #0x0
> +L(wait):
> +       add     w5, w5, #0x1
> +       cmp     w5, w6
> +       b.lt    L(wait) /* Wait ends when w5 equals w6. */

I could see this loop even taking less than one cycle/iteration.  With
a micro-op loop cache.

But really my bet is r3 will 99% be 0 so it won't be that random after all.
If this had the ability to become a ticket lock, then you could use
the difference between the ticket and the next ticket for the loop
time.  But since the ABI is set in stone (that is the unlocked value
needs to be all zeros), it does not have the ability to become a
ticket spin lock.

Maybe one way is to use the lower bits of the thread ID.  I don't know
if that would be fast enough to get though.  Maybe you could do
popcount on an TLS address; though that takes a few cycles due to
using the vector registers (no scalar popcount instruction).

Thanks,
Andrew Pinski

> +
> +L(stxr_try):
> +       ldr     w2, [x0]
> +       cbz     w2, L(spin)
> +       and     w3, w3, #0x07
> +    /* 8 kinds of distinguish wating time
> +    based on the lower three bits of w3. */
> +       cmp     w3, #0x01
> +       beq     1b
> +       cmp     w3, #0x02
> +       beq     2b
> +       cmp     w3, #0x03
> +       beq     3b
> +       cmp     w3, #0x04
> +       beq     4b
> +       cmp     w3, #0x05
> +       beq     5b
> +       cmp     w3, #0x06
> +       beq     6b
> +       cmp     w3, #0x07
> +       beq     7b
> +
> +       wfe /* w3=0x000: the most intense situation so to wait 50 us */
> +       b       L(stxr_try)
> +
> +
> +END (pthread_spin_lock)
> +libc_hidden_builtin_def (pthread_spin_lock)
> +weak_alias (pthread_spin_lock, index)
> diff --git a/sysdeps/aarch64/nptl/pthread_spin_unlock.S b/sysdeps/aarch64/nptl/pthread_spin_unlock.S
> new file mode 100644
> index 00000000000..57a6bca86fe
> --- /dev/null
> +++ b/sysdeps/aarch64/nptl/pthread_spin_unlock.S
> @@ -0,0 +1,34 @@
> +/* pthread_spin_unlock -- unlock a spin lock. Generic version.
> +   Copyright (C) 2012-2019 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library.  If not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <sysdep.h>
> +
> +/* Assumptions:
> + *
> + * ARMv8-a, AArch64
> + */
> +
> +ENTRY (pthread_spin_unlock)
> +       DELOUSE (0)
> +       stlr    wzr, [x0]
> +       mov     w0, #0x0
> +       ret
> +
> +END (pthread_spin_unlock)
> +libc_hidden_builtin_def (pthread_spin_unlock)
> +weak_alias (pthread_spin_unlock, index)
> --
> 2.14.1.windows.1
>
>


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]