[PATCH] AArch64: Optimize memset
Florian Weimer
fweimer@redhat.com
Thu Nov 21 18:51:31 GMT 2024
* Wilco Dijkstra:
> Improve small memsets by avoiding branches and use overlapping stores.
> Use DC ZVA for copies over 128 bytes. Remove unnecessary code for ZVA
> sizes other than 64 and 128. Performance of random memset benchmark
> improves by 24% on Neoverse N1.
>
> Passes regress, OK for commit?
We have received a report that this breaks certain virtualized AArch64
environments:
Bug 2327564 - glibc-2.40.9000-4.fc42 update causes kernel panic during
aarch64 Fedora CoreOS boot on OpenStack
<https://bugzilla.redhat.com/show_bug.cgi?id=2327564>
Output from ld.so --list-diagnostics is below.
The crash is in PID 1, so there is not much data to go by. I can
probably get a login on such a system running glibc 2.40 and try to run
the glibc test suite from the current development branch there, but I'm
not sure if I could make sense of the results.
One issue I see is that the dczid_el0 masking in
sysdeps/aarch64/memset.S is inconsistent with
sysdeps/unix/sysv/linux/aarch64/cpu-features.c: the former uses 31, while
the latter uses 0xf. But that does not make a difference here because
dczid_el0 is 5, so both masks yield the same value.
Thanks,
Florian
dl_dst_lib="lib64"
dl_hwcap=0x8ff
dl_hwcap2=0x0
dl_hwcap3=0x0
dl_hwcap4=0x0
dl_hwcaps_subdirs=""
dl_hwcaps_subdirs_active=0x0
dl_pagesize=0x1000
dl_platform="aarch64"
dl_profile_output="/var/tmp"
dso.ld="ld-linux-aarch64.so.1"
dso.libc="libc.so.6"
env_filtered[0x0]="SHELL"
env_filtered[0x1]="HISTCONTROL"
env_filtered[0x2]="HISTSIZE"
env_filtered[0x3]="HOSTNAME"
env_filtered[0x4]="GPG_TTY"
env_filtered[0x5]="EDITOR"
env_filtered[0x6]="PWD"
env_filtered[0x7]="LOGNAME"
env_filtered[0x8]="XDG_SESSION_TYPE"
env_filtered[0x9]="MOTD_SHOWN"
env_filtered[0xa]="HOME"
env[0xb]="LANG=C.UTF-8"
env_filtered[0xc]="LS_COLORS"
env_filtered[0xd]="SSH_CONNECTION"
env_filtered[0xe]="XDG_SESSION_CLASS"
env_filtered[0xf]="SELINUX_ROLE_REQUESTED"
env_filtered[0x10]="TERM"
env_filtered[0x11]="LESSOPEN"
env_filtered[0x12]="USER"
env_filtered[0x13]="SELINUX_USE_CURRENT_RANGE"
env_filtered[0x14]="SHLVL"
env_filtered[0x15]="XDG_SESSION_ID"
env_filtered[0x16]="XDG_RUNTIME_DIR"
env_filtered[0x17]="SSH_CLIENT"
env[0x18]="PATH=/var/home/core/.local/bin:/var/home/core/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin"
env_filtered[0x19]="SELINUX_LEVEL_REQUESTED"
env_filtered[0x1a]="DBUS_SESSION_BUS_ADDRESS"
env_filtered[0x1b]="MAIL"
env_filtered[0x1c]="SSH_TTY"
env_filtered[0x1d]="_"
path.prefix="/usr"
path.rtld="/lib/ld-linux-aarch64.so.1"
path.sysconfdir="/etc"
path.system_dirs[0x0]="/lib64/"
path.system_dirs[0x1]="/usr/lib64/"
version.release="development"
version.version="2.40.9000"
auxv[0x0].a_type=0x21
auxv[0x0].a_val=0xffffa482b000
auxv[0x1].a_type=0x33
auxv[0x1].a_val=0x1270
auxv[0x2].a_type=0x10
auxv[0x2].a_val=0x8ff
auxv[0x3].a_type=0x6
auxv[0x3].a_val=0x1000
auxv[0x4].a_type=0x11
auxv[0x4].a_val=0x64
auxv[0x5].a_type=0x3
auxv[0x5].a_val=0xffffa47e0040
auxv[0x6].a_type=0x4
auxv[0x6].a_val=0x38
auxv[0x7].a_type=0x5
auxv[0x7].a_val=0x9
auxv[0x8].a_type=0x7
auxv[0x8].a_val=0x0
auxv[0x9].a_type=0x8
auxv[0x9].a_val=0x0
auxv[0xa].a_type=0x9
auxv[0xa].a_val=0xffffa47faa80
auxv[0xb].a_type=0xb
auxv[0xb].a_val=0x3e8
auxv[0xc].a_type=0xc
auxv[0xc].a_val=0x3e8
auxv[0xd].a_type=0xd
auxv[0xd].a_val=0x3e8
auxv[0xe].a_type=0xe
auxv[0xe].a_val=0x3e8
auxv[0xf].a_type=0x17
auxv[0xf].a_val=0x0
auxv[0x10].a_type=0x19
auxv[0x10].a_val=0xffffee238298
auxv[0x11].a_type=0x1a
auxv[0x11].a_val=0x0
auxv[0x12].a_type=0x1d
auxv[0x12].a_val=0x0
auxv[0x13].a_type=0x1f
auxv[0x13].a_val_string="/usr/bin/ld.so"
auxv[0x14].a_type=0xf
auxv[0x14].a_val_string="aarch64"
auxv[0x15].a_type=0x1b
auxv[0x15].a_val=0x1c
auxv[0x16].a_type=0x1c
auxv[0x16].a_val=0x20
uname.sysname="Linux"
uname.nodename="host-192-168-40-30"
uname.release="6.13.0-0.rc0.20241119git158f238aa69d.2.fc42.aarch64"
uname.version="#1 SMP PREEMPT_DYNAMIC Tue Nov 19 16:59:12 UTC 2024"
uname.machine="aarch64"
uname.domainname="(none)"
aarch64.cpu_features.bti=0x0
aarch64.cpu_features.midr_el1=0x431f0a11
aarch64.cpu_features.mops=0x0
aarch64.cpu_features.mte_state=0x0
aarch64.cpu_features.prefer_sve_ifuncs=0x0
aarch64.cpu_features.sve=0x0
aarch64.cpu_features.zva_size=0x80
aarch64.processor[0x0].requested=0x0
aarch64.processor[0x0].observed=0x0
aarch64.processor[0x0].observed_node=0x0
aarch64.processor[0x0].midr_el1=0x431f0a11
aarch64.processor[0x0].dczid_el0=0x5
aarch64.processor[0x1].requested=0x1
aarch64.processor[0x1].observed=0x1
aarch64.processor[0x1].observed_node=0x0
aarch64.processor[0x1].midr_el1=0x431f0a11
aarch64.processor[0x1].dczid_el0=0x5
aarch64.processor[0x2].requested=0x2
aarch64.processor[0x2].observed=0x2
aarch64.processor[0x2].observed_node=0x0
aarch64.processor[0x2].midr_el1=0x431f0a11
aarch64.processor[0x2].dczid_el0=0x5
aarch64.processor[0x3].requested=0x3
aarch64.processor[0x3].observed=0x3
aarch64.processor[0x3].observed_node=0x0
aarch64.processor[0x3].midr_el1=0x431f0a11
aarch64.processor[0x3].dczid_el0=0x5
More information about the Libc-alpha
mailing list