sourceware.org Git - glibc.git/commitdiff
x86-64: Update _dl_tlsdesc_dynamic to preserve AMX registers
author: H.J. Lu <hjl.tools@gmail.com>
Wed, 28 Feb 2024 20:08:03 +0000 (12:08 -0800)
committer: H.J. Lu <hjl.tools@gmail.com>
Thu, 29 Feb 2024 12:30:01 +0000 (04:30 -0800)
_dl_tlsdesc_dynamic should also preserve AMX registers which are
caller-saved.  Add X86_XSTATE_TILECFG_ID and X86_XSTATE_TILEDATA_ID
to x86-64 TLSDESC_CALL_STATE_SAVE_MASK.  Compute the AMX state size
and save it in xsave_state_full_size which is only used by
_dl_tlsdesc_dynamic_xsave and _dl_tlsdesc_dynamic_xsavec.  This fixes
the AMX part of BZ #31372.  Tested on AMX processor.

AMX test is enabled only for compilers with the fix for

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114098

GCC 14 and GCC 11/12/13 branches have the bug fix.
Reviewed-by: Sunil K Pandey <skpgkp2@gmail.com>
14 files changed:
sysdeps/unix/sysv/linux/x86_64/Makefile
sysdeps/unix/sysv/linux/x86_64/include/asm/prctl.h
sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod0.c [new file with mode: 0644]
sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod1.c [new file with mode: 0644]
sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod2.c [new file with mode: 0644]
sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.c [new file with mode: 0644]
sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.h [new file with mode: 0644]
sysdeps/x86/cpu-features-offsets.sym
sysdeps/x86/cpu-features.c
sysdeps/x86/include/cpu-features.h
sysdeps/x86/sysdep.h
sysdeps/x86_64/configure
sysdeps/x86_64/configure.ac
sysdeps/x86_64/dl-tlsdesc-dynamic.h

index 7d1d205fa0b264a7e11bbdf27fb40941cbb94b53..fcbffd81cbaa031d7b75ed29e9b374d342ce9bb2 100644 (file)
@@ -66,6 +66,33 @@ $(objpfx)libx86-64-isa-level%.os: $(..)/sysdeps/unix/sysv/linux/x86_64/x86-64-is
 $(objpfx)libx86-64-isa-level.so: $(objpfx)libx86-64-isa-level-1.so
        cp $< $@
 endif
+
+ifeq (yes,$(have-mamx-tile))
+tests += \
+  tst-gnu2-tls2-amx \
+# tests
+
+modules-names += \
+  tst-gnu2-tls2-amx-mod0 \
+  tst-gnu2-tls2-amx-mod1 \
+  tst-gnu2-tls2-amx-mod2 \
+# modules-names
+
+$(objpfx)tst-gnu2-tls2-amx: $(shared-thread-library)
+$(objpfx)tst-gnu2-tls2-amx.out: \
+  $(objpfx)tst-gnu2-tls2-amx-mod0.so \
+  $(objpfx)tst-gnu2-tls2-amx-mod1.so \
+  $(objpfx)tst-gnu2-tls2-amx-mod2.so
+$(objpfx)tst-gnu2-tls2-amx-mod0.so: $(libsupport)
+$(objpfx)tst-gnu2-tls2-amx-mod1.so: $(libsupport)
+$(objpfx)tst-gnu2-tls2-amx-mod2.so: $(libsupport)
+
+CFLAGS-tst-gnu2-tls2-amx.c += -mamx-tile
+CFLAGS-tst-gnu2-tls2-amx-mod0.c += -mamx-tile -mtls-dialect=gnu2
+CFLAGS-tst-gnu2-tls2-amx-mod1.c += -mamx-tile -mtls-dialect=gnu2
+CFLAGS-tst-gnu2-tls2-amx-mod2.c += -mamx-tile -mtls-dialect=gnu2
+endif
+
 endif # $(subdir) == elf
 
 ifneq ($(enable-cet),no)
index 2f511321ad3b3ac11d92b44d5f09f29ac7c92a6f..ef4631bf4b2fd9aad3a225302124c814c3c7df08 100644 (file)
@@ -20,3 +20,8 @@
 # define ARCH_SHSTK_SHSTK              0x1
 # define ARCH_SHSTK_WRSS               0x2
 #endif
+
+#ifndef ARCH_GET_XCOMP_PERM
+# define ARCH_GET_XCOMP_PERM           0x1022
+# define ARCH_REQ_XCOMP_PERM           0x1023
+#endif
diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod0.c b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod0.c
new file mode 100644 (file)
index 0000000..2e0c7b9
--- /dev/null
@@ -0,0 +1,2 @@
+#include "tst-gnu2-tls2-amx.h"
+#include <tst-gnu2-tls2mod0.c>
diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod1.c b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod1.c
new file mode 100644 (file)
index 0000000..b8a8ccf
--- /dev/null
@@ -0,0 +1,2 @@
+#include "tst-gnu2-tls2-amx.h"
+#include <tst-gnu2-tls2mod1.c>
diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod2.c b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod2.c
new file mode 100644 (file)
index 0000000..cdf4a8f
--- /dev/null
@@ -0,0 +1,2 @@
+#include "tst-gnu2-tls2-amx.h"
+#include <tst-gnu2-tls2mod2.c>
diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.c b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.c
new file mode 100644 (file)
index 0000000..ae4dd82
--- /dev/null
@@ -0,0 +1,83 @@
+/* Test TLSDESC relocation with AMX.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <stdbool.h>
+#include <asm/prctl.h>
+#include <support/check.h>
+#include "tst-gnu2-tls2-amx.h"
+
+extern int arch_prctl (int, ...);
+
+#define X86_XSTATE_TILECFG_ID  17
+#define X86_XSTATE_TILEDATA_ID 18
+
+/* Fill in *TILEINFO and load it as the current AMX tile configuration.
+   Selects palette 1 with start_row 0.  Tile 0 gets MAX_ROWS rows with
+   colsb set to MAX_ROWS; tiles 1 to 3 get MAX_ROWS rows with colsb set
+   to MAX_COLS.  Entries for other tiles are not written here, so they
+   keep whatever the caller stored in *TILEINFO.
+   NOTE(review): tile 0 uses MAX_ROWS rather than MAX_COLS for colsb;
+   the BEFORE/AFTER_TLSDESC_CALL macros only touch tile 2, so this
+   does not affect the check - confirm it is intended.  */
+__attribute__ ((noinline, noclone))
+static void
+init_tile_config (__tilecfg *tileinfo)
+{
+  int i;
+  tileinfo->palette_id = 1;
+  tileinfo->start_row = 0;
+
+  tileinfo->colsb[0] = MAX_ROWS;
+  tileinfo->rows[0] = MAX_ROWS;
+
+  for (i = 1; i < 4; ++i)
+  {
+    tileinfo->colsb[i] = MAX_COLS;
+    tileinfo->rows[i] = MAX_ROWS;
+  }
+
+  _tile_loadconfig (tileinfo);
+}
+/* Try to make AMX tiles usable in this process.  Return false if
+   either arch_prctl call fails or if the TILECFG bit is clear in the
+   mask stored by ARCH_GET_XCOMP_PERM; otherwise load a tile
+   configuration with init_tile_config and return true.  Used as
+   IS_SUPPORTED for the generic test included at the end of this
+   file.  */
+static bool
+enable_amx (void)
+{
+  uint64_t bitmask;
+  if (arch_prctl (ARCH_GET_XCOMP_PERM, &bitmask) != 0)
+    return false;
+
+  if ((bitmask & (1 << X86_XSTATE_TILECFG_ID)) == 0)
+    return false;
+
+  if (arch_prctl (ARCH_REQ_XCOMP_PERM, X86_XSTATE_TILEDATA_ID) != 0)
+    return false;
+
+  /* Load tile configuration.  The zero initializer leaves every field
+     that init_tile_config does not set at 0.  */
+  __tilecfg tile_data = { 0 };
+  init_tile_config (&tile_data);
+
+  return true;
+}
+
+/* Zero AMX tile register 2, the tile that BEFORE_TLSDESC_CALL loads
+   and AFTER_TLSDESC_CALL checks.  This is installed as PREPARE_MALLOC
+   for the generic test included below, which is expected to run it
+   from its malloc (that code is not part of this file), so that tile 2
+   is clobbered during the implicit TLSDESC call and the test can
+   verify that the call does not change caller-saved AMX registers.  */
+static void
+clear_tile_register (void)
+{
+  _tile_zero (2);
+}
+
+#define MOD(i) "tst-gnu2-tls2-amx-mod" #i ".so"
+#define IS_SUPPORTED() enable_amx ()
+#define PREPARE_MALLOC() clear_tile_register ()
+
+#include <elf/tst-gnu2-tls2.c>
diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.h b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.h
new file mode 100644 (file)
index 0000000..1845a3c
--- /dev/null
@@ -0,0 +1,63 @@
+/* Test TLSDESC relocation with AMX.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <stdint.h>
+#include <string.h>
+#include <x86intrin.h>
+#include <support/check.h>
+
+#define MAX_ROWS 16
+#define MAX_COLS 64
+#define MAX 1024
+#define STRIDE 64
+/* In-memory tile configuration that the test hands to
+   _tile_loadconfig.  The members add up to 64 bytes and the typedef
+   is 64-byte aligned.  */
+typedef struct __tile_config
+{
+  uint8_t palette_id;		/* Palette selector; the test uses 1.  */
+  uint8_t start_row;		/* The test sets this to 0.  */
+  uint8_t reserved_0[14];	/* Never written by the test code.  */
+  uint16_t colsb[16];		/* Per-tile row width, presumably in bytes.  */
+  uint8_t rows[16];		/* Per-tile row count.  */
+} __tilecfg __attribute__ ((aligned (64)));
+
+/* Set the first MAX_ROWS * MAX_COLS elements of BUF to VALUE.  BUF
+   must have room for at least that many int8_t elements.  */
+static inline void
+init_buffer (int8_t *buf, int8_t value)
+{
+  int rows, colsb, i, j;
+  rows  = MAX_ROWS;
+  colsb = MAX_COLS;
+
+  for (i = 0; i < rows; i++)
+    for (j = 0; j < colsb; j++)
+      buf[i * colsb + j] = value;
+}
+
+#define BEFORE_TLSDESC_CALL()                                  \
+  int8_t src[MAX];                                             \
+  int8_t res[MAX];                                             \
+  /* src and res stay in scope for AFTER_TLSDESC_CALL.  Fill   \
+     the first MAX_ROWS * MAX_COLS bytes of src with 2.  */    \
+  init_buffer (src, 2);                                                \
+  /* Load tile 2 from src using a stride of STRIDE.  */                \
+  _tile_loadd (2, src, STRIDE);
+
+#define AFTER_TLSDESC_CALL()                                   \
+  /* Store tile 2 into res, release the tile state, and then   \
+     require res to match src over all sizeof (res) bytes.  */ \
+  _tile_stored (2, res, STRIDE);                               \
+  _tile_release ();                                            \
+  TEST_VERIFY_EXIT (memcmp (src, res, sizeof (res)) == 0);
index 6a8fd298137b7f23fb77c7d51de69116959f5cf5..21fc88d6510840e6dc07b20eb73d034fbe5d7584 100644 (file)
@@ -3,3 +3,4 @@
 #include <ldsodefs.h>
 
 XSAVE_STATE_SIZE_OFFSET        offsetof (struct cpu_features, xsave_state_size)
+XSAVE_STATE_FULL_SIZE_OFFSET offsetof (struct cpu_features, xsave_state_full_size)
index 0ad0a78f67a2c3d26630d065d805a4809aa3f2c5..e7c7ece462678887da9116dd708b31f5368273e7 100644 (file)
@@ -308,6 +308,8 @@ update_active (struct cpu_features *cpu_features)
          __cpuid_count (0xd, 0, eax, ebx, ecx, edx);
          if (ebx != 0)
            {
+             /* NB: On AMX capable processors, ebx always includes AMX
+                states.  */
              unsigned int xsave_state_full_size
                = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);
 
@@ -321,6 +323,11 @@ update_active (struct cpu_features *cpu_features)
                {
                  unsigned int xstate_comp_offsets[32];
                  unsigned int xstate_comp_sizes[32];
+#ifdef __x86_64__
+                 unsigned int xstate_amx_comp_offsets[32];
+                 unsigned int xstate_amx_comp_sizes[32];
+                 unsigned int amx_ecx;
+#endif
                  unsigned int i;
 
                  xstate_comp_offsets[0] = 0;
@@ -328,16 +335,39 @@ update_active (struct cpu_features *cpu_features)
                  xstate_comp_offsets[2] = 576;
                  xstate_comp_sizes[0] = 160;
                  xstate_comp_sizes[1] = 256;
+#ifdef __x86_64__
+                 xstate_amx_comp_offsets[0] = 0;
+                 xstate_amx_comp_offsets[1] = 160;
+                 xstate_amx_comp_offsets[2] = 576;
+                 xstate_amx_comp_sizes[0] = 160;
+                 xstate_amx_comp_sizes[1] = 256;
+#endif
 
                  for (i = 2; i < 32; i++)
                    {
-                     if ((STATE_SAVE_MASK & (1 << i)) != 0)
+                     if ((FULL_STATE_SAVE_MASK & (1 << i)) != 0)
                        {
                          __cpuid_count (0xd, i, eax, ebx, ecx, edx);
-                         xstate_comp_sizes[i] = eax;
+#ifdef __x86_64__
+                         /* Include this in xsave_state_full_size.  */
+                         amx_ecx = ecx;
+                         xstate_amx_comp_sizes[i] = eax;
+                         if ((AMX_STATE_SAVE_MASK & (1 << i)) != 0)
+                           {
+                             /* Exclude this from xsave_state_size.  */
+                             ecx = 0;
+                             xstate_comp_sizes[i] = 0;
+                           }
+                         else
+#endif
+                           xstate_comp_sizes[i] = eax;
                        }
                      else
                        {
+#ifdef __x86_64__
+                         amx_ecx = 0;
+                         xstate_amx_comp_sizes[i] = 0;
+#endif
                          ecx = 0;
                          xstate_comp_sizes[i] = 0;
                        }
@@ -350,6 +380,15 @@ update_active (struct cpu_features *cpu_features)
                          if ((ecx & (1 << 1)) != 0)
                            xstate_comp_offsets[i]
                              = ALIGN_UP (xstate_comp_offsets[i], 64);
+#ifdef __x86_64__
+                         xstate_amx_comp_offsets[i]
+                           = (xstate_amx_comp_offsets[i - 1]
+                              + xstate_amx_comp_sizes[i - 1]);
+                         if ((amx_ecx & (1 << 1)) != 0)
+                           xstate_amx_comp_offsets[i]
+                             = ALIGN_UP (xstate_amx_comp_offsets[i],
+                                         64);
+#endif
                        }
                    }
 
@@ -358,6 +397,18 @@ update_active (struct cpu_features *cpu_features)
                    = xstate_comp_offsets[31] + xstate_comp_sizes[31];
                  if (size)
                    {
+#ifdef __x86_64__
+                     unsigned int amx_size
+                       = (xstate_amx_comp_offsets[31]
+                          + xstate_amx_comp_sizes[31]);
+                     amx_size = ALIGN_UP (amx_size + STATE_SAVE_OFFSET,
+                                          64);
+                     /* Set xsave_state_full_size to the compact AMX
+                        state size for XSAVEC.  NB: xsave_state_full_size
+                        is only used in _dl_tlsdesc_dynamic_xsave and
+                        _dl_tlsdesc_dynamic_xsavec.  */
+                     cpu_features->xsave_state_full_size = amx_size;
+#endif
                      cpu_features->xsave_state_size
                        = ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
                      CPU_FEATURE_SET (cpu_features, XSAVEC);
index b9bf3115b616f05f94b23c2937fe8c44db847e8c..cd7bd27cf35959fddb5a4388ba0bb3cf6259a62c 100644 (file)
@@ -934,6 +934,8 @@ struct cpu_features
   /* The full state size for XSAVE when XSAVEC is disabled by
 
      GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVEC
+
+     and the AMX state size when XSAVEC is available.
    */
   unsigned int xsave_state_full_size;
   /* Data cache size for use in memory and string routines, typically
index 485cad9c0283b3347cb6232d3ed8ce7ad104deb9..db8e576e91767db5bd88770ac193047355d090fa 100644 (file)
    | (1 << X86_XSTATE_ZMM_H_ID)        \
    | (1 << X86_XSTATE_ZMM_ID)          \
    | (1 << X86_XSTATE_APX_F_ID))
+
+/* AMX state mask.  */
+# define AMX_STATE_SAVE_MASK           \
+  ((1 << X86_XSTATE_TILECFG_ID) | (1 << X86_XSTATE_TILEDATA_ID))
+
+/* States to be included in xsave_state_full_size.  */
+# define FULL_STATE_SAVE_MASK          \
+  (STATE_SAVE_MASK | AMX_STATE_SAVE_MASK)
 #else
 /* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic.  Since i386
    doesn't have red-zone, use 0 here.  */
    | (1 << X86_XSTATE_BNDREGS_ID)      \
    | (1 << X86_XSTATE_K_ID)            \
    | (1 << X86_XSTATE_ZMM_H_ID))
+
+/* States to be included in xsave_state_size.  */
+# define FULL_STATE_SAVE_MASK          STATE_SAVE_MASK
 #endif
 
 /* States which should be saved for TLSDESC_CALL and TLS_DESC_CALL.
-   Compiler assumes that all registers, including x87 FPU stack registers,
-   are unchanged after CALL, except for EFLAGS and RAX/EAX.  */
+   Compiler assumes that all registers, including AMX and x87 FPU
+   stack registers, are unchanged after CALL, except for EFLAGS and
+   RAX/EAX.  */
 #define TLSDESC_CALL_STATE_SAVE_MASK   \
-  (STATE_SAVE_MASK | (1 << X86_XSTATE_X87_ID))
+  (FULL_STATE_SAVE_MASK | (1 << X86_XSTATE_X87_ID))
 
 /* Constants for bits in __x86_string_control:  */
 
index 418cc4a9b862f7e05bec199c381d9530518eb78d..04a534fa126a7bf77f6086576bb5d45a5c57c0cc 100755 (executable)
@@ -134,6 +134,34 @@ fi
 config_vars="$config_vars
 enable-cet = $enable_cet"
 
+# Check if -mamx-tile works properly.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether -mamx-tile works properly" >&5
+printf %s "checking whether -mamx-tile works properly... " >&6; }
+if test ${libc_cv_x86_have_amx_tile+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  cat > conftest.c <<EOF
+#include <x86intrin.h>
+EOF
+              libc_cv_x86_have_amx_tile=no
+              if { ac_try='${CC-cc} -E $CFLAGS -mamx-tile conftest.c > conftest.i'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then
+                if grep -q __builtin_ia32_ldtilecfg conftest.i; then
+                  libc_cv_x86_have_amx_tile=yes
+                fi
+              fi
+              rm -rf conftest*
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_x86_have_amx_tile" >&5
+printf "%s\n" "$libc_cv_x86_have_amx_tile" >&6; }
+config_vars="$config_vars
+have-mamx-tile = $libc_cv_x86_have_amx_tile"
+
 test -n "$critic_missing" && as_fn_error $? "
 *** $critic_missing" "$LINENO" 5
 
index d1f803c02ee67fc5effd5058f277b35e31f5ead2..c714c47351e70390b5cf6de35d12e76ab9d87c2e 100644 (file)
@@ -61,5 +61,20 @@ elif test $enable_cet = permissive; then
 fi
 LIBC_CONFIG_VAR([enable-cet], [$enable_cet])
 
+# Check if -mamx-tile works properly.
+AC_CACHE_CHECK(whether -mamx-tile works properly,
+              libc_cv_x86_have_amx_tile, [dnl
+cat > conftest.c <<EOF
+#include <x86intrin.h>
+EOF
+              libc_cv_x86_have_amx_tile=no
+              if AC_TRY_COMMAND(${CC-cc} -E $CFLAGS -mamx-tile conftest.c > conftest.i); then
+                if grep -q __builtin_ia32_ldtilecfg conftest.i; then
+                  libc_cv_x86_have_amx_tile=yes
+                fi
+              fi
+              rm -rf conftest*])
+LIBC_CONFIG_VAR([have-mamx-tile], [$libc_cv_x86_have_amx_tile])
+
 test -n "$critic_missing" && AC_MSG_ERROR([
 *** $critic_missing])
index 0c2e8d5320d0bd26b0f625b01aa3d250fc538ac0..9f02cfc3eb297ed23a9c33f83f1d96acb164c6dc 100644 (file)
@@ -99,7 +99,7 @@ _dl_tlsdesc_dynamic:
 # endif
 #else
        /* Allocate stack space of the required size to save the state.  */
-       sub     _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
+       sub     _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_FULL_SIZE_OFFSET(%rip), %RSP_LP
 #endif
        /* Besides rdi and rsi, saved above, save rcx, rdx, r8, r9,
           r10 and r11.  */
This page took 0.06273 seconds and 5 git commands to generate.