[PATCH v13 4/6] nptl: Introduce RSEQ_GETMEM_VOLATILE and RSEQ_SETMEM

Mathieu Desnoyers mathieu.desnoyers@efficios.com
Tue Nov 19 20:27:08 GMT 2024


On 2024-11-19 13:58, Michael Jeanson wrote:
> This will be used to access and write to the rseq area in the extra TLS
> block.
> 
> Signed-off-by: Michael Jeanson <mjeanson@efficios.com>
> ---
> Changes since v12:
> - Split RSEQ_SET/GETMEM from THREAD_SET/GETMEM
> - Rename rseq_get_area() to RSEQ_SELF()
> - Add rtld_hidden_proto to __rseq_size and __rseq_offset
> ---
>   sysdeps/i386/nptl/tcb-access.h            | 54 ++++++++++++++++++++++
>   sysdeps/nptl/tcb-access.h                 |  9 ++++
>   sysdeps/unix/sysv/linux/dl-rseq-symbols.S | 27 +++++++----
>   sysdeps/unix/sysv/linux/rseq-internal.h   | 24 +++++++---
>   sysdeps/x86_64/nptl/tcb-access.h          | 55 +++++++++++++++++++++++
>   5 files changed, 154 insertions(+), 15 deletions(-)
> 
> diff --git a/sysdeps/i386/nptl/tcb-access.h b/sysdeps/i386/nptl/tcb-access.h
> index 4b6221e103..e421f47fe3 100644
> --- a/sysdeps/i386/nptl/tcb-access.h
> +++ b/sysdeps/i386/nptl/tcb-access.h
> @@ -123,3 +123,57 @@
>   			 "i" (offsetof (struct pthread, member)),	      \
>   			 "r" (idx));					      \
>          }})
> +
> +/* Read member of the RSEQ area directly, all fields currently require
> +   single-copy atomicity semantics reads, hence the 'volatile'.  */
> +#define RSEQ_GETMEM_VOLATILE(member) \
> +  ({ __typeof (RSEQ_SELF()->member) __value;				      \
> +     _Static_assert (sizeof (__value) == 1				      \
> +		     || sizeof (__value) == 4				      \
> +		     || sizeof (__value) == 8,				      \
> +		     "size of per-thread data");			      \
> +     if (sizeof (__value) == 1)						      \
> +       asm volatile ("movb %%gs:%P2(%3),%b0"				      \
> +		     : "=q" (__value)					      \
> +		     : "0" (0), "i" (offsetof (struct rseq_area, member)),   \
> +		     "r" (__rseq_offset));				      \
> +     else if (sizeof (__value) == 4)					      \
> +       asm volatile ("movl %%gs:%P1(%2),%0"				      \
> +		     : "=r" (__value)					      \
> +		     : "i" (offsetof (struct rseq_area, member)),	      \
> +		       "r" (__rseq_offset));				      \
> +     else /* 8 */							      \
> +       {								      \
> +	 asm volatile  ("movl %%gs:%P1(%2),%%eax\n\t"			      \
> +			"movl %%gs:4+%P1(%2),%%edx"			      \
> +			: "=&A" (__value)				      \
> +			: "i" (offsetof (struct rseq_area, member)),	      \
> +			  "r" (__rseq_offset));				      \
> +       }								      \

It's an error to try to do a single-copy atomicity read on a 64-bit
field on a 32-bit architecture with a pair of "movl" instructions:
the kernel could interrupt userspace code between those two instructions
and modify the field, thus leading to incoherent values.

We should probably fail the _Static_assert if a 32-bit architecture
tries to load a 64-bit value with RSEQ_GETMEM_VOLATILE so we catch
incorrect use early?


> +     __value; })
> +
> +/* Set member of the RSEQ area directly.  */
> +#define RSEQ_SETMEM(member, value) \
> +  ({									      \
> +     _Static_assert (sizeof (RSEQ_SELF()->member) == 1			      \
> +		     || sizeof (RSEQ_SELF()->member) == 4		      \
> +		     || sizeof (RSEQ_SELF()->member) == 8,		      \
> +		     "size of per-thread data");			      \
> +     if (sizeof (RSEQ_SELF()->member) == 1)				      \
> +       asm volatile ("movb %b0,%%gs:%P1(%2)" :				      \
> +		     : "iq" (value),					      \
> +		       "i" (offsetof (struct rseq_area, member)),	      \
> +		       "r" (__rseq_offset));				      \
> +     else if (sizeof (RSEQ_SELF()->member) == 4)			      \
> +       asm volatile ("movl %0,%%gs:%P1(%2)" :				      \
> +		     : "ir" (value),					      \
> +		       "i" (offsetof (struct rseq_area, member)),	      \
> +		       "r" (__rseq_offset));				      \
> +     else /* 8 */							      \
> +       {								      \
> +	 asm volatile ("movl %%eax,%%gs:%P1(%2)\n\t"			      \
> +		       "movl %%edx,%%gs:4+%P1(%2)" :			      \

Same comment as above, but this time for stores.

When storing the rseq_cs pointer (beginning of rseq critical section), I
would expect 32-bit userspace to only update the 32-bit LSB and leave
the 32-bit MSB untouched (zeroes), which allows it to do this update
with a single store. Trying to use a pair of stores to update both MSB
and LSB from a 32-bit userspace is not providing single-copy atomicity
wrt the kernel.

I would also recommend failing the _Static_assert here.

> +		       : "A" ((uint64_t) cast_to_integer (value)),	      \
> +			 "i" (offsetof (struct rseq_area, member)),	      \
> +			 "r" (__rseq_offset));				      \
> +       }})
> diff --git a/sysdeps/nptl/tcb-access.h b/sysdeps/nptl/tcb-access.h
> index 600433766f..3e8288b8a2 100644
> --- a/sysdeps/nptl/tcb-access.h
> +++ b/sysdeps/nptl/tcb-access.h
> @@ -30,3 +30,12 @@
>     descr->member = (value)
>   #define THREAD_SETMEM_NC(descr, member, idx, value) \
>     descr->member[idx] = (value)
> +
> +/* Read member of the RSEQ area directly, all fields currently require
> +   single-copy atomicity semantics reads, hence the 'volatile'.  */
> +#define RSEQ_GETMEM_VOLATILE(member) \
> +  (*(volatile __typeof (RSEQ_SELF()->member) *)&RSEQ_SELF()->member)
> +
> +/* Set member of the RSEQ area directly.  */
> +#define RSEQ_SETMEM(member, value) \
> +  RSEQ_SELF()->member = (value)
> diff --git a/sysdeps/unix/sysv/linux/dl-rseq-symbols.S b/sysdeps/unix/sysv/linux/dl-rseq-symbols.S
> index b4bba06a99..66b33c1096 100644
> --- a/sysdeps/unix/sysv/linux/dl-rseq-symbols.S
> +++ b/sysdeps/unix/sysv/linux/dl-rseq-symbols.S
> @@ -27,14 +27,18 @@
>   /* Some targets define a macro to denote the zero register.  */
>   #undef zero
>   
> -/* Define 2 symbols: '__rseq_size' is public const and '_rseq_size' (an
> -   alias of '__rseq_size') is hidden and writable for internal use by the
> -   dynamic linker which will initialize the value both symbols point to
> -   before copy relocations take place. */
> +/* Define 3 symbols: '__rseq_size' is public const and then '_rseq_size' and
> +   '__GI___rseq_size' (both aliases of '__rseq_size') are hidden, '_rseq_size'
> +   is writable for internal use by the dynamic linker which will initialize
> +   the value the symbols point to before copy relocations take place.  */
>   
>   	.globl	__rseq_size
>   	.type	__rseq_size, %object
>   	.size	__rseq_size, 4
> +	.hidden __GI___rseq_size
> +	.globl	__GI___rseq_size
> +	.type	__GI___rseq_size, %object
> +	.size	__GI___rseq_size, 4

ok

>   	.hidden _rseq_size
>   	.globl	_rseq_size
>   	.type	_rseq_size, %object
> @@ -42,17 +46,23 @@
>   	.section .data.rel.ro
>   	.balign 4
>   __rseq_size:
> +__GI___rseq_size:
>   _rseq_size:

ok

>   	.zero	4
>   
> -/* Define 2 symbols: '__rseq_offset' is public const and '_rseq_offset' (an
> -   alias of '__rseq_offset') is hidden and writable for internal use by the
> -   dynamic linker which will initialize the value both symbols point to
> -   before copy relocations take place. */
> +/* Define 3 symbols: '__rseq_offset' is public const and then '_rseq_offset'
> +   and '__GI___rseq_offset' (both aliases of '__rseq_offset') are hidden,
> +   '_rseq_offset' is writable for internal use by the dynamic linker which will
> +   initialize the value the symbols point to before copy relocations take
> +   place.  */
>   
>   	.globl	__rseq_offset
>   	.type	__rseq_offset, %object
>   	.size	__rseq_offset, RSEQ_OFFSET_SIZE
> +	.hidden __GI___rseq_offset
> +	.globl	__GI___rseq_offset
> +	.type	__GI___rseq_offset, %object
> +	.size	__GI___rseq_offset, RSEQ_OFFSET_SIZE

ok

>   	.hidden _rseq_offset
>   	.globl	_rseq_offset
>   	.type	_rseq_offset, %object
> @@ -60,5 +70,6 @@ _rseq_size:
>   	.section .data.rel.ro
>   	.balign RSEQ_OFFSET_SIZE
>   __rseq_offset:
> +__GI___rseq_offset:

ok

>   _rseq_offset:
>   	.zero	RSEQ_OFFSET_SIZE
> diff --git a/sysdeps/unix/sysv/linux/rseq-internal.h b/sysdeps/unix/sysv/linux/rseq-internal.h
> index 4bcb4370f7..41a6bd63f1 100644
> --- a/sysdeps/unix/sysv/linux/rseq-internal.h
> +++ b/sysdeps/unix/sysv/linux/rseq-internal.h
> @@ -23,6 +23,8 @@
>   #include <kernel-features.h>
>   #include <stdbool.h>
>   #include <stdio.h>
> +#include <thread_pointer.h>
> +#include <tcb-access.h>
>   #include <sys/rseq.h>
>   
>   /* Minimum size of the rseq area allocation required by the syscall.  The
> @@ -52,19 +54,27 @@ extern unsigned int _rseq_size attribute_hidden;
>      In .data.relro but not yet write-protected.  */
>   extern ptrdiff_t _rseq_offset attribute_hidden;
>   
> +/* We want to use rtld_hidden_proto in order to call the internal aliases
> +   of __rseq_size and __rseq_offset from ld.so.  This avoids a PLT entry in
> +   ld.so for both variables.   */
> +rtld_hidden_proto (__rseq_size)
> +rtld_hidden_proto (__rseq_offset)

ok

> +
> +/* Returns a pointer to the current thread rseq area.  */
> +static inline struct rseq_area *
> +RSEQ_SELF (void)
> +{
> +  return (struct rseq_area *) ((char *) __thread_pointer () + __rseq_offset);
> +}

ok

> +
>   #ifdef RSEQ_SIG
>   static inline bool
>   rseq_register_current_thread (struct pthread *self, bool do_rseq)
>   {
>     if (do_rseq)
>       {
> -      unsigned int size;
> -#if IS_IN (rtld)
> -      /* Use the hidden symbol in ld.so.  */
> -      size = _rseq_size;
> -#else
> -      size = __rseq_size;
> -#endif
> +      unsigned int size =  __rseq_size;
> +

ok

>         if (size < RSEQ_AREA_SIZE_INITIAL)
>           /* The initial implementation used only 20 bytes out of 32,
>              but still expected size 32.  */
> diff --git a/sysdeps/x86_64/nptl/tcb-access.h b/sysdeps/x86_64/nptl/tcb-access.h
> index d35948f111..644d6d2e26 100644
> --- a/sysdeps/x86_64/nptl/tcb-access.h
> +++ b/sysdeps/x86_64/nptl/tcb-access.h
> @@ -130,3 +130,58 @@
>   			 "i" (offsetof (struct pthread, member[0])),	      \
>   			 "r" (idx));					      \
>          }})
> +
> +/* Read member of the RSEQ area directly, all fields currently require
> +   single-copy atomicity semantics reads, hence the 'volatile'.  */
> +# define RSEQ_GETMEM_VOLATILE(member) \
> +  ({ __typeof (RSEQ_SELF()->member) __value;				      \
> +     _Static_assert (sizeof (__value) == 1				      \
> +		     || sizeof (__value) == 4				      \
> +		     || sizeof (__value) == 8,				      \
> +		     "size of per-thread data");			      \
> +     if (sizeof (__value) == 1)						      \
> +       asm volatile ("movb %%fs:%P2(%q3),%b0"				      \
> +		     : "=q" (__value)					      \
> +		     : "0" (0), "i" (offsetof (struct rseq_area, member)),    \
> +		       "r" (__rseq_offset));				      \
> +     else if (sizeof (__value) == 4)					      \
> +       asm volatile ("movl %%fs:%P1(%q2),%0"				      \
> +		     : "=r" (__value)					      \
> +		     : "i" (offsetof (struct rseq_area, member)),	      \
> +		       "r" (__rseq_offset));				      \
> +     else /* 8 */							      \
> +       {								      \
> +	 asm volatile ("movq %%fs:%P1(%q2),%q0"				      \
> +		       : "=r" (__value)					      \
> +		       : "i" (offsetof (struct rseq_area, member)),	      \
> +			 "r" (__rseq_offset));				      \
> +       }								      \
> +     __value; })

ok

> +
> +/* Set member of the RSEQ area directly.  */
> +# define RSEQ_SETMEM(member, value) \
> +  ({									      \
> +     _Static_assert (sizeof (RSEQ_SELF()->member) == 1			      \
> +		     || sizeof (RSEQ_SELF()->member) == 4		      \
> +		     || sizeof (RSEQ_SELF()->member) == 8,		      \
> +		     "size of per-thread data");			      \
> +     if (sizeof (RSEQ_SELF()->member) == 1)				      \
> +       asm volatile ("movb %b0,%%fs:%P1(%q2)" :				      \
> +		     : "iq" (value),					      \
> +		       "i" (offsetof (struct rseq_area, member)),	      \
> +		       "r" (__rseq_offset));				      \
> +     else if (sizeof (RSEQ_SELF()->member) == 4)			      \
> +       asm volatile ("movl %0,%%fs:%P1(%q2)" :				      \
> +		     : IMM_MODE (value),				      \
> +		       "i" (offsetof (struct rseq_area, member)),	      \
> +		       "r" (__rseq_offset));				      \
> +     else /* 8 */							      \
> +       {								      \
> +	 /* Since movq takes a signed 32-bit immediate or a register source   \
> +	    operand, use "er" constraint for 32-bit signed integer constant   \
> +	    or register.  */						      \
> +	 asm volatile ("movq %q0,%%fs:%P1(%q2)" :			      \
> +		       : "er" ((uint64_t) cast_to_integer (value)),	      \
> +			 "i" (offsetof (struct rseq_area, member)),	      \
> +			 "r" (__rseq_offset));				      \
> +       }})

ok

Thanks,

Mathieu


-- 
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com



More information about the Libc-alpha mailing list