[PATCH v4 05/15] futex2: Implement support for different futex sizes
Davidlohr Bueso
dave@stgolabs.net
Sun Jun 6 19:12:33 GMT 2021
On Thu, 03 Jun 2021, Andr� Almeida wrote:
>Implement support for 8, 16 and 64 bit futexes, along with the existing
>32 bit support. Userspace should use flags to specify in the syscall
>the size of the *uaddr they are operating on.
>
>Variable sized futexes are useful for implementing atomic primitives in
>userspace in an efficient manner. 64bit sized futexes are also
>particularly useful when userspace stores information to be used in an
>atomic fashion on the futex value, given more room for flexibility.
Note that at least in the past, Linus has been vehemently against 64-bit
futexes.
Basically this additional data, like for implementing read/write locks,
does not need to be in the futex atomic wait/wake parts. You can instead
split the userspace lock into two adjacent 32-bit words and do 64-bit
atomic ops on it.
Of course, this is a new interface altogether, so this time it might
be fair game.
Thanks,
Davidlohr
>
>Overlapping futexes are not allowed, so userspace can't wait and wake on
>the same memory address if the are using different sizes.
>
>Signed-off-by: André Almeida <andrealmeid@collabora.com>
>---
> include/uapi/linux/futex.h | 3 +
> kernel/futex2.c | 124 ++++++++++++++++++++++++-------------
> 2 files changed, 84 insertions(+), 43 deletions(-)
>
>diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h
>index 06ea9bdfa69e..5786270b0c75 100644
>--- a/include/uapi/linux/futex.h
>+++ b/include/uapi/linux/futex.h
>@@ -42,7 +42,10 @@
> FUTEX_PRIVATE_FLAG)
>
> /* Size argument to futex2 syscall */
>+#define FUTEX_8 0
>+#define FUTEX_16 1
> #define FUTEX_32 2
>+#define FUTEX_64 3
>
> #define FUTEX_SIZE_MASK 0x3
>
>diff --git a/kernel/futex2.c b/kernel/futex2.c
>index 012d7f7fc17a..1e97e5f2e793 100644
>--- a/kernel/futex2.c
>+++ b/kernel/futex2.c
>@@ -89,9 +89,11 @@ struct futex_bucket {
> #define FUTEXV_WAITER_MASK (FUTEX_SIZE_MASK | FUTEX_SHARED_FLAG)
>
> #define is_object_shared ((futexv->objects[i].flags & FUTEX_SHARED_FLAG) ? true : false)
>+#define object_size (futexv->objects[i].flags & FUTEX_SIZE_MASK)
>
>-#define FUT_OFF_INODE 1 /* We set bit 0 if key has a reference on inode */
>-#define FUT_OFF_MMSHARED 2 /* We set bit 1 if key has a reference on mm */
>+#define FUT_OFF_INODE PAGE_SIZE
>+#define FUT_OFF_MMSHARED (PAGE_SIZE << 1)
>+#define FUT_OFF_SIZE 1
>
> static struct futex_bucket *futex_table;
> static unsigned int futex2_hashsize;
>@@ -321,6 +323,7 @@ static int futex_get_shared_key(uintptr_t address, struct mm_struct *mm,
> * @uaddr: futex user address
> * @key: data that uniquely identifies a futex
> * @shared: is this a shared futex?
>+ * @flags: flags for the size
> *
> * For private futexes, each uaddr will be unique for a given mm_struct, and it
> * won't be freed for the life time of the process. For shared futexes, check
>@@ -330,21 +333,41 @@ static int futex_get_shared_key(uintptr_t address, struct mm_struct *mm,
> */
> static struct futex_bucket *futex_get_bucket(void __user *uaddr,
> struct futex_key *key,
>- bool shared)
>+ bool shared, unsigned int flags)
> {
> uintptr_t address = (uintptr_t)uaddr;
> u32 hash_key;
>
>+ size_t size;
>+
>+ switch (flags) {
>+ case FUTEX_8:
>+ size = sizeof(u8);
>+ break;
>+ case FUTEX_16:
>+ size = sizeof(u16);
>+ break;
>+ case FUTEX_32:
>+ size = sizeof(u32);
>+ break;
>+ case FUTEX_64:
>+ size = sizeof(u64);
>+ break;
>+ default:
>+ return ERR_PTR(-EINVAL);
>+ }
>+
> /* Checking if uaddr is valid and accessible */
>- if (unlikely(!IS_ALIGNED(address, sizeof(u32))))
>+ if (unlikely(!IS_ALIGNED(address, size)))
> return ERR_PTR(-EINVAL);
>- if (unlikely(!access_ok(uaddr, sizeof(u32))))
>+ if (unlikely(!access_ok(uaddr, size)))
> return ERR_PTR(-EFAULT);
>
> key->offset = address % PAGE_SIZE;
> address -= key->offset;
> key->pointer = (u64)address;
> key->index = (unsigned long)current->mm;
>+ key->offset |= FUT_OFF_SIZE << (size - sizeof(u8));
>
> if (shared)
> futex_get_shared_key(address, current->mm, key);
>@@ -358,18 +381,39 @@ static struct futex_bucket *futex_get_bucket(void __user *uaddr,
>
> /**
> * futex_get_user - Get the userspace value on this address
>- * @uval: variable to store the value
>- * @uaddr: userspace address
>+ * @uval: variable to store the value
>+ * @uaddr: userspace address
>+ * @pagefault: true if pagefault should be disabled
>+ * @flags: flags for the size
> *
> * Check the comment at futex_enqueue() for more information.
> */
>-static int futex_get_user(u32 *uval, u32 __user *uaddr)
>+static int futex_get_user(u64 *uval, void __user *uaddr, unsigned int flags, bool pagefault)
> {
> int ret;
>
>- pagefault_disable();
>- ret = __get_user(*uval, uaddr);
>- pagefault_enable();
>+ if (pagefault)
>+ pagefault_disable();
>+
>+ switch (flags) {
>+ case FUTEX_8:
>+ ret = __get_user(*uval, (u8 __user *)uaddr);
>+ break;
>+ case FUTEX_16:
>+ ret = __get_user(*uval, (u16 __user *)uaddr);
>+ break;
>+ case FUTEX_32:
>+ ret = __get_user(*uval, (u32 __user *)uaddr);
>+ break;
>+ case FUTEX_64:
>+ ret = __get_user(*uval, (u64 __user *)uaddr);
>+ break;
>+ default:
>+ BUG();
>+ }
>+
>+ if (pagefault)
>+ pagefault_enable();
>
> return ret;
> }
>@@ -484,8 +528,8 @@ static int futex_enqueue(struct futex_waiter_head *futexv, unsigned int nr_futex
> int *awakened)
> {
> int i, ret;
>- u32 uval, val;
>- u32 __user *uaddr;
>+ u64 uval, val;
>+ void __user *uaddr;
> bool retry = false;
> struct futex_bucket *bucket;
>
>@@ -493,13 +537,14 @@ static int futex_enqueue(struct futex_waiter_head *futexv, unsigned int nr_futex
> set_current_state(TASK_INTERRUPTIBLE);
>
> for (i = 0; i < nr_futexes; i++) {
>- uaddr = (u32 __user *)futexv->objects[i].uaddr;
>- val = (u32)futexv->objects[i].val;
>+ uaddr = futexv->objects[i].uaddr;
>+ val = (u64)futexv->objects[i].val;
>
> if (is_object_shared && retry) {
> struct futex_bucket *tmp =
> futex_get_bucket((void __user *)uaddr,
>- &futexv->objects[i].key, true);
>+ &futexv->objects[i].key, true,
>+ object_size);
> if (IS_ERR(tmp)) {
> __set_current_state(TASK_RUNNING);
> futex_dequeue_multiple(futexv, i);
>@@ -513,7 +558,7 @@ static int futex_enqueue(struct futex_waiter_head *futexv, unsigned int nr_futex
> bucket_inc_waiters(bucket);
> spin_lock(&bucket->lock);
>
>- ret = futex_get_user(&uval, uaddr);
>+ ret = futex_get_user(&uval, uaddr, object_size, true);
>
> if (unlikely(ret)) {
> spin_unlock(&bucket->lock);
>@@ -525,7 +570,7 @@ static int futex_enqueue(struct futex_waiter_head *futexv, unsigned int nr_futex
> if (*awakened >= 0)
> return 1;
>
>- if (__get_user(uval, uaddr))
>+ if (futex_get_user(&uval, uaddr, object_size, false))
> return -EFAULT;
>
> retry = true;
>@@ -656,9 +701,6 @@ static long ksys_futex_wait(void __user *uaddr, u64 val, unsigned int flags,
> if (flags & ~FUTEX2_MASK)
> return -EINVAL;
>
>- if (size != FUTEX_32)
>- return -EINVAL;
>-
> futexv = &wait_single.futexv;
> futexv->task = current;
> futexv->hint = false;
>@@ -667,12 +709,13 @@ static long ksys_futex_wait(void __user *uaddr, u64 val, unsigned int flags,
> waiter->index = 0;
> waiter->val = val;
> waiter->uaddr = uaddr;
>+ waiter->flags = flags;
> memset(&wait_single.waiter.key, 0, sizeof(struct futex_key));
>
> INIT_LIST_HEAD(&waiter->list);
>
> /* Get an unlocked hash bucket */
>- waiter->bucket = futex_get_bucket(uaddr, &waiter->key, shared);
>+ waiter->bucket = futex_get_bucket(uaddr, &waiter->key, shared, size);
> if (IS_ERR(waiter->bucket))
> return PTR_ERR(waiter->bucket);
>
>@@ -728,8 +771,7 @@ static int compat_futex_parse_waitv(struct futex_waiter_head *futexv,
> if (copy_from_user(&waitv, &uwaitv[i], sizeof(waitv)))
> return -EFAULT;
>
>- if ((waitv.flags & ~FUTEXV_WAITER_MASK) ||
>- (waitv.flags & FUTEX_SIZE_MASK) != FUTEX_32)
>+ if (waitv.flags & ~FUTEXV_WAITER_MASK)
> return -EINVAL;
>
> futexv->objects[i].key.pointer = 0;
>@@ -740,7 +782,7 @@ static int compat_futex_parse_waitv(struct futex_waiter_head *futexv,
>
> bucket = futex_get_bucket(compat_ptr(waitv.uaddr),
> &futexv->objects[i].key,
>- is_object_shared);
>+ is_object_shared, object_size);
>
> if (IS_ERR(bucket))
> return PTR_ERR(bucket);
>@@ -805,8 +847,7 @@ static int futex_parse_waitv(struct futex_waiter_head *futexv,
> if (copy_from_user(&waitv, &uwaitv[i], sizeof(waitv)))
> return -EFAULT;
>
>- if ((waitv.flags & ~FUTEXV_WAITER_MASK) ||
>- (waitv.flags & FUTEX_SIZE_MASK) != FUTEX_32)
>+ if (waitv.flags & ~FUTEXV_WAITER_MASK)
> return -EINVAL;
>
> futexv->objects[i].key.pointer = 0;
>@@ -816,7 +857,7 @@ static int futex_parse_waitv(struct futex_waiter_head *futexv,
> futexv->objects[i].index = i;
>
> bucket = futex_get_bucket(waitv.uaddr, &futexv->objects[i].key,
>- is_object_shared);
>+ is_object_shared, object_size);
>
> if (IS_ERR(bucket))
> return PTR_ERR(bucket);
>@@ -947,10 +988,7 @@ SYSCALL_DEFINE3(futex_wake, void __user *, uaddr, unsigned int, nr_wake,
> if (flags & ~FUTEX2_MASK)
> return -EINVAL;
>
>- if (size != FUTEX_32)
>- return -EINVAL;
>-
>- bucket = futex_get_bucket(uaddr, &waiter.key, shared);
>+ bucket = futex_get_bucket(uaddr, &waiter.key, shared, size);
> if (IS_ERR(bucket))
> return PTR_ERR(bucket);
>
>@@ -987,28 +1025,30 @@ static inline int __futex_requeue(struct futex_requeue rq1,
> bool retry = false;
> struct futex_bucket *b1, *b2;
> DEFINE_WAKE_Q(wake_q);
>- u32 uval;
>+ u64 uval;
> int ret;
> bool shared1 = (rq1.flags & FUTEX_SHARED_FLAG) ? true : false;
> bool shared2 = (rq2.flags & FUTEX_SHARED_FLAG) ? true : false;
>+ unsigned int size1 = (rq1.flags & FUTEX_SIZE_MASK);
>+ unsigned int size2 = (rq2.flags & FUTEX_SIZE_MASK);
>
>- b1 = futex_get_bucket(rq1.uaddr, &w1.key, shared1);
>+ b1 = futex_get_bucket(rq1.uaddr, &w1.key, shared1, size1);
> if (IS_ERR(b1))
> return PTR_ERR(b1);
>
>- b2 = futex_get_bucket(rq2.uaddr, &w2.key, shared2);
>+ b2 = futex_get_bucket(rq2.uaddr, &w2.key, shared2, size2);
> if (IS_ERR(b2))
> return PTR_ERR(b2);
>
> retry:
> if (shared1 && retry) {
>- b1 = futex_get_bucket(rq1.uaddr, &w1.key, shared1);
>+ b1 = futex_get_bucket(rq1.uaddr, &w1.key, shared1, size1);
> if (IS_ERR(b1))
> return PTR_ERR(b1);
> }
>
> if (shared2 && retry) {
>- b2 = futex_get_bucket(rq2.uaddr, &w2.key, shared2);
>+ b2 = futex_get_bucket(rq2.uaddr, &w2.key, shared2, size2);
> if (IS_ERR(b2))
> return PTR_ERR(b2);
> }
>@@ -1027,11 +1067,11 @@ static inline int __futex_requeue(struct futex_requeue rq1,
> spin_lock_nested(&b1->lock, SINGLE_DEPTH_NESTING);
> }
>
>- ret = futex_get_user(&uval, rq1.uaddr);
>+ ret = futex_get_user(&uval, rq1.uaddr, size1, true);
>
> if (unlikely(ret)) {
> futex_double_unlock(b1, b2);
>- if (__get_user(uval, (u32 __user *)rq1.uaddr))
>+ if (futex_get_user(&uval, rq1.uaddr, size1, false))
> return -EFAULT;
>
> bucket_dec_waiters(b2);
>@@ -1088,8 +1128,7 @@ static int compat_futex_parse_requeue(struct futex_requeue *rq,
> if (copy_from_user(&tmp, uaddr, sizeof(tmp)))
> return -EFAULT;
>
>- if (tmp.flags & ~FUTEXV_WAITER_MASK ||
>- (tmp.flags & FUTEX_SIZE_MASK) != FUTEX_32)
>+ if (tmp.flags & ~FUTEXV_WAITER_MASK)
> return -EINVAL;
>
> rq->uaddr = compat_ptr(tmp.uaddr);
>@@ -1134,8 +1173,7 @@ static int futex_parse_requeue(struct futex_requeue *rq,
> if (copy_from_user(rq, uaddr, sizeof(*rq)))
> return -EFAULT;
>
>- if (rq->flags & ~FUTEXV_WAITER_MASK ||
>- (rq->flags & FUTEX_SIZE_MASK) != FUTEX_32)
>+ if (rq->flags & ~FUTEXV_WAITER_MASK)
> return -EINVAL;
>
> return 0;
>--
>2.31.1
>
More information about the Libc-alpha
mailing list