From 8fee9cc5af3ea902b7628eaa346598d92af5dc58 Mon Sep 17 00:00:00 2001 From: "Frank Ch. Eigler" Date: Fri, 28 Sep 2012 15:35:18 -0400 Subject: [PATCH] PR14364, PR14630: Use set_fs and pagefault_disable/enable around more accesses It turns out there are a bunch of conceptually overlapping functions/macros throughout the runtime, each of which attempts to dereference untrustworthy kernel- or user-space pointers, in slightly different ways. When deliberately invoked with bad pointer values, some lockdep kernels (e.g. 2.6.32-279.9.1.el6.x86_64.debug) would emit errors about page-fault handling paths being triggered in inappropriate contexts for some of these lookup functions. It turns out a more robust control of address space checking and fault suppression is necessary. * runtime/linux/autoconf-pagefault_disable.c: New test. * buildrun.cxx (compile_pass): Run it. * runtime/linux/copy.c (_stp_read_address): Add pagefault_{disable,enable}. Note duplication with loc2c-runtime.h (_stp_strncpy_from_user): Add set_fs & pagefault_{disable,enable}. Note duplication with loc2c-runtime.h * runtime/stp_string.h (__stp_get_user): Wrap in pagefault_{disable,enable}. Note duplication with loc2c-runtime.h * tapset/uconversions.stp (__STP_GET_USER): Instead of __stp_get_user, zap duplication with loc2c-runtime.h and just call loc2c-runtime.h. * runtime/loc2c-runtime.h (STAPCONF_PAGEFAULT_DISABLE): Add dummy macros for pre-rhel5 kernels. (_stp_deref, _stp_store_deref): Revamped arch-specific macros, setting segments and disabling pagefaults. (uderef,store_uderef,kderef,store_kderef): Revamped macros to call the above. These should become the standard throughout the runtime/tapset. 
--- buildrun.cxx | 2 + runtime/linux/autoconf-pagefault_disable.c | 9 + runtime/linux/copy.c | 14 +- runtime/loc2c-runtime.h | 191 ++++++++++++--------- runtime/stp_string.h | 15 +- tapset/uconversions.stp | 11 +- 6 files changed, 141 insertions(+), 101 deletions(-) create mode 100644 runtime/linux/autoconf-pagefault_disable.c diff --git a/buildrun.cxx b/buildrun.cxx index ffffd1e5c..b71e22931 100644 --- a/buildrun.cxx +++ b/buildrun.cxx @@ -376,6 +376,8 @@ compile_pass (systemtap_session& s) // used by runtime/stp_utrace.c output_exportconf(s, o, "task_work_add", "STAPCONF_TASK_WORK_ADD_EXPORTED"); + output_autoconf(s, o, "autoconf-pagefault_disable.c", "STAPCONF_PAGEFAULT_DISABLE", NULL); + o << module_cflags << " += -include $(STAPCONF_HEADER)" << endl; for (unsigned i=0; i + +int foo (int c) +{ + pagefault_disable(); + c ++; + pagefault_enable(); + return c; +} diff --git a/runtime/linux/copy.c b/runtime/linux/copy.c index ac4b8db4f..ea813b2f5 100644 --- a/runtime/linux/copy.c +++ b/runtime/linux/copy.c @@ -1,6 +1,6 @@ /* -*- linux-c -*- * Copy from user space functions - * Copyright (C) 2005-2008 Red Hat Inc. + * Copyright (C) 2005-2012 Red Hat Inc. * Copyright (C) 2005 Intel Corporation. * * This file is part of systemtap, and is free software. You can @@ -35,12 +35,16 @@ * USER_DS for userspace. */ +/* XXX: see also kread/uread in loc2c-runtime.h */ +/* XXX: add bad_addr check */ #define _stp_read_address(x, ptr, segment) \ ({ \ long ret; \ mm_segment_t ofs = get_fs(); \ set_fs(segment); \ + pagefault_disable(); \ ret = __stp_get_user(x, ptr); \ + pagefault_enable(); \ set_fs(ofs); \ ret; \ }) @@ -144,11 +148,17 @@ do { \ * count bytes and returns count. 
*/ +/* XXX: see also kread/uread in loc2c-runtime.h */ static long _stp_strncpy_from_user(char *dst, const char __user *src, long count) { long res = -EFAULT; - if (access_ok(VERIFY_READ, src, count)) + mm_segment_t _oldfs = get_fs(); + set_fs(USER_DS); + pagefault_disable(); + if (access_ok(VERIFY_READ, src, count)) /* XXX: bad_addr? */ __stp_strncpy_from_user(dst, src, count, res); + pagefault_enable(); + set_fs(_oldfs); return res; } diff --git a/runtime/loc2c-runtime.h b/runtime/loc2c-runtime.h index 8b6e0b204..ef99cd763 100644 --- a/runtime/loc2c-runtime.h +++ b/runtime/loc2c-runtime.h @@ -1,6 +1,6 @@ /* target operations * Copyright (C) 2005-2012 Red Hat Inc. - * Copyright (C) 2005, 2006, 2007 Intel Corporation. + * Copyright (C) 2005-2007 Intel Corporation. * Copyright (C) 2007 Quentin Barnes. * * This file is part of systemtap, and is free software. You can @@ -20,6 +20,11 @@ #define uintptr_t unsigned long #endif +#ifndef STAPCONF_PAGEFAULT_DISABLE /* before linux commit a866374a */ +#define pagefault_disable() preempt_disable() +#define pagefault_enable() preempt_enable_no_resched() +#endif + /* These three macro definitions are generic, just shorthands used by the generated code. 
*/ @@ -522,29 +527,36 @@ extern void __store_deref_bad(void); #if defined __i386__ -#define uderef(size, addr) \ +#define _stp_deref(size, addr, seg) \ ({ \ int _bad = 0; \ - u8 _b; u16 _w; u32 _l; \ intptr_t _v = 0; \ + mm_segment_t _oldfs = get_fs(); \ + set_fs(seg); \ + pagefault_disable(); \ if (lookup_bad_addr((unsigned long)addr, size)) \ _bad = 1; \ else \ switch (size) \ { \ - case 1: __get_user_asm(_b,addr,_bad,"b","b","=q",1); _v = _b; break; \ - case 2: __get_user_asm(_w,addr,_bad,"w","w","=r",1); _v = _w; break; \ - case 4: __get_user_asm(_l,addr,_bad,"l","","=r",1); _v = _l; break; \ + case 1: { u8 _b; __get_user_asm(_b,addr,_bad,"b","b","=q",1); _v = _b; } break; \ + case 2: { u16 _w; __get_user_asm(_w,addr,_bad,"w","w","=r",1); _v = _w; } break; \ + case 4: { u32 _l; __get_user_asm(_l,addr,_bad,"l","","=r",1); _v = _l; } break; \ default: _v = __get_user_bad(); \ } \ + pagefault_enable(); \ + set_fs(_oldfs); \ if (_bad) \ DEREF_FAULT(addr); \ _v; \ }) -#define store_uderef(size, addr, value) \ +#define _stp_store_deref(size, addr, value, seg) \ ({ \ int _bad = 0; \ + mm_segment_t _oldfs = get_fs(); \ + set_fs(seg); \ + pagefault_disable(); \ if (lookup_bad_addr((unsigned long)addr, size)) \ _bad = 1; \ else \ @@ -555,6 +567,8 @@ extern void __store_deref_bad(void); case 4: __put_user_asm(((u32)(value)),addr,_bad,"l","k","ir",1); break;\ default: __put_user_bad(); \ } \ + pagefault_enable(); \ + set_fs(_oldfs); \ if (_bad) \ STORE_DEREF_FAULT(addr); \ }) @@ -562,30 +576,37 @@ extern void __store_deref_bad(void); #elif defined __x86_64__ -#define uderef(size, addr) \ +#define _stp_deref(size, addr, seg) \ ({ \ int _bad = 0; \ - u8 _b; u16 _w; u32 _l; u64 _q; \ intptr_t _v = 0; \ + mm_segment_t _oldfs = get_fs(); \ + set_fs(seg); \ + pagefault_disable(); \ if (lookup_bad_addr((unsigned long)addr, size)) \ _bad = 1; \ else \ switch (size) \ { \ - case 1: __get_user_asm(_b,(unsigned long)addr,_bad,"b","b","=q",1); _v = _b; break; \ - case 2: 
__get_user_asm(_w,(unsigned long)addr,_bad,"w","w","=r",1); _v = _w; break; \ - case 4: __get_user_asm(_l,(unsigned long)addr,_bad,"l","","=r",1); _v = _l; break; \ - case 8: __get_user_asm(_q,(unsigned long)addr,_bad,"q","","=r",1); _v = _q; break; \ + case 1: { u8 _b; __get_user_asm(_b,(unsigned long)addr,_bad,"b","b","=q",1); _v = _b; } break; \ + case 2: { u16 _w; __get_user_asm(_w,(unsigned long)addr,_bad,"w","w","=r",1); _v = _w; } break; \ + case 4: { u32 _l; __get_user_asm(_l,(unsigned long)addr,_bad,"l","","=r",1); _v = _l; } break; \ + case 8: { u64 _q; __get_user_asm(_q,(unsigned long)addr,_bad,"q","","=r",1); _v = _q; } break; \ default: _v = __get_user_bad(); \ } \ + pagefault_enable(); \ + set_fs(_oldfs); \ if (_bad) \ - DEREF_FAULT(addr); \ + DEREF_FAULT(addr); \ _v; \ }) -#define store_uderef(size, addr, value) \ +#define _stp_store_deref(size, addr, value, seg) \ ({ \ int _bad = 0; \ + mm_segment_t _oldfs = get_fs(); \ + set_fs(seg); \ + pagefault_disable(); \ if (lookup_bad_addr((unsigned long)addr, size)) \ _bad = 1; \ else \ @@ -597,47 +618,45 @@ extern void __store_deref_bad(void); case 8: __put_user_asm(((u64)(value)),addr,_bad,"q","","Zr",1); break; \ default: __put_user_bad(); \ } \ + pagefault_enable(); \ + set_fs(_oldfs); \ if (_bad) \ - STORE_DEREF_FAULT(addr); \ + STORE_DEREF_FAULT(addr); \ }) #elif defined __ia64__ -#define uderef(size, addr) \ + +#define _stp_deref(size, addr, seg) \ ({ \ int _bad = 0; \ intptr_t _v=0; \ + mm_segment_t _oldfs = get_fs(); \ + set_fs(seg); \ + pagefault_disable(); \ if (lookup_bad_addr((unsigned long)addr, size)) \ _bad = 1; \ else { \ - if (in_atomic() || irqs_disabled()) { \ - pagefault_disable(); \ - switch (size) { \ + switch (size) { \ case 1: __get_user_size(_v, addr, 1, _bad); break; \ case 2: __get_user_size(_v, addr, 2, _bad); break; \ case 4: __get_user_size(_v, addr, 4, _bad); break; \ case 8: __get_user_size(_v, addr, 8, _bad); break; \ default: __get_user_unknown(); break; \ - } \ - 
pagefault_enable(); \ - } \ - else { \ - switch (size) { \ - case 1: __get_user_size(_v, addr, 1, _bad); break; \ - case 2: __get_user_size(_v, addr, 2, _bad); break; \ - case 4: __get_user_size(_v, addr, 4, _bad); break; \ - case 8: __get_user_size(_v, addr, 8, _bad); break; \ - default: __get_user_unknown(); break; \ - } \ - } \ + } \ } \ + pagefault_enable(); \ + set_fs(_oldfs); \ if (_bad) \ DEREF_FAULT(addr); \ _v; \ }) -#define store_uderef(size, addr, value) \ +#define _stp_store_deref(size, addr, value, seg) \ ({ \ int _bad=0; \ + mm_segment_t _oldfs = get_fs(); \ + set_fs(seg); \ + pagefault_disable(); \ if (lookup_bad_addr((unsigned long)addr, size)) \ _bad = 1; \ else \ @@ -648,8 +667,10 @@ extern void __store_deref_bad(void); case 8: __put_user_size(value, addr, 8, _bad); break; \ default: __put_user_unknown(); break; \ } \ + pagefault_enable(); \ + set_fs(_oldfs); \ if (_bad) \ - STORE_DEREF_FAULT(addr); \ + STORE_DEREF_FAULT(addr); \ }) #elif defined __powerpc__ || defined __powerpc64__ @@ -666,10 +687,13 @@ extern void __store_deref_bad(void); __put_user_size(x, ptr, size, retval, -EFAULT) #endif -#define uderef(size, addr) \ +#define _stp_deref(size, addr, seg) \ ({ \ int _bad = 0; \ intptr_t _v = 0; \ + mm_segment_t _oldfs = get_fs(); \ + set_fs(seg); \ + pagefault_disable(); \ if (lookup_bad_addr((unsigned long)addr, size)) \ _bad = 1; \ else \ @@ -681,14 +705,19 @@ extern void __store_deref_bad(void); case 8: __stp_get_user_size(_v, addr, 8, _bad); break; \ default: _v = __get_user_bad(); \ } \ + pagefault_enable(); \ + set_fs(_oldfs); \ if (_bad) \ DEREF_FAULT(addr); \ _v; \ }) -#define store_uderef(size, addr, value) \ +#define _stp_store_deref(size, addr, value, seg) \ ({ \ int _bad = 0; \ + mm_segment_t _oldfs = get_fs(); \ + set_fs(seg); \ + pagefault_disable(); \ if (lookup_bad_addr((unsigned long)addr, size)) \ _bad = 1; \ else \ @@ -700,6 +729,8 @@ extern void __store_deref_bad(void); case 8: __stp_put_user_size(((u64)(value)), addr, 8, 
_bad); break; \ default: __put_user_bad(); \ } \ + pagefault_enable(); \ + set_fs(_oldfs); \ if (_bad) \ STORE_DEREF_FAULT(addr); \ }) @@ -813,27 +844,35 @@ extern void __store_deref_bad(void); : "r" (x), "r" (__pu_addr), "i" (-EFAULT) \ : "cc") -#define uderef(size, addr) \ +#define _stp_deref(size, addr, seg) \ ({ \ int _bad = 0; \ intptr_t _v=0; \ + mm_segment_t _oldfs = get_fs(); \ + set_fs(seg); \ + pagefault_disable(); \ if (lookup_bad_addr((unsigned long)addr, size)) \ _bad = 1; \ - else \ + else \ switch (size){ \ case 1: __stp_get_user_asm_byte(_v, addr, _bad); break; \ case 2: __stp_get_user_asm_half(_v, addr, _bad); break; \ case 4: __stp_get_user_asm_word(_v, addr, _bad); break; \ default: __get_user_bad(); break; \ } \ + pagefault_enable(); \ + set_fs(_oldfs); \ if (_bad) \ DEREF_FAULT(addr); \ _v; \ }) -#define store_uderef(size, addr, value) \ +#define _stp_store_deref(size, addr, value, seg) \ ({ \ int _bad=0; \ + mm_segment_t _oldfs = get_fs(); \ + set_fs(seg); \ + pagefault_disable(); \ if (lookup_bad_addr((unsigned long)addr, size)) \ _bad = 1; \ else \ @@ -843,6 +882,8 @@ extern void __store_deref_bad(void); case 4: __stp_put_user_asm_word(value, addr, _bad); break; \ default: __put_user_bad(); break; \ } \ + pagefault_enable(); \ + set_fs(_oldfs); \ if (_bad) \ STORE_DEREF_FAULT(addr); \ }) @@ -852,13 +893,14 @@ extern void __store_deref_bad(void); /* Use same __get_user() and __put_user() for both user and kernel addresses, but make sure set_fs() is called appropriately first. 
*/ -#define uderef(size, addr) ({ \ +#define _stp_deref(size, addr, seg) ({ \ u8 _b; u16 _w; u32 _l; u64 _q; \ uintptr_t _a = (uintptr_t) addr; \ intptr_t _v = 0; \ int _bad = 0; \ - mm_segment_t _oldfs = get_fs(); \ - set_fs (USER_DS); \ + mm_segment_t _oldfs = get_fs(); \ + set_fs(seg); \ + pagefault_disable(); \ switch (size) { \ case 1: _bad = __get_user(_b, (u8 *)(_a)); _v = _b; break; \ case 2: _bad = __get_user(_w, (u16 *)(_a)); _v = _w; break; \ @@ -866,16 +908,18 @@ extern void __store_deref_bad(void); case 8: _bad = __get_user(_q, (u64 *)(_a)); _v = _q; break; \ default: __get_user_bad(); \ } \ - set_fs (_oldfs); \ + pagefault_enable(); \ + set_fs(_oldfs); \ if (_bad) \ DEREF_FAULT(addr); \ _v; \ }) -#define store_uderef(size, addr, value) ({ \ +#define _stp_store_deref(size, addr, value, seg) ({ \ int _bad = 0; \ - mm_segment_t _oldfs = get_fs(); \ - set_fs (USER_DS); \ + mm_segment_t _oldfs = get_fs(); \ + set_fs(seg); \ + pagefault_disable(); \ switch (size) { \ case 1: _bad = __put_user(((u8)(value)), ((u8 *)(addr))); break; \ case 2: _bad = __put_user(((u16)(value)), ((u16 *)(addr))); break; \ @@ -883,58 +927,35 @@ extern void __store_deref_bad(void); case 8: _bad = __put_user(((u64)(value)), ((u64 *)(addr))); break; \ default: __put_user_bad(); \ } \ - set_fs (_oldfs); \ + pagefault_enable(); \ + set_fs(_oldfs); \ if (_bad) \ STORE_DEREF_FAULT(addr); \ }) -#define kderef(size, addr) ({ \ - u8 _b; u16 _w; u32 _l; u64 _q; \ - uintptr_t _a = (uintptr_t) addr; \ - intptr_t _v = 0; \ - int _bad = 0; \ - mm_segment_t _oldfs = get_fs(); \ - set_fs (KERNEL_DS); \ - switch (size) { \ - case 1: _bad = __get_user(_b, (u8 *)(_a)); _v = _b; break; \ - case 2: _bad = __get_user(_w, (u16 *)(_a)); _v = _w; break; \ - case 4: _bad = __get_user(_l, (u32 *)(_a)); _v = _l; break; \ - case 8: _bad = __get_user(_q, (u64 *)(_a)); _v = _q; break; \ - default: __get_user_bad(); \ - } \ - set_fs (_oldfs); \ - if (_bad) \ - DEREF_FAULT(addr); \ - _v; \ - }) +#endif /* (s390) 
|| (s390x) */ -#define store_kderef(size, addr, value) ({ \ - int _bad = 0; \ - mm_segment_t _oldfs = get_fs(); \ - set_fs (KERNEL_DS); \ - switch (size) { \ - case 1: _bad = __put_user(((u8)(value)), ((u8 *)(addr))); break; \ - case 2: _bad = __put_user(((u16)(value)), ((u16 *)(addr))); break; \ - case 4: _bad = __put_user(((u32)(value)), ((u32 *)(addr))); break; \ - case 8: _bad = __put_user(((u64)(value)), ((u64 *)(addr))); break; \ - default: __put_user_bad(); \ - } \ - set_fs (_oldfs); \ - if (_bad) \ - STORE_DEREF_FAULT(addr); \ - }) -#endif /* (s390) || (s390x) */ +/* Map kderef/uderef to the generic segment-aware deref macros. */ -/* Normally we can use uderef and store_uderef also for kernel space. */ #ifndef kderef -#define kderef uderef +#define kderef(s,a) _stp_deref(s,a,KERNEL_DS) #endif #ifndef store_kderef -#define store_kderef store_uderef +#define store_kderef(s,a,v) _stp_store_deref(s,a,v,KERNEL_DS) #endif +#ifndef uderef +#define uderef(s,a) _stp_deref(s,a,USER_DS) +#endif + +#ifndef store_uderef +#define store_uderef(s,a,v) _stp_store_deref(s,a,v,USER_DS) +#endif + + + #if defined (__i386__) || defined (__arm__) /* x86 and arm can't do 8-byte put/get_user_asm, so we have to split it */ diff --git a/runtime/stp_string.h b/runtime/stp_string.h index 45153feda..7fa4fa8f4 100644 --- a/runtime/stp_string.h +++ b/runtime/stp_string.h @@ -1,5 +1,5 @@ /* -*- linux-c -*- - * Copyright (C) 2005, 2007, 2009 Red Hat Inc. + * Copyright (C) 2005-2012 Red Hat Inc. * * This file is part of systemtap, and is free software. 
You can * redistribute it and/or modify it under the terms of the GNU General @@ -12,6 +12,9 @@ #define to_oct_digit(c) ((c) + '0') static void _stp_text_str(char *out, char *in, int len, int quoted, int user); +/* XXX: duplication with loc2c-runtime.h */ +/* XXX: probably needs the same set_fs / pagefault_* / bad_addr checks */ + #if defined(__KERNEL__) /* @@ -31,13 +34,9 @@ static void _stp_text_str(char *out, char *in, int len, int quoted, int user); #define __stp_get_user(x, ptr) \ ({ \ int __res; \ - if (in_atomic() || irqs_disabled()) { \ - pagefault_disable(); \ - __res = __get_user(x, ptr); \ - pagefault_enable(); \ - } \ - else \ - __res = __get_user(x, ptr); \ + pagefault_disable(); \ + __res = __get_user(x, ptr); \ + pagefault_enable(); \ __res; \ }) #else /* !defined(__powerpc__) && !defined(__ia64) */ diff --git a/tapset/uconversions.stp b/tapset/uconversions.stp index 1ca0cc62d..9c3231c32 100644 --- a/tapset/uconversions.stp +++ b/tapset/uconversions.stp @@ -1,5 +1,5 @@ // userspace conversions tapset -// Copyright (C) 2005-2011 Red Hat Inc. +// Copyright (C) 2005-2012 Red Hat Inc. // Copyright (C) 2007 Intel Corporation. // // This file is part of systemtap, and is free software. You can @@ -7,15 +7,14 @@ // Public License (GPL); either version 2, or (at your option) any // later version. +// implement in terms of runtime/loc2c-runtime.h macro uderef() %{ #define __STP_GET_USER(t, warn) \ do { \ __label__ deref_fault; \ - t *_ptr = (t*) (intptr_t) STAP_ARG_addr; \ - assert_is_myproc(); \ - if (! access_ok(VERIFY_READ, _ptr, sizeof(t))) \ - goto deref_fault; \ - if (__stp_get_user(STAP_RETVALUE, _ptr)) { \ + t *_ptr = (t*) (intptr_t) STAP_ARG_addr; \ + STAP_RETVALUE = (t) uderef (sizeof(t), _ptr); \ + if (0) { \ deref_fault: \ STAP_RETVALUE = 0; \ CONTEXT->last_error = NULL; \ -- 2.43.5