This is the mail archive of the
systemtap@sourceware.org
mailing list for the systemtap project.
Re: Failure in syscall.open probe
This patch follows the API and fixes a bug - increment src/dst pointers
index c5022b5..082a4fd 100644
--- a/runtime/linux/copy.c
+++ b/runtime/linux/copy.c
@@ -24,6 +24,27 @@
* @{
*/
+static long _stp_copy_from_user_inatomic(char *dst, const char __user
*src, long count)
+{
+ int res;
+ long bytes = 0;
+ for (;count;dst++, src++, count--, bytes++)
+ {
+ res = __copy_from_user_inatomic(dst, src, 1);
+ if (unlikely(res != 0))
+ {
+ *dst = 0;
+ break;
+ }
+ if (unlikely(*dst == 0))
+ {
+ break;
+ }
+ }
+
+ return bytes;
+}
+
/** Copy a NULL-terminated string from userspace.
* On success, returns the length of the string (not including the trailing
* NULL).
@@ -34,8 +55,8 @@
* least <i>count</i> bytes long.
* @param src Source address, in user space.
* @param count Maximum number of bytes to copy, including the trailing NULL.
- *
- * If <i>count</i> is smaller than the length of the string, copies
+ *
+ * If <i>count</i> is smaller than the length of the string, copies
* <i>count</i> bytes and returns <i>count</i>.
*/
@@ -46,14 +67,8 @@ static long _stp_strncpy_from_user(char *dst, const
char __user *src, long count
mm_segment_t _oldfs = get_fs();
set_fs(USER_DS);
pagefault_disable();
- /* XXX: The following preempt() manipulations should be
- redundant with probe entry/exit code, but for unknown
- reasons on RHEL5/6 conversions.exp intermittently fails
- without this. */
- preempt_disable();
if (!lookup_bad_addr(VERIFY_READ, (const unsigned long)src, count))
- res = strncpy_from_user(dst, src, count);
- preempt_enable_no_resched();
+ res = _stp_copy_from_user_inatomic(dst, src, count);
pagefault_enable();
set_fs(_oldfs);
return res;
On Mon, Aug 28, 2017 at 8:49 AM, Arkady <arkady.miasnikov@gmail.com> wrote:
> The following patch solves the problem, or, at least, makes it
> significantly less probable
>
> diff --git a/runtime/linux/copy.c b/runtime/linux/copy.c
> index c5022b5..2da01f1 100644
> --- a/runtime/linux/copy.c
> +++ b/runtime/linux/copy.c
> @@ -39,6 +39,28 @@
> * <i>count</i> bytes and returns <i>count</i>.
> */
>
> +static long _stp_copy_from_user_inatomic(char *dst, const char __user
> *src, long count)
> +{
> + int res;
> + long bytes = 0;
> + while (count)
> + {
> + res = __copy_from_user_inatomic(dst, src, 1);
> + if (unlikely(res != 1))
> + {
> + return res;
> + }
> + if (unlikely((*dst == 0)))
> + {
> + break;
> + }
> + bytes++;
> + count--;
> + }
> +
> + return bytes;
> +}
> +
> /* XXX: see also kread/uread in loc2c-runtime.h */
> static long _stp_strncpy_from_user(char *dst, const char __user *src,
> long count)
> {
> @@ -46,14 +68,8 @@ static long _stp_strncpy_from_user(char *dst, const
> char __user *src, long count
> mm_segment_t _oldfs = get_fs();
> set_fs(USER_DS);
> pagefault_disable();
> - /* XXX: The following preempt() manipulations should be
> - redundant with probe entry/exit code, but for unknown
> - reasons on RHEL5/6 conversions.exp intermittently fails
> - without this. */
> - preempt_disable();
> if (!lookup_bad_addr(VERIFY_READ, (const unsigned long)src, count))
> - res = strncpy_from_user(dst, src, count);
> - preempt_enable_no_resched();
> + res = _stp_copy_from_user_inatomic(dst, src, count);
> pagefault_enable();
> set_fs(_oldfs);
> return res;
>
> On Sun, Aug 27, 2017 at 3:38 PM, Arkady <arkady.miasnikov@gmail.com> wrote:
>> The following comment is probably relevant:
>> /*
>> * On some kernels (e.g. 2.6.39), even with preemption disabled, the
>> strncpy_from_user,
>> * instead of returning -1 after a page fault, schedules the process,
>> so we drop events
>> * because of the preemption. This function reads the user buffer in
>> atomic chunks, and
>> * returns when there's an error or the terminator is found
>> */
>>
>> https://github.com/draios/sysdig/blob/dev/driver/ppm_events.c#L108
>>
>>> On Fri, Aug 25, 2017 at 10:52 PM, David Smith <dsmith@redhat.com> wrote:
>>>> Arkady,
>>>>
>>>> The "good" news is that I've duplicated your problem. I went ahead and
>>>> filed PR22012 (<https://sourceware.org/bugzilla/show_bug.cgi?id=22012>)
>>>> on this issue so I won't forget it.
>>>>
>>>> I'm looking into it.
>>>>
>>>>
>>>> On Fri, Aug 25, 2017 at 12:25 PM, Arkady <arkady.miasnikov@gmail.com> wrote:
>>>>> Reproduce the problem
>>>>>
>>>>> Run the following one liner on CentOS 6.9
>>>>>
>>>>> stap -e "global AF%;probe syscall.open {tid = tid();AF[tid] =
>>>>> filename;}probe syscall.open.return{tid = tid();delete AF[tid];}"
>>>>>
>>>>> Run a bash loop
>>>>> touch test.txt;while [ 1 ];do cat test.txt;done;
>>>>>
>>>>> Monitor the kernel log
>>>>> tail -f /var/log/messages
>>>>>
>>>>> After a couple of minutes you shall see
>>>>>
>>>>> BUG: scheduling while atomic: cat/87764/0x10000001
>>>>> Modules linked in: test_open(U) fuse rfcomm sco bridge bnep l2cap
>>>>> autofs4 bnx2fc cnic uio fcoe libfcoe libfc 8021q scsi_transport_fc
>>>>> garp stp scsi_tgt llc ipt_REJECT nf_conntrack_ipv4 nf_defrag_ipv4
>>>>> iptable_filter ip_tables ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6
>>>>> xt_state nf_conntrack ip6table_filter ip6_tables ib_ipoib rdma_ucm
>>>>> ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_sa ib_mad ib_core
>>>>> ib_addr ipv6 uinput microcode vmware_balloon uvcvideo videodev
>>>>> v4l2_compat_ioctl32 btusb bluetooth rfkill snd_seq_midi e1000
>>>>> snd_seq_midi_event snd_ens1371 snd_rawmidi snd_ac97_codec ac97_bus
>>>>> snd_seq snd_seq_device snd_pcm snd_timer snd soundcore snd_page_alloc
>>>>> sg i2c_piix4 shpchp ext4 jbd2 mbcache sd_mod crc_t10dif sr_mod cdrom
>>>>> mptspi mptscsih mptbase scsi_transport_spi pata_acpi ata_generic
>>>>> ata_piix vmwgfx ttm drm_kms_helper drm i2c_core dm_mirror
>>>>> dm_region_hash dm_log dm_mod [last unloaded: speedstep_lib]
>>>>> Pid: 87764, comm: cat Not tainted 2.6.32-696.10.1.el6.x86_64 #1
>>>>> Call Trace:
>>>>> <#DB> [<ffffffff81068244>] ? __schedule_bug+0x44/0x50
>>>>> [<ffffffff8154ae0c>] ? schedule+0xa4c/0xb70
>>>>> [<ffffffff812a6216>] ? vsnprintf+0x336/0x5e0
>>>>> [<ffffffff810740aa>] ? __cond_resched+0x2a/0x40
>>>>> [<ffffffff8154b200>] ? _cond_resched+0x30/0x40
>>>>> [<ffffffff812a8b9a>] ? strncpy_from_user+0x4a/0x90
>>>>> [<ffffffffa0705ff9>] ? probe_3602+0x5f9/0x1220 [test_open]
>>>>> [<ffffffff81196c31>] ? sys_open+0x1/0x30
>>>>> [<ffffffffa0707fed>] ? enter_kprobe_probe+0x1ed/0x3a0 [test_open]
>>>>> [<ffffffff815512bb>] ? aggr_pre_handler+0x5b/0xb0
>>>>> [<ffffffff81196c30>] ? sys_open+0x0/0x30
>>>>> [<ffffffff81196c31>] ? sys_open+0x1/0x30
>>>>> [<ffffffff81550cd5>] ? kprobe_exceptions_notify+0x3d5/0x430
>>>>> [<ffffffff81550f45>] ? notifier_call_chain+0x55/0x80
>>>>> [<ffffffff81550faa>] ? atomic_notifier_call_chain+0x1a/0x20
>>>>> [<ffffffff810acd0e>] ? notify_die+0x2e/0x30
>>>>> [<ffffffff8154e815>] ? do_int3+0x35/0xb0
>>>>> [<ffffffff8154e083>] ? int3+0x33/0x40
>>>>> [<ffffffff81196c30>] ? sys_open+0x0/0x30
>>>>
>>>>
>>>>
>>>> --
>>>> David Smith
>>>> Principal Software Engineer
>>>> Red Hat