From f8a2cb52f70287956de1ef5b49f8bc2372f75fe2 Mon Sep 17 00:00:00 2001
From: Martin Cermak
Date: Fri, 1 Apr 2022 23:54:30 +0200
Subject: [PATCH] Support the faccessat2 syscall

Add a tapset for faccessat2(), the variant of faccessat() that takes a
flags argument (added in Linux 5.8), together with a helper that renders
the flags argument symbolically for argstr.

The glibc faccessat() wrapper may end up issuing either syscall, so the
access.c expectations accept both the faccessat and the faccessat2 form
of each call, including the s390-specific EFAULT case.
---
Review notes (ignored by git-am):
 - _stp_faccessat2_flags_list also decodes AT_EMPTY_PATH (0x1000).
 - The __s390__ EFAULT expectation now accepts faccessat2 as well.
 - Hunk counts and the diffstat were adjusted by hand for the lines
   added in review; the blob ids on the "index" lines are those of the
   original submission.
 - NOTE(review): indentation and blank context lines were reconstructed;
   confirm against the tree before applying.

 tapset/linux/aux_syscalls.stp        |  19 +++
 tapset/linux/sysc_faccessat2.stp     | 146 +++++++++++++++++++++++++++
 testsuite/systemtap.syscall/access.c |  20 ++--
 3 files changed, 175 insertions(+), 10 deletions(-)
 create mode 100644 tapset/linux/sysc_faccessat2.stp

diff --git a/tapset/linux/aux_syscalls.stp b/tapset/linux/aux_syscalls.stp
index 4a4bac4df..af6b0aefd 100644
--- a/tapset/linux/aux_syscalls.stp
+++ b/tapset/linux/aux_syscalls.stp
@@ -1422,6 +1422,25 @@ function _access_mode_str:string(mode:long)
 			   MAXSTRINGLEN);
 %}
 
+/* Flag bits accepted by the faccessat2() syscall (see access(2)). */
+%{
+static const _stp_val_array _stp_faccessat2_flags_list[] = {
+	{0x100, "AT_SYMLINK_NOFOLLOW"},
+	{0x200, "AT_EACCESS"},
+	{0x1000, "AT_EMPTY_PATH"},
+	{0, NULL}
+};
+%}
+
+/* Render a faccessat2() flags word symbolically for use in argstr. */
+function _faccessat2_flags_str:string(flags:long)
+%{ /* pure */ /* unprivileged */
+	unsigned int flags = (unsigned int)STAP_ARG_flags;
+	_stp_lookup_or_str(_stp_faccessat2_flags_list, flags, STAP_RETVALUE,
+			   MAXSTRINGLEN);
+%}
+
+
 /* `man 2 open` for more information */
 @__private30 function _sys_open_mode_str(f)
 {
diff --git a/tapset/linux/sysc_faccessat2.stp b/tapset/linux/sysc_faccessat2.stp
new file mode 100644
index 000000000..c3c71212e
--- /dev/null
+++ b/tapset/linux/sysc_faccessat2.stp
@@ -0,0 +1,146 @@
+# faccessat2 __________________________________________________
+# The faccessat2() system call was added in Linux 5.8 by kernel
+# commit c8ffd8bcdd28296a198f237cc595148a8d4adfbe.
+#
+# From man faccessat2: the raw Linux faccessat() system call does not have a
+# flags argument.  To allow for a proper implementation, Linux 5.8 added
+# the faccessat2() system call, which supports the flags argument and allows
+# a correct implementation of the faccessat() wrapper function (in glibc).
+#
+# long sys_faccessat2(int dfd, const char __user *filename, int mode, int flags)
+
+@define _SYSCALL_FACCESSAT2_NAME
+%(
+	name = "faccessat2"
+%)
+
+@define _SYSCALL_FACCESSAT2_ARGSTR
+%(
+	argstr = sprintf("%s, %s, %s, %s", dirfd_str, pathname, mode_str, flags_str)
+%)
+
+# Fetch the four arguments from the syscall registers and decode them.
+@define _SYSCALL_FACCESSAT2_REGARGS
+%(
+	dirfd = int_arg(1)
+	dirfd_str = _dfd_str(dirfd)
+	pathname = user_string_quoted(pointer_arg(2))
+	pathname_unquoted = user_string_nofault(pointer_arg(2))
+	mode = int_arg(3)
+	mode_str = _access_mode_str(mode)
+	flags = int_arg(4)
+	flags_str = _faccessat2_flags_str(flags)
+%)
+
+# Write back only those arguments that a probe handler assigned to.
+@define _SYSCALL_FACCESSAT2_REGARGS_STORE
+%(
+	if (@probewrite(dirfd))
+		set_int_arg(1, dirfd)
+
+	if (@probewrite(pathname_unquoted))
+		set_user_string_arg(pointer_arg(2), pathname_unquoted)
+
+	if (@probewrite(mode))
+		set_int_arg(3, mode)
+
+	if (@probewrite(flags))
+		set_int_arg(4, flags)
+%)
+
+# Use the DWARF-based probes when they resolve, else the non-DWARF ones.
+probe syscall.faccessat2 = dw_syscall.faccessat2 !, nd_syscall.faccessat2 ? {}
+probe syscall.faccessat2.return = dw_syscall.faccessat2.return !, nd_syscall.faccessat2.return ? {}
+
+# dw_faccessat2 _____________________________________________________
+
+probe dw_syscall.faccessat2 = kernel.function("sys_faccessat2").call ?
+{
+	@__syscall_compat_gate(@const("__NR_faccessat2"),
+			       @const("__NR_compat_faccessat2"))
+	@_SYSCALL_FACCESSAT2_NAME
+	dirfd = __int32($dfd)
+	dirfd_str = _dfd_str(__int32($dfd))
+	pathname = user_string_quoted($filename)
+	mode = __int32($mode)
+	mode_str = _access_mode_str(__int32($mode))
+	flags = __int32($flags)
+	flags_str = _faccessat2_flags_str(__int32($flags))
+	@_SYSCALL_FACCESSAT2_ARGSTR
+}
+probe dw_syscall.faccessat2.return = kernel.function("sys_faccessat2").return ?
+{
+	@__syscall_compat_gate(@const("__NR_faccessat2"),
+			       @const("__NR_compat_faccessat2"))
+	@_SYSCALL_FACCESSAT2_NAME
+	@SYSC_RETVALSTR($return)
+}
+
+# nd_faccessat2 _____________________________________________________
+
+probe nd_syscall.faccessat2 = nd1_syscall.faccessat2!, nd2_syscall.faccessat2!, tp_syscall.faccessat2
+  { }
+
+probe nd1_syscall.faccessat2 = kprobe.function("sys_faccessat2") ?
+{
+	@__syscall_compat_gate(@const("__NR_faccessat2"),
+			       @const("__NR_compat_faccessat2"))
+	@_SYSCALL_FACCESSAT2_NAME
+	asmlinkage()
+	@_SYSCALL_FACCESSAT2_REGARGS
+	@_SYSCALL_FACCESSAT2_ARGSTR
+}
+
+/* kernel 4.17+ */
+probe nd2_syscall.faccessat2 = kprobe.function(@arch_syscall_prefix "sys_faccessat2") ?
+{
+	__set_syscall_pt_regs(pointer_arg(1))
+	@_SYSCALL_FACCESSAT2_NAME
+	@_SYSCALL_FACCESSAT2_REGARGS
+	@_SYSCALL_FACCESSAT2_ARGSTR
+},
+{
+	%( @_IS_SREG_KERNEL %? @_SYSCALL_FACCESSAT2_REGARGS_STORE %)
+}
+
+/* kernel 3.5+, but undesirable because it affects all syscalls */
+probe tp_syscall.faccessat2 = kernel.trace("sys_enter")
+{
+	__set_syscall_pt_regs($regs)
+	@__syscall_compat_gate(@const("__NR_faccessat2"),
+			       @const("__NR_compat_faccessat2"))
+	@_SYSCALL_FACCESSAT2_NAME
+	@_SYSCALL_FACCESSAT2_REGARGS
+	@_SYSCALL_FACCESSAT2_ARGSTR
+},
+{
+	%( @_IS_SREG_KERNEL %? @_SYSCALL_FACCESSAT2_REGARGS_STORE %)
+}
+
+probe nd_syscall.faccessat2.return = nd1_syscall.faccessat2.return!, nd2_syscall.faccessat2.return!, tp_syscall.faccessat2.return
+  { }
+
+probe nd1_syscall.faccessat2.return = kprobe.function("sys_faccessat2").return ?
+{
+	@__syscall_compat_gate(@const("__NR_faccessat2"),
+			       @const("__NR_compat_faccessat2"))
+	@_SYSCALL_FACCESSAT2_NAME
+	@SYSC_RETVALSTR(returnval())
+}
+
+/* kernel 4.17+ */
+probe nd2_syscall.faccessat2.return = kprobe.function(@arch_syscall_prefix "sys_faccessat2").return ?
+{
+	@_SYSCALL_FACCESSAT2_NAME
+	@SYSC_RETVALSTR(returnval())
+}
+
+/* kernel 3.5+, but undesirable because it affects all syscalls */
+probe tp_syscall.faccessat2.return = kernel.trace("sys_exit")
+{
+	__set_syscall_pt_regs($regs)
+	@__syscall_compat_gate(@const("__NR_faccessat2"),
+			       @const("__NR_compat_faccessat2"))
+	@_SYSCALL_FACCESSAT2_NAME
+	@SYSC_RETVALSTR($ret)
+}
diff --git a/testsuite/systemtap.syscall/access.c b/testsuite/systemtap.syscall/access.c
index 2b510a35e..6d17eaaf6 100644
--- a/testsuite/systemtap.syscall/access.c
+++ b/testsuite/systemtap.syscall/access.c
@@ -28,7 +28,7 @@ int main()
 
 #if GLIBC_SUPPORT
   faccessat(AT_FDCWD, "foobar1", F_OK, 0);
-  //staptest// faccessat (AT_FDCWD, "foobar1", F_OK) = 0
+  //staptest// [[[[faccessat (AT_FDCWD, "foobar1", F_OK)!!!!faccessat2 (AT_FDCWD, "foobar1", F_OK, 0x0)]]]] = 0
 #endif
 
   access("foobar1", R_OK);
@@ -36,7 +36,7 @@ int main()
 
 #if GLIBC_SUPPORT
   faccessat(AT_FDCWD, "foobar1", R_OK, 0);
-  //staptest// faccessat (AT_FDCWD, "foobar1", R_OK) = 0
+  //staptest// [[[[faccessat (AT_FDCWD, "foobar1", R_OK)!!!!faccessat2 (AT_FDCWD, "foobar1", R_OK, 0x0)]]]] = 0
 #endif
 
   access("foobar1", W_OK);
@@ -44,7 +44,7 @@ int main()
 
 #if GLIBC_SUPPORT
   faccessat(AT_FDCWD, "foobar1", W_OK, 0);
-  //staptest// faccessat (AT_FDCWD, "foobar1", W_OK) = 0
+  //staptest// [[[[faccessat (AT_FDCWD, "foobar1", W_OK)!!!!faccessat2 (AT_FDCWD, "foobar1", W_OK, 0x0)]]]] = 0
 #endif
 
   access("foobar1", X_OK);
@@ -52,7 +52,7 @@ int main()
 
 #if GLIBC_SUPPORT
   faccessat(AT_FDCWD, "foobar1", X_OK, 0);
-  //staptest// faccessat (AT_FDCWD, "foobar1", X_OK) = -NNNN (EACCES)
+  //staptest// [[[[faccessat (AT_FDCWD, "foobar1", X_OK)!!!!faccessat2 (AT_FDCWD, "foobar1", X_OK, 0x0)]]]] = -NNNN (EACCES)
 #endif
 
   access("foobar1", R_OK|W_OK);
@@ -60,7 +60,7 @@ int main()
 
 #if GLIBC_SUPPORT
   faccessat(AT_FDCWD, "foobar1", R_OK|W_OK, 0);
-  //staptest// faccessat (AT_FDCWD, "foobar1", R_OK|W_OK) = 0
+  //staptest// [[[[faccessat (AT_FDCWD, "foobar1", R_OK|W_OK)!!!!faccessat2 (AT_FDCWD, "foobar1", R_OK|W_OK, 0x0)]]]] = 0
 #endif
 
   access("foobar1", R_OK|W_OK|X_OK);
@@ -68,7 +68,7 @@ int main()
 
 #if GLIBC_SUPPORT
   faccessat(AT_FDCWD, "foobar1", R_OK|W_OK|X_OK, 0);
-  //staptest// faccessat (AT_FDCWD, "foobar1", R_OK|W_OK|X_OK) = -NNNN (EACCES)
+  //staptest// [[[[faccessat (AT_FDCWD, "foobar1", R_OK|W_OK|X_OK)!!!!faccessat2 (AT_FDCWD, "foobar1", R_OK|W_OK|X_OK, 0x0)]]]] = -NNNN (EACCES)
 #endif
 
   access((char *)-1, F_OK);
@@ -83,17 +83,17 @@ int main()
 
 #if GLIBC_SUPPORT
   faccessat(-1, "foobar1", F_OK, 0);
-  //staptest// faccessat (-1, "foobar1", F_OK) = -NNNN (EBADF)
+  //staptest// [[[[faccessat (-1, "foobar1", F_OK)!!!!faccessat2 (-1, "foobar1", F_OK, 0x0)]]]] = -NNNN (EBADF)
 
   faccessat(AT_FDCWD, (char *)-1, F_OK, 0);
 #ifdef __s390__
-  //staptest// faccessat (AT_FDCWD, 0x[7]?[f]+, F_OK) = -NNNN (EFAULT)
+  //staptest// [[[[faccessat (AT_FDCWD, 0x[7]?[f]+, F_OK)!!!!faccessat2 (AT_FDCWD, 0x[7]?[f]+, F_OK, 0x0)]]]] = -NNNN (EFAULT)
 #else
-  //staptest// faccessat (AT_FDCWD, 0x[f]+, F_OK) = -NNNN (EFAULT)
+  //staptest// [[[[faccessat (AT_FDCWD, 0x[f]+, F_OK)!!!!faccessat2 (AT_FDCWD, 0x[f]+, F_OK, 0x0)]]]] = -NNNN (EFAULT)
 #endif
 
   faccessat(AT_FDCWD, "foobar1", -1, 0);
-  //staptest// faccessat (AT_FDCWD, "foobar1", R_OK|W_OK|X_OK|0x[f]+8) = -NNNN (EINVAL)
+  //staptest// [[[[faccessat (AT_FDCWD, "foobar1", R_OK|W_OK|X_OK|0x[f]+8)!!!!faccessat2 (AT_FDCWD, "foobar1", R_OK|W_OK|X_OK|0x[f]+8, 0x0)]]]] = -NNNN (EINVAL)
 
   // We can't test the last argument to faccessat() as a -1, since
   // glibc will realize that's wrong and not issue a syscall.
-- 
2.43.5