From a399ff28c436ba8927ce3bb0af7655e33f3aacc5 Mon Sep 17 00:00:00 2001 From: "Yichun Zhang (agentzh)" Date: Tue, 2 Feb 2021 14:05:00 -0800 Subject: [PATCH] Bug: delete_module() syscall might get interrupted under load. staprun might fail to delete its modules and leave stale kernel modules (.ko) loaded in the system: $ sudo lsmod|grep stap stap_2eb3039808b647e990825b99fb1f9b6_18830 221184 0 stap_eee9b566240fbeb1194347a571ccd68_17219 208896 0 staprun now retries the delete_module() syscall up to 5 times when it fails with EINTR, with a growing sleep interval between successive attempts. --- staprun/staprun.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/staprun/staprun.c b/staprun/staprun.c index 334804b29..7cf0fef32 100644 --- a/staprun/staprun.c +++ b/staprun/staprun.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -242,7 +243,11 @@ static void remove_all_modules(void) static int remove_module(const char *name, int verb) { - int ret; + int i, ret; + enum { + MAX_EINTR_TRIES = 5 + }; + dbug(2, "%s\n", name); #ifdef PR_SET_NAME @@ -271,12 +276,15 @@ static int remove_module(const char *name, int verb) dbug(2, "removing module %s\n", name); PROBE1(staprun, remove__module, name); - ret = delete_module (name, O_NONBLOCK); + + for (i = 0; i < MAX_EINTR_TRIES; i++) { + ret = delete_module (name, O_NONBLOCK); + if (ret == 0 || errno != EINTR) + break; + usleep(100 * i); + } + if (ret != 0) { - /* XXX: maybe we should just accept this, with a - diagnostic, but without an error. Might it be - possible for the same module to be started up just - as we're shutting down? */ err("Couldn't remove module '%s': %s.\n", name, strerror(errno)); return 1; } -- 2.43.5