Bug 13002 - robust mutex deadlocks instead of returning EOWNERDEAD
Summary: robust mutex deadlocks instead of returning EOWNERDEAD
Status: RESOLVED FIXED
Alias: None
Product: glibc
Classification: Unclassified
Component: nptl (show other bugs)
Version: 2.14
: P2 normal
Target Milestone: ---
Assignee: Ulrich Drepper
URL:
Keywords:
Depends on:
Blocks:
 
Reported: 2011-07-16 13:36 UTC by Jonathan Wakely
Modified: 2014-06-27 12:54 UTC (History)
1 user (show)

See Also:
Host:
Target:
Build:
Last reconfirmed:
fweimer: security-


Attachments

Note You need to log in before you can comment on or make changes to this bug.
Description Jonathan Wakely 2011-07-16 13:36:42 UTC
Originally reported as http://bugzilla.redhat.com/show_bug.cgi?id=628608

As shown by this code, the kernel clears the robust list for the child after a fork:


#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/wait.h>
#include <sys/syscall.h>
#include <syscall.h>

/*
 * Checks whether a child created by fork() still has a robust-futex list.
 *
 * The parent first verifies that get_robust_list reports a non-NULL list
 * head for itself (pid argument 0), then forks.  The child repeats the query
 * and reports the outcome through its exit status:
 *   1 - the get_robust_list syscall failed,
 *   2 - the syscall succeeded but the list head is NULL,
 *   0 - a list head is present.
 * The parent waits for the child, decodes that status and prints a message.
 *
 * Returns 0 when the child reported a robust list (or any status other than
 * 1 or 2), and 1 when the child reported a problem or did not exit normally.
 * Calls abort() if the parent's own query fails or yields NULL, or if
 * fork() or waitpid() fail.
 */
int main(void)
{
    void *list;
    size_t sz;

    /* Parent-side baseline: without a list here the comparison says nothing. */
    if (syscall(__NR_get_robust_list, 0, &list, &sz))
        abort();
    if (!list)
        abort();

    pid_t child = fork();
    if (child < 0)
    {
        /* No child exists; waiting below would fail and leave status unset. */
        abort();
    }

    if (child == 0)
    {
        /* Child: encode the result of the same query in the exit status. */
        if (syscall(__NR_get_robust_list, 0, &list, &sz))
            return 1;
        if (!list)
            return 2;
        return 0;
    }

    int status;
    /* Only inspect status once waitpid has actually filled it in. */
    if (waitpid(child, &status, 0) < 0)
        abort();

    if (!WIFEXITED(status))
    {
        printf("child exited abnormally\n");
        return 1;
    }

    switch (WEXITSTATUS(status))
    {
    case 1:
        printf("child failed to call get_robust_list\n");
        return 1;
    case 2:
        printf("child has no robust list\n");
        return 1;
    default:
        printf("child exited normally\n");
        break;
    }

    return 0;
}


If a parent process and child process share a robust mutex and the child exits while holding the mutex lock, when the parent tries to acquire the lock it will hang instead of being notified of the state by EOWNERDEAD.

Here's a testcase which exits successfully on Solaris but deadlocks with NPTL:

#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <pthread.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include <stdio.h>
#include <fcntl.h>

/*
 * Set up *mtx as a process-shared, robust mutex.
 *
 * A temporary attribute object is created, configured with
 * PTHREAD_PROCESS_SHARED and PTHREAD_MUTEX_ROBUST_NP, used to initialise the
 * mutex, and then destroyed.  The steps run strictly in that order; the first
 * one that reports an error terminates the process with abort().
 */
void initialize(pthread_mutex_t* mtx)
{
    pthread_mutexattr_t attr;

    /* || short-circuits, so no later step runs after a failure. */
    if (pthread_mutexattr_init(&attr) != 0
        || pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) != 0
        || pthread_mutexattr_setrobust_np(&attr, PTHREAD_MUTEX_ROBUST_NP) != 0
        || pthread_mutex_init(mtx, &attr) != 0) {
        abort();
    }

    pthread_mutexattr_destroy(&attr);
}

/*
 * Mark the robust mutex *mtx as consistent again.
 *
 * Thin wrapper around pthread_mutex_consistent_np(); any error from that
 * call terminates the process with abort().
 */
void set_consistent(pthread_mutex_t* mtx)
{
    const int rc = pthread_mutex_consistent_np(mtx);

    if (rc != 0) {
        abort();
    }
}

/*
 * Acquire *mtx, recovering it if the previous owner died while holding it.
 *
 * - pthread_mutex_lock() returns 0: nothing more to do.
 * - It returns EOWNERDEAD (abandoned mutex): the mutex is marked consistent
 *   with pthread_mutex_consistent_np() and the function returns normally.
 *   Per POSIX the caller holds the lock in this case.
 * - Any other error, or a failure to mark the mutex consistent, terminates
 *   the process with abort().
 */
void lock(pthread_mutex_t* mtx)
{
    const int rc = pthread_mutex_lock(mtx);

    if (rc == 0)
        return;

    /* Only an abandoned mutex is recoverable; everything else is fatal. */
    if (rc != EOWNERDEAD)
        abort();

    if (pthread_mutex_consistent_np(mtx) != 0)
        abort();
}

/*
 * Release *mtx.
 *
 * Wraps pthread_mutex_unlock(); a non-zero result terminates the process
 * with abort().
 */
void unlock(pthread_mutex_t* mtx)
{
    const int rc = pthread_mutex_unlock(mtx);

    if (rc != 0) {
        abort();
    }
}

/*
 * Fork a child process that runs fn and exits with fn's return value.
 *
 * In the parent, returns the pid of the new child.  The child never returns
 * from this function: it passes the result of fn() straight to exit().
 * If fork() fails the calling process is terminated with abort().
 */
pid_t spawn(int(*fn)())
{
    const pid_t child = fork();

    if (child == -1)
        abort();

    if (child == 0)
        exit(fn());   /* child side: run the payload and terminate */

    return child;     /* parent side */
}

/* Path of the file that backs the shared mutex: opened and mapped by
   open_shared_file(), removed by main() before the test starts. */
char const shared_file[] = "shared_file";


/*
 * Map the file named by shared_file and return the mapping, which callers
 * use as storage for a single pthread_mutex_t.
 *
 * The file is opened read/write and created if it does not exist.  A file
 * of size 0 is treated as new: it is extended to sizeof(pthread_mutex_t)
 * and, once mapped, a mutex is set up in it with initialize().  A non-empty
 * file is mapped as it is, without re-initialising its contents.
 *
 * The descriptor is closed as soon as mmap() has been called.  Any failure
 * of open/fstat/ftruncate/mmap terminates the process with abort().
 */
void* open_shared_file()
{
    const int fd = open(shared_file, O_CREAT | O_RDWR, (mode_t)0666);
    if (fd < 0)
        abort();

    struct stat info;
    if (fstat(fd, &info) != 0)
        abort();

    /* An empty file means nobody has placed a mutex in it yet. */
    const int is_fresh = (info.st_size == 0);
    if (is_fresh && ftruncate(fd, sizeof(pthread_mutex_t)) != 0)
        abort();

    void* const region = mmap(NULL, sizeof(pthread_mutex_t),
                              PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    close(fd);
    if (region == MAP_FAILED)
        abort();

    if (is_fresh)
        initialize((pthread_mutex_t*)region);

    return region;
}

/*
 * First participant: maps the shared mutex, locks it, and returns 0 while
 * still holding the lock.
 *
 * The missing unlock is the point of the test case: when run through
 * spawn(), the process exits as the owner of the mutex.  Progress messages
 * are prefixed with the process id.
 */
int process_1()
{
    const unsigned self = getpid();

    printf("%u: process 1\n", self);
    pthread_mutex_t* const mtx = (pthread_mutex_t*)open_shared_file();

    printf("%u: locking mutex...\n", self);
    lock(mtx);

    /* Intentionally no unlock(mtx) here. */
    printf("%u: exiting\n", self);
    return 0;
}

/*
 * Second participant: maps the shared mutex, locks it via lock() (which
 * handles EOWNERDEAD), reports success, unlocks it and returns 0.
 *
 * Progress messages are prefixed with the process id.
 */
int process_2()
{
    const unsigned self = getpid();

    printf("%u: process 2\n", self);
    pthread_mutex_t* const mtx = (pthread_mutex_t*)open_shared_file();

    printf("%u: locking mutex...\n", self);
    lock(mtx);
    printf("%u: mutex locked\n", self);

    unlock(mtx);
    return 0;
}

/*
 * Test driver.
 *
 * Removes any leftover shared file, runs process_1 in a forked child and
 * waits for it to terminate, then runs process_2 in this process and
 * returns its result.  Aborts if waitpid() fails.  The command-line
 * arguments are not used.
 */
int main(int ac, char** av)
{
    int wait_status;

    /* With the old file gone, process_1 sees an empty file and initialises
       the mutex in it. */
    unlink(shared_file);

    /* Step 1: let process_1 run to completion in a child. */
    const pid_t first = spawn(process_1);
    if (waitpid(first, &wait_status, 0) == -1)
        abort();

    /* Step 2: try to take the same mutex from this process. */
    return process_2();
}
Comment 1 Ulrich Drepper 2011-07-20 16:25:17 UTC
Check the code before reporting problems, this has been fixed for some time.
Comment 2 Jonathan Wakely 2011-07-20 17:20:22 UTC
ah yes, 6f8326cacd08bf7d1966743086855fc36574bf74 - sorry, and thanks!
Comment 3 wangdeqiang 2011-08-10 04:38:47 UTC
Is it enough to just do the set_robust_list syscall in the child after fork?
If the parent is a multi-threaded process, then while one thread is in fork, other threads might be calling pthread_mutex_lock, so the parent's (user-space) robust list is not empty. When fork completes, the child's robust list will contain some mutexes that are not actually owned by the child.
I think the mutexes in the robust list should be cleared in the child.
Comment 4 Jackie Rosen 2014-02-16 16:59:38 UTC Comment hidden (spam)