[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Bug#465652: Acknowledgement (libc6: Occasional failed wakeup in pthread_cond_wait)



I've produced a small (140 line) test case.  Note that it only fails
25%-50% of the time, whereas my application failed 90%+ of the time,
probably due to the test case being much quicker.

-- 
Adam Olsen, aka Rhamphoryncus
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <errno.h>

#include <atomic_ops.h>

/* Number of worker threads; also the size of the threads[] array. */
#define THREAD_COUNT 10
/* Number of outer-loop iterations each worker() performs. */
#define REPEAT 100000
/* Number of do_tick() calls a worker makes per outer-loop iteration. */
#define RATIO 50

/* Per-worker state; one instance per slot of the threads[] array. */
typedef struct {
    /* Index of this worker, assigned in main() and printed in log lines. */
    int num;
    /* Locked by worker() for the duration of its loop; it is the mutex
     * passed to pthread_cond_wait() in do_tick() and is also locked by
     * other workers inside do_stoptheworld(). */
    pthread_mutex_t lock;
    /* Thread handle written by pthread_create() in main(). */
    pthread_t id;
} Thread;

/* One slot per worker; mutexes are initialized and threads started in main(). */
static Thread threads[THREAD_COUNT];
/* Condition variable broadcast in do_stoptheworld() and waited on in do_tick(). */
static pthread_cond_t wakeup = PTHREAD_COND_INITIALIZER;
/* Held for the whole body of do_stoptheworld(), so only one worker at a
 * time runs a stop-the-world round. */
static pthread_mutex_t world_lock = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP;
/* Set to 1 at the start of a stop-the-world round and back to 0 at its
 * end; do_tick() reads it to decide whether to wait on `wakeup`. */
static AO_t world_sleep;


/*
 * Evaluate `expr` exactly once and abort the process if it yields nonzero.
 *
 * On failure the source file, line, the returned status and the current
 * value of errno are written to stderr before abort() is called.  The
 * returned status is printed first because the pthread calls this macro
 * wraps report errors through their return value.
 *
 * The temporary avoids a double-underscore name (reserved for the
 * implementation) and the argument is parenthesized so any expression
 * can be passed safely.
 */
#define CHECK(expr) do { \
    int check_status_ = (expr); \
    if (check_status_ != 0) { \
        fprintf(stderr, "%s:%d syscall failed with %d, %d\n", __FILE__, __LINE__, check_status_, errno); \
        abort(); \
    } \
} while (0)

/*
 * Print one log line to stdout for the given worker: its index, its
 * pthread_t value cast to an integer and shown in hex, then `msg`.
 */
static void
logmsg(Thread *thread, const char *msg)
{
    const int index = thread->num;
    const unsigned long long handle = (unsigned long long)thread->id;

    printf("%d(0x%llx) %s\n", index, handle, msg);
}

/*
 * Run one "stop the world" round on behalf of worker `self`.
 *
 * Called from worker() with self->lock held; self->lock is held again on
 * return.  For the duration of the round world_lock is held and
 * world_sleep is 1.  The round acquires every other worker's lock,
 * broadcasts on `wakeup`, releases those locks, clears world_sleep and
 * finally drops world_lock.
 *
 * Every pthread call is wrapped in CHECK(), so any failure aborts.
 */
static void
do_stoptheworld(Thread *self)
{
    int i;

    /* Drop our own lock before blocking on world_lock, so that a worker
     * already running a round can acquire it in its loop below. */
    CHECK(pthread_mutex_unlock(&self->lock));
    CHECK(pthread_mutex_lock(&world_lock));
    CHECK(pthread_mutex_lock(&self->lock));
    /* From here on, do_tick() in the other workers sees the flag set and
     * waits on `wakeup`. */
    AO_store_full(&world_sleep, 1);

    /* Acquire every other worker's lock.  A worker's lock is only free
     * while that worker is inside pthread_cond_wait() in do_tick(), in the
     * unlock/lock window at the top of this function, or after it has left
     * its loop in worker(). */
    for (i = 0; i < THREAD_COUNT; i++) {
        Thread *other = &threads[i];

        if (other == self)
            continue;

        CHECK(pthread_mutex_lock(&other->lock));
    }

    /* Wake the workers currently waiting in do_tick(). */
    CHECK(pthread_cond_broadcast(&wakeup));

    /* Release the other workers' locks, in the same order they were taken. */
    for (i = 0; i < THREAD_COUNT; i++) {
        Thread *other = &threads[i];

        if (other == self)
            continue;

        CHECK(pthread_mutex_unlock(&other->lock));
    }

    /* The flag is cleared only after the broadcast and the unlocks. */
    AO_store_full(&world_sleep, 0);
    CHECK(pthread_mutex_unlock(&world_lock));
}

/*
 * One unit of "work" for a worker: if a stop-the-world round is in
 * progress (world_sleep is nonzero), wait once on `wakeup` using the
 * worker's own lock, logging before and after.  Otherwise do nothing.
 *
 * The wait is performed a single time; world_sleep is not re-checked
 * after waking.
 *
 * NOTE(review): each worker passes its own thread->lock to
 * pthread_cond_wait() on the shared `wakeup` condition variable, so
 * concurrent waiters use different mutexes -- confirm against the POSIX
 * pthread_cond_wait specification whether that usage is defined.
 */
static void
do_tick(Thread *thread)
{
    if (!AO_load_acquire(&world_sleep))
        return;

    logmsg(thread, "Sleeping");
    /* Any nonzero return from pthread_cond_wait is treated as fatal
     * for this test. */
    CHECK(pthread_cond_wait(&wakeup, &thread->lock));
    logmsg(thread, "Woken up");
}

/*
 * Thread entry point.  `arg` is the worker's own Thread slot.
 *
 * Holds the worker's lock for the whole loop.  Each of the REPEAT
 * iterations runs one stop-the-world round followed by RATIO calls to
 * do_tick(), and a progress line is printed five times over the run.
 * Always returns NULL.
 */
static void *
worker(void *arg)
{
    Thread *self = arg;
    const int report_every = REPEAT / 5;
    int round;

    logmsg(self, "Started");

    CHECK(pthread_mutex_lock(&self->lock));

    for (round = 0; round < REPEAT; round++) {
        int ticks_left = RATIO;

        do_stoptheworld(self);

        while (ticks_left-- > 0)
            do_tick(self);

        if (round % report_every == 0)
            printf("%d() ticked %d\n", self->num, round);
    }

    CHECK(pthread_mutex_unlock(&self->lock));

    logmsg(self, "Finished");
    return NULL;
}

/*
 * Test driver: prints the numeric values of EINVAL and EPERM (to help
 * decode CHECK() failure output), initializes every worker's mutex as an
 * error-checking mutex, starts THREAD_COUNT workers and joins them all.
 *
 * Returns 0 once every worker has been joined; any failing pthread call
 * aborts the process through CHECK().
 */
int
main(int argc, char **argv)
{
    int i;
    pthread_mutexattr_t attr;

    /* Command-line arguments are not used. */
    (void)argc;
    (void)argv;

    printf("%d %d\n", EINVAL, EPERM);

    CHECK(pthread_mutexattr_init(&attr));
    CHECK(pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK_NP));

    /* Every mutex must be initialized before any worker starts, because
     * do_stoptheworld() locks the mutexes of all other workers. */
    for (i = 0; i < THREAD_COUNT; i++) {
        Thread *thread = &threads[i];
        thread->num = i;
        CHECK(pthread_mutex_init(&thread->lock, &attr));
    }

    /* The attribute object is no longer needed once the mutexes exist;
     * release it instead of leaking it. */
    CHECK(pthread_mutexattr_destroy(&attr));

    for (i = 0; i < THREAD_COUNT; i++) {
        Thread *thread = &threads[i];
        CHECK(pthread_create(&thread->id, NULL, worker, (void *)thread));
    }

    for (i = 0; i < THREAD_COUNT; i++) {
        Thread *thread = &threads[i];
        CHECK(pthread_join(thread->id, NULL));
    }

    return 0;
}

Reply to: