[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Re: Please give back ruby1.9/1.9.0.2-9 on hppa and alpha



dann frazier wrote:
> On Mon, Feb 02, 2009 at 07:04:48PM +0100, Lucas Nussbaum wrote:
>> ruby1.9 still fails to build on hppa and alpha.
>>
>> On hppa, it's caused by a kernel bug, which was partially fixed (at
>> least the kernel doesn't panic() anymore). Since the issue is related to
>> threading, it is possible that retrying could make it build
>> successfully.
> 
> fyi, I've retried it numerous times on both buildds with no
> luck. We're not crashing the buildd anymore - thanks to Helge's fix -

The kudos belong to James Bottomley btw. I did debugging and testing,
but James gave me the final hint to the solution then...

> but the build hangs indefinitely. I've no objection to it being
> retried again of course (and I'm not the buildd admin anyway) - I just
> want to set your expectations.

I tried a few times now to find the bug. I'm not sure if it's really due to 
a) a kernel bug (probably)
b) the fact that hppa still uses Linuxthreads (although Dann mentioned
in another mail that he saw similar problems with another server which
used NPTL instead of Linuxthreads)
c) wrong pthread coding in ruby1.9

If it's due to a) (kernel bug), then it's hard to find and track down.
I concentrated on b) and c) for now. LT uses a few signals to synchronize the
threads, and ruby plays some small but bad games with signals in its code, e.g.
rb_disable_interrupt() and rb_enable_interrupt() in signal.c.
With the attached patch/hack below I tried to work around possible LT-related corner cases
in ruby1.9, but the issue stays the same: "make test" will make the ruby
testsuite hang in the "test_thread.rb" test. It seems some thread is waiting
for a signal which will not arrive, since the other thread is a zombie already....

Anyway, it would be nice if someone with ruby knowledge could reduce 
the testsuite, so that it will be easier to reproduce the bug. I'm a little
lost at this stage. Now that the hppa kernel doesn't crash any longer, building
such a testcase should be much easier.

Helge

--- ./signal.c.org	2009-02-05 11:16:23.000000000 +0100
+++ ./signal.c	2009-02-05 20:52:38.000000000 +0100
@@ -36,6 +36,46 @@
 # endif
 #endif
 
+/* ruby1.9 is a multithreaded program.
+   Nevertheless, ruby1.9 uses sigprocmask() which has unspecified 
+   behaviour in a multi-threaded process (see man page!).
+ */
+static void ruby_generate_sigprocmask(int how, sigset_t *mask, sigset_t *oldset)
+{
+	/* make sure that ruby does not block the Linuxthreads
+	   signals */
+	if (how == SIG_BLOCK) {
+		sigdelset(mask, __SIGRTMIN);
+		sigdelset(mask, __SIGRTMIN+1);
+		sigdelset(mask, __SIGRTMIN+2);
+	} else if (how == SIG_SETMASK) {
+		sigaddset(mask, __SIGRTMIN);
+		sigaddset(mask, __SIGRTMIN+1);
+		sigaddset(mask, __SIGRTMIN+2);
+	} else { // SIG_UNBLOCK
+		sigaddset(mask, __SIGRTMIN);
+		sigaddset(mask, __SIGRTMIN+1);
+		sigaddset(mask, __SIGRTMIN+2);
+	}
+}
+
+static int ruby_pthread_sigprocmask(int how, sigset_t *mask, sigset_t *oldset)
+{
+	ruby_generate_sigprocmask(how, mask, oldset);
+	return pthread_sigmask(how,mask,oldset);
+}
+
+static int ruby_sigprocmask(int how, sigset_t *mask, sigset_t *oldset)
+{
+#if 0 
+	return ruby_pthread_sigprocmask(how, mask, oldset);
+#else
+	ruby_generate_sigprocmask(how, mask, oldset);
+	/* XXX: ruby should not use sigprocmask(). */
+	return sigprocmask(how,mask,oldset);
+#endif
+}
+
 static const struct signals {
     const char *signm;
     int  signo;
@@ -430,7 +470,6 @@ static sighandler_t
 ruby_signal(int signum, sighandler_t handler)
 {
     struct sigaction sigact, old;
-
 #if 0
     rb_trap_accept_nativethreads[signum] = 0;
 #endif
@@ -448,6 +487,10 @@ ruby_signal(int signum, sighandler_t han
     if (signum == SIGCHLD && handler == SIG_IGN)
 	sigact.sa_flags |= SA_NOCLDWAIT;
 #endif
+
+//    printf("signal: %d (%d), %p\n", signum, __SIGRTMIN, handler);
+    if (signum >= __SIGRTMIN && signum <= __SIGRTMIN+2)
+	return NULL;
     sigaction(signum, &sigact, &old);
     return old.sa_handler;
 }
@@ -505,7 +548,7 @@ rb_disable_interrupt(void)
     sigfillset(&mask);
     sigdelset(&mask, SIGVTALRM);
     sigdelset(&mask, SIGSEGV);
-    pthread_sigmask(SIG_SETMASK, &mask, NULL);
+    ruby_pthread_sigprocmask(SIG_SETMASK, &mask, NULL);
 #endif
 }
 
@@ -515,7 +558,7 @@ rb_enable_interrupt(void)
 #ifndef _WIN32
     sigset_t mask;
     sigemptyset(&mask);
-    pthread_sigmask(SIG_SETMASK, &mask, NULL);
+    ruby_pthread_sigprocmask(SIG_SETMASK, &mask, NULL);
 #endif
 }
 
@@ -852,7 +895,7 @@ trap_ensure(struct trap_arg *arg)
 {
     /* enable interrupt */
 #ifdef HAVE_SIGPROCMASK
-    sigprocmask(SIG_SETMASK, &arg->mask, NULL);
+    ruby_sigprocmask(SIG_SETMASK, &arg->mask, NULL);
 #else
     sigsetmask(arg->mask);
 #endif
@@ -866,7 +909,7 @@ rb_trap_restore_mask(void)
 {
 #if USE_TRAP_MASK
 # ifdef HAVE_SIGPROCMASK
-    sigprocmask(SIG_SETMASK, &trap_last_mask, NULL);
+    ruby_sigprocmask(SIG_SETMASK, &trap_last_mask, NULL);
 # else
     sigsetmask(trap_last_mask);
 # endif
@@ -931,7 +974,7 @@ sig_trap(int argc, VALUE *argv)
     /* disable interrupt */
 # ifdef HAVE_SIGPROCMASK
     sigfillset(&arg.mask);
-    sigprocmask(SIG_BLOCK, &arg.mask, &arg.mask);
+    ruby_sigprocmask(SIG_BLOCK, &arg.mask, &arg.mask);
 # else
     arg.mask = sigblock(~0);
 # endif
@@ -991,7 +1034,7 @@ init_sigchld(int sig)
     /* disable interrupt */
 # ifdef HAVE_SIGPROCMASK
     sigfillset(&mask);
-    sigprocmask(SIG_BLOCK, &mask, &mask);
+    ruby_sigprocmask(SIG_BLOCK, &mask, &mask);
 # else
     mask = sigblock(~0);
 # endif
@@ -1007,7 +1050,7 @@ init_sigchld(int sig)
 #if USE_TRAP_MASK
 #ifdef HAVE_SIGPROCMASK
     sigdelset(&mask, sig);
-    sigprocmask(SIG_SETMASK, &mask, NULL);
+    ruby_sigprocmask(SIG_SETMASK, &mask, NULL);
 #else
     mask &= ~sigmask(sig);
     sigsetmask(mask);

Reply to: