[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Bug#570350: pid_ns child_reaper fixes for 2.6.26



On Tue, 2010-06-29 at 17:23 +0200, Oleg Nesterov wrote:
> On 06/29, Ben Hutchings wrote:
> >
> > I've attempted to cherry-pick and adjust these for 2.6.26; patches
> > below.  Do these look reasonable or are additional changes required?
> 
> Confused. please see below.
> 
> > Subject: [PATCH 1/2] pid_ns: zap_pid_ns_processes: fix the ->child_reaper changing
> >
> > commit add0d4dfd660e9e4fd0af3eac3cad23583c9558f upstream.
> > ...
> >
> > @@ -182,9 +182,12 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
> >  		rc = sys_wait4(-1, NULL, __WALL, NULL);
> >  	} while (rc != -ECHILD);
> >
> > -
> > -	/* Child reaper for the pid namespace is going away */
> > -	pid_ns->child_reaper = NULL;
> > +	/*
> > +	 * We can not clear ->child_reaper or leave it alone.
> > +	 * There may by stealth EXIT_DEAD tasks on ->children,
> > +	 * forget_original_parent() must move them somewhere.
> > +	 */
> > +	pid_ns->child_reaper = init_pid_ns.child_reaper;
> 
> This is correct, but the second patch
> 
> > @@ -182,12 +182,6 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
> >  		rc = sys_wait4(-1, NULL, __WALL, NULL);
> >  	} while (rc != -ECHILD);
> >
> > -	/*
> > -	 * We can not clear ->child_reaper or leave it alone.
> > -	 * There may by stealth EXIT_DEAD tasks on ->children,
> > -	 * forget_original_parent() must move them somewhere.
> > -	 */
> > -	pid_ns->child_reaper = init_pid_ns.child_reaper;
> 
> Removes this code?

That's what your commit 950bbabb5a804690a0201190de5c22837f72f83f did.

> This doesn't look right, or I missed something.
> 
> 
> I think you are right, you need these 2 commits
> 
> 	950bbabb5a804690a0201190de5c22837f72f83f
> 	add0d4dfd660e9e4fd0af3eac3cad23583c9558f
> 
> (in that order).

That is the opposite of the order in which they were originally applied!

> I'd suggest you to adjust these commits and make
> a single patch. In that case I can try to see if it is correct
> against the 2.6.26.

The combined diff is:

--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -758,23 +758,48 @@ static void reparent_thread(struct task_struct *p, struct task_struct *father)
  * the child reaper process (ie "init") in our pid
  * space.
  */
+static struct task_struct *find_new_reaper(struct task_struct *father)
+{
+	struct pid_namespace *pid_ns = task_active_pid_ns(father);
+	struct task_struct *thread;
+
+	thread = father;
+	while_each_thread(father, thread) {
+		if (thread->flags & PF_EXITING)
+			continue;
+		if (unlikely(pid_ns->child_reaper == father))
+			pid_ns->child_reaper = thread;
+		return thread;
+	}
+
+	if (unlikely(pid_ns->child_reaper == father)) {
+		write_unlock_irq(&tasklist_lock);
+		if (unlikely(pid_ns == &init_pid_ns))
+			panic("Attempted to kill init!");
+
+		zap_pid_ns_processes(pid_ns);
+		write_lock_irq(&tasklist_lock);
+		/*
+		 * We can not clear ->child_reaper or leave it alone.
+		 * There may be stealth EXIT_DEAD tasks on ->children,
+		 * forget_original_parent() must move them somewhere.
+		 */
+		pid_ns->child_reaper = init_pid_ns.child_reaper;
+	}
+
+	return pid_ns->child_reaper;
+}
+
 static void forget_original_parent(struct task_struct *father)
 {
-	struct task_struct *p, *n, *reaper = father;
+	struct task_struct *p, *n, *reaper;
 	struct list_head ptrace_dead;
 
 	INIT_LIST_HEAD(&ptrace_dead);
 
 	write_lock_irq(&tasklist_lock);
+	reaper = find_new_reaper(father);
 
-	do {
-		reaper = next_thread(reaper);
-		if (reaper == father) {
-			reaper = task_child_reaper(father);
-			break;
-		}
-	} while (reaper->flags & PF_EXITING);
-
 	/*
 	 * There are only two places where our children can be:
 	 *
@@ -929,39 +954,6 @@ static void check_stack_usage(void)
 static inline void check_stack_usage(void) {}
 #endif
 
-static inline void exit_child_reaper(struct task_struct *tsk)
-{
-	if (likely(tsk->group_leader != task_child_reaper(tsk)))
-		return;
-
-	if (tsk->nsproxy->pid_ns == &init_pid_ns)
-		panic("Attempted to kill init!");
-
-	/*
-	 * @tsk is the last thread in the 'cgroup-init' and is exiting.
-	 * Terminate all remaining processes in the namespace and reap them
-	 * before exiting @tsk.
-	 *
-	 * Note that @tsk (last thread of cgroup-init) may not necessarily
-	 * be the child-reaper (i.e main thread of cgroup-init) of the
-	 * namespace i.e the child_reaper may have already exited.
-	 *
-	 * Even after a child_reaper exits, we let it inherit orphaned children,
-	 * because, pid_ns->child_reaper remains valid as long as there is
-	 * at least one living sub-thread in the cgroup init.
-
-	 * This living sub-thread of the cgroup-init will be notified when
-	 * a child inherited by the 'child-reaper' exits (do_notify_parent()
-	 * uses __group_send_sig_info()). Further, when reaping child processes,
-	 * do_wait() iterates over children of all living sub threads.
-
-	 * i.e even though 'child_reaper' thread is listed as the parent of the
-	 * orphaned children, any living sub-thread in the cgroup-init can
-	 * perform the role of the child_reaper.
-	 */
-	zap_pid_ns_processes(tsk->nsproxy->pid_ns);
-}
-
 NORET_TYPE void do_exit(long code)
 {
 	struct task_struct *tsk = current;
@@ -1024,7 +1016,6 @@ NORET_TYPE void do_exit(long code)
 	}
 	group_dead = atomic_dec_and_test(&tsk->signal->live);
 	if (group_dead) {
-		exit_child_reaper(tsk);
 		hrtimer_cancel(&tsk->signal->real_timer);
 		exit_itimers(tsk->signal);
 	}
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -182,9 +182,6 @@
 		rc = sys_wait4(-1, NULL, __WALL, NULL);
 	} while (rc != -ECHILD);
 
-
-	/* Child reaper for the pid namespace is going away */
-	pid_ns->child_reaper = NULL;
 	return;
 }
 
--

Ben.

-- 
Ben Hutchings
Once a job is fouled up, anything done to improve it makes it worse.

Attachment: signature.asc
Description: This is a digitally signed message part


Reply to: