[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Re: [Nbd] [PATCH] nbd: Add locking for tasks



On Tue, 2015-10-06 at 20:03 +0200, Markus Pargmann wrote:
> The timeout handling introduced in
> 	> 7e2893a16d3e (nbd: Fix timeout detection)
> introduces a race condition which may lead to killing of tasks that are
> not in nbd context anymore. This was not observed or reproducible yet.
> 
> This patch adds locking to critical use of task_recv and task_send to
> avoid killing tasks that already left the NBD thread functions. This
> lock is only acquired if a timeout occurs or the nbd device
> starts/stops.
> 
> Reported-by: Ben Hutchings <ben@...1505...>
> Signed-off-by: Markus Pargmann <mpa@...1897...>

Reviewed-by: Ben Hutchings <ben@...1505...>

You could add 'Fixes: 7e2893a16d3e ("nbd: Fix timeout detection")' to
the commit message as well.

nbd_dbg_tasks_show() can still race with thread exit and two tasks can 
race to become the receive thread, but those aren't new bugs.

Ben.

> ---
>  drivers/block/nbd.c | 36 ++++++++++++++++++++++++++++++------
>  1 file changed, 30 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
> index 039c7c4f0539..1a70852ac808 100644
> --- a/drivers/block/nbd.c
> +++ b/drivers/block/nbd.c
> @@ -60,6 +60,7 @@ struct nbd_device {
>  > 	> bool disconnect; /* a disconnect has been requested by user */
>  
>  > 	> struct timer_list timeout_timer;
> +> 	> spinlock_t tasks_lock;
>  > 	> struct task_struct *task_recv;
>  > 	> struct task_struct *task_send;
>  
> @@ -140,21 +141,23 @@ static void sock_shutdown(struct nbd_device *nbd)
>  static void nbd_xmit_timeout(unsigned long arg)
>  {
>  > 	> struct nbd_device *nbd = (struct nbd_device *)arg;
> -> 	> struct task_struct *task;
> +> 	> unsigned long flags;
>  
>  > 	> if (list_empty(&nbd->queue_head))
>  > 	> 	> return;
>  
>  > 	> nbd->disconnect = true;
>  
> -> 	> task = READ_ONCE(nbd->task_recv);
> -> 	> if (task)
> -> 	> 	> force_sig(SIGKILL, task);
> +> 	> spin_lock_irqsave(&nbd->tasks_lock, flags);
> +
> +> 	> if (nbd->task_recv)
> +> 	> 	> force_sig(SIGKILL, nbd->task_recv);
>  
> -> 	> task = READ_ONCE(nbd->task_send);
> -> 	> if (task)
> +> 	> if (nbd->task_send)
>  > 	> 	> force_sig(SIGKILL, nbd->task_send);
>  
> +> 	> spin_unlock_irqrestore(&nbd->tasks_lock, flags);
> +
>  > 	> dev_err(nbd_to_dev(nbd), "Connection timed out, killed receiver and sender, shutting down connection\n");
>  }
>  
> @@ -403,17 +406,24 @@ static int nbd_thread_recv(struct nbd_device *nbd)
>  {
>  > 	> struct request *req;
>  > 	> int ret;
> +> 	> unsigned long flags;
>  
>  > 	> BUG_ON(nbd->magic != NBD_MAGIC);
>  
>  > 	> sk_set_memalloc(nbd->sock->sk);
>  
> +> 	> spin_lock_irqsave(&nbd->tasks_lock, flags);
>  > 	> nbd->task_recv = current;
> +> 	> spin_unlock_irqrestore(&nbd->tasks_lock, flags);
>  
>  > 	> ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
>  > 	> if (ret) {
>  > 	> 	> dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
> +
> +> 	> 	> spin_lock_irqsave(&nbd->tasks_lock, flags);
>  > 	> 	> nbd->task_recv = NULL;
> +> 	> 	> spin_unlock_irqrestore(&nbd->tasks_lock, flags);
> +
>  > 	> 	> return ret;
>  > 	> }
>  
> @@ -429,7 +439,9 @@ static int nbd_thread_recv(struct nbd_device *nbd)
>  
>  > 	> device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
>  
> +> 	> spin_lock_irqsave(&nbd->tasks_lock, flags);
>  > 	> nbd->task_recv = NULL;
> +> 	> spin_unlock_irqrestore(&nbd->tasks_lock, flags);
>  
>  > 	> if (signal_pending(current)) {
>  > 	> 	> siginfo_t info;
> @@ -534,8 +546,11 @@ static int nbd_thread_send(void *data)
>  {
>  > 	> struct nbd_device *nbd = data;
>  > 	> struct request *req;
> +> 	> unsigned long flags;
>  
> +> 	> spin_lock_irqsave(&nbd->tasks_lock, flags);
>  > 	> nbd->task_send = current;
> +> 	> spin_unlock_irqrestore(&nbd->tasks_lock, flags);
>  
>  > 	> set_user_nice(current, MIN_NICE);
>  > 	> while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) {
> @@ -572,7 +587,15 @@ static int nbd_thread_send(void *data)
>  > 	> 	> nbd_handle_req(nbd, req);
>  > 	> }
>  
> +> 	> spin_lock_irqsave(&nbd->tasks_lock, flags);
>  > 	> nbd->task_send = NULL;
> +> 	> spin_unlock_irqrestore(&nbd->tasks_lock, flags);
> +
> +> 	> /* Clear maybe pending signals */
> +> 	> if (signal_pending(current)) {
> +> 	> 	> siginfo_t info;
> +> 	> 	> dequeue_signal_lock(current, &current->blocked, &info);
> +> 	> }
>  
>  > 	> return 0;
>  }
> @@ -1027,6 +1050,7 @@ static int __init nbd_init(void)
>  > 	> 	> nbd_dev[i].magic = NBD_MAGIC;
>  > 	> 	> INIT_LIST_HEAD(&nbd_dev[i].waiting_queue);
>  > 	> 	> spin_lock_init(&nbd_dev[i].queue_lock);
> +> 	> 	> spin_lock_init(&nbd_dev[i].tasks_lock);
>  > 	> 	> INIT_LIST_HEAD(&nbd_dev[i].queue_head);
>  > 	> 	> mutex_init(&nbd_dev[i].tx_lock);
>  > 	> 	> init_timer(&nbd_dev[i].timeout_timer);
-- 
Ben Hutchings
Once a job is fouled up, anything done to improve it makes it worse.

Attachment: signature.asc
Description: This is a digitally signed message part


Reply to: