[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Bug#261706: usb-storage hangs all USB devices



On Sun, Aug 01, 2004 at 10:43:30PM -0700, Matt Zimmerman wrote:
> Is there anything further I can do to help resolve this issue?  For now, I
> am working around the problem by backing out to an older cdrecord which
> doesn't trigger the problem, but I would like to help solve it correctly.
> 
> Has anyone else seen this behaviour?

A long shot, but is this of any help
https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=90442

I have attached the patch which was included in the
recently released kernel-2.4.22-1.2199.nptl kernel for fedora.

-- 
Horms
diff -ur linux-2.4.20-20.7.5/drivers/usb/serial/usbserial.c linux-2.4.20-20.7.5-u1/drivers/usb/serial/usbserial.c
--- linux-2.4.20-20.7.5/drivers/usb/serial/usbserial.c	2003-11-17 22:30:34.000000000 -0500
+++ linux-2.4.20-20.7.5-u1/drivers/usb/serial/usbserial.c	2003-11-17 22:33:16.000000000 -0500
@@ -297,6 +297,7 @@
 #include <linux/spinlock.h>
 #include <linux/list.h>
 #include <linux/smp_lock.h>
+#include <linux/spinlock.h>
 #include <linux/usb.h>
 
 #ifdef CONFIG_USB_SERIAL_DEBUG
@@ -347,10 +348,24 @@
 };
 #endif
 
+/*
+ * The post kludge structures and variables.
+ */
+#define POST_BSIZE	100	/* little below 128 in total */
+struct usb_serial_post_job {
+	struct list_head link;
+	struct usb_serial_port *port;
+	int len;
+	char buff[POST_BSIZE];
+};
+static spinlock_t post_lock = SPIN_LOCK_UNLOCKED;	/* Also covers ->ref */
+static struct list_head post_list = LIST_HEAD_INIT(post_list);
+static struct tq_struct post_task;
 
 /* local function prototypes */
 static int  serial_open (struct tty_struct *tty, struct file * filp);
 static void serial_close (struct tty_struct *tty, struct file * filp);
+static int __serial_write (struct usb_serial_port *port, int from_user, const unsigned char *buf, int count);
 static int  serial_write (struct tty_struct * tty, int from_user, const unsigned char *buf, int count);
 static int  serial_write_room (struct tty_struct *tty);
 static int  serial_chars_in_buffer (struct tty_struct *tty);
@@ -448,6 +463,53 @@
 	return;
 }
 
+/*
+ * The post kludge.
+ *
+ * Our component drivers are hideously buggy and written by people
+ * who have difficulty understanding the concept of spinlocks.
+ * There were so many races and lockups that Greg K-H made a watershed
+ * decision to provide what is essentially a single-threaded sandbox
+ * for component drivers, protected by a semaphore. It helped a lot, but
+ * for one little problem: when tty->low_latency is set, line disciplines
+ * can call ->write from an interrupt, where the semaphore oopses.
+ *
+ * Rather than open the whole can of worms again, we just post writes
+ * into a helper which can sleep.
+ *
+ * Kernel 2.6 has a proper fix, reportedly.
+ *
+ * XXX Nothing prevents this from looping forever.
+ */
+static void post_helper(void *arg)
+{
+	struct usb_serial_post_job *job;
+	struct usb_serial_port *port;
+	struct usb_serial *serial;
+	unsigned int flags;
+
+	spin_lock_irqsave(&post_lock, flags);
+	while (!list_empty(&post_list)) {
+		job = list_entry(post_list.next, struct usb_serial_post_job, link);
+		list_del(&job->link);
+		spin_unlock_irqrestore(&post_lock, flags);
+
+		port = job->port;
+		serial = get_usb_serial (port, __FUNCTION__);
+
+		down(&port->sem);
+		if (port->tty != NULL)
+			__serial_write(port, 0, job->buff, job->len);
+		up(&port->sem);
+
+		kfree(job);
+		spin_lock_irqsave(&post_lock, flags);
+		if (--serial->ref == 0)
+			kfree(serial);
+	}
+	spin_unlock_irqrestore(&post_lock, flags);
+}
+
 #ifdef USES_EZUSB_FUNCTIONS
 /* EZ-USB Control and Status Register.  Bit 0 controls 8051 reset */
 #define CPUCS_REG    0x7F92
@@ -576,23 +638,21 @@
 
 	/* if disconnect beat us to the punch here, there's nothing to do */
 	if (tty->driver_data) {
+		/* post_helper(NULL); */ /* Correct, but unimportant for echo.*/
 		__serial_close(port, filp);
 	}
 
 	up (&port->sem);
 }
 
-static int serial_write (struct tty_struct * tty, int from_user, const unsigned char *buf, int count)
+static int __serial_write (struct usb_serial_port *port, int from_user, const unsigned char *buf, int count)
 {
-	struct usb_serial_port *port = (struct usb_serial_port *) tty->driver_data;
 	struct usb_serial *serial = get_usb_serial (port, __FUNCTION__);
 	int retval = -EINVAL;
 
 	if (!serial)
 		return -ENODEV;
 
-	down (&port->sem);
-
 	dbg("%s - port %d, %d byte(s)", __FUNCTION__, port->number, count);
 
 	if (!port->open_count) {
@@ -607,10 +667,68 @@
 		retval = generic_write(port, from_user, buf, count);
 
 exit:
-	up (&port->sem);
 	return retval;
 }
 
+static int serial_write (struct tty_struct * tty, int from_user, const unsigned char *buf, int count)
+{
+	struct usb_serial_port *port = (struct usb_serial_port *) tty->driver_data;
+	struct usb_serial *serial = get_usb_serial (port, __FUNCTION__);
+	struct usb_serial_post_job *job;
+	unsigned long flags;
+	int rc;
+
+	if (!in_interrupt()) {
+		/*
+		 * Run post_list to reduce a possiblity of reordered writes.
+		 * Tasks can make keventd to sleep, sometimes for a long time.
+		 */
+		post_helper(NULL);
+
+		down(&port->sem);
+		rc = __serial_write(port, from_user, buf, count);
+		up(&port->sem);
+		return rc;
+	}
+
+	if (from_user) {
+		/*
+		 * This is a BUG-able offense because we cannot
+		 * pagefault while in_interrupt, but we want to see
+		 * something in dmesg rather than just blinking LEDs.
+		 */
+		err("user data in interrupt write");
+		return -EINVAL;
+	}
+
+	job = kmalloc(sizeof(struct usb_serial_post_job), GFP_ATOMIC);
+	if (job == NULL)
+		return -ENOMEM;
+
+	job->port = port;
+	if ((job->len = count) >= POST_BSIZE) {
+		static int rate = 0;
+		/*
+		 * Data loss due to extreme circumstances.
+		 * It's a ususal thing on serial to lose characters, isn't it?
+		 * Neener, neener! Actually, it's probably an echo loop anyway.
+		 * Only happens when getty starts talking to Visor.
+		 */
+		if (++rate % 1000 < 5)
+			err("too much data (%d)", count);
+		job->len = POST_BSIZE;
+	}
+	memcpy(job->buff, buf, job->len);
+
+	spin_lock_irqsave(&post_lock, flags);
+	list_add_tail(&job->link, &post_list);
+	serial->ref++;		/* Protect the port->sem from kfree() */
+	schedule_task(&post_task);
+	spin_unlock_irqrestore(&post_lock, flags);
+
+	return count;
+}
+
 static int serial_write_room (struct tty_struct *tty) 
 {
 	struct usb_serial_port *port = (struct usb_serial_port *) tty->driver_data;
@@ -620,6 +738,9 @@
 	if (!serial)
 		return -ENODEV;
 
+	if (in_interrupt())
+		return POST_BSIZE;
+
 	down (&port->sem);
 
 	dbg("%s - port %d", __FUNCTION__, port->number);
@@ -1128,6 +1249,7 @@
 	int num_ports;
 	int max_endpoints;
 	const struct usb_device_id *id_pattern = NULL;
+	unsigned long flags;
 
 	/* loop through our list of known serial converters, and see if this
 	   device matches. */
@@ -1338,11 +1460,15 @@
 		init_MUTEX (&port->sem);
 	}
 
+	spin_lock_irqsave(&post_lock, flags);
+	serial->ref = 1;
+	spin_unlock_irqrestore(&post_lock, flags);
+
 	/* if this device type has a startup function, call it */
 	if (type->startup) {
 		i = type->startup (serial);
 		if (i < 0)
-			goto probe_error;
+			goto startup_error;
 		if (i > 0)
 			return serial;
 	}
@@ -1357,6 +1483,12 @@
 	return serial; /* success */
 
 
+startup_error:
+	spin_lock_irqsave(&post_lock, flags);
+	if (serial->ref != 1) {
+		err("bug in component startup: ref %d\n", serial->ref);
+	}
+	spin_unlock_irqrestore(&post_lock, flags);
 probe_error:
 	for (i = 0; i < num_bulk_in; ++i) {
 		port = &serial->port[i];
@@ -1392,6 +1524,7 @@
 {
 	struct usb_serial *serial = (struct usb_serial *) ptr;
 	struct usb_serial_port *port;
+	unsigned long flags;
 	int i;
 
 	dbg ("%s", __FUNCTION__);
@@ -1452,7 +1585,10 @@
 		return_serial (serial);
 
 		/* free up any memory that we allocated */
-		kfree (serial);
+		spin_lock_irqsave(&post_lock, flags);
+		if (--serial->ref == 0)
+			kfree(serial);
+		spin_unlock_irqrestore(&post_lock, flags);
 
 	} else {
 		info("device disconnected");
@@ -1504,6 +1640,7 @@
 	for (i = 0; i < SERIAL_TTY_MINORS; ++i) {
 		serial_table[i] = NULL;
 	}
+	post_task.routine = post_helper;
 
 	/* register the tty driver */
 	serial_tty_driver.init_termios          = tty_std_termios;
diff -ur linux-2.4.20-20.7.5/drivers/usb/serial/usb-serial.h linux-2.4.20-20.7.5-u1/drivers/usb/serial/usb-serial.h
--- linux-2.4.20-20.7.5/drivers/usb/serial/usb-serial.h	2002-11-28 18:53:15.000000000 -0500
+++ linux-2.4.20-20.7.5-u1/drivers/usb/serial/usb-serial.h	2003-11-17 22:32:55.000000000 -0500
@@ -151,6 +151,9 @@
 	__u16				product;
 	struct usb_serial_port		port[MAX_NUM_PORTS];
 	void *				private;
+#ifndef __GENKSYMS__
+	int				ref;
+#endif
 };
 
 

diff -urN -X dontdiff linux-2.4.22-1.2176/drivers/usb/serial/usbserial.c linux-2.4.22-1.2176-u1/drivers/usb/serial/usbserial.c
--- linux-2.4.22-1.2176/drivers/usb/serial/usbserial.c	2004-03-11 20:53:43.000000000 -0800
+++ linux-2.4.22-1.2176-u1/drivers/usb/serial/usbserial.c	2004-03-23 11:07:12.000000000 -0800
@@ -367,6 +367,10 @@
 static void serial_close (struct tty_struct *tty, struct file * filp);
 static int __serial_write (struct usb_serial_port *port, int from_user, const unsigned char *buf, int count);
 static int  serial_write (struct tty_struct * tty, int from_user, const unsigned char *buf, int count);
+static int  serial_post_job(struct usb_serial_port *port, int from_user,
+    int gfp, const unsigned char *buf, int count);
+static int  serial_post_one(struct usb_serial_port *port, int from_user,
+    int gfp, const unsigned char *buf, int count);
 static int  serial_write_room (struct tty_struct *tty);
 static int  serial_chars_in_buffer (struct tty_struct *tty);
 static void serial_throttle (struct tty_struct * tty);
@@ -477,35 +481,45 @@
  * Rather than open the whole can of worms again, we just post writes
  * into a helper which can sleep.
  *
- * Kernel 2.6 has a proper fix, reportedly.
- *
- * XXX Nothing prevents this from looping forever.
+ * Kernel 2.6 has a proper fix. It replaces semaphores with proper locking.
  */
 static void post_helper(void *arg)
 {
+	struct list_head *pos;
 	struct usb_serial_post_job *job;
 	struct usb_serial_port *port;
 	struct usb_serial *serial;
 	unsigned int flags;
 
 	spin_lock_irqsave(&post_lock, flags);
-	while (!list_empty(&post_list)) {
-		job = list_entry(post_list.next, struct usb_serial_post_job, link);
+	pos = post_list.next;
+	while (pos != &post_list) {
+		job = list_entry(pos, struct usb_serial_post_job, link);
+		port = job->port;
+		/* get_usb_serial checks port->tty, so cannot be used */
+		serial = port->serial;
+		if (port->write_busy) {
+			dbg("%s - port %d busy", __FUNCTION__, port->number);
+			pos = pos->next;
+			continue;
+		}
 		list_del(&job->link);
 		spin_unlock_irqrestore(&post_lock, flags);
 
-		port = job->port;
-		serial = get_usb_serial (port, __FUNCTION__);
-
 		down(&port->sem);
+		dbg("%s - port %d len %d backlog %d", __FUNCTION__,
+		    port->number, job->len, port->write_backlog);
 		if (port->tty != NULL)
 			__serial_write(port, 0, job->buff, job->len);
 		up(&port->sem);
 
-		kfree(job);
 		spin_lock_irqsave(&post_lock, flags);
+		port->write_backlog -= job->len;
+		kfree(job);
 		if (--serial->ref == 0)
 			kfree(serial);
+		/* Have to reset because we dropped spinlock */
+		pos = post_list.next;
 	}
 	spin_unlock_irqrestore(&post_lock, flags);
 }
@@ -642,7 +656,20 @@
 
 	/* if disconnect beat us to the punch here, there's nothing to do */
 	if (tty->driver_data) {
-		/* post_helper(NULL); */ /* Correct, but unimportant for echo.*/
+		/*
+		 * XXX The right thing would be to wait for the output to drain.
+		 * But we are not sufficiently daring to experiment in 2.4.
+		 * N.B. If we do wait, no need to run post_helper here.
+		 * Normall callback mechanism wakes it up just fine.
+		 */
+#if I_AM_A_DARING_HACKER
+		tty->closing = 1;
+		up (&port->sem);
+		if (info->closing_wait != ASYNC_CLOSING_WAIT_NONE)
+			tty_wait_until_sent(tty, info->closing_wait);
+		down (&port->sem);
+		if (!tty->driver_data) /* woopsie, disconnect, now what */ ;
+#endif
 		__serial_close(port, filp);
 	}
 
@@ -677,9 +704,6 @@
 static int serial_write (struct tty_struct * tty, int from_user, const unsigned char *buf, int count)
 {
 	struct usb_serial_port *port = (struct usb_serial_port *) tty->driver_data;
-	struct usb_serial *serial = get_usb_serial (port, __FUNCTION__);
-	struct usb_serial_post_job *job;
-	unsigned long flags;
 	int rc;
 
 	if (!in_interrupt()) {
@@ -690,6 +714,18 @@
 		post_helper(NULL);
 
 		down(&port->sem);
+		/*
+		 * This happens when a line discipline asks how much room
+		 * we have, gets 64, then tries to perform two writes
+		 * for a byte each. First write takes whole URB, second
+		 * write hits this check.
+		 */
+		if (port->write_busy) {
+			up(&port->sem);
+			return serial_post_job(port, from_user, GFP_KERNEL,
+			    buf, count);
+		}
+
 		rc = __serial_write(port, from_user, buf, count);
 		up(&port->sem);
 		return rc;
@@ -705,12 +741,19 @@
 		return -EINVAL;
 	}
 
-	job = kmalloc(sizeof(struct usb_serial_post_job), GFP_ATOMIC);
-	if (job == NULL)
-		return -ENOMEM;
+	return serial_post_job(port, 0, GFP_ATOMIC, buf, count);
+}
 
-	job->port = port;
-	if ((job->len = count) >= POST_BSIZE) {
+static int serial_post_job(struct usb_serial_port *port, int from_user,
+    int gfp, const unsigned char *buf, int count)
+{
+	int done = 0, length;
+	int rc;
+
+	if (port == NULL)
+		return -EPIPE;
+
+	if (count >= 512) {
 		static int rate = 0;
 		/*
 		 * Data loss due to extreme circumstances.
@@ -718,13 +761,61 @@
 		 * Neener, neener! Actually, it's probably an echo loop anyway.
 		 * Only happens when getty starts talking to Visor.
 		 */
-		if (++rate % 1000 < 5)
-			err("too much data (%d)", count);
-		job->len = POST_BSIZE;
+		if (++rate % 1000 < 3) {
+			err("too much data (%d) from %s", count,
+			    from_user? "user": "kernel");
+		}
+		count = 512;
+	}
+
+	while (done < count) {
+		length = count - done;
+		if (length > POST_BSIZE)
+			length = POST_BSIZE;
+		if (length > port->bulk_out_size)
+			length = port->bulk_out_size;
+
+		rc = serial_post_one(port, from_user, gfp, buf + done, length);
+		if (rc <= 0) {
+			if (done != 0)
+				return done;
+			return rc;
+		}
+		done += rc;
+	}
+
+	return done;
+}
+
+static int serial_post_one(struct usb_serial_port *port, int from_user,
+    int gfp, const unsigned char *buf, int count)
+{
+	struct usb_serial *serial = get_usb_serial (port, __FUNCTION__);
+	struct usb_serial_post_job *job;
+	unsigned long flags;
+
+	dbg("%s - port %d user %d count %d", __FUNCTION__, port->number, from_user, count);
+
+	job = kmalloc(sizeof(struct usb_serial_post_job), gfp);
+	if (job == NULL)
+		return -ENOMEM;
+
+	job->port = port;
+	if (count >= POST_BSIZE)
+		count = POST_BSIZE;
+	job->len = count;
+
+	if (from_user) {
+		if (copy_from_user(job->buff, buf, count)) {
+			kfree(job);
+			return -EFAULT;
+		}
+	} else {
+		memcpy(job->buff, buf, count);
 	}
-	memcpy(job->buff, buf, job->len);
 
 	spin_lock_irqsave(&post_lock, flags);
+	port->write_backlog += count;
 	list_add_tail(&job->link, &post_list);
 	serial->ref++;		/* Protect the port->sem from kfree() */
 	schedule_task(&post_task);
@@ -742,8 +833,13 @@
 	if (!serial)
 		return -ENODEV;
 
-	if (in_interrupt())
-		return POST_BSIZE;
+	if (in_interrupt()) {
+		retval = 0;
+		if (!port->write_busy && port->write_backlog == 0)
+			retval = port->bulk_out_size;
+		dbg("%s - returns %d", __FUNCTION__, retval);
+		return retval;
+	}
 
 	down (&port->sem);
 
@@ -776,10 +872,8 @@
 
 	down (&port->sem);
 
-	dbg("%s = port %d", __FUNCTION__, port->number);
-
 	if (!port->open_count) {
-		dbg("%s - port not open", __FUNCTION__);
+		dbg("%s - port %d: not open", __FUNCTION__, port->number);
 		goto exit;
 	}
 
@@ -1038,18 +1132,23 @@
 {
 	struct usb_serial *serial = port->serial;
 	int result;
-
-	dbg("%s - port %d", __FUNCTION__, port->number);
+	unsigned long flags;
 
 	if (count == 0) {
 		dbg("%s - write request of 0 bytes", __FUNCTION__);
 		return (0);
 	}
+	if (count < 0) {
+		err("%s - port %d: write request of %d bytes", __FUNCTION__,
+		    port->number, count);
+		return (0);
+	}
 
 	/* only do something if we have a bulk out endpoint */
 	if (serial->num_bulk_out) {
-		if (port->write_urb->status == -EINPROGRESS) {
-			dbg("%s - already writing", __FUNCTION__);
+		if (port->write_busy) {
+			/* Happens when two threads run port_helper. Watch. */
+			info("%s - already writing", __FUNCTION__);
 			return (0);
 		}
 
@@ -1058,12 +1157,10 @@
 		if (from_user) {
 			if (copy_from_user(port->write_urb->transfer_buffer, buf, count))
 				return -EFAULT;
-		}
-		else {
+		} else {
 			memcpy (port->write_urb->transfer_buffer, buf, count);
 		}
-
-		usb_serial_debug_data (__FILE__, __FUNCTION__, count, port->write_urb->transfer_buffer);
+		dbg("%s - port %d [%d]", __FUNCTION__, port->number, count);
 
 		/* set up our urb */
 		usb_fill_bulk_urb (port->write_urb, serial->dev,
@@ -1075,10 +1172,18 @@
 				     generic_write_bulk_callback), port);
 
 		/* send the data out the bulk port */
+		port->write_busy = 1;
 		result = usb_submit_urb(port->write_urb);
-		if (result)
-			err("%s - failed submitting write urb, error %d", __FUNCTION__, result);
-		else
+		if (result) {
+			err("%s - port %d: failed submitting write urb (%d)",
+			     __FUNCTION__, port->number, result);
+			port->write_busy = 0;
+			spin_lock_irqsave(&post_lock, flags);
+			if (port->write_backlog != 0)
+				schedule_task(&post_task);
+			spin_unlock_irqrestore(&post_lock, flags);
+
+		} else
 			result = count;
 
 		return result;
@@ -1093,14 +1198,12 @@
 	struct usb_serial *serial = port->serial;
 	int room = 0;
 
-	dbg("%s - port %d", __FUNCTION__, port->number);
-	
 	if (serial->num_bulk_out) {
-		if (port->write_urb->status != -EINPROGRESS)
+		if (!port->write_busy && port->write_backlog == 0)
 			room = port->bulk_out_size;
 	}
 
-	dbg("%s - returns %d", __FUNCTION__, room);
+	dbg("%s - port %d, returns %d", __FUNCTION__, port->number, room);
 	return (room);
 }
 
@@ -1112,8 +1215,9 @@
 	dbg("%s - port %d", __FUNCTION__, port->number);
 
 	if (serial->num_bulk_out) {
-		if (port->write_urb->status == -EINPROGRESS)
-			chars = port->write_urb->transfer_buffer_length;
+		if (port->write_busy)
+			chars += port->write_urb->transfer_buffer_length;
+		chars += port->write_backlog;	/* spin_lock... Baah */
 	}
 
 	dbg("%s - returns %d", __FUNCTION__, chars);
@@ -1177,14 +1281,16 @@
 
 	dbg("%s - port %d", __FUNCTION__, port->number);
 
+	port->write_busy = 0;
+	wmb();
+
 	if (!serial) {
-		dbg("%s - bad serial pointer, exiting", __FUNCTION__);
+		err("%s - null serial pointer, exiting", __FUNCTION__);
 		return;
 	}
 
 	if (urb->status) {
 		dbg("%s - nonzero write bulk status received: %d", __FUNCTION__, urb->status);
-		return;
 	}
 
 	queue_task(&port->tqueue, &tq_immediate);
@@ -1210,12 +1316,18 @@
 	struct usb_serial_port *port = (struct usb_serial_port *)private;
 	struct usb_serial *serial = get_usb_serial (port, __FUNCTION__);
 	struct tty_struct *tty;
+	unsigned long flags;
 
 	dbg("%s - port %d", __FUNCTION__, port->number);
 	
 	if (!serial)
 		return;
 
+	spin_lock_irqsave(&post_lock, flags);
+	if (port->write_backlog != 0)
+		schedule_task(&post_task);
+	spin_unlock_irqrestore(&post_lock, flags);
+
 	tty = port->tty;
 	if (!tty)
 		return;
diff -urN -X dontdiff linux-2.4.22-1.2176/drivers/usb/serial/usb-serial.h linux-2.4.22-1.2176-u1/drivers/usb/serial/usb-serial.h
--- linux-2.4.22-1.2176/drivers/usb/serial/usb-serial.h	2004-03-11 20:53:43.000000000 -0800
+++ linux-2.4.22-1.2176-u1/drivers/usb/serial/usb-serial.h	2004-03-23 11:07:12.000000000 -0800
@@ -111,6 +111,8 @@
 	int			bulk_out_size;
 	struct urb *		write_urb;
 	__u8			bulk_out_endpointAddress;
+	char			write_busy;	/* URB is active */
+	int			write_backlog;	/* Fifo used */
 
 	wait_queue_head_t	write_wait;
 	struct tq_struct	tqueue;


Reply to: