[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

[Nbd] [PATCH] nbd: Support FUA, FLUSH and ROTATIONAL



[Please use this version rather than the version sent earlier today
which did not fix the flush command properly]

This patch implements FUA (Force Unit Access) and FLUSH commands
and support for rotational devices for network block devices. They have
been tested and appear to work as specified.

See
  http://git.alex.org.uk/nbd-module.git
for a git tree containing this items. Commits in this tree are on
an out-of-tree version of nbd.ko (so it can be built without recompiling
the kernel), though the commits/patches should apply to an in-tree
version of nbd.

---
 drivers/block/nbd.c |   62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
 include/linux/nbd.h |   18 +++++++++++++-----
 2 files changed, 69 insertions(+), 11 deletions(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 0eeed20..f665f54 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -89,6 +89,8 @@ static const char *ioctl_cmd_to_ascii(int cmd)
 	case NBD_PRINT_DEBUG: return "print-debug";
 	case NBD_SET_SIZE_BLOCKS: return "set-size-blocks";
 	case NBD_DISCONNECT: return "disconnect";
+	case NBD_SET_TIMEOUT: return "set-timeout";
+	case NBD_SET_FLAGS: return "set-flags";
 	case BLKROSET: return "set-read-only";
 	case BLKFLSBUF: return "flush-buffer-cache";
 	}
@@ -98,9 +100,10 @@ static const char *ioctl_cmd_to_ascii(int cmd)
 static const char *nbdcmd_to_ascii(int cmd)
 {
 	switch (cmd) {
-	case  NBD_CMD_READ: return "read";
+	case NBD_CMD_READ: return "read";
 	case NBD_CMD_WRITE: return "write";
-	case  NBD_CMD_DISC: return "disconnect";
+	case NBD_CMD_DISC: return "disconnect";
+	case NBD_CMD_FLUSH: return "flush";
 	}
 	return "invalid";
 }
@@ -240,9 +243,18 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req)
 	unsigned long size = blk_rq_bytes(req);
 
 	request.magic = htonl(NBD_REQUEST_MAGIC);
-	request.type = htonl(nbd_cmd(req));
-	request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
-	request.len = htonl(size);
+	/* If FUA is set in the request, and we are told to send FUA, then OR in NBD_CMD_FLAG_FUA */
+	request.type = htonl(nbd_cmd(req) |
+			     (( (req->cmd_flags & REQ_FUA) && (lo->flags & NBD_FLAG_SEND_FUA)) ?
+			      NBD_CMD_FLAG_FUA : 0));
+	/* Send from & len as zero on FLUSH - other values reserved per protocol */
+	if (nbd_cmd(req) == NBD_CMD_FLUSH) {
+		request.from = 0;
+		request.len = 0;
+	} else {
+		request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
+		request.len = htonl(size);
+	}
 	memcpy(request.handle, &req, sizeof(req));
 
 	dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%uB)\n",
@@ -459,13 +471,34 @@ static void nbd_handle_req(struct nbd_device *lo, struct request *req)
 	nbd_cmd(req) = NBD_CMD_READ;
 	if (rq_data_dir(req) == WRITE) {
 		nbd_cmd(req) = NBD_CMD_WRITE;
-		if (lo->flags & NBD_READ_ONLY) {
+		if (lo->flags & NBD_FLAG_READ_ONLY) {
 			printk(KERN_ERR "%s: Write on read-only\n",
 					lo->disk->disk_name);
 			goto error_out;
 		}
 	}
 
+	if (req->cmd_flags & REQ_FLUSH) {
+		if (unlikely(blk_rq_sectors(req))) {
+			/* Elevator is meant to guarantee that a request with REQ_FLUSH
+			 * set is broken into an empty request with REQ_FLUSH set then
+			 * the rest of the content (if any). If this doesn't happen,
+			 * whinge, then proceed to do the content without a flush.
+			 */
+			printk(KERN_ERR "%s: nbd passed non-empty flush request\n",
+			       lo->disk->disk_name);
+
+		} else {
+			if (lo->flags & NBD_FLAG_SEND_FLUSH)
+				nbd_cmd(req) = NBD_CMD_FLUSH;
+			else {
+				/* Ignore flush that we don't need */
+				nbd_end_request(req);
+				return;
+			}
+		}
+	}
+
 	req->errors = 0;
 
 	mutex_lock(&lo->tx_lock);
@@ -642,6 +675,18 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
 		lo->xmit_timeout = arg * HZ;
 		return 0;
 
+	case NBD_SET_FLAGS:
+		lo->flags = arg;
+		if (lo->disk)
+		{
+			if (lo->flags & NBD_FLAG_ROTATIONAL)
+				queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, lo->disk->queue);
+			else
+				queue_flag_set_unlocked(QUEUE_FLAG_NONROT, lo->disk->queue);
+
+		}
+		return 0;
+
 	case NBD_SET_SIZE_BLOCKS:
 		lo->bytesize = ((u64) arg) * lo->blksize;
 		bdev->bd_inode->i_size = lo->bytesize;
@@ -775,6 +820,11 @@ static int __init nbd_init(void)
 			put_disk(disk);
 			goto out;
 		}
+		/* In order not to confuse the elevator, say we always
+		 * want FLUSH and FUA. We won't send them to the server
+		 * unless the relevant flag bit is set
+		 */
+		blk_queue_flush(disk->queue, REQ_FLUSH | REQ_FUA);
 		/*
 		 * Tell the block layer that we are not a rotational device
 		 */
diff --git a/include/linux/nbh.c b/include/linux/nbd.h
index d146ca1..5cf18a0 100644
--- a/include/linux/nbd.h
+++ b/include/linux/nbd.h
@@ -27,13 +27,25 @@
 #define NBD_SET_SIZE_BLOCKS	_IO( 0xab, 7 )
 #define NBD_DISCONNECT  _IO( 0xab, 8 )
 #define NBD_SET_TIMEOUT _IO( 0xab, 9 )
+#define NBD_SET_FLAGS _IO( 0xab, 10 )
 
 enum {
 	NBD_CMD_READ = 0,
 	NBD_CMD_WRITE = 1,
-	NBD_CMD_DISC = 2
+	NBD_CMD_DISC = 2,
+	NBD_CMD_FLUSH = 3
 };
 
+#define NBD_CMD_MASK_COMMAND 0x0000ffff
+#define NBD_CMD_FLAG_FUA (1<<16)
+
+/* values for flags field */
+#define NBD_FLAG_HAS_FLAGS	(1 << 0)	/* Flags are there */
+#define NBD_FLAG_READ_ONLY	(1 << 1)	/* Device is read-only */
+#define NBD_FLAG_SEND_FLUSH	(1 << 2)	/* Send FLUSH */
+#define NBD_FLAG_SEND_FUA	(1 << 3)	/* Send FUA (Force Unit Access) */
+#define NBD_FLAG_ROTATIONAL	(1 << 4)	/* Use elevator algorithm - rotational media */
+
 #define nbd_cmd(req) ((req)->cmd[0])
 
 /* userspace doesn't need the nbd_device structure */
@@ -42,10 +54,6 @@ enum {
 #include <linux/wait.h>
 #include <linux/mutex.h>
 
-/* values for flags field */
-#define NBD_READ_ONLY 0x0001
-#define NBD_WRITE_NOCHK 0x0002
-
 struct request;
 
 struct nbd_device {



Reply to: