[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

[Nbd] [PATCH] Kernel patch to support FUA, FLUSH, ROTATIONAL



Here's a first attempt at a kernel patch. This one again is
only compile-tested: I just want to know whether I'm going in
the right direction.

For instance, is it acceptable to tell the elevator that we always
want FLUSH and FUA? At the time we make the call, we don't
know what the server will want. I suspect changing elevator
strategy in mid-stream may confuse it. This has the disadvantage
that in normal use we'd be generating additional requests which
would make it as far as the nbd driver (but not the socket)
before being dumped.

Note the change from NBD_READ_ONLY to NBD_FLAG_READ_ONLY.
This change has less impact that you might have thought as it
is in fact not possible through any existing interface to
set NBD_READ_ONLY as far as I can tell (it relies rather
on BLKROSET and the NBD_READ_ONLY test currently always
fails).

Also available at
 http://www.alex.org.uk/nbd-kernel-fua.patch

--
Alex Bligh

Signed-Off-By: Alex Bligh <alex@...872...>

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index e6fc716..0a5dcea 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -85,6 +85,8 @@ static const char *ioctl_cmd_to_ascii(int cmd)
	case NBD_PRINT_DEBUG: return "print-debug";
	case NBD_SET_SIZE_BLOCKS: return "set-size-blocks";
	case NBD_DISCONNECT: return "disconnect";
+	case NBD_SET_TIMEOUT: return "set-timeout";
+	case NBD_SET_FLAGS: return "set-flags";
	case BLKROSET: return "set-read-only";
	case BLKFLSBUF: return "flush-buffer-cache";
	}
@@ -94,9 +96,10 @@ static const char *ioctl_cmd_to_ascii(int cmd)
static const char *nbdcmd_to_ascii(int cmd)
{
	switch (cmd) {
-	case  NBD_CMD_READ: return "read";
+	case NBD_CMD_READ: return "read";
	case NBD_CMD_WRITE: return "write";
-	case  NBD_CMD_DISC: return "disconnect";
+	case NBD_CMD_DISC: return "disconnect";
+	case NBD_CMD_FLUSH: return "flush";
	}
	return "invalid";
}
@@ -236,7 +239,10 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req)
	unsigned long size = blk_rq_bytes(req);

	request.magic = htonl(NBD_REQUEST_MAGIC);
-	request.type = htonl(nbd_cmd(req));
+ /* If FUA is set in the request, and we are told to send FUA, then OR in NBD_CMD_FLAG_FUA */
+	request.type = htonl(nbd_cmd(req) |
+			     (( (req->cmd_flags & REQ_FUA) && (lo->flags & NBD_FLAG_SEND_FUA)) ?
+			      NBD_CMD_FLAG_FUA : 0));
	request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
	request.len = htonl(size);
	memcpy(request.handle, &req, sizeof(req));
@@ -455,13 +461,34 @@ static void nbd_handle_req(struct nbd_device *lo, struct request *req)
	nbd_cmd(req) = NBD_CMD_READ;
	if (rq_data_dir(req) == WRITE) {
		nbd_cmd(req) = NBD_CMD_WRITE;
-		if (lo->flags & NBD_READ_ONLY) {
+		if (lo->flags & NBD_FLAG_READ_ONLY) {
			printk(KERN_ERR "%s: Write on read-only\n",
					lo->disk->disk_name);
			goto error_out;
		}
	}

+	if (req->cmd_flags & REQ_FLUSH) {
+		if (unlikely(blk_rq_sectors(req))) {
+			/* Elevator is meant to guarantee that a request with REQ_FLUSH
+			 * set is broken into an empty request with REQ_FLUSH set then
+			 * the rest of the content (if any). If this doesn't happen,
+			 * whinge, then proceed to do the content without a flush.
+			 */
+			printk(KERN_ERR "%s: nbd passed non-empty flush request\n",
+			       lo->disk->disk_name);
+
+		} else {
+			if (lo->flags & NBD_FLAG_SEND_FLUSH)
+				nbd_cmd(req) = NBD_CMD_FLUSH;
+			else {
+				/* Ignore flush that we don't need */
+				nbd_end_request(req);
+				return;
+			}
+		}
+	}
+
	req->errors = 0;

	mutex_lock(&lo->tx_lock);
@@ -638,6 +665,18 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
		lo->xmit_timeout = arg * HZ;
		return 0;

+	case NBD_SET_FLAGS:
+		lo->flags = arg;
+		if (lo->disk)
+		{
+			if (lo->flags & NBD_FLAG_ROTATIONAL)
+				queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, lo->disk->queue);
+			else
+				queue_flag_set_unlocked(QUEUE_FLAG_NONROT, lo->disk->queue);
+
+		}
+		return 0;
+
	case NBD_SET_SIZE_BLOCKS:
		lo->bytesize = ((u64) arg) * lo->blksize;
		bdev->bd_inode->i_size = lo->bytesize;
@@ -771,6 +810,11 @@ static int __init nbd_init(void)
			put_disk(disk);
			goto out;
		}
+		/* In order not to confuse the elevator, say we always
+		 * want FLUSH and FUA. We won't send them to the server
+		 * unless the relevant flag bit is set
+		 */
+		blk_queue_flush(disk->queue, REQ_FLUSH | REQ_FUA);
		/*
		 * Tell the block layer that we are not a rotational device
		 */
diff --git a/include/linux/nbd.h b/include/linux/nbd.h
index d146ca1..5cf18a0 100644
--- a/include/linux/nbd.h
+++ b/include/linux/nbd.h
@@ -27,13 +27,25 @@
#define NBD_SET_SIZE_BLOCKS	_IO( 0xab, 7 )
#define NBD_DISCONNECT  _IO( 0xab, 8 )
#define NBD_SET_TIMEOUT _IO( 0xab, 9 )
+#define NBD_SET_FLAGS _IO( 0xab, 10 )

enum {
	NBD_CMD_READ = 0,
	NBD_CMD_WRITE = 1,
-	NBD_CMD_DISC = 2
+	NBD_CMD_DISC = 2,
+	NBD_CMD_FLUSH = 3
};

+#define NBD_CMD_MASK_COMMAND 0x0000ffff
+#define NBD_CMD_FLAG_FUA (1<<16)
+
+/* values for flags field */
+#define NBD_FLAG_HAS_FLAGS	(1 << 0)	/* Flags are there */
+#define NBD_FLAG_READ_ONLY	(1 << 1)	/* Device is read-only */
+#define NBD_FLAG_SEND_FLUSH	(1 << 2)	/* Send FLUSH */
+#define NBD_FLAG_SEND_FUA	(1 << 3)	/* Send FUA (Force Unit Access) */
+#define NBD_FLAG_ROTATIONAL (1 << 4) /* Use elevator algorithm - rotational media */
+
#define nbd_cmd(req) ((req)->cmd[0])

/* userspace doesn't need the nbd_device structure */
@@ -42,10 +54,6 @@ enum {
#include <linux/wait.h>
#include <linux/mutex.h>

-/* values for flags field */
-#define NBD_READ_ONLY 0x0001
-#define NBD_WRITE_NOCHK 0x0002
-
struct request;

struct nbd_device {




Reply to: