[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

[PATCH nbd-client] client: Request NBD_INFO_BLOCK_SIZE and set constraints in the kernel



NBD servers may advertise their minimum, preferred and maximum block
size constraints.  The constraints do not map well to what the kernel
expects, so see this design document for the mapping used here:
https://lists.debian.org/nbd/2022/06/msg00022.html

This patch makes only the minimal change needed to read these
constraints from the server and map the preferred block size to the
kernel hints minimum_io_size and optimal_io_size.  The minimum and
maximum constraints are ignored for now.

The names of the kernel hints are very confusing and do not refer to
the "minimum" of anything; see this document for an explanation:
https://people.redhat.com/msnitzer/docs/io-limits.txt

This requires a separate kernel patch which allows setting the
minimum_io_size and optimal_io_size through netlink attributes
NBD_ATTR_BLOCK_SIZE_MIN and NBD_ATTR_BLOCK_SIZE_OPT.  Older kernels
ignore these attributes.

Example of usage:

$ nbdkit -fv memory 1G --filter=blocksize-policy blocksize-preferred=65536

$ sudo ./nbd-client server /dev/nbd0
Warning: the oldstyle protocol is no longer supported.
This method now uses the newstyle protocol with a default export
Negotiation: ..size = 1024MB

$ cat /sys/devices/virtual/block/nbd0/queue/minimum_io_size
65536
$ cat /sys/devices/virtual/block/nbd0/queue/optimal_io_size
65536

Tools such as parted and mkfs may use these hints to align partitions,
filesystem structures, etc. to (in this example) 64K.

Signed-off-by: Richard W.M. Jones <rjones@redhat.com>
---
 nbd-client.c  | 51 ++++++++++++++++++++++++++++++++++++++++++++-------
 nbd-netlink.h |  5 +++++
 2 files changed, 49 insertions(+), 7 deletions(-)

diff --git a/nbd-client.c b/nbd-client.c
index c187e8c..4871b9d 100644
--- a/nbd-client.c
+++ b/nbd-client.c
@@ -168,7 +168,7 @@ static struct nl_sock *get_nbd_socket(int *driver_id) {
 
 static void netlink_configure(int index, int *sockfds, int num_connects,
 			      u64 size64, int blocksize, uint16_t flags,
-			      int timeout) {
+			      uint32_t *block_sizes, int timeout) {
 	struct nl_sock *socket;
 	struct nlattr *sock_attr;
 	struct nl_msg *msg;
@@ -186,6 +186,9 @@ static void netlink_configure(int index, int *sockfds, int num_connects,
 		NLA_PUT_U32(msg, NBD_ATTR_INDEX, index);
 	NLA_PUT_U64(msg, NBD_ATTR_SIZE_BYTES, size64);
 	NLA_PUT_U64(msg, NBD_ATTR_BLOCK_SIZE_BYTES, blocksize);
+	/* Set both minimum_io_size & optimal_io_size to the preferred size. */
+	NLA_PUT_U32(msg, NBD_ATTR_BLOCK_SIZE_MIN, block_sizes[1]);
+	NLA_PUT_U32(msg, NBD_ATTR_BLOCK_SIZE_OPT, block_sizes[1]);
 	NLA_PUT_U64(msg, NBD_ATTR_SERVER_FLAGS, flags);
 	if (timeout)
 		NLA_PUT_U64(msg, NBD_ATTR_TIMEOUT, timeout);
@@ -241,7 +244,7 @@ nla_put_failure:
 #else
 static void netlink_configure(int index, int *sockfds, int num_connects,
 			      u64 size64, int blocksize, uint16_t flags,
-			      int timeout)
+			      uint32_t *block_sizes, int timeout)
 {
 }
 
@@ -529,6 +532,28 @@ void parse_sizes(char *buf, uint64_t *size, uint16_t *flags) {
 	printf("\n");
 }
 
+/* Parse reply from NBD_INFO_BLOCK_SIZE */
+void parse_block_sizes(char *data, uint32_t datasize, uint32_t *block_sizes)
+{
+	if (datasize < 3 * sizeof(uint32_t)) {
+		err("E: server sent too short reply to NBD_INFO_BLOCK_SIZE, ignoring");
+		return;
+	}
+	memcpy(&block_sizes[0], &data[0], sizeof(uint32_t));
+	block_sizes[0] = ntohl(block_sizes[0]);
+	memcpy(&block_sizes[1], &data[4], sizeof(uint32_t));
+	block_sizes[1] = ntohl(block_sizes[1]);
+	memcpy(&block_sizes[2], &data[8], sizeof(uint32_t));
+	block_sizes[2] = ntohl(block_sizes[2]);
+
+#if 0
+	printf("server block sizes: min: %ld, pref: %ld, max: %ld\n",
+	       (long) block_sizes[0],
+	       (long) block_sizes[1],
+	       (long) block_sizes[2]);
+#endif
+}
+
 void send_opt_exportname(int sock, u64 *rsize64, uint16_t *flags, bool can_opt_go, char* name, uint16_t global_flags) {
 	send_request(sock, NBD_OPT_EXPORT_NAME, -1, name);
 	char b[sizeof(*flags) + sizeof(*rsize64)];
@@ -542,10 +567,12 @@ void send_opt_exportname(int sock, u64 *rsize64, uint16_t *flags, bool can_opt_g
 	}
 }
 
-void negotiate(int *sockp, u64 *rsize64, uint16_t *flags, char* name, uint32_t needed_flags, uint32_t client_flags, uint32_t do_opts, char *certfile, char *keyfile, char *cacertfile, char *tlshostname, bool tls, bool can_opt_go) {
+void negotiate(int *sockp, u64 *rsize64, uint16_t *flags, uint32_t *block_sizes, char* name, uint32_t needed_flags, uint32_t client_flags, uint32_t do_opts, char *certfile, char *keyfile, char *cacertfile, char *tlshostname, bool tls, bool can_opt_go) {
 	u64 magic;
 	uint16_t tmp;
 	uint16_t global_flags;
+	uint16_t info_reqs[1];
+	int nr_info_reqs = 0;
 	char buf[256] = "\0\0\0\0\0\0\0\0\0";
 	int sock = *sockp;
 
@@ -682,7 +709,12 @@ void negotiate(int *sockp, u64 *rsize64, uint16_t *flags, char* name, uint32_t n
 		return;
 	}
 
-	send_info_request(sock, NBD_OPT_GO, 0, NULL, name);
+	block_sizes[0] = 1; /* default minimum */
+	block_sizes[1] = 4096; /* default preferred */
+	block_sizes[2] = 0xffffffff; /* default maximum */
+	info_reqs[nr_info_reqs] = htons(NBD_INFO_BLOCK_SIZE);
+	nr_info_reqs++;
+	send_info_request(sock, NBD_OPT_GO, nr_info_reqs, info_reqs, name);
 
 	do {
 		if(rep != NULL) free(rep);
@@ -726,6 +758,9 @@ void negotiate(int *sockp, u64 *rsize64, uint16_t *flags, char* name, uint32_t n
 					case NBD_INFO_EXPORT:
 						parse_sizes(rep->data + 2, rsize64, flags);
 						break;
+					case NBD_INFO_BLOCK_SIZE:
+						parse_block_sizes(rep->data + 2, rep->datasize - 2, block_sizes);
+						break;
 					default:
 						// ignore these, don't need them
 						break;
@@ -934,6 +969,7 @@ int main(int argc, char *argv[]) {
 	u64 size64 = 0;
 	u64 force_size64 = 0;
 	uint16_t flags = 0;
+	uint32_t block_sizes[3]; // NBD_INFO_BLOCK_SIZE (min, pref, max)
 	bool force_read_only = false;
 	bool preinit = false;
 	int c;
@@ -1222,7 +1258,7 @@ int main(int argc, char *argv[]) {
 			exit(EXIT_FAILURE);
 
 		if (!preinit)
-			negotiate(&sock, &size64, &flags, name, needed_flags, cflags, opts, certfile, keyfile, cacertfile, tlshostname, tls, can_opt_go);
+			negotiate(&sock, &size64, &flags, block_sizes, name, needed_flags, cflags, opts, certfile, keyfile, cacertfile, tlshostname, tls, can_opt_go);
 		if (force_read_only)
 			flags |= NBD_FLAG_READ_ONLY;
 		if (force_size64)
@@ -1255,7 +1291,8 @@ int main(int argc, char *argv[]) {
 				err("Invalid nbd device target\n");
 		}
 		netlink_configure(index, sockfds, num_connections,
-				  size64, blocksize, flags, timeout);
+				  size64, blocksize, flags, block_sizes,
+				  timeout);
 		return 0;
 	}
 	/* Go daemon */
@@ -1340,7 +1377,7 @@ int main(int argc, char *argv[]) {
 					nbd = open(nbddev, O_RDWR);
 					if (nbd < 0)
 						err("Cannot open NBD: %m");
-					negotiate(&sock, &new_size, &new_flags, name, needed_flags, cflags, opts, certfile, keyfile, cacertfile, tlshostname, tls, can_opt_go);
+					negotiate(&sock, &new_size, &new_flags, block_sizes, name, needed_flags, cflags, opts, certfile, keyfile, cacertfile, tlshostname, tls, can_opt_go);
 					if (size64 != new_size) {
 						err("Size of the device changed. Bye");
 					}
diff --git a/nbd-netlink.h b/nbd-netlink.h
index fd0f4e4..9901f1b 100644
--- a/nbd-netlink.h
+++ b/nbd-netlink.h
@@ -31,6 +31,11 @@ enum {
 	NBD_ATTR_SERVER_FLAGS,
 	NBD_ATTR_CLIENT_FLAGS,
 	NBD_ATTR_SOCKETS,
+	NBD_ATTR_DEAD_CONN_TIMEOUT,
+	NBD_ATTR_DEVICE_LIST,
+	NBD_ATTR_BACKEND_IDENTIFIER,
+	NBD_ATTR_BLOCK_SIZE_MIN,
+	NBD_ATTR_BLOCK_SIZE_OPT,
 	__NBD_ATTR_MAX,
 };
 #define NBD_ATTR_MAX (__NBD_ATTR_MAX - 1)
-- 
2.35.1


Reply to: