[PATCH 3/4] Implement structured reply handling

To: nbd@other.debian.org
Cc: Wouter Verhelst <w@uter.be>
Subject: [PATCH 3/4] Implement structured reply handling
From: w@uter.be
Date: Sat, 11 Mar 2023 15:07:04 +0200
Message-id: <[🔎] 20230311130705.253855-4-w@uter.be>
In-reply-to: <[🔎] 20230311130705.253855-1-w@uter.be>
References: <[🔎] 20230311130705.253855-1-w@uter.be>
From: Wouter Verhelst <w@uter.be>

This implements the transmission phase part of structured replies as documented
in doc/proto.txt, but does not do the negotiation phase part of this (yet).

Signed-off-by: Wouter Verhelst <w@uter.be>
---
 nbd-server.c | 157 +++++++++++++++++++++++++++++++++++++++++++++------
 nbd.h        |  37 ++++++++++++
 nbdsrv.h     |   7 +++
 3 files changed, 184 insertions(+), 17 deletions(-)

diff --git a/nbd-server.c b/nbd-server.c
index 64609f3..8c84f93 100644
--- a/nbd-server.c
+++ b/nbd-server.c
@@ -1289,6 +1289,51 @@ static void log_reply(CLIENT *client, struct nbd_reply *prply) {
 	}
 }
 
+/** Append a structured reply header to the transaction log; does nothing when transactionlogfd is -1. */
+static void log_structured_reply(CLIENT *client, struct nbd_structured_reply *prply) {
+	if (client->transactionlogfd == -1) { return; }
+	lock_logsem(client);
+	writeit(client->transactionlogfd, prply, sizeof(*prply));
+	unlock_logsem(client);
+}
+
+/** Send one structured reply chunk: the header, then buf[0..bufcount-1] (buflen[i] bytes each); length goes into the header unchecked. */
+void send_structured_chunk(CLIENT *client, struct nbd_request *req, uint16_t flags, uint16_t type, uint32_t length, int bufcount, void *buf[], size_t buflen[]) {
+	struct nbd_structured_reply hdr = {
+		.magic = htonl(NBD_STRUCTURED_REPLY_MAGIC), .flags = htons(flags),
+		.type = htons(type), .paylen = htonl(length),
+	};
+	memcpy(&(hdr.handle), req->handle, sizeof(hdr.handle));
+	pthread_mutex_lock(&(client->lock)); /* socket lock is held across header and payload writes */
+	socket_write(client, &hdr, sizeof hdr);
+	for(int idx = 0; idx < bufcount; ++idx) {
+		socket_write(client, buf[idx], buflen[idx]);
+	}
+	pthread_mutex_unlock(&(client->lock));
+	log_structured_reply(client, &hdr);
+}
+
+/** Variadic form of send_structured_chunk(): bufcount is followed by bufcount (void *data, size_t size) argument pairs. */
+void send_structured_chunk_v(CLIENT *client, struct nbd_request *req, uint16_t flags, uint16_t type, uint32_t length, int bufcount, ...) {
+	struct nbd_structured_reply hdr = {
+		.magic = htonl(NBD_STRUCTURED_REPLY_MAGIC), .flags = htons(flags),
+		.type = htons(type), .paylen = htonl(length),
+	};
+	va_list args;
+	memcpy(&(hdr.handle), req->handle, sizeof(hdr.handle));
+	va_start(args, bufcount);
+	pthread_mutex_lock(&(client->lock)); /* socket lock is held across header and payload writes */
+	socket_write(client, &hdr, sizeof hdr);
+	for(int idx = 0; idx < bufcount; ++idx) {
+		void *chunk = va_arg(args, void*);
+		size_t chunk_len = va_arg(args, size_t); /* callers must pass each size as a size_t */
+		socket_write(client, chunk, chunk_len);
+	}
+	pthread_mutex_unlock(&(client->lock));
+	log_structured_reply(client, &hdr);
+	va_end(args);
+}
+
 /**
  * Find the location to write the data for the next chunk to.
  * Assumes checks on memory sizes etc have already been done.
@@ -1298,28 +1343,85 @@ static void log_reply(CLIENT *client, struct nbd_reply *prply) {
  * @param len the length of this chunk.
  */
 char * find_read_buf(READ_CTX *ctx) {
-	return ctx->buf + ctx->current_offset;
+	if(ctx->is_structured && !(ctx->df)) { /* fragmented structured reply: a fresh buffer per chunk */
+		ctx->buf = malloc(ctx->current_len);
+		if(ctx->buf == NULL) {
+			err("Could not allocate memory for request");
+		}
+		return ctx->buf;
+	}
+	return ctx->buf + ctx->current_offset; /* single request buffer: write at this chunk's offset */
 }
 
 void confirm_read(CLIENT *client, READ_CTX *ctx, size_t len_read) {
+	uint64_t wire_offset; /* absolute offset of this chunk, in network byte order */
+	if(!(ctx->is_structured) || ctx->df) { return; } /* these modes send nothing per chunk */
+	wire_offset = htonll(ctx->req->from + (uint64_t)(ctx->current_offset));
+	send_structured_chunk_v(client, ctx->req, 0, NBD_REPLY_TYPE_OFFSET_DATA, len_read + 8, 2, &wire_offset, sizeof wire_offset, ctx->buf, (size_t)len_read);
+	free(ctx->buf); /* per-chunk buffer handed out by find_read_buf() */
 }
 
 void complete_read(CLIENT *client, READ_CTX *ctx, uint32_t error, char *errmsg, uint16_t msglen, bool with_offset, uint64_t err_offset) {
 	uint16_t type;
 	uint64_t offset = 0;
-	struct nbd_reply rep;
-	setup_reply(&rep, ctx->req);
-	if(error) {
-		rep.error = error;
-	}
-	log_reply(client, &rep);
-	pthread_mutex_lock(&(client->lock));
-	socket_write(client, &rep, sizeof rep);
-	if(!error) {
-		socket_write(client, ctx->buf, ctx->buflen);
+	if(ctx->is_structured) {
+		if(ctx->df) {
+			uint32_t len = ctx->req->len;
+			if(error != 0 && with_offset) {
+				len = err_offset;
+			}
+			if(error == 0 || with_offset) {
+				offset = htonll(ctx->req->from);
+				send_structured_chunk_v(client, ctx->req, 0, NBD_REPLY_TYPE_OFFSET_DATA, len + 8, 2, &offset, sizeof offset, ctx->buf, err_offset);
+			}
+			free(ctx->buf);
+		}
+		if(error != 0) {
+			struct nbd_structured_error_payload pl;
+			void *buf[3];
+			size_t bufsize[3];
+			int payloads = 1;
+			size_t total_size;
+			pl.error = error;
+			pl.msglen = msglen;
+			if(with_offset) {
+				offset += err_offset;
+				type = NBD_REPLY_TYPE_ERROR_OFFSET;
+			} else {
+				type = NBD_REPLY_TYPE_ERROR;
+			}
+			buf[0] = &pl;
+			bufsize[0] = sizeof pl;
+			total_size = bufsize[0];
+			if(msglen > 0) {
+				buf[++payloads] = errmsg;
+				bufsize[payloads] = msglen;
+				total_size += msglen;
+			}
+			if(with_offset) {
+				buf[++payloads] = &offset;
+				bufsize[payloads] = sizeof offset;
+				total_size += sizeof offset;
+			}
+			send_structured_chunk(client, ctx->req, NBD_REPLY_FLAG_DONE, type, total_size, payloads, buf, bufsize);
+			return;
+		}
+		send_structured_chunk_v(client, ctx->req, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_NONE, 0, 0);
+	} else {
+		struct nbd_reply rep;
+		setup_reply(&rep, ctx->req);
+		if(error) {
+			rep.error = error;
+		}
+		log_reply(client, &rep);
+		pthread_mutex_lock(&(client->lock));
+		socket_write(client, &rep, sizeof rep);
+		if(!error) {
+			socket_write(client, ctx->buf, ctx->buflen);
+		}
+		pthread_mutex_unlock(&(client->lock));
+		free(ctx->buf);
 	}
-	pthread_mutex_unlock(&(client->lock));
-	free(ctx->buf);
 }
 
 /**
@@ -2735,11 +2837,32 @@ static void handle_normal_read(CLIENT *client, struct nbd_request *req)
 	uint32_t error = 0;
 	char *errmsg = NULL;
 	uint16_t msglen = 0;
-	ctx->buf = malloc(req->len);
-	if(!(ctx->buf)) {
-		err("Could not allocate memory for request");
+	if(client->clientflags & F_STRUCTURED) {
+		ctx->is_structured = 1;
+	} else {
+		ctx->is_structured = 0;
+	}
+	if(req->type & NBD_CMD_FLAG_DF != 0) {
+		ctx->df = 1;
+	}
+	if(ctx->is_structured && ctx->df && req->len > (1 << 20)) {
+		/* standard requires a minimum of 64KiB; we are more generous
+		 * by allowing up to 1MiB as our largest unfragmented answer */
+		const char too_long[] = "Request too long for unfragmented reply";
+		struct nbd_structured_error_payload pl;
+		pl.error = NBD_EOVERFLOW;
+		pl.msglen = sizeof too_long;
+		send_structured_chunk_v(client, req, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_ERROR, 6 + pl.msglen, 2, &pl, sizeof pl, too_long, sizeof too_long);
+		free(ctx);
+		return;
+	}
+	if(ctx->df || !(ctx->is_structured)) {
+		ctx->buf = malloc(req->len);
+		if(!(ctx->buf)) {
+			err("Could not allocate memory for request");
+		}
+		ctx->buflen = req->len;
 	}
-	ctx->buflen = req->len;
 	if(expread(ctx, client)) {
 		DEBUG("Read failed: %m");
 		char read_failed[] = "Read failed";
diff --git a/nbd.h b/nbd.h
index caa3595..4cf692a 100644
--- a/nbd.h
+++ b/nbd.h
@@ -45,6 +45,7 @@ enum {
 #define NBD_CMD_SHIFT (16)
 #define NBD_CMD_FLAG_FUA ((1 << 0) << NBD_CMD_SHIFT)
 #define NBD_CMD_FLAG_NO_HOLE ((1 << 1) << NBD_CMD_SHIFT)
+#define NBD_CMD_FLAG_DF  ((1 << 2) << NBD_CMD_SHIFT)	/* read must be answered with an unfragmented structured reply */
 
 /* values for flags field */
 #define NBD_FLAG_HAS_FLAGS	(1 << 0)	/* Flags are there */
@@ -71,6 +72,16 @@ enum {
 
 #define NBD_OPT_REPLY_MAGIC 0x3e889045565a9LL
 
+#define NBD_REPLY_TYPE_NONE 		(0)
+#define NBD_REPLY_TYPE_OFFSET_DATA	(1)
+#define NBD_REPLY_TYPE_OFFSET_HOLE	(2)
+#define NBD_REPLY_TYPE_BLOCK_STATUS	(3)
+
+#define NBD_REPLY_TYPE_ERROR		((1 << 15) + 1)
+#define NBD_REPLY_TYPE_ERROR_OFFSET	((1 << 15) + 2)
+
+#define NBD_REPLY_FLAG_DONE		(1 << 0)
+
 /*
  * This is the packet used for communication between client and
  * server. All data are in network byte order.
@@ -92,4 +103,30 @@ struct nbd_reply {
 	uint32_t error;		/* 0 = ok, else error	*/
 	char handle[8];		/* handle you got from request	*/
 } __attribute__ ((packed));
+
+/*
+ * The reply packet header for structured replies; the payload follows it.
+ */
+struct nbd_structured_reply {
+	uint32_t magic;		/* NBD_STRUCTURED_REPLY_MAGIC */
+	uint16_t flags;		/* NBD_REPLY_FLAG_* */
+	uint16_t type;		/* NBD_REPLY_TYPE_* */
+	uint64_t handle;	/* copied from the request's handle */
+	uint32_t paylen;	/* number of payload bytes following this header */
+} __attribute__ ((packed));
+
+struct nbd_structured_error_payload {	/* start of the payload of an error chunk */
+	uint32_t error;		/* error number, e.g. one of the NBD_E* values */
+	uint16_t msglen;	/* length of the message text that follows; may be 0 */
+} __attribute__ ((packed));
+
+#define NBD_EPERM 1	/* error numbers carried in the error field of NBD replies */
+#define NBD_EIO 5
+#define NBD_ENOMEM 12
+#define NBD_EINVAL 22
+#define NBD_ENOSPC 28
+#define NBD_EOVERFLOW 75
+#define NBD_ENOTSUP 95
+#define NBD_ESHUTDOWN 108
+
 #endif
diff --git a/nbdsrv.h b/nbdsrv.h
index 4b227e7..43c44cd 100644
--- a/nbdsrv.h
+++ b/nbdsrv.h
@@ -73,6 +73,7 @@ typedef struct _client {
 	char semname[100];	/**< name of the posix sem that protects access to the transaction log */
 	sem_t *logsem;		/**< posix sem that protects access to the transaction log */
 	int clientfeats;	/**< Client flags specified during negotiation */
+	int clientflags;	/**< Internal flags for this client, as determined by nbd-server */
 	pthread_mutex_t lock;	/**< socket lock */
 	void *tls_session;	/**< TLS session context. Is NULL unless STARTTLS
 				     has been negotiated. */
@@ -95,6 +96,8 @@ typedef struct {
 	size_t buflen;
 	size_t current_offset;
 	uint32_t current_len;
+	unsigned int is_structured : 1;
+	unsigned int df : 1;
 } READ_CTX;
 
 /* Constants and macros */
@@ -167,6 +170,10 @@ typedef enum {
 #define F_WAIT 65536      /**< flag to tell us to wait for file creation */
 #define F_DATALOG 131072  /**< flag to tell us that the transaction log shall contain the written data */
 
+/** Internal flags (for clientflags) */
+
+#define F_STRUCTURED 1    /**< flag to tell us that reads for this client are answered with structured replies */
+
 /* Functions */
 
 /**
-- 
2.39.2
Reply to:
Follow-Ups:
- Re: [PATCH 3/4] Implement structured reply handling
  - From: Eric Blake <eblake@redhat.com>
- Re: [PATCH 3/4] Implement structured reply handling
  - From: Eric Blake <eblake@redhat.com>
References:
- [PATCH v2] nbd-server: Implement structured replies
  - From: w@uter.be
Prev by Date: [PATCH 1/4] Refactor request handling
Next by Date: Re: ublk-nbd: ublk-nbd is avaialbe
Previous by thread: Re: [PATCH 1/4] Refactor request handling
Next by thread: Re: [PATCH 3/4] Implement structured reply handling
Index(es):
- Date
- Thread