[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Re: active swapping using rumpdisk



On 17/08/2025 08:43, Samuel Thibault wrote:
I'd say you can add an RPC whose interface allow to set any IKOT type,
but that for now returns KERN_INVALID_ARGUMENT for anything else than
IKOT_USER_DEVICE.

Samuel

Attached is my proposed patch.

I've tested this solution with good results. My stress test case ran continuously for around 90 minutes, rather than the 30 seconds to 3 minutes it would otherwise last. There is some evidence to suggest that swapping is still less stable than when using the in-kernel block driver, so it is not a complete solution. Nevertheless, I think it is a big enough improvement to be worth integrating immediately.

IKOT_USER_DEVICE is not a perfect match for the existing KOTYPE types, since there is no associated kernel object (i.e. ipc_port.ip_kobject == NULL). I did consider whether it would be better to add extra 'feature' flags to ipc_object_t instead. Two bytes are associated with the KOTYPE, which seems quite large for what currently amounts to only 29 types. One could reduce KOTYPE to 1 byte and leave 8 additional feature bits, one of which could be used to identify the port as suitable for 'page lists'. That would simplify the test in 'ipc_kobject_vm_page_list()'.

For the RPC's 'ktype' argument I specified constants that are different from the IKOT_ ones (see port.h). This avoids the issue of how to export the IKOT_ defines to the user interface and, in any case, makes it clear which IKOT types are appropriate. I had hoped mig might be able to generate an anonymous enum that would produce user constants matching the IKOT ones and include them in the generated mach/mach_port.h header, but I don't think that is possible. I also wanted to specify the type of 'ktype' as something like mach_port_ktype_t, but failed to get mig to accept a type spec. I think I'm misunderstanding something here, as I have no knowledge of mig at all.

I generalised the RPC to allow the port right to be specified for the lookup. If that is not useful and we want to restrict it to receive rights, then I can remove that parameter. The RPC is limited to privileged tasks by requiring the caller to supply the privileged host port.

I have an existing target of a successful 24-hour run of my stress test, and will now add the requirement that the system is using rumpdisk, so hopefully there will be further improvements.

Regards,

Mike.

diff -ur gnumach.orig/include/mach/mach_port.defs gnumach/include/mach/mach_port.defs
--- gnumach.orig/include/mach/mach_port.defs	2025-07-31 20:34:14.836151975 +0100
+++ gnumach/include/mach/mach_port.defs	2025-08-19 05:36:12.174830550 +0100
@@ -358,3 +358,19 @@
 routine mach_port_clear_protected_payload(
 		task		: ipc_space_t;
 		name		: mach_port_name_t);
+
+/*
+ *	Set the kernel port type for specific kernel behaviour.
+ *	host must be the privileged host port; right selects which
+ *	right of name is looked up in task.  ktype must be one of
+ *	MACH_PORT_KTYPE_NONE or MACH_PORT_KTYPE_USER_DEVICE.
+ *	The named port must not be currently associated with any
+ *	other kernel port type.
+ */
+
+routine mach_port_set_ktype(
+		host            : host_priv_t;
+		task		: ipc_space_t;
+		name		: mach_port_name_t;
+		right		: mach_port_right_t;
+		ktype		: unsigned);
diff -ur gnumach.orig/include/mach/port.h gnumach/include/mach/port.h
--- gnumach.orig/include/mach/port.h	2025-07-31 20:34:14.836151975 +0100
+++ gnumach/include/mach/port.h	2025-08-19 05:33:29.108169683 +0100
@@ -156,4 +156,8 @@
 #define MACH_PORT_QLIMIT_DEFAULT	((mach_port_msgcount_t) 5)
 #define MACH_PORT_QLIMIT_MAX		((mach_port_msgcount_t) 16)
 
+/* Constants for calls to mach_port_set_ktype() */
+#define MACH_PORT_KTYPE_NONE             0	/* port becomes IKOT_NONE */
+#define MACH_PORT_KTYPE_USER_DEVICE      1	/* port becomes IKOT_USER_DEVICE */
+
 #endif	/* _MACH_PORT_H_ */
diff -ur gnumach.orig/ipc/ipc_object.c gnumach/ipc/ipc_object.c
--- gnumach.orig/ipc/ipc_object.c	2025-07-31 20:34:14.840151931 +0100
+++ gnumach/ipc/ipc_object.c	2025-08-17 22:48:01.000000000 +0100
@@ -944,6 +944,7 @@
 	"(CLOCK)            ",
 	"(CLOCK_CTRL)       ",
 	"(PAGER_PROXY)      ",	/* 27 */
+	"(USER_DEVICE)      ",	/* 28 */
 				/* << new entries here	*/
 	"(UNKNOWN)     "	/* magic catchall	*/
 };	/* Please keep in sync with kern/ipc_kobject.h	*/
diff -ur gnumach.orig/ipc/mach_port.c gnumach/ipc/mach_port.c
--- gnumach.orig/ipc/mach_port.c	2025-07-31 20:34:14.844151887 +0100
+++ gnumach/ipc/mach_port.c	2025-08-19 05:40:55.423218382 +0100
@@ -1567,6 +1567,44 @@
 	return KERN_SUCCESS;
 }
 
+/*
+ *	Set the kernel object type of the port denoted by name/right in space.
+ *	ktype must be MACH_PORT_KTYPE_NONE or MACH_PORT_KTYPE_USER_DEVICE, and
+ *	the port's current type must be IKOT_NONE or IKOT_USER_DEVICE.
+ *	Fails with KERN_INVALID_HOST/TASK/ARGUMENT or the lookup's error.
+ */
+kern_return_t
+mach_port_set_ktype(
+        host_t host_priv,
+        ipc_space_t space,
+        mach_port_name_t name,
+        mach_port_right_t right,
+        unsigned ktype)
+{
+	ipc_port_t port;
+	kern_return_t kr;
+
+	if (host_priv == HOST_NULL)
+		return KERN_INVALID_HOST;
+	if (space == IS_NULL)
+		return KERN_INVALID_TASK;
+	if (ktype != MACH_PORT_KTYPE_NONE
+	    && ktype != MACH_PORT_KTYPE_USER_DEVICE)
+		return KERN_INVALID_ARGUMENT;
+
+	kr = ipc_object_translate(space, name, right, (ipc_object_t *)&port);
+	if (kr != KERN_SUCCESS)
+		return kr;
+	/* port is locked and active: unlock on the rejection path too */
+	if (ip_kotype(port) != IKOT_NONE && ip_kotype(port) != IKOT_USER_DEVICE)
+		kr = KERN_INVALID_ARGUMENT;
+	else
+		ipc_kobject_set(port, IKO_NULL, ktype == MACH_PORT_KTYPE_NONE
+				? IKOT_NONE : IKOT_USER_DEVICE);
+	ip_unlock(port);
+	return kr;
+}
+
 #if	MACH_KDB
 
 void
diff -ur gnumach.orig/kern/ipc_kobject.h gnumach/kern/ipc_kobject.h
--- gnumach.orig/kern/ipc_kobject.h	2025-07-31 20:34:14.844151887 +0100
+++ gnumach/kern/ipc_kobject.h	2025-08-18 07:10:15.378591611 +0100
@@ -77,9 +77,10 @@
 #define IKOT_CLOCK		25
 #define IKOT_CLOCK_CTRL		26
 #define	IKOT_PAGER_PROXY	27
+#define	IKOT_USER_DEVICE	28
 					/* << new entries here	*/
-#define	IKOT_UNKNOWN		28	/* magic catchall	*/
-#define	IKOT_MAX_TYPE		29	/* # of IKOT_ types	*/
+#define	IKOT_UNKNOWN		29	/* magic catchall	*/
+#define	IKOT_MAX_TYPE		30	/* # of IKOT_ types	*/
  /* Please keep ipc/ipc_object.c:ikot_print_array up to date	*/
 
 #define is_ipc_kobject(ikot)	(ikot != IKOT_NONE)
@@ -90,7 +91,9 @@
  */
 
 #define ipc_kobject_vm_page_list(ikot) 			\
-	((ikot == IKOT_PAGING_REQUEST) || (ikot == IKOT_DEVICE))
+  ((ikot == IKOT_PAGING_REQUEST) || \
+   (ikot == IKOT_DEVICE) || \
+   (ikot == IKOT_USER_DEVICE))
 
 #define ipc_kobject_vm_page_steal(ikot)	(ikot == IKOT_PAGING_REQUEST)
 
diff -ur hurd.orig/rumpdisk/block-rump.c hurd/rumpdisk/block-rump.c
--- hurd.orig/rumpdisk/block-rump.c	2025-07-31 20:36:06.014917781 +0100
+++ hurd/rumpdisk/block-rump.c	2025-08-19 08:20:48.477079651 +0100
@@ -27,6 +27,7 @@
 
 #include <mach.h>
 #include <mach/gnumach.h>
+#include <mach/port.h>
 #include <hurd.h>
 #include <hurd/ports.h>
 #include <device/device.h>
@@ -327,6 +328,25 @@
       rump_sys_close (fd);
       pthread_rwlock_unlock (&rumpdisk_rwlock);
       return err;
+    }
+
+  /* Configure the receive port as a USER_DEVICE so that IPC messages
+     destined for rumpdisk will use page lists rather than page map
+     entries. This strategy prevents pages that are referenced in the
+     message body from being swapped out until the message has been
+     processed.
+  */
+  err = mach_port_set_ktype (master_host,
+			     mach_task_self (),
+			     bd->port.port_right,
+			     MACH_PORT_RIGHT_RECEIVE,
+			     MACH_PORT_KTYPE_USER_DEVICE);
+  if (err != 0)
+    {
+      mach_print ("Failed to set receive port as USER_DEVICE\n");
+      rump_sys_close (fd);
+      pthread_rwlock_unlock (&rumpdisk_rwlock);
+      return err;
     }
 
   bd->taken = 1;

Reply to: