Re: active swapping using rumpdisk
On 17/08/2025 08:43, Samuel Thibault wrote:
I'd say you can add an RPC whose interface allow to set any IKOT type,
but that for now returns KERN_INVALID_ARGUMENT for anything else than
IKOT_USER_DEVICE.
Samuel
Attached is my proposed patch.
I've tested this solution with good results. My stress test case has run
continuously for around 90 minutes rather than the 30s-3m that would
otherwise occur. There is some evidence to suggest that swapping is
still less stable than when using the in-kernel block driver, so it is
not a complete solution. Nevertheless, I think it is a big enough
improvement to be worth integrating immediately.
IKOT_USER_DEVICE is not a perfect match for the existing KOTYPE types
since there is no associated kernel object (i.e. ipc_port.ip_kobject ==
NULL). I did consider whether it would be better to introduce extra
'feature' flags to ipc_object_t instead. There are 2 bytes associated
with the KOTYPE which seems quite large for what amounts to only 29
types currently. One could reduce KOTYPE to 1 byte and leave 8
additional feature bits, one of which could be used to identify the port
as suitable for 'page lists'. That would simplify the test for
'ipc_kobject_vm_page_list()'.
I specified different constants than the IKOT_ ones for the RPC for
'ktype' (see port.h). This avoids the issue of how to export the IKOT_
defines to the user interface and in any case makes it clear which IKOT
types are appropriate. I had hoped mig might be able to generate an
anonymous enum that could produce user constants matching the IKOT ones
and include them in the mach/mach_port.h generated header but I don't
think that is possible. I also wanted to specify the type of 'ktype' as
something like mach_port_ktype_t but failed to get mig to accept a type
spec. I think I'm misunderstanding something here as I have no knowledge
of mig at all.
I generalised the RPC to allow the port right to be specified during the
lookup. If that is not useful and we want to restrict it to receive
rights, then I can remove that parameter. The RPC is limited to
privileged tasks via the requirement to supply the privileged host port.
I have an existing target of a successful 24 hour test run of my stress
test and will now include the additional requirement that the system is
using rumpdisk so hopefully there will be further improvements.
Regards,
Mike.
diff -ur gnumach.orig/include/mach/mach_port.defs gnumach/include/mach/mach_port.defs
--- gnumach.orig/include/mach/mach_port.defs 2025-07-31 20:34:14.836151975 +0100
+++ gnumach/include/mach/mach_port.defs 2025-08-19 05:36:12.174830550 +0100
@@ -358,3 +358,19 @@
routine mach_port_clear_protected_payload(
task : ipc_space_t;
name : mach_port_name_t);
+
+/*
+ * Set the kernel port type for specific kernel behaviour.
+ * ktype must be one of:
+ * MACH_PORT_KTYPE_NONE
+ * MACH_PORT_KTYPE_USER_DEVICE
+ * The named port must not be currently associated with any
+ * other kernel port type.
+ */
+
+routine mach_port_set_ktype(
+ host : host_priv_t;
+ task : ipc_space_t;
+ name : mach_port_name_t;
+ right : mach_port_right_t;
+ ktype : unsigned);
diff -ur gnumach.orig/include/mach/port.h gnumach/include/mach/port.h
--- gnumach.orig/include/mach/port.h 2025-07-31 20:34:14.836151975 +0100
+++ gnumach/include/mach/port.h 2025-08-19 05:33:29.108169683 +0100
@@ -156,4 +156,8 @@
#define MACH_PORT_QLIMIT_DEFAULT ((mach_port_msgcount_t) 5)
#define MACH_PORT_QLIMIT_MAX ((mach_port_msgcount_t) 16)
+/* Constants for calls to mach_port_set_ktype() */
+#define MACH_PORT_KTYPE_NONE 0
+#define MACH_PORT_KTYPE_USER_DEVICE 1
+
#endif /* _MACH_PORT_H_ */
diff -ur gnumach.orig/ipc/ipc_object.c gnumach/ipc/ipc_object.c
--- gnumach.orig/ipc/ipc_object.c 2025-07-31 20:34:14.840151931 +0100
+++ gnumach/ipc/ipc_object.c 2025-08-17 22:48:01.000000000 +0100
@@ -944,6 +944,7 @@
"(CLOCK) ",
"(CLOCK_CTRL) ",
"(PAGER_PROXY) ", /* 27 */
+ "(USER_DEVICE) ", /* 28 */
/* << new entries here */
"(UNKNOWN) " /* magic catchall */
}; /* Please keep in sync with kern/ipc_kobject.h */
diff -ur gnumach.orig/ipc/mach_port.c gnumach/ipc/mach_port.c
--- gnumach.orig/ipc/mach_port.c 2025-07-31 20:34:14.844151887 +0100
+++ gnumach/ipc/mach_port.c 2025-08-19 05:40:55.423218382 +0100
@@ -1567,6 +1567,68 @@
return KERN_SUCCESS;
}
+/*
+ *	Routine:	mach_port_set_ktype [kernel call]
+ *	Purpose:
+ *		Tag the port named by (name, right) in the given space
+ *		with a kernel port type, or clear that tag again.  Only
+ *		MACH_PORT_KTYPE_NONE and MACH_PORT_KTYPE_USER_DEVICE are
+ *		accepted; no kernel object is attached to the port.
+ *	Returns:
+ *		KERN_SUCCESS		The port type was updated.
+ *		KERN_INVALID_HOST	No privileged host port supplied.
+ *		KERN_INVALID_TASK	The space is null.
+ *		KERN_INVALID_ARGUMENT	Unsupported ktype, or the port
+ *			already carries a different kernel port type.
+ *		Other			Error from ipc_object_translate.
+ */
+
+kern_return_t
+mach_port_set_ktype(
+	host_t			host_priv,
+	ipc_space_t		space,
+	mach_port_name_t	name,
+	mach_port_right_t	right,
+	unsigned		ktype)
+{
+	ipc_port_t port;
+	kern_return_t kr;
+
+	if (host_priv == HOST_NULL)
+		return KERN_INVALID_HOST;
+
+	if (space == IS_NULL)
+		return KERN_INVALID_TASK;
+
+	if (ktype != MACH_PORT_KTYPE_NONE
+	    && ktype != MACH_PORT_KTYPE_USER_DEVICE)
+		return KERN_INVALID_ARGUMENT;
+
+	kr = ipc_object_translate(space, name, right, (ipc_object_t *)&port);
+	if (kr != KERN_SUCCESS)
+		return kr;
+
+	/* port is locked and active */
+
+	/* Only untyped ports or ports already tagged USER_DEVICE may be
+	   retagged.  Drop the port lock before failing, otherwise it
+	   would be leaked.  */
+	if (ip_kotype(port) != IKOT_NONE && ip_kotype(port) != IKOT_USER_DEVICE) {
+		ip_unlock(port);
+		return KERN_INVALID_ARGUMENT;
+	}
+
+	/* NOTE(review): confirm that ipc_kobject_set does not take the
+	   port lock itself, since the port is still locked here.  */
+	ipc_kobject_set(port, IKO_NULL,
+			ktype == MACH_PORT_KTYPE_NONE
+			? IKOT_NONE
+			: IKOT_USER_DEVICE);
+	ip_unlock(port);
+
+	return KERN_SUCCESS;
+}
+
#if MACH_KDB
void
diff -ur gnumach.orig/kern/ipc_kobject.h gnumach/kern/ipc_kobject.h
--- gnumach.orig/kern/ipc_kobject.h 2025-07-31 20:34:14.844151887 +0100
+++ gnumach/kern/ipc_kobject.h 2025-08-18 07:10:15.378591611 +0100
@@ -77,9 +77,10 @@
#define IKOT_CLOCK 25
#define IKOT_CLOCK_CTRL 26
#define IKOT_PAGER_PROXY 27
+#define IKOT_USER_DEVICE 28
/* << new entries here */
-#define IKOT_UNKNOWN 28 /* magic catchall */
-#define IKOT_MAX_TYPE 29 /* # of IKOT_ types */
+#define IKOT_UNKNOWN 29 /* magic catchall */
+#define IKOT_MAX_TYPE 30 /* # of IKOT_ types */
/* Please keep ipc/ipc_object.c:ikot_print_array up to date */
#define is_ipc_kobject(ikot) (ikot != IKOT_NONE)
@@ -90,7 +91,9 @@
*/
#define ipc_kobject_vm_page_list(ikot) \
- ((ikot == IKOT_PAGING_REQUEST) || (ikot == IKOT_DEVICE))
+ ((ikot == IKOT_PAGING_REQUEST) || \
+ (ikot == IKOT_DEVICE) || \
+ (ikot == IKOT_USER_DEVICE))
#define ipc_kobject_vm_page_steal(ikot) (ikot == IKOT_PAGING_REQUEST)
diff -ur hurd.orig/rumpdisk/block-rump.c hurd/rumpdisk/block-rump.c
--- hurd.orig/rumpdisk/block-rump.c 2025-07-31 20:36:06.014917781 +0100
+++ hurd/rumpdisk/block-rump.c 2025-08-19 08:20:48.477079651 +0100
@@ -27,6 +27,7 @@
#include <mach.h>
#include <mach/gnumach.h>
+#include <mach/port.h>
#include <hurd.h>
#include <hurd/ports.h>
#include <device/device.h>
@@ -327,6 +328,25 @@
rump_sys_close (fd);
pthread_rwlock_unlock (&rumpdisk_rwlock);
return err;
+ }
+
+ /* Configure the receive port as a USER_DEVICE so that IPC messages
+ destined for rumpdisk will use page lists rather than page map
+ entries. This strategy prevents pages that are referenced in the
+ message body from being swapped out until the message has been
+ processed.
+ */
+ err = mach_port_set_ktype (master_host,
+ mach_task_self (),
+ bd->port.port_right,
+ MACH_PORT_RIGHT_RECEIVE,
+ MACH_PORT_KTYPE_USER_DEVICE);
+ if (err != 0)
+ {
+ mach_print ("Failed to set receive port as USER_DEVICE\n");
+ rump_sys_close (fd);
+ pthread_rwlock_unlock (&rumpdisk_rwlock);
+ return err;
}
bd->taken = 1;
Reply to: