Unable to recycle any page
Hello All,
I have found one cause of the "vm_page warning: unable to recycle any
page" message that sometimes appears during significant swap activity.
I found a reliable method of stimulating this event using the
following on my 2G RAM (i386) VM with 1G swap:
# stress-ng --vm 32 --vm-bytes 750M --mmap 32 --mmap-bytes 750M --page-in
You will most likely not be able to run this stress-ng test yourself as
there is another issue relating to mmap (write only) about which I will
email separately. The test case does however produce the vm_page warning
within a few minutes on all runs.
Gnumach has separate page queues for each memory segment: one for active
pages and one for inactive pages. Each of these page queues has 2 lists:
external pages and internal pages. The problem occurs when there are
both external and internal pages within a single page queue.
Pageout prioritises external pages over internal ones and to this end it
first searches only the external page lists. If that fails to evict
sufficient pages then a second pass is made searching both external and
internal page lists. Sometimes there are a few pages in an external page
list and none of them can be considered for pageout (vm_page_can_move()
returns false). The current code does not look at the internal page list
at all if there are any pages in the external page list. Consequently,
in this situation the internal page list (which can hold a very large
number of pages) is never considered, and pageout repeatedly finds no
pages to evict.
I have restructured vm_page_seg_pull_inactive_page() and
vm_page_seg_pull_active_page() to search all appropriate lists and have
been able to run the test case to completion many times without the
'vm_page warning' occurring with swap usage reaching up to ~500M in some
instances. See appended patch. As usual, please feel free to alter
before inclusion, make suggestions or reject as appropriate.
I've done some basic testing of the revised gnumach for compilations but
those tasks won't touch swap at all. This patch only affects page
balancing and pageout so the stress-ng is my only test case for this.
There are still a number of issues that I have observed during heavy
swapping. This test case is so intensive that it is almost impossible to
access the system at all via a root console. Any attempt to switch
virtual consoles, for example, is futile. Worse still, an attempt to interact with
the console whilst the stress-ng is operating renders that console
useless once the stress-ng is complete. I haven't investigated but I
wonder if there is some issue here with the page-in. That will perhaps
be my next investigation. This issue occurs with or without my alterations.
I am also seeing the following occasionally:
fp_load: invalid FPU state!
Regards,
Mike.
diff --git a/vm/vm_page.c b/vm/vm_page.c
index 6c5eba22..f07d6c78 100644
--- a/vm/vm_page.c
+++ b/vm/vm_page.c
@@ -555,24 +555,6 @@ vm_page_queue_remove(struct vm_page_queue *queue, struct vm_page *page)
list_remove(&page->node);
}
-static struct vm_page *
-vm_page_queue_first(struct vm_page_queue *queue, boolean_t external_only)
-{
- struct vm_page *page;
-
- if (!list_empty(&queue->external_pages)) {
- page = list_first_entry(&queue->external_pages, struct vm_page, node);
- return page;
- }
-
- if (!external_only && !list_empty(&queue->internal_pages)) {
- page = list_first_entry(&queue->internal_pages, struct vm_page, node);
- return page;
- }
-
- return NULL;
-}
-
static struct vm_page_seg *
vm_page_seg_get(unsigned short index)
{
@@ -813,28 +795,53 @@ vm_page_seg_remove_inactive_page(struct vm_page_seg *seg, struct vm_page *page)
vm_page_inactive_count--;
}
+static inline struct list*
+vm_page_next_page_list(struct list* cur_page_list,
+ struct vm_page_queue* queue,
+ boolean_t external_only)
+{
+ return (external_only
+ ? NULL
+ : (cur_page_list == &queue->external_pages
+ ? &queue->internal_pages
+ : NULL));
+}
+
/*
* Attempt to pull an active page.
*
* If successful, the object containing the page is locked.
*/
+
static struct vm_page *
vm_page_seg_pull_active_page(struct vm_page_seg *seg, boolean_t external_only)
{
struct vm_page *page, *first;
+ struct list* page_list;
boolean_t locked;
first = NULL;
+ page_list = &seg->active_pages.external_pages;
+
for (;;) {
- page = vm_page_queue_first(&seg->active_pages, external_only);
- if (page == NULL) {
+ page = (list_empty(page_list)
+ ? NULL
+ : list_first_entry(page_list, struct vm_page, node));
+
+ if (page == NULL || page == first) {
+ page_list = vm_page_next_page_list(page_list, &seg->active_pages, external_only);
+
+ if (page_list == NULL)
break;
+ else
+ {
+ first = NULL;
+ continue;
+ }
} else if (first == NULL) {
first = page;
- } else if (first == page) {
- break;
}
vm_page_seg_remove_active_page(seg, page);
@@ -868,19 +875,31 @@ static struct vm_page *
vm_page_seg_pull_inactive_page(struct vm_page_seg *seg, boolean_t external_only)
{
struct vm_page *page, *first;
+ struct list* page_list;
boolean_t locked;
first = NULL;
+ page_list = &seg->inactive_pages.external_pages;
+
for (;;) {
- page = vm_page_queue_first(&seg->inactive_pages, external_only);
- if (page == NULL) {
+ page = (list_empty(page_list)
+ ? NULL
+ : list_first_entry(page_list, struct vm_page, node));
+
+ if (page == NULL || page == first) {
+ page_list = vm_page_next_page_list(page_list, &seg->inactive_pages, external_only);
+
+ if (page_list == NULL)
break;
+ else
+ {
+ first = NULL;
+ continue;
+ }
} else if (first == NULL) {
first = page;
- } else if (first == page) {
- break;
}
vm_page_seg_remove_inactive_page(seg, page);
Reply to: