commit cdba82c27f712624b267eb5dc174294c0b6f1864
Author: Samuel Thibault <samuel.thibault@ens-lyon.org>
Date:   Sun Apr 12 20:37:04 2026 +0200

    vm_page: Add global LRU list
    
    This unfortunately takes up room in the vm_page structure, thus reducing
    the amount of manageable physical memory.

commit e3690e5874d6cfb13285511a20d925b0c6612d2c
Author: Samuel Thibault <samuel.thibault@ens-lyon.org>
Date:   Sun Apr 12 21:02:09 2026 +0200

    vm_page: Make page eviction use the global LRU list
    
    Otherwise, as long as the highest segment has evictable pages, eviction
    keeps focusing on it and does not benefit from the other segments.
    
    This is particularly problematic when the highest segment is small
    (smaller than the application's working set) yet still large enough to
    keep yielding evictable pages.
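
    As a purely illustrative scenario (hypothetical numbers, not a measured
    system): with a 512 MiB highest segment and a 2 GiB working set spread
    over 8 GiB of RAM, per-segment eviction keeps finding victims inside
    those 512 MiB and thrashes them, while much colder pages in the lower
    segments are never considered.  Evicting along a global LRU instead
    selects the least recently used page regardless of the segment it
    belongs to.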

---
 i386/i386/vm_param.h |    2 
 vm/vm_page.c         |  208 +++++++++++++++++++++++++++++++++++++++++++--------
 vm/vm_page.h         |    3 
 3 files changed, 180 insertions(+), 33 deletions(-)

--- a/i386/i386/vm_param.h
+++ b/i386/i386/vm_param.h
@@ -90,7 +90,7 @@
  * These were tested as working.
  */
 #ifdef __x86_64__
-#define MAX_PHYS_END (30ULL * 1024 * 1024 * 1024)
+#define MAX_PHYS_END (27ULL * 1024 * 1024 * 1024)
 #else
 #define MAX_PHYS_END (8ULL * 1024 * 1024 * 1024)
 #endif
--- a/vm/vm_page.c
+++ b/vm/vm_page.c
@@ -28,6 +28,10 @@
  * it is filled by transferring multiple pages from the backend buddy system.
  * The symmetric case is handled likewise.
  *
+ * Active/inactive lists are kept up to date both per segment *and* globally,
+ * so that we can balance between segments and also page out pages using a
+ * global LRU view.
+ *
  * TODO Limit number of dirty pages, block allocations above a top limit.
  */
 
@@ -191,6 +195,15 @@ struct vm_page_queue {
     struct vm_page_list external;
 };
 
+struct vm_page_lru_queue {
+    struct list internal;
+    struct list external;
+};
+
+/* Protected by vm_page_lock_queues.  */
+struct vm_page_lru_queue vm_page_active_lru_list;
+struct vm_page_lru_queue vm_page_inactive_lru_list;
+
 /*
  * Segment name buffer size.
  */
@@ -570,6 +583,19 @@ vm_page_queue_remove(struct vm_page_queu
     list->nr_pages--;
 }
 
+static void
+vm_page_lru_queue_push(struct vm_page_lru_queue *queue, struct vm_page *page)
+{
+    struct list* list = (page->external ? &queue->external : &queue->internal);
+    list_insert_tail(list, &page->node_lru);
+}
+
+static void
+vm_page_lru_queue_remove(struct vm_page_lru_queue *queue, struct vm_page *page)
+{
+    list_remove(&page->node_lru);
+}
+
 static struct vm_page_seg *
 vm_page_seg_get(unsigned short index)
 {
@@ -756,6 +782,7 @@ static void
 vm_page_seg_add_active_page(struct vm_page_seg *seg, struct vm_page *page)
 {
     assert(simple_lock_taken(&seg->lock));
+    assert(vm_page_locked_queues());
     assert(page->object != NULL);
     assert(page->seg_index == vm_page_seg_index(seg));
     assert(page->type != VM_PT_FREE);
@@ -764,6 +791,7 @@ vm_page_seg_add_active_page(struct vm_pa
     page->active = TRUE;
     page->reference = TRUE;
     vm_page_queue_push(&seg->active_pages, page);
+    vm_page_lru_queue_push(&vm_page_active_lru_list, page);
     vm_page_active_count++;
 }
 
@@ -771,6 +799,7 @@ static void
 vm_page_seg_remove_active_page(struct vm_page_seg *seg, struct vm_page *page)
 {
     assert(simple_lock_taken(&seg->lock));
+    assert(vm_page_locked_queues());
     assert(page->object != NULL);
     assert(page->seg_index == vm_page_seg_index(seg));
     assert(page->type != VM_PT_FREE);
@@ -778,6 +807,7 @@ vm_page_seg_remove_active_page(struct vm
     assert(!page->free && page->active && !page->inactive);
     page->active = FALSE;
     vm_page_queue_remove(&seg->active_pages, page);
+    vm_page_lru_queue_remove(&vm_page_active_lru_list, page);
     vm_page_active_count--;
 }
 
@@ -785,6 +815,7 @@ static void
 vm_page_seg_add_inactive_page(struct vm_page_seg *seg, struct vm_page *page)
 {
     assert(simple_lock_taken(&seg->lock));
+    assert(vm_page_locked_queues());
     assert(page->object != NULL);
     assert(page->seg_index == vm_page_seg_index(seg));
     assert(page->type != VM_PT_FREE);
@@ -792,6 +823,7 @@ vm_page_seg_add_inactive_page(struct vm_
     assert(!page->free && !page->active && !page->inactive);
     page->inactive = TRUE;
     vm_page_queue_push(&seg->inactive_pages, page);
+    vm_page_lru_queue_push(&vm_page_inactive_lru_list, page);
     vm_page_inactive_count++;
 }
 
@@ -799,6 +831,7 @@ static void
 vm_page_seg_remove_inactive_page(struct vm_page_seg *seg, struct vm_page *page)
 {
     assert(simple_lock_taken(&seg->lock));
+    assert(vm_page_locked_queues());
     assert(page->object != NULL);
     assert(page->seg_index == vm_page_seg_index(seg));
     assert(page->type != VM_PT_FREE);
@@ -806,11 +839,12 @@ vm_page_seg_remove_inactive_page(struct
     assert(!page->free && !page->active && page->inactive);
     page->inactive = FALSE;
     vm_page_queue_remove(&seg->inactive_pages, page);
+    vm_page_lru_queue_remove(&vm_page_inactive_lru_list, page);
     vm_page_inactive_count--;
 }
 
 /*
- * Attempt to pull an active page.
+ * Attempt to pull an active page from a given segment.
  *
  * If successful, the object containing the page is locked.
  */
@@ -861,7 +895,7 @@ vm_page_seg_pull_active_page(struct vm_p
 }
 
 /*
- * Attempt to pull an inactive page.
+ * Attempt to pull an inactive page from a given segment.
  *
  * If successful, the object containing the page is locked.
  *
@@ -942,6 +976,124 @@ vm_page_seg_pull_cache_page(struct vm_pa
     return NULL;
 }
 
+/*
+ * Attempt to pull an active page from the LRU.
+ *
+ * If successful, the segment and the object containing the page are locked.
+ *
+ * XXX See vm_page_seg_pull_active_page (duplicated code).
+ */
+static struct vm_page *
+vm_page_pull_active_page(boolean_t external)
+{
+    struct vm_page *page, *first;
+    struct list* page_list;
+    struct vm_page_seg *seg;
+    boolean_t locked;
+
+    first = NULL;
+
+    page_list = (external
+                 ? &vm_page_active_lru_list.external
+                 : &vm_page_active_lru_list.internal);
+
+    for (;;) {
+
+        page = (list_empty(page_list)
+                ? NULL
+                : list_first_entry(page_list, struct vm_page, node_lru));
+
+        if (page == NULL || page == first)
+            break;
+
+        if (first == NULL) {
+            first = page;
+        }
+
+        seg = &vm_page_segs[page->seg_index];
+        simple_lock(&seg->lock);
+
+        vm_page_seg_remove_active_page(seg, page);
+        locked = vm_object_lock_try(page->object);
+
+        if (!locked) {
+            vm_page_seg_add_active_page(seg, page);
+            simple_unlock(&seg->lock);
+            continue;
+        }
+
+        if (!vm_page_can_move(page)) {
+            vm_page_seg_add_active_page(seg, page);
+            vm_object_unlock(page->object);
+            simple_unlock(&seg->lock);
+            continue;
+        }
+
+        return page;
+    }
+
+    return NULL;
+}
+
+/*
+ * Attempt to pull an inactive page from the LRU.
+ *
+ * If successful, the segment and the object containing the page are locked.
+ *
+ * XXX See vm_page_pull_active_page (duplicated code).
+ */
+static struct vm_page *
+vm_page_pull_inactive_page(boolean_t external)
+{
+    struct vm_page *page, *first;
+    struct list* page_list;
+    struct vm_page_seg *seg;
+    boolean_t locked;
+
+    first = NULL;
+
+    page_list = (external
+                 ? &vm_page_inactive_lru_list.external
+                 : &vm_page_inactive_lru_list.internal);
+
+    for (;;) {
+
+        page = (list_empty(page_list)
+                ? NULL
+                : list_first_entry(page_list, struct vm_page, node_lru));
+
+        if (page == NULL || page == first)
+            break;
+
+        if (first == NULL) {
+            first = page;
+        }
+
+        seg = &vm_page_segs[page->seg_index];
+        simple_lock(&seg->lock);
+
+        vm_page_seg_remove_inactive_page(seg, page);
+        locked = vm_object_lock_try(page->object);
+
+        if (!locked) {
+            vm_page_seg_add_inactive_page(seg, page);
+            simple_unlock(&seg->lock);
+            continue;
+        }
+
+        if (!vm_page_can_move(page)) {
+            vm_page_seg_add_inactive_page(seg, page);
+            vm_object_unlock(page->object);
+            simple_unlock(&seg->lock);
+            continue;
+        }
+
+        return page;
+    }
+
+    return NULL;
+}
+
 static boolean_t
 vm_page_seg_page_available(const struct vm_page_seg *seg)
 {
@@ -1114,34 +1266,36 @@ vm_page_seg_balance(struct vm_page_seg *
 }
 
 static boolean_t
-vm_page_seg_evict(struct vm_page_seg *seg, boolean_t external,
-		  boolean_t active, boolean_t alloc_paused)
+vm_page_evict_one(boolean_t external, boolean_t active, boolean_t alloc_paused)
 {
     struct vm_page *page;
     boolean_t reclaim, double_paging;
     vm_object_t object;
+    struct vm_page_seg *seg;
 
     if (!external && !IP_VALID(memory_manager_default))
       return FALSE;
 
+    seg = NULL;
     page = NULL;
     object = NULL;
     double_paging = FALSE;
 
 restart:
     vm_page_lock_queues();
-    simple_lock(&seg->lock);
 
     if (page != NULL) {
+        simple_lock(&seg->lock);
         vm_object_lock(page->object);
     } else {
         page = (active
-		? vm_page_seg_pull_active_page(seg, external)
-		: vm_page_seg_pull_inactive_page(seg, external));
+                ? vm_page_pull_active_page(external)
+                : vm_page_pull_inactive_page(external));
 
         if (page == NULL) {
             goto out;
         }
+        seg = &vm_page_segs[page->seg_index];
     }
 
     assert(page->object != NULL);
@@ -1160,6 +1314,7 @@ restart:
         vm_stat.reactivations++;
         current_task()->reactivations++;
         vm_page_unlock_queues();
+        seg = NULL;
         page = NULL;
         object = NULL;
         goto restart;
@@ -1208,7 +1363,8 @@ restart:
     }
 
 out:
-    simple_unlock(&seg->lock);
+    if (seg)
+        simple_unlock(&seg->lock);
 
     if (object == NULL) {
         vm_page_unlock_queues();
@@ -1458,6 +1614,11 @@ vm_page_setup(void)
 
     vm_page_check_boot_segs();
 
+    list_init(&vm_page_active_lru_list.internal);
+    list_init(&vm_page_active_lru_list.external);
+    list_init(&vm_page_inactive_lru_list.internal);
+    list_init(&vm_page_inactive_lru_list.external);
+
     /*
      * Compute the page table size.
      */
@@ -2033,32 +2194,17 @@ vm_page_balance(void)
 static boolean_t
 vm_page_evict_once(boolean_t alloc_paused)
 {
-    unsigned int i;
-
-    /*
-     * It's important here that pages are evicted from lower priority
-     * segments first.
-     */
-
     /* Try to evict inactive pages first */
-    for (i = vm_page_segs_size - 1; i < vm_page_segs_size; i--) {
-        struct vm_page_seg *seg = vm_page_seg_get(i);
-
-	/* Try to evict external pages first */
-	if (vm_page_seg_evict(seg, TRUE, FALSE, alloc_paused) ||
-	    vm_page_seg_evict(seg, FALSE, FALSE, alloc_paused))
-	  return TRUE;
-    }
+    /* Try to evict external pages first */
+    if (vm_page_evict_one(TRUE, FALSE, alloc_paused) ||
+        vm_page_evict_one(FALSE, FALSE, alloc_paused))
+        return TRUE;
 
     /* Then try to evict active pages */
-    for (i = vm_page_segs_size - 1; i < vm_page_segs_size; i--) {
-        struct vm_page_seg *seg = vm_page_seg_get(i);
-
-	/* Try to evict external pages first */
-	if (vm_page_seg_evict(seg, TRUE, TRUE, alloc_paused) ||
-	    vm_page_seg_evict(seg, FALSE, TRUE, alloc_paused))
-	  return TRUE;
-    }
+    /* Try to evict external pages first */
+    if (vm_page_evict_one(TRUE, TRUE, alloc_paused) ||
+        vm_page_evict_one(FALSE, TRUE, alloc_paused))
+        return TRUE;
 
     return FALSE;
 }
--- a/vm/vm_page.h
+++ b/vm/vm_page.h
@@ -78,7 +78,8 @@
  */
 
 struct vm_page {
-	struct list node;		/* page queues or free list (P) */
+	struct list node;		/* segment page queues or free list (P) */
+	struct list node_lru;		/* LRU page queues */
 	void *priv;
 
 	/*
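
The following standalone sketch (an editorial illustration, not part of the
patch; all names in it are made up) isolates the idea the patch implements:
each page is linked on two intrusive lists at once, a per-segment queue used
for balancing and a single global queue used as the LRU, so that the eviction
victim is the globally least recently activated page no matter which segment
it belongs to.

/* Dual-list sketch: every page sits on its segment's queue and on a global
 * LRU queue.  Eviction pulls from the head of the global queue. */
#include <stdio.h>
#include <stddef.h>

#define NR_SEGS   2
#define NR_PAGES  6

struct link { struct link *prev, *next; };

static void list_init(struct link *l) { l->prev = l->next = l; }
static int  list_empty(const struct link *l) { return l->next == l; }

static void list_insert_tail(struct link *head, struct link *n)
{
    n->prev = head->prev;
    n->next = head;
    head->prev->next = n;
    head->prev = n;
}

static void list_remove(struct link *n)
{
    n->prev->next = n->next;
    n->next->prev = n->prev;
}

struct page {
    struct link seg_node;   /* per-segment active queue */
    struct link lru_node;   /* global active LRU queue */
    int seg;
    int id;
};

static struct link seg_active[NR_SEGS];
static struct link global_lru;

/* Activation links the page on both queues; the tail of the global queue
 * is therefore the most recently activated page. */
static void page_activate(struct page *p)
{
    list_insert_tail(&seg_active[p->seg], &p->seg_node);
    list_insert_tail(&global_lru, &p->lru_node);
}

/* Eviction takes the head of the global queue: the least recently
 * activated page, whatever its segment, and unlinks it from both queues. */
static struct page *page_evict(void)
{
    struct page *p;

    if (list_empty(&global_lru))
        return NULL;

    p = (struct page *)((char *)global_lru.next
                        - offsetof(struct page, lru_node));
    list_remove(&p->lru_node);
    list_remove(&p->seg_node);
    return p;
}

int main(void)
{
    static struct page pages[NR_PAGES];
    struct page *p;
    int i;

    for (i = 0; i < NR_SEGS; i++)
        list_init(&seg_active[i]);
    list_init(&global_lru);

    /* Activate pages, alternating between the two segments. */
    for (i = 0; i < NR_PAGES; i++) {
        pages[i].id = i;
        pages[i].seg = i % NR_SEGS;
        page_activate(&pages[i]);
    }

    /* Victims come out in global activation order (0, 1, 2, ...), not
     * grouped by segment as per-segment scanning would produce. */
    while ((p = page_evict()) != NULL)
        printf("evict page %d from segment %d\n", p->id, p->seg);

    return 0;
}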
