*/
unsigned long numa_next_scan;
+ /* numa_next_reset is the jiffies time of the next PTE scan period reset */
+ unsigned long numa_next_reset;
+
/* Restart point for scanning and setting pte_numa */
unsigned long numa_scan_offset;
#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
#ifdef CONFIG_NUMA_BALANCING
-extern void task_numa_fault(int node, int pages);
+extern void task_numa_fault(int node, int pages, bool migrated);
#else
-static inline void task_numa_fault(int node, int pages)
+static inline void task_numa_fault(int node, int pages, bool migrated)
{
}
#endif
extern unsigned int sysctl_numa_balancing_scan_delay;
extern unsigned int sysctl_numa_balancing_scan_period_min;
extern unsigned int sysctl_numa_balancing_scan_period_max;
+extern unsigned int sysctl_numa_balancing_scan_period_reset;
extern unsigned int sysctl_numa_balancing_scan_size;
extern unsigned int sysctl_numa_balancing_settle_count;
#ifdef CONFIG_NUMA_BALANCING
if (p->mm && atomic_read(&p->mm->mm_users) == 1) {
p->mm->numa_next_scan = jiffies;
+ p->mm->numa_next_reset = jiffies;
p->mm->numa_scan_seq = 0;
}
* numa task sample period in ms
*/
unsigned int sysctl_numa_balancing_scan_period_min = 100;
-unsigned int sysctl_numa_balancing_scan_period_max = 100*16;
+unsigned int sysctl_numa_balancing_scan_period_max = 100*50;
+unsigned int sysctl_numa_balancing_scan_period_reset = 100*600;
/* Portion of address space to scan in MB */
unsigned int sysctl_numa_balancing_scan_size = 256;
/*
* Got a PROT_NONE fault for a page on @node.
+ * @migrated tells whether the faulting page was migrated; only faults
+ * that did not migrate lengthen the task's scan period. Callers in this
+ * patch pass 1 or HPAGE_PMD_NR as @pages. Neither @node nor @pages is
+ * consumed by the body shown here yet (see the FIXME below).
*/
-void task_numa_fault(int node, int pages)
+void task_numa_fault(int node, int pages, bool migrated)
{
struct task_struct *p = current;
/* FIXME: Allocate task-specific structure for placement policy here */
/*
- * Assume that as faults occur that pages are getting properly placed
- * and fewer NUMA hints are required. Note that this is a big
- * assumption, it assumes processes reach a steady steady with no
- * further phase changes.
+ * If pages are properly placed (did not migrate) then scan slower:
+ * lengthen the period by jiffies_to_msecs(10), capped at
+ * sysctl_numa_balancing_scan_period_max. The period is reset
+ * periodically in case of phase changes.
*/
- p->numa_scan_period = min(sysctl_numa_balancing_scan_period_max,
- p->numa_scan_period + jiffies_to_msecs(2));
+ if (!migrated)
+ p->numa_scan_period = min(sysctl_numa_balancing_scan_period_max,
+ p->numa_scan_period + jiffies_to_msecs(10));
task_numa_placement(p);
}
if (p->flags & PF_EXITING)
return;
+ /*
+ * Reset the scan period to its minimum once the reset interval has
+ * elapsed. The scan rate is lowered while pages are properly placed,
+ * but a task can enter a new phase, so placement must be re-examined.
+ * Lacking proper tracking of reference behaviour, this blunt hammer
+ * is used.
+ */
+ migrate = mm->numa_next_reset;
+ if (time_after(now, migrate)) {
+ p->numa_scan_period = sysctl_numa_balancing_scan_period_min;
+ next_scan = now + msecs_to_jiffies(sysctl_numa_balancing_scan_period_reset);
+ xchg(&mm->numa_next_reset, next_scan);
+ }
+
/*
* Enforce maximal scan/migration frequency..
*/
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "numa_balancing_scan_period_reset",
+ .data = &sysctl_numa_balancing_scan_period_reset,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
{
.procname = "numa_balancing_scan_period_max_ms",
.data = &sysctl_numa_balancing_scan_period_max,
spin_unlock(&mm->page_table_lock);
if (page) {
put_page(page);
- task_numa_fault(numa_node_id(), HPAGE_PMD_NR);
+ task_numa_fault(numa_node_id(), HPAGE_PMD_NR, false);
}
return 0;
}
spinlock_t *ptl;
int current_nid = -1;
int target_nid;
+ bool migrated = false;
/*
* The "pte" at this point cannot be used safely without
}
/* Migrate to the requested node */
- if (migrate_misplaced_page(page, target_nid))
+ migrated = migrate_misplaced_page(page, target_nid);
+ if (migrated)
current_nid = target_nid;
out:
if (current_nid != -1)
- task_numa_fault(current_nid, 1);
+ task_numa_fault(current_nid, 1, migrated);
return 0;
}
struct page *page;
int curr_nid = local_nid;
int target_nid;
+ bool migrated;
if (!pte_present(pteval))
continue;
if (!pte_numa(pteval))
/* Migrate to the requested node */
pte_unmap_unlock(pte, ptl);
- if (migrate_misplaced_page(page, target_nid))
+ migrated = migrate_misplaced_page(page, target_nid);
+ if (migrated)
curr_nid = target_nid;
- task_numa_fault(curr_nid, 1);
+ task_numa_fault(curr_nid, 1, migrated);
pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
}