*参照元 [#fa6b4d8b] #backlinks *説明 [#f3bde180] -パス: [[linux-2.6.33/mm/vmscan.c]] -kswapd 用のノード単位ページ回収関数 --ノード pgdat 内の全ゾーンの空きページが high_wmark_pages(zone) を満たす状態になるまでページ回収を行い、実際に解放できたページ数を返す **引数 [#ddf98c79] -pg_data_t *pgdat --バランス（回収）対象のノード --[[linux-2.6.33/pg_data_t]] -int order --要求された割り当てオーダー **返り値 [#gbabb1eb] -unsigned long --実際に解放したページ数 **参考 [#cef70d9b] *実装 [#z7120be3] /* * For kswapd, balance_pgdat() will work across all this node's zones until * they are all at high_wmark_pages(zone). * * Returns the number of pages which were actually freed. * * There is special handling here for zones which are full of pinned pages. * This can happen if the pages are all mlocked, or if they are all used by * device drivers (say, ZONE_DMA). Or if they are all in use by hugetlb. * What we do is to detect the case where all pages in the zone have been * scanned twice and there has been zero successful reclaim. Mark the zone as * dead and from now on, only perform a short scan. Basically we're polling * the zone for when the problem goes away. * * kswapd scans the zones in the highmem->normal->dma direction. It skips * zones which have free_pages > high_wmark_pages(zone), but once a zone is * found to have free_pages <= high_wmark_pages(zone), we scan that zone and the * lower zones regardless of the number of free pages in the lower zones. This * interoperates with the page allocator fallback scheme to ensure that aging * of pages is balanced across the zones. */ static unsigned long balance_pgdat(pg_data_t *pgdat, int order) { int all_zones_ok; int priority; int i; unsigned long total_scanned; struct reclaim_state *reclaim_state = current->reclaim_state; struct scan_control sc = { .gfp_mask = GFP_KERNEL, .may_unmap = 1, .may_swap = 1, /* * kswapd doesn't want to be bailed out while reclaim. because * we want to put equal scanning pressure on each zone. 
*/ .nr_to_reclaim = ULONG_MAX, .swappiness = vm_swappiness, .order = order, .mem_cgroup = NULL, .isolate_pages = isolate_pages_global, }; - --[[linux-2.6.33/reclaim_state]] - --[[linux-2.6.33/scan_control]] - --[[linux-2.6.33/current(global)]] - --[[linux-2.6.33/GFP_KERNEL]] - --[[linux-2.6.33/vm_swappiness(global)]] - --[[linux-2.6.33/isolate_pages_global()]] /* * temp_priority is used to remember the scanning priority at which * this zone was successfully refilled to * free_pages == high_wmark_pages(zone). */ int temp_priority[MAX_NR_ZONES]; - --[[linux-2.6.33/MAX_NR_ZONES]] loop_again: total_scanned = 0; sc.nr_reclaimed = 0; sc.may_writepage = !laptop_mode; count_vm_event(PAGEOUTRUN); - --[[linux-2.6.33/laptop_mode(global)]] - --[[linux-2.6.33/count_vm_event()]] - --[[linux-2.6.33/PAGEOUTRUN]] for (i = 0; i < pgdat->nr_zones; i++) temp_priority[i] = DEF_PRIORITY; - --[[linux-2.6.33/DEF_PRIORITY]] for (priority = DEF_PRIORITY; priority >= 0; priority--) { int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ unsigned long lru_pages = 0; int has_under_min_watermark_zone = 0; /* The swap token gets in the way of swapout... */ if (!priority) disable_swap_token(); - --[[linux-2.6.33/disable_swap_token()]] all_zones_ok = 1; /* * Scan in the highmem->dma direction for the highest * zone which needs scanning */ for (i = pgdat->nr_zones - 1; i >= 0; i--) { struct zone *zone = pgdat->node_zones + i; - --[[linux-2.6.33/zone]] if (!populated_zone(zone)) continue; - --[[linux-2.6.33/populated_zone()]] if (zone_is_all_unreclaimable(zone) && priority != DEF_PRIORITY) continue; - --[[linux-2.6.33/zone_is_all_unreclaimable()]] - --[[linux-2.6.33/DEF_PRIORITY]] /* * Do some background aging of the anon list, to give * pages a chance to be referenced before reclaiming. 
*/ if (inactive_anon_is_low(zone, &sc)) shrink_active_list(SWAP_CLUSTER_MAX, zone, &sc, priority, 0); - --[[linux-2.6.33/inactive_anon_is_low()]] - --[[linux-2.6.33/shrink_active_list()]] - --[[linux-2.6.33/SWAP_CLUSTER_MAX]] if (!zone_watermark_ok(zone, order, high_wmark_pages(zone), 0, 0)) { end_zone = i; break; } } - --[[linux-2.6.33/zone_watermark_ok()]] - --[[linux-2.6.33/high_wmark_pages()]] if (i < 0) goto out; for (i = 0; i <= end_zone; i++) { struct zone *zone = pgdat->node_zones + i; lru_pages += zone_reclaimable_pages(zone); } - --[[linux-2.6.33/zone]] - --[[linux-2.6.33/zone_reclaimable_pages()]] /* * Now scan the zone in the dma->highmem direction, stopping * at the last zone which needs scanning. * * We do this because the page allocator works in the opposite * direction. This prevents the page allocator from allocating * pages behind kswapd's direction of progress, which would * cause too much scanning of the lower zones. */ for (i = 0; i <= end_zone; i++) { struct zone *zone = pgdat->node_zones + i; int nr_slab; int nid, zid; if (!populated_zone(zone)) continue; - --[[linux-2.6.33/populated_zone()]] if (zone_is_all_unreclaimable(zone) && priority != DEF_PRIORITY) continue; - --[[linux-2.6.33/zone_is_all_unreclaimable()]] - --[[linux-2.6.33/DEF_PRIORITY]] if (!zone_watermark_ok(zone, order, high_wmark_pages(zone), end_zone, 0)) all_zones_ok = 0; temp_priority[i] = priority; sc.nr_scanned = 0; note_zone_scanning_priority(zone, priority); - --[[linux-2.6.33/zone_watermark_ok()]] - --[[linux-2.6.33/high_wmark_pages()]] - --[[linux-2.6.33/note_zone_scanning_priority()]] nid = pgdat->node_id; zid = zone_idx(zone); - --[[linux-2.6.33/zone_idx()]] /* * Call soft limit reclaim before calling shrink_zone. * For now we ignore the return value */ mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask, nid, zid); - --[[linux-2.6.33/mem_cgroup_soft_limit_reclaim()]] /* * We put equal pressure on every zone, unless one * zone has way too many pages free already. 
*/ if (!zone_watermark_ok(zone, order, 8*high_wmark_pages(zone), end_zone, 0)) shrink_zone(priority, zone, &sc); - --[[linux-2.6.33/zone_watermark_ok()]] - --[[linux-2.6.33/high_wmark_pages()]] - --[[linux-2.6.33/shrink_zone()]] reclaim_state->reclaimed_slab = 0; nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL, lru_pages); sc.nr_reclaimed += reclaim_state->reclaimed_slab; total_scanned += sc.nr_scanned; if (zone_is_all_unreclaimable(zone)) continue; if (nr_slab == 0 && zone->pages_scanned >= (zone_reclaimable_pages(zone) * 6)) zone_set_flag(zone, ZONE_ALL_UNRECLAIMABLE); - --[[linux-2.6.33/shrink_slab()]] - --[[linux-2.6.33/GFP_KERNEL]] - --[[linux-2.6.33/zone_is_all_unreclaimable()]] - --[[linux-2.6.33/zone_reclaimable_pages()]] - --[[linux-2.6.33/zone_set_flag()]] - --[[linux-2.6.33/ZONE_ALL_UNRECLAIMABLE]] /* * If we've done a decent amount of scanning and * the reclaim ratio is low, start doing writepage * even in laptop mode */ if (total_scanned > SWAP_CLUSTER_MAX * 2 && total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2) sc.may_writepage = 1; - --[[linux-2.6.33/SWAP_CLUSTER_MAX]] /* * We are still under min water mark. it mean we have * GFP_ATOMIC allocation failure risk. Hurry up! */ if (!zone_watermark_ok(zone, order, min_wmark_pages(zone), end_zone, 0)) has_under_min_watermark_zone = 1; - --[[linux-2.6.33/zone_watermark_ok()]] - --[[linux-2.6.33/min_wmark_pages()]] } if (all_zones_ok) break; /* kswapd: all done */ /* * OK, kswapd is getting into trouble. Take a nap, then take * another pass across the zones. 
*/ if (total_scanned && (priority < DEF_PRIORITY - 2)) { if (has_under_min_watermark_zone) count_vm_event(KSWAPD_SKIP_CONGESTION_WAIT); else congestion_wait(BLK_RW_ASYNC, HZ/10); } - --[[linux-2.6.33/count_vm_event()]] - --[[linux-2.6.33/KSWAPD_SKIP_CONGESTION_WAIT]] - --[[linux-2.6.33/congestion_wait()]] - --[[linux-2.6.33/BLK_RW_ASYNC]] - --[[linux-2.6.33/HZ]] /* * We do this so kswapd doesn't build up large priorities for * example when it is freeing in parallel with allocators. It * matches the direct reclaim path behaviour in terms of impact * on zone->*_priority. */ if (sc.nr_reclaimed >= SWAP_CLUSTER_MAX) break; } out: /* * Note within each zone the priority level at which this zone was * brought into a happy state. So that the next thread which scans this * zone will start out at that priority level. */ for (i = 0; i < pgdat->nr_zones; i++) { struct zone *zone = pgdat->node_zones + i; zone->prev_priority = temp_priority[i]; } if (!all_zones_ok) { cond_resched(); try_to_freeze(); - --[[linux-2.6.33/cond_resched()]] - --[[linux-2.6.33/try_to_freeze()]] /* * Fragmentation may mean that the system cannot be * rebalanced for high-order allocations in all zones. * At this point, if nr_reclaimed < SWAP_CLUSTER_MAX, * it means the zones have been fully scanned and are still * not balanced. For high-order allocations, there is * little point trying all over again as kswapd may * infinite loop. * * Instead, recheck all watermarks at order-0 as they * are the most important. If watermarks are ok, kswapd will go * back to sleep. High-order users can still perform direct * reclaim if they wish. */ if (sc.nr_reclaimed < SWAP_CLUSTER_MAX) order = sc.order = 0; goto loop_again; } return sc.nr_reclaimed; } *コメント [#rf3219a6]