*参照元 [#fa6b4d8b] #backlinks *説明 [#f3bde180] -パス: [[linux-2.6.33/mm/vmscan.c]] -kswapd 用のノード単位ページ回収関数 --ノード pgdat 内の全ゾーンの空きページが high_wmark_pages(zone) を満たす状態になるまでページ回収を行い、実際に解放できたページ数を返す **引数 [#ddf98c79] -pg_data_t *pgdat --バランス（回収）対象のノード --[[linux-2.6.33/pg_data_t]] -int order --要求された割り当てオーダー **返り値 [#gbabb1eb] -unsigned long --実際に解放したページ数 **参考 [#cef70d9b] *実装 [#z7120be3] /* * For kswapd, balance_pgdat() will work across all this node's zones until * they are all at high_wmark_pages(zone). * * Returns the number of pages which were actually freed. * * There is special handling here for zones which are full of pinned pages. * This can happen if the pages are all mlocked, or if they are all used by * device drivers (say, ZONE_DMA). Or if they are all in use by hugetlb. * What we do is to detect the case where all pages in the zone have been * scanned twice and there has been zero successful reclaim. Mark the zone as * dead and from now on, only perform a short scan. Basically we're polling * the zone for when the problem goes away. * * kswapd scans the zones in the highmem->normal->dma direction. It skips * zones which have free_pages > high_wmark_pages(zone), but once a zone is * found to have free_pages <= high_wmark_pages(zone), we scan that zone and the * lower zones regardless of the number of free pages in the lower zones. This * interoperates with the page allocator fallback scheme to ensure that aging * of pages is balanced across the zones. */ static unsigned long balance_pgdat(pg_data_t *pgdat, int order) { int all_zones_ok; int priority; int i; unsigned long total_scanned; struct reclaim_state *reclaim_state = current->reclaim_state; struct scan_control sc = { .gfp_mask = GFP_KERNEL, .may_unmap = 1, .may_swap = 1, /* * kswapd doesn't want to be bailed out while reclaim. because * we want to put equal scanning pressure on each zone. 
*/ .nr_to_reclaim = ULONG_MAX, .swappiness = vm_swappiness, .order = order, .mem_cgroup = NULL, .isolate_pages = isolate_pages_global, }; - --[[linux-2.6.33/reclaim_state]] - --[[linux-2.6.33/scan_control]] - --[[linux-2.6.33/current(global)]] - --[[linux-2.6.33/GFP_KERNEL]] - --[[linux-2.6.33/vm_swappiness(global)]] - --[[linux-2.6.33/isolate_pages_global()]] /* * temp_priority is used to remember the scanning priority at which * this zone was successfully refilled to * free_pages == high_wmark_pages(zone). */ int temp_priority[MAX_NR_ZONES]; - --[[linux-2.6.33/MAX_NR_ZONES]] loop_again: total_scanned = 0; sc.nr_reclaimed = 0; sc.may_writepage = !laptop_mode; count_vm_event(PAGEOUTRUN); - --[[linux-2.6.33/laptop_mode(global)]] - --[[linux-2.6.33/count_vm_event()]] - --[[linux-2.6.33/PAGEOUTRUN]] for (i = 0; i < pgdat->nr_zones; i++) temp_priority[i] = DEF_PRIORITY; - --[[linux-2.6.33/DEF_PRIORITY]] for (priority = DEF_PRIORITY; priority >= 0; priority--) { int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ unsigned long lru_pages = 0; int has_under_min_watermark_zone = 0; /* The swap token gets in the way of swapout... */ if (!priority) disable_swap_token(); - --[[linux-2.6.33/disable_swap_token()]] all_zones_ok = 1; /* * Scan in the highmem->dma direction for the highest * zone which needs scanning */ for (i = pgdat->nr_zones - 1; i >= 0; i--) { struct zone *zone = pgdat->node_zones + i; - --[[linux-2.6.33/zone]] if (!populated_zone(zone)) continue; - --[[linux-2.6.33/populated_zone()]] if (zone_is_all_unreclaimable(zone) && priority != DEF_PRIORITY) continue; - --[[linux-2.6.33/zone_is_all_unreclaimable()]] - --[[linux-2.6.33/DEF_PRIORITY]] /* * Do some background aging of the anon list, to give * pages a chance to be referenced before reclaiming. 
*/ if (inactive_anon_is_low(zone, &sc)) shrink_active_list(SWAP_CLUSTER_MAX, zone, &sc, priority, 0); - --[[linux-2.6.33/inactive_anon_is_low()]] - --[[linux-2.6.33/shrink_active_list()]] - --[[linux-2.6.33/SWAP_CLUSTER_MAX]] if (!zone_watermark_ok(zone, order, high_wmark_pages(zone), 0, 0)) { end_zone = i; break; } } - --[[linux-2.6.33/zone_watermark_ok()]] - --[[linux-2.6.33/high_wmark_pages()]] if (i < 0) goto out; for (i = 0; i <= end_zone; i++) { struct zone *zone = pgdat->node_zones + i; lru_pages += zone_reclaimable_pages(zone); } - --[[linux-2.6.33/zone]] - --[[linux-2.6.33/zone_reclaimable_pages()]] /* * Now scan the zone in the dma->highmem direction, stopping * at the last zone which needs scanning. * * We do this because the page allocator works in the opposite * direction. This prevents the page allocator from allocating * pages behind kswapd's direction of progress, which would * cause too much scanning of the lower zones. */ for (i = 0; i <= end_zone; i++) { struct zone *zone = pgdat->node_zones + i; int nr_slab; int nid, zid; if (!populated_zone(zone)) continue; - --[[linux-2.6.33/populated_zone()]] if (zone_is_all_unreclaimable(zone) && priority != DEF_PRIORITY) continue; - --[[linux-2.6.33/zone_is_all_unreclaimable()]] - --[[linux-2.6.33/DEF_PRIORITY]] if (!zone_watermark_ok(zone, order, high_wmark_pages(zone), end_zone, 0)) all_zones_ok = 0; temp_priority[i] = priority; sc.nr_scanned = 0; note_zone_scanning_priority(zone, priority); - --[[linux-2.6.33/zone_watermark_ok()]] - --[[linux-2.6.33/high_wmark_pages()]] - --[[linux-2.6.33/note_zone_scanning_priority()]] nid = pgdat->node_id; zid = zone_idx(zone); - --[[linux-2.6.33/zone_idx()]] /* * Call soft limit reclaim before calling shrink_zone. * For now we ignore the return value */ mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask, nid, zid); - --[[linux-2.6.33/mem_cgroup_soft_limit_reclaim()]] /* * We put equal pressure on every zone, unless one * zone has way too many pages free already. 
*/ if (!zone_watermark_ok(zone, order, 8*high_wmark_pages(zone), end_zone, 0)) shrink_zone(priority, zone, &sc); - --[[linux-2.6.33/zone_watermark_ok()]] - --[[linux-2.6.33/high_wmark_pages()]] - --[[linux-2.6.33/shrink_zone()]] reclaim_state->reclaimed_slab = 0; nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL, lru_pages); sc.nr_reclaimed += reclaim_state->reclaimed_slab; total_scanned += sc.nr_scanned; if (zone_is_all_unreclaimable(zone)) continue; if (nr_slab == 0 && zone->pages_scanned >= (zone_reclaimable_pages(zone) * 6)) zone_set_flag(zone, ZONE_ALL_UNRECLAIMABLE); - --[[linux-2.6.33/shrink_slab()]] - --[[linux-2.6.33/GFP_KERNEL]] - --[[linux-2.6.33/zone_is_all_unreclaimable()]] - --[[linux-2.6.33/zone_reclaimable_pages()]] - --[[linux-2.6.33/zone_set_flag()]] - --[[linux-2.6.33/ZONE_ALL_UNRECLAIMABLE]] /* * If we've done a decent amount of scanning and * the reclaim ratio is low, start doing writepage * even in laptop mode */ if (total_scanned > SWAP_CLUSTER_MAX * 2 && total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2) sc.may_writepage = 1; - --[[linux-2.6.33/SWAP_CLUSTER_MAX]] /* * We are still under min water mark. it mean we have * GFP_ATOMIC allocation failure risk. Hurry up! */ if (!zone_watermark_ok(zone, order, min_wmark_pages(zone), end_zone, 0)) has_under_min_watermark_zone = 1; - --[[linux-2.6.33/zone_watermark_ok()]] - --[[linux-2.6.33/min_wmark_pages()]] } if (all_zones_ok) break; /* kswapd: all done */ /* * OK, kswapd is getting into trouble. Take a nap, then take * another pass across the zones. 
*/ if (total_scanned && (priority < DEF_PRIORITY - 2)) { if (has_under_min_watermark_zone) count_vm_event(KSWAPD_SKIP_CONGESTION_WAIT); else congestion_wait(BLK_RW_ASYNC, HZ/10); } - --[[linux-2.6.33/count_vm_event()]] - --[[linux-2.6.33/KSWAPD_SKIP_CONGESTION_WAIT]] - --[[linux-2.6.33/congestion_wait()]] - --[[linux-2.6.33/BLK_RW_ASYNC]] - --[[linux-2.6.33/HZ]] /* * We do this so kswapd doesn't build up large priorities for * example when it is freeing in parallel with allocators. It * matches the direct reclaim path behaviour in terms of impact * on zone->*_priority. */ if (sc.nr_reclaimed >= SWAP_CLUSTER_MAX) break; } out: /* * Note within each zone the priority level at which this zone was * brought into a happy state. So that the next thread which scans this * zone will start out at that priority level. */ for (i = 0; i < pgdat->nr_zones; i++) { struct zone *zone = pgdat->node_zones + i; zone->prev_priority = temp_priority[i]; } if (!all_zones_ok) { cond_resched(); try_to_freeze(); - --[[linux-2.6.33/cond_resched()]] - --[[linux-2.6.33/try_to_freeze()]] /* * Fragmentation may mean that the system cannot be * rebalanced for high-order allocations in all zones. * At this point, if nr_reclaimed < SWAP_CLUSTER_MAX, * it means the zones have been fully scanned and are still * not balanced. For high-order allocations, there is * little point trying all over again as kswapd may * infinite loop. * * Instead, recheck all watermarks at order-0 as they * are the most important. If watermarks are ok, kswapd will go * back to sleep. High-order users can still perform direct * reclaim if they wish. */ if (sc.nr_reclaimed < SWAP_CLUSTER_MAX) order = sc.order = 0; goto loop_again; } return sc.nr_reclaimed; } *コメント [#rf3219a6]