1、__alloc_pages_nodemask (core function)
The zonelist passed in may be the node-local zonelist or a zonelist that also contains fallback zones from other nodes.
struct page *
__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
			struct zonelist *zonelist, nodemask_t *nodemask)
{
	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
	struct zone *preferred_zone;
	struct page *page;
	int migratetype = allocflags_to_migratetype(gfp_mask);

	gfp_mask &= gfp_allowed_mask;

	lockdep_trace_alloc(gfp_mask);

	might_sleep_if(gfp_mask & __GFP_WAIT);

	if (should_fail_alloc_page(gfp_mask, order))
		return NULL;

	/*
	 * Check the zones suitable for the gfp_mask contain at least one
	 * valid zone. It's possible to have an empty zonelist as a result
	 * of GFP_THISNODE and a memoryless node
	 */
	if (unlikely(!zonelist->_zonerefs->zone))
		return NULL;

	/* The preferred zone is used for statistics later */
	first_zones_zonelist(zonelist, high_zoneidx, nodemask, &preferred_zone);
	if (!preferred_zone)
		return NULL;

	/* First allocation attempt */
	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
			zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET,
			preferred_zone, migratetype);
	if (unlikely(!page))
		page = __alloc_pages_slowpath(gfp_mask, order,
				zonelist, high_zoneidx, nodemask,
				preferred_zone, migratetype);

	trace_mm_page_alloc(page, order, gfp_mask, migratetype);
	return page;
}
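For reference, callers normally do not invoke __alloc_pages_nodemask() directly; they go through wrappers such as alloc_pages()/alloc_pages_node(), which select the node's zonelist and pass a NULL nodemask. The sketch below shows that entry path under the assumption of a kernel of roughly this vintage; the wrapper name is illustrative (the real helpers are alloc_pages_node() and __alloc_pages() in include/linux/gfp.h, and exact signatures vary between versions):

#include <linux/gfp.h>
#include <linux/topology.h>

/* Illustrative sketch of the usual entry path (not the kernel's exact code). */
static inline struct page *alloc_pages_node_sketch(int nid, gfp_t gfp_mask,
						   unsigned int order)
{
	/* An unknown node means "allocate on the current node". */
	if (nid < 0)
		nid = numa_node_id();

	/*
	 * node_zonelist() picks the node's zonelist (local-only or with
	 * fallback, depending on gfp_mask); the core function does the rest.
	 */
	return __alloc_pages_nodemask(gfp_mask, order,
				      node_zonelist(nid, gfp_mask), NULL);
}

A typical caller simply does alloc_pages(GFP_KERNEL, 0) and ends up in __alloc_pages_nodemask() through a path like this.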
1.1、get_page_from_freelist
static struct page *
get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order,
		struct zonelist *zonelist, int high_zoneidx, int alloc_flags,
		struct zone *preferred_zone, int migratetype)
{
	struct zoneref *z;
	struct page *page = NULL;
	int classzone_idx;
	struct zone *zone;
	nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */
	int zlc_active = 0;		/* set if using zonelist_cache */
	int did_zlc_setup = 0;		/* just call zlc_setup() one time */

	/* Index of the preferred zone, e.g. ZONE_DMA or ZONE_NORMAL */
	classzone_idx = zone_idx(preferred_zone);
zonelist_scan:
	/* Scan the zonelist, looking for a zone with enough free pages. */
	for_each_zone_zonelist_nodemask(zone, z, zonelist,
						high_zoneidx, nodemask) {
		if (NUMA_BUILD && zlc_active &&
			!zlc_zone_worth_trying(zonelist, z, allowednodes))
				continue;
		if ((alloc_flags & ALLOC_CPUSET) &&
			!cpuset_zone_allowed_softwall(zone, gfp_mask))
				goto try_next_zone;

		if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
			unsigned long mark;
			int ret;

			mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK];
			if (zone_watermark_ok(zone, order, mark,
				    classzone_idx, alloc_flags))
				goto try_this_zone;

			if (zone_reclaim_mode == 0)
				goto this_zone_full;

			ret = zone_reclaim(zone, gfp_mask, order);
			switch (ret) {
			case ZONE_RECLAIM_NOSCAN:
				/* did not scan */
				goto try_next_zone;
			case ZONE_RECLAIM_FULL:
				/* scanned but unreclaimable */
				goto this_zone_full;
			default:
				/* did we reclaim enough */
				if (!zone_watermark_ok(zone, order, mark,
						classzone_idx, alloc_flags))
					goto this_zone_full;
			}
		}

try_this_zone:
		page = buffered_rmqueue(preferred_zone, zone, order,
						gfp_mask, migratetype);
		if (page)
			break;
this_zone_full:
		if (NUMA_BUILD)
			zlc_mark_zone_full(zonelist, z);
try_next_zone:
		if (NUMA_BUILD && !did_zlc_setup && nr_online_nodes > 1) {
			/*
			 * we do zlc_setup after the first zone is tried but only
			 * if there are multiple nodes make it worthwhile
			 */
			allowednodes = zlc_setup(zonelist, alloc_flags);
			zlc_active = 1;
			did_zlc_setup = 1;
		}
	}

	if (unlikely(NUMA_BUILD && page == NULL && zlc_active)) {
		/* Disable zlc cache for second zonelist scan */
		zlc_active = 0;
		goto zonelist_scan;
	}
	return page;
}
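Whether a zone in this loop is usable mostly comes down to zone_watermark_ok(): after granting the request, the zone must still keep the watermark plus the lowmem reserve free, with a progressively smaller requirement at each order below the request. The following is a simplified, self-contained sketch of that test, not the kernel's actual code; struct zone_sketch, watermark_ok_sketch() and MAX_ORDER_SKETCH are illustrative stand-ins:

#include <stdbool.h>

#define MAX_ORDER_SKETCH 11

/* Simplified stand-in for struct zone; the real fields differ. */
struct zone_sketch {
	long free_pages;			/* total free pages in the zone */
	long lowmem_reserve;			/* reserve owed to the class zone */
	long nr_free[MAX_ORDER_SKETCH];		/* free blocks per order */
};

/*
 * Rough equivalent of the watermark test: after handing out 2^order pages,
 * the zone must keep 'mark' pages (plus the lowmem reserve) free overall,
 * and at every order below the request enough larger blocks must remain;
 * the threshold is halved per order, mirroring the kernel's heuristic.
 */
static bool watermark_ok_sketch(struct zone_sketch *z, int order, long mark)
{
	long free = z->free_pages - (1L << order) + 1;
	int o;

	if (free <= mark + z->lowmem_reserve)
		return false;

	for (o = 0; o < order; o++) {
		/* Blocks of this order cannot satisfy the larger request. */
		free -= z->nr_free[o] << o;
		mark >>= 1;
		if (free <= mark)
			return false;
	}
	return true;
}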
1.2、buffered_rmqueue
/*
 * Really, prep_compound_page() should be called from __rmqueue_bulk().  But
 * we cheat by calling it from here, in the order > 0 path.  Saves a branch
 * or two.
 */
static inline
struct page *buffered_rmqueue(struct zone *preferred_zone,
			struct zone *zone, int order, gfp_t gfp_flags,
			int migratetype)
{
	unsigned long flags;
	struct page *page;
	int cold = !!(gfp_flags & __GFP_COLD);

again:
	if (likely(order == 0)) {
		struct per_cpu_pages *pcp;
		struct list_head *list;

		local_irq_save(flags);
		pcp = &this_cpu_ptr(zone->pageset)->pcp;
		list = &pcp->lists[migratetype];
		if (list_empty(list)) {
			pcp->count += rmqueue_bulk(zone, 0,
					pcp->batch, list,
					migratetype, cold);
			if (unlikely(list_empty(list)))
				goto failed;
		}

		if (cold)
			page = list_entry(list->prev, struct page, lru);
		else
			page = list_entry(list->next, struct page, lru);

		list_del(&page->lru);
		pcp->count--;
	} else {
		if (unlikely(gfp_flags & __GFP_NOFAIL)) {
			/*
			 * __GFP_NOFAIL is not to be used in new code.
			 *
			 * All __GFP_NOFAIL callers should be fixed so that they
			 * properly detect and handle allocation failures.
			 *
			 * We most definitely don't want callers attempting to
			 * allocate greater than order-1 page units with
			 * __GFP_NOFAIL.
			 */
			WARN_ON_ONCE(order > 1);
		}
		spin_lock_irqsave(&zone->lock, flags);
		page = __rmqueue(zone, order, migratetype);
		spin_unlock(&zone->lock);
		if (!page)
			goto failed;
		__mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order));
	}

	__count_zone_vm_events(PGALLOC, zone, 1 << order);
	zone_statistics(preferred_zone, zone);
	local_irq_restore(flags);

	VM_BUG_ON(bad_range(zone, page));
	if (prep_new_page(page, order, gfp_flags))
		goto again;
	return page;

failed:
	local_irq_restore(flags);
	return NULL;
}
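The order-0 fast path above is essentially a per-CPU cache: single pages are handed out from pcp->lists[migratetype], so zone->lock and the buddy lists are only touched when the list needs a batched refill. A self-contained sketch of that pattern follows, with illustrative names: refill_from_buddy() is a hypothetical stand-in for rmqueue_bulk(), and struct pcp_sketch is not the kernel's struct per_cpu_pages:

#include <linux/list.h>
#include <linux/mm_types.h>

/* Illustrative per-CPU page cache, modelled on struct per_cpu_pages. */
struct pcp_sketch {
	int count;			/* pages currently on the list */
	int batch;			/* how many pages to refill at once */
	struct list_head list;		/* cached free pages, linked via page->lru */
};

/* Hypothetical refill helper standing in for rmqueue_bulk(): pulls up to
 * 'batch' pages from the buddy allocator onto the list, returns how many. */
int refill_from_buddy(struct pcp_sketch *pcp, int batch);

static struct page *pcp_alloc_sketch(struct pcp_sketch *pcp, int cold)
{
	struct page *page;

	/* Refill in a batch so the zone lock is taken only occasionally. */
	if (list_empty(&pcp->list))
		pcp->count += refill_from_buddy(pcp, pcp->batch);
	if (list_empty(&pcp->list))
		return NULL;

	/*
	 * Hot requests take the head (recently freed, likely still
	 * cache-warm); cold requests take the tail.
	 */
	if (cold)
		page = list_entry(pcp->list.prev, struct page, lru);
	else
		page = list_entry(pcp->list.next, struct page, lru);

	list_del(&page->lru);
	pcp->count--;
	return page;
}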
