1、__alloc_pages_nodemask (core function)

The zonelist passed in may be the local node's zonelist, or a zonelist that also contains fallback zones from other nodes.
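
For context, a zonelist is essentially an ordered array of zoneref entries terminated by an entry whose zone pointer is NULL; that terminator is what makes the `!zonelist->_zonerefs->zone` check in the code below sufficient to detect an empty zonelist. A simplified sketch of the structures (following include/linux/mmzone.h of kernels from this era; the NUMA-only zonelist_cache member is omitted):

    /* Simplified sketch; not the literal kernel definition. */
    struct zoneref {
        struct zone *zone;      /* pointer to the actual zone */
        int zone_idx;           /* zone_idx(zoneref->zone), cached for fast filtering */
    };

    struct zonelist {
        struct zonelist_cache *zlcache_ptr;     /* NULL, or the NUMA zlc cache */
        /* zones in allocation-preference order, terminated by a NULL zone */
        struct zoneref _zonerefs[MAX_ZONES_PER_ZONELIST + 1];
    };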

    struct page *
    __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
                struct zonelist *zonelist, nodemask_t *nodemask)
    {
        enum zone_type high_zoneidx = gfp_zone(gfp_mask);
        struct zone *preferred_zone;
        struct page *page;
        int migratetype = allocflags_to_migratetype(gfp_mask);

        gfp_mask &= gfp_allowed_mask;

        lockdep_trace_alloc(gfp_mask);

        might_sleep_if(gfp_mask & __GFP_WAIT);

        if (should_fail_alloc_page(gfp_mask, order))
            return NULL;

        /*
         * Check the zones suitable for the gfp_mask contain at least one
         * valid zone. It's possible to have an empty zonelist as a result
         * of GFP_THISNODE and a memoryless node
         */
        if (unlikely(!zonelist->_zonerefs->zone))
            return NULL;

        /* The preferred zone is used for statistics later */
        first_zones_zonelist(zonelist, high_zoneidx, nodemask, &preferred_zone);
        if (!preferred_zone)
            return NULL;

        /* First allocation attempt */
        page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
                zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET,
                preferred_zone, migratetype);
        if (unlikely(!page))
            page = __alloc_pages_slowpath(gfp_mask, order,
                    zonelist, high_zoneidx, nodemask,
                    preferred_zone, migratetype);

        trace_mm_page_alloc(page, order, gfp_mask, migratetype);
        return page;
    }
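
Callers rarely invoke __alloc_pages_nodemask() directly; most requests arrive through alloc_pages()/alloc_pages_node(), which select the zonelist for the requested (or current) node. Roughly, the !CONFIG_NUMA wrappers in include/linux/gfp.h of this era look like the following sketch:

    /* Sketch of the wrapper chain down to __alloc_pages_nodemask()
     * (simplified, !CONFIG_NUMA case). */
    static inline struct page *
    __alloc_pages(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist)
    {
        return __alloc_pages_nodemask(gfp_mask, order, zonelist, NULL);
    }

    static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
                                                unsigned int order)
    {
        /* an unknown node means the current node */
        if (nid < 0)
            nid = numa_node_id();

        return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask));
    }

    #define alloc_pages(gfp_mask, order) \
            alloc_pages_node(numa_node_id(), gfp_mask, order)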

1.1、get_page_from_freelist

    static struct page *
    get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order,
            struct zonelist *zonelist, int high_zoneidx, int alloc_flags,
            struct zone *preferred_zone, int migratetype)
    {
        struct zoneref *z;
        struct page *page = NULL;
        int classzone_idx;
        struct zone *zone;
        nodemask_t *allowednodes = NULL;    /* zonelist_cache approximation */
        int zlc_active = 0;                 /* set if using zonelist_cache */
        int did_zlc_setup = 0;              /* just call zlc_setup() one time */

        /* get the index of the preferred zone, e.g. ZONE_DMA or ZONE_NORMAL */
        classzone_idx = zone_idx(preferred_zone);

    zonelist_scan:
        for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx, nodemask) {
            if (NUMA_BUILD && zlc_active &&
                !zlc_zone_worth_trying(zonelist, z, allowednodes))
                    continue;
            if ((alloc_flags & ALLOC_CPUSET) &&
                !cpuset_zone_allowed_softwall(zone, gfp_mask))
                    goto try_next_zone;

            if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
                unsigned long mark;
                int ret;

                mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK];
                if (zone_watermark_ok(zone, order, mark,
                            classzone_idx, alloc_flags))
                    goto try_this_zone;

                if (zone_reclaim_mode == 0)
                    goto this_zone_full;

                ret = zone_reclaim(zone, gfp_mask, order);
                switch (ret) {
                case ZONE_RECLAIM_NOSCAN:
                    /* did not scan */
                    goto try_next_zone;
                case ZONE_RECLAIM_FULL:
                    /* scanned but unreclaimable */
                    goto this_zone_full;
                default:
                    /* did we reclaim enough */
                    if (!zone_watermark_ok(zone, order, mark,
                                classzone_idx, alloc_flags))
                        goto this_zone_full;
                }
            }

    try_this_zone:
            page = buffered_rmqueue(preferred_zone, zone, order,
                            gfp_mask, migratetype);
            if (page)
                break;
    this_zone_full:
            if (NUMA_BUILD)
                zlc_mark_zone_full(zonelist, z);
    try_next_zone:
            if (NUMA_BUILD && !did_zlc_setup && nr_online_nodes > 1) {
                /*
                 * we do zlc_setup after the first zone is tried but only
                 * if there are multiple nodes make it worthwhile
                 */
                allowednodes = zlc_setup(zonelist, alloc_flags);
                zlc_active = 1;
                did_zlc_setup = 1;
            }
        }

        if (unlikely(NUMA_BUILD && page == NULL && zlc_active)) {
            /* Disable zlc cache for second zonelist scan */
            zlc_active = 0;
            goto zonelist_scan;
        }
        return page;
    }
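
The decision that drives this fast-path loop is the watermark test: zone_watermark_ok() checks that, after taking 2^order pages, the zone still has more free pages than the selected watermark (chosen via ALLOC_WMARK_MASK) plus the lowmem_reserve owed to classzone_idx, and that enough of those free pages sit in blocks of at least the requested order. A condensed sketch of that logic (simplified from mm/page_alloc.c of this era; exact helper names vary between versions):

    /* Condensed sketch of the watermark test; not the literal kernel code. */
    int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
                          int classzone_idx, int alloc_flags)
    {
        long min = mark;
        long free_pages = zone_page_state(z, NR_FREE_PAGES) - (1 << order) + 1;
        int o;

        if (alloc_flags & ALLOC_HIGH)
            min -= min / 2;         /* __GFP_HIGH may dip further below the mark */
        if (alloc_flags & ALLOC_HARDER)
            min -= min / 4;         /* e.g. rt tasks may press harder still */

        if (free_pages <= min + z->lowmem_reserve[classzone_idx])
            return 0;
        for (o = 0; o < order; o++) {
            /* blocks of this order cannot satisfy the request */
            free_pages -= z->free_area[o].nr_free << o;
            /* require progressively smaller reserves at higher orders */
            min >>= 1;
            if (free_pages <= min)
                return 0;
        }
        return 1;
    }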

1.2、buffered_rmqueue

    /*
     * Really, prep_compound_page() should be called from __rmqueue_bulk(). But
     * we cheat by calling it from here, in the order > 0 path. Saves a branch
     * or two.
     */
    static inline struct page *buffered_rmqueue(struct zone *preferred_zone,
                struct zone *zone, int order, gfp_t gfp_flags,
                int migratetype)
    {
        unsigned long flags;
        struct page *page;
        int cold = !!(gfp_flags & __GFP_COLD);

    again:
        if (likely(order == 0)) {
            struct per_cpu_pages *pcp;
            struct list_head *list;

            local_irq_save(flags);
            pcp = &this_cpu_ptr(zone->pageset)->pcp;
            list = &pcp->lists[migratetype];
            if (list_empty(list)) {
                pcp->count += rmqueue_bulk(zone, 0,
                        pcp->batch, list,
                        migratetype, cold);
                if (unlikely(list_empty(list)))
                    goto failed;
            }

            if (cold)
                page = list_entry(list->prev, struct page, lru);
            else
                page = list_entry(list->next, struct page, lru);

            list_del(&page->lru);
            pcp->count--;
        } else {
            if (unlikely(gfp_flags & __GFP_NOFAIL)) {
                /*
                 * __GFP_NOFAIL is not to be used in new code.
                 *
                 * All __GFP_NOFAIL callers should be fixed so that they
                 * properly detect and handle allocation failures.
                 *
                 * We most definitely don't want callers attempting to
                 * allocate greater than order-1 page units with
                 * __GFP_NOFAIL.
                 */
                WARN_ON_ONCE(order > 1);
            }
            spin_lock_irqsave(&zone->lock, flags);
            page = __rmqueue(zone, order, migratetype);
            spin_unlock(&zone->lock);
            if (!page)
                goto failed;
            __mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order));
        }

        __count_zone_vm_events(PGALLOC, zone, 1 << order);
        zone_statistics(preferred_zone, zone);
        local_irq_restore(flags);

        VM_BUG_ON(bad_range(zone, page));
        if (prep_new_page(page, order, gfp_flags))
            goto again;
        return page;

    failed:
        local_irq_restore(flags);
        return NULL;
    }
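
The split in buffered_rmqueue() is the key design choice: order-0 requests are served from the per-CPU pcp lists, so the common case avoids taking zone->lock entirely and only disables local interrupts, while order > 0 requests go straight to the buddy lists via __rmqueue() under the zone lock. Cache-hot pages are taken from the head of the pcp list and cache-cold ones (__GFP_COLD) from the tail, which is why the code picks list->next or list->prev. A sketch of the per-CPU structure being consumed here (fields as in include/linux/mmzone.h of this era):

    /* Sketch of the per-CPU page cache that rmqueue_bulk() refills. */
    struct per_cpu_pages {
        int count;      /* number of pages in the lists */
        int high;       /* high watermark; draining needed above this */
        int batch;      /* chunk size for buddy add/remove */

        /* one list of pages per migrate type kept on the pcp lists */
        struct list_head lists[MIGRATE_PCPTYPES];
    };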