0x00 前言:Concept

在学习内存管理这个体系结构之前,我们先要理解这几个概念以及缩写

概念:

文件页

内存回收,也就是系统释放掉可以回收的内存,比如缓存和缓冲区,就属于可回收内存。它们在内存管理中,通常被叫做文件页(File-backed Page)。大部分文件页,都可以直接回收,以后有需要时,再从磁盘重新读取就可以了。

脏页

那些被应用程序修改过,并且暂时还没写入磁盘的数据(也就是脏页),就得先写入磁盘,然后才能进行内存释放。这些脏页,一般可以通过两种方式写入磁盘:可以在应用程序中,通过系统调用 fsync ,把脏页同步到磁盘中;也可以交给系统,由内核线程 pdflush 负责这些脏页的刷新。

文件映射页

除了缓存和缓冲区,通过内存映射获取的文件映射页,也是一种常见的文件页。它也可以被释放掉,下次再访问的时候,从文件重新读取。(mmap)

匿名页

应用程序动态分配的堆内存,也就是在内存管理中说到的匿名页(Anonymous Page),它们很可能还要再次被访问,所以自然不能直接回收释放。但是,如果这些内存在分配后很少被访问,一直占用着内存似乎也是一种资源浪费。

Linux Swap

Linux的 Swap 机制把这些不常访问的内存先写到磁盘中,然后释放这些内存,给其他更需要的进程使用。再次访问这些内存时,重新从磁盘读入内存就可以了。

缩写

PGD: Page Global Directory 页面全局目录(顶层页表)
PUD: Page Upper Directory 页面上层目录(二级页表)
PMD: Page Middle Directory 页面中间目录(三级页表)

0x01 Struct

address_space

定义:

是页高速缓存(page cache)的核心数据结构。在很多时候,内核在读写磁盘时都引用页高速缓存,新页被追加到页高速缓存以满足用户态进程的读请求。如果页不在高速缓存中,新页就被追加到高速缓存。这样做的目的是提高效率,比如有一些页经常被访问,那么在内存空间允许的情况下,可以考虑让它们长期驻留在页高速缓存中,这样要比从磁盘访问它们效率更高。
而address_space结构体就是嵌入在页所有者的索引节点对象中的数据结构。页描述符与它的联系是通过其中的字段mapping和index来完成的:前者指向拥有页的索引节点的address_space对象;index字段表示在所有者的地址空间中以页大小为单位的偏移量,即在所有者的磁盘映像中页中数据的位置。在页高速缓存中查找页时使用这两个字段。
20200602101417585.jpg

  1. struct address_space { /* page-cache bookkeeping object for one owner (see the host field) */
  2. struct inode *host; /* owner: inode, block_device (the node that owns this object) */
  3. struct radix_tree_root page_tree;/* radix tree of all pages */
  4. rwlock_t tree_lock; /* and rwlock protecting it (guards page_tree) */
  5. unsigned int i_mmap_writable;/* count VM_SHARED mappings (number of shared mappings) */
  6. struct prio_tree_root i_mmap; /* tree of private and shared mappings (root of the priority search tree) */
  7. struct list_head i_mmap_nonlinear;/* list VM_NONLINEAR mappings (head of the non-linear mapping list) */
  8. spinlock_t i_mmap_lock; /* protect tree, count, list (spinlock guarding i_mmap) */
  9. unsigned int truncate_count; /* Cover race condition with truncate (file truncation counter) */
  10. unsigned long nrpages; /* number of total pages */
  11. pgoff_t writeback_index;/* writeback starts here (starting offset for writeback) */
  12. struct address_space_operations *a_ops; /* methods (table of operation functions) */
  13. unsigned long flags; /* error bits/gfp mask */
  14. struct backing_dev_info *backing_dev_info; /* device readahead, etc (read-ahead information) */
  15. spinlock_t private_lock; /* for use by the address_space (private lock) */
  16. struct list_head private_list; /* ditto (private list of the address_space) */
  17. struct address_space *assoc_mapping; /* ditto (associated mapping/buffers) */
  18. } __attribute__((aligned(sizeof(long))));

mm_struct

定义

一个进程的虚拟地址空间主要由两个数据结构来描述。一个是最高层次的:mm_struct,一个是较高层次的:vm_area_struct。最高层次的mm_struct结构描述了一个进程的整个虚拟地址空间。较高层次的结构vm_area_struct描述了虚拟地址空间的一个区间(简称虚拟区)。每个进程只有一个mm_struct结构,在每个进程的task_struct结构中,有一个指针指向该进程的mm_struct结构。可以说,mm_struct结构是对整个用户空间的描述。

  1. struct mm_struct { /* memory descriptor: describes a process's whole virtual address space */
  2. //head of the list of memory-region (VMA) objects
  3. struct vm_area_struct * mmap; /* list of VMAs */
  4. //red-black tree of memory-region (VMA) objects
  5. struct rb_root mm_rb;
  6. //most recently found virtual memory area
  7. struct vm_area_struct * mmap_cache; /* last find_vma result */
  8. //function used to search the process address space for a usable address range
  9. unsigned long (*get_unmapped_area) (struct file *filp,
  10. unsigned long addr, unsigned long len,
  11. unsigned long pgoff, unsigned long flags);
  12. unsigned long (*get_unmapped_exec_area) (struct file *filp,
  13. unsigned long addr, unsigned long len,
  14. unsigned long pgoff, unsigned long flags);
  15. //method called when a memory region is released
  16. void (*unmap_area) (struct mm_struct *mm, unsigned long addr);
  17. //linear address of the first allocated file memory mapping
  18. unsigned long mmap_base; /* base of mmap area */
  19. unsigned long task_size; /* size of task vm space */
  20. /*
  21. * RHEL6 special for bug 790921: this same variable can mean
  22. * two different things. If sysctl_unmap_area_factor is zero,
  23. * this means the largest hole below free_area_cache. If the
  24. * sysctl is set to a positive value, this variable is used
  25. * to count how much memory has been munmapped from this process
  26. * since the last time free_area_cache was reset back to mmap_base.
  27. * This is ugly, but necessary to preserve kABI.
  28. */
  29. unsigned long cached_hole_size;
  30. //address from which the kernel searches for free linear address space in the process address space
  31. unsigned long free_area_cache; /* first hole of size cached_hole_size or larger */
  32. //pointer to the page directory (top-level page table)
  33. pgd_t * pgd;
  34. //number of users sharing this address space
  35. atomic_t mm_users; /* How many users with user space? */
  36. //primary usage counter of the memory descriptor; reference counted, zero means no user remains
  37. atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */
  38. //number of memory regions
  39. int map_count; /* number of VMAs */
  40. struct rw_semaphore mmap_sem;
  41. //lock protecting the task's page tables and some counters
  42. spinlock_t page_table_lock; /* Protects page tables and some counters */
  43. //links mm_struct structures into a list whose first element is the initial mm_struct (init_mm)
  44. struct list_head mmlist; /* List of maybe swapped mm's. These are globally strung
  45. * together off init_mm.mmlist, and are protected
  46. * by mmlist_lock
  47. */
  48. /* Special counters, in some configurations protected by the
  49. * page_table_lock, in other configurations by being atomic.
  50. */
  51. mm_counter_t _file_rss;
  52. mm_counter_t _anon_rss;
  53. mm_counter_t _swap_usage;
  54. //high-water mark of the process's resident set (RSS) usage
  55. unsigned long hiwater_rss; /* High-watermark of RSS usage */
  56. //high-water mark of the process's virtual memory usage
  57. unsigned long hiwater_vm; /* High-water virtual memory usage */
  58. //size of the process address space, count of locked (unswappable) pages, pages in shared file memory mappings, pages in executable memory mappings
  59. unsigned long total_vm, locked_vm, shared_vm, exec_vm;
  60. //number of pages in the user-mode stack (the note covers only the first field below)
  61. unsigned long stack_vm, reserved_vm, def_flags, nr_ptes;
  62. //bounds of the code segment and the data segment
  63. unsigned long start_code, end_code, start_data, end_data;
  64. //heap and stack bookkeeping
  65. unsigned long start_brk, brk, start_stack;
  66. //start and end addresses of the command-line arguments and of the environment variables
  67. unsigned long arg_start, arg_end, env_start, env_end;
  68. unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */
  69. struct linux_binfmt *binfmt;
  70. cpumask_t cpu_vm_mask;
  71. /* Architecture-specific MM context */
  72. mm_context_t context;
  73. /* Swap token stuff */
  74. /*
  75. * Last value of global fault stamp as seen by this process.
  76. * In other words, this value gives an indication of how long
  77. * it has been since this task got the token.
  78. * Look at mm/thrash.c
  79. */
  80. unsigned int faultstamp;
  81. unsigned int token_priority;
  82. unsigned int last_interval;
  83. //per the original note: default access flags of the memory regions - NOTE(review): that description may belong to def_flags; confirm
  84. unsigned long flags; /* Must use atomic bitops to access the bits */
  85. struct core_state *core_state; /* coredumping support */
  86. #ifdef CONFIG_AIO
  87. spinlock_t ioctx_lock;
  88. struct hlist_head ioctx_list;
  89. #endif
  90. #ifdef CONFIG_MM_OWNER
  91. /*
  92. * "owner" points to a task that is regarded as the canonical
  93. * user/owner of this mm. All of the following must be true in
  94. * order for it to be changed:
  95. *
  96. * current == mm->owner
  97. * current->mm != mm
  98. * new_owner->mm == mm
  99. * new_owner->alloc_lock is held
  100. */
  101. struct task_struct *owner;
  102. #endif
  103. #ifdef CONFIG_PROC_FS
  104. /* store ref to file /proc/<pid>/exe symlink points to */
  105. struct file *exe_file;
  106. unsigned long num_exe_file_vmas;
  107. #endif
  108. #ifdef CONFIG_MMU_NOTIFIER
  109. struct mmu_notifier_mm *mmu_notifier_mm;
  110. #endif
  111. #ifdef CONFIG_TRANSPARENT_HUGEPAGE
  112. pgtable_t pmd_huge_pte; /* protected by page_table_lock */
  113. #endif
  114. /* reserved for Red Hat */
  115. #ifdef __GENKSYMS__
  116. unsigned long rh_reserved[2];
  117. #else
  118. /* How many tasks sharing this mm are OOM_DISABLE */
  119. union {
  120. unsigned long rh_reserved_aux;
  121. atomic_t oom_disable_count;
  122. };
  123. /* base of lib map area (ASCII armour) */
  124. unsigned long shlib_base;
  125. #endif
  126. };