操作系统

操作系统是指在整个系统中完成最基本功能以及系统管理的那部分。包括:

  1. 内核
  2. 设备驱动程序
  3. 启动引导程序
  4. 命令行SHELL或者其他用户界面
  5. 基本文件管理工具和系统工具

    内核

  6. 中断服务程序

  7. 进程调度程序
  8. 内存管理程序
  9. 网络、进程间通信等系统服务程序

    进程

    定义

    进程是处于执行期的程序及其相关资源的总称。进程不仅仅是一段处于执行期的代码段,通常还包含其他资源:

  10. 打开的文件

  11. 挂起的信号
  12. 内核内部数据
  13. 处理器状态
  14. 映射内存地址空间
  15. 多线程
  16. 用来存放全局变量的数据段
  17. … …

线程是进程中活动的对象,是内核调度的基本单元。每个线程都拥有独立的程序计数器、栈和一组寄存器。在Linux系统中,线程被实现为一种特殊的进程。

生命周期

父进程调用fork()创建子进程;
子进程调用exec()创建新的地址空间,载入新程序并开始执行;
子进程调用exit()退出执行,并释放资源;

描述符(task_struct)

数据结构

内核把进程的列表存放于叫做任务队列(task list)的双向循环链表中。链表中的每一项都是类型为task_struct、称为进程描述符的结构。进程描述符包含一个具体进程的所有信息。

  1. // include/linux/sched.h
  2. struct task_struct {
  3. volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
  4. void *stack;
  5. atomic_t usage;
  6. unsigned int flags; /* per process flags, defined below */
  7. unsigned int ptrace;
  8. int lock_depth; /* BKL lock depth */
  9. unsigned int policy;
  10. cpumask_t cpus_allowed;
  11. struct sched_info sched_info;
  12. struct list_head tasks;
  13. struct mm_struct *mm, *active_mm;
  14. #if defined(SPLIT_RSS_COUNTING)
  15. struct task_rss_stat rss_stat;
  16. #endif
  17. /* task state */
  18. int exit_state;
  19. int exit_code, exit_signal;
  20. int pdeath_signal; /* The signal sent when the parent dies */
  21. /* ??? */
  22. unsigned int personality;
  23. unsigned did_exec:1;
  24. unsigned in_execve:1; /* Tell the LSMs that the process is doing an
  25. * execve */
  26. unsigned in_iowait:1;
  27. /* Revert to default priority/policy when forking */
  28. unsigned sched_reset_on_fork:1;
  29. pid_t pid;
  30. pid_t tgid;
  31. #ifdef CONFIG_CC_STACKPROTECTOR
  32. /* Canary value for the -fstack-protector gcc feature */
  33. unsigned long stack_canary;
  34. #endif
  35. /*
  36. * pointers to (original) parent process, youngest child, younger sibling,
  37. * older sibling, respectively. (p->father can be replaced with
  38. * p->real_parent->pid)
  39. */
  40. struct task_struct *real_parent; /* real parent process */
  41. struct task_struct *parent; /* recipient of SIGCHLD, wait4() reports */
  42. /*
  43. * children/sibling forms the list of my natural children
  44. */
  45. struct list_head children; /* list of my children */
  46. struct list_head sibling; /* linkage in my parent's children list */
  47. struct task_struct *group_leader; /* threadgroup leader */
  48. /*
  49. * ptraced is the list of tasks this task is using ptrace on.
  50. * This includes both natural children and PTRACE_ATTACH targets.
  51. * p->ptrace_entry is p's link on the p->parent->ptraced list.
  52. */
  53. struct list_head ptraced;
  54. struct list_head ptrace_entry;
  55. /* PID/PID hash table linkage. */
  56. struct pid_link pids[PIDTYPE_MAX];
  57. struct list_head thread_group;
  58. struct completion *vfork_done; /* for vfork() */
  59. int __user *set_child_tid; /* CLONE_CHILD_SETTID */
  60. int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */
  61. cputime_t utime, stime, utimescaled, stimescaled;
  62. cputime_t gtime;
  63. #ifndef CONFIG_VIRT_CPU_ACCOUNTING
  64. cputime_t prev_utime, prev_stime;
  65. #endif
  66. unsigned long nvcsw, nivcsw; /* context switch counts */
  67. struct timespec start_time; /* monotonic time */
  68. struct timespec real_start_time; /* boot based time */
  69. /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
  70. unsigned long min_flt, maj_flt;
  71. struct task_cputime cputime_expires;
  72. struct list_head cpu_timers[3];
  73. /* process credentials */
  74. const struct cred __rcu *real_cred; /* objective and real subjective task
  75. * credentials (COW) */
  76. const struct cred __rcu *cred; /* effective (overridable) subjective task
  77. * credentials (COW) */
  78. struct cred *replacement_session_keyring; /* for KEYCTL_SESSION_TO_PARENT */
  79. char comm[TASK_COMM_LEN]; /* executable name excluding path
  80. - access with [gs]et_task_comm (which lock
  81. it with task_lock())
  82. - initialized normally by setup_new_exec */
  83. /* file system info */
  84. int link_count, total_link_count;
  85. #ifdef CONFIG_SYSVIPC
  86. /* ipc stuff */
  87. struct sysv_sem sysvsem;
  88. #endif
  89. #ifdef CONFIG_DETECT_HUNG_TASK
  90. /* hung task detection */
  91. unsigned long last_switch_count;
  92. #endif
  93. /* CPU-specific state of this task */
  94. struct thread_struct thread;
  95. /* filesystem information */
  96. struct fs_struct *fs;
  97. /* open file information */
  98. struct files_struct *files;
  99. /* namespaces */
  100. struct nsproxy *nsproxy;
  101. /* signal handlers */
  102. struct signal_struct *signal;
  103. struct sighand_struct *sighand;
  104. sigset_t blocked, real_blocked;
  105. sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */
  106. struct sigpending pending;
  107. unsigned long sas_ss_sp;
  108. size_t sas_ss_size;
  109. int (*notifier)(void *priv);
  110. void *notifier_data;
  111. sigset_t *notifier_mask;
  112. struct audit_context *audit_context;
  113. #ifdef CONFIG_AUDITSYSCALL
  114. uid_t loginuid;
  115. unsigned int sessionid;
  116. #endif
  117. seccomp_t seccomp;
  118. /* Thread group tracking */
  119. u32 parent_exec_id;
  120. u32 self_exec_id;
  121. /* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed,
  122. * mempolicy */
  123. spinlock_t alloc_lock;
  124. #ifdef CONFIG_GENERIC_HARDIRQS
  125. /* IRQ handler threads */
  126. struct irqaction *irqaction;
  127. #endif
  128. /* Protection of the PI data structures: */
  129. raw_spinlock_t pi_lock;
  130. #ifdef CONFIG_RT_MUTEXES
  131. /* PI waiters blocked on a rt_mutex held by this task */
  132. struct plist_head pi_waiters;
  133. /* Deadlock detection and priority inheritance handling */
  134. struct rt_mutex_waiter *pi_blocked_on;
  135. #endif
  136. #ifdef CONFIG_DEBUG_MUTEXES
  137. /* mutex deadlock detection */
  138. struct mutex_waiter *blocked_on;
  139. #endif
  140. #ifdef CONFIG_TRACE_IRQFLAGS
  141. unsigned int irq_events;
  142. unsigned long hardirq_enable_ip;
  143. unsigned long hardirq_disable_ip;
  144. unsigned int hardirq_enable_event;
  145. unsigned int hardirq_disable_event;
  146. int hardirqs_enabled;
  147. int hardirq_context;
  148. unsigned long softirq_disable_ip;
  149. unsigned long softirq_enable_ip;
  150. unsigned int softirq_disable_event;
  151. unsigned int softirq_enable_event;
  152. int softirqs_enabled;
  153. int softirq_context;
  154. #endif
  155. #ifdef CONFIG_LOCKDEP
  156. # define MAX_LOCK_DEPTH 48UL
  157. u64 curr_chain_key;
  158. int lockdep_depth;
  159. unsigned int lockdep_recursion;
  160. struct held_lock held_locks[MAX_LOCK_DEPTH];
  161. gfp_t lockdep_reclaim_gfp;
  162. #endif
  163. /* journalling filesystem info */
  164. void *journal_info;
  165. /* stacked block device info */
  166. struct bio_list *bio_list;
  167. #ifdef CONFIG_BLOCK
  168. /* stack plugging */
  169. struct blk_plug *plug;
  170. #endif
  171. /* VM state */
  172. struct reclaim_state *reclaim_state;
  173. struct backing_dev_info *backing_dev_info;
  174. struct io_context *io_context;
  175. unsigned long ptrace_message;
  176. siginfo_t *last_siginfo; /* For ptrace use. */
  177. struct task_io_accounting ioac;
  178. #if defined(CONFIG_TASK_XACCT)
  179. u64 acct_rss_mem1; /* accumulated rss usage */
  180. u64 acct_vm_mem1; /* accumulated virtual memory usage */
  181. cputime_t acct_timexpd; /* stime + utime since last update */
  182. #endif
  183. #ifdef CONFIG_CPUSETS
  184. nodemask_t mems_allowed; /* Protected by alloc_lock */
  185. int mems_allowed_change_disable;
  186. int cpuset_mem_spread_rotor;
  187. int cpuset_slab_spread_rotor;
  188. #endif
  189. #ifdef CONFIG_CGROUPS
  190. /* Control Group info protected by css_set_lock */
  191. struct css_set __rcu *cgroups;
  192. /* cg_list protected by css_set_lock and tsk->alloc_lock */
  193. struct list_head cg_list;
  194. #endif
  195. #ifdef CONFIG_FUTEX
  196. struct robust_list_head __user *robust_list;
  197. #ifdef CONFIG_COMPAT
  198. struct compat_robust_list_head __user *compat_robust_list;
  199. #endif
  200. struct list_head pi_state_list;
  201. struct futex_pi_state *pi_state_cache;
  202. #endif
  203. #ifdef CONFIG_PERF_EVENTS
  204. struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
  205. struct mutex perf_event_mutex;
  206. struct list_head perf_event_list;
  207. #endif
  208. #ifdef CONFIG_NUMA
  209. struct mempolicy *mempolicy; /* Protected by alloc_lock */
  210. short il_next;
  211. short pref_node_fork;
  212. #endif
  213. atomic_t fs_excl; /* holding fs exclusive resources */
  214. struct rcu_head rcu;
  215. /*
  216. * cache last used pipe for splice
  217. */
  218. struct pipe_inode_info *splice_pipe;
  219. /*
  220. * time slack values; these are used to round up poll() and
  221. * select() etc timeout values. These are in nanoseconds.
  222. */
  223. unsigned long timer_slack_ns;
  224. unsigned long default_timer_slack_ns;
  225. struct list_head *scm_work_list;
  226. };

分配进程描述符

Linux通过slab分配器分配task_struct结构,这样能够达到对象复用以及缓存着色的目的。

这样可以避免动态分配和释放带来的资源消耗。

创建与释放

进程的创建分为两个步骤:fork()和exec()。
fork()的调用链为:fork() -> clone() -> do_fork() -> copy_process()。copy_process()定义在kernel/fork.c中,主要完成以下工作:

  1. 调用dup_task_struct() 为新进程创建内核栈、thread_info结构和task_struct;
  2. 进程描述符内成员设为初始值,进程状态设置为TASK_UNINTERRUPTIBLE(保证它不会投入运行);
  3. 调用copy_flags()更新task_struct中的flags成员;
  4. 调用alloc_pid()分配一个有效PID;
  5. 返回一个指向子进程的指针。

进程终结:do_exit().

  1. task_struct 中 flags 设置为 PF_EXITING;
  2. del_timer_sync() 删除内核定时器,确保没有定时器在排队,也没有定时器处理程序在运行;
  3. exit_mm() 释放进程占用的 mm_struct;
  4. exit_sem():如果进程正在排队等候IPC信号量,则使其离开队列;
  5. exit_files() 和 exit_fs(), 递减文件描述符和文件系统数据引用计数。
  6. 把task_struct的exit_code成员设置为exit()提供的退出码,或者完成内核机制规定的其他退出动作;
  7. exit_notify()通知父进程,并给该进程的子进程重新找养父(线程组中的其他线程或者init进程);
  8. 进程状态 task_struct->exit_state = EXIT_ZOMBIE;
  9. 调用schedule()切换到新的进程,而该进程因为处于僵尸状态不会再次被调度。
  10. 执行完以上过程,进程不可被运行,它所占用内存只包含内核栈、thread_info结构和task_struct结构,这些信息供父进程检索。

  1. 父进程检索到信息后,或者通知内核那是无关信息后,进程所持有的剩余内存被释放并归还系统,这一工作由release_task()完成:
    1. 调用 __exit_signal() -> __unhash_process() -> detach_pid() 从pidhash上删除该进程,同时从任务列表中删除该进程;
    2. __exit_signal() 释放僵死进程所有剩余资源,并进行最终的统计和记录;
    3. release_task()调用put_task_struct()释放进程内核栈和thread_info结构所占的页,并释放task_struct所占的slab高速缓存。

给子进程寻找养父的目的是:如果父进程先于子进程退出,这些成为孤儿的子进程在退出后会因为没有父进程来检索和回收而永远处于僵死状态,白白耗费内存。