操作系统
操作系统是指在整个系统中完成最基本功能以及系统管理的那部分。包括:
- 内核
- 设备驱动程序
- 启动引导程序
- 命令行SHELL或者其他用户界面
-
内核
- 中断服务程序
- 进程调度程序
- 内存管理程序
-
进程
定义
进程是处于执行期的程序及其相关资源的总称。进程不仅仅是一段处于执行期的代码段,通常还包含其他资源:
- 打开的文件
- 挂起的信号
- 内核内部数据
- 处理器状态
- 映射内存地址空间
- 一个或多个执行线程
- 用来存放全局变量的数据段
- … …
线程是在进程中活动的对象,是内核调度的基本单元。每个线程都拥有独立的程序计数器、进程栈和一组进程寄存器。在Linux系统中,线程被当作一种特殊的进程来实现。
生命周期
父进程(fork) 创建子进程;
子进程(exec)创建新的地址空间,载入新程序并开始执行;
子进程(exit)退出执行,并释放资源;
描述符(task_struct)
数据结构
内核把进程的列表存放于叫做任务队列(task list)的双向循环链表中。链表中的每一项都是类型为task_struct、称为进程描述符的结构。进程描述符包含一个具体进程所有信息。
// kernel/sched.hstruct task_struct {volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */void *stack;atomic_t usage;unsigned int flags; /* per process flags, defined below */unsigned int ptrace;int lock_depth; /* BKL lock depth */unsigned int policy;cpumask_t cpus_allowed;struct sched_info sched_info;struct list_head tasks;struct mm_struct *mm, *active_mm;#if defined(SPLIT_RSS_COUNTING)struct task_rss_stat rss_stat;#endif/* task state */int exit_state;int exit_code, exit_signal;int pdeath_signal; /* The signal sent when the parent dies *//* ??? */unsigned int personality;unsigned did_exec:1;unsigned in_execve:1; /* Tell the LSMs that the process is doing an* execve */unsigned in_iowait:1;/* Revert to default priority/policy when forking */unsigned sched_reset_on_fork:1;pid_t pid;pid_t tgid;#ifdef CONFIG_CC_STACKPROTECTOR/* Canary value for the -fstack-protector gcc feature */unsigned long stack_canary;#endif/** pointers to (original) parent process, youngest child, younger sibling,* older sibling, respectively. (p->father can be replaced with* p->real_parent->pid)*/struct task_struct *real_parent; /* real parent process */struct task_struct *parent; /* recipient of SIGCHLD, wait4() reports *//** children/sibling forms the list of my natural children*/struct list_head children; /* list of my children */struct list_head sibling; /* linkage in my parent's children list */struct task_struct *group_leader; /* threadgroup leader *//** ptraced is the list of tasks this task is using ptrace on.* This includes both natural children and PTRACE_ATTACH targets.* p->ptrace_entry is p's link on the p->parent->ptraced list.*/struct list_head ptraced;struct list_head ptrace_entry;/* PID/PID hash table linkage. 
*/struct pid_link pids[PIDTYPE_MAX];struct list_head thread_group;struct completion *vfork_done; /* for vfork() */int __user *set_child_tid; /* CLONE_CHILD_SETTID */int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */cputime_t utime, stime, utimescaled, stimescaled;cputime_t gtime;#ifndef CONFIG_VIRT_CPU_ACCOUNTINGcputime_t prev_utime, prev_stime;#endifunsigned long nvcsw, nivcsw; /* context switch counts */struct timespec start_time; /* monotonic time */struct timespec real_start_time; /* boot based time *//* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */unsigned long min_flt, maj_flt;struct task_cputime cputime_expires;struct list_head cpu_timers[3];/* process credentials */const struct cred __rcu *real_cred; /* objective and real subjective task* credentials (COW) */const struct cred __rcu *cred; /* effective (overridable) subjective task* credentials (COW) */struct cred *replacement_session_keyring; /* for KEYCTL_SESSION_TO_PARENT */char comm[TASK_COMM_LEN]; /* executable name excluding path- access with [gs]et_task_comm (which lockit with task_lock())- initialized normally by setup_new_exec *//* file system info */int link_count, total_link_count;#ifdef CONFIG_SYSVIPC/* ipc stuff */struct sysv_sem sysvsem;#endif#ifdef CONFIG_DETECT_HUNG_TASK/* hung task detection */unsigned long last_switch_count;#endif/* CPU-specific state of this task */struct thread_struct thread;/* filesystem information */struct fs_struct *fs;/* open file information */struct files_struct *files;/* namespaces */struct nsproxy *nsproxy;/* signal handlers */struct signal_struct *signal;struct sighand_struct *sighand;sigset_t blocked, real_blocked;sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */struct sigpending pending;unsigned long sas_ss_sp;size_t sas_ss_size;int (*notifier)(void *priv);void *notifier_data;sigset_t *notifier_mask;struct audit_context *audit_context;#ifdef CONFIG_AUDITSYSCALLuid_t loginuid;unsigned 
int sessionid;#endifseccomp_t seccomp;/* Thread group tracking */u32 parent_exec_id;u32 self_exec_id;/* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed,* mempolicy */spinlock_t alloc_lock;#ifdef CONFIG_GENERIC_HARDIRQS/* IRQ handler threads */struct irqaction *irqaction;#endif/* Protection of the PI data structures: */raw_spinlock_t pi_lock;#ifdef CONFIG_RT_MUTEXES/* PI waiters blocked on a rt_mutex held by this task */struct plist_head pi_waiters;/* Deadlock detection and priority inheritance handling */struct rt_mutex_waiter *pi_blocked_on;#endif#ifdef CONFIG_DEBUG_MUTEXES/* mutex deadlock detection */struct mutex_waiter *blocked_on;#endif#ifdef CONFIG_TRACE_IRQFLAGSunsigned int irq_events;unsigned long hardirq_enable_ip;unsigned long hardirq_disable_ip;unsigned int hardirq_enable_event;unsigned int hardirq_disable_event;int hardirqs_enabled;int hardirq_context;unsigned long softirq_disable_ip;unsigned long softirq_enable_ip;unsigned int softirq_disable_event;unsigned int softirq_enable_event;int softirqs_enabled;int softirq_context;#endif#ifdef CONFIG_LOCKDEP# define MAX_LOCK_DEPTH 48ULu64 curr_chain_key;int lockdep_depth;unsigned int lockdep_recursion;struct held_lock held_locks[MAX_LOCK_DEPTH];gfp_t lockdep_reclaim_gfp;#endif/* journalling filesystem info */void *journal_info;/* stacked block device info */struct bio_list *bio_list;#ifdef CONFIG_BLOCK/* stack plugging */struct blk_plug *plug;#endif/* VM state */struct reclaim_state *reclaim_state;struct backing_dev_info *backing_dev_info;struct io_context *io_context;unsigned long ptrace_message;siginfo_t *last_siginfo; /* For ptrace use. 
*/struct task_io_accounting ioac;#if defined(CONFIG_TASK_XACCT)u64 acct_rss_mem1; /* accumulated rss usage */u64 acct_vm_mem1; /* accumulated virtual memory usage */cputime_t acct_timexpd; /* stime + utime since last update */#endif#ifdef CONFIG_CPUSETSnodemask_t mems_allowed; /* Protected by alloc_lock */int mems_allowed_change_disable;int cpuset_mem_spread_rotor;int cpuset_slab_spread_rotor;#endif#ifdef CONFIG_CGROUPS/* Control Group info protected by css_set_lock */struct css_set __rcu *cgroups;/* cg_list protected by css_set_lock and tsk->alloc_lock */struct list_head cg_list;#endif#ifdef CONFIG_FUTEXstruct robust_list_head __user *robust_list;#ifdef CONFIG_COMPATstruct compat_robust_list_head __user *compat_robust_list;#endifstruct list_head pi_state_list;struct futex_pi_state *pi_state_cache;#endif#ifdef CONFIG_PERF_EVENTSstruct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];struct mutex perf_event_mutex;struct list_head perf_event_list;#endif#ifdef CONFIG_NUMAstruct mempolicy *mempolicy; /* Protected by alloc_lock */short il_next;short pref_node_fork;#endifatomic_t fs_excl; /* holding fs exclusive resources */struct rcu_head rcu;/** cache last used pipe for splice*/struct pipe_inode_info *splice_pipe;/** time slack values; these are used to round up poll() and* select() etc timeout values. These are in nanoseconds.*/unsigned long timer_slack_ns;unsigned long default_timer_slack_ns;struct list_head *scm_work_list;};
分配进程描述符
Linux通过slab分配task_struct结构,这样能够达到对象复用以及缓存着色的目的。
创建与释放
创建分为两个过程去执行: fork()和exec().
fork() -> clone() -> do_fork() -> copy_process(). copy_process 定义在 kernel/fork.c中:
- 调用dup_task_struct() 为新进程创建内核栈、thread_info结构和task_struct;
- 进程描述符内成员设为初始值,进程状态设置为TASK_UNINTERRUPTIBLE(保证它不会投入运行);
- 调用copy_flags()更新task_struct中的flags成员;
- 调用alloc_pid()分配一个有效PID;
- 返回一个指向子进程的指针。
进程终结:do_exit().
- task_struct 中 flags 设置为 PF_EXITING;
- del_timer_sync() 删除内核定时器,确保没有定时器在排队,也没有定时器处理程序在运行;
- exit_mm() 释放进程占用的 mm_struct;
- exit_sem():如果进程正在排队等候IPC信号量,则使其离开队列;
- exit_files() 和 exit_fs(), 递减文件描述符和文件系统数据引用计数。
- 完成内核机制规定的退出动作;
- exit_notify()通知父进程,并给该进程的子进程重新找养父(线程组中的其他线程或者init进程);
- 进程状态 task_struct->exit_state = EXIT_ZOMBIE;
- 调用schedule()切换到新的进程,而该进程因为处于僵死状态不会再次被调度。
- 执行完以上过程,进程不可被运行,它所占用内存只包含内核栈、thread_info结构和task_struct结构,这些信息供父进程检索。
- 父进程检索到信息后,通知内核那是无关信息,进程所持有剩余内存释放,归还系统:release_task():
- 调用 __exit_signal() -> __unhash_process() -> detach_pid() 从pidhash上删除该进程,同时从任务列表中删除该进程;
- __exit_signal() 释放僵死进程所有剩余资源,并进行最终的统计和记录;
- release_task()调用put_task_struct()释放进程内核栈和thread_info结构所占的页,并释放task_struct所占的slab高速缓存。
为子进程寻找养父的目的,是防止父进程先于子进程退出后,这些子进程在退出时没有父进程来回收,永远处于僵死状态,白白耗费内存。
