Linux 内核结构体(进程内存)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
struct task_struct {
volatile long state; //说明了该进程是否可以执行,还是可中断等信息
unsigned long flags; //Flage 是进程号,在调用fork()时给出
int sigpending; //进程上是否有待处理的信号
mm_segment_t addr_limit; //进程地址空间,区分内核进程与普通进程在内存存放的位置不同
//0-0xBFFFFFFF for user-thead
//0-0xFFFFFFFF for kernel-thread
//调度标志,表示该进程是否需要重新调度,若非0,则当从内核态返回到用户态,会发生调度
volatile long need_resched;
int lock_depth; //锁深度
long nice; //进程的基本时间片
//进程的调度策略,有三种,实时进程:SCHED_FIFO,SCHED_RR, 分时进程:SCHED_OTHER
unsigned long policy;
struct mm_struct *mm; //进程内存管理信息
int processor;
//若进程不在任何CPU上运行, cpus_runnable 的值是0,否则是1 这个值在运行队列被锁时更新
unsigned long cpus_runnable, cpus_allowed;
struct list_head run_list; //指向运行队列的指针
unsigned long sleep_time; //进程的睡眠时间
//用于将系统中所有的进程连成一个双向循环链表, 其根是init_task
struct task_struct *next_task, *prev_task;
struct mm_struct *active_mm;
struct list_head local_pages; //指向本地页面
unsigned int allocation_order, nr_local_pages;
struct linux_binfmt *binfmt; //进程所运行的可执行文件的格式
int exit_code, exit_signal;
int pdeath_signal; //父进程终止时向子进程发送的信号
unsigned long personality;
//Linux可以运行由其他UNIX操作系统生成的符合iBCS2标准的程序
int did_exec:1;
pid_t pid; //进程标识符,用来代表一个进程
pid_t pgrp; //进程组标识,表示进程所属的进程组
pid_t tty_old_pgrp; //进程控制终端所在的组标识
pid_t session; //进程的会话标识
pid_t tgid;
int leader; //表示进程是否为会话主管
struct task_struct *p_opptr,*p_pptr,*p_cptr,*p_ysptr,*p_osptr;
struct list_head thread_group; //线程链表
struct task_struct *pidhash_next; //用于将进程链入HASH表
struct task_struct **pidhash_pprev;
wait_queue_head_t wait_chldexit; //供wait4()使用
struct completion *vfork_done; //供vfork() 使用
unsigned long rt_priority; //实时优先级,用它计算实时进程调度时的weight值

//it_real_value,it_real_incr用于REAL定时器,单位为jiffies, 系统根据it_real_value
//设置定时器的第一个终止时间. 在定时器到期时,向进程发送SIGALRM信号,同时根据
//it_real_incr重置终止时间,it_prof_value,it_prof_incr用于Profile定时器,单位为jiffies。
//当进程运行时,不管在何种状态下,每个tick都使it_prof_value值减一,当减到0时,向进程发送
//信号SIGPROF,并根据it_prof_incr重置时间.
//it_virt_value,it_virt_value用于Virtual定时器,单位为jiffies。当进程运行时,不管在何种
//状态下,每个tick都使it_virt_value值减一当减到0时,向进程发送信号SIGVTALRM,根据
//it_virt_incr重置初值。
unsigned long it_real_value, it_prof_value, it_virt_value;
unsigned long it_real_incr, it_prof_incr, it_virt_value;
struct timer_list real_timer; //指向实时定时器的指针
struct tms times; //记录进程消耗的时间
unsigned long start_time; //进程创建的时间
//记录进程在每个CPU上所消耗的用户态时间和核心态时间
long per_cpu_utime[NR_CPUS], per_cpu_stime[NR_CPUS];
//内存缺页和交换信息:
//min_flt, maj_flt累计进程的次缺页数(Copy on Write页和匿名页)和主缺页数(从映射文件或交换
//设备读入的页面数); nswap记录进程累计换出的页面数,即写到交换设备上的页面数。
//cmin_flt, cmaj_flt, cnswap记录本进程为祖先的所有子孙进程的累计次缺页数,主缺页数和换出页面数。
//在父进程回收终止的子进程时,父进程会将子进程的这些信息累计到自己结构的这些域中
unsigned long min_flt, maj_flt, nswap, cmin_flt, cmaj_flt, cnswap;
int swappable:1; //表示进程的虚拟地址空间是否允许换出
//进程认证信息
//uid,gid为运行该进程的用户的用户标识符和组标识符,通常是进程创建者的uid,gid
//euid,egid为有效uid,gid
//fsuid,fsgid为文件系统uid,gid,这两个ID号通常与有效uid,gid相等,在检查对于文件
//系统的访问权限时使用他们。
//suid,sgid为备份uid,gid
uid_t uid,euid,suid,fsuid;
gid_t gid,egid,sgid,fsgid;
int ngroups; //记录进程在多少个用户组中
gid_t groups[NGROUPS]; //记录进程所在的组
//进程的权能,分别是有效位集合,继承位集合,允许位集合
kernel_cap_t cap_effective, cap_inheritable, cap_permitted;
int keep_capabilities:1;
struct user_struct *user;
struct rlimit rlim[RLIM_NLIMITS]; //与进程相关的资源限制信息
unsigned short used_math; //是否使用FPU
char comm[16]; //进程正在运行的可执行文件名
//文件系统信息
int link_count, total_link_count;
//NULL if no tty 进程所在的控制终端,如果不需要控制终端,则该指针为空
struct tty_struct *tty;
unsigned int locks;
//进程间通信信息
struct sem_undo *semundo; //进程在信号灯上的所有undo操作
struct sem_queue *semsleeping; //当进程因为信号灯操作而挂起时,他在该队列中记录等待的操作
//进程的CPU状态,切换时,要保存到停止进程的task_struct中
struct thread_struct thread;
//文件系统信息
struct fs_struct *fs;
//打开文件信息
struct files_struct *files;
//信号处理函数
spinlock_t sigmask_lock;
struct signal_struct *sig; //信号处理函数
sigset_t blocked; //进程当前要阻塞的信号,每个信号对应一位
struct sigpending pending; //进程上是否有待处理的信号
unsigned long sas_ss_sp;
size_t sas_ss_size;
int (*notifier)(void *priv);
void *notifier_data;
sigset_t *notifier_mask;
u32 parent_exec_id;
u32 self_exec_id;

spinlock_t alloc_lock;
void *journal_info;
};
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
<include/linux/mm_types.h>
struct mm_struct {
struct {
struct vm_area_struct *mmap; /* list of VMAs */
/*虚拟内存描述符链表,整个地址空间被分成不连续的内存,每片由vm_area_struct管理,所有vma按地址大小链表起来*/
struct rb_root mm_rb;
/*方便增删,把vma传入到红黑树中*/
u64 vmacache_seqnum; /* per-thread vmacache */
#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
rwlock_t mm_rb_lock; /* Speculative page fault field */
#endif
#ifdef CONFIG_MMU
/*函数指针,MMU芯片对应架构自己的分配虚拟内存函数*/
unsigned long (*get_unmapped_area) (struct file *filp,
unsigned long addr, unsigned long len,
unsigned long pgoff, unsigned long flags);
#endif
unsigned long mmap_base; /* base of mmap area */
/*mmap开始映射地址*/
unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */
/*控制上下分配*/
#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
/* Base adresses for compatible mmap() */
unsigned long mmap_compat_base;
unsigned long mmap_compat_legacy_base;
#endif
unsigned long task_size; /* size of task vm space */
/*虚拟地址空间大小*/
unsigned long highest_vm_end; /* highest vma end address */
/*最后一个vma的vm_end值*/
pgd_t * pgd;
/*进程的页全局目录地址*/
/**
* @mm_users: The number of users including userspace.
*
* Use mmget()/mmget_not_zero()/mmput() to modify. When this
* drops to 0 (i.e. when the task exits and there are no other
* temporary reference holders), we also release a reference on
* @mm_count (which may then free the &struct mm_struct if
* @mm_count also drops to 0).
*/
atomic_t mm_users;
/*共享同一个用户虚拟地址空间的任务个数,为0的时候释放这个引用*/

/**
* @mm_count: The number of references to &struct mm_struct
* (@mm_users count as 1).
*
* Use mmgrab()/mmdrop() to modify. When this drops to 0, the
* &struct mm_struct is freed.
*/
atomic_t mm_count;
/*mm被引用的计数,为0时,释放这个结构体*/
#ifdef CONFIG_MMU /*打开的*/
atomic_long_t pgtables_bytes; /* PTE page table pages */
#endif
int map_count; /* number of VMAs */
/*mm中vma的个数*/

spinlock_t page_table_lock; /* Protects page tables and some
* counters
*/
/*自旋锁,保护页表和计数器*/
struct rw_semaphore mmap_sem;
/*操作mmap和mm_rb时,通过锁保护防止并发,读写锁,所以使用信号量*/
struct list_head mmlist; /* List of maybe swapped mm's. These
* are globally strung together off
* init_mm.mmlist, and are protected
* by mmlist_lock
*/

unsigned long hiwater_rss; /* High-watermark of RSS usage */
unsigned long hiwater_vm; /* High-water virtual memory usage */

unsigned long total_vm; /* Total pages mapped */
unsigned long locked_vm; /* Pages that have PG_mlocked set */
unsigned long pinned_vm; /* Refcount permanently increased */
unsigned long data_vm; /* VM_WRITE & ~VM_SHARED & ~VM_STACK */
unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
unsigned long stack_vm; /* VM_STACK */
unsigned long def_flags;

spinlock_t arg_lock; /* protect the below fields */
unsigned long start_code, end_code, start_data, end_data;
/*代码段和数据段的起始地址和结束地址*/
unsigned long start_brk, brk, start_stack;
/*堆的起始、结束地址,brk的值通过sys_brk系统调用调整;用户态栈的起始地址*/
unsigned long arg_start, arg_end, env_start, env_end;
/*进程在应用程序启动时传递参数字符串的起始、结束地址;环境变量的起始、结束地址*/
unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */

/*
* Special counters, in some configurations protected by the
* page_table_lock, in other configurations by being atomic.
*/
struct mm_rss_stat rss_stat;

struct linux_binfmt *binfmt;

/* Architecture-specific MM context */
mm_context_t context;

unsigned long flags; /* Must use atomic bitops to access */

struct core_state *core_state; /* coredumping support */
#ifdef CONFIG_MEMBARRIER //打开
atomic_t membarrier_state;
#endif
#ifdef CONFIG_AIO //打开
spinlock_t ioctx_lock;
struct kioctx_table __rcu *ioctx_table;
#endif
#ifdef CONFIG_MEMCG //打开
/*
* "owner" points to a task that is regarded as the canonical规范的
* user/owner of this mm. All of the following must be true in
* order for it to be changed:
*
* current == mm->owner
* current->mm != mm
* new_owner->mm == mm
* new_owner->alloc_lock is held
*/
struct task_struct __rcu *owner;
#endif
struct user_namespace *user_ns;

/* store ref to file /proc/<pid>/exe symlink points to */
struct file __rcu *exe_file;
#ifdef CONFIG_MMU_NOTIFIER //打开
struct mmu_notifier_mm *mmu_notifier_mm;
#endif
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS 关闭
pgtable_t pmd_huge_pte; /* protected by page_table_lock */
#endif
#ifdef CONFIG_NUMA_BALANCING //关
/*
* numa_next_scan is the next time that the PTEs will be marked
* pte_numa. NUMA hinting faults will gather statistics and
* migrate pages to new nodes if necessary.
*/
unsigned long numa_next_scan;

/* Restart point for scanning and setting pte_numa */
unsigned long numa_scan_offset;

/* numa_scan_seq prevents two threads setting pte_numa */
int numa_scan_seq;
#endif
/*
* An operation with batched TLB flushing is going on. Anything
* that can move process memory needs to flush the TLB when
* moving a PROT_NONE or PROT_NUMA mapped page.
*/
atomic_t tlb_flush_pending;
#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH //关
/* See flush_tlb_batched_pending() */
bool tlb_flush_batched;
#endif
struct uprobes_state uprobes_state;
#ifdef CONFIG_HUGETLB_PAGE //关
atomic_long_t hugetlb_usage;
#endif
struct work_struct async_put_work;

#if IS_ENABLED(CONFIG_HMM)
/* HMM needs to track a few things per mm */
struct hmm *hmm;
#endif
} __randomize_layout;

/*
* The mm_cpumask needs to be at the end of mm_struct, because it
* is dynamically sized based on nr_cpu_ids.
*/
unsigned long cpu_bitmap[];
};
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
<include/linux/mm_types.h>
struct vm_area_struct {
/* The first cache line has the info for VMA tree walking. */

unsigned long vm_start; /* Our start address within vm_mm. */
unsigned long vm_end; /* The first byte after our end address
within vm_mm. */
/*[vm_start, vm_end) 这么个情况,表示一块虚拟内存空间*/
/* linked list of VM areas per task, sorted by address */
struct vm_area_struct *vm_next, *vm_prev;
/*在mm->mmap链表中前后节点*/
struct rb_node vm_rb;
/*插入到mm->mm_rb红黑树的节点*/
/*
* Largest free memory gap in bytes to the left of this VMA.
* Either between this VMA and vma->vm_prev, or between one of the
* VMAs below us in the VMA rbtree and its ->vm_prev. This helps
* get_unmapped_area find a free area of the right size.
*/
unsigned long rb_subtree_gap;
/*以当前vma为根,左子树中最大可用虚拟内存区域的大小*/
/* Second cache line starts here. */

struct mm_struct *vm_mm; /* The address space we belong to. */
/*同一进程所有的vma指向的vm_mm是相同的*/
pgprot_t vm_page_prot; /* Access permissions of this VMA. */
/*访问权限*/
unsigned long vm_flags; /* Flags, see mm.h. */
/*属性,详看mm.h*/
/*
* For areas with an address space and backing store,
* linkage into the address_space->i_mmap interval tree.
*
* For private anonymous mappings, a pointer to a null terminated string
* in the user process containing the name given to the vma, or NULL
* if unnamed.
*/
union {
struct {
struct rb_node rb;
unsigned long rb_subtree_last;
} shared;
const char __user *anon_name;
};

/*
* A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
* list, after a COW of one of the file pages. A MAP_SHARED vma
* can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack
* or brk vma (with NULL file) can only be in an anon_vma list.
*/
struct list_head anon_vma_chain; /* Serialized by mmap_sem &
* page_table_lock */
struct anon_vma *anon_vma; /* Serialized by page_table_lock */

/* Function pointers to deal with this struct. */
const struct vm_operations_struct *vm_ops;
/*该vma操作函数,包括打开、关闭、建立映射三个函数*/

/* Information about our backing store: */
unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE
units */
struct file * vm_file; /* File we map to (can be NULL). */
/*文件映射的文件信息,匿名映射为NULL*/
void * vm_private_data; /* was vm_pte (shared mem) */

atomic_long_t swap_readahead_info;
#ifndef CONFIG_MMU
struct vm_region *vm_region; /* NOMMU mapping region */
#endif
#ifdef CONFIG_NUMA
struct mempolicy *vm_policy; /* NUMA policy for the VMA */
#endif
struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
seqcount_t vm_sequence; /* Speculative page fault field */
atomic_t vm_ref_count; /* see vma_get(), vma_put() */
#endif
} __randomize_layout;

Linux 内核结构体(进程内存)
https://rogxo.github.io/2023/05/23/2023-05-23-Linux 内核结构体(进程内存)/
作者
Rogxo
发布于
2023年5月23日
许可协议
CC BY-NC-SA 4.0