rogxo@home:~$

Linux 内核结构体(进程内存)

struct task_struct {
    volatile long state;  //说明了该进程是否可以执行,还是可中断等信息
    unsigned long flags;  //Flage 是进程号,在调用fork()时给出
    int sigpending;    //进程上是否有待处理的信号
    mm_segment_t addr_limit; //进程地址空间,区分内核进程与普通进程在内存存放的位置不同
                            //0-0xBFFFFFFF for user-thead
                            //0-0xFFFFFFFF for kernel-thread
    //调度标志,表示该进程是否需要重新调度,若非0,则当从内核态返回到用户态,会发生调度
    volatile long need_resched;
    int lock_depth;  //锁深度
    long nice;       //进程的基本时间片
    //进程的调度策略,有三种,实时进程:SCHED_FIFO,SCHED_RR, 分时进程:SCHED_OTHER
    unsigned long policy;
    struct mm_struct *mm; //进程内存管理信息
    int processor;
    //若进程不在任何CPU上运行, cpus_runnable 的值是0,否则是1 这个值在运行队列被锁时更新
    unsigned long cpus_runnable, cpus_allowed;
    struct list_head run_list; //指向运行队列的指针
    unsigned long sleep_time;  //进程的睡眠时间
    //用于将系统中所有的进程连成一个双向循环链表, 其根是init_task
    struct task_struct *next_task, *prev_task;
    struct mm_struct *active_mm;
    struct list_head local_pages;       //指向本地页面      
    unsigned int allocation_order, nr_local_pages;
    struct linux_binfmt *binfmt;  //进程所运行的可执行文件的格式
    int exit_code, exit_signal;
    int pdeath_signal;     //父进程终止时向子进程发送的信号
    unsigned long personality;
    //Linux可以运行由其他UNIX操作系统生成的符合iBCS2标准的程序
    int did_exec:1; 
    pid_t pid;    //进程标识符,用来代表一个进程
    pid_t pgrp;   //进程组标识,表示进程所属的进程组
    pid_t tty_old_pgrp;  //进程控制终端所在的组标识
    pid_t session;  //进程的会话标识
    pid_t tgid;
    int leader;     //表示进程是否为会话主管
    struct task_struct *p_opptr,*p_pptr,*p_cptr,*p_ysptr,*p_osptr;
    struct list_head thread_group;   //线程链表
    struct task_struct *pidhash_next; //用于将进程链入HASH表
    struct task_struct **pidhash_pprev;
    wait_queue_head_t wait_chldexit;  //供wait4()使用
    struct completion *vfork_done;  //供vfork() 使用
    unsigned long rt_priority; //实时优先级,用它计算实时进程调度时的weight值
     
    //it_real_value,it_real_incr用于REAL定时器,单位为jiffies, 系统根据it_real_value
    //设置定时器的第一个终止时间. 在定时器到期时,向进程发送SIGALRM信号,同时根据
    //it_real_incr重置终止时间,it_prof_value,it_prof_incr用于Profile定时器,单位为jiffies。
    //当进程运行时,不管在何种状态下,每个tick都使it_prof_value值减一,当减到0时,向进程发送
    //信号SIGPROF,并根据it_prof_incr重置时间.
    //it_virt_value,it_virt_value用于Virtual定时器,单位为jiffies。当进程运行时,不管在何种
    //状态下,每个tick都使it_virt_value值减一当减到0时,向进程发送信号SIGVTALRM,根据
    //it_virt_incr重置初值。
    unsigned long it_real_value, it_prof_value, it_virt_value;
    unsigned long it_real_incr, it_prof_incr, it_virt_value;
    struct timer_list real_timer;   //指向实时定时器的指针
    struct tms times;      //记录进程消耗的时间
    unsigned long start_time;  //进程创建的时间
    //记录进程在每个CPU上所消耗的用户态时间和核心态时间
    long per_cpu_utime[NR_CPUS], per_cpu_stime[NR_CPUS]; 
    //内存缺页和交换信息:
    //min_flt, maj_flt累计进程的次缺页数(Copy on Write页和匿名页)和主缺页数(从映射文件或交换
    //设备读入的页面数); nswap记录进程累计换出的页面数,即写到交换设备上的页面数。
    //cmin_flt, cmaj_flt, cnswap记录本进程为祖先的所有子孙进程的累计次缺页数,主缺页数和换出页面数。
    //在父进程回收终止的子进程时,父进程会将子进程的这些信息累计到自己结构的这些域中
    unsigned long min_flt, maj_flt, nswap, cmin_flt, cmaj_flt, cnswap;
    int swappable:1; //表示进程的虚拟地址空间是否允许换出
    //进程认证信息
    //uid,gid为运行该进程的用户的用户标识符和组标识符,通常是进程创建者的uid,gid
    //euid,egid为有效uid,gid
    //fsuid,fsgid为文件系统uid,gid,这两个ID号通常与有效uid,gid相等,在检查对于文件
    //系统的访问权限时使用他们。
    //suid,sgid为备份uid,gid
    uid_t uid,euid,suid,fsuid;
    gid_t gid,egid,sgid,fsgid;
    int ngroups; //记录进程在多少个用户组中
    gid_t groups[NGROUPS]; //记录进程所在的组
    //进程的权能,分别是有效位集合,继承位集合,允许位集合
    kernel_cap_t cap_effective, cap_inheritable, cap_permitted;
    int keep_capabilities:1;
    struct user_struct *user;
    struct rlimit rlim[RLIM_NLIMITS];  //与进程相关的资源限制信息
    unsigned short used_math;   //是否使用FPU
    char comm[16];   //进程正在运行的可执行文件名
     //文件系统信息
    int link_count, total_link_count;
    //NULL if no tty 进程所在的控制终端,如果不需要控制终端,则该指针为空
    struct tty_struct *tty;
    unsigned int locks;
    //进程间通信信息
    struct sem_undo *semundo;  //进程在信号灯上的所有undo操作
    struct sem_queue *semsleeping; //当进程因为信号灯操作而挂起时,他在该队列中记录等待的操作
    //进程的CPU状态,切换时,要保存到停止进程的task_struct中
    struct thread_struct thread;
      //文件系统信息
    struct fs_struct *fs;
      //打开文件信息
    struct files_struct *files;
      //信号处理函数
    spinlock_t sigmask_lock;
    struct signal_struct *sig; //信号处理函数
    sigset_t blocked;  //进程当前要阻塞的信号,每个信号对应一位
    struct sigpending pending;  //进程上是否有待处理的信号
    unsigned long sas_ss_sp;
    size_t sas_ss_size;
    int (*notifier)(void *priv);
    void *notifier_data;
    sigset_t *notifier_mask;
    u32 parent_exec_id;
    u32 self_exec_id;
     
    spinlock_t alloc_lock;
    void *journal_info;
};
<include/linux/mm_types.h>
struct mm_struct {
    struct {
        struct vm_area_struct *mmap;        /* list of VMAs */
/*虚拟内存描述符链表,整个地址空间被分成不连续的内存,每片由vm_area_struct管理,所有vma按地址大小链表起来*/
        struct rb_root mm_rb;
/*方便增删,把vma传入到红黑树中*/
        u64 vmacache_seqnum;                   /* per-thread vmacache */
#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
        rwlock_t mm_rb_lock;    /* Speculative page fault field */
#endif
#ifdef CONFIG_MMU
/*函数指针,MMU芯片对应架构自己的分配虚拟内存函数*/
        unsigned long (*get_unmapped_area) (struct file *filp,
                unsigned long addr, unsigned long len,
                unsigned long pgoff, unsigned long flags);
#endif
        unsigned long mmap_base;    /* base of mmap area */
/*mmap开始映射地址*/
        unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */
/*控制上下分配*/
#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
        /* Base adresses for compatible mmap() */
        unsigned long mmap_compat_base;
        unsigned long mmap_compat_legacy_base;
#endif
        unsigned long task_size;    /* size of task vm space */
/*虚拟地址空间大小*/
        unsigned long highest_vm_end;   /* highest vma end address */
/*最后一个vma的vm_end值*/
        pgd_t * pgd;
/*进程的页全局目录地址*/
        /**
         * @mm_users: The number of users including userspace.
         *
         * Use mmget()/mmget_not_zero()/mmput() to modify. When this
         * drops to 0 (i.e. when the task exits and there are no other
         * temporary reference holders), we also release a reference on
         * @mm_count (which may then free the &struct mm_struct if
         * @mm_count also drops to 0).
         */
        atomic_t mm_users;
/*共享同一个用户虚拟地址空间的任务个数,为0的时候释放这个引用*/

        /**
         * @mm_count: The number of references to &struct mm_struct
         * (@mm_users count as 1).
         *
         * Use mmgrab()/mmdrop() to modify. When this drops to 0, the
         * &struct mm_struct is freed.
         */
        atomic_t mm_count;
/*mm被引用的计数,为0时,释放这个结构体*/
#ifdef CONFIG_MMU /*打开的*/
        atomic_long_t pgtables_bytes;   /* PTE page table pages */
#endif
        int map_count;          /* number of VMAs */
/*mm中vma的个数*/

        spinlock_t page_table_lock; /* Protects page tables and some
                         * counters
                         */
/*自旋锁,保护页表和计数器*/
        struct rw_semaphore mmap_sem;
/*操作mmap和mm_rb时,通过锁保护防止并发,读写锁,所以使用信号量*/
        struct list_head mmlist; /* List of maybe swapped mm's. These
                      * are globally strung together off
                      * init_mm.mmlist, and are protected
                      * by mmlist_lock
                      */

        unsigned long hiwater_rss; /* High-watermark of RSS usage */
        unsigned long hiwater_vm;  /* High-water virtual memory usage */

        unsigned long total_vm;    /* Total pages mapped */
        unsigned long locked_vm;   /* Pages that have PG_mlocked set */
        unsigned long pinned_vm;   /* Refcount permanently increased */
        unsigned long data_vm;     /* VM_WRITE & ~VM_SHARED & ~VM_STACK */
        unsigned long exec_vm;     /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
        unsigned long stack_vm;    /* VM_STACK */
        unsigned long def_flags;

        spinlock_t arg_lock; /* protect the below fields */
        unsigned long start_code, end_code, start_data, end_data;
/*代码段和数据段的起始地址和结束地址*/
        unsigned long start_brk, brk, start_stack;
/*堆的起始、结束地址,brk的值通过sys_brk系统调用调整;用户态栈的起始地址*/
        unsigned long arg_start, arg_end, env_start, env_end;
/*进程在应用程序启动时传递参数字符串的起始、结束地址;环境变量的起始、结束地址*/
        unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */

        /*
         * Special counters, in some configurations protected by the
         * page_table_lock, in other configurations by being atomic.
         */
        struct mm_rss_stat rss_stat;

        struct linux_binfmt *binfmt;

        /* Architecture-specific MM context */
        mm_context_t context;

        unsigned long flags; /* Must use atomic bitops to access */

        struct core_state *core_state; /* coredumping support */
#ifdef CONFIG_MEMBARRIER //打开
        atomic_t membarrier_state;
#endif
#ifdef CONFIG_AIO //打开
        spinlock_t          ioctx_lock;
        struct kioctx_table __rcu   *ioctx_table;
#endif
#ifdef CONFIG_MEMCG  //打开
        /*
         * "owner" points to a task that is regarded as the canonical规范的
         * user/owner of this mm. All of the following must be true in
         * order for it to be changed:
         *
         * current == mm->owner
         * current->mm != mm
         * new_owner->mm == mm
         * new_owner->alloc_lock is held
         */
        struct task_struct __rcu *owner;
#endif
        struct user_namespace *user_ns;

        /* store ref to file /proc/<pid>/exe symlink points to */
        struct file __rcu *exe_file;
#ifdef CONFIG_MMU_NOTIFIER //打开
        struct mmu_notifier_mm *mmu_notifier_mm;
#endif
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS 关闭
        pgtable_t pmd_huge_pte; /* protected by page_table_lock */
#endif
#ifdef CONFIG_NUMA_BALANCING //关
        /*
         * numa_next_scan is the next time that the PTEs will be marked
         * pte_numa. NUMA hinting faults will gather statistics and
         * migrate pages to new nodes if necessary.
         */
        unsigned long numa_next_scan;

        /* Restart point for scanning and setting pte_numa */
        unsigned long numa_scan_offset;

        /* numa_scan_seq prevents two threads setting pte_numa */
        int numa_scan_seq;
#endif
        /*
         * An operation with batched TLB flushing is going on. Anything
         * that can move process memory needs to flush the TLB when
         * moving a PROT_NONE or PROT_NUMA mapped page.
         */
        atomic_t tlb_flush_pending;
#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH //关
        /* See flush_tlb_batched_pending() */
        bool tlb_flush_batched;
#endif
        struct uprobes_state uprobes_state;
#ifdef CONFIG_HUGETLB_PAGE  //关
        atomic_long_t hugetlb_usage;
#endif
        struct work_struct async_put_work;

#if IS_ENABLED(CONFIG_HMM)
        /* HMM needs to track a few things per mm */
        struct hmm *hmm;
#endif
    } __randomize_layout;

    /*
     * The mm_cpumask needs to be at the end of mm_struct, because it
     * is dynamically sized based on nr_cpu_ids.
     */
    unsigned long cpu_bitmap[];
};
<include/linux/mm_types.h>
struct vm_area_struct {
    /* The first cache line has the info for VMA tree walking. */

    unsigned long vm_start;     /* Our start address within vm_mm. */
    unsigned long vm_end;       /* The first byte after our end address
                       within vm_mm. */
/*[vm_start, vm_end) 这么个情况,表示一块虚拟内存空间*/
    /* linked list of VM areas per task, sorted by address */
    struct vm_area_struct *vm_next, *vm_prev;
/*在mm->mmap链表中前后节点*/
    struct rb_node vm_rb;
/*插入到mm->mm_rb红黑树的节点*/
    /*
     * Largest free memory gap in bytes to the left of this VMA.
     * Either between this VMA and vma->vm_prev, or between one of the
     * VMAs below us in the VMA rbtree and its ->vm_prev. This helps
     * get_unmapped_area find a free area of the right size.
     */
    unsigned long rb_subtree_gap;
/*以当前vma为根,左子树中最大可用虚拟内存区域的大小*/
    /* Second cache line starts here. */

    struct mm_struct *vm_mm;    /* The address space we belong to. */
/*同一进程所有的vma指向的vm_mm是相同的*/
    pgprot_t vm_page_prot;      /* Access permissions of this VMA. */
/*访问权限*/
    unsigned long vm_flags;     /* Flags, see mm.h. */
/*属性,详看mm.h*/
    /*
     * For areas with an address space and backing store,
     * linkage into the address_space->i_mmap interval tree.
     *
     * For private anonymous mappings, a pointer to a null terminated string
     * in the user process containing the name given to the vma, or NULL
     * if unnamed.
     */
    union {
        struct {
            struct rb_node rb;
            unsigned long rb_subtree_last;
        } shared;
        const char __user *anon_name;
    };

    /*
     * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
     * list, after a COW of one of the file pages.  A MAP_SHARED vma
     * can only be in the i_mmap tree.  An anonymous MAP_PRIVATE, stack
     * or brk vma (with NULL file) can only be in an anon_vma list.
     */
    struct list_head anon_vma_chain; /* Serialized by mmap_sem &
                      * page_table_lock */
    struct anon_vma *anon_vma;  /* Serialized by page_table_lock */

    /* Function pointers to deal with this struct. */
    const struct vm_operations_struct *vm_ops;
/*该vma操作函数,包括打开、关闭、建立映射三个函数*/

    /* Information about our backing store: */
    unsigned long vm_pgoff;     /* Offset (within vm_file) in PAGE_SIZE
                       units */
    struct file * vm_file;      /* File we map to (can be NULL). */
/*文件映射的文件信息,匿名映射为NULL*/
    void * vm_private_data;     /* was vm_pte (shared mem) */

    atomic_long_t swap_readahead_info;
#ifndef CONFIG_MMU
    struct vm_region *vm_region;    /* NOMMU mapping region */
#endif
#ifdef CONFIG_NUMA
    struct mempolicy *vm_policy;    /* NUMA policy for the VMA */
#endif
    struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
    seqcount_t vm_sequence;     /* Speculative page fault field */
    atomic_t vm_ref_count;      /* see vma_get(), vma_put() */
#endif
} __randomize_layout;