Linux内核学习小知识


参考:

内核版本:Linux-5.14

常用节点

/proc

参考:

  • https://man7.org/linux/man-pages/man5/proc.5.html

内存

/proc/iomem 和 /proc/ioports

文件:kernel/resource.c
iomem:查看当前注册的物理地址
ioports:查看当前注册的IO端口信息

相关代码
/*
 * Root resource describing the I/O port space, named "PCI IO".
 * It spans 0 .. IO_SPACE_LIMIT and is flagged IORESOURCE_IO.
 * ioresources_init() hands it to the "ioports" proc entry as its data.
 */
struct resource ioport_resource = {
	.name	= "PCI IO",
	.start	= 0,
	.end	= IO_SPACE_LIMIT,
	.flags	= IORESOURCE_IO,
};
EXPORT_SYMBOL(ioport_resource);

/*
 * Root resource describing the memory address space, named "PCI mem"
 * and flagged IORESOURCE_MEM.
 * ioresources_init() hands it to the "iomem" proc entry as its data.
 */
struct resource iomem_resource = {
	.name	= "PCI mem",
	.start	= 0,
	/*
	 * NOTE(review): -1 presumably converts to the largest value of an
	 * unsigned field type, i.e. "up to the end of the address space" --
	 * confirm against the definition of struct resource.
	 */
	.end	= -1,
	.flags	= IORESOURCE_MEM,
};
EXPORT_SYMBOL(iomem_resource);

/*
 * seq_file show callback: print one resource as an indented
 * "start-end : name" line.
 *
 * @m: seq_file being filled; the root resource of the tree is taken from
 *     the proc entry data attached to the file's inode.
 * @v: the struct resource to print.
 *
 * Always returns 0.
 */
static int r_show(struct seq_file *m, void *v)
{
	struct resource *root = PDE_DATA(file_inode(m->file));
	struct resource *res = v;
	struct resource *cur = res;
	unsigned long long start = 0, end = 0;
	int width;
	int depth = 0;

	/* Use 4 hex digits when the whole tree ends below 0x10000, else 8. */
	if (root->end < 0x10000)
		width = 4;
	else
		width = 8;

	/*
	 * Indentation level of this entry: walk up the parent chain until
	 * the node directly below root is reached, giving up after
	 * MAX_IORES_LEVEL steps.
	 */
	while (depth < MAX_IORES_LEVEL && cur->parent != root) {
		depth++;
		cur = cur->parent;
	}

	/*
	 * Permission check: only a file opened with CAP_SYS_ADMIN in
	 * init_user_ns shows the real range; everyone else sees start and
	 * end as 0.
	 */
	if (file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN)) {
		start = res->start;
		end = res->end;
	}

	seq_printf(m, "%*s%0*llx-%0*llx : %s\n",
			depth * 2, "",
			width, start,
			width, end,
			res->name ? res->name : "");
	return 0;
}

/*
 * Init-time hook that creates the "ioports" and "iomem" proc entries.
 * Both entries share the resource_op sequence operations and differ only
 * in the root resource passed as entry data (ioport_resource vs.
 * iomem_resource), which r_show() reads back through PDE_DATA().
 * The parent argument is NULL (presumably meaning the /proc root).
 * Always returns 0; the return values of the create calls are not checked.
 */
static int __init ioresources_init(void)
{
	proc_create_seq_data("ioports", 0, NULL, &resource_op,
			&ioport_resource);
	proc_create_seq_data("iomem", 0, NULL, &resource_op, &iomem_resource);
	return 0;
}
__initcall(ioresources_init);

/proc/meminfo

统计了系统内存的使用,free命令会用到这个节点
文件:fs/proc/meminfo.c
示例:

名称 含义
MemTotal 总的可以使用的内存,是全部的物理内存减去一小部分保留内存和内核代码
MemFree LowFree+HighFree
MemAvailable 在不发生交换的情况下,启动新的应用时可用的内存,这是一个估计值
Buffers
Cached
SwapCached
Active
Inactive
Active(anon)
Inactive(anon)
Active(file)
Inactive(file)
Unevictable
Mlocked
SwapTotal
SwapFree
Dirty
Writeback
AnonPages
Mapped
Shmem
KReclaimable
Slab
SReclaimable
SUnreclaim
KernelStack
PageTables
NFS_Unstable
Bounce
WritebackTmp
CommitLimit
Committed_AS
VmallocTotal
VmallocUsed
VmallocChunk
Percpu
HugePages_Total
HugePages_Free
HugePages_Rsvd
HugePages_Surp
Hugepagesize
Hugetlb
DirectMap4k
DirectMap2M
DirectMap1G
meminfo_proc_show实现
/*
 * Each CPU architecture can provide its own arch_report_meminfo().
 * For reference, the x86 implementation looks like this:
 *
void arch_report_meminfo(struct seq_file *m)
{
	seq_printf(m, "DirectMap4k:    %8lu kB\n",
			direct_pages_count[PG_LEVEL_4K] << 2);
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
	seq_printf(m, "DirectMap2M:    %8lu kB\n",
			direct_pages_count[PG_LEVEL_2M] << 11);
#else
	seq_printf(m, "DirectMap4M:    %8lu kB\n",
			direct_pages_count[PG_LEVEL_2M] << 12);
#endif
	if (direct_gbpages)
		seq_printf(m, "DirectMap1G:    %8lu kB\n",
			direct_pages_count[PG_LEVEL_1G] << 20);
}
*/
/*
 * Weak default that prints nothing. Because it is declared weak, a
 * regular (non-weak) definition supplied by an architecture replaces it
 * at link time.
 */
void __attribute__((weak)) arch_report_meminfo(struct seq_file *m)
{
}

/*
 * Emit one "<label><value> kB" line into @m.
 *
 * @m:   seq_file to append to.
 * @s:   label text written in front of the number.
 * @num: value to report; it is shifted left by (PAGE_SHIFT - 10) before
 *       printing, i.e. it is treated as a page count and shown in kB.
 *
 * The number is printed through seq_put_decimal_ull_width() with a width
 * argument of 8, followed by the 4-byte suffix " kB\n".
 */
static void show_val_kb(struct seq_file *m, const char *s, unsigned long num)
{
	static const char suffix[] = " kB\n";
	unsigned long kb = num << (PAGE_SHIFT - 10);

	seq_put_decimal_ull_width(m, s, kb, 8);
	/* sizeof - 1 drops the terminating NUL, leaving the 4 visible bytes. */
	seq_write(m, suffix, sizeof(suffix) - 1);
}

/*
 * seq_file show callback behind the "meminfo" proc entry.
 *
 * Collects system-wide memory counters and prints them one per line into
 * @m. The order of the calls below is the order of the lines in the
 * output, so statements must not be reordered. @v is unused.
 * Always returns 0.
 */
static int meminfo_proc_show(struct seq_file *m, void *v)
{
	struct sysinfo i;
	unsigned long committed;
	long cached;
	long available;
	unsigned long pages[NR_LRU_LISTS];
	unsigned long sreclaimable, sunreclaim;
	int lru;

	/*
	 * Populate the sysinfo snapshot; its totalram, freeram, bufferram,
	 * sharedram, totalswap and freeswap fields are read below.
	 */
	si_meminfo(&i);
	si_swapinfo(&i);
	committed = vm_memory_committed();

	/*
	 * Cached = file pages minus swap cache pages minus buffers.
	 * The subtraction may come out negative (presumably because the
	 * counters are sampled at slightly different moments), so clamp to 0.
	 */
	cached = global_node_page_state(NR_FILE_PAGES) -
			total_swapcache_pages() - i.bufferram;
	if (cached < 0)
		cached = 0;

	/* Snapshot the size of every LRU list, indexed by LRU list id. */
	for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
		pages[lru] = global_node_page_state(NR_LRU_BASE + lru);

	available = si_mem_available();
	sreclaimable = global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B);
	sunreclaim = global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B);

	/*
	 * Values handed to show_val_kb() are shifted left by
	 * (PAGE_SHIFT - 10) there before being printed with a " kB" suffix.
	 */
	show_val_kb(m, "MemTotal:       ", i.totalram);
	show_val_kb(m, "MemFree:        ", i.freeram);
	show_val_kb(m, "MemAvailable:   ", available);
	show_val_kb(m, "Buffers:        ", i.bufferram);
	show_val_kb(m, "Cached:         ", cached);
	show_val_kb(m, "SwapCached:     ", total_swapcache_pages());
	/* Active / Inactive are the sums of the anon and file LRU lists. */
	show_val_kb(m, "Active:         ", pages[LRU_ACTIVE_ANON] +
					   pages[LRU_ACTIVE_FILE]);
	show_val_kb(m, "Inactive:       ", pages[LRU_INACTIVE_ANON] +
					   pages[LRU_INACTIVE_FILE]);
	show_val_kb(m, "Active(anon):   ", pages[LRU_ACTIVE_ANON]);
	show_val_kb(m, "Inactive(anon): ", pages[LRU_INACTIVE_ANON]);
	show_val_kb(m, "Active(file):   ", pages[LRU_ACTIVE_FILE]);
	show_val_kb(m, "Inactive(file): ", pages[LRU_INACTIVE_FILE]);
	show_val_kb(m, "Unevictable:    ", pages[LRU_UNEVICTABLE]);
	show_val_kb(m, "Mlocked:        ", global_zone_page_state(NR_MLOCK));

	/* High/low memory split; the Low* values are total minus high. */
#ifdef CONFIG_HIGHMEM
	show_val_kb(m, "HighTotal:      ", i.totalhigh);
	show_val_kb(m, "HighFree:       ", i.freehigh);
	show_val_kb(m, "LowTotal:       ", i.totalram - i.totalhigh);
	show_val_kb(m, "LowFree:        ", i.freeram - i.freehigh);
#endif

	/* Only printed when the kernel is built without CONFIG_MMU. */
#ifndef CONFIG_MMU
	show_val_kb(m, "MmapCopy:       ",
		    (unsigned long)atomic_long_read(&mmap_pages_allocated));
#endif

	show_val_kb(m, "SwapTotal:      ", i.totalswap);
	show_val_kb(m, "SwapFree:       ", i.freeswap);
	show_val_kb(m, "Dirty:          ",
		    global_node_page_state(NR_FILE_DIRTY));
	show_val_kb(m, "Writeback:      ",
		    global_node_page_state(NR_WRITEBACK));
	show_val_kb(m, "AnonPages:      ",
		    global_node_page_state(NR_ANON_MAPPED));
	show_val_kb(m, "Mapped:         ",
		    global_node_page_state(NR_FILE_MAPPED));
	show_val_kb(m, "Shmem:          ", i.sharedram);
	/* KReclaimable = reclaimable slab + misc reclaimable kernel pages. */
	show_val_kb(m, "KReclaimable:   ", sreclaimable +
		    global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE));
	/* Slab is the sum of the two lines that follow it. */
	show_val_kb(m, "Slab:           ", sreclaimable + sunreclaim);
	show_val_kb(m, "SReclaimable:   ", sreclaimable);
	show_val_kb(m, "SUnreclaim:     ", sunreclaim);
	/*
	 * Printed directly, without show_val_kb()'s shift. The _KB suffix of
	 * the counter names suggests they are already kept in kB --
	 * NOTE(review): confirm.
	 */
	seq_printf(m, "KernelStack:    %8lu kB\n",
		   global_node_page_state(NR_KERNEL_STACK_KB));
#ifdef CONFIG_SHADOW_CALL_STACK
	seq_printf(m, "ShadowCallStack:%8lu kB\n",
		   global_node_page_state(NR_KERNEL_SCS_KB));
#endif
	show_val_kb(m, "PageTables:     ",
		    global_node_page_state(NR_PAGETABLE));

	/* NFS_Unstable is always reported as 0 here. */
	show_val_kb(m, "NFS_Unstable:   ", 0);
	show_val_kb(m, "Bounce:         ",
		    global_zone_page_state(NR_BOUNCE));
	show_val_kb(m, "WritebackTmp:   ",
		    global_node_page_state(NR_WRITEBACK_TEMP));
	show_val_kb(m, "CommitLimit:    ", vm_commit_limit());
	show_val_kb(m, "Committed_AS:   ", committed);
	/*
	 * VMALLOC_TOTAL is shifted right by 10 rather than going through
	 * show_val_kb(); presumably it is a byte count -- confirm.
	 */
	seq_printf(m, "VmallocTotal:   %8lu kB\n",
		   (unsigned long)VMALLOC_TOTAL >> 10);
	show_val_kb(m, "VmallocUsed:    ", vmalloc_nr_pages());
	/* VmallocChunk is always reported as 0 here. */
	show_val_kb(m, "VmallocChunk:   ", 0ul);
	show_val_kb(m, "Percpu:         ", pcpu_nr_pages());

#ifdef CONFIG_MEMORY_FAILURE
	seq_printf(m, "HardwareCorrupted: %5lu kB\n",
		   atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10));
#endif

	/* Transparent huge page counters. */
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	show_val_kb(m, "AnonHugePages:  ",
		    global_node_page_state(NR_ANON_THPS));
	show_val_kb(m, "ShmemHugePages: ",
		    global_node_page_state(NR_SHMEM_THPS));
	show_val_kb(m, "ShmemPmdMapped: ",
		    global_node_page_state(NR_SHMEM_PMDMAPPED));
	show_val_kb(m, "FileHugePages:  ",
		    global_node_page_state(NR_FILE_THPS));
	show_val_kb(m, "FilePmdMapped:  ",
		    global_node_page_state(NR_FILE_PMDMAPPED));
#endif

#ifdef CONFIG_CMA
	show_val_kb(m, "CmaTotal:       ", totalcma_pages);
	show_val_kb(m, "CmaFree:        ",
		    global_zone_page_state(NR_FREE_CMA_PAGES));
#endif

	/* Let the hugetlb code append its own lines. */
	hugetlb_report_meminfo(m);

	/* Architecture-specific lines come last (weak default prints nothing). */
	arch_report_meminfo(m);

	return 0;
}

/*
 * Init-time hook that creates the "meminfo" proc entry and binds it to
 * meminfo_proc_show() as a single-shot show routine. The parent argument
 * is NULL (presumably meaning the /proc root). Always returns 0; the
 * return value of proc_create_single() is not checked.
 */
static int __init proc_meminfo_init(void)
{
	proc_create_single("meminfo", 0, NULL, meminfo_proc_show);
	return 0;
}
fs_initcall(proc_meminfo_init);

/proc/vmstat

作用:查看多种虚拟内存的统计信息
文件:mm/vmstat.c

点击查看代码
/*
 * Abridged excerpt: the "..." lines stand for code omitted from these
 * notes. The part shown creates the "vmstat" proc entry with mode 0444
 * and the vmstat_op sequence operations.
 */
void __init init_mm_internals(void)
{
	...
	proc_create_seq("vmstat", 0444, NULL, &vmstat_op);
	...
}
名称 含义
nr_free_pages
nr_zone_inactive_anon
nr_zone_active_anon
nr_zone_inactive_file
nr_zone_active_file
nr_zone_unevictable
nr_zone_write_pending
nr_mlock
nr_bounce
nr_free_cma
numa_hit
numa_miss
numa_foreign
numa_interleave
numa_local
numa_other
nr_inactive_anon
nr_active_anon
nr_inactive_file
nr_active_file
nr_unevictable
nr_slab_reclaimable
nr_slab_unreclaimable
nr_isolated_anon
nr_isolated_file
workingset_nodes
workingset_refault_anon
workingset_refault_file
workingset_activate_anon
workingset_activate_file
workingset_restore_anon
workingset_restore_file
workingset_nodereclaim
nr_anon_pages
nr_mapped
nr_file_pages
nr_dirty
nr_writeback
nr_writeback_temp
nr_shmem
nr_shmem_hugepages
nr_shmem_pmdmapped
nr_file_hugepages
nr_file_pmdmapped
nr_anon_transparent_hugepages
nr_vmscan_write
nr_vmscan_immediate_reclaim
nr_dirtied
nr_written
nr_kernel_misc_reclaimable
nr_foll_pin_acquired
nr_foll_pin_released
nr_kernel_stack
nr_page_table_pages
nr_swapcached
nr_dirty_threshold
nr_dirty_background_threshold
pgpgin
pgpgout
pswpin
pswpout
pgalloc_dma
pgalloc_dma32
pgalloc_normal
pgalloc_movable
allocstall_dma
allocstall_dma32
allocstall_normal
allocstall_movable
pgskip_dma
pgskip_dma32
pgskip_normal
pgskip_movable
pgfree
pgactivate
pgdeactivate
pglazyfree
pgfault
pgmajfault
pglazyfreed
pgrefill
pgreuse
pgsteal_kswapd
pgsteal_direct
pgscan_kswapd
pgscan_direct
pgscan_direct_throttle
pgscan_anon
pgscan_file
pgsteal_anon
pgsteal_file
zone_reclaim_failed
pginodesteal
slabs_scanned
kswapd_inodesteal
kswapd_low_wmark_hit_quickly
kswapd_high_wmark_hit_quickly
pageoutrun
pgrotated
drop_pagecache
drop_slab
oom_kill
pgmigrate_success
pgmigrate_fail
thp_migration_success
thp_migration_fail
thp_migration_split
compact_migrate_scanned
compact_free_scanned
compact_isolated
compact_stall
compact_fail
compact_success
compact_daemon_wake
compact_daemon_migrate_scanned
compact_daemon_free_scanned
htlb_buddy_alloc_success
htlb_buddy_alloc_fail
unevictable_pgs_culled
unevictable_pgs_scanned
unevictable_pgs_rescued
unevictable_pgs_mlocked
unevictable_pgs_munlocked
unevictable_pgs_cleared
unevictable_pgs_stranded
balloon_inflate
balloon_deflate
balloon_migrate
swap_ra
swap_ra_hit
direct_map_level2_splits
direct_map_level3_splits
nr_unstable

/proc/buddyinfo

/proc/zoneinfo

/proc/slabinfo

存储

/proc/diskstats

/proc/[PID]/io

进程

/proc/stat

/proc/[PID]/stat

/proc/[PID]/statm

/proc/[PID]/status

整体状态

/proc/iomem

代码:kernel/resource.c
通过这个节点可以查看调用了__request_region接口注册的片内外设物理地址信息

/proc/vmallocinfo

代码:mm/vmalloc.c
通过这个节点可以查看vmap_area_list链表的内容,查看调用vmalloc以及ioremap接口申请和映射的地址的信息。

/proc/swaps

代码:mm/swapfile.c
作用:查看当前系统所有的swap文件或者设备信息
实现:

swap_show
/*
 * seq_file show callback for the swap listing: print the header line for
 * SEQ_START_TOKEN, otherwise one line describing a swap area.
 *
 * @swap: seq_file being filled.
 * @v:    SEQ_START_TOKEN, or the struct swap_info_struct to print.
 *
 * Each line holds the path, the type ("partition" for a block device,
 * "file" otherwise), the size and the used space in kB, and the priority.
 * Always returns 0.
 *
 * Note: the v5.14 original kept the two kB values in "unsigned int".
 * With PAGE_SHIFT == 12 they wrap once a swap area reaches 2^30 pages
 * (4 TiB), so the values are widened to unsigned long here and printed
 * with %lu. On targets where unsigned long is 32 bits wide the behaviour
 * is unchanged.
 */
static int swap_show(struct seq_file *swap, void *v)
{
	struct swap_info_struct *si = v;
	struct file *file;
	int len;
	unsigned long bytes, inuse;

	if (si == SEQ_START_TOKEN) {
		seq_puts(swap, "Filename\t\t\t\tType\t\tSize\t\tUsed\t\tPriority\n");
		return 0;
	}

	/*
	 * Capacity of the swap area in kB (despite the variable name).
	 * Widen before shifting so the shift itself cannot overflow a
	 * narrower page-count type.
	 */
	bytes = (unsigned long)si->pages << (PAGE_SHIFT - 10);
	/* Space already in use on the swap area, in kB. */
	inuse = (unsigned long)si->inuse_pages << (PAGE_SHIFT - 10);

	file = si->swap_file;
	len = seq_file_path(swap, file, " \t\n\\");
	/*
	 * Pad the path out to 40 columns (at least one space), and add an
	 * extra tab after numbers shorter than 8 digits to keep the columns
	 * aligned.
	 */
	seq_printf(swap, "%*s%s\t%lu\t%s%lu\t%s%d\n",
			len < 40 ? 40 - len : 1, " ",
			S_ISBLK(file_inode(file)->i_mode) ?
				"partition" : "file\t",
			bytes, bytes < 10000000 ? "\t" : "",
			inuse, inuse < 10000000 ? "\t" : "",
			si->prio);
	return 0;
}

/sys/

/sys/kernel/debug

  • /sys/kernel/debug/kernel_page_tables
    代码位置:arch/arm/mm/dump.c
    通过这个节点可以查看linux内核页表的映射信息,如虚拟地址范围以及地址属性。

内核文档

  • 内核编译: Documentation/kbuild/

零碎知识

  • 获得内核编译过程中的某个.c文件的预处理后的文件,比如kernel/cgroup/cgroup.c
    make kernel/cgroup/cgroup.i