Linux: Kernel Address Space Layout Randomization (KASLR)
Table of Contents
- 1. KASLR Overview
- 2. A Brief Look at the KASLR Implementation
- 3. References
1. KASLR Overview
KASLR stands for Kernel Address Space Layout Randomization. Before KASLR was introduced, the kernel's load virtual address was fixed at build time, which is a security weakness: once an attacker learns the address of a single kernel symbol, the location of all kernel code and data is known. KASLR generates, at boot time, a random offset relative to the build-time load virtual address, which makes such attacks harder and improves kernel security.
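A quick way to observe KASLR in practice (a minimal userspace sketch, not part of the kernel sources; it assumes root privileges or kernel.kptr_restrict=0 so that /proc/kallsyms shows real addresses): read the runtime address of the _text symbol and compare it across reboots, or against the link-time value recorded in System.map.
```
#include <stdio.h>
#include <string.h>

/* Print the runtime address of the "_text" symbol from /proc/kallsyms.
 * With KASLR active this value changes from boot to boot and differs from
 * the link-time address in System.map.
 */
int main(void)
{
	FILE *f = fopen("/proc/kallsyms", "r");
	char line[256], type, name[128];
	unsigned long long addr;

	if (!f) {
		perror("fopen /proc/kallsyms");
		return 1;
	}
	while (fgets(line, sizeof(line), f)) {
		if (sscanf(line, "%llx %c %127s", &addr, &type, name) == 3 &&
		    strcmp(name, "_text") == 0) {
			printf("_text @ 0x%llx\n", addr);
			break;
		}
	}
	fclose(f);
	return 0;
}
```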
2. A Brief Look at the KASLR Implementation
KASLR is enabled by the config option CONFIG_RANDOMIZE_BASE. Note that enabling CONFIG_RANDOMIZE_BASE implicitly selects CONFIG_RELOCATABLE:
```
config RANDOMIZE_BASE
	bool "Randomize the address of the kernel image"
	select ARM64_MODULE_PLTS if MODULES
	select RELOCATABLE
```
This article uses Linux 4.14.111 on ARM64 as an example to walk briefly through the KASLR implementation.
First, the KASLR offset register x23 is initialized to 0 (i.e. no offset). Later code checks whether x23 is non-zero to tell whether randomization has already been applied, which keeps the logic in __primary_switched from looping forever:
```
/* arch/arm64/kernel/head.S */
ENTRY(stext)
	...
	adrp	x23, __PHYS_OFFSET
	and	x23, x23, MIN_KIMG_ALIGN - 1	// KASLR offset, defaults to 0
	...
	bl	__create_page_tables		// create the initial temporary kernel page tables
	...
	b	__primary_switch
ENDPROC(stext)
```
Then __primary_switch:
- randomizes the kernel load address offset;
- rebuilds the temporary kernel page tables via __create_page_tables (this time applying the random offset held in x23);
- relocates the kernel again via __relocate_kernel;
- enters start_kernel().
A simplified C model of this two-pass flow follows the assembly below.
```
/* arch/arm64/kernel/head.S */
__primary_switch:
#ifdef CONFIG_RANDOMIZE_BASE
	mov	x19, x0				// preserve new SCTLR_EL1 value
	mrs	x20, sctlr_el1			// preserve old SCTLR_EL1 value
#endif

	bl	__enable_mmu			// enable the MMU
#ifdef CONFIG_RELOCATABLE
	bl	__relocate_kernel		// relocate the kernel
#ifdef CONFIG_RANDOMIZE_BASE
	// First call into __primary_switched: generate the random kernel
	// load address offset and record it in x23.
	ldr	x8, =__primary_switched
	adrp	x0, __PHYS_OFFSET		// __primary_switched expects x0 = __PHYS_OFFSET
						// (x0 will later be clobbered by kaslr_early_init())
	blr	x8

	/*
	 * If we return here, we have a KASLR displacement in x23 which we need
	 * to take into account by discarding the current kernel mapping and
	 * creating a new one.
	 */
	pre_disable_mmu_workaround
	msr	sctlr_el1, x20			// disable the MMU
	isb
	// The load address has been randomized, so the temporary boot page
	// tables must be rebuilt; this time the random offset in x23 is applied.
	bl	__create_page_tables		// recreate kernel mapping

	tlbi	vmalle1				// Remove any stale TLB entries
	dsb	nsh

	msr	sctlr_el1, x19			// re-enable the MMU
	isb
	ic	iallu				// flush instructions fetched
	dsb	nsh				// via old mapping
	isb

	bl	__relocate_kernel		// relocate the kernel to the randomized load address
#endif
#endif
	// Second call into __primary_switched: enter start_kernel()
	ldr	x8, =__primary_switched
	adrp	x0, __PHYS_OFFSET
	br	x8				// __primary_switched -> start_kernel()
ENDPROC(__primary_switch)
```
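To make the two calls into __primary_switched easier to follow, here is the simplified C model of the control flow promised above. It is purely illustrative and not kernel code: register x23 becomes a variable, kaslr_early_init() is replaced by a stub returning a made-up offset, and the MMU/page-table/relocation steps become printouts.
```
#include <stdio.h>

static unsigned long x23;	/* KASLR offset: 0 means "not randomized yet" */

static unsigned long kaslr_early_init_model(void)
{
	return 0x123UL << 21;	/* pretend the FDT seed produced this 2 MB-aligned offset */
}

static int primary_switched_model(void)
{
	if (x23 == 0) {				/* not running randomized yet */
		unsigned long off = kaslr_early_init_model();

		if (off) {
			x23 = off;		/* record the KASLR offset ... */
			return 1;		/* ... and return to __primary_switch */
		}
	}
	/* already randomized (or KASLR disabled): enter start_kernel() */
	printf("start_kernel() at link address + 0x%lx\n", x23);
	return 0;
}

int main(void)	/* plays the role of __primary_switch */
{
	printf("enable MMU, relocate kernel to its link-time address\n");
	if (primary_switched_model()) {		/* 1st call: may come back with an offset in x23 */
		printf("disable MMU, recreate page tables with offset 0x%lx, re-enable MMU, relocate again\n", x23);
		primary_switched_model();	/* 2nd call: falls through into start_kernel() */
	}
	return 0;
}
```
The point the model preserves is the x23 check: on the second pass the offset is non-zero, so execution falls through into start_kernel() instead of looping back.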
The first call from __primary_switch into __primary_switched generates the random kernel load address offset and records it in x23:
```
/* arch/arm64/kernel/head.S */
/*
 * The following fragment of code is executed with the MMU enabled.
 *
 *   x0 = __PHYS_OFFSET
 */
__primary_switched:
	...
	// On the first call from __primary_switch, __primary_switched
	// generates the kernel load address offset and saves it in x23.
#ifdef CONFIG_RANDOMIZE_BASE
	tst	x23, ~(MIN_KIMG_ALIGN - 1)	// already running randomized?
	b.ne	0f				// already randomized: fall through to start_kernel()
	mov	x0, x21				// pass FDT address in x0
	bl	kaslr_early_init		// parse FDT for KASLR options
	cbz	x0, 0f				// KASLR disabled? just proceed
	orr	x23, x23, x0			// record KASLR offset
	ldp	x29, x30, [sp], #16		// we must enable KASLR, return
	ret					// to __primary_switch()
0:
#endif
	// add	sp, sp, #16
	// mov	x29, #0
	// mov	x30, #0
	// b	start_kernel
```
```
/* arch/arm64/kernel/kaslr.c */

/*
 * This routine will be executed with the kernel mapped at its default virtual
 * address, and if it returns successfully, the kernel will be remapped, and
 * start_kernel() will be executed from a randomized virtual offset. The
 * relocation will result in all absolute references (e.g., static variables
 * containing function pointers) to be reinitialized, and zero-initialized
 * .bss variables will be reset to 0.
 */
u64 __init kaslr_early_init(u64 dt_phys)
{
	void *fdt;
	u64 seed, offset, mask, module_range;
	const u8 *cmdline, *str;
	int size;

	/*
	 * Set a reasonable default for module_alloc_base in case
	 * we end up running with module randomization disabled.
	 */
	module_alloc_base = (u64)_etext - MODULES_VSIZE;
	__flush_dcache_area(&module_alloc_base, sizeof(module_alloc_base));

	/* ... (mapping of the FDT at dt_phys into 'fdt' elided) ... */

	/*
	 * Retrieve (and wipe) the seed from the FDT
	 */
	seed = get_kaslr_seed(fdt);		/* read the randomization seed from the FDT */
	if (!seed)
		return 0;

	/*
	 * Check if 'nokaslr' appears on the command line, and
	 * return 0 if that is the case.
	 */
	/* the 'nokaslr' kernel command line parameter disables KASLR */
	cmdline = kaslr_get_cmdline(fdt);
	str = strstr(cmdline, "nokaslr");
	if (str == cmdline || (str > cmdline && *(str - 1) == ' '))
		return 0;

	/*
	 * OK, so we are proceeding with KASLR enabled. Calculate a suitable
	 * kernel image offset from the seed. Let's place the kernel in the
	 * lower half of the VMALLOC area (VA_BITS - 2).
	 * Even if we could randomize at page granularity for 16k and 64k pages,
	 * let's always round to 2 MB so we don't interfere with the ability to
	 * map using contiguous PTEs
	 */
	mask = ((1UL << (VA_BITS - 2)) - 1) & ~(SZ_2M - 1);
	offset = seed & mask;			/* randomized kernel image offset */

	/* use the top 16 bits to randomize the linear region */
	memstart_offset_seed = seed >> 48;	/* seed for randomizing the linear region */

	/*
	 * The kernel Image should not extend across a 1GB/32MB/512MB alignment
	 * boundary (for 4KB/16KB/64KB granule kernels, respectively). If this
	 * happens, round down the KASLR offset by (1 << SWAPPER_TABLE_SHIFT).
	 *
	 * NOTE: The references to _text and _end below will already take the
	 *       modulo offset (the physical displacement modulo 2 MB) into
	 *       account, given that the physical placement is controlled by
	 *       the loader, and will not change as a result of the virtual
	 *       mapping we choose.
	 */
	if ((((u64)_text + offset) >> SWAPPER_TABLE_SHIFT) !=
	    (((u64)_end + offset) >> SWAPPER_TABLE_SHIFT))
		offset = round_down(offset, 1 << SWAPPER_TABLE_SHIFT);

	if (IS_ENABLED(CONFIG_KASAN))
		/*
		 * KASAN does not expect the module region to intersect the
		 * vmalloc region, since shadow memory is allocated for each
		 * module at load time, whereas the vmalloc region is shadowed
		 * by KASAN zero pages. So keep modules out of the vmalloc
		 * region if KASAN is enabled.
		 */
		return offset;

	/* randomize the module region */
	if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) {
		/*
		 * Randomize the module region independently from the core
		 * kernel. This prevents modules from leaking any information
		 * about the address of the kernel itself, but results in
		 * branches between modules and the core kernel that are
		 * resolved via PLTs. (Branches between modules will be
		 * resolved normally.)
		 */
		module_range = VMALLOC_END - VMALLOC_START - MODULES_VSIZE;
		module_alloc_base = VMALLOC_START;
	} else {
		/*
		 * Randomize the module region by setting module_alloc_base to
		 * a PAGE_SIZE multiple in the range [_etext - MODULES_VSIZE,
		 * _stext) . This guarantees that the resulting region still
		 * covers [_stext, _etext], and that all relative branches can
		 * be resolved without veneers.
		 */
		module_range = MODULES_VSIZE - (u64)(_etext - _stext);
		module_alloc_base = (u64)_etext + offset - MODULES_VSIZE;
	}

	/* use the lower 21 bits to randomize the base of the module region */
	module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21;
	module_alloc_base &= PAGE_MASK;

	...

	return offset;
}
```
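To make the bit manipulation concrete, here is a small standalone sketch of the mask/offset arithmetic above. It assumes VA_BITS = 48 and uses a made-up seed; only the formulas are taken from kaslr_early_init().
```
#include <stdio.h>

#define VA_BITS	48			/* assumed: 48-bit VA configuration */
#define SZ_2M	0x200000ULL

int main(void)
{
	unsigned long long seed   = 0x0123456789abcdefULL;	/* made-up FDT seed */
	unsigned long long mask   = ((1ULL << (VA_BITS - 2)) - 1) & ~(SZ_2M - 1);
	unsigned long long offset = seed & mask;

	printf("mask                 = 0x%016llx\n", mask);	/* 0x00003fffffe00000 */
	printf("offset               = 0x%016llx\n", offset);	/* < 2^46 and 2 MB aligned */
	printf("memstart_offset_seed = 0x%llx\n", seed >> 48);	/* top 16 bits -> linear region */
	return 0;
}
```
With VA_BITS = 48 the mask is 0x00003fffffe00000, so the image offset stays below 2^46 (the lower half of the vmalloc area mentioned in the kernel comment) and is always a multiple of 2 MB.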
Next, __create_page_tables rebuilds the temporary kernel page tables and __relocate_kernel relocates the kernel to the new address. Finally, __primary_switch calls __primary_switched a second time, which this time enters start_kernel():
```
/* arch/arm64/kernel/head.S */

/*
 * end early head section, begin head code that is also used for
 * hotplug and needs to have the same protections as the text region
 */
	.section ".idmap.text","awx"

ENTRY(kimage_vaddr)
	.quad		_text - TEXT_OFFSET

/*
 * The following fragment of code is executed with the MMU enabled.
 *
 *   x0 = __PHYS_OFFSET
 */
__primary_switched:
	...
	// kimage_voffset holds the difference between the kernel's load
	// virtual address and its physical address.
	//
	// ldr_l reads the runtime value of kimage_vaddr. In the KASLR case:
	//   . on the 1st call from __primary_switch, the kernel has been
	//     relocated to its link-time address, so kimage_vaddr == KIMAGE_VADDR
	//   . on the 2nd call, the kernel has been relocated to
	//     [link address + random offset], so
	//     kimage_vaddr == KIMAGE_VADDR + random offset
	// With KASLR, kimage_voffset therefore ends up as:
	//   kimage_voffset = [KIMAGE_VADDR + random offset] - __PHYS_OFFSET
	//                  = KIMAGE_VADDR - __PHYS_OFFSET + random offset
	// and without KASLR:
	//   kimage_voffset = KIMAGE_VADDR - __PHYS_OFFSET
	// So in the end, in both cases:
	//   kimage_vaddr   = kernel virtual address
	//   kimage_voffset = kernel virtual address - kernel physical address
	ldr_l	x4, kimage_vaddr		// Save the offset between
	sub	x4, x4, x0			// the kernel virtual and
	str_l	x4, kimage_voffset, x5		// physical mappings
	...
#ifdef CONFIG_RANDOMIZE_BASE
	// tst	x23, ~(MIN_KIMG_ALIGN - 1)	// already running randomized?
	// b.ne	0f
	// mov	x0, x21				// pass FDT address in x0
	// bl	kaslr_early_init		// parse FDT for KASLR options
	// cbz	x0, 0f				// KASLR disabled? just proceed
	// orr	x23, x23, x0			// record KASLR offset
	// ldp	x29, x30, [sp], #16		// we must enable KASLR, return
	// ret					// to __primary_switch()

	// The second call from __primary_switch into __primary_switched
	// falls through here and enters start_kernel().
0:
#endif
	add	sp, sp, #16
	mov	x29, #0
	mov	x30, #0
	b	start_kernel
ENDPROC(__primary_switched)
```
Let's take a closer look at kimage_vaddr. First, the relevant part of the linker script:
```
/* arch/arm64/kernel/vmlinux.lds.S */
SECTIONS
{
	...
	. = KIMAGE_VADDR + TEXT_OFFSET;

	.head.text : {
		_text = .;
		HEAD_TEXT
	}
	...
}
```
where HEAD_TEXT is defined as:
```
/* Section used for early init (in .S files) */
#define HEAD_TEXT  *(.head.text)
```
That is, the kernel entry point; in other words, _text is the link-time (virtual) address of the kernel entry. Its value can be seen in the System.map generated by the kernel build:
```
80008000 T _text
```
kimage_vaddr, in turn, is defined as .quad _text - TEXT_OFFSET. What exactly does that mean? From the linker script fragment
```
. = KIMAGE_VADDR + TEXT_OFFSET;

.head.text : {
	_text = .;
	HEAD_TEXT
}
```
we know that _text = KIMAGE_VADDR + TEXT_OFFSET, so kimage_vaddr = _text - TEXT_OFFSET = (KIMAGE_VADDR + TEXT_OFFSET) - TEXT_OFFSET = KIMAGE_VADDR; that is, the initial value of kimage_vaddr is KIMAGE_VADDR, the start virtual address of the kernel image. After the KASLR random offset is applied, the kernel is relocated again (via __relocate_kernel), so in the end:
kimage_vaddr   = the kernel's virtual address (whether or not KASLR is in effect)
kimage_voffset = the kernel's virtual address - the kernel's physical address
Clearly, then, the difference kimage_vaddr - KIMAGE_VADDR is exactly the random offset generated by KASLR:
```
/* arch/arm64/include/asm/memory.h */
static inline unsigned long kaslr_offset(void)
{
	return kimage_vaddr - KIMAGE_VADDR;
}
```
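As a usage note, the main consumer of kaslr_offset() is diagnostics: arch/arm64/kernel/setup.c registers a panic notifier (dump_kernel_offset()) that prints the offset so oops/panic addresses can be mapped back to System.map symbols. The snippet below is a simplified sketch of that kind of built-in code, not a verbatim copy:
```
/* Simplified sketch of built-in code that reports the KASLR offset on
 * panic, modelled on dump_kernel_offset() in arch/arm64/kernel/setup.c.
 */
#include <linux/init.h>
#include <linux/kernel.h>	/* panic_notifier_list (v4.14), pr_emerg() */
#include <linux/notifier.h>
#include <asm/memory.h>		/* kaslr_offset(), KIMAGE_VADDR */

static int dump_kaslr_offset(struct notifier_block *self,
			     unsigned long v, void *p)
{
	pr_emerg("Kernel Offset: 0x%lx from 0x%lx\n",
		 kaslr_offset(), (unsigned long)KIMAGE_VADDR);
	return 0;
}

static struct notifier_block kaslr_offset_nb = {
	.notifier_call = dump_kaslr_offset,
};

static int __init kaslr_offset_dumper_init(void)
{
	atomic_notifier_chain_register(&panic_notifier_list, &kaslr_offset_nb);
	return 0;
}
__initcall(kaslr_offset_dumper_init);
```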
kimage_voffset, meanwhile, is used to convert kernel image virtual addresses to and from physical addresses (addresses in the linear mapping are converted with PHYS_OFFSET instead):
```
/* arch/arm64/include/asm/memory.h */

/*
 * The linear kernel range starts in the middle of the virtual adddress
 * space. Testing the top bit for the start of the region is a
 * sufficient check.
 */
#define __is_lm_address(addr)	(!!((addr) & BIT(VA_BITS - 1)))

#define __lm_to_phys(addr)	(((addr) & ~PAGE_OFFSET) + PHYS_OFFSET)
#define __kimg_to_phys(addr)	((addr) - kimage_voffset)

#define __virt_to_phys_nodebug(x) ({					\
	phys_addr_t __x = (phys_addr_t)(x);				\
	__is_lm_address(__x) ? __lm_to_phys(__x) :			\
			       __kimg_to_phys(__x);			\
})

#define __pa_symbol_nodebug(x)	__kimg_to_phys((phys_addr_t)(x))

#ifdef CONFIG_DEBUG_VIRTUAL
extern phys_addr_t __virt_to_phys(unsigned long x);
extern phys_addr_t __phys_addr_symbol(unsigned long x);
#else
#define __virt_to_phys(x)	__virt_to_phys_nodebug(x)
#define __phys_addr_symbol(x)	__pa_symbol_nodebug(x)
#endif

#define __phys_to_virt(x)	((unsigned long)((x) - PHYS_OFFSET) | PAGE_OFFSET)
#define __phys_to_kimg(x)	((unsigned long)((x) + kimage_voffset))
```
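The following is a minimal sketch showing which translation each class of address ends up using. It is illustrative code assumed to be built into the kernel (the _text linker symbol is not exported to loadable modules), and kaslr_addr_demo() is a hypothetical helper, not an existing kernel function:
```
#include <linux/init.h>
#include <linux/kernel.h>	/* pr_info() */
#include <linux/slab.h>		/* kmalloc(), kfree() */
#include <asm/memory.h>		/* __pa(), __pa_symbol() */
#include <asm/sections.h>	/* _text */

static int __init kaslr_addr_demo(void)
{
	void *buf = kmalloc(64, GFP_KERNEL);
	phys_addr_t pa;

	if (buf) {
		pa = __pa(buf);		/* linear-map address -> __lm_to_phys(), based on PHYS_OFFSET */
		pr_info("buf:   va=0x%lx pa=%pa\n", (unsigned long)buf, &pa);
		kfree(buf);
	}

	pa = __pa_symbol(_text);	/* kernel image symbol -> __kimg_to_phys(), based on kimage_voffset */
	pr_info("_text: va=0x%lx pa=%pa\n", (unsigned long)_text, &pa);
	return 0;
}
late_initcall(kaslr_addr_demo);
```
A kmalloc() buffer lives in the linear map, so __pa() resolves it via PHYS_OFFSET, while _text is a kernel image symbol and is resolved via kimage_voffset.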
最后看一下进入 start_kernel() 后内核线性地址空间的随机化过程:
start_kernel()setup_arch()arm64_memblock_init()/* arch/arm64/mm/init.c */
void __init arm64_memblock_init(void)
{const s64 linear_region_size = -(s64)PAGE_OFFSET;.../** Select a suitable value for the base of physical memory.*/memstart_addr = round_down(memblock_start_of_DRAM(),ARM64_MEMSTART_ALIGN);/** Remove the memory that we will not be able to cover with the* linear mapping. Take care not to clip the kernel which may be* high in memory.*/memblock_remove(max_t(u64, memstart_addr + linear_region_size,__pa_symbol(_end)), ULLONG_MAX);if (memstart_addr + linear_region_size < memblock_end_of_DRAM()) {/* ensure that memstart_addr remains sufficiently aligned */memstart_addr = round_up(memblock_end_of_DRAM() - linear_region_size,ARM64_MEMSTART_ALIGN);memblock_remove(0, memstart_addr);}...if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {extern u16 memstart_offset_seed; // kaslr_early_init(): 内核线性地址区间随机化偏移u64 range = linear_region_size -(memblock_end_of_DRAM() - memblock_start_of_DRAM()); // 内核线性地址区间大小/** If the size of the linear region exceeds, by a sufficient* margin, the size of the region that the available physical* memory spans, randomize the linear region as well.*/if (memstart_offset_seed > 0 && range >= ARM64_MEMSTART_ALIGN) {// 这里有一个 BUG,会导致有概率出现内核随机化线性地址区间和 ERR_PTR 重叠。// 细节见:// https://www.spinics.net/lists/arm-kernel/msg697256.html// v4.14.112 修正补丁:// https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=linux-4.14.y&id=8add7054070ab79cb271b336d9660bca0ffcaf85range = range / ARM64_MEMSTART_ALIGN + 1;memstart_addr -= ARM64_MEMSTART_ALIGN *((range * memstart_offset_seed) >> 16);}}...
}
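As a worked example of the randomization arithmetic, here is a standalone sketch. It assumes ARM64_MEMSTART_ALIGN = 1 GB (the 4 KB-granule case), a 48-bit-VA linear region and 4 GB of DRAM, with a made-up seed; only the formula mirrors arm64_memblock_init().
```
#include <stdio.h>

#define GB			(1ULL << 30)
#define ARM64_MEMSTART_ALIGN	GB		/* assumed: 4 KB granule, SECTION_SIZE_BITS = 30 */

int main(void)
{
	unsigned long long linear_region_size = 1ULL << 47;	/* -(s64)PAGE_OFFSET for VA_BITS = 48 */
	unsigned long long dram_size          = 4 * GB;		/* made-up amount of DRAM */
	unsigned short memstart_offset_seed   = 0xc0de;		/* made-up top 16 bits of the KASLR seed */

	/* slack between the linear region and the span of physical memory */
	unsigned long long range = linear_region_size - dram_size;

	if (memstart_offset_seed > 0 && range >= ARM64_MEMSTART_ALIGN) {
		/* calculation as in v4.14.111 (see the bug note quoted above) */
		unsigned long long steps;

		range = range / ARM64_MEMSTART_ALIGN + 1;
		steps = (range * memstart_offset_seed) >> 16;	/* memstart_addr -= ARM64_MEMSTART_ALIGN * steps */

		printf("range = %llu slots of 1 GB, seed = 0x%x -> shift DRAM's linear mapping up by %llu GB\n",
		       range, memstart_offset_seed, steps);
	}
	return 0;
}
```
The printed number of 1 GB steps is how far the start of DRAM's mapping is moved up inside the linear region (by lowering memstart_addr, i.e. PHYS_OFFSET).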
3. References
[1] Kernel address space layout randomization
[2] arm64: implement support for KASLR
[3] KASLR
