【u-boot】u-boot启动文件start.S剖析(ARM64)
一、_start标签剖析
对于ARM64来说,u-boot启动入口位于/arch/arm/cpu/armv8/start.S中,完整汇编如下:
/*
 * AArch64 U-Boot entry (GNU as syntax, preprocessed by cpp).
 *
 * Unless the SoC provides its own boot0.h hook, _start branches over the
 * data words that follow and continues at `reset`.
 */
.globl	_start
_start:
#ifdef CONFIG_ENABLE_ARM_SOC_BOOT0_HOOK
/*
 * Various SoCs need something special and SoC-specific up front in
 * order to boot, allow them to set that in their boot0.h file and then
 * use it here.
 */
#include <asm/arch/boot0.h>
#else
	b	reset
#endif

#if !CONFIG_IS_ENABLED(TINY_FRAMEWORK)
	.align 3

.globl	_TEXT_BASE
_TEXT_BASE:
#if defined(CONFIG_SPL_BUILD)
	.quad	CONFIG_SPL_TEXT_BASE
#else
	.quad	CONFIG_SYS_TEXT_BASE
#endif

/*
 * These are defined in the linker script.
 */
.globl	_end_ofs
_end_ofs:
	.quad	_end - _start

.globl	_bss_start_ofs
_bss_start_ofs:
	.quad	__bss_start - _start

.globl	_bss_end_ofs
_bss_end_ofs:
	.quad	__bss_end - _start

reset:
	/* Allow the board to save important registers */
	b	save_boot_params
.globl	save_boot_params_ret
save_boot_params_ret:

#if CONFIG_POSITION_INDEPENDENT
	/*
	 * Fix .rela.dyn relocations. This allows U-Boot to be loaded to and
	 * executed at a different address than it was linked at.
	 *
	 * Each entry read by the loop is 24 bytes: two 64-bit words fetched
	 * with ldp (post-increment 16) plus one 64-bit addend (post-increment 8).
	 */
pie_fixup:
	adr	x0, _start		/* x0 <- Runtime value of _start */
	ldr	x1, _TEXT_BASE		/* x1 <- Linked value of _start */
	sub	x9, x0, x1		/* x9 <- Run-vs-link offset */
	adr	x2, __rel_dyn_start	/* x2 <- Runtime &__rel_dyn_start */
	adr	x3, __rel_dyn_end	/* x3 <- Runtime &__rel_dyn_end */
pie_fix_loop:
	ldp	x0, x1, [x2], #16	/* (x0, x1) <- (Link location, fixup) */
	ldr	x4, [x2], #8		/* x4 <- addend */
	cmp	w1, #1027		/* relative fixup? */
	bne	pie_skip_reloc
	/* relative fix: store addend plus offset at dest location */
	add	x0, x0, x9
	add	x4, x4, x9
	str	x4, [x0]
pie_skip_reloc:
	cmp	x2, x3			/* more entries before __rel_dyn_end? */
	b.lo	pie_fix_loop
pie_fixup_done:
#endif

#ifdef CONFIG_SYS_RESET_SCTRL
	bl	reset_sctrl
#endif
	/*
	 * Could be EL3/EL2/EL1, Initial State:
	 * Little Endian, MMU Disabled, i/dCache Disabled
	 */
	adr	x0, vectors		/* x0 <- PC-relative address of vectors */
	switch_el x1, 3f, 2f, 1f	/* x1 is the macro's scratch register */
3:	msr	vbar_el3, x0
	mrs	x0, scr_el3
	orr	x0, x0, #0xf		/* SCR_EL3.NS|IRQ|FIQ|EA */
	msr	scr_el3, x0
	msr	cptr_el3, xzr		/* Enable FP/SIMD */
#ifdef COUNTER_FREQUENCY
	ldr	x0, =COUNTER_FREQUENCY
	msr	cntfrq_el0, x0		/* Initialize CNTFRQ */
#endif
	b	0f
2:	msr	vbar_el2, x0
	mov	x0, #0x33ff
	msr	cptr_el2, x0		/* Enable FP/SIMD */
	b	0f
1:	msr	vbar_el1, x0
	mov	x0, #3 << 20
	msr	cpacr_el1, x0		/* Enable FP/SIMD */
0:

	/*
	 * Enable SMPEN bit for coherency.
	 * This register is not architectural but at the moment
	 * this bit should be set for A53/A57/A72.
	 */
#ifdef CONFIG_ARMV8_SET_SMPEN
	switch_el x1, 3f, 1f, 1f	/* only the first target touches the register */
3:
	mrs	x0, S3_1_c15_c2_1	/* cpuectlr_el1 */
	orr	x0, x0, #0x40
	msr	S3_1_c15_c2_1, x0
1:
#endif

	/* Apply ARM core specific erratas */
	bl	apply_core_errata

	/*
	 * Cache/BPB/TLB Invalidate
	 * i-cache is invalidated before enabled in icache_enable()
	 * tlb is invalidated before mmu is enabled in dcache_enable()
	 * d-cache is invalidated before enabled in dcache_enable()
	 */

	/* Processor specific initialization */
	bl	lowlevel_init

#if defined(CONFIG_ARMV8_SPIN_TABLE) && !defined(CONFIG_SPL_BUILD)
	branch_if_master x0, x1, master_cpu
	b	spin_table_secondary_jump
	/* never return */
#elif defined(CONFIG_ARMV8_MULTIENTRY)
	branch_if_master x0, x1, master_cpu

	/*
	 * Slave CPUs
	 *
	 * Wait with wfe until the word at CPU_RELEASE_ADDR is non-zero,
	 * then branch to the address stored there.
	 */
slave_cpu:
	wfe
	ldr	x1, =CPU_RELEASE_ADDR
	ldr	x0, [x1]
	cbz	x0, slave_cpu
	br	x0			/* branch to the given address */
#endif /* CONFIG_ARMV8_MULTIENTRY */
master_cpu:
	bl	_main

#ifdef CONFIG_SYS_RESET_SCTRL
/*
 * reset_sctrl: read SCTLR of the current exception level, AND it with
 * 0xfdfffffa, write it back, then tail-branch to __asm_invalidate_tlb_all.
 * Uses x0 and x1.
 */
reset_sctrl:
	switch_el x1, 3f, 2f, 1f
3:
	mrs	x0, sctlr_el3
	b	0f
2:
	mrs	x0, sctlr_el2
	b	0f
1:
	mrs	x0, sctlr_el1

0:
	ldr	x1, =0xfdfffffa
	and	x0, x0, x1

	switch_el x1, 6f, 5f, 4f
6:
	msr	sctlr_el3, x0
	b	7f
5:
	msr	sctlr_el2, x0
	b	7f
4:
	msr	sctlr_el1, x0

7:
	dsb	sy
	isb
	b	__asm_invalidate_tlb_all	/* plain branch: the ret below is not reached from here */
	ret
#endif

/*-----------------------------------------------------------------------*/

/*
 * apply_core_errata (weak): apply the CONFIG_ARM_ERRATA_* workarounds that
 * are compiled in when branch_if_a57_core takes its branch; otherwise
 * return immediately. LR is parked in x29 for the duration; x0 is used as
 * scratch.
 */
WEAK(apply_core_errata)

	mov	x29, lr			/* Save LR */
	/* For now, we support Cortex-A57 specific errata only */

	/* Check if we are running on a Cortex-A57 core */
	branch_if_a57_core x0, apply_a57_core_errata
0:
	mov	lr, x29			/* Restore LR */
	ret

apply_a57_core_errata:

#ifdef CONFIG_ARM_ERRATA_828024
	mrs	x0, S3_1_c15_c2_0	/* cpuactlr_el1 */
	/* Disable non-allocate hint of w-b-n-a memory type */
	orr	x0, x0, #1 << 49
	/* Disable write streaming no L1-allocate threshold */
	orr	x0, x0, #3 << 25
	/* Disable write streaming no-allocate threshold */
	orr	x0, x0, #3 << 27
	msr	S3_1_c15_c2_0, x0	/* cpuactlr_el1 */
#endif

#ifdef CONFIG_ARM_ERRATA_826974
	mrs	x0, S3_1_c15_c2_0	/* cpuactlr_el1 */
	/* Disable speculative load execution ahead of a DMB */
	orr	x0, x0, #1 << 59
	msr	S3_1_c15_c2_0, x0	/* cpuactlr_el1 */
#endif

#ifdef CONFIG_ARM_ERRATA_833471
	mrs	x0, S3_1_c15_c2_0	/* cpuactlr_el1 */
	/* FPSCR write flush.
	 * Note that in some cases where a flush is unnecessary this
	    could impact performance. */
	orr	x0, x0, #1 << 38
	msr	S3_1_c15_c2_0, x0	/* cpuactlr_el1 */
#endif

#ifdef CONFIG_ARM_ERRATA_829520
	mrs	x0, S3_1_c15_c2_0	/* cpuactlr_el1 */
	/* Disable Indirect Predictor bit will prevent this erratum
	    from occurring
	 * Note that in some cases where a flush is unnecessary this
	    could impact performance. */
	orr	x0, x0, #1 << 4
	msr	S3_1_c15_c2_0, x0	/* cpuactlr_el1 */
#endif

#ifdef CONFIG_ARM_ERRATA_833069
	mrs	x0, S3_1_c15_c2_0	/* cpuactlr_el1 */
	/* Disable Enable Invalidates of BTB bit */
	/*
	 * NOTE(review): this AND keeps only bits [3:1] of x0, so every other
	 * bit read from the register (including bits set by the errata
	 * blocks above) is written back as zero. Confirm this is intended
	 * rather than clearing a single bit.
	 */
	and	x0, x0, #0xE
	msr	S3_1_c15_c2_0, x0	/* cpuactlr_el1 */
#endif
	b	0b
ENDPROC(apply_core_errata)

/*-----------------------------------------------------------------------*/

/*
 * lowlevel_init (weak): optional GIC setup (CONFIG_IS_ENABLED(IRQ)) and,
 * with CONFIG_ARMV8_MULTIENTRY, moving non-master CPUs to EL2 (and
 * optionally EL1). LR is parked in x29 because the body makes calls.
 */
WEAK(lowlevel_init)
	mov	x29, lr			/* Save LR */

#if CONFIG_IS_ENABLED(IRQ)
	branch_if_slave x0, 1f		/* distributor init is skipped on slaves */
	ldr	x0, =GICD_BASE
	bl	gic_init_secure
1:
#if defined(CONFIG_GICV3)
	ldr	x0, =GICR_BASE
	bl	gic_init_secure_percpu
#elif defined(CONFIG_GICV2)
	ldr	x0, =GICD_BASE
	ldr	x1, =GICC_BASE
	bl	gic_init_secure_percpu
#endif
#endif

#ifdef CONFIG_ARMV8_MULTIENTRY
	branch_if_master x0, x1, 2f

	/*
	 * Slave should wait for master clearing spin table.
	 * This sync prevent slaves observing incorrect
	 * value of spin table and jumping to wrong place.
	 */
#if defined(CONFIG_GICV2) || defined(CONFIG_GICV3)
#ifdef CONFIG_GICV2
	ldr	x0, =GICC_BASE
#endif
	bl	gic_wait_for_interrupt
#endif

	/*
	 * All slaves will enter EL2 and optionally EL1.
	 */
	adr	x4, lowlevel_in_el2
	ldr	x5, =ES_TO_AARCH64
	bl	armv8_switch_to_el2

lowlevel_in_el2:
#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
	adr	x4, lowlevel_in_el1
	ldr	x5, =ES_TO_AARCH64
	bl	armv8_switch_to_el1

lowlevel_in_el1:
#endif

#endif /* CONFIG_ARMV8_MULTIENTRY */

2:
	mov	lr, x29			/* Restore LR */
	ret
ENDPROC(lowlevel_init)

/*
 * smp_kick_all_cpus (weak): with a GIC configured, tail-branch to
 * gic_kick_secondary_cpus with x0 = GICD_BASE; otherwise just return.
 */
WEAK(smp_kick_all_cpus)
	/* Kick secondary cpus up by SGI 0 interrupt */
#if defined(CONFIG_GICV2) || defined(CONFIG_GICV3)
	ldr	x0, =GICD_BASE
	b	gic_kick_secondary_cpus
#endif
	ret
ENDPROC(smp_kick_all_cpus)

/*-----------------------------------------------------------------------*/

/*
 * c_runtime_cpu_setup: write the PC-relative address of `vectors` to the
 * VBAR of the current exception level. Uses x0 and x1.
 */
ENTRY(c_runtime_cpu_setup)
	/* Relocate vBAR */
	adr	x0, vectors
	switch_el x1, 3f, 2f, 1f
3:	msr	vbar_el3, x0
	b	0f
2:	msr	vbar_el2, x0
	b	0f
1:	msr	vbar_el1, x0
0:

	ret
ENDPROC(c_runtime_cpu_setup)

/*
 * save_boot_params (weak default): nothing to save; branch straight back
 * to save_boot_params_ret. Entered with `b`, not `bl`, so it must not `ret`.
 */
WEAK(save_boot_params)
	b	save_boot_params_ret	/* back to my caller */
ENDPROC(save_boot_params)
#endif /* !CONFIG_IS_ENABLED(TINY_FRAMEWORK) */
本文以常规u-boot配置进行分析,对u-boot反汇编查看详细信息:

<save_boot_params_ret>实现如下:

上述汇编代码通过配置宏可开启不同的指令操作,默认情况下,汇编代码执行过程如下:
(1)异常向量表配置和运行级别(EL)状态配置
启动早期根据当前CPU的Exception Level(EL3 / EL2 / EL1)把向量表地址写入对应的VBAR_ELn,并为较低EL配置若干系统寄存器(安全/中断路由、浮点/SIMD使用许可、系统计时器频率等),以便后续进入操作系统或EL1/EL0时异常/中断和浮点指令能被正确处理:
/** Could be EL3/EL2/EL1, Initial State:* Little Endian, MMU Disabled, i/dCache Disabled*/
adr x0, vectors /* 把当前代码段中名为vectors的符号的地址(PC相对算出来的地址)放入x0 */
/* 读取CurrentEL(当前异常级别)并根据结果跳到对应的标签(3f表示EL3分支,2f表示EL2,1f表示EL1) */
switch_el x1, 3f, 2f, 1f
3: msr vbar_el3, x0mrs x0, scr_el3orr x0, x0, #0xf /* SCR_EL3.NS|IRQ|FIQ|EA */msr scr_el3, x0msr cptr_el3, xzr /* Enable FP/SIMD */
#ifdef COUNTER_FREQUENCYldr x0, =COUNTER_FREQUENCYmsr cntfrq_el0, x0 /* Initialize CNTFRQ */
#endifb 0f
2: msr vbar_el2, x0mov x0, #0x33ffmsr cptr_el2, x0 /* Enable FP/SIMD */b 0f
1: msr vbar_el1, x0mov x0, #3 << 20msr cpacr_el1, x0 /* Enable FP/SIMD */
0:
(2)调用apply_core_errata处理特定处理器核心的操作
/*
 * apply_core_errata (weak): apply the CONFIG_ARM_ERRATA_* workarounds that
 * are compiled in when branch_if_a57_core takes its branch; otherwise
 * return immediately. LR is parked in x29 for the duration; x0 is used as
 * scratch.
 */
WEAK(apply_core_errata)

	mov	x29, lr			/* Save LR */
	/* For now, we support Cortex-A57 specific errata only */

	/* Check if we are running on a Cortex-A57 core */
	branch_if_a57_core x0, apply_a57_core_errata
0:
	mov	lr, x29			/* Restore LR */
	ret

apply_a57_core_errata:

#ifdef CONFIG_ARM_ERRATA_828024
	mrs	x0, S3_1_c15_c2_0	/* cpuactlr_el1 */
	/* Disable non-allocate hint of w-b-n-a memory type */
	orr	x0, x0, #1 << 49
	/* Disable write streaming no L1-allocate threshold */
	orr	x0, x0, #3 << 25
	/* Disable write streaming no-allocate threshold */
	orr	x0, x0, #3 << 27
	msr	S3_1_c15_c2_0, x0	/* cpuactlr_el1 */
#endif

#ifdef CONFIG_ARM_ERRATA_826974
	mrs	x0, S3_1_c15_c2_0	/* cpuactlr_el1 */
	/* Disable speculative load execution ahead of a DMB */
	orr	x0, x0, #1 << 59
	msr	S3_1_c15_c2_0, x0	/* cpuactlr_el1 */
#endif

#ifdef CONFIG_ARM_ERRATA_833471
	mrs	x0, S3_1_c15_c2_0	/* cpuactlr_el1 */
	/* FPSCR write flush.
	 * Note that in some cases where a flush is unnecessary this
	    could impact performance. */
	orr	x0, x0, #1 << 38
	msr	S3_1_c15_c2_0, x0	/* cpuactlr_el1 */
#endif

#ifdef CONFIG_ARM_ERRATA_829520
	mrs	x0, S3_1_c15_c2_0	/* cpuactlr_el1 */
	/* Disable Indirect Predictor bit will prevent this erratum
	    from occurring
	 * Note that in some cases where a flush is unnecessary this
	    could impact performance. */
	orr	x0, x0, #1 << 4
	msr	S3_1_c15_c2_0, x0	/* cpuactlr_el1 */
#endif

#ifdef CONFIG_ARM_ERRATA_833069
	mrs	x0, S3_1_c15_c2_0	/* cpuactlr_el1 */
	/* Disable Enable Invalidates of BTB bit */
	/*
	 * NOTE(review): this AND keeps only bits [3:1] of x0, so every other
	 * bit read from the register (including bits set by the errata
	 * blocks above) is written back as zero. Confirm this is intended
	 * rather than clearing a single bit.
	 */
	and	x0, x0, #0xE
	msr	S3_1_c15_c2_0, x0	/* cpuactlr_el1 */
#endif
	b	0b
ENDPROC(apply_core_errata)
此处以常规u-boot为例进行分析,apply_core_errata汇编指令如下:

(3)调用lowlevel_init进行特定处理器的操作
/*
 * lowlevel_init (weak): optional GIC setup (CONFIG_IS_ENABLED(IRQ)) and,
 * with CONFIG_ARMV8_MULTIENTRY, moving non-master CPUs to EL2 (and
 * optionally EL1). LR is parked in x29 because the body makes calls.
 */
WEAK(lowlevel_init)
	mov	x29, lr			/* Save LR */

#if CONFIG_IS_ENABLED(IRQ)
	branch_if_slave x0, 1f		/* distributor init is skipped on slaves */
	ldr	x0, =GICD_BASE
	bl	gic_init_secure
1:
#if defined(CONFIG_GICV3)
	ldr	x0, =GICR_BASE
	bl	gic_init_secure_percpu
#elif defined(CONFIG_GICV2)
	ldr	x0, =GICD_BASE
	ldr	x1, =GICC_BASE
	bl	gic_init_secure_percpu
#endif
#endif

#ifdef CONFIG_ARMV8_MULTIENTRY
	branch_if_master x0, x1, 2f

	/*
	 * Slave should wait for master clearing spin table.
	 * This sync prevent slaves observing incorrect
	 * value of spin table and jumping to wrong place.
	 */
#if defined(CONFIG_GICV2) || defined(CONFIG_GICV3)
#ifdef CONFIG_GICV2
	ldr	x0, =GICC_BASE
#endif
	bl	gic_wait_for_interrupt
#endif

	/*
	 * All slaves will enter EL2 and optionally EL1.
	 */
	adr	x4, lowlevel_in_el2
	ldr	x5, =ES_TO_AARCH64
	bl	armv8_switch_to_el2

lowlevel_in_el2:
#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
	adr	x4, lowlevel_in_el1
	ldr	x5, =ES_TO_AARCH64
	bl	armv8_switch_to_el1

lowlevel_in_el1:
#endif

#endif /* CONFIG_ARMV8_MULTIENTRY */

2:
	mov	lr, x29			/* Restore LR */
	ret
ENDPROC(lowlevel_init)
此处以常规u-boot为例进行分析,lowlevel_init汇编如下:

上述代码将调用gic_init_secure初始化gic,实现如下(/arch/arm/lib/gic_64.S):
/*
 * gic_init_secure: one-time GIC distributor setup.
 * In:    x0 = Distributor base
 * Uses:  w9, w10, x11, x12 (GICv3); w0, x1, w9, w10, x11 (GICv2)
 */
ENTRY(gic_init_secure)
	/*
	 * Initialize Distributor
	 * x0: Distributor Base
	 */
#if defined(CONFIG_GICV3)
	mov	w9, #0x37		/* EnableGrp0 | EnableGrp1NS */
					/* EnableGrp1S | ARE_S | ARE_NS */
	str	w9, [x0, GICD_CTLR]	/* Secure GICD_CTLR */
	ldr	w9, [x0, GICD_TYPER]
	and	w10, w9, #0x1f		/* ITLinesNumber */
	cbz	w10, 1f			/* No SPIs */
	/* Start one register (+4) past the first IGROUPR/IGRPMODR word */
	add	x11, x0, (GICD_IGROUPRn + 4)
	add	x12, x0, (GICD_IGROUPMODRn + 4)
	mov	w9, #~0
0:	str	w9, [x11], #0x4
	str	wzr, [x12], #0x4	/* Config SPIs as Group1NS */
	sub	w10, w10, #0x1
	cbnz	w10, 0b
#elif defined(CONFIG_GICV2)
	mov	w9, #0x3		/* EnableGrp0 | EnableGrp1 */
	str	w9, [x0, GICD_CTLR]	/* Secure GICD_CTLR */
	ldr	w9, [x0, GICD_TYPER]
	and	w10, w9, #0x1f		/* ITLinesNumber */
	cbz	w10, 1f			/* No SPIs */
	add	x11, x0, GICD_IGROUPRn
	mov	w9, #~0			/* Config SPIs as Grp1 */
	/* First store covers the word at GICD_IGROUPRn; the loop does the rest */
	str	w9, [x11], #0x4
0:	str	w9, [x11], #0x4
	sub	w10, w10, #0x1
	cbnz	w10, 0b

	/* CPU interface setup; skipped when the "No SPIs" branch above is taken */
	ldr	x1, =GICC_BASE		/* GICC_CTLR */
	mov	w0, #3			/* EnableGrp0 | EnableGrp1 */
	str	w0, [x1]

	mov	w0, #1 << 7		/* allow NS access to GICC_PMR */
	str	w0, [x1, #4]		/* GICC_PMR */
#endif
1:
	ret
ENDPROC(gic_init_secure)
在lowlevel_init中,还会调用gic_init_secure_percpu,实现如下(/arch/arm/lib/gic_64.S):
/*
 * gic_init_secure_percpu: per-CPU GIC setup.
 * In (GICv3): x0 = ReDistributor base
 * In (GICv2): x0 = Distributor base, x1 = Cpu Interface base
 * Uses:       x0, x9, x10, x11 (GICv3); w9 (GICv2)
 */
ENTRY(gic_init_secure_percpu)
#if defined(CONFIG_GICV3)
	/*
	 * Initialize ReDistributor
	 * x0: ReDistributor Base
	 */
	mrs	x10, mpidr_el1
	lsr	x9, x10, #32
	bfi	x10, x9, #24, #8	/* w10 is aff3:aff2:aff1:aff0 */
	mov	x9, x0
	/* Step through redistributor frames until the affinity matches */
1:	ldr	x11, [x9, GICR_TYPER]
	lsr	x11, x11, #32		/* w11 is aff3:aff2:aff1:aff0 */
	cmp	w10, w11
	b.eq	2f
	add	x9, x9, #(2 << 16)
	b	1b

	/* x9: ReDistributor Base Address of Current CPU */
2:	mov	w10, #~0x2
	ldr	w11, [x9, GICR_WAKER]
	and	w11, w11, w10		/* Clear ProcessorSleep */
	str	w11, [x9, GICR_WAKER]
	dsb	st
	isb
3:	ldr	w10, [x9, GICR_WAKER]
	tbnz	w10, #2, 3b		/* Wait Children be Alive */

	add	x10, x9, #(1 << 16)	/* SGI_Base */
	mov	w11, #~0
	str	w11, [x10, GICR_IGROUPRn]
	str	wzr, [x10, GICR_IGROUPMODRn]	/* SGIs|PPIs Group1NS */
	mov	w11, #0x1		/* Enable SGI 0 */
	str	w11, [x10, GICR_ISENABLERn]

#if CONFIG_IS_ENABLED(IRQ)
	/* Rockchip: check elx */
	switch_el x0, el3_sre, el2_sre, el1_sre

	/* Initialize Cpu Interface */
	/* The labels below fall through: el3_sre -> el2_sre -> el1_sre */
el3_sre:
	mrs	x10, ICC_SRE_EL3
	orr	x10, x10, #0xf		/* SRE & Disable IRQ/FIQ Bypass & */
					/* Allow EL2 access to ICC_SRE_EL2 */
	msr	ICC_SRE_EL3, x10
	isb

el2_sre:
	mrs	x10, ICC_SRE_EL2
	orr	x10, x10, #0xf		/* SRE & Disable IRQ/FIQ Bypass & */
					/* Allow EL1 access to ICC_SRE_EL1 */
	msr	ICC_SRE_EL2, x10
	isb

el1_sre:
	mrs	x0, CurrentEL		/* check currentEL */
	cmp	x0, 0xC
	b.ne	el1_ctlr		/* currentEL != EL3 */

el3_ctlr:
	mov	x10, #0x3		/* EnableGrp1NS | EnableGrp1S */
	msr	ICC_IGRPEN1_EL3, x10
	isb

	msr	ICC_CTLR_EL3, xzr
	isb

el1_ctlr:
	mov	x10, #0x3		/* EnableGrp1NS | EnableGrp1S */
	msr	ICC_IGRPEN1_EL1, x10
	isb

	msr	ICC_CTLR_EL1, xzr	/* NonSecure ICC_CTLR_EL1 */
	isb

	mov	x10, #0xf0		/* Non-Secure access to ICC_PMR_EL1 */
	msr	ICC_PMR_EL1, x10
	isb
#else
	/* Initialize Cpu Interface */
	mrs	x10, ICC_SRE_EL3
	orr	x10, x10, #0xf		/* SRE & Disable IRQ/FIQ Bypass & */
					/* Allow EL2 access to ICC_SRE_EL2 */
	msr	ICC_SRE_EL3, x10
	isb

	mrs	x10, ICC_SRE_EL2
	orr	x10, x10, #0xf		/* SRE & Disable IRQ/FIQ Bypass & */
					/* Allow EL1 access to ICC_SRE_EL1 */
	msr	ICC_SRE_EL2, x10
	isb

	mov	x10, #0x3		/* EnableGrp1NS | EnableGrp1S */
	msr	ICC_IGRPEN1_EL3, x10
	isb

	msr	ICC_CTLR_EL3, xzr
	isb

	msr	ICC_CTLR_EL1, xzr	/* NonSecure ICC_CTLR_EL1 */
	isb

	mov	x10, #0x1 << 7		/* Non-Secure access to ICC_PMR_EL1 */
	msr	ICC_PMR_EL1, x10
	isb
#endif
#elif defined(CONFIG_GICV2)
	/*
	 * Initialize SGIs and PPIs
	 * x0: Distributor Base
	 * x1: Cpu Interface Base
	 */
	mov	w9, #~0			/* Config SGIs and PPIs as Grp1 */
	str	w9, [x0, GICD_IGROUPRn]	/* GICD_IGROUPR0 */
	mov	w9, #0x1		/* Enable SGI 0 */
	str	w9, [x0, GICD_ISENABLERn]

	/* Initialize Cpu Interface */
	mov	w9, #0x1e7		/* Disable IRQ/FIQ Bypass & */
					/* Enable Ack Group1 Interrupt & */
					/* EnableGrp0 & EnableGrp1 */
	str	w9, [x1, GICC_CTLR]	/* Secure GICC_CTLR */

	mov	w9, #0x1 << 7		/* Non-Secure access to GICC_PMR */
	str	w9, [x1, GICC_PMR]
#endif
	ret
ENDPROC(gic_init_secure_percpu)
(4)调用_main进入u-boot的启动主线
master_cpu:
	bl	_main			/* call _main, U-Boot's main boot path */
