I am working on writing a minimalistic bare metal kernel for the raspberry pi 5 (having arm cortex a76). Everything was going on smoothly, until I came to enabling the MMU. I am not sure what I am missing here, but I think I am unable to initialise the translation tables for the MMU appropriately. Here is my bootstrap code for the kernel:
#include "system_registers.h" #include "mmu.h" .section ".text.boot" .global _start _start: mrs x7, mpidr_el1 // Multi-Processor Identifier(EL1) lsr x7, x7, #8 // Shift the identifier to the right by 8 bits. and x7, x7, #7 // Last 3 bits of the identifier extracted. Indicates CPU ID. cbz x7, intr_init // Proceed if on primary core, else wait for the primary core to initialize. // The default stub for the Raspberry Pi 5 looks for the first instruction at x80000, // and runs it on the primary core of the processor (generally core 0), and parks all // the remaining cores. If the address of a function is written onto the register at // which the respective core expects it, the core is then woken up from sleep to // execute that function. // delay for the secondary cores to wait for the primary core to initialize mov x0, #10000 core_init_delay: sub x0, x0, #1 cbz x0, intr_init b core_init_delay core_hang: wfe // Wait for event, and loop indefinitely b core_hang // Initialise required system registers required before jumping into EL1 exception level. intr_init: ldr x0, =SCTLR_VALUE_MMU_DISABLED msr sctlr_el1, x0 ldr x0, =HCR_VALUE msr hcr_el2, x0 ldr x0, =TCR_VALUE msr tcr_el1, x0 ldr x0, =MAIR_VALUE msr mair_el1, x0 ldr x0, =SPSR_VALUE msr spsr_el2, x0 adr x0, bss_init msr elr_el2, x0 eret bss_init: adrp x1, __bss_start // Load the start address of the BSS section, defined in the linker. adrp x2, __bss_size // Load the end address of the BSS section, defined in the linker. bss_loop: cbz x2, stack_init // Skip initialization of BSS variables if the size is `0` str xzr, [x1], #8 // Initialize all the BSS variables to `0` sub x2, x2, #1 cbnz x2, bss_loop // Iterate for all variables of the BSS section stack_init: bl __create_page_tables adrp x0, __user_end mov sp, x0 adrp x0, id_pg_dir msr ttbr0_el1, x0 adrp x0, high_pg_dir msr ttbr1_el1, x0 mrs x0, sctlr_el1 ldr x1, =SCTLR_MMU_ENABLED orr x0, x0, x1 msr sctlr_el1, x0 dsb sy mov x0, #0 // Pass core ID as an argument to the `kernel_main()` routine of the kernel bl kernel_main // Link and jump to the `kernel_main()` routine of the kernel b core_hang // If the `kernel_main()` routine of the kernel returns, loop indefinitely .macro create_table_entry, table, next_table, va_start, shift, t1, t2 lsr \t1, \va_start, #\shift // Shift the virtual address to the right by the shift value and \t1, \t1, #ENTRIES_PER_TABLE - 1 // Extract the index of the entry in the table, by masking the bits to the left of the index. mov \t2, \next_table // Load the address of the next table into a register orr \t2, \t2, #MM_TYPE_PAGE_TABLE // Set the type of the table to a page table str \t2, [\table, \t1, lsl #3] // Store the table descriptor entry in the table at the calculated index .endm .macro create_block_map, table, va_start, va_end, pa_start, flags, t1 lsr \va_start, \va_start, #SECTION_SHIFT // Shift the virtual address to the right by the section shift value and \va_start, \va_start, #ENTRIES_PER_TABLE - 1 lsr \va_end, \va_end, #SECTION_SHIFT sub \va_end, \va_end, #1 and \va_end, \va_end, #ENTRIES_PER_TABLE - 1 lsr \pa_start, \pa_start, #SECTION_SHIFT lsl \pa_start, \pa_start, #SECTION_SHIFT mov \t1, \flags orr \pa_start, \pa_start, \t1 9999: str \pa_start, [\table, \va_start, lsl #3] add \va_start, \va_start, #1 add \pa_start, \pa_start, #SECTION_SIZE cmp \va_start, \va_end b.le 9999b .endm __create_page_tables: mov x29, x30 // Save the return address adrp x0, id_pg_dir mov x1, #ID_MAP_TABLE_SIZE bl mem_init_zero adrp x0, id_pg_dir add x1, x0, #PAGE_SIZE eor x4, x4, x4 create_table_entry x0, x1, x4, PGD_SHIFT, x2, x3 add x0, x0, #PAGE_SIZE add x1, x1, #PAGE_SIZE create_table_entry x0, x1, x4, PUD_SHIFT, x2, x3 mov x0, x1 eor x2, x2, x2 ldr x3, =ID_MAP_SIZE eor x4, x4, x4 ldr x6, =MMU_KERNEL_FLAGS create_block_map x0, x2, x3, x4, x6, x5 adrp x0, high_pg_dir mov x1, #HIGH_MAP_TABLE_SIZE bl mem_init_zero adrp x0, high_pg_dir add x1, x0, #PAGE_SIZE ldr x4, =VA_START create_table_entry x0, x1, x4, PGD_SHIFT, x2, x3 add x0, x0, #PAGE_SIZE add x1, x1, #PAGE_SIZE ldr x4, =VA_START ldr x5, =PUD_ENTRY_MAP_SIZE create_table_entry x0, x1, x4, PUD_SHIFT, x2, x3 add x1, x1, #PAGE_SIZE add x4, x4, x5 create_table_entry x0, x1, x4, PUD_SHIFT, x2, x3 add x1, x1, #PAGE_SIZE add x4, x4, x5 create_table_entry x0, x1, x4, PUD_SHIFT, x2, x3 add x1, x1, #PAGE_SIZE add x4, x4, x5 create_table_entry x0, x1, x4, PUD_SHIFT, x2, x3 add x1, x1, #PAGE_SIZE ldr x4, =VA_START ldr x5, =PHYSICAL_DEVICE_START add x4, x4, x5 create_table_entry x0, x1, x4, PUD_SHIFT, x2, x3 ldr x10, =HIGH_MAP_FIRST_START ldr x11, =HIGH_MAP_FIRST_END ldr x12, =HIGH_MAP_SECOND_START ldr x13, =HIGH_MAP_SECOND_END ldr x14, =HIGH_MAP_THIRD_START ldr x15, =HIGH_MAP_THIRD_END ldr x16, =HIGH_MAP_FOURTH_START ldr x17, =HIGH_MAP_FOURTH_END ldr x18, =HIGH_MAP_DEVICE_START ldr x19, =HIGH_MAP_DEVICE_END ldr x20, =PHYSICAL_FIRST_START ldr x21, =PHYSICAL_SECOND_START ldr x22, =PHYSICAL_THIRD_START ldr x23, =PHYSICAL_FOURTH_START ldr x24, =PHYSICAL_DEVICE_START add x0, x0, #PAGE_SIZE mov x2, x10 mov x3, x11 mov x4, x20 create_block_map x0, x2, x3, x4, x6, x5 add x0, x0, #PAGE_SIZE mov x2, x12 mov x3, x13 mov x4, x21 create_block_map x0, x2, x3, x4, x6, x5 add x0, x0, #PAGE_SIZE mov x2, x14 mov x3, x15 mov x4, x22 create_block_map x0, x2, x3, x4, x6, x5 add x0, x0, #PAGE_SIZE mov x2, x16 mov x3, x17 mov x4, x23 create_block_map x0, x2, x3, x4, x6, x5 add x0, x0, #PAGE_SIZE mov x2, x18 mov x3, x19 mov x4, x24 ldr x6, =MMU_PERIPHERALS_FLAGS create_block_map x0, x2, x3, x4, x6, x5 mov x30, x29 ret
The code here, fails to branch to `kernel_main()` if it branches to `__create_page_tables`, and I don't understand why. I also had UART prints after every statement in the code, so as to check where the execution is hanging, and it turns out only the `bl kernel_main` statement is not taking place. Following is my linker:
SECTIONS { . = 0x80000; /* Start address of the kernel image */ .text : { KEEP(*(.text.boot)) *(.text .text.* .gnu.linkonce.t*) } .rodata : { *(.rodata .rodata.* .gnu.linkonce.r*) } /* PROVIDE -> for variables which are referenced but not initialized in the section */ PROVIDE(_data = .); /* Initialize data start address to current location pointer */ .data : { *(.data .data.* .gnu.linkonce.d*) } /* Section for initialized data */ .bss (NOLOAD) : { /* Section for uninitialized data; NOLOAD -> Don't allocate space in linking, will be allocated and initialized in runtime */ . = ALIGN(16); /* Align the current location pointer to the next 16bit boundary */ __bss_start = .; /* Start address of the BSS section */ *(.bss .bss.*) *(COMMON) /* COMMON -> used for uninitialized global variables that are declared without an explicit section attribute in multiple files. */ __bss_end = .; /* End address of the BSS section */ } . = ALIGN(0x10000); id_pg_dir = .; .data.id_pg_dir : { . += (3 * (1 << 12)); } . = ALIGN(0x10000); high_pg_dir = .; .data.high_pg_dir : { . += (7 * (1 << 12)); } . = ALIGN(0x10000); __user_begin = .; .text.user : { build/user* (.text .text.*) } .rodata.user : { build/user* (.rodata .rodata.*) } .data.user : { build/user* (.data .data.*) } .bss.user : { build/user* (.bss .bss.*) } __user_end = .; _end = .; /* End address of loaded program data */ /* DISCARD certain sections from the final output file, like comments, metadata and debugging information */ /DISCARD/ : { *(.comment) *(.gnu*) *(.note*) *(.eh_frame*) } } /* .gnu.linkonce.d -> used by the GNU linker for certain optimizations * for example, merge identical constants across various files into a single section to save space and access times */ __bss_size = (__bss_end - __bss_start)>>3; /* Size of the BSS section */
also, for reference, the VA_START variable is a macro which resolves to 0x0 (I'm trying out identity mapping as of now...)
I am not sure as to what I am doing wrong, or what exactly I am missing and failing to understand. Any help would be really helpful...Thanks!
I am using a raspberry pi 5, and the arm stub hands over control to the kernel bootstrap code in EL2, hence I stoop down from EL2 to EL1 in the bootstrap.