Linux内核启动流程【转】

转自:https://blog.csdn.net/cc243494926/article/details/62247071

本文以Linux3.14版本源码为例分析其启动流程。各版本启动代码略有不同,但核心流程与思想万变不离其宗。


内核映像被加载到内存并获得控制权之后,内核启动流程开始。通常,内核映像以压缩形式存储,并不是一个可以执行的内核。因此,内核阶段的首要工作是自解压内核映像。

 

内核编译生成vmlinux后,通常会对其进行压缩,得到zImage(小内核,小于512KB)或bzImage(大内核,大于512KB)。在它们的头部嵌有解压缩程序。


 

通过linux/arch/arm/boot/compressed目录下的Makefile寻找到vmlinux文件的链接脚本(vmlinux.lds),从中查找系统启动入口函数。

 

  1. $(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/$(HEAD) $(obj)/piggy.$(suffix_y).o
  2. $(addprefix $(obj)/, $(OBJS)) $(lib1funcs) $(ashldi3)
  3. $(bswapsdi2) FORCE
  4. @$(check_for_multiple_zreladdr)
  5. $(call if_changed,ld)
  6. @$(check_for_bad_syms)

vmlinux.lds(linux/arch/arm/kernel/vmlinux.lds)链接脚本开头内容

  1. OUTPUT_ARCH(arm)
  2. ENTRY(stext)
  3. jiffies = jiffies_64;
  4. SECTIONS
  5. {

得到内核入口函数为 stext,其定义位于 linux/arch/arm/kernel/head.S

内核引导阶段

 

  1. ENTRY(stext)
  2. bl __lookup_processor_type @ r5=procinfo r9=cpuid //处理器是否支持
  3. movs r10, r5 @ invalid processor (r5=0)?
  4. THUMB( it eq ) @ force fixup-able long branch encoding
  5. beq __error_p @ yes, error 'p' //不支持则打印错误信息
  6.  
  7. bl __create_page_tables //创建页表
  8.  
  9. /*
  10. * The following calls CPU specific code in a position independent
  11. * manner. See arch/arm/mm/proc-*.S for details. r10 = base of
  12. * xxx_proc_info structure selected by __lookup_processor_type
  13. * above. On return, the CPU will be ready for the MMU to be
  14. * turned on, and r0 will hold the CPU control register value.
  15. */
  16. ldr r13, =__mmap_switched @ address to jump to after //保存MMU使能后跳转地址
  17. @ mmu has been enabled
  18. adr lr, BSYM(1f) @ return (PIC) address
  19. mov r8, r4 @ set TTBR1 to swapper_pg_dir
  20. ARM( add pc, r10, #PROCINFO_INITFUNC )
  21. THUMB( add r12, r10, #PROCINFO_INITFUNC )
  22. THUMB( mov pc, r12 )
  23. 1: b __enable_mmu //使能MMU后跳转到__mmap_switched


查找标签 __mmap_switched 所在位置: /linux/arch/arm/kernel/head-common.S

 

  1. __mmap_switched:
  2. /*
  3. * The following fragment of code is executed with the MMU on in MMU mode,
  4. * and uses absolute addresses; this is not position independent.
  5. *
  6. * r0 = cp#15 control register
  7. * r1 = machine ID
  8. * r2 = atags/dtb pointer
  9. * r9 = processor ID
  10. */
  11. //保存设备信息、设备树及启动参数存储地址
  12. b start_kernel


内核初始化阶段

start_kernel函数开始,内核进入C语言部分,完成内核的大部分初始化工作。

函数所在位置:/linux/init/main.c

start_kernel涉及大量初始化工作,这里只列举重要的初始化工作。

  1. asmlinkage void __init start_kernel(void)
  2. {
  3. …… //类型判断
  4. smp_setup_processor_id(); //smp相关,返回启动CPU号
  5. ……
  6. local_irq_disable(); //关闭当前CPU中断
  7. early_boot_irqs_disabled = true;
  8. /*
  9. * Interrupts are still disabled. Do necessary setups, then
  10. * enable them
  11. */
  12. boot_cpu_init();
  13. page_address_init(); //初始化页地址
  14. pr_notice("%s", linux_banner); //显示内核版本信息
  15. setup_arch(&command_line);
  16. mm_init_owner(&init_mm, &init_task);
  17. mm_init_cpumask(&init_mm);
  18. setup_command_line(command_line);
  19. setup_nr_cpu_ids();
  20. setup_per_cpu_areas();
  21. smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
  22.  
  23. build_all_zonelists(NULL, NULL);
  24. page_alloc_init(); //页内存申请初始化
  25.  
  26. pr_notice("Kernel command line: %s ", boot_command_line); //打印内核启动命令行参数
  27. parse_early_param();
  28. parse_args("Booting kernel", static_command_line, __start___param,
  29. __stop___param - __start___param,
  30. -1, -1, &unknown_bootoption);
  31.  
  32. ……
  33. /*
  34. * Set up the scheduler prior starting any interrupts (such as the
  35. * timer interrupt). Full topology setup happens at smp_init()
  36. * time - but meanwhile we still have a functioning scheduler.
  37. */
  38. sched_init(); //进程调度器初始化
  39. /*
  40. * Disable preemption - early bootup scheduling is extremely
  41. * fragile until we cpu_idle() for the first time.
  42. */
  43. preempt_disable(); //禁止内核抢占
  44. if (WARN(!irqs_disabled(), "Interrupts were enabled *very* early, fixing it "))
  45. local_irq_disable(); //检查关闭CPU中断
  46.  
  47.  
  48. /*大量初始化内容 见名知意*/
  49. idr_init_cache();
  50. rcu_init();
  51. tick_nohz_init();
  52. context_tracking_init();
  53. radix_tree_init();
  54. /* init some links before init_ISA_irqs() */
  55. early_irq_init();
  56. init_IRQ();
  57. tick_init();
  58. init_timers();
  59. hrtimers_init();
  60. softirq_init();
  61. timekeeping_init();
  62. time_init();
  63. sched_clock_postinit();
  64. perf_event_init();
  65. profile_init();
  66. call_function_init();
  67. WARN(!irqs_disabled(), "Interrupts were enabled early ");
  68. early_boot_irqs_disabled = false;
  69. local_irq_enable(); //本地中断可以使用了
  70.  
  71. kmem_cache_init_late();
  72.  
  73. /*
  74. * HACK ALERT! This is early. We're enabling the console before
  75. * we've done PCI setups etc, and console_init() must be aware of
  76. * this. But we do want output early, in case something goes wrong.
  77. */
  78. console_init(); //初始化控制台,可以使用printk了
  79. if (panic_later)
  80. panic("Too many boot %s vars at `%s'", panic_later,
  81. panic_param);
  82.  
  83. lockdep_info();
  84.  
  85. /*
  86. * Need to run this when irqs are enabled, because it wants
  87. * to self-test [hard/soft]-irqs on/off lock inversion bugs
  88. * too:
  89. */
  90. locking_selftest();
  91.  
  92. #ifdef CONFIG_BLK_DEV_INITRD
  93. if (initrd_start && !initrd_below_start_ok &&
  94. page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) {
  95. pr_crit("initrd overwritten (0x%08lx < 0x%08lx) - disabling it. ",
  96. page_to_pfn(virt_to_page((void *)initrd_start)),
  97. min_low_pfn);
  98. initrd_start = 0;
  99. }
  100. #endif
  101. page_cgroup_init();
  102. debug_objects_mem_init();
  103. kmemleak_init();
  104. setup_per_cpu_pageset();
  105. numa_policy_init();
  106. if (late_time_init)
  107. late_time_init();
  108. sched_clock_init();
  109. calibrate_delay();
  110. pidmap_init();
  111. anon_vma_init();
  112. acpi_early_init();
  113. #ifdef CONFIG_X86
  114. if (efi_enabled(EFI_RUNTIME_SERVICES))
  115. efi_enter_virtual_mode();
  116. #endif
  117. #ifdef CONFIG_X86_ESPFIX64
  118. /* Should be run before the first non-init thread is created */
  119. init_espfix_bsp();
  120. #endif
  121. thread_info_cache_init();
  122. cred_init();
  123. fork_init(totalram_pages); //初始化fork
  124. proc_caches_init();
  125. buffer_init();
  126. key_init();
  127. security_init();
  128. dbg_late_init();
  129. vfs_caches_init(totalram_pages); //虚拟文件系统初始化
  130. signals_init();
  131. /* rootfs populating might need page-writeback */
  132. page_writeback_init();
  133. #ifdef CONFIG_PROC_FS
  134. proc_root_init();
  135. #endif
  136. cgroup_init();
  137. cpuset_init();
  138. taskstats_init_early();
  139. delayacct_init();
  140.  
  141. check_bugs();
  142.  
  143. sfi_init_late();
  144.  
  145. if (efi_enabled(EFI_RUNTIME_SERVICES)) {
  146. efi_late_init();
  147. efi_free_boot_services();
  148. }
  149.  
  150. ftrace_init();
  151.  
  152. /* Do the rest non-__init'ed, we're now alive */
  153. rest_init();
  154. }

函数最后调用rest_init()函数

 

  1. /*最重要使命:创建kernel_init进程,并进行后续初始化*/
  2. static noinline void __init_refok rest_init(void)
  3. {
  4. int pid;
  5.  
  6. rcu_scheduler_starting();
  7. /*
  8. * We need to spawn init first so that it obtains pid 1, however
  9. * the init task will end up wanting to create kthreads, which, if
  10. * we schedule it before we create kthreadd, will OOPS.
  11. */
  12.  
  13. kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND); //创建kernel_init进程
  14.  
  15. numa_default_policy();
  16. pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
  17. rcu_read_lock();
  18. kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
  19. rcu_read_unlock();
  20. complete(&kthreadd_done);
  21.  
  22. /*
  23. * The boot idle thread must execute schedule()
  24. * at least once to get things moving:
  25. */
  26. init_idle_bootup_task(current);
  27. schedule_preempt_disabled();
  28. /* Call into cpu_idle with preempt disabled */
  29. //cpu_idle就是在系统闲置时用来降低电力的使用和减少热的产生的空转函数,函数至此不再返回,其余工作从kernel_init进程处发起
  30. cpu_startup_entry(CPUHP_ONLINE);
  31. }

 

kernel_init函数将完成设备驱动程序的初始化,并启动用户空间的init进程(在2.6版本中这一步由init_post函数完成)

部分书籍介绍的内核启动流程基于经典的2.6版本,kernel_init函数还会调用init_post函数专门负责_init进程的启动,现版本已经被整合到了一起。

  1. static int __ref kernel_init(void *unused)
  2. {
  3. int ret;
  4.  
  5. kernel_init_freeable(); //该函数中完成smp开启 驱动初始化 共享内存初始化等工作
  6. /* need to finish all async __init code before freeing the memory */
  7. async_synchronize_full();
  8. free_initmem(); //初始化尾声,清除内存无用数据
  9. mark_rodata_ro();
  10. system_state = SYSTEM_RUNNING;
  11. numa_default_policy();
  12.  
  13. flush_delayed_fput();
  14.  
  15. if (ramdisk_execute_command) {
  16. ret = run_init_process(ramdisk_execute_command);
  17. if (!ret)
  18. return 0;
  19. pr_err("Failed to execute %s (error %d) ",
  20. ramdisk_execute_command, ret);
  21. }
  22.  
  23. /*
  24. * We try each of these until one succeeds.
  25. *
  26. * The Bourne shell can be used instead of init if we are
  27. * trying to recover a really broken machine.
  28. *寻找init函数,创建一号进程_init (第一个用户空间进程)*/
  29. if (execute_command) {
  30. ret = run_init_process(execute_command);
  31. if (!ret)
  32. return 0;
  33. pr_err("Failed to execute %s (error %d). Attempting defaults... ",
  34. execute_command, ret);
  35. }
  36. if (!try_to_run_init_process("/sbin/init") ||
  37. !try_to_run_init_process("/etc/init") ||
  38. !try_to_run_init_process("/bin/init") ||
  39. !try_to_run_init_process("/bin/sh"))
  40. return 0;
  41.  
  42. panic("No working init found. Try passing init= option to kernel. "
  43. "See Linux Documentation/init.txt for guidance.");
  44. }

 

到此,内核初始化已经接近尾声,所有的初始化函数都已经调用,因此free_initmem函数可以释放内存中__init_begin与__init_end之间的数据。

当内核被引导并进行初始化后,内核启动了自己的第一个用户空间应用程序_init,这是调用的第一个使用标准C库编译的程序,其进程编号始终为1。

_init负责触发其他必需的进程,以使系统进入整体可用的状态。


以下为内核启动流程图:


原文地址:https://www.cnblogs.com/sky-heaven/p/13825161.html