vmlinuz
vmlinuz(Virtual Memory LINUx gZip)是一个使用gzip压缩的linux kernel
$file /boot/vmlinuz-`uname -r`
/boot/vmlinuz-`uname -r`: Linux kernel x86 boot executable bzImage, version `uname -r` (admin@xxx) #1 SMP Tu, RO-rootFS, swap_dev 0x4, Normal VGA
vmlinuz是由bzImage cp而来,也就是说两者完全一样
case "${ARCH}" in
x86|i386|x86_64)
[ -f "${objtree}/arch/x86/boot/bzImage" ] && cp -v -- "${objtree}/arch/x86/boot/bzImage" "${tmpdir}/boot/vmlinuz-${KERNELRELEASE}"
;;
bzImage是由build将setup.bin和vmlinux.bin合并而来,其中setup.bin是bzImage的实模式部分,vmlinux.bin是bzImage的保护模式部分
# Build bzImage from setup.bin (real-mode part) and vmlinux.bin (protected-mode
# part); the tools/build helper is listed as a prerequisite as well.
$(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE
$(call if_changed,image)
# Report the finished image ($@) together with the contents of .version.
@$(kecho) 'Kernel: $@ is ready' ' (#'`cat .version`')'

setup.bin编译过程
arch/x86/boot下的文件编译得到.o文件
// arch/x86/boot/Makefile
# SETUP_OBJS is every object named in setup-y, prefixed with $(obj)/.
# It is a recursive (=) assignment, so the setup-y additions below are still
# picked up when SETUP_OBJS is expanded later.
SETUP_OBJS = $(addprefix $(obj)/,$(setup-y))
setup-y += a20.o bioscall.o cmdline.o copy.o cpu.o cpuflags.o cpucheck.o
setup-y += early_serial_console.o edd.o header.o main.o memory.o
setup-y += pm.o pmjump.o printf.o regs.o string.o tty.o video.o
setup-y += video-mode.o version.o
# apm.o lands in setup-y only when CONFIG_X86_APM_BOOT expands to "y".
setup-$(CONFIG_X86_APM_BOOT) += apm.o
setup-y += video-vga.o
setup-y += video-vesa.o
setup-y += video-bios.o
.o文件链接得到arch/x86/boot/setup.elf
// arch/x86/boot/Makefile
# Link the setup objects into setup.elf with the setup.ld linker script.
# NOTE(review): -T presumably takes the first prerequisite (setup.ld) as its
# argument when the ld command line is assembled - confirm against cmd_ld.
LDFLAGS_setup.elf := -T
$(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE
$(call if_changed,ld)
arch/x86/boot/setup.elf通过-O binary转换成纯二进制文件arch/x86/boot/setup.bin(仅保留代码和数据)
// arch/x86/boot/Makefile
# Convert setup.elf into the flat binary setup.bin (objcopy -O binary).
OBJCOPYFLAGS_setup.bin := -O binary
$(obj)/setup.bin: $(obj)/setup.elf FORCE
$(call if_changed,objcopy)
vmlinux.bin编译过程
linux kernel编译得到vmlinux
// Makefile
vmlinux: scripts/link-vmlinux.sh autoksyms_recursive $(vmlinux-deps) FORCE
vmlinux去掉comment/符号表/调试信息得到arch/x86/boot/compressed/vmlinux.bin
// arch/x86/boot/compressed/Makefile
# Produce compressed/vmlinux.bin from the top-level vmlinux: objcopy removes
# the .comment section (-R .comment) and strips symbols and debug info (-S).
OBJCOPYFLAGS_vmlinux.bin := -R .comment -S
$(obj)/vmlinux.bin: vmlinux FORCE
$(call if_changed,objcopy)
arch/x86/boot/compressed/vmlinux.bin压缩得到arch/x86/boot/compressed/vmlinux.bin.gz
// arch/x86/boot/compressed/Makefile
# Compress the files listed in vmlinux.bin.all-y into vmlinux.bin.gz using the
# gzip command.
$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE
$(call if_changed,gzip)
# Input list for the compression step; here it is just vmlinux.bin.
vmlinux.bin.all-y := $(obj)/vmlinux.bin
arch/x86/boot/compressed/vmlinux.bin.gz嵌入arch/x86/boot/compressed/piggy.S,编译得到arch/x86/boot/compressed/piggy.o
// arch/x86/boot/compressed/Makefile
# Generate piggy.S from the compressed kernel with the mkpiggy host tool.
$(obj)/piggy.S: $(obj)/vmlinux.bin.$(suffix-y) $(obj)/mkpiggy FORCE
$(call if_changed,mkpiggy)
# $< is the first prerequisite (vmlinux.bin.gz when gzip is used) and $@ is the
# target (piggy.S), i.e. "mkpiggy vmlinux.bin.gz > piggy.S". If mkpiggy fails,
# the partial output is removed and the command still reports failure.
cmd_mkpiggy = $(obj)/mkpiggy $< > $@ || ( rm -f $@ ; false )
arch/x86/boot/compressed/piggy.o和其它.o文件链接得到arch/x86/boot/compressed/vmlinux
// arch/x86/boot/compressed/Makefile
# Link piggy.o (which carries the compressed kernel) with the other objects in
# vmlinux-objs-y, using the vmlinux.lds linker script listed first, into
# compressed/vmlinux.
$(obj)/vmlinux: $(vmlinux-objs-y) FORCE
$(call if_changed,check-and-link-vmlinux)
vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
$(obj)/string.o $(obj)/cmdline.o $(obj)/error.o \
$(obj)/piggy.o $(obj)/cpuflags.o
arch/x86/boot/compressed/vmlinux去掉note/comment/符号表/调试信息,通过-O binary转换成纯二进制文件arch/x86/boot/vmlinux.bin(仅保留代码和数据)
// arch/x86/boot/Makefile
# Flatten compressed/vmlinux into boot/vmlinux.bin: raw binary output
# (-O binary) with .note and .comment removed and symbols/debug info stripped.
OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S
$(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
$(call if_changed,objcopy)
arch/x86/boot/compressed/mkpiggy.c
mkpiggy将vmlinux.bin.gz作为.section .rodata..compressed嵌入arch/x86/boot/compressed/piggy.S
/*
 * mkpiggy entry point.
 *
 * Reads the gzip-compressed kernel named by argv[1] and prints assembly
 * source on stdout that embeds the file with .incbin inside section
 * .rodata..compressed, and defines z_input_len, z_output_len and the
 * input_data / input_data_end labels.
 *
 * Returns 0 on success, 1 on a usage, open or read error.
 */
int main(int argc, char *argv[])
{
uint32_t olen;
long ilen;
FILE *f = NULL;
int retval = 1;
if (argc < 2) {
fprintf(stderr, "Usage: %s compressed_file\n", argv[0]);
goto bail;
}
/* Get the information for the compressed kernel image first */
f = fopen(argv[1], "r"); // open the compressed image (vmlinux.bin.gz in this build)
if (!f) {
perror(argv[1]);
goto bail;
}
if (fseek(f, -4L, SEEK_END)) { // move the file position to the last 4 bytes of the file
// NOTE(review): a failed seek is only reported, execution continues; the
// fread() below would then read from whatever position the stream is at.
perror(argv[1]);
}
if (fread(&olen, sizeof(olen), 1, f) != 1) { // read the last 4 bytes into olen (gzip stores the uncompressed size there)
perror(argv[1]);
goto bail;
}
ilen = ftell(f); // the position is now the end of the file, so this is the file size, i.e. the compressed size
olen = get_unaligned_le32(&olen);
printf(".section \".rodata..compressed\",\"a\",@progbits\n");
printf(".globl z_input_len\n");
printf("z_input_len = %lu\n", ilen); // z_input_len = compressed size
printf(".globl z_output_len\n");
printf("z_output_len = %lu\n", (unsigned long)olen); // z_output_len = uncompressed size
printf(".globl input_data, input_data_end\n");
printf("input_data:\n");
printf(".incbin \"%s\"\n", argv[1]); // embed the compressed file into piggy.S
printf("input_data_end:\n");
retval = 0;
bail:
if (f)
fclose(f);
return retval;
}
arch/x86/boot/tools/build.c
bzImage = setup.bin向上对齐到512且至少5个扇区 + (vmlinux.bin + 4字节CRC)向上对齐到16(启用CONFIG_EFI_STUB时向上对齐到32)
/*
 * build entry point: assembles the bzImage.
 *
 * Arguments (exactly four are required):
 *   argv[1]  setup.bin  - real-mode setup code
 *   argv[2]  vmlinux.bin - protected-mode kernel
 *   argv[3]  handed to parse_zoffset()
 *   argv[4]  output bzImage
 *
 * Output layout: the setup code zero-padded to whole 512-byte sectors (at
 * least SETUP_SECT_MIN of them), followed by the kernel zero-padded so that
 * kernel + 4-byte CRC fills sys_size 16-byte paragraphs, with the CRC over
 * everything written before it stored in the last 4 bytes.
 *
 * Errors are reported through die(); returns 0 on success.
 */
int main(int argc, char ** argv)
{
unsigned int i, sz, setup_sectors, init_sz;
int c;
u32 sys_size;
struct stat sb;
FILE *file, *dest;
int fd;
void *kernel;
u32 crc = 0xffffffffUL;
efi_stub_defaults();
if (argc != 5)
usage();
parse_zoffset(argv[3]);
dest = fopen(argv[4], "w"); // open the output bzImage
if (!dest)
die("Unable to write `%s': %m", argv[4]);
/* Copy the setup code */
file = fopen(argv[1], "r"); // open setup.bin
if (!file)
die("Unable to open `%s': %m", argv[1]);
c = fread(buf, 1, sizeof(buf), file); // read setup.bin into buf; c = number of bytes read
if (ferror(file))
die("read-error on `setup'");
if (c < 1024)
die("The setup must be at least 1024 bytes");
if (get_unaligned_le16(&buf[510]) != 0xAA55) // check that the first sector carries the boot-sector flag
die("Boot block hasn't got boot flag (0xAA55)");
fclose(file);
c += reserve_pecoff_reloc_section(c);
/* Pad unused space with zeros */
setup_sectors = (c + 511) / 512; // round up to whole 512-byte sectors
if (setup_sectors < SETUP_SECT_MIN)
setup_sectors = SETUP_SECT_MIN; // enforce the minimum sector count (5 sectors)
i = setup_sectors*512; // padded size of the setup part in bytes
memset(buf+c, 0, i-c);
update_pecoff_setup_and_reloc(i);
/* Set the default root device */
put_unaligned_le16(DEFAULT_ROOT_DEV, &buf[508]);
printf("Setup is %d bytes (padded to %d bytes).\n", c, i);
/* Open and stat the kernel file */
fd = open(argv[2], O_RDONLY); // open vmlinux.bin
if (fd < 0)
die("Unable to open `%s': %m", argv[2]);
if (fstat(fd, &sb))
die("Unable to stat `%s': %m", argv[2]);
sz = sb.st_size; // size of vmlinux.bin
printf("System is %d kB\n", (sz+1023)/1024);
kernel = mmap(NULL, sz, PROT_READ, MAP_SHARED, fd, 0); // map vmlinux.bin read-only at kernel
if (kernel == MAP_FAILED)
die("Unable to mmap '%s': %m", argv[2]);
/* Number of 16-byte paragraphs, including space for a 4-byte CRC */
sys_size = (sz + 15 + 4) / 16; // sys_size * 16 = (sz + 4) rounded up to a multiple of 16
#ifdef CONFIG_EFI_STUB
/*
* COFF requires minimum 32-byte alignment of sections, and
* adding a signature is problematic without that alignment.
*/
sys_size = (sys_size + 1) & ~1;
#endif
/* Patch the setup code with the appropriate size parameters */
buf[0x1f1] = setup_sectors-1;
put_unaligned_le32(sys_size, &buf[0x1f4]);
update_pecoff_text(setup_sectors * 512, i + (sys_size * 16));
init_sz = get_unaligned_le32(&buf[0x260]);
update_pecoff_bss(i + (sys_size * 16), init_sz);
efi_stub_entry_update();
crc = partial_crc32(buf, i, crc);
if (fwrite(buf, 1, i, dest) != i) // write buf to bzImage: i bytes, the padded setup part
die("Writing setup failed");
/* Copy the kernel code */
crc = partial_crc32(kernel, sz, crc);
if (fwrite(kernel, 1, sz, dest) != sz) // write the mapped kernel to bzImage: sz bytes, the size of vmlinux.bin
die("Writing kernel failed");
/* Add padding leaving 4 bytes for the checksum */
while (sz++ < (sys_size*16) - 4) {
crc = partial_crc32_one('\0', crc);
if (fwrite("\0", 1, 1, dest) != 1) // write one zero byte per iteration until sys_size*16 - 4 bytes of kernel + padding are out
die("Writing padding failed");
}
/* Write the CRC */
printf("CRC %x\n", crc);
put_unaligned_le32(crc, buf);
if (fwrite(buf, 1, 4, dest) != 4) // write the 4-byte CRC from buf to bzImage
die("Writing CRC failed");
/* Catch any delayed write failures */
if (fclose(dest))
die("Writing image failed");
close(fd);
/* Everything is OK */
return 0;
}
从kernel.img到setup.bin
arch/x86/boot/setup.ld
setup.ld定义了setup.bin的结构
/*
 * Layout of the real-mode setup image: boot-sector text/data from offset 0,
 * the setup header at offset 495 (so that hdr lands at 0x1f1, asserted
 * below), then init code, text, read-only data, data, a signature word and
 * finally .bss. The whole image must fit in 0x8000 bytes.
 */
SECTIONS
{
. = 0; // location counter starts at 0
.bstext : { *(.bstext) }
.bsdata : { *(.bsdata) }
. = 495; // location counter moved to 495
.header : { *(.header) }
.entrytext : { *(.entrytext) }
.inittext : { *(.inittext) }
.initdata : { *(.initdata) }
__end_init = .;
.text : { *(.text) }
.text32 : { *(.text32) }
. = ALIGN(16);
.rodata : { *(.rodata*) }
.videocards : {
video_cards = .;
*(.videocards)
video_cards_end = .;
}
. = ALIGN(16);
.data : { *(.data*) }
.signature : {
setup_sig = .;
LONG(0x5a5aaa55)
}
. = ALIGN(16);
.bss :
{
__bss_start = .; // start address of .bss
*(.bss)
__bss_end = .; // first address past the end of .bss
}
. = ALIGN(16);
_end = .; // first address past the end of the image
/DISCARD/ : { *(.note*) }
/*
* The ASSERT() sink to . is intentional, for binutils 2.14 compatibility:
*/
. = ASSERT(_end <= 0x8000, "Setup too big!");
. = ASSERT(hdr == 0x1f1, "The setup header has the wrong offset!");
/* Necessary for the very-old-loader check to work... */
. = ASSERT(__end_init <= 5*512, "init sections too big!");
}
arch/x86/boot/header.S
_start位于setup.bin开始偏移512个字节,正好是vmlinuz实模式部分起始地址 + 0x200,也就是说kernel.img最后跳转到setup.bin的_start
# Legacy boot-sector stub at the start of the setup image. It reloads %cs with
# BOOTSEG, points the data/stack segments at it, prints bugger_off_msg one
# character at a time through int 0x10, waits for a key (int 0x16) and then
# reboots (int 0x19, falling back to the BIOS reset vector).
BOOTSEG = 0x07C0 /* original address of boot-sector */
SYSSEG = 0x1000 /* historical load address >> 4 */
.code16
.section ".bstext", "ax" // placed at offset 0 by setup.ld
.global bootsect_start
bootsect_start:
# Normalize the start address
ljmp $BOOTSEG, $start2
start2:
movw %cs, %ax
movw %ax, %ds
movw %ax, %es
movw %ax, %ss
xorw %sp, %sp
sti
cld
movw $bugger_off_msg, %si
msg_loop:
lodsb
andb %al, %al
jz bs_die
movb $0xe, %ah
movw $7, %bx
int $0x10
jmp msg_loop
bs_die:
# Allow the user to press a key, then reboot
xorw %ax, %ax
int $0x16
int $0x19
# int 0x19 should never return. In case it does anyway,
# invoke the BIOS reset code...
ljmp $0xf000,$0xfff0
.section ".bsdata", "a"
bugger_off_msg:
.ascii "Use a boot loader.\r\n"
.ascii "\n"
.ascii "Remove disk and press any key to reboot...\r\n"
.byte 0
# Kernel attributes; used by setup. This is part 1 of the
# header, from the old boot sector.
# The trailing "// N" annotations give each field's byte offset inside the
# setup image; they add up so that boot_flag ends the first 512-byte sector
# and _start sits at offset 512.
.section ".header", "a" // placed at offset 495 by setup.ld
.globl sentinel
sentinel: .byte 0xff, 0xff /* Used to detect broken loaders */ // 495-496
.globl hdr // corresponds to struct setup_header
hdr:
setup_sects: .byte 0 /* Filled in by build.c */ // 497
root_flags: .word ROOT_RDONLY // 498-499
syssize: .long 0 /* Filled in by build.c */ // 500-503
ram_size: .word 0 /* Obsolete */ // 504-505
vid_mode: .word SVGA_MODE // 506-507
root_dev: .word 0 /* Filled in by build.c */ // 508-509
boot_flag: .word 0xAA55 // 510-511
# offset 512, entry point
.globl _start
_start:
# Explicitly enter this as bytes, or the assembler
# tries to generate a 3-byte jump here, which causes
# everything else to push off to the wrong offset.
.byte 0xeb # short (2-byte) jump // 512
.byte start_of_setup-1f // 513: jump displacement, i.e. jump to start_of_setup
1:
# Part 2 of the header, from the old setup.S
# These fields follow the 2-byte jump at _start and continue the setup header
# declared from hdr onward; the order matches struct setup_header from the
# "header" field ("HdrS") through handover_offset.
.ascii "HdrS" # header signature
.word 0x020d # header version number (>= 0x0105)
# or else old loadlin-1.5 will fail)
.globl realmode_swtch
realmode_swtch: .word 0, 0 # default_switch, SETUPSEG
start_sys_seg: .word SYSSEG # obsolete and meaningless, but just
# in case something decided to "use" it
.word kernel_version-512 # pointing to kernel version string
# above section of header is compatible
# with loadlin-1.5 (header v1.5). Don't
# change it.
type_of_loader: .byte 0 # 0 means ancient bootloader, newer
# bootloaders know to change this.
# See Documentation/x86/boot.txt for
# assigned ids
# flags, unused bits must be zero (RFU) bit within loadflags
loadflags:
.byte LOADED_HIGH # The kernel is to be loaded high
setup_move_size: .word 0x8000 # size to move, when setup is not
# loaded at 0x90000. We will move setup
# to 0x90000 then just before jumping
# into the kernel. However, only the
# loader knows how much data behind
# us also needs to be loaded.
code32_start: # here loaders can put a different
# start address for 32-bit code.
.long 0x100000 # 0x100000 = default for big kernel
ramdisk_image: .long 0 # address of loaded ramdisk image
# Here the loader puts the 32-bit
# address where it loaded the image.
# This only will be read by the kernel.
ramdisk_size: .long 0 # its size in bytes
bootsect_kludge:
.long 0 # obsolete
heap_end_ptr: .word _end+STACK_SIZE-512
# (Header version 0x0201 or later)
# space from here (exclusive) down to
# end of setup code can be used by setup
# for local heap purposes.
ext_loader_ver:
.byte 0 # Extended boot loader version
ext_loader_type:
.byte 0 # Extended boot loader type
cmd_line_ptr: .long 0 # (Header version 0x0202 or later)
# If nonzero, a 32-bit pointer
# to the kernel command line.
# The command line should be
# located between the start of
# setup and the end of low
# memory (0xa0000), or it may
# get overwritten before it
# gets read. If this field is
# used, there is no longer
# anything magical about the
# 0x90000 segment; the setup
# can be located anywhere in
# low memory 0x10000 or higher.
initrd_addr_max: .long 0x7fffffff
# (Header version 0x0203 or later)
# The highest safe address for
# the contents of an initrd
# The current kernel allows up to 4 GB,
# but leave it at 2 GB to avoid
# possible bootloader bugs.
kernel_alignment: .long CONFIG_PHYSICAL_ALIGN #physical addr alignment
#required for protected mode
#kernel
relocatable_kernel: .byte 1
min_alignment: .byte MIN_KERNEL_ALIGN_LG2 # minimum alignment
xloadflags:
.word XLF0 | XLF1 | XLF23 | XLF4
cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line,
#added with boot protocol
#version 2.06
hardware_subarch: .long 0 # subarchitecture, added with 2.07
# default to 0 for normal x86 PC
hardware_subarch_data: .quad 0
payload_offset: .long ZO_input_data
payload_length: .long ZO_z_input_len
setup_data: .quad 0 # 64-bit physical pointer to
# single linked list of
# struct setup_data
pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr
init_size: .long INIT_SIZE # kernel initialization size
handover_offset: .long 0 # Filled in by build.c
# End of setup header #####################################################
# Real-mode entry code reached from the short jump at _start. It makes
# %es = %ds, establishes a usable stack, normalizes %cs to equal %ds, checks
# the setup signature, zeroes .bss and calls main().
# In the "//" annotations X stands for the address at which the real-mode code
# was loaded and 0x9000 for the %sp value left by the boot loader
# NOTE(review): both come from the boot path traced in these notes - confirm
# for other loaders.
.section ".entrytext", "ax"
start_of_setup:
# Force %es = %ds
movw %ds, %ax
movw %ax, %es // es = X >> 4
cld // DF = 0
# Apparently some ancient versions of LILO invoked the kernel with %ss != %ds,
# which happened to work by accident for the old code. Recalculate the stack
# pointer if %ss is invalid. Otherwise leave it alone, LOADLIN sets up the
# stack behind its own code, so we can't blindly put it directly past the heap.
movw %ss, %dx
cmpw %ax, %dx # %ds == %ss? // compare ds with ss
movw %sp, %dx // dx = sp (0x9000 here)
je 2f # -> assume %sp is reasonably set // if they are equal, jump to 2f
# Invalid %ss, make up a new stack
movw $_end, %dx
testb $CAN_USE_HEAP, loadflags
jz 1f
movw heap_end_ptr, %dx
1: addw $STACK_SIZE, %dx
jnc 2f
xorw %dx, %dx # Prevent wraparound
2: # Now %dx should point to the end of our stack space
andw $~3, %dx # dword align (might as well...) // dx = 0x9000 (already aligned)
jnz 3f // if the result is non-zero, jump to 3f
movw $0xfffc, %dx # Make sure we're not zero
3: movw %ax, %ss // ss = X >> 4
movzwl %dx, %esp # Clear upper half of %esp // esp = 0x9000; the stack now exists with its top at ss:sp = X + 0x9000
sti # Now we should have a working stack
# We will have entered with %cs = %ds+0x20, normalize %cs so
# it is on par with the other segments.
pushw %ds // X >> 4
pushw $6f
lretw // pops IP then CS: IP = $6f, CS = X >> 4, so execution continues at X + $6f
6:
# Check signature at end of setup
cmpl $0x5a5aaa55, setup_sig // setup.ld places the value 0x5a5aaa55 at setup_sig
jne setup_bad
# Zero the bss
movw $__bss_start, %di // __bss_start is the start address of .bss; di = $__bss_start
movw $_end+3, %cx // _end is the first address past the image; cx = $_end+3, the +3 makes the /4 below round up
xorl %eax, %eax // eax = 0
subw %di, %cx // cx = $_end+3 - $__bss_start
shrw $2, %cx // cx = ($_end+3 - $__bss_start) / 4
rep; stosl // store EAX (0) at ES:DI (X + $__bss_start), repeated CX times, 4 bytes each
# Jump to C code (should not return)
calll main // call main
其中hdr对应struct setup_header
/*
 * C view of the setup header that header.S lays out starting at the hdr
 * label. The struct is packed so that each member sits at the same byte
 * offset as the corresponding assembler field; offsets noted below are
 * relative to the start of the setup image and are taken from header.S.
 */
struct setup_header {
__u8 setup_sects; /* offset 497 (0x1f1) */
__u16 root_flags; /* offset 498 */
__u32 syssize; /* offset 500 */
__u16 ram_size; /* offset 504 */
__u16 vid_mode; /* offset 506 */
__u16 root_dev; /* offset 508 */
__u16 boot_flag; /* offset 510, 0xAA55 */
__u16 jump; /* offset 512, the 2-byte short jump at _start */
__u32 header; /* "HdrS" signature */
__u16 version;
__u32 realmode_swtch;
__u16 start_sys_seg;
__u16 kernel_version;
__u8 type_of_loader;
__u8 loadflags;
__u16 setup_move_size;
__u32 code32_start;
__u32 ramdisk_image;
__u32 ramdisk_size;
__u32 bootsect_kludge;
__u16 heap_end_ptr;
__u8 ext_loader_ver;
__u8 ext_loader_type;
__u32 cmd_line_ptr;
__u32 initrd_addr_max;
__u32 kernel_alignment;
__u8 relocatable_kernel;
__u8 min_alignment;
__u16 xloadflags;
__u32 cmdline_size;
__u32 hardware_subarch;
__u64 hardware_subarch_data;
__u32 payload_offset;
__u32 payload_length;
__u64 setup_data;
__u64 pref_address;
__u32 init_size;
__u32 handover_offset;
} __attribute__((packed));
arch/x86/boot/main.c
/*
 * C entry point of the real-mode setup code, called from start_of_setup.
 * Copies the boot header, brings up the early console and heap, validates the
 * CPU, queries the BIOS (memory map, keyboard, IST, optionally APM/EDD), sets
 * the video mode and finally hands over to go_to_protected_mode().
 */
void main(void)
{
/* First, copy the boot header into the "zeropage" */
copy_boot_params(); // in effect: memcpy(&boot_params.hdr, &hdr, sizeof(hdr));
/* Initialize the early-boot console */
console_init();
if (cmdline_find_option_bool("debug"))
puts("early console in setup code\n");
/* End of heap check */
init_heap();
/* Make sure we have all the proper CPU support */
if (validate_cpu()) {
puts("Unable to boot - please use a kernel appropriate "
"for your CPU.\n");
die();
}
/* Tell the BIOS what CPU mode we intend to run in. */
set_bios_mode();
/* Detect memory layout */
// per these notes: fills e820_table through interrupt 0x15; the entry count is e820_entries
detect_memory();
/* Set keyboard repeat rate (why?) and query the lock flags */
keyboard_init();
/* Query Intel SpeedStep (IST) information */
query_ist();
/* Query APM information */
#if defined(CONFIG_APM) || defined(CONFIG_APM_MODULE)
query_apm_bios();
#endif
/* Query EDD information */
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
query_edd();
#endif
/* Set the video mode */
set_video();
/* Do the last things and invoke protected mode */
go_to_protected_mode(); // hand over to go_to_protected_mode
}
arch/x86/boot/pm.c
/*
 * Final real-mode steps before entering protected mode: run the real-mode
 * switch hook, enable A20, reset the coprocessor, mask the PIC, load the
 * IDT and GDT, then jump to the 32-bit entry point. The second argument to
 * protected_mode_jump() is the linear address of boot_params (its offset plus
 * the data segment base, ds << 4).
 */
void go_to_protected_mode(void)
{
/* Hook before leaving real mode, also disables interrupts */
realmode_switch_hook();
/* Enable the A20 gate */
if (enable_a20()) { // enable the A20 address line; non-zero means failure
puts("A20 gate not responding, unable to boot...\n");
die();
}
/* Reset coprocessor (IGNNE#) */
reset_coprocessor();
/* Mask all interrupts in the PIC */
mask_all_interrupts(); // mask interrupts
/* Actual transition to protected mode... */
setup_idt(); // load IDTR
setup_gdt(); // load GDTR
protected_mode_jump(boot_params.hdr.code32_start,
(u32)&boot_params + (ds() << 4)); // call protected_mode_jump
}
setup_gdt加载boot_gdt到GDTR、setup_idt加载null_idt到IDTR
/*
 * Set up the GDT
 *
 * struct gdt_ptr is the packed 6-byte operand (16-bit length, 32-bit
 * address) handed to lgdtl/lidtl. setup_gdt() builds a small static boot GDT
 * with flat code and data segments plus a TSS entry and loads it into GDTR,
 * converting the table's offset to a linear address with ds << 4.
 */
struct gdt_ptr {
u16 len;
u32 ptr;
} __attribute__((packed));
static void setup_gdt(void)
{
/* There are machines which are known to not boot with the GDT
being 8-byte unaligned. Intel recommends 16 byte alignment. */
static const u64 boot_gdt[] __attribute__((aligned(16))) = {
/* CS: code, read/execute, 4 GB, base 0 */
[GDT_ENTRY_BOOT_CS] = GDT_ENTRY(0xc09b, 0, 0xfffff), // CS.L = 0, CS.D = 1
/* DS: data, read/write, 4 GB, base 0 */
[GDT_ENTRY_BOOT_DS] = GDT_ENTRY(0xc093, 0, 0xfffff),
/* TSS: 32-bit tss, 104 bytes, base 4096 */
/* We only have a TSS here to keep Intel VT happy;
we don't actually use it for anything. */
[GDT_ENTRY_BOOT_TSS] = GDT_ENTRY(0x0089, 4096, 103),
};
/* Xen HVM incorrectly stores a pointer to the gdt_ptr, instead
of the gdt_ptr contents. Thus, make it static so it will
stay in memory, at least long enough that we switch to the
proper kernel GDT. */
static struct gdt_ptr gdt;
/* The length field holds the table size minus one. */
gdt.len = sizeof(boot_gdt)-1;
gdt.ptr = (u32)&boot_gdt + (ds() << 4);
asm volatile("lgdtl %0" : : "m" (gdt));
}
/*
 * Set up the IDT
 *
 * Loads IDTR with an all-zero descriptor (length 0, address 0), reusing
 * struct gdt_ptr as the operand layout for lidtl.
 */
static void setup_idt(void)
{
static const struct gdt_ptr null_idt = {0, 0};
asm volatile("lidtl %0" : : "m" (null_idt));
}
protected_mode_jump的第1个参数为code32_start(默认0x100000),第2个参数为boot_params的线性地址((u32)&boot_params + (ds() << 4))
code32_start: # here loaders can put a different
# start address for 32-bit code.
.long 0x100000 # 0x100000 = default for big kernel
struct boot_params boot_params;
函数调用
// arch/x86/entry/calling.h
x86 function call convention, 64-bit:
-------------------------------------
arguments | callee-saved | extra caller-saved | return
[callee-clobbered] | | [callee-clobbered] |
---------------------------------------------------------------------------
rdi rsi rdx rcx r8-9 | rbx rbp [*] r12-15 | r10-11 | rax, rdx [**]
// https://gcc.gnu.org/onlinedocs/gcc/x86-Function-Attributes.html
// -mregparm=3表示使用3个寄存器传参
For 32-bit we have the following conventions - kernel is built with
-mregparm=3 and -freg-struct-return:
x86 function calling convention, 32-bit:
----------------------------------------
arguments | callee-saved | extra caller-saved | return
[callee-clobbered] | | [callee-clobbered] |
-------------------------------------------------------------------------
eax edx ecx | ebx edi esi ebp [*] | <none> | eax, edx [**]
arch/x86/boot/pmjump.S
在setup.bin中进入保护模式,最后跳转到code32_start(0x100000),正好是vmlinuz保护模式部分起始地址
# protected_mode_jump switches the CPU from real mode to 32-bit protected mode
# and jumps to the entry point passed in %eax, with %esi pointing at
# boot_params. in_pm32 is the 32-bit landing pad: it reloads the data segment
# registers, fixes up %esp, loads TR and LDTR, clears the scratch registers and
# jumps to the entry point.
# In the "//" annotations X stands for the real-mode code's base address
# (%cs << 4).
.text
.code16
/*
* void protected_mode_jump(u32 entrypoint, u32 bootparams);
*/
GLOBAL(protected_mode_jump) // eax = 1st argument, code32_start (0x100000); edx = 2nd argument, boot_params
movl %edx, %esi # Pointer to boot_params table // esi = boot_params
xorl %ebx, %ebx // ebx = 0
movw %cs, %bx // bx = X >> 4
shll $4, %ebx // ebx = X
addl %ebx, 2f // 2f holds in_pm32's link-time offset; after adding X it holds in_pm32's linear address
jmp 1f # Short jump to serialize on 386/486 // jump to 1f
1:
movw $__BOOT_DS, %cx // cx = GDT_ENTRY_BOOT_DS*8
movw $__BOOT_TSS, %di // di = GDT_ENTRY_BOOT_TSS*8
movl %cr0, %edx
orb $X86_CR0_PE, %dl # Protected mode // set CR0.PE to 1 to enter protected mode
movl %edx, %cr0
# Transition to 32-bit mode
.byte 0x66, 0xea # ljmpl opcode // far jump to in_pm32
2: .long in_pm32 # offset // patched above to in_pm32's linear address
.word __BOOT_CS # segment // GDT_ENTRY_BOOT_CS*8
ENDPROC(protected_mode_jump)
.code32
.section ".text32","ax"
GLOBAL(in_pm32)
# Set up data segments for flat 32-bit mode
movl %ecx, %ds // ds = GDT_ENTRY_BOOT_DS*8
movl %ecx, %es // es = GDT_ENTRY_BOOT_DS*8
movl %ecx, %fs // fs = GDT_ENTRY_BOOT_DS*8
movl %ecx, %gs // gs = GDT_ENTRY_BOOT_DS*8
movl %ecx, %ss // ss = GDT_ENTRY_BOOT_DS*8
# The 32-bit code sets up its own stack, but this way we do have
# a valid stack if some debugging hack wants to use it.
addl %ebx, %esp // esp = esp + X
# Set up TR to make Intel VT happy
ltr %di // load TR
# Clear registers to allow for future extensions to the
# 32-bit boot protocol
xorl %ecx, %ecx // ecx = 0
xorl %edx, %edx // edx = 0
xorl %ebx, %ebx // ebx = 0
xorl %ebp, %ebp // ebp = 0
xorl %edi, %edi // edi = 0
# Set up LDTR to make Intel VT happy
lldt %cx // load LDTR (cx is 0 at this point)
jmpl *%eax # Jump to the 32-bit entrypoint // jump to code32_start (0x100000), the start of vmlinuz's protected-mode part
ENDPROC(in_pm32)