计算机引导过程三(vmlinuz实模式setup.bin)

vmlinuz

vmlinuz(Virtual Memory LINUx gZip)是一个使用gzip压缩的linux kernel

$file /boot/vmlinuz-`uname -r`
/boot/vmlinuz-`uname -r`: Linux kernel x86 boot executable bzImage, version `uname -r` (admin@xxx) #1 SMP Tu, RO-rootFS, swap_dev 0x4, Normal VGA

vmlinuz是由bzImage cp而来,也就是说两者完全一样

case "${ARCH}" in
	x86|i386|x86_64)
		[ -f "${objtree}/arch/x86/boot/bzImage" ] && cp -v -- "${objtree}/arch/x86/boot/bzImage" "${tmpdir}/boot/vmlinuz-${KERNELRELEASE}"
		;;

bzImage是由build将setup.bin和vmlinux.bin合并而来,其中setup.bin是bzImage的实模式部分,vmlinux.bin是bzImage的保护模式部分

# Final boot image. It depends on the real-mode part (setup.bin), the
# protected-mode part (vmlinux.bin) and the host tool tools/build.
# The recipe runs the `image` command through if_changed (both are defined
# outside this excerpt) and then prints a "ready" banner that includes the
# contents of the .version file.
$(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE
	$(call if_changed,image)
	@$(kecho) 'Kernel: $@ is ready' ' (#'`cat .version`')'

setup.bin编译过程

arch/x86/boot下的文件编译得到.o文件

// arch/x86/boot/Makefile
# Full paths ($(obj)/ prefix) of every object that makes up the real-mode
# setup code. Recursively expanded (`=`), so setup-y is only read when
# SETUP_OBJS is used, i.e. after all the `+=` lines below have been seen.
SETUP_OBJS = $(addprefix $(obj)/,$(setup-y))

# Objects that are always part of the setup code.
setup-y		+= a20.o bioscall.o cmdline.o copy.o cpu.o cpuflags.o cpucheck.o
setup-y		+= early_serial_console.o edd.o header.o main.o memory.o
setup-y		+= pm.o pmjump.o printf.o regs.o string.o tty.o video.o
setup-y		+= video-mode.o version.o
# apm.o lands in setup-y only when CONFIG_X86_APM_BOOT expands to `y`.
setup-$(CONFIG_X86_APM_BOOT) += apm.o
setup-y		+= video-vga.o
setup-y		+= video-vesa.o
setup-y		+= video-bios.o

.o文件链接得到arch/x86/boot/setup.elf

// arch/x86/boot/Makefile
# Link all setup objects into setup.elf.
# The per-target linker flags end in -T; setup.ld is listed as the first
# prerequisite, presumably so that it becomes the argument of -T on the
# final command line (cmd_ld is defined elsewhere - confirm).
LDFLAGS_setup.elf	:= -T
$(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE
	$(call if_changed,ld)

arch/x86/boot/setup.elf通过-O binary转换成纯二进制文件arch/x86/boot/setup.bin(仅保留代码和数据)

// arch/x86/boot/Makefile
# Convert setup.elf into a raw binary image (objcopy -O binary), which drops
# the ELF container and keeps only the section contents.
OBJCOPYFLAGS_setup.bin	:= -O binary
$(obj)/setup.bin: $(obj)/setup.elf FORCE
	$(call if_changed,objcopy)

vmlinux.bin编译过程

linux kernel编译得到vmlinux

// Makefile
vmlinux: scripts/link-vmlinux.sh autoksyms_recursive $(vmlinux-deps) FORCE

vmlinux去掉comment/符号表/调试信息得到arch/x86/boot/compressed/vmlinux.bin

// arch/x86/boot/compressed/Makefile
# Produce compressed/vmlinux.bin from the top-level vmlinux:
# -R .comment removes the .comment section, -S strips symbol and
# relocation information. No -O option is given here, so the output
# format is left unchanged.
OBJCOPYFLAGS_vmlinux.bin :=  -R .comment -S
$(obj)/vmlinux.bin: vmlinux FORCE
	$(call if_changed,objcopy)

arch/x86/boot/compressed/vmlinux.bin压缩得到arch/x86/boot/compressed/vmlinux.bin.gz

// arch/x86/boot/compressed/Makefile
# Inputs of the compression step. This must be assigned BEFORE the rule
# below: make expands a rule's prerequisite list immediately when it reads
# the rule line, so a later assignment would leave the list empty.
vmlinux.bin.all-y := $(obj)/vmlinux.bin

# Compress vmlinux.bin into vmlinux.bin.gz using the `gzip` command
# (cmd_gzip and if_changed are defined outside this excerpt).
$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE
	$(call if_changed,gzip)

arch/x86/boot/compressed/vmlinux.bin.gz嵌入arch/x86/boot/compressed/piggy.S,编译得到arch/x86/boot/compressed/piggy.o

// arch/x86/boot/compressed/Makefile
# Generate piggy.S from the compressed kernel image with the mkpiggy host
# tool. The recipe line must start with a hard TAB; leading spaces make
# GNU make stop with "missing separator".
$(obj)/piggy.S: $(obj)/vmlinux.bin.$(suffix-y) $(obj)/mkpiggy FORCE
	$(call if_changed,mkpiggy)

# $< is the first prerequisite (vmlinux.bin.gz when gzip is used) and $@ is
# the target (piggy.S), so this runs `mkpiggy vmlinux.bin.gz > piggy.S`.
# If mkpiggy fails, the partially written target is removed and the
# recipe still reports failure.
cmd_mkpiggy = $(obj)/mkpiggy $< > $@ || ( rm -f $@ ; false )

arch/x86/boot/compressed/piggy.o和其它.o文件链接得到arch/x86/boot/compressed/vmlinux

// arch/x86/boot/compressed/Makefile
# Objects (plus the linker script vmlinux.lds) that form the decompressor
# image; piggy.o carries the compressed kernel. This must be assigned BEFORE
# the rule below, because make expands prerequisite lists as soon as the
# rule line is read.
vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
	$(obj)/string.o $(obj)/cmdline.o $(obj)/error.o \
	$(obj)/piggy.o $(obj)/cpuflags.o

# Link everything into compressed/vmlinux via the check-and-link-vmlinux
# command (defined outside this excerpt).
$(obj)/vmlinux: $(vmlinux-objs-y) FORCE
	$(call if_changed,check-and-link-vmlinux)

arch/x86/boot/compressed/vmlinux去掉note/comment/符号表/调试信息,通过-O binary转换成纯二进制文件arch/x86/boot/vmlinux.bin(仅保留代码和数据)

// arch/x86/boot/Makefile
# Turn compressed/vmlinux into the raw protected-mode payload
# arch/x86/boot/vmlinux.bin: -O binary emits a flat binary, -R removes the
# .note and .comment sections, -S strips symbol and relocation information.
OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S
$(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
	$(call if_changed,objcopy)

arch/x86/boot/compressed/mkpiggy.c

mkpiggy将vmlinux.bin.gz作为.section .rodata..compressed嵌入arch/x86/boot/compressed/piggy.S

/*
 * mkpiggy: write to stdout the assembly source that embeds a compressed
 * kernel image.
 *
 * argv[1] is the path of the compressed file (e.g. vmlinux.bin.gz).
 * The generated text defines z_input_len (compressed size), z_output_len
 * (uncompressed size, taken from the last 4 bytes of the file) and places
 * the file itself between input_data and input_data_end using .incbin.
 *
 * Returns 0 on success and 1 on any failure (bad usage, or an error while
 * opening, seeking in, reading or measuring the file).
 */
int main(int argc, char *argv[])
{
	uint32_t olen;
	long ilen;
	FILE *f = NULL;
	int retval = 1;

	if (argc < 2) {
		fprintf(stderr, "Usage: %s compressed_file\n", argv[0]);
		goto bail;
	}

	/* Get the information for the compressed kernel image first */

	f = fopen(argv[1], "r");	/* the compressed image */
	if (!f) {
		perror(argv[1]);
		goto bail;
	}

	/*
	 * Position on the final 4 bytes. A failed seek must abort: reading
	 * from an arbitrary position would yield a bogus output length.
	 */
	if (fseek(f, -4L, SEEK_END)) {
		perror(argv[1]);
		goto bail;
	}

	/* The last 4 bytes of a gzip file hold the uncompressed size. */
	if (fread(&olen, sizeof(olen), 1, f) != 1) {
		perror(argv[1]);
		goto bail;
	}

	/*
	 * After reading the trailer the file position is the end of the
	 * file, so ftell() yields the compressed size. ftell() reports
	 * failure as a negative value, which must not reach the output.
	 */
	ilen = ftell(f);
	if (ilen < 0) {
		perror(argv[1]);
		goto bail;
	}
	olen = get_unaligned_le32(&olen);	/* trailer is little-endian */

	printf(".section \".rodata..compressed\",\"a\",@progbits\n");
	printf(".globl z_input_len\n");
	/* ilen is known to be non-negative here, so the cast is lossless. */
	printf("z_input_len = %lu\n", (unsigned long)ilen);
	printf(".globl z_output_len\n");
	printf("z_output_len = %lu\n", (unsigned long)olen);

	printf(".globl input_data, input_data_end\n");
	printf("input_data:\n");
	printf(".incbin \"%s\"\n", argv[1]);	/* embed the compressed file */
	printf("input_data_end:\n");

	retval = 0;
bail:
	if (f)
		fclose(f);
	return retval;
}

arch/x86/boot/tools/build.c

bzImage = setup.bin向上对齐到512且至少5个扇区 + (vmlinux.bin + 4字节CRC)向上对齐到16

/*
 * build: assemble the bzImage from its two halves.
 *
 * Arguments (argc must be 5, otherwise usage() is called):
 *   argv[1]  setup.bin   - real-mode setup code, read into buf
 *   argv[2]  vmlinux.bin - protected-mode kernel, mmap'ed read-only
 *   argv[3]  handed to parse_zoffset()
 *   argv[4]  output image (bzImage)
 *
 * Output layout: the setup code zero-padded to whole 512-byte sectors (at
 * least SETUP_SECT_MIN of them), followed by the kernel, zero padding and a
 * 4-byte CRC, sized so that kernel + padding + CRC is a multiple of 16 bytes
 * (32 bytes with CONFIG_EFI_STUB). The CRC covers everything written before
 * it, including the setup part.
 *
 * Returns 0 on success; every error path goes through die().
 */
int main(int argc, char ** argv)
{
	unsigned int i, sz, setup_sectors, init_sz;
	int c;
	u32 sys_size;
	struct stat sb;
	FILE *file, *dest;
	int fd;
	void *kernel;
	u32 crc = 0xffffffffUL;

	efi_stub_defaults();

	if (argc != 5)
		usage();
	parse_zoffset(argv[3]);

	dest = fopen(argv[4], "w"); // open the output image (bzImage)
	if (!dest)
		die("Unable to write `%s': %m", argv[4]);

	/* Copy the setup code */
	file = fopen(argv[1], "r"); // open setup.bin
	if (!file)
		die("Unable to open `%s': %m", argv[1]);
	c = fread(buf, 1, sizeof(buf), file); // read setup.bin into buf; c = bytes read
	if (ferror(file))
		die("read-error on `setup'");
	if (c < 1024)
		die("The setup must be at least 1024 bytes");
	if (get_unaligned_le16(&buf[510]) != 0xAA55) // first sector must end with the boot-sector signature
		die("Boot block hasn't got boot flag (0xAA55)");
	fclose(file);

	c += reserve_pecoff_reloc_section(c);

	/* Pad unused space with zeros */
	setup_sectors = (c + 511) / 512; // round up to whole 512-byte sectors
	if (setup_sectors < SETUP_SECT_MIN)
		setup_sectors = SETUP_SECT_MIN; // enforce the minimum sector count
	i = setup_sectors*512; // padded size of the setup part in bytes
	memset(buf+c, 0, i-c);

	update_pecoff_setup_and_reloc(i);

	/* Set the default root device */
	put_unaligned_le16(DEFAULT_ROOT_DEV, &buf[508]);

	printf("Setup is %d bytes (padded to %d bytes).\n", c, i);

	/* Open and stat the kernel file */
	fd = open(argv[2], O_RDONLY); // open vmlinux.bin
	if (fd < 0)
		die("Unable to open `%s': %m", argv[2]);
	if (fstat(fd, &sb))
		die("Unable to stat `%s': %m", argv[2]);
	sz = sb.st_size; // size of vmlinux.bin in bytes
	printf("System is %d kB\n", (sz+1023)/1024);
	kernel = mmap(NULL, sz, PROT_READ, MAP_SHARED, fd, 0); // map vmlinux.bin read-only
	if (kernel == MAP_FAILED)
		die("Unable to mmap '%s': %m", argv[2]);
	/* Number of 16-byte paragraphs, including space for a 4-byte CRC */
	sys_size = (sz + 15 + 4) / 16; // sys_size * 16 == (sz + 4) rounded up to a multiple of 16
#ifdef CONFIG_EFI_STUB
	/*
	 * COFF requires minimum 32-byte alignment of sections, and
	 * adding a signature is problematic without that alignment.
	 */
	sys_size = (sys_size + 1) & ~1;
#endif

	/* Patch the setup code with the appropriate size parameters */
	buf[0x1f1] = setup_sectors-1; // offset 0x1f1 (497) is setup_sects in the header
	put_unaligned_le32(sys_size, &buf[0x1f4]); // offset 0x1f4 (500) is syssize

	update_pecoff_text(setup_sectors * 512, i + (sys_size * 16));
	init_sz = get_unaligned_le32(&buf[0x260]);
	update_pecoff_bss(i + (sys_size * 16), init_sz);

	efi_stub_entry_update();

	crc = partial_crc32(buf, i, crc);
	if (fwrite(buf, 1, i, dest) != i) // write the padded setup part (i bytes)
		die("Writing setup failed");

	/* Copy the kernel code */
	crc = partial_crc32(kernel, sz, crc);
	if (fwrite(kernel, 1, sz, dest) != sz) // write the kernel (sz bytes of vmlinux.bin)
		die("Writing kernel failed");

	/* Add padding leaving 4 bytes for the checksum */
	while (sz++ < (sys_size*16) - 4) {
		crc = partial_crc32_one('\0', crc);
		if (fwrite("\0", 1, 1, dest) != 1) // one zero byte per iteration, sys_size*16 - 4 - sz bytes in total
			die("Writing padding failed");
	}

	/* Write the CRC */
	printf("CRC %x\n", crc);
	put_unaligned_le32(crc, buf); // buf is reused as scratch space for the 4 CRC bytes
	if (fwrite(buf, 1, 4, dest) != 4) // append the 4-byte little-endian CRC
		die("Writing CRC failed");

	/* Catch any delayed write failures */
	if (fclose(dest))
		die("Writing image failed");

	close(fd);

	/* Everything is OK */
	return 0;
}

从kernel.img到setup.bin

arch/x86/boot/setup.ld

setup.ld定义了setup.bin的结构

/*
 * Layout of the real-mode setup image: the legacy boot-sector text and data
 * start at offset 0, the setup header section is placed at offset 495, and
 * the remaining code, read-only data, data, signature and bss follow.
 */
SECTIONS
{
	. = 0; /* boot-sector code starts at offset 0 */
	.bstext		: { *(.bstext) }
	.bsdata		: { *(.bsdata) }

	. = 495; /* the .header section starts at offset 495 */
	.header		: { *(.header) }
	.entrytext	: { *(.entrytext) }
	.inittext	: { *(.inittext) }
	.initdata	: { *(.initdata) }
	__end_init = .;

	.text		: { *(.text) }
	.text32		: { *(.text32) }

	. = ALIGN(16);
	.rodata		: { *(.rodata*) }

	.videocards	: {
		video_cards = .;
		*(.videocards)
		video_cards_end = .;
	}

	. = ALIGN(16);
	.data		: { *(.data*) }

	.signature	: {
		setup_sig = .;
		LONG(0x5a5aaa55)
	}


	. = ALIGN(16);
	.bss		:
	{
		__bss_start = .;	/* first address of .bss */
		*(.bss)
		__bss_end = .;		/* first address past the end of .bss */
	}
	. = ALIGN(16);
	_end = .;				/* first address past the end of the setup image */

	/DISCARD/ : { *(.note*) }

	/*
	 * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility:
	 */
	. = ASSERT(_end <= 0x8000, "Setup too big!");
	. = ASSERT(hdr == 0x1f1, "The setup header has the wrong offset!");
	/* Necessary for the very-old-loader check to work... */
	. = ASSERT(__end_init <= 5*512, "init sections too big!");

}

arch/x86/boot/header.S

_start位于setup.bin开始偏移512个字节,正好是vmlinuz实模式部分起始地址 + 0x200,也就是说kernel.img最后跳转到setup.bin的_start

/*
 * Legacy boot-sector stub, placed at offset 0 of the setup image.
 * It normalizes %cs to BOOTSEG, prints bugger_off_msg one character at a
 * time through int 0x10, waits for a key press (int 0x16) and then issues
 * int 0x19; if that returns, it jumps to f000:fff0.
 */
BOOTSEG		= 0x07C0		/* original address of boot-sector */
SYSSEG		= 0x1000		/* historical load address >> 4 */

	.code16
	.section ".bstext", "ax" // placed at offset 0 by setup.ld

	.global bootsect_start
bootsect_start:

	# Normalize the start address
	ljmp	$BOOTSEG, $start2

start2:
	movw	%cs, %ax
	movw	%ax, %ds
	movw	%ax, %es
	movw	%ax, %ss
	xorw	%sp, %sp
	sti
	cld

	movw	$bugger_off_msg, %si

msg_loop:
	lodsb
	andb	%al, %al
	jz	bs_die
	movb	$0xe, %ah
	movw	$7, %bx
	int	$0x10
	jmp	msg_loop

bs_die:
	# Allow the user to press a key, then reboot
	xorw	%ax, %ax
	int	$0x16
	int	$0x19

	# int 0x19 should never return.  In case it does anyway,
	# invoke the BIOS reset code...
	ljmp	$0xf000,$0xfff0

	.section ".bsdata", "a"
bugger_off_msg:
	.ascii	"Use a boot loader.\r\n"
	.ascii	"\n"
	.ascii	"Remove disk and press any key to reboot...\r\n"
	.byte	0

	# Kernel attributes; used by setup.  This is part 1 of the
	# header, from the old boot sector.
	# The trailing numbers are byte offsets from the start of the image.

	.section ".header", "a" // placed at offset 495 by setup.ld
	.globl	sentinel
sentinel:	.byte 0xff, 0xff        /* Used to detect broken loaders */ // 495-496

	.globl	hdr // start of the data described by struct setup_header (offset 497 = 0x1f1)
hdr:
setup_sects:	.byte 0			/* Filled in by build.c */              // 497
root_flags:	.word ROOT_RDONLY                                           // 498-499
syssize:	.long 0			/* Filled in by build.c */                  // 500-503
ram_size:	.word 0			/* Obsolete */                              // 504-505
vid_mode:	.word SVGA_MODE                                             // 506-507
root_dev:	.word 0			/* Filled in by build.c */                  // 508-509
boot_flag:	.word 0xAA55                                                // 510-511

	# offset 512, entry point

	.globl	_start
_start:
		# Explicitly enter this as bytes, or the assembler
		# tries to generate a 3-byte jump here, which causes
		# everything else to push off to the wrong offset.
		.byte	0xeb		# short (2-byte) jump                       // 512
		.byte	start_of_setup-1f                                       // 513, displacement from label 1 to start_of_setup
1:

	# Part 2 of the header, from the old setup.S
	# It starts at offset 0x202, right after the 2-byte jump at _start.
	# Offsets quoted below follow from the sizes of the directives.

		.ascii	"HdrS"		# header signature
		.word	0x020d		# header version number (>= 0x0105)
					# or else old loadlin-1.5 will fail)
		.globl realmode_swtch
realmode_swtch:	.word	0, 0		# default_switch, SETUPSEG
start_sys_seg:	.word	SYSSEG		# obsolete and meaningless, but just
					# in case something decided to "use" it
		.word	kernel_version-512 # pointing to kernel version string
					# above section of header is compatible
					# with loadlin-1.5 (header v1.5). Don't
					# change it.

type_of_loader:	.byte	0		# 0 means ancient bootloader, newer
					# bootloaders know to change this.
					# See Documentation/x86/boot.txt for
					# assigned ids

# flags, unused bits must be zero (RFU) bit within loadflags
loadflags:
		.byte	LOADED_HIGH	# The kernel is to be loaded high

setup_move_size: .word  0x8000		# size to move, when setup is not
					# loaded at 0x90000. We will move setup
					# to 0x90000 then just before jumping
					# into the kernel. However, only the
					# loader knows how much data behind
					# us also needs to be loaded.

code32_start:				# here loaders can put a different
					# start address for 32-bit code.
					# (offset 0x214)
		.long	0x100000	# 0x100000 = default for big kernel

ramdisk_image:	.long	0		# address of loaded ramdisk image
					# Here the loader puts the 32-bit
					# address where it loaded the image.
					# This only will be read by the kernel.

ramdisk_size:	.long	0		# its size in bytes

bootsect_kludge:
		.long	0		# obsolete

heap_end_ptr:	.word	_end+STACK_SIZE-512
					# (Header version 0x0201 or later)
					# space from here (exclusive) down to
					# end of setup code can be used by setup
					# for local heap purposes.

ext_loader_ver:
		.byte	0		# Extended boot loader version
ext_loader_type:
		.byte	0		# Extended boot loader type

cmd_line_ptr:	.long	0		# (Header version 0x0202 or later)
					# If nonzero, a 32-bit pointer
					# to the kernel command line.
					# The command line should be
					# located between the start of
					# setup and the end of low
					# memory (0xa0000), or it may
					# get overwritten before it
					# gets read.  If this field is
					# used, there is no longer
					# anything magical about the
					# 0x90000 segment; the setup
					# can be located anywhere in
					# low memory 0x10000 or higher.

initrd_addr_max: .long 0x7fffffff
					# (Header version 0x0203 or later)
					# The highest safe address for
					# the contents of an initrd
					# The current kernel allows up to 4 GB,
					# but leave it at 2 GB to avoid
					# possible bootloader bugs.

kernel_alignment:  .long CONFIG_PHYSICAL_ALIGN	#physical addr alignment
						#required for protected mode
						#kernel
relocatable_kernel:    .byte 1
min_alignment:		.byte MIN_KERNEL_ALIGN_LG2	# minimum alignment

xloadflags:
			.word XLF0 | XLF1 | XLF23 | XLF4

cmdline_size:   .long   COMMAND_LINE_SIZE-1     #length of the command line,
                                                #added with boot protocol
                                                #version 2.06

hardware_subarch:	.long 0			# subarchitecture, added with 2.07
						# default to 0 for normal x86 PC

hardware_subarch_data:	.quad 0

payload_offset:		.long ZO_input_data
payload_length:		.long ZO_z_input_len

setup_data:		.quad 0			# 64-bit physical pointer to
						# single linked list of
						# struct setup_data

pref_address:		.quad LOAD_PHYSICAL_ADDR	# preferred load addr

init_size:		.long INIT_SIZE		# kernel initialization size
						# (offset 0x260)
handover_offset:	.long 0			# Filled in by build.c

# End of setup header #####################################################

	/*
	 * Real-mode entry code reached from the short jump at _start.
	 * It makes %es and %ss equal to %ds, establishes a usable stack,
	 * reloads %cs so that it equals %ds, verifies the setup signature,
	 * clears .bss and calls main().
	 * In the annotations below, X denotes the linear address at which the
	 * setup image was loaded (so %ds == X >> 4); concrete values such as
	 * 0x9000 are the ones used in this walkthrough, not fixed constants.
	 */
	.section ".entrytext", "ax"
start_of_setup:
# Force %es = %ds
	movw	%ds, %ax
	movw	%ax, %es // es = X >> 4
	cld // DF = 0

# Apparently some ancient versions of LILO invoked the kernel with %ss != %ds,
# which happened to work by accident for the old code.  Recalculate the stack
# pointer if %ss is invalid.  Otherwise leave it alone, LOADLIN sets up the
# stack behind its own code, so we can't blindly put it directly past the heap.

	movw	%ss, %dx
	cmpw	%ax, %dx	# %ds == %ss? // compare ds with ss
	movw	%sp, %dx // dx = current sp (0x9000 in this walkthrough); mov leaves the flags from cmpw intact
	je	2f		# -> assume %sp is reasonably set // if equal, keep the existing sp and go to 2

	# Invalid %ss, make up a new stack
	movw	$_end, %dx
	testb	$CAN_USE_HEAP, loadflags
	jz	1f
	movw	heap_end_ptr, %dx
1:	addw	$STACK_SIZE, %dx
	jnc	2f
	xorw	%dx, %dx	# Prevent wraparound

2:	# Now %dx should point to the end of our stack space
	andw	$~3, %dx	# dword align (might as well...) // dx stays 0x9000 in this walkthrough
	jnz	3f // non-zero result: go to 3
	movw	$0xfffc, %dx	# Make sure we're not zero
3:	movw	%ax, %ss // ss = X >> 4
	movzwl	%dx, %esp	# Clear upper half of %esp // esp = dx; the stack top ss:sp is X + 0x9000 in this walkthrough
	sti			# Now we should have a working stack

# We will have entered with %cs = %ds+0x20, normalize %cs so
# it is on par with the other segments.
	pushw	%ds // pushes X >> 4, popped into CS below
	pushw	$6f
	lretw // pops IP, then CS: IP = offset of label 6, CS = X >> 4, so execution continues at label 6 with cs == ds
6:

# Check signature at end of setup
	cmpl	$0x5a5aaa55, setup_sig // setup.ld stores 0x5a5aaa55 at setup_sig
	jne	setup_bad

# Zero the bss
	movw	$__bss_start, %di	// di = start address of .bss
	movw	$_end+3, %cx 		// cx = _end + 3; the +3 makes the division by 4 below round up
	xorl	%eax, %eax 			// eax = 0
	subw	%di, %cx 			// cx = _end + 3 - __bss_start
	shrw	$2, %cx 			// cx = (_end + 3 - __bss_start) / 4
	rep; stosl // store eax (0) at es:di, cx times, 4 bytes per store

# Jump to C code (should not return)
	calll	main // call main

其中hdr对应struct setup_header

/*
 * C view of the setup header that header.S emits starting at the `hdr`
 * label (offset 0x1f1 of the setup image). The structure is packed so that
 * the field offsets match the byte layout produced by the assembler
 * directives; the fields appear in the same order as in header.S.
 */
struct setup_header {
	__u8	setup_sects;
	__u16	root_flags;
	__u32	syssize;
	__u16	ram_size;
	__u16	vid_mode;
	__u16	root_dev;
	__u16	boot_flag;
	__u16	jump;		/* the 2-byte short jump at _start */
	__u32	header;		/* the "HdrS" signature */
	__u16	version;
	__u32	realmode_swtch;
	__u16	start_sys_seg;
	__u16	kernel_version;
	__u8	type_of_loader;
	__u8	loadflags;
	__u16	setup_move_size;
	__u32	code32_start;
	__u32	ramdisk_image;
	__u32	ramdisk_size;
	__u32	bootsect_kludge;
	__u16	heap_end_ptr;
	__u8	ext_loader_ver;
	__u8	ext_loader_type;
	__u32	cmd_line_ptr;
	__u32	initrd_addr_max;
	__u32	kernel_alignment;
	__u8	relocatable_kernel;
	__u8	min_alignment;
	__u16	xloadflags;
	__u32	cmdline_size;
	__u32	hardware_subarch;
	__u64	hardware_subarch_data;
	__u32	payload_offset;
	__u32	payload_length;
	__u64	setup_data;
	__u64	pref_address;
	__u32	init_size;
	__u32	handover_offset;
} __attribute__((packed));

arch/x86/boot/main.c

/*
 * C entry point of the real-mode setup code, called from start_of_setup.
 * It runs the early initialization steps in a fixed order and finishes by
 * calling go_to_protected_mode(). All helpers are defined in other files.
 */
void main(void)
{
	/* First, copy the boot header into the "zeropage" */
	copy_boot_params(); // per the original annotation: memcpy(&boot_params.hdr, &hdr, sizeof(hdr))

	/* Initialize the early-boot console */
	console_init();
	if (cmdline_find_option_bool("debug"))
		puts("early console in setup code\n");

	/* End of heap check */
	init_heap();

	/* Make sure we have all the proper CPU support */
	if (validate_cpu()) {
		puts("Unable to boot - please use a kernel appropriate "
		     "for your CPU.\n");
		die();
	}

	/* Tell the BIOS what CPU mode we intend to run in. */
	set_bios_mode();

	/* Detect memory layout */
	// per the original annotation: fills e820_table (e820_entries entries) using BIOS int 0x15
	detect_memory();

	/* Set keyboard repeat rate (why?) and query the lock flags */
	keyboard_init();

	/* Query Intel SpeedStep (IST) information */
	query_ist();

	/* Query APM information */
#if defined(CONFIG_APM) || defined(CONFIG_APM_MODULE)
	query_apm_bios();
#endif

	/* Query EDD information */
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
	query_edd();
#endif

	/* Set the video mode */
	set_video();

	/* Do the last things and invoke protected mode */
	go_to_protected_mode(); // hand over to the protected-mode transition
}

arch/x86/boot/pm.c

void go_to_protected_mode(void)
{
	/* Hook before leaving real mode, also disables interrupts */
	realmode_switch_hook();

	/* Enable the A20 gate */
	if (enable_a20()) { // 打开A20地址线
		puts("A20 gate not responding, unable to boot...\n");
		die();
	}

	/* Reset coprocessor (IGNNE#) */
	reset_coprocessor();

	/* Mask all interrupts in the PIC */
	mask_all_interrupts(); // 屏蔽中断

	/* Actual transition to protected mode... */
	setup_idt(); // 加载IDTR
	setup_gdt(); // 加载GDTR
	protected_mode_jump(boot_params.hdr.code32_start,
			    (u32)&boot_params + (ds() << 4)); // 调用protected_mode_jump
}

setup_gdt加载boot_gdt到GDTR、setup_idt加载null_idt到IDTR

/*
 * Set up the GDT
 */

/*
 * Operand layout for lgdt/lidt: a 16-bit limit followed by a 32-bit base,
 * packed so that no padding is inserted between the two fields.
 */
struct gdt_ptr {
	u16 len;
	u32 ptr;
} __attribute__((packed));

/*
 * Load GDTR with a small static boot GDT containing a code segment, a data
 * segment and a TSS entry. The table address is converted from a real-mode
 * offset to a linear address by adding ds() << 4.
 */
static void setup_gdt(void)
{
	/* There are machines which are known to not boot with the GDT
	   being 8-byte unaligned.  Intel recommends 16 byte alignment. */
	static const u64 boot_gdt[] __attribute__((aligned(16))) = {
		/* CS: code, read/execute, 4 GB, base 0 */
		[GDT_ENTRY_BOOT_CS] = GDT_ENTRY(0xc09b, 0, 0xfffff), // CS.L = 0, CS.D = 1
		/* DS: data, read/write, 4 GB, base 0 */
		[GDT_ENTRY_BOOT_DS] = GDT_ENTRY(0xc093, 0, 0xfffff),
		/* TSS: 32-bit tss, 104 bytes, base 4096 */
		/* We only have a TSS here to keep Intel VT happy;
		   we don't actually use it for anything. */
		[GDT_ENTRY_BOOT_TSS] = GDT_ENTRY(0x0089, 4096, 103),
	};
	/* Xen HVM incorrectly stores a pointer to the gdt_ptr, instead
	   of the gdt_ptr contents.  Thus, make it static so it will
	   stay in memory, at least long enough that we switch to the
	   proper kernel GDT. */
	static struct gdt_ptr gdt;

	/* The limit field holds the table size minus one. */
	gdt.len = sizeof(boot_gdt)-1;
	gdt.ptr = (u32)&boot_gdt + (ds() << 4);

	asm volatile("lgdtl %0" : : "m" (gdt));
}

/*
 * Set up the IDT
 *
 * Loads IDTR from a descriptor whose limit and base are both zero,
 * reusing struct gdt_ptr as the operand layout for lidt.
 */
static void setup_idt(void)
{
	static const struct gdt_ptr null_idt = {0, 0};
	asm volatile("lidtl %0" : : "m" (null_idt));
}

protected_mode_jump的第1个参数为code32_start(0x100000),第2个参数为boot_params的线性地址,即(u32)&boot_params + (ds() << 4)

code32_start:				# here loaders can put a different
					# start address for 32-bit code.
		.long	0x100000	# 0x100000 = default for big kernel

struct boot_params boot_params;

函数调用

// arch/x86/entry/calling.h

 x86 function call convention, 64-bit:
 -------------------------------------
  arguments           |  callee-saved      | extra caller-saved | return
 [callee-clobbered]   |                    | [callee-clobbered] |
 ---------------------------------------------------------------------------
 rdi rsi rdx rcx r8-9 | rbx rbp [*] r12-15 | r10-11             | rax, rdx [**]

// https://gcc.gnu.org/onlinedocs/gcc/x86-Function-Attributes.html
// -mregparm=3表示使用3个寄存器传参
For 32-bit we have the following conventions - kernel is built with
-mregparm=3 and -freg-struct-return:

 x86 function calling convention, 32-bit:
 ----------------------------------------
  arguments         | callee-saved        | extra caller-saved | return
 [callee-clobbered] |                     | [callee-clobbered] |
 -------------------------------------------------------------------------
 eax edx ecx        | ebx edi esi ebp [*] | <none>             | eax, edx [**]

arch/x86/boot/pmjump.S

在setup.bin中进入保护模式,最后跳转到code32_start(0x100000),正好是vmlinuz保护模式部分起始地址

	/*
	 * Switch from 16-bit real mode to 32-bit protected mode and jump to
	 * the 32-bit entry point. In the annotations below, X denotes
	 * %cs << 4, i.e. the linear address at which the setup code runs.
	 */
	.text
	.code16

/*
 * void protected_mode_jump(u32 entrypoint, u32 bootparams);
 */
GLOBAL(protected_mode_jump) // eax = first argument (entry point, code32_start), edx = second argument (linear address of boot_params)
	movl	%edx, %esi		# Pointer to boot_params table // esi = boot_params address

	xorl	%ebx, %ebx // ebx = 0
	movw	%cs, %bx // bx = X >> 4
	shll	$4, %ebx // ebx = X
	addl	%ebx, 2f // the .long at label 2 holds the link-time offset of in_pm32; adding X turns it into a linear address
	jmp	1f			# Short jump to serialize on 386/486 // continue at label 1
1:

	movw	$__BOOT_DS, %cx // cx = boot data segment selector (GDT_ENTRY_BOOT_DS*8 per the original annotation)
	movw	$__BOOT_TSS, %di // di = boot TSS selector (GDT_ENTRY_BOOT_TSS*8 per the original annotation)

	movl	%cr0, %edx
	orb	$X86_CR0_PE, %dl	# Protected mode // set CR0.PE to enter protected mode
	movl	%edx, %cr0

	# Transition to 32-bit mode
	.byte	0x66, 0xea		# ljmpl opcode // far jump to in_pm32
2:	.long	in_pm32			# offset // patched above to the linear address of in_pm32
	.word	__BOOT_CS		# segment // boot code segment selector (GDT_ENTRY_BOOT_CS*8 per the original annotation)
ENDPROC(protected_mode_jump)
	.code32
	.section ".text32","ax"
GLOBAL(in_pm32)
	# Set up data segments for flat 32-bit mode
	movl	%ecx, %ds // ds = boot data segment selector
	movl	%ecx, %es // es = boot data segment selector
	movl	%ecx, %fs // fs = boot data segment selector
	movl	%ecx, %gs // gs = boot data segment selector
	movl	%ecx, %ss // ss = boot data segment selector
	# The 32-bit code sets up its own stack, but this way we do have
	# a valid stack if some debugging hack wants to use it.
	addl	%ebx, %esp // esp = esp + X (ebx still holds X from protected_mode_jump)

	# Set up TR to make Intel VT happy
	ltr	%di // load the task register with the boot TSS selector

	# Clear registers to allow for future extensions to the
	# 32-bit boot protocol
	xorl	%ecx, %ecx // ecx = 0
	xorl	%edx, %edx // edx = 0
	xorl	%ebx, %ebx // ebx = 0
	xorl	%ebp, %ebp // ebp = 0
	xorl	%edi, %edi // edi = 0

	# Set up LDTR to make Intel VT happy
	lldt	%cx // cx is 0 here, so LDTR receives the null selector

	jmpl	*%eax			# Jump to the 32-bit entrypoint // eax still holds the entry point passed in (code32_start, 0x100000 by default)
ENDPROC(in_pm32)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值