目前,代码的执行已经推进到 setjmp_gen_code(),调用栈如下:
(gdb) bt
#0 setjmp_gen_code (env=0x30ac5306bd0, tb=0x7fffa4c1d040, pc=0, host_pc=0x7fff23400000, max_insns=0x7fff64c17b74, ti=0x7fff64c17b90) at ../accel/tcg/translate-all.c:242
#1 0x0000555555d0e8ac in tb_gen_code (cpu=0x30ac5304000, s=...) at ../accel/tcg/translate-all.c:320
#2 0x0000555555d0219e in cpu_exec_loop (cpu=0x30ac5304000, sc=0x7fff64c17ce0) at ../accel/tcg/cpu-exec.c:953
#3 0x0000555555d0232a in cpu_exec_setjmp (cpu=0x30ac5304000, sc=0x7fff64c17ce0) at ../accel/tcg/cpu-exec.c:999
#4 0x0000555555d023c7 in cpu_exec (cpu=0x30ac5304000) at ../accel/tcg/cpu-exec.c:1025
#5 0x0000555555d2b223 in tcg_cpu_exec (cpu=0x30ac5304000) at ../accel/tcg/tcg-accel-ops.c:81
#6 0x0000555555d2bddf in mttcg_cpu_thread_fn (arg=0x30ac5304000) at ../accel/tcg/tcg-accel-ops-mttcg.c:94
#7 0x0000555556190c12 in qemu_thread_start (args=0x30ac4becd20) at ../util/qemu-thread-posix.c:393
#8 0x00007ffff5208aa4 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:447
#9 0x00007ffff5295c3c in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:78
setjmp_gen_code() 主要完成 RISCV ISA -> TCG Ops -> X86_64 这两步转译,其代码如下:
/*
* Isolate the portion of code gen which can setjmp/longjmp.
* Return the size of the generated code, or negative on error.
*/
static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
vaddr pc, void *host_pc,
int *max_insns, int64_t *ti){
...
CPUState *cs = env_cpu(env);
tcg_ctx->cpu = cs;
cs->cc->tcg_ops->translate_code(cs, tb, max_insns, pc, host_pc);
...
return tcg_gen_code(tcg_ctx, tb, pc);}
本文的主角是 cs->cc->tcg_ops->translate_code(),它负责 RISCV ISA -> TCG Ops 这一步,也就是把 RISCV 二进制指令转译成 QEMU TCG 定义的中间指令(TCG Ops)。
原理很明了,就是用另一种语言(语法)表达同样的内容(语义)。如,用英语(English Syntax)表达了一个意思(Semantics),现在要将其用中文(Chinese Syntax)来表达同样的意思。
这就是转译的要义。包括后续的 TCG Ops -> X86_64 也是一样。只是处理细节不同而已。
const TCGCPUOps *tcg_ops 定义了在这个转译过程中,对于某一CPU要做的事,即接口:
/*
 * TCGCPUOps: table of per-target hooks used by the TCG accelerator.
 *
 * Generic code reaches these through the CPU class (cs->cc->tcg_ops).
 * The members after mmu_index differ between user-mode builds
 * (CONFIG_USER_ONLY) and system-emulation builds.
 */
struct TCGCPUOps {
/**
* @mttcg_supported: multi-threaded TCG is supported
*
* Target (TCG frontend) supports:
* - atomic instructions
* - memory ordering primitives (barriers)
*/
bool mttcg_supported;
/**
* @precise_smc: Stores which modify code within the current TB force
* the TB to exit; the next executed instruction will see
* the result of the store.
*/
bool precise_smc;
/**
* @guest_default_memory_order: default barrier that is required
* for the guest memory ordering.
*/
TCGBar guest_default_memory_order;
/**
* @initialize: Initialize TCG state
*
* Called when the first CPU is realized.
*/
void (*initialize)(void);
/**
* @translate_code: Translate guest instructions to TCGOps
* @cpu: cpu context
* @tb: translation block
* @max_insns: max number of instructions to translate
* @pc: guest virtual program counter address
* @host_pc: host physical program counter address
*
* This function must be provided by the target, which should create
* the target-specific DisasContext, and then invoke translator_loop.
*/
void (*translate_code)(CPUState *cpu, TranslationBlock *tb,
int *max_insns, vaddr pc, void *host_pc);
/**
* @get_tb_cpu_state: Extract CPU state for a TCG #TranslationBlock
*
* Fill in all data required to select or compile a TranslationBlock.
*/
TCGTBCPUState (*get_tb_cpu_state)(CPUState *cs);
/**
* @synchronize_from_tb: Synchronize state from a TCG #TranslationBlock
*
* This is called when we abandon execution of a TB before starting it,
* and must set all parts of the CPU state which the previous TB in the
* chain may not have updated.
* By default, when this is NULL, a call is made to @set_pc(tb->pc).
*
* If more state needs to be restored, the target must implement a
* function to restore all the state, and register it here.
*/
void (*synchronize_from_tb)(CPUState *cpu, const TranslationBlock *tb);
/**
* @restore_state_to_opc: Synchronize state from INDEX_op_start_insn
*
* This is called when we unwind state in the middle of a TB,
* usually before raising an exception. Set all part of the CPU
* state which are tracked insn-by-insn in the target-specific
* arguments to start_insn, passed as @data.
*/
void (*restore_state_to_opc)(CPUState *cpu, const TranslationBlock *tb,
const uint64_t *data);
/** @cpu_exec_enter: Callback for cpu_exec preparation */
void (*cpu_exec_enter)(CPUState *cpu);
/** @cpu_exec_exit: Callback for cpu_exec cleanup */
void (*cpu_exec_exit)(CPUState *cpu);
/** @debug_excp_handler: Callback for handling debug exceptions */
void (*debug_excp_handler)(CPUState *cpu);
/** @mmu_index: Callback for choosing softmmu mmu index */
int (*mmu_index)(CPUState *cpu, bool ifetch);
#ifdef CONFIG_USER_ONLY
/**
* @fake_user_interrupt: Callback for 'fake exception' handling.
*
* Simulate 'fake exception' which will be handled outside the
* cpu execution loop (hack for x86 user mode).
*/
void (*fake_user_interrupt)(CPUState *cpu);
/**
* @record_sigsegv:
* @cpu: cpu context
* @addr: faulting guest address
* @access_type: access was read/write/execute
* @maperr: true for invalid page, false for permission fault
* @ra: host pc for unwinding
*
* We are about to raise SIGSEGV with si_code set for @maperr,
* and si_addr set for @addr. Record anything further needed
* for the signal ucontext_t.
*
* If the emulated kernel does not provide the signal handler with
* anything besides the user context registers, and the siginfo_t,
* then this hook need do nothing and may be omitted.
* Otherwise, record the data and return; the caller will raise
* the signal, unwind the cpu state, and return to the main loop.
*
* If it is simpler to re-use the sysemu tlb_fill code, @ra is provided
* so that a "normal" cpu exception can be raised. In this case,
* the signal must be raised by the architecture cpu_loop.
*/
void (*record_sigsegv)(CPUState *cpu, vaddr addr,
MMUAccessType access_type,
bool maperr, uintptr_t ra);
/**
* @record_sigbus:
* @cpu: cpu context
* @addr: misaligned guest address
* @access_type: access was read/write/execute
* @ra: host pc for unwinding
*
* We are about to raise SIGBUS with si_code BUS_ADRALN,
* and si_addr set for @addr. Record anything further needed
* for the signal ucontext_t.
*
* If the emulated kernel does not provide the signal handler with
* anything besides the user context registers, and the siginfo_t,
* then this hook need do nothing and may be omitted.
* Otherwise, record the data and return; the caller will raise
* the signal, unwind the cpu state, and return to the main loop.
*
* If it is simpler to re-use the sysemu do_unaligned_access code,
* @ra is provided so that a "normal" cpu exception can be raised.
* In this case, the signal must be raised by the architecture cpu_loop.
*/
void (*record_sigbus)(CPUState *cpu, vaddr addr,
MMUAccessType access_type, uintptr_t ra);
/**
* @untagged_addr: Remove an ignored tag from an address
* @cs: cpu context
* @addr: tagged guest address
*/
vaddr (*untagged_addr)(CPUState *cs, vaddr addr);
#else
/** @do_interrupt: Callback for interrupt handling. */
void (*do_interrupt)(CPUState *cpu);
/** @cpu_exec_interrupt: Callback for processing interrupts in cpu_exec */
bool (*cpu_exec_interrupt)(CPUState *cpu, int interrupt_request);
/** @cpu_exec_reset: Callback for reset in cpu_exec. */
void (*cpu_exec_reset)(CPUState *cpu);
/**
* @cpu_exec_halt: Callback for handling halt in cpu_exec.
*
* The target CPU should do any special processing here that it needs
* to do when the CPU is in the halted state.
*
* Return true to indicate that the CPU should now leave halt, false
* if it should remain in the halted state. (This should generally
* be the same value that cpu_has_work() would return.)
*
* This method must be provided. If the target does not need to
* do anything special for halt, the same function used for its
* SysemuCPUOps::has_work method can be used here, as they have the
* same function signature.
*/
bool (*cpu_exec_halt)(CPUState *cpu);
/**
* @tlb_fill_align: Handle a softmmu tlb miss
* @cpu: cpu context
* @out: output page properties
* @addr: virtual address
* @access_type: read, write or execute
* @mmu_idx: mmu context
* @memop: memory operation for the access
* @size: memory access size, or 0 for whole page
* @probe: test only, no fault
* @ra: host return address for exception unwind
*
* If the access is valid, fill in @out and return true.
* Otherwise if probe is true, return false.
* Otherwise raise an exception and do not return.
*
* The alignment check for the access is deferred to this hook,
* so that the target can determine the priority of any alignment
* fault with respect to other potential faults from paging.
* Zero may be passed for @memop to skip any alignment check
* for non-memory-access operations such as probing.
*/
bool (*tlb_fill_align)(CPUState *cpu, CPUTLBEntryFull *out, vaddr addr,
MMUAccessType access_type, int mmu_idx,
MemOp memop, int size, bool probe, uintptr_t ra);
/**
* @tlb_fill: Handle a softmmu tlb miss
*
* If the access is valid, call tlb_set_page and return true;
* if the access is invalid and probe is true, return false;
* otherwise raise an exception and do not return.
*/
bool (*tlb_fill)(CPUState *cpu, vaddr address, int size,
MMUAccessType access_type, int mmu_idx,
bool probe, uintptr_t retaddr);
/**
* @pointer_wrap:
*
* We have incremented @base to @result, resulting in a page change.
* For the current cpu state, adjust @result for possible overflow.
*/
vaddr (*pointer_wrap)(CPUState *cpu, int mmu_idx, vaddr result, vaddr base);
/**
* @do_transaction_failed: Callback for handling failed memory transactions
* (ie bus faults or external aborts; not MMU faults)
*/
void (*do_transaction_failed)(CPUState *cpu, hwaddr physaddr, vaddr addr,
unsigned size, MMUAccessType access_type,
int mmu_idx, MemTxAttrs attrs,
MemTxResult response, uintptr_t retaddr);
/**
* @do_unaligned_access: Callback for unaligned access handling
* The callback must exit via raising an exception.
*/
G_NORETURN void (*do_unaligned_access)(CPUState *cpu, vaddr addr,
MMUAccessType access_type,
int mmu_idx, uintptr_t retaddr);
/**
* @adjust_watchpoint_address: hack for cpu_check_watchpoint used by ARM
*/
vaddr (*adjust_watchpoint_address)(CPUState *cpu, vaddr addr, int len);
/**
* @debug_check_watchpoint: return true if the architectural
* watchpoint whose address has matched should really fire, used by ARM
* and RISC-V
*/
bool (*debug_check_watchpoint)(CPUState *cpu, CPUWatchpoint *wp);
/**
* @debug_check_breakpoint: return true if the architectural
* breakpoint whose PC has matched should really fire.
*/
bool (*debug_check_breakpoint)(CPUState *cpu);
/**
* @io_recompile_replay_branch: Callback for cpu_io_recompile.
*
* The cpu has been stopped, and cpu_restore_state_from_tb has been
* called. If the faulting instruction is in a delay slot, and the
* target architecture requires re-execution of the branch, then
* adjust the cpu state as required and return true.
*/
bool (*io_recompile_replay_branch)(CPUState *cpu,
const TranslationBlock *tb);
/**
* @need_replay_interrupt: Return %true if @interrupt_request
* needs to be recorded for replay purposes.
*/
bool (*need_replay_interrupt)(int interrupt_request);
#endif /* !CONFIG_USER_ONLY */
};
cs->cc->tcg_ops 对应的是 riscv_tcg_ops,在 RISCVCPUClass 初始化时设定的:
static void riscv_cpu_common_class_init(ObjectClass *c, const void *data){
...
CPUClass *cc = CPU_CLASS(c);
...
#ifdef CONFIG_TCG
cc->tcg_ops = &riscv_tcg_ops;
#endif /* CONFIG_TCG */
...}
const TCGCPUOps riscv_tcg_ops = {
.mttcg_supported = true,
.guest_default_memory_order = 0,
.initialize = riscv_translate_init,
.translate_code = riscv_translate_code,
.get_tb_cpu_state = riscv_get_tb_cpu_state,
.synchronize_from_tb = riscv_cpu_synchronize_from_tb,
.restore_state_to_opc = riscv_restore_state_to_opc,
.mmu_index = riscv_cpu_mmu_index,
#ifndef CONFIG_USER_ONLY
.tlb_fill = riscv_cpu_tlb_fill,
.pointer_wrap = riscv_pointer_wrap,
.cpu_exec_interrupt = riscv_cpu_exec_interrupt,
.cpu_exec_halt = riscv_cpu_has_work,
.cpu_exec_reset = cpu_reset,
.do_interrupt = riscv_cpu_do_interrupt,
.do_transaction_failed = riscv_cpu_do_transaction_failed,
.do_unaligned_access = riscv_cpu_do_unaligned_access,
.debug_excp_handler = riscv_cpu_debug_excp_handler,
.debug_check_breakpoint = riscv_cpu_debug_check_breakpoint,
.debug_check_watchpoint = riscv_cpu_debug_check_watchpoint,
#endif /* !CONFIG_USER_ONLY */
}
那么,cs->cc->tcg_ops->translate_code() 就对应了 riscv_translate_code(),即:
/*
 * RISC-V callbacks for the generic translator loop; passed to
 * translator_loop() by riscv_translate_code().
 * The disas_log member of TranslatorOps is not set here, so it is
 * zero-initialized (NULL).
 */
static const TranslatorOps riscv_tr_ops = {
.init_disas_context = riscv_tr_init_disas_context,
.tb_start = riscv_tr_tb_start,
.insn_start = riscv_tr_insn_start,
.translate_insn = riscv_tr_translate_insn,
.tb_stop = riscv_tr_tb_stop,
};
/*
 * RISC-V implementation of TCGCPUOps.translate_code.
 *
 * Places the target-specific DisasContext on the stack and runs the
 * generic translator_loop() over it with the RISC-V callbacks in
 * riscv_tr_ops.  All parameters are forwarded unchanged.
 */
void riscv_translate_code(CPUState *cs, TranslationBlock *tb,
int *max_insns, vaddr pc, void *host_pc){
/* Left uninitialized here; translator_loop() receives only its base. */
DisasContext ctx;
translator_loop(cs, tb, max_insns, pc, host_pc, &riscv_tr_ops, &ctx.base);}
translator_loop() 要做的是:从 Guest PC 指向的指令开始逐条循环转译,直到遇到终止条件(translate_insn 把 db->is_jmp 设为 DISAS_NEXT 以外的值、TCG op 缓冲区已满,或已达到指令数上限);而每一步实际的转译动作由 static const TranslatorOps riscv_tr_ops 中的各个回调完成。如下:
void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
vaddr pc, void *host_pc, const TranslatorOps *ops,
DisasContextBase *db) {
...
ops->init_disas_context(db, cpu);
...
/* Start translating. */
icount_start_insn = gen_tb_start(db, cflags);
ops->tb_start(db, cpu);
...
while (true) {
...
ops->insn_start(db, cpu);
...
/*
* Disassemble one instruction. The translate_insn hook should
* update db->pc_next and db->is_jmp to indicate what should be
* done next -- either exiting this loop or locate the start of
* the next instruction.
*/
ops->translate_insn(db, cpu);
...
/* Stop translation if translate_insn so indicated. */
if (db->is_jmp != DISAS_NEXT) {break;}
/* Stop translation if the output buffer is full,
or we have executed all of the allowed instructions. */
if (tcg_op_buf_full() || db->num_insns >= db->max_insns) {
db->is_jmp = DISAS_TOO_MANY;break;}
}
/* Emit code to exit the TB, as indicated by db->is_jmp. */
ops->tb_stop(db, cpu);
gen_tb_end(tb, cflags, icount_start_insn, db->num_insns);
...}
/**
* TranslatorOps:
* @init_disas_context:
* Initialize the target-specific portions of DisasContext struct.
* The generic DisasContextBase has already been initialized.
*
* @tb_start:
* Emit any code required before the start of the main loop,
* after the generic gen_tb_start().
*
* @insn_start:
* Emit the tcg_gen_insn_start opcode.
*
* @translate_insn:
* Disassemble one instruction and set db->pc_next for the start
* of the following instruction. Set db->is_jmp as necessary to
* terminate the main loop.
*
* @tb_stop:
* Emit any opcodes required to exit the TB, based on db->is_jmp.
*
* @disas_log:
* Print instruction disassembly to log.
* NOTE(review): the meaning of the bool return value is not stated
* here; confirm against the caller.
*
* translator_loop() calls init_disas_context and tb_start once, then
* insn_start followed by translate_insn for each instruction, and
* finally tb_stop once the loop has ended.
*/
typedef struct TranslatorOps {
void (*init_disas_context)(DisasContextBase *db, CPUState *cpu);
void (*tb_start)(DisasContextBase *db, CPUState *cpu);
void (*insn_start)(DisasContextBase *db, CPUState *cpu);
void (*translate_insn)(DisasContextBase *db, CPUState *cpu);
void (*tb_stop)(DisasContextBase *db, CPUState *cpu);
bool (*disas_log)(const DisasContextBase *db, CPUState *cpu, FILE *f);
} TranslatorOps;
/**
* translator_loop:
* @cpu: Target vCPU.
* @tb: Translation block.
* @max_insns: Maximum number of insns to translate.
* @pc: guest virtual program counter address
* @host_pc: host physical program counter address
* @ops: Target-specific operations.
* @db: Disassembly context.
*
* Generic translator loop.
*
* Translation will stop in the following cases (in order):
* - When is_jmp set by #TranslatorOps::breakpoint_check.
* - set to DISAS_TOO_MANY exits after translating one more insn
* - set to any other value than DISAS_NEXT exits immediately.
* - When is_jmp set by #TranslatorOps::translate_insn.
* - set to any value other than DISAS_NEXT exits immediately.
* - When the TCG operation buffer is full.
* - When single-stepping is enabled (system-wide or on the current vCPU).
* - When too many instructions have been translated.
*
* NOTE(review): the TranslatorOps structure declared in this header has
* no breakpoint_check member, so the first case above looks stale;
* confirm against the translator_loop() implementation.
*/
void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
vaddr pc, void *host_pc, const TranslatorOps *ops,
DisasContextBase *db);
在对 riscv_tr_ops 中定义的每个函数分析前,需要先了解一下 TCG IR,即 TCG Ops,官方文档为 《frontend-ops》、《backend-ops》以及《TCG IR》等。
其中重点有:
1. A TCG basic block is a single entry, multiple exit region which corresponds to a list of instructions terminated by a label, or any branch instruction.
对应的代码为:
void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
vaddr pc, void *host_pc, const TranslatorOps *ops,
DisasContextBase *db){
...
while (true) {
...
/* Stop translation if translate_insn so indicated. */
if (db->is_jmp != DISAS_NEXT) {break;}
/* Stop translation if the output buffer is full,
or we have executed all of the allowed instructions. */
if (tcg_op_buf_full() || db->num_insns >= db->max_insns) {
db->is_jmp = DISAS_TOO_MANY;
break;}}
...}
2. TCG instructions or ops operate on TCG variables, both of which are strongly typed. Each instruction has a fixed number of output variable operands, input variable operands and constant operands. Vector instructions have a field specifying the element size within the vector. The notable exception is the call instruction which has a variable number of outputs and inputs.
由此,TCG Ops 对应的代码定义在 include/tcg/tcg-opc.h 中:
/*
* DEF(name, oargs, iargs, cargs, flags)
*/
/* predefined ops */
DEF(discard, 1, 0, 0, TCG_OPF_NOT_PRESENT)
DEF(set_label, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT)
/* variable number of parameters */
DEF(call, 0, 0, 3, TCG_OPF_CALL_CLOBBER | TCG_OPF_NOT_PRESENT)
DEF(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT)
DEF(brcond, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH | TCG_OPF_INT)
DEF(mb, 0, 0, 1, TCG_OPF_NOT_PRESENT)
DEF(mov, 1, 1, 0, TCG_OPF_INT | TCG_OPF_NOT_PRESENT)
DEF(add, 1, 2, 0, TCG_OPF_INT)
DEF(and, 1, 2, 0, TCG_OPF_INT)
DEF(andc, 1, 2, 0, TCG_OPF_INT)
...
3. Variables
TEMP_FIXED
There is one TCG fixed global variable, cpu_env, which is live in all translation blocks, and holds a pointer to CPUArchState. This variable is held in a host cpu register at all times in all translation blocks.
TEMP_GLOBAL
A TCG global is a variable which is live in all translation blocks, and corresponds to memory location that is within CPUArchState. These may be specified as an offset from cpu_env, in which case they are called direct globals, or may be specified as an offset from a direct global, in which case they are called indirect globals. Even indirect globals should still reference memory within CPUArchState. All TCG globals are defined during TCGCPUOps.initialize, before any translation blocks are generated.
TEMP_CONST
A TCG constant is a variable which is live throughout the entire translation block, and contains a constant value. These variables are allocated on demand during translation and are hashed so that there is exactly one variable holding a given value.
TEMP_TB
A TCG translation block temporary is a variable which is live throughout the entire translation block, but dies on any exit. These temporaries are allocated explicitly during translation.
TEMP_EBB
A TCG extended basic block temporary is a variable which is live throughout an extended basic block, but dies on any exit. These temporaries are allocated explicitly during translation.
对应的代码有:(定义了 TCG Global Variables)
/* global register indices */
static TCGv cpu_gpr[32], cpu_gprh[32], cpu_pc, cpu_vl, cpu_vstart;
static TCGv_i64 cpu_fpr[32]; /* assume F and D extensions */
static TCGv load_res;
static TCGv load_val;
...
/*
 * Create the TCG global variables that mirror fields of CPURISCVState.
 *
 * Registered as the .initialize hook of riscv_tcg_ops.  Each global is
 * created with tcg_global_mem_new*() as an offset from tcg_env and is
 * given a name string (the names appear in op dumps, e.g. "pc").
 */
void riscv_translate_init(void){
int i;
/*
* cpu_gpr[0] is a placeholder for the zero register. Do not use it.
* Use the gen_set_gpr and get_gpr helper functions when accessing regs,
* unless you specifically block reads/writes to reg 0.
*/
cpu_gpr[0] = NULL;
cpu_gprh[0] = NULL;
/* Integer registers: start at 1 because slot 0 was set to NULL above. */
for (i = 1; i < 32; i++) {
cpu_gpr[i] = tcg_global_mem_new(tcg_env,
offsetof(CPURISCVState, gpr[i]), riscv_int_regnames[i]);
cpu_gprh[i] = tcg_global_mem_new(tcg_env,
offsetof(CPURISCVState, gprh[i]), riscv_int_regnamesh[i]);}
/* Floating-point registers: all 32 slots get a 64-bit global. */
for (i = 0; i < 32; i++) {
cpu_fpr[i] = tcg_global_mem_new_i64(tcg_env,
offsetof(CPURISCVState, fpr[i]), riscv_fpr_regnames[i]);}
/* Remaining scalar state: pc, vl, vstart, load_res and load_val. */
cpu_pc = tcg_global_mem_new(tcg_env, offsetof(CPURISCVState, pc), "pc");
cpu_vl = tcg_global_mem_new(tcg_env, offsetof(CPURISCVState, vl), "vl");
cpu_vstart = tcg_global_mem_new(tcg_env, offsetof(CPURISCVState, vstart),
"vstart");
load_res = tcg_global_mem_new(tcg_env, offsetof(CPURISCVState, load_res),
"load_res");
load_val = tcg_global_mem_new(tcg_env, offsetof(CPURISCVState, load_val),
"load_val");
}
该函数调用栈如下:
(gdb) bt
#0 riscv_translate_init () at ../target/riscv/translate.c:1399
#1 0x0000555555d025c3 in tcg_exec_realizefn (cpu=0x255ebf05000, errp=0x7fffffffcbc8) at ../accel/tcg/cpu-exec.c:1047
#2 0x00005555558bddec in accel_cpu_common_realize (cpu=0x255ebf05000, errp=0x7fffffffcbc8) at ../accel/accel-common.c:104
#3 0x00005555558910ea in cpu_exec_realizefn (cpu=0x255ebf05000, errp=0x7fffffffcbc8) at ../hw/core/cpu-common.c:233
#4 0x0000555555d9b455 in riscv_cpu_realize (dev=0x255ebf05000, errp=0x7fffffffcc30) at ../target/riscv/cpu.c:931
#5 0x0000555555f38314 in device_set_realized (obj=0x255ebf05000, value=true, errp=0x7fffffffcef0) at ../hw/core/qdev.c:494
#6 0x0000555555f4350c in property_set_bool (obj=0x255ebf05000, v=0x255eb7a3f00, name=0x555556382bf9 "realized", opaque=0x255eb4943a0, errp=0x7fffffffcef0) at ../qom/object.c:2375
#7 0x0000555555f40eb9 in object_property_set (obj=0x255ebf05000, name=0x555556382bf9 "realized", v=0x255eb7a3f00, errp=0x7fffffffcef0) at ../qom/object.c:1450
#8 0x0000555555f461ce in object_property_set_qobject (obj=0x255ebf05000, name=0x555556382bf9 "realized", value=0x255eb7ec7e0, errp=0x7fffffffcef0) at ../qom/qom-qobject.c:28
#9 0x0000555555f4127a in object_property_set_bool (obj=0x255ebf05000, name=0x555556382bf9 "realized", value=true, errp=0x7fffffffcef0) at ../qom/object.c:1520
#10 0x0000555555f379cc in qdev_realize (dev=0x255ebf05000, bus=0x0, errp=0x7fffffffcef0) at ../hw/core/qdev.c:276
#11 0x0000555555d75c91 in riscv_hart_realize (s=0x255ed400328, idx=0, cpu_type=0x255ed002930 "rv64-riscv-cpu", errp=0x7fffffffcef0) at ../hw/riscv/riscv_hart.c:145
#12 0x0000555555d75d20 in riscv_harts_realize (dev=0x255ed400328, errp=0x7fffffffcef0) at ../hw/riscv/riscv_hart.c:160
#13 0x0000555555f38314 in device_set_realized (obj=0x255ed400328, value=true, errp=0x7fffffffd000) at ../hw/core/qdev.c:494
#14 0x0000555555f4350c in property_set_bool (obj=0x255ed400328, v=0x255eb7a3100, name=0x555556382bf9 "realized", opaque=0x255eb4943a0, errp=0x7fffffffd000) at ../qom/object.c:2375
#15 0x0000555555f40eb9 in object_property_set (obj=0x255ed400328, name=0x555556382bf9 "realized", v=0x255eb7a3100, errp=0x7fffffffd000) at ../qom/object.c:1450
#16 0x0000555555f461ce in object_property_set_qobject (obj=0x255ed400328, name=0x555556382bf9 "realized", value=0x255eb7e80a0, errp=0x55555693a360 <error_fatal>) at ../qom/qom-qobject.c:28
#17 0x0000555555f4127a in object_property_set_bool (obj=0x255ed400328, name=0x555556382bf9 "realized", value=true, errp=0x55555693a360 <error_fatal>) at ../qom/object.c:1520
#18 0x0000555555f379cc in qdev_realize (dev=0x255ed400328, bus=0x255eb445d00, errp=0x55555693a360 <error_fatal>) at ../hw/core/qdev.c:276
#19 0x000055555597b910 in sysbus_realize (dev=0x255ed400328, errp=0x55555693a360 <error_fatal>) at ../hw/core/sysbus.c:238
#20 0x0000555555d7de44 in quard_star_cpu_create (machine=0x255ed400100) at ../hw/riscv/quard_star.c:304
#21 0x0000555555d7fb13 in quard_star_machine_init (machine=0x255ed400100) at ../hw/riscv/quard_star.c:768
#22 0x0000555555973622 in machine_run_board_init (machine=0x255ed400100, mem_path=0x0, errp=0x7fffffffd1c0) at ../hw/core/machine.c:1669
#23 0x0000555555be3337 in qemu_init_board () at ../system/vl.c:2710
#24 0x0000555555be36d9 in qmp_x_exit_preconfig (errp=0x55555693a360 <error_fatal>) at ../system/vl.c:2804
#25 0x0000555555be6334 in qemu_init (argc=12, argv=0x7fffffffd578) at ../system/vl.c:3840
#26 0x00005555560a8f51 in main (argc=12, argv=0x7fffffffd578) at ../system/main.c:71
从上面的几个重点可以推断出 TCG IR 的计算模型:它定义了一系列操作(Operations),操作对象主要是目标 ISA(如 RISCV ISA)的 CPU 状态(即 CPUArchState 中的各个字段)以及对应的地址空间。这与目标 ISA 指令本身的行为直接对应:一般 ISA 的指令无非就是改变 CPU 状态,以及对内存地址进行读写访问。
例子如下,其中 IN: 是 RISCV ISA,OP: 是 TCG IR,OUT:是 X86_64。(请搭配其中的注释进行阅读)
----------------
IN:
0x00000000: 00000297 auipc t0,0 # 0x0
// cs->cc->tcg_ops->translate_code(cs, tb, max_insns, pc, host_pc);
// void riscv_translate_code(CPUState *cs, TranslationBlock *tb,
// int *max_insns, vaddr pc, void *host_pc)
OP:
//icount_start_insn = gen_tb_start(db, cflags);
ld_i32 loc1,env,$0xfffffffffffffff8
// if no cycles left for execution, then exit tb right now.
brcond_i32 loc1,$0x0,lt,$L0
// set_can_do_io(db, true);
st8_i32 $0x1,env,$0xfffffffffffffffc
---- 0000000000000000 0000000000000000 0000000000000000
// riscv_tr_translate_insn()
// auipc implementation of TCG IR.
mov_i64 x5/t0,pc
// riscv_tr_tb_stop()
// move to next instruction.
add_i64 pc,pc,$0x4
// find the next tb.
call lookup_tb_ptr,$0x6,$1,tmp4,env
// if tmp4 valid then go.
goto_ptr tmp4
// gen_tb_end();
// set up a label.
set_label $L0
// exit this tb with this tb address.
exit_tb $0x7d0f041b3043
// tcg_gen_code(tcg_ctx, tb, pc);
// %rbp pointing to env, the CPURISCVState
// %rdi used to pass 1st argument to functions
// %rsi used to pass 2nd argument to functions
// %rdx used to pass 3rd argument
// %rcx used to pass 4th argument
// %r8 used to pass 5th argument
// %rax used to store 1st return value.
OUT: [size=72]
-- guest addr 0x0000000000000000 + tb prologue
// ld_i32 loc1,env,$0xfffffffffffffff8
0x7d0f041b3100: 8b 5d f8 movl -8(%rbp), %ebx
// brcond_i32 loc1,$0x0,lt,$L0
0x7d0f041b3103: 85 db testl %ebx, %ebx
0x7d0f041b3105: 0f 8c 25 00 00 00 jl 0x7d0f041b3130
// st8_i32 $0x1,env,$0xfffffffffffffffc
0x7d0f041b310b: c6 45 fc 01 movb $1, -4(%rbp)
// mov_i64 x5/t0,pc
0x7d0f041b310f: 48 8b 9d 30 12 00 00 movq 0x1230(%rbp), %rbx
0x7d0f041b3116: 48 89 5d 28 movq %rbx, 0x28(%rbp)
// add_i64 pc,pc,$0x4
0x7d0f041b311a: 48 83 c3 04 addq $4, %rbx
0x7d0f041b311e: 48 89 9d 30 12 00 00 movq %rbx, 0x1230(%rbp)
// call lookup_tb_ptr,$0x6,$1,tmp4,env
0x7d0f041b3125: 48 8b fd movq %rbp, %rdi
0x7d0f041b3128: ff 15 12 00 00 00 callq *0x12(%rip)
// goto_ptr tmp4
0x7d0f041b312e: ff e0 jmpq *%rax
// exit_tb $0x7d0f041b3043
0x7d0f041b3130: 48 8d 05 0c ff ff ff leaq -0xf4(%rip), %rax
0x7d0f041b3137: e9 dc fe ff ff jmp 0x7d0f041b3018
-- tb slow paths + alignment
0x7d0f041b313c: 90 nop
0x7d0f041b313d: 90 nop
0x7d0f041b313e: 90 nop
0x7d0f041b313f: 90 nop
data: [size=8]
0x7d0f041b3140: .quad 0x00005d3d6c92eeb0
----------------
基于上述理解,看看 QEMU 是如何将 00000297 auipc t0, 0,转译成 mov_i64 x5/t0,pc 的,即riscv_tr_translate_insn() 函数的实现,如下:
static void riscv_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) {
DisasContext *ctx = container_of(dcbase, DisasContext, base);
CPURISCVState *env = cpu_env(cpu);
uint16_t opcode16 = translator_lduw(env, &ctx->base, ctx->base.pc_next);
ctx->ol = ctx->xl;
decode_opc(env, ctx, opcode16);
ctx->base.pc_next += ctx->cur_insn_len;
...}
static void decode_opc(CPURISCVState *env, DisasContext *ctx, uint16_t opcode){
ctx->virt_inst_excp = false;
ctx->cur_insn_len = insn_len(opcode);
/* Check for compressed insn */
if (ctx->cur_insn_len == 2) {
ctx->opcode = opcode;
/*
* The Zca extension is added as way to refer to instructions in the C
* extension that do not include the floating-point loads and stores
*/
if ((has_ext(ctx, RVC) || ctx->cfg_ptr->ext_zca) &&
decode_insn16(ctx, opcode)) {
return;
}
} else {
uint32_t opcode32 = opcode;
opcode32 = deposit32(opcode32, 16, 16,
translator_lduw(env, &ctx->base,
ctx->base.pc_next + 2));
ctx->opcode = opcode32;
for (guint i = 0; i < ctx->decoders->len; ++i) {
riscv_cpu_decode_fn func = g_ptr_array_index(ctx->decoders, i);
if (func(ctx, opcode32)) {
return;
}
}
}
...}
其中,对于每条指令的解码函数表为 ctx->decoders,其赋值代码如下:
static const TranslatorOps riscv_tr_ops = {
.init_disas_context = riscv_tr_init_disas_context,
...};
static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs){
...
ctx->decoders = cpu->decoders;}
/*
 * Build the list of decoder functions that apply to this CPU.
 *
 * Walks decoder_table in order and appends an entry's decode function
 * when the entry has a guard_func and that guard accepts cpu->cfg.
 * The resulting array is stored in cpu->decoders.
 */
void riscv_tcg_cpu_finalize_dynamic_decoder(RISCVCPU *cpu){
GPtrArray *dynamic_decoders;
/* Pre-size for the case where every table entry is selected. */
dynamic_decoders = g_ptr_array_sized_new(decoder_table_size);
for (size_t i = 0; i < decoder_table_size; ++i) {
/* Entries with a NULL guard_func are never selected. */
if (decoder_table[i].guard_func &&
decoder_table[i].guard_func(&cpu->cfg)) {
g_ptr_array_add(dynamic_decoders,
(gpointer)decoder_table[i].riscv_cpu_decode_fn);
}
}
cpu->decoders = dynamic_decoders;}
/*
 * One decoder_table entry: a decode function paired with the predicate
 * that decides, from the CPU configuration, whether it is used.
 */
typedef struct RISCVDecoder {
/* Returns true when this decoder should be enabled for the given config. */
bool (*guard_func)(const struct RISCVCPUConfig *);
/* Tries to decode a 32-bit instruction; callers stop at the first true. */
bool (*riscv_cpu_decode_fn)(struct DisasContext *, uint32_t);
} RISCVDecoder;
/*
 * Candidate decoders, each paired with its guard predicate.
 * riscv_tcg_cpu_finalize_dynamic_decoder() keeps the enabled entries in
 * this order.  Judging by its name, always_true_p presumably enables the
 * base decode_insn32 decoder unconditionally; confirm in its definition.
 */
const RISCVDecoder decoder_table[] = {
{ always_true_p, decode_insn32 },
{ has_xthead_p, decode_xthead},
{ has_XVentanaCondOps_p, decode_XVentanaCodeOps},
};
static bool decode_insn32(DisasContext *ctx, uint32_t insn){
...
switch (insn & 0x0000007f) {
case 0x00000003:
/* ........ ........ ........ .0000011 */
decode_insn32_extract_i(ctx, &u.f_i, insn);
switch ((insn >> 12) & 0x7) {
case 0x0:
/* ........ ........ .000.... .0000011 */
/* ../target/riscv/insn32.decode:141 */
if (trans_lb(ctx, &u.f_i)) return true;
break;
...}
...
case 0x00000017:
/* ........ ........ ........ .0010111 */
if ((insn & 0x00000f80) == 0x00000000) {
/* ........ ........ ....0000 00010111 */
/* ../target/riscv/insn32.decode:130 */
decode_insn32_extract_decode_insn32_Fmt_38(ctx, &u.f_decode_insn3226, insn);
if (trans_lpad(ctx, &u.f_decode_insn3226)) return true;
}
/* ../target/riscv/insn32.decode:131 */
decode_insn32_extract_u(ctx, &u.f_u, insn);
if (trans_auipc(ctx, &u.f_u)) return true;
break;
...}
...}
那么,对于 00000297 auipc t0, 0 来说,在 decode_insn32() 中,会进入 trans_auipc(),如下:
/*
 * Emit TCG ops for AUIPC: rd <- pc + imm.
 * Always returns true, which tells the decoder the instruction was handled.
 */
static bool trans_auipc(DisasContext *ctx, arg_auipc *a){
// target_pc is the TCG variable that receives the result for a->rd
// (cpu_gpr[rd] itself, or a fresh temporary; see dest_gpr())
TCGv target_pc = dest_gpr(ctx, a->rd);
// target_pc <- pc + a->imm
gen_pc_plus_diff(target_pc, ctx, a->imm);
// a->rd <- target_pc
gen_set_gpr(ctx, a->rd, target_pc);
return true;}
其中,a->rd 为 t0(即 x5),不是零寄存器;从后面生成的 TCG Op 直接写 x5/t0 可以看出,这里 dest_gpr() 走的是最后的 return cpu_gpr[reg_num],因此 target_pc 就是代表 t0 的 TCG 全局变量。如下:
/*
 * Return the TCG variable to use as the destination for GPR reg_num.
 *
 * A fresh temporary is returned for register 0 (cpu_gpr[0] is NULL) or
 * when get_olen(ctx) is smaller than TARGET_LONG_BITS.  Otherwise the
 * global cpu_gpr[reg_num] is returned, so ops write the register directly.
 */
static TCGv dest_gpr(DisasContext *ctx, int reg_num){
if (reg_num == 0 || get_olen(ctx) < TARGET_LONG_BITS) {
return tcg_temp_new();
}
return cpu_gpr[reg_num];
}
gen_pc_plus_diff(target_pc, ctx, a->imm) 的作用是令 target_pc = 当前指令地址 + a->imm。其中用到的 tcg_gen_addi_tl() 在 64 位目标上就是 tcg_gen_addi_i64()。如下:
static void gen_pc_plus_diff(TCGv target, DisasContext *ctx,
target_long diff){
target_ulong dest = ctx->base.pc_next + diff;
...
if (tb_cflags(ctx->base.tb) & CF_PCREL) {
tcg_gen_addi_tl(target, cpu_pc, dest - ctx->pc_save);
...} else {...}}
/*
 * Emit ops computing ret = arg1 + arg2, where arg2 is an immediate.
 *
 * - arg2 == 0: emits a plain move (which is itself a no-op if ret == arg1).
 * - TCG_TARGET_REG_BITS == 64: one add with arg2 as a constant operand.
 * - otherwise: an add2 on the 32-bit low/high halves.
 */
void tcg_gen_addi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2){
/* some cases can be optimized here */
if (arg2 == 0) {
tcg_gen_mov_i64(ret, arg1);
} else if (TCG_TARGET_REG_BITS == 64) {
tcg_gen_add_i64(ret, arg1, tcg_constant_i64(arg2));
} else {
/* Split the 64-bit immediate into its low and high 32-bit halves. */
tcg_gen_add2_i32(TCGV_LOW(ret), TCGV_HIGH(ret),
TCGV_LOW(arg1), TCGV_HIGH(arg1),
tcg_constant_i32(arg2), tcg_constant_i32(arg2 >> 32));}}
void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg){
if (ret == arg) {return;}
if (TCG_TARGET_REG_BITS == 64) {
tcg_gen_op2_i64(INDEX_op_mov, ret, arg);
} else {...}}
/*
 * Emit a two-operand op of type TCG_TYPE_I64: converts both TCGv_i64
 * handles to TCGArg and forwards to tcg_gen_op2().
 */
static void DNI tcg_gen_op2_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2){
tcg_gen_op2(opc, TCG_TYPE_I64, tcgv_i64_arg(a1), tcgv_i64_arg(a2));}
/*
 * Emit an op with two arguments.
 *
 * Allocates and queues the op via tcg_emit_op(), then records its type
 * and stores a1 and a2 in args[0] and args[1].  Returns the new op.
 */
TCGOp * NI tcg_gen_op2(TCGOpcode opc, TCGType type, TCGArg a1, TCGArg a2){
TCGOp *op = tcg_emit_op(opc, 2);
TCGOP_TYPE(op) = type;
op->args[0] = a1;
op->args[1] = a2;
return op;}
/*
 * Allocate a TCGOp for opc with room for nargs arguments and link it
 * into the current context's op list.  Returns the new op; the caller
 * fills in its arguments.
 */
TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs){
TCGOp *op = tcg_op_alloc(opc, nargs);
/* An explicit insertion point takes precedence over appending. */
if (tcg_ctx->emit_before_op) {
QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
} else {
QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
}
return op;}
由于 a->imm = 0,此处传给 tcg_gen_addi_tl() 的立即数(dest - ctx->pc_save)为 0,命中 tcg_gen_addi_i64() 中 arg2 == 0 的分支,因此最终调用的是 tcg_gen_mov_i64(),生成 mov_i64 x5/t0,pc。
接下来是 gen_set_gpr(ctx, a->rd, target_pc),其定义如下:
static void gen_set_gpr(DisasContext *ctx, int reg_num, TCGv t){
if (reg_num != 0) {
switch (get_ol(ctx)) {
...
case MXL_RV64:
case MXL_RV128:
tcg_gen_mov_tl(cpu_gpr[reg_num], t);
break;
...}
...}}
由于 cpu_gpr[a->rd] 与 target_pc 是同一个 TCG 变量(都是 t0),而 tcg_gen_mov_i64() 在 ret == arg 时直接返回,因此 gen_set_gpr() 不会生成任何 TCG Op。由此可见,00000297 auipc t0,0 对应的就是 mov_i64 x5/t0,pc。
其它的 RISCV 指令也大致按上述流程转译成 TCG Ops。后续文章将讲解 TCG Ops 如何转译成 X86_64,敬请期待。