QEMU RISCV TCG 详解十 -- RISCV ISA -> TCGOps

        目前,代码分析已经推进到 setjmp_gen_code(),此时的调用栈如下:

(gdb) bt
#0  setjmp_gen_code (env=0x30ac5306bd0, tb=0x7fffa4c1d040, pc=0, host_pc=0x7fff23400000, max_insns=0x7fff64c17b74, ti=0x7fff64c17b90) at ../accel/tcg/translate-all.c:242
#1  0x0000555555d0e8ac in tb_gen_code (cpu=0x30ac5304000, s=...) at ../accel/tcg/translate-all.c:320
#2  0x0000555555d0219e in cpu_exec_loop (cpu=0x30ac5304000, sc=0x7fff64c17ce0) at ../accel/tcg/cpu-exec.c:953
#3  0x0000555555d0232a in cpu_exec_setjmp (cpu=0x30ac5304000, sc=0x7fff64c17ce0) at ../accel/tcg/cpu-exec.c:999
#4  0x0000555555d023c7 in cpu_exec (cpu=0x30ac5304000) at ../accel/tcg/cpu-exec.c:1025
#5  0x0000555555d2b223 in tcg_cpu_exec (cpu=0x30ac5304000) at ../accel/tcg/tcg-accel-ops.c:81
#6  0x0000555555d2bddf in mttcg_cpu_thread_fn (arg=0x30ac5304000) at ../accel/tcg/tcg-accel-ops-mttcg.c:94
#7  0x0000555556190c12 in qemu_thread_start (args=0x30ac4becd20) at ../util/qemu-thread-posix.c:393
#8  0x00007ffff5208aa4 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:447
#9  0x00007ffff5295c3c in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:78

        setjmp_gen_code() 主要实现 RISCV ISA -> TCG Ops -> X86_64 的两级转译,即:

/*
 * Isolate the portion of code gen which can setjmp/longjmp.
 * Return the size of the generated code, or negative on error.
 *
 * Excerpt: the "..." lines stand for code omitted by the article.
 * Visible steps: record the CPUState in tcg_ctx, invoke the target's
 * translate_code hook for the guest code at @pc, then pass the result
 * to tcg_gen_code() and return its value unchanged.
 */
static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
                           vaddr pc, void *host_pc,
                           int *max_insns, int64_t *ti){
...
    CPUState *cs = env_cpu(env);
    tcg_ctx->cpu = cs;
    /* Front end: target-specific hook, guest instructions -> TCG ops. */
    cs->cc->tcg_ops->translate_code(cs, tb, max_insns, pc, host_pc);
...
    /* Code generation from the TCG ops; its return value is our result. */
    return tcg_gen_code(tcg_ctx, tb, pc);}

        本文的主角是 cs->cc->tcg_ops->translate_code(),用于实现 RISCV ISA -> TCG Ops,也就是将 RISCV 二进制指令转译成 QEMU TCG 定义的中间指令(TCG Ops)。

        原理很明了,就是用另一种语言(语法)表达同样的内容(语义)。如,用英语(English Syntax)表达了一个意思(Semantics),现在要将其用中文(Chinese Syntax)来表达同样的意思。

        这就是转译的要义。包括后续的 TCG Ops -> X86_64 也是一样。只是处理细节不同而已。

        const TCGCPUOps *tcg_ops 定义了在这个转译过程中,对于某一CPU要做的事,即接口:

/*
 * TCGCPUOps: per-target table of properties and callbacks.
 *
 * Three data fields come first, followed by hooks common to every build,
 * then one of two hook groups selected by CONFIG_USER_ONLY (signal
 * recording and address untagging when it is defined; interrupt, halt,
 * TLB, watchpoint/breakpoint and replay hooks otherwise).
 */
struct TCGCPUOps {
    /**
     * @mttcg_supported: multi-threaded TCG is supported
     *
     * Target (TCG frontend) supports:
     *   - atomic instructions
     *   - memory ordering primitives (barriers)
     */
    bool mttcg_supported;

    /**
     * @precise_smc: Stores which modify code within the current TB force
     *               the TB to exit; the next executed instruction will see
     *               the result of the store.
     */
    bool precise_smc;

    /**
     * @guest_default_memory_order: default barrier that is required
     *                              for the guest memory ordering.
     */
    TCGBar guest_default_memory_order;

    /**
     * @initialize: Initialize TCG state
     *
     * Called when the first CPU is realized.
     */
    void (*initialize)(void);
    /**
     * @translate_code: Translate guest instructions to TCGOps
     * @cpu: cpu context
     * @tb: translation block
     * @max_insns: max number of instructions to translate
     * @pc: guest virtual program counter address
     * @host_pc: host physical program counter address
     *
     * This function must be provided by the target, which should create
     * the target-specific DisasContext, and then invoke translator_loop.
     */
    void (*translate_code)(CPUState *cpu, TranslationBlock *tb,
                           int *max_insns, vaddr pc, void *host_pc);
    /**
     * @get_tb_cpu_state: Extract CPU state for a TCG #TranslationBlock
     *
     * Fill in all data required to select or compile a TranslationBlock.
     */
    TCGTBCPUState (*get_tb_cpu_state)(CPUState *cs);
    /**
     * @synchronize_from_tb: Synchronize state from a TCG #TranslationBlock
     *
     * This is called when we abandon execution of a TB before starting it,
     * and must set all parts of the CPU state which the previous TB in the
     * chain may not have updated.
     * By default, when this is NULL, a call is made to @set_pc(tb->pc).
     *
     * If more state needs to be restored, the target must implement a
     * function to restore all the state, and register it here.
     */
    void (*synchronize_from_tb)(CPUState *cpu, const TranslationBlock *tb);
    /**
     * @restore_state_to_opc: Synchronize state from INDEX_op_start_insn
     *
     * This is called when we unwind state in the middle of a TB,
     * usually before raising an exception.  Set all part of the CPU
     * state which are tracked insn-by-insn in the target-specific
     * arguments to start_insn, passed as @data.
     */
    void (*restore_state_to_opc)(CPUState *cpu, const TranslationBlock *tb,
                                 const uint64_t *data);

    /** @cpu_exec_enter: Callback for cpu_exec preparation */
    void (*cpu_exec_enter)(CPUState *cpu);
    /** @cpu_exec_exit: Callback for cpu_exec cleanup */
    void (*cpu_exec_exit)(CPUState *cpu);
    /** @debug_excp_handler: Callback for handling debug exceptions */
    void (*debug_excp_handler)(CPUState *cpu);

    /** @mmu_index: Callback for choosing softmmu mmu index */
    int (*mmu_index)(CPUState *cpu, bool ifetch);

#ifdef CONFIG_USER_ONLY
    /**
     * @fake_user_interrupt: Callback for 'fake exception' handling.
     *
     * Simulate 'fake exception' which will be handled outside the
     * cpu execution loop (hack for x86 user mode).
     */
    void (*fake_user_interrupt)(CPUState *cpu);

    /**
     * @record_sigsegv:
     * @cpu: cpu context
     * @addr: faulting guest address
     * @access_type: access was read/write/execute
     * @maperr: true for invalid page, false for permission fault
     * @ra: host pc for unwinding
     *
     * We are about to raise SIGSEGV with si_code set for @maperr,
     * and si_addr set for @addr.  Record anything further needed
     * for the signal ucontext_t.
     *
     * If the emulated kernel does not provide anything to the signal
     * handler with anything besides the user context registers, and
     * the siginfo_t, then this hook need do nothing and may be omitted.
     * Otherwise, record the data and return; the caller will raise
     * the signal, unwind the cpu state, and return to the main loop.
     *
     * If it is simpler to re-use the sysemu tlb_fill code, @ra is provided
     * so that a "normal" cpu exception can be raised.  In this case,
     * the signal must be raised by the architecture cpu_loop.
     */
    void (*record_sigsegv)(CPUState *cpu, vaddr addr,
                           MMUAccessType access_type,
                           bool maperr, uintptr_t ra);
    /**
     * @record_sigbus:
     * @cpu: cpu context
     * @addr: misaligned guest address
     * @access_type: access was read/write/execute
     * @ra: host pc for unwinding
     *
     * We are about to raise SIGBUS with si_code BUS_ADRALN,
     * and si_addr set for @addr.  Record anything further needed
     * for the signal ucontext_t.
     *
     * If the emulated kernel does not provide the signal handler with
     * anything besides the user context registers, and the siginfo_t,
     * then this hook need do nothing and may be omitted.
     * Otherwise, record the data and return; the caller will raise
     * the signal, unwind the cpu state, and return to the main loop.
     *
     * If it is simpler to re-use the sysemu do_unaligned_access code,
     * @ra is provided so that a "normal" cpu exception can be raised.
     * In this case, the signal must be raised by the architecture cpu_loop.
     */
    void (*record_sigbus)(CPUState *cpu, vaddr addr,
                          MMUAccessType access_type, uintptr_t ra);

    /**
     * @untagged_addr: Remove an ignored tag from an address
     * @cs: cpu context
     * @addr: tagged guest address
     */
    vaddr (*untagged_addr)(CPUState *cs, vaddr addr);
#else
    /** @do_interrupt: Callback for interrupt handling.  */
    void (*do_interrupt)(CPUState *cpu);
    /** @cpu_exec_interrupt: Callback for processing interrupts in cpu_exec */
    bool (*cpu_exec_interrupt)(CPUState *cpu, int interrupt_request);
    /** @cpu_exec_reset: Callback for reset in cpu_exec.  */
    void (*cpu_exec_reset)(CPUState *cpu);
    /**
     * @cpu_exec_halt: Callback for handling halt in cpu_exec.
     *
     * The target CPU should do any special processing here that it needs
     * to do when the CPU is in the halted state.
     *
     * Return true to indicate that the CPU should now leave halt, false
     * if it should remain in the halted state. (This should generally
     * be the same value that cpu_has_work() would return.)
     *
     * This method must be provided. If the target does not need to
     * do anything special for halt, the same function used for its
     * SysemuCPUOps::has_work method can be used here, as they have the
     * same function signature.
     */
    bool (*cpu_exec_halt)(CPUState *cpu);
    /**
     * @tlb_fill_align: Handle a softmmu tlb miss
     * @cpu: cpu context
     * @out: output page properties
     * @addr: virtual address
     * @access_type: read, write or execute
     * @mmu_idx: mmu context
     * @memop: memory operation for the access
     * @size: memory access size, or 0 for whole page
     * @probe: test only, no fault
     * @ra: host return address for exception unwind
     *
     * If the access is valid, fill in @out and return true.
     * Otherwise if probe is true, return false.
     * Otherwise raise an exception and do not return.
     *
     * The alignment check for the access is deferred to this hook,
     * so that the target can determine the priority of any alignment
     * fault with respect to other potential faults from paging.
     * Zero may be passed for @memop to skip any alignment check
     * for non-memory-access operations such as probing.
     */
    bool (*tlb_fill_align)(CPUState *cpu, CPUTLBEntryFull *out, vaddr addr,
                           MMUAccessType access_type, int mmu_idx,
                           MemOp memop, int size, bool probe, uintptr_t ra);
    /**
     * @tlb_fill: Handle a softmmu tlb miss
     *
     * If the access is valid, call tlb_set_page and return true;
     * if the access is invalid and probe is true, return false;
     * otherwise raise an exception and do not return.
     */
    bool (*tlb_fill)(CPUState *cpu, vaddr address, int size,
                     MMUAccessType access_type, int mmu_idx,
                     bool probe, uintptr_t retaddr);
    /**
     * @pointer_wrap:
     *
     * We have incremented @base to @result, resulting in a page change.
     * For the current cpu state, adjust @result for possible overflow.
     */
    vaddr (*pointer_wrap)(CPUState *cpu, int mmu_idx, vaddr result, vaddr base);
    /**
     * @do_transaction_failed: Callback for handling failed memory transactions
     * (ie bus faults or external aborts; not MMU faults)
     */
    void (*do_transaction_failed)(CPUState *cpu, hwaddr physaddr, vaddr addr,
                                  unsigned size, MMUAccessType access_type,
                                  int mmu_idx, MemTxAttrs attrs,
                                  MemTxResult response, uintptr_t retaddr);
    /**
     * @do_unaligned_access: Callback for unaligned access handling
     * The callback must exit via raising an exception.
     */
    G_NORETURN void (*do_unaligned_access)(CPUState *cpu, vaddr addr,
                                           MMUAccessType access_type,
                                           int mmu_idx, uintptr_t retaddr);

    /**
     * @adjust_watchpoint_address: hack for cpu_check_watchpoint used by ARM
     */
    vaddr (*adjust_watchpoint_address)(CPUState *cpu, vaddr addr, int len);

    /**
     * @debug_check_watchpoint: return true if the architectural
     * watchpoint whose address has matched should really fire, used by ARM
     * and RISC-V
     */
    bool (*debug_check_watchpoint)(CPUState *cpu, CPUWatchpoint *wp);

    /**
     * @debug_check_breakpoint: return true if the architectural
     * breakpoint whose PC has matched should really fire.
     */
    bool (*debug_check_breakpoint)(CPUState *cpu);

    /**
     * @io_recompile_replay_branch: Callback for cpu_io_recompile.
     *
     * The cpu has been stopped, and cpu_restore_state_from_tb has been
     * called.  If the faulting instruction is in a delay slot, and the
     * target architecture requires re-execution of the branch, then
     * adjust the cpu state as required and return true.
     */
    bool (*io_recompile_replay_branch)(CPUState *cpu,
                                       const TranslationBlock *tb);
    /**
     * @need_replay_interrupt: Return %true if @interrupt_request
     * needs to be recorded for replay purposes.
     */
    bool (*need_replay_interrupt)(int interrupt_request);
#endif /* !CONFIG_USER_ONLY */
};

        cs->cc->tcg_ops 对应的是 riscv_tcg_ops,在 RISCVCPUClass 初始化时设定的:

/*
 * Excerpt of the RISC-V CPU class initializer ("..." marks omitted code).
 * When CONFIG_TCG is defined, it points the CPUClass tcg_ops field at
 * riscv_tcg_ops.
 */
static void riscv_cpu_common_class_init(ObjectClass *c, const void *data){
...
    CPUClass *cc = CPU_CLASS(c);
...
#ifdef CONFIG_TCG
    cc->tcg_ops = &riscv_tcg_ops;
#endif /* CONFIG_TCG */
...}

/*
 * TCG hook table for RISC-V CPUs.
 *
 * The first group of hooks is set in every build; the group under
 * !CONFIG_USER_ONLY is only set when CONFIG_USER_ONLY is not defined.
 * cpu_exec_halt reuses riscv_cpu_has_work and cpu_exec_reset reuses the
 * generic cpu_reset.
 */
const TCGCPUOps riscv_tcg_ops = {
    .mttcg_supported = true,
    .guest_default_memory_order = 0,

    .initialize = riscv_translate_init,
    .translate_code = riscv_translate_code,
    .get_tb_cpu_state = riscv_get_tb_cpu_state,
    .synchronize_from_tb = riscv_cpu_synchronize_from_tb,
    .restore_state_to_opc = riscv_restore_state_to_opc,
    .mmu_index = riscv_cpu_mmu_index,

#ifndef CONFIG_USER_ONLY
    .tlb_fill = riscv_cpu_tlb_fill,
    .pointer_wrap = riscv_pointer_wrap,
    .cpu_exec_interrupt = riscv_cpu_exec_interrupt,
    .cpu_exec_halt = riscv_cpu_has_work,
    .cpu_exec_reset = cpu_reset,
    .do_interrupt = riscv_cpu_do_interrupt,
    .do_transaction_failed = riscv_cpu_do_transaction_failed,
    .do_unaligned_access = riscv_cpu_do_unaligned_access,
    .debug_excp_handler = riscv_cpu_debug_excp_handler,
    .debug_check_breakpoint = riscv_cpu_debug_check_breakpoint,
    .debug_check_watchpoint = riscv_cpu_debug_check_watchpoint,
#endif /* !CONFIG_USER_ONLY */
};

        那么,cs->cc->tcg_ops->translate_code() 就对应了 riscv_translate_code(),即:

/*
 * RISC-V implementations of the five translation hooks; passed to
 * translator_loop() by riscv_translate_code().  disas_log is left unset.
 */
static const TranslatorOps riscv_tr_ops = {
    .init_disas_context = riscv_tr_init_disas_context,
    .tb_start           = riscv_tr_tb_start,
    .insn_start         = riscv_tr_insn_start,
    .translate_insn     = riscv_tr_translate_insn,
    .tb_stop            = riscv_tr_tb_stop,
};
/*
 * RISC-V translate_code hook: allocate a DisasContext on the stack and
 * run the generic translator loop over it with the RISC-V hook table.
 * All arguments are forwarded to translator_loop() unchanged.
 */
void riscv_translate_code(CPUState *cs, TranslationBlock *tb,
                          int *max_insns, vaddr pc, void *host_pc)
{
    DisasContext dc;
    DisasContextBase *base = &dc.base;

    translator_loop(cs, tb, max_insns, pc, host_pc, &riscv_tr_ops, base);
}

        translator_loop() 要做的是,从 Guest PC 指向的指令开始,一直循环转译,直至条件不满足,而实际的转译动作主要由 static const TranslatorOps riscv_tr_ops 定义。如下:

/*
 * Excerpt of the generic translator loop ("..." marks code omitted by the
 * article).  It shows the order in which the TranslatorOps hooks run:
 * init_disas_context, then tb_start after gen_tb_start(), then insn_start
 * and translate_insn once per loop iteration, and finally tb_stop followed
 * by gen_tb_end().
 */
void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
                     vaddr pc, void *host_pc, const TranslatorOps *ops,
                     DisasContextBase *db) {
...
    ops->init_disas_context(db, cpu);
...
    /* Start translating.  */
    icount_start_insn = gen_tb_start(db, cflags);
    ops->tb_start(db, cpu);
...
    while (true) {
        ...
        ops->insn_start(db, cpu);
        ...
        /*
         * Disassemble one instruction.  The translate_insn hook should
         * update db->pc_next and db->is_jmp to indicate what should be
         * done next -- either exiting this loop or locate the start of
         * the next instruction.
         */
        ops->translate_insn(db, cpu);
        ...
        /* Stop translation if translate_insn so indicated.  */
        if (db->is_jmp != DISAS_NEXT) {break;}
        /* Stop translation if the output buffer is full,
           or we have executed all of the allowed instructions.  */
        if (tcg_op_buf_full() || db->num_insns >= db->max_insns) {
            db->is_jmp = DISAS_TOO_MANY;break;}
    }
    /* Emit code to exit the TB, as indicated by db->is_jmp.  */
    ops->tb_stop(db, cpu);
    gen_tb_end(tb, cflags, icount_start_insn, db->num_insns);
    ...}

/**
 * TranslatorOps: target-specific hooks used by the generic translator loop.
 *
 * @init_disas_context:
 *      Initialize the target-specific portions of DisasContext struct.
 *      The generic DisasContextBase has already been initialized.
 *
 * @tb_start:
 *      Emit any code required before the start of the main loop,
 *      after the generic gen_tb_start().
 *
 * @insn_start:
 *      Emit the tcg_gen_insn_start opcode.
 *
 * @translate_insn:
 *      Disassemble one instruction and set db->pc_next for the start
 *      of the following instruction.  Set db->is_jmp as necessary to
 *      terminate the main loop.
 *
 * @tb_stop:
 *      Emit any opcodes required to exit the TB, based on db->is_jmp.
 *
 * @disas_log:
 *      Print instruction disassembly to log.
 *      NOTE(review): the meaning of the bool return value is not stated
 *      here -- confirm against the caller.
 */
typedef struct TranslatorOps {
    void (*init_disas_context)(DisasContextBase *db, CPUState *cpu);
    void (*tb_start)(DisasContextBase *db, CPUState *cpu);
    void (*insn_start)(DisasContextBase *db, CPUState *cpu);
    void (*translate_insn)(DisasContextBase *db, CPUState *cpu);
    void (*tb_stop)(DisasContextBase *db, CPUState *cpu);
    bool (*disas_log)(const DisasContextBase *db, CPUState *cpu, FILE *f);
} TranslatorOps;
/**
 * translator_loop:
 * @cpu: Target vCPU.
 * @tb: Translation block.
 * @max_insns: Maximum number of insns to translate.
 * @pc: guest virtual program counter address
 * @host_pc: host physical program counter address
 * @ops: Target-specific operations.
 * @db: Disassembly context.
 *
 * Generic translator loop.
 *
 * Translation will stop in the following cases (in order):
 * - When is_jmp set by #TranslatorOps::breakpoint_check.
 *   - set to DISAS_TOO_MANY exits after translating one more insn
 *   - set to any other value than DISAS_NEXT exits immediately.
 * - When is_jmp set by #TranslatorOps::translate_insn.
 *   - set to any value other than DISAS_NEXT exits immediately.
 * - When the TCG operation buffer is full.
 * - When single-stepping is enabled (system-wide or on the current vCPU).
 * - When too many instructions have been translated.
 *
 * NOTE(review): the TranslatorOps struct declares no breakpoint_check
 * hook, so the first bullet above looks stale -- confirm against the
 * current implementation.
 */
void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
                     vaddr pc, void *host_pc, const TranslatorOps *ops,
                     DisasContextBase *db);

        在对 riscv_tr_ops 中定义的每个函数分析前,需要先了解一下 TCG IR,即 TCG Ops,官方文档为 《frontend-ops》《backend-ops》以及《TCG IR》等。

        其中重点有:

1. A TCG basic block is a single entry, multiple exit region which corresponds to a list of instructions terminated by a label, or any branch instruction.

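        相关的代码为(注意:translator_loop() 的这些退出条件决定的是一个 Translation Block 的结束位置;一个 TB 内部仍可被 label 或分支指令划分成多个 basic block,如后文例子中的 brcond_i32 与 set_label $L0):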

/*
 * Excerpt of translator_loop() reduced to its two loop-exit checks
 * ("..." marks omitted code): leave the loop when translate_insn changed
 * db->is_jmp, or when the op buffer is full / the instruction budget is
 * used up, in which case is_jmp becomes DISAS_TOO_MANY.
 */
void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
                     vaddr pc, void *host_pc, const TranslatorOps *ops,
                     DisasContextBase *db){
 ...
    while (true) {
       ...
        /* Stop translation if translate_insn so indicated.  */
        if (db->is_jmp != DISAS_NEXT) {break;}

        /* Stop translation if the output buffer is full,
           or we have executed all of the allowed instructions.  */
        if (tcg_op_buf_full() || db->num_insns >= db->max_insns) {
            db->is_jmp = DISAS_TOO_MANY;
            break;}}
...}

2. TCG instructions or ops operate on TCG variables, both of which are strongly typed. Each instruction has a fixed number of output variable operands, input variable operands and constant operands. Vector instructions have a field specifying the element size within the vector. The notable exception is the call instruction which has a variable number of outputs and inputs.

        由此,TCG Ops 的定义位于 include/tcg/tcg-opc.h,如下:

/*
 * DEF(name, oargs, iargs, cargs, flags)
 *
 * One entry per TCG op.  NOTE(review): oargs/iargs/cargs presumably give
 * the number of output, input and constant operands of the op -- confirm
 * against the header that expands DEF.
 * The trailing "..." marks entries omitted by the article.
 */
/* predefined ops */
DEF(discard, 1, 0, 0, TCG_OPF_NOT_PRESENT)
DEF(set_label, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT)
/* variable number of parameters */
DEF(call, 0, 0, 3, TCG_OPF_CALL_CLOBBER | TCG_OPF_NOT_PRESENT)
DEF(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT)
DEF(brcond, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH | TCG_OPF_INT)
DEF(mb, 0, 0, 1, TCG_OPF_NOT_PRESENT)
DEF(mov, 1, 1, 0, TCG_OPF_INT | TCG_OPF_NOT_PRESENT)
DEF(add, 1, 2, 0, TCG_OPF_INT)
DEF(and, 1, 2, 0, TCG_OPF_INT)
DEF(andc, 1, 2, 0, TCG_OPF_INT)
...

3. Variables

  • TEMP_FIXED

    There is one TCG fixed global variable, cpu_env, which is live in all translation blocks, and holds a pointer to CPUArchState. This variable is held in a host cpu register at all times in all translation blocks.

  • TEMP_GLOBAL

    A TCG global is a variable which is live in all translation blocks, and corresponds to memory location that is within CPUArchState. These may be specified as an offset from cpu_env, in which case they are called direct globals, or may be specified as an offset from a direct global, in which case they are called indirect globals. Even indirect globals should still reference memory within CPUArchState. All TCG globals are defined during TCGCPUOps.initialize, before any translation blocks are generated.

  • TEMP_CONST

    A TCG constant is a variable which is live throughout the entire translation block, and contains a constant value. These variables are allocated on demand during translation and are hashed so that there is exactly one variable holding a given value.

  • TEMP_TB

    A TCG translation block temporary is a variable which is live throughout the entire translation block, but dies on any exit. These temporaries are allocated explicitly during translation.

  • TEMP_EBB

    A TCG extended basic block temporary is a variable which is live throughout an extended basic block, but dies on any exit. These temporaries are allocated explicitly during translation.

        对应的代码有:(定义了 TCG Global Variables)

/*
 * global register indices
 *
 * File-scope handles for the TCG globals; each one is bound to a
 * CPURISCVState field in riscv_translate_init().
 */
static TCGv cpu_gpr[32], cpu_gprh[32], cpu_pc, cpu_vl, cpu_vstart;
static TCGv_i64 cpu_fpr[32]; /* assume F and D extensions */
static TCGv load_res;
static TCGv load_val;
...
/*
 * Create the TCG globals backing the RISC-V CPU state.
 *
 * Each global is registered relative to tcg_env at the offset of the
 * matching CPURISCVState field: gpr[1..31], gprh[1..31], fpr[0..31],
 * pc, vl, vstart, load_res and load_val.  Installed as the
 * TCGCPUOps.initialize hook in riscv_tcg_ops.
 */
void riscv_translate_init(void){
    int i;
    /*
     * cpu_gpr[0] is a placeholder for the zero register. Do not use it.
     * Use the gen_set_gpr and get_gpr helper functions when accessing regs,
     * unless you specifically block reads/writes to reg 0.
     */
    cpu_gpr[0] = NULL;
    cpu_gprh[0] = NULL;
    /* Integer registers: index 0 is skipped, see above. */
    for (i = 1; i < 32; i++) {
        cpu_gpr[i] = tcg_global_mem_new(tcg_env,
            offsetof(CPURISCVState, gpr[i]), riscv_int_regnames[i]);
        cpu_gprh[i] = tcg_global_mem_new(tcg_env,
            offsetof(CPURISCVState, gprh[i]), riscv_int_regnamesh[i]);}
    /* Floating-point registers: all 32, as 64-bit globals. */
    for (i = 0; i < 32; i++) {
        cpu_fpr[i] = tcg_global_mem_new_i64(tcg_env,
            offsetof(CPURISCVState, fpr[i]), riscv_fpr_regnames[i]);}

    cpu_pc = tcg_global_mem_new(tcg_env, offsetof(CPURISCVState, pc), "pc");
    cpu_vl = tcg_global_mem_new(tcg_env, offsetof(CPURISCVState, vl), "vl");
    cpu_vstart = tcg_global_mem_new(tcg_env, offsetof(CPURISCVState, vstart),
                            "vstart");
    load_res = tcg_global_mem_new(tcg_env, offsetof(CPURISCVState, load_res),
                             "load_res");
    load_val = tcg_global_mem_new(tcg_env, offsetof(CPURISCVState, load_val),
                             "load_val");
}

        该函数调用栈如下:

(gdb) bt
#0  riscv_translate_init () at ../target/riscv/translate.c:1399
#1  0x0000555555d025c3 in tcg_exec_realizefn (cpu=0x255ebf05000, errp=0x7fffffffcbc8) at ../accel/tcg/cpu-exec.c:1047
#2  0x00005555558bddec in accel_cpu_common_realize (cpu=0x255ebf05000, errp=0x7fffffffcbc8) at ../accel/accel-common.c:104
#3  0x00005555558910ea in cpu_exec_realizefn (cpu=0x255ebf05000, errp=0x7fffffffcbc8) at ../hw/core/cpu-common.c:233
#4  0x0000555555d9b455 in riscv_cpu_realize (dev=0x255ebf05000, errp=0x7fffffffcc30) at ../target/riscv/cpu.c:931
#5  0x0000555555f38314 in device_set_realized (obj=0x255ebf05000, value=true, errp=0x7fffffffcef0) at ../hw/core/qdev.c:494
#6  0x0000555555f4350c in property_set_bool (obj=0x255ebf05000, v=0x255eb7a3f00, name=0x555556382bf9 "realized", opaque=0x255eb4943a0, errp=0x7fffffffcef0) at ../qom/object.c:2375
#7  0x0000555555f40eb9 in object_property_set (obj=0x255ebf05000, name=0x555556382bf9 "realized", v=0x255eb7a3f00, errp=0x7fffffffcef0) at ../qom/object.c:1450
#8  0x0000555555f461ce in object_property_set_qobject (obj=0x255ebf05000, name=0x555556382bf9 "realized", value=0x255eb7ec7e0, errp=0x7fffffffcef0) at ../qom/qom-qobject.c:28
#9  0x0000555555f4127a in object_property_set_bool (obj=0x255ebf05000, name=0x555556382bf9 "realized", value=true, errp=0x7fffffffcef0) at ../qom/object.c:1520
#10 0x0000555555f379cc in qdev_realize (dev=0x255ebf05000, bus=0x0, errp=0x7fffffffcef0) at ../hw/core/qdev.c:276
#11 0x0000555555d75c91 in riscv_hart_realize (s=0x255ed400328, idx=0, cpu_type=0x255ed002930 "rv64-riscv-cpu", errp=0x7fffffffcef0) at ../hw/riscv/riscv_hart.c:145
#12 0x0000555555d75d20 in riscv_harts_realize (dev=0x255ed400328, errp=0x7fffffffcef0) at ../hw/riscv/riscv_hart.c:160
#13 0x0000555555f38314 in device_set_realized (obj=0x255ed400328, value=true, errp=0x7fffffffd000) at ../hw/core/qdev.c:494
#14 0x0000555555f4350c in property_set_bool (obj=0x255ed400328, v=0x255eb7a3100, name=0x555556382bf9 "realized", opaque=0x255eb4943a0, errp=0x7fffffffd000) at ../qom/object.c:2375
#15 0x0000555555f40eb9 in object_property_set (obj=0x255ed400328, name=0x555556382bf9 "realized", v=0x255eb7a3100, errp=0x7fffffffd000) at ../qom/object.c:1450
#16 0x0000555555f461ce in object_property_set_qobject (obj=0x255ed400328, name=0x555556382bf9 "realized", value=0x255eb7e80a0, errp=0x55555693a360 <error_fatal>) at ../qom/qom-qobject.c:28
#17 0x0000555555f4127a in object_property_set_bool (obj=0x255ed400328, name=0x555556382bf9 "realized", value=true, errp=0x55555693a360 <error_fatal>) at ../qom/object.c:1520
#18 0x0000555555f379cc in qdev_realize (dev=0x255ed400328, bus=0x255eb445d00, errp=0x55555693a360 <error_fatal>) at ../hw/core/qdev.c:276
#19 0x000055555597b910 in sysbus_realize (dev=0x255ed400328, errp=0x55555693a360 <error_fatal>) at ../hw/core/sysbus.c:238
#20 0x0000555555d7de44 in quard_star_cpu_create (machine=0x255ed400100) at ../hw/riscv/quard_star.c:304
#21 0x0000555555d7fb13 in quard_star_machine_init (machine=0x255ed400100) at ../hw/riscv/quard_star.c:768
#22 0x0000555555973622 in machine_run_board_init (machine=0x255ed400100, mem_path=0x0, errp=0x7fffffffd1c0) at ../hw/core/machine.c:1669
#23 0x0000555555be3337 in qemu_init_board () at ../system/vl.c:2710
#24 0x0000555555be36d9 in qmp_x_exit_preconfig (errp=0x55555693a360 <error_fatal>) at ../system/vl.c:2804
#25 0x0000555555be6334 in qemu_init (argc=12, argv=0x7fffffffd578) at ../system/vl.c:3840
#26 0x00005555560a8f51 in main (argc=12, argv=0x7fffffffd578) at ../system/main.c:71

        那么,从上面的重点介绍可以推断出,TCG IR 的计算模型是:定义一系列操作(Operations),其主要操作对象为目标 ISA(如 RISCV ISA)的 CPU 状态及对应的地址空间。这很直观地体现了目标 ISA 的指令行为:一般 ISA 的指令无非就是改变 CPU 状态,以及对地址进行读写访问。

        例子如下,其中 IN: 是 RISCV ISA,OP: 是 TCG IR,OUT:是 X86_64。(请搭配其中的注释进行阅读)

----------------
IN: 
0x00000000:  00000297          auipc                   t0,0                    # 0x0

// cs->cc->tcg_ops->translate_code(cs, tb, max_insns, pc, host_pc);
// void riscv_translate_code(CPUState *cs, TranslationBlock *tb,
//                          int *max_insns, vaddr pc, void *host_pc)
OP:
//icount_start_insn = gen_tb_start(db, cflags);
 ld_i32 loc1,env,$0xfffffffffffffff8

// if no cycles left for execution, then exit tb right now.
 brcond_i32 loc1,$0x0,lt,$L0

// set_can_do_io(db, true);
 st8_i32 $0x1,env,$0xfffffffffffffffc

 ---- 0000000000000000 0000000000000000 0000000000000000
// riscv_tr_translate_insn() 
// auipc implementation of TCG IR.
 mov_i64 x5/t0,pc

// riscv_tr_tb_stop()
// move to next instruction.
 add_i64 pc,pc,$0x4
// find the next tb.
 call lookup_tb_ptr,$0x6,$1,tmp4,env
// if tmp4 valid then go.
 goto_ptr tmp4

// gen_tb_end();
// set up a label.
 set_label $L0
// exit this tb with this tb address.
 exit_tb $0x7d0f041b3043

// tcg_gen_code(tcg_ctx, tb, pc);
// %rbp pointing to env, the CPURISCVState
// %rdi used to pass 1st argument to functions
// %rsi used to pass 2nd argument to functions
// %rdx used to pass 3rd argument
// %rcx used to pass 4th argument
// %r8  used to pass 5th argument
// %rax used to store 1st return value.

OUT: [size=72]
  -- guest addr 0x0000000000000000 + tb prologue
// ld_i32 loc1,env,$0xfffffffffffffff8
0x7d0f041b3100:  8b 5d f8                 movl     -8(%rbp), %ebx
// brcond_i32 loc1,$0x0,lt,$L0
0x7d0f041b3103:  85 db                    testl    %ebx, %ebx
0x7d0f041b3105:  0f 8c 25 00 00 00        jl       0x7d0f041b3130
// st8_i32 $0x1,env,$0xfffffffffffffffc
0x7d0f041b310b:  c6 45 fc 01              movb     $1, -4(%rbp)

// mov_i64 x5/t0,pc
0x7d0f041b310f:  48 8b 9d 30 12 00 00     movq     0x1230(%rbp), %rbx
0x7d0f041b3116:  48 89 5d 28              movq     %rbx, 0x28(%rbp)

// add_i64 pc,pc,$0x4
0x7d0f041b311a:  48 83 c3 04              addq     $4, %rbx
0x7d0f041b311e:  48 89 9d 30 12 00 00     movq     %rbx, 0x1230(%rbp)

// call lookup_tb_ptr,$0x6,$1,tmp4,env
0x7d0f041b3125:  48 8b fd                 movq     %rbp, %rdi
0x7d0f041b3128:  ff 15 12 00 00 00        callq    *0x12(%rip)

// goto_ptr tmp4
0x7d0f041b312e:  ff e0                    jmpq     *%rax

// exit_tb $0x7d0f041b3043
0x7d0f041b3130:  48 8d 05 0c ff ff ff     leaq     -0xf4(%rip), %rax
0x7d0f041b3137:  e9 dc fe ff ff           jmp      0x7d0f041b3018
  -- tb slow paths + alignment
0x7d0f041b313c:  90                       nop      
0x7d0f041b313d:  90                       nop      
0x7d0f041b313e:  90                       nop      
0x7d0f041b313f:  90                       nop      
  data: [size=8]
0x7d0f041b3140:  .quad  0x00005d3d6c92eeb0

----------------

        基于上述理解,看看 QEMU 是如何将 00000297 auipc t0, 0,转译成 mov_i64 x5/t0,pc 的,即riscv_tr_translate_insn() 函数的实现,如下:

/*
 * translate_insn hook for RISC-V (excerpt; "..." marks omitted code).
 * Reads the 16-bit unit at pc_next, resets ctx->ol to ctx->xl, decodes
 * the instruction, then advances pc_next by the decoded length.
 */
static void riscv_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) {
    DisasContext *ctx = container_of(dcbase, DisasContext, base);
    CPURISCVState *env = cpu_env(cpu);
    uint16_t opcode16 = translator_lduw(env, &ctx->base, ctx->base.pc_next);
    ctx->ol = ctx->xl;
    decode_opc(env, ctx, opcode16);
    /* cur_insn_len is set by decode_opc(). */
    ctx->base.pc_next += ctx->cur_insn_len;
...}
/*
 * Decode one instruction whose first 16 bits are @opcode (excerpt; "..."
 * marks omitted code).
 *
 * insn_len() gives the length.  A 2-byte instruction goes to
 * decode_insn16() when RVC or Zca is enabled.  Otherwise the next 16 bits
 * are fetched to form a 32-bit opcode, which is offered to each decoder
 * in ctx->decoders in order until one returns true.
 */
static void decode_opc(CPURISCVState *env, DisasContext *ctx, uint16_t opcode){
    ctx->virt_inst_excp = false;
    ctx->cur_insn_len = insn_len(opcode);
    /* Check for compressed insn */
    if (ctx->cur_insn_len == 2) {
        ctx->opcode = opcode;
        /*
         * The Zca extension is added as way to refer to instructions in the C
         * extension that do not include the floating-point loads and stores
         */
        if ((has_ext(ctx, RVC) || ctx->cfg_ptr->ext_zca) &&
            decode_insn16(ctx, opcode)) {
            return;
        }
    } else {
        uint32_t opcode32 = opcode;
        /* Place the 16 bits at pc_next + 2 into bits 16..31. */
        opcode32 = deposit32(opcode32, 16, 16,
                             translator_lduw(env, &ctx->base,
                                             ctx->base.pc_next + 2));
        ctx->opcode = opcode32;
        /* First decoder that accepts the instruction wins. */
        for (guint i = 0; i < ctx->decoders->len; ++i) {
            riscv_cpu_decode_fn func = g_ptr_array_index(ctx->decoders, i);
            if (func(ctx, opcode32)) {
                return;
            }
        }
    }
...}

        其中,对于每条指令的解码函数表为 ctx->decoders,其赋值代码如下:

/* Excerpt of the hook table: only the init_disas_context entry is shown. */
static const TranslatorOps riscv_tr_ops = {
    .init_disas_context = riscv_tr_init_disas_context,
...};
/*
 * init_disas_context hook (excerpt; the declarations of ctx and cpu are in
 * the omitted part).  The line shown copies the CPU's decoder list into
 * the DisasContext.
 */
static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs){
 ...
    ctx->decoders = cpu->decoders;}

/*
 * Build the per-CPU list of instruction decoders.
 *
 * Walks decoder_table in order and keeps the decode function of every
 * entry that has a guard function returning true for cpu->cfg.  The
 * resulting array is stored in cpu->decoders.
 */
void riscv_tcg_cpu_finalize_dynamic_decoder(RISCVCPU *cpu)
{
    GPtrArray *enabled = g_ptr_array_sized_new(decoder_table_size);
    size_t idx = 0;

    while (idx < decoder_table_size) {
        const RISCVDecoder *entry = &decoder_table[idx++];

        /* Skip entries without a guard or whose guard rejects this CPU. */
        if (!entry->guard_func || !entry->guard_func(&cpu->cfg)) {
            continue;
        }
        g_ptr_array_add(enabled, (gpointer)entry->riscv_cpu_decode_fn);
    }

    cpu->decoders = enabled;
}
/*
 * One decoder_table entry: a predicate on the CPU configuration paired
 * with a decode function for a 32-bit instruction word.
 */
typedef struct RISCVDecoder {
    /* Evaluated against the CPU config to decide if the decoder is used. */
    bool (*guard_func)(const struct RISCVCPUConfig *);
    /* Returns true when it accepted the instruction. */
    bool (*riscv_cpu_decode_fn)(struct DisasContext *, uint32_t);
} RISCVDecoder;
const RISCVDecoder decoder_table[] = {
    { always_true_p, decode_insn32 },
    { has_xthead_p, decode_xthead},
    { has_XVentanaCondOps_p, decode_XVentanaCodeOps},
};
/*
 * Decoder for 32-bit instructions (heavily elided excerpt).
 *
 * Switches on the low 7 bits of @insn, extracts the operand fields for the
 * matching format, and calls the corresponding trans_*() function.  Returns
 * true as soon as a trans_*() function accepts the instruction.
 */
static bool decode_insn32(DisasContext *ctx, uint32_t insn){
...
    switch (insn & 0x0000007f) {
    case 0x00000003:
        /* ........ ........ ........ .0000011 */
        decode_insn32_extract_i(ctx, &u.f_i, insn);
        /* Sub-dispatch on bits 14:12. */
        switch ((insn >> 12) & 0x7) {
        case 0x0:
            /* ........ ........ .000.... .0000011 */
            /* ../target/riscv/insn32.decode:141 */
            if (trans_lb(ctx, &u.f_i)) return true;
            break;
        ...}
    ...
    case 0x00000017:
        /* ........ ........ ........ .0010111 */
        /* When bits 11:7 are all zero, try lpad first. */
        if ((insn & 0x00000f80) == 0x00000000) {
            /* ........ ........ ....0000 00010111 */
            /* ../target/riscv/insn32.decode:130 */
            decode_insn32_extract_decode_insn32_Fmt_38(ctx, &u.f_decode_insn3226, insn);
            if (trans_lpad(ctx, &u.f_decode_insn3226)) return true;
        }
        /* lpad did not apply or declined: decode as auipc. */
        /* ../target/riscv/insn32.decode:131 */
        decode_insn32_extract_u(ctx, &u.f_u, insn);
        if (trans_auipc(ctx, &u.f_u)) return true;
        break;
    ...}
...}

        那么,对于 00000297 auipc t0, 0 来说,在 decode_insn32() 中,会进入 trans_auipc(),如下:

/*
 * Translate auipc: rd <- pc + imm.  Always reports the instruction as
 * handled.
 */
static bool trans_auipc(DisasContext *ctx, arg_auipc *a){
    /* target_pc <- TCG value that receives the result for register a->rd */
    TCGv target_pc = dest_gpr(ctx, a->rd);
    /* target_pc <- pc + a->imm */
    gen_pc_plus_diff(target_pc, ctx, a->imm);
    /* a->rd <- target_pc */
    gen_set_gpr(ctx, a->rd, target_pc);
    return true;}

        其中,a->rd 为 t0(即 x5,不为 0),在操作数长度未被缩窄的情况下,dest_gpr() 直接返回 cpu_gpr[5],因此 target_pc 就是指向 t0 的 TCG 全局变量(TCG global variable)。dest_gpr() 定义如下:

/*
 * Pick the TCG value an instruction should write its result into for
 * guest register @reg_num.
 *
 * Returns the global cpu_gpr[reg_num] when the register is not x0 and
 * get_olen(ctx) is at least TARGET_LONG_BITS; otherwise returns a fresh
 * temporary.
 */
static TCGv dest_gpr(DisasContext *ctx, int reg_num)
{
    if (reg_num != 0 && get_olen(ctx) >= TARGET_LONG_BITS) {
        return cpu_gpr[reg_num];
    }
    return tcg_temp_new();
}

        gen_pc_plus_diff(target_pc, ctx, a->imm) 的作用是生成 target_pc <- pc + a->imm 对应的 TCG Op。如下:

/*
 * Emit TCG ops that set @target to the current instruction's pc plus @diff.
 *
 * Only the CF_PCREL path is shown: the result is computed relative to
 * cpu_pc, using the offset between the wanted address and ctx->pc_save.
 * The non-PCREL branch is elided in this excerpt.
 */
static void gen_pc_plus_diff(TCGv target, DisasContext *ctx,
                             target_long diff){
    /* Destination address as known at translation time. */
    target_ulong dest = ctx->base.pc_next + diff;
...
    if (tb_cflags(ctx->base.tb) & CF_PCREL) {
        tcg_gen_addi_tl(target, cpu_pc, dest - ctx->pc_save);
    ...} else {...}}
/*
 * Emit ret = arg1 + arg2 for 64-bit TCG values, with arg2 an immediate.
 *
 * Adding zero becomes a plain move.  On 64-bit hosts a single add with a
 * constant operand is emitted; otherwise the addition is done on the
 * low/high 32-bit halves with tcg_gen_add2_i32().
 */
void tcg_gen_addi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
{
    /* some cases can be optimized here */
    if (arg2 == 0) {
        tcg_gen_mov_i64(ret, arg1);
        return;
    }

    if (TCG_TARGET_REG_BITS == 64) {
        tcg_gen_add_i64(ret, arg1, tcg_constant_i64(arg2));
        return;
    }

    tcg_gen_add2_i32(TCGV_LOW(ret), TCGV_HIGH(ret),
                     TCGV_LOW(arg1), TCGV_HIGH(arg1),
                     tcg_constant_i32(arg2), tcg_constant_i32(arg2 >> 32));
}
/*
 * Emit ret = arg for 64-bit TCG values.
 *
 * Emits nothing when source and destination are the same value.  On 64-bit
 * hosts a single INDEX_op_mov is emitted; the other path is elided in this
 * excerpt.
 */
void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg){
    /* Moving a value onto itself needs no op. */
    if (ret == arg) {return;}
    if (TCG_TARGET_REG_BITS == 64) {
        tcg_gen_op2_i64(INDEX_op_mov, ret, arg);
    } else {...}}
/*
 * Emit a two-operand op of type TCG_TYPE_I64: converts both TCGv_i64
 * operands with tcgv_i64_arg() and forwards them to tcg_gen_op2().
 */
static void DNI tcg_gen_op2_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2){
    tcg_gen_op2(opc, TCG_TYPE_I64, tcgv_i64_arg(a1), tcgv_i64_arg(a2));}
/*
 * Emit a two-argument TCG op.
 *
 * Creates the op via tcg_emit_op(), fills in its two arguments and its
 * type, and returns the new op.
 */
TCGOp * NI tcg_gen_op2(TCGOpcode opc, TCGType type, TCGArg a1, TCGArg a2)
{
    TCGOp *emitted = tcg_emit_op(opc, 2);

    emitted->args[0] = a1;
    emitted->args[1] = a2;
    TCGOP_TYPE(emitted) = type;

    return emitted;
}
/*
 * Allocate a TCG op with room for @nargs arguments and link it into the
 * current context's op list.
 *
 * The op is inserted before tcg_ctx->emit_before_op when that is set,
 * and appended to the tail of tcg_ctx->ops otherwise.
 */
TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
{
    TCGOp *fresh = tcg_op_alloc(opc, nargs);

    if (!tcg_ctx->emit_before_op) {
        /* Common case: append at the end of the op stream. */
        QTAILQ_INSERT_TAIL(&tcg_ctx->ops, fresh, link);
        return fresh;
    }

    QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, fresh, link);
    return fresh;
}

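        由于 a->imm = 0,此时传给 tcg_gen_addi_tl() 的立即数 dest - ctx->pc_save 也为 0(64 位目标下 tcg_gen_addi_tl 即 tcg_gen_addi_i64),于是走 arg2 == 0 分支,调用 tcg_gen_mov_i64(),即生成 mov_i64 x5/t0,pc。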

        接下来是 gen_set_gpr(ctx, a->rd, target_pc),其定义如下:

/*
 * Emit TCG ops that write @t to guest register @reg_num.
 *
 * Nothing is written when reg_num is 0.  Otherwise the code dispatches on
 * get_ol(ctx); for MXL_RV64 and MXL_RV128 the value is moved into
 * cpu_gpr[reg_num].  The remaining cases are elided in this excerpt.
 */
static void gen_set_gpr(DisasContext *ctx, int reg_num, TCGv t){
    if (reg_num != 0) {
        switch (get_ol(ctx)) {
        ...
        case MXL_RV64:
        case MXL_RV128:
            tcg_gen_mov_tl(cpu_gpr[reg_num], t);
            break;
        ...}
    ...}}

        由于 cpu_gpr[a->rd] 与 target_pc 是同一个 TCG 变量(都指向 t0),而 tcg_gen_mov_i64() 在 ret == arg 时直接返回,因此 gen_set_gpr() 没有生成任何 TCG Op。由此可见,00000297 auipc t0,0 对应的是 mov_i64 x5/t0,pc。

        其它的 RISCV 指令也大致按上述流程转译成 TCG Ops。后续文章将讲解 TCG Ops 如何转译成 X86_64,敬请期待。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

KeithTsui

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值