Extend basic block without aggressive memory copy
Due to the significant overhead of aggressive memory copying, we opted to
store a pointer to the beginning of the IR array of the basic block we
intend to merge, rather than copying the whole IR array into the basic
block we wish to extend.

The performance results below were obtained by running CoreMark with
different implementation strategies for the extended basic block (EBB).
As they show, aggressive memory copying substantially degrades performance.

| Test | Aggressive memcpy | Compiler | Iterations / Sec | Speedup |
|------|-------------------|----------|------------------|---------|
| BB   |                   | clang-15 | 971.951          |         |
| BB   |                   | gcc-12   | 963.336          |         |
| EBB  | O                 | clang-15 | 1013.070         | +4.2%   |
| EBB  | O                 | gcc-12   | 1020.391         | +6%     |
| EBB  | X                 | clang-15 | 1160.894         | +19.4%  |
| EBB  | X                 | gcc-12   | 1167.938         | +21.2%  |

(O = extension with aggressive memcpy; X = pointer-based extension, no memcpy)
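To make the trade-off concrete, here is a minimal, self-contained sketch of the two extension strategies. It is illustrative only: the types and names (`toy_ir_t`, `toy_block_t`, `extend_by_copy`, `extend_by_pointer`) are invented for this example and are not part of rv32emu's API.

```c
#include <string.h>

#define MAX_IR 64

/* Toy stand-ins for the emulator's IR and basic-block types. */
typedef struct toy_ir {
    int opcode;
} toy_ir_t;

typedef struct toy_block {
    toy_ir_t ir[MAX_IR];
    unsigned n_insn;
    toy_ir_t *successor; /* pointer-based extension: no copying */
} toy_block_t;

/* Strategy 1: "aggressive memcpy" - append the successor's whole IR array. */
static void extend_by_copy(toy_block_t *dst, const toy_block_t *src)
{
    memcpy(dst->ir + dst->n_insn, src->ir, src->n_insn * sizeof(toy_ir_t));
    dst->n_insn += src->n_insn;
}

/* Strategy 2: this commit - just remember where the successor's IR begins. */
static void extend_by_pointer(toy_block_t *dst, toy_block_t *src)
{
    dst->successor = src->ir; /* O(1), no per-instruction copying */
}
```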
qwe661234 committed Apr 4, 2023
1 parent 57cb96a commit 06d960e
Showing 2 changed files with 123 additions and 16 deletions.
6 changes: 6 additions & 0 deletions src/decode.h
@@ -259,6 +259,12 @@ typedef struct rv_insn {
*/
bool tailcall;
bool (*impl)(riscv_t *, const struct rv_insn *);
/* To extend a basic block without repeatedly copying the IR array, we keep
 * two pointers, branch_taken and branch_untaken. Each points to the first
 * IR of the basic block on the taken and untaken branch path, respectively.
 */
struct rv_insn *branch_taken, *branch_untaken;
} rv_insn_t;

/* decode the RISC-V instruction */
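As a rough illustration of how these two pointers are meant to be consumed at run time (simplified, with invented names such as `toy_insn_t` and `toy_branch`; the real handlers appear in src/emulate.c below):

```c
#include <stdbool.h>

/* Simplified stand-in for rv_insn_t: every IR can execute itself and may
 * point at the first IR of the block on its taken / untaken path. */
typedef struct toy_insn {
    bool (*impl)(struct toy_insn *self);
    struct toy_insn *branch_taken, *branch_untaken;
} toy_insn_t;

/* A conditional-branch handler chains straight into the linked successor
 * block when one exists; only when no successor has been linked yet does it
 * hand control back to the block dispatcher. */
static bool toy_branch(toy_insn_t *self, bool taken)
{
    toy_insn_t *next = taken ? self->branch_taken : self->branch_untaken;
    if (next)
        return next->impl(next); /* keep executing inside the extended block */
    return true;                 /* fall back to the dispatcher */
}
```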
133 changes: 117 additions & 16 deletions src/emulate.c
@@ -270,10 +270,6 @@ enum {
rv->X[rv_reg_zero] = 0; \
code; \
rv->csr_cycle++; \
if (__rv_insn_##inst##_canbranch) { \
/* can branch */ \
return true; \
} \
nextop: \
rv->PC += ir->insn_len; \
if (ir->tailcall) \
@@ -319,6 +315,7 @@ RVOP(jal, {
rv_except_insn_misaligned(rv, pc);
return false;
}
return true;
})

/*The indirect jump instruction JALR uses the I-type encoding. The
@@ -341,90 +338,139 @@ RVOP(jalr, {
rv_except_insn_misaligned(rv, pc);
return false;
}
return true;
})

/* BEQ: Branch if Equal */
RVOP(beq, {
const uint32_t pc = rv->PC;
if (rv->X[ir->rs1] != rv->X[ir->rs2])
if (rv->X[ir->rs1] != rv->X[ir->rs2]) {
if (ir->branch_untaken) {
rv->PC += ir->insn_len;
return ir->branch_untaken->impl(rv, ir->branch_untaken);
}
goto nextop;
}
rv->PC += ir->imm;
/* check instruction misaligned */
if (unlikely(insn_is_misaligned(rv->PC))) {
rv->compressed = false;
rv_except_insn_misaligned(rv, pc);
return false;
}
if (ir->branch_taken)
return ir->branch_taken->impl(rv, ir->branch_taken);
return true;
})

/* BNE: Branch if Not Equal */
RVOP(bne, {
const uint32_t pc = rv->PC;
if (rv->X[ir->rs1] == rv->X[ir->rs2])
if (rv->X[ir->rs1] == rv->X[ir->rs2]) {
if (ir->branch_untaken) {
rv->PC += ir->insn_len;
return ir->branch_untaken->impl(rv, ir->branch_untaken);
}
goto nextop;
}
rv->PC += ir->imm;
/* check instruction misaligned */
if (unlikely(insn_is_misaligned(rv->PC))) {
rv->compressed = false;
rv_except_insn_misaligned(rv, pc);
return false;
}
if (ir->branch_taken)
return ir->branch_taken->impl(rv, ir->branch_taken);
return true;
})

/* BLT: Branch if Less Than */
RVOP(blt, {
const uint32_t pc = rv->PC;
if ((int32_t) rv->X[ir->rs1] >= (int32_t) rv->X[ir->rs2])
if ((int32_t) rv->X[ir->rs1] >= (int32_t) rv->X[ir->rs2]) {
if (ir->branch_untaken) {
rv->PC += ir->insn_len;
return ir->branch_untaken->impl(rv, ir->branch_untaken);
}
goto nextop;
}
rv->PC += ir->imm;
/* check instruction misaligned */
if (unlikely(insn_is_misaligned(rv->PC))) {
rv->compressed = false;
rv_except_insn_misaligned(rv, pc);
return false;
}
if (ir->branch_taken)
return ir->branch_taken->impl(rv, ir->branch_taken);
return true;
})

/* BGE: Branch if Greater Than or Equal */
RVOP(bge, {
const uint32_t pc = rv->PC;
if ((int32_t) rv->X[ir->rs1] < (int32_t) rv->X[ir->rs2])
if ((int32_t) rv->X[ir->rs1] < (int32_t) rv->X[ir->rs2]) {
if (ir->branch_untaken) {
rv->PC += ir->insn_len;
return ir->branch_untaken->impl(rv, ir->branch_untaken);
}
goto nextop;
}
rv->PC += ir->imm;
/* check instruction misaligned */
if (unlikely(insn_is_misaligned(rv->PC))) {
rv->compressed = false;
rv_except_insn_misaligned(rv, pc);
return false;
}
if (ir->branch_taken)
return ir->branch_taken->impl(rv, ir->branch_taken);
return true;
})

/* BLTU: Branch if Less Than Unsigned */
RVOP(bltu, {
const uint32_t pc = rv->PC;
if (rv->X[ir->rs1] >= rv->X[ir->rs2])
if (rv->X[ir->rs1] >= rv->X[ir->rs2]) {
if (ir->branch_untaken) {
rv->PC += ir->insn_len;
return ir->branch_untaken->impl(rv, ir->branch_untaken);
}
goto nextop;
}
rv->PC += ir->imm;
/* check instruction misaligned */
if (unlikely(insn_is_misaligned(rv->PC))) {
rv->compressed = false;
rv_except_insn_misaligned(rv, pc);
return false;
}
if (ir->branch_taken)
return ir->branch_taken->impl(rv, ir->branch_taken);
return true;
})

/* BGEU: Branch if Greater Than or Equal, Unsigned */
RVOP(bgeu, {
const uint32_t pc = rv->PC;
if (rv->X[ir->rs1] < rv->X[ir->rs2])
if (rv->X[ir->rs1] < rv->X[ir->rs2]) {
if (ir->branch_untaken) {
rv->PC += ir->insn_len;
return ir->branch_untaken->impl(rv, ir->branch_untaken);
}
goto nextop;
}
rv->PC += ir->imm;
/* check instruction misaligned */
if (unlikely(insn_is_misaligned(rv->PC))) {
rv->compressed = false;
rv_except_insn_misaligned(rv, pc);
return false;
}
if (ir->branch_taken)
return ir->branch_taken->impl(rv, ir->branch_taken);
return true;
})

/* LB: Load Byte */
@@ -583,12 +629,14 @@ RVOP(and, { rv->X[ir->rd] = rv->X[ir->rs1] & rv->X[ir->rs2]; })
RVOP(ecall, {
rv->compressed = false;
rv->io.on_ecall(rv);
return true;
})

/* EBREAK: Environment Break */
RVOP(ebreak, {
rv->compressed = false;
rv->io.on_ebreak(rv);
return true;
})

/* WFI: Wait for Interrupt */
@@ -616,12 +664,16 @@ RVOP(hret, {
})

/* MRET: return from traps in M-mode */
RVOP(mret, { rv->PC = rv->csr_mepc; })
RVOP(mret, {
rv->PC = rv->csr_mepc;
return true;
})

#if RV32_HAS(Zifencei) /* RV32 Zifencei Standard Extension */
RVOP(fencei, {
rv->PC += ir->insn_len;
/* FIXME: fill real implementations */
return true;
})
#endif

@@ -1085,6 +1137,7 @@ RVOP(cjal, {
rv_except_insn_misaligned(rv, rv->PC);
return false;
}
return true;
})

/* C.LI loads the sign-extended 6-bit immediate, imm, into register rd.
@@ -1156,6 +1209,7 @@ RVOP(cj, {
rv_except_insn_misaligned(rv, rv->PC);
return false;
}
return true;
})

/* C.BEQZ performs conditional control transfers. The offset is
@@ -1164,11 +1218,34 @@ RVOP(cj, {
* the value in register rs1' is zero. It expands to beq rs1', x0,
* offset[8:1].
*/
RVOP(cbeqz,
{ rv->PC += (!rv->X[ir->rs1]) ? (uint32_t) ir->imm : ir->insn_len; })
RVOP(cbeqz, {
if (rv->X[ir->rs1]) {
if (ir->branch_untaken) {
rv->PC += ir->insn_len;
return ir->branch_untaken->impl(rv, ir->branch_untaken);
}
goto nextop;
}
rv->PC += (uint32_t) ir->imm;
if (ir->branch_taken)
return ir->branch_taken->impl(rv, ir->branch_taken);
return true;
})

/* C.BNEZ */
RVOP(cbnez, { rv->PC += (rv->X[ir->rs1]) ? (uint32_t) ir->imm : ir->insn_len; })
RVOP(cbnez, {
if (!rv->X[ir->rs1]) {
if (ir->branch_untaken) {
rv->PC += ir->insn_len;
return ir->branch_untaken->impl(rv, ir->branch_untaken);
}
goto nextop;
}
rv->PC += (uint32_t) ir->imm;
if (ir->branch_taken)
return ir->branch_taken->impl(rv, ir->branch_taken);
return true;
})

/* C.SLLI is a CI-format instruction that performs a logical left shift
* of the value in register rd then writes the result to rd. The shift
@@ -1189,7 +1266,10 @@ RVOP(clwsp, {
})

/* C.JR */
RVOP(cjr, { rv->PC = rv->X[ir->rs1]; })
RVOP(cjr, {
rv->PC = rv->X[ir->rs1];
return true;
})

/* C.MV */
RVOP(cmv, { rv->X[ir->rd] = rv->X[ir->rs2]; })
@@ -1198,6 +1278,7 @@ RVOP(cmv, { rv->X[ir->rd] = rv->X[ir->rs2]; })
RVOP(cebreak, {
rv->compressed = true;
rv->io.on_ebreak(rv);
return true;
})

/* C.JALR */
@@ -1211,6 +1292,7 @@ RVOP(cjalr, {
rv_except_insn_misaligned(rv, rv->PC);
return false;
}
return true;
})

/* C.ADD adds the values in registers rd and rs2 and writes the
@@ -1340,6 +1422,23 @@ static void block_translate(riscv_t *rv, block_t *block)
block->ir[block->n_insn - 1].tailcall = true;
}

static void extend_block(riscv_t *rv, block_t *block)
{
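/* Link the terminating branch IR of this block directly to the IR arrays of
 * already-translated successor blocks, instead of copying those arrays into
 * this block. */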
rv_insn_t *last_ir = block->ir + block->n_insn - 1;
if (last_ir->branch_taken && last_ir->branch_untaken)
return;
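/* pc_end points just past the block's last instruction, so the branch itself
 * sits at pc_end - insn_len; adding the immediate yields the taken target,
 * while the untaken path simply falls through to pc_end. */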
uint32_t taken_pc = block->pc_end - last_ir->insn_len + last_ir->imm,
not_taken_pc = block->pc_end;

block_map_t *map = &rv->block_map;
block_t *next;
if (!last_ir->branch_taken && (next = block_find(map, taken_pc)))
last_ir->branch_taken = next->ir;

if (!last_ir->branch_untaken && (next = block_find(map, not_taken_pc)))
last_ir->branch_untaken = next->ir;
}

static block_t *block_find_or_translate(riscv_t *rv, block_t *prev)
{
block_map_t *map = &rv->block_map;
@@ -1368,7 +1467,9 @@ static block_t *block_find_or_translate(riscv_t *rv, block_t *prev)
*/
if (prev)
prev->predict = next;
}
} else
extend_block(rv, next);


return next;
}