Skip to content

Commit

Permalink
Extend basic block without aggressive memory copy
Browse files Browse the repository at this point in the history
Due to the significant overhead of aggressive memory copying, we opted to
keep a pointer to the beginning of the IR array of the basic block we
intend to merge, rather than copying that whole IR array into the basic
block we wish to extend.

The performance results obtained from running CoreMark with the different
implementation strategies for EBB are presented below (in the "aggressive
memcpy" column, O means the copy is used and X means it is not). As we can
see, aggressive memory copy substantially degrades performance.

|Test| aggressive memcpy|Compiler | Iterations / Sec |Speedup|
|----+------------------+---------+------------------+-------|
| BB |                  |clang-15 |  971.951         |       |
|----+------------------+---------+------------------+-------|
| BB |                  | gcc-12  |  963.336         |       |
|----+------------------+---------+------------------+-------|
| EBB|       O          |clang-15 |  1013.070        | +4.2% |
|----+------------------+---------+------------------+-------|
| EBB|       O          | gcc-12  |  1020.391        | +6%   |
|----+------------------+---------+------------------+-------|
| EBB|       X          |clang-15 |  1160.894        | +19.4%|
|----+------------------+---------+------------------+-------|
| EBB|       X          | gcc-12  | 1167.938         | +21.2%|
  • Loading branch information
qwe661234 committed Apr 4, 2023
1 parent 57cb96a commit 9b81f4d
Show file tree
Hide file tree
Showing 2 changed files with 130 additions and 22 deletions.
8 changes: 8 additions & 0 deletions src/decode.h
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,14 @@ typedef struct rv_insn {
*/
bool tailcall;
bool (*impl)(riscv_t *, const struct rv_insn *);

/* We employ two pointers, branch taken and branch untaken, to avoid the
* significant overhead resulting from aggressive memory copy. Instead of
* copying the entire IR array, these pointers respectively point to the
* first IR of the first basic block in the path of the taken and untaken
* branches, so we can jump to the specific IR array directly.
*/
struct rv_insn *branch_taken, *branch_untaken;
} rv_insn_t;

/* decode the RISC-V instruction */
Expand Down
144 changes: 122 additions & 22 deletions src/emulate.c
Original file line number Diff line number Diff line change
Expand Up @@ -270,10 +270,6 @@ enum {
rv->X[rv_reg_zero] = 0; \
code; \
rv->csr_cycle++; \
if (__rv_insn_##inst##_canbranch) { \
/* can branch */ \
return true; \
} \
nextop: \
rv->PC += ir->insn_len; \
if (ir->tailcall) \
Expand Down Expand Up @@ -319,6 +315,7 @@ RVOP(jal, {
rv_except_insn_misaligned(rv, pc);
return false;
}
return true;
})

/*The indirect jump instruction JALR uses the I-type encoding. The
Expand All @@ -341,90 +338,133 @@ RVOP(jalr, {
rv_except_insn_misaligned(rv, pc);
return false;
}
return true;
})

/* BEQ: Branch if Equal
 *
 * Taken when rs1 == rs2. If the successor block of the selected path has
 * already been linked (ir->branch_taken / ir->branch_untaken), execution
 * continues directly in that block's first IR; otherwise rv->PC is updated
 * and control goes back to the caller.
 * NOTE(review): every exit from this body bypasses the rv->csr_cycle++ that
 * RVOP places after the handler code -- confirm that is intended.
 */
RVOP(beq, {
    const uint32_t pc = rv->PC;
    if (rv->X[ir->rs1] != rv->X[ir->rs2]) {
        /* not taken: with no linked successor, fall back to the shared
         * nextop epilogue of RVOP, which advances PC by insn_len
         */
        if (!ir->branch_untaken)
            goto nextop;
        rv->PC += ir->insn_len;
        return ir->branch_untaken->impl(rv, ir->branch_untaken);
    }
    rv->PC += ir->imm;
    /* check instruction misaligned; report it against the branch's own PC */
    if (unlikely(insn_is_misaligned(rv->PC))) {
        rv->compressed = false;
        rv_except_insn_misaligned(rv, pc);
        return false;
    }
    if (ir->branch_taken)
        return ir->branch_taken->impl(rv, ir->branch_taken);
    return true;
})

/* BNE: Branch if Not Equal
 *
 * Taken when rs1 != rs2. If the successor block of the selected path has
 * already been linked (ir->branch_taken / ir->branch_untaken), execution
 * continues directly in that block's first IR; otherwise rv->PC is updated
 * and control goes back to the caller.
 * NOTE(review): every exit from this body bypasses the rv->csr_cycle++ that
 * RVOP places after the handler code -- confirm that is intended.
 */
RVOP(bne, {
    const uint32_t pc = rv->PC;
    if (rv->X[ir->rs1] == rv->X[ir->rs2]) {
        /* not taken: with no linked successor, fall back to the shared
         * nextop epilogue of RVOP, which advances PC by insn_len
         */
        if (!ir->branch_untaken)
            goto nextop;
        rv->PC += ir->insn_len;
        return ir->branch_untaken->impl(rv, ir->branch_untaken);
    }
    rv->PC += ir->imm;
    /* check instruction misaligned; report it against the branch's own PC */
    if (unlikely(insn_is_misaligned(rv->PC))) {
        rv->compressed = false;
        rv_except_insn_misaligned(rv, pc);
        return false;
    }
    if (ir->branch_taken)
        return ir->branch_taken->impl(rv, ir->branch_taken);
    return true;
})

/* BLT: Branch if Less Than (signed comparison)
 *
 * Taken when (int32_t) rs1 < (int32_t) rs2. If the successor block of the
 * selected path has already been linked (ir->branch_taken /
 * ir->branch_untaken), execution continues directly in that block's first
 * IR; otherwise rv->PC is updated and control goes back to the caller.
 * NOTE(review): every exit from this body bypasses the rv->csr_cycle++ that
 * RVOP places after the handler code -- confirm that is intended.
 */
RVOP(blt, {
    const uint32_t pc = rv->PC;
    if ((int32_t) rv->X[ir->rs1] >= (int32_t) rv->X[ir->rs2]) {
        /* not taken: with no linked successor, fall back to the shared
         * nextop epilogue of RVOP, which advances PC by insn_len
         */
        if (!ir->branch_untaken)
            goto nextop;
        rv->PC += ir->insn_len;
        return ir->branch_untaken->impl(rv, ir->branch_untaken);
    }
    rv->PC += ir->imm;
    /* check instruction misaligned; report it against the branch's own PC */
    if (unlikely(insn_is_misaligned(rv->PC))) {
        rv->compressed = false;
        rv_except_insn_misaligned(rv, pc);
        return false;
    }
    if (ir->branch_taken)
        return ir->branch_taken->impl(rv, ir->branch_taken);
    return true;
})

/* BGE: Branch if Greater Than or Equal (signed comparison)
 *
 * Taken when (int32_t) rs1 >= (int32_t) rs2. If the successor block of the
 * selected path has already been linked (ir->branch_taken /
 * ir->branch_untaken), execution continues directly in that block's first
 * IR; otherwise rv->PC is updated and control goes back to the caller.
 * NOTE(review): every exit from this body bypasses the rv->csr_cycle++ that
 * RVOP places after the handler code -- confirm that is intended.
 */
RVOP(bge, {
    const uint32_t pc = rv->PC;
    if ((int32_t) rv->X[ir->rs1] < (int32_t) rv->X[ir->rs2]) {
        /* not taken: with no linked successor, fall back to the shared
         * nextop epilogue of RVOP, which advances PC by insn_len
         */
        if (!ir->branch_untaken)
            goto nextop;
        rv->PC += ir->insn_len;
        return ir->branch_untaken->impl(rv, ir->branch_untaken);
    }
    rv->PC += ir->imm;
    /* check instruction misaligned; report it against the branch's own PC */
    if (unlikely(insn_is_misaligned(rv->PC))) {
        rv->compressed = false;
        rv_except_insn_misaligned(rv, pc);
        return false;
    }
    if (ir->branch_taken)
        return ir->branch_taken->impl(rv, ir->branch_taken);
    return true;
})

/* BLTU: Branch if Less Than Unsigned
 *
 * Taken when rs1 < rs2, compared as unsigned values. If the successor block
 * of the selected path has already been linked (ir->branch_taken /
 * ir->branch_untaken), execution continues directly in that block's first
 * IR; otherwise rv->PC is updated and control goes back to the caller.
 * NOTE(review): every exit from this body bypasses the rv->csr_cycle++ that
 * RVOP places after the handler code -- confirm that is intended.
 */
RVOP(bltu, {
    const uint32_t pc = rv->PC;
    if (rv->X[ir->rs1] >= rv->X[ir->rs2]) {
        /* not taken: with no linked successor, fall back to the shared
         * nextop epilogue of RVOP, which advances PC by insn_len
         */
        if (!ir->branch_untaken)
            goto nextop;
        rv->PC += ir->insn_len;
        return ir->branch_untaken->impl(rv, ir->branch_untaken);
    }
    rv->PC += ir->imm;
    /* check instruction misaligned; report it against the branch's own PC */
    if (unlikely(insn_is_misaligned(rv->PC))) {
        rv->compressed = false;
        rv_except_insn_misaligned(rv, pc);
        return false;
    }
    if (ir->branch_taken)
        return ir->branch_taken->impl(rv, ir->branch_taken);
    return true;
})

/* BGEU: Branch if Greater Than or Equal, Unsigned
 *
 * Taken when rs1 >= rs2, compared as unsigned values. If the successor block
 * of the selected path has already been linked (ir->branch_taken /
 * ir->branch_untaken), execution continues directly in that block's first
 * IR; otherwise rv->PC is updated and control goes back to the caller.
 * NOTE(review): every exit from this body bypasses the rv->csr_cycle++ that
 * RVOP places after the handler code -- confirm that is intended.
 */
RVOP(bgeu, {
    const uint32_t pc = rv->PC;
    if (rv->X[ir->rs1] < rv->X[ir->rs2]) {
        /* not taken: with no linked successor, fall back to the shared
         * nextop epilogue of RVOP, which advances PC by insn_len
         */
        if (!ir->branch_untaken)
            goto nextop;
        rv->PC += ir->insn_len;
        return ir->branch_untaken->impl(rv, ir->branch_untaken);
    }
    rv->PC += ir->imm;
    /* check instruction misaligned; report it against the branch's own PC */
    if (unlikely(insn_is_misaligned(rv->PC))) {
        rv->compressed = false;
        rv_except_insn_misaligned(rv, pc);
        return false;
    }
    if (ir->branch_taken)
        return ir->branch_taken->impl(rv, ir->branch_taken);
    return true;
})

/* LB: Load Byte */
Expand Down Expand Up @@ -583,12 +623,14 @@ RVOP(and, { rv->X[ir->rd] = rv->X[ir->rs1] & rv->X[ir->rs2]; })
RVOP(ecall, {
/* clear the compressed flag before handing control to the ecall callback */
rv->compressed = false;
rv->io.on_ecall(rv);
/* end the handler here; the code RVOP places after the body is skipped */
return true;
})

/* EBREAK: Environment Break
 * Delegates to the on_ebreak I/O callback and then ends the handler.
 */
RVOP(ebreak, {
/* clear the compressed flag before handing control to the ebreak callback */
rv->compressed = false;
rv->io.on_ebreak(rv);
/* end the handler here; the code RVOP places after the body is skipped */
return true;
})

/* WFI: Wait for Interrupt */
Expand Down Expand Up @@ -616,12 +658,16 @@ RVOP(hret, {
})

/* MRET: return from a trap handled in M-mode.
 * Execution resumes at the address held in the mepc CSR; the handler ends
 * right after redirecting PC, so the code RVOP places after the body is not
 * run.
 */
RVOP(mret, {
    rv->PC = rv->csr_mepc;
    return true;
})

#if RV32_HAS(Zifencei) /* RV32 Zifencei Standard Extension */
RVOP(fencei, {
/* advance past this instruction here: the early return below skips the PC
 * update that RVOP performs at its nextop label
 */
rv->PC += ir->insn_len;
/* FIXME: fill real implementations */
return true;
})
#endif

Expand Down Expand Up @@ -1085,6 +1131,7 @@ RVOP(cjal, {
rv_except_insn_misaligned(rv, rv->PC);
return false;
}
return true;
})

/* C.LI loads the sign-extended 6-bit immediate, imm, into register rd.
Expand Down Expand Up @@ -1156,6 +1203,7 @@ RVOP(cj, {
rv_except_insn_misaligned(rv, rv->PC);
return false;
}
return true;
})

/* C.BEQZ performs conditional control transfers. The offset is
Expand All @@ -1164,11 +1212,32 @@ RVOP(cj, {
* the value in register rs1' is zero. It expands to beq rs1', x0,
* offset[8:1].
*/
RVOP(cbeqz, {
    /* Taken when rs1 is zero. If the successor block of the selected path is
     * already linked (ir->branch_taken / ir->branch_untaken), continue
     * directly in its first IR; otherwise update rv->PC and return.
     * NOTE(review): every exit from this body bypasses the rv->csr_cycle++
     * that RVOP places after the handler code -- confirm that is intended.
     */
    if (rv->X[ir->rs1]) {
        /* not taken: with no linked successor, use the nextop epilogue of
         * RVOP, which advances PC by insn_len
         */
        if (!ir->branch_untaken)
            goto nextop;
        rv->PC += ir->insn_len;
        return ir->branch_untaken->impl(rv, ir->branch_untaken);
    }
    rv->PC += (uint32_t) ir->imm;
    if (ir->branch_taken)
        return ir->branch_taken->impl(rv, ir->branch_taken);
    return true;
})

/* C.BNEZ: branch when rs1 is non-zero.
 *
 * If the successor block of the selected path is already linked
 * (ir->branch_taken / ir->branch_untaken), execution continues directly in
 * its first IR; otherwise rv->PC is updated and control goes back to the
 * caller.
 * NOTE(review): every exit from this body bypasses the rv->csr_cycle++ that
 * RVOP places after the handler code -- confirm that is intended.
 */
RVOP(cbnez, {
    if (!rv->X[ir->rs1]) {
        /* not taken: with no linked successor, use the nextop epilogue of
         * RVOP, which advances PC by insn_len
         */
        if (!ir->branch_untaken)
            goto nextop;
        rv->PC += ir->insn_len;
        return ir->branch_untaken->impl(rv, ir->branch_untaken);
    }
    rv->PC += (uint32_t) ir->imm;
    if (ir->branch_taken)
        return ir->branch_taken->impl(rv, ir->branch_taken);
    return true;
})

/* C.SLLI is a CI-format instruction that performs a logical left shift
* of the value in register rd then writes the result to rd. The shift
Expand All @@ -1189,7 +1258,10 @@ RVOP(clwsp, {
})

/* C.JR: jump to the address held in register rs1.
 * The handler ends right after redirecting PC, so the code RVOP places after
 * the body is not run.
 */
RVOP(cjr, {
    rv->PC = rv->X[ir->rs1];
    return true;
})

/* C.MV: copy the value of register rs2 into register rd */
RVOP(cmv, { rv->X[ir->rd] = rv->X[ir->rs2]; })
Expand All @@ -1198,6 +1270,7 @@ RVOP(cmv, { rv->X[ir->rd] = rv->X[ir->rs2]; })
RVOP(cebreak, {
/* set the compressed flag before handing control to the ebreak callback */
rv->compressed = true;
rv->io.on_ebreak(rv);
/* end the handler here; the code RVOP places after the body is skipped */
return true;
})

/* C.JALR */
Expand All @@ -1211,6 +1284,7 @@ RVOP(cjalr, {
rv_except_insn_misaligned(rv, rv->PC);
return false;
}
return true;
})

/* C.ADD adds the values in registers rd and rs2 and writes the
Expand Down Expand Up @@ -1340,6 +1414,30 @@ static void block_translate(riscv_t *rv, block_t *block)
block->ir[block->n_insn - 1].tailcall = true;
}

/* Link the last IR of @block to the blocks that follow it.
 *
 * For each of the two successor pointers of the block's final instruction
 * (branch_taken and branch_untaken) that is still unset, look the successor
 * up in rv->block_map and, when a block is found, store a pointer to that
 * block's IR array. A pointer that is already set is left untouched, and a
 * successor that is not in the map yet is simply left unlinked.
 */
static void extend_block(riscv_t *rv, block_t *block)
{
    rv_insn_t *tail = block->ir + block->n_insn - 1;

    if (!tail->branch_taken) {
        /* taken target: pc_end minus the last instruction's length, plus
         * its immediate
         */
        const uint32_t target = block->pc_end - tail->insn_len + tail->imm;
        block_t *succ = block_find(&rv->block_map, target);
        if (succ)
            tail->branch_taken = succ->ir;
    }

    if (!tail->branch_untaken) {
        /* the fall-through path starts at pc_end */
        block_t *succ = block_find(&rv->block_map, block->pc_end);
        if (succ)
            tail->branch_untaken = succ->ir;
    }
}

static block_t *block_find_or_translate(riscv_t *rv, block_t *prev)
{
block_map_t *map = &rv->block_map;
Expand Down Expand Up @@ -1368,7 +1466,9 @@ static block_t *block_find_or_translate(riscv_t *rv, block_t *prev)
*/
if (prev)
prev->predict = next;
}
} else
extend_block(rv, next);


return next;
}
Expand Down

0 comments on commit 9b81f4d

Please sign in to comment.