diff --git a/.github/actions/create-conda-env/action.yml b/.github/actions/create-conda-env/action.yml index 5410ae5036..f033ed937d 100644 --- a/.github/actions/create-conda-env/action.yml +++ b/.github/actions/create-conda-env/action.yml @@ -1,11 +1,5 @@ name: create-conda-env description: 'Create conda environments if they dont exist' -inputs: - install-collateral: - description: 'Install Spike/Libgloss/etc' - required: false - default: true - runs: using: "composite" steps: @@ -20,14 +14,12 @@ runs: conda-lock install -n ${{ env.conda-env-name-no-time }}-$(date --date "${{ env.workflow-timestamp }}" +%Y%m%d)-esp-tools ./conda-reqs/conda-lock-reqs/conda-requirements-esp-tools-linux-64.conda-lock.yml conda deactivate - if [[ "${{ inputs.install-collateral }}" == 'true' ]]; then - echo "Add extra toolchain collateral to RISC-V install area" - conda activate ${{ env.conda-env-name-no-time }}-$(date --date "${{ env.workflow-timestamp }}" +%Y%m%d)-riscv-tools - ./scripts/build-toolchain-extra.sh riscv-tools -p $CONDA_PREFIX/riscv-tools - conda deactivate - conda activate ${{ env.conda-env-name-no-time }}-$(date --date "${{ env.workflow-timestamp }}" +%Y%m%d)-esp-tools - ./scripts/build-toolchain-extra.sh esp-tools -p $CONDA_PREFIX/esp-tools - conda deactivate - fi + echo "Add extra toolchain collateral to RISC-V install area" + conda activate ${{ env.conda-env-name-no-time }}-$(date --date "${{ env.workflow-timestamp }}" +%Y%m%d)-riscv-tools + ./scripts/build-toolchain-extra.sh riscv-tools -p $CONDA_PREFIX/riscv-tools + conda deactivate + conda activate ${{ env.conda-env-name-no-time }}-$(date --date "${{ env.workflow-timestamp }}" +%Y%m%d)-esp-tools + ./scripts/build-toolchain-extra.sh esp-tools -p $CONDA_PREFIX/esp-tools + conda deactivate fi shell: bash -leo pipefail {0} diff --git a/.github/scripts/defaults.sh b/.github/scripts/defaults.sh index 02b612f930..7d38e5f97e 100755 --- a/.github/scripts/defaults.sh +++ b/.github/scripts/defaults.sh @@ -25,7 +25,7 @@ LOCAL_FIRESIM_DIR=$LOCAL_CHIPYARD_DIR/sims/firesim/sim # key value store to get the build groups declare -A grouping -grouping["group-cores"]="chipyard-cva6 chipyard-ibex chipyard-rocket chipyard-hetero chipyard-boom chipyard-sodor chipyard-digitaltop chipyard-multiclock-rocket chipyard-nomem-scratchpad" +grouping["group-cores"]="chipyard-cva6 chipyard-ibex chipyard-rocket chipyard-hetero chipyard-boom chipyard-sodor chipyard-digitaltop chipyard-multiclock-rocket chipyard-nomem-scratchpad chipyard-spike" grouping["group-peripherals"]="chipyard-dmirocket chipyard-blkdev chipyard-spiflashread chipyard-spiflashwrite chipyard-mmios chipyard-lbwif" grouping["group-accels"]="chipyard-fftgenerator chipyard-nvdla chipyard-mempress chipyard-sha3 chipyard-hwacha chipyard-gemmini chipyard-streaming-fir chipyard-streaming-passthrough" grouping["group-constellation"]="chipyard-constellation" @@ -45,6 +45,7 @@ mapping["chipyard-streaming-fir"]=" CONFIG=StreamingFIRRocketConfig" mapping["chipyard-streaming-passthrough"]=" CONFIG=StreamingPassthroughRocketConfig" mapping["chipyard-hetero"]=" CONFIG=LargeBoomAndRocketConfig" mapping["chipyard-boom"]=" CONFIG=SmallBoomConfig" +mapping["chipyard-spike"]=" CONFIG=SpikeConfig" mapping["chipyard-blkdev"]=" CONFIG=SimBlockDeviceRocketConfig" mapping["chipyard-hwacha"]=" CONFIG=HwachaRocketConfig" mapping["chipyard-gemmini"]=" CONFIG=GemminiRocketConfig" diff --git a/.github/scripts/run-tests.sh b/.github/scripts/run-tests.sh index c9e190dbbb..8dc6060316 100755 --- a/.github/scripts/run-tests.sh +++ 
b/.github/scripts/run-tests.sh @@ -41,6 +41,9 @@ case $1 in chipyard-boom) run_bmark ${mapping[$1]} ;; + chipyard-spike) + run_bmark ${mapping[$1]} + ;; chipyard-hetero) run_bmark ${mapping[$1]} ;; diff --git a/.github/workflows/chipyard-run-tests.yml b/.github/workflows/chipyard-run-tests.yml index 95a4482802..750f35a231 100644 --- a/.github/workflows/chipyard-run-tests.yml +++ b/.github/workflows/chipyard-run-tests.yml @@ -83,8 +83,6 @@ jobs: uses: ./.github/actions/git-workaround - name: Create conda env uses: ./.github/actions/create-conda-env - with: - install-collateral: false - name: Check commits of each submodule run: | conda activate ${{ env.conda-env-name-no-time }}-$(date --date "${{ env.workflow-timestamp }}" +%Y%m%d)-riscv-tools @@ -111,8 +109,6 @@ jobs: uses: ./.github/actions/git-workaround - name: Create conda env uses: ./.github/actions/create-conda-env - with: - install-collateral: false - name: Check that the tutorial-setup patches apply run: | conda activate ${{ env.conda-env-name-no-time }}-$(date --date "${{ env.workflow-timestamp }}" +%Y%m%d)-riscv-tools @@ -138,8 +134,6 @@ jobs: uses: ./.github/actions/git-workaround - name: Create conda env uses: ./.github/actions/create-conda-env - with: - install-collateral: false - name: Check that documentation builds with no warnings/errors run: | conda activate ${{ env.conda-env-name-no-time }}-$(date --date "${{ env.workflow-timestamp }}" +%Y%m%d)-riscv-tools @@ -535,6 +529,29 @@ jobs: group-key: "group-cores" project-key: "chipyard-sodor" + chipyard-spike-run-tests: + name: chipyard-spike-run-tests + needs: prepare-chipyard-cores + runs-on: self-hosted + steps: + - name: Delete old checkout + run: | + ls -alh . + rm -rf ${{ github.workspace }}/* || true + rm -rf ${{ github.workspace }}/.* || true + ls -alh . + - name: Checkout + uses: actions/checkout@v3 + - name: Git workaround + uses: ./.github/actions/git-workaround + - name: Create conda env + uses: ./.github/actions/create-conda-env + - name: Run tests + uses: ./.github/actions/run-tests + with: + group-key: "group-cores" + project-key: "chipyard-spike" + chipyard-fftgenerator-run-tests: name: chipyard-fftgenerator-run-tests needs: prepare-chipyard-accels diff --git a/docs/Software/Spike.rst b/docs/Software/Spike.rst index 5c022471a5..bdaa4750d4 100644 --- a/docs/Software/Spike.rst +++ b/docs/Software/Spike.rst @@ -21,3 +21,15 @@ full cycle-accurate simulation using software RTL simulators or FireSim. Spike comes pre-packaged in the RISC-V toolchain and is available on the path as ``spike``. More information can be found in the `Spike repository `__. + +Spike-as-a-Tile +----------------- + +Chipyard contains experimental support for simulating a Spike processor model with the uncore, similar to a virtual-platform. +In this configuration, Spike is cache-coherent, and communicates with the uncore through a C++ TileLink private cache model. + +.. 
code-block:: shell + + make CONFIG=SpikeConfig run-binary BINARY=hello.riscv + + diff --git a/generators/chipyard/src/main/resources/csrc/spiketile.cc b/generators/chipyard/src/main/resources/csrc/spiketile.cc new file mode 100644 index 0000000000..06b57ab3f9 --- /dev/null +++ b/generators/chipyard/src/main/resources/csrc/spiketile.cc @@ -0,0 +1,936 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +enum transfer_t { + NToB, + NToT, + BToT +}; + +enum cache_state_t { + NONE, + BRANCH, + TRUNK, + DIRTY +}; + +struct cache_line_t { + cache_state_t state; + uint64_t addr; + uint64_t data[8]; +}; + +struct mem_region_t { + uint64_t base; + uint64_t size; +}; + +struct stq_entry_t { + uint64_t addr; + uint64_t bytes; + size_t len; +}; + +struct cache_miss_t { + bool valid; + uint64_t addr; + size_t way; + transfer_t type; +}; + +struct writeback_t { + cache_line_t line; + cache_state_t desired; + uint64_t sourceid; + bool voluntary; +}; + + +class chipyard_simif_t : public simif_t +{ +public: + char* addr_to_mem(reg_t addr) override { return NULL; }; + bool reservable(reg_t addr) override; + bool mmio_fetch(reg_t addr, size_t len, uint8_t* bytes) override; + bool mmio_load(reg_t addr, size_t len, uint8_t* bytes) override; + bool mmio_store(reg_t addr, size_t len, const uint8_t* bytes) override; + void proc_reset(unsigned id) override { }; + const char* get_symbol(uint64_t addr) override { return nullptr; }; + + bool icache_a(uint64_t *address, uint64_t *source); + void icache_d(uint64_t sourceid, uint64_t data[8]); + + bool mmio_a(uint64_t *address, uint64_t* data, unsigned char* store, int* size); + void mmio_d(uint64_t data); + + bool dcache_a(uint64_t *address, uint64_t* source, unsigned char* state_old, unsigned char* state_new); + void dcache_b(uint64_t address, uint64_t source, int param); + bool dcache_c(uint64_t *address, uint64_t* source, int* param, unsigned char* voluntary, unsigned char* has_data, uint64_t* data[8]); + void dcache_d(uint64_t sourceid, uint64_t data[8], unsigned char has_data, unsigned char grantack); + void drain_stq(); + bool stq_empty() { return st_q.size() == 0; }; + + ~chipyard_simif_t() { }; + chipyard_simif_t(size_t icache_ways, + size_t icache_sets, + size_t dcache_ways, + size_t dcache_sets, + char* cacheable, + char* uncacheable, + char* readonly_uncacheable, + char* executable, + size_t icache_sourceids, + size_t dcache_sourceids); + uint64_t cycle; + bool use_stq; +private: + bool handle_cache_access(reg_t addr, size_t len, + uint8_t* load_bytes, + const uint8_t* store_bytes, + access_type type); + void handle_mmio_access(reg_t addr, size_t len, + uint8_t* load_bytes, + const uint8_t* store_bytes, + access_type type, + bool readonly); + + size_t icache_ways; + size_t icache_sets; + size_t dcache_ways; + size_t dcache_sets; + + std::vector cacheables; + std::vector uncacheables; + std::vector readonly_uncacheables; + std::vector executables; + + std::vector> dcache; + std::vector> icache; + std::vector icache_sourceids; + std::vector dcache_a_sourceids; + std::vector dcache_c_sourceids; + std::vector dcache_mmio_sourceids; + + std::vector dcache_miss_q; + std::vector icache_miss_q; + std::vector icache_inflight; + std::vector dcache_inflight; + std::vector wb_q; + std::vector st_q; + + std::map, uint64_t> readonly_cache; + + bool mmio_valid; + bool mmio_inflight; + uint64_t mmio_addr; + bool mmio_st; + uint64_t mmio_stdata; + size_t mmio_len; + uint64_t mmio_lddata; +}; + +class tile_t { +public: + tile_t(processor_t* p, 
chipyard_simif_t* s); + processor_t* proc; + chipyard_simif_t* simif; + size_t max_insns; + context_t spike_context; + context_t stq_context; +}; + +context_t *host; +std::map tiles; +std::ostream sout(nullptr); +log_file_t* log_file; + +extern "C" void spike_tile_reset(int hartid) +{ + if (tiles.find(hartid) != tiles.end()) { + tiles[hartid]->proc->reset(); + } +} + +extern "C" void spike_tile(int hartid, char* isa, + int pmpregions, + int icache_sets, int icache_ways, + int dcache_sets, int dcache_ways, + char* cacheable, char* uncacheable, char* readonly_uncacheable, char* executable, + int icache_sourceids, int dcache_sourceids, + long long int reset_vector, + long long int ipc, + long long int cycle, + long long int* insns_retired, + + char debug, + char mtip, char msip, char meip, + char seip, + + unsigned char icache_a_ready, + unsigned char* icache_a_valid, + long long int* icache_a_address, + long long int* icache_a_sourceid, + + unsigned char icache_d_valid, + long long int icache_d_sourceid, + long long int icache_d_data_0, + long long int icache_d_data_1, + long long int icache_d_data_2, + long long int icache_d_data_3, + long long int icache_d_data_4, + long long int icache_d_data_5, + long long int icache_d_data_6, + long long int icache_d_data_7, + + unsigned char dcache_a_ready, + unsigned char* dcache_a_valid, + long long int* dcache_a_address, + long long int* dcache_a_sourceid, + unsigned char* dcache_a_state_old, + unsigned char* dcache_a_state_new, + + unsigned char dcache_b_valid, + long long int dcache_b_address, + long long int dcache_b_source, + int dcache_b_param, + + unsigned char dcache_c_ready, + unsigned char* dcache_c_valid, + long long int* dcache_c_address, + long long int* dcache_c_source, + int* dcache_c_param, + unsigned char* dcache_c_voluntary, + unsigned char* dcache_c_has_data, + long long int* dcache_c_data_0, + long long int* dcache_c_data_1, + long long int* dcache_c_data_2, + long long int* dcache_c_data_3, + long long int* dcache_c_data_4, + long long int* dcache_c_data_5, + long long int* dcache_c_data_6, + long long int* dcache_c_data_7, + + unsigned char dcache_d_valid, + unsigned char dcache_d_has_data, + unsigned char dcache_d_grantack, + long long int dcache_d_sourceid, + long long int dcache_d_data_0, + long long int dcache_d_data_1, + long long int dcache_d_data_2, + long long int dcache_d_data_3, + long long int dcache_d_data_4, + long long int dcache_d_data_5, + long long int dcache_d_data_6, + long long int dcache_d_data_7, + + unsigned char mmio_a_ready, + unsigned char* mmio_a_valid, + long long int* mmio_a_address, + long long int* mmio_a_data, + unsigned char* mmio_a_store, + int* mmio_a_size, + + unsigned char mmio_d_valid, + long long int mmio_d_data + ) +{ + if (!host) { + host = context_t::current(); + sout.rdbuf(std::cerr.rdbuf()); + log_file = new log_file_t(nullptr); + } + if (tiles.find(hartid) == tiles.end()) { + printf("Constructing spike processor_t\n"); + isa_parser_t *isa_parser = new isa_parser_t(isa, "MSU"); + chipyard_simif_t* simif = new chipyard_simif_t(icache_ways, icache_sets, + dcache_ways, dcache_sets, + cacheable, uncacheable, readonly_uncacheable, executable, + icache_sourceids, dcache_sourceids); + std::string* isastr = new std::string(isa); + cfg_t* cfg = new cfg_t(std::make_pair(0, 0), + nullptr, + isastr->c_str(), + "MSU", + "vlen:128,elen:64", + false, + endianness_little, + false, + pmpregions, + std::vector(), + std::vector(), + false); + processor_t* p = new processor_t(isa_parser, + cfg, + simif, + 
hartid, + false, + log_file->get(), + sout); + + p->enable_log_commits(); + + s_vpi_vlog_info vinfo; + if (!vpi_get_vlog_info(&vinfo)) + abort(); + for (int i = 1; i < vinfo.argc; i++) { + std::string arg(vinfo.argv[i]); + if (arg == "+spike-debug") { + p->set_debug(true); + } + if (arg == "+spike-stq") { + simif->use_stq = true; + } + } + + p->reset(); + p->get_state()->pc = reset_vector; + tiles[hartid] = new tile_t(p, simif); + printf("Done constructing spike processor\n"); + } + tile_t* tile = tiles[hartid]; + chipyard_simif_t* simif = tile->simif; + processor_t* proc = tile->proc; + + simif->cycle = cycle; + if (debug) { + proc->halt_request = proc->HR_REGULAR; + } + + proc->get_state()->mip->backdoor_write_with_mask(MIP_MTIP, mtip ? MIP_MTIP : 0); + proc->get_state()->mip->backdoor_write_with_mask(MIP_MSIP, msip ? MIP_MSIP : 0); + proc->get_state()->mip->backdoor_write_with_mask(MIP_MEIP, meip ? MIP_MEIP : 0); + proc->get_state()->mip->backdoor_write_with_mask(MIP_SEIP, seip ? MIP_SEIP : 0); + + tile->max_insns = ipc; + uint64_t pre_insns = proc->get_state()->minstret->read(); + tile->spike_context.switch_to(); + *insns_retired = proc->get_state()->minstret->read() - pre_insns; + if (simif->use_stq) { + tile->stq_context.switch_to(); + } + + *icache_a_valid = 0; + if (icache_a_ready) { + *icache_a_valid = simif->icache_a((uint64_t*)icache_a_address, + (uint64_t*)icache_a_sourceid); + } + + if (icache_d_valid) { + uint64_t data[8] = {icache_d_data_0, icache_d_data_1, icache_d_data_2, icache_d_data_3, + icache_d_data_4, icache_d_data_5, icache_d_data_6, icache_d_data_7}; + simif->icache_d(icache_d_sourceid, data); + } + + *dcache_a_valid = 0; + if (dcache_a_ready) { + *dcache_a_valid = simif->dcache_a((uint64_t*)dcache_a_address, + (uint64_t*)dcache_a_sourceid, + dcache_a_state_old, dcache_a_state_new); + } + if (dcache_b_valid) { + simif->dcache_b(dcache_b_address, dcache_b_source, dcache_b_param); + } + *dcache_c_valid = 0; + if (dcache_c_ready) { + uint64_t* data[8] = {(uint64_t*)dcache_c_data_0, (uint64_t*)dcache_c_data_1, (uint64_t*)dcache_c_data_2, (uint64_t*)dcache_c_data_3, + (uint64_t*)dcache_c_data_4, (uint64_t*)dcache_c_data_5, (uint64_t*)dcache_c_data_6, (uint64_t*)dcache_c_data_7}; + *dcache_c_valid = simif->dcache_c((uint64_t*)dcache_c_address, (uint64_t*)dcache_c_source, (int*)dcache_c_param, + dcache_c_voluntary, dcache_c_has_data, data); + } + if (dcache_d_valid) { + uint64_t data[8] = {dcache_d_data_0, dcache_d_data_1, dcache_d_data_2, dcache_d_data_3, + dcache_d_data_4, dcache_d_data_5, dcache_d_data_6, dcache_d_data_7}; + simif->dcache_d(dcache_d_sourceid, data, dcache_d_has_data, dcache_d_grantack); + } + + *mmio_a_valid = 0; + if (mmio_a_ready) { + *mmio_a_valid = simif->mmio_a((uint64_t*)mmio_a_address, (uint64_t*) mmio_a_data, + mmio_a_store, mmio_a_size); + } + if (mmio_d_valid) { + simif->mmio_d(mmio_d_data); + } +} + + +chipyard_simif_t::chipyard_simif_t(size_t icache_ways, + size_t icache_sets, + size_t dcache_ways, + size_t dcache_sets, + char* cacheable, + char* uncacheable, + char* readonly_uncacheable, + char* executable, + size_t ic_sourceids, + size_t dc_sourceids + ) : + cycle(0), + use_stq(false), + icache_ways(icache_ways), + icache_sets(icache_sets), + dcache_ways(dcache_ways), + dcache_sets(dcache_sets), + mmio_valid(false), + mmio_inflight(false) +{ + + icache.resize(icache_ways); + for (auto &w : icache) { + w.resize(icache_sets); + for (size_t i = 0; i < icache_sets; i++) w[i].state = NONE; + } + + dcache.resize(dcache_ways); + for (auto &w : 
dcache) { + w.resize(dcache_sets); + for (size_t i = 0; i < dcache_sets; i++) w[i].state = NONE; + } + for (int i = 0; i < ic_sourceids; i++) { + icache_sourceids.push_back(i); + icache_inflight.push_back(cache_miss_t { 0, 0, 0, NToB }); + } + for (int i = 0; i < dc_sourceids; i++) { + dcache_a_sourceids.push_back(i); + dcache_c_sourceids.push_back(i); + dcache_inflight.push_back(cache_miss_t { 0, 0, 0, NToB }); + } + + std::stringstream css(cacheable); + std::stringstream uss(uncacheable); + std::stringstream rss(readonly_uncacheable); + std::stringstream xss(executable); + std::string base; + std::string size; + while (css >> base) { + css >> size; + uint64_t base_int = std::stoul(base); + uint64_t size_int = std::stoul(size); + cacheables.push_back(mem_region_t { base_int, size_int }); + } + while (uss >> base) { + uss >> size; + uint64_t base_int = std::stoul(base); + uint64_t size_int = std::stoul(size); + uncacheables.push_back(mem_region_t { base_int, size_int }); + } + while (rss >> base) { + rss >> size; + uint64_t base_int = std::stoul(base); + uint64_t size_int = std::stoul(size); + readonly_uncacheables.push_back(mem_region_t { base_int, size_int }); + } + while (xss >> base) { + xss >> size; + uint64_t base_int = std::stoul(base); + uint64_t size_int = std::stoul(size); + executables.push_back(mem_region_t { base_int, size_int }); + } +} + +bool chipyard_simif_t::reservable(reg_t addr) { + for (auto& r: cacheables) { + if (addr >= r.base && addr < r.base + r.size) { + return true; + } + } + return false; +} + +bool chipyard_simif_t::mmio_fetch(reg_t addr, size_t len, uint8_t* bytes) { + bool executable = false; + + for (auto& r: executables) { + if (addr >= r.base && addr + len <= r.base + r.size) { + executable = true; + break; + } + } + if (!executable) { + return false; + } + + while (!handle_cache_access(addr, len, bytes, nullptr, FETCH)) { + host->switch_to(); + } + return true; +} + +bool chipyard_simif_t::mmio_load(reg_t addr, size_t len, uint8_t* bytes) { + bool found = false; + bool cacheable = false; + bool readonly = false; + for (auto& r: cacheables) { + if (addr >= r.base && addr + len <= r.base + r.size) { + cacheable = true; + found = true; + break; + } + } + if (!found) { + for (auto& r: uncacheables) { + if (addr >= r.base && addr + len <= r.base + r.size) { + cacheable = false; + found = true; + break; + } + } + for (auto& r: readonly_uncacheables) { + if (addr >= r.base && addr + len <= r.base + r.size) { + readonly = true; + break; + } + } + } + + if (!found) { + return false; + } + + if (cacheable) { + while (!handle_cache_access(addr, len, bytes, nullptr, LOAD)) { + host->switch_to(); + } + } else { + handle_mmio_access(addr, len, bytes, nullptr, LOAD, readonly); + } + + return true; +} + +void chipyard_simif_t::handle_mmio_access(reg_t addr, size_t len, + uint8_t* load_bytes, + const uint8_t* store_bytes, + access_type type, + bool readonly) { + if (type == LOAD && readonly) { + auto it = readonly_cache.find(std::make_pair(addr, len)); + if (it != readonly_cache.end()) { + memcpy(load_bytes, &(it->second), len); + return; + } + } + + mmio_valid = true; + mmio_inflight = false; + mmio_addr = addr; + mmio_st = type == STORE; + if (type == STORE) { + assert(len <= 8); + memcpy(&mmio_stdata, store_bytes, len); + } + mmio_len = len; + + while (mmio_valid) { + host->switch_to(); + } + if (type == LOAD) { + memcpy(load_bytes , &mmio_lddata, len); + } + if (type == LOAD && readonly) { + readonly_cache[std::make_pair(addr, len)] = mmio_lddata; + } +} + +bool 
chipyard_simif_t::handle_cache_access(reg_t addr, size_t len, + uint8_t* load_bytes, + const uint8_t* store_bytes, + access_type type) { + uint64_t stdata = 0; + if (type == STORE) { + assert(len <= 8); + memcpy(&stdata, store_bytes, len); + } + + // no stores to icache + std::vector> *cache = &icache; + std::vector *missq = &icache_miss_q; + std::vector *inflight = &icache_inflight; + size_t n_sets = icache_sets; + size_t n_ways = icache_ways; + if (type != FETCH) { + cache = &dcache; + missq = &dcache_miss_q; + inflight = &dcache_inflight; + n_sets = dcache_sets; + n_ways = dcache_ways; + } + if (type == LOAD) { + for (auto& s : st_q) { + if (addr == s.addr && len < s.len) { + // Forwarding + memcpy(load_bytes, &(s.bytes), len); + return true; + } + if (addr < s.addr && addr + len > s.addr) { + return false; + } + if (s.addr < addr && s.addr + s.len > addr) { + return false; + } + } + } + +#define SETIDX(ADDR) ((ADDR >> 6) & (n_sets - 1)) + uint64_t setidx = SETIDX(addr); + uint64_t offset = addr & (64 - 1); + bool cache_hit = false; + size_t hit_way = 0; + for (int i = 0; i < n_ways; i++) { + bool addr_match = ((*cache)[i][setidx].addr >> 6) == (addr >> 6); + if (addr_match && (*cache)[i][setidx].state != NONE) { + assert(!cache_hit); + cache_hit = true; + hit_way = i; + } + } + + if (type != STORE) { + if (cache_hit) { + memcpy(load_bytes, (uint8_t*)((*cache)[hit_way][setidx].data) + offset, len); + return true; + } + } else { + for (int i = 0; i < icache_ways; i++) { + if ((icache[i][setidx].addr >> 6) == addr >> 6) { + icache[i][setidx].state = NONE; + } + } + if (cache_hit && dcache[hit_way][setidx].state != BRANCH) { + dcache[hit_way][setidx].state = DIRTY; + memcpy((uint8_t*)(dcache[hit_way][setidx].data) + offset, store_bytes, len); + return true; + } + } + + for (auto& e : wb_q) { + cache_line_t& cl = e.line; + if (cl.addr >> 6 == addr >> 6) { + return false; + } + } + + for (cache_miss_t& cl : *missq) { + if (cl.addr >> 6 == addr >> 6) { + return false; + } + } + + for (cache_miss_t& cl : *inflight) { + if (cl.addr >> 6 == addr >> 6 && cl.valid) { + return false; + } + } + + + size_t repl_way = rand() % n_ways; + transfer_t upgrade; + size_t upgrade_way; + bool do_repl; + if (type == STORE) { + if (cache_hit && (*cache)[hit_way][setidx].state != NONE) { + upgrade = BToT; + upgrade_way = hit_way; + do_repl = false; + } else { + upgrade = NToT; + upgrade_way = repl_way; + do_repl = true; + } + } else { + upgrade = NToB; + upgrade_way = repl_way; + do_repl = true; + } + if (do_repl) { + for (auto& e : *missq) { + if (SETIDX(e.addr) == setidx) { + return false; + } + } + for (auto& e : *inflight) { + if (e.valid && SETIDX(e.addr) == setidx) { + return false; + } + } + } + + missq->push_back(cache_miss_t { true, addr, upgrade_way, upgrade }); + + cache_line_t repl_cl = (*cache)[repl_way][setidx]; + if (do_repl) { + if (repl_cl.state == DIRTY) { + wb_q.push_back(writeback_t { repl_cl, NONE, 0, true}); + } + (*cache)[repl_way][setidx].state = NONE; + } + (*cache)[upgrade_way][setidx].state = NONE; + + return false; +} + +bool chipyard_simif_t::icache_a(uint64_t* address, uint64_t* sourceid) { + if (icache_miss_q.empty() || icache_sourceids.empty()) { + return false; + } + *sourceid = icache_sourceids[0]; + *address = (icache_miss_q[0].addr >> 6) << 6; + + icache_inflight[icache_sourceids[0]] = icache_miss_q[0]; + + icache_sourceids.erase(icache_sourceids.begin()); + icache_miss_q.erase(icache_miss_q.begin()); + + return true; +} + +void chipyard_simif_t::icache_d(uint64_t sourceid, 
uint64_t data[8]) { + cache_miss_t& miss = icache_inflight[sourceid]; + uint64_t setidx = (miss.addr >> 6) & (icache_sets - 1); + icache_inflight[sourceid].valid = false; + icache[miss.way][setidx].state = BRANCH; + icache[miss.way][setidx].addr = miss.addr; + memcpy(icache[miss.way][setidx].data, (void*)data, 64); + icache_sourceids.push_back(sourceid); +} + +bool chipyard_simif_t::mmio_a(uint64_t* address, uint64_t* data, unsigned char* store, int* size) { + if (!mmio_valid || mmio_inflight) { + return false; + } + mmio_inflight = true; + *address = mmio_addr; + *store = mmio_st; + *data = mmio_stdata; + *size = mmio_len; + return true; +} + +void chipyard_simif_t::mmio_d(uint64_t data) { + mmio_valid = false; + mmio_inflight = false; + size_t offset = mmio_addr & 7; + mmio_lddata = data >> (offset * 8); +} + +bool chipyard_simif_t::dcache_a(uint64_t *address, uint64_t* source, unsigned char* state_old, unsigned char* state_new) { + if (dcache_miss_q.empty() || dcache_a_sourceids.empty()) { + return false; + } + *source = dcache_a_sourceids[0]; + *address = (dcache_miss_q[0].addr >> 6) << 6; + switch (dcache_miss_q[0].type) { + case NToB: + *state_old = 0; + *state_new = 0; + break; + case NToT: + *state_old = 0; + *state_new = 1; + break; + case BToT: + *state_old = 1; + *state_new = 1; + break; + } + + dcache_inflight[dcache_a_sourceids[0]] = dcache_miss_q[0]; + dcache_a_sourceids.erase(dcache_a_sourceids.begin()); + dcache_miss_q.erase(dcache_miss_q.begin()); + return true; +} + +void chipyard_simif_t::dcache_b(uint64_t address, uint64_t source, int param) { + uint64_t setidx = (address >> 6) & (dcache_sets - 1); + uint64_t offset = address & (64 - 1); + bool cache_hit = false; + size_t hit_way = 0; + for (int i = 0; i < dcache_ways; i++) { + bool addr_match = dcache[i][setidx].addr >> 6 == address >> 6; + if (addr_match && dcache[i][setidx].state != NONE) { + cache_hit = true; + hit_way = i; + } + } + cache_state_t desired; + switch (param) { + case 0: + desired = TRUNK; + break; + case 1: + desired = BRANCH; + break; + case 2: + desired = NONE; + break; + } + if (!cache_hit) { + cache_line_t miss { NONE, address, {} }; + wb_q.push_back(writeback_t { miss, desired, source, false}); + } else { + wb_q.push_back(writeback_t { dcache[hit_way][setidx], desired, source, false}); + if (desired == TRUNK && dcache[hit_way][setidx].state == BRANCH) { + dcache[hit_way][setidx].state = BRANCH; + } else { + dcache[hit_way][setidx].state = desired; + } + + } +} + +bool chipyard_simif_t::dcache_c(uint64_t* address, uint64_t* source, int* param, unsigned char* voluntary, + unsigned char* has_data, + uint64_t* data[8]) { + if (wb_q.empty()) + return false; + + writeback_t& wb = wb_q[0]; + if (wb.voluntary && dcache_c_sourceids.empty()) + return false; + + *address = (wb.line.addr >> 6) << 6; + *source = wb.sourceid; + *voluntary = wb.voluntary; + if (wb.voluntary) { + *source = dcache_c_sourceids[0]; + dcache_c_sourceids.erase(dcache_c_sourceids.begin()); + } + +#define SHRINK(_desired, _state, _has_data, _param) \ + if (wb.line.state == _state && wb.desired == _desired) { \ + *has_data = _has_data; \ + *param = _param; \ + } + + SHRINK(TRUNK , DIRTY , true , 3); + SHRINK(TRUNK , TRUNK , false, 3); + SHRINK(TRUNK , BRANCH , false, 4); + SHRINK(TRUNK , NONE , false, 5); + SHRINK(BRANCH , DIRTY , true , 0); + SHRINK(BRANCH , TRUNK , false, 0); + SHRINK(BRANCH , BRANCH , false, 4); + SHRINK(BRANCH , NONE , false, 5); + SHRINK(NONE , DIRTY , true , 1); + SHRINK(NONE , TRUNK , false, 1); + SHRINK(NONE , 
BRANCH , false, 2); + SHRINK(NONE , NONE , false, 5); + + for (int i = 0; i < 8; i++) { + *(data[i]) = wb.line.data[i]; + } + wb_q.erase(wb_q.begin()); + return true; +} + +bool chipyard_simif_t::mmio_store(reg_t addr, size_t len, const uint8_t* bytes) { + bool found = false; + bool cacheable = false; + for (auto& r: cacheables) { + if (addr >= r.base && addr + len <= r.base + r.size) { + cacheable = true; + found = true; + break; + } + } + for (auto& r: uncacheables) { + if (addr >= r.base && addr + len <= r.base + r.size) { + cacheable = false; + found = true; + break; + } + } + if (!found) { + return false; + } + if (cacheable) { + if (use_stq) { + assert(len <= 8); + uint64_t stdata; + memcpy(&stdata, bytes, len); + st_q.push_back(stq_entry_t { addr, stdata, len }); + } else { + while (!handle_cache_access(addr, len, nullptr, bytes, STORE)) { + host->switch_to(); + } + } + } else { + handle_mmio_access(addr, len, nullptr, bytes, STORE, false); + } + + return true; +} + +void chipyard_simif_t::drain_stq() { + while (true) { + while (st_q.size() == 0) { + host->switch_to(); + } + stq_entry_t store = st_q[0]; + while (!handle_cache_access(store.addr, store.len, nullptr, (uint8_t*)(&(store.bytes)), STORE)) { + host->switch_to(); + } + st_q.erase(st_q.begin()); + } +} + +void chipyard_simif_t::dcache_d(uint64_t sourceid, uint64_t data[8], unsigned char has_data, unsigned char grantack) { + if (grantack) { + cache_miss_t& miss = dcache_inflight[sourceid]; + uint64_t setidx = (miss.addr >> 6) & (dcache_sets - 1); + if (has_data) { + memcpy(dcache[miss.way][setidx].data, (void*)data, 64); + } + dcache_inflight[sourceid].valid = false; + if (miss.type == NToB) { + dcache[miss.way][setidx].state = BRANCH; + } else { + dcache[miss.way][setidx].state = TRUNK; + } + dcache[miss.way][setidx].addr = miss.addr; + dcache_a_sourceids.push_back(sourceid); + } else { + dcache_c_sourceids.push_back(sourceid); + } +} + +bool insn_should_fence(uint64_t bits) { + uint8_t opcode = bits & 0x7f; + return opcode == 0b0101111 || opcode == 0b0001111; +} + +void spike_thread_main(void* arg) +{ + tile_t* tile = (tile_t*) arg; + while (true) { + while (tile->max_insns == 0) { + host->switch_to(); + } + while (tile->max_insns != 0) { + // TODO: Fences don't work + // uint64_t last_bits = tile->proc->get_last_bits(); + // if (insn_should_fence(last_bits) && !tile->simif->stq_empty()) { + // host->switch_to(); + // } + tile->proc->step(1); + tile->max_insns--; + tile->proc->get_state()->mcycle->write(tile->simif->cycle); + } + } +} + +void stq_thread_main(void* arg) +{ + tile_t* tile = (tile_t*) arg; + tile->simif->drain_stq(); +} + +tile_t::tile_t(processor_t* p, chipyard_simif_t* s) : proc(p), simif(s), max_insns(0) { + spike_context.init(spike_thread_main, this); + stq_context.init(stq_thread_main, this); +} diff --git a/generators/chipyard/src/main/resources/vsrc/spiketile.v b/generators/chipyard/src/main/resources/vsrc/spiketile.v new file mode 100644 index 0000000000..87254e35dd --- /dev/null +++ b/generators/chipyard/src/main/resources/vsrc/spiketile.v @@ -0,0 +1,429 @@ +import "DPI-C" function void spike_tile_reset(input int hartid); + +import "DPI-C" function void spike_tile(input int hartid, + input string isa, + input int pmpregions, + input int icache_sets, + input int icache_ways, + input int dcache_sets, + input int dcache_ways, + input string cacheable, + input string uncacheable, + input string readonly_uncacheable, + input string executable, + input int icache_sourceids, + input int dcache_sourceids, + 
input longint reset_vector, + input longint ipc, + input longint cycle, + output longint insns_retired, + + input bit debug, + input bit mtip, + input bit msip, + input bit meip, + input bit seip, + + input bit icache_a_ready, + output bit icache_a_valid, + output longint icache_a_address, + output longint icache_a_sourceid, + + input bit icache_d_valid, + input longint icache_d_sourceid, + input longint icache_d_data_0, + input longint icache_d_data_1, + input longint icache_d_data_2, + input longint icache_d_data_3, + input longint icache_d_data_4, + input longint icache_d_data_5, + input longint icache_d_data_6, + input longint icache_d_data_7, + + input bit dcache_a_ready, + output bit dcache_a_valid, + output longint dcache_a_address, + output longint dcache_a_sourceid, + output bit dcache_a_state_old, + output bit dcache_a_state_new, + + input bit dcache_b_valid, + input longint dcache_b_address, + input longint dcache_b_source, + input int dcache_b_param, + + input bit dcache_c_ready, + output bit dcache_c_valid, + output longint dcache_c_address, + output longint dcache_c_sourceid, + output int dcache_c_param, + output bit dcache_c_voluntary, + output bit dcache_c_has_data, + output longint dcache_c_data_0, + output longint dcache_c_data_1, + output longint dcache_c_data_2, + output longint dcache_c_data_3, + output longint dcache_c_data_4, + output longint dcache_c_data_5, + output longint dcache_c_data_6, + output longint dcache_c_data_7, + + input bit dcache_d_valid, + input bit dcache_d_has_data, + input bit dcache_d_grantack, + input longint dcache_d_sourceid, + input longint dcache_d_data_0, + input longint dcache_d_data_1, + input longint dcache_d_data_2, + input longint dcache_d_data_3, + input longint dcache_d_data_4, + input longint dcache_d_data_5, + input longint dcache_d_data_6, + input longint dcache_d_data_7, + + input bit mmio_a_ready, + output bit mmio_a_valid, + output longint mmio_a_address, + output longint mmio_a_data, + output bit mmio_a_store, + output int mmio_a_size, + + input bit mmio_d_valid, + input longint mmio_d_data + ); + + +module SpikeBlackBox #( + parameter HARTID, + parameter ISA, + parameter PMPREGIONS, + parameter ICACHE_SETS, + parameter ICACHE_WAYS, + parameter DCACHE_SETS, + parameter DCACHE_WAYS, + parameter CACHEABLE, + parameter UNCACHEABLE, + parameter READONLY_UNCACHEABLE, + parameter EXECUTABLE, + parameter ICACHE_SOURCEIDS, + parameter DCACHE_SOURCEIDS )( + input clock, + input reset, + input [63:0] reset_vector, + input [63:0] ipc, + input [63:0] cycle, + output [63:0] insns_retired, + + input debug, + input mtip, + input msip, + input meip, + input seip, + + input icache_a_ready, + output icache_a_valid, + output [63:0] icache_a_address, + output [63:0] icache_a_sourceid, + + input icache_d_valid, + input [63:0] icache_d_sourceid, + input [63:0] icache_d_data_0, + input [63:0] icache_d_data_1, + input [63:0] icache_d_data_2, + input [63:0] icache_d_data_3, + input [63:0] icache_d_data_4, + input [63:0] icache_d_data_5, + input [63:0] icache_d_data_6, + input [63:0] icache_d_data_7, + + input dcache_a_ready, + output dcache_a_valid, + output [63:0] dcache_a_address, + output [63:0] dcache_a_sourceid, + output dcache_a_state_old, + output dcache_a_state_new, + + input dcache_b_valid, + input [63:0] dcache_b_address, + input [63:0] dcache_b_source, + input [31:0] dcache_b_param, + + input dcache_c_ready, + output dcache_c_valid, + output [63:0] dcache_c_address, + output [63:0] dcache_c_sourceid, + output [31:0] dcache_c_param, + 
output dcache_c_voluntary, + output dcache_c_has_data, + output [63:0] dcache_c_data_0, + output [63:0] dcache_c_data_1, + output [63:0] dcache_c_data_2, + output [63:0] dcache_c_data_3, + output [63:0] dcache_c_data_4, + output [63:0] dcache_c_data_5, + output [63:0] dcache_c_data_6, + output [63:0] dcache_c_data_7, + + input dcache_d_valid, + input dcache_d_has_data, + input dcache_d_grantack, + input [63:0] dcache_d_sourceid, + input [63:0] dcache_d_data_0, + input [63:0] dcache_d_data_1, + input [63:0] dcache_d_data_2, + input [63:0] dcache_d_data_3, + input [63:0] dcache_d_data_4, + input [63:0] dcache_d_data_5, + input [63:0] dcache_d_data_6, + input [63:0] dcache_d_data_7, + + input mmio_a_ready, + output mmio_a_valid, + output [63:0] mmio_a_address, + output [63:0] mmio_a_data, + output mmio_a_store, + output [31:0] mmio_a_size, + + input mmio_d_valid, + input [63:0] mmio_d_data + ); + + longint __insns_retired; + reg [63:0] __insns_retired_reg; + + wire __icache_a_ready; + bit __icache_a_valid; + longint __icache_a_address; + longint __icache_a_sourceid; + + reg __icache_a_valid_reg; + reg [63:0] __icache_a_address_reg; + reg [63:0] __icache_a_sourceid_reg; + + wire __mmio_a_ready; + bit __mmio_a_valid; + longint __mmio_a_address; + longint __mmio_a_data; + bit __mmio_a_store; + int __mmio_a_size; + + reg __mmio_a_valid_reg; + reg [63:0] __mmio_a_address_reg; + reg [31:0] __mmio_a_size_reg; + reg [63:0] __mmio_a_data_reg; + reg __mmio_a_store_reg; + + wire __dcache_a_ready; + bit __dcache_a_valid; + longint __dcache_a_address; + longint __dcache_a_sourceid; + bit __dcache_a_state_old; + bit __dcache_a_state_new; + + reg __dcache_a_valid_reg; + reg [63:0] __dcache_a_address_reg; + reg [63:0] __dcache_a_sourceid_reg; + reg __dcache_a_state_old_reg; + reg __dcache_a_state_new_reg; + + wire __dcache_c_ready; + bit __dcache_c_valid; + longint __dcache_c_address; + longint __dcache_c_sourceid; + int __dcache_c_param; + bit __dcache_c_voluntary; + bit __dcache_c_has_data; + longint __dcache_c_data_0; + longint __dcache_c_data_1; + longint __dcache_c_data_2; + longint __dcache_c_data_3; + longint __dcache_c_data_4; + longint __dcache_c_data_5; + longint __dcache_c_data_6; + longint __dcache_c_data_7; + + reg __dcache_c_valid_reg; + reg [63:0] __dcache_c_address_reg; + reg [63:0] __dcache_c_sourceid_reg; + reg [31:0] __dcache_c_param_reg; + reg __dcache_c_voluntary_reg; + reg __dcache_c_has_data_reg; + reg [63:0] __dcache_c_data_0_reg; + reg [63:0] __dcache_c_data_1_reg; + reg [63:0] __dcache_c_data_2_reg; + reg [63:0] __dcache_c_data_3_reg; + reg [63:0] __dcache_c_data_4_reg; + reg [63:0] __dcache_c_data_5_reg; + reg [63:0] __dcache_c_data_6_reg; + reg [63:0] __dcache_c_data_7_reg; + + + + + always @(posedge clock) begin + if (reset) begin + __insns_retired = 64'h0; + __insns_retired_reg <= 64'h0; + + __icache_a_valid = 1'b0; + __icache_a_valid_reg <= 1'b0; + __icache_a_address = 64'h0; + __icache_a_address_reg <= 64'h0; + __icache_a_sourceid = 64'h0; + __icache_a_sourceid_reg <= 64'h0; + + __mmio_a_valid = 1'b0; + __mmio_a_valid_reg <= 1'b0; + __mmio_a_address = 64'h0; + __mmio_a_address_reg <= 64'h0; + __mmio_a_data = 64'h0; + __mmio_a_data_reg <= 64'h0; + __mmio_a_store = 1'b0; + __mmio_a_store_reg <= 1'b0; + __mmio_a_size = 32'h0; + __mmio_a_size_reg <= 32'h0; + + __dcache_a_valid = 1'b0; + __dcache_a_valid_reg <= 1'b0; + __dcache_a_address = 64'h0; + __dcache_a_address_reg <= 64'h0; + __dcache_a_sourceid = 64'h0; + __dcache_a_sourceid_reg <= 64'h0; + __dcache_a_state_old = 1'h0; + 
__dcache_a_state_old_reg <= 1'h0; + __dcache_a_state_new = 1'h0; + __dcache_a_state_new_reg <= 1'h0; + + __dcache_c_valid = 1'b0; + __dcache_c_valid_reg <= 1'b0; + __dcache_c_address = 64'h0; + __dcache_c_address_reg <= 64'h0; + __dcache_c_sourceid = 64'h0; + __dcache_c_sourceid_reg <= 64'h0; + __dcache_c_param = 32'h0; + __dcache_c_param_reg <= 32'h0; + __dcache_c_voluntary = 1'h0; + __dcache_c_voluntary_reg <= 1'h0; + __dcache_c_has_data = 1'h0; + __dcache_c_has_data_reg <= 1'h0; + __dcache_c_data_0 = 64'h0; + __dcache_c_data_0_reg <= 64'h0; + __dcache_c_data_1 = 64'h0; + __dcache_c_data_1_reg <= 64'h0; + __dcache_c_data_2 = 64'h0; + __dcache_c_data_2_reg <= 64'h0; + __dcache_c_data_3 = 64'h0; + __dcache_c_data_3_reg <= 64'h0; + __dcache_c_data_4 = 64'h0; + __dcache_c_data_4_reg <= 64'h0; + __dcache_c_data_5 = 64'h0; + __dcache_c_data_5_reg <= 64'h0; + __dcache_c_data_6 = 64'h0; + __dcache_c_data_6_reg <= 64'h0; + __dcache_c_data_7 = 64'h0; + __dcache_c_data_7_reg <= 64'h0; + spike_tile_reset(HARTID); + end else begin + spike_tile(HARTID, ISA, PMPREGIONS, + ICACHE_SETS, ICACHE_WAYS, DCACHE_SETS, DCACHE_WAYS, + CACHEABLE, UNCACHEABLE, READONLY_UNCACHEABLE, EXECUTABLE, + ICACHE_SOURCEIDS, DCACHE_SOURCEIDS, + reset_vector, ipc, cycle, __insns_retired, + debug, mtip, msip, meip, seip, + + __icache_a_ready, __icache_a_valid, __icache_a_address, __icache_a_sourceid, + + icache_d_valid, icache_d_sourceid, + icache_d_data_0, icache_d_data_1, icache_d_data_2, icache_d_data_3, + icache_d_data_4, icache_d_data_5, icache_d_data_6, icache_d_data_7, + + __dcache_a_ready, __dcache_a_valid, __dcache_a_address, __dcache_a_sourceid, __dcache_a_state_old, __dcache_a_state_new, + + dcache_b_valid, dcache_b_address, dcache_b_source, dcache_b_param, + + __dcache_c_ready, __dcache_c_valid, __dcache_c_address, __dcache_c_sourceid, __dcache_c_param, __dcache_c_voluntary, __dcache_c_has_data, + __dcache_c_data_0, __dcache_c_data_1, __dcache_c_data_2, __dcache_c_data_3, + __dcache_c_data_4, __dcache_c_data_5, __dcache_c_data_6, __dcache_c_data_7, + + dcache_d_valid, dcache_d_has_data, dcache_d_grantack, dcache_d_sourceid, + dcache_d_data_0, dcache_d_data_1, dcache_d_data_2, dcache_d_data_3, + dcache_d_data_4, dcache_d_data_5, dcache_d_data_6, dcache_d_data_7, + + __mmio_a_ready, __mmio_a_valid, __mmio_a_address, __mmio_a_data, __mmio_a_store, __mmio_a_size, + mmio_d_valid, mmio_d_data + ); + __insns_retired_reg <= __insns_retired; + + + __icache_a_valid_reg <= __icache_a_valid; + __icache_a_address_reg <= __icache_a_address; + __icache_a_sourceid_reg <= __icache_a_sourceid; + + __dcache_a_valid_reg <= __dcache_a_valid; + __dcache_a_address_reg <= __dcache_a_address; + __dcache_a_sourceid_reg <= __dcache_a_sourceid; + __dcache_a_state_old_reg <= __dcache_a_state_old; + __dcache_a_state_new_reg <= __dcache_a_state_new; + + __dcache_c_valid_reg <= __dcache_c_valid; + __dcache_c_address_reg <= __dcache_c_address; + __dcache_c_sourceid_reg <= __dcache_c_sourceid; + __dcache_c_param_reg <= __dcache_c_param; + __dcache_c_voluntary_reg <= __dcache_c_voluntary; + __dcache_c_has_data_reg <= __dcache_c_has_data; + __dcache_c_data_0_reg <= __dcache_c_data_0; + __dcache_c_data_1_reg <= __dcache_c_data_1; + __dcache_c_data_2_reg <= __dcache_c_data_2; + __dcache_c_data_3_reg <= __dcache_c_data_3; + __dcache_c_data_4_reg <= __dcache_c_data_4; + __dcache_c_data_5_reg <= __dcache_c_data_5; + __dcache_c_data_6_reg <= __dcache_c_data_6; + __dcache_c_data_7_reg <= __dcache_c_data_7; + + __mmio_a_valid_reg <= __mmio_a_valid; + 
__mmio_a_address_reg <= __mmio_a_address; + __mmio_a_data_reg <= __mmio_a_data; + __mmio_a_store_reg <= __mmio_a_store; + __mmio_a_size_reg <= __mmio_a_size; + end + end // always @ (posedge clock) + assign insns_retired = __insns_retired_reg; + + assign icache_a_valid = __icache_a_valid_reg; + assign icache_a_address = __icache_a_address_reg; + assign icache_a_sourceid = __icache_a_sourceid_reg; + assign __icache_a_ready = icache_a_ready; + + assign dcache_a_valid = __dcache_a_valid_reg; + assign dcache_a_address = __dcache_a_address_reg; + assign dcache_a_sourceid = __dcache_a_sourceid_reg; + assign dcache_a_state_old = __dcache_a_state_old_reg; + assign dcache_a_state_new = __dcache_a_state_new_reg; + assign __dcache_a_ready = dcache_a_ready; + + assign dcache_c_valid = __dcache_c_valid_reg; + assign dcache_c_address = __dcache_c_address_reg; + assign dcache_c_sourceid = __dcache_c_sourceid_reg; + assign dcache_c_param = __dcache_c_param_reg; + assign dcache_c_voluntary = __dcache_c_voluntary_reg; + assign dcache_c_has_data = __dcache_c_has_data_reg; + assign dcache_c_data_0 = __dcache_c_data_0_reg; + assign dcache_c_data_1 = __dcache_c_data_1_reg; + assign dcache_c_data_2 = __dcache_c_data_2_reg; + assign dcache_c_data_3 = __dcache_c_data_3_reg; + assign dcache_c_data_4 = __dcache_c_data_4_reg; + assign dcache_c_data_5 = __dcache_c_data_5_reg; + assign dcache_c_data_6 = __dcache_c_data_6_reg; + assign dcache_c_data_7 = __dcache_c_data_7_reg; + assign __dcache_c_ready = dcache_c_ready; + + assign mmio_a_valid = __mmio_a_valid_reg; + assign mmio_a_address = __mmio_a_address_reg; + assign mmio_a_store = __mmio_a_store_reg; + assign mmio_a_data = __mmio_a_data_reg; + assign mmio_a_size = __mmio_a_size_reg; + assign __mmio_a_ready = mmio_a_ready; + + + +endmodule; diff --git a/generators/chipyard/src/main/scala/SpikeTile.scala b/generators/chipyard/src/main/scala/SpikeTile.scala new file mode 100644 index 0000000000..dd6e34b726 --- /dev/null +++ b/generators/chipyard/src/main/scala/SpikeTile.scala @@ -0,0 +1,392 @@ +package chipyard + +import chisel3._ +import chisel3.util._ +import chisel3.experimental.{IntParam, StringParam, IO} + +import freechips.rocketchip.config._ +import freechips.rocketchip.subsystem._ +import freechips.rocketchip.devices.tilelink._ +import freechips.rocketchip.diplomacy._ +import freechips.rocketchip.rocket._ +import freechips.rocketchip.tilelink._ +import freechips.rocketchip.interrupts._ +import freechips.rocketchip.util._ +import freechips.rocketchip.tile._ +import freechips.rocketchip.prci.ClockSinkParameters + +case class SpikeCoreParams( + val maxInsnsPerCycle: Int = 10000 +) extends CoreParams { + val useVM = true + val useHypervisor = false + val useSupervisor = true + val useUser = true + val useDebug = true + val useAtomics = true + val useAtomicsOnlyForIO = false + val useCompressed = true + override val useVector = true + val useSCIE = false + val useRVE = false + val mulDiv = Some(MulDivParams()) + val fpu = Some(FPUParams()) + val nLocalInterrupts = 0 + val useNMI = false + val nPTECacheEntries = 0 + val nPMPs = 16 + val pmpGranularity = 4 + val nBreakpoints = 0 + val useBPWatch = false + val mcontextWidth = 0 + val scontextWidth = 0 + val nPerfCounters = 0 + val haveBasicCounters = true + val haveFSDirty = true + val misaWritable = true + val haveCFlush = false + val nL2TLBEntries = 0 + val nL2TLBWays = 0 + val mtvecInit = None + val mtvecWritable = true + val instBits = 16 + val lrscCycles = 1 + val decodeWidth = 1 + val fetchWidth = 1 + val 
retireWidth = 1 + val bootFreqHz = BigInt(1000000000) + val rasEntries = 0 + val btbEntries = 0 + val bhtEntries = 0 + + override def vLen = 128 + override def vMemDataBits = 128 +} + +case class SpikeTileAttachParams( + tileParams: SpikeTileParams +) extends CanAttachTile { + type TileType = SpikeTile + val lookup = PriorityMuxHartIdFromSeq(Seq(tileParams)) + val crossingParams = RocketCrossingParams() +} + +case class SpikeTileParams( + hartId: Int = 0, + val core: SpikeCoreParams = SpikeCoreParams(), + icacheParams: ICacheParams = ICacheParams(nWays = 32), + dcacheParams: DCacheParams = DCacheParams(nWays = 32) +) extends InstantiableTileParams[SpikeTile] +{ + val name = Some("spike_tile") + val beuAddr = None + val blockerCtrlAddr = None + val btb = None + val boundaryBuffers = false + val dcache = Some(dcacheParams) + val icache = Some(icacheParams) + val clockSinkParams = ClockSinkParameters() + def instantiate(crossing: TileCrossingParamsLike, lookup: LookupByHartIdImpl)(implicit p: Parameters): SpikeTile = { + new SpikeTile(this, crossing, lookup) + } +} + +class SpikeTile( + val spikeTileParams: SpikeTileParams, + crossing: ClockCrossingType, + lookup: LookupByHartIdImpl, + q: Parameters) extends BaseTile(spikeTileParams, crossing, lookup, q) + with SinksExternalInterrupts + with SourcesExternalNotifications +{ + // Private constructor ensures altered LazyModule.p is used implicitly + def this(params: SpikeTileParams, crossing: TileCrossingParamsLike, lookup: LookupByHartIdImpl)(implicit p: Parameters) = + this(params, crossing.crossingType, lookup, p) + + // Required TileLink nodes + val intOutwardNode = IntIdentityNode() + val masterNode = visibilityNode + val slaveNode = TLIdentityNode() + + override def isaDTS = "rv64gcv_Zfh" + + // Required entry of CPU device in the device tree for interrupt purpose + val cpuDevice: SimpleDevice = new SimpleDevice("cpu", Seq("ucb-bar,spike", "riscv")) { + override def parent = Some(ResourceAnchors.cpus) + override def describe(resources: ResourceBindings): Description = { + val Description(name, mapping) = super.describe(resources) + Description(name, mapping ++ + cpuProperties ++ + nextLevelCacheProperty ++ + tileProperties) + } + } + + ResourceBinding { + Resource(cpuDevice, "reg").bind(ResourceAddress(hartId)) + } + + + val icacheNode = TLClientNode(Seq(TLMasterPortParameters.v1(Seq(TLMasterParameters.v1( + sourceId = IdRange(0, 1), + name = s"Core ${staticIdForMetadataUseOnly} ICache"))))) + + val dcacheNode = TLClientNode(Seq(TLMasterPortParameters.v1(Seq(TLMasterParameters.v1( + name = s"Core ${staticIdForMetadataUseOnly} DCache", + sourceId = IdRange(0, tileParams.dcache.get.nMSHRs), + supportsProbe = TransferSizes(p(CacheBlockBytes), p(CacheBlockBytes))))))) + + val mmioNode = TLClientNode((Seq(TLMasterPortParameters.v1(Seq(TLMasterParameters.v1( + name = s"Core ${staticIdForMetadataUseOnly} MMIO", + sourceId = IdRange(0, 1), + requestFifo = true)))))) + + tlOtherMastersNode := TLBuffer() := tlMasterXbar.node + masterNode :=* tlOtherMastersNode + tlMasterXbar.node := TLWidthWidget(64) := TLBuffer():= icacheNode + tlMasterXbar.node := TLWidthWidget(64) := TLBuffer() := dcacheNode + tlMasterXbar.node := TLWidthWidget(8) := TLBuffer() := mmioNode + + override lazy val module = new SpikeTileModuleImp(this) +} + +class SpikeBlackBox( + hartId: Int, + isa: String, + pmpregions: Int, + icache_sets: Int, + icache_ways: Int, + dcache_sets: Int, + dcache_ways: Int, + dcache_sourceids: Int, + cacheable_regions: String, + uncacheable_regions: 
String, + readonly_uncacheable_regions: String, + executable_regions: String) extends BlackBox(Map( + "HARTID" -> IntParam(hartId), + "ISA" -> StringParam(isa), + "PMPREGIONS" -> IntParam(pmpregions), + "ICACHE_SETS" -> IntParam(icache_sets), + "ICACHE_WAYS" -> IntParam(icache_ways), + "DCACHE_SETS" -> IntParam(dcache_sets), + "DCACHE_WAYS" -> IntParam(dcache_ways), + "ICACHE_SOURCEIDS" -> IntParam(1), + "DCACHE_SOURCEIDS" -> IntParam(dcache_sourceids), + "UNCACHEABLE" -> StringParam(uncacheable_regions), + "READONLY_UNCACHEABLE" -> StringParam(readonly_uncacheable_regions), + "CACHEABLE" -> StringParam(cacheable_regions), + "EXECUTABLE" -> StringParam(executable_regions) + )) with HasBlackBoxResource { + + val io = IO(new Bundle { + val clock = Input(Bool()) + val reset = Input(Bool()) + val reset_vector = Input(UInt(64.W)) + val ipc = Input(UInt(64.W)) + val cycle = Input(UInt(64.W)) + val insns_retired = Output(UInt(64.W)) + + val debug = Input(Bool()) + val mtip = Input(Bool()) + val msip = Input(Bool()) + val meip = Input(Bool()) + val seip = Input(Bool()) + + val icache = new Bundle { + val a = new Bundle { + val valid = Output(Bool()) + val ready = Input(Bool()) + val address = Output(UInt(64.W)) + val sourceid = Output(UInt(64.W)) + } + val d = new Bundle { + val valid = Input(Bool()) + val sourceid = Input(UInt(64.W)) + val data = Input(Vec(8, UInt(64.W))) + } + } + + val dcache = new Bundle { + val a = new Bundle { + val valid = Output(Bool()) + val ready = Input(Bool()) + val address = Output(UInt(64.W)) + val sourceid = Output(UInt(64.W)) + val state_old = Output(Bool()) + val state_new = Output(Bool()) + } + val b = new Bundle { + val valid = Input(Bool()) + val address = Input(UInt(64.W)) + val source = Input(UInt(64.W)) + val param = Input(UInt(32.W)) + } + val c = new Bundle { + val valid = Output(Bool()) + val ready = Input(Bool()) + val address = Output(UInt(64.W)) + val sourceid = Output(UInt(64.W)) + val param = Output(UInt(32.W)) + val voluntary = Output(Bool()) + val has_data = Output(Bool()) + val data = Output(Vec(8, UInt(64.W))) + } + val d = new Bundle { + val valid = Input(Bool()) + val sourceid = Input(UInt(64.W)) + val data = Input(Vec(8, UInt(64.W))) + val has_data = Input(Bool()) + val grantack = Input(Bool()) + } + } + + val mmio = new Bundle { + val a = new Bundle { + val valid = Output(Bool()) + val ready = Input(Bool()) + val address = Output(UInt(64.W)) + val data = Output(UInt(64.W)) + val store = Output(Bool()) + val size = Output(UInt(32.W)) + } + val d = new Bundle { + val valid = Input(Bool()) + val data = Input(UInt(64.W)) + } + } + }) + addResource("/vsrc/spiketile.v") + addResource("/csrc/spiketile.cc") + +} + +class SpikeTileModuleImp(outer: SpikeTile) extends BaseTileModuleImp(outer) { + + // We create a bundle here and decode the interrupt. 
+ val int_bundle = Wire(new TileInterrupts()) + outer.decodeCoreInterrupts(int_bundle) + val managers = outer.visibilityNode.edges.out.flatMap(_.manager.managers) + val cacheable_regions = AddressRange.fromSets(managers.filter(_.supportsAcquireB).flatMap(_.address)) + .map(a => s"${a.base} ${a.size}").mkString(" ") + val uncacheable_regions = AddressRange.fromSets(managers.filter(!_.supportsAcquireB).flatMap(_.address)) + .map(a => s"${a.base} ${a.size}").mkString(" ") + val readonly_uncacheable_regions = AddressRange.fromSets(managers.filter { + m => !m.supportsAcquireB && !m.supportsPutFull && m.regionType == RegionType.UNCACHED + }.flatMap(_.address)) + .map(a => s"${a.base} ${a.size}").mkString(" ") + val executable_regions = AddressRange.fromSets(managers.filter(_.executable).flatMap(_.address)) + .map(a => s"${a.base} ${a.size}").mkString(" ") + + val (icache_tl, icacheEdge) = outer.icacheNode.out(0) + val (dcache_tl, dcacheEdge) = outer.dcacheNode.out(0) + val (mmio_tl, mmioEdge) = outer.mmioNode.out(0) + + val spike = Module(new SpikeBlackBox(hartId, isaDTS, tileParams.core.nPMPs, + tileParams.icache.get.nSets, tileParams.icache.get.nWays, + tileParams.dcache.get.nSets, tileParams.dcache.get.nWays, + tileParams.dcache.get.nMSHRs, + cacheable_regions, uncacheable_regions, readonly_uncacheable_regions, executable_regions)) + spike.io.clock := clock.asBool + val cycle = RegInit(0.U(64.W)) + cycle := cycle + 1.U + spike.io.reset := reset + spike.io.cycle := cycle + dontTouch(spike.io.insns_retired) + val reset_vector = Wire(UInt(64.W)) + reset_vector := outer.resetVectorSinkNode.bundle + spike.io.reset_vector := reset_vector + spike.io.debug := int_bundle.debug + spike.io.mtip := int_bundle.mtip + spike.io.msip := int_bundle.msip + spike.io.meip := int_bundle.meip + spike.io.seip := int_bundle.seip.get + spike.io.ipc := outer.spikeTileParams.core.maxInsnsPerCycle.U + + val blockBits = log2Ceil(p(CacheBlockBytes)) + spike.io.icache.a.ready := icache_tl.a.ready + icache_tl.a.valid := spike.io.icache.a.valid + icache_tl.a.bits := icacheEdge.Get( + fromSource = spike.io.icache.a.sourceid, + toAddress = (spike.io.icache.a.address >> blockBits) << blockBits, + lgSize = blockBits.U)._2 + icache_tl.d.ready := true.B + spike.io.icache.d.valid := icache_tl.d.valid + spike.io.icache.d.sourceid := icache_tl.d.bits.source + spike.io.icache.d.data := icache_tl.d.bits.data.asTypeOf(Vec(8, UInt(64.W))) + + spike.io.dcache.a.ready := dcache_tl.a.ready + dcache_tl.a.valid := spike.io.dcache.a.valid + dcache_tl.a.bits := dcacheEdge.AcquireBlock( + fromSource = spike.io.dcache.a.sourceid, + toAddress = (spike.io.dcache.a.address >> blockBits) << blockBits, + lgSize = blockBits.U, + growPermissions = Mux(spike.io.dcache.a.state_old, 2.U, Mux(spike.io.dcache.a.state_new, 1.U, 0.U)))._2 + + dcache_tl.b.ready := true.B + spike.io.dcache.b.valid := dcache_tl.b.valid + spike.io.dcache.b.address := dcache_tl.b.bits.address + spike.io.dcache.b.source := dcache_tl.b.bits.source + spike.io.dcache.b.param := dcache_tl.b.bits.param + + spike.io.dcache.c.ready := dcache_tl.c.ready + dcache_tl.c.valid := spike.io.dcache.c.valid + dcache_tl.c.bits := Mux(spike.io.dcache.c.voluntary, + dcacheEdge.Release( + fromSource = spike.io.dcache.c.sourceid, + toAddress = spike.io.dcache.c.address, + lgSize = blockBits.U, + shrinkPermissions = spike.io.dcache.c.param, + data = spike.io.dcache.c.data.asUInt)._2, + Mux(spike.io.dcache.c.has_data, + dcacheEdge.ProbeAck( + fromSource = spike.io.dcache.c.sourceid, + toAddress = 
spike.io.dcache.c.address, + lgSize = blockBits.U, + reportPermissions = spike.io.dcache.c.param, + data = spike.io.dcache.c.data.asUInt), + dcacheEdge.ProbeAck( + fromSource = spike.io.dcache.c.sourceid, + toAddress = spike.io.dcache.c.address, + lgSize = blockBits.U, + reportPermissions = spike.io.dcache.c.param) + )) + + val has_data = dcacheEdge.hasData(dcache_tl.d.bits) + val should_finish = dcacheEdge.isRequest(dcache_tl.d.bits) + val can_finish = dcache_tl.e.ready + dcache_tl.d.ready := can_finish + spike.io.dcache.d.valid := dcache_tl.d.valid && can_finish + spike.io.dcache.d.has_data := has_data + spike.io.dcache.d.grantack := dcache_tl.d.bits.opcode.isOneOf(TLMessages.Grant, TLMessages.GrantData) + spike.io.dcache.d.sourceid := dcache_tl.d.bits.source + spike.io.dcache.d.data := dcache_tl.d.bits.data.asTypeOf(Vec(8, UInt(64.W))) + + dcache_tl.e.valid := dcache_tl.d.valid && should_finish + dcache_tl.e.bits := dcacheEdge.GrantAck(dcache_tl.d.bits) + + spike.io.mmio.a.ready := mmio_tl.a.ready + mmio_tl.a.valid := spike.io.mmio.a.valid + val log_size = MuxCase(0.U, (0 until 3).map { i => (spike.io.mmio.a.size === (1 << i).U) -> i.U }) + mmio_tl.a.bits := Mux(spike.io.mmio.a.store, + mmioEdge.Put(0.U, spike.io.mmio.a.address, log_size, spike.io.mmio.a.data)._2, + mmioEdge.Get(0.U, spike.io.mmio.a.address, log_size)._2) + + mmio_tl.d.ready := true.B + spike.io.mmio.d.valid := mmio_tl.d.valid + spike.io.mmio.d.data := mmio_tl.d.bits.data + +} + +class WithNSpikeCores(n: Int = 1, overrideIdOffset: Option[Int] = None) extends Config((site, here, up) => { + case TilesLocated(InSubsystem) => { + // Calculate the next available hart ID (since hart ID cannot be duplicated) + val prev = up(TilesLocated(InSubsystem), site) + val idOffset = overrideIdOffset.getOrElse(prev.size) + // Create TileAttachParams for every core to be instantiated + (0 until n).map { i => + SpikeTileAttachParams( + tileParams = SpikeTileParams(hartId = i + idOffset) + ) + } ++ prev + } +}) diff --git a/generators/chipyard/src/main/scala/config/SpikeConfigs.scala b/generators/chipyard/src/main/scala/config/SpikeConfigs.scala new file mode 100644 index 0000000000..b45245c05c --- /dev/null +++ b/generators/chipyard/src/main/scala/config/SpikeConfigs.scala @@ -0,0 +1,19 @@ +package chipyard + +import freechips.rocketchip.config.{Config} + +// Configs which instantiate a Spike-simulated +// tile that interacts with the Chipyard SoC +// as a hardware core would + +class SpikeConfig extends Config( + new chipyard.WithNSpikeCores(1) ++ + new chipyard.config.AbstractConfig) + +// Avoids polling on the UART registers +class SpikeFastUARTConfig extends Config( + new chipyard.WithNSpikeCores(1) ++ + new chipyard.config.WithUARTFIFOEntries(128, 128) ++ + new chipyard.config.WithMemoryBusFrequency(1) ++ + new chipyard.config.WithPeripheryBusFrequency(1) ++ + new chipyard.config.AbstractConfig) diff --git a/generators/chipyard/src/main/scala/config/fragments/PeripheralFragments.scala b/generators/chipyard/src/main/scala/config/fragments/PeripheralFragments.scala index 542cafeeb8..b12abe706e 100644 --- a/generators/chipyard/src/main/scala/config/fragments/PeripheralFragments.scala +++ b/generators/chipyard/src/main/scala/config/fragments/PeripheralFragments.scala @@ -37,6 +37,10 @@ class WithUART(baudrate: BigInt = 115200) extends Config((site, here, up) => { UARTParams(address = 0x54000000L, nTxEntries = 256, nRxEntries = 256, initBaudRate = baudrate)) }) +class WithUARTFIFOEntries(txEntries: Int, rxEntries: Int) extends 
Config((site, here, up) => { + case PeripheryUARTKey => up(PeripheryUARTKey).map(_.copy(nTxEntries = txEntries, nRxEntries = rxEntries)) +}) + class WithSPIFlash(size: BigInt = 0x10000000) extends Config((site, here, up) => { // Note: the default size matches freedom with the addresses below case PeripherySPIFlashKey => Seq( diff --git a/sims/common-sim-flags.mk b/sims/common-sim-flags.mk index cf974f923f..3b4281c3a5 100644 --- a/sims/common-sim-flags.mk +++ b/sims/common-sim-flags.mk @@ -3,6 +3,17 @@ #---------------------------------------------------------------------------------------- SIM_OPT_CXXFLAGS := -O3 +# Workaround: esp-isa-sim doesn't install libriscv, +# so don't link with libriscv if it doesn't exist +# potentially breaks some configs + +ifeq (,$(wildcard $(RISCV)/lib/libriscv.so)) +$(warning libriscv not found) +LRISCV= +else +LRISCV=-lriscv +endif + SIM_CXXFLAGS = \ $(CXXFLAGS) \ $(SIM_OPT_CXXFLAGS) \ @@ -18,6 +29,7 @@ SIM_LDFLAGS = \ -Wl,-rpath,$(RISCV)/lib \ -L$(sim_dir) \ -L$(dramsim_dir) \ + $(LRISCV) \ -lfesvr \ -ldramsim \ $(EXTRA_SIM_LDFLAGS)
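
A minimal usage sketch tying the pieces above together. The config names come from SpikeConfigs.scala, the make invocation mirrors the Spike.rst snippet, and the ``+spike-debug`` / ``+spike-stq`` plusargs are the ones parsed in spiketile.cc; the ``EXTRA_SIM_FLAGS`` variable used below to forward plusargs is an assumption about the surrounding Chipyard sim makefiles, not something this diff adds.

.. code-block:: shell

    # From sims/vcs or sims/verilator: elaborate the Spike-tile SoC and run a binary
    make CONFIG=SpikeConfig run-binary BINARY=hello.riscv

    # SpikeFastUARTConfig deepens the UART FIFOs and lowers the bus frequencies
    # so the Spike core spends less simulated time polling the UART registers
    make CONFIG=SpikeFastUARTConfig run-binary BINARY=hello.riscv

    # spiketile.cc also reads two plusargs: +spike-debug enables the Spike model's
    # debug mode (processor_t::set_debug) and +spike-stq enables the C++ store-queue
    # model (use_stq). EXTRA_SIM_FLAGS is assumed to be the existing plusarg hook.
    make CONFIG=SpikeConfig run-binary BINARY=hello.riscv EXTRA_SIM_FLAGS="+spike-debug +spike-stq"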