diff --git a/umi/sumi/rtl/umi_messages.vh b/umi/sumi/rtl/umi_messages.vh index 245cacf3..c9d51413 100644 --- a/umi/sumi/rtl/umi_messages.vh +++ b/umi/sumi/rtl/umi_messages.vh @@ -22,9 +22,14 @@ * ******************************************************************************/ +// Transaction size limits +localparam UMI_MAXSIZE = 1024; // max word size per transaction +localparam UMI_MAXLEN = 256; // max word transfers per transaction + +// Invalid transaction indicator (cmd[7:0]) localparam UMI_INVALID = 8'h00; -// Requests (host -> device) +// Requests (host -> device) (cmd[7:0]) localparam UMI_REQ_READ = 5'h01; // read/load localparam UMI_REQ_WRITE = 5'h03; // write/store with ack localparam UMI_REQ_POSTED = 5'h05; // posted write @@ -34,7 +39,8 @@ localparam UMI_REQ_USER0 = 5'h0B; // reserved for user localparam UMI_REQ_FUTURE0 = 5'h0D; // reserved fur future use localparam UMI_REQ_ERROR = 8'h0F; // reserved for error message localparam UMI_REQ_LINK = 8'h2F; // reserved for link ctrl -// Response (device -> host) + +// Response (device -> host) (cmd[7:0]) localparam UMI_RESP_READ = 5'h02; // response to read request localparam UMI_RESP_WRITE = 5'h04; // response (ack) from write request localparam UMI_RESP_USER0 = 5'h06; // signal write without ack @@ -43,6 +49,7 @@ localparam UMI_RESP_FUTURE0 = 5'h0A; // reserved for future use localparam UMI_RESP_FUTURE1 = 5'h0C; // reserved for future use localparam UMI_RESP_LINK = 8'h0E; // reserved for link ctrl +// Atomic command decode (cmd[15:8]) localparam UMI_REQ_ATOMICADD = 8'h00; localparam UMI_REQ_ATOMICAND = 8'h01; localparam UMI_REQ_ATOMICOR = 8'h02; diff --git a/umi/sumi/rtl/umi_stimulus.v b/umi/sumi/rtl/umi_stimulus.v deleted file mode 100644 index 03d4060d..00000000 --- a/umi/sumi/rtl/umi_stimulus.v +++ /dev/null @@ -1,179 +0,0 @@ -/******************************************************************************* - * Copyright 2020 Zero ASIC Corporation - * - * Licensed under the Apache License, Version 2.0 (the 
"License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * ---- - * - * ##Documentation## - * - * - Synthesizable UMI stimulus - * - Useful for FPGAs and emulators - * - ******************************************************************************/ - -module umi_stimulus - #( parameter DW = 256, // stimulus packet width - parameter AW = 64, // width of control words - parameter CW = 32, // width of control words - parameter TCW = 8, - parameter DEPTH = 8192, // Memory depth - parameter TARGET = "DEFAULT" // pass through variable for hard macro - ) - ( - // control - input nreset, // async reset - input load, // load memory - input go, // drive stimulus from memory - // external interface - input ext_clk, // External clock for write path - input ext_valid, // Valid packet for memory - input [DW+AW+AW+CW+TCW-1:0] ext_packet, // packet for memory - // dut feedback - input dut_clk, // DUT side clock - input dut_ready, // DUT ready signal - // stimulus outputs - output stim_valid, // Packet valid - output [CW-1:0] stim_cmd, // packet to DUT - output [AW-1:0] stim_dstaddr, // packet to DUT - output [AW-1:0] stim_srcaddr, // packet to DUT - output [DW-1:0] stim_data, // packet to DUT - output stim_done // Signals that stimulus is done - ); - - // memory parameters - localparam MAW = $clog2(DEPTH); // Memory address width - - // state machine parameters - localparam STIM_IDLE = 2'b00; - localparam STIM_ACTIVE = 2'b01; - localparam STIM_PAUSE = 2'b10; - localparam STIM_DONE = 2'b11; - - // Local values - reg [1:0] rd_state; - 
reg [DW+AW+AW+CW+TCW-1:0] ram[0:DEPTH-1]; - reg [DW+AW+AW+CW+TCW-1:0] mem_data; - reg [MAW-1:0] wr_addr; - reg [MAW-1:0] rd_addr; - reg [1:0] sync_pipe; - reg [TCW-2:0] rd_delay; - reg data_valid; - - wire dut_start; - wire [MAW-1:0] rd_addr_nxt; - wire [TCW-2:0] rd_delay_nxt; - - wire beat; - wire pause; - - //################################# - // Memory write port state machine - //################################# - - always @ (posedge ext_clk or negedge nreset) - if(!nreset) - wr_addr[MAW-1:0] <= 'b0; - else if(ext_valid & load) - wr_addr[MAW-1:0] <= wr_addr[MAW-1:0] + 1; - - //Synchronize mode to dut_clk domain - always @ (posedge dut_clk or negedge nreset) - if(!nreset) - sync_pipe[1:0] <= 'b0; - else - sync_pipe[1:0] <= {sync_pipe[0],go}; - - assign dut_start = sync_pipe[1]; - - //################################# - // Memory read port state machine - //################################# - //1. Start on dut_start - //2. Drive valid while active - //3. Set end state on special end packet (bit 0) - - // control signals - assign stim_done = (rd_state[1:0]==STIM_DONE); - assign stim_valid = (rd_state[1:0]==STIM_ACTIVE); - assign beat = stim_valid & dut_ready; - assign pause = data_valid & dut_ready & (|rd_delay_nxt); - - always @ (posedge dut_clk or negedge nreset) - if(!nreset) - rd_state[1:0] <= STIM_IDLE; - else - case (rd_state[1:0]) - STIM_IDLE : - rd_state[1:0] <= (dut_start & data_valid) ? STIM_ACTIVE : - (dut_start & ~data_valid) ? STIM_DONE : - STIM_IDLE; - STIM_ACTIVE : - rd_state[1:0] <= pause ? STIM_PAUSE : - data_valid ? STIM_ACTIVE : - STIM_DONE; - STIM_PAUSE : - rd_state[1:0] <= (|rd_delay) ? STIM_PAUSE : - data_valid ? 
STIM_ACTIVE : - STIM_DONE; - STIM_DONE : - rd_state[1:0] <= STIM_DONE; - - endcase // case (rd_state[1:0]) - - always @ (posedge dut_clk) - data_valid <= ((TCW==0) | mem_data[0]); - - // Read address updates on every beat - - /* verilator lint_off WIDTH */ - assign rd_addr_nxt = rd_addr[MAW-1:0] + beat; - /* verilator lint_on WIDTH */ - - always @ (posedge dut_clk or negedge nreset) - if(!nreset) - rd_addr[MAW-1:0] <= 'b0; - else - rd_addr[MAW-1:0] <= rd_addr_nxt; - - assign rd_delay_nxt = (TCW > 1) ? mem_data[TCW-1:1] : 'b0; - - // Update delay when in pause or when active - always @ (posedge dut_clk or negedge nreset) - if(!nreset) - rd_delay <= 'b0; - else if(rd_state[1:0]==STIM_PAUSE) - rd_delay <= rd_delay - 1'b1; - else - rd_delay <= rd_delay_nxt; - - //################################# - // Dual Port RAM - //################################# - - //write port - always @(posedge ext_clk) - if (ext_valid) - ram[wr_addr[MAW-1:0]] <= ext_packet[DW+AW+AW+CW+TCW-1:0]; - - //read port - always @ (posedge dut_clk) - mem_data[DW+AW+AW+CW+TCW-1:0] <= ram[rd_addr_nxt[MAW-1:0]]; - - // Remove extra CW information from stimulus - assign stim_cmd[CW-1:0] = mem_data[CW+TCW-1:TCW]; - assign stim_dstaddr[AW-1:0] = mem_data[AW+CW+TCW-1:CW+TCW]; - assign stim_srcaddr[AW-1:0] = mem_data[AW+AW+CW+TCW-1:AW+CW+TCW]; - assign stim_data[DW-1:0] = mem_data[DW+AW+AW+CW+TCW-1:AW+AW+CW+TCW]; - -endmodule // umi_stimulus diff --git a/umi/sumi/rtl/umi_tester.v b/umi/sumi/rtl/umi_tester.v new file mode 100644 index 00000000..2ee85801 --- /dev/null +++ b/umi/sumi/rtl/umi_tester.v @@ -0,0 +1,513 @@ +/****************************************************************************** + * Copyright 2020 Zero ASIC Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * ---- + * + * Documentation: + * + * - This module is a UMI host transaction generator. The module reads + * UMI transactions from memory at a rate of one transaction per + * clock cycle and sends them out on the uhost_req_* signals. Valid + * UMI host transactions are read from memory, incrementing the memory + * read address and sending a UMI transaction while 'req_en' is held high. + * + * - UMI responses are stored in a separate response RAM. + * + * - The local memory has one host transaction per memory address, + * with the following format: [MSB..LSB] + * {data, srcaddr, dstaddr, cmd, ctrl} + * + * - The data, srcaddr, dstaddr, cmd, ctrl widths are parametrized + * via DW, AW, CW, TCW. + * + * - Bit[0] of the ctrl field indicates a valid transaction. Bits + * [TCW-1:1] are user bits driven out on gpio_out. + * + * - Memory read address loops to zero when it reaches the end of + * the memory and 'autoloop' is set. + * + * - APB access port can be used by an external host + * to read/write from the memory. + * + * - The memory access priority is: + * - apb (highest) + * - response + * - request (lowest) + * + * - error[3:0]: + * 0000 = no error + * 0001 = timeout + * 0010 = + * + * Dependencies: + * - https://github.com/siliconcompiler/lambdalib + * + * Demo: + * + * >> iverilog umi_tester.v -DTB_UMI_TESTER -y . -I. 
-y $LAMBDALIBPATH + * >> ./a.out +hexreq="./test0.memh" + * + *****************************************************************************/ + +module umi_tester + #(// user parameters + parameter DEPTH = 128, // memory depth (entries) + parameter ARGREQ = "hexreq", // $plusargs for req memh init (optional) + parameter ARGRESP = "hexresp", // $plusargs for resp memh init (optional) + parameter TCW = 8, // ctrl interface width + parameter MAXWIDTH = 512, // bits [256, 512, 1024, 2048] + parameter DEBUG = 1, // turn on debug messages + // bus parameters + parameter DW = 64, // umi data width + parameter AW = 64, // umi addr width + parameter CW = 32, // umi ctrl width + parameter RW = 32, // apb data width + parameter RAW = 32 // apb address width + ) + ( + // control + input nreset, // async active low reset + input clk, // clk + input autoloop, // loop req addr back to zero + input req_en, // enable request generation + output reg req_done, // all requests sent + input resp_en, // enable response capture + output reg resp_done, // all responses received + output [3:0] error, // tester error (NOTE: not driven anywhere in this module yet) + input [TCW-2:0] gpio_in, // gpio inputs to response RAM + output [TCW-2:0] gpio_out, // gpio outputs from request RAM + // apb load interface (optional) + input [RAW-1:0] apb_paddr, // apb address bus + input apb_penable, // goes high for cycle 2:n of transfer + input apb_pwrite, // 1=write, 0=read + input [RW-1:0] apb_pwdata, // write data (8, 16, 32b) + input [3:0] apb_pstrb, // (optional) write strobe byte lanes + input [2:0] apb_pprot, // (optional) level of access + input apb_psel, // select signal for each device + output apb_pready, // device ready + output [RW-1:0] apb_prdata, // read data (8, 16, 32b) + // umi host interface + output uhost_req_valid, + output [CW-1:0] uhost_req_cmd, + output [AW-1:0] uhost_req_dstaddr, + output [AW-1:0] uhost_req_srcaddr, + output [DW-1:0] uhost_req_data, + input uhost_req_ready, + input uhost_resp_valid, + input [CW-1:0] 
uhost_resp_cmd, + input [AW-1:0] uhost_resp_dstaddr, + input [AW-1:0] uhost_resp_srcaddr, + input [DW-1:0] uhost_resp_data, + output uhost_resp_ready + ); + +`include "umi_messages.vh" + + // memory parameters + localparam MAW = $clog2(DEPTH); // Memory address-width + localparam MW = DW+2*AW+CW+TCW; // Memory data width + localparam LAW = $clog2(MAXWIDTH/8); // Per entry address width + + // file names + reg [8*128-1:0] memhreq; + reg [8*128-1:0] memhresp; + + // local state + reg [MAW-1:0] req_addr; + reg [MAW-1:0] resp_addr; + reg req_valid; + + // local wires + wire [MW-1:0] mem_req_dout; + wire [MW-1:0] mem_req_din; + wire mem_req_ce; + wire [MAW-1:0] mem_req_addr; + wire [MW-1:0] mem_req_wmask; + + wire [MW-1:0] mem_resp_dout; + wire [MW-1:0] mem_resp_din; + wire mem_resp_ce; + wire [MAW-1:0] mem_resp_addr; + wire [MW-1:0] mem_resp_wmask; + + wire [MW-1:0] apb_din; + wire [MW-1:0] apb_wmask; + wire apb_req_beat; + wire apb_resp_beat; + + wire [MW-1:0] resp_din; + + integer i; + + //##################################################### + // Initialize RAM (optional; plusarg names come from ARGREQ/ARGRESP) + //##################################################### + + initial + begin + if($value$plusargs($sformatf("%s=%%s", ARGREQ), memhreq)) + $readmemh(memhreq, ram_req.memory.ram); + else // NOTE(review): response image is loaded only when no request image was given - confirm intent + if($value$plusargs($sformatf("%s=%%s", ARGRESP), memhresp)) + $readmemh(memhresp, ram_resp.memory.ram); + end + + //##################################################### + // Monitor Transactions (prints each accepted request/response when DEBUG is nonzero) + //##################################################### + + if(DEBUG) begin + always @ (posedge clk) begin + if (uhost_req_valid & uhost_req_ready) + $display("(request) data=%h srcaddr=%h dstaddr=%h cmd=%h", + uhost_req_data, uhost_req_srcaddr, uhost_req_dstaddr, uhost_req_cmd); + + if (uhost_resp_valid & uhost_resp_ready) + $display("(response) data=%h srcaddr=%h dstaddr=%h cmd=%h", + uhost_resp_data, uhost_resp_srcaddr, uhost_resp_dstaddr, uhost_resp_cmd); + end + end + + + 
//#################################################### + // Request Generator + //#################################################### + // 1. Generate memory read requests when req_en is high + // 2. Not ready creates a valid bubble at addr stage + // 3. Stall valid on request ready signal + + assign req_beat = req_en & ~req_done & + uhost_req_ready & ~apb_penable; + + // memory read address + always @ (posedge clk or negedge nreset) + if(!nreset) + req_addr[MAW-1:0] <= 'b0; + else if (!req_done) + req_addr[MAW-1:0] <= req_addr[MAW-1:0] + req_beat; + + // requests driven on next clock cycle by RAM + always @ (posedge clk or negedge nreset) + if(!nreset) + req_valid <= 'b0; + else if(uhost_req_ready) + req_valid <= req_beat; + + // requests done + always @ (posedge clk or negedge nreset) + if(!nreset) + req_done <= 'b0; + else if(~req_en) + req_done <= 1'b0; + else if(&req_addr[MAW-1:0] &~autoloop) + req_done <= 1'b1; + + // assigning RAM output to UMI signals + assign gpio_out[TCW-2:0] = mem_req_dout[1+:(TCW-1)]; + assign uhost_req_valid = req_valid & mem_req_dout[0]; + assign uhost_req_cmd[CW-1:0] = mem_req_dout[TCW+:CW]; + assign uhost_req_dstaddr[AW-1:0] = mem_req_dout[(TCW+CW)+:AW]; + assign uhost_req_srcaddr[AW-1:0] = mem_req_dout[(TCW+CW+AW)+:AW]; + assign uhost_req_data[DW-1:0] = mem_req_dout[(TCW+CW+2*AW)+:DW]; + + //#################################################### + // Response Tracker + //#################################################### + + assign uhost_resp_ready = ~apb_resp_beat; // ready unless APB is accessing the response RAM (not gated by resp_en) + + assign resp_beat = uhost_resp_valid & uhost_resp_ready; + + always @ (posedge clk or negedge nreset) + if(!nreset) + resp_addr[MAW-1:0] <= 'b0; + else if (!resp_done) + resp_addr[MAW-1:0] <= resp_addr[MAW-1:0] + resp_beat; + + // responses done + always @ (posedge clk or negedge nreset) + if(!nreset) + resp_done <= 'b0; + else if(~resp_en) + resp_done <= 1'b0; + else if(&resp_addr[MAW-1:0] &~autoloop) + resp_done <= 1'b1; + + assign resp_din[0] = resp_beat; + assign 
resp_din[TCW-1:1] = gpio_in[TCW-2:0]; + assign resp_din[TCW+:CW] = uhost_resp_cmd[CW-1:0]; + assign resp_din[(TCW+CW)+:AW] = uhost_resp_dstaddr[AW-1:0]; + assign resp_din[(TCW+CW+AW)+:AW] = uhost_resp_srcaddr[AW-1:0]; + assign resp_din[(TCW+CW+2*AW)+:DW] = uhost_resp_data[DW-1:0]; + + //#################################################### + // APB Port + //#################################################### + + // response RAM placed after request in memory map + // note address gaps between last byte of data and MAXWIDTH + // done to avoid odd modulo addressing + assign apb_req_sel = apb_psel & apb_paddr[MAW+LAW]; // NOTE(review): bit set selects the REQUEST RAM, which contradicts "response after request" above - confirm intended map + assign apb_resp_sel = apb_psel & ~apb_paddr[MAW+LAW]; + + // avoid clobbering a stalled umi request at the output + // (needed due to 1 cycle RAM pipeline) + assign apb_req_beat = (apb_penable & apb_req_sel & apb_pready); + assign apb_resp_beat = (apb_penable & apb_resp_sel); + + assign apb_din[MW-1:0] = apb_pwdata[RW-1:0] << + apb_paddr[$clog2(MAXWIDTH)-1:0]; + + // TODO: implement (currently replicates the 4-bit strobe 8x instead of expanding each strobe bit to a byte mask) + assign apb_wmask[MW-1:0] = {8{apb_pstrb[3:0]}} << apb_paddr[LAW-1:0]; + + // TODO: implement (currently always returns the low RW bits of the request RAM output) + assign apb_prdata[RW-1:0] = mem_req_dout[RW-1:0]; + + assign apb_pready = ~(apb_req_sel & uhost_req_valid & ~uhost_req_ready); + + //###################################################### + // REQUEST RAM + //###################################################### + + assign mem_req_ce = apb_req_beat | req_beat; + + assign mem_req_we = apb_req_beat ? apb_pwrite : 1'b0; + + assign mem_req_addr[MAW-1:0] = apb_req_beat ? 
apb_paddr[LAW+:MAW]: + req_addr[MAW-1:0]; + + assign mem_req_din[MW-1:0] = apb_din; + + assign mem_req_wmask[MW-1:0] = apb_wmask; + + + la_spram #(.DW (MW), // Memory width + .AW (MAW)) // Address width (derived) + ram_req(// Outputs + .dout (mem_req_dout[MW-1:0]), + // Inputs + .clk (clk), + .ce (mem_req_ce), + .we (mem_req_we), + .wmask (mem_req_wmask[MW-1:0]), + .addr (mem_req_addr[MAW-1:0]), + .din (mem_req_din[MW-1:0]), + .vss (1'b0), + .vdd (1'b1), + .vddio (1'b1), + .ctrl (1'b0), + .test (1'b0)); + + //######################################################### + // RESPONSE RAM + //######################################################### + + assign mem_resp_ce = apb_resp_beat | uhost_resp_valid; + + assign mem_resp_we = apb_resp_beat ? apb_pwrite : 1'b1; + + assign mem_resp_addr[MAW-1:0] = apb_resp_beat ? apb_paddr[LAW+:MAW]: + resp_addr; + + assign mem_resp_din[MW-1:0] = apb_resp_beat ? apb_din : + resp_din; + + assign mem_resp_wmask[MW-1:0] = apb_resp_beat ? apb_wmask : + {{MW}{1'b1}}; + + la_spram #(.DW (MW), // Memory width + .AW (MAW)) // Address width (derived) + ram_resp(// Outputs + .dout (mem_resp_dout[MW-1:0]), + // Inputs + .clk (clk), + .ce (mem_resp_ce), + .we (mem_resp_we), + .wmask (mem_resp_wmask[MW-1:0]), + .addr (mem_resp_addr[MAW-1:0]), + .din (mem_resp_din[MW-1:0]), + .vss (1'b0), + .vdd (1'b1), + .vddio (1'b1), + .ctrl (1'b0), + .test (1'b0)); + +endmodule +// Local Variables: +// verilog-library-directories:("./" "../../../../lambdalib/lambdalib/ramlib/rtl/") +// End: + +//##################################################################### +// DEMO TESTBENCH +//##################################################################### + +`ifdef TB_UMI_TESTER + +module tb(); + + parameter integer RW = 32; + parameter integer RAW = 32; + parameter integer DW = 64; + parameter integer AW = 64; + parameter integer CW = 32; + parameter integer TCW = 8; + parameter integer CTRLW = 8; + parameter integer DEPTH = 8; + parameter integer PERIOD = 2; 
+ parameter integer TIMEOUT = PERIOD * 1000; + parameter ARGREQ = "hexreq"; + parameter ARGRESP = "hexresp"; + parameter integer MAXWIDTH = 512; // bits [256, 512, 1024, 2048] + + // memory parameters + localparam MAW = $clog2(DEPTH); // Memory address-width + localparam MW = DW+2*AW+CW+TCW; // Memory data width + localparam LAW = $clog2(MAXWIDTH/8); // Per entry address width + + reg [128*8-1:0] memhreq; + reg [128*8-1:0] memhresp; + + //###################################### + // TEST HARNESS + //###################################### + + // waveform dump + initial + begin + $timeformat(-9, 0, " ns", 20); + $dumpfile("dump.vcd"); + $dumpvars(); + #(TIMEOUT) + $finish; + end + + // load memory + integer i; + initial + begin + if(!$value$plusargs($sformatf("%s=%%s", ARGREQ), memhreq)) + for (i=0;i