Skip to content

Commit b97ed0d

Browse files
authored
Merge pull request #40 from pulp-platform/yt/synthesis
Add proper standalone synthesis flow
2 parents 4bb85c5 + fc9a823 commit b97ed0d

33 files changed

+705
-858
lines changed

.github/verible.waiver

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,6 @@ waive --rule=line-length
77
# Disable parameter style check
88
waive --rule=parameter-name-style
99
# Disable default check in case statements
10-
waive --rule=case-missing-default
10+
waive --rule=case-missing-default
11+
# Disable the typedef check for struct and union declarations
12+
waive --rule=typedef-structs-unions

.github/workflows/ci.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@ jobs:
4242
runs-on: ubuntu-latest
4343
env:
4444
Target: verilator
45-
REDMULE_COMPLEX: 0
46-
BENDER: ./bender
45+
Gcc: vendor/install/riscv/bin/
46+
UseXif: 0
4747

4848
needs:
4949
install-tools
@@ -69,8 +69,8 @@ jobs:
6969
runs-on: ubuntu-latest
7070
env:
7171
Target: verilator
72-
REDMULE_COMPLEX: 1
73-
BENDER: ./bender
72+
Gcc: vendor/install/riscv/bin/
73+
UseXif: 1
7474

7575
needs:
7676
install-tools

Bender.lock

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ packages:
3030
- fpnew
3131
- tech_cells_generic
3232
cv32e40x:
33-
revision: baba1eeb3b845306c45be714f677ff786753f136
33+
revision: 96b933ac2f723351872da55e7d2e9a82abd5df34
3434
version: null
3535
source:
3636
Git: https://github.com/pulp-platform/cv32e40x.git
@@ -86,6 +86,14 @@ packages:
8686
source:
8787
Git: https://github.com/pulp-platform/L2_tcdm_hybrid_interco.git
8888
dependencies: []
89+
obi:
90+
revision: 8097928cf1b43712f93d5356f336397879b4ad2c
91+
version: 0.1.6
92+
source:
93+
Git: https://github.com/pulp-platform/obi.git
94+
dependencies:
95+
- common_cells
96+
- common_verification
8997
tech_cells_generic:
9098
revision: 7968dd6e6180df2c644636bc6d2908a49f2190cf
9199
version: 0.2.13

Bender.yml

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,14 @@ package:
1313

1414
dependencies:
1515
cv32e40p : { git: "https://github.com/pulp-platform/cv32e40p.git" , rev: "pulpissimo-v4.1.0" }
16-
cv32e40x : { git: "https://github.com/pulp-platform/cv32e40x.git" , rev: "redmule-v1.0" }
16+
cv32e40x : { git: "https://github.com/pulp-platform/cv32e40x.git" , rev: "redmule-v1.1" }
1717
ibex : { git: "https://github.com/pulp-platform/ibex.git" , rev: pulpissimo-v6.1.2 }
1818
hwpe-stream : { git: "https://github.com/pulp-platform/hwpe-stream.git" , version: 1.7 }
1919
hwpe-ctrl : { git: "https://github.com/pulp-platform/hwpe-ctrl.git" , rev: 0e95510c0f4d43452d21b7723d766ae92e45c101 } # branch: yt/task-interfaces
2020
hci : { git: "https://github.com/pulp-platform/hci.git" , rev: v2.1.2 }
2121
fpnew : { git: "https://github.com/pulp-platform/cvfpu.git" , rev: "pulp-v0.1.3" }
2222
common_cells : { git: "https://github.com/pulp-platform/common_cells.git" , version: 1.21.0 }
23+
obi : { git: "https://github.com/pulp-platform/obi.git" , version: 0.1.6 }
2324
tech_cells_generic: { git: "https://github.com/pulp-platform/tech_cells_generic.git", version: 0.2.11 }
2425

2526
sources:
@@ -44,29 +45,19 @@ sources:
4445
- rtl/redmule_ce.sv
4546
- rtl/redmule_row.sv
4647
- rtl/redmule_engine.sv
48+
- rtl/redmule_inst_decoder.sv
4749
- rtl/redmule_top.sv
4850
- rtl/redmule_memory_scheduler.sv
51+
- rtl/redmule_complex.sv
4952

50-
- target: redmule_hwpe
53+
- target: redmule_deprecated
5154
files:
5255
- rtl/redmule_wrap.sv
53-
54-
- target: redmule_complex
55-
files:
56-
- rtl/redmule_inst_decoder.sv
57-
- rtl/redmule_complex.sv
5856
- rtl/redmule_complex_wrap.sv
5957

60-
- target: any(redmule_test_complex, redmule_test_hwpe)
58+
- target: redmule_test
6159
files:
6260
- target/sim/src/tb_dummy_memory.sv
63-
64-
- target: redmule_test_complex
65-
files:
66-
- target/sim/src/redmule_complex_tb.sv
67-
68-
- target: redmule_test_hwpe
69-
files:
7061
- target/sim/src/redmule_tb.sv
7162

7263
- target: vsim

Makefile

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ VsimPath := target/sim/vsim
1616
SW ?= $(RootDir)sw
1717
BUILD_DIR ?= $(SW)/build
1818
SIM_DIR ?= $(RootDir)vsim
19-
QUESTA ?= questa-2023.4
2019
Bender ?= $(CargoInstallDir)/bin/bender
2120
Gcc ?= $(GccInstallDir)/bin/
2221
ISA ?= riscv
@@ -28,27 +27,28 @@ PYTHON ?= python3
2827
target ?= verilator
2928
TargetPath := $(SimDir)/$(target)
3029

30+
# Useful Parameters
31+
gui ?= 0
32+
ipstools ?= 0
33+
P_STALL ?= 0.0
34+
UseXif ?= 0
35+
3136
# Included makefrags
32-
include $(TargetPath)/$(target).mk
3337
include bender_common.mk
3438
include bender_sim.mk
3539
include bender_synth.mk
36-
37-
ifeq ($(REDMULE_COMPLEX),1)
38-
TEST_SRCS := $(SW)/redmule_complex.c
39-
else
40-
TEST_SRCS := $(SW)/redmule.c
41-
endif
40+
include $(TargetPath)/$(target).mk
4241

4342
compile_script_synth ?= $(RootDir)scripts/synth_compile.tcl
4443

4544
INI_PATH = $(RootDir)modelsim.ini
4645
WORK_PATH = $(SIM_DIR)/work
4746

48-
# Useful Parameters
49-
gui ?= 0
50-
ipstools ?= 0
51-
P_STALL ?= 0.0
47+
TEST_SRCS := $(SW)/redmule.c
48+
49+
ifeq ($(UseXif),1)
50+
FLAGS += -DCOMPLEX_OFFLOADER
51+
endif
5252

5353
ifeq ($(verbose),1)
5454
FLAGS += -DVERBOSE
@@ -79,12 +79,14 @@ BIN=$(BUILD_DIR)/verif
7979
DUMP=$(BUILD_DIR)/verif.dump
8080
STIM_INSTR=$(BUILD_DIR)/stim_instr.txt
8181
STIM_DATA=$(BUILD_DIR)/stim_data.txt
82+
STACK_INIT=$(BUILD_DIR)/stack_init.txt
8283

8384
# Build implicit rules
84-
$(STIM_INSTR) $(STIM_DATA): $(BIN)
85+
$(STIM_INSTR) $(STIM_DATA) $(STACK_INIT): $(BIN)
8586
objcopy --srec-len 1 --output-target=srec $(BIN) $(BIN).s19
8687
$(PYTHON) scripts/parse_s19.py < $(BIN).s19 > $(BIN).txt
8788
$(PYTHON) scripts/s19tomem.py $(BIN).txt $(STIM_INSTR) $(STIM_DATA)
89+
$(PYTHON) scripts/stack_init.py $(STACK_INIT)
8890

8991
$(BIN): $(CRT) $(OBJ)
9092
$(LD) $(LD_OPTS) -o $(BIN) $(CRT) $(OBJ) -T$(LINKSCRIPT)
@@ -101,7 +103,7 @@ $(BUILD_DIR):
101103
SHELL := /bin/bash
102104

103105
# Generate instructions and data stimuli
104-
sw-build: $(STIM_INSTR) $(STIM_DATA) dis
106+
sw-build: $(STIM_INSTR) $(STIM_DATA) $(STACK_INIT) dis
105107

106108
$(SIM_DIR):
107109
mkdir -p $(SIM_DIR)
@@ -186,3 +188,5 @@ $(CargoInstallDir)/bin/bender:
186188
chmod +x $(RustupInit); source $(RustupInit) -y && \
187189
$(Cargo) install bender
188190
rm -rf $(RustupInit)
191+
192+
tools: bender riscv32-gcc

README.md

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ If you want to use RedMulE for academic purposes, please cite it as:
2121

2222
```
2323
@article{TORTORELLA2023122,
24-
title = {RedMule: A mixed-precision matrixmatrix operation engine for flexible and energy-efficient on-chip linear algebra and TinyML training acceleration},
24+
title = {RedMule: A mixed-precision matrix-matrix operation engine for flexible and energy-efficient on-chip linear algebra and TinyML training acceleration},
2525
journal = {Future Generation Computer Systems},
2626
volume = {149},
2727
pages = {122-135},
@@ -84,19 +84,18 @@ Each execution of the RedMulE Golden Model also generates data in `.txt` format
8484
the generated matrices.
8585

8686
## RedMulE Testbench
87-
RedMulE offers a complete testing environment under the `tb` folder, providing two different testbenches.
87+
RedMulE offers a complete testing environment in the `target/sim/src/redmule_tb.sv` file, which provides the testbench shown in the following diagram.
8888

89-
### HWPE memory-mapped interface
90-
The `tb/redmule_tb.sv` is based on the [hwpe-tb](https://github.com/pulp-platform/hwpe-tb) example, and features a RedMulE instance, a [CV32E40P](https://github.com/pulp-platform/cv32e40p/tree/pulpissimo-v4.1.0) controller core, and dummy memories used to simulate instruction and data memories, as shown in the picture below.
89+
![](doc/RedmuleSubsystem-CoreComplex.png)
9190

92-
![](doc/redmule_testbench.png)
91+
The testbench instantiates two synthetic memories, one for instructions and one for data. They have a compile-time configurable `PROB_STALL` parameter, used only in the data memory, to simulate the occurrence of stalls. A so-called Core Complex connects to these memories. The Core Complex configuration can be selected by enabling or disabling the `UseXif` parameter.
9392

94-
In this configuration, RedMulE is connected to the core through a memory-mapped configuration interface based on the [hwpe-ctrl](https://github.com/pulp-platform/hwpe-ctrl). Through this interface, when the data request of the core is made to an address that corresponds to the accelerator register-file scope, the internal configuration registers of the accelerator are written to make it start an operation. This is the most typical implementation used for integration of tightly-coupled accelerators within a [PULP cluster](https://github.com/pulp-platform/pulp_cluster).
93+
### HWPE memory-mapped interface
94+
The default value of the `UseXif` parameter is 0, meaning the Core Complex provides a RedMulE instance, a [CV32E40P](https://github.com/pulp-platform/cv32e40p/tree/pulpissimo-v4.1.0) controller core, and an address decoder. In this configuration, RedMulE exposes its configuration interface as a memory-mapped request/response-based port, which the core accesses via memory-mapped read/write operations. In the default configuration, this programmable port is reachable at the `0x00100000` base address and has a dedicated 1 KiB address range.
95+
The memory-mapped configuration interface is based on the [hwpe-ctrl](https://github.com/pulp-platform/hwpe-ctrl). This port exposes direct access to the accelerator register-file, where the internal configuration registers of the accelerator are written to make it start an operation. This is the most typical implementation used for integration of tightly-coupled accelerators within a [PULP cluster](https://github.com/pulp-platform/pulp_cluster).
9596

9697
### Tensor co-processor with ISA extension
97-
The `tb/redmule_complex_tb.sv` provides an implementatio of a complex core as an atomic unit. This unit features RedMulE as a tensor coprocessor of a [CV32E40X](https://github.com/openhwgroup/cv32e40x) core. In this configuration, the offloading of a matrix multiplication from the core to the accelerator is possible through the use of a general-purpose [eXtension Interface](https://github.com/openhwgroup/core-v-xif). Through such interface, it is possible to build a dedicated ISA instruction extension to offload an operation to an external co-processor without changing the core internal architecture. The RedMulE complex core is depicted in the figure below.
98-
99-
![](doc/redmule_complex_testbench.png)
98+
If the `UseXif` parameter is set to 1, the Core Complex configuration implements RedMulE as a tensor coprocessor of a [CV32E40X](https://github.com/openhwgroup/cv32e40x) core. In this configuration, operation offloading is no longer memory-mapped, but is possible through the use of a general-purpose [eXtension Interface](https://github.com/openhwgroup/core-v-xif). Through this interface, it is possible to build a dedicated ISA instruction extension to offload an operation to an external co-processor without changing the core internal architecture.
10099

101100
## Programming model
102101
RedMulE is designed to reduce the effort required for matrix multiplication tiling to a minimum. It features an internal hardware unit called the "tiler" that needs very little input information (i.e. tensor dimensions, computing format, pointers to the input/output tensors, and the operation to perform). The tiler then autonomously calculates the tiling of the tensors.
@@ -129,7 +128,6 @@ redmule_cfg ((unsigned int) x, (unsigned int) w, (unsigned int) y,
129128
(uint8_t) Float16);
130129
```
131130
The `sw/hal_redmule.h` also contains other useful APIs for programming RedMulE, such as the `hwpe_soft_clear()` to provide a soft reset to all the internal accelerator state and configuration registers, or the `hwpe_trigger_job()` to trigger the accelerator operation after it is configured.
132-
The `sw/redmule.c` provides an example of code for programming RedMulE through its memory-mapped interface.
133131

134132
### Tensor co-processor with ISA extension
135133
The offloading mechanism through the dedicated ISA tensor extension requires the same information as in the memory-mapped case, meaning the pointers to the tensors, their dimensions, the encoding format, and the operation selection. All these pieces of information must be encapsulated into two dedicated ISA instructions: a `mcnfig` instruction and a `marith` instruction.
@@ -218,7 +216,8 @@ asm volatile(
218216
(0b001 << 7) | \
219217
(0b0101011 << 0) \n");
220218
```
221-
The `sw/redmule_complex.c` provides an example of code for programming RedMulE through a dedicate ISA extension.
219+
220+
The unified `sw/redmule.c` provides an example of code for programming RedMulE either through its memory-mapped interface or through a dedicated ISA extension.
222221

223222
### Getting Started
224223
The RedMulE repository offers support for different simulation targets. At the moment, the support is for Questasim (`vsim`) and Verilator (`verilator`).
@@ -254,7 +253,7 @@ make hw-script target=verilator
254253

255254
The compiled hardware is then built with the command:
256255
```bash
257-
make hw-build target=verilator
256+
make hw-build target=verilator (UseXif=1 if you want to build for the XiF interface)
258257
```
259258
260259
In case one wants to run a simulation using Questasim, it is sufficient to re-run the above commands with `target=vsim` in place of `target=verilator`.
@@ -263,11 +262,10 @@ In case one wants to run a simulation using Questasim, it sufficient to re-run t
263262
264263
To run the available tests, just do:
265264
```bash
266-
make sw-build
267-
make hw-run target=verilator (gui=1 to open the GtkWave tool or the Questasim Graphic User Interface depending on the value of `target`)
265+
make sw-build (UseXif=1 if you want to compile the test for the XiF interface)
266+
make hw-run target=verilator (gui=1 to open the GtkWave tool or the Questasim Graphic User Interface depending on the value of `target`, and UseXif=1 if you want to run test for the XiF interface)
268267
```
269268
It is possible to run the test introducing a parametric probability of stall by explicitly passing the `P_STALL` parameter while running the test (`P_STALL=0.1` means a stall probability of 10%).
270-
If the `scripts/setup-hwpe.sh` was sourced, the above commands will execute the `sw/redmule.c` example, while if the `scripts/setup-complex.sh` was source, the above commands will execute the `sw/redmule_complex.c` test.
271269
272270
### Golden Model Generation
273271
It is possible to generate fresh golden models directly from the `redmule` folder. The parameters that can be used to generate different golden models are the following:

bender_common.mk

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,5 @@
55
# Yvan Tortorella <[email protected]>
66
#
77

8-
common_targs += -t cv32e40p_exclude_tracer
9-
10-
ifeq ($(REDMULE_COMPLEX),1)
11-
common_targs += -t redmule_complex
12-
common_targs += -e cv32e40p
13-
else
14-
common_targs += -t redmule_hwpe
15-
common_targs += -e cv32e40x
16-
endif
17-
8+
common_targs += -t rtl
189
common_defs += -D COREV_ASSERT_OFF

bender_sim.mk

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,15 @@
55
# Yvan Tortorella <[email protected]>
66
#
77

8-
sim_targs += -t rtl
8+
sim_targs += -t redmule_test
9+
sim_defs += -D COREV_ASSERT_OFF
910

10-
ifeq ($(REDMULE_COMPLEX),1)
11-
sim_targs += -t redmule_test_complex
11+
ifneq ($(target),verilator)
12+
ifeq ($(UseXif),1)
13+
sim_targs += -t cv32e40x_bhv
14+
sim_defs += -D CV32E40X_TRACE_EXECUTION
1215
else
13-
sim_targs += -t redmule_test_hwpe
16+
sim_targs += -t cv32e40p_include_tracer
17+
sim_defs += -D CV32E40P_TRACE_EXECUTION
18+
endif
1419
endif

doc/RedmuleSubsystem-CoreComplex.png

31.2 KB
Loading
File renamed without changes.

0 commit comments

Comments
 (0)