Skip to content

Commit 1e59faf

Browse files
authored
Merge pull request #41 from pulp-platform/yt/fix-streamer
Prevent the X stream from starting to fetch while all controllers are still idle
2 parents b9b29ac + 205703f commit 1e59faf

File tree

7 files changed

+83
-67
lines changed

7 files changed

+83
-67
lines changed

Bender.lock

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,16 +51,16 @@ packages:
5151
dependencies:
5252
- common_cells
5353
hci:
54-
revision: 90d55999bf357df6af153d20fc2786fc35167c59
55-
version: 2.1.2
54+
revision: 06fcba671e060f2e1b03b7ebe2d3e719f1557099
55+
version: null
5656
source:
5757
Git: https://github.com/pulp-platform/hci.git
5858
dependencies:
5959
- cluster_interconnect
6060
- hwpe-stream
6161
- l2_tcdm_hybrid_interco
6262
hwpe-ctrl:
63-
revision: 0e95510c0f4d43452d21b7723d766ae92e45c101
63+
revision: 3690a3c648f120546d8de2bc583d5170c36d2f20
6464
version: null
6565
source:
6666
Git: https://github.com/pulp-platform/hwpe-ctrl.git

Bender.yml

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,16 +12,16 @@ package:
1212
- "Yvan Tortorella ([email protected])"
1313

1414
dependencies:
15-
cv32e40p : { git: "https://github.com/pulp-platform/cv32e40p.git" , rev: "pulpissimo-v4.1.0" }
16-
cv32e40x : { git: "https://github.com/pulp-platform/cv32e40x.git" , rev: "redmule-v1.1" }
17-
ibex : { git: "https://github.com/pulp-platform/ibex.git" , rev: pulpissimo-v6.1.2 }
18-
hwpe-stream : { git: "https://github.com/pulp-platform/hwpe-stream.git" , version: 1.7 }
19-
hwpe-ctrl : { git: "https://github.com/pulp-platform/hwpe-ctrl.git" , rev: 0e95510c0f4d43452d21b7723d766ae92e45c101 } # branch: yt/task-interfaces
20-
hci : { git: "https://github.com/pulp-platform/hci.git" , rev: v2.1.2 }
21-
fpnew : { git: "https://github.com/pulp-platform/cvfpu.git" , rev: "pulp-v0.1.3" }
22-
common_cells : { git: "https://github.com/pulp-platform/common_cells.git" , version: 1.21.0 }
23-
obi : { git: "https://github.com/pulp-platform/obi.git" , version: 0.1.6 }
24-
tech_cells_generic: { git: "https://github.com/pulp-platform/tech_cells_generic.git", version: 0.2.11 }
15+
cv32e40p : { git: "https://github.com/pulp-platform/cv32e40p.git" , rev: "pulpissimo-v4.1.0" }
16+
cv32e40x : { git: "https://github.com/pulp-platform/cv32e40x.git" , rev: "redmule-v1.1" }
17+
ibex : { git: "https://github.com/pulp-platform/ibex.git" , rev: pulpissimo-v6.1.2 }
18+
hwpe-stream : { git: "https://github.com/pulp-platform/hwpe-stream.git" , version: =1.9.0 }
19+
hwpe-ctrl : { git: "https://github.com/pulp-platform/hwpe-ctrl.git" , rev: "3690a3c" } # master
20+
hci : { git: "https://github.com/pulp-platform/hci.git" , rev: "06fcba6" } # main
21+
fpnew : { git: "https://github.com/pulp-platform/cvfpu.git" , rev: "pulp-v0.1.3" }
22+
common_cells : { git: "https://github.com/pulp-platform/common_cells.git" , version: =1.38.0 }
23+
obi : { git: "https://github.com/pulp-platform/obi.git" , version: =0.1.6 }
24+
tech_cells_generic: { git: "https://github.com/pulp-platform/tech_cells_generic.git", version: =0.2.13 }
2525

2626
sources:
2727
files:

rtl/redmule_ctrl.sv

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ module redmule_ctrl
3838
output logic flush_o ,
3939
// Control signals for the state machine
4040
output cntrl_scheduler_t cntrl_scheduler_o ,
41+
output cntrl_flags_t cntrl_flags_o,
4142
// Peripheral slave port
4243
hwpe_ctrl_intf_periph.slave periph
4344
);
@@ -135,15 +136,18 @@ module redmule_ctrl
135136
assign latch_clear = current == REDMULE_LATCH_RST;
136137

137138
always_comb begin : controller_fsm
139+
cntrl_flags_o.idle = 1'b0;
138140
cntrl_slave = '0;
139141
next = current;
140142

141143
case (current)
142144
REDMULE_LATCH_RST: begin
145+
cntrl_flags_o.idle = 1'b1;
143146
next = REDMULE_IDLE;
144147
end
145148

146149
REDMULE_IDLE: begin
150+
cntrl_flags_o.idle = 1'b1;
147151
if ((slave_start & tiler_valid) || test_mode_i) begin
148152
next = REDMULE_STARTING;
149153
end

rtl/redmule_memory_scheduler.sv

Lines changed: 50 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ module redmule_memory_scheduler
2121
input ctrl_regfile_t reg_file_i ,
2222
input flgs_streamer_t flgs_streamer_i ,
2323
input cntrl_scheduler_t cntrl_scheduler_i,
24+
input cntrl_flags_t cntrl_flags_i ,
2425
output cntrl_streamer_t cntrl_streamer_o
2526
);
2627
localparam int unsigned JMP = NumByte*(DATA_W/MemDw - 1);
@@ -123,60 +124,55 @@ module redmule_memory_scheduler
123124

124125
assign num_x_reads = x_rows_iters_q == reg_file_i.hwpe_params[X_ITERS][31:16]-1 && reg_file_i.hwpe_params[LEFTOVERS][31:24] != '0 ? reg_file_i.hwpe_params[LEFTOVERS][31:24] : W;
125126

126-
always_comb begin : address_gen_signals
127-
// Here we initialize the streamer source signals
128-
// for the X stream source
129-
cntrl_streamer_o.x_stream_source_ctrl.addressgen_ctrl.base_addr = reg_file_i.hwpe_params[X_ADDR]
130-
+ x_rows_offs_q + x_cols_offs_q;
131-
cntrl_streamer_o.x_stream_source_ctrl.addressgen_ctrl.tot_len = num_x_reads;
132-
cntrl_streamer_o.x_stream_source_ctrl.addressgen_ctrl.d0_len = 'd1;
133-
cntrl_streamer_o.x_stream_source_ctrl.addressgen_ctrl.d0_stride = 'd0;
134-
cntrl_streamer_o.x_stream_source_ctrl.addressgen_ctrl.d1_len = W;
135-
cntrl_streamer_o.x_stream_source_ctrl.addressgen_ctrl.d1_stride = reg_file_i.hwpe_params[X_D1_STRIDE];
136-
cntrl_streamer_o.x_stream_source_ctrl.addressgen_ctrl.d2_stride = '0;
137-
cntrl_streamer_o.x_stream_source_ctrl.addressgen_ctrl.dim_enable_1h = 2'b11;
138-
139-
// Here we initialize the streamer source signals
140-
// for the W stream source
141-
cntrl_streamer_o.w_stream_source_ctrl.addressgen_ctrl.base_addr = reg_file_i.hwpe_params[W_ADDR];
142-
cntrl_streamer_o.w_stream_source_ctrl.addressgen_ctrl.tot_len = reg_file_i.hwpe_params[W_TOT_LEN];
143-
cntrl_streamer_o.w_stream_source_ctrl.addressgen_ctrl.d0_len = reg_file_i.hwpe_params[W_ITERS][31:16];
144-
cntrl_streamer_o.w_stream_source_ctrl.addressgen_ctrl.d0_stride = reg_file_i.hwpe_params[W_D0_STRIDE];
145-
cntrl_streamer_o.w_stream_source_ctrl.addressgen_ctrl.d1_len = reg_file_i.hwpe_params[W_ITERS][15:0];
146-
cntrl_streamer_o.w_stream_source_ctrl.addressgen_ctrl.d1_stride = JMP;
147-
cntrl_streamer_o.w_stream_source_ctrl.addressgen_ctrl.d2_stride = 'd0;
148-
cntrl_streamer_o.w_stream_source_ctrl.addressgen_ctrl.dim_enable_1h = 2'b11;
149-
150-
// Here we initialize the streamer source signals
151-
// for the Y stream source
152-
cntrl_streamer_o.y_stream_source_ctrl.addressgen_ctrl.base_addr = reg_file_i.hwpe_params[Z_ADDR];
153-
cntrl_streamer_o.y_stream_source_ctrl.addressgen_ctrl.tot_len = reg_file_i.hwpe_params[Z_TOT_LEN];
154-
cntrl_streamer_o.y_stream_source_ctrl.addressgen_ctrl.d0_len = W;
155-
cntrl_streamer_o.y_stream_source_ctrl.addressgen_ctrl.d0_stride = reg_file_i.hwpe_params[Z_D0_STRIDE];
156-
cntrl_streamer_o.y_stream_source_ctrl.addressgen_ctrl.d1_len = reg_file_i.hwpe_params[W_ITERS][15:0];
157-
cntrl_streamer_o.y_stream_source_ctrl.addressgen_ctrl.d1_stride = JMP;
158-
cntrl_streamer_o.y_stream_source_ctrl.addressgen_ctrl.d2_stride = reg_file_i.hwpe_params[Z_D2_STRIDE];
159-
cntrl_streamer_o.y_stream_source_ctrl.addressgen_ctrl.dim_enable_1h = 2'b11;
160-
161-
// Here we initialize the streamer sink signals for
162-
// the Z stream sink
163-
cntrl_streamer_o.z_stream_sink_ctrl.addressgen_ctrl.base_addr = reg_file_i.hwpe_params[Z_ADDR];
164-
cntrl_streamer_o.z_stream_sink_ctrl.addressgen_ctrl.tot_len = reg_file_i.hwpe_params[Z_TOT_LEN];
165-
cntrl_streamer_o.z_stream_sink_ctrl.addressgen_ctrl.d0_len = W;
166-
cntrl_streamer_o.z_stream_sink_ctrl.addressgen_ctrl.d0_stride = reg_file_i.hwpe_params[Z_D0_STRIDE];
167-
cntrl_streamer_o.z_stream_sink_ctrl.addressgen_ctrl.d1_len = reg_file_i.hwpe_params[W_ITERS][15:0];
168-
cntrl_streamer_o.z_stream_sink_ctrl.addressgen_ctrl.d1_stride = JMP;
169-
cntrl_streamer_o.z_stream_sink_ctrl.addressgen_ctrl.d2_stride = reg_file_i.hwpe_params[Z_D2_STRIDE];
170-
cntrl_streamer_o.z_stream_sink_ctrl.addressgen_ctrl.dim_enable_1h = 2'b11;
171-
end
172-
173-
always_comb begin : req_start_assignment
174-
cntrl_streamer_o.x_stream_source_ctrl.req_start = (cntrl_scheduler_i.first_load || tot_x_read_q < reg_file_i.hwpe_params[TOT_X_READ]) &&
175-
flgs_streamer_i.x_stream_source_flags.ready_start;
176-
cntrl_streamer_o.w_stream_source_ctrl.req_start = cntrl_scheduler_i.first_load && flgs_streamer_i.z_stream_sink_flags.ready_start;
177-
cntrl_streamer_o.y_stream_source_ctrl.req_start = cntrl_scheduler_i.first_load && reg_file_i.hwpe_params[OP_SELECTION][0] && flgs_streamer_i.y_stream_source_flags.ready_start;
178-
cntrl_streamer_o.z_stream_sink_ctrl.req_start = cntrl_scheduler_i.first_load && flgs_streamer_i.z_stream_sink_flags.ready_start;
179-
end
127+
// Here we initialize the streamer source signals
128+
// for the X stream source
129+
assign cntrl_streamer_o.x_stream_source_ctrl.req_start = !cntrl_flags_i.idle && flgs_streamer_i.x_stream_source_flags.ready_start &&
130+
(cntrl_scheduler_i.first_load || tot_x_read_q < reg_file_i.hwpe_params[TOT_X_READ]);
131+
assign cntrl_streamer_o.x_stream_source_ctrl.addressgen_ctrl.base_addr = reg_file_i.hwpe_params[X_ADDR]
132+
+ x_rows_offs_q + x_cols_offs_q;
133+
assign cntrl_streamer_o.x_stream_source_ctrl.addressgen_ctrl.tot_len = num_x_reads;
134+
assign cntrl_streamer_o.x_stream_source_ctrl.addressgen_ctrl.d0_len = 'd1;
135+
assign cntrl_streamer_o.x_stream_source_ctrl.addressgen_ctrl.d0_stride = 'd0;
136+
assign cntrl_streamer_o.x_stream_source_ctrl.addressgen_ctrl.d1_len = W;
137+
assign cntrl_streamer_o.x_stream_source_ctrl.addressgen_ctrl.d1_stride = reg_file_i.hwpe_params[X_D1_STRIDE];
138+
assign cntrl_streamer_o.x_stream_source_ctrl.addressgen_ctrl.d2_stride = '0;
139+
assign cntrl_streamer_o.x_stream_source_ctrl.addressgen_ctrl.dim_enable_1h = 2'b11;
140+
141+
// Here we initialize the streamer source signals
142+
// for the W stream source
143+
assign cntrl_streamer_o.w_stream_source_ctrl.req_start = cntrl_scheduler_i.first_load && flgs_streamer_i.z_stream_sink_flags.ready_start;
144+
assign cntrl_streamer_o.w_stream_source_ctrl.addressgen_ctrl.base_addr = reg_file_i.hwpe_params[W_ADDR];
145+
assign cntrl_streamer_o.w_stream_source_ctrl.addressgen_ctrl.tot_len = reg_file_i.hwpe_params[W_TOT_LEN];
146+
assign cntrl_streamer_o.w_stream_source_ctrl.addressgen_ctrl.d0_len = reg_file_i.hwpe_params[W_ITERS][31:16];
147+
assign cntrl_streamer_o.w_stream_source_ctrl.addressgen_ctrl.d0_stride = reg_file_i.hwpe_params[W_D0_STRIDE];
148+
assign cntrl_streamer_o.w_stream_source_ctrl.addressgen_ctrl.d1_len = reg_file_i.hwpe_params[W_ITERS][15:0];
149+
assign cntrl_streamer_o.w_stream_source_ctrl.addressgen_ctrl.d1_stride = JMP;
150+
assign cntrl_streamer_o.w_stream_source_ctrl.addressgen_ctrl.d2_stride = 'd0;
151+
assign cntrl_streamer_o.w_stream_source_ctrl.addressgen_ctrl.dim_enable_1h = 2'b11;
152+
153+
// Here we initialize the streamer source signals
154+
// for the Y stream source
155+
assign cntrl_streamer_o.y_stream_source_ctrl.req_start = cntrl_scheduler_i.first_load && reg_file_i.hwpe_params[OP_SELECTION][0] && flgs_streamer_i.y_stream_source_flags.ready_start;
156+
assign cntrl_streamer_o.y_stream_source_ctrl.addressgen_ctrl.base_addr = reg_file_i.hwpe_params[Z_ADDR];
157+
assign cntrl_streamer_o.y_stream_source_ctrl.addressgen_ctrl.tot_len = reg_file_i.hwpe_params[Z_TOT_LEN];
158+
assign cntrl_streamer_o.y_stream_source_ctrl.addressgen_ctrl.d0_len = W;
159+
assign cntrl_streamer_o.y_stream_source_ctrl.addressgen_ctrl.d0_stride = reg_file_i.hwpe_params[Z_D0_STRIDE];
160+
assign cntrl_streamer_o.y_stream_source_ctrl.addressgen_ctrl.d1_len = reg_file_i.hwpe_params[W_ITERS][15:0];
161+
assign cntrl_streamer_o.y_stream_source_ctrl.addressgen_ctrl.d1_stride = JMP;
162+
assign cntrl_streamer_o.y_stream_source_ctrl.addressgen_ctrl.d2_stride = reg_file_i.hwpe_params[Z_D2_STRIDE];
163+
assign cntrl_streamer_o.y_stream_source_ctrl.addressgen_ctrl.dim_enable_1h = 2'b11;
164+
165+
// Here we initialize the streamer sink signals for
166+
// the Z stream sink
167+
assign cntrl_streamer_o.z_stream_sink_ctrl.req_start = cntrl_scheduler_i.first_load && flgs_streamer_i.z_stream_sink_flags.ready_start;
168+
assign cntrl_streamer_o.z_stream_sink_ctrl.addressgen_ctrl.base_addr = reg_file_i.hwpe_params[Z_ADDR];
169+
assign cntrl_streamer_o.z_stream_sink_ctrl.addressgen_ctrl.tot_len = reg_file_i.hwpe_params[Z_TOT_LEN];
170+
assign cntrl_streamer_o.z_stream_sink_ctrl.addressgen_ctrl.d0_len = W;
171+
assign cntrl_streamer_o.z_stream_sink_ctrl.addressgen_ctrl.d0_stride = reg_file_i.hwpe_params[Z_D0_STRIDE];
172+
assign cntrl_streamer_o.z_stream_sink_ctrl.addressgen_ctrl.d1_len = reg_file_i.hwpe_params[W_ITERS][15:0];
173+
assign cntrl_streamer_o.z_stream_sink_ctrl.addressgen_ctrl.d1_stride = JMP;
174+
assign cntrl_streamer_o.z_stream_sink_ctrl.addressgen_ctrl.d2_stride = reg_file_i.hwpe_params[Z_D2_STRIDE];
175+
assign cntrl_streamer_o.z_stream_sink_ctrl.addressgen_ctrl.dim_enable_1h = 2'b11;
180176

181177
assign cntrl_streamer_o.input_cast_src_fmt = fpnew_pkg::fp_format_e'(reg_file_i.hwpe_params[OP_SELECTION][15:13]);
182178
assign cntrl_streamer_o.input_cast_dst_fmt = fpnew_pkg::fp_format_e'(reg_file_i.hwpe_params[OP_SELECTION][12:10]);

rtl/redmule_pkg.sv

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,10 @@ package redmule_pkg;
213213
logic w_loaded;
214214
} flgs_scheduler_t;
215215

216+
typedef struct packed {
217+
logic idle;
218+
} cntrl_flags_t;
219+
216220
typedef enum logic [2:0] { MATMUL=3'h0, GEMM=3'h1, ADDMAX=3'h2, ADDMIN=3'h3, MULMAX=3'h4, MULMIN=3'h5, MAXMIN=3'h6, MINMAX=3'h7 } gemm_op_e;
217221
typedef enum logic [1:0] { Float8=2'h0, Float16=2'h1, Float8Alt=2'h2, Float16Alt=2'h3 } gemm_fmt_e;
218222
typedef enum logic { RNE=1'h0, RTZ=1'h1 } rnd_mode_e;

rtl/redmule_top.sv

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ flgs_scheduler_t flgs_scheduler;
139139
// Register file bound from controller to FSM
140140
ctrl_regfile_t reg_file;
141141
flags_fifo_t w_fifo_flgs;
142+
cntrl_flags_t cntrl_flags;
142143

143144
/*--------------------------------------------------------------*/
144145
/* | Streamer | */
@@ -409,6 +410,7 @@ redmule_memory_scheduler #(
409410
.reg_file_i ( reg_file ),
410411
.flgs_streamer_i ( flgs_streamer ),
411412
.cntrl_scheduler_i ( cntrl_scheduler ),
413+
.cntrl_flags_i ( cntrl_flags ),
412414
.cntrl_streamer_o ( cntrl_streamer )
413415
);
414416

@@ -442,6 +444,7 @@ redmule_ctrl #(
442444
.w_loaded_i ( flgs_scheduler.w_loaded ),
443445
.flush_o ( engine_flush ),
444446
.cntrl_scheduler_o ( cntrl_scheduler ),
447+
.cntrl_flags_o ( cntrl_flags ),
445448
.periph ( local_periph )
446449
);
447450

target/sim/src/redmule_tb.sv

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ module redmule_tb
140140
assign redmule_tcdm.gnt = &tcdm_gnt;
141141
assign redmule_tcdm.r_data = { >> {tcdm_r_data} };
142142
assign redmule_tcdm.r_valid = &tcdm_r_valid;
143+
assign redmule_tcdm.r_id = '0;
143144
assign redmule_tcdm.r_opc = '0;
144145
assign redmule_tcdm.r_user = '0;
145146

@@ -171,9 +172,17 @@ module redmule_tb
171172
);
172173
end
173174

175+
// FixMe: How should we drive these?
176+
// assign redmule_tcdm.egnt = '1;
177+
// assign redmule_tcdm.r_evalid = '0;
178+
174179
end else begin: gen_no_ecc
175180
for(genvar ii=0; ii<MP; ii++)
176181
assign tcdm[ii].data = redmule_tcdm.data[(ii+1)*32-1:ii*32];
182+
183+
assign redmule_tcdm.r_ecc = '0;
184+
assign redmule_tcdm.egnt = '1;
185+
assign redmule_tcdm.r_evalid = '0;
177186
end
178187

179188
assign tcdm[MP].req = core_data_req.req &

0 commit comments

Comments
 (0)