i_cache.sv
5.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
/*
* i_cache.sv
* Author: Zinsser Zhang
* Last Revision: 03/13/2022
*
* This is a direct-mapped instruction cache. Line size and depth (number of
* lines) are set via the INDEX_WIDTH and BLOCK_OFFSET_WIDTH parameters. Note
* that line size means the number of words (each consisting of 32 bits) in a
* line. Because all addresses in mips_core are 26-bit byte addresses and the
* two low-order bits only select a byte within a word, the sum of TAG_WIDTH,
* INDEX_WIDTH and BLOCK_OFFSET_WIDTH is `ADDR_WIDTH - 2.
*
* Typical line sizes are from 2 words to 8 words. The memory interfaces only
* support up to 8 words line size.
*
* Because we need a hit latency of 1 cycle, we need an asynchronous read port,
* i.e. data is ready during the same cycle when address is calculated. However,
* SRAMs only support synchronous read, i.e. data is ready the cycle after the
* address is calculated. Due to this conflict, we need to read from the banks
* on the clock edge at the beginning of the cycle. As a result, we need both
* the registered version of address and a non-registered version of address
* (which will effectively be registered in SRAM).
*
* See wiki page "Synchronous Caches" for details.
*/
`include "mips_core.svh"
// i_cache: direct-mapped instruction cache with a three-state refill FSM.
//
// Parameters
//   INDEX_WIDTH        - number of index bits; the cache holds
//                        1 << INDEX_WIDTH lines.
//   BLOCK_OFFSET_WIDTH - number of block-offset bits; each line holds
//                        1 << BLOCK_OFFSET_WIDTH words, one word per databank.
//
// Operation (as implemented below)
//   * The banks are addressed with the index field of i_pc_next, while the
//     tag compare, valid-bit lookup and word select use i_pc_current.
//   * out.valid is asserted only on a hit, and a hit is only possible in
//     STATE_READY.
//   * On a miss the tag/index of i_pc_current are captured, a read request for
//     the whole line is issued on mem_read_address, and incoming words on
//     mem_read_data are written into consecutive databanks. The tag is written
//     and the line is marked valid when the last word arrives.
module i_cache #(
    parameter INDEX_WIDTH = 8,
    parameter BLOCK_OFFSET_WIDTH = 2
)(
    // General signals
    input clk,    // Clock
    input rst_n,  // Synchronous reset active low

    // Request
    pc_ifc.in i_pc_current,  // Address looked up this cycle
    pc_ifc.in i_pc_next,     // Address whose index drives the bank read ports

    // Response
    cache_output_ifc.out out,

    // Memory interface
    axi_read_address.master mem_read_address,
    axi_read_data.master mem_read_data
);
    localparam TAG_WIDTH = `ADDR_WIDTH - INDEX_WIDTH - BLOCK_OFFSET_WIDTH - 2;
    localparam LINE_SIZE = 1 << BLOCK_OFFSET_WIDTH;  // Words per line
    localparam DEPTH = 1 << INDEX_WIDTH;             // Number of lines

    // Check if the parameters are set correctly. An illegal configuration
    // instantiates a module that does not exist, which aborts elaboration.
    //
    // INDEX_WIDTH and BLOCK_OFFSET_WIDTH must be at least 1: a width of 0
    // turns declarations such as [BLOCK_OFFSET_WIDTH - 1 : 0] into [-1:0]
    // (two bits wide), which breaks the address-parsing concatenation, and
    // makes the part-select databank_select[LINE_SIZE - 2 : 0] used for the
    // bank rotation fall outside the vector.
    //
    // NOTE(review): the file header states that the memory interfaces support
    // at most 8 words per line, but this guard accepts up to 16. The limit is
    // left unchanged here; confirm against the AXI interface definitions.
    generate
        if (TAG_WIDTH <= 0
            || INDEX_WIDTH < 1
            || BLOCK_OFFSET_WIDTH < 1
            || LINE_SIZE > 16)
        begin
            INVALID_I_CACHE_PARAM invalid_i_cache_param ();
        end
    endgenerate

    // Parsing
    // The two low-order PC bits (byte within word) are dropped; the remaining
    // bits split into {tag, index, block offset}.
    logic [TAG_WIDTH - 1 : 0] i_tag;
    logic [INDEX_WIDTH - 1 : 0] i_index;
    logic [BLOCK_OFFSET_WIDTH - 1 : 0] i_block_offset;

    logic [INDEX_WIDTH - 1 : 0] i_index_next;

    assign {i_tag, i_index, i_block_offset} = i_pc_current.pc[`ADDR_WIDTH - 1 : 2];
    assign i_index_next = i_pc_next.pc[BLOCK_OFFSET_WIDTH + 2 +: INDEX_WIDTH];
    // Above line uses +: slice, a feature of SystemVerilog
    // See https://stackoverflow.com/questions/18067571

    // States
    enum logic[1:0] {
        STATE_READY,           // Ready for incoming requests
        STATE_REFILL_REQUEST,  // Sending out a memory read request
        STATE_REFILL_DATA      // Receiving the refill words from memory
    } state, next_state;

    // Registers for refilling: tag and index of the missing address, captured
    // in STATE_READY when the miss is detected.
    logic [INDEX_WIDTH - 1:0] r_index;
    logic [TAG_WIDTH - 1:0] r_tag;

    // databank signals
    logic [LINE_SIZE - 1 : 0] databank_select;  // One-hot: bank receiving the next refill word
    logic [LINE_SIZE - 1 : 0] databank_we;
    logic [`DATA_WIDTH - 1 : 0] databank_wdata;
    logic [INDEX_WIDTH - 1 : 0] databank_waddr;
    logic [INDEX_WIDTH - 1 : 0] databank_raddr;
    logic [`DATA_WIDTH - 1 : 0] databank_rdata [LINE_SIZE];

    // databanks: one bank per word position within a line. All banks share
    // the same addresses and write data; only the write enables differ.
    genvar g;
    generate
        for (g = 0; g < LINE_SIZE; g++)
        begin : databanks
            cache_bank #(
                .DATA_WIDTH (`DATA_WIDTH),
                .ADDR_WIDTH (INDEX_WIDTH)
            ) databank (
                .clk,
                .i_we (databank_we[g]),
                .i_wdata(databank_wdata),
                .i_waddr(databank_waddr),
                .i_raddr(databank_raddr),
                .o_rdata(databank_rdata[g])
            );
        end
    endgenerate

    // tagbank signals
    logic tagbank_we;
    logic [TAG_WIDTH - 1 : 0] tagbank_wdata;
    logic [INDEX_WIDTH - 1 : 0] tagbank_waddr;
    logic [INDEX_WIDTH - 1 : 0] tagbank_raddr;
    logic [TAG_WIDTH - 1 : 0] tagbank_rdata;

    cache_bank #(
        .DATA_WIDTH (TAG_WIDTH),
        .ADDR_WIDTH (INDEX_WIDTH)
    ) tagbank (
        .clk,
        .i_we (tagbank_we),
        .i_wdata (tagbank_wdata),
        .i_waddr (tagbank_waddr),
        .i_raddr (tagbank_raddr),
        .o_rdata (tagbank_rdata)
    );

    // Valid bits, one per line; all cleared on reset.
    logic [DEPTH - 1 : 0] valid_bits;

    // Intermediate signals
    logic hit, miss;
    logic last_refill_word;

    always_comb
    begin
        // A hit needs a valid line, a matching tag, and an idle FSM.
        hit = valid_bits[i_index]
            & (i_tag == tagbank_rdata)
            & (state == STATE_READY);
        // miss is the plain complement of hit, so it is also high during a
        // refill; only the STATE_READY branches below act on it.
        miss = ~hit;
        // The last bank is selected and a word is arriving this cycle.
        last_refill_word = databank_select[LINE_SIZE - 1]
            & mem_read_data.RVALID;
    end

    // Memory read request: line-aligned address of the missing line.
    always_comb
    begin
        mem_read_address.ARADDR = {r_tag, r_index,
            {BLOCK_OFFSET_WIDTH + 2{1'b0}}};
        mem_read_address.ARLEN = LINE_SIZE;
        mem_read_address.ARVALID = state == STATE_REFILL_REQUEST;
        mem_read_address.ARID = 4'd0;

        // Always ready to consume data
        mem_read_data.RREADY = 1'b1;
    end

    // Databank ports: write the arriving word into the selected bank of the
    // line being refilled; read at the index of the next PC.
    always_comb
    begin
        if (mem_read_data.RVALID)
            databank_we = databank_select;
        else
            databank_we = '0;

        databank_wdata = mem_read_data.RDATA;
        databank_waddr = r_index;
        databank_raddr = i_index_next;
    end

    // Tagbank ports: the tag is written together with the last refill word.
    always_comb
    begin
        tagbank_we = last_refill_word;
        tagbank_wdata = r_tag;
        tagbank_waddr = r_index;
        tagbank_raddr = i_index_next;
    end

    // Response: the block offset of the current PC picks the word.
    always_comb
    begin
        out.valid = hit;
        out.data = databank_rdata[i_block_offset];
    end

    // Next-state logic. next_state defaults to holding the current state, so
    // each branch only describes the condition for leaving it.
    always_comb
    begin
        next_state = state;
        unique case (state)
            STATE_READY:
                if (miss)
                    next_state = STATE_REFILL_REQUEST;
            STATE_REFILL_REQUEST:
                if (mem_read_address.ARREADY)
                    next_state = STATE_REFILL_DATA;
            STATE_REFILL_DATA:
                if (last_refill_word)
                    next_state = STATE_READY;
        endcase
    end

    // Sequential state. r_tag and r_index are not reset; they are written on
    // the miss that starts a refill, before any state that uses them.
    always_ff @(posedge clk)
    begin
        if(~rst_n)
        begin
            state <= STATE_READY;
            databank_select <= 1;
            valid_bits <= '0;
        end
        else
        begin
            state <= next_state;

            case (state)
                STATE_READY:
                begin
                    if (miss)
                    begin
                        // Remember which line to fetch.
                        r_tag <= i_tag;
                        r_index <= i_index;
                    end
                end
                STATE_REFILL_REQUEST:
                begin
                end
                STATE_REFILL_DATA:
                begin
                    if (mem_read_data.RVALID)
                    begin
                        // Rotate the one-hot select to the next bank; after
                        // the last word it wraps back to bank 0.
                        databank_select <= {databank_select[LINE_SIZE - 2 : 0],
                            databank_select[LINE_SIZE - 1]};
                        // The line is held invalid while it is partially
                        // written and becomes valid with the last word.
                        valid_bits[r_index] <= last_refill_word;
                    end
                end
            endcase
        end
    end
endmodule