Added Nvme trim/deallocate functionality.
[DuneNvme.git] / src / NvmeWrite.vhd
1 --------------------------------------------------------------------------------
2 -- NvmeWrite.vhd Nvme Write data module
3 -------------------------------------------------------------------------------
4 --!
5 --! @class      NvmeWrite
6 --! @author     Terry Barnaby (terry.barnaby@beam.ltd.uk)
7 --! @date       2020-05-11
8 --! @version    0.3.1
9 --!
10 --! @brief
11 --! This module performs the Nvme write data functionality.
12 --!
13 --! @details
14 --! This module is the heart of the DuneNvme system. Its purpose is to write
15 --! the incoming data blocks to the Nvme device.
16 --! For performance it will concurrently write NvmeWriteNum (8) blocks.
17 --! It implements a set of NvmeWriteNum x 4k buffers in a single BlockRAM.
18 --! An input process chooses a free buffer and then writes the data from the input
19 --! AXIS stream into this buffer. Once complete the input process adds the buffer number
20 --! to a processing queue.
21 --! A processing process takes buffer numbers from the processing queue. When available an
22 --! Nvme write request is sent to the Nvme write queue.
23 --! On the Nvme reading the queue entry it will perform "bus master" memory reads from the
24 --! appropriate data buffer.
25 --! Once the Nvme has completed the write (well taken in the write data) it will send
26 --! a reply to the reply queue. The NvmeWrite's reply process will process these, storing
27 --! any error status, and then free the buffer that was used.
28 --!
29 --! The module is controlled by the enable line, which enables the processing of input data, and
30 --!  the dataChunkSize register which specifies how many blocks to capture.
31 --!  It will continue to capture whilst the enable line is high and the number of blocks captured
32 --!  is less than the number of blocks in the dataChunkSize register.
33 --!
34 --! As the system allows up to NvmeWriteNum concurrent writes, it is able to hide intermittent
35 --! large write latencies.
36 --! Notes:
37 --!   The parameter NvmeWriteNum should be less than NvmeQueueNum.
38 --!   It assumes the DuneNvme block size is set in NvmeStorageBlockSize and the DataIn stream's
39 --!     last signal is synchronised with the end of each input block.
40 --!   At the moment it assumes the Nvme's internal block size is 512 Bytes.
41 --!   At the moment it stores the first status reply error but continues ignoring the error.
42 --!   There are no timeouts or any other error handling.
43 --!
44 --! @copyright GNU GPL License
45 --! Copyright (c) Beam Ltd, All rights reserved. <br>
46 --! This code is free software: you can redistribute it and/or modify
47 --! it under the terms of the GNU General Public License as published by
48 --! the Free Software Foundation, either version 3 of the License, or
49 --! (at your option) any later version.
50 --! This program is distributed in the hope that it will be useful,
51 --! but WITHOUT ANY WARRANTY; without even the implied warranty of
52 --! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
53 --! GNU General Public License for more details. <br>
54 --! You should have received a copy of the GNU General Public License
55 --! along with this code. If not, see <https://www.gnu.org/licenses/>.
56 --!
57 library ieee;
58 use ieee.std_logic_1164.all;
59 use ieee.numeric_std.all;
60
61 library unisim;
62 use unisim.vcomponents.all;
63
64 library work;
65 use work.NvmeStoragePkg.all;
66 use work.NvmeStorageIntPkg.all;
67
68 entity NvmeWrite is
69 generic(
70         Simulate        : boolean := False;                     --! Generate simulation core
71         ClockPeriod     : time := 8 ns;                         --! The clock period. Used to derive the 1 us tick for the timeUs statistics counter
72         BlockSize       : integer := NvmeStorageBlockSize       --! System block size in bytes (one write buffer holds one block)
73 );
74 port (
75         clk             : in std_logic;                         --! The interface clock line
76         reset           : in std_logic;                         --! The active high reset line
77
78         enable          : in std_logic;                         --! Enable the data writing process. A run starts when high, state machines return to idle when low
79         dataIn          : inout AxisStreamType := AxisStreamInput;      --! Raw data to save stream. ready is only asserted while a buffer is being filled
80
81         -- To Nvme Request/reply streams
82         requestOut      : inout AxisStreamType := AxisStreamOutput;     --! To Nvme request stream (3)
83         replyIn         : inout AxisStreamType := AxisStreamInput;      --! from Nvme reply stream
84
85         -- From Nvme Request/reply streams
86         memReqIn        : inout AxisStreamType := AxisStreamInput;      --! From Nvme request stream (4)
87         memReplyOut     : inout AxisStreamType := AxisStreamOutput;     --! To Nvme reply stream
88         
        -- Register interface. Read map: 0 dataChunkStart, 1 dataChunkSize, 2 error, 3 numBlocksDone,
        -- 4 timeUs, 5 peakLatency, any other address reads as all ones. Only registers 0 and 1 are writable.
89         regWrite        : in std_logic;                         --! Enable write to register
90         regAddress      : in unsigned(3 downto 0);              --! Register to read/write
91         regDataIn       : in std_logic_vector(31 downto 0);     --! Register write data
92         regDataOut      : out std_logic_vector(31 downto 0)     --! Register contents
93 );
94 end;
95
96 architecture Behavioral of NvmeWrite is
97
98 constant TCQ            : time := 1 ns;                         -- NOTE(review): not referenced in the visible part of this file
99 constant SimDelay       : boolean := False;                     --! Input data delay after each packet for simulation tests
100 constant SimWaitReply   : boolean := False;                     --! Wait for each write command to return a reply
101 constant DoWrite        : boolean := True;                      --! Perform write blocks
102 constant DoTrim         : boolean := True;                      --! Perform trim/deallocate functionality
103
104 constant NvmeBlocks     : integer := BlockSize / NvmeBlockSize;         --! The number of Nvme blocks per NvmeStorage system block
105 constant RamSize        : integer := (NvmeWriteNum * BlockSize) / 16;   -- One block per write buffer, size counted in 16 byte (128 bit) words
106 constant AddressWidth   : integer := log2(RamSize);                     -- Width of the buffer Ram address
107 constant BlockSizeWidth : integer := log2(BlockSize);                   -- Number of byte address bits within one block
108 constant TrimNum        : integer := (32768 / NvmeBlocks);      --! The number of system blocks trimmed by one trim request (32768 Nvme blocks)
109
--! Buffer memory component with separate write and read ports, 128 bits wide.
--! It is instantiated once (dataBuffer0) to hold all of the write buffers.
110 component Ram is
111 generic(
112         DataWidth       : integer := 128;
113         Size            : integer := RamSize;                   --! The Buffer size in 128 bit words
114         AddressWidth    : integer := AddressWidth
115 );
116 port (
117         clk             : in std_logic;                         --! The interface clock line
118         reset           : in std_logic;                         --! The active high reset line
119
120         writeEnable     : in std_logic;                         -- Write strobe for the write port
121         writeAddress    : in unsigned(AddressWidth-1 downto 0); -- Word address to write
122         writeData       : in std_logic_vector(127 downto 0);    -- Word to write (connected to dataIn.data in this module)
123
124         readEnable      : in std_logic;                         -- Read strobe for the read port
125         readAddress     : in unsigned(AddressWidth-1 downto 0); -- Word address to read
126         readData        : out std_logic_vector(127 downto 0)    -- Word read; read latency is defined by the Ram implementation (not visible here)
127 );
128 end component;
129
130 --! Input buffer status
--! The fields are driven from different processes: the input process drives inUse1 and blockNumber,
--! the write process drives startTime and the reply process drives inUse2.
--! A buffer is free when inUse1 = inUse2. The input process claims it by setting inUse1 to "not inUse2".
--! NOTE(review): the reply process presumably releases it by making inUse2 equal to inUse1 again - confirm there.
131 type BufferType is record
132         inUse1          : std_logic;                            --! inUse1 and inUse2 are used to indicate buffer is in use when different
133         inUse2          : std_logic;
134         blockNumber     : unsigned(31 downto 0);                --! The first block number in the buffer
135         startTime       : unsigned(31 downto 0);                --! The start time for this buffer transaction
136 end record;
137
138 subtype RegisterType    is unsigned(31 downto 0);               -- All registers are 32 bits wide
139 type BufferArrayType    is array (0 to NvmeWriteNum-1) of BufferType;   -- Status of each of the NvmeWriteNum write buffers
140
-- States of the input process (stores stream data into a free buffer)
141 type InStateType        is (INSTATE_IDLE, INSTATE_INIT, INSTATE_CHOOSE, INSTATE_INPUT_BLOCK, INSTATE_DELAY, INSTATE_COMPLETE);
-- States of the write process. The STATE_WQUEUE_* states send a write request and the STATE_TQUEUE_* states
-- send a trim/deallocate request, one 128 bit beat per state.
142 type StateType          is (STATE_IDLE, STATE_INIT, STATE_RUN, STATE_COMPLETE,
143                                 STATE_WQUEUE_HEAD, STATE_WQUEUE_0, STATE_WQUEUE_1, STATE_WQUEUE_2, STATE_WQUEUE_3,
144                                 STATE_TQUEUE_HEAD, STATE_TQUEUE_0, STATE_TQUEUE_1, STATE_TQUEUE_2, STATE_TQUEUE_3,
145                                 STATE_WAIT_REPLY);
-- States of the reply process (handles the Nvme's replies)
146 type ReplyStateType     is (REPSTATE_IDLE, REPSTATE_INIT, REPSTATE_COMPLETE, REPSTATE_QUEUE_REPLY1, REPSTATE_QUEUE_REPLY2);
147
148 signal inState          : InStateType := INSTATE_IDLE;
149 signal state            : StateType := STATE_IDLE;
-- NOTE(review): the initial value here is REPSTATE_QUEUE_REPLY1 whereas reset puts the reply process into
-- REPSTATE_IDLE. The two differ until the first reset - confirm this is intended.
150 signal replyState       : ReplyStateType := REPSTATE_QUEUE_REPLY1;
151
152 signal blockNumberIn    : unsigned(31 downto 0) := (others => '0');             --! Input block number
153 signal numBlocksProc    : unsigned(31 downto 0) := (others => '0');             --! Number of block write requests sent
154 signal numBlocksDone    : unsigned(31 downto 0) := (others => '0');             --! Number of block write completions received
155 signal numBlocksTrimmed : unsigned(31 downto 0) := (others => '0');             --! Number of blocks trimmed
156
157 signal trimQueueProc    : unsigned(3 downto 0) := (others => '0');              --! The number of trim tasks in progress
158 signal trimQueueDone    : unsigned(3 downto 0) := (others => '0');              --! The number of trim tasks completed
159
160 -- Input buffers
161 signal writeEnable      : std_logic := '0';
162 signal writeAddress     : unsigned(AddressWidth-1 downto 0) := (others => '0');
163 signal readEnable       : std_logic := '0';
164 signal readAddress      : unsigned(AddressWidth-1 downto 0) := (others => '0');
165 signal readData         : std_logic_vector(127 downto 0) := (others => '0');
166
167 signal buffers          : BufferArrayType := (others => ('Z', 'Z', (others => 'Z'), (others => 'Z')));
168 signal bufferInNum      : integer range 0 to NvmeWriteNum-1 := 0;
169 signal bufferOutNum     : integer range 0 to NvmeWriteNum-1 := 0;
170
171 -- Process queue
-- Ring of buffer numbers handed from the input process to the write process.
-- The input process stores a filled buffer's number at processQueueIn and advances it; the write process
-- takes the number at processQueueOut and advances it. The queue is treated as empty when both are equal.
172 type ProcessQueueType   is array(0 to NvmeWriteNum) of integer range 0 to NvmeWriteNum-1;
173 signal processQueue     : ProcessQueueType := (others => 0);
174 signal processQueueIn   : integer range 0 to NvmeWriteNum := 0;
175 signal processQueueOut  : integer range 0 to NvmeWriteNum := 0;
176
177 -- Buffer read
178 type MemStateType       is (MEMSTATE_IDLE, MEMSTATE_READHEAD, MEMSTATE_READDATA);
179 signal memState         : MemStateType := MEMSTATE_IDLE;
180 signal memRequestHead   : PcieRequestHeadType;
181 signal memRequestHead1  : PcieRequestHeadType;
182 signal memReplyHead     : PcieReplyHeadType;
183 signal nvmeReplyHead    : NvmeReplyHeadType;
184 signal memCount         : unsigned(10 downto 0);                        -- DWord data send count
185 signal memChunkCount    : unsigned(10 downto 0);                        -- DWord data send within a chunk count
186 signal memData          : std_logic_vector(127 downto 0);
187
188 -- Register information
189 signal dataChunkStart   : RegisterType := (others => '0');      -- The data chunk start position in blocks
190 signal dataChunkSize    : RegisterType := (others => '0');      -- The data chunk size in blocks
191 signal error            : RegisterType := (others => '0');      -- The system errors status
192 signal timeUs           : RegisterType := (others => '0');      -- The time in us
193 signal peakLatency      : RegisterType := (others => '0');      -- The peak latency in us
194 signal timeCounter      : integer range 0 to (1 us / ClockPeriod) - 1 := 0;
195
196 --! Set the fields in the PCIe TLP header
--! Builds a request header from the request type, address, count and tag and returns it packed into a
--! std_logic_vector with to_stl so that it can be sent as the first beat on requestOut.
--! The first argument of set_PcieRequestHeadType is fixed at 3.
--! NOTE(review): presumably this is the stream/requester number of this module (requestOut is documented as
--! stream 3) - confirm against NvmeStorageIntPkg.
197 function setHeader(request: integer; address: integer; count: integer; tag: integer) return std_logic_vector is
198 begin
199         return to_stl(set_PcieRequestHeadType(3, request, address, count, tag));
200 end function;
201
--! Advance a process queue index by one slot, wrapping to 0 after the last slot.
--! The process queue and its indices are declared with the range 0 to NvmeWriteNum, i.e. one slot more
--! than there are buffers. The wrap must use that full range: with only NvmeWriteNum slots in use, having
--! every buffer queued at once makes processQueueIn equal processQueueOut, which the write process reads
--! as "queue empty", and the design then stalls.
--! @param v  Current index, 0 to NvmeWriteNum
--! @return   Next index, 0 to NvmeWriteNum
202 function incrementPos(v: integer) return integer is
203 begin
204         if(v = NvmeWriteNum) then
205                 return 0;
206         else
207                 return v + 1;
208         end if;
209 end;
210
--! Ram word address of the first word of a write buffer.
--! The buffer number occupies the top log2(NvmeWriteNum) address bits, the word offset below it is zero.
211 function bufferAddress(bufferNum: integer) return unsigned is
constant IndexWidth     : integer := log2(NvmeWriteNum);
variable address        : unsigned(AddressWidth-1 downto 0) := (others => '0');
212 begin
        address(AddressWidth-1 downto AddressWidth-IndexWidth) := to_unsigned(bufferNum, IndexWidth);
213         return address;
214 end;
215
--! 32 bit PCIe address of a write buffer as given to the Nvme in a write request.
--! Layout: top byte 0x05, then zero padding, then the buffer number, then BlockSizeWidth zero bits
--! (the byte offset within the block).
216 function pcieAddress(bufferNum: integer) return std_logic_vector is
constant IndexWidth     : integer := log2(NvmeWriteNum);
variable address        : std_logic_vector(31 downto 0) := (others => '0');
217 begin
        address(31 downto 24) := x"05";
        address(BlockSizeWidth+IndexWidth-1 downto BlockSizeWidth) := to_stl(bufferNum, IndexWidth);
218         return address;
219 end;
220
--! Number of Nvme blocks, minus one, to cover with the next trim request (the field is 0's based).
--! A full request covers 32768 Nvme blocks; the last request of a chunk covers only what remains.
--! @param total    Total number of system blocks in the chunk
--! @param current  Number of system blocks already covered by earlier trim requests (must be below total)
221 function numTrimBlocks(total: unsigned; current: unsigned) return unsigned is
variable count          : unsigned(15 downto 0) := to_unsigned(32768-1, 16);
222 begin
223         if((current + TrimNum) > total) then
                -- Fewer than TrimNum system blocks remain: trim just the remainder
224                 count := truncate(((total - current) * NvmeBlocks) - 1, 16);
227         end if;
        return count;
228 end;
229
230 begin
231         -- Register access: combinational read-back multiplexer selected by regAddress.
        -- Addresses 0 to 5 return the registers below, every other address reads as all ones.
        process(regAddress, dataChunkStart, dataChunkSize, error, numBlocksDone, timeUs, peakLatency)
        begin
                if(regAddress = 0) then
232                         regDataOut <= std_logic_vector(dataChunkStart);
                elsif(regAddress = 1) then
233                         regDataOut <= std_logic_vector(dataChunkSize);
                elsif(regAddress = 2) then
234                         regDataOut <= std_logic_vector(error);
                elsif(regAddress = 3) then
235                         regDataOut <= std_logic_vector(numBlocksDone);
                elsif(regAddress = 4) then
236                         regDataOut <= std_logic_vector(timeUs);
                elsif(regAddress = 5) then
237                         regDataOut <= std_logic_vector(peakLatency);
                else
238                         regDataOut <= ones(32);
                end if;
        end process;
239         
240         -- Register process: synchronous write of the two writable registers.
        -- Register 0 is dataChunkStart and register 1 is dataChunkSize; reset clears both.
241         process(clk)
242         begin
243                 if(rising_edge(clk)) then
244                         if(reset = '1') then
245                                 dataChunkStart  <= (others => '0');
246                                 dataChunkSize   <= (others => '0');
                        elsif(regWrite = '1') then
                                -- Writes to any other register address are ignored
                                if(regAddress = 0) then
248                                         dataChunkStart  <= unsigned(regDataIn);
                                elsif(regAddress = 1) then
250                                         dataChunkSize   <= unsigned(regDataIn);
                                end if;
251                         end if;
252                 end if;
253         end process;
254
255         -- Input buffers in BlockRAM: a single Ram holds all NvmeWriteNum block buffers.
        -- The generics are spelled out here; the values are the defaults of the component declaration.
256         dataBuffer0 : Ram
        generic map (
                DataWidth       => 128,
                Size            => RamSize,
                AddressWidth    => AddressWidth
        )
257         port map (
258                 clk             => clk,
259                 reset           => reset,
260
                -- Write side: filled directly from the input stream by the input process
261                 writeEnable     => writeEnable,
262                 writeAddress    => writeAddress,
263                 writeData       => dataIn.data,
264
                -- Read side: used to fetch buffer data
265                 readEnable      => readEnable,
266                 readAddress     => readAddress,
267                 readData        => readData
268         );
269
270         -- Input data process. Accepts data from input stream and stores it into a free buffer if available.
        -- dataIn.ready follows writeEnable, so stream data is only accepted while a buffer is being filled
        -- (INSTATE_INPUT_BLOCK). It is held off while a buffer is being chosen or when no buffer is free.
271         dataIn.ready <= writeEnable;
272
273         process(clk)
        -- Delay counter for INSTATE_DELAY. As a process variable it keeps its value between clock edges.
274         variable c: integer;
275         begin
276                 if(rising_edge(clk)) then
277                         if(reset = '1') then
278                                 for i in 0 to NvmeWriteNum-1 loop
279                                         buffers(i).inUse1 <= '0';
280                                         buffers(i).blockNumber <= (others => '0');
281                                 end loop;
282
283                                 blockNumberIn   <= (others => '0');
284                                 processQueueIn  <= 0;
285                                 writeEnable     <= '0';
286                                 inState         <= INSTATE_IDLE;
287                         else
288                                 case(inState) is
289                                 when INSTATE_IDLE =>
                                        -- Wait for a run to be started
290                                         if(enable = '1') then
291                                                 inState <= INSTATE_INIT;
292                                         end if;
293
294                                 when INSTATE_INIT =>
295                                         -- Initialise for next run
296                                         for i in 0 to NvmeWriteNum-1 loop
297                                                 buffers(i).inUse1 <= '0';
298                                         end loop;
299
300                                         blockNumberIn   <= (others => '0');
301                                         processQueueIn  <= 0;
302                                         writeEnable     <= '0';
303                                         inState         <= INSTATE_CHOOSE;
304
305                                 when INSTATE_CHOOSE =>
                                        -- Pick a buffer for the next block, or finish once dataChunkSize blocks have been captured.
                                        -- Dropping enable abandons the run.
306                                         if(enable = '1') then
307                                                 if(blockNumberIn >= dataChunkSize) then
308                                                         inState <= INSTATE_COMPLETE;
309                                                 else
310                                                         -- Decide on which buffer to use based on inuse state. We should implement a Fifo queue here
                                                        -- A buffer is free when inUse1 = inUse2. The lowest numbered free buffer is taken and
                                                        -- marked as in use by making inUse1 differ from inUse2. If none is free nothing is
                                                        -- assigned, so the state is unchanged and the search is repeated on the next clock.
311                                                         for i in 0 to NvmeWriteNum-1 loop
312                                                                 if(buffers(i).inUse1 = buffers(i).inUse2) then
313                                                                         bufferInNum             <= i;
314                                                                         buffers(i).inUse1       <= not buffers(i).inUse2;
315                                                                         buffers(i).blockNumber  <= blockNumberIn;
316                                                                         writeAddress            <= bufferAddress(i);
317                                                                         writeEnable             <= '1';
318                                                                         inState                 <= INSTATE_INPUT_BLOCK;
319                                                                         exit;
320                                                                 end if;
321                                                         end loop;
322                                                 end if;
323                                         else
324                                                 inState <= INSTATE_IDLE;
325                                         end if;
326
327                                 when INSTATE_INPUT_BLOCK =>
328                                         -- Could check for buffer full status here instead of using last signal
                                        -- The Ram's write data is wired to dataIn.data and writeEnable is held high for the whole block.
                                        -- writeAddress only advances on an accepted beat, so a word is final once its beat is accepted.
329                                         if((dataIn.valid = '1') and (dataIn.ready = '1')) then
330                                                 if(dataIn.last = '1') then
                                                        -- Block complete: stop accepting data and count the block
331                                                         writeEnable                     <= '0';
332                                                         blockNumberIn                   <= blockNumberIn + 1;
333                                                         
334                                                         -- Add to process queue
335                                                         processQueue(processQueueIn) <= bufferInNum;
336                                                         processQueueIn <= incrementPos(processQueueIn);
337
338                                                         if(SimDelay) then
339                                                                 c       := 400;
340                                                                 inState <= INSTATE_DELAY;
341                                                         else
342                                                                 inState <= INSTATE_CHOOSE;
343                                                         end if;
344                                                 else
345                                                         writeAddress <= writeAddress + 1;
346                                                 end if;
347                                         end if;
348
349                                 when INSTATE_DELAY =>
350                                         -- This is for simulation so we can space out input blocks in time
351                                         c := c - 1;
352                                         if(c = 0) then
353                                                 inState <= INSTATE_CHOOSE;
354                                         end if;
355                                         
356                                 when INSTATE_COMPLETE =>
                                        -- All blocks captured: wait for enable to be dropped before a new run can start
357                                         if(enable = '0') then
358                                                 inState <= INSTATE_IDLE;
359                                         end if;
360
361                                 end case;
362                         end if;
363                 end if;
364         end process;
365
366         nvmeReplyHead <= to_NvmeReplyHeadType(replyIn.data);
367         
368         -- Process data write. This takes the input buffers and sends a write request to the Nvme for each one that is full and
369         -- not already processed
        -- When no buffer is waiting it sends trim/deallocate requests ahead of the data, at most 4 outstanding.
        -- Every request is one PCIe header beat followed by four 128 bit beats (the 16 DWord Nvme command).
        -- Each *_HEAD, *_0, *_1 and *_2 state loads the next beat when the previous one has been accepted, and
        -- the *_3 state waits for the last beat to be accepted.
        -- The process also maintains the timeUs microsecond counter used for the latency statistics.
370         process(clk)
371         begin
372                 if(rising_edge(clk)) then
373                         if(reset = '1') then
374                                 requestOut.valid        <= '0';
375                                 requestOut.last         <= '0';
376                                 requestOut.keep         <= (others => '1');
377                                 timeUs                  <= (others => '0');
378                                 timeCounter             <= 0;
379                                 bufferOutNum            <= 0;
380                                 numBlocksProc           <= (others => '0');
381                                 processQueueOut         <= 0;
382                                 numBlocksTrimmed        <= (others => '0');
383                                 trimQueueProc           <= (others => '0');
384                                 state                   <= STATE_IDLE;
385                         else
386                                 case(state) is
387                                 when STATE_IDLE =>
388                                         if(enable = '1') then
389                                                 state <= STATE_INIT;
390                                         end if;
391                                 
392                                 when STATE_INIT =>
393                                         -- Initialise for next run
394                                         timeUs          <= (others => '0');
395                                         timeCounter     <= 0;
396                                         numBlocksProc   <= (others => '0');
397                                         processQueueOut <= 0;
398                                         numBlocksTrimmed<= (others => '0');
399                                         state           <= STATE_RUN;
400                                         
401                                 when STATE_RUN =>
402                                         if(enable = '1') then
403                                                 if(numBlocksProc >= dataChunkSize) then
                                                        -- A write request has been sent for every block of the chunk
404                                                         state <= STATE_COMPLETE;
405                                                 
                                                -- Writes have priority: take the next filled buffer from the process queue
406                                                 elsif(DoWrite and (processQueueOut /= processQueueIn)) then
407                                                         bufferOutNum            <= processQueue(processQueueOut);
408                                                         processQueueOut         <= incrementPos(processQueueOut);
                                                        -- Index with the queue entry itself. bufferOutNum is a signal and still holds
                                                        -- the previous buffer number here, so using it would time-stamp the wrong buffer.
409                                                         buffers(processQueue(processQueueOut)).startTime <= timeUs;
410                                                         requestOut.data         <= setHeader(1, 16#02010000#, 16, 0);
411                                                         requestOut.valid        <= '1';
412                                                         state                   <= STATE_WQUEUE_HEAD;
413
                                                -- Otherwise trim ahead while blocks remain untrimmed and fewer than 4 trims are outstanding
414                                                 elsif(DoTrim and (numBlocksTrimmed < dataChunkSize) and ((trimQueueProc - trimQueueDone) < 4)) then
415                                                         requestOut.data         <= setHeader(1, 16#02010000#, 16, 0);
416                                                         requestOut.valid        <= '1';
417                                                         state                   <= STATE_TQUEUE_HEAD;
418                                                 end if;
419                                         else
420                                                 state <= STATE_COMPLETE;
421                                         end if;
422                                 
423                                 when STATE_COMPLETE =>
                                        -- Wait for enable to be dropped before a new run can start
424                                         if(enable = '0') then
425                                                 state <= STATE_IDLE;
426                                         end if;
427
428                                 -- Write blocks request
429                                 when STATE_WQUEUE_HEAD =>
430                                         if(requestOut.valid = '1' and requestOut.ready = '1') then
                                                -- The buffer number is carried in the command so the reply can identify the buffer
431                                                 requestOut.data <= zeros(64) & x"00000001" & x"04" & to_stl(bufferOutNum, 8) & x"0001"; -- Namespace 1, From stream4, opcode 1
432                                                 state           <= STATE_WQUEUE_0;
433                                         end if;
434
435                                 when STATE_WQUEUE_0 =>
436                                         if(requestOut.valid = '1' and requestOut.ready = '1') then
                                                -- Source address of the data: the buffer's address as seen by the Nvme
437                                                 requestOut.data <= zeros(32) & pcieAddress(bufferOutNum) & zeros(64);
438                                                 --requestOut.data       <= zeros(32) & x"01800000" & zeros(64); -- Data source address from host
439                                                 state           <= STATE_WQUEUE_1;
440                                         end if;
441
442                                 when STATE_WQUEUE_1 =>
443                                         if(requestOut.valid = '1' and requestOut.ready = '1') then
                                                -- Starting Nvme block: the system block number scaled by NvmeBlocks (shift left by log2(NvmeBlocks))
444                                                 requestOut.data <= zeros(32-log2(NvmeBlocks)) & std_logic_vector(dataChunkStart + buffers(bufferOutNum).blockNumber) & zeros(log2(NvmeBlocks) + 64);
445                                                 state           <= STATE_WQUEUE_2;
446                                         end if;
447
448                                 when STATE_WQUEUE_2 =>
449                                         if(requestOut.valid = '1' and requestOut.ready = '1') then
450                                                 requestOut.data <= zeros(96) & to_stl(NvmeBlocks-1, 32);        -- WriteMethod, NumBlocks (0 is 1 block)
451                                                 requestOut.last <= '1';
452                                                 numBlocksProc   <= numBlocksProc + 1;
453                                                 state           <= STATE_WQUEUE_3;
454                                         end if;
455
456                                 when STATE_WQUEUE_3 =>
457                                         if(requestOut.valid = '1' and requestOut.ready = '1') then
458                                                 requestOut.last         <= '0';
459                                                 requestOut.valid        <= '0';
460                                                 
461                                                 if(SimWaitReply) then
462                                                         state <= STATE_WAIT_REPLY;
463                                                 else
464                                                         state <= STATE_RUN;
465                                                 end if;
466                                         end if;
467
468                                 -- Trim/deallocate request
469                                 when STATE_TQUEUE_HEAD =>
470                                         if(requestOut.valid = '1' and requestOut.ready = '1') then
                                                -- The command id is 0xF0 plus the low 4 bits of the trim counter
471                                                 requestOut.data <= zeros(64) & x"00000001" & x"04F" & to_stl(trimQueueProc(3 downto 0)) & x"0008";      -- Namespace 1, From stream4, opcode 8
472                                                 state           <= STATE_TQUEUE_0;
473                                         end if;
474
475                                 when STATE_TQUEUE_0 =>
476                                         if(requestOut.valid = '1' and requestOut.ready = '1') then
                                                -- No data is transferred for a trim, so there is no source address
477                                                 requestOut.data <= zeros(128);
478                                                 state           <= STATE_TQUEUE_1;
479                                         end if;
480
481                                 when STATE_TQUEUE_1 =>
482                                         if(requestOut.valid = '1' and requestOut.ready = '1') then
                                                -- Starting Nvme block of the region to trim
483                                                 requestOut.data <= zeros(32-log2(NvmeBlocks)) & std_logic_vector(dataChunkStart + numBlocksTrimmed) & zeros(log2(NvmeBlocks) + 64);
484                                                 state           <= STATE_TQUEUE_2;
485                                         end if;
486
487                                 when STATE_TQUEUE_2 =>
488                                         if(requestOut.valid = '1' and requestOut.ready = '1') then
                                                -- NOTE(review): x"0200" in the upper half sets bit 25 of this DWord, presumably the
                                                -- Deallocate bit of the Nvme Write Zeroes command (opcode 8) - confirm against the Nvme specification
489                                                 requestOut.data <= zeros(96) & x"0200" & to_stl(numTrimBlocks(dataChunkSize, numBlocksTrimmed));        -- Deallocate, NumBlocks (0 is 1 block)
490                                                 requestOut.last <= '1';
491                                                 state           <= STATE_TQUEUE_3;
492                                         end if;
493
494                                 when STATE_TQUEUE_3 =>
495                                         if(requestOut.valid = '1' and requestOut.ready = '1') then
496                                                 requestOut.last         <= '0';
497                                                 requestOut.valid        <= '0';
498                                                 numBlocksTrimmed        <= numBlocksTrimmed + TrimNum;
499                                                 trimQueueProc           <= trimQueueProc + 1;
500                                                 
501                                                 if(SimWaitReply) then
                                                        -- Always leave this state: valid has just been cleared, so the handshake test
                                                        -- above can never be true again. The wait for the reply is done in STATE_WAIT_REPLY.
503                                                         state <= STATE_WAIT_REPLY;
505                                                 else
506                                                         state <= STATE_RUN;
507                                                 end if;
508                                         end if;
509
510
511
512                                 when STATE_WAIT_REPLY =>
                                        -- Only used with SimWaitReply: hold until every write and trim sent so far has been replied to
513                                         if((numBlocksProc > numBlocksDone) or (trimQueueProc /= trimQueueDone)) then
514                                                 state <= STATE_WAIT_REPLY;
515                                         else
516                                                 state <= STATE_RUN;
517                                         end if;
518
519                                 end case;
520                                 
521                                 -- Microsecond counter for statistics
                                -- These assignments follow the case statement, so they take precedence over the
                                -- timeCounter and timeUs assignments made in STATE_INIT on the same clock edge.
522                                 if(timeCounter = ((1 us / ClockPeriod) - 1)) then
523                                         if(state /= STATE_COMPLETE) then
524                                                 timeUs <= timeUs + 1;
525                                         end if;
526                                         timeCounter <= 0;
527                                 else
528                                         timeCounter <= timeCounter + 1;
529                                 end if;
530                         end if;
531                 end if;
532         end process;
533         
534         -- Process replies. This accepts Write request replies from the Nvme storing any errors and marking the buffer as free.
535         process(clk)
536         variable p: integer range 0 to NvmeWriteNum-1;
537         begin
538                 if(rising_edge(clk)) then
539                         if(reset = '1') then
540                                 for i in 0 to NvmeWriteNum-1 loop
541                                         buffers(i).inUse2       <= '0';
542                                 end loop;
543
544                                 replyIn.ready   <= '0';
545                                 error           <= (others => '0');
546                                 numBlocksDone   <= (others => '0');
547                                 peakLatency     <= (others => '0');
548                                 trimQueueDone   <= (others => '0');
549                                 replyState      <= REPSTATE_IDLE;
550                         else
551                                 case(replyState) is
552                                 when REPSTATE_IDLE =>
553                                         if(enable = '1') then
554                                                 replyState <= REPSTATE_INIT;
555                                         end if;
556                                 
557                                 when REPSTATE_INIT =>
558                                         -- Initialise for next run
559                                         for i in 0 to NvmeWriteNum-1 loop
560                                                 buffers(i).inUse2       <= '0';
561                                         end loop;
562
563                                         replyIn.ready   <= '1';
564                                         error           <= (others => '0');
565                                         numBlocksDone   <= (others => '0');
566                                         peakLatency     <= (others => '0');
567                                         replyState      <= REPSTATE_QUEUE_REPLY1;
568                                         
569                                 when REPSTATE_COMPLETE =>
570                                         if(enable = '0') then
571                                                 replyIn.ready   <= '0';
572                                                 replyState      <= REPSTATE_IDLE;
573                                         end if;
574                                 
575                                 when REPSTATE_QUEUE_REPLY1 =>
576                                         if(enable = '0') then
577                                                 if(replyIn.valid = '0') then
578                                                         replyIn.ready   <= '0';
579                                                 end if;
580                                                 replyState <= REPSTATE_COMPLETE;
581                                         else
582                                                 if(numBlocksDone >= dataChunkSize) then
583                                                         replyState <= REPSTATE_COMPLETE;
584                                         
585                                                 elsif(replyIn.valid = '1' and replyIn.ready = '1') then
586                                                         replyState <= REPSTATE_QUEUE_REPLY2;
587                                                 end if;
588                                         end if;
589
590                                 when REPSTATE_QUEUE_REPLY2 =>
591                                         if(enable = '0') then
592                                                 replyIn.ready   <= '0';
593                                                 replyState      <= REPSTATE_COMPLETE;
594
595                                         elsif(replyIn.valid = '1' and replyIn.ready = '1') then
596                                                 if(error = 0) then
597                                                         error(15 downto 0) <= '0' & nvmeReplyHead.status;
598                                                 end if;
599                                                 
600                                                 if(nvmeReplyHead.cid(7 downto 4) = x"F") then
601                                                         trimQueueDone <= trimQueueDone + 1;
602                                                 else
603                                                         numBlocksDone           <= numBlocksDone + 1;
604                                                         p                       := to_integer(nvmeReplyHead.cid(log2(NvmeWriteNum)-1 downto 0));
605                                                         buffers(p).inUse2       <= buffers(p).inUse1;
606
607                                                         if((timeUs - buffers(p).startTime) > peaklatency) then
608                                                                 peaklatency <= timeUs - buffers(p).startTime;
609                                                         end if;
610                                                 end if;
611
612                                                 replyState              <= REPSTATE_QUEUE_REPLY1;
613                                         end if;
614                                 
615                                 end case;
616                         end if;
617                 end if;
618         end process;
619         
620         -- Process Nvme read data requests
621         -- This processes the Nvme Pcie memory read requests for the data buffers memory.
622         readEnable <= '1';
623         -- readEnable <= memReplyOut.ready and not memReplyOut.last when((memState = MEMSTATE_READHEAD) or (memState = MEMSTATE_READDATA)) else '0';
624         memRequestHead  <= to_PcieRequestHeadType(memReqIn.data);
625         memReplyOut.data <= memData(31 downto 0) & to_stl(memReplyHead) when(memState = MEMSTATE_READHEAD)
626                 else readData(31 downto 0) & memData(127 downto 32);
627
628         process(clk)
629         begin
630                 if(rising_edge(clk)) then
631                         if(reset = '1') then
632                                 memReqIn.ready  <= '0';
633                                 memState        <= MEMSTATE_IDLE;
634                         else
635                                 case(MEMSTATE) is
636                                 when MEMSTATE_IDLE =>
637                                         if((memReqIn.ready = '1') and (memReqIn.valid = '1')) then
638                                                 memRequestHead1 <= memRequestHead;
639                                                 memCount        <= memRequestHead.count;
640
641                                                 if(memRequestHead.request = 0) then
642                                                         readAddress     <= memRequestHead.address(AddressWidth+3 downto 4);
643                                                         memReqIn.ready  <= '0';
644                                                         memState        <= MEMSTATE_READHEAD;
645                                                 end if;
646                                         else
647                                                 memReqIn.ready <= '1';
648                                         end if;
649
650                                 when MEMSTATE_READHEAD =>
651                                         if(memReplyOut.valid = '0') then
652                                                 memReplyHead.byteCount          <= memCount & "00";
653                                                 memReplyHead.address            <= memRequestHead1.address(memReplyHead.address'length - 1 downto 0);
654                                                 memReplyHead.error              <= (others => '0');
655                                                 memReplyHead.status             <= (others => '0');
656                                                 memReplyHead.tag                <= memRequestHead1.tag;
657                                                 memReplyHead.requesterId        <= memRequestHead1.requesterId;
658
659                                                 if(memCount > PcieMaxPayloadSize) then
660                                                         memReplyHead.count      <= to_unsigned(PcieMaxPayloadSize, memReplyHead.count'length);
661                                                         memChunkCount           <= to_unsigned(PcieMaxPayloadSize, memReplyHead.count'length);
662                                                 else
663                                                         memReplyHead.count      <= memCount;
664                                                         memChunkCount           <= memCount;
665                                                 end if;
666
667                                                 memReplyOut.valid       <= '1';
668                                                 memData                 <= readData;
669                                                 readAddress             <= readAddress + 1;
670                                         else
671                                                 memReplyOut.keep        <= (others => '1');
672
673                                                 if(memReplyOut.ready = '1' and memReplyOut.valid = '1') then
674                                                         readAddress     <= readAddress + 1;
675                                                         memState        <= MEMSTATE_READDATA;
676                                                 end if;
677                                         end if;
678                                 
679                                 when MEMSTATE_READDATA =>
680                                         if(memReplyOut.ready = '1' and memReplyOut.valid = '1') then
681                                                 memData         <= readData;
682
683                                                 if(memChunkCount = 4) then
684                                                         if(memCount = 4) then
685                                                                 memReplyOut.last        <= '0';
686                                                                 memReplyOut.valid       <= '0';
687                                                                 memState                <= MEMSTATE_IDLE;
688                                                         else
689                                                                 memReplyOut.last        <= '0';
690                                                                 memReplyOut.valid       <= '0';
691                                                                 memState                <= MEMSTATE_READHEAD;
692                                                         end if;
693
694                                                 elsif(memChunkCount = 8) then
695                                                         memReplyOut.keep <= "0111";
696                                                         memReplyOut.last <= '1';
697
698                                                 else
699                                                         readAddress             <= readAddress + 1;
700                                                         memReplyOut.last        <= '0';
701                                                 end if;
702
703                                                 memChunkCount           <= memChunkCount - 4;
704                                                 memCount                <= memCount - 4;
705                                                 memRequestHead1.address <= memRequestHead1.address + 16;
706                                         end if;
707
708                                 end case;
709                         end if;
710                 end if;
711         end process;
712 end;