]> git.lizzy.rs Git - plan9front.git/blob - sys/src/9/pc/sdnvme.c
sdnvme: pass 0 instead of 0xffffffff as NSID for identify controller and create compl...
[plan9front.git] / sys / src / 9 / pc / sdnvme.c
1 #include "u.h"
2 #include "../port/lib.h"
3 #include "mem.h"
4 #include "dat.h"
5 #include "fns.h"
6 #include "io.h"
7 #include "ureg.h"
8 #include "../port/error.h"
9
10 #include "../port/sd.h"
11
/*
 * Forward type names: the structures below hold pointers to
 * one another, so all four names are introduced up front.
 */
typedef struct Ctlr Ctlr;
typedef struct SQ SQ;
typedef struct CQ CQ;
typedef struct WS WS;
16
17 struct WS
18 {
19         u32int  cdw0;
20         ushort  status;
21         Rendez  *sleep;
22         WS      **link;
23         SQ      *queue;
24 };
25
26 struct CQ
27 {
28         u32int  head;
29         u32int  mask;
30         u32int  shift;
31         u32int  *base;
32         Ctlr    *ctlr;
33 };
34
35 struct SQ
36 {
37         u32int  tail;
38         u32int  mask;
39         u32int  shift;
40         u32int  *base;
41         WS      **wait;
42         Ctlr    *ctlr;
43 };
44
45 struct Ctlr
46 {
47         QLock;
48
49         Lock    intr;
50         u32int  ints;
51         u32int  irqc[2];
52
53         Pcidev  *pci;
54         u32int  *reg;
55
56         u64int  cap;
57         uchar   *ident;
58         u32int  *nsid;
59         int     nnsid;
60
61         u32int  mps;            /* mps = 1<<mpsshift */
62         u32int  mpsshift;
63         u32int  dstrd;
64
65         CQ      cq[1+1];
66         SQ      sq[1+MAXMACH];
67
68         Ctlr    *next;
69 };
70
/*
 * controller registers, as indices into Ctlr.reg
 * (byte offset divided by the 4-byte word size)
 */
enum {
	Cap0		= 0x00/4,	/* low word of the capabilities */
	Cap1		= 0x04/4,	/* high word of the capabilities */
	Ver		= 0x08/4,
	IntMs		= 0x0C/4,	/* written to mask interrupts */
	IntMc		= 0x10/4,	/* written to unmask interrupts */
	CCfg		= 0x14/4,	/* written to enable/disable the controller */

	CSts		= 0x1C/4,	/* status polled by nvmeenable()/nvmedisable() */
	Nssr		= 0x20/4,
	AQAttr		= 0x24/4,	/* admin queue sizes */
	ASQBase0	= 0x28/4,	/* admin submission queue address, low/high */
	ASQBase1	= 0x2C/4,
	ACQBase0	= 0x30/4,	/* admin completion queue address, low/high */
	ACQBase1	= 0x34/4,

	DBell		= 0x1000/4,	/* first doorbell register */
};
90
91 static u32int*
92 qcmd(WS *ws, Ctlr *ctlr, int adm, u32int opc, u32int nsid, void *mptr, void *data, ulong len)
93 {
94         u32int cid, *e;
95         u64int pa;
96         SQ *sq;
97
98         if(!adm){
99         Retry:
100                 splhi();
101                 sq = &ctlr->sq[1+m->machno];
102         } else {
103                 qlock(ctlr);
104                 sq = &ctlr->sq[0];
105         }
106         ws->sleep = &up->sleep;
107         ws->queue = sq;
108         ws->link = &sq->wait[sq->tail & sq->mask];
109         while(*ws->link != nil){
110                 sched();
111                 if(!adm){
112                         /* should be very rare */
113                         goto Retry;
114                 }
115         }
116         *ws->link = ws;
117
118         e = &sq->base[((cid = sq->tail++) & sq->mask)<<4];
119         e[0] = opc | cid<<16;
120         e[1] = nsid;
121         e[2] = 0;
122         e[3] = 0;
123         if(mptr != nil){
124                 pa = PADDR(mptr);
125                 e[4] = pa;
126                 e[5] = pa>>32;
127         } else {
128                 e[4] = 0;
129                 e[5] = 0;
130         }
131         if(len > 0){
132                 pa = PADDR(data);
133                 e[6] = pa;
134                 e[7] = pa>>32;
135                 if(len > ctlr->mps - (pa & ctlr->mps-1))
136                         pa += ctlr->mps - (pa & ctlr->mps-1);
137                 else
138                         pa = 0;
139         } else {
140                 e[6] = 0;
141                 e[7] = 0;
142                 pa = 0;
143         }
144         e[8] = pa;
145         e[9] = pa>>32;
146         return e;
147 }
148
149 static void
150 nvmeintr(Ureg *, void *arg)
151 {
152         u32int phaseshift, *e;
153         WS *ws, **wp;
154         Ctlr *ctlr;
155         SQ *sq;
156         CQ *cq;
157
158         ctlr = arg;
159         if(ctlr->ints == 0)
160                 return;
161
162         ilock(&ctlr->intr);
163         ctlr->reg[IntMs] = ctlr->ints;
164         for(cq = &ctlr->cq[nelem(ctlr->cq)-1]; cq >= ctlr->cq; cq--){
165                 if(cq->base == nil)
166                         continue;
167                 phaseshift = 16 - cq->shift;
168                 for(;;){
169                         e = &cq->base[(cq->head & cq->mask)<<2];
170                         if(((e[3] ^ (cq->head << phaseshift)) & 0x10000) == 0)
171                                 break;
172
173                         if(0) iprint("nvmeintr: cq%d [%.4ux] %.8ux %.8ux %.8ux %.8ux\n",
174                                 (int)(cq - ctlr->cq), cq->head & cq->mask,
175                                 e[0], e[1], e[2], e[3]);
176
177                         sq = &ctlr->sq[e[2] >> 16];
178                         wp = &sq->wait[e[3] & sq->mask];
179                         if((ws = *wp) != nil && ws->link == wp){
180                                 Rendez *z = ws->sleep;
181                                 ws->cdw0 = e[0];
182                                 ws->status = e[3]>>17;
183                                 *wp = nil;
184                                 wakeup(z);
185                         }
186                         ctlr->reg[DBell + ((cq-ctlr->cq)*2+1 << ctlr->dstrd)] = ++cq->head & cq->mask;
187                 }
188         }
189         ctlr->reg[IntMc] = ctlr->ints;
190         iunlock(&ctlr->intr);
191 }
192
193 static int
194 wdone(void *arg)
195 {
196         WS *ws = arg;
197         return *ws->link != ws;
198 }
199
200 static u32int
201 wcmd(WS *ws)
202 {
203         SQ *sq = ws->queue;
204         Ctlr *ctlr = sq->ctlr;
205
206         coherence();
207         ctlr->reg[DBell + ((sq-ctlr->sq)*2+0 << ctlr->dstrd)] = sq->tail & sq->mask;
208         if(sq > ctlr->sq) {
209                 assert(sq == &ctlr->sq[1+m->machno]);
210                 spllo();
211         } else
212                 qunlock(sq->ctlr);
213         while(waserror())
214                 ;
215         tsleep(ws->sleep, wdone, ws, 5);
216         while(!wdone(ws)){
217                 nvmeintr(nil, ctlr);
218                 tsleep(ws->sleep, wdone, ws, 10);
219         }
220         poperror();
221         return ws->status;
222 }
223
224 void
225 checkstatus(u32int status, char *info)
226 {
227         if(status == 0)
228                 return;
229         snprint(up->genbuf, sizeof(up->genbuf), "%s: status %ux", info, status);
230         error(up->genbuf);
231 }
232
233 static long
234 nvmebio(SDunit *u, int lun, int write, void *a, long count, uvlong lba)
235 {
236         u32int nsid, s, n, m, *e;
237         Ctlr *ctlr;
238         uchar *p;
239         WS ws;
240
241         USED(lun);
242
243         ctlr = u->dev->ctlr;
244         nsid = ctlr->nsid[u->subno];
245         s = u->secsize;
246         p = a;
247         while(count > 0){
248                 m = (2*ctlr->mps - ((uintptr)p & ctlr->mps-1)) / s;
249                 if((n = count) > m)
250                         n = m;
251                 e = qcmd(&ws, ctlr, 0, write ? 0x01 : 0x02, nsid, nil, p, n*s);
252                 e[10] = lba;
253                 e[11] = lba>>32;
254                 e[12] = n-1;
255                 e[13] = (count>n)<<6;   /* sequential request */
256                 e[14] = 0;
257                 e[15] = 0;
258                 checkstatus(wcmd(&ws), write ? "write" : "read");
259                 p += n*s;
260                 count -= n;
261                 lba += n;
262         }
263         return p - (uchar*)a;
264 }
265
266 static int
267 nvmerio(SDreq *r)
268 {
269         int i, count, rw;
270         uvlong lba;
271         SDunit *u;
272
273         u = r->unit;
274         if(r->cmd[0] == 0x35 || r->cmd[0] == 0x91)
275                 return sdsetsense(r, SDok, 0, 0, 0);
276         if((i = sdfakescsi(r)) != SDnostatus)
277                 return r->status = i;
278         if((i = sdfakescsirw(r, &lba, &count, &rw)) != SDnostatus)
279                 return i;
280         r->rlen = nvmebio(u, r->lun, rw == SDwrite, r->data, count, lba);
281         return r->status = SDok;
282 }
283
284 static int
285 nvmeverify(SDunit *u)
286 {
287         Ctlr *ctlr = u->dev->ctlr;
288         return u->subno < ctlr->nnsid;
289 }
290
291 static int
292 nvmeonline(SDunit *u)
293 {
294         u32int *e, lbaf;
295         uchar *info, *p;
296         Ctlr *ctlr;
297         WS ws;
298
299         if(u->sectors != 0)
300                 return 1;
301
302         ctlr = u->dev->ctlr;
303         if((info = mallocalign(0x1000, ctlr->mps, 0, 0)) == nil)
304                 return 0;
305
306         e = qcmd(&ws, ctlr, 1, 0x06, ctlr->nsid[u->subno], nil, info, 0x1000);
307         e[10] = 0; // identify namespace
308         if(wcmd(&ws) != 0){
309                 free(info);
310                 return 0;
311         }
312         p = info;
313         u->sectors = p[0] | p[1]<<8 | p[2]<<16 | p[3]<<24
314                 | (u64int)p[4]<<32
315                 | (u64int)p[5]<<40
316                 | (u64int)p[6]<<48
317                 | (u64int)p[7]<<56;
318         p = &info[128 + 4*(info[26]&15)];
319         lbaf = p[0] | p[1]<<8 | p[2]<<16 | p[3]<<24;
320         u->secsize = 1<<((lbaf>>16)&0xFF);
321         free(info);
322
323         memset(u->inquiry, 0, sizeof u->inquiry);
324         u->inquiry[2] = 2;
325         u->inquiry[3] = 2;
326         u->inquiry[4] = sizeof u->inquiry - 4;
327         memmove(u->inquiry+8, ctlr->ident+24, 20);
328
329         return 2;
330 }
331
332 static int
333 nvmerctl(SDunit *u, char *p, int l)
334 {
335         Ctlr *ctlr;
336         char *e, *s;
337
338         if((ctlr = u->dev->ctlr) == nil || ctlr->ident == nil)
339                 return 0;
340
341         e = p+l;
342         s = p;
343
344         p = seprint(p, e, "model\t%.20s\n", (char*)ctlr->ident+24);
345         p = seprint(p, e, "serial\t%.10s\n", (char*)ctlr->ident+4);
346         p = seprint(p, e, "firm\t%.6s\n", (char*)ctlr->ident+64);
347         p = seprint(p, e, "geometry %llud %lud\n", u->sectors, u->secsize);
348
349         return p-s;
350 }
351
352 static void*
353 cqalloc(Ctlr *ctlr, CQ *cq, u32int lgsize)
354 {
355         cq->ctlr = ctlr;
356         cq->head = 0;
357         cq->shift = lgsize-4;
358         cq->mask = (1<<cq->shift)-1;
359         if((cq->base = mallocalign(1<<lgsize, ctlr->mps, 0, 0)) == nil)
360                 error(Enomem);
361         memset(cq->base, 0, 1<<lgsize);
362         return cq->base;
363 }
364
365 static void*
366 sqalloc(Ctlr *ctlr, SQ *sq, u32int lgsize)
367 {
368         sq->ctlr = ctlr;
369         sq->tail = 0;
370         sq->shift = lgsize-6;
371         sq->mask = (1<<sq->shift)-1;
372         if((sq->base = mallocalign(1<<lgsize, ctlr->mps, 0, 0)) == nil)
373                 error(Enomem);
374         if((sq->wait = mallocz(sizeof(WS*)*(sq->mask+1), 1)) == nil)
375                 error(Enomem);
376         memset(sq->base, 0, 1<<lgsize);
377         return sq->base;
378 }
379
380 static void
381 setupqueues(Ctlr *ctlr)
382 {
383         u32int lgsize, *e;
384         CQ *cq;
385         SQ *sq;
386         WS ws;
387         int i;
388
389         /* Overkill */
390         lgsize = 12-6+4;
391         while(lgsize < 16+4 && lgsize < ctlr->mpsshift && 1<<lgsize < conf.nmach<<12-6+4)
392                 lgsize++;
393
394         /* CQID1: shared completion queue */
395         cq = &ctlr->cq[1];
396         cqalloc(ctlr, cq, lgsize);
397         e = qcmd(&ws, ctlr, 1, 0x05, 0, nil, cq->base, 1<<lgsize);
398         e[10] = (cq - ctlr->cq) | cq->mask<<16;
399         e[11] = 3; /* IEN | PC */
400         checkstatus(wcmd(&ws), "create completion queue");
401
402         /* SQID[1..nmach]: submission queue per cpu */
403         for(i=1; i<=conf.nmach; i++){
404                 sq = &ctlr->sq[i];
405                 sqalloc(ctlr, sq, 12);
406                 e = qcmd(&ws, ctlr, 1, 0x01, 0, nil, sq->base, 0x1000);
407                 e[10] = i | sq->mask<<16;
408                 e[11] = (cq - ctlr->cq)<<16 | 1;        /* CQID<<16 | PC */
409                 checkstatus(wcmd(&ws), "create submission queue");
410         }
411
412         ilock(&ctlr->intr);
413         ctlr->ints |= 1<<(cq - ctlr->cq);
414         ctlr->reg[IntMc] = ctlr->ints;
415         iunlock(&ctlr->intr);
416 }
417
418 static void
419 identify(Ctlr *ctlr)
420 {
421         u32int *e;
422         WS ws;
423         
424         if(ctlr->ident == nil)
425                 if((ctlr->ident = mallocalign(0x1000, ctlr->mps, 0, 0)) == nil)
426                         error(Enomem);
427         if(ctlr->nsid == nil)
428                 if((ctlr->nsid = mallocalign(0x1000, ctlr->mps, 0, 0)) == nil)
429                         error(Enomem);
430
431         e = qcmd(&ws, ctlr, 1, 0x06, 0, nil, ctlr->ident, 0x1000);
432         e[10] = 1; // identify controller
433         checkstatus(wcmd(&ws), "identify controller");
434
435         e = qcmd(&ws, ctlr, 1, 0x06, 0, nil, ctlr->nsid, 0x1000);
436         e[10] = 2; // namespace list 
437         checkstatus(wcmd(&ws), "namespace list");
438
439         ctlr->nnsid = 0;
440         while(ctlr->nnsid < 1024 && ctlr->nsid[ctlr->nnsid] != 0)
441                 ctlr->nnsid++;
442 }
443
444 static int
445 nvmedisable(SDev *sd)
446 {
447         char name[32];
448         Ctlr *ctlr;
449         int i;
450
451         ctlr = sd->ctlr;
452
453         /* mask interrupts */
454         ilock(&ctlr->intr);
455         ctlr->ints = 0;
456         ctlr->reg[IntMs] = ~ctlr->ints;
457         iunlock(&ctlr->intr);
458
459         /* disable controller */
460         ctlr->reg[CCfg] = 0;
461
462         for(i = 0; i < 10; i++){
463                 if((ctlr->reg[CSts] & 1) == 0)
464                         break;
465                 tsleep(&up->sleep, return0, nil, 100);
466         }
467
468         snprint(name, sizeof(name), "%s (%s)", sd->name, sd->ifc->name);
469         intrdisable(ctlr->pci->intl, nvmeintr, ctlr, ctlr->pci->tbdf, name);
470
471         pciclrbme(ctlr->pci);   /* dma disable */
472
473         for(i=0; i<nelem(ctlr->sq); i++){
474                 free(ctlr->sq[i].base);
475                 free(ctlr->sq[i].wait);
476         }
477         for(i=0; i<nelem(ctlr->cq); i++)
478                 free(ctlr->cq[i].base);
479
480         memset(ctlr->sq, 0, sizeof(ctlr->sq));
481         memset(ctlr->cq, 0, sizeof(ctlr->cq));
482
483         free(ctlr->ident);
484         ctlr->ident = nil;
485         free(ctlr->nsid);
486         ctlr->nsid = nil;
487         ctlr->nnsid = 0;
488
489         return 1;
490 }
491
492 static int
493 nvmeenable(SDev *sd)
494 {
495         char name[32];
496         Ctlr *ctlr;
497         u64int pa;
498         int to;
499
500         ctlr = sd->ctlr;
501
502         snprint(name, sizeof(name), "%s (%s)", sd->name, sd->ifc->name);
503         intrenable(ctlr->pci->intl, nvmeintr, ctlr, ctlr->pci->tbdf, name);
504
505         if(waserror()){
506                 print("%s: %s\n", name, up->errstr);
507                 nvmedisable(sd);
508                 sd->nunit = 0;  /* hack: prevent further probing */
509                 return 0;
510         }
511         
512         pa = PADDR(cqalloc(ctlr, &ctlr->cq[0], ctlr->mpsshift));
513         ctlr->reg[ACQBase0] = pa;
514         ctlr->reg[ACQBase1] = pa>>32;
515
516         pa = PADDR(sqalloc(ctlr, &ctlr->sq[0], ctlr->mpsshift));
517         ctlr->reg[ASQBase0] = pa;
518         ctlr->reg[ASQBase1] = pa>>32;
519
520         ctlr->reg[AQAttr] = ctlr->sq[0].mask | ctlr->cq[0].mask<<16;
521
522         /* dma enable */
523         pcisetbme(ctlr->pci);
524
525         /* enable interrupt */
526         ilock(&ctlr->intr);
527         ctlr->ints = 1;
528         ctlr->reg[IntMc] = ctlr->ints;
529         iunlock(&ctlr->intr);
530
531         /* enable controller */
532         ctlr->reg[CCfg] = 1 | (ctlr->mpsshift-12)<<7 | 6<<16 | 4<<20;
533
534         for(to = (ctlr->cap>>24) & 255; to >= 0; to--){
535                 tsleep(&up->sleep, return0, nil, 500);
536                 if((ctlr->reg[CSts] & 3) == 1)
537                         goto Ready;
538         }
539         if(ctlr->reg[CSts] & 2)
540                 error("fatal controller status during initialization");
541         error("controller initialization timeout");
542 Ready:
543         identify(ctlr);
544         setupqueues(ctlr);
545
546         poperror();
547
548         return 1;
549 }
550
551 static Ctlr*
552 nvmepnpctlrs(void)
553 {
554         Ctlr *ctlr, *h, *t;
555         Pcidev *p;
556         int i;
557
558         h = t = nil;
559         for(p = nil; p = pcimatch(p, 0, 0);){
560                 if(p->ccrb != 1 || p->ccru != 8 || p->ccrp != 2)
561                         continue;
562                 if(p->mem[0].size == 0)
563                         continue;
564                 if((ctlr = malloc(sizeof(*ctlr))) == nil){
565                         print("nvme: no memory for Ctlr\n");
566                         break;
567                 }
568                 ctlr->pci = p;
569                 ctlr->reg = vmap(p->mem[0].bar & ~0xF, p->mem[0].size);
570                 if(ctlr->reg == nil){
571                         print("nvme: can't vmap bar0\n");
572                 Bad:
573                         if(ctlr->reg != nil)
574                                 vunmap(ctlr->reg, p->mem[0].size);
575                         free(ctlr);
576                         continue;
577                 }
578                 ctlr->cap = ctlr->reg[Cap0];
579                 ctlr->cap |= (u64int)ctlr->reg[Cap1]<<32;
580
581                 /* mask interrupts */
582                 ctlr->ints = 0;
583                 ctlr->reg[IntMs] = ~ctlr->ints;
584
585                 /* disable controller */
586                 ctlr->reg[CCfg] = 0;
587
588                 if((ctlr->cap&(1ULL<<37)) == 0){
589                         print("nvme: doesnt support NVM commactlr set: %ux\n",
590                                 (u32int)(ctlr->cap>>37) & 0xFF);
591                         goto Bad;
592                 }
593
594                 /* use 64K page size when possible */
595                 ctlr->dstrd = (ctlr->cap >> 32) & 15;
596                 for(i = (ctlr->cap >> 48) & 15; i < ((ctlr->cap >> 52) & 15); i++){
597                         if(i >= 16-12)  /* 64K */
598                                 break;
599                 }
600                 ctlr->mpsshift = i+12;
601                 ctlr->mps = 1 << ctlr->mpsshift;
602
603                 if(h == nil)
604                         h = ctlr;
605                 else
606                         t->next = ctlr;
607                 t = ctlr;
608         }
609
610         return h;
611 }
612
613 SDifc sdnvmeifc;
614
615 static SDev*
616 nvmepnp(void)
617 {
618         SDev *s, *h, *t;
619         Ctlr *ctlr;
620         int id;
621
622         h = t = nil;
623
624         id = 'N';
625         for(ctlr = nvmepnpctlrs(); ctlr != nil; ctlr = ctlr->next){
626                 if((s = malloc(sizeof(*s))) == nil)
627                         break;
628                 s->ctlr = ctlr;
629                 s->idno = id++;
630                 s->ifc = &sdnvmeifc;
631                 s->nunit = 1024;
632                 if(h)
633                         t->next = s;
634                 else
635                         h = s;
636                 t = s;
637         }
638
639         return h;
640 }
641
642 SDifc sdnvmeifc = {
643         "nvme",                         /* name */
644
645         nvmepnp,                        /* pnp */
646         nil,                            /* legacy */
647         nvmeenable,                     /* enable */
648         nvmedisable,                    /* disable */
649
650         nvmeverify,                     /* verify */
651         nvmeonline,                     /* online */
652         nvmerio,                        /* rio */
653         nvmerctl,                       /* rctl */
654         nil,                            /* wctl */
655
656         nvmebio,                        /* bio */
657         nil,                            /* probe */
658         nil,                            /* clear */
659         nil,                            /* rtopctl */
660         nil,                            /* wtopctl */
661 };