]> git.lizzy.rs Git - plan9front.git/blob - sys/src/9/pc/sdvirtio10.c
df25df87aea97183239013bb7e06255f7ed6e639
[plan9front.git] / sys / src / 9 / pc / sdvirtio10.c
/*
 * virtio 1.0 disk driver
 * http://docs.oasis-open.org/virtio/virtio/v1.0/virtio-v1.0.html
 *
 * In contrast to sdvirtio.c, this driver handles the non-legacy
 * interface for virtio disk which uses mmio for all register accesses
 * and requires an elaborate pci capability structure dance to get working.
 *
 * It is kind of pointless as it is most likely slower than
 * port i/o (harder to emulate on the pc platform).
 *
 * The reason why this driver is needed is that vultr set the
 * disable-legacy=on option in the -device parameter for qemu
 * on their hypervisor.
 */
16 #include "u.h"
17 #include "../port/lib.h"
18 #include "mem.h"
19 #include "dat.h"
20 #include "fns.h"
21 #include "io.h"
22 #include "../port/pci.h"
23 #include "ureg.h"
24 #include "../port/error.h"
25
26 #include "../port/sd.h"
27
/* device specific configuration layouts */
typedef struct Vblkdev Vblkdev;
typedef struct Vscsidev Vscsidev;

/* common configuration registers, virtqueue pieces and per device state */
typedef struct Vconfig Vconfig;
typedef struct Vdesc Vdesc;
typedef struct Vring Vring;
typedef struct Vused Vused;
typedef struct Vqueue Vqueue;
typedef struct Vdev Vdev;
37
38
/*
 * device types; viopnpdevs() adds the type to 0x1040
 * to form the pci device id it searches for.
 */
enum {
	TypBlk	= 2,
	TypSCSI	= 8,
};

/* bits of the Vconfig status register */
enum {
	Acknowledge	= 1<<0,
	Driver		= 1<<1,
	DriverOk	= 1<<2,
	Failed		= 1<<7,
};

/* bits of Vdesc.flags */
enum {
	Next		= 1<<0,		/* chain continues at Vdesc.next */
	Write		= 1<<1,		/* buffer is filled in by the device */
	Indirect	= 1<<2,
};

/* size in bytes of the Vring header leading the avail and used rings */
enum {
	VringSize	= 4,
};

/* largest cdb and sense area the request buffers of vioscsireq() hold */
enum {
	CDBSIZE		= 32,
	SENSESIZE	= 96,
};
69
70         
71 struct Vscsidev
72 {
73         u32int  num_queues;
74         u32int  seg_max;
75         u32int  max_sectors;
76         u32int  cmd_per_lun;
77         u32int  event_info_size;
78         u32int  sense_size;
79         u32int  cdb_size;
80         u16int  max_channel;
81         u16int  max_target;
82         u32int  max_lun;
83 };
84
85 struct Vblkdev
86 {
87         u64int  capacity;
88 };
89
90 struct Vconfig {
91         u32int  devfeatsel;
92         u32int  devfeat;
93         u32int  drvfeatsel;
94         u32int  drvfeat;
95
96         u16int  msixcfg;
97         u16int  nqueues;
98
99         u8int   status;
100         u8int   cfggen;
101         u16int  queuesel;
102
103         u16int  queuesize;
104         u16int  queuemsixvect;
105
106         u16int  queueenable;
107         u16int  queuenotifyoff;
108
109         u64int  queuedesc;
110         u64int  queueavail;
111         u64int  queueused;
112 };
113
114 struct Vring
115 {
116         u16int  flags;
117         u16int  idx;
118 };
119
120 struct Vdesc
121 {
122         u64int  addr;
123         u32int  len;
124         u16int  flags;
125         u16int  next;
126 };
127
128 struct Vused
129 {
130         u32int  id;
131         u32int  len;
132 };
133
134 struct Vqueue
135 {
136         Lock;
137
138         Vdev    *dev;
139         void    *notify;
140         int     idx;
141
142         int     size;
143
144         int     free;
145         int     nfree;
146
147         Vdesc   *desc;
148
149         Vring   *avail;
150         u16int  *availent;
151         u16int  *availevent;
152
153         Vring   *used;
154         Vused   *usedent;
155         u16int  *usedevent;
156         u16int  lastused;
157
158         void    *rock[];
159 };
160
161 struct Vdev
162 {
163         int     typ;
164
165         Pcidev  *pci;
166
167         uvlong  port;
168         ulong   feat[2];
169
170         int     nqueue;
171         Vqueue  *queue[16];
172
173         void    *dev;   /* device specific config (for scsi) */
174
175         /* registers */
176         Vconfig *cfg;
177         u8int   *isr;
178         u8int   *notify;
179         u32int  notifyoffmult;
180
181         Vdev    *next;
182 };
183
184 static Vqueue*
185 mkvqueue(int size)
186 {
187         Vqueue *q;
188         uchar *p;
189         int i;
190
191         q = malloc(sizeof(*q) + sizeof(void*)*size);
192         p = mallocalign(
193                 PGROUND(sizeof(Vdesc)*size + 
194                         VringSize + 
195                         sizeof(u16int)*size + 
196                         sizeof(u16int)) +
197                 PGROUND(VringSize + 
198                         sizeof(Vused)*size + 
199                         sizeof(u16int)), 
200                 BY2PG, 0, 0);
201         if(p == nil || q == nil){
202                 print("virtio: no memory for Vqueue\n");
203                 free(p);
204                 free(q);
205                 return nil;
206         }
207
208         q->desc = (void*)p;
209         p += sizeof(Vdesc)*size;
210         q->avail = (void*)p;
211         p += VringSize;
212         q->availent = (void*)p;
213         p += sizeof(u16int)*size;
214         q->availevent = (void*)p;
215         p += sizeof(u16int);
216
217         p = (uchar*)PGROUND((uintptr)p);
218         q->used = (void*)p;
219         p += VringSize;
220         q->usedent = (void*)p;
221         p += sizeof(Vused)*size;
222         q->usedevent = (void*)p;
223
224         q->free = -1;
225         q->nfree = q->size = size;
226         for(i=0; i<size; i++){
227                 q->desc[i].next = q->free;
228                 q->free = i;
229         }
230
231         return q;
232 }
233
234 static int
235 matchvirtiocfgcap(Pcidev *p, int cap, int off, int typ)
236 {
237         int bar;
238
239         if(cap != 9 || pcicfgr8(p, off+3) != typ)
240                 return 1;
241
242         /* skip invalid or non memory bars */
243         bar = pcicfgr8(p, off+4);
244         if(bar < 0 || bar >= nelem(p->mem) 
245         || p->mem[bar].size == 0
246         || (p->mem[bar].bar & 3) != 0)
247                 return 1;
248
249         return 0;
250 }
251
252 static int
253 virtiocap(Pcidev *p, int typ)
254 {
255         return pcienumcaps(p, matchvirtiocfgcap, typ);
256 }
257
258 static void*
259 virtiomapregs(Pcidev *p, int cap, int size)
260 {
261         int bar, len;
262         uvlong addr;
263
264         if(cap < 0)
265                 return nil;
266         bar = pcicfgr8(p, cap+4) % nelem(p->mem);
267         addr = pcicfgr32(p, cap+8);
268         len = pcicfgr32(p, cap+12);
269         if(size <= 0)
270                 size = len;
271         else if(len < size)
272                 return nil;
273         if(addr+len > p->mem[bar].size)
274                 return nil;
275         addr += p->mem[bar].bar & ~0xFULL;
276         return vmap(addr, size);
277 }
278
279 static Vdev*
280 viopnpdevs(int typ)
281 {
282         Vdev *vd, *h, *t;
283         Vconfig *cfg;
284         Vqueue *q;
285         Pcidev *p;
286         int cap, bar;
287         int n, i;
288
289         h = t = nil;
290         for(p = nil; p = pcimatch(p, 0x1AF4, 0x1040+typ);){
291                 if(p->rid == 0)
292                         continue;
293                 if((cap = virtiocap(p, 1)) < 0)
294                         continue;
295                 bar = pcicfgr8(p, cap+4) % nelem(p->mem);
296                 cfg = virtiomapregs(p, cap, sizeof(Vconfig));
297                 if(cfg == nil)
298                         continue;
299                 if((vd = malloc(sizeof(*vd))) == nil){
300                         print("virtio: no memory for Vdev\n");
301                         break;
302                 }
303                 vd->port = p->mem[bar].bar & ~0xFULL;
304                 vd->typ = typ;
305                 vd->pci = p;
306                 vd->cfg = cfg;
307                 pcienable(p);
308
309                 vd->isr = virtiomapregs(p, virtiocap(p, 3), 0);
310                 if(vd->isr == nil){
311 Baddev:
312                         pcidisable(p);
313                         /* TODO: vunmap */
314                         free(vd);
315                         continue;
316                 }
317                 cap = virtiocap(p, 2);
318                 vd->notify = virtiomapregs(p, cap, 0);
319                 if(vd->notify == nil)
320                         goto Baddev;
321                 vd->notifyoffmult = pcicfgr32(p, cap+16);
322
323                 /* reset */
324                 cfg->status = 0;
325                 while(cfg->status != 0)
326                         delay(1);
327                 cfg->status = Acknowledge|Driver;
328
329                 /* negotiate feature bits */
330                 cfg->devfeatsel = 1;
331                 vd->feat[1] = cfg->devfeat;
332                 cfg->devfeatsel = 0;
333                 vd->feat[0] = cfg->devfeat;
334                 cfg->drvfeatsel = 1;
335                 cfg->drvfeat = vd->feat[1] & 1;
336                 cfg->drvfeatsel = 0;
337                 cfg->drvfeat = 0;
338
339                 for(i=0; i<nelem(vd->queue); i++){
340                         cfg->queuesel = i;
341                         n = cfg->queuesize;
342                         if(n == 0 || (n & (n-1)) != 0)
343                                 break;
344                         if((q = mkvqueue(n)) == nil)
345                                 break;
346                         q->notify = vd->notify + vd->notifyoffmult * cfg->queuenotifyoff;
347                         q->dev = vd;
348                         q->idx = i;
349                         vd->queue[i] = q;
350                         coherence();
351                         cfg->queuedesc = PADDR(q->desc);
352                         cfg->queueavail = PADDR(q->avail);
353                         cfg->queueused = PADDR(q->used);
354                 }
355                 vd->nqueue = i;
356         
357                 if(h == nil)
358                         h = vd;
359                 else
360                         t->next = vd;
361                 t = vd;
362         }
363
364         return h;
365 }
366
367 struct Rock {
368         int done;
369         Rendez *sleep;
370 };
371
372 static void
373 vqinterrupt(Vqueue *q)
374 {
375         int id, free, m;
376         struct Rock *r;
377         Rendez *z;
378
379         m = q->size-1;
380
381         ilock(q);
382         while((q->lastused ^ q->used->idx) & m){
383                 id = q->usedent[q->lastused++ & m].id;
384                 if(r = q->rock[id]){
385                         q->rock[id] = nil;
386                         z = r->sleep;
387                         r->done = 1;    /* hands off */
388                         if(z != nil)
389                                 wakeup(z);
390                 }
391                 do {
392                         free = id;
393                         id = q->desc[free].next;
394                         q->desc[free].next = q->free;
395                         q->free = free;
396                         q->nfree++;
397                 } while(q->desc[free].flags & Next);
398         }
399         iunlock(q);
400 }
401
402 static void
403 viointerrupt(Ureg *, void *arg)
404 {
405         Vdev *vd = arg;
406
407         if(vd->isr[0] & 1)
408                 vqinterrupt(vd->queue[vd->typ == TypSCSI ? 2 : 0]);
409 }
410
411 static int
412 viodone(void *arg)
413 {
414         return ((struct Rock*)arg)->done;
415 }
416
417 static void
418 vqio(Vqueue *q, int head)
419 {
420         struct Rock rock;
421
422         rock.done = 0;
423         rock.sleep = &up->sleep;
424         q->rock[head] = &rock;
425         q->availent[q->avail->idx & (q->size-1)] = head;
426         coherence();
427         q->avail->idx++;
428         iunlock(q);
429         if((q->used->flags & 1) == 0)
430                 *((u16int*)q->notify) = q->idx;
431         while(!rock.done){
432                 while(waserror())
433                         ;
434                 tsleep(rock.sleep, viodone, &rock, 1000);
435                 poperror();
436
437                 if(!rock.done)
438                         vqinterrupt(q);
439         }
440 }
441
442 static int
443 vioblkreq(Vdev *vd, int typ, void *a, long count, long secsize, uvlong lba)
444 {
445         int need, free, head;
446         Vqueue *q;
447         Vdesc *d;
448
449         u8int status;
450         struct Vioblkreqhdr {
451                 u32int  typ;
452                 u32int  prio;
453                 u64int  lba;
454         } req;
455
456         need = 2;
457         if(a != nil)
458                 need = 3;
459
460         status = -1;
461         req.typ = typ;
462         req.prio = 0;
463         req.lba = lba;
464
465         q = vd->queue[0];
466         ilock(q);
467         while(q->nfree < need){
468                 iunlock(q);
469
470                 if(!waserror())
471                         tsleep(&up->sleep, return0, 0, 500);
472                 poperror();
473
474                 ilock(q);
475         }
476
477         head = free = q->free;
478
479         d = &q->desc[free]; free = d->next;
480         d->addr = PADDR(&req);
481         d->len = sizeof(req);
482         d->flags = Next;
483
484         if(a != nil){
485                 d = &q->desc[free]; free = d->next;
486                 d->addr = PADDR(a);
487                 d->len = secsize*count;
488                 d->flags = typ ? Next : (Write|Next);
489         }
490
491         d = &q->desc[free]; free = d->next;
492         d->addr = PADDR(&status);
493         d->len = sizeof(status);
494         d->flags = Write;
495
496         q->free = free;
497         q->nfree -= need;
498
499         /* queue io, unlock and wait for completion */
500         vqio(q, head);
501
502         return status;
503 }
504
505 static int
506 vioscsireq(SDreq *r)
507 {
508         u8int resp[4+4+2+2+SENSESIZE];
509         u8int req[8+8+3+CDBSIZE];
510         int free, head;
511         u32int len;
512         Vqueue *q;
513         Vdesc *d;
514         Vdev *vd;
515         SDunit *u;
516         Vscsidev *scsi;
517
518         u = r->unit;
519         vd = u->dev->ctlr;
520         scsi = vd->dev;
521
522         memset(resp, 0, sizeof(resp));
523         memset(req, 0, sizeof(req));
524         req[0] = 1;
525         req[1] = u->subno;
526         req[2] = r->lun>>8;
527         req[3] = r->lun&0xFF;
528         *(u64int*)(&req[8]) = (uintptr)r;
529
530         memmove(&req[8+8+3], r->cmd, r->clen);
531
532         q = vd->queue[2];
533         ilock(q);
534         while(q->nfree < 3){
535                 iunlock(q);
536
537                 if(!waserror())
538                         tsleep(&up->sleep, return0, 0, 500);
539                 poperror();
540
541                 ilock(q);
542         }
543
544         head = free = q->free;
545
546         d = &q->desc[free]; free = d->next;
547         d->addr = PADDR(req);
548         d->len = 8+8+3+scsi->cdb_size;
549         d->flags = Next;
550
551         if(r->write && r->dlen > 0){
552                 d = &q->desc[free]; free = d->next;
553                 d->addr = PADDR(r->data);
554                 d->len = r->dlen;
555                 d->flags = Next;
556         }
557
558         d = &q->desc[free]; free = d->next;
559         d->addr = PADDR(resp);
560         d->len = 4+4+2+2+scsi->sense_size;
561         d->flags = Write;
562
563         if(!r->write && r->dlen > 0){
564                 d->flags |= Next;
565
566                 d = &q->desc[free]; free = d->next;
567                 d->addr = PADDR(r->data);
568                 d->len = r->dlen;
569                 d->flags = Write;
570         }
571         
572         q->free = free;
573         q->nfree -= 2 + (r->dlen > 0);
574
575         /* queue io, unlock and wait for completion */
576         vqio(q, head);
577
578         /* response+status */
579         r->status = resp[10];
580         if(resp[11] != 0)
581                 r->status = SDcheck;
582
583         /* sense_len */
584         len = *((u32int*)&resp[0]);
585         if(len > 0){
586                 if(len > sizeof(r->sense))
587                         len = sizeof(r->sense);
588                 memmove(r->sense, &resp[4+4+2+2], len);
589                 r->flags |= SDvalidsense;
590         }
591
592         /* data residue */
593         len = *((u32int*)&resp[4]);
594         if(len > r->dlen)
595                 r->rlen = 0;
596         else
597                 r->rlen = r->dlen - len;
598
599         return r->status;
600
601 }
602
603 static long
604 viobio(SDunit *u, int lun, int write, void *a, long count, uvlong lba)
605 {
606         long ss, cc, max, ret;
607         Vdev *vd;
608
609         vd = u->dev->ctlr;
610         if(vd->typ == TypSCSI)
611                 return scsibio(u, lun, write, a, count, lba);
612
613         max = 32;
614         ss = u->secsize;
615         ret = 0;
616         while(count > 0){
617                 if((cc = count) > max)
618                         cc = max;
619                 if(vioblkreq(vd, write != 0, (uchar*)a + ret, cc, ss, lba) != 0)
620                         error(Eio);
621                 ret += cc*ss;
622                 count -= cc;
623                 lba += cc;
624         }
625         return ret;
626 }
627
628 static int
629 viorio(SDreq *r)
630 {
631         int i, count, rw;
632         uvlong lba;
633         SDunit *u;
634         Vdev *vd;
635
636         u = r->unit;
637         vd = u->dev->ctlr;
638         if(vd->typ == TypSCSI)
639                 return vioscsireq(r);
640         if(r->cmd[0] == 0x35 || r->cmd[0] == 0x91){
641                 if(vioblkreq(vd, 4, nil, 0, 0, 0) != 0)
642                         return sdsetsense(r, SDcheck, 3, 0xc, 2);
643                 return sdsetsense(r, SDok, 0, 0, 0);
644         }
645         if((i = sdfakescsi(r)) != SDnostatus)
646                 return r->status = i;
647         if((i = sdfakescsirw(r, &lba, &count, &rw)) != SDnostatus)
648                 return i;
649         r->rlen = viobio(u, r->lun, rw == SDwrite, r->data, count, lba);
650         return r->status = SDok;
651 }
652
653 static int
654 vioonline(SDunit *u)
655 {
656         Vdev *vd;
657         Vblkdev *blk;
658         uvlong cap;
659
660         vd = u->dev->ctlr;
661         if(vd->typ == TypSCSI)
662                 return scsionline(u);
663
664         blk = vd->dev;
665         cap = blk->capacity;
666         if(u->sectors != cap){
667                 u->sectors = cap;
668                 u->secsize = 512;
669                 return 2;
670         }
671         return 1;
672 }
673
674 static int
675 vioverify(SDunit *u)
676 {
677         Vdev *vd;
678
679         vd = u->dev->ctlr;
680         if(vd->typ == TypSCSI)
681                 return scsiverify(u);
682
683         return 1;
684 }
685
/* declared early so viopnp() can take its address; initialized at the end of the file */
SDifc sdvirtio10ifc;
687
688 static int
689 vioenable(SDev *sd)
690 {
691         char name[32];
692         Vdev *vd;
693         int i;
694
695         vd = sd->ctlr;
696         pcisetbme(vd->pci);
697         snprint(name, sizeof(name), "%s (%s)", sd->name, sd->ifc->name);
698         intrenable(vd->pci->intl, viointerrupt, vd, vd->pci->tbdf, name);
699         coherence();
700
701         vd->cfg->status |= DriverOk;
702         for(i = 0; i < vd->nqueue; i++){
703                 vd->cfg->queuesel = i;
704                 vd->cfg->queueenable = 1;
705         }
706
707         return 1;
708 }
709
710 static int
711 viodisable(SDev *sd)
712 {
713         char name[32];
714         Vdev *vd;
715
716         vd = sd->ctlr;
717         snprint(name, sizeof(name), "%s (%s)", sd->name, sd->ifc->name);
718         intrdisable(vd->pci->intl, viointerrupt, vd, vd->pci->tbdf, name);
719         pciclrbme(vd->pci);
720         return 1;
721 }
722
723 static SDev*
724 viopnp(void)
725 {
726         SDev *s, *h, *t;
727         Vdev *vd;
728         int id;
729
730         h = t = nil;
731
732         id = 'F';
733         for(vd =  viopnpdevs(TypBlk); vd; vd = vd->next){
734                 if(vd->nqueue == 0)
735                         continue;
736
737                 if((vd->dev = virtiomapregs(vd->pci, virtiocap(vd->pci, 4), sizeof(Vblkdev))) == nil)
738                         break;
739                 if((s = malloc(sizeof(*s))) == nil)
740                         break;
741                 s->ctlr = vd;
742                 s->idno = id++;
743                 s->ifc = &sdvirtio10ifc;
744                 s->nunit = 1;
745                 if(h)
746                         t->next = s;
747                 else
748                         h = s;
749                 t = s;
750         }
751
752         id = '0';
753         for(vd = viopnpdevs(TypSCSI); vd; vd = vd->next){
754                 Vscsidev *scsi;
755
756                 if(vd->nqueue < 3)
757                         continue;
758
759                 if((scsi = virtiomapregs(vd->pci, virtiocap(vd->pci, 4), sizeof(Vscsidev))) == nil)
760                         break;
761                 if(scsi->max_target == 0){
762                         vunmap(scsi, sizeof(Vscsidev));
763                         continue;
764                 }
765                 if((scsi->cdb_size > CDBSIZE) || (scsi->sense_size > SENSESIZE)){
766                         print("sdvirtio: cdb %ud or sense size %ud too big\n",
767                                 scsi->cdb_size, scsi->sense_size);
768                         vunmap(scsi, sizeof(Vscsidev));
769                         continue;
770                 }
771                 vd->dev = scsi;
772
773                 if((s = malloc(sizeof(*s))) == nil)
774                         break;
775                 s->ctlr = vd;
776                 s->idno = id++;
777                 s->ifc = &sdvirtio10ifc;
778                 s->nunit = scsi->max_target;
779
780                 if(h)
781                         t->next = s;
782                 else
783                         h = s;
784                 t = s;
785         }
786         return h;
787 }
788
/*
 * Interface table of this driver.  The initializer is positional;
 * the comment on each entry names the slot it fills and nil marks
 * an operation this driver does not provide.
 */
SDifc sdvirtio10ifc = {
	"virtio10",			/* name */

	viopnp,				/* pnp */
	nil,				/* legacy */
	vioenable,			/* enable */
	viodisable,			/* disable */

	vioverify,			/* verify */
	vioonline,			/* online */
	viorio,				/* rio */
	nil,				/* rctl */
	nil,				/* wctl */

	viobio,				/* bio */
	nil,				/* probe */
	nil,				/* clear */
	nil,				/* rtopctl */
	nil,				/* wtopctl */
};