]> git.lizzy.rs Git - plan9front.git/blob - sys/src/9/pc/sdvirtio10.c
virtio: set FeaturesOk flag after feature negotiation, and enable queues before Drive...
[plan9front.git] / sys / src / 9 / pc / sdvirtio10.c
1 /*
2  * virtio 1.0 disk driver
3  * http://docs.oasis-open.org/virtio/virtio/v1.0/virtio-v1.0.html
4  *
5  * In contrast to sdvirtio.c, this driver handles the non-legacy
6  * interface for virtio disk which uses mmio for all register accesses
 * and requires an elaborate pci capability structure dance to get working.
8  *
9  * It is kind of pointless as it is most likely slower than
10  * port i/o (harder to emulate on the pc platform).
11  * 
 * The reason this driver is needed is that vultr sets the
13  * disable-legacy=on option in the -device parameter for qemu
14  * on their hypervisor.
15  */
16 #include "u.h"
17 #include "../port/lib.h"
18 #include "mem.h"
19 #include "dat.h"
20 #include "fns.h"
21 #include "io.h"
22 #include "../port/pci.h"
23 #include "ureg.h"
24 #include "../port/error.h"
25
26 #include "../port/sd.h"
27
/* forward typedefs for the structures defined further down in this file */

/* device specific configuration layouts */
typedef struct Vscsidev Vscsidev;
typedef struct Vblkdev Vblkdev;

/* common configuration registers, ring pieces and driver bookkeeping */
typedef struct Vconfig Vconfig;
typedef struct Vring Vring;
typedef struct Vdesc Vdesc;
typedef struct Vused Vused;
typedef struct Vqueue Vqueue;
typedef struct Vdev Vdev;
37
38
/*
 * virtio device types handled by this driver.
 * viopnpdevs adds the type to 0x1040 to form the pci device id.
 */
enum {
	TypBlk	= 0x02,
	TypSCSI	= 0x08,
};
44
/* bits of the Vconfig status register, listed in bit order */
enum {
	Acknowledge	= 1<<0,
	Driver		= 1<<1,
	DriverOk	= 1<<2,
	FeaturesOk	= 1<<3,
	Failed		= 1<<7,
};
53
/* bits of Vdesc.flags */
enum {
	Next		= 1<<0,	/* chain continues at Vdesc.next */
	Write		= 1<<1,	/* set on buffers the device fills in (status, response, read data) */
	Indirect	= 1<<2,
};
60
/* size in bytes of a Vring header: two u16int fields, flags and idx */
enum {
	VringSize = 2 + 2,
};
65
/*
 * sizes of the cdb and sense areas in the on-stack scsi request and
 * response buffers; viopnp skips devices that ask for more.
 */
enum {
	CDBSIZE		= 0x20,
	SENSESIZE	= 0x60,
};
70
71         
/*
 * Device specific configuration of a scsi controller.
 * viopnp maps it from the pci capability of type 4; the
 * field order and widths are the device's register layout.
 */
struct Vscsidev
{
	u32int	num_queues;
	u32int	seg_max;
	u32int	max_sectors;
	u32int	cmd_per_lun;
	u32int	event_info_size;
	u32int	sense_size;	/* must not exceed SENSESIZE (checked in viopnp) */
	u32int	cdb_size;	/* must not exceed CDBSIZE (checked in viopnp) */
	u16int	max_channel;
	u16int	max_target;	/* becomes SDev.nunit; 0 makes viopnp skip the device */
	u32int	max_lun;
};
85
/*
 * Device specific configuration of a block device,
 * mapped by viopnp from the pci capability of type 4.
 */
struct Vblkdev
{
	u64int	capacity;	/* sector count; vioonline pairs it with a 512 byte sector size */
};
90
/*
 * Common configuration registers, mapped by viopnpdevs from the
 * pci capability of type 1.  All accesses go to device memory, so
 * field order and widths must not be changed.
 */
struct Vconfig {
	u32int	devfeatsel;	/* selects which 32 bit word devfeat shows */
	u32int	devfeat;	/* features offered by the device */
	u32int	drvfeatsel;	/* selects which 32 bit word drvfeat writes */
	u32int	drvfeat;	/* features accepted by the driver */

	u16int	msixcfg;
	u16int	nqueues;

	u8int	status;		/* Acknowledge, Driver, ... status flags; 0 resets */
	u8int	cfggen;
	u16int	queuesel;	/* selects the queue the queue* fields below refer to */

	u16int	queuesize;	/* number of entries of the selected queue */
	u16int	queuemsixvect;

	u16int	queueenable;	/* vioenable writes 1 here for each queue */
	u16int	queuenotifyoff;	/* scaled by Vdev.notifyoffmult to find the notify register */

	u64int	queuedesc;	/* physical address of the descriptor table */
	u64int	queueavail;	/* physical address of the avail ring */
	u64int	queueused;	/* physical address of the used ring */
};
114
/*
 * Header found at the start of both the avail and the used ring
 * (VringSize bytes); the ring entries follow it in memory.
 */
struct Vring
{
	u16int	flags;	/* vqio skips the notify when bit 0 of the used ring's flags is set */
	u16int	idx;	/* running entry counter: vqio bumps avail's, vqinterrupt reads used's */
};
120
/* One entry of a queue's descriptor table, shared with the device. */
struct Vdesc
{
	u64int	addr;	/* physical address of the buffer */
	u32int	len;	/* buffer length in bytes */
	u16int	flags;	/* Next, Write, Indirect */
	u16int	next;	/* next descriptor of the chain; also links the free list */
};
128
/* One entry of the used ring, read by vqinterrupt. */
struct Vused
{
	u32int	id;	/* index of the first descriptor of the completed chain */
	u32int	len;
};
134
/*
 * Driver side state of one virtqueue, created by mkvqueue.
 * The embedded Lock protects the free list and the rings.
 */
struct Vqueue
{
	Lock;

	Vdev	*dev;		/* owning device */
	void	*notify;	/* notify register; vqio writes idx to it */
	int	idx;		/* queue number */

	int	size;		/* number of descriptors */

	int	free;		/* head of the free descriptor list, -1 if empty */
	int	nfree;		/* number of descriptors on the free list */

	Vdesc	*desc;		/* descriptor table, size entries */

	Vring	*avail;		/* avail ring header */
	u16int	*availent;	/* avail ring entries, size of them */
	u16int	*availevent;

	Vring	*used;		/* used ring header, page aligned */
	Vused	*usedent;	/* used ring entries, size of them */
	u16int	*usedevent;
	u16int	lastused;	/* count of used entries already handled by vqinterrupt */

	void	*rock[];	/* per head descriptor: struct Rock of the waiting request */
};
161
/* Per device state, built by viopnpdevs and linked through next. */
struct Vdev
{
	int	typ;		/* TypBlk or TypSCSI */

	Pcidev	*pci;

	uvlong	port;		/* base address of the bar holding the common config */
	ulong	feat[2];	/* device feature words: [0] low, [1] high */

	int	nqueue;		/* number of valid entries in queue[] */
	Vqueue	*queue[16];

	void	*dev;	/* device specific config (for scsi) */

	/* registers */
	Vconfig	*cfg;		/* common configuration (capability type 1) */
	u8int	*isr;		/* interrupt status (capability type 3) */
	u8int	*notify;	/* notify area (capability type 2) */
	u32int	notifyoffmult;	/* multiplier for Vconfig.queuenotifyoff */

	Vdev	*next;
};
184
185 static Vqueue*
186 mkvqueue(int size)
187 {
188         Vqueue *q;
189         uchar *p;
190         int i;
191
192         q = malloc(sizeof(*q) + sizeof(void*)*size);
193         p = mallocalign(
194                 PGROUND(sizeof(Vdesc)*size + 
195                         VringSize + 
196                         sizeof(u16int)*size + 
197                         sizeof(u16int)) +
198                 PGROUND(VringSize + 
199                         sizeof(Vused)*size + 
200                         sizeof(u16int)), 
201                 BY2PG, 0, 0);
202         if(p == nil || q == nil){
203                 print("virtio: no memory for Vqueue\n");
204                 free(p);
205                 free(q);
206                 return nil;
207         }
208
209         q->desc = (void*)p;
210         p += sizeof(Vdesc)*size;
211         q->avail = (void*)p;
212         p += VringSize;
213         q->availent = (void*)p;
214         p += sizeof(u16int)*size;
215         q->availevent = (void*)p;
216         p += sizeof(u16int);
217
218         p = (uchar*)PGROUND((uintptr)p);
219         q->used = (void*)p;
220         p += VringSize;
221         q->usedent = (void*)p;
222         p += sizeof(Vused)*size;
223         q->usedevent = (void*)p;
224
225         q->free = -1;
226         q->nfree = q->size = size;
227         for(i=0; i<size; i++){
228                 q->desc[i].next = q->free;
229                 q->free = i;
230         }
231
232         return q;
233 }
234
235 static int
236 matchvirtiocfgcap(Pcidev *p, int cap, int off, int typ)
237 {
238         int bar;
239
240         if(cap != 9 || pcicfgr8(p, off+3) != typ)
241                 return 1;
242
243         /* skip invalid or non memory bars */
244         bar = pcicfgr8(p, off+4);
245         if(bar < 0 || bar >= nelem(p->mem) 
246         || p->mem[bar].size == 0
247         || (p->mem[bar].bar & 3) != 0)
248                 return 1;
249
250         return 0;
251 }
252
253 static int
254 virtiocap(Pcidev *p, int typ)
255 {
256         return pcienumcaps(p, matchvirtiocfgcap, typ);
257 }
258
259 static void*
260 virtiomapregs(Pcidev *p, int cap, int size)
261 {
262         int bar, len;
263         uvlong addr;
264
265         if(cap < 0)
266                 return nil;
267         bar = pcicfgr8(p, cap+4) % nelem(p->mem);
268         addr = pcicfgr32(p, cap+8);
269         len = pcicfgr32(p, cap+12);
270         if(size <= 0)
271                 size = len;
272         else if(len < size)
273                 return nil;
274         if(addr+len > p->mem[bar].size)
275                 return nil;
276         addr += p->mem[bar].bar & ~0xFULL;
277         return vmap(addr, size);
278 }
279
/*
 * Find all virtio 1.0 pci devices of the given type (vendor 0x1AF4,
 * device 0x1040+typ), map their registers, reset them, negotiate
 * features and set up their queues.
 *
 * Returns a list of Vdev linked through next, or nil.  The queues
 * are only programmed here; they are enabled later by vioenable.
 */
static Vdev*
viopnpdevs(int typ)
{
	Vdev *vd, *h, *t;
	Vconfig *cfg;
	Vqueue *q;
	Pcidev *p;
	int cap, bar;
	int n, i;

	h = t = nil;
	for(p = nil; p = pcimatch(p, 0x1AF4, 0x1040+typ);){
		/* NOTE(review): revision 0 devices are skipped -- presumably left to sdvirtio.c; confirm */
		if(p->rid == 0)
			continue;
		/* capability type 1: common configuration */
		if((cap = virtiocap(p, 1)) < 0)
			continue;
		bar = pcicfgr8(p, cap+4) % nelem(p->mem);
		cfg = virtiomapregs(p, cap, sizeof(Vconfig));
		if(cfg == nil)
			continue;
		if((vd = malloc(sizeof(*vd))) == nil){
			print("virtio: no memory for Vdev\n");
			break;
		}
		vd->port = p->mem[bar].bar & ~0xFULL;
		vd->typ = typ;
		vd->pci = p;
		vd->cfg = cfg;
		pcienable(p);

		/* capability type 3: interrupt status register */
		vd->isr = virtiomapregs(p, virtiocap(p, 3), 0);
		if(vd->isr == nil){
Baddev:
			/* shared failure path; the register mappings made so far are not released */
			pcidisable(p);
			/* TODO: vunmap */
			free(vd);
			continue;
		}
		/* capability type 2: queue notify area, multiplier at cap+16 */
		cap = virtiocap(p, 2);
		vd->notify = virtiomapregs(p, cap, 0);
		if(vd->notify == nil)
			goto Baddev;
		vd->notifyoffmult = pcicfgr32(p, cap+16);

		/* reset */
		cfg->status = 0;
		while(cfg->status != 0)
			delay(1);
		cfg->status = Acknowledge|Driver;

		/* negotiate feature bits */
		cfg->devfeatsel = 1;
		vd->feat[1] = cfg->devfeat;
		cfg->devfeatsel = 0;
		vd->feat[0] = cfg->devfeat;
		/* accept only bit 0 of the high word (if offered) and nothing from the low word */
		cfg->drvfeatsel = 1;
		cfg->drvfeat = vd->feat[1] & 1;
		cfg->drvfeatsel = 0;
		cfg->drvfeat = 0;
		cfg->status |= FeaturesOk;

		/* set up queues until one reports a size that is zero or not a power of two */
		for(i=0; i<nelem(vd->queue); i++){
			cfg->queuesel = i;
			n = cfg->queuesize;
			if(n == 0 || (n & (n-1)) != 0)
				break;
			if((q = mkvqueue(n)) == nil)
				break;
			q->notify = vd->notify + vd->notifyoffmult * cfg->queuenotifyoff;
			q->dev = vd;
			q->idx = i;
			vd->queue[i] = q;
			coherence();
			/* tell the device where the three parts of the queue live */
			cfg->queuedesc = PADDR(q->desc);
			cfg->queueavail = PADDR(q->avail);
			cfg->queueused = PADDR(q->used);
		}
		vd->nqueue = i;
	
		/* append to the result list */
		if(h == nil)
			h = vd;
		else
			t->next = vd;
		t = vd;
	}

	return h;
}
368
/*
 * Completion record of one outstanding request.  vqio keeps it on
 * its stack and stores a pointer in Vqueue.rock[head]; vqinterrupt
 * sets done and wakes up sleep.
 */
struct Rock {
	int done;	/* set to 1 by vqinterrupt once the request completed */
	Rendez *sleep;	/* where the issuing process sleeps */
};
373
/*
 * Process the used ring entries of q that have not been seen yet:
 * mark the waiting request done, wake its process and put the
 * descriptor chain back on the free list.
 * Called from viointerrupt and, as a fallback, from vqio after a
 * sleep timeout.
 */
static void
vqinterrupt(Vqueue *q)
{
	int id, free, m;
	struct Rock *r;
	Rendez *z;

	/* size is a power of two (checked in viopnpdevs), so m is the index mask */
	m = q->size-1;

	ilock(q);
	while((q->lastused ^ q->used->idx) & m){
		id = q->usedent[q->lastused++ & m].id;
		if(r = q->rock[id]){
			q->rock[id] = nil;
			/* fetch the Rendez first: r lives on the waiter's stack */
			z = r->sleep;
			r->done = 1;	/* hands off */
			if(z != nil)
				wakeup(z);
		}
		/* walk the chain starting at id, pushing each descriptor on the free list */
		do {
			free = id;
			id = q->desc[free].next;
			q->desc[free].next = q->free;
			q->free = free;
			q->nfree++;
		} while(q->desc[free].flags & Next);
	}
	iunlock(q);
}
403
404 static void
405 viointerrupt(Ureg *, void *arg)
406 {
407         Vdev *vd = arg;
408
409         if(vd->isr[0] & 1)
410                 vqinterrupt(vd->queue[vd->typ == TypSCSI ? 2 : 0]);
411 }
412
413 static int
414 viodone(void *arg)
415 {
416         return ((struct Rock*)arg)->done;
417 }
418
/*
 * Publish the descriptor chain starting at head on q's avail ring,
 * notify the device and sleep until the request has completed.
 *
 * Must be called with q locked (the callers ilock it); the lock is
 * released here before the device is notified.
 */
static void
vqio(Vqueue *q, int head)
{
	struct Rock rock;

	rock.done = 0;
	rock.sleep = &up->sleep;
	q->rock[head] = &rock;
	q->availent[q->avail->idx & (q->size-1)] = head;
	/* the ring entry must be visible before the index is advanced */
	coherence();
	q->avail->idx++;
	iunlock(q);
	/* notify by writing the queue number, unless bit 0 of the used ring's flags is set */
	if((q->used->flags & 1) == 0)
		*((u16int*)q->notify) = q->idx;
	while(!rock.done){
		/* errors raised while sleeping are swallowed: rock is on this stack, so we cannot leave early */
		while(waserror())
			;
		tsleep(rock.sleep, viodone, &rock, 1000);
		poperror();

		/* timed out without completion: poll the used ring ourselves */
		if(!rock.done)
			vqinterrupt(q);
	}
}
443
/*
 * Issue one request on queue 0 of a block device and wait for it.
 *
 * typ is the request type (viobio passes 0 for read and 1 for write,
 * viorio passes 4 for cache synchronization), a is the data buffer
 * of count sectors of secsize bytes (nil for no data) and lba the
 * starting sector.  Returns the status byte written by the device
 * (callers treat 0 as success); it is preset to 0xFF.
 */
static int
vioblkreq(Vdev *vd, int typ, void *a, long count, long secsize, uvlong lba)
{
	int need, free, head;
	Vqueue *q;
	Vdesc *d;

	u8int status;
	struct Vioblkreqhdr {
		u32int	typ;
		u32int	prio;
		u64int	lba;
	} req;

	/* header + status, plus one descriptor for the data buffer */
	need = 2;
	if(a != nil)
		need = 3;

	status = -1;
	req.typ = typ;
	req.prio = 0;
	req.lba = lba;

	q = vd->queue[0];
	ilock(q);
	/* wait, polling every 500ms, until enough descriptors are free */
	while(q->nfree < need){
		iunlock(q);

		if(!waserror())
			tsleep(&up->sleep, return0, 0, 500);
		poperror();

		ilock(q);
	}

	/* take descriptors off the free list and chain them up */
	head = free = q->free;

	/* request header, read by the device */
	d = &q->desc[free]; free = d->next;
	d->addr = PADDR(&req);
	d->len = sizeof(req);
	d->flags = Next;

	if(a != nil){
		d = &q->desc[free]; free = d->next;
		d->addr = PADDR(a);
		d->len = secsize*count;
		/* for typ 0 the device fills the buffer, so it is marked Write */
		d->flags = typ ? Next : (Write|Next);
	}

	/* status byte, written by the device; ends the chain */
	d = &q->desc[free]; free = d->next;
	d->addr = PADDR(&status);
	d->len = sizeof(status);
	d->flags = Write;

	q->free = free;
	q->nfree -= need;

	/* queue io, unlock and wait for completion */
	vqio(q, head);

	return status;
}
506
507 static int
508 vioscsireq(SDreq *r)
509 {
510         u8int resp[4+4+2+2+SENSESIZE];
511         u8int req[8+8+3+CDBSIZE];
512         int free, head;
513         u32int len;
514         Vqueue *q;
515         Vdesc *d;
516         Vdev *vd;
517         SDunit *u;
518         Vscsidev *scsi;
519
520         u = r->unit;
521         vd = u->dev->ctlr;
522         scsi = vd->dev;
523
524         memset(resp, 0, sizeof(resp));
525         memset(req, 0, sizeof(req));
526         req[0] = 1;
527         req[1] = u->subno;
528         req[2] = r->lun>>8;
529         req[3] = r->lun&0xFF;
530         *(u64int*)(&req[8]) = (uintptr)r;
531
532         memmove(&req[8+8+3], r->cmd, r->clen);
533
534         q = vd->queue[2];
535         ilock(q);
536         while(q->nfree < 3){
537                 iunlock(q);
538
539                 if(!waserror())
540                         tsleep(&up->sleep, return0, 0, 500);
541                 poperror();
542
543                 ilock(q);
544         }
545
546         head = free = q->free;
547
548         d = &q->desc[free]; free = d->next;
549         d->addr = PADDR(req);
550         d->len = 8+8+3+scsi->cdb_size;
551         d->flags = Next;
552
553         if(r->write && r->dlen > 0){
554                 d = &q->desc[free]; free = d->next;
555                 d->addr = PADDR(r->data);
556                 d->len = r->dlen;
557                 d->flags = Next;
558         }
559
560         d = &q->desc[free]; free = d->next;
561         d->addr = PADDR(resp);
562         d->len = 4+4+2+2+scsi->sense_size;
563         d->flags = Write;
564
565         if(!r->write && r->dlen > 0){
566                 d->flags |= Next;
567
568                 d = &q->desc[free]; free = d->next;
569                 d->addr = PADDR(r->data);
570                 d->len = r->dlen;
571                 d->flags = Write;
572         }
573         
574         q->free = free;
575         q->nfree -= 2 + (r->dlen > 0);
576
577         /* queue io, unlock and wait for completion */
578         vqio(q, head);
579
580         /* response+status */
581         r->status = resp[10];
582         if(resp[11] != 0)
583                 r->status = SDcheck;
584
585         /* sense_len */
586         len = *((u32int*)&resp[0]);
587         if(len > 0){
588                 if(len > sizeof(r->sense))
589                         len = sizeof(r->sense);
590                 memmove(r->sense, &resp[4+4+2+2], len);
591                 r->flags |= SDvalidsense;
592         }
593
594         /* data residue */
595         len = *((u32int*)&resp[4]);
596         if(len > r->dlen)
597                 r->rlen = 0;
598         else
599                 r->rlen = r->dlen - len;
600
601         return r->status;
602
603 }
604
605 static long
606 viobio(SDunit *u, int lun, int write, void *a, long count, uvlong lba)
607 {
608         long ss, cc, max, ret;
609         Vdev *vd;
610
611         vd = u->dev->ctlr;
612         if(vd->typ == TypSCSI)
613                 return scsibio(u, lun, write, a, count, lba);
614
615         max = 32;
616         ss = u->secsize;
617         ret = 0;
618         while(count > 0){
619                 if((cc = count) > max)
620                         cc = max;
621                 if(vioblkreq(vd, write != 0, (uchar*)a + ret, cc, ss, lba) != 0)
622                         error(Eio);
623                 ret += cc*ss;
624                 count -= cc;
625                 lba += cc;
626         }
627         return ret;
628 }
629
630 static int
631 viorio(SDreq *r)
632 {
633         int i, count, rw;
634         uvlong lba;
635         SDunit *u;
636         Vdev *vd;
637
638         u = r->unit;
639         vd = u->dev->ctlr;
640         if(vd->typ == TypSCSI)
641                 return vioscsireq(r);
642         if(r->cmd[0] == 0x35 || r->cmd[0] == 0x91){
643                 if(vioblkreq(vd, 4, nil, 0, 0, 0) != 0)
644                         return sdsetsense(r, SDcheck, 3, 0xc, 2);
645                 return sdsetsense(r, SDok, 0, 0, 0);
646         }
647         if((i = sdfakescsi(r)) != SDnostatus)
648                 return r->status = i;
649         if((i = sdfakescsirw(r, &lba, &count, &rw)) != SDnostatus)
650                 return i;
651         r->rlen = viobio(u, r->lun, rw == SDwrite, r->data, count, lba);
652         return r->status = SDok;
653 }
654
655 static int
656 vioonline(SDunit *u)
657 {
658         Vdev *vd;
659         Vblkdev *blk;
660         uvlong cap;
661
662         vd = u->dev->ctlr;
663         if(vd->typ == TypSCSI)
664                 return scsionline(u);
665
666         blk = vd->dev;
667         cap = blk->capacity;
668         if(u->sectors != cap){
669                 u->sectors = cap;
670                 u->secsize = 512;
671                 return 2;
672         }
673         return 1;
674 }
675
676 static int
677 vioverify(SDunit *u)
678 {
679         Vdev *vd;
680
681         vd = u->dev->ctlr;
682         if(vd->typ == TypSCSI)
683                 return scsiverify(u);
684
685         return 1;
686 }
687
/* declared here so viopnp can point SDev.ifc at it; initialized at the end of the file */
SDifc sdvirtio10ifc;
689
690 static int
691 vioenable(SDev *sd)
692 {
693         char name[32];
694         Vdev *vd;
695         int i;
696
697         vd = sd->ctlr;
698         pcisetbme(vd->pci);
699         snprint(name, sizeof(name), "%s (%s)", sd->name, sd->ifc->name);
700         intrenable(vd->pci->intl, viointerrupt, vd, vd->pci->tbdf, name);
701         coherence();
702
703         for(i = 0; i < vd->nqueue; i++){
704                 vd->cfg->queuesel = i;
705                 vd->cfg->queueenable = 1;
706         }
707         vd->cfg->status |= DriverOk;
708
709         return 1;
710 }
711
712 static int
713 viodisable(SDev *sd)
714 {
715         char name[32];
716         Vdev *vd;
717
718         vd = sd->ctlr;
719         snprint(name, sizeof(name), "%s (%s)", sd->name, sd->ifc->name);
720         intrdisable(vd->pci->intl, viointerrupt, vd, vd->pci->tbdf, name);
721         pciclrbme(vd->pci);
722         return 1;
723 }
724
725 static SDev*
726 viopnp(void)
727 {
728         SDev *s, *h, *t;
729         Vdev *vd;
730         int id;
731
732         h = t = nil;
733
734         id = 'F';
735         for(vd =  viopnpdevs(TypBlk); vd; vd = vd->next){
736                 if(vd->nqueue == 0)
737                         continue;
738
739                 if((vd->dev = virtiomapregs(vd->pci, virtiocap(vd->pci, 4), sizeof(Vblkdev))) == nil)
740                         break;
741                 if((s = malloc(sizeof(*s))) == nil)
742                         break;
743                 s->ctlr = vd;
744                 s->idno = id++;
745                 s->ifc = &sdvirtio10ifc;
746                 s->nunit = 1;
747                 if(h)
748                         t->next = s;
749                 else
750                         h = s;
751                 t = s;
752         }
753
754         id = '0';
755         for(vd = viopnpdevs(TypSCSI); vd; vd = vd->next){
756                 Vscsidev *scsi;
757
758                 if(vd->nqueue < 3)
759                         continue;
760
761                 if((scsi = virtiomapregs(vd->pci, virtiocap(vd->pci, 4), sizeof(Vscsidev))) == nil)
762                         break;
763                 if(scsi->max_target == 0){
764                         vunmap(scsi, sizeof(Vscsidev));
765                         continue;
766                 }
767                 if((scsi->cdb_size > CDBSIZE) || (scsi->sense_size > SENSESIZE)){
768                         print("sdvirtio: cdb %ud or sense size %ud too big\n",
769                                 scsi->cdb_size, scsi->sense_size);
770                         vunmap(scsi, sizeof(Vscsidev));
771                         continue;
772                 }
773                 vd->dev = scsi;
774
775                 if((s = malloc(sizeof(*s))) == nil)
776                         break;
777                 s->ctlr = vd;
778                 s->idno = id++;
779                 s->ifc = &sdvirtio10ifc;
780                 s->nunit = scsi->max_target;
781
782                 if(h)
783                         t->next = s;
784                 else
785                         h = s;
786                 t = s;
787         }
788         return h;
789 }
790
/*
 * Interface table for this driver.  The initializer is positional;
 * the comments name the slot each entry fills, and nil marks
 * operations this driver does not provide.
 */
SDifc sdvirtio10ifc = {
	"virtio10",			/* name */

	viopnp,				/* pnp */
	nil,				/* legacy */
	vioenable,			/* enable */
	viodisable,			/* disable */

	vioverify,			/* verify */
	vioonline,			/* online */
	viorio,				/* rio */
	nil,				/* rctl */
	nil,				/* wctl */

	viobio,				/* bio */
	nil,				/* probe */
	nil,				/* clear */
	nil,				/* rtopctl */
	nil,				/* wtopctl */
};