]> git.lizzy.rs Git - plan9front.git/blob - sys/src/9/pc/ethervirtio10.c
03108cad92d9d5559c1300e1f28a30d935192070
[plan9front.git] / sys / src / 9 / pc / ethervirtio10.c
1 /*
2  * virtio 1.0 ethernet driver
3  * http://docs.oasis-open.org/virtio/virtio/v1.0/virtio-v1.0.html
4  *
5  * In contrast to ethervirtio.c, this driver handles the non-legacy
6  * interface for virtio ethernet which uses mmio for all register accesses
7  * and requires an elaborate pci capability structure dance to get working.
8  *
9  * It is kind of pointless as it is most likely slower than
10  * port i/o (harder to emulate on the pc platform).
11  * 
12  * The reason why this driver is needed is that vultr set the
13  * disable-legacy=on option in the -device parameter for qemu
14  * on their hypervisor.
15  */
16 #include "u.h"
17 #include "../port/lib.h"
18 #include "mem.h"
19 #include "dat.h"
20 #include "fns.h"
21 #include "io.h"
22 #include "../port/pci.h"
23 #include "../port/error.h"
24 #include "../port/netif.h"
25 #include "../port/etherif.h"
26
/* driver state, one per probed device */
typedef struct Ctlr Ctlr;

/* memory-mapped register layouts */
typedef struct Vconfig Vconfig;
typedef struct Vnetcfg Vnetcfg;

/* virtqueue pieces shared with the device */
typedef struct Vdesc Vdesc;
typedef struct Vheader Vheader;
typedef struct Vqueue Vqueue;
typedef struct Vring Vring;
typedef struct Vused Vused;
37
/*
 * Constants shared by the whole driver: device status bits, feature
 * bits, ring/descriptor flags, on-the-wire structure sizes, queue
 * numbers and control queue commands.
 */
enum {
	/* §2.1 Device Status Field */
	Sacknowledge	= 1<<0,
	Sdriver		= 1<<1,
	Sdriverok	= 1<<2,
	Sfeatureok	= 1<<3,
	Sfailed		= 1<<7,

	/* flags in Qnetstatus */
	Nlinkup		= 0x01,
	Nannounce	= 0x02,

	/* feat[0] bits (feature bits 0..31) */
	Fmac		= 1<<5,
	Fstatus		= 1<<16,
	Fctrlvq		= 1<<17,
	Fctrlrx		= 1<<18,

	/* feat[1] bits (feature bits 32..63) */
	Fversion1	= 1<<0,		/* feature bit 32 */

	/* vring used flags */
	Unonotify	= 1<<0,
	/* vring avail flags */
	Rnointerrupt	= 1<<0,

	/* descriptor flags */
	Dnext		= 1<<0,
	Dwrite		= 1<<1,
	Dindirect	= 1<<2,

	/* struct sizes, in bytes, as laid out in shared memory */
	VringSize	= 4,
	VdescSize	= 16,
	VusedSize	= 8,
	VheaderSize	= 12,

	/* indices into Ctlr.queue[] */
	Vrxq		= 0,
	Vtxq		= 1,
	Vctlq		= 2,

	/* class/cmd for Vctlq */
	CtrlRx		= 0,
		CmdPromisc	= 0,
		CmdAllmulti	= 1,
	CtrlMac		= 1,
		CmdMacTableSet	= 0,
	CtrlVlan	= 2,
		CmdVlanAdd	= 0,
		CmdVlanDel	= 1,
};
89
90 struct Vconfig {
91         u32int  devfeatsel;
92         u32int  devfeat;
93         u32int  drvfeatsel;
94         u32int  drvfeat;
95
96         u16int  msixcfg;
97         u16int  nqueues;
98
99         u8int   status;
100         u8int   cfggen;
101         u16int  queuesel;
102
103         u16int  queuesize;
104         u16int  queuemsixvect;
105
106         u16int  queueenable;
107         u16int  queuenotifyoff;
108
109         u64int  queuedesc;
110         u64int  queueavail;
111         u64int  queueused;
112 };
113
114 struct Vnetcfg
115 {
116         u16int  mac0;
117         u16int  mac1;
118         u16int  mac2;
119         u16int  status;
120         u16int  maxqueuepairs;
121         u16int  mtu;
122 };
123
124 struct Vring
125 {
126         u16int  flags;
127         u16int  idx;
128 };
129
130 struct Vdesc
131 {
132         u64int  addr;
133         u32int  len;
134         u16int  flags;
135         u16int  next;
136 };
137
138 struct Vused
139 {
140         u32int  id;
141         u32int  len;
142 };
143
144 struct Vheader
145 {
146         u8int   flags;
147         u8int   segtype;
148         u16int  hlen;
149         u16int  seglen;
150         u16int  csumstart;
151         u16int  csumend;
152 };
153
154 struct Vqueue
155 {
156         Rendez;
157
158         uint    qsize;
159         uint    qmask;
160
161         Vdesc   *desc;
162
163         Vring   *avail;
164         u16int  *availent;
165         u16int  *availevent;
166
167         Vring   *used;
168         Vused   *usedent;
169         u16int  *usedevent;
170         u16int  lastused;
171
172         uint    nintr;
173         uint    nnote;
174
175         /* notify register */
176         void    *notify;
177 };
178
179 struct Ctlr {
180         Lock;
181
182         QLock   ctllock;
183
184         int     attached;
185
186         /* registers */
187         Vconfig *cfg;
188         Vnetcfg *dev;
189         u8int   *isr;
190         u8int   *notify;
191         u32int  notifyoffmult;
192
193         uvlong  port;
194         Pcidev  *pcidev;
195         Ctlr    *next;
196         int     active;
197         ulong   feat[2];
198         int     nqueue;
199
200         /* virtioether has 3 queues: rx, tx and ctl */
201         Vqueue  queue[3];
202 };
203
204 static Ctlr *ctlrhead;
205
206 static int
207 vhasroom(void *v)
208 {
209         Vqueue *q = v;
210         return q->lastused != q->used->idx;
211 }
212
213 static void
214 vqnotify(Ctlr *ctlr, int x)
215 {
216         Vqueue *q;
217
218         coherence();
219         q = &ctlr->queue[x];
220         if(q->used->flags & Unonotify)
221                 return;
222         q->nnote++;
223         *((u16int*)q->notify) = x;
224 }
225
226 static void
227 txproc(void *v)
228 {
229         Vheader *header;
230         Block **blocks;
231         Ether *edev;
232         Ctlr *ctlr;
233         Vqueue *q;
234         Vused *u;
235         Block *b;
236         int i, j;
237
238         edev = v;
239         ctlr = edev->ctlr;
240         q = &ctlr->queue[Vtxq];
241
242         header = smalloc(VheaderSize);
243         blocks = smalloc(sizeof(Block*) * (q->qsize/2));
244
245         for(i = 0; i < q->qsize/2; i++){
246                 j = i << 1;
247                 q->desc[j].addr = PADDR(header);
248                 q->desc[j].len = VheaderSize;
249                 q->desc[j].next = j | 1;
250                 q->desc[j].flags = Dnext;
251
252                 q->availent[i] = q->availent[i + q->qsize/2] = j;
253
254                 j |= 1;
255                 q->desc[j].next = 0;
256                 q->desc[j].flags = 0;
257         }
258
259         q->avail->flags &= ~Rnointerrupt;
260
261         while(waserror())
262                 ;
263
264         while((b = qbread(edev->oq, 1000000)) != nil){
265                 for(;;){
266                         /* retire completed packets */
267                         while((i = q->lastused) != q->used->idx){
268                                 u = &q->usedent[i & q->qmask];
269                                 i = (u->id & q->qmask) >> 1;
270                                 if(blocks[i] == nil)
271                                         break;
272                                 freeb(blocks[i]);
273                                 blocks[i] = nil;
274                                 q->lastused++;
275                         }
276
277                         /* have free slot? */
278                         i = q->avail->idx & (q->qmask >> 1);
279                         if(blocks[i] == nil)
280                                 break;
281
282                         /* ring full, wait and retry */
283                         if(!vhasroom(q))
284                                 sleep(q, vhasroom, q);
285                 }
286
287                 /* slot is free, fill in descriptor */
288                 blocks[i] = b;
289                 j = (i << 1) | 1;
290                 q->desc[j].addr = PADDR(b->rp);
291                 q->desc[j].len = BLEN(b);
292                 coherence();
293                 q->avail->idx++;
294                 vqnotify(ctlr, Vtxq);
295         }
296
297         pexit("ether out queue closed", 1);
298 }
299
300 static void
301 rxproc(void *v)
302 {
303         Vheader *header;
304         Block **blocks;
305         Ether *edev;
306         Ctlr *ctlr;
307         Vqueue *q;
308         Vused *u;
309         Block *b;
310         int i, j;
311
312         edev = v;
313         ctlr = edev->ctlr;
314         q = &ctlr->queue[Vrxq];
315
316         header = smalloc(VheaderSize);
317         blocks = smalloc(sizeof(Block*) * (q->qsize/2));
318
319         for(i = 0; i < q->qsize/2; i++){
320                 j = i << 1;
321                 q->desc[j].addr = PADDR(header);
322                 q->desc[j].len = VheaderSize;
323                 q->desc[j].next = j | 1;
324                 q->desc[j].flags = Dwrite|Dnext;
325
326                 q->availent[i] = q->availent[i + q->qsize/2] = j;
327
328                 j |= 1;
329                 q->desc[j].next = 0;
330                 q->desc[j].flags = Dwrite;
331         }
332
333         q->avail->flags &= ~Rnointerrupt;
334
335         while(waserror())
336                 ;
337
338         for(;;){
339                 /* replenish receive ring */
340                 do {
341                         i = q->avail->idx & (q->qmask >> 1);
342                         if(blocks[i] != nil)
343                                 break;
344                         if((b = iallocb(ETHERMAXTU)) == nil)
345                                 break;
346                         blocks[i] = b;
347                         j = (i << 1) | 1;
348                         q->desc[j].addr = PADDR(b->rp);
349                         q->desc[j].len = BALLOC(b);
350                         coherence();
351                         q->avail->idx++;
352                 } while(q->avail->idx != q->used->idx);
353                 vqnotify(ctlr, Vrxq);
354
355                 /* wait for any packets to complete */
356                 if(!vhasroom(q))
357                         sleep(q, vhasroom, q);
358
359                 /* retire completed packets */
360                 while((i = q->lastused) != q->used->idx) {
361                         u = &q->usedent[i & q->qmask];
362                         i = (u->id & q->qmask) >> 1;
363                         if((b = blocks[i]) == nil)
364                                 break;
365
366                         blocks[i] = nil;
367                         b->wp = b->rp + u->len - VheaderSize;
368                         etheriq(edev, b);
369                         q->lastused++;
370                 }
371         }
372 }
373
374 static int
375 vctlcmd(Ether *edev, uchar class, uchar cmd, uchar *data, int ndata)
376 {
377         uchar hdr[2], ack[1];
378         Ctlr *ctlr;
379         Vqueue *q;
380         Vdesc *d;
381         int i;
382
383         ctlr = edev->ctlr;
384         q = &ctlr->queue[Vctlq];
385         if(q->qsize < 3)
386                 return -1;
387
388         qlock(&ctlr->ctllock);
389         while(waserror())
390                 ;
391
392         ack[0] = 0x55;
393         hdr[0] = class;
394         hdr[1] = cmd;
395
396         d = &q->desc[0];
397         d->addr = PADDR(hdr);
398         d->len = sizeof(hdr);
399         d->next = 1;
400         d->flags = Dnext;
401         d++;
402         d->addr = PADDR(data);
403         d->len = ndata;
404         d->next = 2;
405         d->flags = Dnext;
406         d++;
407         d->addr = PADDR(ack);
408         d->len = sizeof(ack);
409         d->next = 0;
410         d->flags = Dwrite;
411
412         i = q->avail->idx & q->qmask;
413         q->availent[i] = 0;
414         coherence();
415
416         q->avail->flags &= ~Rnointerrupt;
417         q->avail->idx++;
418         vqnotify(ctlr, Vctlq);
419         while(!vhasroom(q))
420                 sleep(q, vhasroom, q);
421         q->lastused = q->used->idx;
422         q->avail->flags |= Rnointerrupt;
423
424         qunlock(&ctlr->ctllock);
425         poperror();
426
427         if(ack[0] != 0)
428                 print("#l%d: vctlcmd: %ux.%ux -> %ux\n", edev->ctlrno, class, cmd, ack[0]);
429
430         return ack[0];
431 }
432
433 static void
434 interrupt(Ureg*, void* arg)
435 {
436         Ether *edev;
437         Ctlr *ctlr;
438         Vqueue *q;
439         int i;
440
441         edev = arg;
442         ctlr = edev->ctlr;
443         if(*ctlr->isr & 1){
444                 for(i = 0; i < ctlr->nqueue; i++){
445                         q = &ctlr->queue[i];
446                         if(vhasroom(q)){
447                                 q->nintr++;
448                                 wakeup(q);
449                         }
450                 }
451         }
452 }
453
454 static void
455 attach(Ether* edev)
456 {
457         char name[KNAMELEN];
458         Ctlr* ctlr;
459         int i;
460
461         ctlr = edev->ctlr;
462         ilock(ctlr);
463         if(ctlr->attached){
464                 iunlock(ctlr);
465                 return;
466         }
467         ctlr->attached = 1;
468
469         /* driver is ready */
470         ctlr->cfg->status |= Sdriverok;
471
472         /* enable the queues */
473         for(i = 0; i < ctlr->nqueue; i++){
474                 ctlr->cfg->queuesel = i;
475                 ctlr->cfg->queueenable = 1;
476         }
477         iunlock(ctlr);
478
479         /* start kprocs */
480         snprint(name, sizeof name, "#l%drx", edev->ctlrno);
481         kproc(name, rxproc, edev);
482         snprint(name, sizeof name, "#l%dtx", edev->ctlrno);
483         kproc(name, txproc, edev);
484 }
485
486 static long
487 ifstat(Ether *edev, void *a, long n, ulong offset)
488 {
489         int i, l;
490         char *p;
491         Ctlr *ctlr;
492         Vqueue *q;
493
494         ctlr = edev->ctlr;
495
496         p = smalloc(READSTR);
497
498         l = snprint(p, READSTR, "devfeat %32.32lub %32.32lub\n", ctlr->feat[1], ctlr->feat[0]);
499         l += snprint(p+l, READSTR-l, "devstatus %8.8ub\n", ctlr->cfg->status);
500
501         for(i = 0; i < ctlr->nqueue; i++){
502                 q = &ctlr->queue[i];
503                 l += snprint(p+l, READSTR-l,
504                         "vq%d %#p size %d avail->idx %d used->idx %d lastused %hud nintr %ud nnote %ud\n",
505                         i, q, q->qsize, q->avail->idx, q->used->idx, q->lastused, q->nintr, q->nnote);
506         }
507
508         n = readstr(offset, a, n, p);
509         free(p);
510
511         return n;
512 }
513
514 static void
515 shutdown(Ether* edev)
516 {
517         Ctlr *ctlr = edev->ctlr;
518
519         coherence();
520         ctlr->cfg->status = 0;
521         coherence();
522
523         pciclrbme(ctlr->pcidev);
524 }
525
526 static void
527 promiscuous(void *arg, int on)
528 {
529         Ether *edev = arg;
530         uchar b[1];
531
532         b[0] = on != 0;
533         vctlcmd(edev, CtrlRx, CmdPromisc, b, sizeof(b));
534 }
535
536 static void
537 multicast(void *arg, uchar*, int)
538 {
539         Ether *edev = arg;
540         uchar b[1];
541
542         b[0] = edev->nmaddr > 0;
543         vctlcmd(edev, CtrlRx, CmdAllmulti, b, sizeof(b));
544 }
545
546 static int
547 initqueue(Vqueue *q, int size)
548 {
549         uchar *p;
550
551         q->desc = mallocalign(VdescSize*size, 16, 0, 0);
552         if(q->desc == nil)
553                 return -1;
554         p = mallocalign(VringSize + 2*size + 2, 2, 0, 0);
555         if(p == nil){
556 FreeDesc:
557                 free(q->desc);
558                 q->desc = nil;
559                 return -1;
560         }
561         q->avail = (void*)p;
562         p += VringSize;
563         q->availent = (void*)p;
564         p += sizeof(u16int)*size;
565         q->availevent = (void*)p;
566         p = mallocalign(VringSize + VusedSize*size + 2, 4, 0, 0);
567         if(p == nil){
568                 free(q->avail);
569                 q->avail = nil;
570                 goto FreeDesc;
571         }
572         q->used = (void*)p;
573         p += VringSize;
574         q->usedent = (void*)p;
575         p += VusedSize*size;
576         q->usedevent = (void*)p;
577
578         q->qsize = size;
579         q->qmask = q->qsize - 1;
580
581         q->lastused = q->avail->idx = q->used->idx = 0;
582
583         q->avail->flags |= Rnointerrupt;
584
585         return 0;
586 }
587
588 static int
589 matchvirtiocfgcap(Pcidev *p, int cap, int off, int typ)
590 {
591         int bar;
592
593         if(cap != 9 || pcicfgr8(p, off+3) != typ)
594                 return 1;
595
596         /* skip invalid or non memory bars */
597         bar = pcicfgr8(p, off+4);
598         if(bar < 0 || bar >= nelem(p->mem) 
599         || p->mem[bar].size == 0
600         || (p->mem[bar].bar & 3) != 0)
601                 return 1;
602
603         return 0;
604 }
605
606 static int
607 virtiocap(Pcidev *p, int typ)
608 {
609         return pcienumcaps(p, matchvirtiocfgcap, typ);
610 }
611
612 static void*
613 virtiomapregs(Pcidev *p, int cap, int size)
614 {
615         int bar, len;
616         uvlong addr;
617
618         if(cap < 0)
619                 return nil;
620         bar = pcicfgr8(p, cap+4) % nelem(p->mem);
621         addr = pcicfgr32(p, cap+8);
622         len = pcicfgr32(p, cap+12);
623         if(size <= 0)
624                 size = len;
625         else if(len < size)
626                 return nil;
627         if(addr+len > p->mem[bar].size)
628                 return nil;
629         addr += p->mem[bar].bar & ~0xFULL;
630         return vmap(addr, size);
631 }
632
633 static Ctlr*
634 pciprobe(void)
635 {
636         Ctlr *c, *h, *t;
637         Pcidev *p;
638         Vconfig *cfg;
639         int bar, cap, n, i;
640
641         h = t = nil;
642
643         /* §4.1.2 PCI Device Discovery */
644         for(p = nil; p = pcimatch(p, 0x1AF4, 0x1041);){
645                 /* non-transitional devices will have a revision > 0 */
646                 if(p->rid == 0)
647                         continue;
648                 if((cap = virtiocap(p, 1)) < 0)
649                         continue;
650                 bar = pcicfgr8(p, cap+4) % nelem(p->mem);
651                 cfg = virtiomapregs(p, cap, sizeof(Vconfig));
652                 if(cfg == nil)
653                         continue;
654                 if((c = mallocz(sizeof(Ctlr), 1)) == nil){
655                         print("ethervirtio: no memory for Ctlr\n");
656                         break;
657                 }
658                 c->cfg = cfg;
659                 c->pcidev = p;
660                 c->port = p->mem[bar].bar & ~0xFULL;
661
662                 pcienable(p);
663                 c->dev = virtiomapregs(p, virtiocap(p, 4), sizeof(Vnetcfg));
664                 if(c->dev == nil)
665                         goto Baddev;
666                 c->isr = virtiomapregs(p, virtiocap(p, 3), 0);
667                 if(c->isr == nil)
668                         goto Baddev;
669                 cap = virtiocap(p, 2);
670                 c->notify = virtiomapregs(p, cap, 0);
671                 if(c->notify == nil)
672                         goto Baddev;
673                 c->notifyoffmult = pcicfgr32(p, cap+16);
674
675                 /* device reset */
676                 coherence();
677                 cfg->status = 0;
678                 while(cfg->status != 0)
679                         delay(1);
680                 cfg->status = Sacknowledge|Sdriver;
681
682                 /* negotiate feature bits */
683                 cfg->devfeatsel = 1;
684                 c->feat[1] = cfg->devfeat;
685
686                 cfg->devfeatsel = 0;
687                 c->feat[0] = cfg->devfeat;
688
689                 cfg->drvfeatsel = 1;
690                 cfg->drvfeat = c->feat[1] & Fversion1;
691
692                 cfg->drvfeatsel = 0;
693                 cfg->drvfeat = c->feat[0] & (Fmac|Fctrlvq|Fctrlrx);
694
695                 for(i=0; i<nelem(c->queue); i++){
696                         cfg->queuesel = i;
697                         n = cfg->queuesize;
698                         if(n == 0 || (n & (n-1)) != 0){
699                                 if(i < 2)
700                                         print("ethervirtio: queue %d has invalid size %d\n", i, n);
701                                 break;
702                         }
703                         if(initqueue(&c->queue[i], n) < 0)
704                                 break;
705                         c->queue[i].notify = c->notify + c->notifyoffmult * cfg->queuenotifyoff;
706                         coherence();
707                         cfg->queuedesc = PADDR(c->queue[i].desc);
708                         cfg->queueavail = PADDR(c->queue[i].avail);
709                         cfg->queueused = PADDR(c->queue[i].used);
710                 }
711                 if(i < 2){
712                         print("ethervirtio: no queues\n");
713 Baddev:
714                         pcidisable(p);
715                         /* TODO, vunmap */
716                         free(c);
717                         continue;
718                 }
719                 c->nqueue = i;          
720
721                 if(h == nil)
722                         h = c;
723                 else
724                         t->next = c;
725                 t = c;
726         }
727
728         return h;
729 }
730
731
732 static int
733 reset(Ether* edev)
734 {
735         static uchar zeros[Eaddrlen];
736         Ctlr *ctlr;
737         int i;
738
739         if(ctlrhead == nil)
740                 ctlrhead = pciprobe();
741
742         for(ctlr = ctlrhead; ctlr != nil; ctlr = ctlr->next){
743                 if(ctlr->active)
744                         continue;
745                 if(edev->port == 0 || edev->port == ctlr->port){
746                         ctlr->active = 1;
747                         break;
748                 }
749         }
750
751         if(ctlr == nil)
752                 return -1;
753
754         edev->ctlr = ctlr;
755         edev->port = ctlr->port;
756         edev->irq = ctlr->pcidev->intl;
757         edev->tbdf = ctlr->pcidev->tbdf;
758         edev->mbps = 1000;
759         edev->link = 1;
760
761         if((ctlr->feat[0] & Fmac) != 0 && memcmp(edev->ea, zeros, Eaddrlen) == 0){
762                 for(i = 0; i < Eaddrlen; i++)
763                         edev->ea[i] = ((uchar*)ctlr->dev)[i];
764         } else {
765                 for(i = 0; i < Eaddrlen; i++)
766                         ((uchar*)ctlr->dev)[i] = edev->ea[i];
767         }
768
769         edev->arg = edev;
770
771         edev->attach = attach;
772         edev->shutdown = shutdown;
773         edev->ifstat = ifstat;
774
775         if((ctlr->feat[0] & (Fctrlvq|Fctrlrx)) == (Fctrlvq|Fctrlrx)){
776                 edev->multicast = multicast;
777                 edev->promiscuous = promiscuous;
778         }
779
780         pcisetbme(ctlr->pcidev);
781         intrenable(edev->irq, interrupt, edev, edev->tbdf, edev->name);
782
783         return 0;
784 }
785
786 void
787 ethervirtio10link(void)
788 {
789         addethercard("virtio10", reset);
790 }