2 * virtio 1.0 ethernet driver
3 * http://docs.oasis-open.org/virtio/virtio/v1.0/virtio-v1.0.html
5 * In contrast to ethervirtio.c, this driver handles the non-legacy
6 * interface for virtio ethernet which uses mmio for all register accesses
7 * and requires an elaborate pci capability structure dance to get working.
9 * It is kind of pointless as it is most likely slower than
10 * port i/o (harder to emulate on the pc platform).
12 * The reason why this driver is needed is that vultr set the
13 * disable-legacy=on option in the -device parameter for qemu
14 * on their hypervisor.
17 #include "../port/lib.h"
22 #include "../port/pci.h"
23 #include "../port/error.h"
24 #include "../port/netif.h"
25 #include "../port/etherif.h"
27 typedef struct Vconfig Vconfig;
28 typedef struct Vnetcfg Vnetcfg;
30 typedef struct Vring Vring;
31 typedef struct Vdesc Vdesc;
32 typedef struct Vused Vused;
33 typedef struct Vheader Vheader;
34 typedef struct Vqueue Vqueue;
36 typedef struct Ctlr Ctlr;
39 /* §2.1 Device Status Field */
46 /* flags in Qnetstatus */
57 Fversion1 = 1<<(32-32),
59 /* vring used flags */
61 /* vring avail flags */
64 /* descriptor flags */
79 /* class/cmd for Vctlq */
84 CmdMacTableSet = 0x00,
104 u16int queuemsixvect;
107 u16int queuenotifyoff;
120 u16int maxqueuepairs;
175 /* notify register */
191 u32int notifyoffmult;
200 /* virtioether has 3 queues: rx, tx and ctl */
204 static Ctlr *ctlrhead;
210 return q->lastused != q->used->idx;
214 vqnotify(Ctlr *ctlr, int x)
220 if(q->used->flags & Unonotify)
223 *((u16int*)q->notify) = x;
240 q = &ctlr->queue[Vtxq];
242 header = smalloc(VheaderSize);
243 blocks = smalloc(sizeof(Block*) * (q->qsize/2));
245 for(i = 0; i < q->qsize/2; i++){
247 q->desc[j].addr = PADDR(header);
248 q->desc[j].len = VheaderSize;
249 q->desc[j].next = j | 1;
250 q->desc[j].flags = Dnext;
252 q->availent[i] = q->availent[i + q->qsize/2] = j;
256 q->desc[j].flags = 0;
259 q->avail->flags &= ~Rnointerrupt;
264 while((b = qbread(edev->oq, 1000000)) != nil){
266 /* retire completed packets */
267 while((i = q->lastused) != q->used->idx){
268 u = &q->usedent[i & q->qmask];
269 i = (u->id & q->qmask) >> 1;
277 /* have free slot? */
278 i = q->avail->idx & (q->qmask >> 1);
282 /* ring full, wait and retry */
284 sleep(q, vhasroom, q);
287 /* slot is free, fill in descriptor */
290 q->desc[j].addr = PADDR(b->rp);
291 q->desc[j].len = BLEN(b);
294 vqnotify(ctlr, Vtxq);
297 pexit("ether out queue closed", 1);
314 q = &ctlr->queue[Vrxq];
316 header = smalloc(VheaderSize);
317 blocks = smalloc(sizeof(Block*) * (q->qsize/2));
319 for(i = 0; i < q->qsize/2; i++){
321 q->desc[j].addr = PADDR(header);
322 q->desc[j].len = VheaderSize;
323 q->desc[j].next = j | 1;
324 q->desc[j].flags = Dwrite|Dnext;
326 q->availent[i] = q->availent[i + q->qsize/2] = j;
330 q->desc[j].flags = Dwrite;
333 q->avail->flags &= ~Rnointerrupt;
339 /* replenish receive ring */
341 i = q->avail->idx & (q->qmask >> 1);
344 if((b = iallocb(ETHERMAXTU)) == nil)
348 q->desc[j].addr = PADDR(b->rp);
349 q->desc[j].len = BALLOC(b);
352 } while(q->avail->idx != q->used->idx);
353 vqnotify(ctlr, Vrxq);
355 /* wait for any packets to complete */
357 sleep(q, vhasroom, q);
359 /* retire completed packets */
360 while((i = q->lastused) != q->used->idx) {
361 u = &q->usedent[i & q->qmask];
362 i = (u->id & q->qmask) >> 1;
363 if((b = blocks[i]) == nil)
367 b->wp = b->rp + u->len - VheaderSize;
375 vctlcmd(Ether *edev, uchar class, uchar cmd, uchar *data, int ndata)
377 uchar hdr[2], ack[1];
384 q = &ctlr->queue[Vctlq];
388 qlock(&ctlr->ctllock);
397 d->addr = PADDR(hdr);
398 d->len = sizeof(hdr);
402 d->addr = PADDR(data);
407 d->addr = PADDR(ack);
408 d->len = sizeof(ack);
412 i = q->avail->idx & q->qmask;
416 q->avail->flags &= ~Rnointerrupt;
418 vqnotify(ctlr, Vctlq);
420 sleep(q, vhasroom, q);
421 q->lastused = q->used->idx;
422 q->avail->flags |= Rnointerrupt;
424 qunlock(&ctlr->ctllock);
428 print("#l%d: vctlcmd: %ux.%ux -> %ux\n", edev->ctlrno, class, cmd, ack[0]);
434 interrupt(Ureg*, void* arg)
444 for(i = 0; i < ctlr->nqueue; i++){
469 /* driver is ready */
470 ctlr->cfg->status |= Sdriverok;
472 /* enable the queues */
473 for(i = 0; i < ctlr->nqueue; i++){
474 ctlr->cfg->queuesel = i;
475 ctlr->cfg->queueenable = 1;
480 snprint(name, sizeof name, "#l%drx", edev->ctlrno);
481 kproc(name, rxproc, edev);
482 snprint(name, sizeof name, "#l%dtx", edev->ctlrno);
483 kproc(name, txproc, edev);
487 ifstat(Ether *edev, void *a, long n, ulong offset)
496 p = smalloc(READSTR);
498 l = snprint(p, READSTR, "devfeat %32.32lub %32.32lub\n", ctlr->feat[1], ctlr->feat[0]);
499 l += snprint(p+l, READSTR-l, "devstatus %8.8ub\n", ctlr->cfg->status);
501 for(i = 0; i < ctlr->nqueue; i++){
503 l += snprint(p+l, READSTR-l,
504 "vq%d %#p size %d avail->idx %d used->idx %d lastused %hud nintr %ud nnote %ud\n",
505 i, q, q->qsize, q->avail->idx, q->used->idx, q->lastused, q->nintr, q->nnote);
508 n = readstr(offset, a, n, p);
515 shutdown(Ether* edev)
517 Ctlr *ctlr = edev->ctlr;
520 ctlr->cfg->status = 0;
523 pciclrbme(ctlr->pcidev);
527 promiscuous(void *arg, int on)
533 vctlcmd(edev, CtrlRx, CmdPromisc, b, sizeof(b));
537 multicast(void *arg, uchar*, int)
542 b[0] = edev->nmaddr > 0;
543 vctlcmd(edev, CtrlRx, CmdAllmulti, b, sizeof(b));
547 initqueue(Vqueue *q, int size)
551 q->desc = mallocalign(VdescSize*size, 16, 0, 0);
554 p = mallocalign(VringSize + 2*size + 2, 2, 0, 0);
563 q->availent = (void*)p;
564 p += sizeof(u16int)*size;
565 q->availevent = (void*)p;
566 p = mallocalign(VringSize + VusedSize*size + 2, 4, 0, 0);
574 q->usedent = (void*)p;
576 q->usedevent = (void*)p;
579 q->qmask = q->qsize - 1;
581 q->lastused = q->avail->idx = q->used->idx = 0;
583 q->avail->flags |= Rnointerrupt;
589 matchvirtiocfgcap(Pcidev *p, int cap, int off, int typ)
593 if(cap != 9 || pcicfgr8(p, off+3) != typ)
596 /* skip invalid or non memory bars */
597 bar = pcicfgr8(p, off+4);
598 if(bar < 0 || bar >= nelem(p->mem)
599 || p->mem[bar].size == 0
600 || (p->mem[bar].bar & 3) != 0)
607 virtiocap(Pcidev *p, int typ)
609 return pcienumcaps(p, matchvirtiocfgcap, typ);
613 virtiomapregs(Pcidev *p, int cap, int size)
620 bar = pcicfgr8(p, cap+4) % nelem(p->mem);
621 addr = pcicfgr32(p, cap+8);
622 len = pcicfgr32(p, cap+12);
627 if(addr+len > p->mem[bar].size)
629 addr += p->mem[bar].bar & ~0xFULL;
630 return vmap(addr, size);
643 /* §4.1.2 PCI Device Discovery */
644 for(p = nil; p = pcimatch(p, 0x1AF4, 0x1041);){
645 /* non-transitional devices will have a revision > 0 */
648 if((cap = virtiocap(p, 1)) < 0)
650 bar = pcicfgr8(p, cap+4) % nelem(p->mem);
651 cfg = virtiomapregs(p, cap, sizeof(Vconfig));
654 if((c = mallocz(sizeof(Ctlr), 1)) == nil){
655 print("ethervirtio: no memory for Ctlr\n");
660 c->port = p->mem[bar].bar & ~0xFULL;
663 c->dev = virtiomapregs(p, virtiocap(p, 4), sizeof(Vnetcfg));
666 c->isr = virtiomapregs(p, virtiocap(p, 3), 0);
669 cap = virtiocap(p, 2);
670 c->notify = virtiomapregs(p, cap, 0);
673 c->notifyoffmult = pcicfgr32(p, cap+16);
678 while(cfg->status != 0)
680 cfg->status = Sacknowledge|Sdriver;
682 /* negotiate feature bits */
684 c->feat[1] = cfg->devfeat;
687 c->feat[0] = cfg->devfeat;
690 cfg->drvfeat = c->feat[1] & Fversion1;
693 cfg->drvfeat = c->feat[0] & (Fmac|Fctrlvq|Fctrlrx);
695 for(i=0; i<nelem(c->queue); i++){
698 if(n == 0 || (n & (n-1)) != 0){
700 print("ethervirtio: queue %d has invalid size %d\n", i, n);
703 if(initqueue(&c->queue[i], n) < 0)
705 c->queue[i].notify = c->notify + c->notifyoffmult * cfg->queuenotifyoff;
707 cfg->queuedesc = PADDR(c->queue[i].desc);
708 cfg->queueavail = PADDR(c->queue[i].avail);
709 cfg->queueused = PADDR(c->queue[i].used);
712 print("ethervirtio: no queues\n");
735 static uchar zeros[Eaddrlen];
740 ctlrhead = pciprobe();
742 for(ctlr = ctlrhead; ctlr != nil; ctlr = ctlr->next){
745 if(edev->port == 0 || edev->port == ctlr->port){
755 edev->port = ctlr->port;
756 edev->irq = ctlr->pcidev->intl;
757 edev->tbdf = ctlr->pcidev->tbdf;
761 if((ctlr->feat[0] & Fmac) != 0 && memcmp(edev->ea, zeros, Eaddrlen) == 0){
762 for(i = 0; i < Eaddrlen; i++)
763 edev->ea[i] = ((uchar*)ctlr->dev)[i];
765 for(i = 0; i < Eaddrlen; i++)
766 ((uchar*)ctlr->dev)[i] = edev->ea[i];
771 edev->attach = attach;
772 edev->shutdown = shutdown;
773 edev->ifstat = ifstat;
775 if((ctlr->feat[0] & (Fctrlvq|Fctrlrx)) == (Fctrlvq|Fctrlrx)){
776 edev->multicast = multicast;
777 edev->promiscuous = promiscuous;
780 pcisetbme(ctlr->pcidev);
781 intrenable(edev->irq, interrupt, edev, edev->tbdf, edev->name);
787 ethervirtio10link(void)
789 addethercard("virtio10", reset);