]> git.lizzy.rs Git - plan9front.git/blob - sys/src/9/pc/ethervirtio.c
ethervirtio: implement promisc and multicast mode, cleanup
[plan9front.git] / sys / src / 9 / pc / ethervirtio.c
1 #include "u.h"
2 #include "../port/lib.h"
3 #include "mem.h"
4 #include "dat.h"
5 #include "fns.h"
6 #include "io.h"
7 #include "../port/error.h"
8 #include "../port/netif.h"
9 #include "etherif.h"
10
11 /*
12  * virtio ethernet driver
13  * http://docs.oasis-open.org/virtio/virtio/v1.0/virtio-v1.0.html
14  */
15
/* forward typedefs so the structures below can be named without the struct keyword */
typedef struct Vring Vring;
typedef struct Vdesc Vdesc;
typedef struct Vused Vused;
typedef struct Vheader Vheader;
typedef struct Vqueue Vqueue;
typedef struct Ctlr Ctlr;
22
/* §2.1 Device Status Field */
enum {
	Sacknowledge	= 1,
	Sdriver		= 2,
	Sdriverok	= 4,
	Sfeatureok	= 8,
	Sfailed		= 128,
};

/*
 * §4.1.4.8 Legacy Interfaces: A Note on PCI Device Layout
 * (register offsets, added to the controller's I/O port base)
 */
enum {
	Qdevfeat	= 0,
	Qdrvfeat	= 4,
	Qaddr		= 8,
	Qsize		= 12,
	Qselect		= 14,
	Qnotify		= 16,
	Qstatus		= 18,
	Qisr		= 19,
	Qmac		= 20,
	Qnetstatus	= 26,
};

/* flags in Qnetstatus */
enum {
	Nlinkup		= 1<<0,
	Nannounce	= 1<<1,
};

/* feature bits */
enum {
	Fmac		= 1<<5,
	Fstatus		= 1<<16,
	Fctrlvq		= 1<<17,
};

enum {
	/* vring used flags */
	Unonotify	= 1,
	/* vring avail flags */
	Rnointerrupt	= 1,
};

/* descriptor flags */
enum {
	Dnext		= 1,
	Dwrite		= 2,
	Dindirect	= 4,
};

/* struct sizes */
enum {
	VringSize	= 4,
	VdescSize	= 16,
	VusedSize	= 8,
	VheaderSize	= 10,
};

/*
 * §4.1.5.1.4.1 says pages are 4096 bytes
 * for the purposes of the driver.
 */
enum {
	VBY2PG	= 4096,
};
#define VPGROUND(s)	ROUND(s, VBY2PG)

/* virtqueue indices */
enum {
	Vrxq	= 0,
	Vtxq	= 1,
	Vctlq	= 2,
};

/* class/cmd for Vctlq */
enum {
	CtrlRx	= 0x00,
		CmdPromisc	= 0x00,
		CmdAllmulti	= 0x01,
	CtrlMac	= 0x01,
		CmdMacTableSet	= 0x00,
	CtrlVlan= 0x02,
		CmdVlanAdd	= 0x00,
		CmdVlanDel	= 0x01,
};
88
/*
 * Two-field header found at the start of both the avail and
 * the used ring (see the avail/used pointers in Vqueue).
 */
struct Vring
{
	u16int	flags;	/* Rnointerrupt (avail) or Unonotify (used) */
	u16int	idx;	/* free-running ring index */
};
94
/* one entry of a virtqueue descriptor table; VdescSize bytes */
struct Vdesc
{
	u64int	addr;	/* physical address of the buffer (filled in with PADDR) */
	u32int	len;	/* buffer length in bytes */
	u16int	flags;	/* Dnext, Dwrite, Dindirect */
	u16int	next;	/* index of the following descriptor when Dnext is set */
};
102
/* one entry of the used ring; VusedSize bytes */
struct Vused
{
	u32int	id;	/* index of the head descriptor of the completed chain */
	u32int	len;	/* byte count reported for the chain; rxproc uses it to set the block's wp */
};
108
/*
 * Per-packet header that precedes the packet data in every
 * rx and tx descriptor chain; VheaderSize bytes.
 * The driver allocates it but never touches individual fields
 * in this file.
 */
struct Vheader
{
	u8int	flags;
	u8int	segtype;
	u16int	hlen;
	u16int	seglen;
	u16int	csumstart;
	u16int	csumend;
};
118
/*
 * §2.4 Virtqueues
 * Driver-side view of one virtqueue. All pointers refer into the
 * single page-aligned allocation that mkqueue carves up.
 */
struct Vqueue
{
	Rendez;			/* the queue itself is the sleep/wakeup rendezvous */

	uint	qsize;		/* number of descriptors */
	uint	qmask;		/* qsize-1, used to wrap ring indices */

	Vdesc	*desc;		/* descriptor table */

	Vring	*avail;		/* avail ring header */
	u16int	*availent;	/* avail ring entries (descriptor indices) */
	u16int	*availevent;	/* u16int slot following the avail entries */

	Vring	*used;		/* used ring header */
	Vused	*usedent;	/* used ring entries */
	u16int	*usedevent;	/* u16int slot following the used entries */
	u16int	lastused;	/* used->idx value up to which completions were retired */
};
138
/* per-device state; instances are chained through next, starting at ctlrhead */
struct Ctlr {
	Lock;			/* taken by attach around the attached test */

	QLock	ctllock;	/* serializes commands on the control queue */

	int	attached;	/* set once attach has started the kprocs */

	int	port;		/* I/O port base of the legacy register block */
	Pcidev	*pcidev;
	Ctlr	*next;
	int	active;		/* claimed by an Ether in reset */
	int	id;		/* (did<<16)|vid */
	int	typ;		/* subsystem id the device was matched on */
	ulong	feat;		/* device feature bits read from Qdevfeat */
	int	nqueue;		/* number of entries of queue[] that were set up */

	/* virtioether has 3 queues: rx, tx and ctl */
	Vqueue	*queue[3];

	/* MAC address */
	uchar	ea[Eaddrlen];
};
161
/* list of probed controllers; filled in by the first call to reset */
static Ctlr *ctlrhead;
163
164 static int
165 vhasroom(void *v)
166 {
167         Vqueue *q = v;
168         return q->lastused != q->used->idx;
169 }
170
171 static void
172 txproc(void *v)
173 {
174         Vheader *header;
175         Block **blocks;
176         Ether *edev;
177         Ctlr *ctlr;
178         Vqueue *q;
179         Vused *u;
180         Block *b;
181         int i, j;
182
183         edev = v;
184         ctlr = edev->ctlr;
185         q = ctlr->queue[Vtxq];
186
187         header = smalloc(VheaderSize);
188         blocks = smalloc(sizeof(Block*) * (q->qsize/2));
189
190         for(i = 0; i < q->qsize/2; i++){
191                 j = i << 1;
192                 q->desc[j].addr = PADDR(header);
193                 q->desc[j].len = VheaderSize;
194                 q->desc[j].next = j | 1;
195                 q->desc[j].flags = Dnext;
196
197                 q->availent[i] = q->availent[i + q->qsize/2] = j;
198
199                 j |= 1;
200                 q->desc[j].next = 0;
201                 q->desc[j].flags = 0;
202         }
203
204         q->used->flags &= ~Rnointerrupt;
205
206         while(waserror())
207                 ;
208
209         while((b = qbread(edev->oq, 1000000)) != nil){
210                 for(;;){
211                         /* retire completed packets */
212                         while((i = q->lastused) != q->used->idx){
213                                 u = &q->usedent[i & q->qmask];
214                                 i = (u->id & q->qmask) >> 1;
215                                 if(blocks[i] == nil)
216                                         break;
217                                 freeb(blocks[i]);
218                                 blocks[i] = nil;
219                                 q->lastused++;
220                         }
221
222                         /* have free slot? */
223                         i = q->avail->idx & (q->qmask >> 1);
224                         if(blocks[i] == nil)
225                                 break;
226
227                         /* ring full, wait and retry */
228                         if(!vhasroom(q))
229                                 sleep(q, vhasroom, q);
230                 }
231
232                 /* slot is free, fill in descriptor */
233                 blocks[i] = b;
234                 j = (i << 1) | 1;
235                 q->desc[j].addr = PADDR(b->rp);
236                 q->desc[j].len = BLEN(b);
237                 coherence();
238                 q->avail->idx++;
239                 outs(ctlr->port+Qnotify, Vtxq);
240         }
241
242         pexit("ether out queue closed", 1);
243 }
244
245 static void
246 rxproc(void *v)
247 {
248         Vheader *header;
249         Block **blocks;
250         Ether *edev;
251         Ctlr *ctlr;
252         Vqueue *q;
253         Vused *u;
254         Block *b;
255         int i, j;
256
257         edev = v;
258         ctlr = edev->ctlr;
259         q = ctlr->queue[Vrxq];
260
261         header = smalloc(VheaderSize);
262         blocks = smalloc(sizeof(Block*) * (q->qsize/2));
263
264         for(i = 0; i < q->qsize/2; i++){
265                 j = i << 1;
266                 q->desc[j].addr = PADDR(header);
267                 q->desc[j].len = VheaderSize;
268                 q->desc[j].next = j | 1;
269                 q->desc[j].flags = Dwrite|Dnext;
270
271                 q->availent[i] = q->availent[i + q->qsize/2] = j;
272
273                 j |= 1;
274                 q->desc[j].next = 0;
275                 q->desc[j].flags = Dwrite;
276         }
277
278         q->used->flags &= ~Rnointerrupt;
279
280         while(waserror())
281                 ;
282
283         for(;;){
284                 /* replenish receive ring */
285                 do {
286                         i = q->avail->idx & (q->qmask >> 1);
287                         if(blocks[i] != nil)
288                                 break;
289                         if((b = iallocb(ETHERMAXTU)) == nil)
290                                 break;
291                         blocks[i] = b;
292                         j = (i << 1) | 1;
293                         q->desc[j].addr = PADDR(b->rp);
294                         q->desc[j].len = BALLOC(b);
295                         coherence();
296                         q->avail->idx++;
297                         outs(ctlr->port+Qnotify, Vrxq);
298                 } while(q->avail->idx != q->used->idx);
299
300                 /* wait for any packets to complete */
301                 if(!vhasroom(q))
302                         sleep(q, vhasroom, q);
303
304                 /* retire completed packets */
305                 while((i = q->lastused) != q->used->idx) {
306                         u = &q->usedent[i & q->qmask];
307                         i = (u->id & q->qmask) >> 1;
308                         if((b = blocks[i]) == nil)
309                                 break;
310
311                         blocks[i] = nil;
312
313                         b->wp = b->rp + u->len;
314                         etheriq(edev, b, 1);
315                         q->lastused++;
316                 }
317         }
318 }
319
320 static int
321 vctlcmd(Ether *edev, uchar class, uchar cmd, uchar *data, int ndata)
322 {
323         uchar hdr[2], ack[1];
324         Ctlr *ctlr;
325         Vqueue *q;
326         Vdesc *d;
327         int i;
328
329         ctlr = edev->ctlr;
330         q = ctlr->queue[Vctlq];
331         if(q == nil || q->qsize < 3)
332                 return -1;
333
334         qlock(&ctlr->ctllock);
335         while(waserror())
336                 ;
337
338         ack[0] = 0x55;
339         hdr[0] = class;
340         hdr[1] = cmd;
341
342         d = &q->desc[0];
343         d->addr = PADDR(hdr);
344         d->len = sizeof(hdr);
345         d->next = 1;
346         d->flags = Dnext;
347         d++;
348         d->addr = PADDR(data);
349         d->len = ndata;
350         d->next = 2;
351         d->flags = Dnext;
352         d++;
353         d->addr = PADDR(ack);
354         d->len = sizeof(ack);
355         d->next = 0;
356         d->flags = Dwrite;
357
358         i = q->avail->idx & q->qmask;
359         q->availent[i] = 0;
360         coherence();
361
362         q->used->flags &= ~Rnointerrupt;
363         q->avail->idx++;
364         outs(ctlr->port+Qnotify, Vctlq);
365         while(!vhasroom(q))
366                 sleep(q, vhasroom, q);
367         q->lastused = q->used->idx;
368         q->used->flags |= Rnointerrupt;
369
370         qunlock(&ctlr->ctllock);
371         poperror();
372
373         if(ack[0] != 0)
374                 print("#l%d: vctlcmd: %ux.%ux -> %ux\n", edev->ctlrno, class, cmd, ack[0]);
375
376         return ack[0];
377 }
378
379 static void
380 interrupt(Ureg*, void* arg)
381 {
382         Ether *edev;
383         Ctlr *ctlr;
384         Vqueue *q;
385         int i;
386
387         edev = arg;
388         ctlr = edev->ctlr;
389         if(inb(ctlr->port+Qisr) & 1){
390                 for(i = 0; i < ctlr->nqueue; i++){
391                         q = ctlr->queue[i];
392                         if(vhasroom(q))
393                                 wakeup(q);
394                 }
395         }
396 }
397
398 static void
399 attach(Ether* edev)
400 {
401         char name[KNAMELEN];
402         Ctlr* ctlr;
403
404         ctlr = edev->ctlr;
405         lock(ctlr);
406         if(!ctlr->attached){
407                 ctlr->attached = 1;
408
409                 /* ready to go */
410                 outb(ctlr->port+Qstatus, inb(ctlr->port+Qstatus) | Sdriverok);
411
412                 /* start kprocs */
413                 snprint(name, sizeof name, "#l%drx", edev->ctlrno);
414                 kproc(name, rxproc, edev);
415                 snprint(name, sizeof name, "#l%dtx", edev->ctlrno);
416                 kproc(name, txproc, edev);
417         }
418         unlock(ctlr);
419 }
420
421 static long
422 ifstat(Ether *edev, void *a, long n, ulong offset)
423 {
424         int i, l;
425         char *p;
426         Ctlr *ctlr;
427         Vqueue *q;
428
429         ctlr = edev->ctlr;
430
431         p = smalloc(READSTR);
432
433         l = snprint(p, READSTR, "devfeat %32.32lub\n", ctlr->feat);
434         l += snprint(p+l, READSTR-l, "drvfeat %32.32lub\n", inl(ctlr->port+Qdrvfeat));
435         l += snprint(p+l, READSTR-l, "devstatus %8.8ub\n", inb(ctlr->port+Qstatus));
436         l += snprint(p+l, READSTR-l, "isr %8.8ub\n",  inb(ctlr->port+Qisr));
437         l += snprint(p+l, READSTR-l, "netstatus %8.8ub\n",  inb(ctlr->port+Qnetstatus));
438
439         for(i = 0; i < ctlr->nqueue; i++){
440                 q = ctlr->queue[i];
441                 l += snprint(p+l, READSTR-l, "vq%d %#p size %d avail->idx %d used->idx %d lastused %hud\n",
442                         i, q, q->qsize, q->avail->idx, q->used->idx, q->lastused);
443         }
444
445         n = readstr(offset, a, n, p);
446         free(p);
447
448         return n;
449 }
450
451 static void
452 shutdown(Ether* edev)
453 {
454         Ctlr *ctlr = edev->ctlr;
455         outb(ctlr->port+Qstatus, 0);
456 }
457
458 static void
459 promiscuous(void *arg, int on)
460 {
461         Ether *edev = arg;
462         uchar b[1];
463
464         b[0] = on != 0;
465         vctlcmd(edev, CtrlRx, CmdPromisc, b, sizeof(b));
466 }
467
468 static void
469 multicast(void *arg, uchar*, int)
470 {
471         Ether *edev = arg;
472         uchar b[1];
473
474         b[0] = edev->nmaddr > 0;
475         vctlcmd(edev, CtrlRx, CmdAllmulti, b, sizeof(b));
476 }
477
478 /* §2.4.2 Legacy Interfaces: A Note on Virtqueue Layout */
479 static ulong
480 queuesize(ulong size)
481 {
482         return VPGROUND(VdescSize*size + sizeof(u16int)*(3+size))
483                 + VPGROUND(sizeof(u16int)*3 + VusedSize*size);
484 }
485
486 static Vqueue*
487 mkqueue(int size)
488 {
489         Vqueue *q;
490         uchar *p;
491
492         /* §2.4: Queue Size value is always a power of 2 and <= 32768 */
493         assert(!(size & (size - 1)) && size <= 32768);
494
495         q = mallocz(sizeof(Vqueue), 1);
496         p = mallocalign(queuesize(size), VBY2PG, 0, 0);
497         if(p == nil || q == nil){
498                 print("ethervirtio: no memory for Vqueue\n");
499                 free(p);
500                 free(q);
501                 return nil;
502         }
503
504         q->desc = (void*)p;
505         p += VdescSize*size;
506         q->avail = (void*)p;
507         p += VringSize;
508         q->availent = (void*)p;
509         p += sizeof(u16int)*size;
510         q->availevent = (void*)p;
511         p += sizeof(u16int);
512
513         p = (uchar*)VPGROUND((uintptr)p);
514         q->used = (void*)p;
515         p += VringSize;
516         q->usedent = (void*)p;
517         p += VusedSize*size;
518         q->usedevent = (void*)p;
519
520         q->qsize = size;
521         q->qmask = q->qsize - 1;
522
523         q->lastused = q->avail->idx = q->used->idx = 0;
524
525         /* disable interrupts
526          * virtio spec says we still get interrupts if
527          * VnotifyEmpty is set in Drvfeat */
528         q->used->flags |= Rnointerrupt;
529
530         return q;
531 }
532
533 static Ctlr*
534 pciprobe(int typ)
535 {
536         Ctlr *c, *h, *t;
537         Pcidev *p;
538         int n, i;
539
540         h = t = nil;
541
542         /* §4.1.2 PCI Device Discovery */
543         for(p = nil; p = pcimatch(p, 0, 0);){
544                 if(p->vid != 0x1AF4)
545                         continue;
546                 /* the two possible DIDs for virtio-net
547                 if(p->did != 0x1000 && p->did != 0x1041)
548                         continue;
549                 /* non-transitional devices will have a revision > 0 */
550                 if(p->rid != 0)
551                         continue;
552                 /* non-transitional device will have typ+0x40 */
553                 if(pcicfgr16(p, 0x2E) != typ)
554                         continue;
555                 if((c = malloc(sizeof(Ctlr))) == nil){
556                         print("ethervirtio: no memory for Ctlr\n");
557                         break;
558                 }
559
560                 c->port = p->mem[0].bar & ~0x1;
561
562                 if(ioalloc(c->port, p->mem[0].size, 0, "ethervirtio") < 0){
563                         print("ethervirtio: port %ux in use\n", c->port);
564                         free(c);
565                         continue;
566                 }
567
568                 c->typ = typ;
569                 c->pcidev = p;
570                 c->id = (p->did<<16)|p->vid;
571
572                 /* §3.1.2 Legacy Device Initialization */
573                 outb(c->port+Qstatus, 0);
574
575                 outb(c->port+Qstatus, Sacknowledge|Sdriver);
576
577                 c->feat = inl(c->port+Qdevfeat);
578
579                 if((c->feat & (Fmac|Fstatus|Fctrlvq)) != (Fmac|Fstatus|Fctrlvq)){
580                         print("ethervirtio: feature mismatch %32.32lub\n", c->feat);
581                         outb(c->port+Qstatus, Sfailed);
582                         iofree(c->port);
583                         free(c);
584                         continue;
585                 }
586
587                 outl(c->port+Qdrvfeat, Fmac|Fstatus|Fctrlvq);
588
589                 /* part of the 1.0 spec, not used in legacy */
590                 /*
591                 outb(vd->port+Status, inb(vd->port+Status) | FeatureOk);
592                 i = inb(vd->port+Status);
593                 if(!(i & FeatureOk)){
594                         print("ethervirtio: feature mismatch %32.32lub\n", vd->feat);
595                         outb(vd->port+Status, Failed);
596                         iofree(vd->port);
597                         free(vd);
598                         continue;
599                 }
600                 */
601
602                 /* §4.1.5.1.4 Virtqueue Configuration */
603                 for(i=0; i<nelem(c->queue); i++){
604                         outs(c->port+Qselect, i);
605                         n = ins(c->port+Qsize);
606                         if(n == 0 || (n & (n-1)) != 0){
607                                 c->queue[i] = nil;
608                                 break;
609                         }
610                         if((c->queue[i] = mkqueue(n)) == nil)
611                                 break;
612                         coherence();
613                         outl(c->port+Qaddr, PADDR(c->queue[i]->desc)/VBY2PG);
614                 }
615                 c->nqueue = i;
616         
617                 /* read virtio mac */
618                 for(i = 0; i < Eaddrlen; i++)
619                         c->ea[i] = inb(c->port+Qmac+i);
620
621                 if(h == nil)
622                         h = c;
623                 else
624                         t->next = c;
625                 t = c;
626         }
627
628         return h;
629 }
630
631
632 static int
633 reset(Ether* edev)
634 {
635         Ctlr *ctlr;
636
637         if(ctlrhead == nil) {
638                 ctlrhead = pciprobe(1);
639         }
640
641         for(ctlr = ctlrhead; ctlr != nil; ctlr = ctlr->next){
642                 if(ctlr->active)
643                         continue;
644                 if(edev->port == 0 || edev->port == ctlr->port){
645                         ctlr->active = 1;
646                         break;
647                 }
648         }
649
650         if(ctlr == nil)
651                 return -1;
652
653         edev->ctlr = ctlr;
654         edev->port = ctlr->port;
655         edev->irq = ctlr->pcidev->intl;
656         edev->tbdf = ctlr->pcidev->tbdf;
657         edev->mbps = 1000;
658         edev->link = 1;
659
660         memmove(edev->ea, ctlr->ea, Eaddrlen);
661
662         edev->arg = edev;
663
664         edev->attach = attach;
665         edev->shutdown = shutdown;
666
667         edev->interrupt = interrupt;
668
669         edev->ifstat = ifstat;
670         edev->multicast = multicast;
671         edev->promiscuous = promiscuous;
672
673         return 0;
674 }
675
/* register this driver under the name "ethervirtio", with reset as its probe routine */
void
ethervirtiolink(void)
{
	addethercard("ethervirtio", reset);
}
681