]> git.lizzy.rs Git - plan9front.git/blob - sys/src/9/pc/ethervirtio.c
devkbd: bits bad! revert repeat/delay, better patches welcome
[plan9front.git] / sys / src / 9 / pc / ethervirtio.c
1 #include "u.h"
2 #include "../port/lib.h"
3 #include "mem.h"
4 #include "dat.h"
5 #include "fns.h"
6 #include "io.h"
7 #include "../port/error.h"
8 #include "../port/netif.h"
9 #include "../port/etherif.h"
10
11 /*
12  * virtio ethernet driver
13  * http://docs.oasis-open.org/virtio/virtio/v1.0/virtio-v1.0.html
14  */
15
/* short names for the structures defined further down in this file */
typedef struct Ctlr Ctlr;
typedef struct Vdesc Vdesc;
typedef struct Vheader Vheader;
typedef struct Vqueue Vqueue;
typedef struct Vring Vring;
typedef struct Vused Vused;
22
/* §2.1 Device Status Field */
enum {
	Sacknowledge	= 1<<0,
	Sdriver		= 1<<1,
	Sdriverok	= 1<<2,
	Sfeatureok	= 1<<3,
	Sfailed		= 1<<7,
};

/*
 * §4.1.4.8 Legacy Interfaces: A Note on PCI Device Layout
 * (offsets are added to Ctlr.port)
 */
enum {
	Qdevfeat	= 0x00,
	Qdrvfeat	= 0x04,
	Qaddr		= 0x08,
	Qsize		= 0x0C,
	Qselect		= 0x0E,
	Qnotify		= 0x10,
	Qstatus		= 0x12,
	Qisr		= 0x13,
	Qmac		= 0x14,
	Qnetstatus	= 0x1A,
};

/* flags in Qnetstatus */
enum {
	Nlinkup		= 1<<0,
	Nannounce	= 1<<1,
};

/* feature bits */
enum {
	Fmac		= 1<<5,
	Fstatus		= 1<<16,
	Fctrlvq		= 1<<17,
	Fctrlrx		= 1<<18,
};

enum {
	/* vring used flags */
	Unonotify	= 1,

	/* vring avail flags */
	Rnointerrupt	= 1,

	/* descriptor flags */
	Dnext		= 1,
	Dwrite		= 2,
	Dindirect	= 4,
};

/* struct sizes */
enum {
	VringSize	= 4,
	VdescSize	= 16,
	VusedSize	= 8,
	VheaderSize	= 10,
};

/*
 * §4.1.5.1.4.1 says pages are 4096 bytes
 * for the purposes of the driver.
 */
enum {
	VBY2PG		= 4096,
};
#define VPGROUND(s)	ROUND(s, VBY2PG)

/* virtqueue numbers; also the index into Ctlr.queue[] */
enum {
	Vrxq		= 0,
	Vtxq		= 1,
	Vctlq		= 2,
};

/* class/cmd for Vctlq */
enum {
	CtrlRx		= 0x00,
		CmdPromisc	= 0x00,
		CmdAllmulti	= 0x01,
	CtrlMac		= 0x01,
		CmdMacTableSet	= 0x00,
	CtrlVlan	= 0x02,
		CmdVlanAdd	= 0x00,
		CmdVlanDel	= 0x01,
};
89
90 struct Vring
91 {
92         u16int  flags;
93         u16int  idx;
94 };
95
96 struct Vdesc
97 {
98         u64int  addr;
99         u32int  len;
100         u16int  flags;
101         u16int  next;
102 };
103
104 struct Vused
105 {
106         u32int  id;
107         u32int  len;
108 };
109
110 struct Vheader
111 {
112         u8int   flags;
113         u8int   segtype;
114         u16int  hlen;
115         u16int  seglen;
116         u16int  csumstart;
117         u16int  csumend;
118 };
119
120 /* §2.4 Virtqueues */
121 struct Vqueue
122 {
123         Rendez;
124
125         uint    qsize;
126         uint    qmask;
127
128         Vdesc   *desc;
129
130         Vring   *avail;
131         u16int  *availent;
132         u16int  *availevent;
133
134         Vring   *used;
135         Vused   *usedent;
136         u16int  *usedevent;
137         u16int  lastused;
138
139         uint    nintr;
140         uint    nnote;
141 };
142
143 struct Ctlr {
144         Lock;
145
146         QLock   ctllock;
147
148         int     attached;
149
150         int     port;
151         Pcidev  *pcidev;
152         Ctlr    *next;
153         int     active;
154         int     id;
155         int     typ;
156         ulong   feat;
157         int     nqueue;
158
159         /* virtioether has 3 queues: rx, tx and ctl */
160         Vqueue  queue[3];
161 };
162
/* list of probed controllers, linked through Ctlr.next; filled by reset() via pciprobe() while it is still nil */
static Ctlr *ctlrhead;
164
165 static int
166 vhasroom(void *v)
167 {
168         Vqueue *q = v;
169         return q->lastused != q->used->idx;
170 }
171
172 static void
173 vqnotify(Ctlr *ctlr, int x)
174 {
175         Vqueue *q;
176
177         coherence();
178         q = &ctlr->queue[x];
179         if(q->used->flags & Unonotify)
180                 return;
181         q->nnote++;
182         outs(ctlr->port+Qnotify, x);
183 }
184
185 static void
186 txproc(void *v)
187 {
188         Vheader *header;
189         Block **blocks;
190         Ether *edev;
191         Ctlr *ctlr;
192         Vqueue *q;
193         Vused *u;
194         Block *b;
195         int i, j;
196
197         edev = v;
198         ctlr = edev->ctlr;
199         q = &ctlr->queue[Vtxq];
200
201         header = smalloc(VheaderSize);
202         blocks = smalloc(sizeof(Block*) * (q->qsize/2));
203
204         for(i = 0; i < q->qsize/2; i++){
205                 j = i << 1;
206                 q->desc[j].addr = PADDR(header);
207                 q->desc[j].len = VheaderSize;
208                 q->desc[j].next = j | 1;
209                 q->desc[j].flags = Dnext;
210
211                 q->availent[i] = q->availent[i + q->qsize/2] = j;
212
213                 j |= 1;
214                 q->desc[j].next = 0;
215                 q->desc[j].flags = 0;
216         }
217
218         q->avail->flags &= ~Rnointerrupt;
219
220         while(waserror())
221                 ;
222
223         while((b = qbread(edev->oq, 1000000)) != nil){
224                 for(;;){
225                         /* retire completed packets */
226                         while((i = q->lastused) != q->used->idx){
227                                 u = &q->usedent[i & q->qmask];
228                                 i = (u->id & q->qmask) >> 1;
229                                 if(blocks[i] == nil)
230                                         break;
231                                 freeb(blocks[i]);
232                                 blocks[i] = nil;
233                                 q->lastused++;
234                         }
235
236                         /* have free slot? */
237                         i = q->avail->idx & (q->qmask >> 1);
238                         if(blocks[i] == nil)
239                                 break;
240
241                         /* ring full, wait and retry */
242                         if(!vhasroom(q))
243                                 sleep(q, vhasroom, q);
244                 }
245
246                 /* slot is free, fill in descriptor */
247                 blocks[i] = b;
248                 j = (i << 1) | 1;
249                 q->desc[j].addr = PADDR(b->rp);
250                 q->desc[j].len = BLEN(b);
251                 coherence();
252                 q->avail->idx++;
253                 vqnotify(ctlr, Vtxq);
254         }
255
256         pexit("ether out queue closed", 1);
257 }
258
259 static void
260 rxproc(void *v)
261 {
262         Vheader *header;
263         Block **blocks;
264         Ether *edev;
265         Ctlr *ctlr;
266         Vqueue *q;
267         Vused *u;
268         Block *b;
269         int i, j;
270
271         edev = v;
272         ctlr = edev->ctlr;
273         q = &ctlr->queue[Vrxq];
274
275         header = smalloc(VheaderSize);
276         blocks = smalloc(sizeof(Block*) * (q->qsize/2));
277
278         for(i = 0; i < q->qsize/2; i++){
279                 j = i << 1;
280                 q->desc[j].addr = PADDR(header);
281                 q->desc[j].len = VheaderSize;
282                 q->desc[j].next = j | 1;
283                 q->desc[j].flags = Dwrite|Dnext;
284
285                 q->availent[i] = q->availent[i + q->qsize/2] = j;
286
287                 j |= 1;
288                 q->desc[j].next = 0;
289                 q->desc[j].flags = Dwrite;
290         }
291
292         q->avail->flags &= ~Rnointerrupt;
293
294         while(waserror())
295                 ;
296
297         for(;;){
298                 /* replenish receive ring */
299                 do {
300                         i = q->avail->idx & (q->qmask >> 1);
301                         if(blocks[i] != nil)
302                                 break;
303                         if((b = iallocb(ETHERMAXTU)) == nil)
304                                 break;
305                         blocks[i] = b;
306                         j = (i << 1) | 1;
307                         q->desc[j].addr = PADDR(b->rp);
308                         q->desc[j].len = BALLOC(b);
309                         coherence();
310                         q->avail->idx++;
311                 } while(q->avail->idx != q->used->idx);
312                 vqnotify(ctlr, Vrxq);
313
314                 /* wait for any packets to complete */
315                 if(!vhasroom(q))
316                         sleep(q, vhasroom, q);
317
318                 /* retire completed packets */
319                 while((i = q->lastused) != q->used->idx) {
320                         u = &q->usedent[i & q->qmask];
321                         i = (u->id & q->qmask) >> 1;
322                         if((b = blocks[i]) == nil)
323                                 break;
324
325                         blocks[i] = nil;
326
327                         b->wp = b->rp + u->len - VheaderSize;
328                         etheriq(edev, b);
329                         q->lastused++;
330                 }
331         }
332 }
333
334 static int
335 vctlcmd(Ether *edev, uchar class, uchar cmd, uchar *data, int ndata)
336 {
337         uchar hdr[2], ack[1];
338         Ctlr *ctlr;
339         Vqueue *q;
340         Vdesc *d;
341         int i;
342
343         ctlr = edev->ctlr;
344         q = &ctlr->queue[Vctlq];
345         if(q->qsize < 3)
346                 return -1;
347
348         qlock(&ctlr->ctllock);
349         while(waserror())
350                 ;
351
352         ack[0] = 0x55;
353         hdr[0] = class;
354         hdr[1] = cmd;
355
356         d = &q->desc[0];
357         d->addr = PADDR(hdr);
358         d->len = sizeof(hdr);
359         d->next = 1;
360         d->flags = Dnext;
361         d++;
362         d->addr = PADDR(data);
363         d->len = ndata;
364         d->next = 2;
365         d->flags = Dnext;
366         d++;
367         d->addr = PADDR(ack);
368         d->len = sizeof(ack);
369         d->next = 0;
370         d->flags = Dwrite;
371
372         i = q->avail->idx & q->qmask;
373         q->availent[i] = 0;
374         coherence();
375
376         q->avail->flags &= ~Rnointerrupt;
377         q->avail->idx++;
378         vqnotify(ctlr, Vctlq);
379         while(!vhasroom(q))
380                 sleep(q, vhasroom, q);
381         q->lastused = q->used->idx;
382         q->avail->flags |= Rnointerrupt;
383
384         qunlock(&ctlr->ctllock);
385         poperror();
386
387         if(ack[0] != 0)
388                 print("#l%d: vctlcmd: %ux.%ux -> %ux\n", edev->ctlrno, class, cmd, ack[0]);
389
390         return ack[0];
391 }
392
393 static void
394 interrupt(Ureg*, void* arg)
395 {
396         Ether *edev;
397         Ctlr *ctlr;
398         Vqueue *q;
399         int i;
400
401         edev = arg;
402         ctlr = edev->ctlr;
403         if(inb(ctlr->port+Qisr) & 1){
404                 for(i = 0; i < ctlr->nqueue; i++){
405                         q = &ctlr->queue[i];
406                         if(vhasroom(q)){
407                                 q->nintr++;
408                                 wakeup(q);
409                         }
410                 }
411         }
412 }
413
414 static void
415 attach(Ether* edev)
416 {
417         char name[KNAMELEN];
418         Ctlr* ctlr;
419
420         ctlr = edev->ctlr;
421         lock(ctlr);
422         if(!ctlr->attached){
423                 ctlr->attached = 1;
424
425                 /* ready to go */
426                 outb(ctlr->port+Qstatus, inb(ctlr->port+Qstatus) | Sdriverok);
427
428                 /* start kprocs */
429                 snprint(name, sizeof name, "#l%drx", edev->ctlrno);
430                 kproc(name, rxproc, edev);
431                 snprint(name, sizeof name, "#l%dtx", edev->ctlrno);
432                 kproc(name, txproc, edev);
433         }
434         unlock(ctlr);
435 }
436
437 static long
438 ifstat(Ether *edev, void *a, long n, ulong offset)
439 {
440         int i, l;
441         char *p;
442         Ctlr *ctlr;
443         Vqueue *q;
444
445         ctlr = edev->ctlr;
446
447         p = smalloc(READSTR);
448
449         l = snprint(p, READSTR, "devfeat %32.32lub\n", ctlr->feat);
450         l += snprint(p+l, READSTR-l, "drvfeat %32.32lub\n", inl(ctlr->port+Qdrvfeat));
451         l += snprint(p+l, READSTR-l, "devstatus %8.8ub\n", inb(ctlr->port+Qstatus));
452         if(ctlr->feat & Fstatus)
453                 l += snprint(p+l, READSTR-l, "netstatus %8.8ub\n",  inb(ctlr->port+Qnetstatus));
454
455         for(i = 0; i < ctlr->nqueue; i++){
456                 q = &ctlr->queue[i];
457                 l += snprint(p+l, READSTR-l,
458                         "vq%d %#p size %d avail->idx %d used->idx %d lastused %hud nintr %ud nnote %ud\n",
459                         i, q, q->qsize, q->avail->idx, q->used->idx, q->lastused, q->nintr, q->nnote);
460         }
461
462         n = readstr(offset, a, n, p);
463         free(p);
464
465         return n;
466 }
467
468 static void
469 shutdown(Ether* edev)
470 {
471         Ctlr *ctlr = edev->ctlr;
472         outb(ctlr->port+Qstatus, 0);
473         pciclrbme(ctlr->pcidev);
474 }
475
476 static void
477 promiscuous(void *arg, int on)
478 {
479         Ether *edev = arg;
480         uchar b[1];
481
482         b[0] = on != 0;
483         vctlcmd(edev, CtrlRx, CmdPromisc, b, sizeof(b));
484 }
485
486 static void
487 multicast(void *arg, uchar*, int)
488 {
489         Ether *edev = arg;
490         uchar b[1];
491
492         b[0] = edev->nmaddr > 0;
493         vctlcmd(edev, CtrlRx, CmdAllmulti, b, sizeof(b));
494 }
495
496 /* §2.4.2 Legacy Interfaces: A Note on Virtqueue Layout */
497 static ulong
498 queuesize(ulong size)
499 {
500         return VPGROUND(VdescSize*size + sizeof(u16int)*(3+size))
501                 + VPGROUND(sizeof(u16int)*3 + VusedSize*size);
502 }
503
504 static int
505 initqueue(Vqueue *q, int size)
506 {
507         uchar *p;
508
509         /* §2.4: Queue Size value is always a power of 2 and <= 32768 */
510         assert(!(size & (size - 1)) && size <= 32768);
511
512         p = mallocalign(queuesize(size), VBY2PG, 0, 0);
513         if(p == nil){
514                 print("ethervirtio: no memory for Vqueue\n");
515                 free(p);
516                 return -1;
517         }
518
519         q->desc = (void*)p;
520         p += VdescSize*size;
521         q->avail = (void*)p;
522         p += VringSize;
523         q->availent = (void*)p;
524         p += sizeof(u16int)*size;
525         q->availevent = (void*)p;
526         p += sizeof(u16int);
527
528         p = (uchar*)VPGROUND((uintptr)p);
529         q->used = (void*)p;
530         p += VringSize;
531         q->usedent = (void*)p;
532         p += VusedSize*size;
533         q->usedevent = (void*)p;
534
535         q->qsize = size;
536         q->qmask = q->qsize - 1;
537
538         q->lastused = q->avail->idx = q->used->idx = 0;
539
540         q->avail->flags |= Rnointerrupt;
541
542         return 0;
543 }
544
545 static Ctlr*
546 pciprobe(int typ)
547 {
548         Ctlr *c, *h, *t;
549         Pcidev *p;
550         int n, i;
551
552         h = t = nil;
553
554         /* §4.1.2 PCI Device Discovery */
555         for(p = nil; p = pcimatch(p, 0, 0);){
556                 if(p->vid != 0x1AF4)
557                         continue;
558                 /* the two possible DIDs for virtio-net */
559                 if(p->did != 0x1000 && p->did != 0x1041)
560                         continue;
561                 /* non-transitional devices will have a revision > 0 */
562                 if(p->rid != 0)
563                         continue;
564                 /* non-transitional device will have typ+0x40 */
565                 if(pcicfgr16(p, 0x2E) != typ)
566                         continue;
567                 if((c = mallocz(sizeof(Ctlr), 1)) == nil){
568                         print("ethervirtio: no memory for Ctlr\n");
569                         break;
570                 }
571
572                 c->port = p->mem[0].bar & ~0x1;
573                 if(ioalloc(c->port, p->mem[0].size, 0, "ethervirtio") < 0){
574                         print("ethervirtio: port %ux in use\n", c->port);
575                         free(c);
576                         continue;
577                 }
578
579                 c->typ = typ;
580                 c->pcidev = p;
581                 pcienable(p);
582                 c->id = (p->did<<16)|p->vid;
583
584                 /* §3.1.2 Legacy Device Initialization */
585                 outb(c->port+Qstatus, 0);
586                 outb(c->port+Qstatus, Sacknowledge|Sdriver);
587
588                 /* negotiate feature bits */
589                 c->feat = inl(c->port+Qdevfeat);
590                 outl(c->port+Qdrvfeat, c->feat & (Fmac|Fstatus|Fctrlvq|Fctrlrx));
591
592                 /* §4.1.5.1.4 Virtqueue Configuration */
593                 for(i=0; i<nelem(c->queue); i++){
594                         outs(c->port+Qselect, i);
595                         n = ins(c->port+Qsize);
596                         if(n == 0 || (n & (n-1)) != 0){
597                                 if(i < 2)
598                                         print("ethervirtio: queue %d has invalid size %d\n", i, n);
599                                 break;
600                         }
601                         if(initqueue(&c->queue[i], n) < 0)
602                                 break;
603                         coherence();
604                         outl(c->port+Qaddr, PADDR(c->queue[i].desc)/VBY2PG);
605                 }
606                 if(i < 2){
607                         print("ethervirtio: no queues\n");
608                         pcidisable(p);
609                         free(c);
610                         continue;
611                 }
612                 c->nqueue = i;          
613         
614                 if(h == nil)
615                         h = c;
616                 else
617                         t->next = c;
618                 t = c;
619         }
620
621         return h;
622 }
623
624
625 static int
626 reset(Ether* edev)
627 {
628         static uchar zeros[Eaddrlen];
629         Ctlr *ctlr;
630         int i;
631
632         if(ctlrhead == nil)
633                 ctlrhead = pciprobe(1);
634
635         for(ctlr = ctlrhead; ctlr != nil; ctlr = ctlr->next){
636                 if(ctlr->active)
637                         continue;
638                 if(edev->port == 0 || edev->port == ctlr->port){
639                         ctlr->active = 1;
640                         break;
641                 }
642         }
643
644         if(ctlr == nil)
645                 return -1;
646
647         edev->ctlr = ctlr;
648         edev->port = ctlr->port;
649         edev->irq = ctlr->pcidev->intl;
650         edev->tbdf = ctlr->pcidev->tbdf;
651         edev->mbps = 1000;
652         edev->link = 1;
653
654         if((ctlr->feat & Fmac) != 0 && memcmp(edev->ea, zeros, Eaddrlen) == 0){
655                 for(i = 0; i < Eaddrlen; i++)
656                         edev->ea[i] = inb(ctlr->port+Qmac+i);
657         } else {
658                 for(i = 0; i < Eaddrlen; i++)
659                         outb(ctlr->port+Qmac+i, edev->ea[i]);
660         }
661
662         edev->arg = edev;
663
664         edev->attach = attach;
665         edev->shutdown = shutdown;
666         edev->ifstat = ifstat;
667
668         if((ctlr->feat & (Fctrlvq|Fctrlrx)) == (Fctrlvq|Fctrlrx)){
669                 edev->multicast = multicast;
670                 edev->promiscuous = promiscuous;
671         }
672
673         pcisetbme(ctlr->pcidev);
674         intrenable(edev->irq, interrupt, edev, edev->tbdf, edev->name);
675
676         return 0;
677 }
678
/*
 * Link-time entry point: registers this driver under the card
 * type name "virtio" with reset() as its probe/reset routine.
 */
void
ethervirtiolink(void)
{
	addethercard("virtio", reset);
}
684