]> git.lizzy.rs Git - plan9front.git/blob - sys/src/9/pc/ethervirtio.c
ethervirtio: report correct length; fail gracefully if we can't initialize queues
[plan9front.git] / sys / src / 9 / pc / ethervirtio.c
1 #include "u.h"
2 #include "../port/lib.h"
3 #include "mem.h"
4 #include "dat.h"
5 #include "fns.h"
6 #include "io.h"
7 #include "../port/error.h"
8 #include "../port/netif.h"
9 #include "etherif.h"
10
11 /*
12  * virtio ethernet driver
13  * http://docs.oasis-open.org/virtio/virtio/v1.0/virtio-v1.0.html
14  */
15
/* forward declarations: ring structures shared with the device ... */
typedef struct Vdesc Vdesc;
typedef struct Vring Vring;
typedef struct Vused Vused;
typedef struct Vheader Vheader;
/* ... and the driver's own bookkeeping */
typedef struct Vqueue Vqueue;
typedef struct Ctlr Ctlr;
22
/*
 * Register offsets, flag bits and sizes used by the driver.
 * Section numbers refer to the virtio 1.0 specification.
 */
enum {
	/* §2.1 Device Status Field */
	Sacknowledge	= 1<<0,
	Sdriver		= 1<<1,
	Sdriverok	= 1<<2,
	Sfeatureok	= 1<<3,
	Sfailed		= 1<<7,

	/* §4.1.4.8 Legacy Interfaces: A Note on PCI Device Layout */
	Qdevfeat	= 0,
	Qdrvfeat	= 4,
	Qaddr		= 8,
	Qsize		= 12,
	Qselect		= 14,
	Qnotify		= 16,
	Qstatus		= 18,
	Qisr		= 19,
	Qmac		= 20,
	Qnetstatus	= 26,

	/* flags in Qnetstatus */
	Nlinkup		= 1<<0,
	Nannounce	= 1<<1,

	/* feature bits */
	Fmac		= 1<<5,
	Fstatus		= 1<<16,
	Fctrlvq		= 1<<17,
	Fctrlrx		= 1<<18,

	/* vring used flags */
	Unonotify	= 1,
	/* vring avail flags */
	Rnointerrupt	= 1,

	/* descriptor flags */
	Dnext		= 1,
	Dwrite		= 2,
	Dindirect	= 4,

	/* sizes in bytes of the structures shared with the device */
	VringSize	= 4,
	VdescSize	= 16,
	VusedSize	= 8,
	VheaderSize	= 10,

	/*
	 * §4.1.5.1.4.1 says pages are 4096 bytes
	 * for the purposes of the driver.
	 */
	VBY2PG		= 4096,

	/* queue indices */
	Vrxq		= 0,
	Vtxq		= 1,
	Vctlq		= 2,

	/* class/cmd for Vctlq */
	CtrlRx		= 0x00,
		CmdPromisc	= 0x00,
		CmdAllmulti	= 0x01,
	CtrlMac		= 0x01,
		CmdMacTableSet	= 0x00,
	CtrlVlan	= 0x02,
		CmdVlanAdd	= 0x00,
		CmdVlanDel	= 0x01,
};

/* round s up to a multiple of the virtio page size */
#define VPGROUND(s)	ROUND(s, VBY2PG)
89
90 struct Vring
91 {
92         u16int  flags;
93         u16int  idx;
94 };
95
96 struct Vdesc
97 {
98         u64int  addr;
99         u32int  len;
100         u16int  flags;
101         u16int  next;
102 };
103
104 struct Vused
105 {
106         u32int  id;
107         u32int  len;
108 };
109
110 struct Vheader
111 {
112         u8int   flags;
113         u8int   segtype;
114         u16int  hlen;
115         u16int  seglen;
116         u16int  csumstart;
117         u16int  csumend;
118 };
119
120 /* §2.4 Virtqueues */
121 struct Vqueue
122 {
123         Rendez;
124
125         uint    qsize;
126         uint    qmask;
127
128         Vdesc   *desc;
129
130         Vring   *avail;
131         u16int  *availent;
132         u16int  *availevent;
133
134         Vring   *used;
135         Vused   *usedent;
136         u16int  *usedevent;
137         u16int  lastused;
138
139         uint    nintr;
140         uint    nnote;
141 };
142
143 struct Ctlr {
144         Lock;
145
146         QLock   ctllock;
147
148         int     attached;
149
150         int     port;
151         Pcidev  *pcidev;
152         Ctlr    *next;
153         int     active;
154         int     id;
155         int     typ;
156         ulong   feat;
157         int     nqueue;
158
159         /* virtioether has 3 queues: rx, tx and ctl */
160         Vqueue  queue[3];
161 };
162
163 static Ctlr *ctlrhead;
164
165 static int
166 vhasroom(void *v)
167 {
168         Vqueue *q = v;
169         return q->lastused != q->used->idx;
170 }
171
172 static void
173 vqnotify(Ctlr *ctlr, int x)
174 {
175         Vqueue *q;
176
177         coherence();
178         q = &ctlr->queue[x];
179         if(q->used->flags & Unonotify)
180                 return;
181         q->nnote++;
182         outs(ctlr->port+Qnotify, x);
183 }
184
185 static void
186 txproc(void *v)
187 {
188         Vheader *header;
189         Block **blocks;
190         Ether *edev;
191         Ctlr *ctlr;
192         Vqueue *q;
193         Vused *u;
194         Block *b;
195         int i, j;
196
197         edev = v;
198         ctlr = edev->ctlr;
199         q = &ctlr->queue[Vtxq];
200
201         header = smalloc(VheaderSize);
202         blocks = smalloc(sizeof(Block*) * (q->qsize/2));
203
204         for(i = 0; i < q->qsize/2; i++){
205                 j = i << 1;
206                 q->desc[j].addr = PADDR(header);
207                 q->desc[j].len = VheaderSize;
208                 q->desc[j].next = j | 1;
209                 q->desc[j].flags = Dnext;
210
211                 q->availent[i] = q->availent[i + q->qsize/2] = j;
212
213                 j |= 1;
214                 q->desc[j].next = 0;
215                 q->desc[j].flags = 0;
216         }
217
218         q->avail->flags &= ~Rnointerrupt;
219
220         while(waserror())
221                 ;
222
223         while((b = qbread(edev->oq, 1000000)) != nil){
224                 for(;;){
225                         /* retire completed packets */
226                         while((i = q->lastused) != q->used->idx){
227                                 u = &q->usedent[i & q->qmask];
228                                 i = (u->id & q->qmask) >> 1;
229                                 if(blocks[i] == nil)
230                                         break;
231                                 freeb(blocks[i]);
232                                 blocks[i] = nil;
233                                 q->lastused++;
234                         }
235
236                         /* have free slot? */
237                         i = q->avail->idx & (q->qmask >> 1);
238                         if(blocks[i] == nil)
239                                 break;
240
241                         /* ring full, wait and retry */
242                         if(!vhasroom(q))
243                                 sleep(q, vhasroom, q);
244                 }
245
246                 /* slot is free, fill in descriptor */
247                 blocks[i] = b;
248                 j = (i << 1) | 1;
249                 q->desc[j].addr = PADDR(b->rp);
250                 q->desc[j].len = BLEN(b);
251                 coherence();
252                 q->avail->idx++;
253                 vqnotify(ctlr, Vtxq);
254         }
255
256         pexit("ether out queue closed", 1);
257 }
258
259 static void
260 rxproc(void *v)
261 {
262         Vheader *header;
263         Block **blocks;
264         Ether *edev;
265         Ctlr *ctlr;
266         Vqueue *q;
267         Vused *u;
268         Block *b;
269         int i, j;
270
271         edev = v;
272         ctlr = edev->ctlr;
273         q = &ctlr->queue[Vrxq];
274
275         header = smalloc(VheaderSize);
276         blocks = smalloc(sizeof(Block*) * (q->qsize/2));
277
278         for(i = 0; i < q->qsize/2; i++){
279                 j = i << 1;
280                 q->desc[j].addr = PADDR(header);
281                 q->desc[j].len = VheaderSize;
282                 q->desc[j].next = j | 1;
283                 q->desc[j].flags = Dwrite|Dnext;
284
285                 q->availent[i] = q->availent[i + q->qsize/2] = j;
286
287                 j |= 1;
288                 q->desc[j].next = 0;
289                 q->desc[j].flags = Dwrite;
290         }
291
292         q->avail->flags &= ~Rnointerrupt;
293
294         while(waserror())
295                 ;
296
297         for(;;){
298                 /* replenish receive ring */
299                 do {
300                         i = q->avail->idx & (q->qmask >> 1);
301                         if(blocks[i] != nil)
302                                 break;
303                         if((b = iallocb(ETHERMAXTU)) == nil)
304                                 break;
305                         blocks[i] = b;
306                         j = (i << 1) | 1;
307                         q->desc[j].addr = PADDR(b->rp);
308                         q->desc[j].len = BALLOC(b);
309                         coherence();
310                         q->avail->idx++;
311                 } while(q->avail->idx != q->used->idx);
312                 vqnotify(ctlr, Vrxq);
313
314                 /* wait for any packets to complete */
315                 if(!vhasroom(q))
316                         sleep(q, vhasroom, q);
317
318                 /* retire completed packets */
319                 while((i = q->lastused) != q->used->idx) {
320                         u = &q->usedent[i & q->qmask];
321                         i = (u->id & q->qmask) >> 1;
322                         if((b = blocks[i]) == nil)
323                                 break;
324
325                         blocks[i] = nil;
326
327                         b->wp = b->rp + u->len - VheaderSize;
328                         etheriq(edev, b, 1);
329                         q->lastused++;
330                 }
331         }
332 }
333
334 static int
335 vctlcmd(Ether *edev, uchar class, uchar cmd, uchar *data, int ndata)
336 {
337         uchar hdr[2], ack[1];
338         Ctlr *ctlr;
339         Vqueue *q;
340         Vdesc *d;
341         int i;
342
343         ctlr = edev->ctlr;
344         q = &ctlr->queue[Vctlq];
345         if(q->qsize < 3)
346                 return -1;
347
348         qlock(&ctlr->ctllock);
349         while(waserror())
350                 ;
351
352         ack[0] = 0x55;
353         hdr[0] = class;
354         hdr[1] = cmd;
355
356         d = &q->desc[0];
357         d->addr = PADDR(hdr);
358         d->len = sizeof(hdr);
359         d->next = 1;
360         d->flags = Dnext;
361         d++;
362         d->addr = PADDR(data);
363         d->len = ndata;
364         d->next = 2;
365         d->flags = Dnext;
366         d++;
367         d->addr = PADDR(ack);
368         d->len = sizeof(ack);
369         d->next = 0;
370         d->flags = Dwrite;
371
372         i = q->avail->idx & q->qmask;
373         q->availent[i] = 0;
374         coherence();
375
376         q->avail->flags &= ~Rnointerrupt;
377         q->avail->idx++;
378         vqnotify(ctlr, Vctlq);
379         while(!vhasroom(q))
380                 sleep(q, vhasroom, q);
381         q->lastused = q->used->idx;
382         q->avail->flags |= Rnointerrupt;
383
384         qunlock(&ctlr->ctllock);
385         poperror();
386
387         if(ack[0] != 0)
388                 print("#l%d: vctlcmd: %ux.%ux -> %ux\n", edev->ctlrno, class, cmd, ack[0]);
389
390         return ack[0];
391 }
392
393 static void
394 interrupt(Ureg*, void* arg)
395 {
396         Ether *edev;
397         Ctlr *ctlr;
398         Vqueue *q;
399         int i;
400
401         edev = arg;
402         ctlr = edev->ctlr;
403         if(inb(ctlr->port+Qisr) & 1){
404                 for(i = 0; i < ctlr->nqueue; i++){
405                         q = &ctlr->queue[i];
406                         if(vhasroom(q)){
407                                 q->nintr++;
408                                 wakeup(q);
409                         }
410                 }
411         }
412 }
413
414 static void
415 attach(Ether* edev)
416 {
417         char name[KNAMELEN];
418         Ctlr* ctlr;
419
420         ctlr = edev->ctlr;
421         lock(ctlr);
422         if(!ctlr->attached){
423                 ctlr->attached = 1;
424
425                 /* ready to go */
426                 outb(ctlr->port+Qstatus, inb(ctlr->port+Qstatus) | Sdriverok);
427
428                 /* start kprocs */
429                 snprint(name, sizeof name, "#l%drx", edev->ctlrno);
430                 kproc(name, rxproc, edev);
431                 snprint(name, sizeof name, "#l%dtx", edev->ctlrno);
432                 kproc(name, txproc, edev);
433         }
434         unlock(ctlr);
435 }
436
437 static long
438 ifstat(Ether *edev, void *a, long n, ulong offset)
439 {
440         int i, l;
441         char *p;
442         Ctlr *ctlr;
443         Vqueue *q;
444
445         ctlr = edev->ctlr;
446
447         p = smalloc(READSTR);
448
449         l = snprint(p, READSTR, "devfeat %32.32lub\n", ctlr->feat);
450         l += snprint(p+l, READSTR-l, "drvfeat %32.32lub\n", inl(ctlr->port+Qdrvfeat));
451         l += snprint(p+l, READSTR-l, "devstatus %8.8ub\n", inb(ctlr->port+Qstatus));
452         if(ctlr->feat & Fstatus)
453                 l += snprint(p+l, READSTR-l, "netstatus %8.8ub\n",  inb(ctlr->port+Qnetstatus));
454
455         for(i = 0; i < ctlr->nqueue; i++){
456                 q = &ctlr->queue[i];
457                 l += snprint(p+l, READSTR-l,
458                         "vq%d %#p size %d avail->idx %d used->idx %d lastused %hud nintr %ud nnote %ud\n",
459                         i, q, q->qsize, q->avail->idx, q->used->idx, q->lastused, q->nintr, q->nnote);
460         }
461
462         n = readstr(offset, a, n, p);
463         free(p);
464
465         return n;
466 }
467
468 static void
469 shutdown(Ether* edev)
470 {
471         Ctlr *ctlr = edev->ctlr;
472         outb(ctlr->port+Qstatus, 0);
473 }
474
475 static void
476 promiscuous(void *arg, int on)
477 {
478         Ether *edev = arg;
479         uchar b[1];
480
481         b[0] = on != 0;
482         vctlcmd(edev, CtrlRx, CmdPromisc, b, sizeof(b));
483 }
484
485 static void
486 multicast(void *arg, uchar*, int)
487 {
488         Ether *edev = arg;
489         uchar b[1];
490
491         b[0] = edev->nmaddr > 0;
492         vctlcmd(edev, CtrlRx, CmdAllmulti, b, sizeof(b));
493 }
494
495 /* §2.4.2 Legacy Interfaces: A Note on Virtqueue Layout */
496 static ulong
497 queuesize(ulong size)
498 {
499         return VPGROUND(VdescSize*size + sizeof(u16int)*(3+size))
500                 + VPGROUND(sizeof(u16int)*3 + VusedSize*size);
501 }
502
503 static int
504 initqueue(Vqueue *q, int size)
505 {
506         uchar *p;
507
508         /* §2.4: Queue Size value is always a power of 2 and <= 32768 */
509         assert(!(size & (size - 1)) && size <= 32768);
510
511         p = mallocalign(queuesize(size), VBY2PG, 0, 0);
512         if(p == nil){
513                 print("ethervirtio: no memory for Vqueue\n");
514                 free(p);
515                 return -1;
516         }
517
518         q->desc = (void*)p;
519         p += VdescSize*size;
520         q->avail = (void*)p;
521         p += VringSize;
522         q->availent = (void*)p;
523         p += sizeof(u16int)*size;
524         q->availevent = (void*)p;
525         p += sizeof(u16int);
526
527         p = (uchar*)VPGROUND((uintptr)p);
528         q->used = (void*)p;
529         p += VringSize;
530         q->usedent = (void*)p;
531         p += VusedSize*size;
532         q->usedevent = (void*)p;
533
534         q->qsize = size;
535         q->qmask = q->qsize - 1;
536
537         q->lastused = q->avail->idx = q->used->idx = 0;
538
539         q->avail->flags |= Rnointerrupt;
540
541         return 0;
542 }
543
544 static Ctlr*
545 pciprobe(int typ)
546 {
547         Ctlr *c, *h, *t;
548         Pcidev *p;
549         int n, i;
550
551         h = t = nil;
552
553         /* §4.1.2 PCI Device Discovery */
554         for(p = nil; p = pcimatch(p, 0, 0);){
555                 if(p->vid != 0x1AF4)
556                         continue;
557                 /* the two possible DIDs for virtio-net */
558                 if(p->did != 0x1000 && p->did != 0x1041)
559                         continue;
560                 /* non-transitional devices will have a revision > 0 */
561                 if(p->rid != 0)
562                         continue;
563                 /* non-transitional device will have typ+0x40 */
564                 if(pcicfgr16(p, 0x2E) != typ)
565                         continue;
566                 if((c = mallocz(sizeof(Ctlr), 1)) == nil){
567                         print("ethervirtio: no memory for Ctlr\n");
568                         break;
569                 }
570
571                 c->port = p->mem[0].bar & ~0x1;
572                 if(ioalloc(c->port, p->mem[0].size, 0, "ethervirtio") < 0){
573                         print("ethervirtio: port %ux in use\n", c->port);
574                         free(c);
575                         continue;
576                 }
577
578                 c->typ = typ;
579                 c->pcidev = p;
580                 c->id = (p->did<<16)|p->vid;
581
582                 /* §3.1.2 Legacy Device Initialization */
583                 outb(c->port+Qstatus, 0);
584                 outb(c->port+Qstatus, Sacknowledge|Sdriver);
585
586                 /* negotiate feature bits */
587                 c->feat = inl(c->port+Qdevfeat);
588                 outl(c->port+Qdrvfeat, c->feat & (Fmac|Fstatus|Fctrlvq|Fctrlrx));
589
590                 /* §4.1.5.1.4 Virtqueue Configuration */
591                 for(i=0; i<nelem(c->queue); i++){
592                         outs(c->port+Qselect, i);
593                         n = ins(c->port+Qsize);
594                         if(n == 0 || (n & (n-1)) != 0){
595                                 if(i < 2)
596                                         print("ethervirtio: queue %d has invalid size %d\n", i, n);
597                                 break;
598                         }
599                         if(initqueue(&c->queue[i], n) < 0)
600                                 break;
601                         coherence();
602                         outl(c->port+Qaddr, PADDR(c->queue[i].desc)/VBY2PG);
603                 }
604                 if(i < 2){
605                         print("ethervirtio: no queues\n");
606                         free(c);
607                         continue;
608                 }
609                 c->nqueue = i;          
610         
611                 if(h == nil)
612                         h = c;
613                 else
614                         t->next = c;
615                 t = c;
616         }
617
618         return h;
619 }
620
621
622 static int
623 reset(Ether* edev)
624 {
625         static uchar zeros[Eaddrlen];
626         Ctlr *ctlr;
627         int i;
628
629         if(ctlrhead == nil) {
630                 ctlrhead = pciprobe(1);
631         }
632
633         for(ctlr = ctlrhead; ctlr != nil; ctlr = ctlr->next){
634                 if(ctlr->active)
635                         continue;
636                 if(edev->port == 0 || edev->port == ctlr->port){
637                         ctlr->active = 1;
638                         break;
639                 }
640         }
641
642         if(ctlr == nil)
643                 return -1;
644
645         edev->ctlr = ctlr;
646         edev->port = ctlr->port;
647         edev->irq = ctlr->pcidev->intl;
648         edev->tbdf = ctlr->pcidev->tbdf;
649         edev->mbps = 1000;
650         edev->link = 1;
651
652         if((ctlr->feat & Fmac) != 0 && memcmp(edev->ea, zeros, Eaddrlen) == 0){
653                 for(i = 0; i < Eaddrlen; i++)
654                         edev->ea[i] = inb(ctlr->port+Qmac+i);
655         } else {
656                 for(i = 0; i < Eaddrlen; i++)
657                         outb(ctlr->port+Qmac+i, edev->ea[i]);
658         }
659
660         edev->arg = edev;
661
662         edev->attach = attach;
663         edev->shutdown = shutdown;
664         edev->interrupt = interrupt;
665         edev->ifstat = ifstat;
666
667         if((ctlr->feat & (Fctrlvq|Fctrlrx)) == (Fctrlvq|Fctrlrx)){
668                 edev->multicast = multicast;
669                 edev->promiscuous = promiscuous;
670         }
671
672         return 0;
673 }
674
675 void
676 ethervirtiolink(void)
677 {
678         addethercard("virtio", reset);
679 }
680