/*
 * virtio 1.0 disk driver
 * http://docs.oasis-open.org/virtio/virtio/v1.0/virtio-v1.0.html
 *
 * In contrast to sdvirtio.c, this driver handles the non-legacy
 * interface for virtio disk, which uses mmio for all register accesses
 * and requires an elaborate pci capability structure dance to get working.
 *
 * It is kind of pointless, as it is most likely slower than
 * port i/o (harder to emulate on the pc platform).
 *
 * The reason this driver is needed is that vultr sets the
 * disable-legacy=on option in the -device parameter for qemu
 * on their hypervisor.
 */
17 #include "../port/lib.h"
22 #include "../port/pci.h"
24 #include "../port/error.h"
26 #include "../port/sd.h"
/* forward typedefs for the driver's structures (definitions follow in the full file) */
28 typedef struct Vscsidev Vscsidev;
29 typedef struct Vblkdev Vblkdev;
31 typedef struct Vconfig Vconfig;
32 typedef struct Vring Vring;
33 typedef struct Vdesc Vdesc;
34 typedef struct Vused Vused;
35 typedef struct Vqueue Vqueue;
36 typedef struct Vdev Vdev;
/*
 * Fragment of the structure declarations (Vscsidev/Vconfig/Vdev fields).
 * NOTE(review): this excerpt omits most members, so the layouts cannot
 * be documented reliably here — consult the full file.
 */
54 /* descriptor flags */
78 u32int event_info_size;
105 u16int queuemsixvect;
108 u16int queuenotifyoff;
174 void *dev; /* device specific config (for scsi) */
180 u32int notifyoffmult;
/*
 * Fragment of mkvqueue(size): allocate a Vqueue (plus a per-descriptor
 * rock pointer array) and a page-rounded shared area that is carved
 * into the descriptor table, avail ring (+ event index) and used ring
 * (+ event index); finally every descriptor is chained onto the free
 * list.  NOTE(review): interior lines are missing from this excerpt
 * and stale source line numbers are fused into the text.
 */
192 q = malloc(sizeof(*q) + sizeof(void*)*size);
194 PGROUND(sizeof(Vdesc)*size +
196 sizeof(u16int)*size +
202 if(p == nil || q == nil){
203 print("virtio: no memory for Vqueue\n");
210 p += sizeof(Vdesc)*size;
213 q->availent = (void*)p;
214 p += sizeof(u16int)*size;
215 q->availevent = (void*)p;
218 p = (uchar*)PGROUND((uintptr)p); /* used ring appears to start on its own page */
221 q->usedent = (void*)p;
222 p += sizeof(Vused)*size;
223 q->usedevent = (void*)p;
226 q->nfree = q->size = size;
227 for(i=0; i<size; i++){ /* link all descriptors onto the free list */
228 q->desc[i].next = q->free;
/*
 * Fragment of matchvirtiocfgcap(): pcienumcaps() callback that accepts
 * only vendor-specific pci capabilities (id 9) whose cfg_type byte
 * matches typ and whose BAR refers to a valid memory (non-i/o) region.
 */
236 matchvirtiocfgcap(Pcidev *p, int cap, int off, int typ)
240 if(cap != 9 || pcicfgr8(p, off+3) != typ)
243 /* skip invalid or non memory bars */
244 bar = pcicfgr8(p, off+4);
245 if(bar < 0 || bar >= nelem(p->mem)
246 || p->mem[bar].size == 0
247 || (p->mem[bar].bar & 3) != 0)
/* virtiocap: locate the virtio capability of type typ in p's pci capability list */
254 virtiocap(Pcidev *p, int typ)
256 return pcienumcaps(p, matchvirtiocfgcap, typ);
/*
 * Fragment of virtiomapregs(): read the bar index, offset and length
 * out of the virtio pci capability, bounds-check against the bar's
 * size and vmap() the physical range.
 */
260 virtiomapregs(Pcidev *p, int cap, int size)
267 bar = pcicfgr8(p, cap+4) % nelem(p->mem);
268 addr = pcicfgr32(p, cap+8);
269 len = pcicfgr32(p, cap+12);
274 if(addr+len > p->mem[bar].size)
276 addr += p->mem[bar].bar & ~0xFULL; /* mask bar flag bits to get the base address */
277 return vmap(addr, size);
/*
 * Fragment of viopnpdevs(typ): enumerate virtio 1.0 pci devices
 * (vendor 0x1AF4, device id 0x1040+typ), map the common config
 * (cap type 1), isr (type 3) and notify (type 2) regions, run the
 * reset/status handshake, negotiate features and set up the
 * virtqueues.  NOTE(review): many interior lines are missing from
 * this excerpt.
 */
291 for(p = nil; p = pcimatch(p, 0x1AF4, 0x1040+typ);){
294 if((cap = virtiocap(p, 1)) < 0)
296 bar = pcicfgr8(p, cap+4) % nelem(p->mem);
297 cfg = virtiomapregs(p, cap, sizeof(Vconfig));
300 if((vd = malloc(sizeof(*vd))) == nil){
301 print("virtio: no memory for Vdev\n");
304 vd->port = p->mem[bar].bar & ~0xFULL;
310 vd->isr = virtiomapregs(p, virtiocap(p, 3), 0);
318 cap = virtiocap(p, 2);
319 vd->notify = virtiomapregs(p, cap, 0);
320 if(vd->notify == nil)
322 vd->notifyoffmult = pcicfgr32(p, cap+16);
326 while(cfg->status != 0) /* presumably spins until device reset completes -- the status write is not visible in this excerpt */
328 cfg->status = Acknowledge|Driver;
330 /* negotiate feature bits */
332 vd->feat[1] = cfg->devfeat;
334 vd->feat[0] = cfg->devfeat;
336 cfg->drvfeat = vd->feat[1] & 1; /* keeps only feature bit 32 -- looks like VIRTIO_F_VERSION_1; confirm against full file */
339 cfg->status |= FeaturesOk;
341 for(i=0; i<nelem(vd->queue); i++){
344 if(n == 0 || (n & (n-1)) != 0) /* queue size must be a non-zero power of two */
346 if((q = mkvqueue(n)) == nil)
348 q->notify = vd->notify + vd->notifyoffmult * cfg->queuenotifyoff;
353 cfg->queuedesc = PADDR(q->desc);
354 cfg->queueavail = PADDR(q->avail);
355 cfg->queueused = PADDR(q->used);
/*
 * Fragment of vqinterrupt(): consume new entries from the used ring,
 * flag the waiting rock as done, and return each completed descriptor
 * chain to the free list.
 */
375 vqinterrupt(Vqueue *q)
384 while((q->lastused ^ q->used->idx) & m){ /* indices differ -> unconsumed completions */
385 id = q->usedent[q->lastused++ & m].id;
389 r->done = 1; /* hands off */
395 id = q->desc[free].next;
396 q->desc[free].next = q->free;
399 } while(q->desc[free].flags & Next); /* walk the whole descriptor chain */
/*
 * Fragment of viointerrupt(): service the request virtqueue (queue 2
 * for scsi devices, queue 0 for block).  The last line appears to be
 * viodone(), the tsleep condition that is true once vqinterrupt() has
 * flagged the rock.
 */
405 viointerrupt(Ureg *, void *arg)
410 vqinterrupt(vd->queue[vd->typ == TypSCSI ? 2 : 0]);
416 return ((struct Rock*)arg)->done;
/*
 * Fragment of vqio(): publish a prepared descriptor chain (head) on
 * the avail ring, notify the device unless it asked for notification
 * suppression, then sleep until the interrupt handler marks the rock
 * done.
 */
420 vqio(Vqueue *q, int head)
425 rock.sleep = &up->sleep;
426 q->rock[head] = &rock;
427 q->availent[q->avail->idx & (q->size-1)] = head;
431 if((q->used->flags & 1) == 0) /* device did not suppress notifications */
432 *((u16int*)q->notify) = q->idx;
436 tsleep(rock.sleep, viodone, &rock, 1000);
/*
 * Fragment of vioblkreq(): build a descriptor chain for a virtio
 * block request -- request header, data buffer, status byte -- after
 * waiting for enough free descriptors.  typ appears to select the
 * request type (read/write/flush); confirm against the full file.
 * NOTE(review): interior lines are missing from this excerpt.
 */
445 vioblkreq(Vdev *vd, int typ, void *a, long count, long secsize, uvlong lba)
447 int need, free, head;
452 struct Vioblkreqhdr {
469 while(q->nfree < need){ /* not enough free descriptors: back off and retry */
473 tsleep(&up->sleep, return0, 0, 500);
479 head = free = q->free;
481 d = &q->desc[free]; free = d->next; /* 1st descriptor: request header */
482 d->addr = PADDR(&req);
483 d->len = sizeof(req);
487 d = &q->desc[free]; free = d->next; /* 2nd descriptor: data buffer */
489 d->len = secsize*count;
490 d->flags = typ ? Next : (Write|Next); /* device writes the buffer when typ==0 (read) */
493 d = &q->desc[free]; free = d->next; /* 3rd descriptor: status byte */
494 d->addr = PADDR(&status);
495 d->len = sizeof(status);
501 /* queue io, unlock and wait for completion */
/*
 * Fragment of vioscsireq(): build a virtio-scsi request -- request
 * header (lun plus the SDreq pointer, presumably used as the tag),
 * optional data-out descriptor, response buffer, optional data-in
 * descriptor -- then copy status, sense data and residual length back
 * into the SDreq.  NOTE(review): interior lines are missing from this
 * excerpt.
 */
510 u8int resp[4+4+2+2+SENSESIZE];
511 u8int req[8+8+3+CDBSIZE];
524 memset(resp, 0, sizeof(resp));
525 memset(req, 0, sizeof(req));
529 req[3] = r->lun&0xFF;
530 *(u64int*)(&req[8]) = (uintptr)r; /* SDreq pointer stored in the request id field -- confirm */
532 memmove(&req[8+8+3], r->cmd, r->clen); /* cdb follows the fixed header */
540 tsleep(&up->sleep, return0, 0, 500);
546 head = free = q->free;
548 d = &q->desc[free]; free = d->next; /* request header + cdb */
549 d->addr = PADDR(req);
550 d->len = 8+8+3+scsi->cdb_size;
553 if(r->write && r->dlen > 0){ /* data-out descriptor, device-readable */
554 d = &q->desc[free]; free = d->next;
555 d->addr = PADDR(r->data);
560 d = &q->desc[free]; free = d->next; /* response buffer, device-writable */
561 d->addr = PADDR(resp);
562 d->len = 4+4+2+2+scsi->sense_size;
565 if(!r->write && r->dlen > 0){ /* data-in descriptor, device-writable */
568 d = &q->desc[free]; free = d->next;
569 d->addr = PADDR(r->data);
575 q->nfree -= 2 + (r->dlen > 0); /* header+response always; one more when data moves */
577 /* queue io, unlock and wait for completion */
580 /* response+status */
581 r->status = resp[10];
586 len = *((u32int*)&resp[0]); /* sense length from the response header */
588 if(len > sizeof(r->sense))
589 len = sizeof(r->sense);
590 memmove(r->sense, &resp[4+4+2+2], len);
591 r->flags |= SDvalidsense;
595 len = *((u32int*)&resp[4]); /* residual: bytes NOT transferred */
599 r->rlen = r->dlen - len;
/*
 * Fragment of viobio(): bio entry point.  scsi devices are delegated
 * to scsibio(); block devices split the transfer into max-sized chunks
 * issued through vioblkreq().
 */
606 viobio(SDunit *u, int lun, int write, void *a, long count, uvlong lba)
608 long ss, cc, max, ret;
612 if(vd->typ == TypSCSI)
613 return scsibio(u, lun, write, a, count, lba);
619 if((cc = count) > max
621 if(vioblkreq(vd, write != 0, (uchar*)a + ret, cc, ss, lba) != 0)
/*
 * Fragment of viorio(): SDreq entry point.  scsi devices go straight
 * to vioscsireq().  For block devices, cdb opcodes 0x35/0x91 (SCSI
 * SYNCHRONIZE CACHE 10/16) are mapped to a virtio request of type 4
 * (presumably flush); everything else is emulated via sdfakescsi()/
 * sdfakescsirw() on top of viobio().
 */
640 if(vd->typ == TypSCSI)
641 return vioscsireq(r);
642 if(r->cmd[0] == 0x35 || r->cmd[0] == 0x91){
643 if(vioblkreq(vd, 4, nil, 0, 0, 0) != 0)
644 return sdsetsense(r, SDcheck, 3, 0xc, 2);
645 return sdsetsense(r, SDok, 0, 0, 0);
647 if((i = sdfakescsi(r)) != SDnostatus)
648 return r->status = i;
649 if((i = sdfakescsirw(r, &lba, &count, &rw)) != SDnostatus)
651 r->rlen = viobio(u, r->lun, rw == SDwrite, r->data, count, lba);
652 return r->status = SDok;
/*
 * Fragment of vioonline(): scsi units use scsionline(); block devices
 * appear to compare the device-reported capacity (cap) with the cached
 * unit geometry.  NOTE(review): interior lines are missing.
 */
663 if(vd->typ == TypSCSI)
664 return scsionline(u);
668 if(u->sectors != cap){
/* Fragment of vioverify(): scsi units are verified via scsiverify(). */
682 if(vd->typ == TypSCSI)
683 return scsiverify(u);
/*
 * Fragment of vioenable(): attach the interrupt handler, enable every
 * virtqueue through the common config, and finally set DriverOk in the
 * device status.
 */
699 snprint(name, sizeof(name), "%s (%s)", sd->name, sd->ifc->name);
700 intrenable(vd->pci->intl, viointerrupt, vd, vd->pci->tbdf, name);
703 for(i = 0; i < vd->nqueue; i++){
704 vd->cfg->queuesel = i;
705 vd->cfg->queueenable = 1;
707 vd->cfg->status |= DriverOk;
/* Fragment of viodisable(): detach the interrupt handler registered by vioenable(). */
719 snprint(name, sizeof(name), "%s (%s)", sd->name, sd->ifc->name);
720 intrdisable(vd->pci->intl, viointerrupt, vd, vd->pci->tbdf, name);
/*
 * Fragment of the block-device pnp path: for each TypBlk virtio
 * device, map the device-specific config (cap type 4, Vblkdev) and
 * allocate an SDev bound to sdvirtio10ifc.
 */
735 for(vd = viopnpdevs(TypBlk); vd; vd = vd->next){
739 if((vd->dev = virtiomapregs(vd->pci, virtiocap(vd->pci, 4), sizeof(Vblkdev))) == nil)
741 if((s = malloc(sizeof(*s))) == nil)
745 s->ifc = &sdvirtio10ifc;
/*
 * Fragment of the scsi pnp path: map the Vscsidev config (cap type 4),
 * reject devices with no targets or with cdb/sense sizes larger than
 * the driver's fixed request buffers, and allocate an SDev with one
 * unit per target.
 */
755 for(vd = viopnpdevs(TypSCSI); vd; vd = vd->next){
761 if((scsi = virtiomapregs(vd->pci, virtiocap(vd->pci, 4), sizeof(Vscsidev))) == nil)
763 if(scsi->max_target == 0){
764 vunmap(scsi, sizeof(Vscsidev));
767 if((scsi->cdb_size > CDBSIZE) || (scsi->sense_size > SENSESIZE)){
768 print("sdvirtio: cdb %ud or sense size %ud too big\n",
769 scsi->cdb_size, scsi->sense_size);
770 vunmap(scsi, sizeof(Vscsidev));
775 if((s = malloc(sizeof(*s))) == nil)
779 s->ifc = &sdvirtio10ifc;
780 s->nunit = scsi->max_target;
/*
 * Fragment of the SDifc method table exported by this driver
 * ("virtio10").  NOTE(review): several entries (pnp, rio, bio, ...)
 * are missing from this excerpt.
 */
791 SDifc sdvirtio10ifc = {
792 "virtio10", /* name */
796 vioenable, /* enable */
797 viodisable, /* disable */
799 vioverify, /* verify */
800 vioonline, /* online */