2 * intel 10GB ethernet pci-express driver
3 * copyright © 2007, coraid, inc.
6 #include "../port/lib.h"
11 #include "../port/error.h"
12 #include "../port/netif.h"
13 #include "../port/etherif.h"
16 * // comments note conflicts with 82563-style drivers,
17 * and the registers are all different.
22 Ctrl = 0x00000/4, /* Device Control */
23 Status = 0x00008/4, /* Device Status */
24 Ctrlext = 0x00018/4, /* Extended Device Control */
25 Esdp = 0x00020/4, /* extended sdp control */
26 Esodp = 0x00028/4, /* extended od sdp control */
27 Ledctl = 0x00200/4, /* led control */
28 Tcptimer = 0x0004c/4, /* tcp timer */
29 Ecc = 0x110b0/4, /* errata ecc control magic */
32 Eec = 0x10010/4, /* eeprom/flash control */
33 Eerd = 0x10014/4, /* eeprom read */
34 Fla = 0x1001c/4, /* flash access */
35 Flop = 0x1013c/4, /* flash opcode */
36 Grc = 0x10200/4, /* general rx control */
39 Icr = 0x00800/4, /* interrupt cause read */
40 Ics = 0x00808/4, /* interrupt cause set */
41 Ims = 0x00880/4, /* interrupt mask read/set */
42 Imc = 0x00888/4, /* interrupt mask clear */
43 Iac = 0x00810/4, /* interrupt auto clear */
44 Iam = 0x00890/4, /* interrupt auto mask enable */
45 Itr = 0x00820/4, /* interrupt throttling rate (0-19) */
46 Ivar = 0x00900/4, /* interrupt vector allocation regs. */
48 Msixt = 0x0000/4, /* msix table (bar3) */
49 Msipba = 0x2000/4, /* msix pending bit array (bar3) */
50 Pbacl = 0x11068/4, /* pba clear */
51 Gpie = 0x00898/4, /* general purpose int enable */
54 Pfctop = 0x03008/4, /* priority flow ctl type opcode */
55 Fcttv = 0x03200/4, /* " transmit timer value (0-3) */
56 Fcrtl = 0x03220/4, /* " rx threshold low (0-7) +8n */
57 Fcrth = 0x03260/4, /* " rx threshold high (0-7) +8n */
58 Rcrtv = 0x032a0/4, /* " refresh value threshold */
59 Tfcs = 0x0ce00/4, /* " tx status */
62 Rbal = 0x01000/4, /* rx desc base low (0-63) +0x40n */
63 Rbah = 0x01004/4, /* " high */
64 Rdlen = 0x01008/4, /* " length */
65 Rdh = 0x01010/4, /* " head */
66 Rdt = 0x01018/4, /* " tail */
67 Rxdctl = 0x01028/4, /* " control */
69 Srrctl = 0x02100/4, /* split and replication rx ctl. */
70 Dcarxctl = 0x02200/4, /* rx dca control */
71 Rdrxctl = 0x02f00/4, /* rx dma control */
72 Rxpbsize = 0x03c00/4, /* rx packet buffer size */
73 Rxctl = 0x03000/4, /* rx control */
74 Dropen = 0x03d04/4, /* drop enable control */
77 Rxcsum = 0x05000/4, /* rx checksum control */
78 Rfctl = 0x04008/4, /* rx filter control */
79 Mta = 0x05200/4, /* multicast table array (0-127) */
80 Ral = 0x05400/4, /* rx address low */
82 Psrtype = 0x05480/4, /* packet split rx type. */
83 Vfta = 0x0a000/4, /* vlan filter table array. */
84 Fctrl = 0x05080/4, /* filter control */
85 Vlnctrl = 0x05088/4, /* vlan control */
86 Msctctrl = 0x05090/4, /* multicast control */
87 Mrqc = 0x05818/4, /* multiple rx queues cmd */
88 Vmdctl = 0x0581c/4, /* vmdq control */
89 Imir = 0x05a80/4, /* immediate irq rx (0-7) */
90 Imirext = 0x05aa0/4, /* immediate irq rx ext */
91 Imirvp = 0x05ac0/4, /* immediate irq vlan priority */
92 Reta = 0x05c00/4, /* redirection table */
93 Rssrk = 0x05c80/4, /* rss random key */
96 Tdbal = 0x06000/4, /* tx desc base low +0x40n */
97 Tdbah = 0x06004/4, /* " high */
98 Tdlen = 0x06008/4, /* " len */
99 Tdh = 0x06010/4, /* " head */
100 Tdt = 0x06018/4, /* " tail */
101 Txdctl = 0x06028/4, /* " control */
102 Tdwbal = 0x06038/4, /* " write-back address low */
104 Dmatxctl = 0x04a80/4,
106 Dtxctl = 0x07e00/4, /* tx dma control */
107 Tdcatxctrl = 0x07200/4, /* tx dca register (0-15) */
108 Tipg = 0x0cb00/4, /* tx inter-packet gap */
109 Txpbsize = 0x0cc00/4, /* tx packet-buffer size (0-15) */
112 Hlreg0 = 0x04240/4, /* highlander control reg 0 */
113 Hlreg1 = 0x04244/4, /* highlander control reg 1 (ro) */
114 Msca = 0x0425c/4, /* mdi single cmd & addr */
115 Msrwd = 0x04260/4, /* mdi single rw data */
116 Mhadd = 0x04268/4, /* mac addr high & max frame */
117 Pcss1 = 0x04288/4, /* xgxs status 1 */
119 Xpcss = 0x04290/4, /* 10gb-x pcs status */
120 Serdesc = 0x04298/4, /* serdes control */
121 Macs = 0x0429c/4, /* fifo control & report */
122 Autoc = 0x042a0/4, /* autodetect control & status */
123 Links = 0x042a4/4, /* link status */
129 Rst = 1<<26, /* full nic reset */
135 Bam = 1<<10, /* broadcast accept mode */
136 Upe = 1<<9, /* unicast promiscuous */
137 Mpe = 1<<8, /* multicast promiscuous */
140 Pthresh = 0, /* prefetch threshold shift in bits */
141 Hthresh = 8, /* host buffer minimum threshold shift in bits */
142 Wthresh = 16, /* writeback threshold shift in bits */
158 Ippcse = 1<<12, /* ip payload checksum enable */
161 EEstart = 1<<0, /* Start Read */
162 EEdone = 1<<1, /* Read done */
165 Irx0 = 1<<0, /* driver defined */
166 Itx0 = 1<<1, /* driver defined */
167 Lsc = 1<<20, /* link status change */
184 0x4004, "illegal byte",
185 0x4008, "short packet",
186 0x3fa0, "missed pkt0",
187 0x4034, "mac local flt",
188 0x4038, "mac rmt flt",
189 0x4040, "rx length err",
202 0x3fc0, "rx no buf0",
222 Pif = 1<<7, /* past exact filter (sic) */
223 Ipcs = 1<<6, /* ip checksum calculated */
224 L4cs = 1<<5, /* layer 4 checksum calculated; NOTE(review): comment used to read "layer 2", confirm against datasheet */
225 Tcpcs = 1<<4, /* tcp checksum calculated */
226 Vp = 1<<3, /* 802.1q packet matched vet */
227 Ixsm = 1<<2, /* ignore checksum */
228 Reop = 1<<1, /* end of packet */
229 Rdd = 1<<0, /* descriptor done */
302 ulong stats[nelem(stattab)];
306 /* tweakable parameters */
314 static Ctlr *ctlrtab[4];
323 for(i = 0; i < nelem(c->stats); i++)
324 c->stats[i] += c->reg[stattab[i].reg >> 2];
328 static int speedtab[] = {
335 ifstat(Ether *e, void *a, long n, ulong offset)
341 p = s = smalloc(READSTR);
346 for(i = 0; i < nelem(stattab); i++)
348 p = seprint(p, q, "%.10s %uld\n", stattab[i].name, c->stats[i]);
350 p = seprint(p, q, "speeds: 0:%d 1000:%d 10000:%d\n", t[0], t[1], t[2]);
351 seprint(p, q, "rdfree %d rdh %d rdt %d\n", c->rdfree, c->reg[Rdt],
353 n = readstr(offset, a, n, s);
371 return ((Ctlr*)v)->lim != 0;
387 e->link = (r & Lnkup) != 0;
390 i = 1 + ((r & Lnkspd) != 0);
392 e->mbps = speedtab[i];
395 sleep(&c->lrendez, lim, c);
401 ctl(Ether *, void *, long)
407 #define Next(x, m) (((x)+1) & (m)) /* index after x, wrapped by bitmask m */
410 cleanup(Ctlr *c, int tdh)
416 while(c->tdba[n = Next(tdh, m)].status & Tdd){
421 c->tdba[tdh].status = 0;
435 if(!canqlock(&c->tlock)){
439 tdh = c->tdh = cleanup(c, c->tdh);
442 for(i = 0; i < 8; i++){
443 if(Next(tdt, m) == tdh){
447 if(!(b = qget(e->oq)))
450 t->addr[0] = PCIWADDR(b->rp);
452 t->cmd = Rs | Ifcs | Teop;
466 return ((Ctlr*)c)->tim != 0;
480 sleep(&c->trendez, tim, c); /* transmit kicks us */
492 c->reg[Rxctl] &= ~Rxen;
493 for(i = 0; i < c->nrd; i++){
501 c->reg[Fctrl] |= Bam;
502 c->reg[Rxcsum] |= Ipcs;
503 c->reg[Srrctl] = c->rbsz / 1024;
504 c->reg[Mhadd] = c->rbsz << 16;
505 c->reg[Hlreg0] |= Jumboen;
507 c->reg[Rbal] = PCIWADDR(c->rdba);
509 c->reg[Rdlen] = c->nrd*sizeof(Rd);
511 c->reg[Rdt] = c->rdt = 0;
513 c->reg[Rdrxctl] = Rdmt¼;
514 c->reg[Rxdctl] = 8<<Wthresh | 8<<Pthresh | 4<<Hthresh | Renable;
515 c->reg[Rxctl] |= Rxen | Dmbyps;
519 replenish(Ctlr *c, uint rdh)
527 for(rdt = c->rdt; Next(rdt, m) != rdh; rdt = Next(rdt, m)){
528 b = allocb(c->rbsz+BY2PG);
529 b->rp = (uchar*)PGROUND((uintptr)b->base);
533 r->addr[0] = PCIWADDR(b->rp);
539 c->reg[Rdt] = c->rdt = rdt;
545 return ((Ctlr*)v)->rim != 0;
548 static uchar zeroea[Eaddrlen];
568 sleep(&c->rrendez, rim, c);
571 if(c->nrd - c->rdfree >= 16)
574 if(!(r->status & Rdd))
579 if(!(r->status & Ixsm)){
582 if(r->status & Tcpcs)
583 b->flag |= Btcpck | Budpck;
584 b->checksum = r->cksum;
590 goto loop1; /* UGH */
594 promiscuous(void *a, int on)
602 c->reg[Fctrl] |= Upe | Mpe;
604 c->reg[Fctrl] &= ~(Upe | Mpe);
608 multicast(void *a, uchar *ea, int on)
618 * multiple ether addresses can hash to the same filter bit,
619 * so it's never safe to clear a filter bit.
620 * if we want to clear filter bits, we need to keep track of
621 * all the multicast addresses in use, clear all the filter bits,
622 * then set the ones corresponding to in-use addresses.
625 b = (ea[5]&1)<<4 | ea[4]>>4;
631 c->reg[Mta+i] = c->mta[i];
641 for(i = 0; i < 100; i++){
643 if((c->reg[Ctrl] & Rst) == 0)
650 c->reg[Ecc] &= ~(1<<21 | 1<<18 | 1<<9 | 1<<6);
652 /* not cleared by reset; kill it manually. NOTE(review): the loop below never uses i in the index, so it rewrites c->reg[Rah] 15 times; confirm whether a per-entry offset was intended. */
653 for(i = 1; i < 16; i++)
654 c->reg[Rah] &= ~(1 << 31);
655 for(i = 0; i < 128; i++)
657 for(i = 1; i < 640; i++)
658 c->reg[Vfta + i] = 0;
670 eeread(Ctlr *c, int i)
672 c->reg[Eerd] = EEstart | i<<2;
673 while((c->reg[Eerd] & EEdone) == 0)
675 return c->reg[Eerd] >> 16;
681 ushort u, v, p, l, i, j;
683 if((eeread(c, 0) & 0xc0) != 0x40)
686 for(i = 0; i < 0x40; i++)
688 for(i = 3; i < 0xf; i++){
691 if((int)p + l + 1 > 0xffff)
693 for(j = p; j < p + l; j++)
698 if(c->reg[Status] & (1<<3))
703 for(i = 0; i < Eaddrlen;){
704 v = eeread(c, u + i/2);
708 c->ra[5] += (c->reg[Status] & 0xc) >> 2;
718 print("82598: reset timeout\n");
722 print("82598: eeprom failure\n");
723 memset(c->ra, 0, Eaddrlen);
727 for(i = 0; i<nelem(c->stats); i++)
730 c->reg[Ctrlext] |= 1 << 16;
731 /* make some guesses for flow control */
732 c->reg[Fcrtl] = 0x10000 | 1<<31;
733 c->reg[Fcrth] = 0x40000 | 1<<31;
734 c->reg[Rcrtv] = 0x6000;
736 /* configure interrupt mapping (don't ask) */
737 c->reg[Ivar+0] = 0 | 1<<7;
738 c->reg[Ivar+64/4] = 1 | 1<<7;
739 // c->reg[Ivar+97/4] = (2 | 1<<7) << (8*(97%4));
741 /* interrupt throttling goes here. */
742 for(i = Itr; i < Itr + 20; i++)
743 c->reg[i] = 128; /* ¼µs intervals */
744 c->reg[Itr + Itx0] = 256;
754 c->reg[Txdctl] = 16<<Wthresh | 16<<Pthresh;
755 for(i = 0; i < c->ntd; i++){
761 memset(c->tdba, 0, c->ntd * sizeof(Td));
762 c->reg[Tdbal] = PCIWADDR(c->tdba);
764 c->reg[Tdlen] = c->ntd*sizeof(Td);
769 c->reg[Txdctl] |= Ten;
771 c->reg[Dmatxctl] |= Txen;
782 c->edev = e; /* point back to Ether* */
791 t = c->nrd * sizeof *c->rdba + 255;
792 t += c->ntd * sizeof *c->tdba + 255;
793 t += (c->ntd + c->nrd) * sizeof(Block*);
794 c->alloc = malloc(t);
799 c->rdba = (Rd*)ROUNDUP((uintptr)c->alloc, 256);
800 c->tdba = (Td*)ROUNDUP((uintptr)(c->rdba + c->nrd), 256);
801 c->rb = (Block**)(c->tdba + c->ntd);
802 c->tb = (Block**)(c->rb + c->nrd);
807 snprint(buf, sizeof buf, "#l%dl", e->ctlrno);
808 kproc(buf, lproc, e);
809 snprint(buf, sizeof buf, "#l%dr", e->ctlrno);
810 kproc(buf, rproc, e);
811 snprint(buf, sizeof buf, "#l%dt", e->ctlrno);
812 kproc(buf, tproc, e);
816 interrupt(Ureg*, void *v)
827 while((icr = c->reg[Icr] & c->im) != 0){
844 c->reg[Ims] = c->im = im;
853 int pciregs, pcimsix;
858 while(p = pcimatch(p, 0x8086, 0)){
860 case 0x10c6: /* 82598 af dual port */
861 case 0x10c7: /* 82598 af single port */
862 case 0x10b6: /* 82598 backplane */
863 case 0x10dd: /* 82598 at cx4 */
864 case 0x10ec: /* 82598 at cx4 dual port */
867 case 0x10fb: /* 82599 */
868 case 0x1528: /* T540-T1 */
876 if(nctlr == nelem(ctlrtab)){
877 print("i82598: too many controllers\n");
880 c = malloc(sizeof *c);
882 print("i82598: can't allocate memory\n");
885 io = p->mem[pciregs].bar & ~0xf;
886 mem = vmap(io, p->mem[pciregs].size);
888 print("i82598: can't map regs %#p\n", io);
892 iomsi = p->mem[pcimsix].bar & ~0xf;
893 memmsi = vmap(iomsi, p->mem[pcimsix].size);
895 print("i82598: can't map msi-x regs %#p\n", iomsi);
896 vunmap(mem, p->mem[pciregs].size);
903 c->reg = (u32int*)mem;
904 c->regmsi = (u32int*)memmsi;
905 c->rbsz = ROUND(Mtu, 1024);
907 print("i82598: can't reset\n");
909 vunmap(mem, p->mem[pciregs].size);
910 vunmap(memmsi, p->mem[pcimsix].size);
914 ctlrtab[nctlr++] = c;
921 static uchar zeros[Eaddrlen];
928 for(i = 0; i < nctlr; i++){
930 if(c == nil || c->flag & Factive)
932 if(e->port == 0 || e->port == c->io)
938 if(memcmp(c->ra, zeros, Eaddrlen) != 0)
939 memmove(e->ea, c->ra, Eaddrlen);
942 c->reg[Ral] = p[3]<<24 | p[2]<<16 | p[1]<<8 | p[0];
943 c->reg[Rah] = p[5]<<8 | p[4] | 1<<31;
947 e->port = (uintptr)c->reg;
949 e->tbdf = c->p->tbdf;
957 e->multicast = multicast;
958 e->promiscuous = promiscuous;
959 e->shutdown = shutdown;
960 e->transmit = transmit;
962 intrenable(e->irq, interrupt, e, e->tbdf, e->name);
970 addethercard("i82598", pnp);