2 * intel 10GB ethernet pci-express driver
3 * copyright © 2007, coraid, inc.
6 #include "../port/lib.h"
11 #include "../port/error.h"
12 #include "../port/netif.h"
16 * // comments note conflicts with 82563-style drivers,
17 * and the registers are all different.
22 Ctrl = 0x00000/4, /* Device Control */
23 Status = 0x00008/4, /* Device Status */
24 Ctrlext = 0x00018/4, /* Extended Device Control */
25 Esdp = 0x00020/4, /* extended sdp control */
26 Esodp = 0x00028/4, /* extended od sdp control */
27 Ledctl = 0x00200/4, /* led control */
28 Tcptimer = 0x0004c/4, /* tcp timer */
29 Ecc = 0x110b0/4, /* errata ecc control magic */
32 Eec = 0x10010/4, /* eeprom/flash control */
33 Eerd = 0x10014/4, /* eeprom read */
34 Fla = 0x1001c/4, /* flash access */
35 Flop = 0x1013c/4, /* flash opcode */
36 Grc = 0x10200/4, /* general rx control */
39 Icr = 0x00800/4, /* interrupt cause read */
40 Ics = 0x00808/4, /* " set */
41 Ims = 0x00880/4, /* " mask read/set */
42 Imc = 0x00888/4, /* " mask clear */
43 Iac = 0x00810/4, /* " auto clear */
44 Iam = 0x00890/4, /* " auto mask enable */
45 Itr = 0x00820/4, /* " throttling rate (0-19) */
46 Ivar = 0x00900/4, /* " vector allocation regs. */
48 Msixt = 0x0000/4, /* msix table (bar3) */
49 Msipba = 0x2000/4, /* msix pending bit array (bar3) */
50 Pbacl = 0x11068/4, /* pba clear */
51 Gpie = 0x00898/4, /* general purpose int enable */
54 Pfctop = 0x03008/4, /* priority flow ctl type opcode */
55 Fcttv = 0x03200/4, /* " transmit timer value (0-3) */
56 Fcrtl = 0x03220/4, /* " rx threshold low (0-7) +8n */
57 Fcrth = 0x03260/4, /* " rx threshold high (0-7) +8n */
58 Rcrtv = 0x032a0/4, /* " refresh value threshold */
59 Tfcs = 0x0ce00/4, /* " tx status */
62 Rbal = 0x01000/4, /* rx desc base low (0-63) +0x40n */
63 Rbah = 0x01004/4, /* " high */
64 Rdlen = 0x01008/4, /* " length */
65 Rdh = 0x01010/4, /* " head */
66 Rdt = 0x01018/4, /* " tail */
67 Rxdctl = 0x01028/4, /* " control */
69 Srrctl = 0x02100/4, /* split and replication rx ctl. */
70 Dcarxctl = 0x02200/4, /* rx dca control */
71 Rdrxctl = 0x02f00/4, /* rx dma control */
72 Rxpbsize = 0x03c00/4, /* rx packet buffer size */
73 Rxctl = 0x03000/4, /* rx control */
74 Dropen = 0x03d04/4, /* drop enable control */
77 Rxcsum = 0x05000/4, /* rx checksum control */
78 Rfctl = 0x04008/4, /* rx filter control */
79 Mta = 0x05200/4, /* multicast table array (0-127) */
80 Ral = 0x05400/4, /* rx address low */
82 Psrtype = 0x05480/4, /* packet split rx type. */
83 Vfta = 0x0a000/4, /* vlan filter table array. */
84 Fctrl = 0x05080/4, /* filter control */
85 Vlnctrl = 0x05088/4, /* vlan control */
86 Msctctrl = 0x05090/4, /* multicast control */
87 Mrqc = 0x05818/4, /* multiple rx queues cmd */
88 Vmdctl = 0x0581c/4, /* vmdq control */
89 Imir = 0x05a80/4, /* immediate irq rx (0-7) */
90 Imirext = 0x05aa0/4, /* immediate irq rx ext */
91 Imirvp = 0x05ac0/4, /* immediate irq vlan priority */
92 Reta = 0x05c00/4, /* redirection table */
93 Rssrk = 0x05c80/4, /* rss random key */
96 Tdbal = 0x06000/4, /* tx desc base low +0x40n */
97 Tdbah = 0x06004/4, /* " high */
98 Tdlen = 0x06008/4, /* " len */
99 Tdh = 0x06010/4, /* " head */
100 Tdt = 0x06018/4, /* " tail */
101 Txdctl = 0x06028/4, /* " control */
102 Tdwbal = 0x06038/4, /* " write-back address low */
105 Dtxctl = 0x07e00/4, /* tx dma control */
106 Tdcatxctrl = 0x07200/4, /* tx dca register (0-15) */
107 Tipg = 0x0cb00/4, /* tx inter-packet gap */
108 Txpbsize = 0x0cc00/4, /* tx packet-buffer size (0-15) */
111 Hlreg0 = 0x04240/4, /* highlander control reg 0 */
112 Hlreg1 = 0x04244/4, /* highlander control reg 1 (ro) */
113 Msca = 0x0425c/4, /* mdi signal cmd & addr */
114 Msrwd = 0x04260/4, /* mdi single rw data */
115 Mhadd = 0x04268/4, /* mac addr high & max frame */
116 Pcss1 = 0x04288/4, /* xgxs status 1 */
118 Xpcss = 0x04290/4, /* 10gb-x pcs status */
119 Serdesc = 0x04298/4, /* serdes control */
120 Macs = 0x0429c/4, /* fifo control & report */
121 Autoc = 0x042a0/4, /* autodetect control & status */
122 Links = 0x042a4/4, /* link status */
128 Rst = 1<<26, /* full nic reset */
134 Bam = 1<<10, /* broadcast accept mode */
135 Upe = 1<<9, /* unicast promiscuous */
136 Mpe = 1<<8, /* multicast promiscuous */
/* bit positions of the threshold fields; used as shift counts when building Rxdctl/Txdctl values */
139 Pthresh = 0, /* prefetch threshold shift in bits */
140 Hthresh = 8, /* host buffer minimum threshold " */
141 Wthresh = 16, /* writeback threshold " */
154 Ippcse = 1<<12, /* ip payload checksum enable */
157 EEstart = 1<<0, /* Start Read */
158 EEdone = 1<<1, /* Read done */
161 Irx0 = 1<<0, /* driver defined */
162 Itx0 = 1<<1, /* driver defined */
163 Lsc = 1<<20, /* link status change */
180 0x4004, "illegal byte",
181 0x4008, "short packet",
182 0x3fa0, "missed pkt0",
183 0x4034, "mac local flt",
184 0x4038, "mac rmt flt",
185 0x4040, "rx length err",
198 0x3fc0, "rx no buf0",
218 Pif = 1<<7, /* past exact filter (sic) */
219 Ipcs = 1<<6, /* ip checksum calculated */
220 L4cs = 1<<5, /* NOTE(review): name suggests layer-4 checksum status, but the comment here read layer 2 -- confirm */
221 Tcpcs = 1<<4, /* tcp checksum calculated */
222 Vp = 1<<3, /* 802.1q packet matched vet */
223 Ixsm = 1<<2, /* ignore checksum */
224 Reop = 1<<1, /* end of packet */
225 Rdd = 1<<0, /* descriptor done */
297 ulong stats[nelem(stattab)];
301 /* tweakable parameters */
309 static Ctlr *ctlrtab[4];
318 for(i = 0; i < nelem(c->stats); i++)
319 c->stats[i] += c->reg[stattab[i].reg >> 2];
323 static int speedtab[] = {
330 ifstat(Ether *e, void *a, long n, ulong offset)
336 p = s = smalloc(READSTR);
341 for(i = 0; i < nelem(stattab); i++)
343 p = seprint(p, q, "%.10s %uld\n", stattab[i].name, c->stats[i]);
345 p = seprint(p, q, "speeds: 0:%d 1000:%d 10000:%d\n", t[0], t[1], t[2]);
/* NOTE(review): the value printed under the rdh label is c->reg[Rdt]; the remaining argument is on a line not shown here -- confirm head and tail are not swapped */
346 seprint(p, q, "rdfree %d rdh %d rdt %d\n", c->rdfree, c->reg[Rdt],
348 n = readstr(offset, a, n, s);
366 return ((Ctlr*)v)->lim != 0;
382 e->link = (r & Lnkup) != 0;
385 i = 1 + ((r & Lnkspd) != 0);
387 e->mbps = speedtab[i];
390 sleep(&c->lrendez, lim, c);
396 ctl(Ether *, void *, long)
/*
 * Next(x, m): successor of ring index x, wrapped by and-ing with mask m.
 * The wrap is only correct when m has the form 2^k - 1; callers pass a
 * local named m -- presumably ring size minus one, confirm at the call sites.
 * Both arguments are parenthesized and evaluated once.
 */
402 #define Next(x, m) (((x)+1) & (m))
405 cleanup(Ctlr *c, int tdh)
411 while(c->tdba[n = Next(tdh, m)].status & Tdd){
416 c->tdba[tdh].status = 0;
430 if(!canqlock(&c->tlock)){
434 tdh = c->tdh = cleanup(c, c->tdh);
437 for(i = 0; i < 8; i++){
438 if(Next(tdt, m) == tdh){
442 if(!(b = qget(e->oq)))
445 t->addr[0] = PCIWADDR(b->rp);
447 t->cmd = Rs | Ifcs | Teop;
461 return ((Ctlr*)c)->tim != 0;
475 sleep(&c->trendez, tim, c); /* transmit kicks us */
487 c->reg[Rxctl] &= ~Rxen;
488 for(i = 0; i < c->nrd; i++){
496 c->reg[Fctrl] |= Bam;
/* NOTE(review): Ipcs (1<<6) is declared next to Rdd/Reop with the comment ip checksum calculated, while Ippcse (1<<12) is the bit commented as ip payload checksum enable -- confirm which one Rxcsum expects */
497 c->reg[Rxcsum] |= Ipcs;
498 c->reg[Srrctl] = (c->rbsz + 1023)/1024;
499 c->reg[Mhadd] = c->rbsz << 16;
500 c->reg[Hlreg0] |= Jumboen;
502 c->reg[Rbal] = PCIWADDR(c->rdba);
504 c->reg[Rdlen] = c->nrd*sizeof(Rd);
506 c->reg[Rdt] = c->rdt = 0;
508 c->reg[Rdrxctl] = Rdmt¼;
509 c->reg[Rxdctl] = 8<<Wthresh | 8<<Pthresh | 4<<Hthresh | Renable;
510 c->reg[Rxctl] |= Rxen | Dmbyps;
514 replenish(Ctlr *c, uint rdh)
522 for(rdt = c->rdt; Next(rdt, m) != rdh; rdt = Next(rdt, m)){
523 b = allocb(c->rbsz+BY2PG);
524 b->rp = (uchar*)PGROUND((uintptr)b->base);
528 r->addr[0] = PCIWADDR(b->rp);
534 c->reg[Rdt] = c->rdt = rdt;
540 return ((Ctlr*)v)->rim != 0;
543 static uchar zeroea[Eaddrlen];
563 sleep(&c->rrendez, rim, c);
566 if(c->nrd - c->rdfree >= 16)
569 if(!(r->status & Rdd))
574 if(!(r->status & Ixsm)){
577 if(r->status & Tcpcs)
578 b->flag |= Btcpck | Budpck;
579 b->checksum = r->cksum;
585 goto loop1; /* UGH */
589 promiscuous(void *a, int on)
597 c->reg[Fctrl] |= Upe | Mpe;
599 c->reg[Fctrl] &= ~(Upe | Mpe);
603 multicast(void *a, uchar *ea, int on)
613 * multiple ether addresses can hash to the same filter bit,
614 * so it's never safe to clear a filter bit.
615 * if we want to clear filter bits, we need to keep track of
616 * all the multicast addresses in use, clear all the filter bits,
617 * then set the ones corresponding to in-use addresses.
620 b = (ea[5]&1)<<4 | ea[4]>>4;
626 c->reg[Mta+i] = c->mta[i];
636 for(i = 0; i < 100; i++){
638 if((c->reg[Ctrl] & Rst) == 0)
645 c->reg[Ecc] &= ~(1<<21 | 1<<18 | 1<<9 | 1<<6);
647 /* not cleared by reset; kill it manually. */
/* NOTE(review): the loop index i does not appear in the body below, so the same Rah word has bit 31 cleared 15 times -- presumably a per-entry offset was intended; confirm */
648 for(i = 1; i < 16; i++)
649 c->reg[Rah] &= ~(1 << 31);
650 for(i = 0; i < 128; i++)
/* NOTE(review): this loop starts at 1, so c->reg[Vfta + 0] is not zeroed here -- confirm that is intentional */
652 for(i = 1; i < 640; i++)
653 c->reg[Vfta + i] = 0;
665 eeread(Ctlr *c, int i)
/*
 * read through the Eerd (eeprom read) register:
 * post a request of EEstart with index i placed at bit 2,
 * poll until EEdone reads back set, then return the bits
 * of Eerd from bit 16 upward as the result.
 */
667 c->reg[Eerd] = EEstart | i<<2;
668 while((c->reg[Eerd] & EEdone) == 0)
670 return c->reg[Eerd] >> 16;
676 ushort u, v, p, l, i, j;
678 if((eeread(c, 0) & 0xc0) != 0x40)
681 for(i = 0; i < 0x40; i++)
683 for(i = 3; i < 0xf; i++){
686 if((int)p + l + 1 > 0xffff)
688 for(j = p; j < p + l; j++)
693 if(c->reg[Status] & (1<<3))
698 for(i = 0; i < Eaddrlen;){
699 v = eeread(c, u + i/2);
703 c->ra[5] += (c->reg[Status] & 0xc) >> 2;
714 print("82598: reset timeout\n");
718 print("82598: eeprom failure\n");
722 c->reg[Ral] = p[3]<<24 | p[2]<<16 | p[1]<<8 | p[0];
723 c->reg[Rah] = p[5]<<8 | p[4] | 1<<31;
726 for(i = 0; i<nelem(c->stats); i++)
729 c->reg[Ctrlext] |= 1 << 16;
730 /* make some guesses for flow control */
731 c->reg[Fcrtl] = 0x10000 | 1<<31;
732 c->reg[Fcrth] = 0x40000 | 1<<31;
733 c->reg[Rcrtv] = 0x6000;
735 /* configure interrupt mapping (don't ask) */
736 c->reg[Ivar+0] = 0 | 1<<7;
737 c->reg[Ivar+64/4] = 1 | 1<<7;
738 // c->reg[Ivar+97/4] = (2 | 1<<7) << (8*(97%4));
740 /* interrupt throttling goes here. */
741 for(i = Itr; i < Itr + 20; i++)
742 c->reg[i] = 128; /* ¼µs intervals */
/* NOTE(review): Itx0 is declared as a bit mask (1<<1), so this writes the register at Itr+2 -- confirm that is the throttle register intended for transmit */
743 c->reg[Itr + Itx0] = 256;
753 c->reg[Txdctl] = 16<<Wthresh | 16<<Pthresh;
754 for(i = 0; i < c->ntd; i++){
760 memset(c->tdba, 0, c->ntd * sizeof(Td));
761 c->reg[Tdbal] = PCIWADDR(c->tdba);
763 c->reg[Tdlen] = c->ntd*sizeof(Td);
768 c->reg[Txdctl] |= Ten;
779 c->edev = e; /* point back to Ether* */
788 t = c->nrd * sizeof *c->rdba + 255;
789 t += c->ntd * sizeof *c->tdba + 255;
790 t += (c->ntd + c->nrd) * sizeof(Block*);
791 c->alloc = malloc(t);
796 c->rdba = (Rd*)ROUNDUP((uintptr)c->alloc, 256);
797 c->tdba = (Td*)ROUNDUP((uintptr)(c->rdba + c->nrd), 256);
798 c->rb = (Block**)(c->tdba + c->ntd);
799 c->tb = (Block**)(c->rb + c->nrd);
804 snprint(buf, sizeof buf, "#l%dl", e->ctlrno);
805 kproc(buf, lproc, e);
806 snprint(buf, sizeof buf, "#l%dr", e->ctlrno);
807 kproc(buf, rproc, e);
808 snprint(buf, sizeof buf, "#l%dt", e->ctlrno);
809 kproc(buf, tproc, e);
813 interrupt(Ureg*, void *v)
824 while((icr = c->reg[Icr] & c->im) != 0){
841 c->reg[Ims] = c->im = im;
850 int pciregs, pcimsix;
855 while(p = pcimatch(p, 0x8086, 0)){
857 case 0x10c6: /* 82598 af dual port */
858 case 0x10c7: /* 82598 af single port */
859 case 0x10b6: /* 82598 backplane */
860 case 0x10dd: /* 82598 at cx4 */
861 case 0x10ec: /* 82598 at cx4 dual port */
864 case 0x10fb: /* 82599 */
871 if(nctlr == nelem(ctlrtab)){
872 print("i82598: too many controllers\n");
875 c = malloc(sizeof *c);
877 print("i82598: can't allocate memory\n");
880 io = p->mem[pciregs].bar & ~0xf;
881 mem = vmap(io, p->mem[pciregs].size);
883 print("i82598: can't map regs %#p\n", p->mem[pciregs].bar);
887 iomsi = p->mem[pcimsix].bar & ~0xf;
888 memmsi = vmap(iomsi, p->mem[pcimsix].size);
890 print("i82598: can't map msi-x regs %#p\n", p->mem[pcimsix].bar);
891 vunmap(mem, p->mem[pciregs].size);
896 c->reg = (u32int*)mem;
897 c->regmsi = (u32int*)memmsi;
900 print("i82598: can't reset\n");
902 vunmap(mem, p->mem[pciregs].size);
903 vunmap(memmsi, p->mem[pcimsix].size);
907 ctlrtab[nctlr++] = c;
919 for(i = 0; i < nctlr; i++){
921 if(c == nil || c->flag & Factive)
923 if(e->port == 0 || e->port == (ulong)c->reg)
930 e->port = (uintptr)c->reg;
932 e->tbdf = c->p->tbdf;
935 memmove(e->ea, c->ra, Eaddrlen);
940 e->interrupt = interrupt;
941 e->multicast = multicast;
942 e->promiscuous = promiscuous;
943 e->shutdown = shutdown;
944 e->transmit = transmit;
952 addethercard("i82598", pnp);