2 * intel 10GB ethernet pci-express driver
3 * copyright © 2007, coraid, inc.
6 #include "../port/lib.h"
11 #include "../port/error.h"
12 #include "../port/netif.h"
16 * // comments note conflicts with 82563-style drivers,
17 * and the registers are all different.
22 Ctrl = 0x00000/4, /* Device Control */
23 Status = 0x00008/4, /* Device Status */
24 Ctrlext = 0x00018/4, /* Extended Device Control */
25 Esdp = 0x00020/4, /* extended sdp control */
26 Esodp = 0x00028/4, /* extended od sdp control */
27 Ledctl = 0x00200/4, /* led control */
28 Tcptimer = 0x0004c/4, /* tcp timer */
29 Ecc = 0x110b0/4, /* errata ecc control magic */
32 Eec = 0x10010/4, /* eeprom/flash control */
33 Eerd = 0x10014/4, /* eeprom read */
34 Fla = 0x1001c/4, /* flash access */
35 Flop = 0x1013c/4, /* flash opcode */
36 Grc = 0x10200/4, /* general rx control */
39 Icr = 0x00800/4, /* interrupt cause read */
40 Ics = 0x00808/4, /* " set */
41 Ims = 0x00880/4, /* " mask read/set */
42 Imc = 0x00888/4, /* " mask clear */
43 Iac = 0x00810/4, /* " auto clear */
44 Iam = 0x00890/4, /* " auto mask enable */
45 Itr = 0x00820/4, /* " throttling rate (0-19) */
46 Ivar = 0x00900/4, /* " vector allocation regs. */
48 Msixt = 0x0000/4, /* msix table (bar3) */
49 Msipba = 0x2000/4, /* msix pending bit array (bar3) */
50 Pbacl = 0x11068/4, /* pba clear */
51 Gpie = 0x00898/4, /* general purpose int enable */
54 Pfctop = 0x03008/4, /* priority flow ctl type opcode */
55 Fcttv = 0x03200/4, /* " transmit timer value (0-3) */
56 Fcrtl = 0x03220/4, /* " rx threshold low (0-7) +8n */
57 Fcrth = 0x03260/4, /* " rx threshold high (0-7) +8n */
58 Rcrtv = 0x032a0/4, /* " refresh value threshold */
59 Tfcs = 0x0ce00/4, /* " tx status */
62 Rbal = 0x01000/4, /* rx desc base low (0-63) +0x40n */
63 Rbah = 0x01004/4, /* " high */
64 Rdlen = 0x01008/4, /* " length */
65 Rdh = 0x01010/4, /* " head */
66 Rdt = 0x01018/4, /* " tail */
67 Rxdctl = 0x01028/4, /* " control */
69 Srrctl = 0x02100/4, /* split and replication rx ctl. */
70 Dcarxctl = 0x02200/4, /* rx dca control */
71 Rdrxctl = 0x02f00/4, /* rx dma control */
72 Rxpbsize = 0x03c00/4, /* rx packet buffer size */
73 Rxctl = 0x03000/4, /* rx control */
74 Dropen = 0x03d04/4, /* drop enable control */
77 Rxcsum = 0x05000/4, /* rx checksum control */
78 Rfctl = 0x04008/4, /* rx filter control */
79 Mta = 0x05200/4, /* multicast table array (0-127) */
80 Ral = 0x05400/4, /* rx address low */
82 Psrtype = 0x05480/4, /* packet split rx type. */
83 Vfta = 0x0a000/4, /* vlan filter table array. */
84 Fctrl = 0x05080/4, /* filter control */
85 Vlnctrl = 0x05088/4, /* vlan control */
86 Msctctrl = 0x05090/4, /* multicast control */
87 Mrqc = 0x05818/4, /* multiple rx queues cmd */
88 Vmdctl = 0x0581c/4, /* vmdq control */
89 Imir = 0x05a80/4, /* immediate irq rx (0-7) */
90 Imirext = 0x05aa0/4, /* immediate irq rx ext */
91 Imirvp = 0x05ac0/4, /* immediate irq vlan priority */
92 Reta = 0x05c00/4, /* redirection table */
93 Rssrk = 0x05c80/4, /* rss random key */
96 Tdbal = 0x06000/4, /* tx desc base low +0x40n */
97 Tdbah = 0x06004/4, /* " high */
98 Tdlen = 0x06008/4, /* " len */
99 Tdh = 0x06010/4, /* " head */
100 Tdt = 0x06018/4, /* " tail */
101 Txdctl = 0x06028/4, /* " control */
102 Tdwbal = 0x06038/4, /* " write-back address low */
105 Dtxctl = 0x07e00/4, /* tx dma control */
106 Tdcatxctrl = 0x07200/4, /* tx dca register (0-15) */
107 Tipg = 0x0cb00/4, /* tx inter-packet gap */
108 Txpbsize = 0x0cc00/4, /* tx packet-buffer size (0-15) */
111 Hlreg0 = 0x04240/4, /* highlander control reg 0 */
112 Hlreg1 = 0x04244/4, /* highlander control reg 1 (ro) */
113 Msca = 0x0425c/4, /* mdi signal cmd & addr */
114 Msrwd = 0x04260/4, /* mdi single rw data */
115 Mhadd = 0x04268/4, /* mac addr high & max frame */
116 Pcss1 = 0x04288/4, /* xgxs status 1 */
118 Xpcss = 0x04290/4, /* 10gb-x pcs status */
119 Serdesc = 0x04298/4, /* serdes control */
120 Macs = 0x0429c/4, /* fifo control & report */
121 Autoc = 0x042a0/4, /* autodetect control & status */
122 Links = 0x042a4/4, /* link status */
128 Rst = 1<<26, /* full nic reset */
134 Bam = 1<<10, /* broadcast accept mode */
135 Upe = 1<<9, /* unicast promiscuous */
136 Mpe = 1<<8, /* multicast promiscuous */
139 Pthresh = 0, /* prefetch threshold shift in bits */
140 Hthresh = 8, /* host buffer minimum threshold " */
141 Wthresh = 16, /* writeback threshold */
154 Ippcse = 1<<12, /* ip payload checksum enable */
157 EEstart = 1<<0, /* Start Read */
158 EEdone = 1<<1, /* Read done */
161 Irx0 = 1<<0, /* driver defined */
162 Itx0 = 1<<1, /* driver defined */
163 Lsc = 1<<20, /* link status change */
180 0x4004, "illegal byte",
181 0x4008, "short packet",
182 0x3fa0, "missed pkt0",
183 0x4034, "mac local flt",
184 0x4038, "mac rmt flt",
185 0x4040, "rx length err",
198 0x3fc0, "rx no buf0",
218 Pif = 1<<7, /* past exact filter (sic) */
219 Ipcs = 1<<6, /* ip checksum calculated */
220 L4cs = 1<<5, /* layer 4 checksum calculated */
221 Tcpcs = 1<<4, /* tcp checksum calculated */
222 Vp = 1<<3, /* 802.1q packet matched vet */
223 Ixsm = 1<<2, /* ignore checksum */
224 Reop = 1<<1, /* end of packet */
225 Rdd = 1<<0, /* descriptor done */
298 ulong stats[nelem(stattab)];
302 /* tweakable parameters */
310 static Ctlr *ctlrtab[4];
313 static Block *rbpool;
321 for(i = 0; i < nelem(c->stats); i++)
322 c->stats[i] += c->reg[stattab[i].reg >> 2];
326 static int speedtab[] = {
333 ifstat(Ether *e, void *a, long n, ulong offset)
340 p = s = malloc(READSTR);
344 for(i = 0; i < nelem(stattab); i++)
346 p = seprint(p, q, "%.10s %uld\n", stattab[i].name, c->stats[i]);
348 p = seprint(p, q, "speeds: 0:%d 1000:%d 10000:%d\n", t[0], t[1], t[2]);
349 seprint(p, q, "rdfree %d rdh %d rdt %d\n", c->rdfree, c->reg[Rdt],
351 n = readstr(offset, a, n, s);
369 return ((Ctlr*)v)->lim != 0;
383 e->link = (r & Lnkup) != 0;
386 i = 1 + ((r & Lnkspd) != 0);
388 e->mbps = speedtab[i];
391 sleep(&c->lrendez, lim, c);
397 ctl(Ether *, void *, long)
409 if((bp = rbpool) != nil){
412 _xinc(&bp->ref); /* prevent bp from being freed */
421 b->rp = b->wp = (uchar*)PGROUND((uintptr)b->base);
422 b->flag &= ~(Bipck | Budpck | Btcpck | Bpktck);
429 #define Next(x, m) (((x)+1) & (m))
432 cleanup(Ctlr *c, int tdh)
438 while(c->tdba[n = Next(tdh, m)].status & Tdd){
443 c->tdba[tdh].status = 0;
457 if(!canqlock(&c->tlock)){
461 tdh = c->tdh = cleanup(c, c->tdh);
464 for(i = 0; i < 8; i++){
465 if(Next(tdt, m) == tdh){
469 if(!(b = qget(e->oq)))
472 t->addr[0] = PCIWADDR(b->rp);
474 t->cmd = Rs | Ifcs | Teop;
488 return ((Ctlr*)c)->tim != 0;
500 sleep(&c->trendez, tim, c); /* transmit kicks us */
512 c->reg[Rxctl] &= ~Rxen;
513 for(i = 0; i < c->nrd; i++){
521 c->reg[Fctrl] |= Bam;
522 c->reg[Rxcsum] |= Ipcs;
523 c->reg[Srrctl] = (c->rbsz + 1023)/1024;
524 c->reg[Mhadd] = c->rbsz << 16;
525 c->reg[Hlreg0] |= Jumboen;
527 c->reg[Rbal] = PCIWADDR(c->rdba);
529 c->reg[Rdlen] = c->nrd*sizeof(Rd);
531 c->reg[Rdt] = c->rdt = 0;
533 c->reg[Rdrxctl] = Rdmt¼;
534 c->reg[Rxdctl] = 8<<Wthresh | 8<<Pthresh | 4<<Hthresh | Renable;
535 c->reg[Rxctl] |= Rxen | Dmbyps;
539 replenish(Ctlr *c, uint rdh)
547 for(rdt = c->rdt; Next(rdt, m) != rdh; rdt = Next(rdt, m)){
549 if(!(b = rballoc())){
550 print("82598: no buffers\n");
554 r->addr[0] = PCIWADDR(b->rp);
560 c->reg[Rdt] = c->rdt = rdt;
566 return ((Ctlr*)v)->rim != 0;
569 static uchar zeroea[Eaddrlen];
587 sleep(&c->rrendez, rim, c);
590 if(c->nrd - c->rdfree >= 16)
593 if(!(r->status & Rdd))
598 b->lim = b->wp; /* lie like a dog */
599 if(!(r->status & Ixsm)){
602 if(r->status & Tcpcs)
603 b->flag |= Btcpck | Budpck;
604 b->checksum = r->cksum;
610 goto loop1; /* UGH */
614 promiscuous(void *a, int on)
622 c->reg[Fctrl] |= Upe | Mpe;
624 c->reg[Fctrl] &= ~(Upe | Mpe);
628 multicast(void *a, uchar *ea, int on)
638 * multiple ether addresses can hash to the same filter bit,
639 * so it's never safe to clear a filter bit.
640 * if we want to clear filter bits, we need to keep track of
641 * all the multicast addresses in use, clear all the filter bits,
642 * then set the ones corresponding to in-use addresses.
645 b = (ea[5]&1)<<4 | ea[4]>>4;
651 c->reg[Mta+i] = c->mta[i];
661 for(i = 0; i < 100; i++){
663 if((c->reg[Ctrl] & Rst) == 0)
670 c->reg[Ecc] &= ~(1<<21 | 1<<18 | 1<<9 | 1<<6);
672 /* not cleared by reset; kill it manually. */
673 for(i = 1; i < 16; i++)
674 c->reg[Rah] &= ~(1 << 31);
675 for(i = 0; i < 128; i++)
677 for(i = 1; i < 640; i++)
678 c->reg[Vfta + i] = 0;
690 eeread(Ctlr *c, int i)
692 c->reg[Eerd] = EEstart | i<<2;
693 while((c->reg[Eerd] & EEdone) == 0)
695 return c->reg[Eerd] >> 16;
701 ushort u, v, p, l, i, j;
703 if((eeread(c, 0) & 0xc0) != 0x40)
706 for(i = 0; i < 0x40; i++)
708 for(i = 3; i < 0xf; i++){
711 if((int)p + l + 1 > 0xffff)
713 for(j = p; j < p + l; j++)
718 if(c->reg[Status] & (1<<3))
723 for(i = 0; i < Eaddrlen;){
724 v = eeread(c, u + i/2);
728 c->ra[5] += (c->reg[Status] & 0xc) >> 2;
739 print("82598: reset timeout\n");
743 print("82598: eeprom failure\n");
747 c->reg[Ral] = p[3]<<24 | p[2]<<16 | p[1]<<8 | p[0];
748 c->reg[Rah] = p[5]<<8 | p[4] | 1<<31;
751 for(i = 0; i<nelem(c->stats); i++)
754 c->reg[Ctrlext] |= 1 << 16;
755 /* make some guesses for flow control */
756 c->reg[Fcrtl] = 0x10000 | 1<<31;
757 c->reg[Fcrth] = 0x40000 | 1<<31;
758 c->reg[Rcrtv] = 0x6000;
760 /* configure interrupt mapping (don't ask) */
761 c->reg[Ivar+0] = 0 | 1<<7;
762 c->reg[Ivar+64/4] = 1 | 1<<7;
763 // c->reg[Ivar+97/4] = (2 | 1<<7) << (8*(97%4));
765 /* interrupt throttling goes here. */
766 for(i = Itr; i < Itr + 20; i++)
767 c->reg[i] = 128; /* ¼µs intervals */
768 c->reg[Itr + Itx0] = 256;
778 c->reg[Txdctl] = 16<<Wthresh | 16<<Pthresh;
779 for(i = 0; i < c->ntd; i++){
785 memset(c->tdba, 0, c->ntd * sizeof(Td));
786 c->reg[Tdbal] = PCIWADDR(c->tdba);
788 c->reg[Tdlen] = c->ntd*sizeof(Td);
793 c->reg[Txdctl] |= Ten;
805 c->edev = e; /* point back to Ether* */
814 t = c->nrd * sizeof *c->rdba + 255;
815 t += c->ntd * sizeof *c->tdba + 255;
816 t += (c->ntd + c->nrd) * sizeof(Block*);
817 c->alloc = malloc(t);
822 c->rdba = (Rd*)ROUNDUP((uintptr)c->alloc, 256);
823 c->tdba = (Td*)ROUNDUP((uintptr)(c->rdba + c->nrd), 256);
824 c->rb = (Block**)(c->tdba + c->ntd);
825 c->tb = (Block**)(c->rb + c->nrd);
828 while(b = rballoc()){
836 for(c->nrb = 0; c->nrb < 2*Nrb; c->nrb++){
837 if(!(b = allocb(c->rbsz+BY2PG)))
847 snprint(buf, sizeof buf, "#l%dl", e->ctlrno);
848 kproc(buf, lproc, e);
849 snprint(buf, sizeof buf, "#l%dr", e->ctlrno);
850 kproc(buf, rproc, e);
851 snprint(buf, sizeof buf, "#l%dt", e->ctlrno);
852 kproc(buf, tproc, e);
856 interrupt(Ureg*, void *v)
867 while((icr = c->reg[Icr] & c->im) != 0){
884 c->reg[Ims] = c->im = im;
897 while(p = pcimatch(p, 0x8086, 0)){
899 case 0x10c6: /* 82598 af dual port */
900 case 0x10c7: /* 82598 af single port */
901 case 0x10b6: /* 82598 backplane */
902 case 0x10dd: /* 82598 at cx4 */
903 case 0x10ec: /* 82598 at cx4 dual port */
908 if(nctlr == nelem(ctlrtab)){
909 print("i82598: too many controllers\n");
912 io = p->mem[0].bar & ~0xf;
913 mem = vmap(io, p->mem[0].size);
915 print("i82598: can't map %#p\n", p->mem[0].bar);
918 io3 = p->mem[3].bar & ~0xf;
919 mem3 = vmap(io3, p->mem[3].size);
921 print("i82598: can't map %#p\n", p->mem[3].bar);
922 vunmap(mem, p->mem[0].size);
925 c = malloc(sizeof *c);
927 c->reg = (u32int*)mem;
928 c->reg3 = (u32int*)mem3;
931 print("i82598: can't reset\n");
933 vunmap(mem, p->mem[0].size);
934 vunmap(mem3, p->mem[3].size);
938 ctlrtab[nctlr++] = c;
950 for(i = 0; i < nctlr; i++){
952 if(c == nil || c->flag & Factive)
954 if(e->port == 0 || e->port == (ulong)c->reg)
961 e->port = (uintptr)c->reg;
963 e->tbdf = c->p->tbdf;
966 memmove(e->ea, c->ra, Eaddrlen);
971 e->interrupt = interrupt;
972 e->multicast = multicast;
973 e->promiscuous = promiscuous;
974 e->shutdown = shutdown;
975 e->transmit = transmit;
983 addethercard("i82598", pnp);