5 #include "../port/lib.h"
10 #include "../port/netif.h"
11 #include "../port/error.h"
13 typedef struct Bridge Bridge;
14 typedef struct Port Port;
15 typedef struct Centry Centry;
16 typedef struct Iphdr Iphdr;
17 typedef struct Tcphdr Tcphdr;
21 Qtopdir= 1, /* top level directory */
23 Qbridgedir, /* bridge* directory */
29 Qportdir, /* directory for a protocol */
37 Maxport= 128, // power of 2
38 CacheHash= 257, // prime
39 CacheLook= 5, // how many cache entries to examine
40 CacheSize= (CacheHash+CacheLook-1),
41 CacheTimeout= 5*60, // timeout for cache entry in seconds
42 MaxMTU= IP_MAX, // allow for jumbo frames and large UDP
44 TcpMssMax = 1300, // max desirable Tcp MSS value
48 static Dirtab bridgedirtab[]={
49 "ctl", {Qbctl}, 0, 0666,
50 "stats", {Qstats}, 0, 0444,
51 "cache", {Qcache}, 0, 0444,
52 "log", {Qlog}, 0, 0666,
55 static Dirtab portdirtab[]={
56 "ctl", {Qpctl}, 0, 0666,
57 "local", {Qlocal}, 0, 0444,
58 "status", {Qstatus}, 0, 0444,
66 // types of interfaces
73 static Logflag logflags[] =
75 { "cache", Logcache, },
76 { "multicast", Logmcast, },
80 static Dirtab *dirtab[MaxQ];
82 #define TYPE(x) (((ulong)(x).path) & 0xff)
83 #define PORT(x) ((((ulong)(x).path) >> 8)&(Maxport-1))
84 #define QID(x, y) (((x)<<8) | (y))
90 long expire; // entry expires this many seconds after boot time
100 Centry cache[CacheSize];
104 long delay0; // constant microsecond delay per packet
105 long delayn; // microsecond delay per byte
106 int tcpmss; // modify tcpmss value
118 Chan *data[2]; // channel to data
120 Proc *readp; // read proc
122 // the following uniquely identifies the port
126 // owner hash - avoids bind/unbind races
130 int in; // number of packets read
131 int inmulti; // of those read: multicast or broadcast
132 int inunknown; // of those read: unknown address
133 int out; // number of packets written
134 int outmulti; // of those written: multicast or broadcast
135 int outunknown; // of those written: unknown address
136 int outfrag; // fragmented the packet
137 int nentry; // number of cache entries for this port
145 MSS_LENGTH = 4, /* length of the TCP MSS (maximum segment size) option */
146 SYN = 0x02, /* TCP SYN (synchronise) flag bit */
147 IPHDR = 20, /* sizeof(Iphdr) */
152 uchar vihl; /* Version and header length */
153 uchar tos; /* Type of service */
154 uchar length[2]; /* packet length */
155 uchar id[2]; /* ip->identification */
156 uchar frag[2]; /* Fragment information */
157 uchar ttl; /* Time to live */
158 uchar proto; /* Protocol */
159 uchar cksum[2]; /* Header checksum */
160 uchar src[4]; /* IP source */
161 uchar dst[4]; /* IP destination */
176 static Bridge bridgetab[Maxbridge];
184 static int bridgegen(Chan *c, char*, Dirtab*, int, int s, Dir *dp);
185 static void portbind(Bridge *b, int argc, char *argv[]);
186 static void portunbind(Bridge *b, int argc, char *argv[]);
187 static void etherread(void *a);
188 static char *cachedump(Bridge *b);
189 static void portfree(Port *port);
190 static void cacheflushport(Bridge *b, int port);
191 static void etherwrite(Port *port, Block *bp);
199 // setup dirtab with non directory entries
200 for(i=0; i<nelem(bridgedirtab); i++) {
201 dt = bridgedirtab + i;
202 dirtab[TYPE(dt->qid)] = dt;
204 for(i=0; i<nelem(portdirtab); i++) {
206 dirtab[TYPE(dt->qid)] = dt;
211 bridgeattach(char* spec)
217 if(dev<0 || dev >= Maxbridge)
218 error("bad specification");
220 c = devattach('B', spec);
221 mkqid(&c->qid, QID(0, Qtopdir), 0, QTDIR);
227 bridgewalk(Chan *c, Chan *nc, char **name, int nname)
229 return devwalk(c, nc, name, nname, (Dirtab*)0, 0, bridgegen);
233 bridgestat(Chan* c, uchar* db, int n)
235 return devstat(c, db, n, (Dirtab *)0, 0L, bridgegen);
239 bridgeopen(Chan* c, int omode)
248 b = bridgetab + c->dev;
251 switch(TYPE(c->qid)) {
258 c->aux = cachedump(b);
261 c->mode = openmode(omode);
270 Bridge *b = bridgetab + c->dev;
272 switch(TYPE(c->qid)) {
285 bridgeread(Chan *c, void *a, long n, vlong off)
288 Bridge *b = bridgetab + c->dev;
290 int i, ingood, outgood;
293 switch(TYPE(c->qid)) {
299 return devdirread(c, a, n, 0, 0, bridgegen);
301 return logread(b, a, off, n);
303 return 0; /* TO DO */
310 port = b->port[PORT(c->qid)];
312 strcpy(buf, "unbound\n");
317 panic("bridgeread: unknown port type: %d",
320 i += snprint(buf+i, sizeof(buf)-i, "ether %s: ", port->name);
323 i += snprint(buf+i, sizeof(buf)-i, "tunnel %s: ", port->name);
326 ingood = port->in - port->inmulti - port->inunknown;
327 outgood = port->out - port->outmulti - port->outunknown;
328 snprint(buf+i, sizeof(buf)-i,
329 "in=%d(%d:%d:%d) out=%d(%d:%d:%d:%d)\n",
330 port->in, ingood, port->inmulti, port->inunknown,
331 port->out, outgood, port->outmulti,
332 port->outunknown, port->outfrag);
336 return readstr(off, a, n, buf);
338 snprint(buf, sizeof(buf), "%s tcpmss\ndelay %ld %ld\n",
339 b->tcpmss ? "set" : "clear", b->delay0, b->delayn);
340 n = readstr(off, a, n, buf);
343 n = readstr(off, a, n, c->aux);
346 snprint(buf, sizeof(buf), "hit=%uld miss=%uld copy=%uld\n",
347 b->hit, b->miss, b->copy);
348 n = readstr(off, a, n, buf);
354 bridgeoption(Bridge *b, char *option, int value)
356 if(strcmp(option, "tcpmss") == 0)
359 error("unknown bridge option");
364 bridgewrite(Chan *c, void *a, long n, vlong off)
366 Bridge *b = bridgetab + c->dev;
371 switch(TYPE(c->qid)) {
383 error("short write");
385 if(strcmp(arg0, "bind") == 0) {
386 portbind(b, cb->nf-1, cb->f+1);
387 } else if(strcmp(arg0, "unbind") == 0) {
388 portunbind(b, cb->nf-1, cb->f+1);
389 } else if(strcmp(arg0, "cacheflush") == 0) {
390 log(b, Logcache, "cache flush\n");
391 memset(b->cache, 0, CacheSize*sizeof(Centry));
392 } else if(strcmp(arg0, "set") == 0) {
394 error("usage: set option");
395 bridgeoption(b, cb->f[1], 1);
396 } else if(strcmp(arg0, "clear") == 0) {
398 error("usage: clear option");
399 bridgeoption(b, cb->f[1], 0);
400 } else if(strcmp(arg0, "delay") == 0) {
402 error("usage: delay delay0 delayn");
403 b->delay0 = strtol(cb->f[1], nil, 10);
404 b->delayn = strtol(cb->f[2], nil, 10);
406 error("unknown control request");
413 p = logctl(b, cb->nf, cb->f, logflags);
422 bridgegen(Chan *c, char *, Dirtab*, int, int s, Dir *dp)
424 Bridge *b = bridgetab + c->dev;
425 int type = TYPE(c->qid);
430 switch(TYPE(c->qid)){
433 snprint(up->genbuf, sizeof(up->genbuf), "#B%ld", c->dev);
434 mkqid(&qid, Qtopdir, 0, QTDIR);
435 devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
438 snprint(up->genbuf, sizeof(up->genbuf), "bridge%ld", c->dev);
439 mkqid(&qid, Qbridgedir, 0, QTDIR);
440 devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
443 panic("bridgewalk %llux", c->qid.path);
450 /* non-directory entries end up here */
451 if(c->qid.type & QTDIR)
452 panic("bridgegen: unexpected directory");
455 dt = dirtab[TYPE(c->qid)];
457 panic("bridgegen: unknown type: %lud", TYPE(c->qid));
458 devdir(c, c->qid, dt->name, dt->length, eve, dt->perm, dp);
463 snprint(up->genbuf, sizeof(up->genbuf), "bridge%ld", c->dev);
464 mkqid(&qid, QID(0, Qbridgedir), 0, QTDIR);
465 devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
468 if(s<nelem(bridgedirtab)) {
470 devdir(c, dt->qid, dt->name, dt->length, eve, dt->perm, dp);
473 s -= nelem(bridgedirtab);
476 mkqid(&qid, QID(s, Qportdir), 0, QTDIR);
477 snprint(up->genbuf, sizeof(up->genbuf), "%d", s);
478 devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
481 if(s>=nelem(portdirtab))
484 mkqid(&qid, QID(PORT(c->qid),TYPE(dt->qid)), 0, QTFILE);
485 devdir(c, qid, dt->name, dt->length, eve, dt->perm, dp);
490 // parse MAC address; also in netif.c
492 parseaddr(uchar *to, char *from, int alen)
499 for(i = 0; i < alen; i++){
507 to[i] = strtoul(nip, 0, 16);
514 // assumes b is locked
516 portbind(Bridge *b, int argc, char *argv[])
522 char *dev, *dev2 = nil;
523 char buf[100], name[KNAMELEN], path[8*KNAMELEN];
524 static char usage[] = "usage: bind ether|tunnel name ownhash dev [dev2]";
526 memset(name, 0, KNAMELEN);
529 if(strcmp(argv[0], "ether") == 0) {
533 strncpy(name, argv[1], KNAMELEN);
534 name[KNAMELEN-1] = 0;
535 // parseaddr(addr, argv[1], Eaddrlen);
536 } else if(strcmp(argv[0], "tunnel") == 0) {
540 strncpy(name, argv[1], KNAMELEN);
541 name[KNAMELEN-1] = 0;
542 // parseip(addr, argv[1]);
546 ownhash = atoi(argv[2]);
548 for(i=0; i<b->nport; i++) {
550 if(port != nil && port->type == type &&
551 memcmp(port->name, name, KNAMELEN) == 0)
552 error("port in use");
554 for(i=0; i<Maxport; i++)
555 if(b->port[i] == nil)
558 error("no more ports");
559 port = smalloc(sizeof(Port));
562 port->ownhash = ownhash;
569 memmove(port->name, name, KNAMELEN);
572 panic("portbind: unknown port type: %d", type);
574 snprint(path, sizeof(path), "%s/clone", dev);
575 ctl = namec(path, Aopen, ORDWR, 0);
582 // get directory name
583 n = devtab[ctl->type]->read(ctl, buf, sizeof(buf)-1, 0);
585 snprint(path, sizeof(path), "%s/%lud/data", dev, strtoul(buf, 0, 0));
587 // setup connection to be promiscuous
588 snprint(buf, sizeof(buf), "connect -1");
589 devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
590 snprint(buf, sizeof(buf), "promiscuous");
591 devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
592 snprint(buf, sizeof(buf), "bridge");
593 devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
596 port->data[0] = namec(path, Aopen, ORDWR, 0);
598 incref(port->data[0]);
599 port->data[1] = port->data[0];
606 port->data[0] = namec(dev, Aopen, OREAD, 0);
607 port->data[1] = namec(dev2, Aopen, OWRITE, 0);
613 /* committed to binding port */
614 b->port[port->id] = port;
616 if(b->nport <= port->id)
617 b->nport = port->id+1;
619 // assumes kproc always succeeds
621 snprint(buf, sizeof(buf), "bridge:%s", dev);
622 kproc(buf, etherread, port);
625 // assumes b is locked
627 portunbind(Bridge *b, int argc, char *argv[])
633 static char usage[] = "usage: unbind ether|tunnel addr [ownhash]";
635 memset(name, 0, KNAMELEN);
636 if(argc < 2 || argc > 3)
638 if(strcmp(argv[0], "ether") == 0) {
640 strncpy(name, argv[1], KNAMELEN);
641 name[KNAMELEN-1] = 0;
642 // parseaddr(addr, argv[1], Eaddrlen);
643 } else if(strcmp(argv[0], "tunnel") == 0) {
645 strncpy(name, argv[1], KNAMELEN);
646 name[KNAMELEN-1] = 0;
647 // parseip(addr, argv[1]);
651 ownhash = atoi(argv[2]);
654 for(i=0; i<b->nport; i++) {
656 if(port != nil && port->type == type &&
657 memcmp(port->name, name, KNAMELEN) == 0)
661 error("port not found");
662 if(ownhash != 0 && port->ownhash != 0 && ownhash != port->ownhash)
663 error("bad owner hash");
666 b->port[i] = nil; // port is now unbound
667 cacheflushport(b, i);
669 // try to stop the reader
671 postnote(port->readp, 1, "unbind", 0);
675 // assumes b is locked
677 cachelookup(Bridge *b, uchar d[Eaddrlen])
684 // don't cache multicast or broadcast
689 for(i=0; i<Eaddrlen; i++) {
695 sec = TK2SEC(m->ticks);
696 for(i=0; i<CacheLook; i++,p++) {
697 if(memcmp(d, p->d, Eaddrlen) == 0) {
699 if(sec >= p->expire) {
700 log(b, Logcache, "expired cache entry: %E %d\n",
704 p->expire = sec + CacheTimeout;
708 log(b, Logcache, "cache miss: %E\n", d);
712 // assumes b is locked
714 cacheupdate(Bridge *b, uchar d[Eaddrlen], int port)
721 // don't cache multicast or broadcast
723 log(b, Logcache, "bad source address: %E\n", d);
728 for(i=0; i<Eaddrlen; i++) {
737 // look for oldest entry
738 for(i=0; i<CacheLook; i++,p++) {
739 if(memcmp(p->d, d, Eaddrlen) == 0) {
740 p->expire = TK2SEC(m->ticks) + CacheTimeout;
741 if(p->port != port) {
742 log(b, Logcache, "NIC changed port %d->%d: %E\n",
749 if(p->expire < sec) {
755 log(b, Logcache, "bumping from cache: %E %d\n", pp->d, pp->port);
756 pp->expire = TK2SEC(m->ticks) + CacheTimeout;
757 memmove(pp->d, d, Eaddrlen);
761 log(b, Logcache, "adding to cache: %E %d\n", pp->d, pp->port);
764 // assumes b is locked
766 cacheflushport(Bridge *b, int port)
772 for(i=0; i<CacheSize; i++,ce++) {
775 memset(ce, 0, sizeof(Centry));
793 sec = TK2SEC(m->ticks);
795 for(i=0; i<CacheSize; i++)
796 if(b->cache[i].expire != 0)
799 n *= 51; // change if print format is changed
800 n += 10; // some slop at the end
807 off = seconds() - sec;
808 for(i=0; i<CacheSize; i++,ce++) {
811 c = (sec < ce->expire)?'v':'e';
812 p += snprint(p, ep-p, "%E %2d %10ld %10ld %10ld %c\n", ce->d,
813 ce->port, ce->src, ce->dst, ce->expire+off, c);
824 // assumes b is locked, no error return
826 ethermultiwrite(Bridge *b, Block *bp, Port *port)
832 ep = (Etherpkt*)bp->rp;
833 mcast = ep->d[0] & 1; /* multicast bit of ethernet address */
836 for(i=0; i<b->nport; i++) {
837 if(i == port->id || b->port[i] == nil)
840 * we need to forward multicast packets for ipv6,
844 b->port[i]->outmulti++;
846 b->port[i]->outunknown++;
848 // delay each write by one port so that the last write can use bp itself instead of a copy
851 etherwrite(oport, copyblock(bp, BLEN(bp)));
856 // last write frees the block
858 etherwrite(oport, bp);
864 tcpmsshack(Etherpkt *epkt, int n)
872 /* ignore non-ipv4 packets */
873 if(nhgets(epkt->type) != ETIP4)
875 iphdr = (Iphdr*)(epkt->data);
880 /* ignore bad packets */
881 if(iphdr->vihl != (IP_VER4|IP_HLEN4)) {
882 hl = (iphdr->vihl&0xF)<<2;
883 if((iphdr->vihl&0xF0) != IP_VER4 || hl < (IP_HLEN4<<2))
888 /* ignore non-tcp packets */
889 if(iphdr->proto != IP_TCPPROTO)
892 if(n < sizeof(Tcphdr))
894 tcphdr = (Tcphdr*)((uchar*)(iphdr) + hl);
895 // MSS can only appear in SYN packet
896 if(!(tcphdr->flag[1] & SYN))
898 hl = (tcphdr->flag[0] & 0xf0)>>2;
902 // check for MSS option
903 optr = (uchar*)tcphdr + sizeof(Tcphdr);
904 n = hl - sizeof(Tcphdr);
906 if(n <= 0 || *optr == EOLOPT)
908 if(*optr == NOOPOPT) {
914 if(optlen < 2 || optlen > n)
916 if(*optr == MSSOPT && optlen == MSS_LENGTH)
922 mss = nhgets(optr+2);
926 cksum = nhgets(tcphdr->cksum);
927 if(optr-(uchar*)tcphdr & 1) {
928 print("tcpmsshack: odd alignment!\n");
929 // odd alignments are a pain
930 cksum += nhgets(optr+1);
931 cksum -= (optr[1]<<8)|(TcpMssMax>>8);
932 cksum += (cksum>>16);
934 cksum += nhgets(optr+3);
935 cksum -= ((TcpMssMax&0xff)<<8)|optr[4];
936 cksum += (cksum>>16);
940 cksum += (cksum>>16);
942 hnputs(tcphdr->cksum, cksum);
943 hnputs(optr+2, TcpMssMax);
947 * process to read from the ethernet
953 Bridge *b = port->bridge;
960 port->readp = up; /* hide identity under a rock for unbind */
962 while(!port->closed){
963 // release lock to read - error means it is time to quit
966 print("etherread read error: %s\n", up->errstr);
970 bp = devtab[port->data[0]->type]->bread(port->data[0], MaxMTU, 0);
976 if(port->closed || n < ETHERMINTU){
981 // print("etherread bridge error\n");
987 ep = (Etherpkt*)bp->rp;
988 cacheupdate(b, ep->s, port->id);
993 * delay packets to simulate a slow link
995 if(b->delay0 != 0 || b->delayn != 0){
996 md = b->delay0 + b->delayn * n;
1001 poperror(); /* must now dispose of bp */
1004 log(b, Logmcast, "multicast: port=%d src=%E dst=%E type=%#.4ux\n",
1005 port->id, ep->s, ep->d, ep->type[0]<<8|ep->type[1]);
1007 ethermultiwrite(b, bp, port);
1009 ce = cachelookup(b, ep->d);
1013 ethermultiwrite(b, bp, port);
1014 }else if(ce->port != port->id){
1016 etherwrite(b->port[ce->port], bp);
1021 // print("etherread: trying to exit\n");
1029 fragment(Etherpkt *epkt, int n)
1036 /* ignore non-ipv4 packets */
1037 if(nhgets(epkt->type) != ETIP4)
1039 iphdr = (Iphdr*)(epkt->data);
1042 * ignore: IP runt packets, bad packets (I don't handle IP
1043 * options for the moment), packets with don't-fragment set,
1046 if(n < IPHDR || iphdr->vihl != (IP_VER4|IP_HLEN4) ||
1047 iphdr->frag[0] & (IP_DF>>8) || nhgets(iphdr->length) > n)
1054 etherwrite(Port *port, Block *bp)
1058 int n, lid, len, seglen, dlen, blklen, mf;
1060 ushort fragoff, frag;
1064 epkt = (Etherpkt*)bp->rp;
1065 if(port->type != Ttun || !fragment(epkt, n)) {
1067 devtab[port->data[1]->type]->bwrite(port->data[1], bp, 0);
1078 seglen = (TunnelMtu - ETHERHDRSIZE - IPHDR) & ~7;
1079 eh = (Iphdr*)(epkt->data);
1080 len = nhgets(eh->length);
1081 frag = nhgets(eh->frag);
1085 lid = nhgets(eh->id);
1086 bp->rp += ETHERHDRSIZE+IPHDR;
1089 print("seglen=%d, dlen=%d, mf=%x, frag=%d\n",
1090 seglen, dlen, mf, frag);
1091 for(fragoff = 0; fragoff < dlen; fragoff += seglen) {
1092 nb = allocb(ETHERHDRSIZE+IPHDR+seglen);
1094 feh = (Iphdr*)(nb->wp+ETHERHDRSIZE);
1096 memmove(nb->wp, epkt, ETHERHDRSIZE+IPHDR);
1097 nb->wp += ETHERHDRSIZE+IPHDR;
1099 if((fragoff + seglen) >= dlen) {
1100 seglen = dlen - fragoff;
1101 hnputs(feh->frag, (frag+fragoff)>>3 | mf);
1104 hnputs(feh->frag, (frag+fragoff>>3) | IP_MF);
1106 hnputs(feh->length, seglen + IPHDR);
1107 hnputs(feh->id, lid);
1113 memmove(nb->wp, bp->rp, blklen);
1120 hnputs(feh->cksum, ipcsum(&feh->vihl));
1122 /* don't generate small packets */
1123 if(BLEN(nb) < ETHERMINTU)
1124 nb = adjustblock(nb, ETHERMINTU);
1125 devtab[port->data[1]->type]->bwrite(port->data[1], nb, 0);
1133 portfree(Port *port)
1135 if(decref(port) != 0)
1139 cclose(port->data[0]);
1141 cclose(port->data[1]);
1142 memset(port, 0, sizeof(Port));
1146 Dev bridgedevtab = {