5 #include "../port/lib.h"
10 #include "../ip/ipv6.h"
11 #include "../port/netif.h"
12 #include "../port/error.h"
14 typedef struct Bridge Bridge;
15 typedef struct Port Port;
16 typedef struct Centry Centry;
17 typedef struct Tcphdr Tcphdr;
21 Qtopdir= 1, /* top level directory */
23 Qbridgedir, /* bridge* directory */
29 Qportdir, /* directory for a bridge port */
37 Maxport= 128, // power of 2
38 CacheHash= 257, // prime
39 CacheLook= 5, // how many cache entries to examine
40 CacheSize= (CacheHash+CacheLook-1),
41 CacheTimeout= 5*60, // timeout for cache entry in seconds
42 MaxMTU= IP_MAX, // allow for jumbo frames and large UDP
44 TcpMssMax = 1300, // max desirable Tcp MSS value
48 static Dirtab bridgedirtab[]={
49 "ctl", {Qbctl}, 0, 0666,
50 "stats", {Qstats}, 0, 0444,
51 "cache", {Qcache}, 0, 0444,
52 "log", {Qlog}, 0, 0666,
55 static Dirtab portdirtab[]={
56 "ctl", {Qpctl}, 0, 0666,
57 "local", {Qlocal}, 0, 0444,
58 "status", {Qstatus}, 0, 0444,
66 // types of interfaces
73 static Logflag logflags[] =
75 { "cache", Logcache, },
76 { "multicast", Logmcast, },
80 static Dirtab *dirtab[MaxQ];
82 #define TYPE(x) (((ulong)(x).path) & 0xff) /* low 8 bits of the qid path */
83 #define PORT(x) ((((ulong)(x).path) >> 8)&(Maxport-1)) /* qid path bits above the low 8, masked to Maxport-1 */
84 #define QID(x, y) (((x)<<8) | (y)) /* build a qid path: x shifted above the low 8 bits, or'ed with y (y is not masked) */
90 long expire; // entry expires this many seconds after boot time
100 Centry cache[CacheSize];
104 long delay0; // constant microsecond delay per packet
105 long delayn; // microsecond delay per byte
106 int tcpmss; // modify tcpmss value
118 Chan *data[2]; // data channels: [0] is read from, [1] is written to
120 Proc *readp; // read proc
122 // the following uniquely identifies the port
126 // owner hash - avoids bind/unbind races
130 int in; // number of packets read
131 int inmulti; // multicast or broadcast
132 int inunknown; // unknown address
133 int out; // number of packets written
134 int outmulti; // multicast or broadcast
135 int outunknown; // unknown address
136 int outfrag; // fragmented the packet
137 int nentry; // number of cache entries for this port
144 MSS_LENGTH = 4, /* length of the TCP MSS (maximum segment size) option */
145 SYN = 0x02, /* TCP SYN (synchronise) flag bit */
161 static Bridge bridgetab[Maxbridge];
169 static int bridgegen(Chan *c, char*, Dirtab*, int, int s, Dir *dp);
170 static void portbind(Bridge *b, int argc, char *argv[]);
171 static void portunbind(Bridge *b, int argc, char *argv[]);
172 static void etherread(void *a);
173 static char *cachedump(Bridge *b);
174 static void portfree(Port *port);
175 static void cacheflushport(Bridge *b, int port);
176 static void etherwrite(Port *port, Block *bp);
184 // setup dirtab with non directory entries
185 for(i=0; i<nelem(bridgedirtab); i++) {
186 dt = bridgedirtab + i;
187 dirtab[TYPE(dt->qid)] = dt;
189 for(i=0; i<nelem(portdirtab); i++) {
191 dirtab[TYPE(dt->qid)] = dt;
196 bridgeattach(char *spec)
201 dev = strtoul(spec, nil, 10);
205 c = devattach('B', spec);
206 mkqid(&c->qid, QID(0, Qtopdir), 0, QTDIR);
212 bridgewalk(Chan *c, Chan *nc, char **name, int nname)
214 return devwalk(c, nc, name, nname, (Dirtab*)0, 0, bridgegen);
218 bridgestat(Chan* c, uchar* db, int n)
220 return devstat(c, db, n, (Dirtab *)0, 0L, bridgegen);
224 bridgeopen(Chan* c, int omode)
233 b = bridgetab + c->dev;
236 switch(TYPE(c->qid)) {
243 c->aux = cachedump(b);
246 c->mode = openmode(omode);
255 Bridge *b = bridgetab + c->dev;
257 switch(TYPE(c->qid)) {
270 bridgeread(Chan *c, void *a, long n, vlong off)
273 Bridge *b = bridgetab + c->dev;
275 int i, ingood, outgood;
278 switch(TYPE(c->qid)) {
284 return devdirread(c, a, n, 0, 0, bridgegen);
286 return logread(b, a, off, n);
288 return 0; /* TO DO */
295 port = b->port[PORT(c->qid)];
297 strcpy(buf, "unbound\n");
302 panic("bridgeread: unknown port type: %d",
305 i += snprint(buf+i, sizeof(buf)-i, "ether %s: ", port->name);
308 i += snprint(buf+i, sizeof(buf)-i, "tunnel %s: ", port->name);
311 ingood = port->in - port->inmulti - port->inunknown;
312 outgood = port->out - port->outmulti - port->outunknown;
313 snprint(buf+i, sizeof(buf)-i,
314 "in=%d(%d:%d:%d) out=%d(%d:%d:%d:%d)\n",
315 port->in, ingood, port->inmulti, port->inunknown,
316 port->out, outgood, port->outmulti,
317 port->outunknown, port->outfrag);
321 return readstr(off, a, n, buf);
323 snprint(buf, sizeof(buf), "%s tcpmss\ndelay %ld %ld\n",
324 b->tcpmss ? "set" : "clear", b->delay0, b->delayn);
325 n = readstr(off, a, n, buf);
328 n = readstr(off, a, n, c->aux);
331 snprint(buf, sizeof(buf), "hit=%uld miss=%uld copy=%uld\n",
332 b->hit, b->miss, b->copy);
333 n = readstr(off, a, n, buf);
339 bridgeoption(Bridge *b, char *option, int value)
341 if(strcmp(option, "tcpmss") == 0)
344 error("unknown bridge option");
349 bridgewrite(Chan *c, void *a, long n, vlong off)
351 Bridge *b = bridgetab + c->dev;
356 switch(TYPE(c->qid)) {
368 error("short write");
370 if(strcmp(arg0, "bind") == 0) {
371 portbind(b, cb->nf-1, cb->f+1);
372 } else if(strcmp(arg0, "unbind") == 0) {
373 portunbind(b, cb->nf-1, cb->f+1);
374 } else if(strcmp(arg0, "cacheflush") == 0) {
375 log(b, Logcache, "cache flush\n");
376 memset(b->cache, 0, CacheSize*sizeof(Centry));
377 } else if(strcmp(arg0, "set") == 0) {
379 error("usage: set option");
380 bridgeoption(b, cb->f[1], 1);
381 } else if(strcmp(arg0, "clear") == 0) {
383 error("usage: clear option");
384 bridgeoption(b, cb->f[1], 0);
385 } else if(strcmp(arg0, "delay") == 0) {
387 error("usage: delay delay0 delayn");
388 b->delay0 = strtol(cb->f[1], nil, 10);
389 b->delayn = strtol(cb->f[2], nil, 10);
391 error("unknown control request");
398 p = logctl(b, cb->nf, cb->f, logflags);
407 bridgegen(Chan *c, char *, Dirtab*, int, int s, Dir *dp)
409 Bridge *b = bridgetab + c->dev;
410 int type = TYPE(c->qid);
415 switch(TYPE(c->qid)){
418 snprint(up->genbuf, sizeof(up->genbuf), "#B%ld", c->dev);
419 mkqid(&qid, Qtopdir, 0, QTDIR);
420 devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
423 snprint(up->genbuf, sizeof(up->genbuf), "bridge%ld", c->dev);
424 mkqid(&qid, Qbridgedir, 0, QTDIR);
425 devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
428 panic("bridgewalk %llux", c->qid.path);
435 /* non-directory entries end up here */
436 if(c->qid.type & QTDIR)
437 panic("bridgegen: unexpected directory");
440 dt = dirtab[TYPE(c->qid)];
442 panic("bridgegen: unknown type: %lud", TYPE(c->qid));
443 devdir(c, c->qid, dt->name, dt->length, eve, dt->perm, dp);
448 snprint(up->genbuf, sizeof(up->genbuf), "bridge%ld", c->dev);
449 mkqid(&qid, QID(0, Qbridgedir), 0, QTDIR);
450 devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
453 if(s<nelem(bridgedirtab)) {
455 devdir(c, dt->qid, dt->name, dt->length, eve, dt->perm, dp);
458 s -= nelem(bridgedirtab);
461 mkqid(&qid, QID(s, Qportdir), 0, QTDIR);
462 snprint(up->genbuf, sizeof(up->genbuf), "%d", s);
463 devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
466 if(s>=nelem(portdirtab))
469 mkqid(&qid, QID(PORT(c->qid),TYPE(dt->qid)), 0, QTFILE);
470 devdir(c, qid, dt->name, dt->length, eve, dt->perm, dp);
475 // parse mac address; also in netif.c
477 parseaddr(uchar *to, char *from, int alen)
484 for(i = 0; i < alen; i++){
492 to[i] = strtoul(nip, 0, 16);
499 // assumes b is locked
501 portbind(Bridge *b, int argc, char *argv[])
507 char *dev, *dev2 = nil;
508 char buf[100], name[KNAMELEN], path[8*KNAMELEN];
509 static char usage[] = "usage: bind ether|tunnel name ownhash dev [dev2]";
511 memset(name, 0, KNAMELEN);
514 if(strcmp(argv[0], "ether") == 0) {
518 strncpy(name, argv[1], KNAMELEN);
519 name[KNAMELEN-1] = 0;
520 // parseaddr(addr, argv[1], Eaddrlen);
521 } else if(strcmp(argv[0], "tunnel") == 0) {
525 strncpy(name, argv[1], KNAMELEN);
526 name[KNAMELEN-1] = 0;
527 // parseip(addr, argv[1]);
531 ownhash = atoi(argv[2]);
533 for(i=0; i<b->nport; i++) {
535 if(port != nil && port->type == type &&
536 memcmp(port->name, name, KNAMELEN) == 0)
537 error("port in use");
539 for(i=0; i<Maxport; i++)
540 if(b->port[i] == nil)
543 error("no more ports");
544 port = smalloc(sizeof(Port));
547 port->ownhash = ownhash;
554 memmove(port->name, name, KNAMELEN);
557 panic("portbind: unknown port type: %d", type);
559 snprint(path, sizeof(path), "%s/clone", dev);
560 ctl = namec(path, Aopen, ORDWR, 0);
567 // get directory name
568 n = devtab[ctl->type]->read(ctl, buf, sizeof(buf)-1, 0);
570 snprint(path, sizeof(path), "%s/%lud/data", dev, strtoul(buf, 0, 0));
572 // setup connection to be promiscuous
573 snprint(buf, sizeof(buf), "connect -1");
574 devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
575 snprint(buf, sizeof(buf), "nonblocking");
576 devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
577 snprint(buf, sizeof(buf), "promiscuous");
578 devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
579 snprint(buf, sizeof(buf), "bridge");
580 devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
583 port->data[0] = namec(path, Aopen, ORDWR, 0);
585 incref(port->data[0]);
586 port->data[1] = port->data[0];
593 port->data[0] = namec(dev, Aopen, OREAD, 0);
594 port->data[1] = namec(dev2, Aopen, OWRITE, 0);
600 /* committed to binding port */
601 b->port[port->id] = port;
603 if(b->nport <= port->id)
604 b->nport = port->id+1;
606 // assumes kproc always succeeds
608 snprint(buf, sizeof(buf), "bridge:%s", dev);
609 kproc(buf, etherread, port);
612 // assumes b is locked
614 portunbind(Bridge *b, int argc, char *argv[])
620 static char usage[] = "usage: unbind ether|tunnel addr [ownhash]";
622 memset(name, 0, KNAMELEN);
623 if(argc < 2 || argc > 3)
625 if(strcmp(argv[0], "ether") == 0) {
627 strncpy(name, argv[1], KNAMELEN);
628 name[KNAMELEN-1] = 0;
629 // parseaddr(addr, argv[1], Eaddrlen);
630 } else if(strcmp(argv[0], "tunnel") == 0) {
632 strncpy(name, argv[1], KNAMELEN);
633 name[KNAMELEN-1] = 0;
634 // parseip(addr, argv[1]);
638 ownhash = atoi(argv[2]);
641 for(i=0; i<b->nport; i++) {
643 if(port != nil && port->type == type &&
644 memcmp(port->name, name, KNAMELEN) == 0)
648 error("port not found");
649 if(ownhash != 0 && port->ownhash != 0 && ownhash != port->ownhash)
650 error("bad owner hash");
653 b->port[i] = nil; // port is now unbound
654 cacheflushport(b, i);
656 // try and stop reader
658 postnote(port->readp, 1, "unbind", 0);
662 // assumes b is locked
664 cachelookup(Bridge *b, uchar d[Eaddrlen])
671 // don't cache multicast or broadcast
676 for(i=0; i<Eaddrlen; i++) {
682 sec = TK2SEC(m->ticks);
683 for(i=0; i<CacheLook; i++,p++) {
684 if(memcmp(d, p->d, Eaddrlen) == 0) {
686 if(sec >= p->expire) {
687 log(b, Logcache, "expired cache entry: %E %d\n",
691 p->expire = sec + CacheTimeout;
695 log(b, Logcache, "cache miss: %E\n", d);
699 // assumes b is locked
701 cacheupdate(Bridge *b, uchar d[Eaddrlen], int port)
708 // don't cache multicast or broadcast
710 log(b, Logcache, "bad source address: %E\n", d);
715 for(i=0; i<Eaddrlen; i++) {
724 // look for oldest entry
725 for(i=0; i<CacheLook; i++,p++) {
726 if(memcmp(p->d, d, Eaddrlen) == 0) {
727 p->expire = TK2SEC(m->ticks) + CacheTimeout;
728 if(p->port != port) {
729 log(b, Logcache, "NIC changed port %d->%d: %E\n",
736 if(p->expire < sec) {
742 log(b, Logcache, "bumping from cache: %E %d\n", pp->d, pp->port);
743 pp->expire = TK2SEC(m->ticks) + CacheTimeout;
744 memmove(pp->d, d, Eaddrlen);
748 log(b, Logcache, "adding to cache: %E %d\n", pp->d, pp->port);
751 // assumes b is locked
753 cacheflushport(Bridge *b, int port)
759 for(i=0; i<CacheSize; i++,ce++) {
762 memset(ce, 0, sizeof(Centry));
780 sec = TK2SEC(m->ticks);
782 for(i=0; i<CacheSize; i++)
783 if(b->cache[i].expire != 0)
786 n *= 51; // change if print format is changed
787 n += 10; // some slop at the end
794 off = seconds() - sec;
795 for(i=0; i<CacheSize; i++,ce++) {
798 c = (sec < ce->expire)?'v':'e';
799 p += snprint(p, ep-p, "%E %2d %10ld %10ld %10ld %c\n", ce->d,
800 ce->port, ce->src, ce->dst, ce->expire+off, c);
811 // assumes b is locked, no error return
813 ethermultiwrite(Bridge *b, Block *bp, Port *port)
819 ep = (Etherpkt*)bp->rp;
820 mcast = ep->d[0] & 1; /* multicast bit of ethernet address */
823 for(i=0; i<b->nport; i++) {
824 if(i == port->id || b->port[i] == nil)
827 * we need to forward multicast packets for ipv6,
831 b->port[i]->outmulti++;
833 b->port[i]->outunknown++;
835 // delay one so that the last write does not copy
838 etherwrite(oport, copyblock(bp, BLEN(bp)));
843 // the last write frees the block
845 etherwrite(oport, bp);
851 tcpmsshack(Etherpkt *epkt, int n)
858 /* ignore non-IP packets (both IPv4 and IPv6 are handled below) */
859 switch(nhgets(epkt->type)){
869 switch(epkt->data[0]&0xF0){
871 hl = (epkt->data[0]&15)<<2;
872 if(n < hl+TCPHDR || hl < IP4HDR || epkt->data[9] != TCP)
875 tcphdr = (Tcphdr*)(epkt->data + hl);
878 if(n < IP6HDR+TCPHDR || epkt->data[6] != TCP)
881 tcphdr = (Tcphdr*)(epkt->data + IP6HDR);
886 // MSS can only appear in SYN packet
887 if(!(tcphdr->flag[1] & SYN))
889 hl = (tcphdr->flag[0] & 0xf0)>>2;
893 // check for MSS option
894 optr = (uchar*)tcphdr + TCPHDR;
897 if(n <= 0 || *optr == EOLOPT)
899 if(*optr == NOOPOPT) {
905 if(optlen < 2 || optlen > n)
907 if(*optr == MSSOPT && optlen == MSS_LENGTH)
913 mss = nhgets(optr+2);
918 cksum = nhgets(tcphdr->cksum);
919 if(optr-(uchar*)tcphdr & 1) {
920 // print("tcpmsshack: odd alignment!\n");
921 // odd alignments are a pain
922 cksum += nhgets(optr+1);
923 cksum -= (optr[1]<<8)|(TcpMssMax>>8);
924 cksum += (cksum>>16);
926 cksum += nhgets(optr+3);
927 cksum -= ((TcpMssMax&0xff)<<8)|optr[4];
928 cksum += (cksum>>16);
932 cksum += (cksum>>16);
934 hnputs(tcphdr->cksum, cksum);
935 hnputs(optr+2, TcpMssMax);
939 * process to read from the ethernet
945 Bridge *b = port->bridge;
952 port->readp = up; /* hide identity under a rock for unbind */
954 while(!port->closed){
955 // release lock to read - error means it is time to quit
958 print("etherread read error: %s\n", up->errstr);
962 bp = devtab[port->data[0]->type]->bread(port->data[0], MaxMTU, 0);
968 if(port->closed || n < ETHERHDRSIZE){
973 // print("etherread bridge error\n");
979 ep = (Etherpkt*)bp->rp;
980 cacheupdate(b, ep->s, port->id);
985 * delay packets to simulate a slow link
987 if(b->delay0 != 0 || b->delayn != 0){
988 md = b->delay0 + b->delayn * n;
993 poperror(); /* must now dispose of bp */
996 log(b, Logmcast, "multicast: port=%d src=%E dst=%E type=%#.4ux\n",
997 port->id, ep->s, ep->d, ep->type[0]<<8|ep->type[1]);
999 ethermultiwrite(b, bp, port);
1001 ce = cachelookup(b, ep->d);
1005 ethermultiwrite(b, bp, port);
1006 }else if(ce->port != port->id){
1008 etherwrite(b->port[ce->port], bp);
1013 // print("etherread: trying to exit\n");
1021 fragment(Etherpkt *epkt, int n)
1028 /* ignore non-ipv4 packets */
1029 if(nhgets(epkt->type) != ETIP4)
1031 iphdr = (Ip4hdr*)(epkt->data);
1034 * ignore: IP runt packets, bad packets (I don't handle IP
1035 * options for the moment), packets with don't-fragment set,
1038 if(n < IP4HDR || iphdr->vihl != (IP_VER4|IP_HLEN4) ||
1039 iphdr->frag[0] & (IP_DF>>8) || nhgets(iphdr->length) > n)
1046 etherwrite(Port *port, Block *bp)
1050 int n, lid, len, seglen, dlen, blklen, mf;
1052 ushort fragoff, frag;
1056 epkt = (Etherpkt*)bp->rp;
1057 if(port->type != Ttun || !fragment(epkt, n)) {
1059 /* don't generate small packets */
1061 bp = adjustblock(bp, ETHERMINTU);
1062 devtab[port->data[1]->type]->bwrite(port->data[1], bp, 0);
1073 seglen = (TunnelMtu - ETHERHDRSIZE - IP4HDR) & ~7;
1074 eh = (Ip4hdr*)(epkt->data);
1075 len = nhgets(eh->length);
1076 frag = nhgets(eh->frag);
1079 dlen = len - IP4HDR;
1080 lid = nhgets(eh->id);
1081 bp->rp += ETHERHDRSIZE+IP4HDR;
1084 print("seglen=%d, dlen=%d, mf=%x, frag=%d\n",
1085 seglen, dlen, mf, frag);
1086 for(fragoff = 0; fragoff < dlen; fragoff += seglen) {
1087 nb = allocb(ETHERHDRSIZE+IP4HDR+seglen);
1089 feh = (Ip4hdr*)(nb->wp+ETHERHDRSIZE);
1091 memmove(nb->wp, epkt, ETHERHDRSIZE+IP4HDR);
1092 nb->wp += ETHERHDRSIZE+IP4HDR;
1094 if((fragoff + seglen) >= dlen) {
1095 seglen = dlen - fragoff;
1096 hnputs(feh->frag, (frag+fragoff)>>3 | mf);
1099 hnputs(feh->frag, (frag+fragoff>>3) | IP_MF);
1101 hnputs(feh->length, seglen + IP4HDR);
1102 hnputs(feh->id, lid);
1108 memmove(nb->wp, bp->rp, blklen);
1115 hnputs(feh->cksum, ipcsum(&feh->vihl));
1117 /* don't generate small packets */
1118 if(BLEN(nb) < ETHERMINTU)
1119 nb = adjustblock(nb, ETHERMINTU);
1120 devtab[port->data[1]->type]->bwrite(port->data[1], nb, 0);
1128 portfree(Port *port)
1130 if(decref(port) != 0)
1134 cclose(port->data[0]);
1136 cclose(port->data[1]);
1137 memset(port, 0, sizeof(Port));
1141 Dev bridgedevtab = {