5 #include "../port/lib.h"
10 #include "../port/netif.h"
11 #include "../port/error.h"
13 typedef struct Bridge Bridge;
14 typedef struct Port Port;
15 typedef struct Centry Centry;
16 typedef struct Iphdr Iphdr;
17 typedef struct Tcphdr Tcphdr;
21 Qtopdir= 1, /* top level directory */
23 Qbridgedir, /* bridge* directory */
29 Qportdir, /* directory for a protocol */
37 Maxport= 128, // power of 2
38 CacheHash= 257, // prime
39 CacheLook= 5, // how many cache entries to examine
40 CacheSize= (CacheHash+CacheLook-1),
41 CacheTimeout= 5*60, // timeout for cache entry in seconds
43 TcpMssMax = 1300, // max desirable Tcp MSS value
47 static Dirtab bridgedirtab[]={
48 "ctl", {Qbctl}, 0, 0666,
49 "stats", {Qstats}, 0, 0444,
50 "cache", {Qcache}, 0, 0444,
51 "log", {Qlog}, 0, 0666,
54 static Dirtab portdirtab[]={
55 "ctl", {Qpctl}, 0, 0666,
56 "local", {Qlocal}, 0, 0444,
57 "status", {Qstatus}, 0, 0444,
65 // types of interfaces
72 static Logflag logflags[] =
74 { "cache", Logcache, },
75 { "multicast", Logmcast, },
79 static Dirtab *dirtab[MaxQ];
81 #define TYPE(x) (((ulong)(x).path) & 0xff) /* file type: low 8 bits of the qid path */
82 #define PORT(x) ((((ulong)(x).path) >> 8)&(Maxport-1)) /* port index: bits above the type, masked with Maxport-1 */
83 #define QID(x, y) (((x)<<8) | (y)) /* build a qid path from port index x and file type y */
89 long expire; // entry expires this many seconds after boot time
99 Centry cache[CacheSize];
103 long delay0; // constant microsecond delay per packet
104 long delayn; // microsecond delay per byte
105 int tcpmss; // modify tcpmss value
117 Chan *data[2]; // channel to data
119 Proc *readp; // read proc
121 // the following uniquely identifies the port
125 // owner hash - avoids bind/unbind races
129 int in; // number of packets read
130 int inmulti; // multicast or broadcast
131 int inunknown; // unknown address
132 int out; // number of packets written (presumably; the earlier "read" looked copied from `in` -- confirm at the increment site)
133 int outmulti; // multicast or broadcast
134 int outunknown; // unknown address
135 int outfrag; // fragmented the packet
136 int nentry; // number of cache entries for this port
144 MSS_LENGTH = 4, /* length in bytes of the TCP MSS (maximum segment size) option */
145 SYN = 0x02, /* TCP SYN flag bit */
146 IPHDR = 20, /* sizeof(Iphdr) */
151 uchar vihl; /* Version and header length */
152 uchar tos; /* Type of service */
153 uchar length[2]; /* packet length */
154 uchar id[2]; /* ip->identification */
155 uchar frag[2]; /* Fragment information */
156 uchar ttl; /* Time to live */
157 uchar proto; /* Protocol */
158 uchar cksum[2]; /* Header checksum */
159 uchar src[4]; /* IP source */
160 uchar dst[4]; /* IP destination */
175 static Bridge bridgetab[Maxbridge];
183 static int bridgegen(Chan *c, char*, Dirtab*, int, int s, Dir *dp);
184 static void portbind(Bridge *b, int argc, char *argv[]);
185 static void portunbind(Bridge *b, int argc, char *argv[]);
186 static void etherread(void *a);
187 static char *cachedump(Bridge *b);
188 static void portfree(Port *port);
189 static void cacheflushport(Bridge *b, int port);
190 static void etherwrite(Port *port, Block *bp);
198 // set up dirtab with non-directory entries
199 for(i=0; i<nelem(bridgedirtab); i++) {
200 dt = bridgedirtab + i;
201 dirtab[TYPE(dt->qid)] = dt;
203 for(i=0; i<nelem(portdirtab); i++) {
205 dirtab[TYPE(dt->qid)] = dt;
210 bridgeattach(char* spec)
216 if(dev<0 || dev >= Maxbridge)
217 error("bad specification");
219 c = devattach('B', spec);
220 mkqid(&c->qid, QID(0, Qtopdir), 0, QTDIR);
226 bridgewalk(Chan *c, Chan *nc, char **name, int nname)
228 return devwalk(c, nc, name, nname, (Dirtab*)0, 0, bridgegen);
232 bridgestat(Chan* c, uchar* db, int n)
234 return devstat(c, db, n, (Dirtab *)0, 0L, bridgegen);
238 bridgeopen(Chan* c, int omode)
247 b = bridgetab + c->dev;
250 switch(TYPE(c->qid)) {
257 c->aux = cachedump(b);
260 c->mode = openmode(omode);
269 Bridge *b = bridgetab + c->dev;
271 switch(TYPE(c->qid)) {
284 bridgeread(Chan *c, void *a, long n, vlong off)
287 Bridge *b = bridgetab + c->dev;
289 int i, ingood, outgood;
292 switch(TYPE(c->qid)) {
298 return devdirread(c, a, n, 0, 0, bridgegen);
300 return logread(b, a, off, n);
303 port = b->port[PORT(c->qid)];
305 strcpy(buf, "unbound\n");
310 panic("bridgeread: unknown port type: %d",
313 i += snprint(buf+i, sizeof(buf)-i, "ether %s: ", port->name);
316 i += snprint(buf+i, sizeof(buf)-i, "tunnel %s: ", port->name);
319 ingood = port->in - port->inmulti - port->inunknown;
320 outgood = port->out - port->outmulti - port->outunknown;
321 i += snprint(buf+i, sizeof(buf)-i,
322 "in=%d(%d:%d:%d) out=%d(%d:%d:%d:%d)\n",
323 port->in, ingood, port->inmulti, port->inunknown,
324 port->out, outgood, port->outmulti,
325 port->outunknown, port->outfrag);
328 n = readstr(off, a, n, buf);
332 snprint(buf, sizeof(buf), "%s tcpmss\ndelay %ld %ld\n",
333 b->tcpmss ? "set" : "clear", b->delay0, b->delayn);
334 n = readstr(off, a, n, buf);
337 n = readstr(off, a, n, c->aux);
340 snprint(buf, sizeof(buf), "hit=%uld miss=%uld copy=%uld\n",
341 b->hit, b->miss, b->copy);
342 n = readstr(off, a, n, buf);
348 bridgeoption(Bridge *b, char *option, int value)
350 if(strcmp(option, "tcpmss") == 0)
353 error("unknown bridge option");
358 bridgewrite(Chan *c, void *a, long n, vlong off)
360 Bridge *b = bridgetab + c->dev;
365 switch(TYPE(c->qid)) {
377 error("short write");
379 if(strcmp(arg0, "bind") == 0) {
380 portbind(b, cb->nf-1, cb->f+1);
381 } else if(strcmp(arg0, "unbind") == 0) {
382 portunbind(b, cb->nf-1, cb->f+1);
383 } else if(strcmp(arg0, "cacheflush") == 0) {
384 log(b, Logcache, "cache flush\n");
385 memset(b->cache, 0, CacheSize*sizeof(Centry));
386 } else if(strcmp(arg0, "set") == 0) {
388 error("usage: set option");
389 bridgeoption(b, cb->f[1], 1);
390 } else if(strcmp(arg0, "clear") == 0) {
392 error("usage: clear option");
393 bridgeoption(b, cb->f[1], 0);
394 } else if(strcmp(arg0, "delay") == 0) {
396 error("usage: delay delay0 delayn");
397 b->delay0 = strtol(cb->f[1], nil, 10);
398 b->delayn = strtol(cb->f[2], nil, 10);
400 error("unknown control request");
407 p = logctl(b, cb->nf, cb->f, logflags);
416 bridgegen(Chan *c, char *, Dirtab*, int, int s, Dir *dp)
418 Bridge *b = bridgetab + c->dev;
419 int type = TYPE(c->qid);
424 switch(TYPE(c->qid)){
427 snprint(up->genbuf, sizeof(up->genbuf), "#B%ld", c->dev);
428 mkqid(&qid, Qtopdir, 0, QTDIR);
429 devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
432 snprint(up->genbuf, sizeof(up->genbuf), "bridge%ld", c->dev);
433 mkqid(&qid, Qbridgedir, 0, QTDIR);
434 devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
437 panic("bridgewalk %llux", c->qid.path);
444 /* non-directory entries end up here */
445 if(c->qid.type & QTDIR)
446 panic("bridgegen: unexpected directory");
449 dt = dirtab[TYPE(c->qid)];
451 panic("bridgegen: unknown type: %lud", TYPE(c->qid));
452 devdir(c, c->qid, dt->name, dt->length, eve, dt->perm, dp);
457 snprint(up->genbuf, sizeof(up->genbuf), "bridge%ld", c->dev);
458 mkqid(&qid, QID(0, Qbridgedir), 0, QTDIR);
459 devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
462 if(s<nelem(bridgedirtab)) {
464 devdir(c, dt->qid, dt->name, dt->length, eve, dt->perm, dp);
467 s -= nelem(bridgedirtab);
470 mkqid(&qid, QID(s, Qportdir), 0, QTDIR);
471 snprint(up->genbuf, sizeof(up->genbuf), "%d", s);
472 devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
475 if(s>=nelem(portdirtab))
478 mkqid(&qid, QID(PORT(c->qid),TYPE(dt->qid)), 0, QTFILE);
479 devdir(c, qid, dt->name, dt->length, eve, dt->perm, dp);
484 // parse mac address; also in netif.c
486 parseaddr(uchar *to, char *from, int alen)
493 for(i = 0; i < alen; i++){
501 to[i] = strtoul(nip, 0, 16);
508 // assumes b is locked
510 portbind(Bridge *b, int argc, char *argv[])
516 char *dev, *dev2 = nil, *p;
517 char buf[100], name[KNAMELEN], path[8*KNAMELEN];
518 static char usage[] = "usage: bind ether|tunnel name ownhash dev [dev2]";
520 memset(name, 0, KNAMELEN);
523 if(strcmp(argv[0], "ether") == 0) {
527 strncpy(name, argv[1], KNAMELEN-1);
528 name[KNAMELEN-1] = 0;
529 // parseaddr(addr, argv[1], Eaddrlen);
530 } else if(strcmp(argv[0], "tunnel") == 0) {
534 strncpy(name, argv[1], KNAMELEN-1);
535 name[KNAMELEN-1] = 0;
536 // parseip(addr, argv[1]);
540 ownhash = atoi(argv[2]);
542 for(i=0; i<b->nport; i++) {
544 if(port != nil && port->type == type &&
545 memcmp(port->name, name, KNAMELEN) == 0)
546 error("port in use");
548 for(i=0; i<Maxport; i++)
549 if(b->port[i] == nil)
552 error("no more ports");
553 port = smalloc(sizeof(Port));
556 port->ownhash = ownhash;
563 memmove(port->name, name, KNAMELEN);
566 panic("portbind: unknown port type: %d", type);
568 snprint(path, sizeof(path), "%s/clone", dev);
569 ctl = namec(path, Aopen, ORDWR, 0);
576 // get directory name
577 n = devtab[ctl->type]->read(ctl, buf, sizeof(buf), 0);
579 for(p = buf; *p == ' '; p++)
581 snprint(path, sizeof(path), "%s/%lud/data", dev, strtoul(p, 0, 0));
583 // setup connection to be promiscuous
584 snprint(buf, sizeof(buf), "connect -1");
585 devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
586 snprint(buf, sizeof(buf), "promiscuous");
587 devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
588 snprint(buf, sizeof(buf), "bridge");
589 devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
592 port->data[0] = namec(path, Aopen, ORDWR, 0);
594 incref(port->data[0]);
595 port->data[1] = port->data[0];
602 port->data[0] = namec(dev, Aopen, OREAD, 0);
603 port->data[1] = namec(dev2, Aopen, OWRITE, 0);
609 /* committed to binding port */
610 b->port[port->id] = port;
612 if(b->nport <= port->id)
613 b->nport = port->id+1;
615 // assumes kproc always succeeds
616 kproc("etherread", etherread, port); // poperror must be next
620 // assumes b is locked
622 portunbind(Bridge *b, int argc, char *argv[])
628 static char usage[] = "usage: unbind ether|tunnel addr [ownhash]";
630 memset(name, 0, KNAMELEN);
631 if(argc < 2 || argc > 3)
633 if(strcmp(argv[0], "ether") == 0) {
635 strncpy(name, argv[1], KNAMELEN-1);
636 name[KNAMELEN-1] = 0;
637 // parseaddr(addr, argv[1], Eaddrlen);
638 } else if(strcmp(argv[0], "tunnel") == 0) {
640 strncpy(name, argv[1], KNAMELEN-1);
641 name[KNAMELEN-1] = 0;
642 // parseip(addr, argv[1]);
646 ownhash = atoi(argv[2]);
649 for(i=0; i<b->nport; i++) {
651 if(port != nil && port->type == type &&
652 memcmp(port->name, name, KNAMELEN) == 0)
656 error("port not found");
657 if(ownhash != 0 && port->ownhash != 0 && ownhash != port->ownhash)
658 error("bad owner hash");
661 b->port[i] = nil; // port is now unbound
662 cacheflushport(b, i);
664 // try and stop reader
666 postnote(port->readp, 1, "unbind", 0);
670 // assumes b is locked
672 cachelookup(Bridge *b, uchar d[Eaddrlen])
679 // don't cache multicast or broadcast
684 for(i=0; i<Eaddrlen; i++) {
690 sec = TK2SEC(m->ticks);
691 for(i=0; i<CacheLook; i++,p++) {
692 if(memcmp(d, p->d, Eaddrlen) == 0) {
694 if(sec >= p->expire) {
695 log(b, Logcache, "expired cache entry: %E %d\n",
699 p->expire = sec + CacheTimeout;
703 log(b, Logcache, "cache miss: %E\n", d);
707 // assumes b is locked
709 cacheupdate(Bridge *b, uchar d[Eaddrlen], int port)
716 // don't cache multicast or broadcast
718 log(b, Logcache, "bad source address: %E\n", d);
723 for(i=0; i<Eaddrlen; i++) {
732 // look for oldest entry
733 for(i=0; i<CacheLook; i++,p++) {
734 if(memcmp(p->d, d, Eaddrlen) == 0) {
735 p->expire = TK2SEC(m->ticks) + CacheTimeout;
736 if(p->port != port) {
737 log(b, Logcache, "NIC changed port %d->%d: %E\n",
744 if(p->expire < sec) {
750 log(b, Logcache, "bumping from cache: %E %d\n", pp->d, pp->port);
751 pp->expire = TK2SEC(m->ticks) + CacheTimeout;
752 memmove(pp->d, d, Eaddrlen);
756 log(b, Logcache, "adding to cache: %E %d\n", pp->d, pp->port);
759 // assumes b is locked
761 cacheflushport(Bridge *b, int port)
767 for(i=0; i<CacheSize; i++,ce++) {
770 memset(ce, 0, sizeof(Centry));
784 sec = TK2SEC(m->ticks);
786 for(i=0; i<CacheSize; i++)
787 if(b->cache[i].expire != 0)
789 n *= 51; // change if print format is changed
790 n += 10; // some slop at the end
795 off = seconds() - sec;
796 for(i=0; i<CacheSize; i++,ce++) {
799 c = (sec < ce->expire)?'v':'e';
800 p += snprint(p, ep-p, "%E %2d %10ld %10ld %10ld %c\n", ce->d,
801 ce->port, ce->src, ce->dst, ce->expire+off, c);
810 // assumes b is locked
812 ethermultiwrite(Bridge *b, Block *bp, Port *port)
825 ep = (Etherpkt*)bp->rp;
826 mcast = ep->d[0] & 1; /* multicast bit of ethernet address */
829 for(i=0; i<b->nport; i++) {
830 if(i == port->id || b->port[i] == nil)
833 * we need to forward multicast packets for ipv6,
837 b->port[i]->outmulti++;
839 b->port[i]->outunknown++;
841 // delay one so that the last write does not copy
844 bp2 = copyblock(bp, blocklen(bp));
846 etherwrite(oport, bp2);
853 // the last write is handed the original block itself (no copy)
855 bp2 = bp; bp = nil; USED(bp);
857 etherwrite(oport, bp2);
867 tcpmsshack(Etherpkt *epkt, int n)
875 /* ignore non-ipv4 packets */
876 if(nhgets(epkt->type) != ETIP4)
878 iphdr = (Iphdr*)(epkt->data);
883 /* ignore bad packets */
884 if(iphdr->vihl != (IP_VER4|IP_HLEN4)) {
885 hl = (iphdr->vihl&0xF)<<2;
886 if((iphdr->vihl&0xF0) != IP_VER4 || hl < (IP_HLEN4<<2))
891 /* ignore non-tcp packets */
892 if(iphdr->proto != IP_TCPPROTO)
895 if(n < sizeof(Tcphdr))
897 tcphdr = (Tcphdr*)((uchar*)(iphdr) + hl);
898 // MSS can only appear in SYN packet
899 if(!(tcphdr->flag[1] & SYN))
901 hl = (tcphdr->flag[0] & 0xf0)>>2;
905 // check for MSS option
906 optr = (uchar*)tcphdr + sizeof(Tcphdr);
907 n = hl - sizeof(Tcphdr);
909 if(n <= 0 || *optr == EOLOPT)
911 if(*optr == NOOPOPT) {
917 if(optlen < 2 || optlen > n)
919 if(*optr == MSSOPT && optlen == MSS_LENGTH)
925 mss = nhgets(optr+2);
929 cksum = nhgets(tcphdr->cksum);
930 if(optr-(uchar*)tcphdr & 1) {
931 print("tcpmsshack: odd alignment!\n");
932 // odd alignments are a pain
933 cksum += nhgets(optr+1);
934 cksum -= (optr[1]<<8)|(TcpMssMax>>8);
935 cksum += (cksum>>16);
937 cksum += nhgets(optr+3);
938 cksum -= ((TcpMssMax&0xff)<<8)|optr[4];
939 cksum += (cksum>>16);
943 cksum += (cksum>>16);
945 hnputs(tcphdr->cksum, cksum);
946 hnputs(optr+2, TcpMssMax);
950 * process to read from the ethernet
956 Bridge *b = port->bridge;
963 port->readp = up; /* hide identity under a rock for unbind */
965 while(!port->closed){
966 // release lock to read - error means it is time to quit
969 print("etherread read error: %s\n", up->errstr);
974 print("devbridge: etherread: reading\n");
975 bp = devtab[port->data[0]->type]->bread(port->data[0],
978 print("devbridge: etherread: blocklen = %d\n",
982 if(bp == nil || port->closed)
985 // print("etherread bridge error\n");
990 if(blocklen(bp) < ETHERMINTU)
991 error("short packet");
994 ep = (Etherpkt*)bp->rp;
995 cacheupdate(b, ep->s, port->id);
997 tcpmsshack(ep, BLEN(bp));
1000 * delay packets to simulate a slow link
1002 if(b->delay0 || b->delayn){
1003 md = b->delay0 + b->delayn * BLEN(bp);
1009 log(b, Logmcast, "multicast: port=%d src=%E dst=%E type=%#.4ux\n",
1010 port->id, ep->s, ep->d, ep->type[0]<<8|ep->type[1]);
1013 ethermultiwrite(b, bp2, port);
1015 ce = cachelookup(b, ep->d);
1020 ethermultiwrite(b, bp2, port);
1021 }else if(ce->port != port->id){
1024 etherwrite(b->port[ce->port], bp2);
1032 // print("etherread: trying to exit\n");
1040 fragment(Etherpkt *epkt, int n)
1047 /* ignore non-ipv4 packets */
1048 if(nhgets(epkt->type) != ETIP4)
1050 iphdr = (Iphdr*)(epkt->data);
1053 * ignore: IP runt packets, bad packets (I don't handle IP
1054 * options for the moment), packets with don't-fragment set,
1057 if(n < IPHDR || iphdr->vihl != (IP_VER4|IP_HLEN4) ||
1058 iphdr->frag[0] & (IP_DF>>8) || nhgets(iphdr->length) > n)
1066 etherwrite(Port *port, Block *bp)
1070 int n, lid, len, seglen, chunk, dlen, blklen, offset, mf;
1072 ushort fragoff, frag;
1075 epkt = (Etherpkt*)bp->rp;
1077 if(port->type != Ttun || !fragment(epkt, n)) {
1078 devtab[port->data[1]->type]->bwrite(port->data[1], bp, 0);
1087 seglen = (TunnelMtu - ETHERHDRSIZE - IPHDR) & ~7;
1088 eh = (Iphdr*)(epkt->data);
1089 len = nhgets(eh->length);
1090 frag = nhgets(eh->frag);
1095 lid = nhgets(eh->id);
1096 offset = ETHERHDRSIZE+IPHDR;
1097 while(xp != nil && offset && offset >= BLEN(xp)) {
1104 print("seglen=%d, dlen=%d, mf=%x, frag=%d\n",
1105 seglen, dlen, mf, frag);
1106 for(fragoff = 0; fragoff < dlen; fragoff += seglen) {
1107 nb = allocb(ETHERHDRSIZE+IPHDR+seglen);
1109 feh = (Iphdr*)(nb->wp+ETHERHDRSIZE);
1111 memmove(nb->wp, epkt, ETHERHDRSIZE+IPHDR);
1112 nb->wp += ETHERHDRSIZE+IPHDR;
1114 if((fragoff + seglen) >= dlen) {
1115 seglen = dlen - fragoff;
1116 hnputs(feh->frag, (frag+fragoff)>>3 | mf);
1119 hnputs(feh->frag, (frag+fragoff>>3) | IP_MF);
1121 hnputs(feh->length, seglen + IPHDR);
1122 hnputs(feh->id, lid);
1124 /* Copy up the data area */
1128 if(BLEN(xp) < chunk)
1130 memmove(nb->wp, xp->rp, blklen);
1134 if(xp->rp == xp->wp)
1140 hnputs(feh->cksum, ipcsum(&feh->vihl));
1142 /* don't generate small packets */
1143 if(BLEN(nb) < ETHERMINTU)
1144 nb->wp = nb->rp + ETHERMINTU;
1145 devtab[port->data[1]->type]->bwrite(port->data[1], nb, 0);
1153 portfree(Port *port)
1157 panic("portfree: bad ref");
1162 cclose(port->data[0]);
1164 cclose(port->data[1]);
1165 memset(port, 0, sizeof(Port));
1169 Dev bridgedevtab = {