]> git.lizzy.rs Git - plan9front.git/blob - sys/src/9/port/devbridge.c
devproc: remove pgrpid == 1 check for notepg open
[plan9front.git] / sys / src / 9 / port / devbridge.c
1 /*
2  * IPv4 Ethernet bridge
3  */
4 #include "u.h"
5 #include "../port/lib.h"
6 #include "mem.h"
7 #include "dat.h"
8 #include "fns.h"
9 #include "../ip/ip.h"
10 #include "../port/netif.h"
11 #include "../port/error.h"
12
/* short names for the structures defined further down in this file */
typedef struct Bridge Bridge;
typedef struct Port Port;
typedef struct Centry Centry;
typedef struct Iphdr Iphdr;
typedef struct Tcphdr Tcphdr;
18
enum
{
	/* file types; kept in the low byte of a qid path (see TYPE) */
	Qtopdir = 1,		/* top level directory */

	Qbridgedir,		/* bridge* directory */
	Qbctl,
	Qstats,
	Qcache,
	Qlog,

	Qportdir,		/* directory for one port */
	Qpctl,
	Qlocal,
	Qstatus,

	MaxQ,			/* one past the last file type */

	/* table sizes and tunables */
	Maxbridge = 4,
	Maxport = 128,		/* power of 2: PORT() masks with Maxport-1 */
	CacheHash = 257,	/* prime */
	CacheLook = 5,		/* how many cache entries to examine */
	CacheSize = CacheHash + CacheLook - 1,
	CacheTimeout = 5*60,	/* timeout for a cache entry, in seconds */

	TcpMssMax = 1300,	/* max desirable TCP MSS value */
	TunnelMtu = 1400,
};
46
47 static Dirtab bridgedirtab[]={
48         "ctl",          {Qbctl},        0,      0666,
49         "stats",        {Qstats},       0,      0444,
50         "cache",        {Qcache},       0,      0444,
51         "log",          {Qlog},         0,      0666,
52 };
53
54 static Dirtab portdirtab[]={
55         "ctl",          {Qpctl},        0,      0666,
56         "local",        {Qlocal},       0,      0444,
57         "status",       {Qstatus},      0,      0444,
58 };
59
/* log classes, one bit each; named in logflags[] */
enum {
	Logcache = 1<<0,	/* address cache activity */
	Logmcast = 1<<1,	/* multicast */
};
64
/* kinds of interface a port can be bound to (Port.type) */
enum
{
	Tether,		/* ethernet */
	Ttun,		/* tunnel */
};
71
72 static Logflag logflags[] =
73 {
74         { "cache",      Logcache, },
75         { "multicast",  Logmcast, },
76         { nil,          0, },
77 };
78
79 static Dirtab   *dirtab[MaxQ];
80
/*
 * qid path layout: file type in the low 8 bits, port number above it.
 * TYPE and PORT take a Qid; QID builds a path from (port, type).
 */
#define TYPE(x)		(((ulong)(x).path) & 0xff)
#define PORT(x)		((((ulong)(x).path) >> 8)&(Maxport-1))
#define QID(x, y)	(((x)<<8) | (y))
84
85 struct Centry
86 {
87         uchar   d[Eaddrlen];
88         int     port;
89         long    expire;         // entry expires this many seconds after bootime
90         long    src;
91         long    dst;
92 };
93
94 struct Bridge
95 {
96         QLock;
97         int     nport;
98         Port    *port[Maxport];
99         Centry  cache[CacheSize];
100         ulong   hit;
101         ulong   miss;
102         ulong   copy;
103         long    delay0;         // constant microsecond delay per packet
104         long    delayn;         // microsecond delay per byte
105         int     tcpmss;         // modify tcpmss value
106
107         Log;
108 };
109
110 struct Port
111 {
112         int     id;
113         Bridge  *bridge;
114         int     ref;
115         int     closed;
116
117         Chan    *data[2];       // channel to data
118
119         Proc    *readp;         // read proc
120         
121         // the following uniquely identifies the port
122         int     type;
123         char    name[KNAMELEN];
124         
125         // owner hash - avoids bind/unbind races
126         ulong   ownhash;
127
128         // various stats
129         int     in;             // number of packets read
130         int     inmulti;        // multicast or broadcast
131         int     inunknown;      // unknown address
132         int     out;            // number of packets read
133         int     outmulti;       // multicast or broadcast
134         int     outunknown;     // unknown address
135         int     outfrag;        // fragmented the packet
136         int     nentry;         // number of cache entries for this port
137 };
138
/* IP and TCP constants used by tcpmsshack */
enum {
	IP_TCPPROTO = 6,	/* IP protocol number checked for TCP */
	EOLOPT = 0,		/* TCP option kind: end of option list */
	NOOPOPT = 1,		/* TCP option kind: no-op padding */
	MSSOPT = 2,		/* TCP option kind: MSS */
	MSS_LENGTH = 4,		/* length of the MSS option, in bytes */
	SYN = 0x02,		/* TCP flag: packet is a synchronise */
	IPHDR = 20,		/* sizeof(Iphdr) */
};
148
149 struct Iphdr
150 {
151         uchar   vihl;           /* Version and header length */
152         uchar   tos;            /* Type of service */
153         uchar   length[2];      /* packet length */
154         uchar   id[2];          /* ip->identification */
155         uchar   frag[2];        /* Fragment information */
156         uchar   ttl;            /* Time to live */
157         uchar   proto;          /* Protocol */
158         uchar   cksum[2];       /* Header checksum */
159         uchar   src[4];         /* IP source */
160         uchar   dst[4];         /* IP destination */
161 };
162
163 struct Tcphdr
164 {
165         uchar   sport[2];
166         uchar   dport[2];
167         uchar   seq[4];
168         uchar   ack[4];
169         uchar   flag[2];
170         uchar   win[2];
171         uchar   cksum[2];
172         uchar   urg[2];
173 };
174
175 static Bridge bridgetab[Maxbridge];
176
177 static int m2p[] = {
178         [OREAD]         4,
179         [OWRITE]        2,
180         [ORDWR]         6
181 };
182
183 static int      bridgegen(Chan *c, char*, Dirtab*, int, int s, Dir *dp);
184 static void     portbind(Bridge *b, int argc, char *argv[]);
185 static void     portunbind(Bridge *b, int argc, char *argv[]);
186 static void     etherread(void *a);
187 static char     *cachedump(Bridge *b);
188 static void     portfree(Port *port);
189 static void     cacheflushport(Bridge *b, int port);
190 static void     etherwrite(Port *port, Block *bp);
191
192 static void
193 bridgeinit(void)
194 {
195         int i;
196         Dirtab *dt;
197
198         // setup dirtab with non directory entries
199         for(i=0; i<nelem(bridgedirtab); i++) {
200                 dt = bridgedirtab + i;
201                 dirtab[TYPE(dt->qid)] = dt;
202         }
203         for(i=0; i<nelem(portdirtab); i++) {
204                 dt = portdirtab + i;
205                 dirtab[TYPE(dt->qid)] = dt;
206         }
207 }
208
209 static Chan*
210 bridgeattach(char* spec)
211 {
212         Chan *c;
213         int dev;
214
215         dev = atoi(spec);
216         if(dev<0 || dev >= Maxbridge)
217                 error("bad specification");
218
219         c = devattach('B', spec);
220         mkqid(&c->qid, QID(0, Qtopdir), 0, QTDIR);
221         c->dev = dev;
222         return c;
223 }
224
225 static Walkqid*
226 bridgewalk(Chan *c, Chan *nc, char **name, int nname)
227 {
228         return devwalk(c, nc, name, nname, (Dirtab*)0, 0, bridgegen);
229 }
230
231 static int
232 bridgestat(Chan* c, uchar* db, int n)
233 {
234         return devstat(c, db, n, (Dirtab *)0, 0L, bridgegen);
235 }
236
237 static Chan*
238 bridgeopen(Chan* c, int omode)
239 {
240         int perm;
241         Bridge *b;
242
243         omode &= 3;
244         perm = m2p[omode];
245         USED(perm);
246
247         b = bridgetab + c->dev;
248         USED(b);
249
250         switch(TYPE(c->qid)) {
251         default:
252                 break;
253         case Qlog:
254                 logopen(b);
255                 break;
256         case Qcache:
257                 c->aux = cachedump(b);
258                 break;
259         }
260         c->mode = openmode(omode);
261         c->flag |= COPEN;
262         c->offset = 0;
263         return c;
264 }
265
266 static void
267 bridgeclose(Chan* c)
268 {
269         Bridge *b  = bridgetab + c->dev;
270
271         switch(TYPE(c->qid)) {
272         case Qcache:
273                 if(c->flag & COPEN)
274                         free(c->aux);
275                 break;
276         case Qlog:
277                 if(c->flag & COPEN)
278                         logclose(b);
279                 break;
280         }
281 }
282
283 static long
284 bridgeread(Chan *c, void *a, long n, vlong off)
285 {
286         char buf[256];
287         Bridge *b = bridgetab + c->dev;
288         Port *port;
289         int i, ingood, outgood;
290
291         USED(off);
292         switch(TYPE(c->qid)) {
293         default:
294                 error(Eperm);
295         case Qtopdir:
296         case Qbridgedir:
297         case Qportdir:
298                 return devdirread(c, a, n, 0, 0, bridgegen);
299         case Qlog:
300                 return logread(b, a, off, n);
301         case Qstatus:
302                 qlock(b);
303                 port = b->port[PORT(c->qid)];
304                 if(port == 0)
305                         strcpy(buf, "unbound\n");
306                 else {
307                         i = 0;
308                         switch(port->type) {
309                         default:
310                                 panic("bridgeread: unknown port type: %d",
311                                         port->type);
312                         case Tether:
313                                 i += snprint(buf+i, sizeof(buf)-i, "ether %s: ", port->name);
314                                 break;
315                         case Ttun:
316                                 i += snprint(buf+i, sizeof(buf)-i, "tunnel %s: ", port->name);
317                                 break;
318                         }
319                         ingood = port->in - port->inmulti - port->inunknown;
320                         outgood = port->out - port->outmulti - port->outunknown;
321                         i += snprint(buf+i, sizeof(buf)-i,
322                                 "in=%d(%d:%d:%d) out=%d(%d:%d:%d:%d)\n",
323                                 port->in, ingood, port->inmulti, port->inunknown,
324                                 port->out, outgood, port->outmulti,
325                                 port->outunknown, port->outfrag);
326                         USED(i);
327                 }
328                 n = readstr(off, a, n, buf);
329                 qunlock(b);
330                 return n;
331         case Qbctl:
332                 snprint(buf, sizeof(buf), "%s tcpmss\ndelay %ld %ld\n",
333                         b->tcpmss ? "set" : "clear", b->delay0, b->delayn);
334                 n = readstr(off, a, n, buf);
335                 return n;
336         case Qcache:
337                 n = readstr(off, a, n, c->aux);
338                 return n;
339         case Qstats:
340                 snprint(buf, sizeof(buf), "hit=%uld miss=%uld copy=%uld\n",
341                         b->hit, b->miss, b->copy);
342                 n = readstr(off, a, n, buf);
343                 return n;
344         }
345 }
346
347 static void
348 bridgeoption(Bridge *b, char *option, int value)
349 {
350         if(strcmp(option, "tcpmss") == 0)
351                 b->tcpmss = value;
352         else
353                 error("unknown bridge option");
354 }
355
356
357 static long
358 bridgewrite(Chan *c, void *a, long n, vlong off)
359 {
360         Bridge *b = bridgetab + c->dev;
361         Cmdbuf *cb;
362         char *arg0, *p;
363         
364         USED(off);
365         switch(TYPE(c->qid)) {
366         default:
367                 error(Eperm);
368         case Qbctl:
369                 cb = parsecmd(a, n);
370                 qlock(b);
371                 if(waserror()) {
372                         qunlock(b);
373                         free(cb);
374                         nexterror();
375                 }
376                 if(cb->nf == 0)
377                         error("short write");
378                 arg0 = cb->f[0];
379                 if(strcmp(arg0, "bind") == 0) {
380                         portbind(b, cb->nf-1, cb->f+1);
381                 } else if(strcmp(arg0, "unbind") == 0) {
382                         portunbind(b, cb->nf-1, cb->f+1);
383                 } else if(strcmp(arg0, "cacheflush") == 0) {
384                         log(b, Logcache, "cache flush\n");
385                         memset(b->cache, 0, CacheSize*sizeof(Centry));
386                 } else if(strcmp(arg0, "set") == 0) {
387                         if(cb->nf != 2)
388                                 error("usage: set option");
389                         bridgeoption(b, cb->f[1], 1);
390                 } else if(strcmp(arg0, "clear") == 0) {
391                         if(cb->nf != 2)
392                                 error("usage: clear option");
393                         bridgeoption(b, cb->f[1], 0);
394                 } else if(strcmp(arg0, "delay") == 0) {
395                         if(cb->nf != 3)
396                                 error("usage: delay delay0 delayn");
397                         b->delay0 = strtol(cb->f[1], nil, 10);
398                         b->delayn = strtol(cb->f[2], nil, 10);
399                 } else
400                         error("unknown control request");
401                 poperror();
402                 qunlock(b);
403                 free(cb);
404                 return n;
405         case Qlog:
406                 cb = parsecmd(a, n);
407                 p = logctl(b, cb->nf, cb->f, logflags);
408                 free(cb);
409                 if(p != nil)
410                         error(p);
411                 return n;
412         }
413 }
414
415 static int
416 bridgegen(Chan *c, char *, Dirtab*, int, int s, Dir *dp)
417 {
418         Bridge *b = bridgetab + c->dev;
419         int type = TYPE(c->qid);
420         Dirtab *dt;
421         Qid qid;
422
423         if(s  == DEVDOTDOT){
424                 switch(TYPE(c->qid)){
425                 case Qtopdir:
426                 case Qbridgedir:
427                         snprint(up->genbuf, sizeof(up->genbuf), "#B%ld", c->dev);
428                         mkqid(&qid, Qtopdir, 0, QTDIR);
429                         devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
430                         break;
431                 case Qportdir:
432                         snprint(up->genbuf, sizeof(up->genbuf), "bridge%ld", c->dev);
433                         mkqid(&qid, Qbridgedir, 0, QTDIR);
434                         devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
435                         break;
436                 default:
437                         panic("bridgewalk %llux", c->qid.path);
438                 }
439                 return 1;
440         }
441
442         switch(type) {
443         default:
444                 /* non-directory entries end up here */
445                 if(c->qid.type & QTDIR)
446                         panic("bridgegen: unexpected directory");       
447                 if(s != 0)
448                         return -1;
449                 dt = dirtab[TYPE(c->qid)];
450                 if(dt == nil)
451                         panic("bridgegen: unknown type: %lud", TYPE(c->qid));
452                 devdir(c, c->qid, dt->name, dt->length, eve, dt->perm, dp);
453                 return 1;
454         case Qtopdir:
455                 if(s != 0)
456                         return -1;
457                 snprint(up->genbuf, sizeof(up->genbuf), "bridge%ld", c->dev);
458                 mkqid(&qid, QID(0, Qbridgedir), 0, QTDIR);
459                 devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
460                 return 1;
461         case Qbridgedir:
462                 if(s<nelem(bridgedirtab)) {
463                         dt = bridgedirtab+s;
464                         devdir(c, dt->qid, dt->name, dt->length, eve, dt->perm, dp);
465                         return 1;
466                 }
467                 s -= nelem(bridgedirtab);
468                 if(s >= b->nport)
469                         return -1;
470                 mkqid(&qid, QID(s, Qportdir), 0, QTDIR);
471                 snprint(up->genbuf, sizeof(up->genbuf), "%d", s);
472                 devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
473                 return 1;
474         case Qportdir:
475                 if(s>=nelem(portdirtab))
476                         return -1;
477                 dt = portdirtab+s;
478                 mkqid(&qid, QID(PORT(c->qid),TYPE(dt->qid)), 0, QTFILE);
479                 devdir(c, qid, dt->name, dt->length, eve, dt->perm, dp);
480                 return 1;
481         }
482 }
483
// value of hex digit c, or -1 if c is not a hex digit
static int
addrhexval(int c)
{
	if(c >= '0' && c <= '9')
		return c - '0';
	if(c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if(c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	return -1;
}

// parse mac address; also in netif.c
//
// Reads alen bytes from the string "from" into "to".  Each byte is
// exactly two hex digits; a single ':' may follow each byte.  Returns 0
// on success and -1 if the string ends early or holds a character that
// is not a hex digit.  Whatever follows the last byte is ignored.
static int
parseaddr(uchar *to, char *from, int alen)
{
	char *p;
	int i, hi, lo;

	p = from;
	for(i = 0; i < alen; i++){
		// a NUL is not a hex digit, so this also catches a short string
		hi = addrhexval(p[0]);
		if(hi < 0)
			return -1;
		lo = addrhexval(p[1]);
		if(lo < 0)
			return -1;
		to[i] = (hi<<4) | lo;
		p += 2;
		if(*p == ':')
			p++;
	}
	return 0;
}
507
508 // assumes b is locked
509 static void
510 portbind(Bridge *b, int argc, char *argv[])
511 {
512         Port *port;
513         Chan *ctl;
514         int type = 0, i, n;
515         ulong ownhash;
516         char *dev, *dev2 = nil, *p;
517         char buf[100], name[KNAMELEN], path[8*KNAMELEN];
518         static char usage[] = "usage: bind ether|tunnel name ownhash dev [dev2]";
519
520         memset(name, 0, KNAMELEN);
521         if(argc < 4)
522                 error(usage);
523         if(strcmp(argv[0], "ether") == 0) {
524                 if(argc != 4)
525                         error(usage);
526                 type = Tether;
527                 strncpy(name, argv[1], KNAMELEN-1);
528                 name[KNAMELEN-1] = 0;
529 //              parseaddr(addr, argv[1], Eaddrlen);
530         } else if(strcmp(argv[0], "tunnel") == 0) {
531                 if(argc != 5)
532                         error(usage);
533                 type = Ttun;
534                 strncpy(name, argv[1], KNAMELEN-1);
535                 name[KNAMELEN-1] = 0;
536 //              parseip(addr, argv[1]);
537                 dev2 = argv[4];
538         } else
539                 error(usage);
540         ownhash = atoi(argv[2]);
541         dev = argv[3];
542         for(i=0; i<b->nport; i++) {
543                 port = b->port[i];
544                 if(port != nil && port->type == type &&
545                     memcmp(port->name, name, KNAMELEN) == 0)
546                         error("port in use");
547         }
548         for(i=0; i<Maxport; i++)
549                 if(b->port[i] == nil)
550                         break;
551         if(i == Maxport)
552                 error("no more ports");
553         port = smalloc(sizeof(Port));
554         port->ref = 1;
555         port->id = i;
556         port->ownhash = ownhash;
557
558         if(waserror()) {
559                 portfree(port);
560                 nexterror();
561         }
562         port->type = type;
563         memmove(port->name, name, KNAMELEN);
564         switch(port->type) {
565         default:
566                 panic("portbind: unknown port type: %d", type);
567         case Tether:
568                 snprint(path, sizeof(path), "%s/clone", dev);
569                 ctl = namec(path, Aopen, ORDWR, 0);
570                 if(waserror()) {
571                         cclose(ctl);
572                         nexterror();
573                 }
574                 // check addr?
575
576                 // get directory name
577                 n = devtab[ctl->type]->read(ctl, buf, sizeof(buf), 0);
578                 buf[n] = 0;
579                 for(p = buf; *p == ' '; p++)
580                         ;
581                 snprint(path, sizeof(path), "%s/%lud/data", dev, strtoul(p, 0, 0));
582
583                 // setup connection to be promiscuous
584                 snprint(buf, sizeof(buf), "connect -1");
585                 devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
586                 snprint(buf, sizeof(buf), "promiscuous");
587                 devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
588                 snprint(buf, sizeof(buf), "bridge");
589                 devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
590
591                 // open data port
592                 port->data[0] = namec(path, Aopen, ORDWR, 0);
593                 // dup it
594                 incref(port->data[0]);
595                 port->data[1] = port->data[0];
596
597                 poperror();
598                 cclose(ctl);            
599
600                 break;
601         case Ttun:
602                 port->data[0] = namec(dev, Aopen, OREAD, 0);
603                 port->data[1] = namec(dev2, Aopen, OWRITE, 0);
604                 break;
605         }
606
607         poperror();
608
609         /* committed to binding port */
610         b->port[port->id] = port;
611         port->bridge = b;
612         if(b->nport <= port->id)
613                 b->nport = port->id+1;
614
615         // assumes kproc always succeeds
616         kproc("etherread", etherread, port);    // poperror must be next
617         port->ref++;
618 }
619
620 // assumes b is locked
621 static void
622 portunbind(Bridge *b, int argc, char *argv[])
623 {
624         int type = 0, i;
625         char name[KNAMELEN];
626         ulong ownhash;
627         Port *port = nil;
628         static char usage[] = "usage: unbind ether|tunnel addr [ownhash]";
629
630         memset(name, 0, KNAMELEN);
631         if(argc < 2 || argc > 3)
632                 error(usage);
633         if(strcmp(argv[0], "ether") == 0) {
634                 type = Tether;
635                 strncpy(name, argv[1], KNAMELEN-1);
636                 name[KNAMELEN-1] = 0;
637 //              parseaddr(addr, argv[1], Eaddrlen);
638         } else if(strcmp(argv[0], "tunnel") == 0) {
639                 type = Ttun;
640                 strncpy(name, argv[1], KNAMELEN-1);
641                 name[KNAMELEN-1] = 0;
642 //              parseip(addr, argv[1]);
643         } else
644                 error(usage);
645         if(argc == 3)
646                 ownhash = atoi(argv[2]);
647         else
648                 ownhash = 0;
649         for(i=0; i<b->nport; i++) {
650                 port = b->port[i];
651                 if(port != nil && port->type == type &&
652                     memcmp(port->name, name, KNAMELEN) == 0)
653                         break;
654         }
655         if(i == b->nport)
656                 error("port not found");
657         if(ownhash != 0 && port->ownhash != 0 && ownhash != port->ownhash)
658                 error("bad owner hash");
659
660         port->closed = 1;
661         b->port[i] = nil;       // port is now unbound
662         cacheflushport(b, i);
663
664         // try and stop reader
665         if(port->readp)
666                 postnote(port->readp, 1, "unbind", 0);
667         portfree(port);
668 }
669
670 // assumes b is locked
671 static Centry *
672 cachelookup(Bridge *b, uchar d[Eaddrlen])
673 {
674         int i;
675         uint h;
676         Centry *p;
677         long sec;
678
679         // dont cache multicast or broadcast
680         if(d[0] & 1)
681                 return 0;
682
683         h = 0;
684         for(i=0; i<Eaddrlen; i++) {
685                 h *= 7;
686                 h += d[i];
687         }
688         h %= CacheHash;
689         p = b->cache + h;
690         sec = TK2SEC(m->ticks);
691         for(i=0; i<CacheLook; i++,p++) {
692                 if(memcmp(d, p->d, Eaddrlen) == 0) {
693                         p->dst++;
694                         if(sec >= p->expire) {
695                                 log(b, Logcache, "expired cache entry: %E %d\n",
696                                         d, p->port);
697                                 return nil;
698                         }
699                         p->expire = sec + CacheTimeout;
700                         return p;
701                 }
702         }
703         log(b, Logcache, "cache miss: %E\n", d);
704         return nil;
705 }
706
707 // assumes b is locked
708 static void
709 cacheupdate(Bridge *b, uchar d[Eaddrlen], int port)
710 {
711         int i;
712         uint h;
713         Centry *p, *pp;
714         long sec;
715
716         // dont cache multicast or broadcast
717         if(d[0] & 1) {
718                 log(b, Logcache, "bad source address: %E\n", d);
719                 return;
720         }
721         
722         h = 0;
723         for(i=0; i<Eaddrlen; i++) {
724                 h *= 7;
725                 h += d[i];
726         }
727         h %= CacheHash;
728         p = b->cache + h;
729         pp = p;
730         sec = p->expire;
731
732         // look for oldest entry
733         for(i=0; i<CacheLook; i++,p++) {
734                 if(memcmp(p->d, d, Eaddrlen) == 0) {
735                         p->expire = TK2SEC(m->ticks) + CacheTimeout;
736                         if(p->port != port) {
737                                 log(b, Logcache, "NIC changed port %d->%d: %E\n",
738                                         p->port, port, d);
739                                 p->port = port;
740                         }
741                         p->src++;
742                         return;
743                 }
744                 if(p->expire < sec) {
745                         sec = p->expire;
746                         pp = p;
747                 }
748         }
749         if(pp->expire != 0)
750                 log(b, Logcache, "bumping from cache: %E %d\n", pp->d, pp->port);
751         pp->expire = TK2SEC(m->ticks) + CacheTimeout;
752         memmove(pp->d, d, Eaddrlen);
753         pp->port = port;
754         pp->src = 1;
755         pp->dst = 0;
756         log(b, Logcache, "adding to cache: %E %d\n", pp->d, pp->port);
757 }
758
759 // assumes b is locked
760 static void
761 cacheflushport(Bridge *b, int port)
762 {
763         Centry *ce;
764         int i;
765
766         ce = b->cache;
767         for(i=0; i<CacheSize; i++,ce++) {
768                 if(ce->port != port)
769                         continue;
770                 memset(ce, 0, sizeof(Centry));
771         }
772 }
773
774 static char *
775 cachedump(Bridge *b)
776 {
777         int i, n;
778         long sec, off;
779         char *buf, *p, *ep;
780         Centry *ce;
781         char c;
782
783         qlock(b);
784         sec = TK2SEC(m->ticks);
785         n = 0;
786         for(i=0; i<CacheSize; i++)
787                 if(b->cache[i].expire != 0)
788                         n++;
789         n *= 51;        // change if print format is changed
790         n += 10;        // some slop at the end
791         buf = smalloc(n);
792         p = buf;
793         ep = buf + n;
794         ce = b->cache;
795         off = seconds() - sec;
796         for(i=0; i<CacheSize; i++,ce++) {
797                 if(ce->expire == 0)
798                         continue;       
799                 c = (sec < ce->expire)?'v':'e';
800                 p += snprint(p, ep-p, "%E %2d %10ld %10ld %10ld %c\n", ce->d,
801                         ce->port, ce->src, ce->dst, ce->expire+off, c);
802         }
803         *p = 0;
804         qunlock(b);
805         return buf;
806 }
807
808
809
810 // assumes b is locked
811 static void
812 ethermultiwrite(Bridge *b, Block *bp, Port *port)
813 {
814         Port *oport;
815         Block *bp2;
816         Etherpkt *ep;
817         int i, mcast;
818
819         if(waserror()) {
820                 if(bp)
821                         freeb(bp);
822                 nexterror();
823         }
824         
825         ep = (Etherpkt*)bp->rp;
826         mcast = ep->d[0] & 1;           /* multicast bit of ethernet address */
827
828         oport = nil;
829         for(i=0; i<b->nport; i++) {
830                 if(i == port->id || b->port[i] == nil)
831                         continue;
832                 /*
833                  * we need to forward multicast packets for ipv6,
834                  * so always do it.
835                  */
836                 if(mcast)
837                         b->port[i]->outmulti++;
838                 else
839                         b->port[i]->outunknown++;
840
841                 // delay one so that the last write does not copy
842                 if(oport != nil) {
843                         b->copy++;
844                         bp2 = copyblock(bp, blocklen(bp));
845                         if(!waserror()) {
846                                 etherwrite(oport, bp2);
847                                 poperror();
848                         }
849                 }
850                 oport = b->port[i];
851         }
852
853         // last write free block
854         if(oport) {
855                 bp2 = bp; bp = nil; USED(bp);
856                 if(!waserror()) {
857                         etherwrite(oport, bp2);
858                         poperror();
859                 }
860         } else
861                 freeb(bp);
862
863         poperror();
864 }
865
866 static void
867 tcpmsshack(Etherpkt *epkt, int n)
868 {
869         int hl, optlen;
870         Iphdr *iphdr;
871         Tcphdr *tcphdr;
872         ulong mss, cksum;
873         uchar *optr;
874
875         /* ignore non-ipv4 packets */
876         if(nhgets(epkt->type) != ETIP4)
877                 return;
878         iphdr = (Iphdr*)(epkt->data);
879         n -= ETHERHDRSIZE;
880         if(n < IPHDR)
881                 return;
882
883         /* ignore bad packets */
884         if(iphdr->vihl != (IP_VER4|IP_HLEN4)) {
885                 hl = (iphdr->vihl&0xF)<<2;
886                 if((iphdr->vihl&0xF0) != IP_VER4 || hl < (IP_HLEN4<<2))
887                         return;
888         } else
889                 hl = IP_HLEN4<<2;
890
891         /* ignore non-tcp packets */
892         if(iphdr->proto != IP_TCPPROTO)
893                 return;
894         n -= hl;
895         if(n < sizeof(Tcphdr))
896                 return;
897         tcphdr = (Tcphdr*)((uchar*)(iphdr) + hl);
898         // MSS can only appear in SYN packet
899         if(!(tcphdr->flag[1] & SYN))
900                 return;
901         hl = (tcphdr->flag[0] & 0xf0)>>2;
902         if(n < hl)
903                 return;
904
905         // check for MSS option
906         optr = (uchar*)tcphdr + sizeof(Tcphdr);
907         n = hl - sizeof(Tcphdr);
908         for(;;) {
909                 if(n <= 0 || *optr == EOLOPT)
910                         return;
911                 if(*optr == NOOPOPT) {
912                         n--;
913                         optr++;
914                         continue;
915                 }
916                 optlen = optr[1];
917                 if(optlen < 2 || optlen > n)
918                         return;
919                 if(*optr == MSSOPT && optlen == MSS_LENGTH)
920                         break;
921                 n -= optlen;
922                 optr += optlen;
923         }
924
925         mss = nhgets(optr+2);
926         if(mss <= TcpMssMax)
927                 return;
928         // fit checksum
929         cksum = nhgets(tcphdr->cksum);
930         if(optr-(uchar*)tcphdr & 1) {
931 print("tcpmsshack: odd alignment!\n");
932                 // odd alignments are a pain
933                 cksum += nhgets(optr+1);
934                 cksum -= (optr[1]<<8)|(TcpMssMax>>8);
935                 cksum += (cksum>>16);
936                 cksum &= 0xffff;
937                 cksum += nhgets(optr+3);
938                 cksum -= ((TcpMssMax&0xff)<<8)|optr[4];
939                 cksum += (cksum>>16);
940         } else {
941                 cksum += mss;
942                 cksum -= TcpMssMax;
943                 cksum += (cksum>>16);
944         }
945         hnputs(tcphdr->cksum, cksum);
946         hnputs(optr+2, TcpMssMax);
947 }
948
949 /*
950  *  process to read from the ethernet
951  */
952 static void
953 etherread(void *a)
954 {
955         Port *port = a;
956         Bridge *b = port->bridge;
957         Block *bp, *bp2;
958         Etherpkt *ep;
959         Centry *ce;
960         long md;
961         
962         qlock(b);
963         port->readp = up;       /* hide identity under a rock for unbind */
964
965         while(!port->closed){
966                 // release lock to read - error means it is time to quit
967                 qunlock(b);
968                 if(waserror()) {
969                         print("etherread read error: %s\n", up->errstr);
970                         qlock(b);
971                         break;
972                 }
973                 if(0)
974                         print("devbridge: etherread: reading\n");
975                 bp = devtab[port->data[0]->type]->bread(port->data[0],
976                         ETHERMAXTU, 0);
977                 if(0)
978                         print("devbridge: etherread: blocklen = %d\n",
979                                 blocklen(bp));
980                 poperror();
981                 qlock(b);
982                 if(bp == nil || port->closed)
983                         break;
984                 if(waserror()) {
985 //                      print("etherread bridge error\n");
986                         if(bp)
987                                 freeb(bp);
988                         continue;
989                 }
990                 if(blocklen(bp) < ETHERMINTU)
991                         error("short packet");
992                 port->in++;
993
994                 ep = (Etherpkt*)bp->rp;
995                 cacheupdate(b, ep->s, port->id);
996                 if(b->tcpmss)
997                         tcpmsshack(ep, BLEN(bp));
998
999                 /*
1000                  * delay packets to simulate a slow link
1001                  */
1002                 if(b->delay0 || b->delayn){
1003                         md = b->delay0 + b->delayn * BLEN(bp);
1004                         if(md > 0)
1005                                 microdelay(md);
1006                 }
1007
1008                 if(ep->d[0] & 1) {
1009                         log(b, Logmcast, "multicast: port=%d src=%E dst=%E type=%#.4ux\n",
1010                                 port->id, ep->s, ep->d, ep->type[0]<<8|ep->type[1]);
1011                         port->inmulti++;
1012                         bp2 = bp; bp = nil;
1013                         ethermultiwrite(b, bp2, port);
1014                 } else {
1015                         ce = cachelookup(b, ep->d);
1016                         if(ce == nil) {
1017                                 b->miss++;
1018                                 port->inunknown++;
1019                                 bp2 = bp; bp = nil;
1020                                 ethermultiwrite(b, bp2, port);
1021                         }else if(ce->port != port->id){
1022                                 b->hit++;
1023                                 bp2 = bp; bp = nil;
1024                                 etherwrite(b->port[ce->port], bp2);
1025                         }
1026                 }
1027
1028                 poperror();
1029                 if(bp)
1030                         freeb(bp);
1031         }
1032 //      print("etherread: trying to exit\n");
1033         port->readp = nil;
1034         portfree(port);
1035         qunlock(b);
1036         pexit("hangup", 1);
1037 }
1038
1039 static int
1040 fragment(Etherpkt *epkt, int n)
1041 {
1042         Iphdr *iphdr;
1043
1044         if(n <= TunnelMtu)
1045                 return 0;
1046
1047         /* ignore non-ipv4 packets */
1048         if(nhgets(epkt->type) != ETIP4)
1049                 return 0;
1050         iphdr = (Iphdr*)(epkt->data);
1051         n -= ETHERHDRSIZE;
1052         /*
1053          * ignore: IP runt packets, bad packets (I don't handle IP
1054          * options for the moment), packets with don't-fragment set,
1055          * and short blocks.
1056          */
1057         if(n < IPHDR || iphdr->vihl != (IP_VER4|IP_HLEN4) ||
1058             iphdr->frag[0] & (IP_DF>>8) || nhgets(iphdr->length) > n)
1059                 return 0;
1060
1061         return 1;
1062 }
1063
1064
1065 static void
1066 etherwrite(Port *port, Block *bp)
1067 {
1068         Iphdr *eh, *feh;
1069         Etherpkt *epkt;
1070         int n, lid, len, seglen, chunk, dlen, blklen, offset, mf;
1071         Block *xp, *nb;
1072         ushort fragoff, frag;
1073
1074         port->out++;
1075         epkt = (Etherpkt*)bp->rp;
1076         n = blocklen(bp);
1077         if(port->type != Ttun || !fragment(epkt, n)) {
1078                 devtab[port->data[1]->type]->bwrite(port->data[1], bp, 0);
1079                 return;
1080         }
1081         port->outfrag++;
1082         if(waserror()){
1083                 freeblist(bp);  
1084                 nexterror();
1085         }
1086
1087         seglen = (TunnelMtu - ETHERHDRSIZE - IPHDR) & ~7;
1088         eh = (Iphdr*)(epkt->data);
1089         len = nhgets(eh->length);
1090         frag = nhgets(eh->frag);
1091         mf = frag & IP_MF;
1092         frag <<= 3;
1093         dlen = len - IPHDR;
1094         xp = bp;
1095         lid = nhgets(eh->id);
1096         offset = ETHERHDRSIZE+IPHDR;
1097         while(xp != nil && offset && offset >= BLEN(xp)) {
1098                 offset -= BLEN(xp);
1099                 xp = xp->next;
1100         }
1101         xp->rp += offset;
1102         
1103         if(0)
1104                 print("seglen=%d, dlen=%d, mf=%x, frag=%d\n",
1105                         seglen, dlen, mf, frag);
1106         for(fragoff = 0; fragoff < dlen; fragoff += seglen) {
1107                 nb = allocb(ETHERHDRSIZE+IPHDR+seglen);
1108                 
1109                 feh = (Iphdr*)(nb->wp+ETHERHDRSIZE);
1110
1111                 memmove(nb->wp, epkt, ETHERHDRSIZE+IPHDR);
1112                 nb->wp += ETHERHDRSIZE+IPHDR;
1113
1114                 if((fragoff + seglen) >= dlen) {
1115                         seglen = dlen - fragoff;
1116                         hnputs(feh->frag, (frag+fragoff)>>3 | mf);
1117                 }
1118                 else    
1119                         hnputs(feh->frag, (frag+fragoff>>3) | IP_MF);
1120
1121                 hnputs(feh->length, seglen + IPHDR);
1122                 hnputs(feh->id, lid);
1123
1124                 /* Copy up the data area */
1125                 chunk = seglen;
1126                 while(chunk) {
1127                         blklen = chunk;
1128                         if(BLEN(xp) < chunk)
1129                                 blklen = BLEN(xp);
1130                         memmove(nb->wp, xp->rp, blklen);
1131                         nb->wp += blklen;
1132                         xp->rp += blklen;
1133                         chunk -= blklen;
1134                         if(xp->rp == xp->wp)
1135                                 xp = xp->next;
1136                 } 
1137
1138                 feh->cksum[0] = 0;
1139                 feh->cksum[1] = 0;
1140                 hnputs(feh->cksum, ipcsum(&feh->vihl));
1141         
1142                 /* don't generate small packets */
1143                 if(BLEN(nb) < ETHERMINTU)
1144                         nb->wp = nb->rp + ETHERMINTU;
1145                 devtab[port->data[1]->type]->bwrite(port->data[1], nb, 0);
1146         }
1147         poperror();
1148         freeblist(bp);  
1149 }
1150
1151 // hold b lock
1152 static void
1153 portfree(Port *port)
1154 {
1155         port->ref--;
1156         if(port->ref < 0)
1157                 panic("portfree: bad ref");
1158         if(port->ref > 0)
1159                 return;
1160
1161         if(port->data[0])
1162                 cclose(port->data[0]);
1163         if(port->data[1])
1164                 cclose(port->data[1]);
1165         memset(port, 0, sizeof(Port));
1166         free(port);
1167 }
1168
1169 Dev bridgedevtab = {
1170         'B',
1171         "bridge",
1172
1173         devreset,
1174         bridgeinit,
1175         devshutdown,
1176         bridgeattach,
1177         bridgewalk,
1178         bridgestat,
1179         bridgeopen,
1180         devcreate,
1181         bridgeclose,
1182         bridgeread,
1183         devbread,
1184         bridgewrite,
1185         devbwrite,
1186         devremove,
1187         devwstat,
1188 };