]> git.lizzy.rs Git - plan9front.git/blob - sys/src/9/ip/ipmux.c
ip/tcp: only calculate mss from interface mtu when directly reachable for v6
[plan9front.git] / sys / src / 9 / ip / ipmux.c
1 /*
2  * IP packet filter
3  */
4 #include "u.h"
5 #include "../port/lib.h"
6 #include "mem.h"
7 #include "dat.h"
8 #include "fns.h"
9 #include "../port/error.h"
10
11 #include "ip.h"
12 #include "ipv6.h"
13
14 typedef struct Ipmuxrock  Ipmuxrock;
15 typedef struct Ipmux      Ipmux;
16
/*
 *  Local layout of an IPv4 header.  It is used to name header
 *  fields when building filters and to peek at packets.
 */
typedef struct Myip4hdr Myip4hdr;
struct Myip4hdr
{
        uchar   vihl;           /* Version and header length */
        uchar   tos;            /* Type of service */
        uchar   length[2];      /* packet length */
        uchar   id[2];          /* ip->identification */
        uchar   frag[2];        /* Fragment information */
        uchar   ttl;            /* Time to live */
        uchar   proto;          /* Protocol */
        uchar   cksum[2];       /* Header checksum */
        uchar   src[4];         /* IP source */
        uchar   dst[4];         /* IP destination */

        uchar   data[1];        /* start of data */
};
/*
 *  Header pointer based at address 0: the address of a member
 *  (e.g. ipoff->dst) cast to an integer is that member's byte
 *  offset within the header.  Never dereferenced.
 */
Myip4hdr *ipoff = 0;
34
enum
{
        /* field being tested (Ipmux.type); lower values sort first in ipmuxcmp */
        Tproto,
        Tdata,
        Tiph,
        Tdst,
        Tsrc,
        Tifc,

        /* how the comparison is carried out (Ipmux.ctype), chosen in parsemux */
        Cother = 0,     /* generic masked byte-by-byte compare */
        Cbyte,          /* single byte */
        Cmbyte,         /* single byte with mask */
        Cshort,         /* single short */
        Cmshort,        /* single short with mask */
        Clong,          /* single long */
        Cmlong,         /* single long with mask */
        Cifc,           /* single long compared with the interface address */
        Cmifc,          /* same as Cifc, with mask */
};
54
/*
 *  printable names of the field types, indexed by Txxxx
 */
char *ftname[] = 
{
[Tproto]        "proto",
[Tdata]         "data",
[Tiph]          "iph",
[Tdst]          "dst",
[Tsrc]          "src",
[Tifc]          "ifc",
};
64
/*
 *  a node in the decision tree.  A packet that satisfies the
 *  node's test continues at yes, otherwise at no.
 */
struct Ipmux
{
        Ipmux   *yes;
        Ipmux   *no;
        uchar   type;           /* type of field(Txxxx) */
        uchar   ctype;          /* type of comparison(Cxxxx) */
        uchar   len;            /* length in bytes of item to compare */
        uchar   n;              /* number of items val points to */
        short   off;            /* offset of comparison */
        short   eoff;           /* end offset of comparison */
        uchar   skiphdr;        /* should offset start after ipheader */
        uchar   *val;           /* n items of len bytes each */
        uchar   *mask;          /* len bytes, applied to the packet before comparing */
        uchar   *e;             /* val+n*len*/

        int     ref;            /* so we can garbage collect */
        Conv    *conv;          /* conversation that receives matching packets, or nil */
};
86
/*
 *  someplace to hold per conversation data
 */
struct Ipmuxrock
{
        Ipmux   *chain;         /* this conversation's filter chain; nil until connected */
};
94
95 static int      ipmuxsprint(Ipmux*, int, char*, int);
96 static void     ipmuxkick(void *x);
97
/*
 *  return a pointer to the first character of p that is
 *  neither a space nor a tab
 */
static char*
skipwhite(char *p)
{
        for(;;){
                switch(*p){
                case ' ':
                case '\t':
                        p++;
                        continue;
                }
                return p;
        }
}
105
106 static char*
107 follows(char *p, char c)
108 {
109         char *f;
110
111         f = strchr(p, c);
112         if(f == nil)
113                 return nil;
114         *f++ = 0;
115         f = skipwhite(f);
116         if(*f == 0)
117                 return nil;
118         return f;
119 }
120
121 static Ipmux*
122 parseop(char **pp)
123 {
124         char *p = *pp;
125         int type, off, end, len;
126         Ipmux *f;
127
128         p = skipwhite(p);
129         if(strncmp(p, "dst", 3) == 0){
130                 type = Tdst;
131                 off = (int)(uintptr)(ipoff->dst);
132                 len = IPv4addrlen;
133                 p += 3;
134         }
135         else if(strncmp(p, "src", 3) == 0){
136                 type = Tsrc;
137                 off = (int)(uintptr)(ipoff->src);
138                 len = IPv4addrlen;
139                 p += 3;
140         }
141         else if(strncmp(p, "ifc", 3) == 0){
142                 type = Tifc;
143                 off = -IPv4addrlen;
144                 len = IPv4addrlen;
145                 p += 3;
146         }
147         else if(strncmp(p, "proto", 5) == 0){
148                 type = Tproto;
149                 off = (int)(uintptr)&(ipoff->proto);
150                 len = 1;
151                 p += 5;
152         }
153         else if(strncmp(p, "data", 4) == 0 || strncmp(p, "iph", 3) == 0){
154                 if(strncmp(p, "data", 4) == 0) {
155                         type = Tdata;
156                         p += 4;
157                 }
158                 else {
159                         type = Tiph;
160                         p += 3;
161                 }
162                 p = skipwhite(p);
163                 if(*p != '[')
164                         return nil;
165                 p++;
166                 off = strtoul(p, &p, 0);
167                 if(off < 0 || off > (64-IP4HDR))
168                         return nil;
169                 p = skipwhite(p);
170                 if(*p != ':')
171                         end = off;
172                 else {
173                         p++;
174                         p = skipwhite(p);
175                         end = strtoul(p, &p, 0);
176                         if(end < off)
177                                 return nil;
178                         p = skipwhite(p);
179                 }
180                 if(*p != ']')
181                         return nil;
182                 p++;
183                 len = end - off + 1;
184         }
185         else
186                 return nil;
187
188         f = smalloc(sizeof(*f));
189         f->type = type;
190         f->len = len;
191         f->off = off;
192         f->val = nil;
193         f->mask = nil;
194         f->n = 1;
195         f->ref = 1;
196         if(type == Tdata)
197                 f->skiphdr = 1;
198         else
199                 f->skiphdr = 0;
200
201         return f;       
202 }
203
/*
 *  value of a single hexadecimal digit; any other
 *  character counts as 0
 */
static int
htoi(char x)
{
        if('0' <= x && x <= '9')
                return x - '0';
        if('a' <= x && x <= 'f')
                return x - 'a' + 10;
        if('A' <= x && x <= 'F')
                return x - 'A' + 10;
        return 0;
}
217
/*
 *  value of the byte written as the two hex digits p[0] p[1]
 */
static int
hextoi(char *p)
{
        int hi, lo;

        hi = htoi(p[0]);
        lo = htoi(p[1]);
        return (hi << 4) | lo;
}
223
224 static void
225 parseval(uchar *v, char *p, int len)
226 {
227         while(*p && len-- > 0){
228                 *v++ = hextoi(p);
229                 p += 2;
230         }
231 }
232
/*
 *  parse one filter expression (operand, optional mask, and one
 *  or more values separated by '|') into a freshly allocated node.
 *  p is modified in place.  returns nil on a syntax error, after
 *  releasing whatever had been allocated.
 */
static Ipmux*
parsemux(char *p)
{
        int n, nomask;
        Ipmux *f;
        char *val;
        char *mask;
        char *vals[20];
        uchar *v;

        /* parse operand */
        f = parseop(&p);
        if(f == nil)
                return nil;

        /* find value */
        val = follows(p, '=');
        if(val == nil)
                goto parseerror;

        /*
         *  parse mask
         *  NOTE(review): follows() above has replaced the '=' with a
         *  NUL, so this search only sees the text before the '='.
         *  a mask written after the value is not found here -- confirm
         *  the intended syntax.
         */
        mask = follows(p, '&');
        if(mask != nil){
                switch(f->type){
                case Tsrc:
                case Tdst:
                case Tifc:
                        f->mask = smalloc(f->len);
                        v4parseip(f->mask, mask);
                        break;
                case Tdata:
                case Tiph:
                        f->mask = smalloc(f->len);
                        parseval(f->mask, mask, f->len);
                        break;
                default:
                        /* a mask on any other field (proto) is rejected */
                        goto parseerror;
                }
                nomask = 0;
        } else {
                /* no mask given: compare every bit */
                nomask = 1;
                f->mask = smalloc(f->len);
                memset(f->mask, 0xff, f->len);
        }

        /* parse vals */
        f->n = getfields(val, vals, sizeof(vals)/sizeof(char*), 1, "|");
        if(f->n == 0)
                goto parseerror;
        f->val = smalloc(f->n*f->len);
        v = f->val;
        for(n = 0; n < f->n; n++){
                switch(f->type){
                case Tsrc:
                case Tdst:
                case Tifc:
                        v4parseip(v, vals[n]);
                        break;
                case Tproto:
                case Tdata:
                case Tiph:
                        parseval(v, vals[n], f->len);
                        break;
                }
                v += f->len;
        }

        f->eoff = f->off + f->len;
        f->e = f->val + f->n*f->len;
        /*
         *  a single value of 1, 2 or 4 bytes gets a specialized
         *  comparison; everything else stays Cother
         */
        f->ctype = Cother;
        if(f->n == 1){
                switch(f->len){
                case 1:
                        f->ctype = nomask ? Cbyte : Cmbyte;
                        break;
                case 2:
                        f->ctype = nomask ? Cshort : Cmshort;
                        break;
                case 4:
                        if(f->type == Tifc)
                                f->ctype = nomask ? Cifc : Cmifc;
                        else
                                f->ctype = nomask ? Clong : Cmlong;
                        break;
                }
        }
        return f;

parseerror:
        if(f->mask)
                free(f->mask);
        if(f->val)
                free(f->val);
        free(f);
        return nil;
}
329
330 /*
331  *  Compare relative ordering of two ipmuxs.  This doesn't compare the
332  *  values, just the fields being looked at.  
333  *
334  *  returns:    <0 if a is a more specific match
335  *               0 if a and b are matching on the same fields
336  *              >0 if b is a more specific match
337  */
338 static int
339 ipmuxcmp(Ipmux *a, Ipmux *b)
340 {
341         int n;
342
343         /* compare types, lesser ones are more important */
344         n = a->type - b->type;
345         if(n != 0)
346                 return n;
347
348         /* compare offsets, call earlier ones more specific */
349         n = (a->off+((int)a->skiphdr)*(int)(uintptr)ipoff->data) - 
350                 (b->off+((int)b->skiphdr)*(int)(uintptr)ipoff->data);
351         if(n != 0)
352                 return n;
353
354         /* compare match lengths, longer ones are more specific */
355         n = b->len - a->len;
356         if(n != 0)
357                 return n;
358
359         /*
360          *  if we get here we have two entries matching
361          *  the same bytes of the record.  Now check
362          *  the mask for equality.  Longer masks are
363          *  more specific.
364          */
365         if(a->mask != nil && b->mask == nil)
366                 return -1;
367         if(a->mask == nil && b->mask != nil)
368                 return 1;
369         if(a->mask != nil && b->mask != nil){
370                 n = memcmp(b->mask, a->mask, a->len);
371                 if(n != 0)
372                         return n;
373         }
374         return 0;
375 }
376
377 /*
378  *  Compare the values of two ipmuxs.  We're assuming that ipmuxcmp
379  *  returned 0 comparing them.
380  */
381 static int
382 ipmuxvalcmp(Ipmux *a, Ipmux *b)
383 {
384         int n;
385
386         n = b->len*b->n - a->len*a->n;
387         if(n != 0)
388                 return n;
389         return memcmp(a->val, b->val, a->len*a->n);
390
391
392 /*
393  *  add onto an existing ipmux chain in the canonical comparison
394  *  order
395  */
396 static void
397 ipmuxchain(Ipmux **l, Ipmux *f)
398 {
399         for(; *l; l = &(*l)->yes)
400                 if(ipmuxcmp(f, *l) < 0)
401                         break;
402         f->yes = *l;
403         *l = f;
404 }
405
406 /*
407  *  copy a tree
408  */
409 static Ipmux*
410 ipmuxcopy(Ipmux *f)
411 {
412         Ipmux *nf;
413
414         if(f == nil)
415                 return nil;
416         nf = smalloc(sizeof *nf);
417         *nf = *f;
418         nf->no = ipmuxcopy(f->no);
419         nf->yes = ipmuxcopy(f->yes);
420         nf->val = smalloc(f->n*f->len);
421         nf->e = nf->val + f->len*f->n;
422         memmove(nf->val, f->val, f->n*f->len);
423         return nf;
424 }
425
426 static void
427 ipmuxfree(Ipmux *f)
428 {
429         if(f->val != nil)
430                 free(f->val);
431         free(f);
432 }
433
434 static void
435 ipmuxtreefree(Ipmux *f)
436 {
437         if(f == nil)
438                 return;
439         if(f->no != nil)
440                 ipmuxfree(f->no);
441         if(f->yes != nil)
442                 ipmuxfree(f->yes);
443         ipmuxfree(f);
444 }
445
/*
 *  merge two trees.  returns the root of the merged tree, which
 *  is built out of the nodes of a and b; neither argument may be
 *  used as a separate tree afterwards.
 */
static Ipmux*
ipmuxmerge(Ipmux *a, Ipmux *b)
{
        int n;
        Ipmux *f;

        if(a == nil)
                return b;
        if(b == nil)
                return a;
        n = ipmuxcmp(a, b);
        if(n < 0){
                /*
                 *  a tests an earlier field: b has to be tried whether
                 *  or not a matches, so one copy goes down each side
                 */
                f = ipmuxcopy(b);
                a->yes = ipmuxmerge(a->yes, b);
                a->no = ipmuxmerge(a->no, f);
                return a;
        }
        if(n > 0){
                /* the mirror case: b tests an earlier field than a */
                f = ipmuxcopy(a);
                b->yes = ipmuxmerge(b->yes, a);
                b->no = ipmuxmerge(b->no, f);
                return b;
        }
        if(ipmuxvalcmp(a, b) == 0){
                /*
                 *  same test, same values: share the node a and count
                 *  the extra user.
                 *  NOTE(review): b->conv is not carried over to a before
                 *  b is freed -- confirm identical filters from two
                 *  conversations are not expected.
                 */
                a->yes = ipmuxmerge(a->yes, b->yes);
                a->no = ipmuxmerge(a->no, b->no);
                a->ref++;
                ipmuxfree(b);
                return a;
        }
        /* same field, different values: b is tried when a fails */
        a->no = ipmuxmerge(a->no, b);
        return a;
}
482
/*
 *  remove a chain from a demux tree.  This is like merging except that
 *  we remove instead of insert.
 *
 *  l points at the link holding the (sub)tree, f is the remainder
 *  of the chain still to be removed.  returns 0 once the chain has
 *  been removed along the path taken; a non-zero result means some
 *  path did not contain the chain.
 */
static int
ipmuxremove(Ipmux **l, Ipmux *f)
{
        int n, rv;
        Ipmux *ft;

        if(f == nil)
                return 0;               /* we've removed it all */
        if(*l == nil)
                return -1;

        ft = *l;
        n = ipmuxcmp(ft, f);
        if(n < 0){
                /* *l is matching an earlier field, descend both paths */
                rv = ipmuxremove(&ft->yes, f);
                rv += ipmuxremove(&ft->no, f);
                return rv;
        }
        if(n > 0){
                /* f represents an earlier field than *l, this should be impossible */
                return -1;
        }

        /* if we get here f and *l are comparing the same fields */
        if(ipmuxvalcmp(ft, f) != 0){
                /* different values mean mutually exclusive */
                return ipmuxremove(&ft->no, f);
        }

        /* we found a match */
        if(--(ft->ref) == 0){
                /*
                 *  a dead node implies the whole yes side is also dead.
                 *  since our chain is constrained to be on that side,
                 *  we're done.
                 */
                ipmuxtreefree(ft->yes);
                *l = ft->no;            /* the no side takes the dead node's place */
                ipmuxfree(ft);
                return 0;
        }

        /*
         *  free the rest of the chain.  it is constrained to match the
         *  yes side.
         */
        return ipmuxremove(&ft->yes, f->yes);
}
536
537 /*
538  *  connection request is a semi separated list of filters
539  *  e.g. proto=17;data[0:4]=11aa22bb;ifc=135.104.9.2&255.255.255.0
540  *
541  *  there's no protection against overlapping specs.
542  */
543 static char*
544 ipmuxconnect(Conv *c, char **argv, int argc)
545 {
546         int i, n;
547         char *field[10];
548         Ipmux *mux, *chain;
549         Ipmuxrock *r;
550         Fs *f;
551
552         f = c->p->f;
553
554         if(argc != 2)
555                 return Ebadarg;
556
557         n = getfields(argv[1], field, nelem(field), 1, ";");
558         if(n <= 0)
559                 return Ebadarg;
560
561         chain = nil;
562         mux = nil;
563         for(i = 0; i < n; i++){
564                 mux = parsemux(field[i]);
565                 if(mux == nil){
566                         ipmuxtreefree(chain);
567                         return Ebadarg;
568                 }
569                 ipmuxchain(&chain, mux);
570         }
571         if(chain == nil)
572                 return Ebadarg;
573         mux->conv = c;
574
575         /* save a copy of the chain so we can later remove it */
576         mux = ipmuxcopy(chain);
577         r = (Ipmuxrock*)(c->ptcl);
578         r->chain = chain;
579
580         /* add the chain to the protocol demultiplexor tree */
581         wlock(f);
582         f->ipmux->priv = ipmuxmerge(f->ipmux->priv, mux);
583         wunlock(f);
584
585         Fsconnected(c, nil);
586         return nil;
587 }
588
589 static int
590 ipmuxstate(Conv *c, char *state, int n)
591 {
592         Ipmuxrock *r;
593         
594         r = (Ipmuxrock*)(c->ptcl);
595         return ipmuxsprint(r->chain, 0, state, n);
596 }
597
598 static void
599 ipmuxcreate(Conv *c)
600 {
601         Ipmuxrock *r;
602
603         c->rq = qopen(64*1024, Qmsg, 0, c);
604         c->wq = qopen(64*1024, Qkick, ipmuxkick, c);
605         r = (Ipmuxrock*)(c->ptcl);
606         r->chain = nil;
607 }
608
/*
 *  announce is not available on ipmux conversations; all
 *  arguments are ignored and an error string is returned
 */
static char*
ipmuxannounce(Conv*, char**, int)
{
        return "ipmux does not support announce";
}
614
615 static void
616 ipmuxclose(Conv *c)
617 {
618         Ipmuxrock *r;
619         Fs *f = c->p->f;
620
621         r = (Ipmuxrock*)(c->ptcl);
622
623         qclose(c->rq);
624         qclose(c->wq);
625         qclose(c->eq);
626         ipmove(c->laddr, IPnoaddr);
627         ipmove(c->raddr, IPnoaddr);
628         c->lport = 0;
629         c->rport = 0;
630
631         wlock(f);
632         ipmuxremove(&(c->p->priv), r->chain);
633         wunlock(f);
634         ipmuxtreefree(r->chain);
635         r->chain = nil;
636 }
637
638 /*
639  *  takes a fully formed ip packet and just passes it down
640  *  the stack
641  */
642 static void
643 ipmuxkick(void *x)
644 {
645         Conv *c = x;
646         Block *bp;
647
648         bp = qget(c->wq);
649         if(bp != nil) {
650                 Myip4hdr *ih4 = (Myip4hdr*)(bp->rp);
651
652                 if((ih4->vihl & 0xF0) != IP_VER6)
653                         ipoput4(c->p->f, bp, 0, ih4->ttl, ih4->tos, nil);
654                 else
655                         ipoput6(c->p->f, bp, 0, ((Ip6hdr*)ih4)->ttl, 0, nil);
656         }
657 }
658
659 static void
660 ipmuxiput(Proto *p, Ipifc *ifc, Block *bp)
661 {
662         int len, hl;
663         Fs *f = p->f;
664         uchar *m, *h, *v, *e, *ve, *hp;
665         Conv *c;
666         Ipmux *mux;
667         Myip4hdr *ip;
668         Ip6hdr *ip6;
669
670         ip = (Myip4hdr*)bp->rp;
671         hl = (ip->vihl&0x0F)<<2;
672
673         if(p->priv == nil)
674                 goto nomatch;
675
676         h = bp->rp;
677         len = BLEN(bp);
678
679         /* run the v4 filter */
680         rlock(f);
681         c = nil;
682         mux = f->ipmux->priv;
683         while(mux != nil){
684                 if(mux->eoff > len){
685                         mux = mux->no;
686                         continue;
687                 }
688                 hp = h + mux->off + ((int)mux->skiphdr)*hl;
689                 switch(mux->ctype){
690                 case Cbyte:
691                         if(*mux->val == *hp)
692                                 goto yes;
693                         break;
694                 case Cmbyte:
695                         if((*hp & *mux->mask) == *mux->val)
696                                 goto yes;
697                         break;
698                 case Cshort:
699                         if(*((ushort*)mux->val) == *(ushort*)hp)
700                                 goto yes;
701                         break;
702                 case Cmshort:
703                         if((*(ushort*)hp & (*((ushort*)mux->mask))) == *((ushort*)mux->val))
704                                 goto yes;
705                         break;
706                 case Clong:
707                         if(*((ulong*)mux->val) == *(ulong*)hp)
708                                 goto yes;
709                         break;
710                 case Cmlong:
711                         if((*(ulong*)hp & (*((ulong*)mux->mask))) == *((ulong*)mux->val))
712                                 goto yes;
713                         break;
714                 case Cifc:
715                         if(*((ulong*)mux->val) == *(ulong*)(ifc->lifc->local + IPv4off))
716                                 goto yes;
717                         break;
718                 case Cmifc:
719                         if((*(ulong*)(ifc->lifc->local + IPv4off) & (*((ulong*)mux->mask))) == *((ulong*)mux->val))
720                                 goto yes;
721                         break;
722                 default:
723                         v = mux->val;
724                         for(e = mux->e; v < e; v = ve){
725                                 m = mux->mask;
726                                 hp = h + mux->off;
727                                 for(ve = v + mux->len; v < ve; v++){
728                                         if((*hp++ & *m++) != *v)
729                                                 break;
730                                 }
731                                 if(v == ve)
732                                         goto yes;
733                         }
734                 }
735                 mux = mux->no;
736                 continue;
737 yes:
738                 if(mux->conv != nil)
739                         c = mux->conv;
740                 mux = mux->yes;
741         }
742         runlock(f);
743
744         if(c != nil){
745                 /* tack on interface address */
746                 bp = padblock(bp, IPaddrlen);
747                 ipmove(bp->rp, ifc->lifc->local);
748                 qpass(c->rq, concatblock(bp));
749                 return;
750         }
751
752 nomatch:
753         /* doesn't match any filter, hand it to the specific protocol handler */
754         ip = (Myip4hdr*)bp->rp;
755         if((ip->vihl & 0xF0) == IP_VER4) {
756                 p = f->t2p[ip->proto];
757         } else {
758                 ip6 = (Ip6hdr*)bp->rp;
759                 p = f->t2p[ip6->proto];
760         }
761         if(p && p->rcv)
762                 (*p->rcv)(p, ifc, bp);
763         else
764                 freeblist(bp);
765         return;
766 }
767
768 static int
769 ipmuxsprint(Ipmux *mux, int level, char *buf, int len)
770 {
771         int i, j, n;
772         uchar *v;
773
774         n = 0;
775         for(i = 0; i < level; i++)
776                 n += snprint(buf+n, len-n, " ");
777         if(mux == nil){
778                 n += snprint(buf+n, len-n, "\n");
779                 return n;
780         }
781         n += snprint(buf+n, len-n, "h[%d:%d]&", 
782                mux->off+((int)mux->skiphdr)*((int)(uintptr)ipoff->data), 
783                mux->off+(((int)mux->skiphdr)*((int)(uintptr)ipoff->data))+mux->len-1);
784         for(i = 0; i < mux->len; i++)
785                 n += snprint(buf+n, len - n, "%2.2ux", mux->mask[i]);
786         n += snprint(buf+n, len-n, "=");
787         v = mux->val;
788         for(j = 0; j < mux->n; j++){
789                 for(i = 0; i < mux->len; i++)
790                         n += snprint(buf+n, len - n, "%2.2ux", *v++);
791                 n += snprint(buf+n, len-n, "|");
792         }
793         n += snprint(buf+n, len-n, "\n");
794         level++;
795         n += ipmuxsprint(mux->no, level, buf+n, len-n);
796         n += ipmuxsprint(mux->yes, level, buf+n, len-n);
797         return n;
798 }
799
800 static int
801 ipmuxstats(Proto *p, char *buf, int len)
802 {
803         int n;
804         Fs *f = p->f;
805
806         rlock(f);
807         n = ipmuxsprint(p->priv, 0, buf, len);
808         runlock(f);
809
810         return n;
811 }
812
813 void
814 ipmuxinit(Fs *f)
815 {
816         Proto *ipmux;
817
818         ipmux = smalloc(sizeof(Proto));
819         ipmux->priv = nil;
820         ipmux->name = "ipmux";
821         ipmux->connect = ipmuxconnect;
822         ipmux->announce = ipmuxannounce;
823         ipmux->state = ipmuxstate;
824         ipmux->create = ipmuxcreate;
825         ipmux->close = ipmuxclose;
826         ipmux->rcv = ipmuxiput;
827         ipmux->ctl = nil;
828         ipmux->advise = nil;
829         ipmux->stats = ipmuxstats;
830         ipmux->ipproto = -1;
831         ipmux->nc = 64;
832         ipmux->ptclsize = sizeof(Ipmuxrock);
833
834         f->ipmux = ipmux;                       /* hack for Fsrcvpcol */
835
836         Fsproto(f, ipmux);
837 }