git.lizzy.rs Git - plan9front.git/blobdiff - sys/src/9/port/devbridge.c
kernel: massive pci code rewrite
[plan9front.git] / sys / src / 9 / port / devbridge.c
index be416a249872cca848b42ffced600d82c72c3067..3b8ca903737bc735afcb8d65025b05dcc34a9cae 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * IPv4 Ethernet bridge
+ * IP Ethernet bridge
  */
 #include "u.h"
 #include "../port/lib.h"
@@ -7,13 +7,13 @@
 #include "dat.h"
 #include "fns.h"
 #include "../ip/ip.h"
+#include "../ip/ipv6.h"
 #include "../port/netif.h"
 #include "../port/error.h"
 
 typedef struct Bridge  Bridge;
 typedef struct Port    Port;
 typedef struct Centry  Centry;
-typedef struct Iphdr   Iphdr;
 typedef struct Tcphdr  Tcphdr;
 
 enum
@@ -39,6 +39,7 @@ enum
        CacheLook=      5,              // how many cache entries to examine
        CacheSize=      (CacheHash+CacheLook-1),
        CacheTimeout=   5*60,           // timeout for cache entry in seconds
+       MaxMTU= IP_MAX, // allow for jumbo frames and large UDP
 
        TcpMssMax = 1300,               // max desirable Tcp MSS value
        TunnelMtu = 1400,
@@ -109,9 +110,9 @@ struct Bridge
 
 struct Port
 {
+       Ref;
        int     id;
        Bridge  *bridge;
-       int     ref;
        int     closed;
 
        Chan    *data[2];       // channel to data
@@ -137,27 +138,12 @@ struct Port
 };
 
 enum {
-       IP_TCPPROTO     = 6,
        EOLOPT          = 0,
        NOOPOPT         = 1,
        MSSOPT          = 2,
        MSS_LENGTH      = 4,            /* Maximum segment size */
        SYN             = 0x02,         /* Pkt. is synchronise */
-       IPHDR           = 20,           /* sizeof(Iphdr) */
-};
-
-struct Iphdr
-{
-       uchar   vihl;           /* Version and header length */
-       uchar   tos;            /* Type of service */
-       uchar   length[2];      /* packet length */
-       uchar   id[2];          /* ip->identification */
-       uchar   frag[2];        /* Fragment information */
-       uchar   ttl;            /* Time to live */
-       uchar   proto;          /* Protocol */
-       uchar   cksum[2];       /* Header checksum */
-       uchar   src[4];         /* IP source */
-       uchar   dst[4];         /* IP destination */
+       TCPHDR          = 20,
 };
 
 struct Tcphdr
@@ -207,14 +193,14 @@ bridgeinit(void)
 }
 
 static Chan*
-bridgeattach(charspec)
+bridgeattach(char *spec)
 {
        Chan *c;
-       int dev;
+       ulong dev;
 
-       dev = atoi(spec);
-       if(dev<0 || dev >= Maxbridge)
-               error("bad specification");
+       dev = strtoul(spec, nil, 10);
+       if(dev >= Maxbridge)
+               error(Enodev);
 
        c = devattach('B', spec);
        mkqid(&c->qid, QID(0, Qtopdir), 0, QTDIR);
@@ -291,15 +277,21 @@ bridgeread(Chan *c, void *a, long n, vlong off)
        USED(off);
        switch(TYPE(c->qid)) {
        default:
-               error(Eperm);
+               error(Egreg);
        case Qtopdir:
        case Qbridgedir:
        case Qportdir:
                return devdirread(c, a, n, 0, 0, bridgegen);
        case Qlog:
                return logread(b, a, off, n);
+       case Qlocal:
+               return 0;       /* TO DO */
        case Qstatus:
                qlock(b);
+               if(waserror()){
+                       qunlock(b);
+                       nexterror();
+               }
                port = b->port[PORT(c->qid)];
                if(port == 0)
                        strcpy(buf, "unbound\n");
@@ -318,16 +310,15 @@ bridgeread(Chan *c, void *a, long n, vlong off)
                        }
                        ingood = port->in - port->inmulti - port->inunknown;
                        outgood = port->out - port->outmulti - port->outunknown;
-                       i += snprint(buf+i, sizeof(buf)-i,
+                       snprint(buf+i, sizeof(buf)-i,
                                "in=%d(%d:%d:%d) out=%d(%d:%d:%d:%d)\n",
                                port->in, ingood, port->inmulti, port->inunknown,
                                port->out, outgood, port->outmulti,
                                port->outunknown, port->outfrag);
-                       USED(i);
                }
-               n = readstr(off, a, n, buf);
+               poperror();
                qunlock(b);
-               return n;
+               return readstr(off, a, n, buf);
        case Qbctl:
                snprint(buf, sizeof(buf), "%s tcpmss\ndelay %ld %ld\n",
                        b->tcpmss ? "set" : "clear", b->delay0, b->delayn);
@@ -513,7 +504,7 @@ portbind(Bridge *b, int argc, char *argv[])
        Chan *ctl;
        int type = 0, i, n;
        ulong ownhash;
-       char *dev, *dev2 = nil, *p;
+       char *dev, *dev2 = nil;
        char buf[100], name[KNAMELEN], path[8*KNAMELEN];
        static char usage[] = "usage: bind ether|tunnel name ownhash dev [dev2]";
 
@@ -574,15 +565,15 @@ portbind(Bridge *b, int argc, char *argv[])
                // check addr?
 
                // get directory name
-               n = devtab[ctl->type]->read(ctl, buf, sizeof(buf), 0);
+               n = devtab[ctl->type]->read(ctl, buf, sizeof(buf)-1, 0);
                buf[n] = 0;
-               for(p = buf; *p == ' '; p++)
-                       ;
-               snprint(path, sizeof(path), "%s/%lud/data", dev, strtoul(p, 0, 0));
+               snprint(path, sizeof(path), "%s/%lud/data", dev, strtoul(buf, 0, 0));
 
                // setup connection to be promiscuous
                snprint(buf, sizeof(buf), "connect -1");
                devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
+               snprint(buf, sizeof(buf), "nonblocking");
+               devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
                snprint(buf, sizeof(buf), "promiscuous");
                devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
                snprint(buf, sizeof(buf), "bridge");
@@ -613,8 +604,9 @@ portbind(Bridge *b, int argc, char *argv[])
                b->nport = port->id+1;
 
        // assumes kproc always succeeds
-       kproc("etherread", etherread, port);    // poperror must be next
-       port->ref++;
+       incref(port);
+       snprint(buf, sizeof(buf), "bridge:%s", dev);
+       kproc(buf, etherread, port);
 }
 
 // assumes b is locked
@@ -794,6 +786,8 @@ cachedump(Bridge *b)
        n *= 51;        // change if print format is changed
        n += 10;        // some slop at the end
        buf = malloc(n);
+       if(buf == nil)
+               error(Enomem);
        p = buf;
        ep = buf + n;
        ce = b->cache;
@@ -814,21 +808,14 @@ cachedump(Bridge *b)
 
 
 
-// assumes b is locked
+// assumes b is locked, no error return
 static void
 ethermultiwrite(Bridge *b, Block *bp, Port *port)
 {
        Port *oport;
-       Block *bp2;
        Etherpkt *ep;
        int i, mcast;
 
-       if(waserror()) {
-               if(bp)
-                       freeb(bp);
-               nexterror();
-       }
-       
        ep = (Etherpkt*)bp->rp;
        mcast = ep->d[0] & 1;           /* multicast bit of ethernet address */
 
@@ -848,60 +835,54 @@ ethermultiwrite(Bridge *b, Block *bp, Port *port)
                // delay one so that the last write does not copy
                if(oport != nil) {
                        b->copy++;
-                       bp2 = copyblock(bp, blocklen(bp));
-                       if(!waserror()) {
-                               etherwrite(oport, bp2);
-                               poperror();
-                       }
+                       etherwrite(oport, copyblock(bp, BLEN(bp)));
                }
                oport = b->port[i];
        }
 
        // last write free block
-       if(oport) {
-               bp2 = bp; bp = nil; USED(bp);
-               if(!waserror()) {
-                       etherwrite(oport, bp2);
-                       poperror();
-               }
-       } else
+       if(oport)
+               etherwrite(oport, bp);
+       else
                freeb(bp);
-
-       poperror();
 }
 
 static void
 tcpmsshack(Etherpkt *epkt, int n)
 {
        int hl, optlen;
-       Iphdr *iphdr;
        Tcphdr *tcphdr;
        ulong mss, cksum;
        uchar *optr;
 
        /* ignore non-IP packets (neither IPv4 nor IPv6) */
-       if(nhgets(epkt->type) != ETIP4)
+       switch(nhgets(epkt->type)){
+       case ETIP4:
+       case ETIP6:
+               break;
+       default:
                return;
-       iphdr = (Iphdr*)(epkt->data);
+       }
        n -= ETHERHDRSIZE;
-       if(n < IPHDR)
+       if(n < 1)
                return;
-
-       /* ignore bad packets */
-       if(iphdr->vihl != (IP_VER4|IP_HLEN4)) {
-               hl = (iphdr->vihl&0xF)<<2;
-               if((iphdr->vihl&0xF0) != IP_VER4 || hl < (IP_HLEN4<<2))
+       switch(epkt->data[0]&0xF0){
+       case IP_VER4:
+               hl = (epkt->data[0]&15)<<2;
+               if(n < hl+TCPHDR || hl < IP4HDR || epkt->data[9] != TCP)
                        return;
-       } else
-               hl = IP_HLEN4<<2;
-
-       /* ignore non-tcp packets */
-       if(iphdr->proto != IP_TCPPROTO)
-               return;
-       n -= hl;
-       if(n < sizeof(Tcphdr))
+               n -= hl;
+               tcphdr = (Tcphdr*)(epkt->data + hl);
+               break;
+       case IP_VER6:
+               if(n < IP6HDR+TCPHDR || epkt->data[6] != TCP)
+                       return;
+               n -= IP6HDR;
+               tcphdr = (Tcphdr*)(epkt->data + IP6HDR);
+               break;
+       default:
                return;
-       tcphdr = (Tcphdr*)((uchar*)(iphdr) + hl);
+       }
        // MSS can only appear in SYN packet
        if(!(tcphdr->flag[1] & SYN))
                return;
@@ -910,8 +891,8 @@ tcpmsshack(Etherpkt *epkt, int n)
                return;
 
        // check for MSS option
-       optr = (uchar*)tcphdr + sizeof(Tcphdr);
-       n = hl - sizeof(Tcphdr);
+       optr = (uchar*)tcphdr + TCPHDR;
+       n = hl - TCPHDR;
        for(;;) {
                if(n <= 0 || *optr == EOLOPT)
                        return;
@@ -932,10 +913,11 @@ tcpmsshack(Etherpkt *epkt, int n)
        mss = nhgets(optr+2);
        if(mss <= TcpMssMax)
                return;
+
        // fit checksum
        cksum = nhgets(tcphdr->cksum);
        if(optr-(uchar*)tcphdr & 1) {
-print("tcpmsshack: odd alignment!\n");
+// print("tcpmsshack: odd alignment!\n");
                // odd alignments are a pain
                cksum += nhgets(optr+1);
                cksum -= (optr[1]<<8)|(TcpMssMax>>8);
@@ -961,10 +943,10 @@ etherread(void *a)
 {
        Port *port = a;
        Bridge *b = port->bridge;
-       Block *bp, *bp2;
+       Block *bp;
        Etherpkt *ep;
        Centry *ce;
-       long md;
+       long md, n;
        
        qlock(b);
        port->readp = up;       /* hide identity under a rock for unbind */
@@ -977,64 +959,56 @@ etherread(void *a)
                        qlock(b);
                        break;
                }
-               if(0)
-                       print("devbridge: etherread: reading\n");
-               bp = devtab[port->data[0]->type]->bread(port->data[0],
-                       ETHERMAXTU, 0);
-               if(0)
-                       print("devbridge: etherread: blocklen = %d\n",
-                               blocklen(bp));
+               bp = devtab[port->data[0]->type]->bread(port->data[0], MaxMTU, 0);
                poperror();
                qlock(b);
-               if(bp == nil || port->closed)
+               if(bp == nil)
                        break;
+               n = BLEN(bp);
+               if(port->closed || n < ETHERHDRSIZE){
+                       freeb(bp);
+                       continue;
+               }
                if(waserror()) {
 //                     print("etherread bridge error\n");
-                       if(bp)
-                               freeb(bp);
+                       freeb(bp);
                        continue;
                }
-               if(blocklen(bp) < ETHERMINTU)
-                       error("short packet");
                port->in++;
 
                ep = (Etherpkt*)bp->rp;
                cacheupdate(b, ep->s, port->id);
                if(b->tcpmss)
-                       tcpmsshack(ep, BLEN(bp));
+                       tcpmsshack(ep, n);
 
                /*
                 * delay packets to simulate a slow link
                 */
-               if(b->delay0 || b->delayn){
-                       md = b->delay0 + b->delayn * BLEN(bp);
+               if(b->delay0 != 0 || b->delayn != 0){
+                       md = b->delay0 + b->delayn * n;
                        if(md > 0)
                                microdelay(md);
                }
 
+               poperror();     /* must now dispose of bp */
+
                if(ep->d[0] & 1) {
                        log(b, Logmcast, "multicast: port=%d src=%E dst=%E type=%#.4ux\n",
                                port->id, ep->s, ep->d, ep->type[0]<<8|ep->type[1]);
                        port->inmulti++;
-                       bp2 = bp; bp = nil;
-                       ethermultiwrite(b, bp2, port);
+                       ethermultiwrite(b, bp, port);
                } else {
                        ce = cachelookup(b, ep->d);
                        if(ce == nil) {
                                b->miss++;
                                port->inunknown++;
-                               bp2 = bp; bp = nil;
-                               ethermultiwrite(b, bp2, port);
+                               ethermultiwrite(b, bp, port);
                        }else if(ce->port != port->id){
                                b->hit++;
-                               bp2 = bp; bp = nil;
-                               etherwrite(b->port[ce->port], bp2);
-                       }
+                               etherwrite(b->port[ce->port], bp);
+                       }else
+                               freeb(bp);
                }
-
-               poperror();
-               if(bp)
-                       freeb(bp);
        }
 //     print("etherread: trying to exit\n");
        port->readp = nil;
@@ -1046,7 +1020,7 @@ etherread(void *a)
 static int
 fragment(Etherpkt *epkt, int n)
 {
-       Iphdr *iphdr;
+       Ip4hdr *iphdr;
 
        if(n <= TunnelMtu)
                return 0;
@@ -1054,69 +1028,68 @@ fragment(Etherpkt *epkt, int n)
        /* ignore non-ipv4 packets */
        if(nhgets(epkt->type) != ETIP4)
                return 0;
-       iphdr = (Iphdr*)(epkt->data);
+       iphdr = (Ip4hdr*)(epkt->data);
        n -= ETHERHDRSIZE;
        /*
         * ignore: IP runt packets, bad packets (I don't handle IP
         * options for the moment), packets with don't-fragment set,
         * and short blocks.
         */
-       if(n < IPHDR || iphdr->vihl != (IP_VER4|IP_HLEN4) ||
+       if(n < IP4HDR || iphdr->vihl != (IP_VER4|IP_HLEN4) ||
            iphdr->frag[0] & (IP_DF>>8) || nhgets(iphdr->length) > n)
                return 0;
 
        return 1;
 }
 
-
 static void
 etherwrite(Port *port, Block *bp)
 {
-       Iphdr *eh, *feh;
+       Ip4hdr *eh, *feh;
        Etherpkt *epkt;
-       int n, lid, len, seglen, chunk, dlen, blklen, offset, mf;
-       Block *xp, *nb;
+       int n, lid, len, seglen, dlen, blklen, mf;
+       Block *nb;
        ushort fragoff, frag;
 
        port->out++;
+       n = BLEN(bp);
        epkt = (Etherpkt*)bp->rp;
-       n = blocklen(bp);
        if(port->type != Ttun || !fragment(epkt, n)) {
-               devtab[port->data[1]->type]->bwrite(port->data[1], bp, 0);
+               if(!waserror()){
+                       /* don't generate small packets */
+                       if(n < ETHERMINTU)
+                               bp = adjustblock(bp, ETHERMINTU);
+                       devtab[port->data[1]->type]->bwrite(port->data[1], bp, 0);
+                       poperror();
+               }
                return;
        }
        port->outfrag++;
        if(waserror()){
-               freeblist(bp);  
-               nexterror();
+               freeb(bp);      
+               return;
        }
 
-       seglen = (TunnelMtu - ETHERHDRSIZE - IPHDR) & ~7;
-       eh = (Iphdr*)(epkt->data);
+       seglen = (TunnelMtu - ETHERHDRSIZE - IP4HDR) & ~7;
+       eh = (Ip4hdr*)(epkt->data);
        len = nhgets(eh->length);
        frag = nhgets(eh->frag);
        mf = frag & IP_MF;
        frag <<= 3;
-       dlen = len - IPHDR;
-       xp = bp;
+       dlen = len - IP4HDR;
        lid = nhgets(eh->id);
-       offset = ETHERHDRSIZE+IPHDR;
-       while(xp != nil && offset && offset >= BLEN(xp)) {
-               offset -= BLEN(xp);
-               xp = xp->next;
-       }
-       xp->rp += offset;
+       bp->rp += ETHERHDRSIZE+IP4HDR;
        
        if(0)
                print("seglen=%d, dlen=%d, mf=%x, frag=%d\n",
                        seglen, dlen, mf, frag);
        for(fragoff = 0; fragoff < dlen; fragoff += seglen) {
-               nb = allocb(ETHERHDRSIZE+IPHDR+seglen);
+               nb = allocb(ETHERHDRSIZE+IP4HDR+seglen);
                
-               feh = (Iphdr*)(nb->wp+ETHERHDRSIZE);
+               feh = (Ip4hdr*)(nb->wp+ETHERHDRSIZE);
 
-               memmove(nb->wp, epkt, ETHERHDRSIZE+IPHDR);
-               nb->wp += ETHERHDRSIZE+IPHDR;
+               memmove(nb->wp, epkt, ETHERHDRSIZE+IP4HDR);
+               nb->wp += ETHERHDRSIZE+IP4HDR;
 
                if((fragoff + seglen) >= dlen) {
                        seglen = dlen - fragoff;
@@ -1125,22 +1098,17 @@ etherwrite(Port *port, Block *bp)
                else    
                        hnputs(feh->frag, (frag+fragoff>>3) | IP_MF);
 
-               hnputs(feh->length, seglen + IPHDR);
+               hnputs(feh->length, seglen + IP4HDR);
                hnputs(feh->id, lid);
 
-               /* Copy up the data area */
-               chunk = seglen;
-               while(chunk) {
-                       blklen = chunk;
-                       if(BLEN(xp) < chunk)
-                               blklen = BLEN(xp);
-                       memmove(nb->wp, xp->rp, blklen);
+               if(seglen){
+                       blklen = BLEN(bp);
+                       if(seglen < blklen)
+                               blklen = seglen;
+                       memmove(nb->wp, bp->rp, blklen);
                        nb->wp += blklen;
-                       xp->rp += blklen;
-                       chunk -= blklen;
-                       if(xp->rp == xp->wp)
-                               xp = xp->next;
-               } 
+                       bp->rp += blklen;
+               }
 
                feh->cksum[0] = 0;
                feh->cksum[1] = 0;
@@ -1148,21 +1116,18 @@ etherwrite(Port *port, Block *bp)
        
                /* don't generate small packets */
                if(BLEN(nb) < ETHERMINTU)
-                       nb->wp = nb->rp + ETHERMINTU;
+                       nb = adjustblock(nb, ETHERMINTU);
                devtab[port->data[1]->type]->bwrite(port->data[1], nb, 0);
        }
        poperror();
-       freeblist(bp);  
+       freeb(bp);      
 }
 
 // hold b lock
 static void
 portfree(Port *port)
 {
-       port->ref--;
-       if(port->ref < 0)
-               panic("portfree: bad ref");
-       if(port->ref > 0)
+       if(decref(port) != 0)
                return;
 
        if(port->data[0])