]> git.lizzy.rs Git - plan9front.git/blob - sys/src/9/port/devaoe.c
merge
[plan9front.git] / sys / src / 9 / port / devaoe.c
1 /*
2  *      © 2005-13 coraid
3  *      aoe storage initiator
4  */
5
6 #include "u.h"
7 #include "../port/lib.h"
8 #include "mem.h"
9 #include "dat.h"
10 #include "fns.h"
11 #include "io.h"
12 #include "ureg.h"
13 #include "../port/error.h"
14 #include "../port/netif.h"
15 #include "etherif.h"
16 #include "../ip/ip.h"
17 #include "../port/aoe.h"
18 #include <fis.h>
19
20 #pragma varargck argpos eventlog        1
21
/*
 * dprint logs through eventlog() only when the file-level debug flag is set.
 * uprint formats into the current process's genbuf scratch buffer.
 * Both expansions end in ';', so take care inside unbraced if/else.
 */
#define dprint(...)     if(debug) eventlog(__VA_ARGS__); else USED(debug);
#define uprint(...)     snprint(up->genbuf, sizeof up->genbuf, __VA_ARGS__);
24
/*
 * Qid path layout and table sizes.  A qid path packs a file type
 * (Typebits wide), a unit number (Unitbits wide) and a devlink index
 * (L3bits wide); see the TYPE, UNIT, L, QID and Q3 macros.
 */
enum {
        Typebits                = 4,
        Unitbits                = 12,
        L3bits          = 4,
        Maxtype         = (1<<Typebits)-1,      /* mask for the type field */
        Maxunits        = (1<<Unitbits)-1,      /* mask for the unit field */
        Maxl3           = (1<<L3bits)-1,        /* mask for the devlink index field */
        Maxframes       = 128,
        Maxmtu          = 100000,
        Ndevlink        = 6,            /* entries in Aoedev.dltab */
        Nea             = 6,            /* entries in Devlink.eatab */
        Nnetlink        = 6,            /* entries in the netlinks tables */
};
38
/*
 * Qid path packing: the low Typebits hold the file type, the next
 * Unitbits the unit number, and the L3bits above that the devlink index.
 * TYPE, UNIT and L extract the fields; QID and Q3 build a path.
 */
#define TYPE(q)         ((ulong)(q).path & Maxtype)
#define UNIT(q)         (((ulong)(q).path>>Typebits) & Maxunits)
#define L(q)            (((ulong)(q).path>>Typebits+Unitbits) & Maxl3)
#define QID(u, t)       ((u)<<Typebits | (t))
#define Q3(l, u, t)     ((l)<<Typebits+Unitbits | QID(u, t))
/* non-zero when the device or link has its Dup flag set */
#define UP(d)           ((d)->flag & Dup)

/* current tick count of processor 0, and tick/millisecond conversions */
#define Ticks           MACHP(0)->ticks
#define Ms2tk(t)        (((t)*HZ)/1000)
#define Tk2ms(t)        (((t)*1000)/HZ)
49
/*
 * File types used in qid paths (Q*), event log geometry, frame tag
 * sentinels, timing bounds and miscellaneous constants.
 */
enum {
        Qzero,                          /* root of the device tree (see aoeattach) */
        Qtopdir         = 1,            /* the "aoe" directory */
        Qtopbase,
        Qtopctl         = Qtopbase,
        Qtoplog,
        Qtopend,

        Qunitdir,                       /* per-unit directory */
        Qunitbase,
        Qctl            = Qunitbase,
        Qdata,
        Qconfig,
        Qident,

        Qdevlinkdir,                    /* "devlink" directory inside a unit */
        Qdevlinkbase,
        Qdevlink        = Qdevlinkbase,
        Qdevlinkend,

        Qtopfiles       = Qtopend-Qtopbase,             /* number of files in the top directory */
        Qdevlinkfiles   = Qdevlinkend-Qdevlinkbase,

        Eventlen        = 256,          /* bytes per event log slot, including the length byte */
        Nevents         = 64,           /* slots in the event log ring */

        Fread           = 0,
        Fwrite,
        Tfree           = -1,           /* Frame.tag value marking an unused frame */
        Tmgmt,                          /* tag value 0; newtag() never hands it out */

        /* round trip bounds, timeouts, in ticks */
        Rtmax           = Ms2tk(320),
        Rtmin           = Ms2tk(20),
        Srbtimeout      = 45*HZ,

        Dbcnt           = 1024,         /* byte count aoesweepproc falls back to after jumbo failure */

        /* presumably ATA command opcodes; atarw builds 0x20|writebit|extbit itself */
        Crd             = 0x20,
        Crdext          = 0x24,
        Cwr             = 0x30,
        Cwrext          = 0x34,
        Cid             = 0xec,

        /* Srb.state values: set by srballoc/srbkalloc and srbwakeup respectively */
        Alloc           = 0x01234567,
        Free            = 0x89abcdef,
};
97
/* transfer direction selectors; NOTE(review): users lie outside this chunk */
enum {
        Read,
        Write,
};
102
/*
 * unified set of flags
 * a Netlink + Aoedev must both be jumbo capable
 * to send jumbograms to that interface.
 */
enum {
        Dup     = 1<<0,         /* tested by UP() and pickdevlink */
        Djumbo  = 1<<1,         /* cleared by aoesweepproc on jumbo failure */
        Dnofail = 1<<2,         /* tested by Nofail() */
};
113
/* printable names for the flag bits, in bit order (Dup, Djumbo, Dnofail) */
static char *flagname[] = {
        "up",
        "jumbo",
        "nofail",
};
119
/*
 * One local network interface used for AoE traffic.
 */
typedef struct {
        uchar   flag;                   /* Dup, Djumbo, ... */
        uint    lostjumbo;              /* bumped by aoesweepproc when a large frame times out */

        Chan    *cc;                    /* nil marks an unused slot (see discover) */
        Chan    *dc;                    /* frames are written here with bwrite */
        Chan    *mtu;           /* open early to prevent bind issues. */
        char    path[Maxpath];          /* interface path, printed in log messages */
        uchar   ea[Eaddrlen];           /* copied into the source address of outgoing frames */
} Netlink;
130
/*
 * Path from one Aoedev to its target through one Netlink, with the
 * target addresses seen on that interface and per-path statistics.
 */
typedef struct {
        Netlink *nl;                    /* interface this link sends through */
        int     nea;                    /* number of valid entries in eatab */
        ulong   eaidx;                  /* round-robin counter used by pickea */
        uchar   eatab[Nea][Eaddrlen];   /* destination addresses */
        int     datamtu;
        ulong   npkt;                   /* frames sent on this link */
        ulong   resent;                 /* frames retransmitted on this link */
        uchar   flag;                   /* Dup is required by pickdevlink */

        ulong   rttavg;                 /* smoothed round trip time, ticks (see rtupdate) */
        ulong   mintimer;               /* lower bound applied to samples in rtupdate */
} Devlink;
144
/*
 * One queued I/O request.  atarw carves it into frames of at most
 * d->maxbcnt bytes; the requester sleeps on the embedded Rendez until
 * srbready() holds.
 */
typedef struct Srb Srb;
struct Srb {
        Rendez;
        uint    state;                  /* Alloc until srbwakeup sets Free */
        Srb     *next;                  /* link in the Aoedev head/tail queue */
        ulong   ticksent;               /* set at allocation; hset checks it against Srbtimeout */
        ulong   len;                    /* bytes not yet handed to a frame */
        vlong   sector;                 /* next sector to transfer */
        short   write;                  /* non-zero for a write request */
        short   nout;                   /* frames currently outstanding for this request */
        char    *error;                 /* set by srberror; nil otherwise */
        void    *dp;                    /* current position in the data buffer */
        void    *data;                  /* start of the data buffer */
};
159
/*
 * One outstanding AoE request frame and the state needed to resend it.
 */
typedef struct {
        int     tag;                    /* Tfree when unused, otherwise from newtag */
        ulong   bcnt;                   /* bytes covered by the request */
        ulong   dlen;                   /* payload bytes carried in the frame (0 for reads) */
        vlong   lba;                    /* starting sector */
        ulong   ticksent;               /* Ticks when the header was last set by hset */
        int     nhdr;                   /* valid bytes in hdr */
        uchar   hdr[ETHERMINTU];
        void    *dp;                    /* payload location in the srb's buffer */
        Devlink *dl;                    /* link chosen by hset */
        Netlink *nl;                    /* dl->nl at the time of sending */
        int     eaidx;                  /* index into dl->eatab used as destination */
        Srb     *srb;                   /* owning request, nil if none */
} Frame;
174
/*
 * One AoE target (shelf.slot).  The embedded QLock protects the frame
 * table and the srb queue; see strategy, aoesweepproc and aoeopen.
 */
typedef struct Aoedev Aoedev;
struct Aoedev {
        QLock;
        Aoedev  *next;                  /* link in devs.d */

        ulong   vers;                   /* reported as qid.vers by unitgen */

        int     ndl;                    /* number of devlinks considered by pickdevlink */
        ulong   dlidx;                  /* round-robin counter used by pickdevlink */
        Devlink *dl;
        Devlink dltab[Ndevlink];

        uchar   flag;                   /* Dup, Djumbo, Dnofail */
        ushort  fwver;
        int     nopen;                  /* open count of the data file */
        uint    major;                  /* shelf address */
        uint    minor;                  /* slot address */
        int     unit;                   /* unit number encoded in qid paths */
        int     lasttag;                /* counter feeding the high bits of new tags */
        int     nframes;                /* entries in frames */
        Frame   *frames;
        vlong   bsize;                  /* reported as the data file's length */
        vlong   realbsize;

        uint    maxbcnt;                /* largest byte count placed in one frame */
        uint    maxmtu;
        ulong   lostjumbo;
        ushort  nout;                   /* frames outstanding */
        ushort  maxout;                 /* window: limit on nout, adjusted by aoesweepproc */
        ulong   lastwadj;               /* Ticks of the last window adjustment */
        Srb     *head;                  /* queue of requests not yet started */
        Srb     *tail;
        Srb     *inprocess;             /* request currently being split into frames */

        Sfis;
        char    serial[20+1];
        char    firmware[8+1];
        char    model[40+1];
        int     nconfig;                /* reported as the config file's length */
        uchar   config[1024];
        uchar   ident[512];
};
217
218 #pragma varargck type   "æ"    Aoedev*
219
/*
 * Event log ring: Nevents slots of Eventlen bytes.  The first byte of a
 * slot holds the message length (0 means empty), the text follows.
 * The Lock guards the ring pointers, the QLock is held across a whole
 * eventlogread, and eventlog() wakes the Rendez after adding a message.
 */
static struct {
        Lock;
        QLock;
        Rendez;
        char    buf[Eventlen*Nevents];
        char    *rp;                    /* next slot to read */
        char    *wp;                    /* next slot to write */
} events;
228
/* list of known devices; walkers take the read lock */
static struct {
        RWlock;
        int     nd;
        Aoedev  *d;                     /* head of the list linked through Aoedev.next */
} devs;
234
/* table of local interfaces bound for AoE use, with per-slot reader state */
static struct {
        Lock;
        int     reader[Nnetlink];       /* reader is running. */
        Rendez  rendez[Nnetlink];       /* confirm exit. */
        Netlink nl[Nnetlink];
} netlinks;
241
/* file-level state */
extern  Dev     aoedevtab;
static  Ref     units;                  /* aoegen rejects unit numbers >= units.ref */
static  Ref     drivevers;
static  int     debug;                  /* enables dprint() */
static  int     autodiscover    = 1;
static  int     rediscover;             /* non-zero makes aoesweepproc rebroadcast discovery */
extern  char    Enotup[]        = "aoe device is down";
249
250 static Srb*
251 srballoc(ulong sz)
252 {
253         Srb *srb;
254
255         srb = smalloc(sizeof *srb+sz);
256         srb->state = Alloc;
257         srb->dp = srb->data = srb+1;
258         srb->ticksent = Ticks;
259         return srb;
260 }
261
262 static Srb*
263 srbkalloc(void *db, ulong)
264 {
265         Srb *srb;
266
267         srb = smalloc(sizeof *srb);
268         srb->state = Alloc;
269         srb->dp = srb->data = db;
270         srb->ticksent = Ticks;
271         return srb;
272 }
273
274 static int
275 srbready(void *v)
276 {
277         Srb *s;
278
279         s = v;
280         return s->nout == 0 && (s->len == 0 || s->error != nil);
281 }
282
283 static void
284 srbfree(Srb *srb)
285 {
286         int n;
287
288         for(n = 0; srb->state != Free; n++)
289                 sched();
290         free(srb);
291 }
292
293 /* under Aoedev qlock() so setting of srb->state is safe */
294 static void
295 srbwakeup(Srb *srb)
296 {
297         if(srbready(srb)){
298                 assert(srb->state == Alloc);
299                 wakeup(srb);
300                 srb->state = Free;
301         }
302 }
303
304 static void
305 srbcleanout(Aoedev *d, Srb *srb)
306 {
307         Srb *x, **ll;
308
309         if(srb == d->inprocess)
310                 d->inprocess = nil;
311         else
312                 for(ll = &d->head; x = *ll; ll = &x->next){
313                         d->tail = x;
314                         if(x == srb)
315                                 *ll = x->next;
316                 }
317 }
318
/*
 * Fail request srb on device d with error string s: detach it from
 * the device's queue/inprocess slot, record the error, and wake the
 * requester if no frames remain outstanding.
 */
static void
srberror(Aoedev *d, Srb *srb, char *s)
{
        srbcleanout(d, srb);
        srb->error = s;         /* must be set before srbwakeup evaluates srbready */
        srbwakeup(srb);
}
326
327 static void
328 frameerror(Aoedev *d, Frame *f, char *s)
329 {
330         Srb *srb;
331
332         if(f->tag == Tfree)
333                 return;
334         srb = f->srb;
335         f->srb = nil;
336         f->tag = Tfree;         /* don't get fooled by way-slow responses */
337         if(!srb)
338                 return;
339         srb->nout--;
340         srberror(d, srb, s);
341         d->nout--;
342 }
343
344 static char*
345 unitname(Aoedev *d)
346 {
347         uprint("%ud.%ud", d->major, d->minor);
348         return up->genbuf;
349 }
350
351 static long
352 eventlogread(void *a, long n)
353 {
354         int len;
355         char *p, *buf;
356
357         buf = smalloc(Eventlen);
358         qlock(&events);
359         lock(&events);
360         p = events.rp;
361         len = *p;
362         if(len == 0){
363                 n = 0;
364                 unlock(&events);
365         } else {
366                 if(n > len)
367                         n = len;
368                 /* can't move directly into pageable space with events lock held */
369                 memmove(buf, p+1, n);
370                 *p = 0;
371                 events.rp = p += Eventlen;
372                 if(p >= events.buf + sizeof events.buf)
373                         events.rp = events.buf;
374                 unlock(&events);
375
376                 /* the concern here is page faults in memmove below */
377                 if(waserror()){
378                         free(buf);
379                         qunlock(&events);
380                         nexterror();
381                 }
382                 memmove(a, buf, n);
383                 poperror();
384         }
385         free(buf);
386         qunlock(&events);
387         return n;
388 }
389
390 static int
391 eventlog(char *fmt, ...)
392 {
393         int dragrp, n;
394         char *p;
395         va_list arg;
396
397         lock(&events);
398         p = events.wp;
399         dragrp = *p++;
400         va_start(arg, fmt);
401         n = vsnprint(p, Eventlen-1, fmt, arg);
402         *--p = n;
403         p = events.wp += Eventlen;
404         if(p >= events.buf + sizeof events.buf)
405                 p = events.wp = events.buf;
406         if(dragrp)
407                 events.rp = p;
408         unlock(&events);
409         wakeup(&events);
410         return n;
411 }
412
413 static int
414 eventcount(void)
415 {
416         uint n;
417
418         lock(&events);
419         if(*events.rp == 0)
420                 n = 0;
421         else
422                 n = events.wp - events.rp & Nevents - 1;
423         unlock(&events);
424         return n/Eventlen;
425 }
426
427 static int
428 tsince(int tag)
429 {
430         int n;
431
432         n = Ticks & 0xffff;
433         n -= tag & 0xffff;
434         if(n < 0)
435                 n += 1<<16;
436         return n;
437 }
438
439 static int
440 newtag(Aoedev *d)
441 {
442         int t;
443
444         do {
445                 t = ++d->lasttag << 16;
446                 t |= Ticks & 0xffff;
447         } while (t == Tfree || t == Tmgmt);
448         return t;
449 }
450
451 static void
452 downdev(Aoedev *d, char *err)
453 {
454         Frame *f, *e;
455
456         d->flag &= ~Dup;
457         f = d->frames;
458         e = f + d->nframes;
459         for(; f < e; f++)
460                 frameerror(d, f, Enotup);
461         d->inprocess = nil;
462         eventlog("%æ: removed; %s\n", d, err);
463 }
464
465 static Block*
466 allocfb(Frame *f)
467 {
468         int len;
469         Block *b;
470
471         len = f->nhdr + f->dlen;
472         if(len < ETHERMINTU)
473                 len = ETHERMINTU;
474         b = allocb(len);
475         memmove(b->wp, f->hdr, f->nhdr);
476         if(f->dlen)
477                 memmove(b->wp + f->nhdr, f->dp, f->dlen);
478         b->wp += len;
479         return b;
480 }
481
482 static void
483 putlba(Aoeata *a, vlong lba)
484 {
485         uchar *c;
486
487         c = a->lba;
488         c[0] = lba;
489         c[1] = lba >> 8;
490         c[2] = lba >> 16;
491         c[3] = lba >> 24;
492         c[4] = lba >> 32;
493         c[5] = lba >> 40;
494 }
495
496 static Devlink*
497 pickdevlink(Aoedev *d)
498 {
499         ulong i, n;
500         Devlink *l;
501
502         for(i = 0; i < d->ndl; i++){
503                 n = d->dlidx++ % d->ndl;
504                 l = d->dl + n;
505                 if(l && l->flag & Dup)
506                         return l;
507         }
508         return 0;
509 }
510
511 static int
512 pickea(Devlink *l)
513 {
514         if(l == 0)
515                 return -1;
516         if(l->nea == 0)
517                 return -1;
518         return l->eaidx++ % l->nea;
519 }
520
/*
 * would like this to depend on the chan (srb).
 * not possible in the current structure.
 * the second argument is accepted but not used.
 */
#define Nofail(d, s)    (((d)->flag&Dnofail) == Dnofail)
526
/*
 * Fill in the AoE header h of frame f for device d and pick the
 * devlink and destination address it will be sent on.  cmd goes into
 * the header; when new is non-zero a fresh tag is generated, otherwise
 * f->tag is reused (resend).
 * Returns the frame's tag, or -1 if the owning request has exceeded
 * Srbtimeout or no up devlink/address is available.
 */
static int
hset(Aoedev *d, Frame *f, Aoehdr *h, int cmd, int new)
{
        int i;
        Devlink *l;

        if(f->srb)
        if((long)(Ticks-f->srb->ticksent) > Srbtimeout){
                eventlog("%æ: srb timeout\n", d);
                /* nofail ATA requests restart their clock instead of failing */
                if(cmd == ACata && Nofail(d, s))
                        f->srb->ticksent = Ticks;
                else
                        frameerror(d, f, Etimedout);
                return -1;
        }
        l = pickdevlink(d);
        i = pickea(l);
        if(i == -1){
                /* no path: take the device down unless this is a nofail ATA request */
                if(!(cmd == ACata && f->srb && Nofail(d, s)))
                        downdev(d, "resend fails; no netlink/ea");
                return -1;
        }
        memmove(h->dst, l->eatab[i], Eaddrlen);
        memmove(h->src, l->nl->ea, sizeof h->src);
        hnputs(h->type, Aoetype);
        h->verflag = Aoever << 4;
        h->error = 0;
        hnputs(h->major, d->major);
        h->minor = d->minor;
        h->cmd = cmd;

        if(new)
                f->tag = newtag(d);
        hnputl(h->tag, f->tag);
        /* remember the route so timeouts can be attributed to it */
        f->dl = l;
        f->nl = l->nl;
        f->eaidx = i;
        f->ticksent = Ticks;

        return f->tag;
}
568
/*
 * Retransmit frame f of device d, keeping its tag but possibly
 * choosing a different link.  If the device's maxbcnt has shrunk
 * below the frame's byte count, the sector count (and payload
 * length) are cut down to match.
 * Returns 0 on success, -1 if hset fails or the write raises an error.
 */
static int
resend(Aoedev *d, Frame *f)
{
        ulong n;
        Aoeata *a;
        Aoehdr *h;

        h = (Aoehdr*)f->hdr;
        if(hset(d, f, h, h->cmd, 0) == -1)
                return -1;
        a = (Aoeata*)(f->hdr + Aoehsz);
        n = f->bcnt;
        if(n > d->maxbcnt){
                n = d->maxbcnt;         /* mtu mismatch (jumbo fail?) */
                if(f->dlen > n)
                        f->dlen = n;
        }
        a->scnt = n / Aoesectsz;
        f->dl->resent++;
        f->dl->npkt++;
        if(waserror())
                /* should remove the netlink */
                return -1;
        devtab[f->nl->dc->type]->bwrite(f->nl->dc, allocfb(f), 0);
        poperror();
        return 0;
}
596
/*
 * Send an AoE config query (ACconfig) for address major.minor to the
 * all-ones destination address on every netlink whose cc is non-nil.
 * Errors raised by bwrite propagate to the caller.
 */
static void
discover(uint major, uint minor)
{
        Aoehdr *h;
        Block *b;
        Netlink *nl, *e;

        nl = netlinks.nl;
        e = nl + nelem(netlinks.nl);
        for(; nl < e; nl++){
                if(nl->cc == nil)
                        continue;
                b = allocb(ETHERMINTU);
                /* free the block if anything fails while it is being filled in */
                if(waserror()){
                        freeb(b);
                        nexterror();
                }
                b->wp = b->rp + ETHERMINTU;
                memset(b->rp, 0, ETHERMINTU);
                h = (Aoehdr*)b->rp;
                memset(h->dst, 0xff, sizeof h->dst);
                memmove(h->src, nl->ea, sizeof h->src);
                hnputs(h->type, Aoetype);
                h->verflag = Aoever << 4;
                hnputs(h->major, major);
                h->minor = minor;
                h->cmd = ACconfig;
                poperror();
                /* the block is handed to bwrite outside the error handler above */
                devtab[nl->dc->type]->bwrite(nl->dc, b, 0);
        }
}
628
629 /*
630  * Check all frames on device and resend any frames that have been
631  * outstanding for 200% of the device round trip time average.
632  */
633 static void
634 aoesweepproc(void*)
635 {
636         ulong i, tx, timeout, nbc;
637         vlong starttick;
638         enum { Nms = 100, Nbcms = 30*1000, };
639         uchar *ea;
640         Aoeata *a;
641         Aoedev *d;
642         Devlink *l;
643         Frame *f, *e;
644
645         nbc = Nbcms/Nms;
646 loop:
647         if(nbc-- == 0){
648                 if(rediscover && !waserror()){
649                         discover(0xffff, 0xff);
650                         poperror();
651                 }
652                 nbc = Nbcms/Nms;
653         }
654         starttick = Ticks;
655         rlock(&devs);
656         for(d = devs.d; d; d = d->next){
657                 if(!canqlock(d))
658                         continue;
659                 if(!UP(d)){
660                         qunlock(d);
661                         continue;
662                 }
663                 tx = 0;
664                 f = d->frames;
665                 e = f + d->nframes;
666                 for (; f < e; f++){
667                         if(f->tag == Tfree)
668                                 continue;
669                         l = f->dl;
670                         timeout = l->rttavg << 1;
671                         i = tsince(f->tag);
672                         if(i < timeout)
673                                 continue;
674                         if(d->nout == d->maxout){
675                                 if(d->maxout > 1)
676                                         d->maxout--;
677                                 d->lastwadj = Ticks;
678                         }
679                         a = (Aoeata*)(f->hdr + Aoehsz);
680                         if(a->scnt > Dbcnt / Aoesectsz &&
681                            ++f->nl->lostjumbo > (d->nframes << 1)){
682                                 ea = f->dl->eatab[f->eaidx];
683                                 eventlog("%æ: jumbo failure on %s:%E; %llud\n",
684                                         d, f->nl->path, ea, f->lba);
685                                 d->maxbcnt = Dbcnt;
686                                 d->flag &= ~Djumbo;
687                         }
688                         resend(d, f);
689                         if(tx++ == 0){
690                                 if((l->rttavg <<= 1) > Rtmax)
691                                         l->rttavg = Rtmax;
692                                 eventlog("%æ: rtt %ldms\n", d, Tk2ms(l->rttavg));
693                         }
694                 }
695                 if(d->nout == d->maxout && d->maxout < d->nframes &&
696                    TK2MS(Ticks-d->lastwadj) > 10*1000){
697                         d->maxout++;
698                         d->lastwadj = Ticks;
699                 }
700                 qunlock(d);
701         }
702         runlock(&devs);
703         i = Nms - TK2MS(Ticks - starttick);
704         if(i > 0)
705                 tsleep(&up->sleep, return0, 0, i);
706         goto loop;
707 }
708
709 static int
710 fmtæ(Fmt *f)
711 {
712         char buf[16];
713         Aoedev *d;
714
715         d = va_arg(f->args, Aoedev*);
716         snprint(buf, sizeof buf, "aoe%ud.%ud", d->major, d->minor);
717         return fmtstrcpy(f, buf);
718 }
719
720 static void netbind(char *path);
721
722 static void
723 aoecfg(void)
724 {
725         char *p, *f[32], buf[24], ifbuf[64];
726         int n, i;
727
728         if((p = getconf("aoeif")) == nil)
729                 return;
730         strncpy(ifbuf, p, sizeof(ifbuf)-1);
731         ifbuf[sizeof(ifbuf)-1] = 0;
732         if((n = tokenize(ifbuf, f, nelem(f))) < 1)
733                 return;
734         /* goo! */
735         for(i = 0; i < n; i++){
736                 p = f[i];
737                 if(strncmp(p, "ether", 5) == 0)
738                         snprint(buf, sizeof buf, "#l%c/ether%c", p[5], p[5]);
739                 else if(strncmp(p, "#l", 2) == 0)
740                         snprint(buf, sizeof buf, "#l%c/ether%c", p[2], p[2]);
741                 else
742                         continue;
743                 if(!waserror()){
744                         netbind(buf);
745                         poperror();
746                 }
747         }
748 }
749
750 static void
751 aoeinit(void)
752 {
753         static int init;
754         static QLock l;
755
756         if(!canqlock(&l))
757                 return;
758         if(init == 0){
759                 fmtinstall(L'æ', fmtæ);
760                 events.rp = events.wp = events.buf;
761                 kproc("aoesweep", aoesweepproc, nil);
762                 aoecfg();
763                 init = 1;
764         }
765         qunlock(&l);
766 }
767
768 static Chan*
769 aoeattach(char *spec)
770 {
771         Chan *c;
772
773         if(*spec)
774                 error(Enonexist);
775         aoeinit();
776         c = devattach(L'æ', spec);
777         mkqid(&c->qid, Qzero, 0, QTDIR);
778         return c;
779 }
780
781 static int
782 unitseq(Chan *c, uint unit, Dir *dp)
783 {
784         int i, rv;
785         Qid q;
786         Aoedev *d;
787
788         i = 0;
789         rv = -1;
790         rlock(&devs);
791         for(d = devs.d; d; d = d->next)
792                 if(i++ == unit){
793                         mkqid(&q, QID(d->unit, Qunitdir), 0, QTDIR);
794                         devdir(c, q, unitname(d), 0, eve, 0555, dp);
795                         rv = 1;
796                         break;
797                 }
798         runlock(&devs);
799         return rv;
800 }
801
802 static Aoedev*
803 unit2dev(ulong unit)
804 {
805         Aoedev *d;
806
807         rlock(&devs);
808         for(d = devs.d; d; d = d->next)
809                 if(d->unit == unit){
810                         runlock(&devs);
811                         return d;
812                 }
813         runlock(&devs);
814         error("unit lookup failure");
815         return nil;
816 }
817
818 static int
819 unitgen(Chan *c, ulong type, Dir *dp)
820 {
821         int perm, t;
822         ulong vers;
823         vlong size;
824         char *p;
825         Aoedev *d;
826         Qid q;
827
828         d = unit2dev(UNIT(c->qid));
829         perm = 0644;
830         size = 0;
831         vers = d->vers;
832         t = QTFILE;
833
834         switch(type){
835         default:
836                 return -1;
837         case Qctl:
838                 p = "ctl";
839                 break;
840         case Qdata:
841                 p = "data";
842                 perm = 0640;
843                 if(UP(d))
844                         size = d->bsize;
845                 break;
846         case Qconfig:
847                 p = "config";
848                 if(UP(d))
849                         size = d->nconfig;
850                 break;
851         case Qident:
852                 p = "ident";
853                 if(UP(d))
854                         size = sizeof d->ident;
855                 break;
856         case Qdevlinkdir:
857                 p = "devlink";
858                 t = QTDIR;
859                 perm = 0555;
860                 break;
861         }
862         mkqid(&q, QID(UNIT(c->qid), type), vers, t);
863         devdir(c, q, p, size, eve, perm, dp);
864         return 1;
865 }
866
867 static int
868 topgen(Chan *c, ulong type, Dir *d)
869 {
870         int perm;
871         vlong size;
872         char *p;
873         Qid q;
874
875         perm = 0444;
876         size = 0;
877         switch(type){
878         default:
879                 return -1;
880         case Qtopctl:
881                 p = "ctl";
882                 perm = 0644;
883                 break;
884         case Qtoplog:
885                 p = "log";
886                 size = eventcount();
887                 break;
888         }
889         mkqid(&q, type, 0, QTFILE);
890         devdir(c, q, p, size, eve, perm, d);
891         return 1;
892 }
893
894 static int
895 aoegen(Chan *c, char *, Dirtab *, int, int s, Dir *dp)
896 {
897         int i;
898         Aoedev *d;
899         Qid q;
900
901         if(c->qid.path == 0){
902                 switch(s){
903                 case DEVDOTDOT:
904                         q.path = 0;
905                         q.type = QTDIR;
906                         devdir(c, q, "#æ", 0, eve, 0555, dp);
907                         break;
908                 case 0:
909                         q.path = Qtopdir;
910                         q.type = QTDIR;
911                         devdir(c, q, "aoe", 0, eve, 0555, dp);
912                         break;
913                 default:
914                         return -1;
915                 }
916                 return 1;
917         }
918
919         switch(TYPE(c->qid)){
920         default:
921                 return -1;
922         case Qtopdir:
923                 if(s == DEVDOTDOT){
924                         mkqid(&q, Qzero, 0, QTDIR);
925                         devdir(c, q, "aoe", 0, eve, 0555, dp);
926                         return 1;
927                 }
928                 if(s < Qtopfiles)
929                         return topgen(c, Qtopbase + s, dp);
930                 s -= Qtopfiles;
931                 return unitseq(c, s, dp);
932         case Qtopctl:
933         case Qtoplog:
934                 return topgen(c, TYPE(c->qid), dp);
935         case Qunitdir:
936                 if(s == DEVDOTDOT){
937                         mkqid(&q, QID(0, Qtopdir), 0, QTDIR);
938                         uprint("%uld", UNIT(c->qid));
939                         devdir(c, q, up->genbuf, 0, eve, 0555, dp);
940                         return 1;
941                 }
942                 return unitgen(c, Qunitbase+s, dp);
943         case Qctl:
944         case Qdata:
945         case Qconfig:
946         case Qident:
947                 return unitgen(c, TYPE(c->qid), dp);
948         case Qdevlinkdir:
949                 i = UNIT(c->qid);
950                 if(s == DEVDOTDOT){
951                         mkqid(&q, QID(i, Qunitdir), 0, QTDIR);
952                         devdir(c, q, "devlink", 0, eve, 0555, dp);
953                         return 1;
954                 }
955                 if(i >= units.ref)
956                         return -1;
957                 d = unit2dev(i);
958                 if(s >= d->ndl)
959                         return -1;
960                 uprint("%d", s);
961                 mkqid(&q, Q3(s, i, Qdevlink), 0, QTFILE);
962                 devdir(c, q, up->genbuf, 0, eve, 0755, dp);
963                 return 1;
964         case Qdevlink:
965                 uprint("%d", s);
966                 mkqid(&q, Q3(s, UNIT(c->qid), Qdevlink), 0, QTFILE);
967                 devdir(c, q, up->genbuf, 0, eve, 0755, dp);
968                 return 1;
969         }
970 }
971
/* walk entry point: delegates to devwalk with aoegen as the generator */
static Walkqid*
aoewalk(Chan *c, Chan *nc, char **name, int nname)
{
        return devwalk(c, nc, name, nname, nil, 0, aoegen);
}
977
/* stat entry point: delegates to devstat with aoegen as the generator */
static int
aoestat(Chan *c, uchar *db, int n)
{
        return devstat(c, db, n, nil, 0, aoegen);
}
983
/*
 * Open entry point.  Everything but a unit's data file goes straight
 * to devopen.  Opening a data file requires the device to be up
 * (otherwise Enotup is raised) and increments the device's open count.
 */
static Chan*
aoeopen(Chan *c, int omode)
{
        Aoedev *d;

        if(TYPE(c->qid) != Qdata)
                return devopen(c, omode, 0, 0, aoegen);

        d = unit2dev(UNIT(c->qid));
        qlock(d);
        /* release the device lock if the up check or devopen raises an error */
        if(waserror()){
                qunlock(d);
                nexterror();
        }
        if(!UP(d))
                error(Enotup);
        c = devopen(c, omode, 0, 0, aoegen);
        d->nopen++;
        poperror();
        qunlock(d);
        return c;
}
1006
/*
 * Close entry point.  Only open data files are tracked.  When the
 * last open of a device's data file goes away, a discovery query for
 * that device is sent; errors from that query are swallowed.
 */
static void
aoeclose(Chan *c)
{
        Aoedev *d;

        if(TYPE(c->qid) != Qdata || (c->flag&COPEN) == 0)
                return;

        d = unit2dev(UNIT(c->qid));
        qlock(d);
        /* waserror is only armed on the last close, just around discover */
        if(--d->nopen == 0 && !waserror()){
                discover(d->major, d->minor);
                poperror();
        }
        qunlock(d);
}
1023
/*
 * Issue the next piece of the request in progress (d->inprocess)
 * using free frame f: build the AoE/ATA headers for up to d->maxbcnt
 * bytes, advance the request past that piece, and write the frame
 * to the chosen netlink.  If no route can be set up, the request in
 * progress is dropped from d->inprocess and nothing is sent.
 */
static void
atarw(Aoedev *d, Frame *f)
{
        ulong bcnt;
        char extbit, writebit;
        Aoeata *ah;
        Aoehdr *h;
        Srb *srb;

        extbit = 0x4;
        writebit = 0x10;

        srb = d->inprocess;
        bcnt = d->maxbcnt;
        if(bcnt > srb->len)
                bcnt = srb->len;
        f->nhdr = Aoehsz + Aoeatasz;
        memset(f->hdr, 0, f->nhdr);
        h = (Aoehdr*)f->hdr;
        if(hset(d, f, h, ACata, 1) == -1){
                d->inprocess = nil;
                return;
        }
        ah = (Aoeata*)(f->hdr + Aoehsz);
        f->dp = srb->dp;
        f->bcnt = bcnt;
        f->lba = srb->sector;
        f->srb = srb;

        ah->scnt = bcnt / Aoesectsz;
        putlba(ah, f->lba);
        if(d->feat & Dllba)
                ah->aflag |= AAFext;
        else {
                /* no 48-bit addressing: only the low nibble of lba[3] is address */
                extbit = 0;
                ah->lba[3] &= 0x0f;
                ah->lba[3] |= 0xe0;     /* LBA bit+obsolete 0xa0 */
        }
        if(srb->write){
                ah->aflag |= AAFwrite;
                f->dlen = bcnt;
        }else{
                /* reads carry no payload in the request frame */
                writebit = 0;
                f->dlen = 0;
        }
        ah->cmdstat = 0x20 | writebit | extbit;

        /* mark tracking fields and load out */
        srb->nout++;
        srb->dp = (uchar*)srb->dp + bcnt;
        srb->len -= bcnt;
        srb->sector += bcnt / Aoesectsz;
        if(srb->len == 0)
                d->inprocess = nil;
        d->nout++;
        f->dl->npkt++;
        if(waserror())
                frameerror(d, f, "write error");
        else{
                devtab[f->nl->dc->type]->bwrite(f->nl->dc, allocfb(f), 0);
                poperror();
        }
}
1087
1088 static char*
1089 aoeerror(Aoehdr *h)
1090 {
1091         int n;
1092         static char *errs[] = {
1093                 "aoe protocol error: unknown",
1094                 "aoe protocol error: bad command code",
1095                 "aoe protocol error: bad argument param",
1096                 "aoe protocol error: device unavailable",
1097                 "aoe protocol error: config string present",
1098                 "aoe protocol error: unsupported version",
1099                 "aoe protocol error: target is reserved",
1100         };
1101
1102         if((h->verflag & AFerr) == 0)
1103                 return 0;
1104         n = h->error;
1105         if(n > nelem(errs))
1106                 n = 0;
1107         return errs[n];
1108 }
1109
1110 static void
1111 rtupdate(Devlink *l, int rtt)
1112 {
1113         int n;
1114
1115         n = rtt;
1116         if(rtt < 0){
1117                 n = -rtt;
1118                 if(n < Rtmin)
1119                         n = Rtmin;
1120                 else if(n > Rtmax)
1121                         n = Rtmax;
1122                 l->mintimer += (n - l->mintimer) >> 1;
1123         } else if(n < l->mintimer)
1124                 n = l->mintimer;
1125         else if(n > Rtmax)
1126                 n = Rtmax;
1127
1128         /* g == .25; cf. Congestion Avoidance and Control, Jacobson&Karels; 1988 */
1129         n -= l->rttavg;
1130         l->rttavg += n >> 2;
1131 }
1132
1133 static Frame*
1134 getframe(Aoedev *d, int tag)
1135 {
1136         Frame *f, *e;
1137
1138         f = d->frames;
1139         e = f + d->nframes;
1140         for(; f < e; f++)
1141                 if(f->tag == tag)
1142                         return f;
1143         return nil;
1144 }
1145
1146 static Frame*
1147 freeframe(Aoedev *d)
1148 {
1149         if(d->nout < d->maxout)
1150                 return getframe(d, Tfree);
1151         return nil;
1152 }
1153
1154 static void
1155 work(Aoedev *d)
1156 {
1157         Frame *f;
1158
1159         while(f = freeframe(d)) {
1160                 if(d->inprocess == nil){
1161                         if(d->head == nil)
1162                                 return;
1163                         d->inprocess = d->head;
1164                         d->head = d->head->next;
1165                         if(d->head == nil)
1166                                 d->tail = nil;
1167                 }
1168                 atarw(d, f);
1169         }
1170 }
1171
1172 static void
1173 strategy(Aoedev *d, Srb *srb)
1174 {
1175         qlock(d);
1176         if(waserror()){
1177                 qunlock(d);
1178                 nexterror();
1179         }
1180         if(!UP(d))
1181                 error(Eio);
1182         srb->next = nil;
1183         if(d->tail)
1184                 d->tail->next = srb;
1185         d->tail = srb;
1186         if(d->head == nil)
1187                 d->head = srb;
1188         work(d);
1189         poperror();
1190         qunlock(d);
1191
1192         while(waserror()){
1193                 qlock(d);
1194                 srberror(d, srb, "interrupted");
1195                 qunlock(d);
1196         }
1197         sleep(srb, srbready, srb);
1198         poperror();
1199 }
1200
1201 #define iskaddr(a)      ((uintptr)(a) > KZERO)
1202
1203 static long
1204 rw(Aoedev *d, int write, uchar *db, long len, uvlong off)
1205 {
1206         long n, nlen, copy;
1207         enum { Srbsz = 1<<19, };
1208         Srb *srb;
1209
1210         if((off|len) & (Aoesectsz-1))
1211                 error("offset and length must be sector multiple.\n");
1212         if(!UP(d))
1213                 error(Eio);
1214         if(off >= d->bsize)
1215                 return 0;
1216         if(off + len > d->bsize)
1217                 len = d->bsize - off;
1218         copy = 0;
1219         if(iskaddr(db)){
1220                 srb = srbkalloc(db, len);
1221                 copy = 1;
1222         }else
1223                 srb = srballoc(Srbsz <= len? Srbsz: len);
1224         if(waserror()){
1225                 srbfree(srb);
1226                 nexterror();
1227         }
1228         srb->write = write;
1229         for(nlen = len; nlen; nlen -= n){
1230                 srb->sector = off / Aoesectsz;
1231                 srb->dp = srb->data;
1232                 n = nlen;
1233                 if(n > Srbsz)
1234                         n = Srbsz;
1235                 srb->len = n;
1236                 if(write && !copy)
1237                         memmove(srb->data, db, n);
1238                 strategy(d, srb);
1239                 if(srb->error)
1240                         error(srb->error);
1241                 if(!write && !copy)
1242                         memmove(db, srb->data, n);
1243                 db += n;
1244                 off += n;
1245         }
1246         poperror();
1247         srbfree(srb);
1248         return len;
1249 }
1250
1251 static long
1252 readmem(ulong off, void *dst, long n, void *src, long size)
1253 {
1254         if(off >= size)
1255                 return 0;
1256         if(off + n > size)
1257                 n = size - off;
1258         memmove(dst, (uchar*)src + off, n);
1259         return n;
1260 }
1261
1262 static char*
1263 aoeflag(char *s, char *e, uchar f)
1264 {
1265         uchar i;
1266
1267         for(i = 0; i < nelem(flagname); i++)
1268                 if(f & 1 << i)
1269                         s = seprint(s, e, "%s ", flagname[i]);
1270         return seprint(s, e, "\n");
1271 }
1272
1273 static int
1274 pstat(Aoedev *d, char *db, int len, int off)
1275 {
1276         int i;
1277         char *state, *s, *p, *e;
1278
1279         s = p = smalloc(READSTR);
1280         e = p + READSTR;
1281
1282         state = "down";
1283         if(UP(d))
1284                 state = "up";
1285
1286         p = seprint(p, e,
1287                 "state: %s\n"   "nopen: %d\n"   "nout: %d\n"
1288                 "nmaxout: %d\n" "nframes: %d\n" "maxbcnt: %d [maxmtu %d]\n"
1289                 "fw: %.4ux\n"
1290                 "model: %s\n"   "serial: %s\n"  "firmware: %s\n",
1291                 state,          d->nopen,       d->nout,
1292                 d->maxout,      d->nframes,     d->maxbcnt, d->maxmtu,
1293                 d->fwver,
1294                 d->model,       d->serial,      d->firmware);
1295         p = seprint(p, e, "flag: ");
1296         p = pflag(p, e, d);
1297         p[-1] = ' ';    /* horrid */
1298         p = aoeflag(p, e, d->flag);
1299
1300         if(p - s < len)
1301                 len = p - s;
1302         i = readstr(off, db, len, s);
1303         free(s);
1304         return i;
1305 }
1306
1307 static long
1308 unitread(Chan *c, void *db, long len, vlong off)
1309 {
1310         Aoedev *d;
1311
1312         d = unit2dev(UNIT(c->qid));
1313         if(d->vers != c->qid.vers)
1314                 error(Echange);
1315         switch(TYPE(c->qid)){
1316         default:
1317                 error(Ebadarg);
1318         case Qctl:
1319                 return pstat(d, db, len, off);
1320         case Qdata:
1321                 return rw(d, Read, db, len, off);
1322         case Qconfig:
1323                 if(!UP(d))
1324                         error(Enotup);
1325                 return readmem(off, db, len, d->config, d->nconfig);
1326         case Qident:
1327                 if(!UP(d))
1328                         error(Enotup);
1329                 return readmem(off, db, len, d->ident, sizeof d->ident);
1330         }
1331 }
1332
1333 static int
1334 getmtu(Chan *m)
1335 {
1336         int n, mtu;
1337         char buf[36];
1338
1339         mtu = 1514;
1340         if(m == nil || waserror())
1341                 return mtu;
1342         n = devtab[m->type]->read(m, buf, sizeof buf - 1, 0);
1343         poperror();
1344         if(n > 12){
1345                 buf[n] = 0;
1346                 mtu = strtoul(buf + 12, 0, 0);
1347         }
1348         return mtu;
1349 }
1350
1351 static int
1352 devlinkread(Chan *c, void *db, int len, int off)
1353 {
1354         int i;
1355         char *s, *p, *e;
1356         Aoedev *d;
1357         Devlink *l;
1358
1359         d = unit2dev(UNIT(c->qid));
1360         i = L(c->qid);
1361         if(i >= d->ndl)
1362                 return 0;
1363         l = d->dl + i;
1364
1365         s = p = malloc(READSTR);
1366         e = s + READSTR;
1367
1368         p = seprint(p, e, "addr: ");
1369         for(i = 0; i < l->nea; i++)
1370                 p = seprint(p, e, "%E ", l->eatab[i]);
1371         p = seprint(p, e, "\n");
1372         p = seprint(p, e, "npkt: %uld\n", l->npkt);
1373         p = seprint(p, e, "resent: %uld\n", l->resent);
1374         p = seprint(p, e, "flag: ");
1375         p = aoeflag(p, e, l->flag);
1376         p = seprint(p, e, "rttavg: %uld\n", Tk2ms(l->rttavg));
1377         p = seprint(p, e, "mintimer: %uld\n", Tk2ms(l->mintimer));
1378         p = seprint(p, e, "datamtu: %d\n", l->datamtu);
1379
1380         p = seprint(p, e, "nl path: %s\n", l->nl->path);
1381         p = seprint(p, e, "nl ea: %E\n", l->nl->ea);
1382         p = seprint(p, e, "nl flag: ");
1383         p = aoeflag(p, e, l->flag);
1384         p = seprint(p, e, "nl lostjumbo: %d\n", l->nl->lostjumbo);
1385         p = seprint(p, e, "nl datamtu: %d\n", getmtu(l->nl->mtu));
1386
1387         if(p - s < len)
1388                 len = p - s;
1389         i = readstr(off, db, len, s);
1390         free(s);
1391         return i;
1392 }
1393
1394 static long
1395 topctlread(Chan *, void *db, int len, int off)
1396 {
1397         int i;
1398         char *s, *p, *e;
1399         Netlink *n;
1400
1401         s = p = malloc(READSTR);
1402         e = s + READSTR;
1403
1404         p = seprint(p, e, "debug: %d\n", debug);
1405         p = seprint(p, e, "autodiscover: %d\n", autodiscover);
1406         p = seprint(p, e, "rediscover: %d\n", rediscover);
1407
1408         for(i = 0; i < Nnetlink; i++){
1409                 n = netlinks.nl+i;
1410                 if(n->cc == 0)
1411                         continue;
1412                 p = seprint(p, e, "if%d path: %s\n", i, n->path);
1413                 p = seprint(p, e, "if%d ea: %E\n", i, n->ea);
1414                 p = seprint(p, e, "if%d flag: ", i);
1415                 p = aoeflag(p, e, n->flag);
1416                 p = seprint(p, e, "if%d lostjumbo: %d\n", i, n->lostjumbo);
1417                 p = seprint(p, e, "if%d datamtu: %d\n", i, getmtu(n->mtu));
1418         }
1419
1420         if(p - s < len)
1421                 len = p - s;
1422         i = readstr(off, db, len, s);
1423         free(s);
1424         return i;
1425 }
1426
1427 static long
1428 aoeread(Chan *c, void *db, long n, vlong off)
1429 {
1430         switch(TYPE(c->qid)){
1431         default:
1432                 error(Eperm);
1433         case Qzero:
1434         case Qtopdir:
1435         case Qunitdir:
1436         case Qdevlinkdir:
1437                 return devdirread(c, db, n, 0, 0, aoegen);
1438         case Qtopctl:
1439                 return topctlread(c, db, n, off);
1440         case Qtoplog:
1441                 return eventlogread(db, n);
1442         case Qctl:
1443         case Qdata:
1444         case Qconfig:
1445         case Qident:
1446                 return unitread(c, db, n, off);
1447         case Qdevlink:
1448                 return devlinkread(c, db, n, off);
1449         }
1450 }
1451
1452 static long
1453 configwrite(Aoedev *d, void *db, long len)
1454 {
1455         char *s;
1456         Aoehdr *h;
1457         Aoecfg *ch;
1458         Frame *f;
1459         Srb *srb;
1460
1461         if(!UP(d))
1462                 error(Enotup);
1463         if(len > sizeof d->config)
1464                 error(Etoobig);
1465         srb = srballoc(len);
1466         s = malloc(len);
1467         memmove(s, db, len);
1468         if(waserror()){
1469                 srbfree(srb);
1470                 free(s);
1471                 nexterror();
1472         }
1473         for (;;) {
1474                 qlock(d);
1475                 if(waserror()){
1476                         qunlock(d);
1477                         nexterror();
1478                 }
1479                 f = freeframe(d);
1480                 if(f != nil)
1481                         break;
1482                 poperror();
1483                 qunlock(d);
1484                 if(waserror())
1485                         nexterror();
1486                 tsleep(&up->sleep, return0, 0, 100);
1487                 poperror();
1488         }
1489         f->nhdr = Aoehsz + Aoecfgsz;
1490         memset(f->hdr, 0, f->nhdr);
1491         h = (Aoehdr*)f->hdr;
1492         if(hset(d, f, h, ACconfig, 1) == -1)
1493                 return 0;
1494         ch = (Aoecfg*)(f->hdr + Aoehsz);
1495         f->srb = srb;
1496         f->dp = s;
1497         ch->verccmd = AQCfset;
1498         hnputs(ch->cslen, len);
1499         d->nout++;
1500         srb->nout++;
1501         f->dl->npkt++;
1502         f->dlen = len;
1503         /*
1504          * these refer to qlock & waserror in the above for loop.
1505          * there's still the first waserror outstanding.
1506          */
1507         poperror();
1508         qunlock(d);
1509
1510         devtab[f->nl->dc->type]->bwrite(f->nl->dc, allocfb(f), 0);
1511         sleep(srb, srbready, srb);
1512         if(srb->error)
1513                 error(srb->error);
1514
1515         qlock(d);
1516         if(waserror()){
1517                 qunlock(d);
1518                 nexterror();
1519         }
1520         memmove(d->config, s, len);
1521         d->nconfig = len;
1522         poperror();
1523         qunlock(d);
1524
1525         poperror();                     /* pop first waserror */
1526
1527         srbfree(srb);
1528         memmove(db, s, len);
1529         free(s);
1530         return len;
1531 }
1532
1533 static int
1534 devmaxdata(Aoedev *d)
1535 {
1536         int i, m, mtu, datamtu;
1537         Devlink *l;
1538         Netlink *n;
1539
1540         mtu = 100000;
1541         datamtu = 100000;
1542         for(i = 0; i < d->ndl; i++){
1543                 l = d->dl + i;
1544                 n = l->nl;
1545                 if((l->flag & Dup) == 0 || (n->flag & Dup) == 0)
1546                         continue;
1547                 m = getmtu(n->mtu);
1548                 if(l->datamtu < datamtu)
1549                         datamtu = l->datamtu;
1550                 if(m < mtu)
1551                         mtu = m;
1552         }
1553         if(mtu == 100000)
1554                 mtu = 1514;
1555         mtu -= Aoehsz + Aoeatasz;
1556         mtu -= mtu % Aoesectsz;
1557         if(mtu > datamtu)
1558                 mtu = datamtu;
1559         return mtu;
1560 }
1561
1562 static int
1563 toggle(char *s, uint f, uint bit)
1564 {
1565         if(s == nil)
1566                 f = f^bit;
1567         else if(strcmp(s, "on") == 0)
1568                 f |= bit;
1569         else
1570                 f &= ~bit;
1571         return f;
1572 }
1573
1574 static void ataident(Aoedev*);
1575
1576 static long
1577 unitctlwrite(Aoedev *d, void *db, long n)
1578 {
1579         uint maxbcnt, m;
1580         uvlong bsize;
1581         enum {
1582                 Failio,
1583                 Ident,
1584                 Jumbo,
1585                 Maxbno,
1586                 Mtu,
1587                 Nofailf,
1588                 Setsize,
1589         };
1590         Cmdbuf *cb;
1591         Cmdtab *ct;
1592         static Cmdtab cmds[] = {
1593                 {Failio,        "failio",       1 },
1594                 {Ident,         "identify",     1 },
1595                 {Jumbo,         "jumbo",        0 },
1596                 {Maxbno,        "maxbno",       0 },
1597                 {Mtu,           "mtu",          0 },
1598                 {Nofailf,               "nofail",               0 },
1599                 {Setsize,       "setsize",      0 },
1600         };
1601
1602         cb = parsecmd(db, n);
1603         qlock(d);
1604         if(waserror()){
1605                 qunlock(d);
1606                 free(cb);
1607                 nexterror();
1608         }
1609         ct = lookupcmd(cb, cmds, nelem(cmds));
1610         switch(ct->index){
1611         case Failio:
1612                 downdev(d, "i/o failure");
1613                 break;
1614         case Ident:
1615                 ataident(d);
1616                 break;
1617         case Jumbo:
1618                 d->flag = toggle(cb->f[1], d->flag, Djumbo);
1619                 break;
1620         case Maxbno:
1621         case Mtu:
1622                 maxbcnt = devmaxdata(d);
1623                 if(cb->nf > 2)
1624                         error(Ecmdargs);
1625                 if(cb->nf == 2){
1626                         m = strtoul(cb->f[1], 0, 0);
1627                         if(ct->index == Maxbno)
1628                                 m *= Aoesectsz;
1629                         else{
1630                                 m -= Aoehsz + Aoeatasz;
1631                                 m &= ~(Aoesectsz-1);
1632                         }
1633                         if(m == 0 || m > maxbcnt)
1634                                 cmderror(cb, "invalid mtu");
1635                         maxbcnt = m;
1636                         d->maxmtu = m;
1637                 } else
1638                         d->maxmtu = Maxmtu;
1639                 d->maxbcnt = maxbcnt;
1640                 break;
1641         case Nofailf:
1642                 d->flag = toggle(cb->f[1], d->flag, Dnofail);
1643                 break;
1644         case Setsize:
1645                 bsize = d->realbsize;
1646                 if(cb->nf > 2)
1647                         error(Ecmdargs);
1648                 if(cb->nf == 2){
1649                         bsize = strtoull(cb->f[1], 0, 0);
1650                         if(bsize % Aoesectsz)
1651                                 cmderror(cb, "disk size must be sector aligned");
1652                 }
1653                 d->bsize = bsize;
1654                 break;
1655         }
1656         poperror();
1657         qunlock(d);
1658         free(cb);
1659         return n;
1660 }
1661
1662 static long
1663 unitwrite(Chan *c, void *db, long n, vlong off)
1664 {
1665         long rv;
1666         char *buf;
1667         Aoedev *d;
1668
1669         d = unit2dev(UNIT(c->qid));
1670         switch(TYPE(c->qid)){
1671         default:
1672                 error(Ebadarg);
1673         case Qctl:
1674                 return unitctlwrite(d, db, n);
1675         case Qident:
1676                 error(Eperm);
1677         case Qdata:
1678                 return rw(d, Write, db, n, off);
1679         case Qconfig:
1680                 if(off + n > sizeof d->config)
1681                         error(Etoobig);
1682                 buf = malloc(sizeof d->config);
1683                 if(waserror()){
1684                         free(buf);
1685                         nexterror();
1686                 }
1687                 memmove(buf, d->config, d->nconfig);
1688                 memmove(buf + off, db, n);
1689                 rv = configwrite(d, buf, n + off);
1690                 poperror();
1691                 free(buf);
1692                 return rv;
1693         }
1694 }
1695
1696 static Netlink*
1697 addnet(char *path, Chan *cc, Chan *dc, Chan *mtu, uchar *ea)
1698 {
1699         Netlink *nl, *e;
1700
1701         lock(&netlinks);
1702         if(waserror()){
1703                 unlock(&netlinks);
1704                 nexterror();
1705         }
1706         nl = netlinks.nl;
1707         e = nl + nelem(netlinks.nl);
1708         for(; nl < e && nl->cc; nl++)
1709                 continue;
1710         if(nl == e)
1711                 error("out of netlink structures");
1712         nl->cc = cc;
1713         nl->dc = dc;
1714         nl->mtu = mtu;
1715         strncpy(nl->path, path, sizeof(nl->path)-1);
1716         nl->path[sizeof(nl->path)-1] = 0;
1717         memmove(nl->ea, ea, sizeof nl->ea);
1718         poperror();
1719         nl->flag |= Dup;
1720         unlock(&netlinks);
1721         return nl;
1722 }
1723
1724 static int
1725 newunit(void)
1726 {
1727         int x;
1728
1729         lock(&units);
1730         if(units.ref == Maxunits)
1731                 x = -1;
1732         else
1733                 x = units.ref++;
1734         unlock(&units);
1735         return x;
1736 }
1737
1738 static int
1739 dropunit(void)
1740 {
1741         int x;
1742
1743         lock(&units);
1744         x = --units.ref;
1745         unlock(&units);
1746         return x;
1747 }
1748
1749 /*
1750  * always allocate max frames.  maxout may change.
1751  */
1752 static Aoedev*
1753 newdev(uint major, uint minor, int n)
1754 {
1755         Aoedev *d;
1756         Frame *f, *e;
1757
1758         d = malloc(sizeof *d);
1759         f = malloc(sizeof *f*Maxframes);
1760         if(d == nil || f == nil) {
1761                 free(d);
1762                 free(f);
1763                 error("aoe device allocation failure");
1764         }
1765         d->nframes = n;
1766         d->frames = f;
1767         for (e = f + Maxframes; f < e; f++)
1768                 f->tag = Tfree;
1769         d->maxout = n;
1770         d->major = major;
1771         d->minor = minor;
1772         d->maxbcnt = Dbcnt;
1773         d->flag = Djumbo;
1774         d->maxmtu = Maxmtu;
1775         d->unit = newunit();            /* bzzt.  inaccurate if units removed */
1776         if(d->unit == -1){
1777                 free(d);
1778                 free(d->frames);
1779                 error("too many units");
1780         }
1781         d->dl = d->dltab;
1782         return d;
1783 }
1784
1785 static Aoedev*
1786 mm2dev(uint major, uint minor)
1787 {
1788         Aoedev *d;
1789
1790         rlock(&devs);
1791         for(d = devs.d; d; d = d->next)
1792                 if(d->major == major && d->minor == minor){
1793                         runlock(&devs);
1794                         return d;
1795                 }
1796         runlock(&devs);
1797         eventlog("mm2dev: %ud.%ud not found\n", major, minor);
1798         return nil;
1799 }
1800
1801 /* Find the device in our list.  If not known, add it */
1802 static Aoedev*
1803 getdev(uint major, uint minor, int n)
1804 {
1805         Aoedev *d;
1806
1807         if(major == 0xffff || minor == 0xff)
1808                 return 0;
1809         wlock(&devs);
1810         if(waserror()){
1811                 wunlock(&devs);
1812                 nexterror();
1813         }
1814         for(d = devs.d; d; d = d->next)
1815                 if(d->major == major && d->minor == minor)
1816                         break;
1817         if(d == nil) {
1818                 d = newdev(major, minor, n);
1819                 d->next = devs.d;
1820                 devs.d = d;
1821         }
1822         poperror();
1823         wunlock(&devs);
1824         return d;
1825 }
1826
1827 static void
1828 ataident(Aoedev *d)
1829 {
1830         Aoeata *a;
1831         Aoehdr *h;
1832         Frame *f;
1833
1834         f = freeframe(d);
1835         if(f == nil)
1836                 return;
1837         f->nhdr = Aoehsz + Aoeatasz;
1838         memset(f->hdr, 0, f->nhdr);
1839         h = (Aoehdr*)f->hdr;
1840         if(hset(d, f, h, ACata, 1) == -1)
1841                 return;
1842         a = (Aoeata*)(f->hdr + Aoehsz);
1843         f->srb = srbkalloc(0, 0);
1844         a->cmdstat = Cid;       /* ata 6, page 110 */
1845         a->scnt = 1;
1846         a->lba[3] = 0xa0;
1847         d->nout++;
1848         f->dl->npkt++;
1849         f->bcnt = 512;
1850         f->dlen = 0;
1851         if(waserror()){
1852                 srbfree(f->srb);
1853                 d->nout--;
1854                 f->tag = Tfree;
1855         }else{
1856                 devtab[f->nl->dc->type]->bwrite(f->nl->dc, allocfb(f), 0);
1857                 poperror();
1858         }
1859 }
1860
1861 static int
1862 newdlea(Devlink *l, uchar *ea)
1863 {
1864         int i;
1865         uchar *t;
1866
1867         for(i = 0; i < Nea; i++){
1868                 t = l->eatab[i];
1869                 if(i == l->nea){
1870                         memmove(t, ea, Eaddrlen);
1871                         return l->nea++;
1872                 }
1873                 if(memcmp(t, ea, Eaddrlen) == 0)
1874                         return i;
1875         }
1876         return -1;
1877 }
1878
1879 static Devlink*
1880 newdevlink(Aoedev *d, Netlink *n, Aoehdr *h)
1881 {
1882         int i;
1883         Aoecfg *c;
1884         Devlink *l;
1885
1886         c = (Aoecfg*)((uchar*)h + Aoehsz);
1887         for(i = 0; i < Ndevlink; i++){
1888                 l = d->dl + i;
1889                 if(i == d->ndl){
1890                         d->ndl++;
1891                         newdlea(l, h->src);
1892                         l->datamtu = c->scnt*Aoesectsz;
1893                         l->nl = n;
1894                         l->flag |= Dup;
1895                         l->mintimer = Rtmin;
1896                         l->rttavg = Rtmax;
1897                         return l;
1898                 }
1899                 if(l->nl == n){
1900                         newdlea(l, h->src);
1901                         l->datamtu = c->scnt*Aoesectsz;
1902                         l->flag |= Dup;
1903                         return l;
1904                 }
1905         }
1906         eventlog("%æ: out of links: %s:%E to %E\n", d, n->path, n->ea, h->src);
1907         return 0;
1908 }
1909
1910 static void
1911 errrsp(Block *b, char *s)
1912 {
1913         int n;
1914         Aoedev *d;
1915         Aoehdr *h;
1916         Frame *f;
1917
1918         h = (Aoehdr*)b->rp;
1919         n = nhgetl(h->tag);
1920         if(n == Tmgmt || n == Tfree)
1921                 return;
1922         d = mm2dev(nhgets(h->major), h->minor);
1923         if(d == nil)
1924                 return;
1925         if(f = getframe(d, n))
1926                 frameerror(d, f, s);
1927 }
1928
1929 static void
1930 qcfgrsp(Block *b, Netlink *nl)
1931 {
1932         int cmd, cslen, blen;
1933         uint n, major;
1934         Aoedev *d;
1935         Aoehdr *h, *h0;
1936         Aoecfg *ch;
1937         Devlink *l;
1938         Frame *f;
1939         Srb *srb;
1940
1941         h = (Aoehdr*)b->rp;
1942         ch = (Aoecfg*)(b->rp + Aoehsz);
1943         major = nhgets(h->major);
1944         n = nhgetl(h->tag);
1945         if(n != Tmgmt && n != Tfree){
1946                 d = mm2dev(major, h->minor);
1947                 if(d == nil)
1948                         return;
1949                 qlock(d);
1950                 f = getframe(d, n);
1951                 if(f == nil){
1952                         qunlock(d);
1953                         eventlog("%æ: unknown response tag %ux\n", d, n);
1954                         return;
1955                 }
1956                 h0 = (Aoehdr*)f->hdr;
1957                 cmd = h0->cmd;
1958                 if(cmd != ACconfig){
1959                         qunlock(d);
1960                         eventlog("%æ: malicious server got ACconfig want %d; tag %ux\n", d, cmd, n);
1961                         return;
1962                 }
1963                 cslen = nhgets(ch->cslen);
1964                 blen = BLEN(b) - (Aoehsz + Aoecfgsz);
1965                 if(cslen < blen && BLEN(b) > 60)
1966                         eventlog("%æ: cfgrsp: tag %.8ux oversized %d %d\n",
1967                                 d, n, cslen, blen);
1968                 if(cslen > blen){
1969                         eventlog("%æ: cfgrsp: tag %.8ux runt %d %d\n",
1970                                 d, n, cslen, blen);
1971                         cslen = blen;
1972                 }
1973                 memmove(f->dp, b->rp + Aoehsz + Aoecfgsz, cslen);
1974                 srb = f->srb;
1975                 f->dp = nil;
1976                 f->srb = nil;
1977                 if(srb){
1978                         srb->nout--;
1979                         srbwakeup(srb);
1980                         d->nout--;
1981                         f->tag = Tfree;
1982                 }
1983                 qunlock(d);
1984                 return;
1985         }
1986
1987         cmd = ch->verccmd & 0xf;
1988         if(cmd != 0){
1989                 eventlog("aoe%ud.%ud: cfgrsp: bad command %d\n", major, h->minor, cmd);
1990                 return;
1991         }
1992         n = nhgets(ch->bufcnt);
1993         if(n > Maxframes)
1994                 n = Maxframes;
1995
1996         if(waserror()){
1997                 eventlog("getdev: %ud.%ud ignored: %s\n", major, h->minor, up->errstr);
1998                 return;
1999         }
2000         d = getdev(major, h->minor, n);
2001         poperror();
2002         if(d == 0)
2003                 return;
2004
2005         qlock(d);
2006         *up->errstr = 0;
2007         if(waserror()){
2008                 qunlock(d);
2009                 eventlog("%æ: %s\n", d, up->errstr);
2010                 nexterror();
2011         }
2012
2013         l = newdevlink(d, nl, h);               /* add this interface. */
2014
2015         d->fwver = nhgets(ch->fwver);
2016         cslen = nhgets(ch->cslen);
2017         if(cslen > sizeof d->config)
2018                 cslen = sizeof d->config;
2019         if(Aoehsz + Aoecfgsz + cslen > BLEN(b))
2020                 cslen = BLEN(b) - (Aoehsz + Aoecfgsz);
2021         d->nconfig = cslen;
2022         memmove(d->config, b->rp + Aoehsz + Aoecfgsz, cslen);
2023
2024         /* manually set mtu may be reset lower if conditions warrant */
2025         if(l){
2026                 n = devmaxdata(d);
2027                 if((d->flag & Djumbo) == 0)
2028                         n = Dbcnt;
2029                 if(n > d->maxmtu)
2030                         n = d->maxmtu;
2031                 if(n != d->maxbcnt){
2032                         eventlog("%æ: setting %d byte mtu on %s:%E\n",
2033                                 d, n, nl->path, nl->ea);
2034                         d->maxbcnt = n;
2035                 }
2036         }
2037         if(d->nopen == 0)
2038                 ataident(d);
2039         poperror();
2040         qunlock(d);
2041 }
2042
2043 static vlong
2044 aoeidentify(Aoedev *d, ushort *id)
2045 {
2046         vlong s;
2047
2048         s = idfeat(d, id);
2049         if(s == -1){
2050                 eventlog("%æ: idfeat returns -1\n", d);
2051                 return -1;
2052         }
2053         if((d->feat&Dlba) == 0){
2054                 eventlog("%æ: no lba support\n", d);
2055                 return -1;
2056         }
2057         d->flag |= Dup;
2058         memmove(d->ident, id, sizeof d->ident);
2059         return s;
2060 }
2061
2062 static void
2063 newvers(Aoedev *d)
2064 {
2065         lock(&drivevers);
2066         d->vers = drivevers.ref++;
2067         unlock(&drivevers);
2068 }
2069
2070 static int
2071 identify(Aoedev *d, ushort *id)
2072 {
2073         vlong osectors, s;
2074         uchar oserial[21];
2075
2076         s = aoeidentify(d, id);
2077         if(s == -1)
2078                 return -1;
2079         osectors = d->realbsize;
2080         memmove(oserial, d->serial, sizeof d->serial);
2081
2082         idmove(d->serial, id+10, 20);
2083         idmove(d->firmware, id+23, 8);
2084         idmove(d->model, id+27, 40);
2085         /* idss() */
2086         /* d->wwn = idwwn(d, id); */
2087
2088         s *= Aoesectsz;
2089         if(osectors != s || memcmp(oserial, d->serial, sizeof oserial)){
2090                 d->bsize = s;
2091                 d->realbsize = s;
2092 //              d->mediachange = 1;
2093                 newvers(d);
2094         }
2095         return 0;
2096 }
2097
2098 static void
2099 atarsp(Block *b)
2100 {
2101         uint n, cmd;
2102         ushort major;
2103         Aoeata *ahin, *ahout;
2104         Aoehdr *h, *h0;
2105         Aoedev *d;
2106         Frame *f;
2107         Srb *srb;
2108
2109         h = (Aoehdr*)b->rp;
2110         major = nhgets(h->major);
2111         d = mm2dev(major, h->minor);
2112         if(d == nil)
2113                 return;
2114         ahin = (Aoeata*)(b->rp + Aoehsz);
2115         qlock(d);
2116         if(waserror()){
2117                 qunlock(d);
2118                 nexterror();
2119         }
2120         n = nhgetl(h->tag);
2121         if(n == Tfree || n == Tmgmt)
2122                 goto bail;
2123         f = getframe(d, n);
2124         if(f == nil){
2125                 eventlog("%æ: unexpected response; tag %ux\n", d, n);
2126                 goto bail;
2127         }
2128         h0 = (Aoehdr*)f->hdr;
2129         cmd = h0->cmd;
2130         if(cmd != ACata){
2131                 eventlog("%æ: malicious server got ACata want %d; tag %ux\n", d, cmd, n);
2132                 goto bail;
2133         }
2134
2135         rtupdate(f->dl, tsince(f->tag));
2136         ahout = (Aoeata*)(f->hdr + Aoehsz);
2137         srb = f->srb;
2138
2139         if(ahin->cmdstat & 0xa9){
2140                 eventlog("%æ: ata error cmd %.2ux stat %.2ux\n",
2141                         d, ahout->cmdstat, ahin->cmdstat);
2142                 if(srb)
2143                         srb->error = Eio;
2144         } else {
2145                 n = ahout->scnt * Aoesectsz;
2146                 switch(ahout->cmdstat){
2147                 case Crd:
2148                 case Crdext:
2149                         if(BLEN(b) - (Aoehsz + Aoeatasz) != n){
2150                                 eventlog("%æ: misread blen %ld expect %d\n",
2151                                         d, BLEN(b), n);
2152                                 goto bail;
2153                         }
2154                         memmove(f->dp, b->rp + Aoehsz + Aoeatasz, n);
2155                 case Cwr:
2156                 case Cwrext:
2157                         if(n > Dbcnt)
2158                                 f->nl->lostjumbo = 0;
2159                         if(f->bcnt -= n){
2160                                 f->lba += n / Aoesectsz;
2161                                 f->dp = (uchar*)f->dp + n;
2162                                 resend(d, f);
2163                                 goto bail;
2164                         }
2165                         break;
2166                 case Cid:
2167                         if(BLEN(b) - (Aoehsz + Aoeatasz) < 512){
2168                                 eventlog("%æ: runt identify blen %ld expect %d\n",
2169                                         d, BLEN(b), 512 + Aoehsz + Aoeatasz);
2170                                 goto bail;
2171                         }
2172                         identify(d, (ushort*)(b->rp + Aoehsz + Aoeatasz));
2173                         free(srb);              /* BOTCH */
2174                         srb = nil;
2175                         break;
2176                 default:
2177                         eventlog("%æ: unknown ata command %.2ux \n",
2178                                 d, ahout->cmdstat);
2179                 }
2180         }
2181
2182         f->srb = nil;
2183         if(srb){
2184                 srb->nout--;
2185                 srbwakeup(srb);
2186         }
2187         f->tag = Tfree;
2188         d->nout--;
2189
2190         work(d);
2191 bail:
2192         poperror();
2193         qunlock(d);
2194 }
2195
2196 static void
2197 netrdaoeproc(void *v)
2198 {
2199         int idx;
2200         char name[Maxpath+1], *s;
2201         Aoehdr *h;
2202         Block *b;
2203         Netlink *nl;
2204
2205         nl = (Netlink*)v;
2206         idx = nl - netlinks.nl;
2207         netlinks.reader[idx] = 1;
2208         kstrcpy(name, nl->path, Maxpath);
2209
2210         if(waserror()){
2211                 eventlog("netrdaoe@%s: exiting: %s\n", name, up->errstr);
2212                 netlinks.reader[idx] = 0;
2213                 wakeup(netlinks.rendez + idx);
2214                 pexit(up->errstr, 1);
2215         }
2216         if(autodiscover)
2217                 discover(0xffff, 0xff);
2218         for (;;) {
2219                 if((nl->flag & Dup) == 0)
2220                         error("netlink is down");
2221                 if(nl->dc == nil)
2222                         panic("netrdaoe: nl->dc == nil");
2223                 b = devtab[nl->dc->type]->bread(nl->dc, 1<<16, 0);
2224                 if(b == nil)
2225                         error("network read");
2226                 h = (Aoehdr*)b->rp;
2227                 if(h->verflag & AFrsp)
2228                         if(s = aoeerror(h)){
2229                                 eventlog("%s: %d.%d %s\n", nl->path,
2230                                         h->major[0]<<8 | h->major[1], h->minor, s);
2231                                 errrsp(b, s);
2232                         }else if(h->cmd == ACata)
2233                                 atarsp(b);
2234                         else if(h->cmd == ACconfig)
2235                                 qcfgrsp(b, nl);
2236                         else if((h->cmd & 0xf0) != 0xf0){
2237                                 eventlog("%s: unknown cmd %d\n",
2238                                         nl->path, h->cmd);
2239                                 errrsp(b, "unknown command");
2240                         }
2241                 freeb(b);
2242         }
2243 }
2244
2245 static void
2246 getaddr(char *path, uchar *ea)
2247 {
2248         int n;
2249         char buf[2*Eaddrlen+1];
2250         Chan *c;
2251
2252         uprint("%s/addr", path);
2253         c = namec(up->genbuf, Aopen, OREAD, 0);
2254         if(waserror()) {
2255                 cclose(c);
2256                 nexterror();
2257         }
2258         if(c == nil)
2259                 panic("æ: getaddr: c == nil");
2260         n = devtab[c->type]->read(c, buf, sizeof buf-1, 0);
2261         poperror();
2262         cclose(c);
2263         buf[n] = 0;
2264         if(parseether(ea, buf) < 0)
2265                 error("parseether failure");
2266 }
2267
2268 static void
2269 netbind(char *path)
2270 {
2271         char addr[Maxpath];
2272         uchar ea[2*Eaddrlen+1];
2273         Chan *dc, *cc, *mtu;
2274         Netlink *nl;
2275
2276         snprint(addr, sizeof addr, "%s!0x%x", path, Aoetype);
2277         dc = chandial(addr, nil, nil, &cc);
2278         snprint(addr, sizeof addr, "%s/mtu", path);
2279         if(waserror())
2280                 mtu = nil;
2281         else {
2282                 mtu = namec(addr, Aopen, OREAD, 0);
2283                 poperror();
2284         }
2285
2286         if(waserror()){
2287                 cclose(dc);
2288                 cclose(cc);
2289                 if(mtu)
2290                         cclose(mtu);
2291                 nexterror();
2292         }
2293         if(dc == nil  || cc == nil)
2294                 error(Enonexist);
2295         getaddr(path, ea);
2296         nl = addnet(path, cc, dc, mtu, ea);
2297         snprint(addr, sizeof addr, "netrdaoe@%s", path);
2298         kproc(addr, netrdaoeproc, nl);
2299         poperror();
2300 }
2301
/*
 * Sleep predicate: v points at an int flag; returns 1 when the flag
 * is nonzero, 0 otherwise.
 *
 * NOTE(review): netunbind sleeps on this to "confirm reader is down",
 * while netrdaoeproc sets the flag to 1 when it starts and to 0 when
 * it exits — the sense of this test looks inverted; confirm it is
 * intended before changing it.
 */
static int
unbound(void *v)
{
	int *flag;

	flag = v;
	return *flag != 0;
}
2307
2308 static void
2309 netunbind(char *path)
2310 {
2311         int i, idx;
2312         Aoedev *d, *p, *next;
2313         Chan *dc, *cc;
2314         Devlink *l;
2315         Frame *f;
2316         Netlink *n, *e;
2317
2318         n = netlinks.nl;
2319         e = n + nelem(netlinks.nl);
2320
2321         lock(&netlinks);
2322         for(; n < e; n++)
2323                 if(n->dc && strcmp(n->path, path) == 0)
2324                         break;
2325         unlock(&netlinks);
2326         if(n == e)
2327                 error("device not bound");
2328
2329         /*
2330          * hunt down devices using this interface; disable
2331          * this also terminates the reader.
2332          */
2333         idx = n - netlinks.nl;
2334         wlock(&devs);
2335         for(d = devs.d; d; d = d->next){
2336                 qlock(d);
2337                 for(i = 0; i < d->ndl; i++){
2338                         l = d->dl + i;
2339                         if(l->nl == n)
2340                                 l->flag &= ~Dup;
2341                 }
2342                 qunlock(d);
2343         }
2344         n->flag &= ~Dup;
2345         wunlock(&devs);
2346
2347         /* confirm reader is down. */
2348         while(waserror())
2349                 ;
2350         sleep(netlinks.rendez + idx, unbound, netlinks.reader + idx);
2351         poperror();
2352
2353         /* reschedule packets. */
2354         wlock(&devs);
2355         for(d = devs.d; d; d = d->next){
2356                 qlock(d);
2357                 for(i = 0; i < d->nframes; i++){
2358                         f = d->frames + i;
2359                         if(f->tag != Tfree && f->nl == n)
2360                                 resend(d, f);
2361                 }
2362                 qunlock(d);
2363         }
2364         wunlock(&devs);
2365
2366         /* squeeze devlink pool.  (we assert nobody is using them now) */
2367         wlock(&devs);
2368         for(d = devs.d; d; d = d->next){
2369                 qlock(d);
2370                 for(i = 0; i < d->ndl; i++){
2371                         l = d->dl + i;
2372                         if(l->nl == n)
2373                                 memmove(l, l + 1, sizeof *l * (--d->ndl - i));
2374                 }
2375                 qunlock(d);
2376         }
2377         wunlock(&devs);
2378
2379         /* close device link. */
2380         lock(&netlinks);
2381         dc = n->dc;
2382         cc = n->cc;
2383         if(n->mtu)
2384                 cclose(n->mtu);
2385         memset(n, 0, sizeof *n);
2386         unlock(&netlinks);
2387
2388         cclose(dc);
2389         cclose(cc);
2390
2391         /* squeeze orphan devices */
2392         wlock(&devs);
2393         for(p = d = devs.d; d; d = next){
2394                 next = d->next;
2395                 if(d->ndl > 0){
2396                         p = d;
2397                         continue;
2398                 }
2399                 qlock(d);
2400                 downdev(d, "orphan");
2401                 qunlock(d);
2402                 if(p != devs.d)
2403                         p->next = next;
2404                 else{
2405                         devs.d = next;
2406                         p = devs.d;
2407                 }
2408                 free(d->frames);
2409                 free(d);
2410                 dropunit();
2411         }
2412         wunlock(&devs);
2413 }
2414
2415 static void
2416 strtoss(char *f, uint *shelf, uint *slot)
2417 {
2418         char *s;
2419
2420         *shelf = 0xffff;
2421         *slot = 0xff;
2422         if(!f)
2423                 return;
2424         *shelf = strtol(f, &s, 0);
2425         if(s == f || *shelf > 0xffff)
2426                 error("bad shelf");
2427         f = s;
2428         if(*f++ == '.'){
2429                 *slot = strtol(f, &s, 0);
2430                 if(s == f || *slot > 0xff)
2431                         error("bad slot");
2432         }
2433 }
2434
2435 static void
2436 discoverstr(char *f)
2437 {
2438         uint shelf, slot;
2439
2440         strtoss(f, &shelf, &slot);
2441         discover(shelf, slot);
2442 }
2443
2444 static void
2445 removedev(Aoedev *d)
2446 {
2447         int i;
2448         Aoedev *p;
2449
2450         wlock(&devs);
2451         p = 0;
2452         if(d != devs.d)
2453         for(p = devs.d; p; p = p->next)
2454                 if(p->next == d)
2455                         break;
2456         qlock(d);
2457         d->flag &= ~Dup;
2458         newvers(d);
2459         d->ndl = 0;
2460         qunlock(d);
2461         for(i = 0; i < d->nframes; i++)
2462                 frameerror(d, d->frames+i, Enotup);
2463
2464         if(p)
2465                 p->next = d->next;
2466         else
2467                 devs.d = d->next;
2468         free(d->frames);
2469         free(d);
2470         dropunit();
2471         wunlock(&devs);
2472 }
2473
2474
2475 static void
2476 aoeremove(Chan *c)
2477 {
2478         switch(TYPE(c->qid)){
2479         default:
2480         case Qzero:
2481         case Qtopdir:
2482         case Qtoplog:
2483         case Qtopctl:
2484         case Qctl:
2485         case Qdata:
2486         case Qconfig:
2487         case Qident:
2488                 error(Eperm);
2489         case Qunitdir:
2490                 removedev(unit2dev(UNIT(c->qid)));
2491                 break;
2492         }
2493 }
2494
2495 static void
2496 removestr(char *f)
2497 {
2498         uint shelf, slot;
2499         Aoedev *d;
2500
2501         strtoss(f, &shelf, &slot);
2502         wlock(&devs);
2503         for(d = devs.d; d; d = d->next)
2504                 if(shelf == d->major && slot == d->minor){
2505                         wunlock(&devs); /* BOTCH */
2506                         removedev(d);
2507                         return;
2508                 }
2509         wunlock(&devs);
2510         error("device not bound");
2511 }
2512
2513 static long
2514 topctlwrite(void *db, long n)
2515 {
2516         enum {
2517                 Autodiscover,
2518                 Bind,
2519                 Debug,
2520                 Discover,
2521                 Rediscover,
2522                 Remove,
2523                 Unbind,
2524         };
2525         char *f;
2526         Cmdbuf *cb;
2527         Cmdtab *ct;
2528         static Cmdtab cmds[] = {
2529                 { Autodiscover, "autodiscover", 0       },
2530                 { Bind,         "bind",         2       },
2531                 { Debug,        "debug",        0       },
2532                 { Discover,     "discover",     0       },
2533                 { Rediscover,   "rediscover",   0       },
2534                 { Remove,       "remove",       2       },
2535                 { Unbind,       "unbind",       2       },
2536         };
2537
2538         cb = parsecmd(db, n);
2539         if(waserror()){
2540                 free(cb);
2541                 nexterror();
2542         }
2543         ct = lookupcmd(cb, cmds, nelem(cmds));
2544         f = cb->f[1];
2545         switch(ct->index){
2546         case Autodiscover:
2547                 autodiscover = toggle(f, autodiscover, 1);
2548                 break;
2549         case Bind:
2550                 netbind(f);
2551                 break;
2552         case Debug:
2553                 debug = toggle(f, debug, 1);
2554                 break;
2555         case Discover:
2556                 discoverstr(f);
2557                 break;
2558         case Rediscover:
2559                 rediscover = toggle(f, rediscover, 1);
2560                 break;
2561         case Remove:
2562                 removestr(f);   /* depricated */
2563                 break;
2564         case Unbind:
2565                 netunbind(f);
2566                 break;
2567         }
2568         poperror();
2569         free(cb);
2570         return n;
2571 }
2572
2573 static long
2574 aoewrite(Chan *c, void *db, long n, vlong off)
2575 {
2576         switch(TYPE(c->qid)){
2577         default:
2578         case Qzero:
2579         case Qtopdir:
2580         case Qunitdir:
2581         case Qtoplog:
2582                 error(Eperm);
2583         case Qtopctl:
2584                 return topctlwrite(db, n);
2585         case Qctl:
2586         case Qdata:
2587         case Qconfig:
2588         case Qident:
2589                 return unitwrite(c, db, n, off);
2590         }
2591 }
2592
2593 Dev aoedevtab = {
2594         L'æ',
2595         "aoe",
2596
2597         devreset,
2598         devinit,
2599         devshutdown,
2600         aoeattach,
2601         aoewalk,
2602         aoestat,
2603         aoeopen,
2604         devcreate,
2605         aoeclose,
2606         aoeread,
2607         devbread,
2608         aoewrite,
2609         devbwrite,
2610         aoeremove,
2611         devwstat,
2612         devpower,
2613         devconfig,
2614 };