]> git.lizzy.rs Git - plan9front.git/blob - sys/src/9/port/devaoe.c
devaoe: more nil vs. 0
[plan9front.git] / sys / src / 9 / port / devaoe.c
1 /*
2  *      © 2005-13 coraid
3  *      aoe storage initiator
4  */
5
6 #include "u.h"
7 #include "../port/lib.h"
8 #include "mem.h"
9 #include "dat.h"
10 #include "fns.h"
11 #include "io.h"
12 #include "ureg.h"
13 #include "../port/error.h"
14 #include "../port/netif.h"
15 #include "etherif.h"
16 #include "../ip/ip.h"
17 #include "../port/aoe.h"
18 #include <fis.h>
19
20 #pragma varargck argpos eventlog        1
21
/*
 * dprint logs through eventlog() only when the debug flag is set.
 * uprint formats into the current process's up->genbuf.
 * Both expansions already end in ';' and dprint expands to a bare
 * if/else, so neither is safe as the body of an unbraced if.
 */
22 #define dprint(...)     if(debug) eventlog(__VA_ARGS__); else USED(debug);
23 #define uprint(...)     snprint(up->genbuf, sizeof up->genbuf, __VA_ARGS__);
24
/* qid.path field widths (see TYPE, UNIT, L) and fixed table sizes */
25 enum {
26         Typebits                = 4,    /* width of the type field */
27         Unitbits                = 12,   /* width of the unit field */
28         L3bits          = 4,            /* width of the devlink index field */
29         Maxtype         = (1<<Typebits)-1,
30         Maxunits        = (1<<Unitbits)-1,
31         Maxl3           = (1<<L3bits)-1,
32         Maxframes       = 128,
33         Maxmtu          = 100000,
34         Ndevlink        = 6,            /* size of Aoedev.dltab */
35         Nea             = 6,            /* size of Devlink.eatab */
36         Nnetlink        = 6,            /* size of the netlinks tables */
37 };
38
/*
 * qid.path layout, low bits first: type (Typebits), unit (Unitbits),
 * devlink index (L3bits).  TYPE, UNIT and L extract the fields;
 * QID and Q3 build a path from them.  The shifts by Typebits+Unitbits
 * rely on + binding tighter than << and >>.
 * UP tests the Dup bit of a structure's flag member.
 */
39 #define TYPE(q)         ((ulong)(q).path & Maxtype)
40 #define UNIT(q)         (((ulong)(q).path>>Typebits) & Maxunits)
41 #define L(q)            (((ulong)(q).path>>Typebits+Unitbits) & Maxl3)
42 #define QID(u, t)       ((u)<<Typebits | (t))
43 #define Q3(l, u, t)     ((l)<<Typebits+Unitbits | QID(u, t))
44 #define UP(d)           ((d)->flag & Dup)
45
/* Ticks reads the tick counter of processor 0; Ms2tk/Tk2ms convert using HZ */
46 #define Ticks           MACHP(0)->ticks
47 #define Ms2tk(t)        (((t)*HZ)/1000)
48 #define Tk2ms(t)        (((t)*1000)/HZ)
49
/*
 * File types stored in the type field of qid.path, followed by
 * event log geometry, frame tags, timing bounds and ATA command codes.
 */
50 enum {
51         Qzero,
52         Qtopdir         = 1,
53         Qtopbase,
54         Qtopctl         = Qtopbase,
55         Qtoplog,
56         Qtopend,
57
58         Qunitdir,
59         Qunitbase,
60         Qctl            = Qunitbase,
61         Qdata,
62         Qconfig,
63         Qident,
64
65         Qdevlinkdir,
66         Qdevlinkbase,
67         Qdevlink        = Qdevlinkbase,
68         Qdevlinkend,
69
70         Qtopfiles       = Qtopend-Qtopbase,
71         Qdevlinkfiles   = Qdevlinkend-Qdevlinkbase,
72
73         Eventlen        = 256,          /* bytes per event log slot */
74         Nevents         = 64,           /* slots in the event log ring */
75
76         Fread           = 0,
77         Fwrite,
78         Tfree           = -1,           /* Frame.tag value of an unused frame */
79         Tmgmt,                          /* 0; newtag() never returns it */
80
81         /* round trip bounds, timeouts, in ticks */
82         Rtmax           = Ms2tk(320),
83         Rtmin           = Ms2tk(20),
84         Srbtimeout      = 45*HZ,        /* hset() gives up on an srb older than this */
85
86         Dbcnt           = 1024,         /* maxbcnt fallback after a jumbo failure */
87
88         Crd             = 0x20,
89         Crdext          = 0x24,
90         Cwr             = 0x30,
91         Cwrext          = 0x34,
92         Cid             = 0xec,
93
94         Alloc           = 0x01234567,   /* Srb.state set by srballoc/srbkalloc */
95         Free            = 0x89abcdef,   /* Srb.state set by srbwakeup */
96 };
97
/* transfer direction; NOTE(review): no use is visible in this part of the file */
98 enum {
99         Read,
100         Write,
101 };
102
103 /*
104  * unified set of flags
105  * a Netlink + Aoedev must both be jumbo capable
106  * to send jumbograms to that interface.
107  */
/* bits of the flag member of Netlink, Devlink and Aoedev */
108 enum {
109         Dup     = 1<<0,         /* tested by UP() and pickdevlink() */
110         Djumbo  = 1<<1,         /* cleared by the sweeper on jumbo failure */
111         Dnofail = 1<<2,         /* tested by Nofail() */
112 };
113
/* printable names of the flag bits; entry i is listed in the same order as bit 1<<i */
114 static char *flagname[] = {
115         "up",
116         "jumbo",
117         "nofail",
118 };
119
/* one ethernet interface that AoE frames are sent through */
120 typedef struct {
121         uchar   flag;
122         uint    lostjumbo;      /* bumped by the sweeper when a frame larger than Dbcnt times out */
123
124         Chan    *cc;            /* discover() skips netlinks whose cc is nil */
125         Chan    *dc;            /* frames are written here with bwrite */
126         Chan    *mtu;           /* open early to prevent bind issues. */
127         char    path[Maxpath];  /* printed in jumbo failure log entries */
128         uchar   ea[Eaddrlen];   /* copied into the source address of outgoing headers */
129 } Netlink;
130
/* path from one device to one Netlink, with the addresses seen there */
131 typedef struct {
132         Netlink *nl;
133         int     nea;            /* entries of eatab in use; pickea() fails when 0 */
134         ulong   eaidx;          /* round-robin counter used by pickea() */
135         uchar   eatab[Nea][Eaddrlen];   /* destination addresses */
136         int     datamtu;
137         ulong   npkt;           /* frames sent (atarw and resend) */
138         ulong   resent;         /* frames resent */
139         uchar   flag;           /* Dup is tested by pickdevlink() */
140
141         ulong   rttavg;         /* smoothed round trip time, updated by rtupdate() */
142         ulong   mintimer;       /* lower bound applied to samples in rtupdate() */
143 } Devlink;
144
/* one queued read or write request; the issuing process sleeps on it */
145 typedef struct Srb Srb;
146 struct Srb {
147         Rendez;
148         uint    state;          /* Alloc until srbwakeup() sets Free */
149         Srb     *next;          /* link in the Aoedev head/tail queue */
150         ulong   ticksent;       /* Ticks at allocation; compared with Srbtimeout in hset() */
151         ulong   len;            /* bytes not yet handed to frames */
152         vlong   sector;         /* next sector to transfer */
153         short   write;          /* non-zero for a write */
154         short   nout;           /* frames outstanding for this request */
155         char    *error;         /* set by srberror() */
156         void    *dp;            /* current position in the data */
157         void    *data;          /* start of the data */
158 };
159
/* one outstanding (or free) AoE frame of a device */
160 typedef struct {
161         int     tag;            /* Tfree when unused, else a value from newtag() */
162         ulong   bcnt;           /* byte count of the request */
163         ulong   dlen;           /* payload bytes copied out after the header (0 for reads) */
164         vlong   lba;
165         ulong   ticksent;       /* Ticks when hset() last stamped the frame */
166         int     nhdr;           /* bytes of hdr in use */
167         uchar   hdr[ETHERMINTU];
168         void    *dp;            /* payload source */
169         Devlink *dl;            /* devlink picked by hset() */
170         Netlink *nl;            /* dl->nl at the time of hset() */
171         int     eaidx;          /* index into dl->eatab picked by hset() */
172         Srb     *srb;           /* owning request, nil when none */
173 } Frame;
174
/* one AoE target (major.minor); the embedded QLock guards the request state */
175 typedef struct Aoedev Aoedev;
176 struct Aoedev {
177         QLock;
178         Aoedev  *next;          /* link in devs.d */
179
180         ulong   vers;           /* used as qid version by unitgen() */
181
182         int     ndl;            /* devlinks in use */
183         ulong   dlidx;          /* round-robin counter used by pickdevlink() */
184         Devlink *dl;
185         Devlink dltab[Ndevlink];
186
187         uchar   flag;
188         ushort  fwver;
189         int     nopen;          /* opens of the data file */
190         uint    major;
191         uint    minor;
192         int     unit;           /* unit number stored in qid paths */
193         int     lasttag;        /* counter feeding newtag() */
194         int     nframes;        /* entries in frames */
195         Frame   *frames;
196         vlong   bsize;          /* reported as the size of the data file */
197         vlong   realbsize;
198
199         uint    maxbcnt;        /* largest byte count put in one frame */
200         uint    maxmtu;
201         ulong   lostjumbo;
202         ushort  nout;           /* frames outstanding */
203         ushort  maxout;         /* window: limit on nout, adjusted by the sweeper */
204         ulong   lastwadj;       /* Ticks of the last maxout adjustment */
205         Srb     *head;          /* queue of requests not yet started */
206         Srb     *tail;
207         Srb     *inprocess;     /* request currently being split into frames */
208
209         Sfis;
210         char    serial[20+1];
211         char    firmware[8+1];
212         char    model[40+1];
213         int     nconfig;        /* reported as the size of the config file */
214         uchar   config[1024];
215         uchar   ident[512];
216 };
217
218 #pragma varargck type   "æ"    Aoedev*
219
/*
 * Event log: a ring of Nevents slots of Eventlen bytes.  The first byte
 * of a slot holds the message length (0 marks an empty slot), the text
 * follows.  The Lock guards rp, wp and the slots; the QLock is held
 * across a whole eventlogread(); eventlog() does wakeup on the Rendez.
 */
220 static struct {
221         Lock;
222         QLock;
223         Rendez;
224         char    buf[Eventlen*Nevents];
225         char    *rp;            /* next slot to read */
226         char    *wp;            /* next slot to write */
227 } events;
228
/* list of known devices; walkers take the RWlock for reading */
229 static struct {
230         RWlock;
231         int     nd;
232         Aoedev  *d;             /* head of the list linked through Aoedev.next */
233 } devs;
234
/* table of bound network interfaces, scanned by discover() */
235 static struct {
236         Lock;
237         int     reader[Nnetlink];       /* reader is running. */
238         Rendez  rendez[Nnetlink];       /* confirm exit. */
239         Netlink nl[Nnetlink];
240 } netlinks;
241
/* driver-wide state */
242 extern  Dev     aoedevtab;
243 static  Ref     units;          /* units.ref bounds unit numbers in aoegen() */
244 static  Ref     drivevers;
245 static  int     debug;          /* enables dprint() */
246 static  int     autodiscover    = 1;
247 static  int     rediscover;     /* when set, the sweeper broadcasts discover() every Nbcms */
248 extern  char    Enotup[]        = "aoe device is down";
249
250 static Srb*
251 srballoc(ulong sz)
252 {
253         Srb *srb;
254
255         srb = smalloc(sizeof *srb+sz);
256         srb->state = Alloc;
257         srb->dp = srb->data = srb+1;
258         srb->ticksent = Ticks;
259         return srb;
260 }
261
262 static Srb*
263 srbkalloc(void *db, ulong)
264 {
265         Srb *srb;
266
267         srb = smalloc(sizeof *srb);
268         srb->state = Alloc;
269         srb->dp = srb->data = db;
270         srb->ticksent = Ticks;
271         return srb;
272 }
273
274 static int
275 srbready(void *v)
276 {
277         Srb *s;
278
279         s = v;
280         return s->nout == 0 && (s->len == 0 || s->error != nil);
281 }
282
283 static void
284 srbfree(Srb *srb)
285 {
286         int n;
287
288         for(n = 0; srb->state != Free; n++)
289                 sched();
290         free(srb);
291 }
292
293 /* under Aoedev qlock() so setting of srb->state is safe */
294 static void
295 srbwakeup(Srb *srb)
296 {
297         if(srbready(srb)){
298                 assert(srb->state == Alloc);
299                 wakeup(srb);
300                 srb->state = Free;
301         }
302 }
303
304 static void
305 srbcleanout(Aoedev *d, Srb *srb)
306 {
307         Srb *x, **ll;
308
309         if(srb == d->inprocess)
310                 d->inprocess = nil;
311         else
312                 for(ll = &d->head; (x = *ll) != nil; ll = &x->next){
313                         d->tail = x;
314                         if(x == srb)
315                                 *ll = x->next;
316                 }
317 }
318
319 static void
320 srberror(Aoedev *d, Srb *srb, char *s)
321 {
322         srbcleanout(d, srb);
323         srb->error = s;
324         srbwakeup(srb);
325 }
326
327 static void
328 frameerror(Aoedev *d, Frame *f, char *s)
329 {
330         Srb *srb;
331
332         if(f->tag == Tfree)
333                 return;
334         srb = f->srb;
335         f->srb = nil;
336         f->tag = Tfree;         /* don't get fooled by way-slow responses */
337         if(srb == nil)
338                 return;
339         srb->nout--;
340         srberror(d, srb, s);
341         d->nout--;
342 }
343
344 static char*
345 unitname(Aoedev *d)
346 {
347         uprint("%ud.%ud", d->major, d->minor);
348         return up->genbuf;
349 }
350
351 static long
352 eventlogread(void *a, long n)
353 {
354         int len;
355         char *p, *buf;
356
357         buf = smalloc(Eventlen);
358         qlock(&events);
359         lock(&events);
360         p = events.rp;
361         len = *p;
362         if(len == 0){
363                 n = 0;
364                 unlock(&events);
365         } else {
366                 if(n > len)
367                         n = len;
368                 /* can't move directly into pageable space with events lock held */
369                 memmove(buf, p+1, n);
370                 *p = 0;
371                 events.rp = p += Eventlen;
372                 if(p >= events.buf + sizeof events.buf)
373                         events.rp = events.buf;
374                 unlock(&events);
375
376                 /* the concern here is page faults in memmove below */
377                 if(waserror()){
378                         qunlock(&events);
379                         free(buf);
380                         nexterror();
381                 }
382                 memmove(a, buf, n);
383                 poperror();
384         }
385         qunlock(&events);
386         free(buf);
387         return n;
388 }
389
390 static int
391 eventlog(char *fmt, ...)
392 {
393         int dragrp, n;
394         char *p;
395         va_list arg;
396
397         lock(&events);
398         p = events.wp;
399         dragrp = *p++;
400         va_start(arg, fmt);
401         n = vsnprint(p, Eventlen-1, fmt, arg);
402         *--p = n;
403         p = events.wp += Eventlen;
404         if(p >= events.buf + sizeof events.buf)
405                 p = events.wp = events.buf;
406         if(dragrp)
407                 events.rp = p;
408         unlock(&events);
409         wakeup(&events);
410         return n;
411 }
412
413 static int
414 eventcount(void)
415 {
416         uint n;
417
418         lock(&events);
419         if(*events.rp == 0)
420                 n = 0;
421         else
422                 n = events.wp - events.rp & Nevents - 1;
423         unlock(&events);
424         return n/Eventlen;
425 }
426
427 static int
428 tsince(int tag)
429 {
430         int n;
431
432         n = Ticks & 0xffff;
433         n -= tag & 0xffff;
434         if(n < 0)
435                 n += 1<<16;
436         return n;
437 }
438
439 static int
440 newtag(Aoedev *d)
441 {
442         int t;
443
444         do {
445                 t = ++d->lasttag << 16;
446                 t |= Ticks & 0xffff;
447         } while (t == Tfree || t == Tmgmt);
448         return t;
449 }
450
451 static void
452 downdev(Aoedev *d, char *err)
453 {
454         Frame *f, *e;
455
456         d->flag &= ~Dup;
457         f = d->frames;
458         e = f + d->nframes;
459         for(; f < e; f++)
460                 frameerror(d, f, Enotup);
461         d->inprocess = nil;
462         eventlog("%æ: removed; %s\n", d, err);
463 }
464
465 static Block*
466 allocfb(Frame *f)
467 {
468         int len;
469         Block *b;
470
471         len = f->nhdr + f->dlen;
472         if(len < ETHERMINTU)
473                 len = ETHERMINTU;
474         b = allocb(len);
475         memmove(b->wp, f->hdr, f->nhdr);
476         if(f->dlen)
477                 memmove(b->wp + f->nhdr, f->dp, f->dlen);
478         b->wp += len;
479         return b;
480 }
481
482 static void
483 putlba(Aoeata *a, vlong lba)
484 {
485         uchar *c;
486
487         c = a->lba;
488         c[0] = lba;
489         c[1] = lba >> 8;
490         c[2] = lba >> 16;
491         c[3] = lba >> 24;
492         c[4] = lba >> 32;
493         c[5] = lba >> 40;
494 }
495
496 static Devlink*
497 pickdevlink(Aoedev *d)
498 {
499         ulong i, n;
500         Devlink *l;
501
502         for(i = 0; i < d->ndl; i++){
503                 n = d->dlidx++ % d->ndl;
504                 l = d->dl + n;
505                 if(l->flag & Dup)
506                         return l;
507         }
508         return nil;
509 }
510
511 static int
512 pickea(Devlink *l)
513 {
514         if(l == nil)
515                 return -1;
516         if(l->nea == 0)
517                 return -1;
518         return l->eaidx++ % l->nea;
519 }
520
521 /*
522  * would like this to depend on the chan (srb).
523  * not possible in the current structure.
 * the s argument is accepted but not used by the expansion;
 * only the device's Dnofail flag is tested.
524  */
525 #define Nofail(d, s)    (((d)->flag&Dnofail) == Dnofail)
526
527 static int
528 hset(Aoedev *d, Frame *f, Aoehdr *h, int cmd, int new)
529 {
530         int i;
531         Devlink *l;
532
533         if(f->srb != nil)
534         if((long)(Ticks-f->srb->ticksent) > Srbtimeout){
535                 eventlog("%æ: srb timeout\n", d);
536                 if(cmd == ACata && Nofail(d, s))
537                         f->srb->ticksent = Ticks;
538                 else
539                         frameerror(d, f, Etimedout);
540                 return -1;
541         }
542         l = pickdevlink(d);
543         i = pickea(l);
544         if(i == -1){
545                 if(!(cmd == ACata && f->srb != nil && Nofail(d, s)))
546                         downdev(d, "resend fails; no netlink/ea");
547                 return -1;
548         }
549         memmove(h->dst, l->eatab[i], Eaddrlen);
550         memmove(h->src, l->nl->ea, sizeof h->src);
551         hnputs(h->type, Aoetype);
552         h->verflag = Aoever << 4;
553         h->error = 0;
554         hnputs(h->major, d->major);
555         h->minor = d->minor;
556         h->cmd = cmd;
557
558         if(new)
559                 f->tag = newtag(d);
560         hnputl(h->tag, f->tag);
561         f->dl = l;
562         f->nl = l->nl;
563         f->eaidx = i;
564         f->ticksent = Ticks;
565
566         return f->tag;
567 }
568
569 static int
570 resend(Aoedev *d, Frame *f)
571 {
572         ulong n;
573         Aoeata *a;
574         Aoehdr *h;
575
576         h = (Aoehdr*)f->hdr;
577         if(hset(d, f, h, h->cmd, 0) == -1)
578                 return -1;
579         a = (Aoeata*)(f->hdr + Aoehsz);
580         n = f->bcnt;
581         if(n > d->maxbcnt){
582                 n = d->maxbcnt;         /* mtu mismatch (jumbo fail?) */
583                 if(f->dlen > n)
584                         f->dlen = n;
585         }
586         a->scnt = n / Aoesectsz;
587         f->dl->resent++;
588         f->dl->npkt++;
589         if(waserror())
590                 /* should remove the netlink */
591                 return -1;
592         devtab[f->nl->dc->type]->bwrite(f->nl->dc, allocfb(f), 0);
593         poperror();
594         return 0;
595 }
596
597 static void
598 discover(uint major, uint minor)
599 {
600         Aoehdr *h;
601         Block *b;
602         Netlink *nl, *e;
603
604         nl = netlinks.nl;
605         e = nl + nelem(netlinks.nl);
606         for(; nl < e; nl++){
607                 if(nl->cc == nil)
608                         continue;
609                 b = allocb(ETHERMINTU);
610                 if(waserror()){
611                         freeb(b);
612                         nexterror();
613                 }
614                 b->wp = b->rp + ETHERMINTU;
615                 memset(b->rp, 0, ETHERMINTU);
616                 h = (Aoehdr*)b->rp;
617                 memset(h->dst, 0xff, sizeof h->dst);
618                 memmove(h->src, nl->ea, sizeof h->src);
619                 hnputs(h->type, Aoetype);
620                 h->verflag = Aoever << 4;
621                 hnputs(h->major, major);
622                 h->minor = minor;
623                 h->cmd = ACconfig;
624                 poperror();
625                 devtab[nl->dc->type]->bwrite(nl->dc, b, 0);
626         }
627 }
628
629 /*
630  * Check all frames on device and resend any frames that have been
631  * outstanding for 200% of the device round trip time average.
632  */
633 static void
634 aoesweepproc(void*)
635 {
636         ulong i, tx, timeout, nbc;
637         vlong starttick;
638         enum { Nms = 100, Nbcms = 30*1000, };
639         uchar *ea;
640         Aoeata *a;
641         Aoedev *d;
642         Devlink *l;
643         Frame *f, *e;
644
645         nbc = Nbcms/Nms;
646 loop:
647         if(nbc-- == 0){
648                 if(rediscover && !waserror()){
649                         discover(0xffff, 0xff);
650                         poperror();
651                 }
652                 nbc = Nbcms/Nms;
653         }
654         starttick = Ticks;
655         rlock(&devs);
656         for(d = devs.d; d != nil; d = d->next){
657                 if(!canqlock(d))
658                         continue;
659                 if(!UP(d)){
660                         qunlock(d);
661                         continue;
662                 }
663                 tx = 0;
664                 f = d->frames;
665                 e = f + d->nframes;
666                 for (; f < e; f++){
667                         if(f->tag == Tfree)
668                                 continue;
669                         l = f->dl;
670                         timeout = l->rttavg << 1;
671                         i = tsince(f->tag);
672                         if(i < timeout)
673                                 continue;
674                         if(d->nout == d->maxout){
675                                 if(d->maxout > 1)
676                                         d->maxout--;
677                                 d->lastwadj = Ticks;
678                         }
679                         a = (Aoeata*)(f->hdr + Aoehsz);
680                         if(a->scnt > Dbcnt / Aoesectsz &&
681                            ++f->nl->lostjumbo > (d->nframes << 1)){
682                                 ea = f->dl->eatab[f->eaidx];
683                                 eventlog("%æ: jumbo failure on %s:%E; %llud\n",
684                                         d, f->nl->path, ea, f->lba);
685                                 d->maxbcnt = Dbcnt;
686                                 d->flag &= ~Djumbo;
687                         }
688                         resend(d, f);
689                         if(tx++ == 0){
690                                 if((l->rttavg <<= 1) > Rtmax)
691                                         l->rttavg = Rtmax;
692                                 eventlog("%æ: rtt %ldms\n", d, Tk2ms(l->rttavg));
693                         }
694                 }
695                 if(d->nout == d->maxout && d->maxout < d->nframes &&
696                    TK2MS(Ticks-d->lastwadj) > 10*1000){
697                         d->maxout++;
698                         d->lastwadj = Ticks;
699                 }
700                 qunlock(d);
701         }
702         runlock(&devs);
703         i = Nms - TK2MS(Ticks - starttick);
704         if(i > 0 && !waserror()){
705                 tsleep(&up->sleep, return0, 0, i);
706                 poperror();
707         }
708         goto loop;
709 }
710
711 static int
712 fmtæ(Fmt *f)
713 {
714         char buf[16];
715         Aoedev *d;
716
717         d = va_arg(f->args, Aoedev*);
718         snprint(buf, sizeof buf, "aoe%ud.%ud", d->major, d->minor);
719         return fmtstrcpy(f, buf);
720 }
721
722 static void netbind(char *path);
723
724 static void
725 aoecfg(void)
726 {
727         char *p, *f[32], buf[24], ifbuf[64];
728         int n, i;
729
730         if((p = getconf("aoeif")) == nil)
731                 return;
732         strncpy(ifbuf, p, sizeof(ifbuf)-1);
733         ifbuf[sizeof(ifbuf)-1] = 0;
734         if((n = tokenize(ifbuf, f, nelem(f))) < 1)
735                 return;
736         /* goo! */
737         for(i = 0; i < n; i++){
738                 p = f[i];
739                 if(strncmp(p, "ether", 5) == 0)
740                         snprint(buf, sizeof buf, "#l%c/ether%c", p[5], p[5]);
741                 else if(strncmp(p, "#l", 2) == 0)
742                         snprint(buf, sizeof buf, "#l%c/ether%c", p[2], p[2]);
743                 else
744                         continue;
745                 if(!waserror()){
746                         netbind(buf);
747                         poperror();
748                 }
749         }
750 }
751
752 static void
753 aoeinit(void)
754 {
755         static int init;
756         static QLock l;
757
758         if(!canqlock(&l))
759                 return;
760         if(init == 0){
761                 fmtinstall(L'æ', fmtæ);
762                 events.rp = events.wp = events.buf;
763                 kproc("aoesweep", aoesweepproc, nil);
764                 aoecfg();
765                 init = 1;
766         }
767         qunlock(&l);
768 }
769
770 static Chan*
771 aoeattach(char *spec)
772 {
773         Chan *c;
774
775         if(*spec)
776                 error(Enonexist);
777         aoeinit();
778         c = devattach(L'æ', spec);
779         mkqid(&c->qid, Qzero, 0, QTDIR);
780         return c;
781 }
782
783 static int
784 unitseq(Chan *c, uint unit, Dir *dp)
785 {
786         int i, rv;
787         Qid q;
788         Aoedev *d;
789
790         i = 0;
791         rv = -1;
792         rlock(&devs);
793         for(d = devs.d; d != nil; d = d->next)
794                 if(i++ == unit){
795                         mkqid(&q, QID(d->unit, Qunitdir), 0, QTDIR);
796                         devdir(c, q, unitname(d), 0, eve, 0555, dp);
797                         rv = 1;
798                         break;
799                 }
800         runlock(&devs);
801         return rv;
802 }
803
804 static Aoedev*
805 unit2dev(ulong unit)
806 {
807         Aoedev *d;
808
809         rlock(&devs);
810         for(d = devs.d; d != nil; d = d->next)
811                 if(d->unit == unit){
812                         runlock(&devs);
813                         return d;
814                 }
815         runlock(&devs);
816         error("unit lookup failure");
817         return nil;
818 }
819
820 static int
821 unitgen(Chan *c, ulong type, Dir *dp)
822 {
823         int perm, t;
824         ulong vers;
825         vlong size;
826         char *p;
827         Aoedev *d;
828         Qid q;
829
830         d = unit2dev(UNIT(c->qid));
831         perm = 0644;
832         size = 0;
833         vers = d->vers;
834         t = QTFILE;
835
836         switch(type){
837         default:
838                 return -1;
839         case Qctl:
840                 p = "ctl";
841                 break;
842         case Qdata:
843                 p = "data";
844                 perm = 0640;
845                 if(UP(d))
846                         size = d->bsize;
847                 break;
848         case Qconfig:
849                 p = "config";
850                 if(UP(d))
851                         size = d->nconfig;
852                 break;
853         case Qident:
854                 p = "ident";
855                 if(UP(d))
856                         size = sizeof d->ident;
857                 break;
858         case Qdevlinkdir:
859                 p = "devlink";
860                 t = QTDIR;
861                 perm = 0555;
862                 break;
863         }
864         mkqid(&q, QID(UNIT(c->qid), type), vers, t);
865         devdir(c, q, p, size, eve, perm, dp);
866         return 1;
867 }
868
869 static int
870 topgen(Chan *c, ulong type, Dir *d)
871 {
872         int perm;
873         vlong size;
874         char *p;
875         Qid q;
876
877         perm = 0444;
878         size = 0;
879         switch(type){
880         default:
881                 return -1;
882         case Qtopctl:
883                 p = "ctl";
884                 perm = 0644;
885                 break;
886         case Qtoplog:
887                 p = "log";
888                 size = eventcount();
889                 break;
890         }
891         mkqid(&q, type, 0, QTFILE);
892         devdir(c, q, p, size, eve, perm, d);
893         return 1;
894 }
895
896 static int
897 aoegen(Chan *c, char *, Dirtab *, int, int s, Dir *dp)
898 {
899         int i;
900         Aoedev *d;
901         Qid q;
902
903         if(c->qid.path == 0){
904                 switch(s){
905                 case DEVDOTDOT:
906                         q.path = 0;
907                         q.type = QTDIR;
908                         devdir(c, q, "#æ", 0, eve, 0555, dp);
909                         break;
910                 case 0:
911                         q.path = Qtopdir;
912                         q.type = QTDIR;
913                         devdir(c, q, "aoe", 0, eve, 0555, dp);
914                         break;
915                 default:
916                         return -1;
917                 }
918                 return 1;
919         }
920
921         switch(TYPE(c->qid)){
922         default:
923                 return -1;
924         case Qtopdir:
925                 if(s == DEVDOTDOT){
926                         mkqid(&q, Qzero, 0, QTDIR);
927                         devdir(c, q, "aoe", 0, eve, 0555, dp);
928                         return 1;
929                 }
930                 if(s < Qtopfiles)
931                         return topgen(c, Qtopbase + s, dp);
932                 s -= Qtopfiles;
933                 return unitseq(c, s, dp);
934         case Qtopctl:
935         case Qtoplog:
936                 return topgen(c, TYPE(c->qid), dp);
937         case Qunitdir:
938                 if(s == DEVDOTDOT){
939                         mkqid(&q, QID(0, Qtopdir), 0, QTDIR);
940                         uprint("%uld", UNIT(c->qid));
941                         devdir(c, q, up->genbuf, 0, eve, 0555, dp);
942                         return 1;
943                 }
944                 return unitgen(c, Qunitbase+s, dp);
945         case Qctl:
946         case Qdata:
947         case Qconfig:
948         case Qident:
949                 return unitgen(c, TYPE(c->qid), dp);
950         case Qdevlinkdir:
951                 i = UNIT(c->qid);
952                 if(s == DEVDOTDOT){
953                         mkqid(&q, QID(i, Qunitdir), 0, QTDIR);
954                         devdir(c, q, "devlink", 0, eve, 0555, dp);
955                         return 1;
956                 }
957                 if(i >= Maxunits || i >= units.ref)
958                         return -1;
959                 d = unit2dev(i);
960                 if(s >= d->ndl)
961                         return -1;
962                 uprint("%d", s);
963                 mkqid(&q, Q3(s, i, Qdevlink), 0, QTFILE);
964                 devdir(c, q, up->genbuf, 0, eve, 0755, dp);
965                 return 1;
966         case Qdevlink:
967                 uprint("%d", s);
968                 mkqid(&q, Q3(s, UNIT(c->qid), Qdevlink), 0, QTFILE);
969                 devdir(c, q, up->genbuf, 0, eve, 0755, dp);
970                 return 1;
971         }
972 }
973
974 static Walkqid*
975 aoewalk(Chan *c, Chan *nc, char **name, int nname)
976 {
977         return devwalk(c, nc, name, nname, nil, 0, aoegen);
978 }
979
980 static int
981 aoestat(Chan *c, uchar *db, int n)
982 {
983         return devstat(c, db, n, nil, 0, aoegen);
984 }
985
986 static Chan*
987 aoeopen(Chan *c, int omode)
988 {
989         Aoedev *d;
990
991         if(TYPE(c->qid) != Qdata)
992                 return devopen(c, omode, 0, 0, aoegen);
993
994         d = unit2dev(UNIT(c->qid));
995         qlock(d);
996         if(waserror()){
997                 qunlock(d);
998                 nexterror();
999         }
1000         if(!UP(d))
1001                 error(Enotup);
1002         c = devopen(c, omode, 0, 0, aoegen);
1003         d->nopen++;
1004         poperror();
1005         qunlock(d);
1006         return c;
1007 }
1008
1009 static void
1010 aoeclose(Chan *c)
1011 {
1012         Aoedev *d;
1013
1014         if(TYPE(c->qid) != Qdata || (c->flag&COPEN) == 0)
1015                 return;
1016
1017         d = unit2dev(UNIT(c->qid));
1018         qlock(d);
1019         if(--d->nopen == 0 && !waserror()){
1020                 discover(d->major, d->minor);
1021                 poperror();
1022         }
1023         qunlock(d);
1024 }
1025
1026 static void
1027 atarw(Aoedev *d, Frame *f)
1028 {
1029         ulong bcnt;
1030         char extbit, writebit;
1031         Aoeata *ah;
1032         Aoehdr *h;
1033         Srb *srb;
1034
1035         extbit = 0x4;
1036         writebit = 0x10;
1037
1038         srb = d->inprocess;
1039         bcnt = d->maxbcnt;
1040         if(bcnt > srb->len)
1041                 bcnt = srb->len;
1042         f->nhdr = Aoehsz + Aoeatasz;
1043         memset(f->hdr, 0, f->nhdr);
1044         h = (Aoehdr*)f->hdr;
1045         if(hset(d, f, h, ACata, 1) == -1){
1046                 d->inprocess = nil;
1047                 return;
1048         }
1049         ah = (Aoeata*)(f->hdr + Aoehsz);
1050         f->dp = srb->dp;
1051         f->bcnt = bcnt;
1052         f->lba = srb->sector;
1053         f->srb = srb;
1054
1055         ah->scnt = bcnt / Aoesectsz;
1056         putlba(ah, f->lba);
1057         if(d->feat & Dllba)
1058                 ah->aflag |= AAFext;
1059         else {
1060                 extbit = 0;
1061                 ah->lba[3] &= 0x0f;
1062                 ah->lba[3] |= 0xe0;     /* LBA bit+obsolete 0xa0 */
1063         }
1064         if(srb->write){
1065                 ah->aflag |= AAFwrite;
1066                 f->dlen = bcnt;
1067         }else{
1068                 writebit = 0;
1069                 f->dlen = 0;
1070         }
1071         ah->cmdstat = 0x20 | writebit | extbit;
1072
1073         /* mark tracking fields and load out */
1074         srb->nout++;
1075         srb->dp = (uchar*)srb->dp + bcnt;
1076         srb->len -= bcnt;
1077         srb->sector += bcnt / Aoesectsz;
1078         if(srb->len == 0)
1079                 d->inprocess = nil;
1080         d->nout++;
1081         f->dl->npkt++;
1082         if(waserror())
1083                 frameerror(d, f, "write error");
1084         else{
1085                 devtab[f->nl->dc->type]->bwrite(f->nl->dc, allocfb(f), 0);
1086                 poperror();
1087         }
1088 }
1089
1090 static char*
1091 aoeerror(Aoehdr *h)
1092 {
1093         int n;
1094         static char *errs[] = {
1095                 "aoe protocol error: unknown",
1096                 "aoe protocol error: bad command code",
1097                 "aoe protocol error: bad argument param",
1098                 "aoe protocol error: device unavailable",
1099                 "aoe protocol error: config string present",
1100                 "aoe protocol error: unsupported version",
1101                 "aoe protocol error: target is reserved",
1102         };
1103
1104         if((h->verflag & AFerr) == 0)
1105                 return nil;
1106         n = h->error;
1107         if(n >= nelem(errs))
1108                 n = 0;
1109         return errs[n];
1110 }
1111
1112 static void
1113 rtupdate(Devlink *l, int rtt)
1114 {
1115         int n;
1116
1117         n = rtt;
1118         if(rtt < 0){
1119                 n = -rtt;
1120                 if(n < Rtmin)
1121                         n = Rtmin;
1122                 else if(n > Rtmax)
1123                         n = Rtmax;
1124                 l->mintimer += (n - l->mintimer) >> 1;
1125         } else if(n < l->mintimer)
1126                 n = l->mintimer;
1127         else if(n > Rtmax)
1128                 n = Rtmax;
1129
1130         /* g == .25; cf. Congestion Avoidance and Control, Jacobson&Karels; 1988 */
1131         n -= l->rttavg;
1132         l->rttavg += n >> 2;
1133 }
1134
1135 static Frame*
1136 getframe(Aoedev *d, int tag)
1137 {
1138         Frame *f, *e;
1139
1140         f = d->frames;
1141         e = f + d->nframes;
1142         for(; f < e; f++)
1143                 if(f->tag == tag)
1144                         return f;
1145         return nil;
1146 }
1147
1148 static Frame*
1149 freeframe(Aoedev *d)
1150 {
1151         if(d->nout < d->maxout)
1152                 return getframe(d, Tfree);
1153         return nil;
1154 }
1155
1156 static void
1157 work(Aoedev *d)
1158 {
1159         Frame *f;
1160
1161         while(f = freeframe(d)) {
1162                 if(d->inprocess == nil){
1163                         if(d->head == nil)
1164                                 return;
1165                         d->inprocess = d->head;
1166                         d->head = d->head->next;
1167                         if(d->head == nil)
1168                                 d->tail = nil;
1169                 }
1170                 atarw(d, f);
1171         }
1172 }
1173
1174 static void
1175 strategy(Aoedev *d, Srb *srb)
1176 {
1177         qlock(d);
1178         if(waserror()){
1179                 qunlock(d);
1180                 nexterror();
1181         }
1182         if(!UP(d))
1183                 error(Eio);
1184         srb->next = nil;
1185         if(d->tail != nil)
1186                 d->tail->next = srb;
1187         d->tail = srb;
1188         if(d->head == nil)
1189                 d->head = srb;
1190         work(d);
1191         poperror();
1192         qunlock(d);
1193
1194         while(waserror()){
1195                 qlock(d);
1196                 srberror(d, srb, "interrupted");
1197                 qunlock(d);
1198         }
1199         sleep(srb, srbready, srb);
1200         poperror();
1201 }
1202
1203 #define iskaddr(a)      ((uintptr)(a) > KZERO)
1204
1205 static long
1206 rw(Aoedev *d, int write, uchar *db, long len, uvlong off)
1207 {
1208         long n, nlen, copy;
1209         enum { Srbsz = 1<<19, };
1210         Srb *srb;
1211
1212         if((off|len) & (Aoesectsz-1))
1213                 error("offset and length must be sector multiple.\n");
1214         if(!UP(d))
1215                 error(Eio);
1216         if(off >= d->bsize)
1217                 return 0;
1218         if(off + len > d->bsize)
1219                 len = d->bsize - off;
1220         copy = 0;
1221         if(iskaddr(db)){
1222                 srb = srbkalloc(db, len);
1223                 copy = 1;
1224         }else
1225                 srb = srballoc(Srbsz <= len? Srbsz: len);
1226         if(waserror()){
1227                 srbfree(srb);
1228                 nexterror();
1229         }
1230         srb->write = write;
1231         for(nlen = len; nlen; nlen -= n){
1232                 srb->sector = off / Aoesectsz;
1233                 srb->dp = srb->data;
1234                 n = nlen;
1235                 if(n > Srbsz)
1236                         n = Srbsz;
1237                 srb->len = n;
1238                 if(write && !copy)
1239                         memmove(srb->data, db, n);
1240                 strategy(d, srb);
1241                 if(srb->error != nil)
1242                         error(srb->error);
1243                 if(!write && !copy)
1244                         memmove(db, srb->data, n);
1245                 db += n;
1246                 off += n;
1247         }
1248         poperror();
1249         srbfree(srb);
1250         return len;
1251 }
1252
1253 static long
1254 readmem(ulong off, void *dst, long n, void *src, long size)
1255 {
1256         if(off >= size)
1257                 return 0;
1258         if(off + n > size)
1259                 n = size - off;
1260         memmove(dst, (uchar*)src + off, n);
1261         return n;
1262 }
1263
1264 static char*
1265 aoeflag(char *s, char *e, uchar f)
1266 {
1267         uchar i;
1268
1269         for(i = 0; i < nelem(flagname); i++)
1270                 if(f & 1 << i)
1271                         s = seprint(s, e, "%s ", flagname[i]);
1272         return seprint(s, e, "\n");
1273 }
1274
1275 static int
1276 pstat(Aoedev *d, char *db, int len, int off)
1277 {
1278         int i;
1279         char *state, *s, *p, *e;
1280
1281         s = p = smalloc(READSTR);
1282         e = p + READSTR;
1283
1284         state = "down";
1285         if(UP(d))
1286                 state = "up";
1287
1288         p = seprint(p, e,
1289                 "state: %s\n"   "nopen: %d\n"   "nout: %d\n"
1290                 "nmaxout: %d\n" "nframes: %d\n" "maxbcnt: %d [maxmtu %d]\n"
1291                 "fw: %.4ux\n"
1292                 "model: %s\n"   "serial: %s\n"  "firmware: %s\n",
1293                 state,          d->nopen,       d->nout,
1294                 d->maxout,      d->nframes,     d->maxbcnt, d->maxmtu,
1295                 d->fwver,
1296                 d->model,       d->serial,      d->firmware);
1297         p = seprint(p, e, "flag: ");
1298         p = pflag(p, e, d);
1299         p[-1] = ' ';    /* horrid */
1300         p = aoeflag(p, e, d->flag);
1301
1302         if(p - s < len)
1303                 len = p - s;
1304         i = readstr(off, db, len, s);
1305         free(s);
1306         return i;
1307 }
1308
1309 static long
1310 unitread(Chan *c, void *db, long len, vlong off)
1311 {
1312         Aoedev *d;
1313
1314         d = unit2dev(UNIT(c->qid));
1315         if(d->vers != c->qid.vers)
1316                 error(Echange);
1317         switch(TYPE(c->qid)){
1318         default:
1319                 error(Ebadarg);
1320         case Qctl:
1321                 return pstat(d, db, len, off);
1322         case Qdata:
1323                 return rw(d, Read, db, len, off);
1324         case Qconfig:
1325                 if(!UP(d))
1326                         error(Enotup);
1327                 return readmem(off, db, len, d->config, d->nconfig);
1328         case Qident:
1329                 if(!UP(d))
1330                         error(Enotup);
1331                 return readmem(off, db, len, d->ident, sizeof d->ident);
1332         }
1333 }
1334
1335 static int
1336 getmtu(Chan *m)
1337 {
1338         int n, mtu;
1339         char buf[36];
1340
1341         mtu = 1514;
1342         if(m == nil || waserror())
1343                 return mtu;
1344         n = devtab[m->type]->read(m, buf, sizeof buf - 1, 0);
1345         poperror();
1346         if(n > 12){
1347                 buf[n] = 0;
1348                 mtu = strtoul(buf + 12, 0, 0);
1349         }
1350         return mtu;
1351 }
1352
1353 static int
1354 devlinkread(Chan *c, void *db, int len, int off)
1355 {
1356         int i;
1357         char *s, *p, *e;
1358         Aoedev *d;
1359         Devlink *l;
1360
1361         d = unit2dev(UNIT(c->qid));
1362         i = L(c->qid);
1363         if(i >= d->ndl)
1364                 return 0;
1365         l = d->dl + i;
1366
1367         s = p = smalloc(READSTR);
1368         e = s + READSTR;
1369
1370         p = seprint(p, e, "addr: ");
1371         for(i = 0; i < l->nea; i++)
1372                 p = seprint(p, e, "%E ", l->eatab[i]);
1373         p = seprint(p, e, "\n");
1374         p = seprint(p, e, "npkt: %uld\n", l->npkt);
1375         p = seprint(p, e, "resent: %uld\n", l->resent);
1376         p = seprint(p, e, "flag: ");
1377         p = aoeflag(p, e, l->flag);
1378         p = seprint(p, e, "rttavg: %uld\n", Tk2ms(l->rttavg));
1379         p = seprint(p, e, "mintimer: %uld\n", Tk2ms(l->mintimer));
1380         p = seprint(p, e, "datamtu: %d\n", l->datamtu);
1381
1382         p = seprint(p, e, "nl path: %s\n", l->nl->path);
1383         p = seprint(p, e, "nl ea: %E\n", l->nl->ea);
1384         p = seprint(p, e, "nl flag: ");
1385         p = aoeflag(p, e, l->flag);
1386         p = seprint(p, e, "nl lostjumbo: %d\n", l->nl->lostjumbo);
1387         p = seprint(p, e, "nl datamtu: %d\n", getmtu(l->nl->mtu));
1388
1389         if(p - s < len)
1390                 len = p - s;
1391         i = readstr(off, db, len, s);
1392         free(s);
1393         return i;
1394 }
1395
1396 static long
1397 topctlread(Chan *, void *db, int len, int off)
1398 {
1399         int i;
1400         char *s, *p, *e;
1401         Netlink *n;
1402
1403         s = p = smalloc(READSTR);
1404         e = s + READSTR;
1405
1406         p = seprint(p, e, "debug: %d\n", debug);
1407         p = seprint(p, e, "autodiscover: %d\n", autodiscover);
1408         p = seprint(p, e, "rediscover: %d\n", rediscover);
1409
1410         for(i = 0; i < Nnetlink; i++){
1411                 n = netlinks.nl+i;
1412                 if(n->cc == nil)
1413                         continue;
1414                 p = seprint(p, e, "if%d path: %s\n", i, n->path);
1415                 p = seprint(p, e, "if%d ea: %E\n", i, n->ea);
1416                 p = seprint(p, e, "if%d flag: ", i);
1417                 p = aoeflag(p, e, n->flag);
1418                 p = seprint(p, e, "if%d lostjumbo: %d\n", i, n->lostjumbo);
1419                 p = seprint(p, e, "if%d datamtu: %d\n", i, getmtu(n->mtu));
1420         }
1421
1422         if(p - s < len)
1423                 len = p - s;
1424         i = readstr(off, db, len, s);
1425         free(s);
1426         return i;
1427 }
1428
1429 static long
1430 aoeread(Chan *c, void *db, long n, vlong off)
1431 {
1432         switch(TYPE(c->qid)){
1433         default:
1434                 error(Eperm);
1435         case Qzero:
1436         case Qtopdir:
1437         case Qunitdir:
1438         case Qdevlinkdir:
1439                 return devdirread(c, db, n, 0, 0, aoegen);
1440         case Qtopctl:
1441                 return topctlread(c, db, n, off);
1442         case Qtoplog:
1443                 return eventlogread(db, n);
1444         case Qctl:
1445         case Qdata:
1446         case Qconfig:
1447         case Qident:
1448                 return unitread(c, db, n, off);
1449         case Qdevlink:
1450                 return devlinkread(c, db, n, off);
1451         }
1452 }
1453
1454 static long
1455 configwrite(Aoedev *d, void *db, long len)
1456 {
1457         char *s;
1458         Aoehdr *h;
1459         Aoecfg *ch;
1460         Frame *f;
1461         Srb *srb;
1462
1463         if(!UP(d))
1464                 error(Enotup);
1465         if(len > sizeof d->config)
1466                 error(Etoobig);
1467         srb = srballoc(len);
1468         s = smalloc(len);
1469         memmove(s, db, len);
1470         if(waserror()){
1471                 srbfree(srb);
1472                 free(s);
1473                 nexterror();
1474         }
1475         for (;;) {
1476                 qlock(d);
1477                 if(waserror()){
1478                         qunlock(d);
1479                         nexterror();
1480                 }
1481                 f = freeframe(d);
1482                 if(f != nil)
1483                         break;
1484                 poperror();
1485                 qunlock(d);
1486                 if(waserror())
1487                         nexterror();
1488                 tsleep(&up->sleep, return0, 0, 100);
1489                 poperror();
1490         }
1491         f->nhdr = Aoehsz + Aoecfgsz;
1492         memset(f->hdr, 0, f->nhdr);
1493         h = (Aoehdr*)f->hdr;
1494         if(hset(d, f, h, ACconfig, 1) == -1)
1495                 return 0;
1496         ch = (Aoecfg*)(f->hdr + Aoehsz);
1497         f->srb = srb;
1498         f->dp = s;
1499         ch->verccmd = AQCfset;
1500         hnputs(ch->cslen, len);
1501         d->nout++;
1502         srb->nout++;
1503         f->dl->npkt++;
1504         f->dlen = len;
1505         /*
1506          * these refer to qlock & waserror in the above for loop.
1507          * there's still the first waserror outstanding.
1508          */
1509         poperror();
1510         qunlock(d);
1511
1512         devtab[f->nl->dc->type]->bwrite(f->nl->dc, allocfb(f), 0);
1513         sleep(srb, srbready, srb);
1514         if(srb->error)
1515                 error(srb->error);
1516
1517         qlock(d);
1518         if(waserror()){
1519                 qunlock(d);
1520                 nexterror();
1521         }
1522         memmove(d->config, s, len);
1523         d->nconfig = len;
1524         poperror();
1525         qunlock(d);
1526
1527         poperror();                     /* pop first waserror */
1528
1529         srbfree(srb);
1530         memmove(db, s, len);
1531         free(s);
1532         return len;
1533 }
1534
1535 static int
1536 devmaxdata(Aoedev *d)
1537 {
1538         int i, m, mtu, datamtu;
1539         Devlink *l;
1540         Netlink *n;
1541
1542         mtu = 100000;
1543         datamtu = 100000;
1544         for(i = 0; i < d->ndl; i++){
1545                 l = d->dl + i;
1546                 n = l->nl;
1547                 if((l->flag & Dup) == 0 || (n->flag & Dup) == 0)
1548                         continue;
1549                 m = getmtu(n->mtu);
1550                 if(l->datamtu < datamtu)
1551                         datamtu = l->datamtu;
1552                 if(m < mtu)
1553                         mtu = m;
1554         }
1555         if(mtu == 100000)
1556                 mtu = 1514;
1557         mtu -= Aoehsz + Aoeatasz;
1558         mtu -= mtu % Aoesectsz;
1559         if(mtu > datamtu)
1560                 mtu = datamtu;
1561         return mtu;
1562 }
1563
1564 static int
1565 toggle(char *s, uint f, uint bit)
1566 {
1567         if(s == nil)
1568                 f = f^bit;
1569         else if(strcmp(s, "on") == 0)
1570                 f |= bit;
1571         else
1572                 f &= ~bit;
1573         return f;
1574 }
1575
1576 static void ataident(Aoedev*);
1577
1578 static long
1579 unitctlwrite(Aoedev *d, void *db, long n)
1580 {
1581         uint maxbcnt, m;
1582         uvlong bsize;
1583         enum {
1584                 Failio,
1585                 Ident,
1586                 Jumbo,
1587                 Maxbno,
1588                 Mtu,
1589                 Nofailf,
1590                 Setsize,
1591         };
1592         Cmdbuf *cb;
1593         Cmdtab *ct;
1594         static Cmdtab cmds[] = {
1595                 {Failio,        "failio",       1 },
1596                 {Ident,         "identify",     1 },
1597                 {Jumbo,         "jumbo",        0 },
1598                 {Maxbno,        "maxbno",       0 },
1599                 {Mtu,           "mtu",          0 },
1600                 {Nofailf,               "nofail",               0 },
1601                 {Setsize,       "setsize",      0 },
1602         };
1603
1604         cb = parsecmd(db, n);
1605         qlock(d);
1606         if(waserror()){
1607                 qunlock(d);
1608                 free(cb);
1609                 nexterror();
1610         }
1611         ct = lookupcmd(cb, cmds, nelem(cmds));
1612         switch(ct->index){
1613         case Failio:
1614                 downdev(d, "i/o failure");
1615                 break;
1616         case Ident:
1617                 ataident(d);
1618                 break;
1619         case Jumbo:
1620                 d->flag = toggle(cb->f[1], d->flag, Djumbo);
1621                 break;
1622         case Maxbno:
1623         case Mtu:
1624                 maxbcnt = devmaxdata(d);
1625                 if(cb->nf > 2)
1626                         error(Ecmdargs);
1627                 if(cb->nf == 2){
1628                         m = strtoul(cb->f[1], 0, 0);
1629                         if(ct->index == Maxbno)
1630                                 m *= Aoesectsz;
1631                         else{
1632                                 m -= Aoehsz + Aoeatasz;
1633                                 m &= ~(Aoesectsz-1);
1634                         }
1635                         if(m == 0 || m > maxbcnt)
1636                                 cmderror(cb, "invalid mtu");
1637                         maxbcnt = m;
1638                         d->maxmtu = m;
1639                 } else
1640                         d->maxmtu = Maxmtu;
1641                 d->maxbcnt = maxbcnt;
1642                 break;
1643         case Nofailf:
1644                 d->flag = toggle(cb->f[1], d->flag, Dnofail);
1645                 break;
1646         case Setsize:
1647                 bsize = d->realbsize;
1648                 if(cb->nf > 2)
1649                         error(Ecmdargs);
1650                 if(cb->nf == 2){
1651                         bsize = strtoull(cb->f[1], 0, 0);
1652                         if(bsize % Aoesectsz)
1653                                 cmderror(cb, "disk size must be sector aligned");
1654                 }
1655                 d->bsize = bsize;
1656                 break;
1657         }
1658         poperror();
1659         qunlock(d);
1660         free(cb);
1661         return n;
1662 }
1663
1664 static long
1665 unitwrite(Chan *c, void *db, long n, vlong off)
1666 {
1667         long rv;
1668         char *buf;
1669         Aoedev *d;
1670
1671         d = unit2dev(UNIT(c->qid));
1672         switch(TYPE(c->qid)){
1673         default:
1674                 error(Ebadarg);
1675         case Qctl:
1676                 return unitctlwrite(d, db, n);
1677         case Qident:
1678                 error(Eperm);
1679         case Qdata:
1680                 return rw(d, Write, db, n, off);
1681         case Qconfig:
1682                 if(off + n > sizeof d->config)
1683                         error(Etoobig);
1684                 buf = smalloc(sizeof d->config);
1685                 if(waserror()){
1686                         free(buf);
1687                         nexterror();
1688                 }
1689                 memmove(buf, d->config, d->nconfig);
1690                 memmove(buf + off, db, n);
1691                 rv = configwrite(d, buf, n + off);
1692                 poperror();
1693                 free(buf);
1694                 return rv;
1695         }
1696 }
1697
1698 static Netlink*
1699 addnet(char *path, Chan *cc, Chan *dc, Chan *mtu, uchar *ea)
1700 {
1701         Netlink *nl, *e;
1702
1703         lock(&netlinks);
1704         if(waserror()){
1705                 unlock(&netlinks);
1706                 nexterror();
1707         }
1708         nl = netlinks.nl;
1709         e = nl + nelem(netlinks.nl);
1710         for(; nl < e && nl->cc != nil; nl++)
1711                 continue;
1712         if(nl == e)
1713                 error("out of netlink structures");
1714         nl->cc = cc;
1715         nl->dc = dc;
1716         nl->mtu = mtu;
1717         strncpy(nl->path, path, sizeof(nl->path)-1);
1718         nl->path[sizeof(nl->path)-1] = 0;
1719         memmove(nl->ea, ea, sizeof nl->ea);
1720         poperror();
1721         nl->flag |= Dup;
1722         unlock(&netlinks);
1723         return nl;
1724 }
1725
1726 static int
1727 newunit(void)
1728 {
1729         int x;
1730
1731         x = incref(&units);
1732         if(x >= Maxunits){
1733                 decref(&units);
1734                 x = -1;
1735         }
1736         return x;
1737 }
1738
1739 static int
1740 dropunit(void)
1741 {
1742         return decref(&units);
1743 }
1744
1745 /*
1746  * always allocate max frames.  maxout may change.
1747  */
1748 static Aoedev*
1749 newdev(uint major, uint minor, int n)
1750 {
1751         Aoedev *d;
1752         Frame *f, *e;
1753
1754         d = malloc(sizeof *d);
1755         f = malloc(sizeof *f*Maxframes);
1756         if(d == nil || f == nil) {
1757                 free(d);
1758                 free(f);
1759                 error("aoe device allocation failure");
1760         }
1761         d->nframes = n;
1762         d->frames = f;
1763         for (e = f + Maxframes; f < e; f++)
1764                 f->tag = Tfree;
1765         d->maxout = n;
1766         d->major = major;
1767         d->minor = minor;
1768         d->maxbcnt = Dbcnt;
1769         d->flag = Djumbo;
1770         d->maxmtu = Maxmtu;
1771         d->unit = newunit();            /* bzzt.  inaccurate if units removed */
1772         if(d->unit == -1){
1773                 free(d);
1774                 free(d->frames);
1775                 error("too many units");
1776         }
1777         d->dl = d->dltab;
1778         return d;
1779 }
1780
1781 static Aoedev*
1782 mm2dev(uint major, uint minor)
1783 {
1784         Aoedev *d;
1785
1786         rlock(&devs);
1787         for(d = devs.d; d; d = d->next)
1788                 if(d->major == major && d->minor == minor){
1789                         runlock(&devs);
1790                         return d;
1791                 }
1792         runlock(&devs);
1793         eventlog("mm2dev: %ud.%ud not found\n", major, minor);
1794         return nil;
1795 }
1796
1797 /* Find the device in our list.  If not known, add it */
1798 static Aoedev*
1799 getdev(uint major, uint minor, int n)
1800 {
1801         Aoedev *d;
1802
1803         if(major == 0xffff || minor == 0xff)
1804                 return 0;
1805         wlock(&devs);
1806         if(waserror()){
1807                 wunlock(&devs);
1808                 nexterror();
1809         }
1810         for(d = devs.d; d; d = d->next)
1811                 if(d->major == major && d->minor == minor)
1812                         break;
1813         if(d == nil) {
1814                 d = newdev(major, minor, n);
1815                 d->next = devs.d;
1816                 devs.d = d;
1817         }
1818         poperror();
1819         wunlock(&devs);
1820         return d;
1821 }
1822
1823 static void
1824 ataident(Aoedev *d)
1825 {
1826         Aoeata *a;
1827         Aoehdr *h;
1828         Frame *f;
1829
1830         f = freeframe(d);
1831         if(f == nil)
1832                 return;
1833         f->nhdr = Aoehsz + Aoeatasz;
1834         memset(f->hdr, 0, f->nhdr);
1835         h = (Aoehdr*)f->hdr;
1836         if(hset(d, f, h, ACata, 1) == -1)
1837                 return;
1838         a = (Aoeata*)(f->hdr + Aoehsz);
1839         f->srb = srbkalloc(0, 0);
1840         a->cmdstat = Cid;       /* ata 6, page 110 */
1841         a->scnt = 1;
1842         a->lba[3] = 0xa0;
1843         d->nout++;
1844         f->dl->npkt++;
1845         f->bcnt = 512;
1846         f->dlen = 0;
1847         if(waserror()){
1848                 srbfree(f->srb);
1849                 d->nout--;
1850                 f->tag = Tfree;
1851         }else{
1852                 devtab[f->nl->dc->type]->bwrite(f->nl->dc, allocfb(f), 0);
1853                 poperror();
1854         }
1855 }
1856
1857 static int
1858 newdlea(Devlink *l, uchar *ea)
1859 {
1860         int i;
1861         uchar *t;
1862
1863         for(i = 0; i < Nea; i++){
1864                 t = l->eatab[i];
1865                 if(i == l->nea){
1866                         memmove(t, ea, Eaddrlen);
1867                         return l->nea++;
1868                 }
1869                 if(memcmp(t, ea, Eaddrlen) == 0)
1870                         return i;
1871         }
1872         return -1;
1873 }
1874
1875 static Devlink*
1876 newdevlink(Aoedev *d, Netlink *n, Aoehdr *h)
1877 {
1878         int i;
1879         Aoecfg *c;
1880         Devlink *l;
1881
1882         c = (Aoecfg*)((uchar*)h + Aoehsz);
1883         for(i = 0; i < Ndevlink; i++){
1884                 l = d->dl + i;
1885                 if(i == d->ndl){
1886                         d->ndl++;
1887                         newdlea(l, h->src);
1888                         l->datamtu = c->scnt*Aoesectsz;
1889                         l->nl = n;
1890                         l->flag |= Dup;
1891                         l->mintimer = Rtmin;
1892                         l->rttavg = Rtmax;
1893                         return l;
1894                 }
1895                 if(l->nl == n){
1896                         newdlea(l, h->src);
1897                         l->datamtu = c->scnt*Aoesectsz;
1898                         l->flag |= Dup;
1899                         return l;
1900                 }
1901         }
1902         eventlog("%æ: out of links: %s:%E to %E\n", d, n->path, n->ea, h->src);
1903         return 0;
1904 }
1905
1906 static void
1907 errrsp(Block *b, char *s)
1908 {
1909         int n;
1910         Aoedev *d;
1911         Aoehdr *h;
1912         Frame *f;
1913
1914         h = (Aoehdr*)b->rp;
1915         n = nhgetl(h->tag);
1916         if(n == Tmgmt || n == Tfree)
1917                 return;
1918         d = mm2dev(nhgets(h->major), h->minor);
1919         if(d == nil)
1920                 return;
1921         if(f = getframe(d, n))
1922                 frameerror(d, f, s);
1923 }
1924
1925 static void
1926 qcfgrsp(Block *b, Netlink *nl)
1927 {
1928         int cmd, cslen, blen;
1929         uint n, major;
1930         Aoedev *d;
1931         Aoehdr *h, *h0;
1932         Aoecfg *ch;
1933         Devlink *l;
1934         Frame *f;
1935         Srb *srb;
1936
1937         h = (Aoehdr*)b->rp;
1938         ch = (Aoecfg*)(b->rp + Aoehsz);
1939         major = nhgets(h->major);
1940         n = nhgetl(h->tag);
1941         if(n != Tmgmt && n != Tfree){
1942                 d = mm2dev(major, h->minor);
1943                 if(d == nil)
1944                         return;
1945                 qlock(d);
1946                 f = getframe(d, n);
1947                 if(f == nil){
1948                         qunlock(d);
1949                         eventlog("%æ: unknown response tag %ux\n", d, n);
1950                         return;
1951                 }
1952                 h0 = (Aoehdr*)f->hdr;
1953                 cmd = h0->cmd;
1954                 if(cmd != ACconfig){
1955                         qunlock(d);
1956                         eventlog("%æ: malicious server got ACconfig want %d; tag %ux\n", d, cmd, n);
1957                         return;
1958                 }
1959                 cslen = nhgets(ch->cslen);
1960                 blen = BLEN(b) - (Aoehsz + Aoecfgsz);
1961                 if(cslen < blen && BLEN(b) > 60)
1962                         eventlog("%æ: cfgrsp: tag %.8ux oversized %d %d\n",
1963                                 d, n, cslen, blen);
1964                 if(cslen > blen){
1965                         eventlog("%æ: cfgrsp: tag %.8ux runt %d %d\n",
1966                                 d, n, cslen, blen);
1967                         cslen = blen;
1968                 }
1969                 memmove(f->dp, b->rp + Aoehsz + Aoecfgsz, cslen);
1970                 srb = f->srb;
1971                 f->dp = nil;
1972                 f->srb = nil;
1973                 if(srb != nil){
1974                         srb->nout--;
1975                         srbwakeup(srb);
1976                         d->nout--;
1977                         f->tag = Tfree;
1978                 }
1979                 qunlock(d);
1980                 return;
1981         }
1982
1983         cmd = ch->verccmd & 0xf;
1984         if(cmd != 0){
1985                 eventlog("aoe%ud.%ud: cfgrsp: bad command %d\n", major, h->minor, cmd);
1986                 return;
1987         }
1988         n = nhgets(ch->bufcnt);
1989         if(n > Maxframes)
1990                 n = Maxframes;
1991
1992         if(waserror()){
1993                 eventlog("getdev: %ud.%ud ignored: %s\n", major, h->minor, up->errstr);
1994                 return;
1995         }
1996         d = getdev(major, h->minor, n);
1997         poperror();
1998         if(d == 0)
1999                 return;
2000
2001         qlock(d);
2002         *up->errstr = 0;
2003         if(waserror()){
2004                 qunlock(d);
2005                 eventlog("%æ: %s\n", d, up->errstr);
2006                 nexterror();
2007         }
2008
2009         l = newdevlink(d, nl, h);               /* add this interface. */
2010
2011         d->fwver = nhgets(ch->fwver);
2012         cslen = nhgets(ch->cslen);
2013         if(cslen > sizeof d->config)
2014                 cslen = sizeof d->config;
2015         if(Aoehsz + Aoecfgsz + cslen > BLEN(b))
2016                 cslen = BLEN(b) - (Aoehsz + Aoecfgsz);
2017         d->nconfig = cslen;
2018         memmove(d->config, b->rp + Aoehsz + Aoecfgsz, cslen);
2019
2020         /* manually set mtu may be reset lower if conditions warrant */
2021         if(l){
2022                 n = devmaxdata(d);
2023                 if((d->flag & Djumbo) == 0)
2024                         n = Dbcnt;
2025                 if(n > d->maxmtu)
2026                         n = d->maxmtu;
2027                 if(n != d->maxbcnt){
2028                         eventlog("%æ: setting %d byte mtu on %s:%E\n",
2029                                 d, n, nl->path, nl->ea);
2030                         d->maxbcnt = n;
2031                 }
2032         }
2033         if(d->nopen == 0)
2034                 ataident(d);
2035         poperror();
2036         qunlock(d);
2037 }
2038
2039 static vlong
2040 aoeidentify(Aoedev *d, ushort *id)
2041 {
2042         vlong s;
2043
2044         s = idfeat(d, id);
2045         if(s == -1){
2046                 eventlog("%æ: idfeat returns -1\n", d);
2047                 return -1;
2048         }
2049         if((d->feat&Dlba) == 0){
2050                 eventlog("%æ: no lba support\n", d);
2051                 return -1;
2052         }
2053         d->flag |= Dup;
2054         memmove(d->ident, id, sizeof d->ident);
2055         return s;
2056 }
2057
2058 static void
2059 newvers(Aoedev *d)
2060 {
2061         d->vers = incref(&drivevers);
2062 }
2063
2064 static int
2065 identify(Aoedev *d, ushort *id)
2066 {
2067         vlong osectors, s;
2068         uchar oserial[21];
2069
2070         s = aoeidentify(d, id);
2071         if(s == -1)
2072                 return -1;
2073         osectors = d->realbsize;
2074         memmove(oserial, d->serial, sizeof d->serial);
2075
2076         idmove(d->serial, id+10, 20);
2077         idmove(d->firmware, id+23, 8);
2078         idmove(d->model, id+27, 40);
2079         /* idss() */
2080         /* d->wwn = idwwn(d, id); */
2081
2082         s *= Aoesectsz;
2083         if(osectors != s || memcmp(oserial, d->serial, sizeof oserial)){
2084                 d->bsize = s;
2085                 d->realbsize = s;
2086 //              d->mediachange = 1;
2087                 newvers(d);
2088         }
2089         return 0;
2090 }
2091
2092 static void
2093 atarsp(Block *b)
2094 {
2095         uint n, cmd;
2096         ushort major;
2097         Aoeata *ahin, *ahout;
2098         Aoehdr *h, *h0;
2099         Aoedev *d;
2100         Frame *f;
2101         Srb *srb;
2102
2103         h = (Aoehdr*)b->rp;
2104         major = nhgets(h->major);
2105         d = mm2dev(major, h->minor);
2106         if(d == nil)
2107                 return;
2108         ahin = (Aoeata*)(b->rp + Aoehsz);
2109         qlock(d);
2110         if(waserror()){
2111                 qunlock(d);
2112                 nexterror();
2113         }
2114         n = nhgetl(h->tag);
2115         if(n == Tfree || n == Tmgmt)
2116                 goto bail;
2117         f = getframe(d, n);
2118         if(f == nil){
2119                 eventlog("%æ: unexpected response; tag %ux\n", d, n);
2120                 goto bail;
2121         }
2122         h0 = (Aoehdr*)f->hdr;
2123         cmd = h0->cmd;
2124         if(cmd != ACata){
2125                 eventlog("%æ: malicious server got ACata want %d; tag %ux\n", d, cmd, n);
2126                 goto bail;
2127         }
2128
2129         rtupdate(f->dl, tsince(f->tag));
2130         ahout = (Aoeata*)(f->hdr + Aoehsz);
2131         srb = f->srb;
2132
2133         if(ahin->cmdstat & 0xa9){
2134                 eventlog("%æ: ata error cmd %.2ux stat %.2ux\n",
2135                         d, ahout->cmdstat, ahin->cmdstat);
2136                 if(srb != nil)
2137                         srb->error = Eio;
2138         } else {
2139                 n = ahout->scnt * Aoesectsz;
2140                 switch(ahout->cmdstat){
2141                 case Crd:
2142                 case Crdext:
2143                         if(BLEN(b) - (Aoehsz + Aoeatasz) != n){
2144                                 eventlog("%æ: misread blen %ld expect %d\n",
2145                                         d, BLEN(b), n);
2146                                 goto bail;
2147                         }
2148                         memmove(f->dp, b->rp + Aoehsz + Aoeatasz, n);
2149                 case Cwr:
2150                 case Cwrext:
2151                         if(n > Dbcnt)
2152                                 f->nl->lostjumbo = 0;
2153                         if(f->bcnt -= n){
2154                                 f->lba += n / Aoesectsz;
2155                                 f->dp = (uchar*)f->dp + n;
2156                                 resend(d, f);
2157                                 goto bail;
2158                         }
2159                         break;
2160                 case Cid:
2161                         if(BLEN(b) - (Aoehsz + Aoeatasz) < 512){
2162                                 eventlog("%æ: runt identify blen %ld expect %d\n",
2163                                         d, BLEN(b), 512 + Aoehsz + Aoeatasz);
2164                                 goto bail;
2165                         }
2166                         identify(d, (ushort*)(b->rp + Aoehsz + Aoeatasz));
2167                         free(srb);              /* BOTCH */
2168                         srb = nil;
2169                         break;
2170                 default:
2171                         eventlog("%æ: unknown ata command %.2ux \n",
2172                                 d, ahout->cmdstat);
2173                 }
2174         }
2175
2176         f->srb = nil;
2177         if(srb != nil){
2178                 srb->nout--;
2179                 srbwakeup(srb);
2180         }
2181         f->tag = Tfree;
2182         d->nout--;
2183
2184         work(d);
2185 bail:
2186         poperror();
2187         qunlock(d);
2188 }
2189
2190 static void
2191 netrdaoeproc(void *v)
2192 {
2193         int idx;
2194         char name[Maxpath+1], *s;
2195         Aoehdr *h;
2196         Block *b;
2197         Netlink *nl;
2198
2199         nl = (Netlink*)v;
2200         idx = nl - netlinks.nl;
2201         netlinks.reader[idx] = 1;
2202         kstrcpy(name, nl->path, Maxpath);
2203
2204         if(waserror()){
2205                 eventlog("netrdaoe@%s: exiting: %s\n", name, up->errstr);
2206                 netlinks.reader[idx] = 0;
2207                 wakeup(netlinks.rendez + idx);
2208                 pexit(up->errstr, 1);
2209         }
2210         if(autodiscover)
2211                 discover(0xffff, 0xff);
2212         for (;;) {
2213                 if((nl->flag & Dup) == 0)
2214                         error("netlink is down");
2215                 if(nl->dc == nil)
2216                         panic("netrdaoe: nl->dc == nil");
2217                 b = devtab[nl->dc->type]->bread(nl->dc, 1<<16, 0);
2218                 if(b == nil)
2219                         error("network read");
2220                 h = (Aoehdr*)b->rp;
2221                 if(h->verflag & AFrsp)
2222                         if(s = aoeerror(h)){
2223                                 eventlog("%s: %d.%d %s\n", nl->path,
2224                                         h->major[0]<<8 | h->major[1], h->minor, s);
2225                                 errrsp(b, s);
2226                         }else if(h->cmd == ACata)
2227                                 atarsp(b);
2228                         else if(h->cmd == ACconfig)
2229                                 qcfgrsp(b, nl);
2230                         else if((h->cmd & 0xf0) != 0xf0){
2231                                 eventlog("%s: unknown cmd %d\n",
2232                                         nl->path, h->cmd);
2233                                 errrsp(b, "unknown command");
2234                         }
2235                 freeb(b);
2236         }
2237 }
2238
2239 static void
2240 getaddr(char *path, uchar *ea)
2241 {
2242         int n;
2243         char buf[2*Eaddrlen+1];
2244         Chan *c;
2245
2246         uprint("%s/addr", path);
2247         c = namec(up->genbuf, Aopen, OREAD, 0);
2248         if(waserror()) {
2249                 cclose(c);
2250                 nexterror();
2251         }
2252         n = devtab[c->type]->read(c, buf, sizeof buf-1, 0);
2253         cclose(c);
2254         poperror();
2255         buf[n] = 0;
2256         if(parseether(ea, buf) < 0)
2257                 error("parseether failure");
2258 }
2259
2260 static void
2261 netbind(char *path)
2262 {
2263         char addr[Maxpath];
2264         uchar ea[2*Eaddrlen+1];
2265         Chan *dc, *cc, *mtu;
2266         Netlink *nl;
2267
2268         snprint(addr, sizeof addr, "%s!0x%x", path, Aoetype);
2269         dc = chandial(addr, nil, nil, &cc);
2270         snprint(addr, sizeof addr, "%s/mtu", path);
2271         mtu = nil;
2272         if(!waserror()){
2273                 mtu = namec(addr, Aopen, OREAD, 0);
2274                 poperror();
2275         }
2276         if(waserror()){
2277                 cclose(dc);
2278                 cclose(cc);
2279                 if(mtu != nil)
2280                         cclose(mtu);
2281                 nexterror();
2282         }
2283         if(dc == nil || cc == nil)
2284                 error(Enonexist);
2285         getaddr(path, ea);
2286         nl = addnet(path, cc, dc, mtu, ea);
2287         snprint(addr, sizeof addr, "netrdaoe@%s", path);
2288         kproc(addr, netrdaoeproc, nl);
2289         poperror();
2290 }
2291
/*
 * Sleep condition: returns 1 when the int that v points at is
 * non-zero, 0 otherwise.
 */
static int
unbound(void *v)
{
	int *flag;

	flag = (int*)v;
	if(*flag != 0)
		return 1;
	return 0;
}
2297
2298 static void
2299 netunbind(char *path)
2300 {
2301         int i, idx;
2302         Aoedev *d, *p, *next;
2303         Chan *dc, *cc;
2304         Devlink *l;
2305         Frame *f;
2306         Netlink *n, *e;
2307
2308         n = netlinks.nl;
2309         e = n + nelem(netlinks.nl);
2310
2311         lock(&netlinks);
2312         for(; n < e; n++)
2313                 if(n->dc && strcmp(n->path, path) == 0)
2314                         break;
2315         unlock(&netlinks);
2316         if(n == e)
2317                 error("device not bound");
2318
2319         /*
2320          * hunt down devices using this interface; disable
2321          * this also terminates the reader.
2322          */
2323         idx = n - netlinks.nl;
2324         wlock(&devs);
2325         for(d = devs.d; d; d = d->next){
2326                 qlock(d);
2327                 for(i = 0; i < d->ndl; i++){
2328                         l = d->dl + i;
2329                         if(l->nl == n)
2330                                 l->flag &= ~Dup;
2331                 }
2332                 qunlock(d);
2333         }
2334         n->flag &= ~Dup;
2335         wunlock(&devs);
2336
2337         /* confirm reader is down. */
2338         while(waserror())
2339                 ;
2340         sleep(netlinks.rendez + idx, unbound, netlinks.reader + idx);
2341         poperror();
2342
2343         /* reschedule packets. */
2344         wlock(&devs);
2345         for(d = devs.d; d != nil; d = d->next){
2346                 qlock(d);
2347                 for(i = 0; i < d->nframes; i++){
2348                         f = d->frames + i;
2349                         if(f->tag != Tfree && f->nl == n)
2350                                 resend(d, f);
2351                 }
2352                 qunlock(d);
2353         }
2354         wunlock(&devs);
2355
2356         /* squeeze devlink pool.  (we assert nobody is using them now) */
2357         wlock(&devs);
2358         for(d = devs.d; d != nil; d = d->next){
2359                 qlock(d);
2360                 for(i = 0; i < d->ndl; i++){
2361                         l = d->dl + i;
2362                         if(l->nl == n)
2363                                 memmove(l, l + 1, sizeof *l * (--d->ndl - i));
2364                 }
2365                 qunlock(d);
2366         }
2367         wunlock(&devs);
2368
2369         /* close device link. */
2370         lock(&netlinks);
2371         dc = n->dc;
2372         cc = n->cc;
2373         if(n->mtu != nil)
2374                 cclose(n->mtu);
2375         memset(n, 0, sizeof *n);
2376         unlock(&netlinks);
2377
2378         cclose(dc);
2379         cclose(cc);
2380
2381         /* squeeze orphan devices */
2382         wlock(&devs);
2383         for(p = d = devs.d; d != nil; d = next){
2384                 next = d->next;
2385                 if(d->ndl > 0){
2386                         p = d;
2387                         continue;
2388                 }
2389                 qlock(d);
2390                 downdev(d, "orphan");
2391                 qunlock(d);
2392                 if(p != devs.d)
2393                         p->next = next;
2394                 else{
2395                         devs.d = next;
2396                         p = devs.d;
2397                 }
2398                 free(d->frames);
2399                 free(d);
2400                 dropunit();
2401         }
2402         wunlock(&devs);
2403 }
2404
2405 static void
2406 strtoss(char *f, uint *shelf, uint *slot)
2407 {
2408         char *s;
2409
2410         *shelf = 0xffff;
2411         *slot = 0xff;
2412         if(f == nil)
2413                 return;
2414         *shelf = strtol(f, &s, 0);
2415         if(s == f || *shelf > 0xffff)
2416                 error("bad shelf");
2417         f = s;
2418         if(*f++ == '.'){
2419                 *slot = strtol(f, &s, 0);
2420                 if(s == f || *slot > 0xff)
2421                         error("bad slot");
2422         }
2423 }
2424
2425 static void
2426 discoverstr(char *f)
2427 {
2428         uint shelf, slot;
2429
2430         strtoss(f, &shelf, &slot);
2431         discover(shelf, slot);
2432 }
2433
2434 static void
2435 removedev(Aoedev *d)
2436 {
2437         int i;
2438         Aoedev *p;
2439
2440         wlock(&devs);
2441         p = 0;
2442         if(d != devs.d)
2443         for(p = devs.d; p != nil; p = p->next)
2444                 if(p->next == d)
2445                         break;
2446         qlock(d);
2447         d->flag &= ~Dup;
2448         newvers(d);
2449         d->ndl = 0;
2450         qunlock(d);
2451         for(i = 0; i < d->nframes; i++)
2452                 frameerror(d, d->frames+i, Enotup);
2453
2454         if(p != nil)
2455                 p->next = d->next;
2456         else
2457                 devs.d = d->next;
2458         free(d->frames);
2459         free(d);
2460         dropunit();
2461         wunlock(&devs);
2462 }
2463
2464
2465 static void
2466 aoeremove(Chan *c)
2467 {
2468         switch(TYPE(c->qid)){
2469         default:
2470         case Qzero:
2471         case Qtopdir:
2472         case Qtoplog:
2473         case Qtopctl:
2474         case Qctl:
2475         case Qdata:
2476         case Qconfig:
2477         case Qident:
2478                 error(Eperm);
2479         case Qunitdir:
2480                 removedev(unit2dev(UNIT(c->qid)));
2481                 break;
2482         }
2483 }
2484
2485 static void
2486 removestr(char *f)
2487 {
2488         uint shelf, slot;
2489         Aoedev *d;
2490
2491         strtoss(f, &shelf, &slot);
2492         wlock(&devs);
2493         for(d = devs.d; d != nil; d = d->next)
2494                 if(shelf == d->major && slot == d->minor){
2495                         wunlock(&devs); /* BOTCH */
2496                         removedev(d);
2497                         return;
2498                 }
2499         wunlock(&devs);
2500         error("device not bound");
2501 }
2502
2503 static long
2504 topctlwrite(void *db, long n)
2505 {
2506         enum {
2507                 Autodiscover,
2508                 Bind,
2509                 Debug,
2510                 Discover,
2511                 Rediscover,
2512                 Remove,
2513                 Unbind,
2514         };
2515         char *f;
2516         Cmdbuf *cb;
2517         Cmdtab *ct;
2518         static Cmdtab cmds[] = {
2519                 { Autodiscover, "autodiscover", 0       },
2520                 { Bind,         "bind",         2       },
2521                 { Debug,        "debug",        0       },
2522                 { Discover,     "discover",     0       },
2523                 { Rediscover,   "rediscover",   0       },
2524                 { Remove,       "remove",       2       },
2525                 { Unbind,       "unbind",       2       },
2526         };
2527
2528         cb = parsecmd(db, n);
2529         if(waserror()){
2530                 free(cb);
2531                 nexterror();
2532         }
2533         ct = lookupcmd(cb, cmds, nelem(cmds));
2534         f = cb->f[1];
2535         switch(ct->index){
2536         case Autodiscover:
2537                 autodiscover = toggle(f, autodiscover, 1);
2538                 break;
2539         case Bind:
2540                 netbind(f);
2541                 break;
2542         case Debug:
2543                 debug = toggle(f, debug, 1);
2544                 break;
2545         case Discover:
2546                 discoverstr(f);
2547                 break;
2548         case Rediscover:
2549                 rediscover = toggle(f, rediscover, 1);
2550                 break;
2551         case Remove:
2552                 removestr(f);   /* depricated */
2553                 break;
2554         case Unbind:
2555                 netunbind(f);
2556                 break;
2557         }
2558         poperror();
2559         free(cb);
2560         return n;
2561 }
2562
2563 static long
2564 aoewrite(Chan *c, void *db, long n, vlong off)
2565 {
2566         switch(TYPE(c->qid)){
2567         default:
2568         case Qzero:
2569         case Qtopdir:
2570         case Qunitdir:
2571         case Qtoplog:
2572                 error(Eperm);
2573         case Qtopctl:
2574                 return topctlwrite(db, n);
2575         case Qctl:
2576         case Qdata:
2577         case Qconfig:
2578         case Qident:
2579                 return unitwrite(c, db, n, off);
2580         }
2581 }
2582
2583 Dev aoedevtab = {
2584         L'æ',
2585         "aoe",
2586
2587         devreset,
2588         devinit,
2589         devshutdown,
2590         aoeattach,
2591         aoewalk,
2592         aoestat,
2593         aoeopen,
2594         devcreate,
2595         aoeclose,
2596         aoeread,
2597         devbread,
2598         aoewrite,
2599         devbwrite,
2600         aoeremove,
2601         devwstat,
2602         devpower,
2603         devconfig,
2604 };