]> git.lizzy.rs Git - plan9front.git/blob - sys/src/9/port/devproc.c
kernel: avoid inconsistent reads in /proc/#/fd and /proc/#/ns
[plan9front.git] / sys / src / 9 / port / devproc.c
1 #include        "u.h"
2 #include        <trace.h>
3 #include        "tos.h"
4 #include        "../port/lib.h"
5 #include        "mem.h"
6 #include        "dat.h"
7 #include        "fns.h"
8 #include        "../port/error.h"
9 #include        "ureg.h"
10 #include        "edf.h"
11
/*
 * File types served by this device.  The value is kept in the low
 * bits of the qid path (see QID() and QSHIFT below), so the order
 * here is part of the qid encoding.
 */
enum
{
	Qdir,		/* top-level directory and per-process directories */
	Qtrace,		/* top-level trace file */
	Qargs,
	Qctl,
	Qfd,
	Qfpregs,
	Qkregs,
	Qmem,
	Qnote,
	Qnoteid,
	Qnotepg,
	Qns,
	Qppid,
	Qproc,
	Qregs,
	Qsegment,
	Qstatus,
	Qtext,
	Qwait,
	Qprofile,
	Qsyscall,
};
36
/*
 * Control message identifiers.  Each one is paired with its
 * keyword in proccmd[] below.
 */
enum
{
	CMclose,
	CMclosefiles,
	CMfixedpri,
	CMhang,
	CMkill,
	CMnohang,
	CMnoswap,
	CMpri,
	CMprivate,
	CMprofile,
	CMstart,
	CMstartstop,
	CMstartsyscall,
	CMstop,
	CMwaitstop,
	CMwired,
	CMtrace,
	CMinterrupt,
	CMnointerrupt,
	/* real time */
	CMperiod,
	CMdeadline,
	CMcost,
	CMsporadic,
	CMdeadlinenotes,
	CMadmit,
	CMextra,
	CMexpel,
	CMevent,
};
69
/*
 * Size of the trace event buffer.  Running counters are reduced to
 * a buffer index with "& Emask", which relies on Nevents being a
 * power of two.
 */
enum{
	Nevents = 0x4000,	/* entries allocated for tevents in procopen */
	Emask = Nevents - 1,	/* index mask for tproduced/tconsumed */
};
74
/*
 * Length of the status file as laid out by procread: two
 * KNAMELEN-wide fields (text, user), a 12-byte state field and
 * nine numeric fields (presumably 12 == NUMSIZE -- confirm).
 */
#define STATSIZE	(2*KNAMELEN+12+9*12)
/*
 * Status, fd, and ns are left fully readable (0444) because of their use in debugging,
 * particularly on shared servers.
 * Arguably, ns and fd shouldn't be readable; if you'd prefer, change them to 0000
 *
 * Columns are name, qid, length, permissions.  procgen treats a
 * permission of 0 as "use p->procmode"; for any other value it
 * also ors in the read bits of p->procmode.
 */
Dirtab procdir[] =
{
	"args",		{Qargs},	0,			0660,
	"ctl",		{Qctl},		0,			0000,
	"fd",		{Qfd},		0,			0444,
	"fpregs",	{Qfpregs},	sizeof(FPsave),		0000,
	"kregs",	{Qkregs},	sizeof(Ureg),		0400,
	"mem",		{Qmem},		0,			0000,
	"note",		{Qnote},	0,			0000,
	"noteid",	{Qnoteid},	0,			0664,
	"notepg",	{Qnotepg},	0,			0000,
	"ns",		{Qns},		0,			0444,
	"ppid",		{Qppid},	0,			0444,
	"proc",		{Qproc},	0,			0400,
	"regs",		{Qregs},	sizeof(Ureg),		0000,
	"segment",	{Qsegment},	0,			0444,
	"status",	{Qstatus},	STATSIZE,		0444,
	"text",		{Qtext},	0,			0000,
	"wait",		{Qwait},	0,			0400,
	"profile",	{Qprofile},	0,			0400,
	"syscall",	{Qsyscall},	0,			0400,
};
103
/*
 * Keywords accepted as control messages, one row per CM* value.
 * NOTE(review): the third column is presumably the number of
 * fields the message must have, with 0 meaning "any" -- confirm
 * against the Cmdtab definition.
 */
static
Cmdtab proccmd[] = {
	CMclose,		"close",		2,
	CMclosefiles,		"closefiles",		1,
	CMfixedpri,		"fixedpri",		2,
	CMhang,			"hang",			1,
	CMnohang,		"nohang",		1,
	CMnoswap,		"noswap",		1,
	CMkill,			"kill",			1,
	CMpri,			"pri",			2,
	CMprivate,		"private",		1,
	CMprofile,		"profile",		1,
	CMstart,		"start",		1,
	CMstartstop,		"startstop",		1,
	CMstartsyscall,		"startsyscall",		1,
	CMstop,			"stop",			1,
	CMwaitstop,		"waitstop",		1,
	CMwired,		"wired",		2,
	CMtrace,		"trace",		0,
	CMinterrupt,		"interrupt",		1,
	CMnointerrupt,		"nointerrupt",		1,
	CMperiod,		"period",		2,
	CMdeadline,		"deadline",		2,
	CMcost,			"cost",			2,
	CMsporadic,		"sporadic",		1,
	CMdeadlinenotes,	"deadlinenotes",	1,
	CMadmit,		"admit",		1,
	CMextra,		"extra",		1,
	CMexpel,		"expel",		1,
	CMevent,		"event",		1,
};
135
/*
 * Segment type from portdat.h.
 * Indexed by sg->type&SG_TYPE when procread formats the segment file.
 */
static char *sname[]={ "Text", "Data", "Bss", "Stack", "Shared", "Phys", };
138
/*
 * Qids are, in path:
 *	 5 bits of file type (qids above), extracted by QID()
 *	26 bits of process slot number + 1, extracted by SLOT()
 *	     in vers,
 *	32 bits of pid, for consistency checking
 * If notepg, c->pgrpid.path is the pgrpid + 1, .vers is noteid.
 */
#define QSHIFT	5	/* location in qid of proc slot # */

#define QID(q)		((((ulong)(q).path)&0x0000001F)>>0)
#define SLOT(q)		(((((ulong)(q).path)&0x07FFFFFE0)>>QSHIFT)-1)
#define PID(q)		((q).vers)
#define NOTEID(q)	((q).vers)
153
/* functions declared ahead of their use */
void	procctlreq(Proc*, char*, int);
int	procctlmemio(Proc*, uintptr, int, void*, int);
Chan*	proctext(Chan*, Proc*);
int	procstopped(void*);
ulong	procpagecount(Proc *);

/*
 * State of the top-level trace file.  tevents is a buffer of
 * Nevents entries allocated on first open; _proctrace stores at
 * tproduced&Emask and procread copies out from tconsumed&Emask.
 * topens counts opens of the trace file and is changed under tlock.
 */
static Traceevent *tevents;
static Lock tlock;
static int topens;
static int tproduced, tconsumed;
/* set to _proctrace when the trace file is opened, back to nil when topens drops to 0 */
void (*proctrace)(Proc*, int, vlong);

extern int unfair;
167
168 static void
169 profclock(Ureg *ur, Timer *)
170 {
171         Tos *tos;
172
173         if(up == nil || up->state != Running)
174                 return;
175
176         /* user profiling clock */
177         if(userureg(ur)){
178                 tos = (Tos*)(USTKTOP-sizeof(Tos));
179                 tos->clock += TK2MS(1);
180                 segclock(ur->pc);
181         }
182 }
183
184 static int
185 procgen(Chan *c, char *name, Dirtab *tab, int, int s, Dir *dp)
186 {
187         Qid qid;
188         Proc *p;
189         char *ename;
190         Segment *q;
191         ulong pid, path, perm, len;
192
193         if(s == DEVDOTDOT){
194                 mkqid(&qid, Qdir, 0, QTDIR);
195                 devdir(c, qid, "#p", 0, eve, 0555, dp);
196                 return 1;
197         }
198
199         if(c->qid.path == Qdir){
200                 if(s == 0){
201                         strcpy(up->genbuf, "trace");
202                         mkqid(&qid, Qtrace, -1, QTFILE);
203                         devdir(c, qid, up->genbuf, 0, eve, 0444, dp);
204                         return 1;
205                 }
206
207                 if(name != nil){
208                         /* ignore s and use name to find pid */
209                         pid = strtol(name, &ename, 10);
210                         if(pid==0 || ename[0]!='\0')
211                                 return -1;
212                         s = procindex(pid);
213                         if(s < 0)
214                                 return -1;
215                 }
216                 else if(--s >= conf.nproc)
217                         return -1;
218
219                 p = proctab(s);
220                 pid = p->pid;
221                 if(pid == 0)
222                         return 0;
223                 sprint(up->genbuf, "%lud", pid);
224                 /*
225                  * String comparison is done in devwalk so name must match its formatted pid
226                 */
227                 if(name != nil && strcmp(name, up->genbuf) != 0)
228                         return -1;
229                 mkqid(&qid, (s+1)<<QSHIFT, pid, QTDIR);
230                 devdir(c, qid, up->genbuf, 0, p->user, DMDIR|0555, dp);
231                 return 1;
232         }
233         if(c->qid.path == Qtrace){
234                 strcpy(up->genbuf, "trace");
235                 mkqid(&qid, Qtrace, -1, QTFILE);
236                 devdir(c, qid, up->genbuf, 0, eve, 0444, dp);
237                 return 1;
238         }
239         if(s >= nelem(procdir))
240                 return -1;
241         if(tab)
242                 panic("procgen");
243
244         tab = &procdir[s];
245         path = c->qid.path&~(((1<<QSHIFT)-1));  /* slot component */
246
247         /* p->procmode determines default mode for files in /proc */
248         p = proctab(SLOT(c->qid));
249         perm = tab->perm;
250         if(perm == 0)
251                 perm = p->procmode;
252         else    /* just copy read bits */
253                 perm |= p->procmode & 0444;
254
255         len = tab->length;
256         switch(QID(c->qid)) {
257         case Qwait:
258                 len = p->nwait; /* incorrect size, but >0 means there's something to read */
259                 break;
260         case Qprofile:
261                 q = p->seg[TSEG];
262                 if(q != nil && q->profile != nil) {
263                         len = (q->top-q->base)>>LRESPROF;
264                         len *= sizeof(*q->profile);
265                 }
266                 break;
267         }
268
269         mkqid(&qid, path|tab->qid.path, c->qid.vers, QTFILE);
270         devdir(c, qid, tab->name, len, p->user, perm, dp);
271         return 1;
272 }
273
274 static void
275 _proctrace(Proc* p, Tevent etype, vlong ts)
276 {
277         Traceevent *te;
278
279         if (p->trace == 0 || topens == 0 ||
280                 tproduced - tconsumed >= Nevents)
281                 return;
282
283         te = &tevents[tproduced&Emask];
284         te->pid = p->pid;
285         te->etype = etype;
286         if (ts == 0)
287                 te->time = todget(nil);
288         else
289                 te->time = ts;
290         tproduced++;
291 }
292
293 static void
294 procinit(void)
295 {
296         if(conf.nproc >= (1<<(16-QSHIFT))-1)
297                 print("warning: too many procs for devproc\n");
298         addclock0link((void (*)(void))profclock, 113);  /* Relative prime to HZ */
299 }
300
301 static Chan*
302 procattach(char *spec)
303 {
304         return devattach('p', spec);
305 }
306
307 static Walkqid*
308 procwalk(Chan *c, Chan *nc, char **name, int nname)
309 {
310         return devwalk(c, nc, name, nname, 0, 0, procgen);
311 }
312
313 static int
314 procstat(Chan *c, uchar *db, int n)
315 {
316         return devstat(c, db, n, 0, 0, procgen);
317 }
318
319 /*
320  *  none can't read or write state on other
321  *  processes.  This is to contain access of
322  *  servers running as none should they be
323  *  subverted by, for example, a stack attack.
324  */
325 static void
326 nonone(Proc *p)
327 {
328         if(p == up)
329                 return;
330         if(strcmp(up->user, "none") != 0)
331                 return;
332         if(iseve())
333                 return;
334         error(Eperm);
335 }
336
/*
 * Open a file in #p.
 *
 * Directories are handed to devopen.  The trace file can only be
 * opened for reading and by one opener at a time; the first open
 * allocates the event buffer and installs _proctrace as the trace
 * hook.  For per-process files p->debug is held while the slot's
 * pid is compared with the pid in the qid and the per-file access
 * rules are applied.
 *
 * Returns the opened channel.  For the text file this is a
 * different channel obtained from proctext, and c is closed.
 * Raises Eperm, Eprocdied, Enomem, "already open" or Egreg.
 */
static Chan*
procopen(Chan *c, int omode)
{
	Proc *p;
	Pgrp *pg;
	Chan *tc;
	int pid;

	if(c->qid.type & QTDIR)
		return devopen(c, omode, 0, 0, procgen);

	if(QID(c->qid) == Qtrace){
		if (omode != OREAD) 
			error(Eperm);
		lock(&tlock);
		if (waserror()){
			/* both errors raised below happen after topens++, so undo it */
			topens--;
			unlock(&tlock);
			nexterror();
		}
		if (topens++ > 0)
			error("already open");
		if (tevents == nil){
			tevents = (Traceevent*)malloc(sizeof(Traceevent) * Nevents);
			if(tevents == nil)
				error(Enomem);
			tproduced = tconsumed = 0;
		}
		proctrace = _proctrace;
		unlock(&tlock);
		poperror();

		c->mode = openmode(omode);
		c->flag |= COPEN;
		c->offset = 0;
		return c;
	}

	p = proctab(SLOT(c->qid));
	eqlock(&p->debug);
	if(waserror()){
		qunlock(&p->debug);
		nexterror();
	}
	/* the slot may have been reused since the qid was made */
	pid = PID(c->qid);
	if(p->pid != pid)
		error(Eprocdied);

	omode = openmode(omode);

	switch(QID(c->qid)){
	case Qtext:
		if(omode != OREAD)
			error(Eperm);
		/* hand back the channel from proctext in place of c */
		tc = proctext(c, p);
		tc->offset = 0;
		qunlock(&p->debug);
		poperror();
		cclose(c);
		return tc;

	/* read-only files */
	case Qproc:
	case Qkregs:
	case Qsegment:
	case Qprofile:
	case Qns:
	case Qfd:
		if(omode != OREAD)
			error(Eperm);
		break;

	case Qnote:
		if(p->privatemem)
			error(Eperm);
		break;

	case Qmem:
	case Qctl:
		if(p->privatemem)
			error(Eperm);
		nonone(p);
		break;

	case Qargs:
	case Qnoteid:
	case Qstatus:
	case Qwait:
	case Qregs:
	case Qfpregs:
	case Qsyscall:
	case Qppid:
		nonone(p);
		break;

	case Qnotepg:
		nonone(p);
		pg = p->pgrp;
		if(pg == nil)
			error(Eprocdied);
		if(omode!=OWRITE)
			error(Eperm);
		/* remember which note group this channel addresses */
		c->pgrpid.path = pg->pgrpid+1;
		c->pgrpid.vers = p->noteid;
		break;

	default:
		print("procopen %#lux\n", QID(c->qid));
		error(Egreg);
	}

	/* Affix pid to qid */
	if(p->state != Dead)
		c->qid.vers = p->pid;

	/* make sure the process slot didn't get reallocated while we were playing */
	coherence();
	if(p->pid != pid)
		error(Eprocdied);

	tc = devopen(c, omode, 0, 0, procgen);
	qunlock(&p->debug);
	poperror();

	return tc;
}
462
/*
 * Change the owner and/or the default mode of a process's files.
 *
 * Directories are refused with Eperm and the trace file is passed
 * to devwstat.  Otherwise the caller must be the process's user
 * or eve; only eve may change the user.  A mode other than ~0 sets
 * p->procmode, which procgen uses as the default mode.
 *
 * Returns the number of bytes of db consumed by convM2D.
 * Raises Eperm, Eprocdied or Eshortstat.
 */
static int
procwstat(Chan *c, uchar *db, int n)
{
	Proc *p;
	Dir *d;

	if(c->qid.type&QTDIR)
		error(Eperm);

	if(QID(c->qid) == Qtrace)
		return devwstat(c, db, n);

	p = proctab(SLOT(c->qid));
	nonone(p);
	d = nil;	/* so the error path can free(d) before it is allocated */

	eqlock(&p->debug);
	if(waserror()){
		qunlock(&p->debug);
		free(d);
		nexterror();
	}

	if(p->pid != PID(c->qid))
		error(Eprocdied);

	if(strcmp(up->user, p->user) != 0 && !iseve())
		error(Eperm);

	/* the strings unpacked by convM2D are stored right after the Dir */
	d = smalloc(sizeof(Dir)+n);
	n = convM2D(db, n, &d[0], (char*)&d[1]);
	if(n == 0)
		error(Eshortstat);
	if(!emptystr(d->uid) && strcmp(d->uid, p->user) != 0){
		if(!iseve())
			error(Eperm);
		kstrdup(&p->user, d->uid);
	}
	/* p->procmode determines default mode for files in /proc */
	if(d->mode != ~0UL)
		p->procmode = d->mode&0777;

	qunlock(&p->debug);
	poperror();
	free(d);
	return n;
}
510
511 static void
512 procclose(Chan *c)
513 {
514         if(QID(c->qid) == Qtrace && (c->flag & COPEN) != 0){
515                 lock(&tlock);
516                 if(topens > 0)
517                         topens--;
518                 if(topens == 0)
519                         proctrace = nil;
520                 unlock(&tlock);
521         }
522 }
523
524 static int
525 procargs(Proc *p, char *buf, int nbuf)
526 {
527         int j, k, m;
528         char *a;
529         int n;
530
531         a = p->args;
532         if(p->setargs){
533                 snprint(buf, nbuf, "%s [%s]", p->text, p->args);
534                 return strlen(buf);
535         }
536         n = p->nargs;
537         for(j = 0; j < nbuf - 1; j += m){
538                 if(n <= 0)
539                         break;
540                 if(j != 0)
541                         buf[j++] = ' ';
542                 m = snprint(buf+j, nbuf-j, "%q",  a);
543                 k = strlen(a) + 1;
544                 a += k;
545                 n -= k;
546         }
547         return j;
548 }
549
550 static int
551 eventsavailable(void *)
552 {
553         return tproduced > tconsumed;
554 }
555
556 static int
557 prochaswaitq(void *x)
558 {
559         Chan *c;
560         Proc *p;
561
562         c = (Chan *)x;
563         p = proctab(SLOT(c->qid));
564         return p->pid != PID(c->qid) || p->waitq != nil;
565 }
566
567 static void
568 int2flag(int flag, char *s)
569 {
570         if(flag == 0){
571                 *s = '\0';
572                 return;
573         }
574         *s++ = '-';
575         if(flag & MAFTER)
576                 *s++ = 'a';
577         if(flag & MBEFORE)
578                 *s++ = 'b';
579         if(flag & MCREATE)
580                 *s++ = 'c';
581         if(flag & MCACHE)
582                 *s++ = 'C';
583         *s = '\0';
584 }
585
586 static int
587 readns1(Chan *c, Proc *p, char *buf, int nbuf)
588 {
589         Pgrp *pg;
590         Mount *t, *cm;
591         Mhead *f, *mh;
592         ulong minid, bestmid;
593         char flag[10], *srv;
594         int i;
595
596         pg = p->pgrp;
597         if(pg == nil || p->dot == nil || p->pid != PID(c->qid))
598                 error(Eprocdied);
599
600         bestmid = ~0;
601         minid = c->nrock;
602         if(minid == bestmid)
603                 return 0;
604
605         rlock(&pg->ns);
606
607         mh = nil;
608         cm = nil;
609         for(i = 0; i < MNTHASH; i++) {
610                 for(f = pg->mnthash[i]; f != nil; f = f->hash) {
611                         for(t = f->mount; t != nil; t = t->next) {
612                                 if(t->mountid >= minid && t->mountid < bestmid) {
613                                         bestmid = t->mountid;
614                                         cm = t;
615                                         mh = f;
616                                 }
617                         }
618                 }
619         }
620
621         if(bestmid == ~0) {
622                 c->nrock = bestmid;
623                 i = snprint(buf, nbuf, "cd %s\n", p->dot->path->s);
624         } else {
625                 c->nrock = bestmid+1;
626
627                 int2flag(cm->mflag, flag);
628                 if(strcmp(cm->to->path->s, "#M") == 0){
629                         srv = srvname(cm->to->mchan);
630                         i = snprint(buf, nbuf, "mount %s %s %s %s\n", flag,
631                                 srv==nil? cm->to->mchan->path->s : srv,
632                                 mh->from->path->s, cm->spec? cm->spec : "");
633                         free(srv);
634                 }else{
635                         i = snprint(buf, nbuf, "bind %s %s %s\n", flag,
636                                 cm->to->path->s, mh->from->path->s);
637                 }
638         }
639
640         runlock(&pg->ns);
641
642         return i;
643 }
644
645 int
646 procfdprint(Chan *c, int fd, char *s, int ns)
647 {
648         return snprint(s, ns, "%3d %.2s %C %4ld (%.16llux %lud %.2ux) %5ld %8lld %s\n",
649                 fd,
650                 &"r w rw"[(c->mode&3)<<1],
651                 devtab[c->type]->dc, c->dev,
652                 c->qid.path, c->qid.vers, c->qid.type,
653                 c->iounit, c->offset, c->path->s);
654 }
655
656 static int
657 readfd1(Chan *c, Proc *p, char *buf, int nbuf)
658 {
659         Fgrp *fg;
660         int n, i;
661
662         fg = p->fgrp;
663         if(fg == nil || p->dot == nil || p->pid != PID(c->qid))
664                 return 0;
665
666         if(c->nrock == 0){
667                 c->nrock = 1;
668                 return snprint(buf, nbuf, "%s\n", p->dot->path->s);
669         }
670
671         lock(fg);
672         n = 0;
673         for(;;){
674                 i = c->nrock-1;
675                 if(i < 0 || i > fg->maxfd)
676                         break;
677                 c->nrock++;
678                 if(fg->fd[i] != nil){
679                         n = procfdprint(fg->fd[i], i, buf, nbuf);
680                         break;
681                 }
682         }
683         unlock(fg);
684
685         return n;
686 }
687
688 /*
689  * userspace can't pass negative file offset for a
690  * 64 bit kernel address, so we use 63 bit and sign
691  * extend to 64 bit.
692  */
693 static uintptr
694 off2addr(vlong off)
695 {
696         off <<= 1;
697         off >>= 1;
698         return off;
699 }
700
/*
 * Read from a file in #p.
 *
 * Directories are generated by procgen.  The trace file returns as
 * many whole Traceevents as fit in n, copied out of the tevents
 * buffer.  For per-process files the slot's pid is compared with
 * the pid in the qid before dispatching on the file type; several
 * cases format their answer into statbuf and share the copy-out at
 * the statbufread label, and the register files share regread.
 *
 * Returns the number of bytes stored at va, 0 at end of file.
 * Raises Eprocdied when the slot no longer holds the pid in the
 * qid and Egreg for a file type without a read case; individual
 * cases raise further errors (Eperm, Ebadarg, Enoreg, ...).
 */
static long
procread(Chan *c, void *va, long n, vlong off)
{
	char *a, *sps, statbuf[1024];
	int i, j, navail, ne, rsize;
	long l;
	uchar *rptr;
	uintptr addr;
	ulong offset;
	Confmem *cm;
	Proc *p;
	Segment *sg, *s;
	Ureg kur;
	Waitq *wq;

	a = va;
	offset = off;
	if(c->qid.type & QTDIR)
		return devdirread(c, a, n, 0, 0, procgen);

	if(QID(c->qid) == Qtrace){
		if(!eventsavailable(nil))
			return 0;

		rptr = (uchar*)va;
		navail = tproduced - tconsumed;
		if(navail > n / sizeof(Traceevent))
			navail = n / sizeof(Traceevent);
		while(navail > 0) {
			/* a run stops at the end of the buffer; the next pass continues from index 0 */
			ne = ((tconsumed & Emask) + navail > Nevents)? 
					Nevents - (tconsumed & Emask): navail;
			memmove(rptr, &tevents[tconsumed & Emask], 
					ne * sizeof(Traceevent));

			tconsumed += ne;
			rptr += ne * sizeof(Traceevent);
			navail -= ne;
		}
		return rptr - (uchar*)va;
	}

	p = proctab(SLOT(c->qid));
	if(p->pid != PID(c->qid))
		error(Eprocdied);

	switch(QID(c->qid)){
	case Qargs:
		eqlock(&p->debug);
		j = procargs(p, statbuf, sizeof(statbuf));
		qunlock(&p->debug);
		if(offset >= j)
			return 0;
		if(offset+n > j)
			n = j-offset;
	statbufread:
		/* shared exit: copy n bytes of statbuf starting at offset */
		memmove(a, statbuf+offset, n);
		return n;

	case Qsyscall:
		eqlock(&p->debug);
		if(waserror()){
			qunlock(&p->debug);
			nexterror();
		}
		if(p->pid != PID(c->qid))
			error(Eprocdied);
		j = 0;
		if(p->syscalltrace != nil)
			j = readstr(offset, a, n, p->syscalltrace);
		qunlock(&p->debug);
		poperror();
		return j;

	case Qmem:
		addr = off2addr(off);
		/* addresses below KZERO go through procctlmemio */
		if(addr < KZERO)
			return procctlmemio(p, addr, n, va, 1);

		/* everything else is a kernel address and reserved for eve */
		if(!iseve())
			error(Eperm);

		/* validate kernel addresses */
		if(addr < (uintptr)end) {
			if(addr+n > (uintptr)end)
				n = (uintptr)end - addr;
			memmove(a, (char*)addr, n);
			return n;
		}
		for(i=0; i<nelem(conf.mem); i++){
			cm = &conf.mem[i];
			/* klimit-1 because klimit might be zero! */
			if(cm->kbase <= addr && addr <= cm->klimit-1){
				if(addr+n >= cm->klimit-1)
					n = cm->klimit - addr;
				memmove(a, (char*)addr, n);
				return n;
			}
		}
		error(Ebadarg);

	case Qprofile:
		s = p->seg[TSEG];
		if(s == nil || s->profile == nil)
			error("profile is off");
		/* size of the profile array in bytes, computed as in procgen */
		i = (s->top-s->base)>>LRESPROF;
		i *= sizeof(s->profile[0]);
		if(i < 0 || offset >= i)
			return 0;
		if(offset+n > i)
			n = i - offset;
		memmove(a, ((char*)s->profile)+offset, n);
		return n;

	case Qnote:
		eqlock(&p->debug);
		if(waserror()){
			qunlock(&p->debug);
			nexterror();
		}
		if(p->pid != PID(c->qid))
			error(Eprocdied);
		if(n < 1)	/* must accept at least the '\0' */
			error(Etoosmall);
		if(p->nnote == 0)
			n = 0;
		else {
			/* hand out the first note, truncated to fit and NUL-terminated */
			i = strlen(p->note[0].msg) + 1;
			if(i < n)
				n = i;
			memmove(a, p->note[0].msg, n-1);
			a[n-1] = '\0';
			if(--p->nnote == 0)
				p->notepending = 0;
			/* shift the remaining notes down one place */
			memmove(p->note, p->note+1, p->nnote*sizeof(Note));
		}
		poperror();
		qunlock(&p->debug);
		return n;

	case Qproc:
		/* raw bytes of the Proc structure */
		if(offset >= sizeof(Proc))
			return 0;
		if(offset+n > sizeof(Proc))
			n = sizeof(Proc) - offset;
		memmove(a, ((char*)p)+offset, n);
		return n;

	case Qregs:
		/* p->dbgreg may be nil; regread turns that into Enoreg */
		rptr = (uchar*)p->dbgreg;
		rsize = sizeof(Ureg);
		goto regread;

	case Qkregs:
		memset(&kur, 0, sizeof(Ureg));
		setkernur(&kur, p);
		rptr = (uchar*)&kur;
		rsize = sizeof(Ureg);
		goto regread;

	case Qfpregs:
		rptr = (uchar*)&p->fpsave;
		rsize = sizeof(FPsave);
	regread:
		/* shared exit: copy from an rsize-byte register image at rptr */
		if(rptr == nil)
			error(Enoreg);
		if(offset >= rsize)
			return 0;
		if(offset+n > rsize)
			n = rsize - offset;
		memmove(a, rptr+offset, n);
		return n;

	case Qstatus:
		if(offset >= STATSIZE)
			return 0;
		if(offset+n > STATSIZE)
			n = STATSIZE - offset;

		sps = p->psstate;
		if(sps == nil)
			sps = statename[p->state];

		/* blank-padded fixed-width fields: text, user, state, then nine numbers */
		memset(statbuf, ' ', sizeof statbuf);
		readstr(0, statbuf+0*KNAMELEN, KNAMELEN-1, p->text);
		readstr(0, statbuf+1*KNAMELEN, KNAMELEN-1, p->user);
		readstr(0, statbuf+2*KNAMELEN, 11, sps);

		j = 2*KNAMELEN + 12;
		for(i = 0; i < 6; i++) {
			l = p->time[i];
			/* the TReal slot is reported relative to the current tick count */
			if(i == TReal)
				l = MACHP(0)->ticks - l;
			l = TK2MS(l);
			readnum(0, statbuf+j+NUMSIZE*i, NUMSIZE, l, NUMSIZE);
		}

		/* memory in Kbytes, base priority, current priority */
		readnum(0, statbuf+j+NUMSIZE*6, NUMSIZE, procpagecount(p)*BY2PG/1024, NUMSIZE);
		readnum(0, statbuf+j+NUMSIZE*7, NUMSIZE, p->basepri, NUMSIZE);
		readnum(0, statbuf+j+NUMSIZE*8, NUMSIZE, p->priority, NUMSIZE);
		goto statbufread;

	case Qsegment:
		/* one line per attached segment: type, flags, base, top, ref count */
		j = 0;
		for(i = 0; i < NSEG; i++) {
			sg = p->seg[i];
			if(sg == nil)
				continue;
			j += sprint(statbuf+j, "%-6s %c%c %8p %8p %4ld\n",
				sname[sg->type&SG_TYPE],
				sg->type&SG_RONLY ? 'R' : ' ',
				sg->profile ? 'P' : ' ',
				sg->base, sg->top, sg->ref);
		}
		if(offset >= j)
			return 0;
		if(offset+n > j)
			n = j-offset;
		goto statbufread;

	case Qwait:
		/* one reader at a time; a second reader gets Einuse instead of blocking */
		if(!canqlock(&p->qwaitr))
			error(Einuse);

		if(waserror()) {
			qunlock(&p->qwaitr);
			nexterror();
		}

		lock(&p->exl);
		while(p->waitq == nil && p->pid == PID(c->qid)) {
			/* reading our own wait file with no children would sleep forever */
			if(up == p && p->nchild == 0) {
				unlock(&p->exl);
				error(Enochild);
			}
			/* p->exl is dropped across the sleep and retaken before rechecking */
			unlock(&p->exl);
			sleep(&p->waitr, prochaswaitq, c);
			lock(&p->exl);
		}
		if(p->pid != PID(c->qid)){
			unlock(&p->exl);
			error(Eprocdied);
		}
		/* unlink the first wait record; it is formatted and freed below */
		wq = p->waitq;
		p->waitq = wq->next;
		p->nwait--;
		unlock(&p->exl);

		qunlock(&p->qwaitr);
		poperror();

		j = snprint(statbuf, sizeof(statbuf), "%d %lud %lud %lud %q",
			wq->w.pid,
			wq->w.time[TUser], wq->w.time[TSys], wq->w.time[TReal],
			wq->w.msg);
		free(wq);
		if(j < n)
			n = j;
		/* each read returns one record from its start, whatever the file offset */
		offset = 0;
		goto statbufread;

	case Qns:
	case Qfd:
		/*
		 * These files are generated a line at a time.  c->nrock is
		 * the generator's position (see readns1 and readfd1) and
		 * c->mrock is the file offset just past the last line made.
		 * A read that does not continue from c->mrock starts over
		 * and regenerates lines until one reaches past offset.
		 */
		eqlock(&p->debug);
		if(waserror()){
			qunlock(&p->debug);
			nexterror();
		}
		if(offset == 0 || offset != c->mrock)
			c->nrock = c->mrock = 0;
		do {
			if(QID(c->qid) == Qns)
				j = readns1(c, p, statbuf, sizeof(statbuf));
			else
				j = readfd1(c, p, statbuf, sizeof(statbuf));
			if(j == 0)
				break;
			c->mrock += j;
		} while(c->mrock <= offset);
		/* i: bytes of the last line lying at or after offset */
		i = c->mrock - offset;
		qunlock(&p->debug);
		poperror();

		if(i <= 0)
			return 0;
		if(i < n)
			n = i;
		/* statbuf holds only the last line (j bytes); start i bytes before its end */
		offset = j - i;
		goto statbufread;

	case Qnoteid:
		return readnum(offset, va, n, p->noteid, NUMSIZE);

	case Qppid:
		return readnum(offset, va, n, p->parentpid, NUMSIZE);

	}
	error(Egreg);
	return 0;		/* not reached */
}
1000
1001 static long
1002 procwrite(Chan *c, void *va, long n, vlong off)
1003 {
1004         int id, m;
1005         Proc *p, *t, *et;
1006         char *a, *arg, buf[ERRMAX];
1007         ulong offset;
1008
1009         a = va;
1010         offset = off;
1011         if(c->qid.type & QTDIR)
1012                 error(Eisdir);
1013
1014         p = proctab(SLOT(c->qid));
1015
1016         /* Use the remembered noteid in the channel rather
1017          * than the process pgrpid
1018          */
1019         if(QID(c->qid) == Qnotepg) {
1020                 pgrpnote(NOTEID(c->pgrpid), va, n, NUser);
1021                 return n;
1022         }
1023
1024         eqlock(&p->debug);
1025         if(waserror()){
1026                 qunlock(&p->debug);
1027                 nexterror();
1028         }
1029         if(p->pid != PID(c->qid))
1030                 error(Eprocdied);
1031
1032         switch(QID(c->qid)){
1033         case Qargs:
1034                 if(n == 0)
1035                         error(Eshort);
1036                 if(n >= ERRMAX)
1037                         error(Etoobig);
1038                 arg = malloc(n+1);
1039                 if(arg == nil)
1040                         error(Enomem);
1041                 memmove(arg, va, n);
1042                 m = n;
1043                 if(arg[m-1] != 0)
1044                         arg[m++] = 0;
1045                 free(p->args);
1046                 p->nargs = m;
1047                 p->args = arg;
1048                 p->setargs = 1;
1049                 break;
1050
1051         case Qmem:
1052                 if(p->state != Stopped)
1053                         error(Ebadctl);
1054                 n = procctlmemio(p, off2addr(off), n, va, 0);
1055                 break;
1056
1057         case Qregs:
1058                 if(offset >= sizeof(Ureg))
1059                         n = 0;
1060                 else if(offset+n > sizeof(Ureg))
1061                         n = sizeof(Ureg) - offset;
1062                 if(p->dbgreg == nil)
1063                         error(Enoreg);
1064                 setregisters(p->dbgreg, (char*)(p->dbgreg)+offset, va, n);
1065                 break;
1066
1067         case Qfpregs:
1068                 if(offset >= sizeof(FPsave))
1069                         n = 0;
1070                 else if(offset+n > sizeof(FPsave))
1071                         n = sizeof(FPsave) - offset;
1072                 memmove((uchar*)&p->fpsave+offset, va, n);
1073                 break;
1074
1075         case Qctl:
1076                 procctlreq(p, va, n);
1077                 break;
1078
1079         case Qnote:
1080                 if(p->kp)
1081                         error(Eperm);
1082                 if(n >= ERRMAX-1)
1083                         error(Etoobig);
1084                 memmove(buf, va, n);
1085                 buf[n] = 0;
1086                 if(!postnote(p, 0, buf, NUser))
1087                         error("note not posted");
1088                 break;
1089         case Qnoteid:
1090                 if(p->kp)
1091                         error(Eperm);
1092                 id = atoi(a);
1093                 if(id <= 0)
1094                         error(Ebadarg);
1095                 if(id == p->pid) {
1096                         p->noteid = id;
1097                         break;
1098                 }
1099                 t = proctab(0);
1100                 for(et = t+conf.nproc; t < et; t++) {
1101                         if(t->state == Dead || t->kp)
1102                                 continue;
1103                         if(id == t->noteid) {
1104                                 nonone(t);
1105                                 if(strcmp(p->user, t->user) != 0)
1106                                         error(Eperm);
1107                                 p->noteid = id;
1108                                 break;
1109                         }
1110                 }
1111                 if(p->noteid != id)
1112                         error(Ebadarg);
1113                 break;
1114         default:
1115                 print("unknown qid in procwrite\n");
1116                 error(Egreg);
1117         }
1118         poperror();
1119         qunlock(&p->debug);
1120         return n;
1121 }
1122
1123 Dev procdevtab = {
1124         'p',
1125         "proc",
1126
1127         devreset,
1128         procinit,
1129         devshutdown,
1130         procattach,
1131         procwalk,
1132         procstat,
1133         procopen,
1134         devcreate,
1135         procclose,
1136         procread,
1137         devbread,
1138         procwrite,
1139         devbwrite,
1140         devremove,
1141         procwstat,
1142 };
1143
1144 Chan*
1145 proctext(Chan *c, Proc *p)
1146 {
1147         Chan *tc;
1148         Image *i;
1149         Segment *s;
1150
1151         s = p->seg[TSEG];
1152         if(s == nil)
1153                 error(Enonexist);
1154         if(p->state==Dead)
1155                 error(Eprocdied);
1156
1157         i = s->image;
1158         if(i == nil)
1159                 error(Eprocdied);
1160
1161         lock(i);
1162         if(waserror()) {
1163                 unlock(i);
1164                 nexterror();
1165         }
1166
1167         if(i->s != s)
1168                 error(Eprocdied);
1169                 
1170         tc = i->c;
1171         if(tc == nil)
1172                 error(Eprocdied);
1173
1174         if(incref(tc) == 1 || (tc->flag&COPEN) == 0 || tc->mode != OREAD) {
1175                 cclose(tc);
1176                 error(Eprocdied);
1177         }
1178
1179         if(p->pid != PID(c->qid)) {
1180                 cclose(tc);
1181                 error(Eprocdied);
1182         }
1183
1184         unlock(i);
1185         poperror();
1186
1187         return tc;
1188 }
1189
1190 void
1191 procstopwait(Proc *p, int ctl)
1192 {
1193         int pid;
1194
1195         if(p->pdbg != nil)
1196                 error(Einuse);
1197         if(procstopped(p) || p->state == Broken)
1198                 return;
1199         pid = p->pid;
1200         if(pid == 0)
1201                 error(Eprocdied);
1202         if(ctl != 0)
1203                 p->procctl = ctl;
1204         p->pdbg = up;
1205         qunlock(&p->debug);
1206         up->psstate = "Stopwait";
1207         if(waserror()) {
1208                 qlock(&p->debug);
1209                 p->pdbg = nil;
1210                 nexterror();
1211         }
1212         sleep(&up->sleep, procstopped, p);
1213         poperror();
1214         qlock(&p->debug);
1215         if(p->pid != pid)
1216                 error(Eprocdied);
1217 }
1218
1219 void
1220 procctlclosefiles(Proc *p, int all, int fd)
1221 {
1222         Fgrp *f;
1223         Chan *c;
1224
1225         if(fd < 0)
1226                 error(Ebadfd);
1227         f = p->fgrp;
1228         if(f == nil)
1229                 error(Eprocdied);
1230
1231         incref(f);
1232         lock(f);
1233         while(fd <= f->maxfd){
1234                 c = f->fd[fd];
1235                 if(c != nil){
1236                         f->fd[fd] = nil;
1237                         unlock(f);
1238                         qunlock(&p->debug);
1239                         cclose(c);
1240                         qlock(&p->debug);
1241                         lock(f);
1242                 }
1243                 if(!all)
1244                         break;
1245                 fd++;
1246         }
1247         unlock(f);
1248         closefgrp(f);
1249 }
1250
1251 static char *
1252 parsetime(vlong *rt, char *s)
1253 {
1254         uvlong ticks;
1255         ulong l;
1256         char *e, *p;
1257         static int p10[] = {100000000, 10000000, 1000000, 100000, 10000, 1000, 100, 10, 1};
1258
1259         if (s == nil)
1260                 return("missing value");
1261         ticks=strtoul(s, &e, 10);
1262         if (*e == '.'){
1263                 p = e+1;
1264                 l = strtoul(p, &e, 10);
1265                 if(e-p > nelem(p10))
1266                         return "too many digits after decimal point";
1267                 if(e-p == 0)
1268                         return "ill-formed number";
1269                 l *= p10[e-p-1];
1270         }else
1271                 l = 0;
1272         if (*e == '\0' || strcmp(e, "s") == 0){
1273                 ticks = 1000000000 * ticks + l;
1274         }else if (strcmp(e, "ms") == 0){
1275                 ticks = 1000000 * ticks + l/1000;
1276         }else if (strcmp(e, "µs") == 0 || strcmp(e, "us") == 0){
1277                 ticks = 1000 * ticks + l/1000000;
1278         }else if (strcmp(e, "ns") != 0)
1279                 return "unrecognized unit";
1280         *rt = ticks;
1281         return nil;
1282 }
1283
1284 void
1285 procctlreq(Proc *p, char *va, int n)
1286 {
1287         Segment *s;
1288         int npc, pri;
1289         Cmdbuf *cb;
1290         Cmdtab *ct;
1291         vlong time;
1292         char *e;
1293         void (*pt)(Proc*, int, vlong);
1294
1295         if(p->kp)       /* no ctl requests to kprocs */
1296                 error(Eperm);
1297
1298         cb = parsecmd(va, n);
1299         if(waserror()){
1300                 free(cb);
1301                 nexterror();
1302         }
1303
1304         ct = lookupcmd(cb, proccmd, nelem(proccmd));
1305
1306         switch(ct->index){
1307         case CMclose:
1308                 procctlclosefiles(p, 0, atoi(cb->f[1]));
1309                 break;
1310         case CMclosefiles:
1311                 procctlclosefiles(p, 1, 0);
1312                 break;
1313         case CMhang:
1314                 p->hang = 1;
1315                 break;
1316         case CMkill:
1317                 switch(p->state) {
1318                 case Broken:
1319                         unbreak(p);
1320                         break;
1321                 case Stopped:
1322                         p->procctl = Proc_exitme;
1323                         postnote(p, 0, "sys: killed", NExit);
1324                         ready(p);
1325                         break;
1326                 default:
1327                         p->procctl = Proc_exitme;
1328                         postnote(p, 0, "sys: killed", NExit);
1329                 }
1330                 break;
1331         case CMnohang:
1332                 p->hang = 0;
1333                 break;
1334         case CMnoswap:
1335                 p->noswap = 1;
1336                 break;
1337         case CMpri:
1338                 pri = atoi(cb->f[1]);
1339                 if(pri > PriNormal && !iseve())
1340                         error(Eperm);
1341                 procpriority(p, pri, 0);
1342                 break;
1343         case CMfixedpri:
1344                 pri = atoi(cb->f[1]);
1345                 if(pri > PriNormal && !iseve())
1346                         error(Eperm);
1347                 procpriority(p, pri, 1);
1348                 break;
1349         case CMprivate:
1350                 p->privatemem = 1;
1351                 break;
1352         case CMprofile:
1353                 s = p->seg[TSEG];
1354                 if(s == nil || (s->type&SG_TYPE) != SG_TEXT)
1355                         error(Ebadctl);
1356                 if(s->profile != nil)
1357                         free(s->profile);
1358                 npc = (s->top-s->base)>>LRESPROF;
1359                 s->profile = malloc(npc*sizeof(*s->profile));
1360                 if(s->profile == nil)
1361                         error(Enomem);
1362                 break;
1363         case CMstart:
1364                 if(p->state != Stopped)
1365                         error(Ebadctl);
1366                 ready(p);
1367                 break;
1368         case CMstartstop:
1369                 if(p->state != Stopped)
1370                         error(Ebadctl);
1371                 p->procctl = Proc_traceme;
1372                 ready(p);
1373                 procstopwait(p, Proc_traceme);
1374                 break;
1375         case CMstartsyscall:
1376                 if(p->state != Stopped)
1377                         error(Ebadctl);
1378                 p->procctl = Proc_tracesyscall;
1379                 ready(p);
1380                 procstopwait(p, Proc_tracesyscall);
1381                 break;
1382         case CMstop:
1383                 procstopwait(p, Proc_stopme);
1384                 break;
1385         case CMwaitstop:
1386                 procstopwait(p, 0);
1387                 break;
1388         case CMwired:
1389                 procwired(p, atoi(cb->f[1]));
1390                 break;
1391         case CMtrace:
1392                 switch(cb->nf){
1393                 case 1:
1394                         p->trace ^= 1;
1395                         break;
1396                 case 2:
1397                         p->trace = (atoi(cb->f[1]) != 0);
1398                         break;
1399                 default:
1400                         error("args");
1401                 }
1402                 break;
1403         case CMinterrupt:
1404                 postnote(p, 0, nil, NUser);
1405                 break;
1406         case CMnointerrupt:
1407                 if(p->nnote == 0)
1408                         p->notepending = 0;
1409                 else
1410                         error("notes pending");
1411                 break;
1412         /* real time */
1413         case CMperiod:
1414                 if(p->edf == nil)
1415                         edfinit(p);
1416                 if(e=parsetime(&time, cb->f[1]))        /* time in ns */
1417                         error(e);
1418                 edfstop(p);
1419                 p->edf->T = time/1000;  /* Edf times are in µs */
1420                 break;
1421         case CMdeadline:
1422                 if(p->edf == nil)
1423                         edfinit(p);
1424                 if(e=parsetime(&time, cb->f[1]))
1425                         error(e);
1426                 edfstop(p);
1427                 p->edf->D = time/1000;
1428                 break;
1429         case CMcost:
1430                 if(p->edf == nil)
1431                         edfinit(p);
1432                 if(e=parsetime(&time, cb->f[1]))
1433                         error(e);
1434                 edfstop(p);
1435                 p->edf->C = time/1000;
1436                 break;
1437         case CMsporadic:
1438                 if(p->edf == nil)
1439                         edfinit(p);
1440                 p->edf->flags |= Sporadic;
1441                 break;
1442         case CMdeadlinenotes:
1443                 if(p->edf == nil)
1444                         edfinit(p);
1445                 p->edf->flags |= Sendnotes;
1446                 break;
1447         case CMadmit:
1448                 if(p->edf == nil)
1449                         error("edf params");
1450                 if(e = edfadmit(p))
1451                         error(e);
1452                 break;
1453         case CMextra:
1454                 if(p->edf == nil)
1455                         edfinit(p);
1456                 p->edf->flags |= Extratime;
1457                 break;
1458         case CMexpel:
1459                 if(p->edf != nil)
1460                         edfstop(p);
1461                 break;
1462         case CMevent:
1463                 pt = proctrace;
1464                 if(up->trace && pt != nil)
1465                         pt(up, SUser, 0);
1466                 break;
1467         }
1468
1469         poperror();
1470         free(cb);
1471 }
1472
1473 int
1474 procstopped(void *a)
1475 {
1476         return ((Proc*)a)->state == Stopped;
1477 }
1478
1479 int
1480 procctlmemio(Proc *p, uintptr offset, int n, void *va, int read)
1481 {
1482         KMap *k;
1483         Pte *pte;
1484         Page *pg;
1485         Segment *s;
1486         uintptr soff;
1487         char *a, *b;
1488         int i, l;
1489
1490         /* Only one page at a time */
1491         l = BY2PG - (offset&(BY2PG-1));
1492         if(n > l)
1493                 n = l;
1494
1495         /*
1496          * Make temporary copy to avoid fault while we have
1497          * segment locked as we would deadlock when trying
1498          * to read the calling procs memory.
1499          */
1500         a = malloc(n);
1501         if(a == nil)
1502                 error(Enomem);
1503         if(waserror()) {
1504                 free(a);
1505                 nexterror();
1506         }
1507
1508         if(!read)
1509                 memmove(a, va, n);      /* can fault */
1510
1511         for(;;) {
1512                 s = seg(p, offset, 0);
1513                 if(s == nil)
1514                         error(Ebadarg);
1515
1516                 eqlock(&p->seglock);
1517                 if(waserror()) {
1518                         qunlock(&p->seglock);
1519                         nexterror();
1520                 }
1521
1522                 for(i = 0; i < NSEG; i++) {
1523                         if(p->seg[i] == s)
1524                                 break;
1525                 }
1526                 if(i == NSEG)
1527                         error(Egreg);   /* segment gone */
1528
1529                 eqlock(s);
1530                 if(waserror()){
1531                         qunlock(s);
1532                         nexterror();
1533                 }
1534                 if(!read && (s->type&SG_TYPE) == SG_TEXT) {
1535                         s = txt2data(s);
1536                         p->seg[i] = s;
1537                 }
1538                 incref(s);
1539                 qunlock(&p->seglock);
1540                 poperror();
1541                 poperror();
1542                 /* segment s still locked, fixfault() unlocks */
1543                 if(waserror()){
1544                         putseg(s);
1545                         nexterror();
1546                 }
1547                 if(fixfault(s, offset, read, 0) == 0)
1548                         break;
1549                 putseg(s);
1550                 poperror();
1551         }
1552
1553         /*
1554          * Only access the page while segment is locked
1555          * as the proc could segfree or relocate the pte
1556          * concurrently.
1557          */ 
1558         eqlock(s);
1559         if(waserror()){
1560                 qunlock(s);
1561                 nexterror();
1562         }
1563         if(offset+n >= s->top)
1564                 n = s->top-offset;
1565         soff = offset-s->base;
1566         pte = s->map[soff/PTEMAPMEM];
1567         if(pte == nil)
1568                 error(Egreg);   /* page gone, should retry? */
1569         pg = pte->pages[(soff&(PTEMAPMEM-1))/BY2PG];
1570         if(pagedout(pg))
1571                 error(Egreg);   /* page gone, should retry?  */
1572
1573         /* Map and copy the page */
1574         k = kmap(pg);
1575         b = (char*)VA(k);
1576         b += offset&(BY2PG-1);
1577         if(read)
1578                 memmove(a, b, n);
1579         else
1580                 memmove(b, a, n);
1581         kunmap(k);
1582
1583         /* Ensure the process sees text page changes */
1584         if(s->flushme)
1585                 memset(pg->cachectl, PG_TXTFLUSH, sizeof(pg->cachectl));
1586
1587         if(!read)
1588                 p->newtlb = 1;
1589
1590         qunlock(s);
1591         poperror();
1592         putseg(s);
1593         poperror();
1594
1595         if(read)
1596                 memmove(va, a, n);      /* can fault */
1597
1598         free(a);
1599         poperror();
1600
1601         return n;
1602 }