plan9front.git: sys/src/9/port/proc.c
1 #include        <u.h>
2 #include        "../port/lib.h"
3 #include        "mem.h"
4 #include        "dat.h"
5 #include        "fns.h"
6 #include        "../port/error.h"
7 #include        "edf.h"
8 #include        <trace.h>
9
10 int     schedgain = 30; /* units in seconds */
11 int     nrdy;
12
13 void updatecpu(Proc*);
14 int reprioritize(Proc*);
15
16 ulong   delayedscheds;  /* statistics */
17 ulong   skipscheds;
18 ulong   preempts;
19 ulong   load;
20
21 static struct Procalloc
22 {
23         Lock;
24         Proc*   ht[128];
25         Proc*   arena;
26         Proc*   free;
27 } procalloc;
28
29 enum
30 {
31         Q=10,
32         DQ=4,
33         Scaling=2,
34 };
35
36 Schedq  runq[Nrq];
37 ulong   runvec;
38
39 char *statename[] =
40 {       /* BUG: generate automatically */
41         "Dead",
42         "Moribund",
43         "Ready",
44         "Scheding",
45         "Running",
46         "Queueing",
47         "QueueingR",
48         "QueueingW",
49         "Wakeme",
50         "Broken",
51         "Stopped",
52         "Rendez",
53         "Waitrelease",
54 };
55
56 static void pidfree(Proc*);
57 static void rebalance(void);
58
59 /*
60  * Always splhi()'ed.
61  */
62 void
63 schedinit(void)         /* never returns */
64 {
65         Edf *e;
66
67         setlabel(&m->sched);
68         if(up != nil) {
69                 if((e = up->edf) != nil && (e->flags & Admitted))
70                         edfrecord(up);
71                 m->proc = nil;
72                 switch(up->state) {
73                 case Running:
74                         ready(up);
75                         break;
76                 case Moribund:
77                         up->state = Dead;
78                         edfstop(up);
79                         if(up->edf != nil)
80                                 free(up->edf);
81                         up->edf = nil;
82
83                         /*
84                          * Holding locks from pexit:
85                          *      procalloc
86                          *      palloc
87                          */
88                         mmurelease(up);
89                         unlock(&palloc);
90
91                         up->mach = nil;
92                         updatecpu(up);
93
94                         up->qnext = procalloc.free;
95                         procalloc.free = up;
96
97                         /* proc is free now, make sure unlock() won't touch it */
98                         up = procalloc.Lock.p = nil;
99                         unlock(&procalloc);
100                         sched();
101                 }
102                 up->mach = nil;
103                 updatecpu(up);
104                 up = nil;
105         }
106         sched();
107 }
108
109 /*
110  *  If changing this routine, look also at sleep().  It
111  *  contains a copy of the guts of sched().
112  */
113 void
114 sched(void)
115 {
116         Proc *p;
117
118         if(m->ilockdepth)
119                 panic("cpu%d: ilockdepth %d, last lock %#p at %#p, sched called from %#p",
120                         m->machno,
121                         m->ilockdepth,
122                         up != nil ? up->lastilock: nil,
123                         (up != nil && up->lastilock != nil) ? up->lastilock->pc: 0,
124                         getcallerpc(&p+2));
125         if(up != nil) {
126                 /*
127                  * Delay the sched until the process gives up the locks
128                  * it is holding.  This avoids dumb lock loops.
129                  * Don't delay if the process is Moribund.
130                  * It called sched to die.
131                  * But do sched eventually.  This avoids a missing unlock
132                  * from hanging the entire kernel. 
133                  * But don't reschedule procs holding palloc or procalloc.
134                  * Those are far too important to be holding while asleep.
135                  *
136                  * This test is not exact.  There can still be a few instructions
137                  * in the middle of taslock when a process holds a lock
138                  * but Lock.p has not yet been initialized.
139                  */
140                 if(up->nlocks)
141                 if(up->state != Moribund)
142                 if(up->delaysched < 20
143                 || palloc.Lock.p == up
144                 || fscache.Lock.p == up
145                 || procalloc.Lock.p == up){
146                         up->delaysched++;
147                         delayedscheds++;
148                         return;
149                 }
150                 up->delaysched = 0;
151
152                 splhi();
153
154                 /* statistics */
155                 m->cs++;
156
157                 procsave(up);
158                 if(setlabel(&up->sched)){
159                         procrestore(up);
160                         spllo();
161                         return;
162                 }
163                 gotolabel(&m->sched);
164         }
165         p = runproc();
166         if(p->edf == nil){
167                 updatecpu(p);
168                 p->priority = reprioritize(p);
169         }
170         if(p != m->readied)
171                 m->schedticks = m->ticks + HZ/10;
172         m->readied = nil;
173         up = p;
174         up->state = Running;
175         up->mach = MACHP(m->machno);
176         m->proc = up;
177         mmuswitch(up);
178         gotolabel(&up->sched);
179 }
180
181 int
182 anyready(void)
183 {
184         return runvec;
185 }
186
187 int
188 anyhigher(void)
189 {
190         return runvec & ~((1<<(up->priority+1))-1);
191 }
192
193 /*
194  *  here once per clock tick to see if we should resched
195  */
196 void
197 hzsched(void)
198 {
199         /* once a second, rebalance will reprioritize ready procs */
200         if(m->machno == 0)
201                 rebalance();
202
203         /* unless preempted, get to run for at least 100ms */
204         if(anyhigher()
205         || (!up->fixedpri && (long)(m->ticks - m->schedticks) > 0 && anyready())){
206                 m->readied = nil;       /* avoid cooperative scheduling */
207                 up->delaysched++;
208         }
209 }
210
211 /*
212  *  here at the end of non-clock interrupts to see if we should preempt the
213  *  current process.  Returns 1 if preempted, 0 otherwise.
214  */
215 int
216 preempted(void)
217 {
218         if(up != nil && up->state == Running)
219         if(up->preempted == 0)
220         if(anyhigher())
221         if(!active.exiting){
222                 m->readied = nil;       /* avoid cooperative scheduling */
223                 up->preempted = 1;
224                 sched();
225                 splhi();
226                 up->preempted = 0;
227                 return 1;
228         }
229         return 0;
230 }
231
232 /*
233  * Update the cpu time average for this particular process,
234  * which is about to change from up -> not up or vice versa.
235  * p->lastupdate is the last time an updatecpu happened.
236  *
237  * The cpu time average is a decaying average that lasts
238  * about D clock ticks.  D is chosen to be approximately
239  * the cpu time of a cpu-intensive "quick job".  A job has to run
240  * for approximately D clock ticks before we home in on its 
241  * actual cpu usage.  Thus if you manage to get in and get out
242  * quickly, you won't be penalized during your burst.  Once you
243  * start using your share of the cpu for more than about D
244  * clock ticks though, your p->cpu hits 1000 (1.0) and you end up 
245  * below all the other quick jobs.  Interactive tasks, because
246  * they basically always use less than their fair share of cpu,
247  * will be rewarded.
248  *
249  * If the process has not been running, then we want to
250  * apply the filter
251  *
252  *      cpu = cpu * (D-1)/D
253  *
254  * n times, yielding 
255  * 
256  *      cpu = cpu * ((D-1)/D)^n
257  *
258  * but D is big enough that this is approximately 
259  *
260  *      cpu = cpu * (D-n)/D
261  *
262  * so we use that instead.
263  * 
264  * If the process has been running, we apply the filter to
265  * 1 - cpu, yielding a similar equation.  Note that cpu is 
266  * stored in fixed point (* 1000).
267  *
268  * Updatecpu must be called before changing up, in order
269  * to maintain accurate cpu usage statistics.  It can be called
270  * at any time to bring the stats for a given proc up-to-date.
271  */
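/*
 * Illustrative numbers (a sketch, not part of the kernel logic): with
 * the defaults above, schedgain = 30 and Scaling = 2, and assuming
 * HZ = 100, D = schedgain*HZ*Scaling = 6000 scaled ticks.  A process
 * that has not run for n = 600 scaled ticks (about 3 seconds) has its
 * average multiplied by (D-n)/D = 5400/6000 = 0.9, so cpu 500 decays
 * to 450.  A process that has been running for the same n instead
 * moves 10% of the remaining distance toward 1000: cpu 500 becomes 550.
 */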
272 void
273 updatecpu(Proc *p)
274 {
275         ulong t, ocpu, n, D;
276
277         if(p->edf != nil)
278                 return;
279
280         t = MACHP(0)->ticks*Scaling + Scaling/2;
281         n = t - p->lastupdate;
282         if(n == 0)
283                 return;
284         p->lastupdate = t;
285
286         D = schedgain*HZ*Scaling;
287         if(n > D)
288                 n = D;
289
290         ocpu = p->cpu;
291         if(p != up)
292                 p->cpu = (ocpu*(D-n))/D;
293         else{
294                 t = 1000 - ocpu;
295                 t = (t*(D-n))/D;
296                 p->cpu = 1000 - t;
297         }
298 //iprint("pid %lud %s for %lud cpu %lud -> %lud\n", p->pid,p==up?"active":"inactive",n, ocpu,p->cpu);
299 }
300
301 /*
302  * On average, p has used p->cpu of a cpu recently.
303  * Its fair share is conf.nmach/m->load of a cpu.  If it has been getting
304  * too much, penalize it.  If it has been getting not enough, reward it.
305  * I don't think you can get much more than your fair share that 
306  * often, so most of the queues are for using less.  Having a priority
307  * of 3 means you're just right.  Having a higher priority (up to p->basepri) 
308  * means you're not using as much as you could.
309  */
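/*
 * Worked example (illustrative only, using the scaling from
 * accounttime below, where load 1000 is roughly one runnable process
 * on average): with conf.nmach = 1 and load = 1000,
 * fairshare = (1*1000*1000)/1000 = 1000.  A process with p->cpu = 250
 * gets ratio = (1000+125)/250 = 4 (clamped to p->basepri); one with
 * p->cpu = 1000 gets ratio = 1.
 */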
310 int
311 reprioritize(Proc *p)
312 {
313         int fairshare, n, load, ratio;
314
315         load = MACHP(0)->load;
316         if(load == 0)
317                 return p->basepri;
318
319         /*
320          * fairshare = 1.000 * conf.nmach * 1.000/load,
321          * except the decimal point is moved three places
322          * on both load and fairshare.
323          */
324         fairshare = (conf.nmach*1000*1000)/load;
325         n = p->cpu;
326         if(n == 0)
327                 n = 1;
328         ratio = (fairshare+n/2) / n;
329         if(ratio > p->basepri)
330                 ratio = p->basepri;
331         if(ratio < 0)
332                 panic("reprioritize");
333 //iprint("pid %lud cpu %lud load %d fair %d pri %d\n", p->pid, p->cpu, load, fairshare, ratio);
334         return ratio;
335 }
336
337 /*
338  * add a process to a scheduling queue
339  */
340 void
341 queueproc(Schedq *rq, Proc *p)
342 {
343         int pri;
344
345         pri = rq - runq;
346         lock(runq);
347         p->priority = pri;
348         p->rnext = nil;
349         if(rq->tail != nil)
350                 rq->tail->rnext = p;
351         else
352                 rq->head = p;
353         rq->tail = p;
354         rq->n++;
355         nrdy++;
356         runvec |= 1<<pri;
357         unlock(runq);
358 }
359
360 /*
361  *  try to remove a process from a scheduling queue (called splhi)
362  */
363 Proc*
364 dequeueproc(Schedq *rq, Proc *tp)
365 {
366         Proc *l, *p;
367
368         if(!canlock(runq))
369                 return nil;
370
371         /*
372          *  the queue may have changed before we locked runq,
373          *  refind the target process.
374          */
375         l = nil;
376         for(p = rq->head; p != nil; p = p->rnext){
377                 if(p == tp)
378                         break;
379                 l = p;
380         }
381
382         /*
383          *  p->mach==0 only when process state is saved
384          */
385         if(p == nil || p->mach != nil){
386                 unlock(runq);
387                 return nil;
388         }
389         if(p->rnext == nil)
390                 rq->tail = l;
391         if(l != nil)
392                 l->rnext = p->rnext;
393         else
394                 rq->head = p->rnext;
395         if(rq->head == nil)
396                 runvec &= ~(1<<(rq-runq));
397         rq->n--;
398         nrdy--;
399         if(p->state != Ready)
400                 print("dequeueproc %s %lud %s\n", p->text, p->pid, statename[p->state]);
401
402         unlock(runq);
403         return p;
404 }
405
406 /*
407  *  ready(p) picks a new priority for a process and sticks it in the
408  *  runq for that priority.
409  */
410 void
411 ready(Proc *p)
412 {
413         int s, pri;
414         Schedq *rq;
415         void (*pt)(Proc*, int, vlong);
416
417         if(p->state == Ready){
418                 print("double ready %s %lud pc %p\n", p->text, p->pid, getcallerpc(&p));
419                 return;
420         }
421
422         s = splhi();
423         if(edfready(p)){
424                 splx(s);
425                 return;
426         }
427
428         if(up != p && (p->wired == nil || p->wired == MACHP(m->machno)))
429                 m->readied = p; /* group scheduling */
430
431         updatecpu(p);
432         pri = reprioritize(p);
433         p->priority = pri;
434         rq = &runq[pri];
435         p->state = Ready;
436         queueproc(rq, p);
437         pt = proctrace;
438         if(pt != nil)
439                 pt(p, SReady, 0);
440         splx(s);
441 }
442
443 /*
444  *  yield the processor and drop our priority
445  */
446 void
447 yield(void)
448 {
449         if(anyready()){
450                 /* pretend we just used 1/2 tick */
451                 up->lastupdate -= Scaling/2;  
452                 sched();
453         }
454 }
455
456 /*
457  *  recalculate priorities once a second.  We need to do this
458  *  since priorities will otherwise only be recalculated when
459  *  the running process blocks.
460  */
461 ulong balancetime;
462
463 static void
464 rebalance(void)
465 {
466         int pri, npri, x;
467         Schedq *rq;
468         Proc *p;
469         ulong t;
470
471         t = m->ticks;
472         if(t - balancetime < HZ)
473                 return;
474         balancetime = t;
475
476         for(pri=0, rq=runq; pri<Npriq; pri++, rq++){
477 another:
478                 p = rq->head;
479                 if(p == nil)
480                         continue;
481                 if(p->mp != MACHP(m->machno))
482                         continue;
483                 if(pri == p->basepri)
484                         continue;
485                 updatecpu(p);
486                 npri = reprioritize(p);
487                 if(npri != pri){
488                         x = splhi();
489                         p = dequeueproc(rq, p);
490                         if(p != nil)
491                                 queueproc(&runq[npri], p);
492                         splx(x);
493                         goto another;
494                 }
495         }
496 }
497         
498
499 /*
500  *  pick a process to run
501  */
502 Proc*
503 runproc(void)
504 {
505         Schedq *rq;
506         Proc *p;
507         ulong start, now;
508         int i;
509         void (*pt)(Proc*, int, vlong);
510
511         start = perfticks();
512
513         /* cooperative scheduling until the clock ticks */
514         if((p = m->readied) != nil && p->mach == nil && p->state == Ready
515         && (p->wired == nil || p->wired == MACHP(m->machno))
516         && runq[Nrq-1].head == nil && runq[Nrq-2].head == nil){
517                 skipscheds++;
518                 rq = &runq[p->priority];
519                 goto found;
520         }
521
522         preempts++;
523
524 loop:
525         /*
526          *  find a process that last ran on this processor (affinity),
527          *  or one that hasn't moved in a while (load balancing).  Every
528          *  time around the loop affinity goes down.
529          */
530         spllo();
531         for(i = 0;; i++){
532                 /*
533                  *  find the highest priority target process that this
534                  *  processor can run given affinity constraints.
535                  *
536                  */
537                 for(rq = &runq[Nrq-1]; rq >= runq; rq--){
538                         for(p = rq->head; p != nil; p = p->rnext){
539                                 if(p->mp == nil || p->mp == MACHP(m->machno)
540                                 || (p->wired == nil && i > 0))
541                                         goto found;
542                         }
543                 }
544
545                 /* waste time or halt the CPU */
546                 idlehands();
547
548                 /* remember how much time we spent here */
549                 now = perfticks();
550                 m->perf.inidle += now-start;
551                 start = now;
552         }
553
554 found:
555         splhi();
556         p = dequeueproc(rq, p);
557         if(p == nil)
558                 goto loop;
559
560         p->state = Scheding;
561         p->mp = MACHP(m->machno);
562
563         if(edflock(p)){
564                 edfrun(p, rq == &runq[PriEdf]); /* start deadline timer and do admin */
565                 edfunlock();
566         }
567         pt = proctrace;
568         if(pt != nil)
569                 pt(p, SRun, 0);
570         return p;
571 }
572
573 int
574 canpage(Proc *p)
575 {
576         int ok = 0;
577
578         splhi();
579         lock(runq);
580         /* Only reliable way to see if we are Running */
581         if(p->mach == nil) {
582                 p->newtlb = 1;
583                 ok = 1;
584         }
585         unlock(runq);
586         spllo();
587
588         return ok;
589 }
590
591 Proc*
592 newproc(void)
593 {
594         char msg[64];
595         Proc *p;
596
597         lock(&procalloc);
598         for(;;) {
599                 if((p = procalloc.free) != nil)
600                         break;
601
602                 snprint(msg, sizeof msg, "no procs; %s forking",
603                         up != nil ? up->text: "kernel");
604                 unlock(&procalloc);
605                 resrcwait(msg);
606                 lock(&procalloc);
607         }
608         procalloc.free = p->qnext;
609         unlock(&procalloc);
610
611         p->state = Scheding;
612         p->psstate = "New";
613         p->mach = nil;
614         p->eql = nil;
615         p->qnext = nil;
616         p->nchild = 0;
617         p->nwait = 0;
618         p->waitq = nil;
619         p->parent = nil;
620         p->pgrp = nil;
621         p->egrp = nil;
622         p->fgrp = nil;
623         p->rgrp = nil;
624         p->pdbg = nil;
625         p->fpstate = FPinit;
626         p->kp = 0;
627         p->procctl = 0;
628         p->syscalltrace = nil;  
629         p->notepending = 0;
630         p->ureg = nil;
631         p->privatemem = 0;
632         p->noswap = 0;
633         p->errstr = p->errbuf0;
634         p->syserrstr = p->errbuf1;
635         p->errbuf0[0] = '\0';
636         p->errbuf1[0] = '\0';
637         p->nlocks = 0;
638         p->delaysched = 0;
639         p->trace = 0;
640         kstrdup(&p->user, "*nouser");
641         kstrdup(&p->text, "*notext");
642         kstrdup(&p->args, "");
643         p->nargs = 0;
644         p->setargs = 0;
645         memset(p->seg, 0, sizeof p->seg);
646         p->parentpid = 0;
647         p->noteid = pidalloc(p);
648         if(p->kstack == nil)
649                 p->kstack = smalloc(KSTACK);
650
651         /* sched params */
652         p->mp = nil;
653         p->wired = nil;
654         procpriority(p, PriNormal, 0);
655         p->cpu = 0;
656         p->lastupdate = MACHP(0)->ticks*Scaling;
657         p->edf = nil;
658
659         return p;
660 }
661
662 /*
663  * wire this proc to a machine
664  */
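/*
 * Hypothetical calls: procwired(p, -1) picks the machine with the
 * fewest wired processes; procwired(p, 2) wires p to
 * MACHP(2 % conf.nmach).
 */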
665 void
666 procwired(Proc *p, int bm)
667 {
668         Proc *pp;
669         int i;
670         char nwired[MAXMACH];
671         Mach *wm;
672
673         if(bm < 0){
674                 /* pick a machine to wire to */
675                 memset(nwired, 0, sizeof(nwired));
676                 p->wired = nil;
677                 pp = proctab(0);
678                 for(i=0; i<conf.nproc; i++, pp++){
679                         wm = pp->wired;
680                         if(wm != nil && pp->pid)
681                                 nwired[wm->machno]++;
682                 }
683                 bm = 0;
684                 for(i=0; i<conf.nmach; i++)
685                         if(nwired[i] < nwired[bm])
686                                 bm = i;
687         } else {
688                 /* use the virtual machine requested */
689                 bm = bm % conf.nmach;
690         }
691
692         p->wired = MACHP(bm);
693         p->mp = p->wired;
694 }
695
696 void
697 procpriority(Proc *p, int pri, int fixed)
698 {
699         if(pri >= Npriq)
700                 pri = Npriq - 1;
701         else if(pri < 0)
702                 pri = 0;
703         p->basepri = pri;
704         p->priority = pri;
705         if(fixed){
706                 p->fixedpri = 1;
707         } else {
708                 p->fixedpri = 0;
709         }
710 }
711
712 void
713 procinit0(void)         /* bad planning - clashes with devproc.c */
714 {
715         Proc *p;
716         int i;
717
718         p = xalloc(conf.nproc*sizeof(Proc));
719         if(p == nil){
720                 xsummary();
721                 panic("cannot allocate %lud procs (%ludMB)", conf.nproc, conf.nproc*sizeof(Proc)/(1024*1024));
722         }
723         procalloc.arena = p;
724         procalloc.free = p;
725         for(i=0; i<conf.nproc-1; i++, p++)
726                 p->qnext = p+1;
727         p->qnext = nil;
728 }
729
730 /*
731  *  sleep if a condition is not true.  Another process will
732  *  awaken us after it sets the condition.  When we awaken
733  *  the condition may no longer be true.
734  *
735  *  we lock both the process and the rendezvous to keep r->p
736  *  and p->r synchronized.
737  */
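/*
 * Usage sketch (hypothetical names, not defined in this file): a
 * consumer and an interrupt handler coordinate through a Rendez and
 * a condition function over shared state:
 *
 *	static Rendez datar;
 *	static int havedata;
 *
 *	static int
 *	gotdata(void*)
 *	{
 *		return havedata;
 *	}
 *
 *	consumer:	sleep(&datar, gotdata, nil);
 *	interrupt:	havedata = 1; wakeup(&datar);
 *
 * The condition is re-tested while r and up->rlock are held below, so
 * a wakeup racing with the sleep is never lost.
 */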
738 void
739 sleep(Rendez *r, int (*f)(void*), void *arg)
740 {
741         int s;
742         void (*pt)(Proc*, int, vlong);
743
744         s = splhi();
745
746         if(up->nlocks)
747                 print("process %lud sleeps with %d locks held, last lock %#p locked at pc %#p, sleep called from %#p\n",
748                         up->pid, up->nlocks, up->lastlock, up->lastlock->pc, getcallerpc(&r));
749         lock(r);
750         lock(&up->rlock);
751         if(r->p != nil){
752                 print("double sleep called from %#p, %lud %lud\n", getcallerpc(&r), r->p->pid, up->pid);
753                 dumpstack();
754         }
755
756         /*
757          *  Wakeup only knows there may be something to do by testing
758          *  r->p in order to get something to lock on.
759          *  Flush that information out to memory in case the sleep is
760          *  committed.
761          */
762         r->p = up;
763
764         if((*f)(arg) || up->notepending){
765                 /*
766                  *  if condition happened or a note is pending
767                  *  never mind
768                  */
769                 r->p = nil;
770                 unlock(&up->rlock);
771                 unlock(r);
772         } else {
773                 /*
774                  *  now we are committed to
775                  *  change state and call scheduler
776                  */
777                 pt = proctrace;
778                 if(pt != nil)
779                         pt(up, SSleep, 0);
780                 up->state = Wakeme;
781                 up->r = r;
782
783                 /* statistics */
784                 m->cs++;
785
786                 procsave(up);
787                 if(setlabel(&up->sched)) {
788                         /*
789                          *  here when the process is awakened
790                          */
791                         procrestore(up);
792                         spllo();
793                 } else {
794                         /*
795                          *  here to go to sleep (i.e. stop Running)
796                          */
797                         unlock(&up->rlock);
798                         unlock(r);
799                         gotolabel(&m->sched);
800                 }
801         }
802
803         if(up->notepending) {
804                 up->notepending = 0;
805                 splx(s);
806                 interrupted();
807         }
808
809         splx(s);
810 }
811
812 void
813 interrupted(void)
814 {
815         if(up->procctl == Proc_exitme && up->closingfgrp != nil)
816                 forceclosefgrp();
817         error(Eintr);
818 }
819
820 static int
821 tfn(void *arg)
822 {
823         return up->trend == nil || up->tfn(arg);
824 }
825
826 void
827 twakeup(Ureg*, Timer *t)
828 {
829         Proc *p;
830         Rendez *trend;
831
832         p = t->ta;
833         trend = p->trend;
834         if(trend != nil){
835                 p->trend = nil;
836                 wakeup(trend);
837         }
838 }
839
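/*
 * tsleep() below is sleep() with a timeout: it arms a relative Timer
 * so that twakeup() runs after ms milliseconds, waking the process
 * even if fn never becomes true.  Hypothetical usage sketch (r and
 * cond are not defined here; return0 is the usual port helper that
 * always returns 0):
 *
 *	tsleep(&r, cond, nil, 500);		wait up to ~500ms for cond
 *	tsleep(&up->sleep, return0, nil, 100);	plain ~100ms pause
 */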
840 void
841 tsleep(Rendez *r, int (*fn)(void*), void *arg, ulong ms)
842 {
843         if(up->tt != nil){
844                 print("%s %lud: tsleep timer active: mode %d, tf %#p, pc %#p\n",
845                         up->text, up->pid, up->tmode, up->tf, getcallerpc(&r));
846                 timerdel(up);
847         }
848         up->tns = MS2NS(ms);
849         up->tf = twakeup;
850         up->tmode = Trelative;
851         up->ta = up;
852         up->trend = r;
853         up->tfn = fn;
854         timeradd(up);
855
856         if(waserror()){
857                 up->trend = nil;
858                 timerdel(up);
859                 nexterror();
860         }
861         sleep(r, tfn, arg);
862         up->trend = nil;
863         timerdel(up);
864         poperror();
865 }
866
867 /*
868  *  Expects that only one process can call wakeup for any given Rendez.
869  *  We hold both locks to ensure that r->p and p->r remain consistent.
870  *  Richard Miller has a better solution that doesn't require both to
871  *  be held simultaneously, but I'm a paranoid - presotto.
872  */
873 Proc*
874 wakeup(Rendez *r)
875 {
876         Proc *p;
877         int s;
878
879         s = splhi();
880
881         lock(r);
882         p = r->p;
883
884         if(p != nil){
885                 lock(&p->rlock);
886                 if(p->state != Wakeme || p->r != r){
887                         iprint("%p %p %d\n", p->r, r, p->state);
888                         panic("wakeup: state");
889                 }
890                 r->p = nil;
891                 p->r = nil;
892                 ready(p);
893                 unlock(&p->rlock);
894         }
895         unlock(r);
896
897         splx(s);
898
899         return p;
900 }
901
902 /*
903  *  if waking a sleeping process, this routine must hold both
904  *  p->rlock and r->lock.  However, it can't know them in
905  *  the same order as wakeup causing a possible lock ordering
906  *  deadlock.  We break the deadlock by giving up the p->rlock
907  *  lock if we can't get the r->lock and retrying.
908  */
909 int
910 postnote(Proc *p, int dolock, char *n, int flag)
911 {
912         int s, ret;
913         QLock *q;
914
915         if(p == nil)
916                 return 0;
917
918         if(dolock)
919                 qlock(&p->debug);
920
921         if(p->pid == 0){
922                 if(dolock)
923                         qunlock(&p->debug);
924                 return 0;
925         }
926
927         if(n != nil && flag != NUser && (p->notify == 0 || p->notified))
928                 p->nnote = 0;
929
930         ret = 0;
931         if(p->nnote < NNOTE && n != nil) {
932                 kstrcpy(p->note[p->nnote].msg, n, ERRMAX);
933                 p->note[p->nnote++].flag = flag;
934                 ret = 1;
935         }
936         p->notepending = 1;
937         if(dolock)
938                 qunlock(&p->debug);
939
940         /* this loop is to avoid lock ordering problems. */
941         for(;;){
942                 Rendez *r;
943
944                 s = splhi();
945                 lock(&p->rlock);
946                 r = p->r;
947
948                 /* waiting for a wakeup? */
949                 if(r == nil)
950                         break;  /* no */
951
952                 /* try for the second lock */
953                 if(canlock(r)){
954                         if(p->state != Wakeme || r->p != p)
955                                 panic("postnote: state %d %d %d", r->p != p, p->r != r, p->state);
956                         p->r = nil;
957                         r->p = nil;
958                         ready(p);
959                         unlock(r);
960                         break;
961                 }
962
963                 /* give other process time to get out of critical section and try again */
964                 unlock(&p->rlock);
965                 splx(s);
966                 sched();
967         }
968         unlock(&p->rlock);
969         splx(s);
970
971         switch(p->state){
972         case Queueing:
973                 /* Try and pull out of an eqlock */
974                 if((q = p->eql) != nil){
975                         lock(&q->use);
976                         if(p->state == Queueing && p->eql == q){
977                                 Proc *d, *l;
978
979                                 for(l = nil, d = q->head; d != nil; l = d, d = d->qnext){
980                                         if(d == p){
981                                                 if(l != nil)
982                                                         l->qnext = p->qnext;
983                                                 else
984                                                         q->head = p->qnext;
985                                                 if(p->qnext == nil)
986                                                         q->tail = l;
987                                                 p->qnext = nil;
988                                                 p->eql = nil;   /* not taken */
989                                                 ready(p);
990                                                 break;
991                                         }
992                                 }
993                         }
994                         unlock(&q->use);
995                 }
996                 break;
997         case Rendezvous:
998                 /* Try and pull out of a rendezvous */
999                 lock(p->rgrp);
1000                 if(p->state == Rendezvous) {
1001                         Proc *d, **l;
1002
1003                         l = &REND(p->rgrp, p->rendtag);
1004                         for(d = *l; d != nil; d = d->rendhash) {
1005                                 if(d == p) {
1006                                         *l = p->rendhash;
1007                                         p->rendval = ~0;
1008                                         ready(p);
1009                                         break;
1010                                 }
1011                                 l = &d->rendhash;
1012                         }
1013                 }
1014                 unlock(p->rgrp);
1015                 break;
1016         }
1017         return ret;
1018 }
1019
1020 /*
1021  * weird thing: keep at most NBROKEN around
1022  */
1023 #define NBROKEN 4
1024 struct
1025 {
1026         QLock;
1027         int     n;
1028         Proc    *p[NBROKEN];
1029 }broken;
1030
1031 void
1032 addbroken(Proc *p)
1033 {
1034         qlock(&broken);
1035         if(broken.n == NBROKEN) {
1036                 ready(broken.p[0]);
1037                 memmove(&broken.p[0], &broken.p[1], sizeof(Proc*)*(NBROKEN-1));
1038                 --broken.n;
1039         }
1040         broken.p[broken.n++] = p;
1041         qunlock(&broken);
1042
1043         edfstop(up);
1044         p->state = Broken;
1045         p->psstate = nil;
1046         sched();
1047 }
1048
1049 void
1050 unbreak(Proc *p)
1051 {
1052         int b;
1053
1054         qlock(&broken);
1055         for(b=0; b < broken.n; b++)
1056                 if(broken.p[b] == p) {
1057                         broken.n--;
1058                         memmove(&broken.p[b], &broken.p[b+1],
1059                                         sizeof(Proc*)*(NBROKEN-(b+1)));
1060                         ready(p);
1061                         break;
1062                 }
1063         qunlock(&broken);
1064 }
1065
1066 int
1067 freebroken(void)
1068 {
1069         int i, n;
1070
1071         qlock(&broken);
1072         n = broken.n;
1073         for(i=0; i<n; i++) {
1074                 ready(broken.p[i]);
1075                 broken.p[i] = nil;
1076         }
1077         broken.n = 0;
1078         qunlock(&broken);
1079         return n;
1080 }
1081
1082 void
1083 pexit(char *exitstr, int freemem)
1084 {
1085         Proc *p;
1086         Segment **s, **es;
1087         ulong utime, stime;
1088         Waitq *wq;
1089         Fgrp *fgrp;
1090         Egrp *egrp;
1091         Rgrp *rgrp;
1092         Pgrp *pgrp;
1093         Chan *dot;
1094         void (*pt)(Proc*, int, vlong);
1095
1096         up->alarm = 0;
1097         timerdel(up);
1098         pt = proctrace;
1099         if(pt != nil)
1100                 pt(up, SDead, 0);
1101
1102         /* nil out all the resources under lock (free later) */
1103         qlock(&up->debug);
1104         fgrp = up->fgrp;
1105         up->fgrp = nil;
1106         egrp = up->egrp;
1107         up->egrp = nil;
1108         rgrp = up->rgrp;
1109         up->rgrp = nil;
1110         pgrp = up->pgrp;
1111         up->pgrp = nil;
1112         dot = up->dot;
1113         up->dot = nil;
1114         qunlock(&up->debug);
1115
1116         if(fgrp != nil)
1117                 closefgrp(fgrp);
1118         if(egrp != nil)
1119                 closeegrp(egrp);
1120         if(rgrp != nil)
1121                 closergrp(rgrp);
1122         if(dot != nil)
1123                 cclose(dot);
1124         if(pgrp != nil)
1125                 closepgrp(pgrp);
1126
1127         /*
1128          * if not a kernel process and have a parent,
1129          * do some housekeeping.
1130          */
1131         if(up->kp == 0 && up->parentpid != 0) {
1132                 wq = smalloc(sizeof(Waitq));
1133                 wq->w.pid = up->pid;
1134                 utime = up->time[TUser] + up->time[TCUser];
1135                 stime = up->time[TSys] + up->time[TCSys];
1136                 wq->w.time[TUser] = tk2ms(utime);
1137                 wq->w.time[TSys] = tk2ms(stime);
1138                 wq->w.time[TReal] = tk2ms(MACHP(0)->ticks - up->time[TReal]);
1139                 if(exitstr != nil && exitstr[0])
1140                         snprint(wq->w.msg, sizeof(wq->w.msg), "%s %lud: %s", up->text, up->pid, exitstr);
1141                 else
1142                         wq->w.msg[0] = '\0';
1143
1144                 p = up->parent;
1145                 lock(&p->exl);
1146                 /*
1147                  * Check that parent is still alive.
1148                  */
1149                 if(p->pid == up->parentpid && p->state != Broken) {
1150                         p->nchild--;
1151                         p->time[TCUser] += utime;
1152                         p->time[TCSys] += stime;
1153                         /*
1154                          * If there would be more than 128 wait records
1155                          * for my parent, then don't leave a wait
1156                          * record behind.  This helps prevent badly written
1157                          * daemon processes from accumulating lots of wait
1158                          * records.
1159                          */
1160                         if(p->nwait < 128) {
1161                                 wq->next = p->waitq;
1162                                 p->waitq = wq;
1163                                 p->nwait++;
1164                                 wq = nil;
1165                                 wakeup(&p->waitr);
1166                         }
1167                 }
1168                 unlock(&p->exl);
1169                 if(wq != nil)
1170                         free(wq);
1171         }
1172         else if(up->kp == 0 && up->parent == nil){
1173                 if(exitstr == nil)
1174                         exitstr = "unknown";
1175                 panic("boot process died: %s", exitstr);
1176         }
1177
1178         if(!freemem)
1179                 addbroken(up);
1180
1181         qlock(&up->seglock);
1182         es = &up->seg[NSEG];
1183         for(s = up->seg; s < es; s++) {
1184                 if(*s != nil) {
1185                         putseg(*s);
1186                         *s = nil;
1187                 }
1188         }
1189         qunlock(&up->seglock);
1190
1191         lock(&up->exl);         /* Prevent my children from leaving waits */
1192         pidfree(up);
1193         up->pid = 0;
1194         wakeup(&up->waitr);
1195         unlock(&up->exl);
1196
1197         while((wq = up->waitq) != nil){
1198                 up->waitq = wq->next;
1199                 free(wq);
1200         }
1201
1202         /* release debuggers */
1203         qlock(&up->debug);
1204         if(up->pdbg != nil) {
1205                 wakeup(&up->pdbg->sleep);
1206                 up->pdbg = nil;
1207         }
1208         if(up->syscalltrace != nil) {
1209                 free(up->syscalltrace);
1210                 up->syscalltrace = nil;
1211         }
1212         if(up->watchpt != nil){
1213                 free(up->watchpt);
1214                 up->watchpt = nil;
1215         }
1216         up->nwatchpt = 0;
1217         qunlock(&up->debug);
1218
1219         /* Sched must not loop for these locks */
1220         lock(&procalloc);
1221         lock(&palloc);
1222
1223         edfstop(up);
1224         up->state = Moribund;
1225         sched();
1226         panic("pexit");
1227 }
1228
1229 static int
1230 haswaitq(void *x)
1231 {
1232         return ((Proc*)x)->waitq != nil;
1233 }
1234
1235 ulong
1236 pwait(Waitmsg *w)
1237 {
1238         ulong cpid;
1239         Waitq *wq;
1240
1241         if(!canqlock(&up->qwaitr))
1242                 error(Einuse);
1243
1244         if(waserror()) {
1245                 qunlock(&up->qwaitr);
1246                 nexterror();
1247         }
1248
1249         lock(&up->exl);
1250         while(up->waitq == nil) {
1251                 if(up->nchild == 0) {
1252                         unlock(&up->exl);
1253                         error(Enochild);
1254                 }
1255                 unlock(&up->exl);
1256                 sleep(&up->waitr, haswaitq, up);
1257                 lock(&up->exl);
1258         }
1259         wq = up->waitq;
1260         up->waitq = wq->next;
1261         up->nwait--;
1262         unlock(&up->exl);
1263
1264         qunlock(&up->qwaitr);
1265         poperror();
1266
1267         if(w != nil)
1268                 memmove(w, &wq->w, sizeof(Waitmsg));
1269         cpid = wq->w.pid;
1270         free(wq);
1271         return cpid;
1272 }
1273
1274 Proc*
1275 proctab(int i)
1276 {
1277         return &procalloc.arena[i];
1278 }
1279
1280 void
1281 dumpaproc(Proc *p)
1282 {
1283         ulong bss;
1284         char *s;
1285
1286         if(p == nil)
1287                 return;
1288
1289         bss = 0;
1290         if(p->seg[BSEG] != nil)
1291                 bss = p->seg[BSEG]->top;
1292
1293         s = p->psstate;
1294         if(s == nil)
1295                 s = statename[p->state];
1296         print("%3lud:%10s pc %#p dbgpc %#p  %8s (%s) ut %ld st %ld bss %lux qpc %#p nl %d nd %lud lpc %#p pri %lud\n",
1297                 p->pid, p->text, p->pc, dbgpc(p),  s, statename[p->state],
1298                 p->time[0], p->time[1], bss, p->qpc, p->nlocks, p->delaysched,
1299                 p->lastlock ? p->lastlock->pc : 0, p->priority);
1300 }
1301
1302 void
1303 procdump(void)
1304 {
1305         int i;
1306         Proc *p;
1307
1308         if(up != nil)
1309                 print("up %lud\n", up->pid);
1310         else
1311                 print("no current process\n");
1312         for(i=0; i<conf.nproc; i++) {
1313                 p = &procalloc.arena[i];
1314                 if(p->state != Dead)
1315                         dumpaproc(p);
1316         }
1317 }
1318
1319 /*
1320  *  wait till all matching processes have flushed their mmu
1321  */
1322 static void
1323 procflushmmu(int (*match)(Proc*, void*), void *a)
1324 {
1325         int i, nm, nwait;
1326         Proc *p;
1327
1328         /*
1329          *  tell all matching processes to flush their mmu's
1330          */
1331         nwait = 0;
1332         for(i=0; i<conf.nproc; i++) {
1333                 p = &procalloc.arena[i];
1334                 if(p->state != Dead && (*match)(p, a)){
1335                         p->newtlb = 1;
1336                         for(nm = 0; nm < conf.nmach; nm++){
1337                                 if(MACHP(nm)->proc == p){
1338                                         MACHP(nm)->flushmmu = 1;
1339                                         nwait++;
1340                                 }
1341                         }
1342                 }
1343         }
1344
1345         if(nwait == 0)
1346                 return;
1347
1348         /*
1349          *  wait for all other processors to take a clock interrupt
1350          *  and flush their mmu's
1351          */
1352         for(nm = 0; nm < conf.nmach; nm++)
1353                 while(m->machno != nm && MACHP(nm)->flushmmu)
1354                         sched();
1355 }
1356
1357 static int
1358 matchseg(Proc *p, void *a)
1359 {
1360         int ns;
1361
1362         for(ns = 0; ns < NSEG; ns++){
1363                 if(p->seg[ns] == a)
1364                         return 1;
1365         }
1366         return 0;
1367 }
1368 void
1369 procflushseg(Segment *s)
1370 {
1371         procflushmmu(matchseg, s);
1372 }
1373
1374 static int
1375 matchpseg(Proc *p, void *a)
1376 {
1377         Segment *s;
1378         int ns;
1379
1380         for(ns = 0; ns < NSEG; ns++){
1381                 s = p->seg[ns];
1382                 if(s != nil && s->pseg == a)
1383                         return 1;
1384         }
1385         return 0;
1386 }
1387 void
1388 procflushpseg(Physseg *ps)
1389 {
1390         procflushmmu(matchpseg, ps);
1391 }
1392
1393 void
1394 scheddump(void)
1395 {
1396         Proc *p;
1397         Schedq *rq;
1398
1399         for(rq = &runq[Nrq-1]; rq >= runq; rq--){
1400                 if(rq->head == nil)
1401                         continue;
1402                 print("rq%zd:", rq-runq);
1403                 for(p = rq->head; p != nil; p = p->rnext)
1404                         print(" %lud(%lud)", p->pid, m->ticks - p->readytime);
1405                 print("\n");
1406                 delay(150);
1407         }
1408         print("nrdy %d\n", nrdy);
1409 }
1410
1411 void
1412 kproc(char *name, void (*func)(void *), void *arg)
1413 {
1414         Proc *p;
1415         static Pgrp *kpgrp;
1416
1417         p = newproc();
1418         p->psstate = nil;
1419         p->procmode = 0640;
1420         p->kp = 1;
1421         p->noswap = 1;
1422
1423         p->scallnr = up->scallnr;
1424         p->s = up->s;
1425         p->nerrlab = 0;
1426         p->slash = up->slash;
1427         p->dot = up->slash;     /* unlike fork, do not inherit the dot for kprocs */
1428         if(p->dot != nil)
1429                 incref(p->dot);
1430
1431         memmove(p->note, up->note, sizeof(p->note));
1432         p->nnote = up->nnote;
1433         p->notified = 0;
1434         p->lastnote = up->lastnote;
1435         p->notify = up->notify;
1436         p->ureg = nil;
1437         p->dbgreg = nil;
1438
1439         procpriority(p, PriKproc, 0);
1440
1441         kprocchild(p, func, arg);
1442
1443         kstrdup(&p->user, eve);
1444         kstrdup(&p->text, name);
1445         if(kpgrp == nil)
1446                 kpgrp = newpgrp();
1447         p->pgrp = kpgrp;
1448         incref(kpgrp);
1449
1450         memset(p->time, 0, sizeof(p->time));
1451         p->time[TReal] = MACHP(0)->ticks;
1452         ready(p);
1453 }
1454
1455 /*
1456  *  called splhi() by notify().  See comment in notify for the
1457  *  reasoning.
1458  */
1459 void
1460 procctl(void)
1461 {
1462         char *state;
1463         ulong s;
1464
1465         switch(up->procctl) {
1466         case Proc_exitbig:
1467                 spllo();
1468                 pprint("Killed: Insufficient physical memory\n");
1469                 pexit("Killed: Insufficient physical memory", 1);
1470
1471         case Proc_exitme:
1472                 spllo();                /* pexit has locks in it */
1473                 pexit("Killed", 1);
1474
1475         case Proc_traceme:
1476                 if(up->nnote == 0)
1477                         return;
1478                 /* No break */
1479
1480         case Proc_stopme:
1481                 up->procctl = 0;
1482                 state = up->psstate;
1483                 up->psstate = "Stopped";
1484                 /* free a waiting debugger */
1485                 s = spllo();
1486                 qlock(&up->debug);
1487                 if(up->pdbg != nil) {
1488                         wakeup(&up->pdbg->sleep);
1489                         up->pdbg = nil;
1490                 }
1491                 qunlock(&up->debug);
1492                 splhi();
1493                 up->state = Stopped;
1494                 sched();
1495                 up->psstate = state;
1496                 splx(s);
1497                 return;
1498         }
1499 }
1500
1501 #include "errstr.h"
1502
1503 void
1504 error(char *err)
1505 {
1506         spllo();
1507
1508         assert(up->nerrlab < NERR);
1509         kstrcpy(up->errstr, err, ERRMAX);
1510         setlabel(&up->errlab[NERR-1]);
1511         nexterror();
1512 }
1513
1514 void
1515 nexterror(void)
1516 {
1517         assert(up->nerrlab > 0);
1518         gotolabel(&up->errlab[--up->nerrlab]);
1519 }
1520
1521 void
1522 exhausted(char *resource)
1523 {
1524         char buf[ERRMAX];
1525
1526         snprint(buf, sizeof buf, "no free %s", resource);
1527         iprint("%s\n", buf);
1528         error(buf);
1529 }
1530
1531 ulong
1532 procpagecount(Proc *p)
1533 {
1534         Segment *s;
1535         ulong pages;
1536         int i;
1537
1538         eqlock(&p->seglock);
1539         if(waserror()){
1540                 qunlock(&p->seglock);
1541                 nexterror();
1542         }
1543         pages = 0;
1544         for(i=0; i<NSEG; i++){
1545                 if((s = p->seg[i]) != nil){
1546                         eqlock(s);
1547                         pages += mcountseg(s);
1548                         qunlock(s);
1549                 }
1550         }
1551         qunlock(&p->seglock);
1552         poperror();
1553
1554         return pages;
1555 }
1556
1557 void
1558 killbig(char *why)
1559 {
1560         int i;
1561         Segment *s;
1562         ulong l, max;
1563         Proc *p, *ep, *kp;
1564
1565         max = 0;
1566         kp = nil;
1567         ep = procalloc.arena+conf.nproc;
1568         for(p = procalloc.arena; p < ep; p++) {
1569                 if(p->state == Dead || p->kp)
1570                         continue;
1571                 if((p->noswap || (p->procmode & 0222) == 0) && strcmp(eve, p->user) == 0)
1572                         continue;
1573                 l = procpagecount(p);
1574                 if(l > max){
1575                         kp = p;
1576                         max = l;
1577                 }
1578         }
1579         if(kp == nil)
1580                 return;
1581         print("%lud: %s killed: %s\n", kp->pid, kp->text, why);
1582         qlock(&kp->seglock);
1583         for(p = procalloc.arena; p < ep; p++) {
1584                 if(p->state == Dead || p->kp)
1585                         continue;
1586                 if(p != kp && p->seg[BSEG] != nil && p->seg[BSEG] == kp->seg[BSEG])
1587                         p->procctl = Proc_exitbig;
1588         }
1589         kp->procctl = Proc_exitbig;
1590         for(i = 0; i < NSEG; i++) {
1591                 s = kp->seg[i];
1592                 if(s == nil)
1593                         continue;
1594                 switch(s->type & SG_TYPE){
1595                 case SG_SHARED:
1596                 case SG_PHYSICAL:
1597                 case SG_FIXED:
1598                 case SG_STICKY:
1599                         continue;
1600                 }
1601                 qlock(s);
1602                 mfreeseg(s, s->base, (s->top - s->base)/BY2PG);
1603                 qunlock(s);
1604         }
1605         qunlock(&kp->seglock);
1606 }
1607
1608 /*
1609  *  change ownership to 'new' of all processes owned by 'old'.  Used when
1610  *  eve changes.
1611  */
1612 void
1613 renameuser(char *old, char *new)
1614 {
1615         Proc *p, *ep;
1616
1617         ep = procalloc.arena+conf.nproc;
1618         for(p = procalloc.arena; p < ep; p++)
1619                 if(p->user != nil && strcmp(old, p->user) == 0)
1620                         kstrdup(&p->user, new);
1621 }
1622
1623 /*
1624  *  time accounting called by clock() splhi'd
1625  */
1626 void
1627 accounttime(void)
1628 {
1629         Proc *p;
1630         ulong n, per;
1631         static ulong nrun;
1632
1633         p = m->proc;
1634         if(p != nil) {
1635                 nrun++;
1636                 p->time[p->insyscall]++;
1637         }
1638
1639         /* calculate decaying duty cycles */
1640         n = perfticks();
1641         per = n - m->perf.last;
1642         m->perf.last = n;
1643         per = ((uvlong)m->perf.period*(HZ-1) + per)/HZ;
1644         if(per != 0)
1645                 m->perf.period = per;
1646
1647         m->perf.avg_inidle = ((uvlong)m->perf.avg_inidle*(HZ-1)+m->perf.inidle)/HZ;
1648         m->perf.inidle = 0;
1649
1650         m->perf.avg_inintr = ((uvlong)m->perf.avg_inintr*(HZ-1)+m->perf.inintr)/HZ;
1651         m->perf.inintr = 0;
1652
1653         /* only one processor gets to compute system load averages */
1654         if(m->machno != 0)
1655                 return;
1656
1657         /*
1658          * calculate decaying load average.
1659          * if we decay by (n-1)/n then it takes
1660          * n clock ticks to go from load L to .36 L once
1661          * things quiet down.  it takes about 5 n clock
1662          * ticks to go to zero.  so using HZ means this is
1663          * approximately the load over the last second,
1664          * with a tail lasting about 5 seconds.
1665          */
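        /*
         * Numerically (a sketch): each tick the old average keeps
         * weight (HZ-1)/HZ, so after HZ ticks the starting value
         * contributes ((HZ-1)/HZ)^HZ, about e^-1 = 0.37 of itself.
         * The *1000*100 scaling below keeps two extra decimal digits
         * through the integer decay; dividing by 100 at the end leaves
         * m->load expressed as 1000 per runnable process.
         */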
1666         n = nrun;
1667         nrun = 0;
1668         n = (nrdy+n)*1000*100;
1669         load = ((uvlong)load*(HZ-1)+n)/HZ;
1670         m->load = load/100;
1671 }
1672
1673 int
1674 pidalloc(Proc *p)
1675 {
1676         static int gen, wrapped;
1677         int pid, h;
1678         Proc *x;
1679
1680         lock(&procalloc);
1681 Retry:
1682         pid = ++gen & 0x7FFFFFFF;
1683         if(pid == 0){
1684                 wrapped = 1;
1685                 goto Retry;
1686         }
1687         h = pid % nelem(procalloc.ht);
1688         if(wrapped)
1689                 for(x = procalloc.ht[h]; x != nil; x = x->pidhash)
1690                         if(x->pid == pid)
1691                                 goto Retry;
1692         if(p != nil){
1693                 p->pid = pid;
1694                 p->pidhash = procalloc.ht[h];
1695                 procalloc.ht[h] = p;
1696         }
1697         unlock(&procalloc);
1698         return pid;
1699 }
1700
1701 static void
1702 pidfree(Proc *p)
1703 {
1704         int h;
1705         Proc **l;
1706
1707         h = p->pid % nelem(procalloc.ht);
1708         lock(&procalloc);
1709         for(l = &procalloc.ht[h]; *l != nil; l = &(*l)->pidhash)
1710                 if(*l == p){
1711                         *l = p->pidhash;
1712                         break;
1713                 }
1714         unlock(&procalloc);
1715 }
1716
1717 int
1718 procindex(ulong pid)
1719 {
1720         Proc *p;
1721         int h;
1722         int s;
1723
1724         s = -1;
1725         h = pid % nelem(procalloc.ht);
1726         lock(&procalloc);
1727         for(p = procalloc.ht[h]; p != nil; p = p->pidhash)
1728                 if(p->pid == pid){
1729                         s = p - procalloc.arena;
1730                         break;
1731                 }
1732         unlock(&procalloc);
1733         return s;
1734 }