]> git.lizzy.rs Git - plan9front.git/blob - sys/src/9/pc/trap.c
Import sources from 2011-03-30 iso image
[plan9front.git] / sys / src / 9 / pc / trap.c
1 #include        "u.h"
2 #include        "tos.h"
3 #include        "../port/lib.h"
4 #include        "mem.h"
5 #include        "dat.h"
6 #include        "fns.h"
7 #include        "io.h"
8 #include        "ureg.h"
9 #include        "../port/error.h"
10 #include        <trace.h>
11
/* set to 1 at the end of trapinit(); until then trap() panics on any trap */
12 static int trapinited;
13
14 void    noted(Ureg*, ulong);
15
/* handlers defined later in this file */
16 static void debugbpt(Ureg*, void*);
17 static void fault386(Ureg*, void*);
18 static void doublefault(Ureg*, void*);
19 static void unexpected(Ureg*, void*);
20 static void _dumpstack(Ureg*);
21
/*
 * vctl[vno] heads the chain of handlers registered for vector vno;
 * vctllock is held (ilock) by intrenable, intrdisable and trapenable
 * while they update the chains.
 */
22 static Lock vctllock;
23 static Vctl *vctl[256];
24
25 enum
26 {
27         Ntimevec = 20           /* number of time buckets for each intr */
28 };
/* intrtimes[vno][bucket]: per-vector service time histogram, updated by intrtime() */
29 ulong intrtimes[256][Ntimevec];
30
31 void
32 intrenable(int irq, void (*f)(Ureg*, void*), void* a, int tbdf, char *name)
33 {
34         int vno;
35         Vctl *v;
36
37         if(f == nil){
38                 print("intrenable: nil handler for %d, tbdf 0x%uX for %s\n",
39                         irq, tbdf, name);
40                 return;
41         }
42
43         v = xalloc(sizeof(Vctl));
44         v->isintr = 1;
45         v->irq = irq;
46         v->tbdf = tbdf;
47         v->f = f;
48         v->a = a;
49         strncpy(v->name, name, KNAMELEN-1);
50         v->name[KNAMELEN-1] = 0;
51
52         ilock(&vctllock);
53         vno = arch->intrenable(v);
54         if(vno == -1){
55                 iunlock(&vctllock);
56                 print("intrenable: couldn't enable irq %d, tbdf 0x%uX for %s\n",
57                         irq, tbdf, v->name);
58                 xfree(v);
59                 return;
60         }
61         if(vctl[vno]){
62                 if(vctl[vno]->isr != v->isr || vctl[vno]->eoi != v->eoi)
63                         panic("intrenable: handler: %s %s %#p %#p %#p %#p",
64                                 vctl[vno]->name, v->name,
65                                 vctl[vno]->isr, v->isr, vctl[vno]->eoi, v->eoi);
66                 v->next = vctl[vno];
67         }
68         vctl[vno] = v;
69         iunlock(&vctllock);
70 }
71
72 int
73 intrdisable(int irq, void (*f)(Ureg *, void *), void *a, int tbdf, char *name)
74 {
75         Vctl **pv, *v;
76         int vno;
77
78         /*
79          * For now, none of this will work with the APIC code,
80          * there is no mapping between irq and vector as the IRQ
81          * is pretty meaningless.
82          */
83         if(arch->intrvecno == nil)
84                 return -1;
85         vno = arch->intrvecno(irq);
86         ilock(&vctllock);
87         pv = &vctl[vno];
88         while (*pv &&
89                   ((*pv)->irq != irq || (*pv)->tbdf != tbdf || (*pv)->f != f || (*pv)->a != a ||
90                    strcmp((*pv)->name, name)))
91                 pv = &((*pv)->next);
92         assert(*pv);
93
94         v = *pv;
95         *pv = (*pv)->next;      /* Link out the entry */
96
97         if(vctl[vno] == nil && arch->intrdisable != nil)
98                 arch->intrdisable(irq);
99         iunlock(&vctllock);
100         xfree(v);
101         return 0;
102 }
103
104 static long
105 irqallocread(Chan*, void *vbuf, long n, vlong offset)
106 {
107         char *buf, *p, str[2*(11+1)+KNAMELEN+1+1];
108         int m, vno;
109         long oldn;
110         Vctl *v;
111
112         if(n < 0 || offset < 0)
113                 error(Ebadarg);
114
115         oldn = n;
116         buf = vbuf;
117         for(vno=0; vno<nelem(vctl); vno++){
118                 for(v=vctl[vno]; v; v=v->next){
119                         m = snprint(str, sizeof str, "%11d %11d %.*s\n", vno, v->irq, KNAMELEN, v->name);
120                         if(m <= offset) /* if do not want this, skip entry */
121                                 offset -= m;
122                         else{
123                                 /* skip offset bytes */
124                                 m -= offset;
125                                 p = str+offset;
126                                 offset = 0;
127
128                                 /* write at most max(n,m) bytes */
129                                 if(m > n)
130                                         m = n;
131                                 memmove(buf, p, m);
132                                 n -= m;
133                                 buf += m;
134
135                                 if(n == 0)
136                                         return oldn;
137                         }
138                 }
139         }
140         return oldn - n;
141 }
142
143 void
144 trapenable(int vno, void (*f)(Ureg*, void*), void* a, char *name)
145 {
146         Vctl *v;
147
148         if(vno < 0 || vno >= VectorPIC)
149                 panic("trapenable: vno %d", vno);
150         v = xalloc(sizeof(Vctl));
151         v->tbdf = BUSUNKNOWN;
152         v->f = f;
153         v->a = a;
154         strncpy(v->name, name, KNAMELEN);
155         v->name[KNAMELEN-1] = 0;
156
157         ilock(&vctllock);
158         if(vctl[vno])
159                 v->next = vctl[vno]->next;
160         vctl[vno] = v;
161         iunlock(&vctllock);
162 }
163
/*
 * Arm the non-maskable interrupt by writing I/O ports 0x70 and 0x61.
 * The order of the port accesses is significant; see the per-line notes.
 * Called from trapinit().
 */
164 static void
165 nmienable(void)
166 {
167         int x;
168
169         /*
170          * Hack: should be locked with NVRAM access.
171          */
172         outb(0x70, 0x80);               /* NMI latch clear */
173         outb(0x70, 0);
174
            /* keep the low three bits of port 0x61, pulse bit 3 high then low */
175         x = inb(0x61) & 0x07;           /* Enable NMI */
176         outb(0x61, 0x08|x);
177         outb(0x61, x);
178 }
179
180 /*
181  * Minimal trap setup.  Just enough so that we can panic
182  * on traps (bugs) during kernel initialization.
183  * Called very early - malloc is not yet available.
184  */
185 void
186 trapinit0(void)
187 {
188         int d1, v;
189         ulong vaddr;
190         Segdesc *idt;
191
192         idt = (Segdesc*)IDTADDR;
193         vaddr = (ulong)vectortable;
194         for(v = 0; v < 256; v++){
195                 d1 = (vaddr & 0xFFFF0000)|SEGP;
196                 switch(v){
197
198                 case VectorBPT:
199                         d1 |= SEGPL(3)|SEGIG;
200                         break;
201
202                 case VectorSYSCALL:
203                         d1 |= SEGPL(3)|SEGIG;
204                         break;
205
206                 default:
207                         d1 |= SEGPL(0)|SEGIG;
208                         break;
209                 }
210                 idt[v].d0 = (vaddr & 0xFFFF)|(KESEL<<16);
211                 idt[v].d1 = d1;
212                 vaddr += 6;
213         }
214 }
215
216 void
217 trapinit(void)
218 {
219         /*
220          * Special traps.
221          * Syscall() is called directly without going through trap().
222          */
223         trapenable(VectorBPT, debugbpt, 0, "debugpt");
224         trapenable(VectorPF, fault386, 0, "fault386");
225         trapenable(Vector2F, doublefault, 0, "doublefault");
226         trapenable(Vector15, unexpected, 0, "unexpected");
227         nmienable();
228
229         addarchfile("irqalloc", 0444, irqallocread, nil);
230         trapinited = 1;
231 }
232
/*
 * Names of the first 32 trap vectors, indexed by vector number.
 * trap() uses them for the "sys: trap: ..." note posted to a user
 * process and for the panic message on an unhandled kernel trap.
 */
233 static char* excname[32] = {
234         "divide error",
235         "debug exception",
236         "nonmaskable interrupt",
237         "breakpoint",
238         "overflow",
239         "bounds check",
240         "invalid opcode",
241         "coprocessor not available",
242         "double fault",
243         "coprocessor segment overrun",
244         "invalid TSS",
245         "segment not present",
246         "stack exception",
247         "general protection violation",
248         "page fault",
249         "15 (reserved)",
250         "coprocessor error",
251         "alignment check",
252         "machine check",
253         "19 (reserved)",
254         "20 (reserved)",
255         "21 (reserved)",
256         "22 (reserved)",
257         "23 (reserved)",
258         "24 (reserved)",
259         "25 (reserved)",
260         "26 (reserved)",
261         "27 (reserved)",
262         "28 (reserved)",
263         "29 (reserved)",
264         "30 (reserved)",
265         "31 (reserved)",
266 };
267
268 /*
269  *  keep histogram of interrupt service times
270  */
271 void
272 intrtime(Mach*, int vno)
273 {
274         ulong diff;
275         ulong x;
276
277         x = perfticks();
278         diff = x - m->perf.intrts;
279         m->perf.intrts = x;
280
281         m->perf.inintr += diff;
282         if(up == nil && m->perf.inidle > diff)
283                 m->perf.inidle -= diff;
284
285         diff /= m->cpumhz*100;          /* quantum = 100µsec */
286         if(diff >= Ntimevec)
287                 diff = Ntimevec-1;
288         intrtimes[vno][diff]++;
289 }
290
291 /* go to user space */
292 void
293 kexit(Ureg*)
294 {
295         uvlong t;
296         Tos *tos;
297
298         /* precise time accounting, kernel exit */
299         tos = (Tos*)(USTKTOP-sizeof(Tos));
300         cycles(&t);
301         tos->kcycles += t - up->kentry;
302         tos->pcycles = up->pcycles;
303         tos->pid = up->pid;
304 }
305
/*
 * trap(ureg) dispatches on ureg->trap:
 *  - a vector with a registered chain in vctl[] runs its isr, every
 *    handler on the chain, then its eoi; interrupts are also timed;
 *  - an unregistered exception (vector below nelem(excname)) taken in
 *    user mode is turned into a "sys: trap: ..." note for the process;
 *  - an unregistered vector at or above VectorPIC is treated as a
 *    spurious interrupt, counted, and the function returns early;
 *  - anything else dumps registers (and the stack for kernel traps)
 *    and panics.
 * On the normal exit path the process may be rescheduled after a
 * clock interrupt, and for traps from user mode pending notes are
 * delivered before kexit().
 */
306 /*
307  *  All traps come here.  It is slower to have all traps call trap()
308  *  rather than directly vectoring the handler.  However, this avoids a
309  *  lot of code duplication and possible bugs.  The only exception is
310  *  VectorSYSCALL.
311  *  Trap is called with interrupts disabled via interrupt-gates.
312  */
313 void
314 trap(Ureg* ureg)
315 {
316         int clockintr, i, vno, user;
317         char buf[ERRMAX];
318         Vctl *ctl, *v;
319         Mach *mach;
320
321         if(!trapinited){
322                 /* fault386 can give a better error message */
323                 if(ureg->trap == VectorPF)
324                         fault386(ureg, nil);
325                 panic("trap %lud: not ready", ureg->trap);
326         }
327
            /* start of the interval that intrtime() will measure */
328         m->perf.intrts = perfticks();
            /* trapped from user mode if the saved code segment is the user one */
329         user = (ureg->cs & 0xFFFF) == UESEL;
330         if(user){
331                 up->dbgreg = ureg;
332                 cycles(&up->kentry);
333         }
334
335         clockintr = 0;
336
337         vno = ureg->trap;
338         if(ctl = vctl[vno]){
339                 if(ctl->isintr){
340                         m->intr++;
341                         if(vno >= VectorPIC && vno != VectorSYSCALL)
342                                 m->lastintr = ctl->irq;
343                 }
344
                    /* isr and eoi are taken from the head of the chain only */
345                 if(ctl->isr)
346                         ctl->isr(vno);
347                 for(v = ctl; v != nil; v = v->next){
348                         if(v->f)
349                                 v->f(ureg, v->a);
350                 }
351                 if(ctl->eoi)
352                         ctl->eoi(vno);
353
354                 if(ctl->isintr){
355                         intrtime(m, vno);
356
357                         if(ctl->irq == IrqCLOCK || ctl->irq == IrqTIMER)
358                                 clockintr = 1;
359
360                         if(up && !clockintr)
361                                 preempted();
362                 }
363         }
364         else if(vno < nelem(excname) && user){
                    /* unhandled exception in user mode: post a note to the process */
365                 spllo();
366                 sprint(buf, "sys: trap: %s", excname[vno]);
367                 postnote(up, 1, buf, NDebug);
368         }
369         else if(vno >= VectorPIC && vno != VectorSYSCALL){
370                 /*
371                  * An unknown interrupt.
372                  * Check for a default IRQ7. This can happen when
373                  * the IRQ input goes away before the acknowledge.
374                  * In this case, a 'default IRQ7' is generated, but
375                  * the corresponding bit in the ISR isn't set.
376                  * In fact, just ignore all such interrupts.
377                  */
378
379                 /* call all interrupt routines, just in case */
380                 for(i = VectorPIC; i <= MaxIrqLAPIC; i++){
381                         ctl = vctl[i];
382                         if(ctl == nil)
383                                 continue;
384                         if(!ctl->isintr)
385                                 continue;
386                         for(v = ctl; v != nil; v = v->next){
387                                 if(v->f)
388                                         v->f(ureg, v->a);
389                         }
390                         /* should we do this? */
391                         if(ctl->eoi)
392                                 ctl->eoi(i);
393                 }
394
395                 /* clear the interrupt */
396                 i8259isr(vno);
397
                    /* the two if(0) blocks below are disabled debugging output */
398                 if(0)print("cpu%d: spurious interrupt %d, last %d\n",
399                         m->machno, vno, m->lastintr);
400                 if(0)if(conf.nmach > 1){
401                         for(i = 0; i < 32; i++){
402                                 if(!(active.machs & (1<<i)))
403                                         continue;
404                                 mach = MACHP(i);
405                                 if(m->machno == mach->machno)
406                                         continue;
407                                 print(" cpu%d: last %d",
408                                         mach->machno, mach->lastintr);
409                         }
410                         print("\n");
411                 }
412                 m->spuriousintr++;
                    /* early return: skips the scheduling and note delivery below */
413                 if(user)
414                         kexit(ureg);
415                 return;
416         }
417         else{
418                 if(vno == VectorNMI){
419                         /*
420                          * Don't re-enable, it confuses the crash dumps.
421                         nmienable();
422                          */
423                         iprint("cpu%d: PC %#8.8lux\n", m->machno, ureg->pc);
                            /* every cpu other than cpu0 spins here forever */
424                         while(m->machno != 0)
425                                 ;
426                 }
427                 dumpregs(ureg);
428                 if(!user){
                            /* point sp at the Ureg's own sp field before dumping the stack */
429                         ureg->sp = (ulong)&ureg->sp;
430                         _dumpstack(ureg);
431                 }
432                 if(vno < nelem(excname))
433                         panic("%s", excname[vno]);
434                 panic("unknown trap/intr: %d", vno);
435         }
436         splhi();
437
438         /* delaysched set because we held a lock or because our quantum ended */
439         if(up && up->delaysched && clockintr){
440                 sched();
441                 splhi();
442         }
443
444         if(user){
445                 if(up->procctl || up->nnote)
446                         notify(ureg);
447                 kexit(ureg);
448         }
449 }
450
451 /*
452  *  dump registers
453  */
454 void
455 dumpregs2(Ureg* ureg)
456 {
457         if(up)
458                 iprint("cpu%d: registers for %s %lud\n",
459                         m->machno, up->text, up->pid);
460         else
461                 iprint("cpu%d: registers for kernel\n", m->machno);
462         iprint("FLAGS=%luX TRAP=%luX ECODE=%luX PC=%luX",
463                 ureg->flags, ureg->trap, ureg->ecode, ureg->pc);
464         iprint(" SS=%4.4luX USP=%luX\n", ureg->ss & 0xFFFF, ureg->usp);
465         iprint("  AX %8.8luX  BX %8.8luX  CX %8.8luX  DX %8.8luX\n",
466                 ureg->ax, ureg->bx, ureg->cx, ureg->dx);
467         iprint("  SI %8.8luX  DI %8.8luX  BP %8.8luX\n",
468                 ureg->si, ureg->di, ureg->bp);
469         iprint("  CS %4.4luX  DS %4.4luX  ES %4.4luX  FS %4.4luX  GS %4.4luX\n",
470                 ureg->cs & 0xFFFF, ureg->ds & 0xFFFF, ureg->es & 0xFFFF,
471                 ureg->fs & 0xFFFF, ureg->gs & 0xFFFF);
472 }
473
474 void
475 dumpregs(Ureg* ureg)
476 {
477         vlong mca, mct;
478
479         dumpregs2(ureg);
480
481         /*
482          * Processor control registers.
483          * If machine check exception, time stamp counter, page size extensions
484          * or enhanced virtual 8086 mode extensions are supported, there is a
485          * CR4. If there is a CR4 and machine check extensions, read the machine
486          * check address and machine check type registers if RDMSR supported.
487          */
488         iprint("  CR0 %8.8lux CR2 %8.8lux CR3 %8.8lux",
489                 getcr0(), getcr2(), getcr3());
490         if(m->cpuiddx & 0x9A){
491                 iprint(" CR4 %8.8lux", getcr4());
492                 if((m->cpuiddx & 0xA0) == 0xA0){
493                         rdmsr(0x00, &mca);
494                         rdmsr(0x01, &mct);
495                         iprint("\n  MCA %8.8llux MCT %8.8llux", mca, mct);
496                 }
497         }
498         iprint("\n  ur %#p up %#p\n", ureg, up);
499 }
500
501
502 /*
503  * Fill in enough of Ureg to get a stack trace, and call a function.
504  * Used by debugging interface rdb.
     *
     * Only ureg.pc (the caller's pc) and ureg.sp (the address of the
     * argument fn) are set; every other field of the local Ureg is
     * left uninitialised, so fn must not rely on them.
505  */
506 void
507 callwithureg(void (*fn)(Ureg*))
508 {
509         Ureg ureg;
510         ureg.pc = getcallerpc(&fn);
511         ureg.sp = (ulong)&fn;
512         fn(&ureg);
513 }
514
/*
 * Print a kernel stack trace on the console, in the form of input
 * for ktrace: a header line with ureg->pc and ureg->sp, then
 * "address=value" pairs for the stack words that look like kernel
 * text addresses, terminated by EOF.
 *
 * The scan starts at the address of the local variable l and runs to
 * the end of whichever stack contains it (the process kernel stack
 * or the per-cpu Mach stack); if l is in neither, nothing more is
 * printed.  For an NMI the raw stack words are dumped as well.
 * Setting the configuration variable *nodumpstack to anything but
 * "0" disables the dump.
 */
515 static void
516 _dumpstack(Ureg *ureg)
517 {
518         uintptr l, v, i, estack;
519         extern ulong etext;
520         int x;
521         char *s;
522
523         if((s = getconf("*nodumpstack")) != nil && strcmp(s, "0") != 0){
524                 iprint("dumpstack disabled\n");
525                 return;
526         }
527         iprint("dumpstack\n");
528
            /* x accumulates iprint return values but is never read in this function */
529         x = 0;
530         x += iprint("ktrace /kernel/path %.8lux %.8lux <<EOF\n", ureg->pc, ureg->sp);
531         i = 0;
            /* find the end of the stack we are currently running on */
532         if(up
533         && (uintptr)&l >= (uintptr)up->kstack
534         && (uintptr)&l <= (uintptr)up->kstack+KSTACK)
535                 estack = (uintptr)up->kstack+KSTACK;
536         else if((uintptr)&l >= (uintptr)m->stack
537         && (uintptr)&l <= (uintptr)m+MACHSIZE)
538                 estack = (uintptr)m+MACHSIZE;
539         else
540                 return;
541         x += iprint("estackx %p\n", estack);
542
543         for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){
544                 v = *(uintptr*)l;
                    /* print words inside kernel text, and always the last 32 bytes of the stack */
545                 if((KTZERO < v && v < (uintptr)&etext) || estack-l < 32){
546                         /*
547                          * Could Pick off general CALL (((uchar*)v)[-5] == 0xE8)
548                          * and CALL indirect through AX
549                          * (((uchar*)v)[-2] == 0xFF && ((uchar*)v)[-1] == 0xD0),
550                          * but this is too clever and misses faulting address.
551                          */
552                         x += iprint("%.8p=%.8p ", l, v);
553                         i++;
554                 }
                    /* four pairs per output line */
555                 if(i == 4){
556                         i = 0;
557                         x += iprint("\n");
558                 }
559         }
560         if(i)
561                 iprint("\n");
562         iprint("EOF\n");
563
564         if(ureg->trap != VectorNMI)
565                 return;
566
            /* NMI only: raw dump of every stack word, eight per line */
567         i = 0;
568         for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){
569                 iprint("%.8p ", *(uintptr*)l);
570                 if(++i == 8){
571                         i = 0;
572                         iprint("\n");
573                 }
574         }
575         if(i)
576                 iprint("\n");
577 }
578
/*
 * Print a stack trace of the caller: runs _dumpstack through
 * callwithureg, which supplies the pc and sp to start from.
 */
579 void
580 dumpstack(void)
581 {
582         callwithureg(_dumpstack);
583 }
584
585 static void
586 debugbpt(Ureg* ureg, void*)
587 {
588         char buf[ERRMAX];
589
590         if(up == 0)
591                 panic("kernel bpt");
592         /* restore pc to instruction that caused the trap */
593         ureg->pc--;
594         sprint(buf, "sys: breakpoint");
595         postnote(up, 1, buf, NDebug);
596 }
597
/* Handler installed on Vector2F by trapinit: always panics. */
598 static void
599 doublefault(Ureg*, void*)
600 {
601         panic("double fault");
602 }
603
/* Handler installed on Vector15 by trapinit: reports the trap number and otherwise ignores it. */
604 static void
605 unexpected(Ureg* ureg, void*)
606 {
607         print("unexpected trap %lud; ignoring\n", ureg->trap);
608 }
609
610 extern void checkpages(void);
611 extern void checkfault(ulong, ulong);
612 static void
613 fault386(Ureg* ureg, void*)
614 {
615         ulong addr;
616         int read, user, n, insyscall;
617         char buf[ERRMAX];
618
619         addr = getcr2();
620         read = !(ureg->ecode & 2);
621
622         user = (ureg->cs & 0xFFFF) == UESEL;
623         if(!user){
624                 if(vmapsync(addr))
625                         return;
626                 if(addr >= USTKTOP)
627                         panic("kernel fault: bad address pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
628                 if(up == nil)
629                         panic("kernel fault: no user process pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
630         }
631         if(up == nil)
632                 panic("user fault: up=0 pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
633
634         insyscall = up->insyscall;
635         up->insyscall = 1;
636         n = fault(addr, read);
637         if(n < 0){
638                 if(!user){
639                         dumpregs(ureg);
640                         panic("fault: 0x%lux", addr);
641                 }
642                 checkpages();
643                 checkfault(addr, ureg->pc);
644                 sprint(buf, "sys: trap: fault %s addr=0x%lux",
645                         read ? "read" : "write", addr);
646                 postnote(up, 1, buf, NDebug);
647         }
648         up->insyscall = insyscall;
649 }
650
651 /*
652  *  system calls
653  */
654 #include "../port/systab.h"
655
656 /*
657  *  Syscall is called directly from assembler without going through trap().
658  */
659 void
660 syscall(Ureg* ureg)
661 {
662         char *e;
663         ulong   sp;
664         long    ret;
665         int     i, s;
666         ulong scallnr;
667         vlong startns, stopns;
668
669         if((ureg->cs & 0xFFFF) != UESEL)
670                 panic("syscall: cs 0x%4.4luX", ureg->cs);
671
672         cycles(&up->kentry);
673
674         m->syscall++;
675         up->insyscall = 1;
676         up->pc = ureg->pc;
677         up->dbgreg = ureg;
678
679         sp = ureg->usp;
680         scallnr = ureg->ax;
681         up->scallnr = scallnr;
682
683         if(up->procctl == Proc_tracesyscall){
684                 /*
685                  * Redundant validaddr.  Do we care?
686                  * Tracing syscalls is not exactly a fast path...
687                  * Beware, validaddr currently does a pexit rather
688                  * than an error if there's a problem; that might
689                  * change in the future.
690                  */
691                 if(sp < (USTKTOP-BY2PG) || sp > (USTKTOP-sizeof(Sargs)-BY2WD))
692                         validaddr(sp, sizeof(Sargs)+BY2WD, 0);
693
694                 syscallfmt(scallnr, ureg->pc, (va_list)(sp+BY2WD));
695                 up->procctl = Proc_stopme;
696                 procctl(up);
697                 if(up->syscalltrace)
698                         free(up->syscalltrace);
699                 up->syscalltrace = nil;
700                 startns = todget(nil);
701         }
702
703         if(scallnr == RFORK && up->fpstate == FPactive){
704                 fpsave(&up->fpsave);
705                 up->fpstate = FPinactive;
706         }
707         spllo();
708
709         up->nerrlab = 0;
710         ret = -1;
711         if(!waserror()){
712                 if(scallnr >= nsyscall || systab[scallnr] == 0){
713                         pprint("bad sys call number %lud pc %lux\n",
714                                 scallnr, ureg->pc);
715                         postnote(up, 1, "sys: bad sys call", NDebug);
716                         error(Ebadarg);
717                 }
718
719                 if(sp<(USTKTOP-BY2PG) || sp>(USTKTOP-sizeof(Sargs)-BY2WD))
720                         validaddr(sp, sizeof(Sargs)+BY2WD, 0);
721
722                 up->s = *((Sargs*)(sp+BY2WD));
723                 up->psstate = sysctab[scallnr];
724
725                 ret = systab[scallnr](up->s.args);
726                 poperror();
727         }else{
728                 /* failure: save the error buffer for errstr */
729                 e = up->syserrstr;
730                 up->syserrstr = up->errstr;
731                 up->errstr = e;
732                 if(0 && up->pid == 1)
733                         print("syscall %lud error %s\n", scallnr, up->syserrstr);
734         }
735         if(up->nerrlab){
736                 print("bad errstack [%lud]: %d extra\n", scallnr, up->nerrlab);
737                 for(i = 0; i < NERR; i++)
738                         print("sp=%lux pc=%lux\n",
739                                 up->errlab[i].sp, up->errlab[i].pc);
740                 panic("error stack");
741         }
742
743         /*
744          *  Put return value in frame.  On the x86 the syscall is
745          *  just another trap and the return value from syscall is
746          *  ignored.  On other machines the return value is put into
747          *  the results register by caller of syscall.
748          */
749         ureg->ax = ret;
750
751         if(up->procctl == Proc_tracesyscall){
752                 stopns = todget(nil);
753                 up->procctl = Proc_stopme;
754                 sysretfmt(scallnr, (va_list)(sp+BY2WD), ret, startns, stopns);
755                 s = splhi();
756                 procctl(up);
757                 splx(s);
758                 if(up->syscalltrace)
759                         free(up->syscalltrace);
760                 up->syscalltrace = nil;
761         }
762
763         up->insyscall = 0;
764         up->psstate = 0;
765
766         if(scallnr == NOTED)
767                 noted(ureg, *(ulong*)(sp+BY2WD));
768
769         if(scallnr!=RFORK && (up->procctl || up->nnote)){
770                 splhi();
771                 notify(ureg);
772         }
773         /* if we delayed sched because we held a lock, sched now */
774         if(up->delaysched)
775                 sched();
776         kexit(ureg);
777 }
778
779 /*
780  *  Call user, if necessary, with note.
781  *  Pass user the Ureg struct and the note on his stack.
     *
     *  Returns 1 when a note has been set up for delivery: a copy of
     *  ureg, the note text and the handler's argument words are
     *  pushed on the user stack and ureg->pc/usp are redirected to
     *  up->notify.  Returns 0 when there is no note to deliver or the
     *  process is already inside a note handler.  A note that cannot
     *  be delivered (no handler registered, or a non-user note
     *  arriving while already notified, or a bad handler/stack
     *  address) terminates the process through pexit.
782  */
783 int
784 notify(Ureg* ureg)
785 {
786         int l;
787         ulong s, sp;
788         Note *n;
789
790         if(up->procctl)
791                 procctl(up);
792         if(up->nnote == 0)
793                 return 0;
794
            /* save any live floating point state, then mark it FPillegal until noted() clears the flag */
795         if(up->fpstate == FPactive){
796                 fpsave(&up->fpsave);
797                 up->fpstate = FPinactive;
798         }
799         up->fpstate |= FPillegal;
800
801         s = spllo();
802         qlock(&up->debug);
803         up->notepending = 0;
804         n = &up->note[0];
            /* append the faulting pc to "sys:" notes, truncating the message to leave room */
805         if(strncmp(n->msg, "sys:", 4) == 0){
806                 l = strlen(n->msg);
807                 if(l > ERRMAX-15)       /* " pc=0x12345678\0" */
808                         l = ERRMAX-15;
809                 sprint(n->msg+l, " pc=0x%.8lux", ureg->pc);
810         }
811
812         if(n->flag!=NUser && (up->notified || up->notify==0)){
813                 if(n->flag == NDebug)
814                         pprint("suicide: %s\n", n->msg);
815                 qunlock(&up->debug);
816                 pexit(n->msg, n->flag!=NDebug);
817         }
818
            /* already running a note handler: leave the note queued */
819         if(up->notified){
820                 qunlock(&up->debug);
821                 splhi();
822                 return 0;
823         }
824
825         if(!up->notify){
826                 qunlock(&up->debug);
827                 pexit(n->msg, n->flag!=NDebug);
828         }
829         sp = ureg->usp;
830         sp -= 256;      /* debugging: preserve context causing problem */
831         sp -= sizeof(Ureg);
832 if(0) print("%s %lud: notify %.8lux %.8lux %.8lux %s\n",
833         up->text, up->pid, ureg->pc, ureg->usp, sp, n->msg);
834
            /* both the handler address and the stack area about to be written must be valid */
835         if(!okaddr((ulong)up->notify, 1, 0)
836         || !okaddr(sp-ERRMAX-4*BY2WD, sizeof(Ureg)+ERRMAX+4*BY2WD, 1)){
837                 qunlock(&up->debug);
838                 pprint("suicide: bad address in notify\n");
839                 pexit("Suicide", 0);
840         }
841
            /*
             * User stack after this, from high to low addresses:
             * saved Ureg, old up->ureg, note text (ERRMAX bytes),
             * string pointer, ureg pointer, zero return pc.
             */
842         memmove((Ureg*)sp, ureg, sizeof(Ureg));
843         *(Ureg**)(sp-BY2WD) = up->ureg; /* word under Ureg is old up->ureg */
844         up->ureg = (void*)sp;
845         sp -= BY2WD+ERRMAX;
846         memmove((char*)sp, up->note[0].msg, ERRMAX);
847         sp -= 3*BY2WD;
848         *(ulong*)(sp+2*BY2WD) = sp+3*BY2WD;             /* arg 2 is string */
849         *(ulong*)(sp+1*BY2WD) = (ulong)up->ureg;        /* arg 1 is ureg* */
850         *(ulong*)(sp+0*BY2WD) = 0;                      /* arg 0 is pc */
851         ureg->usp = sp;
852         ureg->pc = (ulong)up->notify;
853         up->notified = 1;
            /* remember the delivered note in lastnote and shift the queue down by one */
854         up->nnote--;
855         memmove(&up->lastnote, &up->note[0], sizeof(Note));
856         memmove(&up->note[0], &up->note[1], up->nnote*sizeof(Note));
857
858         qunlock(&up->debug);
859         splx(s);
860         return 1;
861 }
862
863 /*
864  *   Return user to state before notify()
     *
     *   ureg is the current trap frame; arg0 is the argument the
     *   process passed to noted: NCONT and NRSTR resume with the
     *   registers saved by notify() (as possibly edited by the user),
     *   NSAVE re-enters with the stack reset just below the saved
     *   note and Ureg, NDFLT (and any unknown value) terminates the
     *   process.  The user-supplied Ureg is validated first: it must
     *   be addressable, carry the user segment selectors, and may
     *   only change the flag bits in mask 0xCD5.
865  */
866 void
867 noted(Ureg* ureg, ulong arg0)
868 {
869         Ureg *nureg;
870         ulong oureg, sp;
871
872         qlock(&up->debug);
873         if(arg0!=NRSTR && !up->notified) {
874                 qunlock(&up->debug);
875                 pprint("call to noted() when not notified\n");
876                 pexit("Suicide", 0);
877         }
878         up->notified = 0;
879
880         nureg = up->ureg;       /* pointer to user returned Ureg struct */
881
882         up->fpstate &= ~FPillegal;
883
884         /* sanity clause */
885         oureg = (ulong)nureg;
886         if(!okaddr((ulong)oureg-BY2WD, BY2WD+sizeof(Ureg), 0)){
887                 qunlock(&up->debug);
888                 pprint("bad ureg in noted or call to noted when not notified\n");
889                 pexit("Suicide", 0);
890         }
891
892         /*
893          * Check the segment selectors are all valid, otherwise
894          * a fault will be taken on attempting to return to the
895          * user process.
896          * Take care with the comparisons as different processor
897          * generations push segment descriptors in different ways.
898          */
899         if((nureg->cs & 0xFFFF) != UESEL || (nureg->ss & 0xFFFF) != UDSEL
900           || (nureg->ds & 0xFFFF) != UDSEL || (nureg->es & 0xFFFF) != UDSEL
901           || (nureg->fs & 0xFFFF) != UDSEL || (nureg->gs & 0xFFFF) != UDSEL){
902                 qunlock(&up->debug);
903                 pprint("bad segment selector in noted\n");
904                 pexit("Suicide", 0);
905         }
906
907         /* don't let user change system flags */
908         nureg->flags = (ureg->flags & ~0xCD5) | (nureg->flags & 0xCD5);
909
            /* from here on the trap frame holds the user-supplied registers */
910         memmove(ureg, nureg, sizeof(Ureg));
911
912         switch(arg0){
913         case NCONT:
914         case NRSTR:
915 if(0) print("%s %lud: noted %.8lux %.8lux\n",
916         up->text, up->pid, nureg->pc, nureg->usp);
917                 if(!okaddr(nureg->pc, 1, 0) || !okaddr(nureg->usp, BY2WD, 0)){
918                         qunlock(&up->debug);
919                         pprint("suicide: trap in noted\n");
920                         pexit("Suicide", 0);
921                 }
                    /* pop back to the up->ureg that notify() stored in the word under the Ureg */
922                 up->ureg = (Ureg*)(*(ulong*)(oureg-BY2WD));
923                 qunlock(&up->debug);
924                 break;
925
926         case NSAVE:
927                 if(!okaddr(nureg->pc, BY2WD, 0)
928                 || !okaddr(nureg->usp, BY2WD, 0)){
929                         qunlock(&up->debug);
930                         pprint("suicide: trap in noted\n");
931                         pexit("Suicide", 0);
932                 }
933                 qunlock(&up->debug);
                    /* reset the stack to just below the note text and Ureg pushed by notify() */
934                 sp = oureg-4*BY2WD-ERRMAX;
935                 splhi();
936                 ureg->sp = sp;
937                 ((ulong*)sp)[1] = oureg;        /* arg 1 0(FP) is ureg* */
938                 ((ulong*)sp)[0] = 0;            /* arg 0 is pc */
939                 break;
940
941         default:
942                 pprint("unknown noted arg 0x%lux\n", arg0);
943                 up->lastnote.flag = NDebug;
944                 /* fall through */
945
946         case NDFLT:
947                 if(up->lastnote.flag == NDebug){
948                         qunlock(&up->debug);
949                         pprint("suicide: %s\n", up->lastnote.msg);
950                 } else
951                         qunlock(&up->debug);
952                 pexit(up->lastnote.msg, up->lastnote.flag!=NDebug);
953         }
954 }
955
956 long
957 execregs(ulong entry, ulong ssize, ulong nargs)
958 {
959         ulong *sp;
960         Ureg *ureg;
961
962         up->fpstate = FPinit;
963         fpoff();
964
965         sp = (ulong*)(USTKTOP - ssize);
966         *--sp = nargs;
967
968         ureg = up->dbgreg;
969         ureg->usp = (ulong)sp;
970         ureg->pc = entry;
971         return USTKTOP-sizeof(Tos);             /* address of kernel/user shared data */
972 }
973
974 /*
975  *  return the userpc the last exception happened at
976  */
977 ulong
978 userpc(void)
979 {
980         Ureg *ureg;
981
982         ureg = (Ureg*)up->dbgreg;
983         return ureg->pc;
984 }
985
986 /* This routine must save the values of registers the user is not permitted
987  * to write from devproc and then restore the saved values before returning.
988  */
989 void
990 setregisters(Ureg* ureg, char* pureg, char* uva, int n)
991 {
992         ulong cs, ds, es, flags, fs, gs, ss;
993
994         ss = ureg->ss;
995         flags = ureg->flags;
996         cs = ureg->cs;
997         ds = ureg->ds;
998         es = ureg->es;
999         fs = ureg->fs;
1000         gs = ureg->gs;
1001         memmove(pureg, uva, n);
1002         ureg->gs = gs;
1003         ureg->fs = fs;
1004         ureg->es = es;
1005         ureg->ds = ds;
1006         ureg->cs = cs;
1007         ureg->flags = (ureg->flags & 0x00FF) | (flags & 0xFF00);
1008         ureg->ss = ss;
1009 }
1010
/*
 * First code run by a kernel process set up with kprocchild():
 * lower the interrupt priority level, call the function recorded in
 * up->kpfun with up->kparg, and exit the process if it returns.
 */
1011 static void
1012 linkproc(void)
1013 {
1014         spllo();
1015         up->kpfun(up->kparg);
1016         pexit("kproc dying", 0);
1017 }
1018
1019 void
1020 kprocchild(Proc* p, void (*func)(void*), void* arg)
1021 {
1022         /*
1023          * gotolabel() needs a word on the stack in
1024          * which to place the return PC used to jump
1025          * to linkproc().
1026          */
1027         p->sched.pc = (ulong)linkproc;
1028         p->sched.sp = (ulong)p->kstack+KSTACK-BY2WD;
1029
1030         p->kpfun = func;
1031         p->kparg = arg;
1032 }
1033
1034 void
1035 forkchild(Proc *p, Ureg *ureg)
1036 {
1037         Ureg *cureg;
1038
1039         /*
1040          * Add 2*BY2WD to the stack to account for
1041          *  - the return PC
1042          *  - trap's argument (ur)
1043          */
1044         p->sched.sp = (ulong)p->kstack+KSTACK-(sizeof(Ureg)+2*BY2WD);
1045         p->sched.pc = (ulong)forkret;
1046
1047         cureg = (Ureg*)(p->sched.sp+2*BY2WD);
1048         memmove(cureg, ureg, sizeof(Ureg));
1049         /* return value of syscall in child */
1050         cureg->ax = 0;
1051
1052         /* Things from bottom of syscall which were never executed */
1053         p->psstate = 0;
1054         p->insyscall = 0;
1055 }
1056
1057 /* Give enough context in the ureg to produce a kernel stack for
1058  * a sleeping process
1059  */
1060 void
1061 setkernur(Ureg* ureg, Proc* p)
1062 {
1063         ureg->pc = p->sched.pc;
1064         ureg->sp = p->sched.sp+4;
1065 }
1066
1067 ulong
1068 dbgpc(Proc *p)
1069 {
1070         Ureg *ureg;
1071
1072         ureg = p->dbgreg;
1073         if(ureg == 0)
1074                 return 0;
1075
1076         return ureg->pc;
1077 }