]> git.lizzy.rs Git - plan9front.git/blob - sys/src/9/pc/trap.c
08033a9eedb8182693d4dc4f770eba4f9aaae53a
[plan9front.git] / sys / src / 9 / pc / trap.c
1 #include        "u.h"
2 #include        "tos.h"
3 #include        "../port/lib.h"
4 #include        "mem.h"
5 #include        "dat.h"
6 #include        "fns.h"
7 #include        "io.h"
8 #include        "ureg.h"
9 #include        "../port/error.h"
10 #include        <trace.h>
11
/* set to 1 at the end of trapinit(); trap() panics on anything taken before that */
static int trapinited;

void    noted(Ureg*, ulong);

/* handlers defined later in this file */
static void debugbpt(Ureg*, void*);
static void fault386(Ureg*, void*);
static void doublefault(Ureg*, void*);
static void unexpected(Ureg*, void*);
static void _dumpstack(Ureg*);

/* vctllock is held (ilock) while the vctl[] chains are modified */
static Lock vctllock;
/* one chain of registered handlers per vector number */
static Vctl *vctl[256];

enum
{
        Ntimevec = 20           /* number of time buckets for each intr */
};
/* per-vector histogram of interrupt service times, updated by intrtime() */
ulong intrtimes[256][Ntimevec];
30
31 void
32 intrenable(int irq, void (*f)(Ureg*, void*), void* a, int tbdf, char *name)
33 {
34         int vno;
35         Vctl *v;
36
37         if(f == nil){
38                 print("intrenable: nil handler for %d, tbdf 0x%uX for %s\n",
39                         irq, tbdf, name);
40                 return;
41         }
42
43         v = xalloc(sizeof(Vctl));
44         v->isintr = 1;
45         v->irq = irq;
46         v->tbdf = tbdf;
47         v->f = f;
48         v->a = a;
49         strncpy(v->name, name, KNAMELEN-1);
50         v->name[KNAMELEN-1] = 0;
51
52         ilock(&vctllock);
53         vno = arch->intrenable(v);
54         if(vno == -1){
55                 iunlock(&vctllock);
56                 print("intrenable: couldn't enable irq %d, tbdf 0x%uX for %s\n",
57                         irq, tbdf, v->name);
58                 xfree(v);
59                 return;
60         }
61         if(vctl[vno]){
62                 if(vctl[vno]->isr != v->isr || vctl[vno]->eoi != v->eoi)
63                         panic("intrenable: handler: %s %s %#p %#p %#p %#p",
64                                 vctl[vno]->name, v->name,
65                                 vctl[vno]->isr, v->isr, vctl[vno]->eoi, v->eoi);
66                 v->next = vctl[vno];
67         }
68         vctl[vno] = v;
69         iunlock(&vctllock);
70 }
71
72 int
73 intrdisable(int irq, void (*f)(Ureg *, void *), void *a, int tbdf, char *name)
74 {
75         Vctl **pv, *v;
76         int vno;
77
78         /*
79          * For now, none of this will work with the APIC code,
80          * there is no mapping between irq and vector as the IRQ
81          * is pretty meaningless.
82          */
83         if(arch->intrvecno == nil)
84                 return -1;
85         vno = arch->intrvecno(irq);
86         ilock(&vctllock);
87         pv = &vctl[vno];
88         while (*pv &&
89                   ((*pv)->irq != irq || (*pv)->tbdf != tbdf || (*pv)->f != f || (*pv)->a != a ||
90                    strcmp((*pv)->name, name)))
91                 pv = &((*pv)->next);
92         assert(*pv);
93
94         v = *pv;
95         *pv = (*pv)->next;      /* Link out the entry */
96
97         if(vctl[vno] == nil && arch->intrdisable != nil)
98                 arch->intrdisable(irq);
99         iunlock(&vctllock);
100         xfree(v);
101         return 0;
102 }
103
104 static long
105 irqallocread(Chan*, void *vbuf, long n, vlong offset)
106 {
107         char *buf, *p, str[2*(11+1)+KNAMELEN+1+1];
108         int m, vno;
109         long oldn;
110         Vctl *v;
111
112         if(n < 0 || offset < 0)
113                 error(Ebadarg);
114
115         oldn = n;
116         buf = vbuf;
117         for(vno=0; vno<nelem(vctl); vno++){
118                 for(v=vctl[vno]; v; v=v->next){
119                         m = snprint(str, sizeof str, "%11d %11d %.*s\n", vno, v->irq, KNAMELEN, v->name);
120                         if(m <= offset) /* if do not want this, skip entry */
121                                 offset -= m;
122                         else{
123                                 /* skip offset bytes */
124                                 m -= offset;
125                                 p = str+offset;
126                                 offset = 0;
127
128                                 /* write at most max(n,m) bytes */
129                                 if(m > n)
130                                         m = n;
131                                 memmove(buf, p, m);
132                                 n -= m;
133                                 buf += m;
134
135                                 if(n == 0)
136                                         return oldn;
137                         }
138                 }
139         }
140         return oldn - n;
141 }
142
143 void
144 trapenable(int vno, void (*f)(Ureg*, void*), void* a, char *name)
145 {
146         Vctl *v;
147
148         if(vno < 0 || vno >= VectorPIC)
149                 panic("trapenable: vno %d", vno);
150         v = xalloc(sizeof(Vctl));
151         v->tbdf = BUSUNKNOWN;
152         v->f = f;
153         v->a = a;
154         strncpy(v->name, name, KNAMELEN);
155         v->name[KNAMELEN-1] = 0;
156
157         ilock(&vctllock);
158         if(vctl[vno])
159                 v->next = vctl[vno]->next;
160         vctl[vno] = v;
161         iunlock(&vctllock);
162 }
163
/*
 * Clear the NMI latch through port 0x70 and pulse bit 3 of port 0x61
 * to enable NMI delivery.
 */
static void
nmienable(void)
{
        int ctl;

        /* Hack: should be locked with NVRAM access. */
        outb(0x70, 0x80);               /* NMI latch clear */
        outb(0x70, 0);

        /* Enable NMI: keep the low three bits, set bit 3, then clear it again */
        ctl = inb(0x61) & 0x07;
        outb(0x61, ctl|0x08);
        outb(0x61, ctl);
}
179
180 /*
181  * Minimal trap setup.  Just enough so that we can panic
182  * on traps (bugs) during kernel initialization.
183  * Called very early - malloc is not yet available.
184  */
185 void
186 trapinit0(void)
187 {
188         int d1, v;
189         ulong vaddr;
190         Segdesc *idt;
191
192         idt = (Segdesc*)IDTADDR;
193         vaddr = (ulong)vectortable;
194         for(v = 0; v < 256; v++){
195                 d1 = (vaddr & 0xFFFF0000)|SEGP;
196                 switch(v){
197
198                 case VectorBPT:
199                         d1 |= SEGPL(3)|SEGIG;
200                         break;
201
202                 case VectorSYSCALL:
203                         d1 |= SEGPL(3)|SEGIG;
204                         break;
205
206                 default:
207                         d1 |= SEGPL(0)|SEGIG;
208                         break;
209                 }
210                 idt[v].d0 = (vaddr & 0xFFFF)|(KESEL<<16);
211                 idt[v].d1 = d1;
212                 vaddr += 6;
213         }
214 }
215
216 void
217 trapinit(void)
218 {
219         /*
220          * Special traps.
221          * Syscall() is called directly without going through trap().
222          */
223         trapenable(VectorBPT, debugbpt, 0, "debugpt");
224         trapenable(VectorPF, fault386, 0, "fault386");
225         trapenable(Vector2F, doublefault, 0, "doublefault");
226         trapenable(Vector15, unexpected, 0, "unexpected");
227         nmienable();
228
229         addarchfile("irqalloc", 0444, irqallocread, nil);
230         trapinited = 1;
231 }
232
/*
 * Names of the first 32 trap vectors, indexed by vector number.
 * trap() uses them for the "sys: trap: ..." note posted to user
 * processes and for the panic message on unhandled kernel traps.
 */
static char* excname[32] = {
        "divide error",
        "debug exception",
        "nonmaskable interrupt",
        "breakpoint",
        "overflow",
        "bounds check",
        "invalid opcode",
        "coprocessor not available",
        "double fault",
        "coprocessor segment overrun",
        "invalid TSS",
        "segment not present",
        "stack exception",
        "general protection violation",
        "page fault",
        "15 (reserved)",
        "coprocessor error",
        "alignment check",
        "machine check",
        "19 (reserved)",
        "20 (reserved)",
        "21 (reserved)",
        "22 (reserved)",
        "23 (reserved)",
        "24 (reserved)",
        "25 (reserved)",
        "26 (reserved)",
        "27 (reserved)",
        "28 (reserved)",
        "29 (reserved)",
        "30 (reserved)",
        "31 (reserved)",
};
267
268 /*
269  *  keep histogram of interrupt service times
270  */
271 void
272 intrtime(Mach*, int vno)
273 {
274         ulong diff;
275         ulong x;
276
277         x = perfticks();
278         diff = x - m->perf.intrts;
279         m->perf.intrts = x;
280
281         m->perf.inintr += diff;
282         if(up == nil && m->perf.inidle > diff)
283                 m->perf.inidle -= diff;
284
285         diff /= m->cpumhz*100;          /* quantum = 100µsec */
286         if(diff >= Ntimevec)
287                 diff = Ntimevec-1;
288         intrtimes[vno][diff]++;
289 }
290
291 /* go to user space */
292 void
293 kexit(Ureg*)
294 {
295         uvlong t;
296         Tos *tos;
297
298         /* precise time accounting, kernel exit */
299         tos = (Tos*)(USTKTOP-sizeof(Tos));
300         cycles(&t);
301         tos->kcycles += t - up->kentry;
302         tos->pcycles = up->pcycles;
303         tos->pid = up->pid;
304 }
305
306 /*
307  *  All traps come here.  It is slower to have all traps call trap()
308  *  rather than directly vectoring the handler.  However, this avoids a
309  *  lot of code duplication and possible bugs.  The only exception is
310  *  VectorSYSCALL.
311  *  Trap is called with interrupts disabled via interrupt-gates.
312  */
313 void
314 trap(Ureg* ureg)
315 {
316         int clockintr, i, vno, user;
317         char buf[ERRMAX];
318         Vctl *ctl, *v;
319         Mach *mach;
320
321         if(!trapinited){
322                 /* fault386 can give a better error message */
323                 if(ureg->trap == VectorPF)
324                         fault386(ureg, nil);
325                 panic("trap %lud: not ready", ureg->trap);
326         }
327
328         m->perf.intrts = perfticks();
329         user = userureg(ureg);
330         if(user){
331                 up->dbgreg = ureg;
332                 cycles(&up->kentry);
333         }
334
335         clockintr = 0;
336
337         vno = ureg->trap;
338         if(ctl = vctl[vno]){
339                 if(ctl->isintr){
340                         m->intr++;
341                         if(vno >= VectorPIC && vno != VectorSYSCALL)
342                                 m->lastintr = ctl->irq;
343                 }
344
345                 if(ctl->isr)
346                         ctl->isr(vno);
347                 for(v = ctl; v != nil; v = v->next){
348                         if(v->f)
349                                 v->f(ureg, v->a);
350                 }
351                 if(ctl->eoi)
352                         ctl->eoi(vno);
353
354                 if(ctl->isintr){
355                         intrtime(m, vno);
356
357                         if(ctl->irq == IrqCLOCK || ctl->irq == IrqTIMER)
358                                 clockintr = 1;
359
360                         if(up && !clockintr)
361                                 preempted();
362                 }
363         }
364         else if(vno < nelem(excname) && user){
365                 spllo();
366                 sprint(buf, "sys: trap: %s", excname[vno]);
367                 postnote(up, 1, buf, NDebug);
368         }
369         else if(vno >= VectorPIC && vno != VectorSYSCALL){
370                 /*
371                  * An unknown interrupt.
372                  * Check for a default IRQ7. This can happen when
373                  * the IRQ input goes away before the acknowledge.
374                  * In this case, a 'default IRQ7' is generated, but
375                  * the corresponding bit in the ISR isn't set.
376                  * In fact, just ignore all such interrupts.
377                  */
378
379                 /* call all interrupt routines, just in case */
380                 for(i = VectorPIC; i <= MaxIrqLAPIC; i++){
381                         ctl = vctl[i];
382                         if(ctl == nil)
383                                 continue;
384                         if(!ctl->isintr)
385                                 continue;
386                         for(v = ctl; v != nil; v = v->next){
387                                 if(v->f)
388                                         v->f(ureg, v->a);
389                         }
390                         /* should we do this? */
391                         if(ctl->eoi)
392                                 ctl->eoi(i);
393                 }
394
395                 /* clear the interrupt */
396                 i8259isr(vno);
397
398                 if(0)print("cpu%d: spurious interrupt %d, last %d\n",
399                         m->machno, vno, m->lastintr);
400                 if(0)if(conf.nmach > 1){
401                         for(i = 0; i < 32; i++){
402                                 if(!(active.machs & (1<<i)))
403                                         continue;
404                                 mach = MACHP(i);
405                                 if(m->machno == mach->machno)
406                                         continue;
407                                 print(" cpu%d: last %d",
408                                         mach->machno, mach->lastintr);
409                         }
410                         print("\n");
411                 }
412                 m->spuriousintr++;
413                 if(user)
414                         kexit(ureg);
415                 return;
416         }
417         else{
418                 if(vno == VectorNMI){
419                         /*
420                          * Don't re-enable, it confuses the crash dumps.
421                         nmienable();
422                          */
423                         iprint("cpu%d: PC %#8.8lux\n", m->machno, ureg->pc);
424                         while(m->machno != 0)
425                                 ;
426                 }
427
428                 if(vno == VectorGPF || vno == VectorSNP){
429                         ulong *sp;
430                         uchar *pc;
431
432                         /* l.s */
433                         extern void load_fs(ulong);
434                         extern void load_gs(ulong);
435
436                         /*
437                          * CS, SS, DS and ES are initialized by strayintr
438                          * in l.s. initialize the others too so we dont trap
439                          * again when restoring the old context.
440                          */
441                         load_fs(NULLSEL);
442                         load_gs(NULLSEL);
443
444                         pc = (uchar*)ureg->pc;
445                         sp = (ulong*)&ureg->sp;
446
447                         /*
448                          * we test for the instructions used by forkret()
449                          * to load the segments. this needs to be changed
450                          * if forkret changes!
451                          */
452
453                         /* POP */
454                         if((pc[0] == 0x0f && (pc[1] == 0xa9 /*GS*/ || 
455                                 pc[1] == 0xa1 /*FS*/)) || (pc[0] == 0x07) /*ES*/ ||     
456                                 (pc[0] == 0x1f) /*DS*/){
457                                 sp[0] = NULLSEL;
458                                 return;
459                         }
460
461                         /* IRET */
462                         if(pc[0] == 0xcf){
463                                 sp[1] = UESEL;  /*CS*/
464                                 sp[4] = UDSEL;  /*SS*/
465                                 return;
466                         }
467                 }
468
469                 dumpregs(ureg);
470                 if(!user){
471                         ureg->sp = (ulong)&ureg->sp;
472                         _dumpstack(ureg);
473                 }
474                 if(vno < nelem(excname))
475                         panic("%s", excname[vno]);
476                 panic("unknown trap/intr: %d", vno);
477         }
478         splhi();
479
480         /* delaysched set because we held a lock or because our quantum ended */
481         if(up && up->delaysched && clockintr){
482                 sched();
483                 splhi();
484         }
485
486         if(user){
487                 if(up->procctl || up->nnote)
488                         notify(ureg);
489                 kexit(ureg);
490         }
491 }
492
493 /*
494  *  dump registers
495  */
496 void
497 dumpregs2(Ureg* ureg)
498 {
499         if(up)
500                 iprint("cpu%d: registers for %s %lud\n",
501                         m->machno, up->text, up->pid);
502         else
503                 iprint("cpu%d: registers for kernel\n", m->machno);
504         iprint("FLAGS=%luX TRAP=%luX ECODE=%luX PC=%luX",
505                 ureg->flags, ureg->trap, ureg->ecode, ureg->pc);
506         if(userureg(ureg))
507                 iprint(" SS=%4.4luX USP=%luX\n", ureg->ss & 0xFFFF, ureg->usp);
508         else
509                 iprint(" SP=%luX\n", (ulong)&ureg->sp);
510         iprint("  AX %8.8luX  BX %8.8luX  CX %8.8luX  DX %8.8luX\n",
511                 ureg->ax, ureg->bx, ureg->cx, ureg->dx);
512         iprint("  SI %8.8luX  DI %8.8luX  BP %8.8luX\n",
513                 ureg->si, ureg->di, ureg->bp);
514         iprint("  CS %4.4luX  DS %4.4luX  ES %4.4luX  FS %4.4luX  GS %4.4luX\n",
515                 ureg->cs & 0xFFFF, ureg->ds & 0xFFFF, ureg->es & 0xFFFF,
516                 ureg->fs & 0xFFFF, ureg->gs & 0xFFFF);
517 }
518
519 void
520 dumpregs(Ureg* ureg)
521 {
522         vlong mca, mct;
523
524         dumpregs2(ureg);
525
526         /*
527          * Processor control registers.
528          * If machine check exception, time stamp counter, page size extensions
529          * or enhanced virtual 8086 mode extensions are supported, there is a
530          * CR4. If there is a CR4 and machine check extensions, read the machine
531          * check address and machine check type registers if RDMSR supported.
532          */
533         iprint("  CR0 %8.8lux CR2 %8.8lux CR3 %8.8lux",
534                 getcr0(), getcr2(), getcr3());
535         if(m->cpuiddx & 0x9A){
536                 iprint(" CR4 %8.8lux", getcr4());
537                 if((m->cpuiddx & 0xA0) == 0xA0){
538                         rdmsr(0x00, &mca);
539                         rdmsr(0x01, &mct);
540                         iprint("\n  MCA %8.8llux MCT %8.8llux", mca, mct);
541                 }
542         }
543         iprint("\n  ur %#p up %#p\n", ureg, up);
544 }
545
546
547 /*
548  * Fill in enough of Ureg to get a stack trace, and call a function.
549  * Used by debugging interface rdb.
550  */
551 void
552 callwithureg(void (*fn)(Ureg*))
553 {
554         Ureg ureg;
555         ureg.pc = getcallerpc(&fn);
556         ureg.sp = (ulong)&fn;
557         fn(&ureg);
558 }
559
560 static void
561 _dumpstack(Ureg *ureg)
562 {
563         uintptr l, v, i, estack;
564         extern ulong etext;
565         int x;
566         char *s;
567
568         if((s = getconf("*nodumpstack")) != nil && strcmp(s, "0") != 0){
569                 iprint("dumpstack disabled\n");
570                 return;
571         }
572         iprint("dumpstack\n");
573
574         x = 0;
575         x += iprint("ktrace /kernel/path %.8lux %.8lux <<EOF\n", ureg->pc, ureg->sp);
576         i = 0;
577         if(up
578         && (uintptr)&l >= (uintptr)up->kstack
579         && (uintptr)&l <= (uintptr)up->kstack+KSTACK)
580                 estack = (uintptr)up->kstack+KSTACK;
581         else if((uintptr)&l >= (uintptr)m->stack
582         && (uintptr)&l <= (uintptr)m+MACHSIZE)
583                 estack = (uintptr)m+MACHSIZE;
584         else
585                 return;
586         x += iprint("estackx %p\n", estack);
587
588         for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){
589                 v = *(uintptr*)l;
590                 if((KTZERO < v && v < (uintptr)&etext) || estack-l < 32){
591                         /*
592                          * Could Pick off general CALL (((uchar*)v)[-5] == 0xE8)
593                          * and CALL indirect through AX
594                          * (((uchar*)v)[-2] == 0xFF && ((uchar*)v)[-2] == 0xD0),
595                          * but this is too clever and misses faulting address.
596                          */
597                         x += iprint("%.8p=%.8p ", l, v);
598                         i++;
599                 }
600                 if(i == 4){
601                         i = 0;
602                         x += iprint("\n");
603                 }
604         }
605         if(i)
606                 iprint("\n");
607         iprint("EOF\n");
608
609         if(ureg->trap != VectorNMI)
610                 return;
611
612         i = 0;
613         for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){
614                 iprint("%.8p ", *(uintptr*)l);
615                 if(++i == 8){
616                         i = 0;
617                         iprint("\n");
618                 }
619         }
620         if(i)
621                 iprint("\n");
622 }
623
/*
 * Print a stack trace starting from the caller: callwithureg() builds
 * a minimal Ureg and hands it to _dumpstack().
 */
void
dumpstack(void)
{
        callwithureg(_dumpstack);
}
629
630 static void
631 debugbpt(Ureg* ureg, void*)
632 {
633         char buf[ERRMAX];
634
635         if(up == 0)
636                 panic("kernel bpt");
637         /* restore pc to instruction that caused the trap */
638         ureg->pc--;
639         sprint(buf, "sys: breakpoint");
640         postnote(up, 1, buf, NDebug);
641 }
642
/* Handler installed for Vector2F by trapinit: always panics. */
static void
doublefault(Ureg*, void*)
{
        panic("double fault");
}
648
/* Handler installed for Vector15 by trapinit: reports the trap number and otherwise ignores it. */
static void
unexpected(Ureg* ureg, void*)
{
        print("unexpected trap %lud; ignoring\n", ureg->trap);
}
654
655 extern void checkpages(void);
656 extern void checkfault(ulong, ulong);
657 static void
658 fault386(Ureg* ureg, void*)
659 {
660         ulong addr;
661         int read, user, n, insyscall;
662         char buf[ERRMAX];
663
664         addr = getcr2();
665         read = !(ureg->ecode & 2);
666
667         user = userureg(ureg);
668         if(!user){
669                 if(vmapsync(addr))
670                         return;
671                 if(addr >= USTKTOP)
672                         panic("kernel fault: bad address pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
673                 if(up == nil)
674                         panic("kernel fault: no user process pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
675         }
676         if(up == nil)
677                 panic("user fault: up=0 pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
678
679         insyscall = up->insyscall;
680         up->insyscall = 1;
681         n = fault(addr, read);
682         if(n < 0){
683                 if(!user){
684                         dumpregs(ureg);
685                         panic("fault: 0x%lux", addr);
686                 }
687                 checkpages();
688                 checkfault(addr, ureg->pc);
689                 sprint(buf, "sys: trap: fault %s addr=0x%lux",
690                         read ? "read" : "write", addr);
691                 postnote(up, 1, buf, NDebug);
692         }
693         up->insyscall = insyscall;
694 }
695
696 /*
697  *  system calls
698  */
699 #include "../port/systab.h"
700
701 /*
702  *  Syscall is called directly from assembler without going through trap().
703  */
704 void
705 syscall(Ureg* ureg)
706 {
707         char *e;
708         ulong   sp;
709         long    ret;
710         int     i, s;
711         ulong scallnr;
712         vlong startns, stopns;
713
714         if(!userureg(ureg))
715                 panic("syscall: cs 0x%4.4luX", ureg->cs);
716
717         cycles(&up->kentry);
718
719         m->syscall++;
720         up->insyscall = 1;
721         up->pc = ureg->pc;
722         up->dbgreg = ureg;
723
724         sp = ureg->usp;
725         scallnr = ureg->ax;
726         up->scallnr = scallnr;
727
728         if(up->procctl == Proc_tracesyscall){
729                 /*
730                  * Redundant validaddr.  Do we care?
731                  * Tracing syscalls is not exactly a fast path...
732                  * Beware, validaddr currently does a pexit rather
733                  * than an error if there's a problem; that might
734                  * change in the future.
735                  */
736                 if(sp < (USTKTOP-BY2PG) || sp > (USTKTOP-sizeof(Sargs)-BY2WD))
737                         validaddr(sp, sizeof(Sargs)+BY2WD, 0);
738
739                 syscallfmt(scallnr, ureg->pc, (va_list)(sp+BY2WD));
740                 up->procctl = Proc_stopme;
741                 procctl(up);
742                 if(up->syscalltrace)
743                         free(up->syscalltrace);
744                 up->syscalltrace = nil;
745                 startns = todget(nil);
746         }
747
748         if(scallnr == RFORK && up->fpstate == FPactive){
749                 fpsave(&up->fpsave);
750                 up->fpstate = FPinactive;
751         }
752         spllo();
753
754         up->nerrlab = 0;
755         ret = -1;
756         if(!waserror()){
757                 if(scallnr >= nsyscall || systab[scallnr] == 0){
758                         pprint("bad sys call number %lud pc %lux\n",
759                                 scallnr, ureg->pc);
760                         postnote(up, 1, "sys: bad sys call", NDebug);
761                         error(Ebadarg);
762                 }
763
764                 if(sp<(USTKTOP-BY2PG) || sp>(USTKTOP-sizeof(Sargs)-BY2WD))
765                         validaddr(sp, sizeof(Sargs)+BY2WD, 0);
766
767                 up->s = *((Sargs*)(sp+BY2WD));
768                 up->psstate = sysctab[scallnr];
769
770                 ret = systab[scallnr](up->s.args);
771                 poperror();
772         }else{
773                 /* failure: save the error buffer for errstr */
774                 e = up->syserrstr;
775                 up->syserrstr = up->errstr;
776                 up->errstr = e;
777                 if(0 && up->pid == 1)
778                         print("syscall %lud error %s\n", scallnr, up->syserrstr);
779         }
780         if(up->nerrlab){
781                 print("bad errstack [%lud]: %d extra\n", scallnr, up->nerrlab);
782                 for(i = 0; i < NERR; i++)
783                         print("sp=%lux pc=%lux\n",
784                                 up->errlab[i].sp, up->errlab[i].pc);
785                 panic("error stack");
786         }
787
788         /*
789          *  Put return value in frame.  On the x86 the syscall is
790          *  just another trap and the return value from syscall is
791          *  ignored.  On other machines the return value is put into
792          *  the results register by caller of syscall.
793          */
794         ureg->ax = ret;
795
796         if(up->procctl == Proc_tracesyscall){
797                 stopns = todget(nil);
798                 up->procctl = Proc_stopme;
799                 sysretfmt(scallnr, (va_list)(sp+BY2WD), ret, startns, stopns);
800                 s = splhi();
801                 procctl(up);
802                 splx(s);
803                 if(up->syscalltrace)
804                         free(up->syscalltrace);
805                 up->syscalltrace = nil;
806         }
807
808         up->insyscall = 0;
809         up->psstate = 0;
810
811         if(scallnr == NOTED)
812                 noted(ureg, *(ulong*)(sp+BY2WD));
813
814         if(scallnr!=RFORK && (up->procctl || up->nnote)){
815                 splhi();
816                 notify(ureg);
817         }
818         /* if we delayed sched because we held a lock, sched now */
819         if(up->delaysched)
820                 sched();
821         kexit(ureg);
822 }
823
824 /*
825  *  Call user, if necessary, with note.
826  *  Pass user the Ureg struct and the note on his stack.
827  */
828 int
829 notify(Ureg* ureg)
830 {
831         int l;
832         ulong s, sp;
833         Note *n;
834
835         if(up->procctl)
836                 procctl(up);
837         if(up->nnote == 0)
838                 return 0;
839
840         if(up->fpstate == FPactive){
841                 fpsave(&up->fpsave);
842                 up->fpstate = FPinactive;
843         }
844         up->fpstate |= FPillegal;
845
846         s = spllo();
847         qlock(&up->debug);
848         up->notepending = 0;
849         n = &up->note[0];
850         if(strncmp(n->msg, "sys:", 4) == 0){
851                 l = strlen(n->msg);
852                 if(l > ERRMAX-15)       /* " pc=0x12345678\0" */
853                         l = ERRMAX-15;
854                 sprint(n->msg+l, " pc=0x%.8lux", ureg->pc);
855         }
856
857         if(n->flag!=NUser && (up->notified || up->notify==0)){
858                 if(n->flag == NDebug)
859                         pprint("suicide: %s\n", n->msg);
860                 qunlock(&up->debug);
861                 pexit(n->msg, n->flag!=NDebug);
862         }
863
864         if(up->notified){
865                 qunlock(&up->debug);
866                 splhi();
867                 return 0;
868         }
869
870         if(!up->notify){
871                 qunlock(&up->debug);
872                 pexit(n->msg, n->flag!=NDebug);
873         }
874         sp = ureg->usp;
875         sp -= 256;      /* debugging: preserve context causing problem */
876         sp -= sizeof(Ureg);
877 if(0) print("%s %lud: notify %.8lux %.8lux %.8lux %s\n",
878         up->text, up->pid, ureg->pc, ureg->usp, sp, n->msg);
879
880         if(!okaddr((ulong)up->notify, 1, 0)
881         || !okaddr(sp-ERRMAX-4*BY2WD, sizeof(Ureg)+ERRMAX+4*BY2WD, 1)){
882                 qunlock(&up->debug);
883                 pprint("suicide: bad address in notify\n");
884                 pexit("Suicide", 0);
885         }
886
887         memmove((Ureg*)sp, ureg, sizeof(Ureg));
888         *(Ureg**)(sp-BY2WD) = up->ureg; /* word under Ureg is old up->ureg */
889         up->ureg = (void*)sp;
890         sp -= BY2WD+ERRMAX;
891         memmove((char*)sp, up->note[0].msg, ERRMAX);
892         sp -= 3*BY2WD;
893         *(ulong*)(sp+2*BY2WD) = sp+3*BY2WD;             /* arg 2 is string */
894         *(ulong*)(sp+1*BY2WD) = (ulong)up->ureg;        /* arg 1 is ureg* */
895         *(ulong*)(sp+0*BY2WD) = 0;                      /* arg 0 is pc */
896         ureg->usp = sp;
897         ureg->pc = (ulong)up->notify;
898         ureg->cs = UESEL;
899         ureg->ss = ureg->ds = ureg->es = UDSEL;
900         up->notified = 1;
901         up->nnote--;
902         memmove(&up->lastnote, &up->note[0], sizeof(Note));
903         memmove(&up->note[0], &up->note[1], up->nnote*sizeof(Note));
904
905         qunlock(&up->debug);
906         splx(s);
907         return 1;
908 }
909
910 /*
911  *   Return user to state before notify()
912  */
913 void
914 noted(Ureg* ureg, ulong arg0)
915 {
916         Ureg *nureg;
917         ulong oureg, sp;
918
919         qlock(&up->debug);
920         if(arg0!=NRSTR && !up->notified) {
921                 qunlock(&up->debug);
922                 pprint("call to noted() when not notified\n");
923                 pexit("Suicide", 0);
924         }
925         up->notified = 0;
926
927         nureg = up->ureg;       /* pointer to user returned Ureg struct */
928
929         up->fpstate &= ~FPillegal;
930
931         /* sanity clause */
932         oureg = (ulong)nureg;
933         if(!okaddr((ulong)oureg-BY2WD, BY2WD+sizeof(Ureg), 0)){
934                 qunlock(&up->debug);
935                 pprint("bad ureg in noted or call to noted when not notified\n");
936                 pexit("Suicide", 0);
937         }
938
939         /* don't let user change system flags */
940         nureg->flags = (ureg->flags & ~0xCD5) | (nureg->flags & 0xCD5);
941         nureg->cs |= 3;
942         nureg->ss |= 3;
943
944         memmove(ureg, nureg, sizeof(Ureg));
945
946         switch(arg0){
947         case NCONT:
948         case NRSTR:
949 if(0) print("%s %lud: noted %.8lux %.8lux\n",
950         up->text, up->pid, nureg->pc, nureg->usp);
951                 if(!okaddr(nureg->pc, 1, 0) || !okaddr(nureg->usp, BY2WD, 0)){
952                         qunlock(&up->debug);
953                         pprint("suicide: trap in noted\n");
954                         pexit("Suicide", 0);
955                 }
956                 up->ureg = (Ureg*)(*(ulong*)(oureg-BY2WD));
957                 qunlock(&up->debug);
958                 break;
959
960         case NSAVE:
961                 if(!okaddr(nureg->pc, BY2WD, 0)
962                 || !okaddr(nureg->usp, BY2WD, 0)){
963                         qunlock(&up->debug);
964                         pprint("suicide: trap in noted\n");
965                         pexit("Suicide", 0);
966                 }
967                 qunlock(&up->debug);
968                 sp = oureg-4*BY2WD-ERRMAX;
969                 splhi();
970                 ureg->sp = sp;
971                 ((ulong*)sp)[1] = oureg;        /* arg 1 0(FP) is ureg* */
972                 ((ulong*)sp)[0] = 0;            /* arg 0 is pc */
973                 break;
974
975         default:
976                 pprint("unknown noted arg 0x%lux\n", arg0);
977                 up->lastnote.flag = NDebug;
978                 /* fall through */
979
980         case NDFLT:
981                 if(up->lastnote.flag == NDebug){
982                         qunlock(&up->debug);
983                         pprint("suicide: %s\n", up->lastnote.msg);
984                 } else
985                         qunlock(&up->debug);
986                 pexit(up->lastnote.msg, up->lastnote.flag!=NDebug);
987         }
988 }
989
990 long
991 execregs(ulong entry, ulong ssize, ulong nargs)
992 {
993         ulong *sp;
994         Ureg *ureg;
995
996         up->fpstate = FPinit;
997         fpoff();
998
999         sp = (ulong*)(USTKTOP - ssize);
1000         *--sp = nargs;
1001
1002         ureg = up->dbgreg;
1003         ureg->usp = (ulong)sp;
1004         ureg->pc = entry;
1005         ureg->cs = UESEL;
1006         ureg->ss = ureg->ds = ureg->es = UDSEL;
1007         ureg->fs = ureg->gs = NULLSEL;
1008         return USTKTOP-sizeof(Tos);             /* address of kernel/user shared data */
1009 }
1010
1011 /*
1012  *  return the userpc the last exception happened at
1013  */
1014 ulong
1015 userpc(void)
1016 {
1017         Ureg *ureg;
1018
1019         ureg = (Ureg*)up->dbgreg;
1020         return ureg->pc;
1021 }
1022
1023 /* This routine must save the values of registers the user is not permitted
1024  * to write from devproc and then restore the saved values before returning.
1025  */
1026 void
1027 setregisters(Ureg* ureg, char* pureg, char* uva, int n)
1028 {
1029         ulong flags;
1030
1031         flags = ureg->flags;
1032         memmove(pureg, uva, n);
1033         ureg->flags = (ureg->flags & 0xCD5) | (flags & ~0xCD5);
1034         ureg->cs |= 3;
1035         ureg->ss |= 3;
1036 }
1037
1038 static void
1039 linkproc(void)
1040 {
1041         spllo();
1042         up->kpfun(up->kparg);
1043         pexit("kproc dying", 0);
1044 }
1045
1046 void
1047 kprocchild(Proc* p, void (*func)(void*), void* arg)
1048 {
1049         /*
1050          * gotolabel() needs a word on the stack in
1051          * which to place the return PC used to jump
1052          * to linkproc().
1053          */
1054         p->sched.pc = (ulong)linkproc;
1055         p->sched.sp = (ulong)p->kstack+KSTACK-BY2WD;
1056
1057         p->kpfun = func;
1058         p->kparg = arg;
1059 }
1060
1061 void
1062 forkchild(Proc *p, Ureg *ureg)
1063 {
1064         Ureg *cureg;
1065
1066         /*
1067          * Add 2*BY2WD to the stack to account for
1068          *  - the return PC
1069          *  - trap's argument (ur)
1070          */
1071         p->sched.sp = (ulong)p->kstack+KSTACK-(sizeof(Ureg)+2*BY2WD);
1072         p->sched.pc = (ulong)forkret;
1073
1074         cureg = (Ureg*)(p->sched.sp+2*BY2WD);
1075         memmove(cureg, ureg, sizeof(Ureg));
1076         /* return value of syscall in child */
1077         cureg->ax = 0;
1078
1079         /* Things from bottom of syscall which were never executed */
1080         p->psstate = 0;
1081         p->insyscall = 0;
1082 }
1083
1084 /* Give enough context in the ureg to produce a kernel stack for
1085  * a sleeping process
1086  */
1087 void
1088 setkernur(Ureg* ureg, Proc* p)
1089 {
1090         ureg->pc = p->sched.pc;
1091         ureg->sp = p->sched.sp+4;
1092 }
1093
1094 ulong
1095 dbgpc(Proc *p)
1096 {
1097         Ureg *ureg;
1098
1099         ureg = p->dbgreg;
1100         if(ureg == 0)
1101                 return 0;
1102
1103         return ureg->pc;
1104 }