git.lizzy.rs Git - plan9front.git/blob - sys/src/9/pc/trap.c
kernel: dont call pprint() while holding up->debug qlock
[plan9front.git] / sys / src / 9 / pc / trap.c
1 #include        "u.h"
2 #include        "tos.h"
3 #include        "../port/lib.h"
4 #include        "mem.h"
5 #include        "dat.h"
6 #include        "fns.h"
7 #include        "io.h"
8 #include        "ureg.h"
9 #include        "../port/error.h"
10 #include        <trace.h>
11
/* set to 1 at the end of trapinit(); trap() panics on any trap taken before that */
12 static int trapinited;
13
14 void    noted(Ureg*, ulong);
15
/* handlers installed by trapinit() and the stack dumper, defined further down */
16 static void debugbpt(Ureg*, void*);
17 static void fault386(Ureg*, void*);
18 static void doublefault(Ureg*, void*);
19 static void unexpected(Ureg*, void*);
20 static void _dumpstack(Ureg*);
21
/*
 * vctl[vno] heads the chain of handlers registered for vector vno.
 * The chains are modified with vctllock held (see intrenable,
 * intrdisable and trapenable).
 */
22 static Lock vctllock;
23 static Vctl *vctl[256];
24
25 enum
26 {
27         Ntimevec = 20           /* number of time buckets for each intr */
28 };
/* intrtimes[vno][bucket]: per-vector histogram of service times, filled in by intrtime() */
29 ulong intrtimes[256][Ntimevec];
30
31 void
32 intrenable(int irq, void (*f)(Ureg*, void*), void* a, int tbdf, char *name)
33 {
34         int vno;
35         Vctl *v;
36
37         if(f == nil){
38                 print("intrenable: nil handler for %d, tbdf 0x%uX for %s\n",
39                         irq, tbdf, name);
40                 return;
41         }
42
43         if(tbdf != BUSUNKNOWN && (irq == 0xff || irq == 0)){
44                 print("intrenable: got unassigned irq %d, tbdf 0x%uX for %s\n",
45                         irq, tbdf, name);
46                 irq = -1;
47         }
48
49         if((v = xalloc(sizeof(Vctl))) == nil)
50                 panic("intrenable: out of memory");
51         v->isintr = 1;
52         v->irq = irq;
53         v->tbdf = tbdf;
54         v->f = f;
55         v->a = a;
56         strncpy(v->name, name, KNAMELEN-1);
57         v->name[KNAMELEN-1] = 0;
58
59         ilock(&vctllock);
60         vno = arch->intrenable(v);
61         if(vno == -1){
62                 iunlock(&vctllock);
63                 print("intrenable: couldn't enable irq %d, tbdf 0x%uX for %s\n",
64                         irq, tbdf, v->name);
65                 xfree(v);
66                 return;
67         }
68         if(vctl[vno]){
69                 if(vctl[vno]->isr != v->isr || vctl[vno]->eoi != v->eoi)
70                         panic("intrenable: handler: %s %s %#p %#p %#p %#p",
71                                 vctl[vno]->name, v->name,
72                                 vctl[vno]->isr, v->isr, vctl[vno]->eoi, v->eoi);
73                 v->next = vctl[vno];
74         }
75         vctl[vno] = v;
76         iunlock(&vctllock);
77 }
78
79 int
80 intrdisable(int irq, void (*f)(Ureg *, void *), void *a, int tbdf, char *name)
81 {
82         Vctl **pv, *v;
83         int vno;
84
85         /*
86          * For now, none of this will work with the APIC code,
87          * there is no mapping between irq and vector as the IRQ
88          * is pretty meaningless.
89          */
90         if(arch->intrvecno == nil)
91                 return -1;
92         vno = arch->intrvecno(irq);
93         ilock(&vctllock);
94         pv = &vctl[vno];
95         while (*pv &&
96                   ((*pv)->irq != irq || (*pv)->tbdf != tbdf || (*pv)->f != f || (*pv)->a != a ||
97                    strcmp((*pv)->name, name)))
98                 pv = &((*pv)->next);
99         assert(*pv);
100
101         v = *pv;
102         *pv = (*pv)->next;      /* Link out the entry */
103
104         if(vctl[vno] == nil && arch->intrdisable != nil)
105                 arch->intrdisable(irq);
106         iunlock(&vctllock);
107         xfree(v);
108         return 0;
109 }
110
111 static long
112 irqallocread(Chan*, void *vbuf, long n, vlong offset)
113 {
114         char *buf, *p, str[2*(11+1)+KNAMELEN+1+1];
115         int m, vno;
116         long oldn;
117         Vctl *v;
118
119         if(n < 0 || offset < 0)
120                 error(Ebadarg);
121
122         oldn = n;
123         buf = vbuf;
124         for(vno=0; vno<nelem(vctl); vno++){
125                 for(v=vctl[vno]; v; v=v->next){
126                         m = snprint(str, sizeof str, "%11d %11d %.*s\n", vno, v->irq, KNAMELEN, v->name);
127                         if(m <= offset) /* if do not want this, skip entry */
128                                 offset -= m;
129                         else{
130                                 /* skip offset bytes */
131                                 m -= offset;
132                                 p = str+offset;
133                                 offset = 0;
134
135                                 /* write at most max(n,m) bytes */
136                                 if(m > n)
137                                         m = n;
138                                 memmove(buf, p, m);
139                                 n -= m;
140                                 buf += m;
141
142                                 if(n == 0)
143                                         return oldn;
144                         }
145                 }
146         }
147         return oldn - n;
148 }
149
150 void
151 trapenable(int vno, void (*f)(Ureg*, void*), void* a, char *name)
152 {
153         Vctl *v;
154
155         if(vno < 0 || vno >= VectorPIC)
156                 panic("trapenable: vno %d", vno);
157         if((v = xalloc(sizeof(Vctl))) == nil)
158                 panic("trapenable: out of memory");
159         v->tbdf = BUSUNKNOWN;
160         v->f = f;
161         v->a = a;
162         strncpy(v->name, name, KNAMELEN-1);
163         v->name[KNAMELEN-1] = 0;
164
165         ilock(&vctllock);
166         if(vctl[vno])
167                 v->next = vctl[vno]->next;
168         vctl[vno] = v;
169         iunlock(&vctllock);
170 }
171
/*
 * Clear the NMI latch through port 0x70 and enable NMI by pulsing
 * bit 3 of port 0x61 while preserving its low three bits.
 */
static void
nmienable(void)
{
	int low;

	/*
	 * Hack: should be locked with NVRAM access.
	 */
	outb(0x70, 0x80);		/* NMI latch clear */
	outb(0x70, 0);

	/* Enable NMI */
	low = inb(0x61) & 0x07;
	outb(0x61, low|0x08);
	outb(0x61, low);
}
187
188 /*
189  * Minimal trap setup.  Just enough so that we can panic
190  * on traps (bugs) during kernel initialization.
191  * Called very early - malloc is not yet available.
192  */
193 void
194 trapinit0(void)
195 {
196         int d1, v;
197         ulong vaddr;
198         Segdesc *idt;
199
200         idt = (Segdesc*)IDTADDR;
201         vaddr = (ulong)vectortable;
202         for(v = 0; v < 256; v++){
203                 d1 = (vaddr & 0xFFFF0000)|SEGP;
204                 switch(v){
205
206                 case VectorBPT:
207                         d1 |= SEGPL(3)|SEGIG;
208                         break;
209
210                 case VectorSYSCALL:
211                         d1 |= SEGPL(3)|SEGIG;
212                         break;
213
214                 default:
215                         d1 |= SEGPL(0)|SEGIG;
216                         break;
217                 }
218                 idt[v].d0 = (vaddr & 0xFFFF)|(KESEL<<16);
219                 idt[v].d1 = d1;
220                 vaddr += 6;
221         }
222 }
223
224 void
225 trapinit(void)
226 {
227         /*
228          * Special traps.
229          * Syscall() is called directly without going through trap().
230          */
231         trapenable(VectorBPT, debugbpt, 0, "debugpt");
232         trapenable(VectorPF, fault386, 0, "fault386");
233         trapenable(Vector2F, doublefault, 0, "doublefault");
234         trapenable(Vector15, unexpected, 0, "unexpected");
235         nmienable();
236
237         addarchfile("irqalloc", 0444, irqallocread, nil);
238         trapinited = 1;
239 }
240
/*
 * Names of the first 32 trap vectors, indexed by vector number.
 * trap() uses excname[vno] in the note posted to a user process and
 * in the panic message for an unhandled kernel trap.
 */
241 static char* excname[32] = {
242         "divide error",         /* 0 */
243         "debug exception",      /* 1 */
244         "nonmaskable interrupt",        /* 2 */
245         "breakpoint",           /* 3 */
246         "overflow",             /* 4 */
247         "bounds check",         /* 5 */
248         "invalid opcode",       /* 6 */
249         "coprocessor not available",    /* 7 */
250         "double fault",         /* 8 */
251         "coprocessor segment overrun",  /* 9 */
252         "invalid TSS",          /* 10 */
253         "segment not present",  /* 11 */
254         "stack exception",      /* 12 */
255         "general protection violation", /* 13 */
256         "page fault",           /* 14 */
257         "15 (reserved)",
258         "coprocessor error",    /* 16 */
259         "alignment check",      /* 17 */
260         "machine check",        /* 18 */
261         "19 (reserved)",
262         "20 (reserved)",
263         "21 (reserved)",
264         "22 (reserved)",
265         "23 (reserved)",
266         "24 (reserved)",
267         "25 (reserved)",
268         "26 (reserved)",
269         "27 (reserved)",
270         "28 (reserved)",
271         "29 (reserved)",
272         "30 (reserved)",
273         "31 (reserved)",
274 };
275
276 /*
277  *  keep histogram of interrupt service times
278  */
279 void
280 intrtime(Mach*, int vno)
281 {
282         ulong diff;
283         ulong x;
284
285         x = perfticks();
286         diff = x - m->perf.intrts;
287         m->perf.intrts = x;
288
289         m->perf.inintr += diff;
290         if(up == nil && m->perf.inidle > diff)
291                 m->perf.inidle -= diff;
292
293         diff /= m->cpumhz*100;          /* quantum = 100µsec */
294         if(diff >= Ntimevec)
295                 diff = Ntimevec-1;
296         intrtimes[vno][diff]++;
297 }
298
299 /* go to user space */
300 void
301 kexit(Ureg*)
302 {
303         uvlong t;
304         Tos *tos;
305
306         /* precise time accounting, kernel exit */
307         tos = (Tos*)(USTKTOP-sizeof(Tos));
308         cycles(&t);
309         tos->kcycles += t - up->kentry;
310         tos->pcycles = t + up->pcycles;
311         tos->pid = up->pid;
312 }
313
314 /*
315  *  All traps come here.  It is slower to have all traps call trap()
316  *  rather than directly vectoring the handler.  However, this avoids a
317  *  lot of code duplication and possible bugs.  The only exception is
318  *  VectorSYSCALL.
319  *  Trap is called with interrupts disabled via interrupt-gates.
320  */
321 void
322 trap(Ureg* ureg)
323 {
        /*
         * Dispatch on ureg->trap:
         *  - a vector with registered handlers runs its whole chain;
         *  - an unhandled exception (vno < nelem(excname)) taken in
         *    user mode becomes a "sys: trap: ..." note;
         *  - an unhandled vector >= VectorPIC is counted as a
         *    spurious interrupt;
         *  - anything else is fatal, apart from a few known kernel
         *    instructions that are allowed to fault.
         */
324         int clockintr, i, vno, user;
325         char buf[ERRMAX];
326         Vctl *ctl, *v;
327         Mach *mach;
328
        /* trapinit() has not completed yet: every trap is fatal */
329         if(!trapinited){
330                 /* fault386 can give a better error message */
331                 if(ureg->trap == VectorPF)
332                         fault386(ureg, nil);
333                 panic("trap %lud: not ready", ureg->trap);
334         }
335
        /* start of the interval that intrtime() measures */
336         m->perf.intrts = perfticks();
337         user = userureg(ureg);
338         if(user){
                /* remember the user registers and the kernel entry time (see kexit) */
339                 up->dbgreg = ureg;
340                 cycles(&up->kentry);
341         }
342
343         clockintr = 0;
344
345         vno = ureg->trap;
346         if(ctl = vctl[vno]){
347                 if(ctl->isintr){
348                         m->intr++;
349                         if(vno >= VectorPIC && vno != VectorSYSCALL)
350                                 m->lastintr = ctl->irq;
351                 }
352
                /* isr and eoi are taken from the head entry; every handler on the chain is called */
353                 if(ctl->isr)
354                         ctl->isr(vno);
355                 for(v = ctl; v != nil; v = v->next){
356                         if(v->f)
357                                 v->f(ureg, v->a);
358                 }
359                 if(ctl->eoi)
360                         ctl->eoi(vno);
361
362                 if(ctl->isintr){
363                         intrtime(m, vno);
364
365                         if(ctl->irq == IrqCLOCK || ctl->irq == IrqTIMER)
366                                 clockintr = 1;
367
                        /* a non-clock interrupt arrived while a process was running */
368                         if(up && !clockintr)
369                                 preempted();
370                 }
371         }
372         else if(vno < nelem(excname) && user){
                /* unhandled exception in user mode: deliver it to the process as a note */
373                 spllo();
374                 sprint(buf, "sys: trap: %s", excname[vno]);
375                 postnote(up, 1, buf, NDebug);
376         }
377         else if(vno >= VectorPIC && vno != VectorSYSCALL){
378                 /*
379                  * An unknown interrupt.
380                  * Check for a default IRQ7. This can happen when
381                  * the IRQ input goes away before the acknowledge.
382                  * In this case, a 'default IRQ7' is generated, but
383                  * the corresponding bit in the ISR isn't set.
384                  * In fact, just ignore all such interrupts.
385                  */
386
387                 /* call all interrupt routines, just in case */
388                 for(i = VectorPIC; i <= MaxIrqLAPIC; i++){
389                         ctl = vctl[i];
390                         if(ctl == nil)
391                                 continue;
392                         if(!ctl->isintr)
393                                 continue;
394                         for(v = ctl; v != nil; v = v->next){
395                                 if(v->f)
396                                         v->f(ureg, v->a);
397                         }
398                         /* should we do this? */
399                         if(ctl->eoi)
400                                 ctl->eoi(i);
401                 }
402
403                 /* clear the interrupt */
404                 i8259isr(vno);
405
                /* diagnostics below are compiled in but disabled by if(0) */
406                 if(0)print("cpu%d: spurious interrupt %d, last %d\n",
407                         m->machno, vno, m->lastintr);
408                 if(0)if(conf.nmach > 1){
409                         for(i = 0; i < 32; i++){
410                                 if(!(active.machs & (1<<i)))
411                                         continue;
412                                 mach = MACHP(i);
413                                 if(m->machno == mach->machno)
414                                         continue;
415                                 print(" cpu%d: last %d",
416                                         mach->machno, mach->lastintr);
417                         }
418                         print("\n");
419                 }
420                 m->spuriousintr++;
                /* returns here, skipping the scheduling and note handling at the bottom */
421                 if(user)
422                         kexit(ureg);
423                 return;
424         }
425         else{
426                 if(vno == VectorNMI){
427                         /*
428                          * Don't re-enable, it confuses the crash dumps.
429                         nmienable();
430                          */
431                         iprint("cpu%d: PC %#8.8lux\n", m->machno, ureg->pc);
                        /* every processor except cpu0 spins here forever */
432                         while(m->machno != 0)
433                                 ;
434                 }
435
436                 if(!user){
                        /*
                         * Kernel-mode trap at one of a few known
                         * instructions: patch the saved state and
                         * return instead of panicking.
                         */
437                         void (*pc)(void);
438                         ulong *sp; 
439
440                         extern void _forkretpopgs(void);
441                         extern void _forkretpopfs(void);
442                         extern void _forkretpopes(void);
443                         extern void _forkretpopds(void);
444                         extern void _forkretiret(void);
445                         extern void _rdmsrinst(void);
446                         extern void _wrmsrinst(void);
447
448                         extern void load_fs(ulong);
449                         extern void load_gs(ulong);
450
451                         load_fs(NULLSEL);
452                         load_gs(NULLSEL);
453
454                         sp = (ulong*)&ureg->sp; /* kernel stack */
455                         pc = (void*)ureg->pc;
456
457                         if(pc == _forkretpopgs || pc == _forkretpopfs || 
458                            pc == _forkretpopes || pc == _forkretpopds){
                                /* replace the word at the top of the stack with NULLSEL and retry */
459                                 if(vno == VectorGPF || vno == VectorSNP){
460                                         sp[0] = NULLSEL;
461                                         return;
462                                 }
463                         } else if(pc == _forkretiret){
                                /* force the user code and stack selectors in the frame and retry */
464                                 if(vno == VectorGPF || vno == VectorSNP){
465                                         sp[1] = UESEL;  /* CS */
466                                         sp[4] = UDSEL;  /* SS */
467                                         return;
468                                 }
469                         } else if(pc == _rdmsrinst || pc == _wrmsrinst){
                                /* step over the 2-byte instruction and leave -1 in BP */
470                                 if(vno == VectorGPF){
471                                         ureg->bp = -1;
472                                         ureg->pc += 2;
473                                         return;
474                                 }
475                         }
476                 }
477
                /* nothing could handle it: dump state and panic */
478                 dumpregs(ureg);
479                 if(!user){
480                         ureg->sp = (ulong)&ureg->sp;
481                         _dumpstack(ureg);
482                 }
483                 if(vno < nelem(excname))
484                         panic("%s", excname[vno]);
485                 panic("unknown trap/intr: %d", vno);
486         }
487         splhi();
488
489         /* delaysched set because we held a lock or because our quantum ended */
490         if(up && up->delaysched && clockintr){
491                 sched();
492                 splhi();
493         }
494
        /* before returning to user mode, act on pending procctl requests and notes */
495         if(user){
496                 if(up->procctl || up->nnote)
497                         notify(ureg);
498                 kexit(ureg);
499         }
500 }
501
502 /*
503  *  dump registers
504  */
505 void
506 dumpregs2(Ureg* ureg)
507 {
508         if(up)
509                 iprint("cpu%d: registers for %s %lud\n",
510                         m->machno, up->text, up->pid);
511         else
512                 iprint("cpu%d: registers for kernel\n", m->machno);
513         iprint("FLAGS=%luX TRAP=%luX ECODE=%luX PC=%luX",
514                 ureg->flags, ureg->trap, ureg->ecode, ureg->pc);
515         if(userureg(ureg))
516                 iprint(" SS=%4.4luX USP=%luX\n", ureg->ss & 0xFFFF, ureg->usp);
517         else
518                 iprint(" SP=%luX\n", (ulong)&ureg->sp);
519         iprint("  AX %8.8luX  BX %8.8luX  CX %8.8luX  DX %8.8luX\n",
520                 ureg->ax, ureg->bx, ureg->cx, ureg->dx);
521         iprint("  SI %8.8luX  DI %8.8luX  BP %8.8luX\n",
522                 ureg->si, ureg->di, ureg->bp);
523         iprint("  CS %4.4luX  DS %4.4luX  ES %4.4luX  FS %4.4luX  GS %4.4luX\n",
524                 ureg->cs & 0xFFFF, ureg->ds & 0xFFFF, ureg->es & 0xFFFF,
525                 ureg->fs & 0xFFFF, ureg->gs & 0xFFFF);
526 }
527
528 void
529 dumpregs(Ureg* ureg)
530 {
531         vlong mca, mct;
532
533         dumpregs2(ureg);
534
535         /*
536          * Processor control registers.
537          * If machine check exception, time stamp counter, page size extensions
538          * or enhanced virtual 8086 mode extensions are supported, there is a
539          * CR4. If there is a CR4 and machine check extensions, read the machine
540          * check address and machine check type registers if RDMSR supported.
541          */
542         iprint("  CR0 %8.8lux CR2 %8.8lux CR3 %8.8lux",
543                 getcr0(), getcr2(), getcr3());
544         if(m->cpuiddx & (Mce|Tsc|Pse|Vmex)){
545                 iprint(" CR4 %8.8lux", getcr4());
546                 if((m->cpuiddx & (Mce|Cpumsr)) == (Mce|Cpumsr)){
547                         rdmsr(0x00, &mca);
548                         rdmsr(0x01, &mct);
549                         iprint("\n  MCA %8.8llux MCT %8.8llux", mca, mct);
550                 }
551         }
552         iprint("\n  ur %#p up %#p\n", ureg, up);
553 }
554
555
556 /*
557  * Fill in enough of Ureg to get a stack trace, and call a function.
558  * Used by debugging interface rdb.
559  */
560 void
561 callwithureg(void (*fn)(Ureg*))
562 {
563         Ureg ureg;
564         ureg.pc = getcallerpc(&fn);
565         ureg.sp = (ulong)&fn;
566         fn(&ureg);
567 }
568
569 static void
570 _dumpstack(Ureg *ureg)
571 {
572         uintptr l, v, i, estack;
573         extern ulong etext;
574         int x;
575         char *s;
576
577         if((s = getconf("*nodumpstack")) != nil && strcmp(s, "0") != 0){
578                 iprint("dumpstack disabled\n");
579                 return;
580         }
581         iprint("dumpstack\n");
582
583         x = 0;
584         x += iprint("ktrace /kernel/path %.8lux %.8lux <<EOF\n", ureg->pc, ureg->sp);
585         i = 0;
586         if(up
587         && (uintptr)&l >= (uintptr)up->kstack
588         && (uintptr)&l <= (uintptr)up->kstack+KSTACK)
589                 estack = (uintptr)up->kstack+KSTACK;
590         else if((uintptr)&l >= (uintptr)m->stack
591         && (uintptr)&l <= (uintptr)m+MACHSIZE)
592                 estack = (uintptr)m+MACHSIZE;
593         else
594                 return;
595         x += iprint("estackx %p\n", estack);
596
597         for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){
598                 v = *(uintptr*)l;
599                 if((KTZERO < v && v < (uintptr)&etext) || estack-l < 32){
600                         /*
601                          * Could Pick off general CALL (((uchar*)v)[-5] == 0xE8)
602                          * and CALL indirect through AX
603                          * (((uchar*)v)[-2] == 0xFF && ((uchar*)v)[-2] == 0xD0),
604                          * but this is too clever and misses faulting address.
605                          */
606                         x += iprint("%.8p=%.8p ", l, v);
607                         i++;
608                 }
609                 if(i == 4){
610                         i = 0;
611                         x += iprint("\n");
612                 }
613         }
614         if(i)
615                 iprint("\n");
616         iprint("EOF\n");
617
618         if(ureg->trap != VectorNMI)
619                 return;
620
621         i = 0;
622         for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){
623                 iprint("%.8p ", *(uintptr*)l);
624                 if(++i == 8){
625                         i = 0;
626                         iprint("\n");
627                 }
628         }
629         if(i)
630                 iprint("\n");
631 }
632
633 void
634 dumpstack(void)
635 {
636         callwithureg(_dumpstack);
637 }
638
639 static void
640 debugbpt(Ureg* ureg, void*)
641 {
642         char buf[ERRMAX];
643
644         if(up == 0)
645                 panic("kernel bpt");
646         /* restore pc to instruction that caused the trap */
647         ureg->pc--;
648         sprint(buf, "sys: breakpoint");
649         postnote(up, 1, buf, NDebug);
650 }
651
652 static void
653 doublefault(Ureg*, void*)
654 {
655         panic("double fault");
656 }
657
658 static void
659 unexpected(Ureg* ureg, void*)
660 {
661         print("unexpected trap %lud; ignoring\n", ureg->trap);
662 }
663
664 extern void checkpages(void);
665 extern void checkfault(ulong, ulong);
666 static void
667 fault386(Ureg* ureg, void*)
668 {
669         ulong addr;
670         int read, user, n, insyscall;
671         char buf[ERRMAX];
672
673         addr = getcr2();
674         read = !(ureg->ecode & 2);
675
676         user = userureg(ureg);
677         if(!user){
678                 if(vmapsync(addr))
679                         return;
680                 if(addr >= USTKTOP)
681                         panic("kernel fault: bad address pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
682                 if(up == nil)
683                         panic("kernel fault: no user process pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
684         }
685         if(up == nil)
686                 panic("user fault: up=0 pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
687
688         insyscall = up->insyscall;
689         up->insyscall = 1;
690         n = fault(addr, read);
691         if(n < 0){
692                 if(!user){
693                         dumpregs(ureg);
694                         panic("fault: 0x%lux", addr);
695                 }
696                 checkpages();
697                 checkfault(addr, ureg->pc);
698                 sprint(buf, "sys: trap: fault %s addr=0x%lux",
699                         read ? "read" : "write", addr);
700                 postnote(up, 1, buf, NDebug);
701         }
702         up->insyscall = insyscall;
703 }
704
705 /*
706  *  system calls
707  */
708 #include "../port/systab.h"
709
710 /*
711  *  Syscall is called directly from assembler without going through trap().
712  */
713 void
714 syscall(Ureg* ureg)
715 {
716         char *e;
717         ulong   sp;
718         long    ret;
719         int     i, s;
720         ulong scallnr;
721         vlong startns, stopns;
722
723         if(!userureg(ureg))
724                 panic("syscall: cs 0x%4.4luX", ureg->cs);
725
726         cycles(&up->kentry);
727
728         m->syscall++;
729         up->insyscall = 1;
730         up->pc = ureg->pc;
731         up->dbgreg = ureg;
732
733         sp = ureg->usp;
734         scallnr = ureg->ax;
735         up->scallnr = scallnr;
736
737         spllo();
738
739         up->nerrlab = 0;
740         ret = -1;
741         if(!waserror()){
742                 if(sp<(USTKTOP-BY2PG) || sp>(USTKTOP-sizeof(Sargs)-BY2WD))
743                         validaddr(sp, sizeof(Sargs)+BY2WD, 0);
744
745                 up->s = *((Sargs*)(sp+BY2WD));
746
747                 if(up->procctl == Proc_tracesyscall){
748                         syscallfmt(scallnr, ureg->pc, (va_list)up->s.args);
749                         s = splhi();
750                         up->procctl = Proc_stopme;
751                         procctl(up);
752                         splx(s);
753                         startns = todget(nil);
754                 }
755
756                 if(scallnr >= nsyscall || systab[scallnr] == 0){
757                         pprint("bad sys call number %lud pc %lux\n",
758                                 scallnr, ureg->pc);
759                         postnote(up, 1, "sys: bad sys call", NDebug);
760                         error(Ebadarg);
761                 }
762                 up->psstate = sysctab[scallnr];
763
764                 ret = systab[scallnr](up->s.args);
765                 poperror();
766         }else{
767                 /* failure: save the error buffer for errstr */
768                 e = up->syserrstr;
769                 up->syserrstr = up->errstr;
770                 up->errstr = e;
771                 if(0 && up->pid == 1)
772                         print("syscall %lud error %s\n", scallnr, up->syserrstr);
773         }
774         if(up->nerrlab){
775                 print("bad errstack [%lud]: %d extra\n", scallnr, up->nerrlab);
776                 for(i = 0; i < NERR; i++)
777                         print("sp=%lux pc=%lux\n",
778                                 up->errlab[i].sp, up->errlab[i].pc);
779                 panic("error stack");
780         }
781
782         /*
783          *  Put return value in frame.  On the x86 the syscall is
784          *  just another trap and the return value from syscall is
785          *  ignored.  On other machines the return value is put into
786          *  the results register by caller of syscall.
787          */
788         ureg->ax = ret;
789
790         if(up->procctl == Proc_tracesyscall){
791                 stopns = todget(nil);
792                 sysretfmt(scallnr, (va_list)up->s.args, ret, startns, stopns);
793                 s = splhi();
794                 up->procctl = Proc_stopme;
795                 procctl(up);
796                 splx(s);
797         }
798
799         up->insyscall = 0;
800         up->psstate = 0;
801
802         if(scallnr == NOTED)
803                 noted(ureg, up->s.args[0]);
804
805         if(scallnr!=RFORK && (up->procctl || up->nnote)){
806                 splhi();
807                 notify(ureg);
808         }
809         /* if we delayed sched because we held a lock, sched now */
810         if(up->delaysched)
811                 sched();
812         kexit(ureg);
813 }
814
815 /*
816  *  Call user, if necessary, with note.
817  *  Pass user the Ureg struct and the note on his stack.
818  */
819 int
820 notify(Ureg* ureg)
821 {
        /*
         * Handles a pending procctl request, then delivers the first
         * queued note (up->note[0]), if any.
         * Returns 1 when ureg has been redirected to the process's
         * note handler, 0 when there was no note or a handler is
         * already running.  Does not return when the process is
         * terminated with pexit().
         * pprint() is only ever called after up->debug is released.
         */
822         int l;
823         ulong s, sp;
824         Note *n;
825
826         if(up->procctl)
827                 procctl(up);
828         if(up->nnote == 0)
829                 return 0;
830
        /* save any live FP state and mark it FPillegal; noted() clears the mark */
831         if(up->fpstate == FPactive){
832                 fpsave(&up->fpsave);
833                 up->fpstate = FPinactive;
834         }
835         up->fpstate |= FPillegal;
836
837         s = spllo();
838         qlock(&up->debug);
839         up->notepending = 0;
840         n = &up->note[0];
        /* notes starting with "sys:" get the pc appended, truncating the message if needed to fit */
841         if(strncmp(n->msg, "sys:", 4) == 0){
842                 l = strlen(n->msg);
843                 if(l > ERRMAX-15)       /* " pc=0x12345678\0" */
844                         l = ERRMAX-15;
845                 sprint(n->msg+l, " pc=0x%.8lux", ureg->pc);
846         }
847
        /* a non-user note with a handler already running, or with no handler at all, kills the process */
848         if(n->flag!=NUser && (up->notified || up->notify==0)){
849                 qunlock(&up->debug);
850                 if(n->flag == NDebug)
851                         pprint("suicide: %s\n", n->msg);
852                 pexit(n->msg, n->flag!=NDebug);
853         }
854
        /* already in a handler: leave the note queued */
855         if(up->notified){
856                 qunlock(&up->debug);
857                 splhi();
858                 return 0;
859         }
860
861         if(!up->notify){
862                 qunlock(&up->debug);
863                 pexit(n->msg, n->flag!=NDebug);
864         }
        /* make room on the user stack for a copy of the Ureg */
865         sp = ureg->usp;
866         sp -= 256;      /* debugging: preserve context causing problem */
867         sp -= sizeof(Ureg);
868 if(0) print("%s %lud: notify %.8lux %.8lux %.8lux %s\n",
869         up->text, up->pid, ureg->pc, ureg->usp, sp, n->msg);
870
        /* both the handler address and the stack area about to be written must be valid */
871         if(!okaddr((ulong)up->notify, 1, 0)
872         || !okaddr(sp-ERRMAX-4*BY2WD, sizeof(Ureg)+ERRMAX+4*BY2WD, 1)){
873                 qunlock(&up->debug);
874                 pprint("suicide: bad address in notify\n");
875                 pexit("Suicide", 0);
876         }
877
        /*
         * Frame built on the user stack, from high to low addresses:
         * the saved Ureg, the previous up->ureg, the note text
         * (ERRMAX bytes), then three words: pointer to the note text,
         * pointer to the Ureg, and 0.
         */
878         memmove((Ureg*)sp, ureg, sizeof(Ureg));
879         *(Ureg**)(sp-BY2WD) = up->ureg; /* word under Ureg is old up->ureg */
880         up->ureg = (void*)sp;
881         sp -= BY2WD+ERRMAX;
882         memmove((char*)sp, up->note[0].msg, ERRMAX);
883         sp -= 3*BY2WD;
884         *(ulong*)(sp+2*BY2WD) = sp+3*BY2WD;             /* arg 2 is string */
885         *(ulong*)(sp+1*BY2WD) = (ulong)up->ureg;        /* arg 1 is ureg* */
886         *(ulong*)(sp+0*BY2WD) = 0;                      /* arg 0 is pc */
        /* resume in the handler with the new stack and the user segment selectors */
887         ureg->usp = sp;
888         ureg->pc = (ulong)up->notify;
889         ureg->cs = UESEL;
890         ureg->ss = ureg->ds = ureg->es = UDSEL;
891         up->notified = 1;
        /* remember the delivered note in lastnote and shift the remaining notes down */
892         up->nnote--;
893         memmove(&up->lastnote, &up->note[0], sizeof(Note));
894         memmove(&up->note[0], &up->note[1], up->nnote*sizeof(Note));
895
896         qunlock(&up->debug);
897         splx(s);
898         return 1;
899 }
900
901 /*
902  *   Return user to state before notify()
903  */
904 void
905 noted(Ureg* ureg, ulong arg0)
906 {
        /*
         * Called from syscall() for the NOTED system call with its
         * first argument in arg0.  Copies the Ureg saved on the user
         * stack by notify() (up->ureg) back into ureg, then acts on
         * arg0: NCONT and NRSTR resume, NSAVE re-enters with the
         * frame kept on the stack, NDFLT and unknown values end the
         * process.  pprint() is only called after up->debug is
         * released.
         */
907         Ureg *nureg;
908         ulong oureg, sp;
909
910         qlock(&up->debug);
911         if(arg0!=NRSTR && !up->notified) {
912                 qunlock(&up->debug);
913                 pprint("call to noted() when not notified\n");
914                 pexit("Suicide", 0);
915         }
916         up->notified = 0;
917
918         nureg = up->ureg;       /* pointer to user returned Ureg struct */
919
        /* undo the FPillegal mark set by notify() */
920         up->fpstate &= ~FPillegal;
921
922         /* sanity clause */
923         oureg = (ulong)nureg;
924         if(!okaddr((ulong)oureg-BY2WD, BY2WD+sizeof(Ureg), 0)){
925                 qunlock(&up->debug);
926                 pprint("bad ureg in noted or call to noted when not notified\n");
927                 pexit("Suicide", 0);
928         }
929
930         /* don't let user change system flags */
        /* only the bits in 0xCD5 come from the user's copy; cs and ss get their low two bits set */
931         nureg->flags = (ureg->flags & ~0xCD5) | (nureg->flags & 0xCD5);
932         nureg->cs |= 3;
933         nureg->ss |= 3;
934
935         memmove(ureg, nureg, sizeof(Ureg));
936
937         switch(arg0){
938         case NCONT:
939         case NRSTR:
940 if(0) print("%s %lud: noted %.8lux %.8lux\n",
941         up->text, up->pid, nureg->pc, nureg->usp);
942                 if(!okaddr(nureg->pc, 1, 0) || !okaddr(nureg->usp, BY2WD, 0)){
943                         qunlock(&up->debug);
944                         pprint("suicide: trap in noted\n");
945                         pexit("Suicide", 0);
946                 }
                /* restore the previous up->ureg that notify() stored in the word below the Ureg */
947                 up->ureg = (Ureg*)(*(ulong*)(oureg-BY2WD));
948                 qunlock(&up->debug);
949                 break;
950
951         case NSAVE:
952                 if(!okaddr(nureg->pc, BY2WD, 0)
953                 || !okaddr(nureg->usp, BY2WD, 0)){
954                         qunlock(&up->debug);
955                         pprint("suicide: trap in noted\n");
956                         pexit("Suicide", 0);
957                 }
958                 qunlock(&up->debug);
                /* point the stack at the argument words notify() laid out below the note text */
959                 sp = oureg-4*BY2WD-ERRMAX;
960                 splhi();
961                 ureg->sp = sp;
962                 ((ulong*)sp)[1] = oureg;        /* arg 1 0(FP) is ureg* */
963                 ((ulong*)sp)[0] = 0;            /* arg 0 is pc */
964                 break;
965
966         default:
                /* unknown argument: treat the last note as a debug note, then as NDFLT */
967                 up->lastnote.flag = NDebug;
968                 /* fall through */
969
970         case NDFLT:
971                 qunlock(&up->debug);
972                 if(up->lastnote.flag == NDebug)
973                         pprint("suicide: %s\n", up->lastnote.msg);
974                 pexit(up->lastnote.msg, up->lastnote.flag!=NDebug);
975         }
976 }
977
978 long
979 execregs(ulong entry, ulong ssize, ulong nargs)
980 {
981         ulong *sp;
982         Ureg *ureg;
983
984         sp = (ulong*)(USTKTOP - ssize);
985         *--sp = nargs;
986
987         ureg = up->dbgreg;
988         ureg->usp = (ulong)sp;
989         ureg->pc = entry;
990         ureg->cs = UESEL;
991         ureg->ss = ureg->ds = ureg->es = UDSEL;
992         ureg->fs = ureg->gs = NULLSEL;
993         return USTKTOP-sizeof(Tos);             /* address of kernel/user shared data */
994 }
995
996 /*
997  *  return the userpc the last exception happened at
998  */
999 ulong
1000 userpc(void)
1001 {
1002         Ureg *ureg;
1003
1004         ureg = (Ureg*)up->dbgreg;
1005         return ureg->pc;
1006 }
1007
1008 /* This routine must save the values of registers the user is not permitted
1009  * to write from devproc and then restore the saved values before returning.
1010  */
1011 void
1012 setregisters(Ureg* ureg, char* pureg, char* uva, int n)
1013 {
1014         ulong flags;
1015
1016         flags = ureg->flags;
1017         memmove(pureg, uva, n);
1018         ureg->flags = (ureg->flags & 0xCD5) | (flags & ~0xCD5);
1019         ureg->cs |= 3;
1020         ureg->ss |= 3;
1021 }
1022
1023 static void
1024 linkproc(void)
1025 {
1026         spllo();
1027         up->kpfun(up->kparg);
1028         pexit("kproc dying", 0);
1029 }
1030
1031 void
1032 kprocchild(Proc* p, void (*func)(void*), void* arg)
1033 {
1034         /*
1035          * gotolabel() needs a word on the stack in
1036          * which to place the return PC used to jump
1037          * to linkproc().
1038          */
1039         p->sched.pc = (ulong)linkproc;
1040         p->sched.sp = (ulong)p->kstack+KSTACK-BY2WD;
1041
1042         p->kpfun = func;
1043         p->kparg = arg;
1044 }
1045
1046 void
1047 forkchild(Proc *p, Ureg *ureg)
1048 {
1049         Ureg *cureg;
1050
1051         /*
1052          * Add 2*BY2WD to the stack to account for
1053          *  - the return PC
1054          *  - trap's argument (ur)
1055          */
1056         p->sched.sp = (ulong)p->kstack+KSTACK-(sizeof(Ureg)+2*BY2WD);
1057         p->sched.pc = (ulong)forkret;
1058
1059         cureg = (Ureg*)(p->sched.sp+2*BY2WD);
1060         memmove(cureg, ureg, sizeof(Ureg));
1061         /* return value of syscall in child */
1062         cureg->ax = 0;
1063
1064         /* Things from bottom of syscall which were never executed */
1065         p->psstate = 0;
1066         p->insyscall = 0;
1067 }
1068
1069 /* Give enough context in the ureg to produce a kernel stack for
1070  * a sleeping process
1071  */
1072 void
1073 setkernur(Ureg* ureg, Proc* p)
1074 {
1075         ureg->pc = p->sched.pc;
1076         ureg->sp = p->sched.sp+4;
1077 }
1078
1079 ulong
1080 dbgpc(Proc *p)
1081 {
1082         Ureg *ureg;
1083
1084         ureg = p->dbgreg;
1085         if(ureg == 0)
1086                 return 0;
1087
1088         return ureg->pc;
1089 }