]> git.lizzy.rs Git - plan9front.git/blob - sys/src/9/pc/trap.c
fix _tos->pcycles, make _tos->kcycles actually count cycles executing kernel code...
[plan9front.git] / sys / src / 9 / pc / trap.c
1 #include        "u.h"
2 #include        "tos.h"
3 #include        "../port/lib.h"
4 #include        "mem.h"
5 #include        "dat.h"
6 #include        "fns.h"
7 #include        "io.h"
8 #include        "ureg.h"
9 #include        "../port/error.h"
10 #include        <trace.h>
11
/* set to 1 at the end of trapinit(); trap() panics on anything that arrives earlier */
12 static int trapinited;
13
14 void    noted(Ureg*, ulong);
15
/* handlers installed by trapinit() and helpers used by trap() */
16 static void debugbpt(Ureg*, void*);
17 static void fault386(Ureg*, void*);
18 static void doublefault(Ureg*, void*);
19 static void unexpected(Ureg*, void*);
20 static void _dumpstack(Ureg*);
21
/*
 * vctl[vno] is the head of the chain of handlers for vector vno;
 * the chains are modified with vctllock held (ilock).
 */
22 static Lock vctllock;
23 static Vctl *vctl[256];
24
25 enum
26 {
27         Ntimevec = 20           /* number of time buckets for each intr */
28 };
/* per-vector histogram of interrupt service times, filled in by intrtime() */
29 ulong intrtimes[256][Ntimevec];
30
31 void
32 intrenable(int irq, void (*f)(Ureg*, void*), void* a, int tbdf, char *name)
33 {
34         int vno;
35         Vctl *v;
36
37         if(f == nil){
38                 print("intrenable: nil handler for %d, tbdf 0x%uX for %s\n",
39                         irq, tbdf, name);
40                 return;
41         }
42
43         v = xalloc(sizeof(Vctl));
44         v->isintr = 1;
45         v->irq = irq;
46         v->tbdf = tbdf;
47         v->f = f;
48         v->a = a;
49         strncpy(v->name, name, KNAMELEN-1);
50         v->name[KNAMELEN-1] = 0;
51
52         ilock(&vctllock);
53         vno = arch->intrenable(v);
54         if(vno == -1){
55                 iunlock(&vctllock);
56                 print("intrenable: couldn't enable irq %d, tbdf 0x%uX for %s\n",
57                         irq, tbdf, v->name);
58                 xfree(v);
59                 return;
60         }
61         if(vctl[vno]){
62                 if(vctl[vno]->isr != v->isr || vctl[vno]->eoi != v->eoi)
63                         panic("intrenable: handler: %s %s %#p %#p %#p %#p",
64                                 vctl[vno]->name, v->name,
65                                 vctl[vno]->isr, v->isr, vctl[vno]->eoi, v->eoi);
66                 v->next = vctl[vno];
67         }
68         vctl[vno] = v;
69         iunlock(&vctllock);
70 }
71
72 int
73 intrdisable(int irq, void (*f)(Ureg *, void *), void *a, int tbdf, char *name)
74 {
75         Vctl **pv, *v;
76         int vno;
77
78         /*
79          * For now, none of this will work with the APIC code,
80          * there is no mapping between irq and vector as the IRQ
81          * is pretty meaningless.
82          */
83         if(arch->intrvecno == nil)
84                 return -1;
85         vno = arch->intrvecno(irq);
86         ilock(&vctllock);
87         pv = &vctl[vno];
88         while (*pv &&
89                   ((*pv)->irq != irq || (*pv)->tbdf != tbdf || (*pv)->f != f || (*pv)->a != a ||
90                    strcmp((*pv)->name, name)))
91                 pv = &((*pv)->next);
92         assert(*pv);
93
94         v = *pv;
95         *pv = (*pv)->next;      /* Link out the entry */
96
97         if(vctl[vno] == nil && arch->intrdisable != nil)
98                 arch->intrdisable(irq);
99         iunlock(&vctllock);
100         xfree(v);
101         return 0;
102 }
103
104 static long
105 irqallocread(Chan*, void *vbuf, long n, vlong offset)
106 {
107         char *buf, *p, str[2*(11+1)+KNAMELEN+1+1];
108         int m, vno;
109         long oldn;
110         Vctl *v;
111
112         if(n < 0 || offset < 0)
113                 error(Ebadarg);
114
115         oldn = n;
116         buf = vbuf;
117         for(vno=0; vno<nelem(vctl); vno++){
118                 for(v=vctl[vno]; v; v=v->next){
119                         m = snprint(str, sizeof str, "%11d %11d %.*s\n", vno, v->irq, KNAMELEN, v->name);
120                         if(m <= offset) /* if do not want this, skip entry */
121                                 offset -= m;
122                         else{
123                                 /* skip offset bytes */
124                                 m -= offset;
125                                 p = str+offset;
126                                 offset = 0;
127
128                                 /* write at most max(n,m) bytes */
129                                 if(m > n)
130                                         m = n;
131                                 memmove(buf, p, m);
132                                 n -= m;
133                                 buf += m;
134
135                                 if(n == 0)
136                                         return oldn;
137                         }
138                 }
139         }
140         return oldn - n;
141 }
142
143 void
144 trapenable(int vno, void (*f)(Ureg*, void*), void* a, char *name)
145 {
146         Vctl *v;
147
148         if(vno < 0 || vno >= VectorPIC)
149                 panic("trapenable: vno %d", vno);
150         v = xalloc(sizeof(Vctl));
151         v->tbdf = BUSUNKNOWN;
152         v->f = f;
153         v->a = a;
154         strncpy(v->name, name, KNAMELEN);
155         v->name[KNAMELEN-1] = 0;
156
157         ilock(&vctllock);
158         if(vctl[vno])
159                 v->next = vctl[vno]->next;
160         vctl[vno] = v;
161         iunlock(&vctllock);
162 }
163
/*
 * Clear the NMI latch through port 0x70, then pulse bit 3 of
 * port 0x61 (set, then clear) while preserving its low three bits.
 */
static void
nmienable(void)
{
	int low;

	/* Hack: should be locked with NVRAM access. */
	outb(0x70, 0x80);		/* NMI latch clear */
	outb(0x70, 0);

	/* Enable NMI */
	low = inb(0x61) & 0x07;
	outb(0x61, low|0x08);
	outb(0x61, low);
}
179
180 /*
181  * Minimal trap setup.  Just enough so that we can panic
182  * on traps (bugs) during kernel initialization.
183  * Called very early - malloc is not yet available.
184  */
185 void
186 trapinit0(void)
187 {
188         int d1, v;
189         ulong vaddr;
190         Segdesc *idt;
191
192         idt = (Segdesc*)IDTADDR;
193         vaddr = (ulong)vectortable;
194         for(v = 0; v < 256; v++){
195                 d1 = (vaddr & 0xFFFF0000)|SEGP;
196                 switch(v){
197
198                 case VectorBPT:
199                         d1 |= SEGPL(3)|SEGIG;
200                         break;
201
202                 case VectorSYSCALL:
203                         d1 |= SEGPL(3)|SEGIG;
204                         break;
205
206                 default:
207                         d1 |= SEGPL(0)|SEGIG;
208                         break;
209                 }
210                 idt[v].d0 = (vaddr & 0xFFFF)|(KESEL<<16);
211                 idt[v].d1 = d1;
212                 vaddr += 6;
213         }
214 }
215
216 void
217 trapinit(void)
218 {
219         /*
220          * Special traps.
221          * Syscall() is called directly without going through trap().
222          */
223         trapenable(VectorBPT, debugbpt, 0, "debugpt");
224         trapenable(VectorPF, fault386, 0, "fault386");
225         trapenable(Vector2F, doublefault, 0, "doublefault");
226         trapenable(Vector15, unexpected, 0, "unexpected");
227         nmienable();
228
229         addarchfile("irqalloc", 0444, irqallocread, nil);
230         trapinited = 1;
231 }
232
/*
 * Names of the first 32 trap vectors, indexed by vector number.
 * trap() uses them for the "sys: trap: ..." note posted to a user
 * process and for the panic message on an unhandled kernel trap.
 */
233 static char* excname[32] = {
234         "divide error",
235         "debug exception",
236         "nonmaskable interrupt",
237         "breakpoint",
238         "overflow",
239         "bounds check",
240         "invalid opcode",
241         "coprocessor not available",
242         "double fault",
243         "coprocessor segment overrun",
244         "invalid TSS",
245         "segment not present",
246         "stack exception",
247         "general protection violation",
248         "page fault",
249         "15 (reserved)",
250         "coprocessor error",
251         "alignment check",
252         "machine check",
253         "19 (reserved)",
254         "20 (reserved)",
255         "21 (reserved)",
256         "22 (reserved)",
257         "23 (reserved)",
258         "24 (reserved)",
259         "25 (reserved)",
260         "26 (reserved)",
261         "27 (reserved)",
262         "28 (reserved)",
263         "29 (reserved)",
264         "30 (reserved)",
265         "31 (reserved)",
266 };
267
268 /*
269  *  keep histogram of interrupt service times
270  */
271 void
272 intrtime(Mach*, int vno)
273 {
274         ulong diff;
275         ulong x;
276
277         x = perfticks();
278         diff = x - m->perf.intrts;
279         m->perf.intrts = x;
280
281         m->perf.inintr += diff;
282         if(up == nil && m->perf.inidle > diff)
283                 m->perf.inidle -= diff;
284
285         diff /= m->cpumhz*100;          /* quantum = 100µsec */
286         if(diff >= Ntimevec)
287                 diff = Ntimevec-1;
288         intrtimes[vno][diff]++;
289 }
290
291 /* go to user space */
292 void
293 kexit(Ureg*)
294 {
295         uvlong t;
296         Tos *tos;
297
298         /* precise time accounting, kernel exit */
299         tos = (Tos*)(USTKTOP-sizeof(Tos));
300         cycles(&t);
301         tos->kcycles += t - up->kentry;
302         tos->pcycles = t + up->pcycles;
303         tos->pid = up->pid;
304 }
305
306 /*
307  *  All traps come here.  It is slower to have all traps call trap()
308  *  rather than directly vectoring the handler.  However, this avoids a
309  *  lot of code duplication and possible bugs.  The only exception is
310  *  VectorSYSCALL.
311  *  Trap is called with interrupts disabled via interrupt-gates.
312  */
/*
 *  Dispatch on ureg->trap, in this order:
 *   1. a handler chain is registered in vctl[]: run isr, every
 *      handler on the chain, then eoi;
 *   2. an unregistered exception (vno < nelem(excname)) from user
 *      mode: post a "sys: trap: ..." note to the process;
 *   3. an unregistered vector at or above VectorPIC: treat it as a
 *      spurious interrupt, count it and return;
 *   4. anything else: attempt a few known kernel-mode recoveries,
 *      otherwise dump state and panic.
 *  Cases 1 and 2 then fall through to the common exit code, which
 *  may reschedule and, when returning to user mode, delivers notes
 *  and calls kexit().
 */
313 void
314 trap(Ureg* ureg)
315 {
316         int clockintr, i, vno, user;
317         char buf[ERRMAX];
318         Vctl *ctl, *v;
319         Mach *mach;
320
321         if(!trapinited){
322                 /* fault386 can give a better error message */
323                 if(ureg->trap == VectorPF)
324                         fault386(ureg, nil);
325                 panic("trap %lud: not ready", ureg->trap);
326         }
327
        /* start of the interval that intrtime() measures */
328         m->perf.intrts = perfticks();
329         user = userureg(ureg);
330         if(user){
                /* remember the registers and the cycle count at kernel entry (kexit uses kentry) */
331                 up->dbgreg = ureg;
332                 cycles(&up->kentry);
333         }
334
335         clockintr = 0;
336
337         vno = ureg->trap;
338         if(ctl = vctl[vno]){
339                 if(ctl->isintr){
340                         m->intr++;
341                         if(vno >= VectorPIC && vno != VectorSYSCALL)
342                                 m->lastintr = ctl->irq;
343                 }
344
                /* isr and eoi are taken from the head entry only; every handler on the chain is called */
345                 if(ctl->isr)
346                         ctl->isr(vno);
347                 for(v = ctl; v != nil; v = v->next){
348                         if(v->f)
349                                 v->f(ureg, v->a);
350                 }
351                 if(ctl->eoi)
352                         ctl->eoi(vno);
353
354                 if(ctl->isintr){
355                         intrtime(m, vno);
356
357                         if(ctl->irq == IrqCLOCK || ctl->irq == IrqTIMER)
358                                 clockintr = 1;
359
                        /* a non-clock interrupt that arrived while a process was running */
360                         if(up && !clockintr)
361                                 preempted();
362                 }
363         }
364         else if(vno < nelem(excname) && user){
                /* unhandled exception in user mode: turn it into a note for the process */
365                 spllo();
366                 sprint(buf, "sys: trap: %s", excname[vno]);
367                 postnote(up, 1, buf, NDebug);
368         }
369         else if(vno >= VectorPIC && vno != VectorSYSCALL){
370                 /*
371                  * An unknown interrupt.
372                  * Check for a default IRQ7. This can happen when
373                  * the IRQ input goes away before the acknowledge.
374                  * In this case, a 'default IRQ7' is generated, but
375                  * the corresponding bit in the ISR isn't set.
376                  * In fact, just ignore all such interrupts.
377                  */
378
379                 /* call all interrupt routines, just in case */
380                 for(i = VectorPIC; i <= MaxIrqLAPIC; i++){
381                         ctl = vctl[i];
382                         if(ctl == nil)
383                                 continue;
384                         if(!ctl->isintr)
385                                 continue;
386                         for(v = ctl; v != nil; v = v->next){
387                                 if(v->f)
388                                         v->f(ureg, v->a);
389                         }
390                         /* should we do this? */
391                         if(ctl->eoi)
392                                 ctl->eoi(i);
393                 }
394
395                 /* clear the interrupt */
396                 i8259isr(vno);
397
                /* the two if(0) blocks below are disabled debugging output */
398                 if(0)print("cpu%d: spurious interrupt %d, last %d\n",
399                         m->machno, vno, m->lastintr);
400                 if(0)if(conf.nmach > 1){
401                         for(i = 0; i < 32; i++){
402                                 if(!(active.machs & (1<<i)))
403                                         continue;
404                                 mach = MACHP(i);
405                                 if(m->machno == mach->machno)
406                                         continue;
407                                 print(" cpu%d: last %d",
408                                         mach->machno, mach->lastintr);
409                         }
410                         print("\n");
411                 }
412                 m->spuriousintr++;
                /* returns directly: the rescheduling and notify code at the bottom is skipped */
413                 if(user)
414                         kexit(ureg);
415                 return;
416         }
417         else{
418                 if(vno == VectorNMI){
419                         /*
420                          * Don't re-enable, it confuses the crash dumps.
421                         nmienable();
422                          */
423                         iprint("cpu%d: PC %#8.8lux\n", m->machno, ureg->pc);
                        /* every processor except cpu0 spins here forever */
424                         while(m->machno != 0)
425                                 ;
426                 }
427
428                 if(!user){
429                         void (*pc)(void);
430                         ulong *sp; 
431
432                         extern void _forkretpopgs(void);
433                         extern void _forkretpopfs(void);
434                         extern void _forkretpopes(void);
435                         extern void _forkretpopds(void);
436                         extern void _forkretiret(void);
437                         extern void _tryrdmsrinst(void);
438                         extern void _trywrmsrinst(void);
439
440                         extern void load_fs(ulong);
441                         extern void load_gs(ulong);
442
443                         load_fs(NULLSEL);
444                         load_gs(NULLSEL);
445
446                         sp = (ulong*)&ureg->sp; /* kernel stack */
447                         pc = (void*)ureg->pc;
448
                        /*
                         * Recoverable kernel faults, recognised by the faulting pc:
                         *  - at one of the _forkretpop* labels (GPF or segment not
                         *    present): overwrite the word at the top of the
                         *    interrupted stack with NULLSEL and retry;
                         *  - at _forkretiret: rewrite the CS and SS slots with the
                         *    user selectors and retry;
                         *  - at the rdmsr/wrmsr probe instructions (GPF only): set
                         *    bp to -1 and step the pc over the 2-byte instruction.
                         */
449                         if(pc == _forkretpopgs || pc == _forkretpopfs || 
450                            pc == _forkretpopes || pc == _forkretpopds){
451                                 if(vno == VectorGPF || vno == VectorSNP){
452                                         sp[0] = NULLSEL;
453                                         return;
454                                 }
455                         } else if(pc == _forkretiret){
456                                 if(vno == VectorGPF || vno == VectorSNP){
457                                         sp[1] = UESEL;  /* CS */
458                                         sp[4] = UDSEL;  /* SS */
459                                         return;
460                                 }
461                         } else if(pc == _tryrdmsrinst || pc == _trywrmsrinst){
462                                 if(vno == VectorGPF){
463                                         ureg->bp = -1;
464                                         ureg->pc += 2;
465                                         return;
466                                 }
467                         }
468                 }
469
                /* nothing could be done: dump what we know and panic */
470                 dumpregs(ureg);
471                 if(!user){
472                         ureg->sp = (ulong)&ureg->sp;
473                         _dumpstack(ureg);
474                 }
475                 if(vno < nelem(excname))
476                         panic("%s", excname[vno]);
477                 panic("unknown trap/intr: %d", vno);
478         }
479         splhi();
480
481         /* delaysched set because we held a lock or because our quantum ended */
482         if(up && up->delaysched && clockintr){
483                 sched();
484                 splhi();
485         }
486
487         if(user){
                /* on the way back to user mode: handle pending procctl requests and notes */
488                 if(up->procctl || up->nnote)
489                         notify(ureg);
490                 kexit(ureg);
491         }
492 }
493
494 /*
495  *  dump registers
496  */
497 void
498 dumpregs2(Ureg* ureg)
499 {
500         if(up)
501                 iprint("cpu%d: registers for %s %lud\n",
502                         m->machno, up->text, up->pid);
503         else
504                 iprint("cpu%d: registers for kernel\n", m->machno);
505         iprint("FLAGS=%luX TRAP=%luX ECODE=%luX PC=%luX",
506                 ureg->flags, ureg->trap, ureg->ecode, ureg->pc);
507         if(userureg(ureg))
508                 iprint(" SS=%4.4luX USP=%luX\n", ureg->ss & 0xFFFF, ureg->usp);
509         else
510                 iprint(" SP=%luX\n", (ulong)&ureg->sp);
511         iprint("  AX %8.8luX  BX %8.8luX  CX %8.8luX  DX %8.8luX\n",
512                 ureg->ax, ureg->bx, ureg->cx, ureg->dx);
513         iprint("  SI %8.8luX  DI %8.8luX  BP %8.8luX\n",
514                 ureg->si, ureg->di, ureg->bp);
515         iprint("  CS %4.4luX  DS %4.4luX  ES %4.4luX  FS %4.4luX  GS %4.4luX\n",
516                 ureg->cs & 0xFFFF, ureg->ds & 0xFFFF, ureg->es & 0xFFFF,
517                 ureg->fs & 0xFFFF, ureg->gs & 0xFFFF);
518 }
519
520 void
521 dumpregs(Ureg* ureg)
522 {
523         vlong mca, mct;
524
525         dumpregs2(ureg);
526
527         /*
528          * Processor control registers.
529          * If machine check exception, time stamp counter, page size extensions
530          * or enhanced virtual 8086 mode extensions are supported, there is a
531          * CR4. If there is a CR4 and machine check extensions, read the machine
532          * check address and machine check type registers if RDMSR supported.
533          */
534         iprint("  CR0 %8.8lux CR2 %8.8lux CR3 %8.8lux",
535                 getcr0(), getcr2(), getcr3());
536         if(m->cpuiddx & 0x9A){
537                 iprint(" CR4 %8.8lux", getcr4());
538                 if((m->cpuiddx & 0xA0) == 0xA0){
539                         rdmsr(0x00, &mca);
540                         rdmsr(0x01, &mct);
541                         iprint("\n  MCA %8.8llux MCT %8.8llux", mca, mct);
542                 }
543         }
544         iprint("\n  ur %#p up %#p\n", ureg, up);
545 }
546
547
548 /*
549  * Fill in enough of Ureg to get a stack trace, and call a function.
550  * Used by debugging interface rdb.
551  */
552 void
553 callwithureg(void (*fn)(Ureg*))
554 {
555         Ureg ureg;
556         ureg.pc = getcallerpc(&fn);
557         ureg.sp = (ulong)&fn;
558         fn(&ureg);
559 }
560
561 static void
562 _dumpstack(Ureg *ureg)
563 {
564         uintptr l, v, i, estack;
565         extern ulong etext;
566         int x;
567         char *s;
568
569         if((s = getconf("*nodumpstack")) != nil && strcmp(s, "0") != 0){
570                 iprint("dumpstack disabled\n");
571                 return;
572         }
573         iprint("dumpstack\n");
574
575         x = 0;
576         x += iprint("ktrace /kernel/path %.8lux %.8lux <<EOF\n", ureg->pc, ureg->sp);
577         i = 0;
578         if(up
579         && (uintptr)&l >= (uintptr)up->kstack
580         && (uintptr)&l <= (uintptr)up->kstack+KSTACK)
581                 estack = (uintptr)up->kstack+KSTACK;
582         else if((uintptr)&l >= (uintptr)m->stack
583         && (uintptr)&l <= (uintptr)m+MACHSIZE)
584                 estack = (uintptr)m+MACHSIZE;
585         else
586                 return;
587         x += iprint("estackx %p\n", estack);
588
589         for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){
590                 v = *(uintptr*)l;
591                 if((KTZERO < v && v < (uintptr)&etext) || estack-l < 32){
592                         /*
593                          * Could Pick off general CALL (((uchar*)v)[-5] == 0xE8)
594                          * and CALL indirect through AX
595                          * (((uchar*)v)[-2] == 0xFF && ((uchar*)v)[-2] == 0xD0),
596                          * but this is too clever and misses faulting address.
597                          */
598                         x += iprint("%.8p=%.8p ", l, v);
599                         i++;
600                 }
601                 if(i == 4){
602                         i = 0;
603                         x += iprint("\n");
604                 }
605         }
606         if(i)
607                 iprint("\n");
608         iprint("EOF\n");
609
610         if(ureg->trap != VectorNMI)
611                 return;
612
613         i = 0;
614         for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){
615                 iprint("%.8p ", *(uintptr*)l);
616                 if(++i == 8){
617                         i = 0;
618                         iprint("\n");
619                 }
620         }
621         if(i)
622                 iprint("\n");
623 }
624
/*
 * Print a stack trace of the caller: callwithureg() builds a Ureg
 * holding just a pc and sp and passes it to _dumpstack().
 */
625 void
626 dumpstack(void)
627 {
628         callwithureg(_dumpstack);
629 }
630
631 static void
632 debugbpt(Ureg* ureg, void*)
633 {
634         char buf[ERRMAX];
635
636         if(up == 0)
637                 panic("kernel bpt");
638         /* restore pc to instruction that caused the trap */
639         ureg->pc--;
640         sprint(buf, "sys: breakpoint");
641         postnote(up, 1, buf, NDebug);
642 }
643
/* Handler installed on Vector2F by trapinit(): always panics. */
644 static void
645 doublefault(Ureg*, void*)
646 {
647         panic("double fault");
648 }
649
/* Handler installed on Vector15 by trapinit(): reports the trap number and carries on. */
650 static void
651 unexpected(Ureg* ureg, void*)
652 {
653         print("unexpected trap %lud; ignoring\n", ureg->trap);
654 }
655
656 extern void checkpages(void);
657 extern void checkfault(ulong, ulong);
658 static void
659 fault386(Ureg* ureg, void*)
660 {
661         ulong addr;
662         int read, user, n, insyscall;
663         char buf[ERRMAX];
664
665         addr = getcr2();
666         read = !(ureg->ecode & 2);
667
668         user = userureg(ureg);
669         if(!user){
670                 if(vmapsync(addr))
671                         return;
672                 if(addr >= USTKTOP)
673                         panic("kernel fault: bad address pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
674                 if(up == nil)
675                         panic("kernel fault: no user process pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
676         }
677         if(up == nil)
678                 panic("user fault: up=0 pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
679
680         insyscall = up->insyscall;
681         up->insyscall = 1;
682         n = fault(addr, read);
683         if(n < 0){
684                 if(!user){
685                         dumpregs(ureg);
686                         panic("fault: 0x%lux", addr);
687                 }
688                 checkpages();
689                 checkfault(addr, ureg->pc);
690                 sprint(buf, "sys: trap: fault %s addr=0x%lux",
691                         read ? "read" : "write", addr);
692                 postnote(up, 1, buf, NDebug);
693         }
694         up->insyscall = insyscall;
695 }
696
697 /*
698  *  system calls
699  */
700 #include "../port/systab.h"
701
702 /*
703  *  Syscall is called directly from assembler without going through trap().
704  */
705 void
706 syscall(Ureg* ureg)
707 {
708         char *e;
709         ulong   sp;
710         long    ret;
711         int     i, s;
712         ulong scallnr;
713         vlong startns, stopns;
714
715         if(!userureg(ureg))
716                 panic("syscall: cs 0x%4.4luX", ureg->cs);
717
718         cycles(&up->kentry);
719
720         m->syscall++;
721         up->insyscall = 1;
722         up->pc = ureg->pc;
723         up->dbgreg = ureg;
724
725         sp = ureg->usp;
726         scallnr = ureg->ax;
727         up->scallnr = scallnr;
728
729         if(up->procctl == Proc_tracesyscall){
730                 /*
731                  * Redundant validaddr.  Do we care?
732                  * Tracing syscalls is not exactly a fast path...
733                  * Beware, validaddr currently does a pexit rather
734                  * than an error if there's a problem; that might
735                  * change in the future.
736                  */
737                 if(sp < (USTKTOP-BY2PG) || sp > (USTKTOP-sizeof(Sargs)-BY2WD))
738                         validaddr(sp, sizeof(Sargs)+BY2WD, 0);
739
740                 syscallfmt(scallnr, ureg->pc, (va_list)(sp+BY2WD));
741                 up->procctl = Proc_stopme;
742                 procctl(up);
743                 if(up->syscalltrace)
744                         free(up->syscalltrace);
745                 up->syscalltrace = nil;
746                 startns = todget(nil);
747         }
748
749         if(scallnr == RFORK && up->fpstate == FPactive){
750                 fpsave(&up->fpsave);
751                 up->fpstate = FPinactive;
752         }
753         spllo();
754
755         up->nerrlab = 0;
756         ret = -1;
757         if(!waserror()){
758                 if(scallnr >= nsyscall || systab[scallnr] == 0){
759                         pprint("bad sys call number %lud pc %lux\n",
760                                 scallnr, ureg->pc);
761                         postnote(up, 1, "sys: bad sys call", NDebug);
762                         error(Ebadarg);
763                 }
764
765                 if(sp<(USTKTOP-BY2PG) || sp>(USTKTOP-sizeof(Sargs)-BY2WD))
766                         validaddr(sp, sizeof(Sargs)+BY2WD, 0);
767
768                 up->s = *((Sargs*)(sp+BY2WD));
769                 up->psstate = sysctab[scallnr];
770
771                 ret = systab[scallnr](up->s.args);
772                 poperror();
773         }else{
774                 /* failure: save the error buffer for errstr */
775                 e = up->syserrstr;
776                 up->syserrstr = up->errstr;
777                 up->errstr = e;
778                 if(0 && up->pid == 1)
779                         print("syscall %lud error %s\n", scallnr, up->syserrstr);
780         }
781         if(up->nerrlab){
782                 print("bad errstack [%lud]: %d extra\n", scallnr, up->nerrlab);
783                 for(i = 0; i < NERR; i++)
784                         print("sp=%lux pc=%lux\n",
785                                 up->errlab[i].sp, up->errlab[i].pc);
786                 panic("error stack");
787         }
788
789         /*
790          *  Put return value in frame.  On the x86 the syscall is
791          *  just another trap and the return value from syscall is
792          *  ignored.  On other machines the return value is put into
793          *  the results register by caller of syscall.
794          */
795         ureg->ax = ret;
796
797         if(up->procctl == Proc_tracesyscall){
798                 stopns = todget(nil);
799                 up->procctl = Proc_stopme;
800                 sysretfmt(scallnr, (va_list)(sp+BY2WD), ret, startns, stopns);
801                 s = splhi();
802                 procctl(up);
803                 splx(s);
804                 if(up->syscalltrace)
805                         free(up->syscalltrace);
806                 up->syscalltrace = nil;
807         }
808
809         up->insyscall = 0;
810         up->psstate = 0;
811
812         if(scallnr == NOTED)
813                 noted(ureg, *(ulong*)(sp+BY2WD));
814
815         if(scallnr!=RFORK && (up->procctl || up->nnote)){
816                 splhi();
817                 notify(ureg);
818         }
819         /* if we delayed sched because we held a lock, sched now */
820         if(up->delaysched)
821                 sched();
822         kexit(ureg);
823 }
824
825 /*
826  *  Call user, if necessary, with note.
827  *  Pass user the Ureg struct and the note on his stack.
828  */
/*
 *  Returns 0 when there is no note to deliver or the process is
 *  already inside its note handler, 1 after the saved registers
 *  have been rewritten so that the return to user mode enters the
 *  handler at up->notify.  A process with no handler, or one that
 *  gets a non-user note while already notified, is terminated with
 *  pexit.
 */
829 int
830 notify(Ureg* ureg)
831 {
832         int l;
833         ulong s, sp;
834         Note *n;
835
836         if(up->procctl)
837                 procctl(up);
838         if(up->nnote == 0)
839                 return 0;
840
        /* save live floating point state and mark it FPillegal until noted() clears the flag */
841         if(up->fpstate == FPactive){
842                 fpsave(&up->fpsave);
843                 up->fpstate = FPinactive;
844         }
845         up->fpstate |= FPillegal;
846
847         s = spllo();
848         qlock(&up->debug);
849         up->notepending = 0;
850         n = &up->note[0];
        /* notes beginning "sys:" get the pc appended, truncating the text if it would not fit */
851         if(strncmp(n->msg, "sys:", 4) == 0){
852                 l = strlen(n->msg);
853                 if(l > ERRMAX-15)       /* " pc=0x12345678\0" */
854                         l = ERRMAX-15;
855                 sprint(n->msg+l, " pc=0x%.8lux", ureg->pc);
856         }
857
858         if(n->flag!=NUser && (up->notified || up->notify==0)){
859                 if(n->flag == NDebug)
860                         pprint("suicide: %s\n", n->msg);
861                 qunlock(&up->debug);
862                 pexit(n->msg, n->flag!=NDebug);
863         }
864
        /* already in the handler: leave the note queued for later */
865         if(up->notified){
866                 qunlock(&up->debug);
867                 splhi();
868                 return 0;
869         }
870
871         if(!up->notify){
872                 qunlock(&up->debug);
873                 pexit(n->msg, n->flag!=NDebug);
874         }
875         sp = ureg->usp;
876         sp -= 256;      /* debugging: preserve context causing problem */
877         sp -= sizeof(Ureg);
878 if(0) print("%s %lud: notify %.8lux %.8lux %.8lux %s\n",
879         up->text, up->pid, ureg->pc, ureg->usp, sp, n->msg);
880
        /* both the handler address and the stack area about to be written must be valid */
881         if(!okaddr((ulong)up->notify, 1, 0)
882         || !okaddr(sp-ERRMAX-4*BY2WD, sizeof(Ureg)+ERRMAX+4*BY2WD, 1)){
883                 qunlock(&up->debug);
884                 pprint("suicide: bad address in notify\n");
885                 pexit("Suicide", 0);
886         }
887
        /*
         * Frame built on the user stack, from high to low addresses:
         *   copy of the Ureg            (up->ureg points here)
         *   previous up->ureg           (one word)
         *   note text                   (ERRMAX bytes)
         *   pointer to the note text    (arg 2)
         *   pointer to the Ureg copy    (arg 1)
         *   0                           (arg 0, return pc slot)
         */
888         memmove((Ureg*)sp, ureg, sizeof(Ureg));
889         *(Ureg**)(sp-BY2WD) = up->ureg; /* word under Ureg is old up->ureg */
890         up->ureg = (void*)sp;
891         sp -= BY2WD+ERRMAX;
892         memmove((char*)sp, up->note[0].msg, ERRMAX);
893         sp -= 3*BY2WD;
894         *(ulong*)(sp+2*BY2WD) = sp+3*BY2WD;             /* arg 2 is string */
895         *(ulong*)(sp+1*BY2WD) = (ulong)up->ureg;        /* arg 1 is ureg* */
896         *(ulong*)(sp+0*BY2WD) = 0;                      /* arg 0 is pc */
        /* redirect the return to user mode into the handler, with user segment selectors */
897         ureg->usp = sp;
898         ureg->pc = (ulong)up->notify;
899         ureg->cs = UESEL;
900         ureg->ss = ureg->ds = ureg->es = UDSEL;
901         up->notified = 1;
        /* pop the delivered note: keep a copy in lastnote and shift the rest down */
902         up->nnote--;
903         memmove(&up->lastnote, &up->note[0], sizeof(Note));
904         memmove(&up->note[0], &up->note[1], up->nnote*sizeof(Note));
905
906         qunlock(&up->debug);
907         splx(s);
908         return 1;
909 }
910
911 /*
912  *   Return user to state before notify()
913  */
914 void
915 noted(Ureg* ureg, ulong arg0)
916 {
917         Ureg *nureg;
918         ulong oureg, sp;
919
920         qlock(&up->debug);
921         if(arg0!=NRSTR && !up->notified) {
922                 qunlock(&up->debug);
923                 pprint("call to noted() when not notified\n");
924                 pexit("Suicide", 0);
925         }
926         up->notified = 0;
927
928         nureg = up->ureg;       /* pointer to user returned Ureg struct */
929
930         up->fpstate &= ~FPillegal;
931
932         /* sanity clause */
933         oureg = (ulong)nureg;
934         if(!okaddr((ulong)oureg-BY2WD, BY2WD+sizeof(Ureg), 0)){
935                 qunlock(&up->debug);
936                 pprint("bad ureg in noted or call to noted when not notified\n");
937                 pexit("Suicide", 0);
938         }
939
940         /* don't let user change system flags */
941         nureg->flags = (ureg->flags & ~0xCD5) | (nureg->flags & 0xCD5);
942         nureg->cs |= 3;
943         nureg->ss |= 3;
944
945         memmove(ureg, nureg, sizeof(Ureg));
946
947         switch(arg0){
948         case NCONT:
949         case NRSTR:
950 if(0) print("%s %lud: noted %.8lux %.8lux\n",
951         up->text, up->pid, nureg->pc, nureg->usp);
952                 if(!okaddr(nureg->pc, 1, 0) || !okaddr(nureg->usp, BY2WD, 0)){
953                         qunlock(&up->debug);
954                         pprint("suicide: trap in noted\n");
955                         pexit("Suicide", 0);
956                 }
957                 up->ureg = (Ureg*)(*(ulong*)(oureg-BY2WD));
958                 qunlock(&up->debug);
959                 break;
960
961         case NSAVE:
962                 if(!okaddr(nureg->pc, BY2WD, 0)
963                 || !okaddr(nureg->usp, BY2WD, 0)){
964                         qunlock(&up->debug);
965                         pprint("suicide: trap in noted\n");
966                         pexit("Suicide", 0);
967                 }
968                 qunlock(&up->debug);
969                 sp = oureg-4*BY2WD-ERRMAX;
970                 splhi();
971                 ureg->sp = sp;
972                 ((ulong*)sp)[1] = oureg;        /* arg 1 0(FP) is ureg* */
973                 ((ulong*)sp)[0] = 0;            /* arg 0 is pc */
974                 break;
975
976         default:
977                 pprint("unknown noted arg 0x%lux\n", arg0);
978                 up->lastnote.flag = NDebug;
979                 /* fall through */
980
981         case NDFLT:
982                 if(up->lastnote.flag == NDebug){
983                         qunlock(&up->debug);
984                         pprint("suicide: %s\n", up->lastnote.msg);
985                 } else
986                         qunlock(&up->debug);
987                 pexit(up->lastnote.msg, up->lastnote.flag!=NDebug);
988         }
989 }
990
991 long
992 execregs(ulong entry, ulong ssize, ulong nargs)
993 {
994         ulong *sp;
995         Ureg *ureg;
996
997         up->fpstate = FPinit;
998         fpoff();
999
1000         sp = (ulong*)(USTKTOP - ssize);
1001         *--sp = nargs;
1002
1003         ureg = up->dbgreg;
1004         ureg->usp = (ulong)sp;
1005         ureg->pc = entry;
1006         ureg->cs = UESEL;
1007         ureg->ss = ureg->ds = ureg->es = UDSEL;
1008         ureg->fs = ureg->gs = NULLSEL;
1009         return USTKTOP-sizeof(Tos);             /* address of kernel/user shared data */
1010 }
1011
1012 /*
1013  *  return the userpc the last exception happened at
1014  */
1015 ulong
1016 userpc(void)
1017 {
1018         Ureg *ureg;
1019
1020         ureg = (Ureg*)up->dbgreg;
1021         return ureg->pc;
1022 }
1023
1024 /* This routine must save the values of registers the user is not permitted
1025  * to write from devproc and then restore the saved values before returning.
1026  */
1027 void
1028 setregisters(Ureg* ureg, char* pureg, char* uva, int n)
1029 {
1030         ulong flags;
1031
1032         flags = ureg->flags;
1033         memmove(pureg, uva, n);
1034         ureg->flags = (ureg->flags & 0xCD5) | (flags & ~0xCD5);
1035         ureg->cs |= 3;
1036         ureg->ss |= 3;
1037 }
1038
1039 static void
1040 linkproc(void)
1041 {
1042         spllo();
1043         up->kpfun(up->kparg);
1044         pexit("kproc dying", 0);
1045 }
1046
1047 void
1048 kprocchild(Proc* p, void (*func)(void*), void* arg)
1049 {
1050         /*
1051          * gotolabel() needs a word on the stack in
1052          * which to place the return PC used to jump
1053          * to linkproc().
1054          */
1055         p->sched.pc = (ulong)linkproc;
1056         p->sched.sp = (ulong)p->kstack+KSTACK-BY2WD;
1057
1058         p->kpfun = func;
1059         p->kparg = arg;
1060 }
1061
1062 void
1063 forkchild(Proc *p, Ureg *ureg)
1064 {
1065         Ureg *cureg;
1066
1067         /*
1068          * Add 2*BY2WD to the stack to account for
1069          *  - the return PC
1070          *  - trap's argument (ur)
1071          */
1072         p->sched.sp = (ulong)p->kstack+KSTACK-(sizeof(Ureg)+2*BY2WD);
1073         p->sched.pc = (ulong)forkret;
1074
1075         cureg = (Ureg*)(p->sched.sp+2*BY2WD);
1076         memmove(cureg, ureg, sizeof(Ureg));
1077         /* return value of syscall in child */
1078         cureg->ax = 0;
1079
1080         /* Things from bottom of syscall which were never executed */
1081         p->psstate = 0;
1082         p->insyscall = 0;
1083 }
1084
1085 /* Give enough context in the ureg to produce a kernel stack for
1086  * a sleeping process
1087  */
1088 void
1089 setkernur(Ureg* ureg, Proc* p)
1090 {
1091         ureg->pc = p->sched.pc;
1092         ureg->sp = p->sched.sp+4;
1093 }
1094
1095 ulong
1096 dbgpc(Proc *p)
1097 {
1098         Ureg *ureg;
1099
1100         ureg = p->dbgreg;
1101         if(ureg == 0)
1102                 return 0;
1103
1104         return ureg->pc;
1105 }