git.lizzy.rs Git - plan9front.git/blob - sys/src/9/pc/trap.c
kernel: remove implicit Proc* argument from procctl()
[plan9front.git] / sys / src / 9 / pc / trap.c
1 #include        "u.h"
2 #include        "tos.h"
3 #include        "../port/lib.h"
4 #include        "mem.h"
5 #include        "dat.h"
6 #include        "fns.h"
7 #include        "io.h"
8 #include        "ureg.h"
9 #include        "../port/error.h"
10 #include        <trace.h>
11
12 static int trapinited;
13
14 void    noted(Ureg*, ulong);
15
16 static void debugbpt(Ureg*, void*);
17 static void fault386(Ureg*, void*);
18 static void doublefault(Ureg*, void*);
19 static void unexpected(Ureg*, void*);
20 static void _dumpstack(Ureg*);
21
22 static Lock vctllock;
23 static Vctl *vctl[256];
24
25 enum
26 {
27         Ntimevec = 20           /* number of time buckets for each intr */
28 };
29 ulong intrtimes[256][Ntimevec];
30
31 void
32 intrenable(int irq, void (*f)(Ureg*, void*), void* a, int tbdf, char *name)
33 {
34         int vno;
35         Vctl *v;
36
37         if(f == nil){
38                 print("intrenable: nil handler for %d, tbdf 0x%uX for %s\n",
39                         irq, tbdf, name);
40                 return;
41         }
42
43         if(tbdf != BUSUNKNOWN && (irq == 0xff || irq == 0)){
44                 print("intrenable: got unassigned irq %d, tbdf 0x%uX for %s\n",
45                         irq, tbdf, name);
46                 irq = -1;
47         }
48
49         if((v = xalloc(sizeof(Vctl))) == nil)
50                 panic("intrenable: out of memory");
51         v->isintr = 1;
52         v->irq = irq;
53         v->tbdf = tbdf;
54         v->f = f;
55         v->a = a;
56         strncpy(v->name, name, KNAMELEN-1);
57         v->name[KNAMELEN-1] = 0;
58
59         ilock(&vctllock);
60         vno = arch->intrenable(v);
61         if(vno == -1){
62                 iunlock(&vctllock);
63                 print("intrenable: couldn't enable irq %d, tbdf 0x%uX for %s\n",
64                         irq, tbdf, v->name);
65                 xfree(v);
66                 return;
67         }
68         if(vctl[vno]){
69                 if(vctl[vno]->isr != v->isr || vctl[vno]->eoi != v->eoi)
70                         panic("intrenable: handler: %s %s %#p %#p %#p %#p",
71                                 vctl[vno]->name, v->name,
72                                 vctl[vno]->isr, v->isr, vctl[vno]->eoi, v->eoi);
73                 v->next = vctl[vno];
74         }
75         vctl[vno] = v;
76         iunlock(&vctllock);
77 }
78
79 int
80 intrdisable(int irq, void (*f)(Ureg *, void *), void *a, int tbdf, char *name)
81 {
82         Vctl **pv, *v;
83         int vno;
84
85         /*
86          * For now, none of this will work with the APIC code,
87          * there is no mapping between irq and vector as the IRQ
88          * is pretty meaningless.
89          */
90         if(arch->intrvecno == nil)
91                 return -1;
92         vno = arch->intrvecno(irq);
93         ilock(&vctllock);
94         pv = &vctl[vno];
95         while (*pv &&
96                   ((*pv)->irq != irq || (*pv)->tbdf != tbdf || (*pv)->f != f || (*pv)->a != a ||
97                    strcmp((*pv)->name, name)))
98                 pv = &((*pv)->next);
99         assert(*pv);
100
101         v = *pv;
102         *pv = (*pv)->next;      /* Link out the entry */
103
104         if(vctl[vno] == nil && arch->intrdisable != nil)
105                 arch->intrdisable(irq);
106         iunlock(&vctllock);
107         xfree(v);
108         return 0;
109 }
110
111 static long
112 irqallocread(Chan*, void *vbuf, long n, vlong offset)
113 {
114         char *buf, *p, str[2*(11+1)+KNAMELEN+1+1];
115         int m, vno;
116         long oldn;
117         Vctl *v;
118
119         if(n < 0 || offset < 0)
120                 error(Ebadarg);
121
122         oldn = n;
123         buf = vbuf;
124         for(vno=0; vno<nelem(vctl); vno++){
125                 for(v=vctl[vno]; v; v=v->next){
126                         m = snprint(str, sizeof str, "%11d %11d %.*s\n", vno, v->irq, KNAMELEN, v->name);
127                         if(m <= offset) /* if do not want this, skip entry */
128                                 offset -= m;
129                         else{
130                                 /* skip offset bytes */
131                                 m -= offset;
132                                 p = str+offset;
133                                 offset = 0;
134
135                                 /* write at most max(n,m) bytes */
136                                 if(m > n)
137                                         m = n;
138                                 memmove(buf, p, m);
139                                 n -= m;
140                                 buf += m;
141
142                                 if(n == 0)
143                                         return oldn;
144                         }
145                 }
146         }
147         return oldn - n;
148 }
149
150 void
151 trapenable(int vno, void (*f)(Ureg*, void*), void* a, char *name)
152 {
153         Vctl *v;
154
155         if(vno < 0 || vno >= VectorPIC)
156                 panic("trapenable: vno %d", vno);
157         if((v = xalloc(sizeof(Vctl))) == nil)
158                 panic("trapenable: out of memory");
159         v->tbdf = BUSUNKNOWN;
160         v->f = f;
161         v->a = a;
162         strncpy(v->name, name, KNAMELEN-1);
163         v->name[KNAMELEN-1] = 0;
164
165         ilock(&vctllock);
166         if(vctl[vno])
167                 v->next = vctl[vno]->next;
168         vctl[vno] = v;
169         iunlock(&vctllock);
170 }
171
/*
 * Clear the NMI latch and (re)enable NMI delivery by poking
 * ports 0x70 and 0x61.
 */
static void
nmienable(void)
{
	int low;

	/*
	 * Hack: port 0x70 is shared with NVRAM access and
	 * ought to be locked against it.
	 */
	outb(0x70, 0x80);		/* NMI latch clear */
	outb(0x70, 0);

	/* Enable NMI: pulse bit 3 of port 0x61, keeping the low three bits */
	low = inb(0x61) & 0x07;
	outb(0x61, low|0x08);
	outb(0x61, low);
}
187
188 /*
189  * Minimal trap setup.  Just enough so that we can panic
190  * on traps (bugs) during kernel initialization.
191  * Called very early - malloc is not yet available.
192  */
193 void
194 trapinit0(void)
195 {
196         int d1, v;
197         ulong vaddr;
198         Segdesc *idt;
199
200         idt = (Segdesc*)IDTADDR;
201         vaddr = (ulong)vectortable;
202         for(v = 0; v < 256; v++){
203                 d1 = (vaddr & 0xFFFF0000)|SEGP;
204                 switch(v){
205
206                 case VectorBPT:
207                         d1 |= SEGPL(3)|SEGIG;
208                         break;
209
210                 case VectorSYSCALL:
211                         d1 |= SEGPL(3)|SEGIG;
212                         break;
213
214                 default:
215                         d1 |= SEGPL(0)|SEGIG;
216                         break;
217                 }
218                 idt[v].d0 = (vaddr & 0xFFFF)|(KESEL<<16);
219                 idt[v].d1 = d1;
220                 vaddr += 6;
221         }
222 }
223
224 void
225 trapinit(void)
226 {
227         /*
228          * Special traps.
229          * Syscall() is called directly without going through trap().
230          */
231         trapenable(VectorBPT, debugbpt, 0, "debugpt");
232         trapenable(VectorPF, fault386, 0, "fault386");
233         trapenable(Vector2F, doublefault, 0, "doublefault");
234         trapenable(Vector15, unexpected, 0, "unexpected");
235         nmienable();
236
237         addarchfile("irqalloc", 0444, irqallocread, nil);
238         trapinited = 1;
239 }
240
/*
 * Names of the 32 processor exception vectors, indexed by vector
 * number.  trap() uses them for the "sys: trap: ..." note posted to
 * user processes and for the panic message on kernel traps.
 */
static char* excname[32] = {
	/*  0 */	"divide error",
	/*  1 */	"debug exception",
	/*  2 */	"nonmaskable interrupt",
	/*  3 */	"breakpoint",
	/*  4 */	"overflow",
	/*  5 */	"bounds check",
	/*  6 */	"invalid opcode",
	/*  7 */	"coprocessor not available",
	/*  8 */	"double fault",
	/*  9 */	"coprocessor segment overrun",
	/* 10 */	"invalid TSS",
	/* 11 */	"segment not present",
	/* 12 */	"stack exception",
	/* 13 */	"general protection violation",
	/* 14 */	"page fault",
	/* 15 */	"15 (reserved)",
	/* 16 */	"coprocessor error",
	/* 17 */	"alignment check",
	/* 18 */	"machine check",
	/* 19 */	"simd error",
	/* 20 */	"20 (reserved)",
	/* 21 */	"21 (reserved)",
	/* 22 */	"22 (reserved)",
	/* 23 */	"23 (reserved)",
	/* 24 */	"24 (reserved)",
	/* 25 */	"25 (reserved)",
	/* 26 */	"26 (reserved)",
	/* 27 */	"27 (reserved)",
	/* 28 */	"28 (reserved)",
	/* 29 */	"29 (reserved)",
	/* 30 */	"30 (reserved)",
	/* 31 */	"31 (reserved)",
};
275
276 /*
277  *  keep histogram of interrupt service times
278  */
279 void
280 intrtime(Mach*, int vno)
281 {
282         ulong diff;
283         ulong x;
284
285         x = perfticks();
286         diff = x - m->perf.intrts;
287         m->perf.intrts = x;
288
289         m->perf.inintr += diff;
290         if(up == nil && m->perf.inidle > diff)
291                 m->perf.inidle -= diff;
292
293         diff /= m->cpumhz*100;          /* quantum = 100µsec */
294         if(diff >= Ntimevec)
295                 diff = Ntimevec-1;
296         intrtimes[vno][diff]++;
297 }
298
299 /* go to user space */
300 void
301 kexit(Ureg*)
302 {
303         uvlong t;
304         Tos *tos;
305
306         /* precise time accounting, kernel exit */
307         tos = (Tos*)(USTKTOP-sizeof(Tos));
308         cycles(&t);
309         tos->kcycles += t - up->kentry;
310         tos->pcycles = t + up->pcycles;
311         tos->pid = up->pid;
312 }
313
314 /*
315  *  All traps come here.  It is slower to have all traps call trap()
316  *  rather than directly vectoring the handler.  However, this avoids a
317  *  lot of code duplication and possible bugs.  The only exception is
318  *  VectorSYSCALL.
319  *  Trap is called with interrupts disabled via interrupt-gates.
320  */
321 void
322 trap(Ureg* ureg)
323 {
324         int clockintr, i, vno, user;
325         char buf[ERRMAX];
326         Vctl *ctl, *v;
327         Mach *mach;
328
329         if(!trapinited){
330                 /* fault386 can give a better error message */
331                 if(ureg->trap == VectorPF)
332                         fault386(ureg, nil);
333                 panic("trap %lud: not ready", ureg->trap);
334         }
335
336         m->perf.intrts = perfticks();
337         user = userureg(ureg);
338         if(user){
339                 up->dbgreg = ureg;
340                 cycles(&up->kentry);
341         }
342
343         clockintr = 0;
344
345         vno = ureg->trap;
346         if(ctl = vctl[vno]){
347                 if(ctl->isintr){
348                         m->intr++;
349                         if(vno >= VectorPIC && vno != VectorSYSCALL)
350                                 m->lastintr = ctl->irq;
351                 }
352
353                 if(ctl->isr)
354                         ctl->isr(vno);
355                 for(v = ctl; v != nil; v = v->next){
356                         if(v->f)
357                                 v->f(ureg, v->a);
358                 }
359                 if(ctl->eoi)
360                         ctl->eoi(vno);
361
362                 if(ctl->isintr){
363                         intrtime(m, vno);
364
365                         if(ctl->irq == IrqCLOCK || ctl->irq == IrqTIMER)
366                                 clockintr = 1;
367
368                         if(up && !clockintr)
369                                 preempted();
370                 }
371         }
372         else if(vno < nelem(excname) && user){
373                 spllo();
374                 sprint(buf, "sys: trap: %s", excname[vno]);
375                 postnote(up, 1, buf, NDebug);
376         }
377         else if(vno >= VectorPIC && vno != VectorSYSCALL){
378                 /*
379                  * An unknown interrupt.
380                  * Check for a default IRQ7. This can happen when
381                  * the IRQ input goes away before the acknowledge.
382                  * In this case, a 'default IRQ7' is generated, but
383                  * the corresponding bit in the ISR isn't set.
384                  * In fact, just ignore all such interrupts.
385                  */
386
387                 /* call all interrupt routines, just in case */
388                 for(i = VectorPIC; i <= MaxIrqLAPIC; i++){
389                         ctl = vctl[i];
390                         if(ctl == nil)
391                                 continue;
392                         if(!ctl->isintr)
393                                 continue;
394                         for(v = ctl; v != nil; v = v->next){
395                                 if(v->f)
396                                         v->f(ureg, v->a);
397                         }
398                         /* should we do this? */
399                         if(ctl->eoi)
400                                 ctl->eoi(i);
401                 }
402
403                 /* clear the interrupt */
404                 i8259isr(vno);
405
406                 if(0)print("cpu%d: spurious interrupt %d, last %d\n",
407                         m->machno, vno, m->lastintr);
408                 if(0)if(conf.nmach > 1){
409                         for(i = 0; i < 32; i++){
410                                 if(!(active.machs & (1<<i)))
411                                         continue;
412                                 mach = MACHP(i);
413                                 if(m->machno == mach->machno)
414                                         continue;
415                                 print(" cpu%d: last %d",
416                                         mach->machno, mach->lastintr);
417                         }
418                         print("\n");
419                 }
420                 m->spuriousintr++;
421                 if(user)
422                         kexit(ureg);
423                 return;
424         }
425         else{
426                 if(vno == VectorNMI){
427                         /*
428                          * Don't re-enable, it confuses the crash dumps.
429                         nmienable();
430                          */
431                         iprint("cpu%d: PC %#8.8lux\n", m->machno, ureg->pc);
432                         while(m->machno != 0)
433                                 ;
434                 }
435
436                 if(!user){
437                         void (*pc)(void);
438                         ulong *sp; 
439
440                         extern void _forkretpopgs(void);
441                         extern void _forkretpopfs(void);
442                         extern void _forkretpopes(void);
443                         extern void _forkretpopds(void);
444                         extern void _forkretiret(void);
445                         extern void _rdmsrinst(void);
446                         extern void _wrmsrinst(void);
447
448                         extern void load_fs(ulong);
449                         extern void load_gs(ulong);
450
451                         load_fs(NULLSEL);
452                         load_gs(NULLSEL);
453
454                         sp = (ulong*)&ureg->sp; /* kernel stack */
455                         pc = (void*)ureg->pc;
456
457                         if(pc == _forkretpopgs || pc == _forkretpopfs || 
458                            pc == _forkretpopes || pc == _forkretpopds){
459                                 if(vno == VectorGPF || vno == VectorSNP){
460                                         sp[0] = NULLSEL;
461                                         return;
462                                 }
463                         } else if(pc == _forkretiret){
464                                 if(vno == VectorGPF || vno == VectorSNP){
465                                         sp[1] = UESEL;  /* CS */
466                                         sp[4] = UDSEL;  /* SS */
467                                         return;
468                                 }
469                         } else if(pc == _rdmsrinst || pc == _wrmsrinst){
470                                 if(vno == VectorGPF){
471                                         ureg->bp = -1;
472                                         ureg->pc += 2;
473                                         return;
474                                 }
475                         }
476                 }
477
478                 dumpregs(ureg);
479                 if(!user){
480                         ureg->sp = (ulong)&ureg->sp;
481                         _dumpstack(ureg);
482                 }
483                 if(vno < nelem(excname))
484                         panic("%s", excname[vno]);
485                 panic("unknown trap/intr: %d", vno);
486         }
487         splhi();
488
489         /* delaysched set because we held a lock or because our quantum ended */
490         if(up && up->delaysched && clockintr){
491                 sched();
492                 splhi();
493         }
494
495         if(user){
496                 if(up->procctl || up->nnote)
497                         notify(ureg);
498                 kexit(ureg);
499         }
500 }
501
502 /*
503  *  dump registers
504  */
505 void
506 dumpregs2(Ureg* ureg)
507 {
508         if(up)
509                 iprint("cpu%d: registers for %s %lud\n",
510                         m->machno, up->text, up->pid);
511         else
512                 iprint("cpu%d: registers for kernel\n", m->machno);
513         iprint("FLAGS=%luX TRAP=%luX ECODE=%luX PC=%luX",
514                 ureg->flags, ureg->trap, ureg->ecode, ureg->pc);
515         if(userureg(ureg))
516                 iprint(" SS=%4.4luX USP=%luX\n", ureg->ss & 0xFFFF, ureg->usp);
517         else
518                 iprint(" SP=%luX\n", (ulong)&ureg->sp);
519         iprint("  AX %8.8luX  BX %8.8luX  CX %8.8luX  DX %8.8luX\n",
520                 ureg->ax, ureg->bx, ureg->cx, ureg->dx);
521         iprint("  SI %8.8luX  DI %8.8luX  BP %8.8luX\n",
522                 ureg->si, ureg->di, ureg->bp);
523         iprint("  CS %4.4luX  DS %4.4luX  ES %4.4luX  FS %4.4luX  GS %4.4luX\n",
524                 ureg->cs & 0xFFFF, ureg->ds & 0xFFFF, ureg->es & 0xFFFF,
525                 ureg->fs & 0xFFFF, ureg->gs & 0xFFFF);
526 }
527
528 void
529 dumpregs(Ureg* ureg)
530 {
531         dumpregs2(ureg);
532
533         /*
534          * Processor control registers.
535          * If machine check exception, time stamp counter, page size extensions
536          * or enhanced virtual 8086 mode extensions are supported, there is a
537          * CR4. If there is a CR4 and machine check extensions, read the machine
538          * check address and machine check type registers if RDMSR supported.
539          */
540         iprint("  CR0 %8.8lux CR2 %8.8lux CR3 %8.8lux",
541                 getcr0(), getcr2(), getcr3());
542         if(m->cpuiddx & (Mce|Tsc|Pse|Vmex)){
543                 iprint(" CR4 %8.8lux\n", getcr4());
544                 if(ureg->trap == 18)
545                         dumpmcregs();
546         }
547         iprint("\n  ur %#p up %#p\n", ureg, up);
548 }
549
550
551 /*
552  * Fill in enough of Ureg to get a stack trace, and call a function.
553  * Used by debugging interface rdb.
554  */
555 void
556 callwithureg(void (*fn)(Ureg*))
557 {
558         Ureg ureg;
559         ureg.pc = getcallerpc(&fn);
560         ureg.sp = (ulong)&fn;
561         fn(&ureg);
562 }
563
564 static void
565 _dumpstack(Ureg *ureg)
566 {
567         uintptr l, v, i, estack;
568         extern ulong etext;
569         int x;
570         char *s;
571
572         if((s = getconf("*nodumpstack")) != nil && strcmp(s, "0") != 0){
573                 iprint("dumpstack disabled\n");
574                 return;
575         }
576         iprint("dumpstack\n");
577
578         x = 0;
579         x += iprint("ktrace /kernel/path %.8lux %.8lux <<EOF\n", ureg->pc, ureg->sp);
580         i = 0;
581         if(up
582         && (uintptr)&l >= (uintptr)up->kstack
583         && (uintptr)&l <= (uintptr)up->kstack+KSTACK)
584                 estack = (uintptr)up->kstack+KSTACK;
585         else if((uintptr)&l >= (uintptr)m->stack
586         && (uintptr)&l <= (uintptr)m+MACHSIZE)
587                 estack = (uintptr)m+MACHSIZE;
588         else
589                 return;
590         x += iprint("estackx %p\n", estack);
591
592         for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){
593                 v = *(uintptr*)l;
594                 if((KTZERO < v && v < (uintptr)&etext) || estack-l < 32){
595                         /*
596                          * Could Pick off general CALL (((uchar*)v)[-5] == 0xE8)
597                          * and CALL indirect through AX
598                          * (((uchar*)v)[-2] == 0xFF && ((uchar*)v)[-2] == 0xD0),
599                          * but this is too clever and misses faulting address.
600                          */
601                         x += iprint("%.8p=%.8p ", l, v);
602                         i++;
603                 }
604                 if(i == 4){
605                         i = 0;
606                         x += iprint("\n");
607                 }
608         }
609         if(i)
610                 iprint("\n");
611         iprint("EOF\n");
612
613         if(ureg->trap != VectorNMI)
614                 return;
615
616         i = 0;
617         for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){
618                 iprint("%.8p ", *(uintptr*)l);
619                 if(++i == 8){
620                         i = 0;
621                         iprint("\n");
622                 }
623         }
624         if(i)
625                 iprint("\n");
626 }
627
628 void
629 dumpstack(void)
630 {
631         callwithureg(_dumpstack);
632 }
633
634 static void
635 debugbpt(Ureg* ureg, void*)
636 {
637         char buf[ERRMAX];
638
639         if(up == 0)
640                 panic("kernel bpt");
641         /* restore pc to instruction that caused the trap */
642         ureg->pc--;
643         sprint(buf, "sys: breakpoint");
644         postnote(up, 1, buf, NDebug);
645 }
646
647 static void
648 doublefault(Ureg*, void*)
649 {
650         panic("double fault");
651 }
652
653 static void
654 unexpected(Ureg* ureg, void*)
655 {
656         print("unexpected trap %lud; ignoring\n", ureg->trap);
657 }
658
659 extern void checkpages(void);
660 extern void checkfault(ulong, ulong);
661 static void
662 fault386(Ureg* ureg, void*)
663 {
664         ulong addr;
665         int read, user, n, insyscall;
666         char buf[ERRMAX];
667
668         addr = getcr2();
669         read = !(ureg->ecode & 2);
670
671         user = userureg(ureg);
672         if(!user){
673                 if(vmapsync(addr))
674                         return;
675                 if(addr >= USTKTOP)
676                         panic("kernel fault: bad address pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
677                 if(up == nil)
678                         panic("kernel fault: no user process pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
679         }
680         if(up == nil)
681                 panic("user fault: up=0 pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
682
683         insyscall = up->insyscall;
684         up->insyscall = 1;
685         n = fault(addr, read);
686         if(n < 0){
687                 if(!user){
688                         dumpregs(ureg);
689                         panic("fault: 0x%lux", addr);
690                 }
691                 checkpages();
692                 checkfault(addr, ureg->pc);
693                 sprint(buf, "sys: trap: fault %s addr=0x%lux",
694                         read ? "read" : "write", addr);
695                 postnote(up, 1, buf, NDebug);
696         }
697         up->insyscall = insyscall;
698 }
699
700 /*
701  *  system calls
702  */
703 #include "../port/systab.h"
704
705 /*
706  *  Syscall is called directly from assembler without going through trap().
707  */
708 void
709 syscall(Ureg* ureg)
710 {
711         char *e;
712         ulong   sp;
713         long    ret;
714         int     i, s;
715         ulong scallnr;
716         vlong startns, stopns;
717
718         if(!userureg(ureg))
719                 panic("syscall: cs 0x%4.4luX", ureg->cs);
720
721         cycles(&up->kentry);
722
723         m->syscall++;
724         up->insyscall = 1;
725         up->pc = ureg->pc;
726         up->dbgreg = ureg;
727
728         sp = ureg->usp;
729         scallnr = ureg->ax;
730         up->scallnr = scallnr;
731
732         spllo();
733
734         up->nerrlab = 0;
735         ret = -1;
736         if(!waserror()){
737                 if(sp<(USTKTOP-BY2PG) || sp>(USTKTOP-sizeof(Sargs)-BY2WD))
738                         validaddr(sp, sizeof(Sargs)+BY2WD, 0);
739
740                 up->s = *((Sargs*)(sp+BY2WD));
741
742                 if(up->procctl == Proc_tracesyscall){
743                         syscallfmt(scallnr, ureg->pc, (va_list)up->s.args);
744                         s = splhi();
745                         up->procctl = Proc_stopme;
746                         procctl();
747                         splx(s);
748                         startns = todget(nil);
749                 }
750
751                 if(scallnr >= nsyscall || systab[scallnr] == 0){
752                         pprint("bad sys call number %lud pc %lux\n",
753                                 scallnr, ureg->pc);
754                         postnote(up, 1, "sys: bad sys call", NDebug);
755                         error(Ebadarg);
756                 }
757                 up->psstate = sysctab[scallnr];
758                 ret = systab[scallnr]((va_list)up->s.args);
759                 poperror();
760         }else{
761                 /* failure: save the error buffer for errstr */
762                 e = up->syserrstr;
763                 up->syserrstr = up->errstr;
764                 up->errstr = e;
765                 if(0 && up->pid == 1)
766                         print("syscall %lud error %s\n", scallnr, up->syserrstr);
767         }
768         if(up->nerrlab){
769                 print("bad errstack [%lud]: %d extra\n", scallnr, up->nerrlab);
770                 for(i = 0; i < NERR; i++)
771                         print("sp=%lux pc=%lux\n",
772                                 up->errlab[i].sp, up->errlab[i].pc);
773                 panic("error stack");
774         }
775
776         /*
777          *  Put return value in frame.  On the x86 the syscall is
778          *  just another trap and the return value from syscall is
779          *  ignored.  On other machines the return value is put into
780          *  the results register by caller of syscall.
781          */
782         ureg->ax = ret;
783
784         if(up->procctl == Proc_tracesyscall){
785                 stopns = todget(nil);
786                 sysretfmt(scallnr, (va_list)up->s.args, ret, startns, stopns);
787                 s = splhi();
788                 up->procctl = Proc_stopme;
789                 procctl();
790                 splx(s);
791         }
792
793         up->insyscall = 0;
794         up->psstate = 0;
795
796         if(scallnr == NOTED)
797                 noted(ureg, *((ulong*)up->s.args));
798
799         if(scallnr!=RFORK && (up->procctl || up->nnote)){
800                 splhi();
801                 notify(ureg);
802         }
803         /* if we delayed sched because we held a lock, sched now */
804         if(up->delaysched)
805                 sched();
806         kexit(ureg);
807 }
808
809 /*
810  *  Call user, if necessary, with note.
811  *  Pass user the Ureg struct and the note on his stack.
812  */
813 int
814 notify(Ureg* ureg)
815 {
816         int l;
817         ulong s, sp;
818         Note *n;
819
820         if(up->procctl)
821                 procctl();
822         if(up->nnote == 0)
823                 return 0;
824
825         if(up->fpstate == FPactive){
826                 fpsave(&up->fpsave);
827                 up->fpstate = FPinactive;
828         }
829         up->fpstate |= FPillegal;
830
831         s = spllo();
832         qlock(&up->debug);
833         up->notepending = 0;
834         n = &up->note[0];
835         if(strncmp(n->msg, "sys:", 4) == 0){
836                 l = strlen(n->msg);
837                 if(l > ERRMAX-15)       /* " pc=0x12345678\0" */
838                         l = ERRMAX-15;
839                 sprint(n->msg+l, " pc=0x%.8lux", ureg->pc);
840         }
841
842         if(n->flag!=NUser && (up->notified || up->notify==0)){
843                 qunlock(&up->debug);
844                 if(n->flag == NDebug)
845                         pprint("suicide: %s\n", n->msg);
846                 pexit(n->msg, n->flag!=NDebug);
847         }
848
849         if(up->notified){
850                 qunlock(&up->debug);
851                 splhi();
852                 return 0;
853         }
854
855         if(!up->notify){
856                 qunlock(&up->debug);
857                 pexit(n->msg, n->flag!=NDebug);
858         }
859         sp = ureg->usp;
860         sp -= 256;      /* debugging: preserve context causing problem */
861         sp -= sizeof(Ureg);
862 if(0) print("%s %lud: notify %.8lux %.8lux %.8lux %s\n",
863         up->text, up->pid, ureg->pc, ureg->usp, sp, n->msg);
864
865         if(!okaddr((uintptr)up->notify, 1, 0)
866         || !okaddr(sp-ERRMAX-4*BY2WD, sizeof(Ureg)+ERRMAX+4*BY2WD, 1)){
867                 qunlock(&up->debug);
868                 pprint("suicide: bad address in notify\n");
869                 pexit("Suicide", 0);
870         }
871
872         memmove((Ureg*)sp, ureg, sizeof(Ureg));
873         *(Ureg**)(sp-BY2WD) = up->ureg; /* word under Ureg is old up->ureg */
874         up->ureg = (void*)sp;
875         sp -= BY2WD+ERRMAX;
876         memmove((char*)sp, up->note[0].msg, ERRMAX);
877         sp -= 3*BY2WD;
878         *(ulong*)(sp+2*BY2WD) = sp+3*BY2WD;             /* arg 2 is string */
879         *(ulong*)(sp+1*BY2WD) = (ulong)up->ureg;        /* arg 1 is ureg* */
880         *(ulong*)(sp+0*BY2WD) = 0;                      /* arg 0 is pc */
881         ureg->usp = sp;
882         ureg->pc = (ulong)up->notify;
883         ureg->cs = UESEL;
884         ureg->ss = ureg->ds = ureg->es = UDSEL;
885         up->notified = 1;
886         up->nnote--;
887         memmove(&up->lastnote, &up->note[0], sizeof(Note));
888         memmove(&up->note[0], &up->note[1], up->nnote*sizeof(Note));
889
890         qunlock(&up->debug);
891         splx(s);
892         return 1;
893 }
894
895 /*
896  *   Return user to state before notify()
897  */
898 void
899 noted(Ureg* ureg, ulong arg0)
900 {
901         Ureg *nureg;
902         ulong oureg, sp;
903
904         qlock(&up->debug);
905         if(arg0!=NRSTR && !up->notified) {
906                 qunlock(&up->debug);
907                 pprint("call to noted() when not notified\n");
908                 pexit("Suicide", 0);
909         }
910         up->notified = 0;
911
912         nureg = up->ureg;       /* pointer to user returned Ureg struct */
913
914         up->fpstate &= ~FPillegal;
915
916         /* sanity clause */
917         oureg = (ulong)nureg;
918         if(!okaddr(oureg-BY2WD, BY2WD+sizeof(Ureg), 0)){
919                 qunlock(&up->debug);
920                 pprint("bad ureg in noted or call to noted when not notified\n");
921                 pexit("Suicide", 0);
922         }
923
924         /* don't let user change system flags */
925         nureg->flags = (ureg->flags & ~0xCD5) | (nureg->flags & 0xCD5);
926         nureg->cs |= 3;
927         nureg->ss |= 3;
928
929         memmove(ureg, nureg, sizeof(Ureg));
930
931         switch(arg0){
932         case NCONT:
933         case NRSTR:
934 if(0) print("%s %lud: noted %.8lux %.8lux\n",
935         up->text, up->pid, nureg->pc, nureg->usp);
936                 if(!okaddr(nureg->pc, 1, 0) || !okaddr(nureg->usp, BY2WD, 0)){
937                         qunlock(&up->debug);
938                         pprint("suicide: trap in noted\n");
939                         pexit("Suicide", 0);
940                 }
941                 up->ureg = (Ureg*)(*(ulong*)(oureg-BY2WD));
942                 qunlock(&up->debug);
943                 break;
944
945         case NSAVE:
946                 if(!okaddr(nureg->pc, BY2WD, 0)
947                 || !okaddr(nureg->usp, BY2WD, 0)){
948                         qunlock(&up->debug);
949                         pprint("suicide: trap in noted\n");
950                         pexit("Suicide", 0);
951                 }
952                 qunlock(&up->debug);
953                 sp = oureg-4*BY2WD-ERRMAX;
954                 splhi();
955                 ureg->sp = sp;
956                 ((ulong*)sp)[1] = oureg;        /* arg 1 0(FP) is ureg* */
957                 ((ulong*)sp)[0] = 0;            /* arg 0 is pc */
958                 break;
959
960         default:
961                 up->lastnote.flag = NDebug;
962                 /* fall through */
963
964         case NDFLT:
965                 qunlock(&up->debug);
966                 if(up->lastnote.flag == NDebug)
967                         pprint("suicide: %s\n", up->lastnote.msg);
968                 pexit(up->lastnote.msg, up->lastnote.flag!=NDebug);
969         }
970 }
971
972 uintptr
973 execregs(uintptr entry, ulong ssize, ulong nargs)
974 {
975         ulong *sp;
976         Ureg *ureg;
977
978         sp = (ulong*)(USTKTOP - ssize);
979         *--sp = nargs;
980
981         ureg = up->dbgreg;
982         ureg->usp = (ulong)sp;
983         ureg->pc = entry;
984         ureg->cs = UESEL;
985         ureg->ss = ureg->ds = ureg->es = UDSEL;
986         ureg->fs = ureg->gs = NULLSEL;
987         return USTKTOP-sizeof(Tos);             /* address of kernel/user shared data */
988 }
989
990 /*
991  *  return the userpc the last exception happened at
992  */
993 uintptr
994 userpc(void)
995 {
996         Ureg *ureg;
997
998         ureg = (Ureg*)up->dbgreg;
999         return ureg->pc;
1000 }
1001
1002 /* This routine must save the values of registers the user is not permitted
1003  * to write from devproc and then restore the saved values before returning.
1004  */
1005 void
1006 setregisters(Ureg* ureg, char* pureg, char* uva, int n)
1007 {
1008         ulong flags;
1009
1010         flags = ureg->flags;
1011         memmove(pureg, uva, n);
1012         ureg->flags = (ureg->flags & 0xCD5) | (flags & ~0xCD5);
1013         ureg->cs |= 3;
1014         ureg->ss |= 3;
1015 }
1016
1017 static void
1018 linkproc(void)
1019 {
1020         spllo();
1021         up->kpfun(up->kparg);
1022         pexit("kproc dying", 0);
1023 }
1024
1025 void
1026 kprocchild(Proc* p, void (*func)(void*), void* arg)
1027 {
1028         /*
1029          * gotolabel() needs a word on the stack in
1030          * which to place the return PC used to jump
1031          * to linkproc().
1032          */
1033         p->sched.pc = (ulong)linkproc;
1034         p->sched.sp = (ulong)p->kstack+KSTACK-BY2WD;
1035
1036         p->kpfun = func;
1037         p->kparg = arg;
1038 }
1039
1040 void
1041 forkchild(Proc *p, Ureg *ureg)
1042 {
1043         Ureg *cureg;
1044
1045         /*
1046          * Add 2*BY2WD to the stack to account for
1047          *  - the return PC
1048          *  - trap's argument (ur)
1049          */
1050         p->sched.sp = (ulong)p->kstack+KSTACK-(sizeof(Ureg)+2*BY2WD);
1051         p->sched.pc = (ulong)forkret;
1052
1053         cureg = (Ureg*)(p->sched.sp+2*BY2WD);
1054         memmove(cureg, ureg, sizeof(Ureg));
1055         /* return value of syscall in child */
1056         cureg->ax = 0;
1057
1058         /* Things from bottom of syscall which were never executed */
1059         p->psstate = 0;
1060         p->insyscall = 0;
1061 }
1062
1063 /* Give enough context in the ureg to produce a kernel stack for
1064  * a sleeping process
1065  */
1066 void
1067 setkernur(Ureg* ureg, Proc* p)
1068 {
1069         ureg->pc = p->sched.pc;
1070         ureg->sp = p->sched.sp+4;
1071 }
1072
1073 ulong
1074 dbgpc(Proc *p)
1075 {
1076         Ureg *ureg;
1077
1078         ureg = p->dbgreg;
1079         if(ureg == 0)
1080                 return 0;
1081
1082         return ureg->pc;
1083 }