]> git.lizzy.rs Git - plan9front.git/blob - sys/src/9/pc/trap.c
kernel: add support for hardware watchpoints
[plan9front.git] / sys / src / 9 / pc / trap.c
1 #include        "u.h"
2 #include        "tos.h"
3 #include        "../port/lib.h"
4 #include        "mem.h"
5 #include        "dat.h"
6 #include        "fns.h"
7 #include        "io.h"
8 #include        "ureg.h"
9 #include        "../port/error.h"
10 #include        <trace.h>
11
/* set to 1 at the end of trapinit(); trap() panics on any trap taken before that */
static int trapinited;

void    noted(Ureg*, ulong);

/* handlers installed by trapinit(), plus the stack dumper, all defined later in this file */
static void debugexc(Ureg*, void*);
static void debugbpt(Ureg*, void*);
static void fault386(Ureg*, void*);
static void doublefault(Ureg*, void*);
static void unexpected(Ureg*, void*);
static void _dumpstack(Ureg*);

/*
 * vctl[vno] heads the chain of handlers registered for vector vno;
 * vctllock is held (ilock) while the chains are modified.
 */
static Lock vctllock;
static Vctl *vctl[256];

enum
{
        Ntimevec = 20           /* number of time buckets for each intr */
};
/* per-vector histogram of interrupt service times, updated by intrtime() */
ulong intrtimes[256][Ntimevec];
31
32 void
33 intrenable(int irq, void (*f)(Ureg*, void*), void* a, int tbdf, char *name)
34 {
35         int vno;
36         Vctl *v;
37
38         if(f == nil){
39                 print("intrenable: nil handler for %d, tbdf 0x%uX for %s\n",
40                         irq, tbdf, name);
41                 return;
42         }
43
44         if(tbdf != BUSUNKNOWN && (irq == 0xff || irq == 0)){
45                 print("intrenable: got unassigned irq %d, tbdf 0x%uX for %s\n",
46                         irq, tbdf, name);
47                 irq = -1;
48         }
49
50         if((v = xalloc(sizeof(Vctl))) == nil)
51                 panic("intrenable: out of memory");
52         v->isintr = 1;
53         v->irq = irq;
54         v->tbdf = tbdf;
55         v->f = f;
56         v->a = a;
57         strncpy(v->name, name, KNAMELEN-1);
58         v->name[KNAMELEN-1] = 0;
59
60         ilock(&vctllock);
61         vno = arch->intrenable(v);
62         if(vno == -1){
63                 iunlock(&vctllock);
64                 print("intrenable: couldn't enable irq %d, tbdf 0x%uX for %s\n",
65                         irq, tbdf, v->name);
66                 xfree(v);
67                 return;
68         }
69         if(vctl[vno]){
70                 if(vctl[vno]->isr != v->isr || vctl[vno]->eoi != v->eoi)
71                         panic("intrenable: handler: %s %s %#p %#p %#p %#p",
72                                 vctl[vno]->name, v->name,
73                                 vctl[vno]->isr, v->isr, vctl[vno]->eoi, v->eoi);
74                 v->next = vctl[vno];
75         }
76         vctl[vno] = v;
77         iunlock(&vctllock);
78 }
79
/*
 * Remove the handler previously registered with the same
 * (irq, f, a, tbdf, name) arguments.
 *
 * When the vector for irq can be computed (arch->intrvecno), only that
 * vector's chain is searched.  Otherwise irq is treated as a wildcard
 * and the chains of VectorPIC through MaxVectorAPIC are searched in
 * turn.  At most one entry is unlinked and freed.  If that empties
 * the chain of a known irq, arch->intrdisable() is called when the
 * architecture provides it.  Nothing happens if no entry matches.
 */
void
intrdisable(int irq, void (*f)(Ureg *, void *), void *a, int tbdf, char *name)
{
        Vctl **pv, *v;
        int vno;

        if(arch->intrvecno == nil || (tbdf != BUSUNKNOWN && (irq == 0xff || irq == 0))){
                /*
                 * on APIC machine, irq is pretty meaningless
                 * and disabling the vector is not implemented.
                 * however, we still want to remove the matching
                 * Vctl entry to prevent calling Vctl.f() with a
                 * stale Vctl.a pointer.
                 */
                irq = -1;
                vno = VectorPIC;
        } else {
                vno = arch->intrvecno(irq);
        }
        ilock(&vctllock);
        do {
                /* pv trails v so that the match can be unlinked in place */
                for(pv = &vctl[vno]; (v = *pv) != nil; pv = &v->next){
                        if(v->isintr && (v->irq == irq || irq == -1)
                        && v->tbdf == tbdf && v->f == f && v->a == a
                        && strcmp(v->name, name) == 0)
                                break;
                }
                if(v != nil){
                        *pv = v->next;
                        xfree(v);

                        /* last handler of a known irq gone: let the arch code disable it */
                        if(irq != -1 && vctl[vno] == nil && arch->intrdisable != nil)
                                arch->intrdisable(irq);
                        break;
                }
        } while(irq == -1 && ++vno <= MaxVectorAPIC);   /* wildcard: try the next vector */
        iunlock(&vctllock);
}
118
119 static long
120 irqallocread(Chan*, void *a, long n, vlong offset)
121 {
122         char buf[2*(11+1)+KNAMELEN+1+1];
123         int vno, m;
124         Vctl *v;
125
126         if(n < 0 || offset < 0)
127                 error(Ebadarg);
128
129         for(vno=0; vno<nelem(vctl); vno++){
130                 for(v=vctl[vno]; v; v=v->next){
131                         m = snprint(buf, sizeof(buf), "%11d %11d %.*s\n", vno, v->irq, KNAMELEN, v->name);
132                         offset -= m;
133                         if(offset >= 0)
134                                 continue;
135                         if(n > -offset)
136                                 n = -offset;
137                         offset += m;
138                         memmove(a, buf+offset, n);
139                         return n;
140                 }
141         }
142         return 0;
143 }
144
145 void
146 trapenable(int vno, void (*f)(Ureg*, void*), void* a, char *name)
147 {
148         Vctl *v;
149
150         if(vno < 0 || vno >= VectorPIC)
151                 panic("trapenable: vno %d", vno);
152         if((v = xalloc(sizeof(Vctl))) == nil)
153                 panic("trapenable: out of memory");
154         v->tbdf = BUSUNKNOWN;
155         v->f = f;
156         v->a = a;
157         strncpy(v->name, name, KNAMELEN-1);
158         v->name[KNAMELEN-1] = 0;
159
160         ilock(&vctllock);
161         if(vctl[vno])
162                 v->next = vctl[vno]->next;
163         vctl[vno] = v;
164         iunlock(&vctllock);
165 }
166
/*
 * Clear a latched NMI and enable NMI delivery by writing ports 0x70
 * and 0x61 directly.  Called once from trapinit().
 */
static void
nmienable(void)
{
        int x;

        /*
         * Hack: should be locked with NVRAM access.
         */
        outb(0x70, 0x80);               /* NMI latch clear */
        outb(0x70, 0);

        /* keep the low three bits of port 0x61 and pulse bits 0x0C on, then off */
        x = inb(0x61) & 0x07;           /* Enable NMI */
        outb(0x61, 0x0C|x);
        outb(0x61, x);
}
182
183 /*
184  * Minimal trap setup.  Just enough so that we can panic
185  * on traps (bugs) during kernel initialization.
186  * Called very early - malloc is not yet available.
187  */
188 void
189 trapinit0(void)
190 {
191         int d1, v;
192         ulong vaddr;
193         Segdesc *idt;
194
195         idt = (Segdesc*)IDTADDR;
196         vaddr = (ulong)vectortable;
197         for(v = 0; v < 256; v++){
198                 d1 = (vaddr & 0xFFFF0000)|SEGP;
199                 switch(v){
200
201                 case VectorBPT:
202                         d1 |= SEGPL(3)|SEGIG;
203                         break;
204
205                 case VectorSYSCALL:
206                         d1 |= SEGPL(3)|SEGIG;
207                         break;
208
209                 default:
210                         d1 |= SEGPL(0)|SEGIG;
211                         break;
212                 }
213                 idt[v].d0 = (vaddr & 0xFFFF)|(KESEL<<16);
214                 idt[v].d1 = d1;
215                 vaddr += 6;
216         }
217 }
218
219 void
220 trapinit(void)
221 {
222         /*
223          * Special traps.
224          * Syscall() is called directly without going through trap().
225          */
226         trapenable(VectorDE, debugexc, 0, "debugexc");
227         trapenable(VectorBPT, debugbpt, 0, "debugpt");
228         trapenable(VectorPF, fault386, 0, "fault386");
229         trapenable(Vector2F, doublefault, 0, "doublefault");
230         trapenable(Vector15, unexpected, 0, "unexpected");
231         nmienable();
232
233         addarchfile("irqalloc", 0444, irqallocread, nil);
234         trapinited = 1;
235 }
236
/*
 * Names of trap vectors 0-31, indexed by vector number.  trap() uses
 * them in the "sys: trap: ..." note and in its panic message.
 */
static char* excname[32] = {
        "divide error",
        "debug exception",
        "nonmaskable interrupt",
        "breakpoint",
        "overflow",
        "bounds check",
        "invalid opcode",
        "coprocessor not available",
        "double fault",
        "coprocessor segment overrun",
        "invalid TSS",
        "segment not present",
        "stack exception",
        "general protection violation",
        "page fault",
        "15 (reserved)",
        "coprocessor error",
        "alignment check",
        "machine check",
        "simd error",
        "20 (reserved)",
        "21 (reserved)",
        "22 (reserved)",
        "23 (reserved)",
        "24 (reserved)",
        "25 (reserved)",
        "26 (reserved)",
        "27 (reserved)",
        "28 (reserved)",
        "29 (reserved)",
        "30 (reserved)",
        "31 (reserved)",
};
271
272 /*
273  *  keep histogram of interrupt service times
274  */
275 void
276 intrtime(Mach*, int vno)
277 {
278         ulong diff;
279         ulong x;
280
281         x = perfticks();
282         diff = x - m->perf.intrts;
283         m->perf.intrts = x;
284
285         m->perf.inintr += diff;
286         if(up == nil && m->perf.inidle > diff)
287                 m->perf.inidle -= diff;
288
289         diff /= m->cpumhz*100;          /* quantum = 100µsec */
290         if(diff >= Ntimevec)
291                 diff = Ntimevec-1;
292         intrtimes[vno][diff]++;
293 }
294
295 /* go to user space */
296 void
297 kexit(Ureg*)
298 {
299         uvlong t;
300         Tos *tos;
301
302         /* precise time accounting, kernel exit */
303         tos = (Tos*)(USTKTOP-sizeof(Tos));
304         cycles(&t);
305         tos->kcycles += t - up->kentry;
306         tos->pcycles = t + up->pcycles;
307         tos->pid = up->pid;
308 }
309
/*
 *  All traps come here.  It is slower to have all traps call trap()
 *  rather than directly vectoring the handler.  However, this avoids a
 *  lot of code duplication and possible bugs.  The only exception is
 *  VectorSYSCALL.
 *  Trap is called with interrupts disabled via interrupt-gates.
 *
 *  Dispatch on ureg->trap:
 *   - a vector with registered handlers runs its isr hook, every
 *     handler on the chain, then its eoi hook;
 *   - an unregistered exception (vno < nelem(excname)) taken in user
 *     mode is posted to the process as a "sys: trap: ..." note;
 *   - any other unregistered vector >= VectorPIC is counted as a
 *     spurious interrupt and the function returns early;
 *   - everything else panics, except for a few known kernel
 *     instructions whose faults are patched up and resumed.
 */
void
trap(Ureg* ureg)
{
        int clockintr, i, vno, user;
        char buf[ERRMAX];
        Vctl *ctl, *v;
        Mach *mach;

        if(!trapinited){
                /* fault386 can give a better error message */
                if(ureg->trap == VectorPF)
                        fault386(ureg, nil);
                panic("trap %lud: not ready", ureg->trap);
        }

        /* start of the interval measured by intrtime() */
        m->perf.intrts = perfticks();
        user = userureg(ureg);
        if(user){
                up->dbgreg = ureg;
                cycles(&up->kentry);    /* kexit() charges the time since this point */
        }

        clockintr = 0;

        vno = ureg->trap;
        if(ctl = vctl[vno]){
                if(ctl->isintr){
                        m->intr++;
                        if(vno >= VectorPIC && vno != VectorSYSCALL)
                                m->lastintr = ctl->irq;
                }

                /* the isr and eoi hooks of the chain head bracket all handlers */
                if(ctl->isr)
                        ctl->isr(vno);
                for(v = ctl; v != nil; v = v->next){
                        if(v->f)
                                v->f(ureg, v->a);
                }
                if(ctl->eoi)
                        ctl->eoi(vno);

                if(ctl->isintr){
                        intrtime(m, vno);

                        if(ctl->irq == IrqCLOCK || ctl->irq == IrqTIMER)
                                clockintr = 1;

                        if(up && !clockintr)
                                preempted();
                }
        }
        else if(vno < nelem(excname) && user){
                /* unhandled exception in user mode: deliver it as a note */
                spllo();
                sprint(buf, "sys: trap: %s", excname[vno]);
                postnote(up, 1, buf, NDebug);
        }
        else if(vno >= VectorPIC && vno != VectorSYSCALL){
                /*
                 * An unknown interrupt.
                 * Check for a default IRQ7. This can happen when
                 * the IRQ input goes away before the acknowledge.
                 * In this case, a 'default IRQ7' is generated, but
                 * the corresponding bit in the ISR isn't set.
                 * In fact, just ignore all such interrupts.
                 */

                /* call all interrupt routines, just in case */
                for(i = VectorPIC; i <= MaxIrqLAPIC; i++){
                        ctl = vctl[i];
                        if(ctl == nil)
                                continue;
                        if(!ctl->isintr)
                                continue;
                        for(v = ctl; v != nil; v = v->next){
                                if(v->f)
                                        v->f(ureg, v->a);
                        }
                        /* should we do this? */
                        if(ctl->eoi)
                                ctl->eoi(i);
                }

                /* clear the interrupt */
                i8259isr(vno);

                /* diagnostics below are compiled in but disabled */
                if(0)print("cpu%d: spurious interrupt %d, last %d\n",
                        m->machno, vno, m->lastintr);
                if(0)if(conf.nmach > 1){
                        for(i = 0; i < MAXMACH; i++){
                                if(active.machs[i] == 0)
                                        continue;
                                mach = MACHP(i);
                                if(m->machno == mach->machno)
                                        continue;
                                print(" cpu%d: last %d",
                                        mach->machno, mach->lastintr);
                        }
                        print("\n");
                }
                m->spuriousintr++;
                /* early return: skips the scheduling and note delivery at the end */
                if(user)
                        kexit(ureg);
                return;
        }
        else{
                if(vno == VectorNMI){
                        /*
                         * Don't re-enable, it confuses the crash dumps.
                        nmienable();
                         */
                        iprint("cpu%d: nmi PC %#8.8lux, status %ux\n",
                                m->machno, ureg->pc, inb(0x61));
                        /* every processor except cpu0 spins here forever */
                        while(m->machno != 0)
                                ;
                }

                if(!user){
                        void (*pc)(void);
                        ulong *sp; 

                        extern void _forkretpopgs(void);
                        extern void _forkretpopfs(void);
                        extern void _forkretpopes(void);
                        extern void _forkretpopds(void);
                        extern void _forkretiret(void);
                        extern void _rdmsrinst(void);
                        extern void _wrmsrinst(void);

                        extern void load_fs(ulong);
                        extern void load_gs(ulong);

                        load_fs(NULLSEL);
                        load_gs(NULLSEL);

                        sp = (ulong*)&ureg->sp; /* kernel stack */
                        pc = (void*)ureg->pc;

                        /*
                         * Faults at these labels are recoverable: patch the
                         * offending value and return to retry or skip the
                         * instruction.
                         */
                        if(pc == _forkretpopgs || pc == _forkretpopfs || 
                           pc == _forkretpopes || pc == _forkretpopds){
                                /* bad segment selector being popped: replace it with NULLSEL */
                                if(vno == VectorGPF || vno == VectorSNP){
                                        sp[0] = NULLSEL;
                                        return;
                                }
                        } else if(pc == _forkretiret){
                                /* bad selectors in the return frame: force the user ones */
                                if(vno == VectorGPF || vno == VectorSNP){
                                        sp[1] = UESEL;  /* CS */
                                        sp[4] = UDSEL;  /* SS */
                                        return;
                                }
                        } else if(pc == _rdmsrinst || pc == _wrmsrinst){
                                /* faulting MSR access: flag it in bp and step over the 2-byte instruction */
                                if(vno == VectorGPF){
                                        ureg->bp = -1;
                                        ureg->pc += 2;
                                        return;
                                }
                        }
                }

                /* nothing could handle it: dump state and panic */
                dumpregs(ureg);
                if(!user){
                        ureg->sp = (ulong)&ureg->sp;
                        _dumpstack(ureg);
                }
                if(vno < nelem(excname))
                        panic("%s", excname[vno]);
                panic("unknown trap/intr: %d", vno);
        }
        splhi();

        /* delaysched set because we held a lock or because our quantum ended */
        if(up && up->delaysched && clockintr){
                sched();
                splhi();
        }

        /* returning to user mode: deliver pending notes and update time accounting */
        if(user){
                if(up->procctl || up->nnote)
                        notify(ureg);
                kexit(ureg);
        }
}
498
499 /*
500  *  dump registers
501  */
502 void
503 dumpregs2(Ureg* ureg)
504 {
505         if(up)
506                 iprint("cpu%d: registers for %s %lud\n",
507                         m->machno, up->text, up->pid);
508         else
509                 iprint("cpu%d: registers for kernel\n", m->machno);
510         iprint("FLAGS=%luX TRAP=%luX ECODE=%luX PC=%luX",
511                 ureg->flags, ureg->trap, ureg->ecode, ureg->pc);
512         if(userureg(ureg))
513                 iprint(" SS=%4.4luX USP=%luX\n", ureg->ss & 0xFFFF, ureg->usp);
514         else
515                 iprint(" SP=%luX\n", (ulong)&ureg->sp);
516         iprint("  AX %8.8luX  BX %8.8luX  CX %8.8luX  DX %8.8luX\n",
517                 ureg->ax, ureg->bx, ureg->cx, ureg->dx);
518         iprint("  SI %8.8luX  DI %8.8luX  BP %8.8luX\n",
519                 ureg->si, ureg->di, ureg->bp);
520         iprint("  CS %4.4luX  DS %4.4luX  ES %4.4luX  FS %4.4luX  GS %4.4luX\n",
521                 ureg->cs & 0xFFFF, ureg->ds & 0xFFFF, ureg->es & 0xFFFF,
522                 ureg->fs & 0xFFFF, ureg->gs & 0xFFFF);
523 }
524
525 void
526 dumpregs(Ureg* ureg)
527 {
528         dumpregs2(ureg);
529
530         /*
531          * Processor control registers.
532          * If machine check exception, time stamp counter, page size extensions
533          * or enhanced virtual 8086 mode extensions are supported, there is a
534          * CR4. If there is a CR4 and machine check extensions, read the machine
535          * check address and machine check type registers if RDMSR supported.
536          */
537         iprint("  CR0 %8.8lux CR2 %8.8lux CR3 %8.8lux",
538                 getcr0(), getcr2(), getcr3());
539         if(m->cpuiddx & (Mce|Tsc|Pse|Vmex)){
540                 iprint(" CR4 %8.8lux\n", getcr4());
541                 if(ureg->trap == 18)
542                         dumpmcregs();
543         }
544         iprint("\n  ur %#p up %#p\n", ureg, up);
545 }
546
547
/*
 * Fill in enough of Ureg to get a stack trace, and call a function.
 * Used by debugging interface rdb.
 *
 * Only pc and sp are set: pc is the caller's return address and sp is
 * the address of this function's argument.  All other fields of the
 * Ureg are left uninitialized.
 */
void
callwithureg(void (*fn)(Ureg*))
{
        Ureg ureg;
        ureg.pc = getcallerpc(&fn);
        ureg.sp = (ulong)&fn;
        fn(&ureg);
}
560
/*
 * Print a stack trace on the console in the form of a ktrace command
 * that can be pasted into a shell.
 *
 * The stack is scanned from the address of a local variable up to the
 * end of the current stack (the process kernel stack or the per-Mach
 * stack).  Words that fall between KTZERO and etext, and the last few
 * words of the stack, are printed as address=value pairs.  For an NMI
 * the raw stack contents are printed as well.
 * Setting *nodumpstack to anything but "0" in the configuration
 * suppresses the dump.
 */
static void
_dumpstack(Ureg *ureg)
{
        uintptr l, v, i, estack;
        extern ulong etext;
        int x;
        char *s;

        if((s = getconf("*nodumpstack")) != nil && strcmp(s, "0") != 0){
                iprint("dumpstack disabled\n");
                return;
        }
        iprint("dumpstack\n");

        /* x accumulates the iprint() return values; it is never read */
        x = 0;
        x += iprint("ktrace /kernel/path %.8lux %.8lux <<EOF\n", ureg->pc, ureg->sp);
        i = 0;
        /* find the end of whichever stack we are running on; give up if neither matches */
        if(up
        && (uintptr)&l >= (uintptr)up->kstack
        && (uintptr)&l <= (uintptr)up->kstack+KSTACK)
                estack = (uintptr)up->kstack+KSTACK;
        else if((uintptr)&l >= (uintptr)m->stack
        && (uintptr)&l <= (uintptr)m+MACHSIZE)
                estack = (uintptr)m+MACHSIZE;
        else
                return;
        x += iprint("estackx %p\n", estack);

        for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){
                v = *(uintptr*)l;
                if((KTZERO < v && v < (uintptr)&etext) || estack-l < 32){
                        /*
                         * Could Pick off general CALL (((uchar*)v)[-5] == 0xE8)
                         * and CALL indirect through AX
                         * (((uchar*)v)[-2] == 0xFF && ((uchar*)v)[-1] == 0xD0),
                         * but this is too clever and misses faulting address.
                         */
                        x += iprint("%.8p=%.8p ", l, v);
                        i++;
                }
                /* four pairs per output line */
                if(i == 4){
                        i = 0;
                        x += iprint("\n");
                }
        }
        if(i)
                iprint("\n");
        iprint("EOF\n");

        if(ureg->trap != VectorNMI)
                return;

        /* NMI: also dump every word of the stack, eight per line */
        i = 0;
        for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){
                iprint("%.8p ", *(uintptr*)l);
                if(++i == 8){
                        i = 0;
                        iprint("\n");
                }
        }
        if(i)
                iprint("\n");
}
624
/*
 * Print a stack trace of the caller: _dumpstack() is run through
 * callwithureg(), which supplies the pc and sp to start from.
 */
void
dumpstack(void)
{
        callwithureg(_dumpstack);
}
630
631 static void
632 debugexc(Ureg *, void *)
633 {
634         u32int dr6, m;
635         char buf[ERRMAX];
636         char *p, *e;
637         int i;
638
639         dr6 = getdr6();
640         if(up == nil)
641                 panic("kernel debug exception dr6=%#.8ux", dr6);
642         putdr6(up->dr[6]);
643         m = up->dr[7];
644         m = (m >> 4 | m >> 3) & 8 | (m >> 3 | m >> 2) & 4 | (m >> 2 | m >> 1) & 2 | (m >> 1 | m) & 1;
645         m &= dr6;
646         if(m == 0){
647                 sprint(buf, "sys: debug exception dr6=%#.8ux", dr6);
648                 postnote(up, 1, buf, NDebug);
649         }else{
650                 p = buf;
651                 e = buf + sizeof(buf);
652                 p = seprint(p, e, "sys: watchpoint ");
653                 for(i = 0; i < 4; i++)
654                         if((m & 1<<i) != 0)
655                                 p = seprint(p, e, "%d%s", i, (m >> i + 1 != 0) ? "," : "");
656                 postnote(up, 1, buf, NDebug);
657         }
658 }
659
660 static void
661 debugbpt(Ureg* ureg, void*)
662 {
663         char buf[ERRMAX];
664
665         if(up == 0)
666                 panic("kernel bpt");
667         /* restore pc to instruction that caused the trap */
668         ureg->pc--;
669         sprint(buf, "sys: breakpoint");
670         postnote(up, 1, buf, NDebug);
671 }
672
/*
 * Handler for the double fault vector (installed by trapinit()):
 * nothing can be recovered, so panic.
 */
static void
doublefault(Ureg*, void*)
{
        panic("double fault");
}
678
/*
 * Handler for Vector15 (installed by trapinit()): report the trap
 * number and carry on.
 */
static void
unexpected(Ureg* ureg, void*)
{
        print("unexpected trap %lud; ignoring\n", ureg->trap);
}
684
685 extern void checkpages(void);
686 extern void checkfault(ulong, ulong);
687 static void
688 fault386(Ureg* ureg, void*)
689 {
690         ulong addr;
691         int read, user, n, insyscall;
692         char buf[ERRMAX];
693
694         addr = getcr2();
695         read = !(ureg->ecode & 2);
696
697         user = userureg(ureg);
698         if(!user){
699                 if(vmapsync(addr))
700                         return;
701                 if(addr >= USTKTOP)
702                         panic("kernel fault: bad address pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
703                 if(up == nil)
704                         panic("kernel fault: no user process pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
705         }
706         if(up == nil)
707                 panic("user fault: up=0 pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
708
709         insyscall = up->insyscall;
710         up->insyscall = 1;
711         n = fault(addr, read);
712         if(n < 0){
713                 if(!user){
714                         dumpregs(ureg);
715                         panic("fault: 0x%lux", addr);
716                 }
717                 checkpages();
718                 checkfault(addr, ureg->pc);
719                 sprint(buf, "sys: trap: fault %s addr=0x%lux",
720                         read ? "read" : "write", addr);
721                 postnote(up, 1, buf, NDebug);
722         }
723         up->insyscall = insyscall;
724 }
725
726 /*
727  *  system calls
728  */
729 #include "../port/systab.h"
730
731 /*
732  *  Syscall is called directly from assembler without going through trap().
733  */
734 void
735 syscall(Ureg* ureg)
736 {
737         char *e;
738         ulong   sp;
739         long    ret;
740         int     i, s;
741         ulong scallnr;
742         vlong startns, stopns;
743
744         if(!userureg(ureg))
745                 panic("syscall: cs 0x%4.4luX", ureg->cs);
746
747         cycles(&up->kentry);
748
749         m->syscall++;
750         up->insyscall = 1;
751         up->pc = ureg->pc;
752         up->dbgreg = ureg;
753
754         sp = ureg->usp;
755         scallnr = ureg->ax;
756         up->scallnr = scallnr;
757
758         spllo();
759
760         up->nerrlab = 0;
761         ret = -1;
762         if(!waserror()){
763                 if(sp<(USTKTOP-BY2PG) || sp>(USTKTOP-sizeof(Sargs)-BY2WD))
764                         validaddr(sp, sizeof(Sargs)+BY2WD, 0);
765
766                 up->s = *((Sargs*)(sp+BY2WD));
767
768                 if(up->procctl == Proc_tracesyscall){
769                         syscallfmt(scallnr, ureg->pc, (va_list)up->s.args);
770                         s = splhi();
771                         up->procctl = Proc_stopme;
772                         procctl();
773                         splx(s);
774                         startns = todget(nil);
775                 }
776
777                 if(scallnr >= nsyscall || systab[scallnr] == 0){
778                         pprint("bad sys call number %lud pc %lux\n",
779                                 scallnr, ureg->pc);
780                         postnote(up, 1, "sys: bad sys call", NDebug);
781                         error(Ebadarg);
782                 }
783                 up->psstate = sysctab[scallnr];
784                 ret = systab[scallnr]((va_list)up->s.args);
785                 poperror();
786         }else{
787                 /* failure: save the error buffer for errstr */
788                 e = up->syserrstr;
789                 up->syserrstr = up->errstr;
790                 up->errstr = e;
791                 if(0 && up->pid == 1)
792                         print("syscall %lud error %s\n", scallnr, up->syserrstr);
793         }
794         if(up->nerrlab){
795                 print("bad errstack [%lud]: %d extra\n", scallnr, up->nerrlab);
796                 for(i = 0; i < NERR; i++)
797                         print("sp=%lux pc=%lux\n",
798                                 up->errlab[i].sp, up->errlab[i].pc);
799                 panic("error stack");
800         }
801
802         /*
803          *  Put return value in frame.  On the x86 the syscall is
804          *  just another trap and the return value from syscall is
805          *  ignored.  On other machines the return value is put into
806          *  the results register by caller of syscall.
807          */
808         ureg->ax = ret;
809
810         if(up->procctl == Proc_tracesyscall){
811                 stopns = todget(nil);
812                 sysretfmt(scallnr, (va_list)up->s.args, ret, startns, stopns);
813                 s = splhi();
814                 up->procctl = Proc_stopme;
815                 procctl();
816                 splx(s);
817         }
818
819         up->insyscall = 0;
820         up->psstate = 0;
821
822         if(scallnr == NOTED)
823                 noted(ureg, *((ulong*)up->s.args));
824
825         if(scallnr!=RFORK && (up->procctl || up->nnote)){
826                 splhi();
827                 notify(ureg);
828         }
829         /* if we delayed sched because we held a lock, sched now */
830         if(up->delaysched)
831                 sched();
832         kexit(ureg);
833 }
834
/*
 *  Call user, if necessary, with note.
 *  Pass user the Ureg struct and the note on his stack.
 *
 *  Returns 1 when ureg has been rewritten so that the process resumes
 *  in its note handler, 0 when there is no note to deliver or the
 *  process is already handling one.  A note that cannot be delivered
 *  (non-user note with no handler or while already notified, or a bad
 *  handler or stack address) ends the process through pexit().
 *
 *  Frame built on the user stack, from high to low addresses:
 *      256 bytes left untouched below the old usp
 *      copy of the Ureg
 *      one word holding the previous up->ureg
 *      ERRMAX bytes of note text
 *      arg 2: pointer to the note text
 *      arg 1: pointer to the Ureg copy
 *      arg 0: 0, in the return pc slot
 */
int
notify(Ureg* ureg)
{
        int l;
        ulong s, sp;
        Note *n;

        if(up->procctl)
                procctl();
        if(up->nnote == 0)
                return 0;

        /* save live floating point state and mark it illegal until noted() clears the mark */
        if(up->fpstate == FPactive){
                fpsave(&up->fpsave);
                up->fpstate = FPinactive;
        }
        up->fpstate |= FPillegal;

        s = spllo();
        qlock(&up->debug);
        up->notepending = 0;
        n = &up->note[0];
        /* system notes get the faulting pc appended, truncating the text if needed */
        if(strncmp(n->msg, "sys:", 4) == 0){
                l = strlen(n->msg);
                if(l > ERRMAX-15)       /* " pc=0x12345678\0" */
                        l = ERRMAX-15;
                sprint(n->msg+l, " pc=0x%.8lux", ureg->pc);
        }

        /* a non-user note that cannot be handled right now is fatal */
        if(n->flag!=NUser && (up->notified || up->notify==0)){
                qunlock(&up->debug);
                if(n->flag == NDebug)
                        pprint("suicide: %s\n", n->msg);
                pexit(n->msg, n->flag!=NDebug);
        }

        /* already in a note handler: leave the note queued */
        if(up->notified){
                qunlock(&up->debug);
                splhi();
                return 0;
        }

        if(!up->notify){
                qunlock(&up->debug);
                pexit(n->msg, n->flag!=NDebug);
        }
        sp = ureg->usp;
        sp -= 256;      /* debugging: preserve context causing problem */
        sp -= sizeof(Ureg);
if(0) print("%s %lud: notify %.8lux %.8lux %.8lux %s\n",
        up->text, up->pid, ureg->pc, ureg->usp, sp, n->msg);

        /* the handler address must be readable and the whole frame writable */
        if(!okaddr((uintptr)up->notify, 1, 0)
        || !okaddr(sp-ERRMAX-4*BY2WD, sizeof(Ureg)+ERRMAX+4*BY2WD, 1)){
                qunlock(&up->debug);
                pprint("suicide: bad address in notify\n");
                pexit("Suicide", 0);
        }

        memmove((Ureg*)sp, ureg, sizeof(Ureg));
        *(Ureg**)(sp-BY2WD) = up->ureg; /* word under Ureg is old up->ureg */
        up->ureg = (void*)sp;
        sp -= BY2WD+ERRMAX;
        memmove((char*)sp, up->note[0].msg, ERRMAX);
        sp -= 3*BY2WD;
        *(ulong*)(sp+2*BY2WD) = sp+3*BY2WD;             /* arg 2 is string */
        *(ulong*)(sp+1*BY2WD) = (ulong)up->ureg;        /* arg 1 is ureg* */
        *(ulong*)(sp+0*BY2WD) = 0;                      /* arg 0 is pc */
        /* resume in the handler, on the new stack, with the user segment selectors */
        ureg->usp = sp;
        ureg->pc = (ulong)up->notify;
        ureg->cs = UESEL;
        ureg->ss = ureg->ds = ureg->es = UDSEL;
        up->notified = 1;
        /* move the delivered note to lastnote and close the gap in the queue */
        up->nnote--;
        memmove(&up->lastnote, &up->note[0], sizeof(Note));
        memmove(&up->note[0], &up->note[1], up->nnote*sizeof(Note));

        qunlock(&up->debug);
        splx(s);
        return 1;
}
920
921 /*
922  *   Return user to state before notify()
923  */
924 void
925 noted(Ureg* ureg, ulong arg0)
926 {
927         Ureg *nureg;
928         ulong oureg, sp;
929
930         qlock(&up->debug);
931         if(arg0!=NRSTR && !up->notified) {
932                 qunlock(&up->debug);
933                 pprint("call to noted() when not notified\n");
934                 pexit("Suicide", 0);
935         }
936         up->notified = 0;
937
938         nureg = up->ureg;       /* pointer to user returned Ureg struct */
939
940         up->fpstate &= ~FPillegal;
941
942         /* sanity clause */
943         oureg = (ulong)nureg;
944         if(!okaddr(oureg-BY2WD, BY2WD+sizeof(Ureg), 0)){
945                 qunlock(&up->debug);
946                 pprint("bad ureg in noted or call to noted when not notified\n");
947                 pexit("Suicide", 0);
948         }
949
950         /* don't let user change system flags */
951         nureg->flags = (ureg->flags & ~0xCD5) | (nureg->flags & 0xCD5);
952         nureg->cs |= 3;
953         nureg->ss |= 3;
954
955         memmove(ureg, nureg, sizeof(Ureg));
956
957         switch(arg0){
958         case NCONT:
959         case NRSTR:
960 if(0) print("%s %lud: noted %.8lux %.8lux\n",
961         up->text, up->pid, nureg->pc, nureg->usp);
962                 if(!okaddr(nureg->pc, 1, 0) || !okaddr(nureg->usp, BY2WD, 0)){
963                         qunlock(&up->debug);
964                         pprint("suicide: trap in noted\n");
965                         pexit("Suicide", 0);
966                 }
967                 up->ureg = (Ureg*)(*(ulong*)(oureg-BY2WD));
968                 qunlock(&up->debug);
969                 break;
970
971         case NSAVE:
972                 if(!okaddr(nureg->pc, BY2WD, 0)
973                 || !okaddr(nureg->usp, BY2WD, 0)){
974                         qunlock(&up->debug);
975                         pprint("suicide: trap in noted\n");
976                         pexit("Suicide", 0);
977                 }
978                 qunlock(&up->debug);
979                 sp = oureg-4*BY2WD-ERRMAX;
980                 splhi();
981                 ureg->sp = sp;
982                 ((ulong*)sp)[1] = oureg;        /* arg 1 0(FP) is ureg* */
983                 ((ulong*)sp)[0] = 0;            /* arg 0 is pc */
984                 break;
985
986         default:
987                 up->lastnote.flag = NDebug;
988                 /* fall through */
989
990         case NDFLT:
991                 qunlock(&up->debug);
992                 if(up->lastnote.flag == NDebug)
993                         pprint("suicide: %s\n", up->lastnote.msg);
994                 pexit(up->lastnote.msg, up->lastnote.flag!=NDebug);
995         }
996 }
997
998 uintptr
999 execregs(uintptr entry, ulong ssize, ulong nargs)
1000 {
1001         ulong *sp;
1002         Ureg *ureg;
1003
1004         sp = (ulong*)(USTKTOP - ssize);
1005         *--sp = nargs;
1006
1007         ureg = up->dbgreg;
1008         ureg->usp = (ulong)sp;
1009         ureg->pc = entry;
1010         ureg->cs = UESEL;
1011         ureg->ss = ureg->ds = ureg->es = UDSEL;
1012         ureg->fs = ureg->gs = NULLSEL;
1013         return USTKTOP-sizeof(Tos);             /* address of kernel/user shared data */
1014 }
1015
1016 /*
1017  *  return the userpc the last exception happened at
1018  */
1019 uintptr
1020 userpc(void)
1021 {
1022         Ureg *ureg;
1023
1024         ureg = (Ureg*)up->dbgreg;
1025         return ureg->pc;
1026 }
1027
1028 /* This routine must save the values of registers the user is not permitted
1029  * to write from devproc and then restore the saved values before returning.
1030  */
1031 void
1032 setregisters(Ureg* ureg, char* pureg, char* uva, int n)
1033 {
1034         ulong flags;
1035
1036         flags = ureg->flags;
1037         memmove(pureg, uva, n);
1038         ureg->flags = (ureg->flags & 0xCD5) | (flags & ~0xCD5);
1039         ureg->cs |= 3;
1040         ureg->ss |= 3;
1041 }
1042
1043 static void
1044 linkproc(void)
1045 {
1046         spllo();
1047         up->kpfun(up->kparg);
1048         pexit("kproc dying", 0);
1049 }
1050
1051 void
1052 kprocchild(Proc* p, void (*func)(void*), void* arg)
1053 {
1054         /*
1055          * gotolabel() needs a word on the stack in
1056          * which to place the return PC used to jump
1057          * to linkproc().
1058          */
1059         p->sched.pc = (ulong)linkproc;
1060         p->sched.sp = (ulong)p->kstack+KSTACK-BY2WD;
1061
1062         p->kpfun = func;
1063         p->kparg = arg;
1064 }
1065
1066 void
1067 forkchild(Proc *p, Ureg *ureg)
1068 {
1069         Ureg *cureg;
1070
1071         /*
1072          * Add 2*BY2WD to the stack to account for
1073          *  - the return PC
1074          *  - trap's argument (ur)
1075          */
1076         p->sched.sp = (ulong)p->kstack+KSTACK-(sizeof(Ureg)+2*BY2WD);
1077         p->sched.pc = (ulong)forkret;
1078
1079         cureg = (Ureg*)(p->sched.sp+2*BY2WD);
1080         memmove(cureg, ureg, sizeof(Ureg));
1081         /* return value of syscall in child */
1082         cureg->ax = 0;
1083
1084         /* Things from bottom of syscall which were never executed */
1085         p->psstate = 0;
1086         p->insyscall = 0;
1087 }
1088
1089 /* Give enough context in the ureg to produce a kernel stack for
1090  * a sleeping process
1091  */
1092 void
1093 setkernur(Ureg* ureg, Proc* p)
1094 {
1095         ureg->pc = p->sched.pc;
1096         ureg->sp = p->sched.sp+4;
1097 }
1098
1099 ulong
1100 dbgpc(Proc *p)
1101 {
1102         Ureg *ureg;
1103
1104         ureg = p->dbgreg;
1105         if(ureg == 0)
1106                 return 0;
1107
1108         return ureg->pc;
1109 }