]> git.lizzy.rs Git - plan9front.git/blob - sys/src/9/pc64/trap.c
devip: cleanup rudp.c
[plan9front.git] / sys / src / 9 / pc64 / trap.c
1 #include        "u.h"
2 #include        "tos.h"
3 #include        "../port/lib.h"
4 #include        "mem.h"
5 #include        "dat.h"
6 #include        "fns.h"
7 #include        "io.h"
8 #include        "ureg.h"
9 #include        "../port/error.h"
10 #include        <trace.h>
11
12 static int trapinited;
13
14 void    noted(Ureg*, ulong);
15
16 static void debugexc(Ureg*, void*);
17 static void debugbpt(Ureg*, void*);
18 static void faultamd64(Ureg*, void*);
19 static void doublefault(Ureg*, void*);
20 static void unexpected(Ureg*, void*);
21 static void _dumpstack(Ureg*);
22
23 static Lock vctllock;
24 static Vctl *vctl[256];
25
26 enum
27 {
28         Ntimevec = 20           /* number of time buckets for each intr */
29 };
30 ulong intrtimes[256][Ntimevec];
31
32 void
33 intrenable(int irq, void (*f)(Ureg*, void*), void* a, int tbdf, char *name)
34 {
35         int vno;
36         Vctl *v;
37
38         if(f == nil){
39                 print("intrenable: nil handler for %d, tbdf 0x%uX for %s\n",
40                         irq, tbdf, name);
41                 return;
42         }
43
44         if(tbdf != BUSUNKNOWN && (irq == 0xff || irq == 0)){
45                 print("intrenable: got unassigned irq %d, tbdf 0x%uX for %s\n",
46                         irq, tbdf, name);
47                 irq = -1;
48         }
49
50
51         /*
52          * IRQ2 doesn't really exist, it's used to gang the interrupt
53          * controllers together. A device set to IRQ2 will appear on
54          * the second interrupt controller as IRQ9.
55          */
56         if(irq == 2)
57                 irq = 9;
58
59         if((v = xalloc(sizeof(Vctl))) == nil)
60                 panic("intrenable: out of memory");
61         v->isintr = 1;
62         v->irq = irq;
63         v->tbdf = tbdf;
64         v->f = f;
65         v->a = a;
66         strncpy(v->name, name, KNAMELEN-1);
67         v->name[KNAMELEN-1] = 0;
68
69         ilock(&vctllock);
70         vno = arch->intrenable(v);
71         if(vno == -1){
72                 iunlock(&vctllock);
73                 print("intrenable: couldn't enable irq %d, tbdf 0x%uX for %s\n",
74                         irq, tbdf, v->name);
75                 xfree(v);
76                 return;
77         }
78         if(vctl[vno]){
79                 if(vctl[vno]->isr != v->isr || vctl[vno]->eoi != v->eoi)
80                         panic("intrenable: handler: %s %s %#p %#p %#p %#p",
81                                 vctl[vno]->name, v->name,
82                                 vctl[vno]->isr, v->isr, vctl[vno]->eoi, v->eoi);
83                 v->next = vctl[vno];
84         }
85         vctl[vno] = v;
86         iunlock(&vctllock);
87 }
88
89 void
90 intrdisable(int irq, void (*f)(Ureg *, void *), void *a, int tbdf, char *name)
91 {
92         Vctl **pv, *v;
93         int vno;
94
95         if(irq == 2)
96                 irq = 9;
97         if(arch->intrvecno == nil || (tbdf != BUSUNKNOWN && (irq == 0xff || irq == 0))){
98                 /*
99                  * on APIC machine, irq is pretty meaningless
100                  * and disabling a the vector is not implemented.
101                  * however, we still want to remove the matching
102                  * Vctl entry to prevent calling Vctl.f() with a
103                  * stale Vctl.a pointer.
104                  */
105                 irq = -1;
106                 vno = VectorPIC;
107         } else {
108                 vno = arch->intrvecno(irq);
109         }
110         ilock(&vctllock);
111         do {
112                 for(pv = &vctl[vno]; (v = *pv) != nil; pv = &v->next){
113                         if(v->isintr && (v->irq == irq || irq == -1)
114                         && v->tbdf == tbdf && v->f == f && v->a == a
115                         && strcmp(v->name, name) == 0)
116                                 break;
117                 }
118                 if(v != nil){
119                         *pv = v->next;
120                         xfree(v);
121
122                         if(irq != -1 && vctl[vno] == nil && arch->intrdisable != nil)
123                                 arch->intrdisable(irq);
124                         break;
125                 }
126         } while(irq == -1 && ++vno <= MaxVectorAPIC);
127         iunlock(&vctllock);
128 }
129
130 static long
131 irqallocread(Chan*, void *a, long n, vlong offset)
132 {
133         char buf[2*(11+1)+KNAMELEN+1+1];
134         int vno, m;
135         Vctl *v;
136
137         if(n < 0 || offset < 0)
138                 error(Ebadarg);
139
140         for(vno=0; vno<nelem(vctl); vno++){
141                 for(v=vctl[vno]; v; v=v->next){
142                         m = snprint(buf, sizeof(buf), "%11d %11d %.*s\n", vno, v->irq, KNAMELEN, v->name);
143                         offset -= m;
144                         if(offset >= 0)
145                                 continue;
146                         if(n > -offset)
147                                 n = -offset;
148                         offset += m;
149                         memmove(a, buf+offset, n);
150                         return n;
151                 }
152         }
153         return 0;
154 }
155
156 void
157 trapenable(int vno, void (*f)(Ureg*, void*), void* a, char *name)
158 {
159         Vctl *v;
160
161         if(vno < 0 || vno >= VectorPIC)
162                 panic("trapenable: vno %d", vno);
163         if((v = xalloc(sizeof(Vctl))) == nil)
164                 panic("trapenable: out of memory");
165         v->tbdf = BUSUNKNOWN;
166         v->f = f;
167         v->a = a;
168         strncpy(v->name, name, KNAMELEN-1);
169         v->name[KNAMELEN-1] = 0;
170
171         ilock(&vctllock);
172         if(vctl[vno])
173                 v->next = vctl[vno]->next;
174         vctl[vno] = v;
175         iunlock(&vctllock);
176 }
177
/*
 * Clear the NMI latch through port 0x70, then enable NMI by pulsing
 * bits 0x0C of port 0x61 while keeping its low three bits as read.
 */
static void
nmienable(void)
{
	int keep;

	/*
	 * Hack: should be locked with NVRAM access.
	 */
	outb(0x70, 0x80);		/* NMI latch clear */
	outb(0x70, 0);

	/* Enable NMI */
	keep = inb(0x61) & 0x07;
	outb(0x61, 0x0C|keep);
	outb(0x61, keep);
}
193
194 void
195 trapinit0(void)
196 {
197         u32int d1, v;
198         uintptr vaddr;
199         Segdesc *idt;
200
201         idt = (Segdesc*)IDTADDR;
202         vaddr = (uintptr)vectortable;
203         for(v = 0; v < 256; v++){
204                 d1 = (vaddr & 0xFFFF0000)|SEGP;
205                 switch(v){
206
207                 case VectorBPT:
208                         d1 |= SEGPL(3)|SEGIG;
209                         break;
210
211                 case VectorSYSCALL:
212                         d1 |= SEGPL(3)|SEGIG;
213                         break;
214
215                 default:
216                         d1 |= SEGPL(0)|SEGIG;
217                         break;
218                 }
219
220                 idt->d0 = (vaddr & 0xFFFF)|(KESEL<<16);
221                 idt->d1 = d1;
222                 idt++;
223
224                 idt->d0 = (vaddr >> 32);
225                 idt->d1 = 0;
226                 idt++;
227
228                 vaddr += 6;
229         }
230 }
231
232 void
233 trapinit(void)
234 {
235         /*
236          * Special traps.
237          * Syscall() is called directly without going through trap().
238          */
239         trapenable(VectorDE, debugexc, 0, "debugexc");
240         trapenable(VectorBPT, debugbpt, 0, "debugpt");
241         trapenable(VectorPF, faultamd64, 0, "faultamd64");
242         trapenable(Vector2F, doublefault, 0, "doublefault");
243         trapenable(Vector15, unexpected, 0, "unexpected");
244         nmienable();
245         addarchfile("irqalloc", 0444, irqallocread, nil);
246         trapinited = 1;
247 }
248
/*
 * Names of the first 32 vectors, indexed by vector number.
 * Used for the "sys: trap: ..." note text and for panic messages.
 */
static char *excname[32] = {
	"divide error",				/* 0 */
	"debug exception",			/* 1 */
	"nonmaskable interrupt",		/* 2 */
	"breakpoint",				/* 3 */
	"overflow",				/* 4 */
	"bounds check",				/* 5 */
	"invalid opcode",			/* 6 */
	"coprocessor not available",		/* 7 */
	"double fault",				/* 8 */
	"coprocessor segment overrun",		/* 9 */
	"invalid TSS",				/* 10 */
	"segment not present",			/* 11 */
	"stack exception",			/* 12 */
	"general protection violation",		/* 13 */
	"page fault",				/* 14 */
	"15 (reserved)",
	"coprocessor error",			/* 16 */
	"alignment check",			/* 17 */
	"machine check",			/* 18 */
	"simd error",				/* 19 */
	"20 (reserved)",
	"21 (reserved)",
	"22 (reserved)",
	"23 (reserved)",
	"24 (reserved)",
	"25 (reserved)",
	"26 (reserved)",
	"27 (reserved)",
	"28 (reserved)",
	"29 (reserved)",
	"30 (reserved)",
	"31 (reserved)",
};
283
284 /*
285  *  keep histogram of interrupt service times
286  */
287 void
288 intrtime(Mach*, int vno)
289 {
290         ulong diff;
291         ulong x;
292
293         x = perfticks();
294         diff = x - m->perf.intrts;
295         m->perf.intrts = x;
296
297         m->perf.inintr += diff;
298         if(up == nil && m->perf.inidle > diff)
299                 m->perf.inidle -= diff;
300
301         diff /= m->cpumhz*100;          /* quantum = 100µsec */
302         if(diff >= Ntimevec)
303                 diff = Ntimevec-1;
304         intrtimes[vno][diff]++;
305 }
306
307 /* go to user space */
308 void
309 kexit(Ureg*)
310 {
311         uvlong t;
312         Tos *tos;
313
314         /* precise time accounting, kernel exit */
315         tos = (Tos*)((uintptr)USTKTOP-sizeof(Tos));
316         cycles(&t);
317         tos->kcycles += t - up->kentry;
318         tos->pcycles = t + up->pcycles;
319         tos->pid = up->pid;
320 }
321
322 void
323 trap(Ureg *ureg)
324 {
325         int clockintr, i, vno, user;
326         char buf[ERRMAX];
327         Vctl *ctl, *v;
328         Mach *mach;
329
330         if(!trapinited){
331                 /* faultamd64 can give a better error message */
332                 if(ureg->type == VectorPF)
333                         faultamd64(ureg, nil);
334                 panic("trap %llud: not ready", ureg->type);
335         }
336
337         m->perf.intrts = perfticks();
338         user = userureg(ureg);
339         if(user){
340                 up->dbgreg = ureg;
341                 cycles(&up->kentry);
342         }
343
344         clockintr = 0;
345
346         vno = ureg->type;
347
348         if(ctl = vctl[vno]){
349                 if(ctl->isintr){
350                         m->intr++;
351                         if(vno >= VectorPIC)
352                                 m->lastintr = ctl->irq;
353                 }
354                 if(ctl->isr)
355                         ctl->isr(vno);
356                 for(v = ctl; v != nil; v = v->next){
357                         if(v->f)
358                                 v->f(ureg, v->a);
359                 }
360                 if(ctl->eoi)
361                         ctl->eoi(vno);
362
363                 if(ctl->isintr){
364                         intrtime(m, vno);
365
366                         if(ctl->irq == IrqCLOCK || ctl->irq == IrqTIMER)
367                                 clockintr = 1;
368
369                         if(up && !clockintr)
370                                 preempted();
371                 }
372         }
373         else if(vno < nelem(excname) && user){
374                 spllo();
375                 sprint(buf, "sys: trap: %s", excname[vno]);
376                 postnote(up, 1, buf, NDebug);
377         }
378         else if(vno >= VectorPIC){
379                 /*
380                  * An unknown interrupt.
381                  * Check for a default IRQ7. This can happen when
382                  * the IRQ input goes away before the acknowledge.
383                  * In this case, a 'default IRQ7' is generated, but
384                  * the corresponding bit in the ISR isn't set.
385                  * In fact, just ignore all such interrupts.
386                  */
387
388                 /* call all interrupt routines, just in case */
389                 for(i = VectorPIC; i <= MaxIrqLAPIC; i++){
390                         ctl = vctl[i];
391                         if(ctl == nil)
392                                 continue;
393                         if(!ctl->isintr)
394                                 continue;
395                         for(v = ctl; v != nil; v = v->next){
396                                 if(v->f)
397                                         v->f(ureg, v->a);
398                         }
399                         /* should we do this? */
400                         if(ctl->eoi)
401                                 ctl->eoi(i);
402                 }
403
404                 /* clear the interrupt */
405                 i8259isr(vno);
406
407                 if(0)print("cpu%d: spurious interrupt %d, last %d\n",
408                         m->machno, vno, m->lastintr);
409                 if(0)if(conf.nmach > 1){
410                         for(i = 0; i < MAXMACH; i++){
411                                 if(active.machs[i] == 0)
412                                         continue;
413                                 mach = MACHP(i);
414                                 if(m->machno == mach->machno)
415                                         continue;
416                                 print(" cpu%d: last %d",
417                                         mach->machno, mach->lastintr);
418                         }
419                         print("\n");
420                 }
421                 m->spuriousintr++;
422                 if(user)
423                         kexit(ureg);
424                 return;
425         }
426         else{
427                 if(vno == VectorNMI){
428                         /*
429                          * Don't re-enable, it confuses the crash dumps.
430                         nmienable();
431                          */
432                         iprint("cpu%d: nmi PC %#p, status %ux\n",
433                                 m->machno, ureg->pc, inb(0x61));
434                         while(m->machno != 0)
435                                 ;
436                 }
437
438                 if(!user){
439                         void (*pc)(void);
440
441                         extern void _rdmsrinst(void);
442                         extern void _wrmsrinst(void);
443
444                         pc = (void*)ureg->pc;
445                         if(pc == _rdmsrinst || pc == _wrmsrinst){
446                                 if(vno == VectorGPF){
447                                         ureg->bp = -1;
448                                         ureg->pc += 2;
449                                         return;
450                                 }
451                         }
452                 }
453
454                 dumpregs(ureg);
455                 if(!user){
456                         ureg->sp = (uintptr)&ureg->sp;
457                         _dumpstack(ureg);
458                 }
459                 if(vno < nelem(excname))
460                         panic("%s", excname[vno]);
461                 panic("unknown trap/intr: %d", vno);
462         }
463         splhi();
464
465         /* delaysched set because we held a lock or because our quantum ended */
466         if(up && up->delaysched && clockintr){
467                 sched();
468                 splhi();
469         }
470
471         if(user){
472                 if(up->procctl || up->nnote)
473                         notify(ureg);
474                 kexit(ureg);
475         }
476 }
477
478 void
479 dumpregs(Ureg* ureg)
480 {
481         if(up)
482                 iprint("cpu%d: registers for %s %lud\n",
483                         m->machno, up->text, up->pid);
484         else
485                 iprint("cpu%d: registers for kernel\n", m->machno);
486
487         iprint("  AX %.16lluX  BX %.16lluX  CX %.16lluX\n",
488                 ureg->ax, ureg->bx, ureg->cx);
489         iprint("  DX %.16lluX  SI %.16lluX  DI %.16lluX\n",
490                 ureg->dx, ureg->si, ureg->di);
491         iprint("  BP %.16lluX  R8 %.16lluX  R9 %.16lluX\n",
492                 ureg->bp, ureg->r8, ureg->r9);
493         iprint(" R10 %.16lluX R11 %.16lluX R12 %.16lluX\n",
494                 ureg->r10, ureg->r11, ureg->r12);
495         iprint(" R13 %.16lluX R14 %.16lluX R15 %.16lluX\n",
496                 ureg->r13, ureg->r14, ureg->r15);
497         iprint("  CS %.4lluX   SS %.4lluX    PC %.16lluX  SP %.16lluX\n",
498                 ureg->cs & 0xffff, ureg->ss & 0xffff, ureg->pc, ureg->sp);
499         iprint("TYPE %.2lluX  ERROR %.4lluX FLAGS %.8lluX\n",
500                 ureg->type & 0xff, ureg->error & 0xffff, ureg->flags & 0xffffffff);
501
502         /*
503          * Processor control registers.
504          * If machine check exception, time stamp counter, page size extensions
505          * or enhanced virtual 8086 mode extensions are supported, there is a
506          * CR4. If there is a CR4 and machine check extensions, read the machine
507          * check address and machine check type registers if RDMSR supported.
508          */
509         iprint(" CR0 %8.8llux CR2 %16.16llux CR3 %16.16llux",
510                 getcr0(), getcr2(), getcr3());
511         if(m->cpuiddx & (Mce|Tsc|Pse|Vmex)){
512                 iprint(" CR4 %16.16llux\n", getcr4());
513                 if(ureg->type == 18)
514                         dumpmcregs();
515         }
516         iprint("  ur %#p up %#p\n", ureg, up);
517 }
518
519
520 /*
521  * Fill in enough of Ureg to get a stack trace, and call a function.
522  * Used by debugging interface rdb.
523  */
524 void
525 callwithureg(void (*fn)(Ureg*))
526 {
527         Ureg ureg;
528         ureg.pc = getcallerpc(&fn);
529         ureg.sp = (uintptr)&fn;
530         fn(&ureg);
531 }
532
533 static void
534 _dumpstack(Ureg *ureg)
535 {
536         uintptr l, v, i, estack;
537         extern ulong etext;
538         int x;
539         char *s;
540
541         if((s = getconf("*nodumpstack")) != nil && strcmp(s, "0") != 0){
542                 iprint("dumpstack disabled\n");
543                 return;
544         }
545         iprint("dumpstack\n");
546
547         x = 0;
548         x += iprint("ktrace /kernel/path %#p %#p <<EOF\n", ureg->pc, ureg->sp);
549         i = 0;
550         if(up
551         && (uintptr)&l >= (uintptr)up->kstack
552         && (uintptr)&l <= (uintptr)up->kstack+KSTACK)
553                 estack = (uintptr)up->kstack+KSTACK;
554         else if((uintptr)&l >= (uintptr)m->stack
555         && (uintptr)&l <= (uintptr)m+MACHSIZE)
556                 estack = (uintptr)m+MACHSIZE;
557         else
558                 return;
559         x += iprint("estackx %p\n", estack);
560
561         for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){
562                 v = *(uintptr*)l;
563                 if((KTZERO < v && v < (uintptr)&etext) || estack-l < 32){
564                         /*
565                          * Could Pick off general CALL (((uchar*)v)[-5] == 0xE8)
566                          * and CALL indirect through AX
567                          * (((uchar*)v)[-2] == 0xFF && ((uchar*)v)[-2] == 0xD0),
568                          * but this is too clever and misses faulting address.
569                          */
570                         x += iprint("%.8lux=%.8lux ", (ulong)l, (ulong)v);
571                         i++;
572                 }
573                 if(i == 4){
574                         i = 0;
575                         x += iprint("\n");
576                 }
577         }
578         if(i)
579                 iprint("\n");
580         iprint("EOF\n");
581
582         if(ureg->type != VectorNMI)
583                 return;
584
585         i = 0;
586         for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){
587                 iprint("%.8p ", *(uintptr*)l);
588                 if(++i == 8){
589                         i = 0;
590                         iprint("\n");
591                 }
592         }
593         if(i)
594                 iprint("\n");
595 }
596
597 void
598 dumpstack(void)
599 {
600         callwithureg(_dumpstack);
601 }
602
603 static void
604 debugexc(Ureg *ureg, void *)
605 {
606         u64int dr6, m;
607         char buf[ERRMAX];
608         char *p, *e;
609         int i;
610
611         dr6 = getdr6();
612         if(up == nil)
613                 panic("kernel debug exception dr6=%#.8ullx", dr6);
614         putdr6(up->dr[6]);
615         if(userureg(ureg))
616                 qlock(&up->debug);
617         else if(!canqlock(&up->debug))
618                 return;
619         m = up->dr[7];
620         m = (m >> 4 | m >> 3) & 8 | (m >> 3 | m >> 2) & 4 | (m >> 2 | m >> 1) & 2 | (m >> 1 | m) & 1;
621         m &= dr6;
622         if(m == 0){
623                 sprint(buf, "sys: debug exception dr6=%#.8ullx", dr6);
624                 postnote(up, 0, buf, NDebug);
625         }else{
626                 p = buf;
627                 e = buf + sizeof(buf);
628                 p = seprint(p, e, "sys: watchpoint ");
629                 for(i = 0; i < 4; i++)
630                         if((m & 1<<i) != 0)
631                                 p = seprint(p, e, "%d%s", i, (m >> i + 1 != 0) ? "," : "");
632                 postnote(up, 0, buf, NDebug);
633         }
634         qunlock(&up->debug);
635 }
636                         
637 static void
638 debugbpt(Ureg* ureg, void*)
639 {
640         char buf[ERRMAX];
641
642         if(up == 0)
643                 panic("kernel bpt");
644         /* restore pc to instruction that caused the trap */
645         ureg->pc--;
646         sprint(buf, "sys: breakpoint");
647         postnote(up, 1, buf, NDebug);
648 }
649
650 static void
651 doublefault(Ureg*, void*)
652 {
653         panic("double fault");
654 }
655
656 static void
657 unexpected(Ureg* ureg, void*)
658 {
659         print("unexpected trap %llud; ignoring\n", ureg->type);
660 }
661
662 extern void checkpages(void);
663
664 static void
665 faultamd64(Ureg* ureg, void*)
666 {
667         uintptr addr;
668         int read, user, n, insyscall, f;
669         char buf[ERRMAX];
670
671         addr = getcr2();
672         read = !(ureg->error & 2);
673         user = userureg(ureg);
674         if(!user){
675                 if(addr >= USTKTOP)
676                         panic("kernel fault: bad address pc=%#p addr=%#p", ureg->pc, addr);
677                 if(up == nil)
678                         panic("kernel fault: no user process pc=%#p addr=%#p", ureg->pc, addr);
679         }
680         if(up == nil)
681                 panic("user fault: up=0 pc=%#p addr=%#p", ureg->pc, addr);
682
683         insyscall = up->insyscall;
684         up->insyscall = 1;
685         f = fpusave();
686         if(!user && waserror()){
687                 int s = splhi();
688                 fpurestore(f);
689                 up->insyscall = insyscall;
690                 splx(s);
691                 nexterror();
692         }
693         n = fault(addr, read);
694         if(n < 0){
695                 if(!user){
696                         dumpregs(ureg);
697                         panic("fault: %#p", addr);
698                 }
699                 checkpages();
700                 sprint(buf, "sys: trap: fault %s addr=%#p",
701                         read ? "read" : "write", addr);
702                 postnote(up, 1, buf, NDebug);
703         }
704         if(!user) poperror();
705         splhi();
706         fpurestore(f);
707         up->insyscall = insyscall;
708 }
709
710 /*
711  *  system calls
712  */
713 #include "../port/systab.h"
714
715 /*
716  *  Syscall is called directly from assembler without going through trap().
717  */
718 void
719 syscall(Ureg* ureg)
720 {
721         char *e;
722         uintptr sp;
723         long long ret;
724         int     i, s, f;
725         ulong scallnr;
726         vlong startns, stopns;
727
728         if(!userureg(ureg))
729                 panic("syscall: cs 0x%4.4lluX", ureg->cs);
730
731         cycles(&up->kentry);
732
733         m->syscall++;
734         up->insyscall = 1;
735         up->pc = ureg->pc;
736         up->dbgreg = ureg;
737
738         sp = ureg->sp;
739         scallnr = ureg->bp;     /* RARG */
740         up->scallnr = scallnr;
741         f = fpusave();
742         spllo();
743
744         ret = -1;
745         startns = 0;
746         up->nerrlab = 0;
747         if(!waserror()){
748                 if(sp<(USTKTOP-BY2PG) || sp>(USTKTOP-sizeof(Sargs)-BY2WD))
749                         validaddr(sp, sizeof(Sargs)+BY2WD, 0);
750
751                 up->s = *((Sargs*)(sp+BY2WD));
752                 if(0){
753                         syscallfmt(scallnr, ureg->pc, (va_list)up->s.args);
754                         print("syscall: %s\n", up->syscalltrace);
755                 }
756
757                 if(up->procctl == Proc_tracesyscall){
758                         syscallfmt(scallnr, ureg->pc, (va_list)up->s.args);
759                         s = splhi();
760                         up->procctl = Proc_stopme;
761                         procctl();
762                         splx(s);
763                         startns = todget(nil);
764                 }
765                 if(scallnr >= nsyscall || systab[scallnr] == 0){
766                         pprint("bad sys call number %lud pc %#p\n",
767                                 scallnr, ureg->pc);
768                         postnote(up, 1, "sys: bad sys call", NDebug);
769                         error(Ebadarg);
770                 }
771                 up->psstate = sysctab[scallnr];
772                 ret = systab[scallnr]((va_list)up->s.args);
773                 poperror();
774         }else{
775                 /* failure: save the error buffer for errstr */
776                 e = up->syserrstr;
777                 up->syserrstr = up->errstr;
778                 up->errstr = e;
779                 if(0 && up->pid == 1)
780                         print("syscall %lud error %s\n", scallnr, up->syserrstr);
781         }
782         if(up->nerrlab){
783                 print("bad errstack [%lud]: %d extra\n", scallnr, up->nerrlab);
784                 for(i = 0; i < NERR; i++)
785                         print("sp=%#p pc=%#p\n",
786                                 up->errlab[i].sp, up->errlab[i].pc);
787                 panic("error stack");
788         }
789         ureg->ax = ret;
790
791         if(0){
792                 print("syscallret: %lud %s %s ret=%lld\n", 
793                         up->pid, up->text, sysctab[scallnr], ret);
794         }
795
796         if(up->procctl == Proc_tracesyscall){
797                 stopns = todget(nil);
798                 sysretfmt(scallnr, (va_list)up->s.args, ret, startns, stopns);
799                 s = splhi();
800                 up->procctl = Proc_stopme;
801                 procctl();
802                 splx(s);
803         }
804
805         splhi();
806         fpurestore(f);
807         up->insyscall = 0;
808         up->psstate = 0;
809
810         if(scallnr == NOTED){
811                 noted(ureg, *((ulong*)up->s.args));
812                 /*
813                  * normally, syscall() returns to forkret()
814                  * not restoring general registers when going
815                  * to userspace. to completely restore the
816                  * interrupted context, we have to return thru
817                  * noteret(). we override return pc to jump to
818                  * to it when returning form syscall()
819                  */
820                 ((void**)&ureg)[-1] = (void*)noteret;
821         }
822
823         if(scallnr!=RFORK && (up->procctl || up->nnote)){
824                 notify(ureg);
825                 ((void**)&ureg)[-1] = (void*)noteret;   /* loads RARG */
826         }
827
828         /* if we delayed sched because we held a lock, sched now */
829         if(up->delaysched)
830                 sched();
831         kexit(ureg);
832 }
833
834 /*
835  *  Call user, if necessary, with note.
836  *  Pass user the Ureg struct and the note on his stack.
837  */
838 int
839 notify(Ureg* ureg)
840 {
841         int l;
842         uintptr sp;
843         Note *n;
844
845         if(up->procctl)
846                 procctl();
847         if(up->nnote == 0)
848                 return 0;
849         spllo();
850         qlock(&up->debug);
851         up->notepending = 0;
852         n = &up->note[0];
853         if(strncmp(n->msg, "sys:", 4) == 0){
854                 l = strlen(n->msg);
855                 if(l > ERRMAX-15)       /* " pc=0x12345678\0" */
856                         l = ERRMAX-15;
857                 sprint(n->msg+l, " pc=%#p", ureg->pc);
858         }
859
860         if(n->flag!=NUser && (up->notified || up->notify==0)){
861                 qunlock(&up->debug);
862                 if(n->flag == NDebug)
863                         pprint("suicide: %s\n", n->msg);
864                 pexit(n->msg, n->flag!=NDebug);
865         }
866
867         if(up->notified){
868                 qunlock(&up->debug);
869                 splhi();
870                 return 0;
871         }
872
873         if(!up->notify){
874                 qunlock(&up->debug);
875                 pexit(n->msg, n->flag!=NDebug);
876         }
877         sp = ureg->sp;
878         sp -= 256;      /* debugging: preserve context causing problem */
879         sp -= sizeof(Ureg);
880 if(0) print("%s %lud: notify %#p %#p %#p %s\n",
881         up->text, up->pid, ureg->pc, ureg->sp, sp, n->msg);
882
883         if(!okaddr((uintptr)up->notify, 1, 0)
884         || !okaddr(sp-ERRMAX-4*BY2WD, sizeof(Ureg)+ERRMAX+4*BY2WD, 1)){
885                 qunlock(&up->debug);
886                 pprint("suicide: bad address in notify\n");
887                 pexit("Suicide", 0);
888         }
889
890         memmove((Ureg*)sp, ureg, sizeof(Ureg));
891         *(Ureg**)(sp-BY2WD) = up->ureg; /* word under Ureg is old up->ureg */
892         up->ureg = (void*)sp;
893         sp -= BY2WD+ERRMAX;
894         memmove((char*)sp, up->note[0].msg, ERRMAX);
895         sp -= 3*BY2WD;
896         ((uintptr*)sp)[2] = sp + 3*BY2WD;       /* arg2 string */
897         ((uintptr*)sp)[1] = (uintptr)up->ureg;  /* arg1 is ureg* */
898         ((uintptr*)sp)[0] = 0;                  /* arg0 is pc */
899         ureg->sp = sp;
900         ureg->pc = (uintptr)up->notify;
901         ureg->bp = (uintptr)up->ureg;           /* arg1 passed in RARG */
902         ureg->cs = UESEL;
903         ureg->ss = UDSEL;
904         up->notified = 1;
905         up->nnote--;
906         memmove(&up->lastnote, &up->note[0], sizeof(Note));
907         memmove(&up->note[0], &up->note[1], up->nnote*sizeof(Note));
908         qunlock(&up->debug);
909         splhi();
910         if(up->fpstate == FPactive){
911                 fpsave(up->fpsave);
912                 up->fpstate = FPinactive;
913         }
914         up->fpstate |= FPillegal;
915         return 1;
916 }
917
918 /*
919  *   Return user to state before notify()
920  */
921 void
922 noted(Ureg* ureg, ulong arg0)
923 {
924         Ureg *nureg;
925         uintptr oureg, sp;
926
927         up->fpstate &= ~FPillegal;
928         spllo();
929         qlock(&up->debug);
930         if(arg0!=NRSTR && !up->notified) {
931                 qunlock(&up->debug);
932                 pprint("call to noted() when not notified\n");
933                 pexit("Suicide", 0);
934         }
935         up->notified = 0;
936
937         nureg = up->ureg;       /* pointer to user returned Ureg struct */
938
939         /* sanity clause */
940         oureg = (uintptr)nureg;
941         if(!okaddr(oureg-BY2WD, BY2WD+sizeof(Ureg), 0)){
942                 qunlock(&up->debug);
943                 pprint("bad ureg in noted or call to noted when not notified\n");
944                 pexit("Suicide", 0);
945         }
946
947         /* don't let user change system flags or segment registers */
948         setregisters(ureg, (char*)ureg, (char*)nureg, sizeof(Ureg));
949
950         switch(arg0){
951         case NCONT:
952         case NRSTR:
953 if(0) print("%s %lud: noted %#p %#p\n",
954         up->text, up->pid, nureg->pc, nureg->sp);
955                 if(!okaddr(nureg->pc, 1, 0) || !okaddr(nureg->sp, BY2WD, 0)){
956                         qunlock(&up->debug);
957                         pprint("suicide: trap in noted\n");
958                         pexit("Suicide", 0);
959                 }
960                 up->ureg = (Ureg*)(*(uintptr*)(oureg-BY2WD));
961                 qunlock(&up->debug);
962                 break;
963
964         case NSAVE:
965                 if(!okaddr(nureg->pc, 1, 0)
966                 || !okaddr(nureg->sp, BY2WD, 0)){
967                         qunlock(&up->debug);
968                         pprint("suicide: trap in noted\n");
969                         pexit("Suicide", 0);
970                 }
971                 qunlock(&up->debug);
972                 sp = oureg-4*BY2WD-ERRMAX;
973                 splhi();
974                 ureg->sp = sp;
975                 ureg->bp = oureg;               /* arg 1 passed in RARG */
976                 ((uintptr*)sp)[1] = oureg;      /* arg 1 0(FP) is ureg* */
977                 ((uintptr*)sp)[0] = 0;          /* arg 0 is pc */
978                 break;
979
980         default:
981                 up->lastnote.flag = NDebug;
982                 /* fall through */
983
984         case NDFLT:
985                 qunlock(&up->debug);
986                 if(up->lastnote.flag == NDebug)
987                         pprint("suicide: %s\n", up->lastnote.msg);
988                 pexit(up->lastnote.msg, up->lastnote.flag!=NDebug);
989         }
990 }
991
992 uintptr
993 execregs(uintptr entry, ulong ssize, ulong nargs)
994 {
995         uintptr *sp;
996         Ureg *ureg;
997
998         sp = (uintptr*)(USTKTOP - ssize);
999         *--sp = nargs;
1000         ureg = up->dbgreg;
1001         ureg->sp = (uintptr)sp;
1002         ureg->pc = entry;
1003         ureg->cs = UESEL;
1004         ureg->ss = UDSEL;
1005         ureg->r14 = ureg->r15 = 0;      /* extern user registers */
1006         return (uintptr)USTKTOP-sizeof(Tos);            /* address of kernel/user shared data */
1007 }
1008
1009 /*
1010  *  return the userpc the last exception happened at
1011  */
1012 uintptr
1013 userpc(void)
1014 {
1015         Ureg *ureg;
1016
1017         ureg = (Ureg*)up->dbgreg;
1018         return ureg->pc;
1019 }
1020
1021 /* This routine must save the values of registers the user is not permitted
1022  * to write from devproc and noted() and then restore the saved values before returning.
1023  */
1024 void
1025 setregisters(Ureg* ureg, char* pureg, char* uva, int n)
1026 {
1027         u64int flags;
1028
1029         flags = ureg->flags;
1030         memmove(pureg, uva, n);
1031         ureg->cs = UESEL;
1032         ureg->ss = UDSEL;
1033         ureg->flags = (ureg->flags & 0x00ff) | (flags & 0xff00);
1034         ureg->pc &= UADDRMASK;
1035 }
1036
1037 static void
1038 linkproc(void)
1039 {
1040         spllo();
1041         up->kpfun(up->kparg);
1042         pexit("kproc dying", 0);
1043 }
1044
1045 void
1046 kprocchild(Proc* p, void (*func)(void*), void* arg)
1047 {
1048         /*
1049          * gotolabel() needs a word on the stack in
1050          * which to place the return PC used to jump
1051          * to linkproc().
1052          */
1053         p->sched.pc = (uintptr)linkproc;
1054         p->sched.sp = (uintptr)p->kstack+KSTACK-BY2WD;
1055
1056         p->kpfun = func;
1057         p->kparg = arg;
1058 }
1059
1060 void
1061 forkchild(Proc *p, Ureg *ureg)
1062 {
1063         Ureg *cureg;
1064
1065         /*
1066          * Add 2*BY2WD to the stack to account for
1067          *  - the return PC
1068          *  - trap's argument (ur)
1069          */
1070         p->sched.sp = (uintptr)p->kstack+KSTACK-(sizeof(Ureg)+2*BY2WD);
1071         p->sched.pc = (uintptr)forkret;
1072
1073         cureg = (Ureg*)(p->sched.sp+2*BY2WD);
1074         memmove(cureg, ureg, sizeof(Ureg));
1075
1076         cureg->ax = 0;
1077
1078         /* Things from bottom of syscall which were never executed */
1079         p->psstate = 0;
1080         p->insyscall = 0;
1081 }
1082
1083 /* Give enough context in the ureg to produce a kernel stack for
1084  * a sleeping process
1085  */
1086 void
1087 setkernur(Ureg* ureg, Proc* p)
1088 {
1089         ureg->pc = p->sched.pc;
1090         ureg->sp = p->sched.sp+8;
1091         ureg->r14 = (uintptr)p;
1092 }
1093
1094 uintptr
1095 dbgpc(Proc *p)
1096 {
1097         Ureg *ureg;
1098
1099         ureg = p->dbgreg;
1100         if(ureg == nil)
1101                 return 0;
1102         return ureg->pc;
1103 }