]> git.lizzy.rs Git - plan9front.git/blob - sys/src/9/pc64/trap.c
bcm, bcm64: add support for device tree parameter passing
[plan9front.git] / sys / src / 9 / pc64 / trap.c
1 #include        "u.h"
2 #include        "tos.h"
3 #include        "../port/lib.h"
4 #include        "mem.h"
5 #include        "dat.h"
6 #include        "fns.h"
7 #include        "io.h"
8 #include        "ureg.h"
9 #include        "../port/error.h"
10 #include        <trace.h>
11
12 static int trapinited;
13
14 void    noted(Ureg*, ulong);
15
16 static void debugexc(Ureg*, void*);
17 static void debugbpt(Ureg*, void*);
18 static void faultamd64(Ureg*, void*);
19 static void doublefault(Ureg*, void*);
20 static void unexpected(Ureg*, void*);
21 static void _dumpstack(Ureg*);
22
23 static Lock vctllock;
24 static Vctl *vctl[256];
25
26 enum
27 {
28         Ntimevec = 20           /* number of time buckets for each intr */
29 };
30 ulong intrtimes[256][Ntimevec];
31
32 void
33 intrenable(int irq, void (*f)(Ureg*, void*), void* a, int tbdf, char *name)
34 {
35         int vno;
36         Vctl *v;
37
38         if(f == nil){
39                 print("intrenable: nil handler for %d, tbdf 0x%uX for %s\n",
40                         irq, tbdf, name);
41                 return;
42         }
43
44         if(tbdf != BUSUNKNOWN && (irq == 0xff || irq == 0)){
45                 print("intrenable: got unassigned irq %d, tbdf 0x%uX for %s\n",
46                         irq, tbdf, name);
47                 irq = -1;
48         }
49
50
51         /*
52          * IRQ2 doesn't really exist, it's used to gang the interrupt
53          * controllers together. A device set to IRQ2 will appear on
54          * the second interrupt controller as IRQ9.
55          */
56         if(irq == 2)
57                 irq = 9;
58
59         if((v = xalloc(sizeof(Vctl))) == nil)
60                 panic("intrenable: out of memory");
61         v->isintr = 1;
62         v->irq = irq;
63         v->tbdf = tbdf;
64         v->f = f;
65         v->a = a;
66         strncpy(v->name, name, KNAMELEN-1);
67         v->name[KNAMELEN-1] = 0;
68
69         ilock(&vctllock);
70         vno = arch->intrenable(v);
71         if(vno == -1){
72                 iunlock(&vctllock);
73                 print("intrenable: couldn't enable irq %d, tbdf 0x%uX for %s\n",
74                         irq, tbdf, v->name);
75                 xfree(v);
76                 return;
77         }
78         if(vctl[vno]){
79                 if(vctl[vno]->isr != v->isr || vctl[vno]->eoi != v->eoi)
80                         panic("intrenable: handler: %s %s %#p %#p %#p %#p",
81                                 vctl[vno]->name, v->name,
82                                 vctl[vno]->isr, v->isr, vctl[vno]->eoi, v->eoi);
83                 v->next = vctl[vno];
84         }
85         vctl[vno] = v;
86         iunlock(&vctllock);
87 }
88
89 void
90 intrdisable(int irq, void (*f)(Ureg *, void *), void *a, int tbdf, char *name)
91 {
92         Vctl **pv, *v;
93         int vno;
94
95         if(irq == 2)
96                 irq = 9;
97         if(arch->intrvecno == nil || (tbdf != BUSUNKNOWN && (irq == 0xff || irq == 0))){
98                 /*
99                  * on APIC machine, irq is pretty meaningless
100                  * and disabling a the vector is not implemented.
101                  * however, we still want to remove the matching
102                  * Vctl entry to prevent calling Vctl.f() with a
103                  * stale Vctl.a pointer.
104                  */
105                 irq = -1;
106                 vno = VectorPIC;
107         } else {
108                 vno = arch->intrvecno(irq);
109         }
110         ilock(&vctllock);
111         do {
112                 for(pv = &vctl[vno]; (v = *pv) != nil; pv = &v->next){
113                         if(v->isintr && (v->irq == irq || irq == -1)
114                         && v->tbdf == tbdf && v->f == f && v->a == a
115                         && strcmp(v->name, name) == 0)
116                                 break;
117                 }
118                 if(v != nil){
119                         *pv = v->next;
120                         xfree(v);
121
122                         if(irq != -1 && vctl[vno] == nil && arch->intrdisable != nil)
123                                 arch->intrdisable(irq);
124                         break;
125                 }
126         } while(irq == -1 && ++vno <= MaxVectorAPIC);
127         iunlock(&vctllock);
128 }
129
130 static long
131 irqallocread(Chan*, void *a, long n, vlong offset)
132 {
133         char buf[2*(11+1)+KNAMELEN+1+1];
134         int vno, m;
135         Vctl *v;
136
137         if(n < 0 || offset < 0)
138                 error(Ebadarg);
139
140         for(vno=0; vno<nelem(vctl); vno++){
141                 for(v=vctl[vno]; v; v=v->next){
142                         m = snprint(buf, sizeof(buf), "%11d %11d %.*s\n", vno, v->irq, KNAMELEN, v->name);
143                         offset -= m;
144                         if(offset >= 0)
145                                 continue;
146                         if(n > -offset)
147                                 n = -offset;
148                         offset += m;
149                         memmove(a, buf+offset, n);
150                         return n;
151                 }
152         }
153         return 0;
154 }
155
156 void
157 trapenable(int vno, void (*f)(Ureg*, void*), void* a, char *name)
158 {
159         Vctl *v;
160
161         if(vno < 0 || vno >= VectorPIC)
162                 panic("trapenable: vno %d", vno);
163         if((v = xalloc(sizeof(Vctl))) == nil)
164                 panic("trapenable: out of memory");
165         v->tbdf = BUSUNKNOWN;
166         v->f = f;
167         v->a = a;
168         strncpy(v->name, name, KNAMELEN-1);
169         v->name[KNAMELEN-1] = 0;
170
171         ilock(&vctllock);
172         if(vctl[vno])
173                 v->next = vctl[vno]->next;
174         vctl[vno] = v;
175         iunlock(&vctllock);
176 }
177
/*
 * Clear the NMI latch through port 0x70, then pulse bits 0x0C
 * of port 0x61 high and low again to enable NMI.
 */
static void
nmienable(void)
{
	int low;

	/*
	 * Hack: should be locked with NVRAM access.
	 */
	outb(0x70, 0x80);		/* NMI latch clear */
	outb(0x70, 0);

	low = inb(0x61) & 0x07;		/* keep only the low three bits */
	outb(0x61, 0x0C|low);		/* Enable NMI */
	outb(0x61, low);
}
193
194 void
195 trapinit0(void)
196 {
197         u32int d1, v;
198         uintptr vaddr;
199         Segdesc *idt;
200
201         idt = (Segdesc*)IDTADDR;
202         vaddr = (uintptr)vectortable;
203         for(v = 0; v < 256; v++){
204                 d1 = (vaddr & 0xFFFF0000)|SEGP;
205                 switch(v){
206
207                 case VectorBPT:
208                         d1 |= SEGPL(3)|SEGIG;
209                         break;
210
211                 case VectorSYSCALL:
212                         d1 |= SEGPL(3)|SEGIG;
213                         break;
214
215                 default:
216                         d1 |= SEGPL(0)|SEGIG;
217                         break;
218                 }
219
220                 idt->d0 = (vaddr & 0xFFFF)|(KESEL<<16);
221                 idt->d1 = d1;
222                 idt++;
223
224                 idt->d0 = (vaddr >> 32);
225                 idt->d1 = 0;
226                 idt++;
227
228                 vaddr += 6;
229         }
230 }
231
232 void
233 trapinit(void)
234 {
235         /*
236          * Special traps.
237          * Syscall() is called directly without going through trap().
238          */
239         trapenable(VectorDE, debugexc, 0, "debugexc");
240         trapenable(VectorBPT, debugbpt, 0, "debugpt");
241         trapenable(VectorPF, faultamd64, 0, "faultamd64");
242         trapenable(Vector2F, doublefault, 0, "doublefault");
243         trapenable(Vector15, unexpected, 0, "unexpected");
244         nmienable();
245         addarchfile("irqalloc", 0444, irqallocread, nil);
246         trapinited = 1;
247 }
248
/*
 * Names of the first 32 trap vectors, indexed by vector number.
 * trap() uses them for the note posted to a user process and
 * for the panic message on an unhandled trap.
 */
static char *excname[32] = {
	"divide error",			/* 0 */
	"debug exception",		/* 1 */
	"nonmaskable interrupt",	/* 2 */
	"breakpoint",			/* 3 */
	"overflow",			/* 4 */
	"bounds check",			/* 5 */
	"invalid opcode",		/* 6 */
	"coprocessor not available",	/* 7 */
	"double fault",			/* 8 */
	"coprocessor segment overrun",	/* 9 */
	"invalid TSS",			/* 10 */
	"segment not present",		/* 11 */
	"stack exception",		/* 12 */
	"general protection violation",	/* 13 */
	"page fault",			/* 14 */
	"15 (reserved)",
	"coprocessor error",		/* 16 */
	"alignment check",		/* 17 */
	"machine check",		/* 18 */
	"simd error",			/* 19 */
	"20 (reserved)",
	"21 (reserved)",
	"22 (reserved)",
	"23 (reserved)",
	"24 (reserved)",
	"25 (reserved)",
	"26 (reserved)",
	"27 (reserved)",
	"28 (reserved)",
	"29 (reserved)",
	"30 (reserved)",
	"31 (reserved)",
};
283
284 /*
285  *  keep histogram of interrupt service times
286  */
287 void
288 intrtime(Mach*, int vno)
289 {
290         ulong diff;
291         ulong x;
292
293         x = perfticks();
294         diff = x - m->perf.intrts;
295         m->perf.intrts = x;
296
297         m->perf.inintr += diff;
298         if(up == nil && m->perf.inidle > diff)
299                 m->perf.inidle -= diff;
300
301         diff /= m->cpumhz*100;          /* quantum = 100µsec */
302         if(diff >= Ntimevec)
303                 diff = Ntimevec-1;
304         intrtimes[vno][diff]++;
305 }
306
307 /* go to user space */
308 void
309 kexit(Ureg*)
310 {
311         uvlong t;
312         Tos *tos;
313
314         /* precise time accounting, kernel exit */
315         tos = (Tos*)((uintptr)USTKTOP-sizeof(Tos));
316         cycles(&t);
317         tos->kcycles += t - up->kentry;
318         tos->pcycles = t + up->pcycles;
319         tos->pid = up->pid;
320 }
321
322 void
323 trap(Ureg *ureg)
324 {
325         int clockintr, i, vno, user;
326         char buf[ERRMAX];
327         Vctl *ctl, *v;
328         Mach *mach;
329
330         if(!trapinited){
331                 /* faultamd64 can give a better error message */
332                 if(ureg->type == VectorPF)
333                         faultamd64(ureg, nil);
334                 panic("trap %llud: not ready", ureg->type);
335         }
336
337         m->perf.intrts = perfticks();
338         user = userureg(ureg);
339         if(user){
340                 up->dbgreg = ureg;
341                 cycles(&up->kentry);
342         }
343
344         clockintr = 0;
345
346         vno = ureg->type;
347
348         if(ctl = vctl[vno]){
349                 if(ctl->isintr){
350                         m->intr++;
351                         if(vno >= VectorPIC)
352                                 m->lastintr = ctl->irq;
353                 }
354                 if(ctl->isr)
355                         ctl->isr(vno);
356                 for(v = ctl; v != nil; v = v->next){
357                         if(v->f)
358                                 v->f(ureg, v->a);
359                 }
360                 if(ctl->eoi)
361                         ctl->eoi(vno);
362
363                 if(ctl->isintr){
364                         intrtime(m, vno);
365
366                         if(ctl->irq == IrqCLOCK || ctl->irq == IrqTIMER)
367                                 clockintr = 1;
368
369                         if(up && !clockintr)
370                                 preempted();
371                 }
372         }
373         else if(vno < nelem(excname) && user){
374                 spllo();
375                 sprint(buf, "sys: trap: %s", excname[vno]);
376                 postnote(up, 1, buf, NDebug);
377         }
378         else if(vno >= VectorPIC){
379                 /*
380                  * An unknown interrupt.
381                  * Check for a default IRQ7. This can happen when
382                  * the IRQ input goes away before the acknowledge.
383                  * In this case, a 'default IRQ7' is generated, but
384                  * the corresponding bit in the ISR isn't set.
385                  * In fact, just ignore all such interrupts.
386                  */
387
388                 /* call all interrupt routines, just in case */
389                 for(i = VectorPIC; i <= MaxIrqLAPIC; i++){
390                         ctl = vctl[i];
391                         if(ctl == nil)
392                                 continue;
393                         if(!ctl->isintr)
394                                 continue;
395                         for(v = ctl; v != nil; v = v->next){
396                                 if(v->f)
397                                         v->f(ureg, v->a);
398                         }
399                         /* should we do this? */
400                         if(ctl->eoi)
401                                 ctl->eoi(i);
402                 }
403
404                 /* clear the interrupt */
405                 i8259isr(vno);
406
407                 if(0)print("cpu%d: spurious interrupt %d, last %d\n",
408                         m->machno, vno, m->lastintr);
409                 if(0)if(conf.nmach > 1){
410                         for(i = 0; i < MAXMACH; i++){
411                                 if(active.machs[i] == 0)
412                                         continue;
413                                 mach = MACHP(i);
414                                 if(m->machno == mach->machno)
415                                         continue;
416                                 print(" cpu%d: last %d",
417                                         mach->machno, mach->lastintr);
418                         }
419                         print("\n");
420                 }
421                 m->spuriousintr++;
422                 if(user)
423                         kexit(ureg);
424                 return;
425         }
426         else{
427                 if(vno == VectorNMI){
428                         /*
429                          * Don't re-enable, it confuses the crash dumps.
430                         nmienable();
431                          */
432                         iprint("cpu%d: nmi PC %#p, status %ux\n",
433                                 m->machno, ureg->pc, inb(0x61));
434                         while(m->machno != 0)
435                                 ;
436                 }
437
438                 if(!user){
439                         void (*pc)(void);
440
441                         extern void _rdmsrinst(void);
442                         extern void _wrmsrinst(void);
443                         extern void _peekinst(void);
444
445                         pc = (void*)ureg->pc;
446                         if(pc == _rdmsrinst || pc == _wrmsrinst){
447                                 if(vno == VectorGPF){
448                                         ureg->bp = -1;
449                                         ureg->pc += 2;
450                                         return;
451                                 }
452                         } else if(pc == _peekinst){
453                                 if(vno == VectorGPF){
454                                         ureg->pc += 2;
455                                         return;
456                                 }
457                         }
458                 }
459
460                 dumpregs(ureg);
461                 if(!user){
462                         ureg->sp = (uintptr)&ureg->sp;
463                         _dumpstack(ureg);
464                 }
465                 if(vno < nelem(excname))
466                         panic("%s", excname[vno]);
467                 panic("unknown trap/intr: %d", vno);
468         }
469         splhi();
470
471         /* delaysched set because we held a lock or because our quantum ended */
472         if(up && up->delaysched && clockintr){
473                 sched();
474                 splhi();
475         }
476
477         if(user){
478                 if(up->procctl || up->nnote)
479                         notify(ureg);
480                 kexit(ureg);
481         }
482 }
483
484 void
485 dumpregs(Ureg* ureg)
486 {
487         if(up)
488                 iprint("cpu%d: registers for %s %lud\n",
489                         m->machno, up->text, up->pid);
490         else
491                 iprint("cpu%d: registers for kernel\n", m->machno);
492
493         iprint("  AX %.16lluX  BX %.16lluX  CX %.16lluX\n",
494                 ureg->ax, ureg->bx, ureg->cx);
495         iprint("  DX %.16lluX  SI %.16lluX  DI %.16lluX\n",
496                 ureg->dx, ureg->si, ureg->di);
497         iprint("  BP %.16lluX  R8 %.16lluX  R9 %.16lluX\n",
498                 ureg->bp, ureg->r8, ureg->r9);
499         iprint(" R10 %.16lluX R11 %.16lluX R12 %.16lluX\n",
500                 ureg->r10, ureg->r11, ureg->r12);
501         iprint(" R13 %.16lluX R14 %.16lluX R15 %.16lluX\n",
502                 ureg->r13, ureg->r14, ureg->r15);
503         iprint("  CS %.4lluX   SS %.4lluX    PC %.16lluX  SP %.16lluX\n",
504                 ureg->cs & 0xffff, ureg->ss & 0xffff, ureg->pc, ureg->sp);
505         iprint("TYPE %.2lluX  ERROR %.4lluX FLAGS %.8lluX\n",
506                 ureg->type & 0xff, ureg->error & 0xffff, ureg->flags & 0xffffffff);
507
508         /*
509          * Processor control registers.
510          * If machine check exception, time stamp counter, page size extensions
511          * or enhanced virtual 8086 mode extensions are supported, there is a
512          * CR4. If there is a CR4 and machine check extensions, read the machine
513          * check address and machine check type registers if RDMSR supported.
514          */
515         iprint(" CR0 %8.8llux CR2 %16.16llux CR3 %16.16llux",
516                 getcr0(), getcr2(), getcr3());
517         if(m->cpuiddx & (Mce|Tsc|Pse|Vmex)){
518                 iprint(" CR4 %16.16llux\n", getcr4());
519                 if(ureg->type == 18)
520                         dumpmcregs();
521         }
522         iprint("  ur %#p up %#p\n", ureg, up);
523 }
524
525
526 /*
527  * Fill in enough of Ureg to get a stack trace, and call a function.
528  * Used by debugging interface rdb.
529  */
530 void
531 callwithureg(void (*fn)(Ureg*))
532 {
533         Ureg ureg;
534         ureg.pc = getcallerpc(&fn);
535         ureg.sp = (uintptr)&fn;
536         fn(&ureg);
537 }
538
539 static void
540 _dumpstack(Ureg *ureg)
541 {
542         uintptr l, v, i, estack;
543         extern ulong etext;
544         int x;
545         char *s;
546
547         if((s = getconf("*nodumpstack")) != nil && strcmp(s, "0") != 0){
548                 iprint("dumpstack disabled\n");
549                 return;
550         }
551         iprint("dumpstack\n");
552
553         x = 0;
554         x += iprint("ktrace /kernel/path %#p %#p <<EOF\n", ureg->pc, ureg->sp);
555         i = 0;
556         if(up
557         && (uintptr)&l >= (uintptr)up->kstack
558         && (uintptr)&l <= (uintptr)up->kstack+KSTACK)
559                 estack = (uintptr)up->kstack+KSTACK;
560         else if((uintptr)&l >= (uintptr)m->stack
561         && (uintptr)&l <= (uintptr)m+MACHSIZE)
562                 estack = (uintptr)m+MACHSIZE;
563         else
564                 return;
565         x += iprint("estackx %p\n", estack);
566
567         for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){
568                 v = *(uintptr*)l;
569                 if((KTZERO < v && v < (uintptr)&etext) || estack-l < 32){
570                         /*
571                          * Could Pick off general CALL (((uchar*)v)[-5] == 0xE8)
572                          * and CALL indirect through AX
573                          * (((uchar*)v)[-2] == 0xFF && ((uchar*)v)[-2] == 0xD0),
574                          * but this is too clever and misses faulting address.
575                          */
576                         x += iprint("%.8lux=%.8lux ", (ulong)l, (ulong)v);
577                         i++;
578                 }
579                 if(i == 4){
580                         i = 0;
581                         x += iprint("\n");
582                 }
583         }
584         if(i)
585                 iprint("\n");
586         iprint("EOF\n");
587
588         if(ureg->type != VectorNMI)
589                 return;
590
591         i = 0;
592         for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){
593                 iprint("%.8p ", *(uintptr*)l);
594                 if(++i == 8){
595                         i = 0;
596                         iprint("\n");
597                 }
598         }
599         if(i)
600                 iprint("\n");
601 }
602
603 void
604 dumpstack(void)
605 {
606         callwithureg(_dumpstack);
607 }
608
609 static void
610 debugexc(Ureg *ureg, void *)
611 {
612         u64int dr6, m;
613         char buf[ERRMAX];
614         char *p, *e;
615         int i;
616
617         dr6 = getdr6();
618         if(up == nil)
619                 panic("kernel debug exception dr6=%#.8ullx", dr6);
620         putdr6(up->dr[6]);
621         if(userureg(ureg))
622                 qlock(&up->debug);
623         else if(!canqlock(&up->debug))
624                 return;
625         m = up->dr[7];
626         m = (m >> 4 | m >> 3) & 8 | (m >> 3 | m >> 2) & 4 | (m >> 2 | m >> 1) & 2 | (m >> 1 | m) & 1;
627         m &= dr6;
628         if(m == 0){
629                 sprint(buf, "sys: debug exception dr6=%#.8ullx", dr6);
630                 postnote(up, 0, buf, NDebug);
631         }else{
632                 p = buf;
633                 e = buf + sizeof(buf);
634                 p = seprint(p, e, "sys: watchpoint ");
635                 for(i = 0; i < 4; i++)
636                         if((m & 1<<i) != 0)
637                                 p = seprint(p, e, "%d%s", i, (m >> i + 1 != 0) ? "," : "");
638                 postnote(up, 0, buf, NDebug);
639         }
640         qunlock(&up->debug);
641 }
642                         
643 static void
644 debugbpt(Ureg* ureg, void*)
645 {
646         char buf[ERRMAX];
647
648         if(up == 0)
649                 panic("kernel bpt");
650         /* restore pc to instruction that caused the trap */
651         ureg->pc--;
652         sprint(buf, "sys: breakpoint");
653         postnote(up, 1, buf, NDebug);
654 }
655
656 static void
657 doublefault(Ureg*, void*)
658 {
659         panic("double fault");
660 }
661
662 static void
663 unexpected(Ureg* ureg, void*)
664 {
665         print("unexpected trap %llud; ignoring\n", ureg->type);
666 }
667
668 extern void checkpages(void);
669
670 static void
671 faultamd64(Ureg* ureg, void*)
672 {
673         uintptr addr;
674         int read, user, n, insyscall, f;
675         char buf[ERRMAX];
676
677         addr = getcr2();
678         read = !(ureg->error & 2);
679         user = userureg(ureg);
680         if(!user){
681                 {
682                         extern void _peekinst(void);
683                         
684                         if((void(*)(void))ureg->pc == _peekinst){
685                                 ureg->pc += 2;
686                                 return;
687                         }
688                 }
689                 if(addr >= USTKTOP)
690                         panic("kernel fault: bad address pc=%#p addr=%#p", ureg->pc, addr);
691                 if(up == nil)
692                         panic("kernel fault: no user process pc=%#p addr=%#p", ureg->pc, addr);
693         }
694         if(up == nil)
695                 panic("user fault: up=0 pc=%#p addr=%#p", ureg->pc, addr);
696
697         insyscall = up->insyscall;
698         up->insyscall = 1;
699         f = fpusave();
700         if(!user && waserror()){
701                 if(up->nerrlab == 0){
702                         pprint("suicide: sys: %s\n", up->errstr);
703                         pexit(up->errstr, 1);
704                 }
705                 int s = splhi();
706                 fpurestore(f);
707                 up->insyscall = insyscall;
708                 splx(s);
709                 nexterror();
710         }
711         n = fault(addr, read);
712         if(n < 0){
713                 if(!user){
714                         dumpregs(ureg);
715                         panic("fault: %#p", addr);
716                 }
717                 checkpages();
718                 sprint(buf, "sys: trap: fault %s addr=%#p",
719                         read ? "read" : "write", addr);
720                 postnote(up, 1, buf, NDebug);
721         }
722         if(!user) poperror();
723         splhi();
724         fpurestore(f);
725         up->insyscall = insyscall;
726 }
727
728 /*
729  *  system calls
730  */
731 #include "../port/systab.h"
732
733 /*
734  *  Syscall is called directly from assembler without going through trap().
735  */
736 void
737 syscall(Ureg* ureg)
738 {
739         char *e;
740         uintptr sp;
741         long long ret;
742         int     i, s, f;
743         ulong scallnr;
744         vlong startns, stopns;
745
746         if(!userureg(ureg))
747                 panic("syscall: cs 0x%4.4lluX", ureg->cs);
748
749         cycles(&up->kentry);
750
751         m->syscall++;
752         up->insyscall = 1;
753         up->pc = ureg->pc;
754         up->dbgreg = ureg;
755
756         sp = ureg->sp;
757         scallnr = ureg->bp;     /* RARG */
758         up->scallnr = scallnr;
759         f = fpusave();
760         spllo();
761
762         ret = -1;
763         startns = 0;
764         up->nerrlab = 0;
765         if(!waserror()){
766                 if(sp<(USTKTOP-BY2PG) || sp>(USTKTOP-sizeof(Sargs)-BY2WD))
767                         validaddr(sp, sizeof(Sargs)+BY2WD, 0);
768
769                 up->s = *((Sargs*)(sp+BY2WD));
770                 if(0){
771                         syscallfmt(scallnr, ureg->pc, (va_list)up->s.args);
772                         print("syscall: %s\n", up->syscalltrace);
773                 }
774
775                 if(up->procctl == Proc_tracesyscall){
776                         syscallfmt(scallnr, ureg->pc, (va_list)up->s.args);
777                         s = splhi();
778                         up->procctl = Proc_stopme;
779                         procctl();
780                         splx(s);
781                         startns = todget(nil);
782                 }
783                 if(scallnr >= nsyscall || systab[scallnr] == 0){
784                         pprint("bad sys call number %lud pc %#p\n",
785                                 scallnr, ureg->pc);
786                         postnote(up, 1, "sys: bad sys call", NDebug);
787                         error(Ebadarg);
788                 }
789                 up->psstate = sysctab[scallnr];
790                 ret = systab[scallnr]((va_list)up->s.args);
791                 poperror();
792         }else{
793                 /* failure: save the error buffer for errstr */
794                 e = up->syserrstr;
795                 up->syserrstr = up->errstr;
796                 up->errstr = e;
797                 if(0 && up->pid == 1)
798                         print("syscall %lud error %s\n", scallnr, up->syserrstr);
799         }
800         if(up->nerrlab){
801                 print("bad errstack [%lud]: %d extra\n", scallnr, up->nerrlab);
802                 for(i = 0; i < NERR; i++)
803                         print("sp=%#p pc=%#p\n",
804                                 up->errlab[i].sp, up->errlab[i].pc);
805                 panic("error stack");
806         }
807         ureg->ax = ret;
808
809         if(0){
810                 print("syscallret: %lud %s %s ret=%lld\n", 
811                         up->pid, up->text, sysctab[scallnr], ret);
812         }
813
814         if(up->procctl == Proc_tracesyscall){
815                 stopns = todget(nil);
816                 sysretfmt(scallnr, (va_list)up->s.args, ret, startns, stopns);
817                 s = splhi();
818                 up->procctl = Proc_stopme;
819                 procctl();
820                 splx(s);
821         }
822
823         splhi();
824         fpurestore(f);
825         up->insyscall = 0;
826         up->psstate = 0;
827
828         if(scallnr == NOTED){
829                 noted(ureg, *((ulong*)up->s.args));
830                 /*
831                  * normally, syscall() returns to forkret()
832                  * not restoring general registers when going
833                  * to userspace. to completely restore the
834                  * interrupted context, we have to return thru
835                  * noteret(). we override return pc to jump to
836                  * to it when returning form syscall()
837                  */
838                 ((void**)&ureg)[-1] = (void*)noteret;
839         }
840
841         if(scallnr!=RFORK && (up->procctl || up->nnote)){
842                 notify(ureg);
843                 ((void**)&ureg)[-1] = (void*)noteret;   /* loads RARG */
844         }
845
846         /* if we delayed sched because we held a lock, sched now */
847         if(up->delaysched)
848                 sched();
849         kexit(ureg);
850 }
851
852 /*
853  *  Call user, if necessary, with note.
854  *  Pass user the Ureg struct and the note on his stack.
855  */
856 int
857 notify(Ureg* ureg)
858 {
859         int l;
860         uintptr sp;
861         Note *n;
862
863         if(up->procctl)
864                 procctl();
865         if(up->nnote == 0)
866                 return 0;
867         spllo();
868         qlock(&up->debug);
869         up->notepending = 0;
870         n = &up->note[0];
871         if(strncmp(n->msg, "sys:", 4) == 0){
872                 l = strlen(n->msg);
873                 if(l > ERRMAX-15)       /* " pc=0x12345678\0" */
874                         l = ERRMAX-15;
875                 sprint(n->msg+l, " pc=%#p", ureg->pc);
876         }
877
878         if(n->flag!=NUser && (up->notified || up->notify==0)){
879                 qunlock(&up->debug);
880                 if(n->flag == NDebug)
881                         pprint("suicide: %s\n", n->msg);
882                 pexit(n->msg, n->flag!=NDebug);
883         }
884
885         if(up->notified){
886                 qunlock(&up->debug);
887                 splhi();
888                 return 0;
889         }
890
891         if(!up->notify){
892                 qunlock(&up->debug);
893                 pexit(n->msg, n->flag!=NDebug);
894         }
895         sp = ureg->sp;
896         sp -= 256;      /* debugging: preserve context causing problem */
897         sp -= sizeof(Ureg);
898 if(0) print("%s %lud: notify %#p %#p %#p %s\n",
899         up->text, up->pid, ureg->pc, ureg->sp, sp, n->msg);
900
901         if(!okaddr((uintptr)up->notify, 1, 0)
902         || !okaddr(sp-ERRMAX-4*BY2WD, sizeof(Ureg)+ERRMAX+4*BY2WD, 1)){
903                 qunlock(&up->debug);
904                 pprint("suicide: bad address in notify\n");
905                 pexit("Suicide", 0);
906         }
907
908         memmove((Ureg*)sp, ureg, sizeof(Ureg));
909         *(Ureg**)(sp-BY2WD) = up->ureg; /* word under Ureg is old up->ureg */
910         up->ureg = (void*)sp;
911         sp -= BY2WD+ERRMAX;
912         memmove((char*)sp, up->note[0].msg, ERRMAX);
913         sp -= 3*BY2WD;
914         ((uintptr*)sp)[2] = sp + 3*BY2WD;       /* arg2 string */
915         ((uintptr*)sp)[1] = (uintptr)up->ureg;  /* arg1 is ureg* */
916         ((uintptr*)sp)[0] = 0;                  /* arg0 is pc */
917         ureg->sp = sp;
918         ureg->pc = (uintptr)up->notify;
919         ureg->bp = (uintptr)up->ureg;           /* arg1 passed in RARG */
920         ureg->cs = UESEL;
921         ureg->ss = UDSEL;
922         up->notified = 1;
923         up->nnote--;
924         memmove(&up->lastnote, &up->note[0], sizeof(Note));
925         memmove(&up->note[0], &up->note[1], up->nnote*sizeof(Note));
926         qunlock(&up->debug);
927         splhi();
928         if(up->fpstate == FPactive){
929                 fpsave(up->fpsave);
930                 up->fpstate = FPinactive;
931         }
932         up->fpstate |= FPillegal;
933         return 1;
934 }
935
936 /*
937  *   Return user to state before notify()
938  */
939 void
940 noted(Ureg* ureg, ulong arg0)
941 {
942         Ureg *nureg;
943         uintptr oureg, sp;
944
945         up->fpstate &= ~FPillegal;
946         spllo();
947         qlock(&up->debug);
948         if(arg0!=NRSTR && !up->notified) {
949                 qunlock(&up->debug);
950                 pprint("call to noted() when not notified\n");
951                 pexit("Suicide", 0);
952         }
953         up->notified = 0;
954
955         nureg = up->ureg;       /* pointer to user returned Ureg struct */
956
957         /* sanity clause */
958         oureg = (uintptr)nureg;
959         if(!okaddr(oureg-BY2WD, BY2WD+sizeof(Ureg), 0)){
960                 qunlock(&up->debug);
961                 pprint("bad ureg in noted or call to noted when not notified\n");
962                 pexit("Suicide", 0);
963         }
964
965         /* don't let user change system flags or segment registers */
966         setregisters(ureg, (char*)ureg, (char*)nureg, sizeof(Ureg));
967
968         switch(arg0){
969         case NCONT:
970         case NRSTR:
971 if(0) print("%s %lud: noted %#p %#p\n",
972         up->text, up->pid, nureg->pc, nureg->sp);
973                 if(!okaddr(nureg->pc, 1, 0) || !okaddr(nureg->sp, BY2WD, 0)){
974                         qunlock(&up->debug);
975                         pprint("suicide: trap in noted\n");
976                         pexit("Suicide", 0);
977                 }
978                 up->ureg = (Ureg*)(*(uintptr*)(oureg-BY2WD));
979                 qunlock(&up->debug);
980                 break;
981
982         case NSAVE:
983                 if(!okaddr(nureg->pc, 1, 0)
984                 || !okaddr(nureg->sp, BY2WD, 0)){
985                         qunlock(&up->debug);
986                         pprint("suicide: trap in noted\n");
987                         pexit("Suicide", 0);
988                 }
989                 qunlock(&up->debug);
990                 sp = oureg-4*BY2WD-ERRMAX;
991                 splhi();
992                 ureg->sp = sp;
993                 ureg->bp = oureg;               /* arg 1 passed in RARG */
994                 ((uintptr*)sp)[1] = oureg;      /* arg 1 0(FP) is ureg* */
995                 ((uintptr*)sp)[0] = 0;          /* arg 0 is pc */
996                 break;
997
998         default:
999                 up->lastnote.flag = NDebug;
1000                 /* fall through */
1001
1002         case NDFLT:
1003                 qunlock(&up->debug);
1004                 if(up->lastnote.flag == NDebug)
1005                         pprint("suicide: %s\n", up->lastnote.msg);
1006                 pexit(up->lastnote.msg, up->lastnote.flag!=NDebug);
1007         }
1008 }
1009
1010 uintptr
1011 execregs(uintptr entry, ulong ssize, ulong nargs)
1012 {
1013         uintptr *sp;
1014         Ureg *ureg;
1015
1016         sp = (uintptr*)(USTKTOP - ssize);
1017         *--sp = nargs;
1018         ureg = up->dbgreg;
1019         ureg->sp = (uintptr)sp;
1020         ureg->pc = entry;
1021         ureg->cs = UESEL;
1022         ureg->ss = UDSEL;
1023         ureg->r14 = ureg->r15 = 0;      /* extern user registers */
1024         return (uintptr)USTKTOP-sizeof(Tos);            /* address of kernel/user shared data */
1025 }
1026
1027 /*
1028  *  return the userpc the last exception happened at
1029  */
1030 uintptr
1031 userpc(void)
1032 {
1033         Ureg *ureg;
1034
1035         ureg = (Ureg*)up->dbgreg;
1036         return ureg->pc;
1037 }
1038
1039 /* This routine must save the values of registers the user is not permitted
1040  * to write from devproc and noted() and then restore the saved values before returning.
1041  */
1042 void
1043 setregisters(Ureg* ureg, char* pureg, char* uva, int n)
1044 {
1045         u64int flags;
1046
1047         flags = ureg->flags;
1048         memmove(pureg, uva, n);
1049         ureg->cs = UESEL;
1050         ureg->ss = UDSEL;
1051         ureg->flags = (ureg->flags & 0x00ff) | (flags & 0xff00);
1052         ureg->pc &= UADDRMASK;
1053 }
1054
1055 static void
1056 linkproc(void)
1057 {
1058         spllo();
1059         up->kpfun(up->kparg);
1060         pexit("kproc dying", 0);
1061 }
1062
1063 void
1064 kprocchild(Proc* p, void (*func)(void*), void* arg)
1065 {
1066         /*
1067          * gotolabel() needs a word on the stack in
1068          * which to place the return PC used to jump
1069          * to linkproc().
1070          */
1071         p->sched.pc = (uintptr)linkproc;
1072         p->sched.sp = (uintptr)p->kstack+KSTACK-BY2WD;
1073
1074         p->kpfun = func;
1075         p->kparg = arg;
1076 }
1077
1078 void
1079 forkchild(Proc *p, Ureg *ureg)
1080 {
1081         Ureg *cureg;
1082
1083         /*
1084          * Add 2*BY2WD to the stack to account for
1085          *  - the return PC
1086          *  - trap's argument (ur)
1087          */
1088         p->sched.sp = (uintptr)p->kstack+KSTACK-(sizeof(Ureg)+2*BY2WD);
1089         p->sched.pc = (uintptr)forkret;
1090
1091         cureg = (Ureg*)(p->sched.sp+2*BY2WD);
1092         memmove(cureg, ureg, sizeof(Ureg));
1093
1094         cureg->ax = 0;
1095
1096         /* Things from bottom of syscall which were never executed */
1097         p->psstate = 0;
1098         p->insyscall = 0;
1099 }
1100
1101 /* Give enough context in the ureg to produce a kernel stack for
1102  * a sleeping process
1103  */
1104 void
1105 setkernur(Ureg* ureg, Proc* p)
1106 {
1107         ureg->pc = p->sched.pc;
1108         ureg->sp = p->sched.sp+8;
1109         ureg->r14 = (uintptr)p;
1110 }
1111
1112 uintptr
1113 dbgpc(Proc *p)
1114 {
1115         Ureg *ureg;
1116
1117         ureg = p->dbgreg;
1118         if(ureg == nil)
1119                 return 0;
1120         return ureg->pc;
1121 }