]> git.lizzy.rs Git - plan9front.git/blob - sys/src/9/pc/trap.c
97931807faf37b02f75074b5021b976c610c007b
[plan9front.git] / sys / src / 9 / pc / trap.c
1 #include        "u.h"
2 #include        "tos.h"
3 #include        "../port/lib.h"
4 #include        "mem.h"
5 #include        "dat.h"
6 #include        "fns.h"
7 #include        "io.h"
8 #include        "ureg.h"
9 #include        "../port/error.h"
10 #include        <trace.h>
11
12 static int trapinited;
13
14 void    noted(Ureg*, ulong);
15
16 static void debugexc(Ureg*, void*);
17 static void debugbpt(Ureg*, void*);
18 static void fault386(Ureg*, void*);
19 static void doublefault(Ureg*, void*);
20 static void unexpected(Ureg*, void*);
21 static void _dumpstack(Ureg*);
22
23 static Lock vctllock;
24 static Vctl *vctl[256];
25
26 enum
27 {
28         Ntimevec = 20           /* number of time buckets for each intr */
29 };
30 ulong intrtimes[256][Ntimevec];
31
32 void
33 intrenable(int irq, void (*f)(Ureg*, void*), void* a, int tbdf, char *name)
34 {
35         int vno;
36         Vctl *v;
37
38         if(f == nil){
39                 print("intrenable: nil handler for %d, tbdf 0x%uX for %s\n",
40                         irq, tbdf, name);
41                 return;
42         }
43         if(tbdf != BUSUNKNOWN && (irq == 0xff || irq == 0)){
44                 print("intrenable: got unassigned irq %d, tbdf 0x%uX for %s\n",
45                         irq, tbdf, name);
46                 irq = -1;
47         }
48
49         /*
50          * IRQ2 doesn't really exist, it's used to gang the interrupt
51          * controllers together. A device set to IRQ2 will appear on
52          * the second interrupt controller as IRQ9.
53          */
54         if(irq == 2)
55                 irq = 9;
56
57         if((v = xalloc(sizeof(Vctl))) == nil)
58                 panic("intrenable: out of memory");
59         v->isintr = 1;
60         v->irq = irq;
61         v->tbdf = tbdf;
62         v->f = f;
63         v->a = a;
64         strncpy(v->name, name, KNAMELEN-1);
65         v->name[KNAMELEN-1] = 0;
66
67         ilock(&vctllock);
68         vno = arch->intrenable(v);
69         if(vno == -1){
70                 iunlock(&vctllock);
71                 print("intrenable: couldn't enable irq %d, tbdf 0x%uX for %s\n",
72                         irq, tbdf, v->name);
73                 xfree(v);
74                 return;
75         }
76         if(vctl[vno]){
77                 if(vctl[vno]->isr != v->isr || vctl[vno]->eoi != v->eoi)
78                         panic("intrenable: handler: %s %s %#p %#p %#p %#p",
79                                 vctl[vno]->name, v->name,
80                                 vctl[vno]->isr, v->isr, vctl[vno]->eoi, v->eoi);
81                 v->next = vctl[vno];
82         }
83         vctl[vno] = v;
84         iunlock(&vctllock);
85 }
86
87 void
88 intrdisable(int irq, void (*f)(Ureg *, void *), void *a, int tbdf, char *name)
89 {
90         Vctl **pv, *v;
91         int vno;
92
93         if(irq == 2)
94                 irq = 9;
95         if(arch->intrvecno == nil || (tbdf != BUSUNKNOWN && (irq == 0xff || irq == 0))){
96                 /*
97                  * on APIC machine, irq is pretty meaningless
98                  * and disabling a the vector is not implemented.
99                  * however, we still want to remove the matching
100                  * Vctl entry to prevent calling Vctl.f() with a
101                  * stale Vctl.a pointer.
102                  */
103                 irq = -1;
104                 vno = VectorPIC;
105         } else {
106                 vno = arch->intrvecno(irq);
107         }
108         ilock(&vctllock);
109         do {
110                 for(pv = &vctl[vno]; (v = *pv) != nil; pv = &v->next){
111                         if(v->isintr && (v->irq == irq || irq == -1)
112                         && v->tbdf == tbdf && v->f == f && v->a == a
113                         && strcmp(v->name, name) == 0)
114                                 break;
115                 }
116                 if(v != nil){
117                         *pv = v->next;
118                         xfree(v);
119
120                         if(irq != -1 && vctl[vno] == nil && arch->intrdisable != nil)
121                                 arch->intrdisable(irq);
122                         break;
123                 }
124         } while(irq == -1 && ++vno <= MaxVectorAPIC);
125         iunlock(&vctllock);
126 }
127
128 static long
129 irqallocread(Chan*, void *a, long n, vlong offset)
130 {
131         char buf[2*(11+1)+KNAMELEN+1+1];
132         int vno, m;
133         Vctl *v;
134
135         if(n < 0 || offset < 0)
136                 error(Ebadarg);
137
138         for(vno=0; vno<nelem(vctl); vno++){
139                 for(v=vctl[vno]; v; v=v->next){
140                         m = snprint(buf, sizeof(buf), "%11d %11d %.*s\n", vno, v->irq, KNAMELEN, v->name);
141                         offset -= m;
142                         if(offset >= 0)
143                                 continue;
144                         if(n > -offset)
145                                 n = -offset;
146                         offset += m;
147                         memmove(a, buf+offset, n);
148                         return n;
149                 }
150         }
151         return 0;
152 }
153
154 void
155 trapenable(int vno, void (*f)(Ureg*, void*), void* a, char *name)
156 {
157         Vctl *v;
158
159         if(vno < 0 || vno >= VectorPIC)
160                 panic("trapenable: vno %d", vno);
161         if((v = xalloc(sizeof(Vctl))) == nil)
162                 panic("trapenable: out of memory");
163         v->tbdf = BUSUNKNOWN;
164         v->f = f;
165         v->a = a;
166         strncpy(v->name, name, KNAMELEN-1);
167         v->name[KNAMELEN-1] = 0;
168
169         ilock(&vctllock);
170         if(vctl[vno])
171                 v->next = vctl[vno]->next;
172         vctl[vno] = v;
173         iunlock(&vctllock);
174 }
175
/*
 * Clear the NMI latch and enable NMI through ports 0x70 and 0x61.
 * The order of the port writes matters.
 */
static void
nmienable(void)
{
	int low;

	/*
	 * Hack: should be locked with NVRAM access.
	 */
	outb(0x70, 0x80);		/* NMI latch clear */
	outb(0x70, 0);

	/* Enable NMI: keep the low three bits of port 0x61, pulse bits 0x0C */
	low = inb(0x61);
	low &= 0x07;
	outb(0x61, low|0x0C);
	outb(0x61, low);
}
191
192 /*
193  * Minimal trap setup.  Just enough so that we can panic
194  * on traps (bugs) during kernel initialization.
195  * Called very early - malloc is not yet available.
196  */
197 void
198 trapinit0(void)
199 {
200         int d1, v;
201         ulong vaddr;
202         Segdesc *idt;
203
204         idt = (Segdesc*)IDTADDR;
205         vaddr = (ulong)vectortable;
206         for(v = 0; v < 256; v++){
207                 d1 = (vaddr & 0xFFFF0000)|SEGP;
208                 switch(v){
209
210                 case VectorBPT:
211                         d1 |= SEGPL(3)|SEGIG;
212                         break;
213
214                 case VectorSYSCALL:
215                         d1 |= SEGPL(3)|SEGIG;
216                         break;
217
218                 default:
219                         d1 |= SEGPL(0)|SEGIG;
220                         break;
221                 }
222                 idt[v].d0 = (vaddr & 0xFFFF)|(KESEL<<16);
223                 idt[v].d1 = d1;
224                 vaddr += 6;
225         }
226 }
227
228 void
229 trapinit(void)
230 {
231         /*
232          * Special traps.
233          * Syscall() is called directly without going through trap().
234          */
235         trapenable(VectorDE, debugexc, 0, "debugexc");
236         trapenable(VectorBPT, debugbpt, 0, "debugpt");
237         trapenable(VectorPF, fault386, 0, "fault386");
238         trapenable(Vector2F, doublefault, 0, "doublefault");
239         trapenable(Vector15, unexpected, 0, "unexpected");
240         nmienable();
241
242         addarchfile("irqalloc", 0444, irqallocread, nil);
243         trapinited = 1;
244 }
245
/*
 * Names of the first 32 trap vectors, indexed by vector number.
 * trap() uses them for the "sys: trap: ..." note and for panics.
 */
static char* excname[32] = {
	/*  0 */ "divide error",
	/*  1 */ "debug exception",
	/*  2 */ "nonmaskable interrupt",
	/*  3 */ "breakpoint",
	/*  4 */ "overflow",
	/*  5 */ "bounds check",
	/*  6 */ "invalid opcode",
	/*  7 */ "coprocessor not available",
	/*  8 */ "double fault",
	/*  9 */ "coprocessor segment overrun",
	/* 10 */ "invalid TSS",
	/* 11 */ "segment not present",
	/* 12 */ "stack exception",
	/* 13 */ "general protection violation",
	/* 14 */ "page fault",
	/* 15 */ "15 (reserved)",
	/* 16 */ "coprocessor error",
	/* 17 */ "alignment check",
	/* 18 */ "machine check",
	/* 19 */ "simd error",
	/* 20-23 */
	"20 (reserved)", "21 (reserved)", "22 (reserved)", "23 (reserved)",
	/* 24-27 */
	"24 (reserved)", "25 (reserved)", "26 (reserved)", "27 (reserved)",
	/* 28-31 */
	"28 (reserved)", "29 (reserved)", "30 (reserved)", "31 (reserved)",
};
280
281 /*
282  *  keep histogram of interrupt service times
283  */
284 void
285 intrtime(Mach*, int vno)
286 {
287         ulong diff;
288         ulong x;
289
290         x = perfticks();
291         diff = x - m->perf.intrts;
292         m->perf.intrts = x;
293
294         m->perf.inintr += diff;
295         if(up == nil && m->perf.inidle > diff)
296                 m->perf.inidle -= diff;
297
298         diff /= m->cpumhz*100;          /* quantum = 100µsec */
299         if(diff >= Ntimevec)
300                 diff = Ntimevec-1;
301         intrtimes[vno][diff]++;
302 }
303
304 /* go to user space */
305 void
306 kexit(Ureg*)
307 {
308         uvlong t;
309         Tos *tos;
310
311         /* precise time accounting, kernel exit */
312         tos = (Tos*)(USTKTOP-sizeof(Tos));
313         cycles(&t);
314         tos->kcycles += t - up->kentry;
315         tos->pcycles = t + up->pcycles;
316         tos->pid = up->pid;
317 }
318
319 /*
320  *  All traps come here.  It is slower to have all traps call trap()
321  *  rather than directly vectoring the handler.  However, this avoids a
322  *  lot of code duplication and possible bugs.  The only exception is
323  *  VectorSYSCALL.
324  *  Trap is called with interrupts disabled via interrupt-gates.
325  */
326 void
327 trap(Ureg* ureg)
328 {
329         int clockintr, i, vno, user;
330         char buf[ERRMAX];
331         Vctl *ctl, *v;
332         Mach *mach;
333
334         if(!trapinited){
335                 /* fault386 can give a better error message */
336                 if(ureg->trap == VectorPF)
337                         fault386(ureg, nil);
338                 panic("trap %lud: not ready", ureg->trap);
339         }
340
341         m->perf.intrts = perfticks();
342         user = userureg(ureg);
343         if(user){
344                 up->dbgreg = ureg;
345                 cycles(&up->kentry);
346         }
347
348         clockintr = 0;
349
350         vno = ureg->trap;
351         if(ctl = vctl[vno]){
352                 if(ctl->isintr){
353                         m->intr++;
354                         if(vno >= VectorPIC && vno != VectorSYSCALL)
355                                 m->lastintr = ctl->irq;
356                 }
357
358                 if(ctl->isr)
359                         ctl->isr(vno);
360                 for(v = ctl; v != nil; v = v->next){
361                         if(v->f)
362                                 v->f(ureg, v->a);
363                 }
364                 if(ctl->eoi)
365                         ctl->eoi(vno);
366
367                 if(ctl->isintr){
368                         intrtime(m, vno);
369
370                         if(ctl->irq == IrqCLOCK || ctl->irq == IrqTIMER)
371                                 clockintr = 1;
372
373                         if(up && !clockintr)
374                                 preempted();
375                 }
376         }
377         else if(vno < nelem(excname) && user){
378                 spllo();
379                 sprint(buf, "sys: trap: %s", excname[vno]);
380                 postnote(up, 1, buf, NDebug);
381         }
382         else if(vno >= VectorPIC && vno != VectorSYSCALL){
383                 /*
384                  * An unknown interrupt.
385                  * Check for a default IRQ7. This can happen when
386                  * the IRQ input goes away before the acknowledge.
387                  * In this case, a 'default IRQ7' is generated, but
388                  * the corresponding bit in the ISR isn't set.
389                  * In fact, just ignore all such interrupts.
390                  */
391
392                 /* call all interrupt routines, just in case */
393                 for(i = VectorPIC; i <= MaxIrqLAPIC; i++){
394                         ctl = vctl[i];
395                         if(ctl == nil)
396                                 continue;
397                         if(!ctl->isintr)
398                                 continue;
399                         for(v = ctl; v != nil; v = v->next){
400                                 if(v->f)
401                                         v->f(ureg, v->a);
402                         }
403                         /* should we do this? */
404                         if(ctl->eoi)
405                                 ctl->eoi(i);
406                 }
407
408                 /* clear the interrupt */
409                 i8259isr(vno);
410
411                 if(0)print("cpu%d: spurious interrupt %d, last %d\n",
412                         m->machno, vno, m->lastintr);
413                 if(0)if(conf.nmach > 1){
414                         for(i = 0; i < MAXMACH; i++){
415                                 if(active.machs[i] == 0)
416                                         continue;
417                                 mach = MACHP(i);
418                                 if(m->machno == mach->machno)
419                                         continue;
420                                 print(" cpu%d: last %d",
421                                         mach->machno, mach->lastintr);
422                         }
423                         print("\n");
424                 }
425                 m->spuriousintr++;
426                 if(user)
427                         kexit(ureg);
428                 return;
429         }
430         else{
431                 if(vno == VectorNMI){
432                         /*
433                          * Don't re-enable, it confuses the crash dumps.
434                         nmienable();
435                          */
436                         iprint("cpu%d: nmi PC %#8.8lux, status %ux\n",
437                                 m->machno, ureg->pc, inb(0x61));
438                         while(m->machno != 0)
439                                 ;
440                 }
441
442                 if(!user){
443                         void (*pc)(void);
444                         ulong *sp; 
445
446                         extern void _forkretpopgs(void);
447                         extern void _forkretpopfs(void);
448                         extern void _forkretpopes(void);
449                         extern void _forkretpopds(void);
450                         extern void _forkretiret(void);
451                         extern void _rdmsrinst(void);
452                         extern void _wrmsrinst(void);
453                         extern void _peekinst(void);
454
455                         extern void load_fs(ulong);
456                         extern void load_gs(ulong);
457
458                         load_fs(NULLSEL);
459                         load_gs(NULLSEL);
460
461                         sp = (ulong*)&ureg->sp; /* kernel stack */
462                         pc = (void*)ureg->pc;
463
464                         if(pc == _forkretpopgs || pc == _forkretpopfs || 
465                            pc == _forkretpopes || pc == _forkretpopds){
466                                 if(vno == VectorGPF || vno == VectorSNP){
467                                         sp[0] = NULLSEL;
468                                         return;
469                                 }
470                         } else if(pc == _forkretiret){
471                                 if(vno == VectorGPF || vno == VectorSNP){
472                                         sp[1] = UESEL;  /* CS */
473                                         sp[4] = UDSEL;  /* SS */
474                                         return;
475                                 }
476                         } else if(pc == _rdmsrinst || pc == _wrmsrinst){
477                                 if(vno == VectorGPF){
478                                         ureg->bp = -1;
479                                         ureg->pc += 2;
480                                         return;
481                                 }
482                         } else if(pc == _peekinst){
483                                 if(vno == VectorGPF){
484                                         ureg->pc += 2;
485                                         return;
486                                 }
487                         }
488                 }
489
490                 dumpregs(ureg);
491                 if(!user){
492                         ureg->sp = (ulong)&ureg->sp;
493                         _dumpstack(ureg);
494                 }
495                 if(vno < nelem(excname))
496                         panic("%s", excname[vno]);
497                 panic("unknown trap/intr: %d", vno);
498         }
499         splhi();
500
501         /* delaysched set because we held a lock or because our quantum ended */
502         if(up && up->delaysched && clockintr){
503                 sched();
504                 splhi();
505         }
506
507         if(user){
508                 if(up->procctl || up->nnote)
509                         notify(ureg);
510                 kexit(ureg);
511         }
512 }
513
514 /*
515  *  dump registers
516  */
517 void
518 dumpregs2(Ureg* ureg)
519 {
520         if(up)
521                 iprint("cpu%d: registers for %s %lud\n",
522                         m->machno, up->text, up->pid);
523         else
524                 iprint("cpu%d: registers for kernel\n", m->machno);
525         iprint("FLAGS=%luX TRAP=%luX ECODE=%luX PC=%luX",
526                 ureg->flags, ureg->trap, ureg->ecode, ureg->pc);
527         if(userureg(ureg))
528                 iprint(" SS=%4.4luX USP=%luX\n", ureg->ss & 0xFFFF, ureg->usp);
529         else
530                 iprint(" SP=%luX\n", (ulong)&ureg->sp);
531         iprint("  AX %8.8luX  BX %8.8luX  CX %8.8luX  DX %8.8luX\n",
532                 ureg->ax, ureg->bx, ureg->cx, ureg->dx);
533         iprint("  SI %8.8luX  DI %8.8luX  BP %8.8luX\n",
534                 ureg->si, ureg->di, ureg->bp);
535         iprint("  CS %4.4luX  DS %4.4luX  ES %4.4luX  FS %4.4luX  GS %4.4luX\n",
536                 ureg->cs & 0xFFFF, ureg->ds & 0xFFFF, ureg->es & 0xFFFF,
537                 ureg->fs & 0xFFFF, ureg->gs & 0xFFFF);
538 }
539
540 void
541 dumpregs(Ureg* ureg)
542 {
543         dumpregs2(ureg);
544
545         /*
546          * Processor control registers.
547          * If machine check exception, time stamp counter, page size extensions
548          * or enhanced virtual 8086 mode extensions are supported, there is a
549          * CR4. If there is a CR4 and machine check extensions, read the machine
550          * check address and machine check type registers if RDMSR supported.
551          */
552         iprint("  CR0 %8.8lux CR2 %8.8lux CR3 %8.8lux",
553                 getcr0(), getcr2(), getcr3());
554         if(m->cpuiddx & (Mce|Tsc|Pse|Vmex)){
555                 iprint(" CR4 %8.8lux\n", getcr4());
556                 if(ureg->trap == 18)
557                         dumpmcregs();
558         }
559         iprint("\n  ur %#p up %#p\n", ureg, up);
560 }
561
562
563 /*
564  * Fill in enough of Ureg to get a stack trace, and call a function.
565  * Used by debugging interface rdb.
566  */
567 void
568 callwithureg(void (*fn)(Ureg*))
569 {
570         Ureg ureg;
571         ureg.pc = getcallerpc(&fn);
572         ureg.sp = (ulong)&fn;
573         fn(&ureg);
574 }
575
576 static void
577 _dumpstack(Ureg *ureg)
578 {
579         uintptr l, v, i, estack;
580         extern ulong etext;
581         int x;
582         char *s;
583
584         if((s = getconf("*nodumpstack")) != nil && strcmp(s, "0") != 0){
585                 iprint("dumpstack disabled\n");
586                 return;
587         }
588         iprint("dumpstack\n");
589
590         x = 0;
591         x += iprint("ktrace /kernel/path %.8lux %.8lux <<EOF\n", ureg->pc, ureg->sp);
592         i = 0;
593         if(up
594         && (uintptr)&l >= (uintptr)up->kstack
595         && (uintptr)&l <= (uintptr)up->kstack+KSTACK)
596                 estack = (uintptr)up->kstack+KSTACK;
597         else if((uintptr)&l >= (uintptr)m->stack
598         && (uintptr)&l <= (uintptr)m+MACHSIZE)
599                 estack = (uintptr)m+MACHSIZE;
600         else
601                 return;
602         x += iprint("estackx %p\n", estack);
603
604         for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){
605                 v = *(uintptr*)l;
606                 if((KTZERO < v && v < (uintptr)&etext) || estack-l < 32){
607                         /*
608                          * Could Pick off general CALL (((uchar*)v)[-5] == 0xE8)
609                          * and CALL indirect through AX
610                          * (((uchar*)v)[-2] == 0xFF && ((uchar*)v)[-2] == 0xD0),
611                          * but this is too clever and misses faulting address.
612                          */
613                         x += iprint("%.8p=%.8p ", l, v);
614                         i++;
615                 }
616                 if(i == 4){
617                         i = 0;
618                         x += iprint("\n");
619                 }
620         }
621         if(i)
622                 iprint("\n");
623         iprint("EOF\n");
624
625         if(ureg->trap != VectorNMI)
626                 return;
627
628         i = 0;
629         for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){
630                 iprint("%.8p ", *(uintptr*)l);
631                 if(++i == 8){
632                         i = 0;
633                         iprint("\n");
634                 }
635         }
636         if(i)
637                 iprint("\n");
638 }
639
640 void
641 dumpstack(void)
642 {
643         callwithureg(_dumpstack);
644 }
645
646 static void
647 debugexc(Ureg *ureg, void *)
648 {
649         u32int dr6, m;
650         char buf[ERRMAX];
651         char *p, *e;
652         int i;
653
654         dr6 = getdr6();
655         if(up == nil)
656                 panic("kernel debug exception dr6=%#.8ux", dr6);
657         putdr6(up->dr[6]);
658         if(userureg(ureg))
659                 qlock(&up->debug);
660         else if(!canqlock(&up->debug))
661                 return;
662         m = up->dr[7];
663         m = (m >> 4 | m >> 3) & 8 | (m >> 3 | m >> 2) & 4 | (m >> 2 | m >> 1) & 2 | (m >> 1 | m) & 1;
664         m &= dr6;
665         if(m == 0){
666                 sprint(buf, "sys: debug exception dr6=%#.8ux", dr6);
667                 postnote(up, 0, buf, NDebug);
668         }else{
669                 p = buf;
670                 e = buf + sizeof(buf);
671                 p = seprint(p, e, "sys: watchpoint ");
672                 for(i = 0; i < 4; i++)
673                         if((m & 1<<i) != 0)
674                                 p = seprint(p, e, "%d%s", i, (m >> i + 1 != 0) ? "," : "");
675                 postnote(up, 0, buf, NDebug);
676         }
677         qunlock(&up->debug);
678 }
679
680 static void
681 debugbpt(Ureg* ureg, void*)
682 {
683         char buf[ERRMAX];
684
685         if(up == 0)
686                 panic("kernel bpt");
687         /* restore pc to instruction that caused the trap */
688         ureg->pc--;
689         sprint(buf, "sys: breakpoint");
690         postnote(up, 1, buf, NDebug);
691 }
692
693 static void
694 doublefault(Ureg*, void*)
695 {
696         panic("double fault");
697 }
698
699 static void
700 unexpected(Ureg* ureg, void*)
701 {
702         print("unexpected trap %lud; ignoring\n", ureg->trap);
703 }
704
705 extern void checkpages(void);
706 extern void checkfault(ulong, ulong);
707 static void
708 fault386(Ureg* ureg, void*)
709 {
710         ulong addr;
711         int read, user, n, insyscall;
712         char buf[ERRMAX];
713
714         addr = getcr2();
715         read = !(ureg->ecode & 2);
716
717         user = userureg(ureg);
718         if(!user){
719                 if(vmapsync(addr))
720                         return;
721                 {
722                         extern void _peekinst(void);
723                         if((void(*)(void))ureg->pc == _peekinst){
724                                 ureg->pc += 2;
725                                 return;
726                         }
727                 }
728                 if(addr >= USTKTOP)
729                         panic("kernel fault: bad address pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
730                 if(up == nil)
731                         panic("kernel fault: no user process pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
732         }
733         if(up == nil)
734                 panic("user fault: up=0 pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
735
736         insyscall = up->insyscall;
737         up->insyscall = 1;
738         n = fault(addr, ureg->pc, read);
739         if(n < 0){
740                 if(!user){
741                         dumpregs(ureg);
742                         panic("fault: 0x%lux", addr);
743                 }
744                 checkpages();
745                 checkfault(addr, ureg->pc);
746                 sprint(buf, "sys: trap: fault %s addr=0x%lux",
747                         read ? "read" : "write", addr);
748                 postnote(up, 1, buf, NDebug);
749         }
750         up->insyscall = insyscall;
751 }
752
753 /*
754  *  system calls
755  */
756 #include "../port/systab.h"
757
758 /*
759  *  Syscall is called directly from assembler without going through trap().
760  */
761 void
762 syscall(Ureg* ureg)
763 {
764         char *e;
765         ulong   sp;
766         long    ret;
767         int     i, s;
768         ulong scallnr;
769         vlong startns, stopns;
770
771         if(!userureg(ureg))
772                 panic("syscall: cs 0x%4.4luX", ureg->cs);
773
774         cycles(&up->kentry);
775
776         m->syscall++;
777         up->insyscall = 1;
778         up->pc = ureg->pc;
779         up->dbgreg = ureg;
780
781         sp = ureg->usp;
782         scallnr = ureg->ax;
783         up->scallnr = scallnr;
784
785         spllo();
786
787         up->nerrlab = 0;
788         ret = -1;
789         if(!waserror()){
790                 if(sp<(USTKTOP-BY2PG) || sp>(USTKTOP-sizeof(Sargs)-BY2WD))
791                         validaddr(sp, sizeof(Sargs)+BY2WD, 0);
792
793                 up->s = *((Sargs*)(sp+BY2WD));
794
795                 if(up->procctl == Proc_tracesyscall){
796                         syscallfmt(scallnr, ureg->pc, (va_list)up->s.args);
797                         s = splhi();
798                         up->procctl = Proc_stopme;
799                         procctl();
800                         splx(s);
801                         startns = todget(nil);
802                 }
803
804                 if(scallnr >= nsyscall || systab[scallnr] == 0){
805                         pprint("bad sys call number %lud pc %lux\n",
806                                 scallnr, ureg->pc);
807                         postnote(up, 1, "sys: bad sys call", NDebug);
808                         error(Ebadarg);
809                 }
810                 up->psstate = sysctab[scallnr];
811                 ret = systab[scallnr]((va_list)up->s.args);
812                 poperror();
813         }else{
814                 /* failure: save the error buffer for errstr */
815                 e = up->syserrstr;
816                 up->syserrstr = up->errstr;
817                 up->errstr = e;
818                 if(0 && up->pid == 1)
819                         print("syscall %lud error %s\n", scallnr, up->syserrstr);
820         }
821         if(up->nerrlab){
822                 print("bad errstack [%lud]: %d extra\n", scallnr, up->nerrlab);
823                 for(i = 0; i < NERR; i++)
824                         print("sp=%lux pc=%lux\n",
825                                 up->errlab[i].sp, up->errlab[i].pc);
826                 panic("error stack");
827         }
828
829         /*
830          *  Put return value in frame.  On the x86 the syscall is
831          *  just another trap and the return value from syscall is
832          *  ignored.  On other machines the return value is put into
833          *  the results register by caller of syscall.
834          */
835         ureg->ax = ret;
836
837         if(up->procctl == Proc_tracesyscall){
838                 stopns = todget(nil);
839                 sysretfmt(scallnr, (va_list)up->s.args, ret, startns, stopns);
840                 s = splhi();
841                 up->procctl = Proc_stopme;
842                 procctl();
843                 splx(s);
844         }
845
846         up->insyscall = 0;
847         up->psstate = 0;
848
849         if(scallnr == NOTED)
850                 noted(ureg, *((ulong*)up->s.args));
851
852         if(scallnr!=RFORK && (up->procctl || up->nnote)){
853                 splhi();
854                 notify(ureg);
855         }
856         /* if we delayed sched because we held a lock, sched now */
857         if(up->delaysched)
858                 sched();
859         kexit(ureg);
860 }
861
862 /*
863  *  Call user, if necessary, with note.
864  *  Pass user the Ureg struct and the note on his stack.
865  */
866 int
867 notify(Ureg* ureg)
868 {
869         int l;
870         ulong s, sp;
871         Note *n;
872
873         if(up->procctl)
874                 procctl();
875         if(up->nnote == 0)
876                 return 0;
877
878         if(up->fpstate == FPactive){
879                 fpsave(up->fpsave);
880                 up->fpstate = FPinactive;
881         }
882         up->fpstate |= FPillegal;
883
884         s = spllo();
885         qlock(&up->debug);
886         up->notepending = 0;
887         n = &up->note[0];
888         if(strncmp(n->msg, "sys:", 4) == 0){
889                 l = strlen(n->msg);
890                 if(l > ERRMAX-15)       /* " pc=0x12345678\0" */
891                         l = ERRMAX-15;
892                 sprint(n->msg+l, " pc=0x%.8lux", ureg->pc);
893         }
894
895         if(n->flag!=NUser && (up->notified || up->notify==0)){
896                 qunlock(&up->debug);
897                 if(n->flag == NDebug)
898                         pprint("suicide: %s\n", n->msg);
899                 pexit(n->msg, n->flag!=NDebug);
900         }
901
902         if(up->notified){
903                 qunlock(&up->debug);
904                 splhi();
905                 return 0;
906         }
907
908         if(!up->notify){
909                 qunlock(&up->debug);
910                 pexit(n->msg, n->flag!=NDebug);
911         }
912         sp = ureg->usp;
913         sp -= 256;      /* debugging: preserve context causing problem */
914         sp -= sizeof(Ureg);
915 if(0) print("%s %lud: notify %.8lux %.8lux %.8lux %s\n",
916         up->text, up->pid, ureg->pc, ureg->usp, sp, n->msg);
917
918         if(!okaddr((uintptr)up->notify, 1, 0)
919         || !okaddr(sp-ERRMAX-4*BY2WD, sizeof(Ureg)+ERRMAX+4*BY2WD, 1)){
920                 qunlock(&up->debug);
921                 pprint("suicide: bad address in notify\n");
922                 pexit("Suicide", 0);
923         }
924
925         memmove((Ureg*)sp, ureg, sizeof(Ureg));
926         *(Ureg**)(sp-BY2WD) = up->ureg; /* word under Ureg is old up->ureg */
927         up->ureg = (void*)sp;
928         sp -= BY2WD+ERRMAX;
929         memmove((char*)sp, up->note[0].msg, ERRMAX);
930         sp -= 3*BY2WD;
931         *(ulong*)(sp+2*BY2WD) = sp+3*BY2WD;             /* arg 2 is string */
932         *(ulong*)(sp+1*BY2WD) = (ulong)up->ureg;        /* arg 1 is ureg* */
933         *(ulong*)(sp+0*BY2WD) = 0;                      /* arg 0 is pc */
934         ureg->usp = sp;
935         ureg->pc = (ulong)up->notify;
936         ureg->cs = UESEL;
937         ureg->ss = ureg->ds = ureg->es = UDSEL;
938         up->notified = 1;
939         up->nnote--;
940         memmove(&up->lastnote, &up->note[0], sizeof(Note));
941         memmove(&up->note[0], &up->note[1], up->nnote*sizeof(Note));
942
943         qunlock(&up->debug);
944         splx(s);
945         return 1;
946 }
947
948 /*
949  *   Return user to state before notify()
950  */
951 void
952 noted(Ureg* ureg, ulong arg0)
953 {
954         Ureg *nureg;
955         ulong oureg, sp;
956
957         qlock(&up->debug);
958         if(arg0!=NRSTR && !up->notified) {
959                 qunlock(&up->debug);
960                 pprint("call to noted() when not notified\n");
961                 pexit("Suicide", 0);
962         }
963         up->notified = 0;
964
965         nureg = up->ureg;       /* pointer to user returned Ureg struct */
966
967         up->fpstate &= ~FPillegal;
968
969         /* sanity clause */
970         oureg = (ulong)nureg;
971         if(!okaddr(oureg-BY2WD, BY2WD+sizeof(Ureg), 0)){
972                 qunlock(&up->debug);
973                 pprint("bad ureg in noted or call to noted when not notified\n");
974                 pexit("Suicide", 0);
975         }
976
977         /* don't let user change system flags */
978         nureg->flags = (ureg->flags & ~0xCD5) | (nureg->flags & 0xCD5);
979         nureg->cs |= 3;
980         nureg->ss |= 3;
981
982         memmove(ureg, nureg, sizeof(Ureg));
983
984         switch(arg0){
985         case NCONT:
986         case NRSTR:
987 if(0) print("%s %lud: noted %.8lux %.8lux\n",
988         up->text, up->pid, nureg->pc, nureg->usp);
989                 if(!okaddr(nureg->pc, 1, 0) || !okaddr(nureg->usp, BY2WD, 0)){
990                         qunlock(&up->debug);
991                         pprint("suicide: trap in noted\n");
992                         pexit("Suicide", 0);
993                 }
994                 up->ureg = (Ureg*)(*(ulong*)(oureg-BY2WD));
995                 qunlock(&up->debug);
996                 break;
997
998         case NSAVE:
999                 if(!okaddr(nureg->pc, BY2WD, 0)
1000                 || !okaddr(nureg->usp, BY2WD, 0)){
1001                         qunlock(&up->debug);
1002                         pprint("suicide: trap in noted\n");
1003                         pexit("Suicide", 0);
1004                 }
1005                 qunlock(&up->debug);
1006                 sp = oureg-4*BY2WD-ERRMAX;
1007                 splhi();
1008                 ureg->sp = sp;
1009                 ((ulong*)sp)[1] = oureg;        /* arg 1 0(FP) is ureg* */
1010                 ((ulong*)sp)[0] = 0;            /* arg 0 is pc */
1011                 break;
1012
1013         default:
1014                 up->lastnote.flag = NDebug;
1015                 /* fall through */
1016
1017         case NDFLT:
1018                 qunlock(&up->debug);
1019                 if(up->lastnote.flag == NDebug)
1020                         pprint("suicide: %s\n", up->lastnote.msg);
1021                 pexit(up->lastnote.msg, up->lastnote.flag!=NDebug);
1022         }
1023 }
1024
1025 uintptr
1026 execregs(uintptr entry, ulong ssize, ulong nargs)
1027 {
1028         ulong *sp;
1029         Ureg *ureg;
1030
1031         sp = (ulong*)(USTKTOP - ssize);
1032         *--sp = nargs;
1033
1034         ureg = up->dbgreg;
1035         ureg->usp = (ulong)sp;
1036         ureg->pc = entry;
1037         ureg->cs = UESEL;
1038         ureg->ss = ureg->ds = ureg->es = UDSEL;
1039         ureg->fs = ureg->gs = NULLSEL;
1040         return USTKTOP-sizeof(Tos);             /* address of kernel/user shared data */
1041 }
1042
1043 /*
1044  *  return the userpc the last exception happened at
1045  */
1046 uintptr
1047 userpc(void)
1048 {
1049         Ureg *ureg;
1050
1051         ureg = (Ureg*)up->dbgreg;
1052         return ureg->pc;
1053 }
1054
1055 /* This routine must save the values of registers the user is not permitted
1056  * to write from devproc and then restore the saved values before returning.
1057  */
1058 void
1059 setregisters(Ureg* ureg, char* pureg, char* uva, int n)
1060 {
1061         ulong flags;
1062
1063         flags = ureg->flags;
1064         memmove(pureg, uva, n);
1065         ureg->flags = (ureg->flags & 0xCD5) | (flags & ~0xCD5);
1066         ureg->cs |= 3;
1067         ureg->ss |= 3;
1068 }
1069
1070 static void
1071 linkproc(void)
1072 {
1073         spllo();
1074         up->kpfun(up->kparg);
1075         pexit("kproc dying", 0);
1076 }
1077
1078 void
1079 kprocchild(Proc* p, void (*func)(void*), void* arg)
1080 {
1081         /*
1082          * gotolabel() needs a word on the stack in
1083          * which to place the return PC used to jump
1084          * to linkproc().
1085          */
1086         p->sched.pc = (ulong)linkproc;
1087         p->sched.sp = (ulong)p->kstack+KSTACK-BY2WD;
1088
1089         p->kpfun = func;
1090         p->kparg = arg;
1091 }
1092
1093 void
1094 forkchild(Proc *p, Ureg *ureg)
1095 {
1096         Ureg *cureg;
1097
1098         /*
1099          * Add 2*BY2WD to the stack to account for
1100          *  - the return PC
1101          *  - trap's argument (ur)
1102          */
1103         p->sched.sp = (ulong)p->kstack+KSTACK-(sizeof(Ureg)+2*BY2WD);
1104         p->sched.pc = (ulong)forkret;
1105
1106         cureg = (Ureg*)(p->sched.sp+2*BY2WD);
1107         memmove(cureg, ureg, sizeof(Ureg));
1108         /* return value of syscall in child */
1109         cureg->ax = 0;
1110
1111         /* Things from bottom of syscall which were never executed */
1112         p->psstate = 0;
1113         p->insyscall = 0;
1114 }
1115
1116 /* Give enough context in the ureg to produce a kernel stack for
1117  * a sleeping process
1118  */
1119 void
1120 setkernur(Ureg* ureg, Proc* p)
1121 {
1122         ureg->pc = p->sched.pc;
1123         ureg->sp = p->sched.sp+4;
1124 }
1125
1126 ulong
1127 dbgpc(Proc *p)
1128 {
1129         Ureg *ureg;
1130
1131         ureg = p->dbgreg;
1132         if(ureg == 0)
1133                 return 0;
1134
1135         return ureg->pc;
1136 }