3 #include "../port/lib.h"
9 #include "../port/error.h"
12 static int trapinited;
14 void noted(Ureg*, ulong);
16 static void debugexc(Ureg*, void*);
17 static void debugbpt(Ureg*, void*);
18 static void fault386(Ureg*, void*);
19 static void doublefault(Ureg*, void*);
20 static void unexpected(Ureg*, void*);
21 static void _dumpstack(Ureg*);
24 static Vctl *vctl[256];
28 Ntimevec = 20 /* number of time buckets for each intr */
30 ulong intrtimes[256][Ntimevec];
33 intrenable(int irq, void (*f)(Ureg*, void*), void* a, int tbdf, char *name)
39 print("intrenable: nil handler for %d, tbdf 0x%uX for %s\n",
43 if(tbdf != BUSUNKNOWN && (irq == 0xff || irq == 0)){
44 print("intrenable: got unassigned irq %d, tbdf 0x%uX for %s\n",
50 * IRQ2 doesn't really exist, it's used to gang the interrupt
51 * controllers together. A device set to IRQ2 will appear on
52 * the second interrupt controller as IRQ9.
57 if((v = xalloc(sizeof(Vctl))) == nil)
58 panic("intrenable: out of memory");
64 strncpy(v->name, name, KNAMELEN-1);
65 v->name[KNAMELEN-1] = 0;
68 vno = arch->intrenable(v);
71 print("intrenable: couldn't enable irq %d, tbdf 0x%uX for %s\n",
77 if(vctl[vno]->isr != v->isr || vctl[vno]->eoi != v->eoi)
78 panic("intrenable: handler: %s %s %#p %#p %#p %#p",
79 vctl[vno]->name, v->name,
80 vctl[vno]->isr, v->isr, vctl[vno]->eoi, v->eoi);
88 intrdisable(int irq, void (*f)(Ureg *, void *), void *a, int tbdf, char *name)
95 if(arch->intrvecno == nil || (tbdf != BUSUNKNOWN && (irq == 0xff || irq == 0))){
97 * on APIC machine, irq is pretty meaningless
98 * and disabling the vector is not implemented.
99 * however, we still want to remove the matching
100 * Vctl entry to prevent calling Vctl.f() with a
101 * stale Vctl.a pointer.
106 vno = arch->intrvecno(irq);
110 for(pv = &vctl[vno]; (v = *pv) != nil; pv = &v->next){
111 if(v->isintr && (v->irq == irq || irq == -1)
112 && v->tbdf == tbdf && v->f == f && v->a == a
113 && strcmp(v->name, name) == 0)
120 if(irq != -1 && vctl[vno] == nil && arch->intrdisable != nil)
121 arch->intrdisable(irq);
124 } while(irq == -1 && ++vno <= MaxVectorAPIC);
129 irqallocread(Chan*, void *a, long n, vlong offset)
131 char buf[2*(11+1)+KNAMELEN+1+1];
135 if(n < 0 || offset < 0)
138 for(vno=0; vno<nelem(vctl); vno++){
139 for(v=vctl[vno]; v; v=v->next){
140 m = snprint(buf, sizeof(buf), "%11d %11d %.*s\n", vno, v->irq, KNAMELEN, v->name);
147 memmove(a, buf+offset, n);
155 trapenable(int vno, void (*f)(Ureg*, void*), void* a, char *name)
159 if(vno < 0 || vno >= VectorPIC)
160 panic("trapenable: vno %d", vno);
161 if((v = xalloc(sizeof(Vctl))) == nil)
162 panic("trapenable: out of memory");
163 v->tbdf = BUSUNKNOWN;
166 strncpy(v->name, name, KNAMELEN-1);
167 v->name[KNAMELEN-1] = 0;
171 v->next = vctl[vno]->next;
182 * Hack: should be locked with NVRAM access.
184 outb(0x70, 0x80); /* NMI latch clear */
187 x = inb(0x61) & 0x07; /* Enable NMI */
193 * Minimal trap setup. Just enough so that we can panic
194 * on traps (bugs) during kernel initialization.
195 * Called very early - malloc is not yet available.
204 idt = (Segdesc*)IDTADDR;
205 vaddr = (ulong)vectortable;
206 for(v = 0; v < 256; v++){
207 d1 = (vaddr & 0xFFFF0000)|SEGP;
211 d1 |= SEGPL(3)|SEGIG;
215 d1 |= SEGPL(3)|SEGIG;
219 d1 |= SEGPL(0)|SEGIG;
222 idt[v].d0 = (vaddr & 0xFFFF)|(KESEL<<16);
233 * Syscall() is called directly without going through trap().
235 trapenable(VectorDE, debugexc, 0, "debugexc");
236 trapenable(VectorBPT, debugbpt, 0, "debugpt");
237 trapenable(VectorPF, fault386, 0, "fault386");
238 trapenable(Vector2F, doublefault, 0, "doublefault");
239 trapenable(Vector15, unexpected, 0, "unexpected");
242 addarchfile("irqalloc", 0444, irqallocread, nil);
246 static char* excname[32] = {
249 "nonmaskable interrupt",
254 "coprocessor not available",
256 "coprocessor segment overrun",
258 "segment not present",
260 "general protection violation",
282 * keep histogram of interrupt service times
285 intrtime(Mach*, int vno)
291 diff = x - m->perf.intrts;
294 m->perf.inintr += diff;
295 if(up == nil && m->perf.inidle > diff)
296 m->perf.inidle -= diff;
298 diff /= m->cpumhz*100; /* quantum = 100µsec */
301 intrtimes[vno][diff]++;
304 /* go to user space */
311 /* precise time accounting, kernel exit */
312 tos = (Tos*)(USTKTOP-sizeof(Tos));
314 tos->kcycles += t - up->kentry;
315 tos->pcycles = t + up->pcycles;
320 * All traps come here. It is slower to have all traps call trap()
321 * rather than directly vectoring the handler. However, this avoids a
322 * lot of code duplication and possible bugs. The only exception is
324 * Trap is called with interrupts disabled via interrupt-gates.
329 int clockintr, i, vno, user;
335 /* fault386 can give a better error message */
336 if(ureg->trap == VectorPF)
338 panic("trap %lud: not ready", ureg->trap);
341 m->perf.intrts = perfticks();
342 user = userureg(ureg);
354 if(vno >= VectorPIC && vno != VectorSYSCALL)
355 m->lastintr = ctl->irq;
360 for(v = ctl; v != nil; v = v->next){
370 if(ctl->irq == IrqCLOCK || ctl->irq == IrqTIMER)
377 else if(vno < nelem(excname) && user){
379 sprint(buf, "sys: trap: %s", excname[vno]);
380 postnote(up, 1, buf, NDebug);
382 else if(vno >= VectorPIC && vno != VectorSYSCALL){
384 * An unknown interrupt.
385 * Check for a default IRQ7. This can happen when
386 * the IRQ input goes away before the acknowledge.
387 * In this case, a 'default IRQ7' is generated, but
388 * the corresponding bit in the ISR isn't set.
389 * In fact, just ignore all such interrupts.
392 /* call all interrupt routines, just in case */
393 for(i = VectorPIC; i <= MaxIrqLAPIC; i++){
399 for(v = ctl; v != nil; v = v->next){
403 /* should we do this? */
408 /* clear the interrupt */
411 if(0)print("cpu%d: spurious interrupt %d, last %d\n",
412 m->machno, vno, m->lastintr);
413 if(0)if(conf.nmach > 1){
414 for(i = 0; i < MAXMACH; i++){
415 if(active.machs[i] == 0)
418 if(m->machno == mach->machno)
420 print(" cpu%d: last %d",
421 mach->machno, mach->lastintr);
431 if(vno == VectorNMI){
433 * Don't re-enable, it confuses the crash dumps.
436 iprint("cpu%d: nmi PC %#8.8lux, status %ux\n",
437 m->machno, ureg->pc, inb(0x61));
438 while(m->machno != 0)
446 extern void _forkretpopgs(void);
447 extern void _forkretpopfs(void);
448 extern void _forkretpopes(void);
449 extern void _forkretpopds(void);
450 extern void _forkretiret(void);
451 extern void _rdmsrinst(void);
452 extern void _wrmsrinst(void);
453 extern void _peekinst(void);
455 extern void load_fs(ulong);
456 extern void load_gs(ulong);
461 sp = (ulong*)&ureg->sp; /* kernel stack */
462 pc = (void*)ureg->pc;
464 if(pc == _forkretpopgs || pc == _forkretpopfs ||
465 pc == _forkretpopes || pc == _forkretpopds){
466 if(vno == VectorGPF || vno == VectorSNP){
470 } else if(pc == _forkretiret){
471 if(vno == VectorGPF || vno == VectorSNP){
472 sp[1] = UESEL; /* CS */
473 sp[4] = UDSEL; /* SS */
476 } else if(pc == _rdmsrinst || pc == _wrmsrinst){
477 if(vno == VectorGPF){
482 } else if(pc == _peekinst){
483 if(vno == VectorGPF){
492 ureg->sp = (ulong)&ureg->sp;
495 if(vno < nelem(excname))
496 panic("%s", excname[vno]);
497 panic("unknown trap/intr: %d", vno);
501 /* delaysched set because we held a lock or because our quantum ended */
502 if(up && up->delaysched && clockintr){
508 if(up->procctl || up->nnote)
518 dumpregs2(Ureg* ureg)
521 iprint("cpu%d: registers for %s %lud\n",
522 m->machno, up->text, up->pid);
524 iprint("cpu%d: registers for kernel\n", m->machno);
525 iprint("FLAGS=%luX TRAP=%luX ECODE=%luX PC=%luX",
526 ureg->flags, ureg->trap, ureg->ecode, ureg->pc);
528 iprint(" SS=%4.4luX USP=%luX\n", ureg->ss & 0xFFFF, ureg->usp);
530 iprint(" SP=%luX\n", (ulong)&ureg->sp);
531 iprint(" AX %8.8luX BX %8.8luX CX %8.8luX DX %8.8luX\n",
532 ureg->ax, ureg->bx, ureg->cx, ureg->dx);
533 iprint(" SI %8.8luX DI %8.8luX BP %8.8luX\n",
534 ureg->si, ureg->di, ureg->bp);
535 iprint(" CS %4.4luX DS %4.4luX ES %4.4luX FS %4.4luX GS %4.4luX\n",
536 ureg->cs & 0xFFFF, ureg->ds & 0xFFFF, ureg->es & 0xFFFF,
537 ureg->fs & 0xFFFF, ureg->gs & 0xFFFF);
546 * Processor control registers.
547 * If machine check exception, time stamp counter, page size extensions
548 * or enhanced virtual 8086 mode extensions are supported, there is a
549 * CR4. If there is a CR4 and machine check extensions, read the machine
550 * check address and machine check type registers if RDMSR supported.
552 iprint(" CR0 %8.8lux CR2 %8.8lux CR3 %8.8lux",
553 getcr0(), getcr2(), getcr3());
554 if(m->cpuiddx & (Mce|Tsc|Pse|Vmex)){
555 iprint(" CR4 %8.8lux\n", getcr4());
559 iprint("\n ur %#p up %#p\n", ureg, up);
564 * Fill in enough of Ureg to get a stack trace, and call a function.
565 * Used by debugging interface rdb.
568 callwithureg(void (*fn)(Ureg*))
571 ureg.pc = getcallerpc(&fn);
572 ureg.sp = (ulong)&fn;
577 _dumpstack(Ureg *ureg)
579 uintptr l, v, i, estack;
584 if((s = getconf("*nodumpstack")) != nil && strcmp(s, "0") != 0){
585 iprint("dumpstack disabled\n");
588 iprint("dumpstack\n");
591 x += iprint("ktrace /kernel/path %.8lux %.8lux <<EOF\n", ureg->pc, ureg->sp);
594 && (uintptr)&l >= (uintptr)up->kstack
595 && (uintptr)&l <= (uintptr)up->kstack+KSTACK)
596 estack = (uintptr)up->kstack+KSTACK;
597 else if((uintptr)&l >= (uintptr)m->stack
598 && (uintptr)&l <= (uintptr)m+MACHSIZE)
599 estack = (uintptr)m+MACHSIZE;
602 x += iprint("estackx %p\n", estack);
604 for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){
606 if((KTZERO < v && v < (uintptr)&etext) || estack-l < 32){
608 * Could pick off general CALL (((uchar*)v)[-5] == 0xE8)
609 * and CALL indirect through AX
610 * (((uchar*)v)[-2] == 0xFF && ((uchar*)v)[-1] == 0xD0),
611 * but this is too clever and misses faulting address.
613 x += iprint("%.8p=%.8p ", l, v);
625 if(ureg->trap != VectorNMI)
629 for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){
630 iprint("%.8p ", *(uintptr*)l);
643 callwithureg(_dumpstack);
647 debugexc(Ureg *ureg, void *)
656 panic("kernel debug exception dr6=%#.8ux", dr6);
660 else if(!canqlock(&up->debug))
663 m = (m >> 4 | m >> 3) & 8 | (m >> 3 | m >> 2) & 4 | (m >> 2 | m >> 1) & 2 | (m >> 1 | m) & 1;
666 sprint(buf, "sys: debug exception dr6=%#.8ux", dr6);
667 postnote(up, 0, buf, NDebug);
670 e = buf + sizeof(buf);
671 p = seprint(p, e, "sys: watchpoint ");
672 for(i = 0; i < 4; i++)
674 p = seprint(p, e, "%d%s", i, (m >> i + 1 != 0) ? "," : "");
675 postnote(up, 0, buf, NDebug);
681 debugbpt(Ureg* ureg, void*)
687 /* restore pc to instruction that caused the trap */
689 sprint(buf, "sys: breakpoint");
690 postnote(up, 1, buf, NDebug);
694 doublefault(Ureg*, void*)
696 panic("double fault");
700 unexpected(Ureg* ureg, void*)
702 print("unexpected trap %lud; ignoring\n", ureg->trap);
705 extern void checkpages(void);
706 extern void checkfault(ulong, ulong);
708 fault386(Ureg* ureg, void*)
711 int read, user, n, insyscall;
715 read = !(ureg->ecode & 2);
717 user = userureg(ureg);
722 extern void _peekinst(void);
723 if((void(*)(void))ureg->pc == _peekinst){
729 panic("kernel fault: bad address pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
731 panic("kernel fault: no user process pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
734 panic("user fault: up=0 pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
736 insyscall = up->insyscall;
738 n = fault(addr, ureg->pc, read);
742 panic("fault: 0x%lux", addr);
745 checkfault(addr, ureg->pc);
746 sprint(buf, "sys: trap: fault %s addr=0x%lux",
747 read ? "read" : "write", addr);
748 postnote(up, 1, buf, NDebug);
750 up->insyscall = insyscall;
756 #include "../port/systab.h"
759 * Syscall is called directly from assembler without going through trap().
769 vlong startns, stopns;
772 panic("syscall: cs 0x%4.4luX", ureg->cs);
783 up->scallnr = scallnr;
790 if(sp<(USTKTOP-BY2PG) || sp>(USTKTOP-sizeof(Sargs)-BY2WD))
791 validaddr(sp, sizeof(Sargs)+BY2WD, 0);
793 up->s = *((Sargs*)(sp+BY2WD));
795 if(up->procctl == Proc_tracesyscall){
796 syscallfmt(scallnr, ureg->pc, (va_list)up->s.args);
798 up->procctl = Proc_stopme;
801 startns = todget(nil);
804 if(scallnr >= nsyscall || systab[scallnr] == 0){
805 pprint("bad sys call number %lud pc %lux\n",
807 postnote(up, 1, "sys: bad sys call", NDebug);
810 up->psstate = sysctab[scallnr];
811 ret = systab[scallnr]((va_list)up->s.args);
814 /* failure: save the error buffer for errstr */
816 up->syserrstr = up->errstr;
818 if(0 && up->pid == 1)
819 print("syscall %lud error %s\n", scallnr, up->syserrstr);
822 print("bad errstack [%lud]: %d extra\n", scallnr, up->nerrlab);
823 for(i = 0; i < NERR; i++)
824 print("sp=%lux pc=%lux\n",
825 up->errlab[i].sp, up->errlab[i].pc);
826 panic("error stack");
830 * Put return value in frame. On the x86 the syscall is
831 * just another trap and the return value from syscall is
832 * ignored. On other machines the return value is put into
833 * the results register by caller of syscall.
837 if(up->procctl == Proc_tracesyscall){
838 stopns = todget(nil);
839 sysretfmt(scallnr, (va_list)up->s.args, ret, startns, stopns);
841 up->procctl = Proc_stopme;
850 noted(ureg, *((ulong*)up->s.args));
852 if(scallnr!=RFORK && (up->procctl || up->nnote)){
856 /* if we delayed sched because we held a lock, sched now */
863 * Call user, if necessary, with note.
864 * Pass user the Ureg struct and the note on his stack.
878 if(up->fpstate == FPactive){
880 up->fpstate = FPinactive;
882 up->fpstate |= FPillegal;
888 if(strncmp(n->msg, "sys:", 4) == 0){
890 if(l > ERRMAX-15) /* " pc=0x12345678\0" */
892 sprint(n->msg+l, " pc=0x%.8lux", ureg->pc);
895 if(n->flag!=NUser && (up->notified || up->notify==0)){
897 if(n->flag == NDebug)
898 pprint("suicide: %s\n", n->msg);
899 pexit(n->msg, n->flag!=NDebug);
910 pexit(n->msg, n->flag!=NDebug);
913 sp -= 256; /* debugging: preserve context causing problem */
915 if(0) print("%s %lud: notify %.8lux %.8lux %.8lux %s\n",
916 up->text, up->pid, ureg->pc, ureg->usp, sp, n->msg);
918 if(!okaddr((uintptr)up->notify, 1, 0)
919 || !okaddr(sp-ERRMAX-4*BY2WD, sizeof(Ureg)+ERRMAX+4*BY2WD, 1)){
921 pprint("suicide: bad address in notify\n");
925 memmove((Ureg*)sp, ureg, sizeof(Ureg));
926 *(Ureg**)(sp-BY2WD) = up->ureg; /* word under Ureg is old up->ureg */
927 up->ureg = (void*)sp;
929 memmove((char*)sp, up->note[0].msg, ERRMAX);
931 *(ulong*)(sp+2*BY2WD) = sp+3*BY2WD; /* arg 2 is string */
932 *(ulong*)(sp+1*BY2WD) = (ulong)up->ureg; /* arg 1 is ureg* */
933 *(ulong*)(sp+0*BY2WD) = 0; /* arg 0 is pc */
935 ureg->pc = (ulong)up->notify;
937 ureg->ss = ureg->ds = ureg->es = UDSEL;
940 memmove(&up->lastnote, &up->note[0], sizeof(Note));
941 memmove(&up->note[0], &up->note[1], up->nnote*sizeof(Note));
949 * Return user to state before notify()
952 noted(Ureg* ureg, ulong arg0)
958 if(arg0!=NRSTR && !up->notified) {
960 pprint("call to noted() when not notified\n");
965 nureg = up->ureg; /* pointer to user returned Ureg struct */
967 up->fpstate &= ~FPillegal;
970 oureg = (ulong)nureg;
971 if(!okaddr(oureg-BY2WD, BY2WD+sizeof(Ureg), 0)){
973 pprint("bad ureg in noted or call to noted when not notified\n");
977 /* don't let user change system flags */
978 nureg->flags = (ureg->flags & ~0xCD5) | (nureg->flags & 0xCD5);
982 memmove(ureg, nureg, sizeof(Ureg));
987 if(0) print("%s %lud: noted %.8lux %.8lux\n",
988 up->text, up->pid, nureg->pc, nureg->usp);
989 if(!okaddr(nureg->pc, 1, 0) || !okaddr(nureg->usp, BY2WD, 0)){
991 pprint("suicide: trap in noted\n");
994 up->ureg = (Ureg*)(*(ulong*)(oureg-BY2WD));
999 if(!okaddr(nureg->pc, BY2WD, 0)
1000 || !okaddr(nureg->usp, BY2WD, 0)){
1001 qunlock(&up->debug);
1002 pprint("suicide: trap in noted\n");
1003 pexit("Suicide", 0);
1005 qunlock(&up->debug);
1006 sp = oureg-4*BY2WD-ERRMAX;
1009 ((ulong*)sp)[1] = oureg; /* arg 1 0(FP) is ureg* */
1010 ((ulong*)sp)[0] = 0; /* arg 0 is pc */
1014 up->lastnote.flag = NDebug;
1018 qunlock(&up->debug);
1019 if(up->lastnote.flag == NDebug)
1020 pprint("suicide: %s\n", up->lastnote.msg);
1021 pexit(up->lastnote.msg, up->lastnote.flag!=NDebug);
1026 execregs(uintptr entry, ulong ssize, ulong nargs)
1031 sp = (ulong*)(USTKTOP - ssize);
1035 ureg->usp = (ulong)sp;
1038 ureg->ss = ureg->ds = ureg->es = UDSEL;
1039 ureg->fs = ureg->gs = NULLSEL;
1040 return USTKTOP-sizeof(Tos); /* address of kernel/user shared data */
1044 * return the userpc the last exception happened at
1051 ureg = (Ureg*)up->dbgreg;
1055 /* This routine must save the values of registers the user is not permitted
1056 * to write from devproc and then restore the saved values before returning.
1059 setregisters(Ureg* ureg, char* pureg, char* uva, int n)
1063 flags = ureg->flags;
1064 memmove(pureg, uva, n);
1065 ureg->flags = (ureg->flags & 0xCD5) | (flags & ~0xCD5);
1074 up->kpfun(up->kparg);
1075 pexit("kproc dying", 0);
1079 kprocchild(Proc* p, void (*func)(void*), void* arg)
1082 * gotolabel() needs a word on the stack in
1083 * which to place the return PC used to jump
1086 p->sched.pc = (ulong)linkproc;
1087 p->sched.sp = (ulong)p->kstack+KSTACK-BY2WD;
1094 forkchild(Proc *p, Ureg *ureg)
1099 * Add 2*BY2WD to the stack to account for
1101 * - trap's argument (ur)
1103 p->sched.sp = (ulong)p->kstack+KSTACK-(sizeof(Ureg)+2*BY2WD);
1104 p->sched.pc = (ulong)forkret;
1106 cureg = (Ureg*)(p->sched.sp+2*BY2WD);
1107 memmove(cureg, ureg, sizeof(Ureg));
1108 /* return value of syscall in child */
1111 /* Things from bottom of syscall which were never executed */
1116 /* Give enough context in the ureg to produce a kernel stack for
1117 * a sleeping process
1120 setkernur(Ureg* ureg, Proc* p)
1122 ureg->pc = p->sched.pc;
1123 ureg->sp = p->sched.sp+4;