3 #include "../port/lib.h"
9 #include "../port/error.h"
12 static int trapinited;
14 void noted(Ureg*, ulong);
16 static void debugbpt(Ureg*, void*);
17 static void fault386(Ureg*, void*);
18 static void doublefault(Ureg*, void*);
19 static void unexpected(Ureg*, void*);
20 static void _dumpstack(Ureg*);
23 static Vctl *vctl[256];
27 Ntimevec = 20 /* number of time buckets for each intr */
29 ulong intrtimes[256][Ntimevec];
32 intrenable(int irq, void (*f)(Ureg*, void*), void* a, int tbdf, char *name)
38 print("intrenable: nil handler for %d, tbdf 0x%uX for %s\n",
43 v = xalloc(sizeof(Vctl));
49 strncpy(v->name, name, KNAMELEN-1);
50 v->name[KNAMELEN-1] = 0;
53 vno = arch->intrenable(v);
56 print("intrenable: couldn't enable irq %d, tbdf 0x%uX for %s\n",
62 if(vctl[vno]->isr != v->isr || vctl[vno]->eoi != v->eoi)
63 panic("intrenable: handler: %s %s %#p %#p %#p %#p",
64 vctl[vno]->name, v->name,
65 vctl[vno]->isr, v->isr, vctl[vno]->eoi, v->eoi);
73 intrdisable(int irq, void (*f)(Ureg *, void *), void *a, int tbdf, char *name)
79 * For now, none of this will work with the APIC code,
80 * there is no mapping between irq and vector as the IRQ
81 * is pretty meaningless.
83 if(arch->intrvecno == nil)
85 vno = arch->intrvecno(irq);
89 ((*pv)->irq != irq || (*pv)->tbdf != tbdf || (*pv)->f != f || (*pv)->a != a ||
90 strcmp((*pv)->name, name)))
95 *pv = (*pv)->next; /* Link out the entry */
97 if(vctl[vno] == nil && arch->intrdisable != nil)
98 arch->intrdisable(irq);
105 irqallocread(Chan*, void *vbuf, long n, vlong offset)
107 char *buf, *p, str[2*(11+1)+KNAMELEN+1+1];
112 if(n < 0 || offset < 0)
117 for(vno=0; vno<nelem(vctl); vno++){
118 for(v=vctl[vno]; v; v=v->next){
119 m = snprint(str, sizeof str, "%11d %11d %.*s\n", vno, v->irq, KNAMELEN, v->name);
120 if(m <= offset) /* if do not want this, skip entry */
123 /* skip offset bytes */
128 /* write at most min(n,m) bytes */
144 trapenable(int vno, void (*f)(Ureg*, void*), void* a, char *name)
148 if(vno < 0 || vno >= VectorPIC)
149 panic("trapenable: vno %d", vno);
150 v = xalloc(sizeof(Vctl));
151 v->tbdf = BUSUNKNOWN;
154 strncpy(v->name, name, KNAMELEN);
155 v->name[KNAMELEN-1] = 0;
159 v->next = vctl[vno]->next;
170 * Hack: should be locked with NVRAM access.
172 outb(0x70, 0x80); /* NMI latch clear */
175 x = inb(0x61) & 0x07; /* Enable NMI */
181 * Minimal trap setup. Just enough so that we can panic
182 * on traps (bugs) during kernel initialization.
183 * Called very early - malloc is not yet available.
192 idt = (Segdesc*)IDTADDR;
193 vaddr = (ulong)vectortable;
194 for(v = 0; v < 256; v++){
195 d1 = (vaddr & 0xFFFF0000)|SEGP;
199 d1 |= SEGPL(3)|SEGIG;
203 d1 |= SEGPL(3)|SEGIG;
207 d1 |= SEGPL(0)|SEGIG;
210 idt[v].d0 = (vaddr & 0xFFFF)|(KESEL<<16);
221 * Syscall() is called directly without going through trap().
223 trapenable(VectorBPT, debugbpt, 0, "debugpt");
224 trapenable(VectorPF, fault386, 0, "fault386");
225 trapenable(Vector2F, doublefault, 0, "doublefault");
226 trapenable(Vector15, unexpected, 0, "unexpected");
229 addarchfile("irqalloc", 0444, irqallocread, nil);
233 static char* excname[32] = {
236 "nonmaskable interrupt",
241 "coprocessor not available",
243 "coprocessor segment overrun",
245 "segment not present",
247 "general protection violation",
269 * keep histogram of interrupt service times
272 intrtime(Mach*, int vno)
278 diff = x - m->perf.intrts;
281 m->perf.inintr += diff;
282 if(up == nil && m->perf.inidle > diff)
283 m->perf.inidle -= diff;
285 diff /= m->cpumhz*100; /* quantum = 100µsec */
288 intrtimes[vno][diff]++;
291 /* go to user space */
298 /* precise time accounting, kernel exit */
299 tos = (Tos*)(USTKTOP-sizeof(Tos));
301 tos->kcycles += t - up->kentry;
302 tos->pcycles = up->pcycles;
307 * All traps come here. It is slower to have all traps call trap()
308 * rather than directly vectoring the handler. However, this avoids a
309 * lot of code duplication and possible bugs. The only exception is
311 * Trap is called with interrupts disabled via interrupt-gates.
316 int clockintr, i, vno, user;
322 /* fault386 can give a better error message */
323 if(ureg->trap == VectorPF)
325 panic("trap %lud: not ready", ureg->trap);
328 m->perf.intrts = perfticks();
329 user = userureg(ureg);
341 if(vno >= VectorPIC && vno != VectorSYSCALL)
342 m->lastintr = ctl->irq;
347 for(v = ctl; v != nil; v = v->next){
357 if(ctl->irq == IrqCLOCK || ctl->irq == IrqTIMER)
364 else if(vno < nelem(excname) && user){
366 sprint(buf, "sys: trap: %s", excname[vno]);
367 postnote(up, 1, buf, NDebug);
369 else if(vno >= VectorPIC && vno != VectorSYSCALL){
371 * An unknown interrupt.
372 * Check for a default IRQ7. This can happen when
373 * the IRQ input goes away before the acknowledge.
374 * In this case, a 'default IRQ7' is generated, but
375 * the corresponding bit in the ISR isn't set.
376 * In fact, just ignore all such interrupts.
379 /* call all interrupt routines, just in case */
380 for(i = VectorPIC; i <= MaxIrqLAPIC; i++){
386 for(v = ctl; v != nil; v = v->next){
390 /* should we do this? */
395 /* clear the interrupt */
398 if(0)print("cpu%d: spurious interrupt %d, last %d\n",
399 m->machno, vno, m->lastintr);
400 if(0)if(conf.nmach > 1){
401 for(i = 0; i < 32; i++){
402 if(!(active.machs & (1<<i)))
405 if(m->machno == mach->machno)
407 print(" cpu%d: last %d",
408 mach->machno, mach->lastintr);
418 if(vno == VectorNMI){
420 * Don't re-enable, it confuses the crash dumps.
423 iprint("cpu%d: PC %#8.8lux\n", m->machno, ureg->pc);
424 while(m->machno != 0)
428 if(vno == VectorGPF || vno == VectorSNP){
433 extern void load_fs(ulong);
434 extern void load_gs(ulong);
437 * CS, SS, DS and ES are initialized by strayintr
438 * in l.s. initialize the others too so we don't trap
439 * again when restoring the old context.
444 pc = (uchar*)ureg->pc;
445 sp = (ulong*)&ureg->sp;
448 * we test for the instructions used by forkret()
449 * to load the segments. this needs to be changed
450 * if forkret changes!
454 if((pc[0] == 0x0f && (pc[1] == 0xa9 /*GS*/ ||
455 pc[1] == 0xa1 /*FS*/)) || (pc[0] == 0x07) /*ES*/ ||
456 (pc[0] == 0x1f) /*DS*/){
463 sp[1] = UESEL; /*CS*/
464 sp[4] = UDSEL; /*SS*/
471 ureg->sp = (ulong)&ureg->sp;
474 if(vno < nelem(excname))
475 panic("%s", excname[vno]);
476 panic("unknown trap/intr: %d", vno);
480 /* delaysched set because we held a lock or because our quantum ended */
481 if(up && up->delaysched && clockintr){
487 if(up->procctl || up->nnote)
497 dumpregs2(Ureg* ureg)
500 iprint("cpu%d: registers for %s %lud\n",
501 m->machno, up->text, up->pid);
503 iprint("cpu%d: registers for kernel\n", m->machno);
504 iprint("FLAGS=%luX TRAP=%luX ECODE=%luX PC=%luX",
505 ureg->flags, ureg->trap, ureg->ecode, ureg->pc);
507 iprint(" SS=%4.4luX USP=%luX\n", ureg->ss & 0xFFFF, ureg->usp);
509 iprint(" SP=%luX\n", (ulong)&ureg->sp);
510 iprint(" AX %8.8luX BX %8.8luX CX %8.8luX DX %8.8luX\n",
511 ureg->ax, ureg->bx, ureg->cx, ureg->dx);
512 iprint(" SI %8.8luX DI %8.8luX BP %8.8luX\n",
513 ureg->si, ureg->di, ureg->bp);
514 iprint(" CS %4.4luX DS %4.4luX ES %4.4luX FS %4.4luX GS %4.4luX\n",
515 ureg->cs & 0xFFFF, ureg->ds & 0xFFFF, ureg->es & 0xFFFF,
516 ureg->fs & 0xFFFF, ureg->gs & 0xFFFF);
527 * Processor control registers.
528 * If machine check exception, time stamp counter, page size extensions
529 * or enhanced virtual 8086 mode extensions are supported, there is a
530 * CR4. If there is a CR4 and machine check extensions, read the machine
531 * check address and machine check type registers if RDMSR supported.
533 iprint(" CR0 %8.8lux CR2 %8.8lux CR3 %8.8lux",
534 getcr0(), getcr2(), getcr3());
535 if(m->cpuiddx & 0x9A){
536 iprint(" CR4 %8.8lux", getcr4());
537 if((m->cpuiddx & 0xA0) == 0xA0){
540 iprint("\n MCA %8.8llux MCT %8.8llux", mca, mct);
543 iprint("\n ur %#p up %#p\n", ureg, up);
548 * Fill in enough of Ureg to get a stack trace, and call a function.
549 * Used by debugging interface rdb.
552 callwithureg(void (*fn)(Ureg*))
555 ureg.pc = getcallerpc(&fn);
556 ureg.sp = (ulong)&fn;
561 _dumpstack(Ureg *ureg)
563 uintptr l, v, i, estack;
568 if((s = getconf("*nodumpstack")) != nil && strcmp(s, "0") != 0){
569 iprint("dumpstack disabled\n");
572 iprint("dumpstack\n");
575 x += iprint("ktrace /kernel/path %.8lux %.8lux <<EOF\n", ureg->pc, ureg->sp);
578 && (uintptr)&l >= (uintptr)up->kstack
579 && (uintptr)&l <= (uintptr)up->kstack+KSTACK)
580 estack = (uintptr)up->kstack+KSTACK;
581 else if((uintptr)&l >= (uintptr)m->stack
582 && (uintptr)&l <= (uintptr)m+MACHSIZE)
583 estack = (uintptr)m+MACHSIZE;
586 x += iprint("estackx %p\n", estack);
588 for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){
590 if((KTZERO < v && v < (uintptr)&etext) || estack-l < 32){
592 * Could Pick off general CALL (((uchar*)v)[-5] == 0xE8)
593 * and CALL indirect through AX
594 * (((uchar*)v)[-2] == 0xFF && ((uchar*)v)[-1] == 0xD0),
595 * but this is too clever and misses faulting address.
597 x += iprint("%.8p=%.8p ", l, v);
609 if(ureg->trap != VectorNMI)
613 for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){
614 iprint("%.8p ", *(uintptr*)l);
627 callwithureg(_dumpstack);
631 debugbpt(Ureg* ureg, void*)
637 /* restore pc to instruction that caused the trap */
639 sprint(buf, "sys: breakpoint");
640 postnote(up, 1, buf, NDebug);
644 doublefault(Ureg*, void*)
646 panic("double fault");
650 unexpected(Ureg* ureg, void*)
652 print("unexpected trap %lud; ignoring\n", ureg->trap);
655 extern void checkpages(void);
656 extern void checkfault(ulong, ulong);
658 fault386(Ureg* ureg, void*)
661 int read, user, n, insyscall;
665 read = !(ureg->ecode & 2);
667 user = userureg(ureg);
672 panic("kernel fault: bad address pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
674 panic("kernel fault: no user process pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
677 panic("user fault: up=0 pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
679 insyscall = up->insyscall;
681 n = fault(addr, read);
685 panic("fault: 0x%lux", addr);
688 checkfault(addr, ureg->pc);
689 sprint(buf, "sys: trap: fault %s addr=0x%lux",
690 read ? "read" : "write", addr);
691 postnote(up, 1, buf, NDebug);
693 up->insyscall = insyscall;
699 #include "../port/systab.h"
702 * Syscall is called directly from assembler without going through trap().
712 vlong startns, stopns;
715 panic("syscall: cs 0x%4.4luX", ureg->cs);
726 up->scallnr = scallnr;
728 if(up->procctl == Proc_tracesyscall){
730 * Redundant validaddr. Do we care?
731 * Tracing syscalls is not exactly a fast path...
732 * Beware, validaddr currently does a pexit rather
733 * than an error if there's a problem; that might
734 * change in the future.
736 if(sp < (USTKTOP-BY2PG) || sp > (USTKTOP-sizeof(Sargs)-BY2WD))
737 validaddr(sp, sizeof(Sargs)+BY2WD, 0);
739 syscallfmt(scallnr, ureg->pc, (va_list)(sp+BY2WD));
740 up->procctl = Proc_stopme;
743 free(up->syscalltrace);
744 up->syscalltrace = nil;
745 startns = todget(nil);
748 if(scallnr == RFORK && up->fpstate == FPactive){
750 up->fpstate = FPinactive;
757 if(scallnr >= nsyscall || systab[scallnr] == 0){
758 pprint("bad sys call number %lud pc %lux\n",
760 postnote(up, 1, "sys: bad sys call", NDebug);
764 if(sp<(USTKTOP-BY2PG) || sp>(USTKTOP-sizeof(Sargs)-BY2WD))
765 validaddr(sp, sizeof(Sargs)+BY2WD, 0);
767 up->s = *((Sargs*)(sp+BY2WD));
768 up->psstate = sysctab[scallnr];
770 ret = systab[scallnr](up->s.args);
773 /* failure: save the error buffer for errstr */
775 up->syserrstr = up->errstr;
777 if(0 && up->pid == 1)
778 print("syscall %lud error %s\n", scallnr, up->syserrstr);
781 print("bad errstack [%lud]: %d extra\n", scallnr, up->nerrlab);
782 for(i = 0; i < NERR; i++)
783 print("sp=%lux pc=%lux\n",
784 up->errlab[i].sp, up->errlab[i].pc);
785 panic("error stack");
789 * Put return value in frame. On the x86 the syscall is
790 * just another trap and the return value from syscall is
791 * ignored. On other machines the return value is put into
792 * the results register by caller of syscall.
796 if(up->procctl == Proc_tracesyscall){
797 stopns = todget(nil);
798 up->procctl = Proc_stopme;
799 sysretfmt(scallnr, (va_list)(sp+BY2WD), ret, startns, stopns);
804 free(up->syscalltrace);
805 up->syscalltrace = nil;
812 noted(ureg, *(ulong*)(sp+BY2WD));
814 if(scallnr!=RFORK && (up->procctl || up->nnote)){
818 /* if we delayed sched because we held a lock, sched now */
825 * Call user, if necessary, with note.
826 * Pass user the Ureg struct and the note on his stack.
840 if(up->fpstate == FPactive){
842 up->fpstate = FPinactive;
844 up->fpstate |= FPillegal;
850 if(strncmp(n->msg, "sys:", 4) == 0){
852 if(l > ERRMAX-15) /* " pc=0x12345678\0" */
854 sprint(n->msg+l, " pc=0x%.8lux", ureg->pc);
857 if(n->flag!=NUser && (up->notified || up->notify==0)){
858 if(n->flag == NDebug)
859 pprint("suicide: %s\n", n->msg);
861 pexit(n->msg, n->flag!=NDebug);
872 pexit(n->msg, n->flag!=NDebug);
875 sp -= 256; /* debugging: preserve context causing problem */
877 if(0) print("%s %lud: notify %.8lux %.8lux %.8lux %s\n",
878 up->text, up->pid, ureg->pc, ureg->usp, sp, n->msg);
880 if(!okaddr((ulong)up->notify, 1, 0)
881 || !okaddr(sp-ERRMAX-4*BY2WD, sizeof(Ureg)+ERRMAX+4*BY2WD, 1)){
883 pprint("suicide: bad address in notify\n");
887 memmove((Ureg*)sp, ureg, sizeof(Ureg));
888 *(Ureg**)(sp-BY2WD) = up->ureg; /* word under Ureg is old up->ureg */
889 up->ureg = (void*)sp;
891 memmove((char*)sp, up->note[0].msg, ERRMAX);
893 *(ulong*)(sp+2*BY2WD) = sp+3*BY2WD; /* arg 2 is string */
894 *(ulong*)(sp+1*BY2WD) = (ulong)up->ureg; /* arg 1 is ureg* */
895 *(ulong*)(sp+0*BY2WD) = 0; /* arg 0 is pc */
897 ureg->pc = (ulong)up->notify;
899 ureg->ss = ureg->ds = ureg->es = UDSEL;
902 memmove(&up->lastnote, &up->note[0], sizeof(Note));
903 memmove(&up->note[0], &up->note[1], up->nnote*sizeof(Note));
911 * Return user to state before notify()
914 noted(Ureg* ureg, ulong arg0)
920 if(arg0!=NRSTR && !up->notified) {
922 pprint("call to noted() when not notified\n");
927 nureg = up->ureg; /* pointer to user returned Ureg struct */
929 up->fpstate &= ~FPillegal;
932 oureg = (ulong)nureg;
933 if(!okaddr((ulong)oureg-BY2WD, BY2WD+sizeof(Ureg), 0)){
935 pprint("bad ureg in noted or call to noted when not notified\n");
939 /* don't let user change system flags */
940 nureg->flags = (ureg->flags & ~0xCD5) | (nureg->flags & 0xCD5);
944 memmove(ureg, nureg, sizeof(Ureg));
949 if(0) print("%s %lud: noted %.8lux %.8lux\n",
950 up->text, up->pid, nureg->pc, nureg->usp);
951 if(!okaddr(nureg->pc, 1, 0) || !okaddr(nureg->usp, BY2WD, 0)){
953 pprint("suicide: trap in noted\n");
956 up->ureg = (Ureg*)(*(ulong*)(oureg-BY2WD));
961 if(!okaddr(nureg->pc, BY2WD, 0)
962 || !okaddr(nureg->usp, BY2WD, 0)){
964 pprint("suicide: trap in noted\n");
968 sp = oureg-4*BY2WD-ERRMAX;
971 ((ulong*)sp)[1] = oureg; /* arg 1 0(FP) is ureg* */
972 ((ulong*)sp)[0] = 0; /* arg 0 is pc */
976 pprint("unknown noted arg 0x%lux\n", arg0);
977 up->lastnote.flag = NDebug;
981 if(up->lastnote.flag == NDebug){
983 pprint("suicide: %s\n", up->lastnote.msg);
986 pexit(up->lastnote.msg, up->lastnote.flag!=NDebug);
991 execregs(ulong entry, ulong ssize, ulong nargs)
996 up->fpstate = FPinit;
999 sp = (ulong*)(USTKTOP - ssize);
1003 ureg->usp = (ulong)sp;
1006 ureg->ss = ureg->ds = ureg->es = UDSEL;
1007 ureg->fs = ureg->gs = NULLSEL;
1008 return USTKTOP-sizeof(Tos); /* address of kernel/user shared data */
1012 * return the userpc the last exception happened at
1019 ureg = (Ureg*)up->dbgreg;
1023 /* This routine must save the values of registers the user is not permitted
1024 * to write from devproc and then restore the saved values before returning.
1027 setregisters(Ureg* ureg, char* pureg, char* uva, int n)
1031 flags = ureg->flags;
1032 memmove(pureg, uva, n);
1033 ureg->flags = (ureg->flags & 0xCD5) | (flags & ~0xCD5);
1042 up->kpfun(up->kparg);
1043 pexit("kproc dying", 0);
1047 kprocchild(Proc* p, void (*func)(void*), void* arg)
1050 * gotolabel() needs a word on the stack in
1051 * which to place the return PC used to jump
1054 p->sched.pc = (ulong)linkproc;
1055 p->sched.sp = (ulong)p->kstack+KSTACK-BY2WD;
1062 forkchild(Proc *p, Ureg *ureg)
1067 * Add 2*BY2WD to the stack to account for
1069 * - trap's argument (ur)
1071 p->sched.sp = (ulong)p->kstack+KSTACK-(sizeof(Ureg)+2*BY2WD);
1072 p->sched.pc = (ulong)forkret;
1074 cureg = (Ureg*)(p->sched.sp+2*BY2WD);
1075 memmove(cureg, ureg, sizeof(Ureg));
1076 /* return value of syscall in child */
1079 /* Things from bottom of syscall which were never executed */
1084 /* Give enough context in the ureg to produce a kernel stack for
1085 * a sleeping process
1088 setkernur(Ureg* ureg, Proc* p)
1090 ureg->pc = p->sched.pc;
1091 ureg->sp = p->sched.sp+4;