3 #include "../port/lib.h"
9 #include "../port/error.h"
12 static int trapinited;
14 void noted(Ureg*, ulong);
16 static void debugexc(Ureg*, void*);
17 static void debugbpt(Ureg*, void*);
18 static void faultamd64(Ureg*, void*);
19 static void doublefault(Ureg*, void*);
20 static void unexpected(Ureg*, void*);
21 static void _dumpstack(Ureg*);
24 static Vctl *vctl[256];
28 Ntimevec = 20 /* number of time buckets for each intr */
30 ulong intrtimes[256][Ntimevec];
33 intrenable(int irq, void (*f)(Ureg*, void*), void* a, int tbdf, char *name)
39 print("intrenable: nil handler for %d, tbdf 0x%uX for %s\n",
44 if(tbdf != BUSUNKNOWN && (irq == 0xff || irq == 0)){
45 print("intrenable: got unassigned irq %d, tbdf 0x%uX for %s\n",
52 * IRQ2 doesn't really exist, it's used to gang the interrupt
53 * controllers together. A device set to IRQ2 will appear on
54 * the second interrupt controller as IRQ9.
59 if((v = xalloc(sizeof(Vctl))) == nil)
60 panic("intrenable: out of memory");
66 strncpy(v->name, name, KNAMELEN-1);
67 v->name[KNAMELEN-1] = 0;
70 vno = arch->intrenable(v);
73 print("intrenable: couldn't enable irq %d, tbdf 0x%uX for %s\n",
79 if(vctl[vno]->isr != v->isr || vctl[vno]->eoi != v->eoi)
80 panic("intrenable: handler: %s %s %#p %#p %#p %#p",
81 vctl[vno]->name, v->name,
82 vctl[vno]->isr, v->isr, vctl[vno]->eoi, v->eoi);
90 intrdisable(int irq, void (*f)(Ureg *, void *), void *a, int tbdf, char *name)
97 if(arch->intrvecno == nil || (tbdf != BUSUNKNOWN && (irq == 0xff || irq == 0))){
99 * on APIC machine, irq is pretty meaningless
100 * and disabling the vector is not implemented.
101 * however, we still want to remove the matching
102 * Vctl entry to prevent calling Vctl.f() with a
103 * stale Vctl.a pointer.
108 vno = arch->intrvecno(irq);
112 for(pv = &vctl[vno]; (v = *pv) != nil; pv = &v->next){
113 if(v->isintr && (v->irq == irq || irq == -1)
114 && v->tbdf == tbdf && v->f == f && v->a == a
115 && strcmp(v->name, name) == 0)
122 if(irq != -1 && vctl[vno] == nil && arch->intrdisable != nil)
123 arch->intrdisable(irq);
126 } while(irq == -1 && ++vno <= MaxVectorAPIC);
131 irqallocread(Chan*, void *a, long n, vlong offset)
133 char buf[2*(11+1)+KNAMELEN+1+1];
137 if(n < 0 || offset < 0)
140 for(vno=0; vno<nelem(vctl); vno++){
141 for(v=vctl[vno]; v; v=v->next){
142 m = snprint(buf, sizeof(buf), "%11d %11d %.*s\n", vno, v->irq, KNAMELEN, v->name);
149 memmove(a, buf+offset, n);
157 trapenable(int vno, void (*f)(Ureg*, void*), void* a, char *name)
161 if(vno < 0 || vno >= VectorPIC)
162 panic("trapenable: vno %d", vno);
163 if((v = xalloc(sizeof(Vctl))) == nil)
164 panic("trapenable: out of memory");
165 v->tbdf = BUSUNKNOWN;
168 strncpy(v->name, name, KNAMELEN-1);
169 v->name[KNAMELEN-1] = 0;
173 v->next = vctl[vno]->next;
184 * Hack: should be locked with NVRAM access.
186 outb(0x70, 0x80); /* NMI latch clear */
189 x = inb(0x61) & 0x07; /* Enable NMI */
201 idt = (Segdesc*)IDTADDR;
202 vaddr = (uintptr)vectortable;
203 for(v = 0; v < 256; v++){
204 d1 = (vaddr & 0xFFFF0000)|SEGP;
208 d1 |= SEGPL(3)|SEGIG;
212 d1 |= SEGPL(3)|SEGIG;
216 d1 |= SEGPL(0)|SEGIG;
220 idt->d0 = (vaddr & 0xFFFF)|(KESEL<<16);
224 idt->d0 = (vaddr >> 32);
237 * Syscall() is called directly without going through trap().
239 trapenable(VectorDE, debugexc, 0, "debugexc");
240 trapenable(VectorBPT, debugbpt, 0, "debugpt");
241 trapenable(VectorPF, faultamd64, 0, "faultamd64");
242 trapenable(Vector2F, doublefault, 0, "doublefault");
243 trapenable(Vector15, unexpected, 0, "unexpected");
245 addarchfile("irqalloc", 0444, irqallocread, nil);
249 static char* excname[32] = {
252 "nonmaskable interrupt",
257 "coprocessor not available",
259 "coprocessor segment overrun",
261 "segment not present",
263 "general protection violation",
285 * keep histogram of interrupt service times
288 intrtime(Mach*, int vno)
294 diff = x - m->perf.intrts;
297 m->perf.inintr += diff;
298 if(up == nil && m->perf.inidle > diff)
299 m->perf.inidle -= diff;
301 diff /= m->cpumhz*100; /* quantum = 100µsec */
304 intrtimes[vno][diff]++;
307 /* go to user space */
314 /* precise time accounting, kernel exit */
315 tos = (Tos*)((uintptr)USTKTOP-sizeof(Tos));
317 tos->kcycles += t - up->kentry;
318 tos->pcycles = t + up->pcycles;
325 int clockintr, i, vno, user;
331 /* faultamd64 can give a better error message */
332 if(ureg->type == VectorPF)
333 faultamd64(ureg, nil);
334 panic("trap %llud: not ready", ureg->type);
337 m->perf.intrts = perfticks();
338 user = userureg(ureg);
352 m->lastintr = ctl->irq;
356 for(v = ctl; v != nil; v = v->next){
366 if(ctl->irq == IrqCLOCK || ctl->irq == IrqTIMER)
373 else if(vno < nelem(excname) && user){
375 sprint(buf, "sys: trap: %s", excname[vno]);
376 postnote(up, 1, buf, NDebug);
378 else if(vno >= VectorPIC){
380 * An unknown interrupt.
381 * Check for a default IRQ7. This can happen when
382 * the IRQ input goes away before the acknowledge.
383 * In this case, a 'default IRQ7' is generated, but
384 * the corresponding bit in the ISR isn't set.
385 * In fact, just ignore all such interrupts.
388 /* call all interrupt routines, just in case */
389 for(i = VectorPIC; i <= MaxIrqLAPIC; i++){
395 for(v = ctl; v != nil; v = v->next){
399 /* should we do this? */
404 /* clear the interrupt */
407 if(0)print("cpu%d: spurious interrupt %d, last %d\n",
408 m->machno, vno, m->lastintr);
409 if(0)if(conf.nmach > 1){
410 for(i = 0; i < MAXMACH; i++){
411 if(active.machs[i] == 0)
414 if(m->machno == mach->machno)
416 print(" cpu%d: last %d",
417 mach->machno, mach->lastintr);
427 if(vno == VectorNMI){
429 * Don't re-enable, it confuses the crash dumps.
432 iprint("cpu%d: nmi PC %#p, status %ux\n",
433 m->machno, ureg->pc, inb(0x61));
434 while(m->machno != 0)
441 extern void _rdmsrinst(void);
442 extern void _wrmsrinst(void);
443 extern void _peekinst(void);
445 pc = (void*)ureg->pc;
446 if(pc == _rdmsrinst || pc == _wrmsrinst){
447 if(vno == VectorGPF){
452 } else if(pc == _peekinst){
453 if(vno == VectorGPF){
462 ureg->sp = (uintptr)&ureg->sp;
465 if(vno < nelem(excname))
466 panic("%s", excname[vno]);
467 panic("unknown trap/intr: %d", vno);
471 /* delaysched set because we held a lock or because our quantum ended */
472 if(up && up->delaysched && clockintr){
478 if(up->procctl || up->nnote)
488 iprint("cpu%d: registers for %s %lud\n",
489 m->machno, up->text, up->pid);
491 iprint("cpu%d: registers for kernel\n", m->machno);
493 iprint(" AX %.16lluX BX %.16lluX CX %.16lluX\n",
494 ureg->ax, ureg->bx, ureg->cx);
495 iprint(" DX %.16lluX SI %.16lluX DI %.16lluX\n",
496 ureg->dx, ureg->si, ureg->di);
497 iprint(" BP %.16lluX R8 %.16lluX R9 %.16lluX\n",
498 ureg->bp, ureg->r8, ureg->r9);
499 iprint(" R10 %.16lluX R11 %.16lluX R12 %.16lluX\n",
500 ureg->r10, ureg->r11, ureg->r12);
501 iprint(" R13 %.16lluX R14 %.16lluX R15 %.16lluX\n",
502 ureg->r13, ureg->r14, ureg->r15);
503 iprint(" CS %.4lluX SS %.4lluX PC %.16lluX SP %.16lluX\n",
504 ureg->cs & 0xffff, ureg->ss & 0xffff, ureg->pc, ureg->sp);
505 iprint("TYPE %.2lluX ERROR %.4lluX FLAGS %.8lluX\n",
506 ureg->type & 0xff, ureg->error & 0xffff, ureg->flags & 0xffffffff);
509 * Processor control registers.
510 * If machine check exception, time stamp counter, page size extensions
511 * or enhanced virtual 8086 mode extensions are supported, there is a
512 * CR4. If there is a CR4 and machine check extensions, read the machine
513 * check address and machine check type registers if RDMSR supported.
515 iprint(" CR0 %8.8llux CR2 %16.16llux CR3 %16.16llux",
516 getcr0(), getcr2(), getcr3());
517 if(m->cpuiddx & (Mce|Tsc|Pse|Vmex)){
518 iprint(" CR4 %16.16llux\n", getcr4());
522 iprint(" ur %#p up %#p\n", ureg, up);
527 * Fill in enough of Ureg to get a stack trace, and call a function.
528 * Used by debugging interface rdb.
531 callwithureg(void (*fn)(Ureg*))
534 ureg.pc = getcallerpc(&fn);
535 ureg.sp = (uintptr)&fn;
540 _dumpstack(Ureg *ureg)
542 uintptr l, v, i, estack;
547 if((s = getconf("*nodumpstack")) != nil && strcmp(s, "0") != 0){
548 iprint("dumpstack disabled\n");
551 iprint("dumpstack\n");
554 x += iprint("ktrace /kernel/path %#p %#p <<EOF\n", ureg->pc, ureg->sp);
557 && (uintptr)&l >= (uintptr)up->kstack
558 && (uintptr)&l <= (uintptr)up->kstack+KSTACK)
559 estack = (uintptr)up->kstack+KSTACK;
560 else if((uintptr)&l >= (uintptr)m->stack
561 && (uintptr)&l <= (uintptr)m+MACHSIZE)
562 estack = (uintptr)m+MACHSIZE;
565 x += iprint("estackx %p\n", estack);
567 for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){
569 if((KTZERO < v && v < (uintptr)&etext) || estack-l < 32){
571 * Could pick off general CALL (((uchar*)v)[-5] == 0xE8)
572 * and CALL indirect through AX
573 * (((uchar*)v)[-2] == 0xFF && ((uchar*)v)[-1] == 0xD0),
574 * but this is too clever and misses faulting address.
576 x += iprint("%.8lux=%.8lux ", (ulong)l, (ulong)v);
588 if(ureg->type != VectorNMI)
592 for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){
593 iprint("%.8p ", *(uintptr*)l);
606 callwithureg(_dumpstack);
610 debugexc(Ureg *ureg, void *)
619 panic("kernel debug exception dr6=%#.8ullx", dr6);
623 else if(!canqlock(&up->debug))
626 m = (m >> 4 | m >> 3) & 8 | (m >> 3 | m >> 2) & 4 | (m >> 2 | m >> 1) & 2 | (m >> 1 | m) & 1;
629 sprint(buf, "sys: debug exception dr6=%#.8ullx", dr6);
630 postnote(up, 0, buf, NDebug);
633 e = buf + sizeof(buf);
634 p = seprint(p, e, "sys: watchpoint ");
635 for(i = 0; i < 4; i++)
637 p = seprint(p, e, "%d%s", i, (m >> i + 1 != 0) ? "," : "");
638 postnote(up, 0, buf, NDebug);
644 debugbpt(Ureg* ureg, void*)
650 /* restore pc to instruction that caused the trap */
652 sprint(buf, "sys: breakpoint");
653 postnote(up, 1, buf, NDebug);
657 doublefault(Ureg*, void*)
659 panic("double fault");
663 unexpected(Ureg* ureg, void*)
665 print("unexpected trap %llud; ignoring\n", ureg->type);
668 extern void checkpages(void);
671 faultamd64(Ureg* ureg, void*)
674 int read, user, n, insyscall, f;
678 read = !(ureg->error & 2);
679 user = userureg(ureg);
682 extern void _peekinst(void);
684 if((void(*)(void))ureg->pc == _peekinst){
690 panic("kernel fault: bad address pc=%#p addr=%#p", ureg->pc, addr);
692 panic("kernel fault: no user process pc=%#p addr=%#p", ureg->pc, addr);
695 panic("user fault: up=0 pc=%#p addr=%#p", ureg->pc, addr);
697 insyscall = up->insyscall;
700 if(!user && waserror()){
701 if(up->nerrlab == 0){
702 pprint("suicide: sys: %s\n", up->errstr);
703 pexit(up->errstr, 1);
707 up->insyscall = insyscall;
711 n = fault(addr, read);
715 panic("fault: %#p", addr);
718 sprint(buf, "sys: trap: fault %s addr=%#p",
719 read ? "read" : "write", addr);
720 postnote(up, 1, buf, NDebug);
722 if(!user) poperror();
725 up->insyscall = insyscall;
731 #include "../port/systab.h"
734 * Syscall is called directly from assembler without going through trap().
744 vlong startns, stopns;
747 panic("syscall: cs 0x%4.4lluX", ureg->cs);
757 scallnr = ureg->bp; /* RARG */
758 up->scallnr = scallnr;
766 if(sp<(USTKTOP-BY2PG) || sp>(USTKTOP-sizeof(Sargs)-BY2WD))
767 validaddr(sp, sizeof(Sargs)+BY2WD, 0);
769 up->s = *((Sargs*)(sp+BY2WD));
771 syscallfmt(scallnr, ureg->pc, (va_list)up->s.args);
772 print("syscall: %s\n", up->syscalltrace);
775 if(up->procctl == Proc_tracesyscall){
776 syscallfmt(scallnr, ureg->pc, (va_list)up->s.args);
778 up->procctl = Proc_stopme;
781 startns = todget(nil);
783 if(scallnr >= nsyscall || systab[scallnr] == 0){
784 pprint("bad sys call number %lud pc %#p\n",
786 postnote(up, 1, "sys: bad sys call", NDebug);
789 up->psstate = sysctab[scallnr];
790 ret = systab[scallnr]((va_list)up->s.args);
793 /* failure: save the error buffer for errstr */
795 up->syserrstr = up->errstr;
797 if(0 && up->pid == 1)
798 print("syscall %lud error %s\n", scallnr, up->syserrstr);
801 print("bad errstack [%lud]: %d extra\n", scallnr, up->nerrlab);
802 for(i = 0; i < NERR; i++)
803 print("sp=%#p pc=%#p\n",
804 up->errlab[i].sp, up->errlab[i].pc);
805 panic("error stack");
810 print("syscallret: %lud %s %s ret=%lld\n",
811 up->pid, up->text, sysctab[scallnr], ret);
814 if(up->procctl == Proc_tracesyscall){
815 stopns = todget(nil);
816 sysretfmt(scallnr, (va_list)up->s.args, ret, startns, stopns);
818 up->procctl = Proc_stopme;
828 if(scallnr == NOTED){
829 noted(ureg, *((ulong*)up->s.args));
831 * normally, syscall() returns to forkret()
832 * not restoring general registers when going
833 * to userspace. to completely restore the
834 * interrupted context, we have to return thru
835 * noteret(). we override return pc to jump to
836 * it when returning from syscall()
838 ((void**)&ureg)[-1] = (void*)noteret;
841 if(scallnr!=RFORK && (up->procctl || up->nnote)){
843 ((void**)&ureg)[-1] = (void*)noteret; /* loads RARG */
846 /* if we delayed sched because we held a lock, sched now */
853 * Call user, if necessary, with note.
854 * Pass user the Ureg struct and the note on his stack.
871 if(strncmp(n->msg, "sys:", 4) == 0){
873 if(l > ERRMAX-15) /* " pc=0x12345678\0" */
875 sprint(n->msg+l, " pc=%#p", ureg->pc);
878 if(n->flag!=NUser && (up->notified || up->notify==0)){
880 if(n->flag == NDebug)
881 pprint("suicide: %s\n", n->msg);
882 pexit(n->msg, n->flag!=NDebug);
893 pexit(n->msg, n->flag!=NDebug);
896 sp -= 256; /* debugging: preserve context causing problem */
898 if(0) print("%s %lud: notify %#p %#p %#p %s\n",
899 up->text, up->pid, ureg->pc, ureg->sp, sp, n->msg);
901 if(!okaddr((uintptr)up->notify, 1, 0)
902 || !okaddr(sp-ERRMAX-4*BY2WD, sizeof(Ureg)+ERRMAX+4*BY2WD, 1)){
904 pprint("suicide: bad address in notify\n");
908 memmove((Ureg*)sp, ureg, sizeof(Ureg));
909 *(Ureg**)(sp-BY2WD) = up->ureg; /* word under Ureg is old up->ureg */
910 up->ureg = (void*)sp;
912 memmove((char*)sp, up->note[0].msg, ERRMAX);
914 ((uintptr*)sp)[2] = sp + 3*BY2WD; /* arg2 string */
915 ((uintptr*)sp)[1] = (uintptr)up->ureg; /* arg1 is ureg* */
916 ((uintptr*)sp)[0] = 0; /* arg0 is pc */
918 ureg->pc = (uintptr)up->notify;
919 ureg->bp = (uintptr)up->ureg; /* arg1 passed in RARG */
924 memmove(&up->lastnote, &up->note[0], sizeof(Note));
925 memmove(&up->note[0], &up->note[1], up->nnote*sizeof(Note));
928 if(up->fpstate == FPactive){
930 up->fpstate = FPinactive;
932 up->fpstate |= FPillegal;
937 * Return user to state before notify()
940 noted(Ureg* ureg, ulong arg0)
945 up->fpstate &= ~FPillegal;
948 if(arg0!=NRSTR && !up->notified) {
950 pprint("call to noted() when not notified\n");
955 nureg = up->ureg; /* pointer to user returned Ureg struct */
958 oureg = (uintptr)nureg;
959 if(!okaddr(oureg-BY2WD, BY2WD+sizeof(Ureg), 0)){
961 pprint("bad ureg in noted or call to noted when not notified\n");
965 /* don't let user change system flags or segment registers */
966 setregisters(ureg, (char*)ureg, (char*)nureg, sizeof(Ureg));
971 if(0) print("%s %lud: noted %#p %#p\n",
972 up->text, up->pid, nureg->pc, nureg->sp);
973 if(!okaddr(nureg->pc, 1, 0) || !okaddr(nureg->sp, BY2WD, 0)){
975 pprint("suicide: trap in noted\n");
978 up->ureg = (Ureg*)(*(uintptr*)(oureg-BY2WD));
983 if(!okaddr(nureg->pc, 1, 0)
984 || !okaddr(nureg->sp, BY2WD, 0)){
986 pprint("suicide: trap in noted\n");
990 sp = oureg-4*BY2WD-ERRMAX;
993 ureg->bp = oureg; /* arg 1 passed in RARG */
994 ((uintptr*)sp)[1] = oureg; /* arg 1 0(FP) is ureg* */
995 ((uintptr*)sp)[0] = 0; /* arg 0 is pc */
999 up->lastnote.flag = NDebug;
1003 qunlock(&up->debug);
1004 if(up->lastnote.flag == NDebug)
1005 pprint("suicide: %s\n", up->lastnote.msg);
1006 pexit(up->lastnote.msg, up->lastnote.flag!=NDebug);
1011 execregs(uintptr entry, ulong ssize, ulong nargs)
1016 sp = (uintptr*)(USTKTOP - ssize);
1019 ureg->sp = (uintptr)sp;
1023 ureg->r14 = ureg->r15 = 0; /* extern user registers */
1024 return (uintptr)USTKTOP-sizeof(Tos); /* address of kernel/user shared data */
1028 * return the userpc the last exception happened at
1035 ureg = (Ureg*)up->dbgreg;
1039 /* This routine must save the values of registers the user is not permitted
1040 * to write from devproc and noted() and then restore the saved values before returning.
1043 setregisters(Ureg* ureg, char* pureg, char* uva, int n)
1047 flags = ureg->flags;
1048 memmove(pureg, uva, n);
1051 ureg->flags = (ureg->flags & 0x00ff) | (flags & 0xff00);
1052 ureg->pc &= UADDRMASK;
1059 up->kpfun(up->kparg);
1060 pexit("kproc dying", 0);
1064 kprocchild(Proc* p, void (*func)(void*), void* arg)
1067 * gotolabel() needs a word on the stack in
1068 * which to place the return PC used to jump
1071 p->sched.pc = (uintptr)linkproc;
1072 p->sched.sp = (uintptr)p->kstack+KSTACK-BY2WD;
1079 forkchild(Proc *p, Ureg *ureg)
1084 * Add 2*BY2WD to the stack to account for
1086 * - trap's argument (ur)
1088 p->sched.sp = (uintptr)p->kstack+KSTACK-(sizeof(Ureg)+2*BY2WD);
1089 p->sched.pc = (uintptr)forkret;
1091 cureg = (Ureg*)(p->sched.sp+2*BY2WD);
1092 memmove(cureg, ureg, sizeof(Ureg));
1096 /* Things from bottom of syscall which were never executed */
1101 /* Give enough context in the ureg to produce a kernel stack for
1102 * a sleeping process
1105 setkernur(Ureg* ureg, Proc* p)
1107 ureg->pc = p->sched.pc;
1108 ureg->sp = p->sched.sp+8;
1109 ureg->r14 = (uintptr)p;