3 #include "../port/lib.h"
9 #include "../port/error.h"
16 #define FAULTLOGFAST(a)
17 #define POSTNOTELOG(a)
23 /* trap_info_t flags */
29 void noted(Ureg*, ulong);
31 static void debugbpt(Ureg*, void*);
32 static void fault386(Ureg*, void*);
33 static void safe_fault386(Ureg*, void*);
34 static void doublefault(Ureg*, void*);
35 static void unexpected(Ureg*, void*);
36 static void _dumpstack(Ureg*);
39 static Vctl *vctl[256];
43 Ntimevec = 20 /* number of time buckets for each intr */
45 ulong intrtimes[256][Ntimevec];
48 intrenable(int irq, void (*f)(Ureg*, void*), void* a, int tbdf, char *name)
54 SETUPLOG(dprint("intrenable: irq %d, f %p, a %p, tbdf 0x%x, name %s\n",
55 irq, f, a, tbdf, name);)
58 print("intrenable: nil handler for %d, tbdf 0x%uX for %s\n",
63 v = xalloc(sizeof(Vctl));
69 strncpy(v->name, name, KNAMELEN-1);
70 v->name[KNAMELEN-1] = 0;
73 vno = arch->intrenable(v);
76 print("intrenable: couldn't enable irq %d, tbdf 0x%uX for %s\n",
82 if(vctl[vno]->isr != v->isr || vctl[vno]->eoi != v->eoi)
83 panic("intrenable: handler: %s %s %p %p %p %p\n",
84 vctl[vno]->name, v->name,
85 vctl[vno]->isr, v->isr, vctl[vno]->eoi, v->eoi);
89 SETUPLOG(dprint("INTRENABLE: vctl[%d] is %p\n", vno, vctl[vno]);)
94 intrdisable(int irq, void (*f)(Ureg *, void *), void *a, int tbdf, char *name)
99 vno = arch->intrvecno(irq);
101 for(pv = &vctl[vno]; (v = *pv) != nil; pv = &v->next){
102 if(v->isintr && v->irq == irq
103 && v->tbdf == tbdf && v->f == f && v->a == a
104 && strcmp(v->name, name) == 0){
108 if(vctl[vno] == nil && arch->intrdisable != nil)
109 arch->intrdisable(irq);
117 irqallocread(Chan*, void *a, long n, vlong offset)
119 char buf[2*(11+1)+KNAMELEN+1+1];
123 if(n < 0 || offset < 0)
126 for(vno=0; vno<nelem(vctl); vno++){
127 for(v=vctl[vno]; v; v=v->next){
128 m = snprint(buf, sizeof(buf), "%11d %11d %.*s\n", vno, v->irq, KNAMELEN, v->name);
135 memmove(a, buf+offset, n);
143 trapenable(int vno, void (*f)(Ureg*, void*), void* a, char *name)
147 if(vno < 0 || vno >= VectorPIC)
148 panic("trapenable: vno %d\n", vno);
149 v = xalloc(sizeof(Vctl));
150 v->tbdf = BUSUNKNOWN;
153 strncpy(v->name, name, KNAMELEN);
154 v->name[KNAMELEN-1] = 0;
158 v->next = vctl[vno]->next;
166 /* leave this here in case Plan 9 ever makes it to dom0 */
169 * Hack: should be locked with NVRAM access.
171 outb(0x70, 0x80); /* NMI latch clear */
174 x = inb(0x61) & 0x07; /* Enable NMI */
180 /* We started out doing the 'giant bulk init' for all traps.
181 * We now do them one-by-one instead, since error analysis is
182 * so much easier that way.
191 HYPERVISOR_set_callbacks(
192 KESEL, (ulong)hypervisor_callback,
193 KESEL, (ulong)failsafe_callback);
195 /* XXX rework as single hypercall once debugged */
197 vaddr = (ulong)vectortable;
198 for(v = 0; v < 256; v++){
202 flag = SPL3 | EvDisable;
205 flag = SPL0 | EvDisable;
208 t[0] = (trap_info_t){ v, flag, KESEL, vaddr };
209 if(HYPERVISOR_set_trap_table(t) < 0)
210 panic("trapinit: FAIL: try to set: 0x%x, 0x%x, 0x%x, 0x%ulx\n",
211 t[0].vector, t[0].flags, t[0].cs, t[0].address);
217 * Syscall() is called directly without going through trap().
219 trapenable(VectorBPT, debugbpt, 0, "debugpt");
220 trapenable(VectorPF, fault386, 0, "fault386");
221 trapenable(Vector2F, doublefault, 0, "doublefault");
222 trapenable(Vector15, unexpected, 0, "unexpected");
225 addarchfile("irqalloc", 0444, irqallocread, nil);
228 static char* excname[32] = {
231 "nonmaskable interrupt",
236 "coprocessor not available",
238 "coprocessor segment overrun",
240 "segment not present",
242 "general protection violation",
264 * keep histogram of interrupt service times
267 intrtime(Mach*, int vno)
273 diff = x - m->perf.intrts;
276 m->perf.inintr += diff;
277 if(up == nil && m->perf.inidle > diff)
278 m->perf.inidle -= diff;
280 diff /= m->cpumhz*100; // quantum = 100µsec
283 intrtimes[vno][diff]++;
286 /* go to user space */
293 /* precise time accounting, kernel exit */
294 tos = (Tos*)(USTKTOP-sizeof(Tos));
296 tos->kcycles += t - up->kentry;
297 tos->pcycles = up->pcycles;
299 INTRLOG(dprint("leave kexit, TOS %p\n", tos);)
303 * All traps come here. It is slower to have all traps call trap()
304 * rather than directly vectoring the handler. However, this avoids a
305 * lot of code duplication and possible bugs. The only exception is
307 * Trap is called with interrupts (and events) disabled via interrupt-gates.
312 int clockintr, i, vno, user;
317 TRAPLOG(dprint("trap ureg %lux %lux\n", (ulong*)ureg, ureg->trap);)
318 m->perf.intrts = perfticks();
319 user = (ureg->cs & 0xFFFF) == UESEL;
328 if(vno < 0 || vno >= 256)
329 panic("bad interrupt number %d\n", vno);
330 TRAPLOG(dprint("trap: vno is 0x%x, vctl[%d] is %p\n", vno, vno, vctl[vno]);)
332 INTRLOG(dprint("ctl is %p, isintr is %d\n", ctl, ctl->isintr);)
335 if(vno >= VectorPIC && vno != VectorSYSCALL)
336 m->lastintr = ctl->irq;
339 INTRLOG(dprint("ctl %p, isr %p\n", ctl, ctl->isr);)
342 for(v = ctl; v != nil; v = v->next){
343 INTRLOG(dprint("ctl %p, f is %p\n", v, v->f);)
347 INTRLOG(dprint("ctl %p, eoi %p\n", ctl, ctl->eoi);)
354 //if(ctl->irq == IrqCLOCK || ctl->irq == IrqTIMER)
355 if (ctl->tbdf != BUSUNKNOWN && ctl->irq == VIRQ_TIMER)
362 else if(vno <= nelem(excname) && user){
364 sprint(buf, "sys: trap: %s", excname[vno]);
365 postnote(up, 1, buf, NDebug);
367 else if(vno >= VectorPIC && vno != VectorSYSCALL){
369 * An unknown interrupt.
370 * Check for a default IRQ7. This can happen when
371 * the IRQ input goes away before the acknowledge.
372 * In this case, a 'default IRQ7' is generated, but
373 * the corresponding bit in the ISR isn't set.
374 * In fact, just ignore all such interrupts.
377 /* call all interrupt routines, just in case */
378 for(i = VectorPIC; i <= MaxIrqLAPIC; i++){
384 for(v = ctl; v != nil; v = v->next){
388 /* should we do this? */
393 iprint("cpu%d: spurious interrupt %d, last %d\n",
394 m->machno, vno, m->lastintr);
395 if(0)if(conf.nmach > 1){
396 for(i = 0; i < MAXMACH; i++){
397 if(active.machs[i] == 0)
400 if(m->machno == mach->machno)
402 print(" cpu%d: last %d",
403 mach->machno, mach->lastintr);
413 if(vno == VectorNMI){
416 print("cpu%d: PC %8.8luX\n",
417 m->machno, ureg->pc);
423 ureg->sp = (ulong)&ureg->sp;
426 if(vno < nelem(excname))
427 panic("%s", excname[vno]);
428 panic("unknown trap/intr: %d\n", vno);
432 /* delaysched set because we held a lock or because our quantum ended */
433 if(up && up->delaysched && clockintr){
434 INTRLOG(dprint("calling sched in trap? \n");)
436 INTRLOG(dprint("Back from calling sched in trap?\n");)
441 if(up->procctl || up->nnote)
446 if (ureg->trap == 0xe) {
448 * On a page fault we need to restore the old spl ourselves;
449 * Xen won't do it for us.
452 if (ureg->flags & 0x200)
458 dumpregs2(Ureg* ureg)
461 print("cpu%d: registers for %s %lud\n",
462 m->machno, up->text, up->pid);
464 print("cpu%d: registers for kernel\n", m->machno);
465 print("FLAGS=%luX TRAP=%luX ECODE=%luX PC=%luX",
466 ureg->flags, ureg->trap, ureg->ecode, ureg->pc);
467 print(" SS=%4.4luX USP=%luX\n", ureg->ss & 0xFFFF, ureg->usp);
468 print(" AX %8.8luX BX %8.8luX CX %8.8luX DX %8.8luX\n",
469 ureg->ax, ureg->bx, ureg->cx, ureg->dx);
470 print(" SI %8.8luX DI %8.8luX BP %8.8luX\n",
471 ureg->si, ureg->di, ureg->bp);
472 print(" CS %4.4luX DS %4.4luX ES %4.4luX FS %4.4luX GS %4.4luX\n",
473 ureg->cs & 0xFFFF, ureg->ds & 0xFFFF, ureg->es & 0xFFFF,
474 ureg->fs & 0xFFFF, ureg->gs & 0xFFFF);
485 * Processor control registers.
486 * If machine check exception, time stamp counter, page size extensions
487 * or enhanced virtual 8086 mode extensions are supported, there is a
488 * CR4. If there is a CR4 and machine check extensions, read the machine
489 * check address and machine check type registers if RDMSR is supported.
491 print("SKIPPING get of crx and other such stuff.\n");/* */
493 print(" CR0 %8.8lux CR2 %8.8lux CR3 %8.8lux",
494 getcr0(), getcr2(), getcr3());
495 if(m->cpuiddx & 0x9A){
496 print(" CR4 %8.8lux", getcr4());
497 if((m->cpuiddx & 0xA0) == 0xA0){
500 print("\n MCA %8.8llux MCT %8.8llux", mca, mct);
504 print("\n ur %lux up %lux\n", (ulong)ureg, (ulong)up);
509 * Fill in enough of Ureg to get a stack trace, and call a function.
510 * Used by debugging interface rdb.
513 callwithureg(void (*fn)(Ureg*))
516 ureg.pc = getcallerpc(&fn);
517 ureg.sp = (ulong)&fn;
522 _dumpstack(Ureg *ureg)
524 ulong l, v, i, estack;
528 if(getconf("*nodumpstack")){
529 iprint("dumpstack disabled\n");
532 iprint("dumpstack\n");
534 x += print("ktrace /kernel/path %.8lux %.8lux <<EOF\n", ureg->pc, ureg->sp);
537 && (ulong)&l >= (ulong)up->kstack
538 && (ulong)&l <= (ulong)up->kstack+KSTACK)
539 estack = (ulong)up->kstack+KSTACK;
540 else if((ulong)&l >= (ulong)m->stack
541 && (ulong)&l <= (ulong)m+BY2PG)
542 estack = (ulong)m+MACHSIZE;
545 x += print("estackx %.8lux\n", estack);
547 for(l=(ulong)&l; l<estack; l+=4){
549 if((KTZERO < v && v < (ulong)&etext) || estack-l<32){
551 * we could pick off general CALL (((uchar*)v)[-5] == 0xE8)
552 * and CALL indirect through AX (((uchar*)v)[-2] == 0xFF && ((uchar*)v)[-1] == 0xD0),
553 * but this is too clever and misses the faulting address.
555 x += print("%.8lux=%.8lux ", l, v);
571 callwithureg(_dumpstack);
575 debugbpt(Ureg* ureg, void*)
581 /* restore pc to instruction that caused the trap */
583 sprint(buf, "sys: breakpoint");
584 postnote(up, 1, buf, NDebug);
585 print("debugbpt for proc %lud\n", up->pid);
589 doublefault(Ureg*, void*)
591 panic("double fault");
595 unexpected(Ureg* ureg, void*)
597 print("unexpected trap %lud; ignoring\n", ureg->trap);
601 fault386(Ureg* ureg, void*)
604 int read, user, n, insyscall;
607 addr = HYPERVISOR_shared_info->vcpu_info[m->machno].arch.cr2;
609 dprint("cr2 is 0x%lx\n", addr);
616 user = (ureg->cs & 0xFFFF) == UESEL;
617 if(!user && mmukmapsync(addr))
619 read = !(ureg->ecode & 2);
621 panic("fault but up is zero; pc 0x%8.8lux addr 0x%8.8lux\n", ureg->pc, addr);
622 insyscall = up->insyscall;
624 n = fault(addr, ureg->pc, read);
628 panic("fault: 0x%lux\n", addr);
630 sprint(buf, "sys: trap: fault %s addr=0x%lux",
631 read? "read" : "write", addr);
632 dprint("Posting %s to %lud\n", buf, up->pid);
633 postnote(up, 1, buf, NDebug);
635 up->insyscall = insyscall;
636 FAULTLOG(dprint("fault386: all done\n");)
642 #include "../port/systab.h"
645 * Syscall is called directly from assembler without going through trap().
656 SYSCALLLOG(dprint("%d: syscall ...#%ld(%s)\n",
657 up->pid, ureg->ax, sysctab[ureg->ax]);)
659 if((ureg->cs & 0xFFFF) != UESEL)
660 panic("syscall: cs 0x%4.4luX\n", ureg->cs);
669 if(up->procctl == Proc_tracesyscall){
670 up->procctl = Proc_stopme;
675 up->scallnr = scallnr;
676 if(scallnr == RFORK && up->fpstate == FPactive){
678 up->fpstate = FPinactive;
686 if(scallnr >= nsyscall || systab[scallnr] == 0){
687 pprint("bad sys call number %lud pc %lux\n",
689 postnote(up, 1, "sys: bad sys call", NDebug);
693 if(sp<(USTKTOP-BY2PG) || sp>(USTKTOP-sizeof(Sargs)-BY2WD))
694 validaddr(sp, sizeof(Sargs)+BY2WD, 0);
696 up->s = *((Sargs*)(sp+BY2WD));
697 up->psstate = sysctab[scallnr];
699 ret = systab[scallnr]((va_list)up->s.args);
702 /* failure: save the error buffer for errstr */
704 up->syserrstr = up->errstr;
706 if(0 && up->pid == 1)
707 print("syscall %lud error %s\n", scallnr, up->syserrstr);
710 print("bad errstack [%lud]: %d extra\n", scallnr, up->nerrlab);
711 for(i = 0; i < NERR; i++)
712 print("sp=%lux pc=%lux\n",
713 up->errlab[i].sp, up->errlab[i].pc);
714 panic("error stack");
717 SYSCALLLOG(dprint("%d: Syscall %d returns %d, ureg %p\n", up->pid, scallnr, ret, ureg);)
719 * Put return value in frame. On the x86 the syscall is
720 * just another trap and the return value from syscall is
721 * ignored. On other machines the return value is put into
722 * the results register by caller of syscall.
726 if(up->procctl == Proc_tracesyscall){
728 up->procctl = Proc_stopme;
735 INTRLOG(dprint("cleared insyscall\n");)
737 noted(ureg, *(ulong*)(sp+BY2WD));
739 if(scallnr!=RFORK && (up->procctl || up->nnote)){
743 /* if we delayed sched because we held a lock, sched now */
746 INTRLOG(dprint("before kexit\n");)
751 * Call user, if necessary, with note.
752 * Pass user the Ureg struct and the note on his stack.
766 if(up->fpstate == FPactive){
768 up->fpstate = FPinactive;
770 up->fpstate |= FPillegal;
776 if(strncmp(n->msg, "sys:", 4) == 0){
778 if(l > ERRMAX-15) /* " pc=0x12345678\0" */
780 sprint(n->msg+l, " pc=0x%.8lux", ureg->pc);
783 if(n->flag!=NUser && (up->notified || up->notify==0)){
784 if(n->flag == NDebug)
785 pprint("suicide: %s\n", n->msg);
787 pexit(n->msg, n->flag!=NDebug);
798 pexit(n->msg, n->flag!=NDebug);
803 if(!okaddr((ulong)up->notify, 1, 0)
804 || !okaddr(sp-ERRMAX-4*BY2WD, sizeof(Ureg)+ERRMAX+4*BY2WD, 1)){
805 pprint("suicide: bad address in notify\n");
810 up->ureg = (void*)sp;
811 memmove((Ureg*)sp, ureg, sizeof(Ureg));
812 *(Ureg**)(sp-BY2WD) = up->ureg; /* word under Ureg is old up->ureg */
813 up->ureg = (void*)sp;
815 memmove((char*)sp, up->note[0].msg, ERRMAX);
817 *(ulong*)(sp+2*BY2WD) = sp+3*BY2WD; /* arg 2 is string */
818 *(ulong*)(sp+1*BY2WD) = (ulong)up->ureg; /* arg 1 is ureg* */
819 *(ulong*)(sp+0*BY2WD) = 0; /* arg 0 is pc */
821 ureg->pc = (ulong)up->notify;
824 memmove(&up->lastnote, &up->note[0], sizeof(Note));
825 memmove(&up->note[0], &up->note[1], up->nnote*sizeof(Note));
833 * Return user to state before notify()
836 noted(Ureg* ureg, ulong arg0)
842 if(arg0!=NRSTR && !up->notified) {
844 pprint("call to noted() when not notified\n");
849 nureg = up->ureg; /* pointer to user returned Ureg struct */
851 up->fpstate &= ~FPillegal;
854 oureg = (ulong)nureg;
855 if(!okaddr((ulong)oureg-BY2WD, BY2WD+sizeof(Ureg), 0)){
856 pprint("bad ureg in noted or call to noted when not notified\n");
862 * Check the segment selectors are all valid, otherwise
863 * a fault will be taken on attempting to return to the
865 * Take care with the comparisons as different processor
866 * generations push segment descriptors in different ways.
868 if((nureg->cs & 0xFFFF) != UESEL || (nureg->ss & 0xFFFF) != UDSEL
869 || (nureg->ds & 0xFFFF) != UDSEL || (nureg->es & 0xFFFF) != UDSEL
870 || (nureg->fs & 0xFFFF) != UDSEL || (nureg->gs & 0xFFFF) != UDSEL){
871 pprint("bad segment selector in noted\n");
872 pprint("cs is %#lux, wanted %#ux\n", nureg->cs, UESEL);
873 pprint("ds is %#lux, wanted %#ux\n", nureg->ds, UDSEL);
874 pprint("es is %#lux, fs is %#lux, gs %#lux, wanted %#ux\n",
875 ureg->es, ureg->fs, ureg->gs, UDSEL);
876 pprint("ss is %#lux, wanted %#ux\n", nureg->ss, UDSEL);
881 /* don't let user change system flags */
882 nureg->flags = (ureg->flags & ~0xCD5) | (nureg->flags & 0xCD5);
884 memmove(ureg, nureg, sizeof(Ureg));
889 if(!okaddr(nureg->pc, 1, 0) || !okaddr(nureg->usp, BY2WD, 0)){
891 pprint("suicide: trap in noted\n");
894 up->ureg = (Ureg*)(*(ulong*)(oureg-BY2WD));
899 if(!okaddr(nureg->pc, BY2WD, 0)
900 || !okaddr(nureg->usp, BY2WD, 0)){
902 pprint("suicide: trap in noted\n");
906 sp = oureg-4*BY2WD-ERRMAX;
909 ((ulong*)sp)[1] = oureg; /* arg 1 0(FP) is ureg* */
910 ((ulong*)sp)[0] = 0; /* arg 0 is pc */
914 pprint("unknown noted arg 0x%lux\n", arg0);
915 up->lastnote.flag = NDebug;
919 if(up->lastnote.flag == NDebug){
921 pprint("suicide: %s\n", up->lastnote.msg);
924 pexit(up->lastnote.msg, up->lastnote.flag!=NDebug);
929 execregs(uintptr entry, ulong ssize, ulong nargs)
934 up->fpstate = FPinit;
937 sp = (ulong*)(USTKTOP - ssize);
941 ureg->usp = (ulong)sp;
943 // print("execregs returns 0x%x\n", USTKTOP-sizeof(Tos));
944 return USTKTOP-sizeof(Tos); /* address of kernel/user shared data */
948 * return the userpc the last exception happened at
955 ureg = (Ureg*)up->dbgreg;
959 /* This routine must save the values of registers the user is not permitted
960 * to write from devproc and then restore the saved values before returning.
963 setregisters(Ureg* ureg, char* pureg, char* uva, int n)
972 memmove(pureg, uva, n);
973 ureg->flags = (ureg->flags & 0x00FF) | (flags & 0xFF00);
982 up->kpfun(up->kparg);
983 pexit("kproc dying", 0);
987 kprocchild(Proc* p, void (*func)(void*), void* arg)
990 * gotolabel() needs a word on the stack in
991 * which to place the return PC used to jump
994 p->sched.pc = (ulong)linkproc;
995 p->sched.sp = (ulong)p->kstack+KSTACK-BY2WD;
1002 forkchild(Proc *p, Ureg *ureg)
1007 * Add 2*BY2WD to the stack to account for
1009 * - trap's argument (ur)
1011 p->sched.sp = (ulong)p->kstack+KSTACK-(sizeof(Ureg)+2*BY2WD);
1012 p->sched.pc = (ulong)forkret;
1014 cureg = (Ureg*)(p->sched.sp+2*BY2WD);
1015 memmove(cureg, ureg, sizeof(Ureg));
1016 /* return value of syscall in child */
1020 /* Give enough context in the ureg to produce a kernel stack for
1021 * a sleeping process
1024 setkernur(Ureg* ureg, Proc* p)
1026 ureg->pc = p->sched.pc;
1027 ureg->sp = p->sched.sp+4;
1043 * install_safe_pf_handler / install_normal_pf_handler:
1045 * These are used within the failsafe_callback handler in entry.S to avoid
1046 * taking a full page fault when reloading FS and GS. This is because FS and
1047 * GS could be invalid at pretty much any point while Xenolinux executes (we
1048 * don't set them to safe values on entry to the kernel). At *any* point Xen
1049 * may be entered due to a hardware interrupt --- on exit from Xen an invalid
1050 * FS/GS will cause our failsafe_callback to be executed. This could occur,
1051 * for example, while the mmu_update_queue is in an inconsistent state. This
1052 * is disastrous because the normal page-fault handler touches the update
1055 * Fortunately, within the failsafe handler it is safe to force DS/ES/FS/GS
1056 * to zero if they cannot be reloaded -- at this point executing a normal
1057 * page fault would not change this effect. The safe page-fault handler
1058 * ensures this end result (blow away the selector value) without the dangers
1059 * of the normal page-fault handler.
1061 * NB. Perhaps this can all go away after we have implemented writeable
1065 safe_fault386(Ureg* , void* ) {
1066 panic("DO SAFE PAGE FAULT!\n");
1072 unsigned long install_safe_pf_handler(void)
1074 dprint("called from failsafe callback\n");
1075 trapenable(VectorPF, safe_fault386, 0, "safe_fault386");
1079 void install_normal_pf_handler(unsigned long)
1081 trapenable(VectorPF, fault386, 0, "fault386");