3 #include "../port/lib.h"
9 #include "../port/error.h"
16 #define FAULTLOGFAST(a)
17 #define POSTNOTELOG(a)
23 /* trap_info_t flags */
29 void noted(Ureg*, ulong);
31 extern void irqinit(void);
32 extern int irqhandled(Ureg*, int);
34 static void debugbpt(Ureg*, void*);
35 static void fault386(Ureg*, void*);
36 static void safe_fault386(Ureg*, void*);
37 static void doublefault(Ureg*, void*);
38 static void unexpected(Ureg*, void*);
39 static void _dumpstack(Ureg*);
41 /* we started out doing the 'giant bulk init' for all traps.
42 * we're going to do them one-by-one since error analysis is
43 * so much easier that way.
52 HYPERVISOR_set_callbacks(
53 KESEL, (ulong)hypervisor_callback,
54 KESEL, (ulong)failsafe_callback);
56 /* XXX rework as single hypercall once debugged */
58 vaddr = (ulong)vectortable;
59 for(v = 0; v < 256; v++){
63 flag = SPL3 | EvDisable;
66 flag = SPL0 | EvDisable;
69 t[0] = (trap_info_t){ v, flag, KESEL, vaddr };
70 if(HYPERVISOR_set_trap_table(t) < 0)
71 panic("trapinit: FAIL: try to set: 0x%x, 0x%x, 0x%x, 0x%ulx\n",
72 t[0].vector, t[0].flags, t[0].cs, t[0].address);
80 * Syscall() is called directly without going through trap().
82 trapenable(VectorBPT, debugbpt, 0, "debugpt");
83 trapenable(VectorPF, fault386, 0, "fault386");
84 trapenable(Vector2F, doublefault, 0, "doublefault");
85 trapenable(Vector15, unexpected, 0, "unexpected");
88 static char* excname[32] = {
91 "nonmaskable interrupt",
96 "coprocessor not available",
98 "coprocessor segment overrun",
100 "segment not present",
102 "general protection violation",
128 if(vno < nelem(excname)){
130 sprint(buf, "sys: trap: %s", excname[vno]);
131 postnote(up, 1, buf, NDebug);
138 * All traps come here. It is slower to have all traps call trap()
139 * rather than directly vectoring the handler. However, this avoids a
140 * lot of code duplication and possible bugs. The only exception is
142 * Trap is called with interrupts disabled via interrupt-gates.
151 if(!irqhandled(ureg, vno) && (!user || !usertrap(vno))){
153 /* early fault before trapinit() */
160 ureg->sp = (ulong)&ureg->sp;
163 if(vno < nelem(excname))
164 panic("%s", excname[vno]);
165 panic("unknown trap/intr: %d", vno);
170 if(up->procctl || up->nnote)
175 if (ureg->trap == 0xe) {
177 * on a page fault we need to restore the old spl ourselves;
178 * Xen won't do it for us.
181 if (ureg->flags & 0x200)
187 dumpregs2(Ureg* ureg)
190 print("cpu%d: registers for %s %lud\n",
191 m->machno, up->text, up->pid);
193 print("cpu%d: registers for kernel\n", m->machno);
194 print("FLAGS=%luX TRAP=%luX ECODE=%luX PC=%luX",
195 ureg->flags, ureg->trap, ureg->ecode, ureg->pc);
196 print(" SS=%4.4luX USP=%luX\n", ureg->ss & 0xFFFF, ureg->usp);
197 print(" AX %8.8luX BX %8.8luX CX %8.8luX DX %8.8luX\n",
198 ureg->ax, ureg->bx, ureg->cx, ureg->dx);
199 print(" SI %8.8luX DI %8.8luX BP %8.8luX\n",
200 ureg->si, ureg->di, ureg->bp);
201 print(" CS %4.4luX DS %4.4luX ES %4.4luX FS %4.4luX GS %4.4luX\n",
202 ureg->cs & 0xFFFF, ureg->ds & 0xFFFF, ureg->es & 0xFFFF,
203 ureg->fs & 0xFFFF, ureg->gs & 0xFFFF);
214 * Processor control registers.
215 * If machine check exception, time stamp counter, page size extensions
216 * or enhanced virtual 8086 mode extensions are supported, there is a
217 * CR4. If there is a CR4 and machine check extensions, read the machine
218 * check address and machine check type registers if RDMSR supported.
220 print("SKIPPING get of crx and other such stuff.\n");/* */
222 print(" CR0 %8.8lux CR2 %8.8lux CR3 %8.8lux",
223 getcr0(), getcr2(), getcr3());
224 if(m->cpuiddx & 0x9A){
225 print(" CR4 %8.8lux", getcr4());
226 if((m->cpuiddx & 0xA0) == 0xA0){
229 print("\n MCA %8.8llux MCT %8.8llux", mca, mct);
233 print("\n ur %lux up %lux\n", (ulong)ureg, (ulong)up);
238 * Fill in enough of Ureg to get a stack trace, and call a function.
239 * Used by debugging interface rdb.
242 callwithureg(void (*fn)(Ureg*))
245 ureg.pc = getcallerpc(&fn);
246 ureg.sp = (ulong)&fn;
251 _dumpstack(Ureg *ureg)
253 ulong l, v, i, estack;
257 if(getconf("*nodumpstack")){
258 iprint("dumpstack disabled\n");
261 iprint("dumpstack\n");
263 x += print("ktrace /kernel/path %.8lux %.8lux <<EOF\n", ureg->pc, ureg->sp);
266 && (ulong)&l >= (ulong)up->kstack
267 && (ulong)&l <= (ulong)up->kstack+KSTACK)
268 estack = (ulong)up->kstack+KSTACK;
269 else if((ulong)&l >= (ulong)m->stack
270 && (ulong)&l <= (ulong)m+BY2PG)
271 estack = (ulong)m+MACHSIZE;
274 x += print("estackx %.8lux\n", estack);
276 for(l=(ulong)&l; l<estack; l+=4){
278 if((KTZERO < v && v < (ulong)&etext) || estack-l<32){
280 * we could pick off general CALL (((uchar*)v)[-5] == 0xE8)
281 * and CALL indirect through AX (((uchar*)v)[-2] == 0xFF && ((uchar*)v)[-1] == 0xD0),
282 * but this is too clever and misses faulting address.
284 x += print("%.8lux=%.8lux ", l, v);
300 callwithureg(_dumpstack);
304 debugbpt(Ureg* ureg, void*)
310 /* restore pc to instruction that caused the trap */
312 sprint(buf, "sys: breakpoint");
313 postnote(up, 1, buf, NDebug);
314 print("debugbpt for proc %lud\n", up->pid);
318 doublefault(Ureg*, void*)
320 panic("double fault");
324 unexpected(Ureg* ureg, void*)
326 print("unexpected trap %lud; ignoring\n", ureg->trap);
330 fault386(Ureg* ureg, void*)
333 int read, user, n, insyscall;
336 addr = HYPERVISOR_shared_info->vcpu_info[m->machno].arch.cr2;
338 dprint("cr2 is 0x%lx\n", addr);
345 user = (ureg->cs & 0xFFFF) == UESEL;
346 if(!user && mmukmapsync(addr))
348 read = !(ureg->ecode & 2);
350 panic("fault but up is zero; pc 0x%8.8lux addr 0x%8.8lux\n", ureg->pc, addr);
351 insyscall = up->insyscall;
353 n = fault(addr, ureg->pc, read);
357 panic("fault: 0x%lux\n", addr);
359 sprint(buf, "sys: trap: fault %s addr=0x%lux",
360 read? "read" : "write", addr);
361 dprint("Posting %s to %lud\n", buf, up->pid);
362 postnote(up, 1, buf, NDebug);
364 up->insyscall = insyscall;
365 FAULTLOG(dprint("fault386: all done\n");)
371 #include "../port/systab.h"
374 * Syscall is called directly from assembler without going through trap().
385 SYSCALLLOG(dprint("%d: syscall ...#%ld(%s)\n",
386 up->pid, ureg->ax, sysctab[ureg->ax]);)
389 panic("syscall: cs 0x%4.4luX\n", ureg->cs);
395 if(up->procctl == Proc_tracesyscall){
396 up->procctl = Proc_stopme;
401 up->scallnr = scallnr;
402 if(scallnr == RFORK && up->fpstate == FPactive){
404 up->fpstate = FPinactive;
412 if(scallnr >= nsyscall || systab[scallnr] == 0){
413 pprint("bad sys call number %lud pc %lux\n",
415 postnote(up, 1, "sys: bad sys call", NDebug);
419 if(sp<(USTKTOP-BY2PG) || sp>(USTKTOP-sizeof(Sargs)-BY2WD))
420 validaddr(sp, sizeof(Sargs)+BY2WD, 0);
422 up->s = *((Sargs*)(sp+BY2WD));
423 up->psstate = sysctab[scallnr];
425 ret = systab[scallnr]((va_list)up->s.args);
428 /* failure: save the error buffer for errstr */
430 up->syserrstr = up->errstr;
432 if(0 && up->pid == 1)
433 print("syscall %lud error %s\n", scallnr, up->syserrstr);
436 print("bad errstack [%lud]: %d extra\n", scallnr, up->nerrlab);
437 for(i = 0; i < NERR; i++)
438 print("sp=%lux pc=%lux\n",
439 up->errlab[i].sp, up->errlab[i].pc);
440 panic("error stack");
443 SYSCALLLOG(dprint("%d: Syscall %d returns %d, ureg %p\n", up->pid, scallnr, ret, ureg);)
445 * Put return value in frame. On the x86 the syscall is
446 * just another trap and the return value from syscall is
447 * ignored. On other machines the return value is put into
448 * the results register by caller of syscall.
452 if(up->procctl == Proc_tracesyscall){
454 up->procctl = Proc_stopme;
461 INTRLOG(dprint("cleared insyscall\n");)
463 noted(ureg, *(ulong*)(sp+BY2WD));
465 if(scallnr!=RFORK && (up->procctl || up->nnote)){
469 /* if we delayed sched because we held a lock, sched now */
472 INTRLOG(dprint("before kexit\n");)
477 * Call user, if necessary, with note.
478 * Pass user the Ureg struct and the note on his stack.
492 if(up->fpstate == FPactive){
494 up->fpstate = FPinactive;
496 up->fpstate |= FPillegal;
502 if(strncmp(n->msg, "sys:", 4) == 0){
504 if(l > ERRMAX-15) /* " pc=0x12345678\0" */
506 sprint(n->msg+l, " pc=0x%.8lux", ureg->pc);
509 if(n->flag!=NUser && (up->notified || up->notify==0)){
510 if(n->flag == NDebug)
511 pprint("suicide: %s\n", n->msg);
513 pexit(n->msg, n->flag!=NDebug);
524 pexit(n->msg, n->flag!=NDebug);
529 if(!okaddr((ulong)up->notify, 1, 0)
530 || !okaddr(sp-ERRMAX-4*BY2WD, sizeof(Ureg)+ERRMAX+4*BY2WD, 1)){
531 pprint("suicide: bad address in notify\n");
536 up->ureg = (void*)sp;
537 memmove((Ureg*)sp, ureg, sizeof(Ureg));
538 *(Ureg**)(sp-BY2WD) = up->ureg; /* word under Ureg is old up->ureg */
539 up->ureg = (void*)sp;
541 memmove((char*)sp, up->note[0].msg, ERRMAX);
543 *(ulong*)(sp+2*BY2WD) = sp+3*BY2WD; /* arg 2 is string */
544 *(ulong*)(sp+1*BY2WD) = (ulong)up->ureg; /* arg 1 is ureg* */
545 *(ulong*)(sp+0*BY2WD) = 0; /* arg 0 is pc */
547 ureg->pc = (ulong)up->notify;
550 memmove(&up->lastnote, &up->note[0], sizeof(Note));
551 memmove(&up->note[0], &up->note[1], up->nnote*sizeof(Note));
559 * Return user to state before notify()
562 noted(Ureg* ureg, ulong arg0)
568 if(arg0!=NRSTR && !up->notified) {
570 pprint("call to noted() when not notified\n");
575 nureg = up->ureg; /* pointer to user returned Ureg struct */
577 up->fpstate &= ~FPillegal;
580 oureg = (ulong)nureg;
581 if(!okaddr((ulong)oureg-BY2WD, BY2WD+sizeof(Ureg), 0)){
582 pprint("bad ureg in noted or call to noted when not notified\n");
588 * Check the segment selectors are all valid, otherwise
589 * a fault will be taken on attempting to return to the
591 * Take care with the comparisons as different processor
592 * generations push segment descriptors in different ways.
594 if((nureg->cs & 0xFFFF) != UESEL || (nureg->ss & 0xFFFF) != UDSEL
595 || (nureg->ds & 0xFFFF) != UDSEL || (nureg->es & 0xFFFF) != UDSEL
596 || (nureg->fs & 0xFFFF) != UDSEL || (nureg->gs & 0xFFFF) != UDSEL){
597 pprint("bad segment selector in noted\n");
598 pprint("cs is %#lux, wanted %#ux\n", nureg->cs, UESEL);
599 pprint("ds is %#lux, wanted %#ux\n", nureg->ds, UDSEL);
600 pprint("es is %#lux, fs is %#lux, gs %#lux, wanted %#ux\n",
601 ureg->es, ureg->fs, ureg->gs, UDSEL);
602 pprint("ss is %#lux, wanted %#ux\n", nureg->ss, UDSEL);
607 /* don't let user change system flags */
608 nureg->flags = (ureg->flags & ~0xCD5) | (nureg->flags & 0xCD5);
610 memmove(ureg, nureg, sizeof(Ureg));
615 if(!okaddr(nureg->pc, 1, 0) || !okaddr(nureg->usp, BY2WD, 0)){
617 pprint("suicide: trap in noted\n");
620 up->ureg = (Ureg*)(*(ulong*)(oureg-BY2WD));
625 if(!okaddr(nureg->pc, BY2WD, 0)
626 || !okaddr(nureg->usp, BY2WD, 0)){
628 pprint("suicide: trap in noted\n");
632 sp = oureg-4*BY2WD-ERRMAX;
635 ((ulong*)sp)[1] = oureg; /* arg 1 0(FP) is ureg* */
636 ((ulong*)sp)[0] = 0; /* arg 0 is pc */
640 pprint("unknown noted arg 0x%lux\n", arg0);
641 up->lastnote.flag = NDebug;
645 if(up->lastnote.flag == NDebug){
647 pprint("suicide: %s\n", up->lastnote.msg);
650 pexit(up->lastnote.msg, up->lastnote.flag!=NDebug);
655 execregs(uintptr entry, ulong ssize, ulong nargs)
660 up->fpstate = FPinit;
663 sp = (ulong*)(USTKTOP - ssize);
667 ureg->usp = (ulong)sp;
669 // print("execregs returns 0x%x\n", USTKTOP-sizeof(Tos));
670 return USTKTOP-sizeof(Tos); /* address of kernel/user shared data */
674 * return the userpc the last exception happened at
681 ureg = (Ureg*)up->dbgreg;
685 /* This routine must save the values of registers the user is not permitted
686 * to write from devproc and then restore the saved values before returning.
689 setregisters(Ureg* ureg, char* pureg, char* uva, int n)
698 memmove(pureg, uva, n);
699 ureg->flags = (ureg->flags & 0x00FF) | (flags & 0xFF00);
705 kprocchild(Proc *p, void (*entry)(void))
708 * gotolabel() needs a word on the stack in
709 * which to place the return PC used to jump
712 p->sched.pc = (ulong)entry;
713 p->sched.sp = (ulong)p->kstack+KSTACK-BY2WD;
717 forkchild(Proc *p, Ureg *ureg)
722 * Add 2*BY2WD to the stack to account for
724 * - trap's argument (ur)
726 p->sched.sp = (ulong)p->kstack+KSTACK-(sizeof(Ureg)+2*BY2WD);
727 p->sched.pc = (ulong)forkret;
729 cureg = (Ureg*)(p->sched.sp+2*BY2WD);
730 memmove(cureg, ureg, sizeof(Ureg));
731 /* return value of syscall in child */
735 /* Give enough context in the ureg to produce a kernel stack for
739 setkernur(Ureg* ureg, Proc* p)
741 ureg->pc = p->sched.pc;
742 ureg->sp = p->sched.sp+4;
758 * install_safe_pf_handler / install_normal_pf_handler:
760 * These are used within the failsafe_callback handler in entry.S to avoid
761 * taking a full page fault when reloading FS and GS. This is because FS and
762 * GS could be invalid at pretty much any point while Xenolinux executes (we
763 * don't set them to safe values on entry to the kernel). At *any* point Xen
764 * may be entered due to a hardware interrupt --- on exit from Xen an invalid
765 * FS/GS will cause our failsafe_callback to be executed. This could occur,
766 * for example, while the mmu_update_queue is in an inconsistent state. This
767 * is disastrous because the normal page-fault handler touches the update
770 * Fortunately, within the failsafe handler it is safe to force DS/ES/FS/GS
771 * to zero if they cannot be reloaded -- at this point executing a normal
772 * page fault would not change this effect. The safe page-fault handler
773 * ensures this end result (blow away the selector value) without the dangers
774 * of the normal page-fault handler.
776 * NB. Perhaps this can all go away after we have implemented writeable
780 safe_fault386(Ureg* , void* ) {
781 panic("DO SAFE PAGE FAULT!\n");
787 unsigned long install_safe_pf_handler(void)
789 dprint("called from failsafe callback\n");
790 trapenable(VectorPF, safe_fault386, 0, "safe_fault386");
794 void install_normal_pf_handler(unsigned long)
796 trapenable(VectorPF, fault386, 0, "fault386");