]> git.lizzy.rs Git - plan9front.git/blob - sys/src/9/xen/trap.c
kernel: move devether and wifi to port/
[plan9front.git] / sys / src / 9 / xen / trap.c
1 #include        "u.h"
2 #include        "tos.h"
3 #include        "../port/lib.h"
4 #include        "mem.h"
5 #include        "dat.h"
6 #include        "fns.h"
7 #include        "io.h"
8 #include        "ureg.h"
9 #include        "../port/error.h"
10 #include        <trace.h>
11
/*
 * Debug logging hooks.  Each macro expands to nothing, so the
 * dprint() calls wrapped in them throughout this file are compiled out.
 */
#define INTRLOG(a)  
#define SETUPLOG(a)
#define SYSCALLLOG(a)
#define FAULTLOG(a) 
#define FAULTLOGFAST(a)
#define POSTNOTELOG(a)
#define TRAPLOG(a)

/* when nonzero, fault386() dumps the stack and panics on any page fault */
int faultpanic = 0;

enum {
        /* trap_info_t flags */
        SPL0 = 0,
        SPL3 = 3,
        EvDisable = 4,
};
  
void    noted(Ureg*, ulong);

/*
 * Handlers defined later in this file.
 * NOTE(review): safe_fault386 is declared here but no definition
 * is visible in this part of the file -- confirm it is still needed.
 */
static void debugbpt(Ureg*, void*);
static void fault386(Ureg*, void*);
static void safe_fault386(Ureg*, void*);
static void doublefault(Ureg*, void*);
static void unexpected(Ureg*, void*);
static void _dumpstack(Ureg*);

/*
 * vctl[vno] heads the chain of handlers registered for vector vno;
 * vctllock is held while the chains are modified.
 */
static Lock vctllock;
static Vctl *vctl[256];

enum
{
        Ntimevec = 20           /* number of time buckets for each intr */
};
/* per-vector histogram of interrupt service times, filled in by intrtime() */
ulong intrtimes[256][Ntimevec];
46
47 void
48 intrenable(int irq, void (*f)(Ureg*, void*), void* a, int tbdf, char *name)
49 {
50         int vno;
51         Vctl *v;
52
53 /**/
54         SETUPLOG(dprint("intrenable: irq %d, f %p, a %p, tbdf 0x%x, name %s\n", 
55                         irq, f, a, tbdf, name);)
56 /**/
57         if(f == nil){
58                 print("intrenable: nil handler for %d, tbdf 0x%uX for %s\n",
59                         irq, tbdf, name);
60                 return;
61         }
62
63         v = xalloc(sizeof(Vctl));
64         v->isintr = 1;
65         v->irq = irq;
66         v->tbdf = tbdf;
67         v->f = f;
68         v->a = a;
69         strncpy(v->name, name, KNAMELEN-1);
70         v->name[KNAMELEN-1] = 0;
71
72         ilock(&vctllock);
73         vno = arch->intrenable(v);
74         if(vno == -1){
75                 iunlock(&vctllock);
76                 print("intrenable: couldn't enable irq %d, tbdf 0x%uX for %s\n",
77                         irq, tbdf, v->name);
78                 xfree(v);
79                 return;
80         }
81         if(vctl[vno]){
82                 if(vctl[vno]->isr != v->isr || vctl[vno]->eoi != v->eoi)
83                         panic("intrenable: handler: %s %s %p %p %p %p\n",
84                                 vctl[vno]->name, v->name,
85                                 vctl[vno]->isr, v->isr, vctl[vno]->eoi, v->eoi);
86                 v->next = vctl[vno];
87         }
88         vctl[vno] = v;
89         SETUPLOG(dprint("INTRENABLE: vctl[%d] is %p\n", vno, vctl[vno]);)
90         iunlock(&vctllock);
91 }
92
93 void
94 intrdisable(int irq, void (*f)(Ureg *, void *), void *a, int tbdf, char *name)
95 {
96         Vctl **pv, *v;
97         int vno;
98
99         vno = arch->intrvecno(irq);
100         ilock(&vctllock);
101         for(pv = &vctl[vno]; (v = *pv) != nil; pv = &v->next){
102                 if(v->isintr && v->irq == irq
103                 && v->tbdf == tbdf && v->f == f && v->a == a
104                 && strcmp(v->name, name) == 0){
105                         *pv = v->next;
106                         xfree(v);
107
108                         if(vctl[vno] == nil && arch->intrdisable != nil)
109                                 arch->intrdisable(irq);
110                         break;
111                 }
112         }
113         iunlock(&vctllock);
114 }
115
116 static long
117 irqallocread(Chan*, void *a, long n, vlong offset)
118 {
119         char buf[2*(11+1)+KNAMELEN+1+1];
120         int vno, m;
121         Vctl *v;
122
123         if(n < 0 || offset < 0)
124                 error(Ebadarg);
125
126         for(vno=0; vno<nelem(vctl); vno++){
127                 for(v=vctl[vno]; v; v=v->next){
128                         m = snprint(buf, sizeof(buf), "%11d %11d %.*s\n", vno, v->irq, KNAMELEN, v->name);
129                         offset -= m;
130                         if(offset >= 0)
131                                 continue;
132                         if(n > -offset)
133                                 n = -offset;
134                         offset += m;
135                         memmove(a, buf+offset, n);
136                         return n;
137                 }
138         }
139         return 0;
140 }
141
142 void
143 trapenable(int vno, void (*f)(Ureg*, void*), void* a, char *name)
144 {
145         Vctl *v;
146
147         if(vno < 0 || vno >= VectorPIC)
148                 panic("trapenable: vno %d\n", vno);
149         v = xalloc(sizeof(Vctl));
150         v->tbdf = BUSUNKNOWN;
151         v->f = f;
152         v->a = a;
153         strncpy(v->name, name, KNAMELEN);
154         v->name[KNAMELEN-1] = 0;
155
156         lock(&vctllock);
157         if(vctl[vno])
158                 v->next = vctl[vno]->next;
159         vctl[vno] = v;
160         unlock(&vctllock);
161 }
162
/*
 * Enable non-maskable interrupts.
 * The body is compiled only when NOWAY is defined; otherwise this
 * function does nothing.
 * NOTE(review): the disabled code uses a variable x that is not
 * declared here, so it would need a declaration before being enabled.
 */
static void
nmienable(void)
{
        /* leave this here in case plan 9 ever makes it to dom0 */
#ifdef NOWAY
        /*
         * Hack: should be locked with NVRAM access.
         */
        outb(0x70, 0x80);               /* NMI latch clear */
        outb(0x70, 0);

        x = inb(0x61) & 0x07;           /* Enable NMI */
        outb(0x61, 0x08|x);
        outb(0x61, x);
#endif
}
179
180 /* we started out doing the 'giant bulk init' for all traps. 
181   * we're going to do them one-by-one since error analysis is 
182   * so much easier that way.
183   */
184 void
185 trapinit(void)
186 {
187         trap_info_t t[2];
188         ulong vaddr;
189         int v, flag;
190
191         HYPERVISOR_set_callbacks(
192                 KESEL, (ulong)hypervisor_callback,
193                 KESEL, (ulong)failsafe_callback);
194
195         /* XXX rework as single hypercall once debugged */
196         t[1].address = 0;
197         vaddr = (ulong)vectortable;
198         for(v = 0; v < 256; v++){
199                 switch(v){
200                 case VectorBPT:
201                 case VectorSYSCALL:
202                         flag = SPL3 | EvDisable;
203                         break;
204                 default:
205                         flag = SPL0 | EvDisable;
206                         break;
207                 }
208                 t[0] = (trap_info_t){ v, flag, KESEL, vaddr };
209                 if(HYPERVISOR_set_trap_table(t) < 0)
210                         panic("trapinit: FAIL: try to set: 0x%x, 0x%x, 0x%x, 0x%ulx\n", 
211                                 t[0].vector, t[0].flags, t[0].cs, t[0].address);
212                 vaddr += 6;
213         }
214
215         /*
216          * Special traps.
217          * Syscall() is called directly without going through trap().
218          */
219         trapenable(VectorBPT, debugbpt, 0, "debugpt");
220         trapenable(VectorPF, fault386, 0, "fault386");
221         trapenable(Vector2F, doublefault, 0, "doublefault");
222         trapenable(Vector15, unexpected, 0, "unexpected");
223
224         nmienable();
225         addarchfile("irqalloc", 0444, irqallocread, nil);
226 }
227
/*
 * Names of the 32 processor exceptions, indexed by trap number.
 * trap() uses them for the note posted to a user process and
 * for the panic message on an unhandled kernel exception.
 */
static char* excname[32] = {
        "divide error",
        "debug exception",
        "nonmaskable interrupt",
        "breakpoint",
        "overflow",
        "bounds check",
        "invalid opcode",
        "coprocessor not available",
        "double fault",
        "coprocessor segment overrun",
        "invalid TSS",
        "segment not present",
        "stack exception",
        "general protection violation",
        "page fault",
        "15 (reserved)",
        "coprocessor error",
        "alignment check",
        "machine check",
        "19 (reserved)",
        "20 (reserved)",
        "21 (reserved)",
        "22 (reserved)",
        "23 (reserved)",
        "24 (reserved)",
        "25 (reserved)",
        "26 (reserved)",
        "27 (reserved)",
        "28 (reserved)",
        "29 (reserved)",
        "30 (reserved)",
        "31 (reserved)",
};
262
263 /*
264  *  keep histogram of interrupt service times
265  */
266 void
267 intrtime(Mach*, int vno)
268 {
269         ulong diff;
270         ulong x;
271
272         x = perfticks();
273         diff = x - m->perf.intrts;
274         m->perf.intrts = x;
275
276         m->perf.inintr += diff;
277         if(up == nil && m->perf.inidle > diff)
278                 m->perf.inidle -= diff;
279
280         diff /= m->cpumhz*100;  // quantum = 100µsec
281         if(diff >= Ntimevec)
282                 diff = Ntimevec-1;
283         intrtimes[vno][diff]++;
284 }
285
286 /* go to user space */
287 void
288 kexit(Ureg*)
289 {
290         uvlong t;
291         Tos *tos;
292
293         /* precise time accounting, kernel exit */
294         tos = (Tos*)(USTKTOP-sizeof(Tos));
295         cycles(&t);
296         tos->kcycles += t - up->kentry;
297         tos->pcycles = up->pcycles;
298         tos->pid = up->pid;
299         INTRLOG(dprint("leave kexit, TOS %p\n", tos);)
300 }
301
302 /*
303  *  All traps come here.  It is slower to have all traps call trap()
304  *  rather than directly vectoring the handler.  However, this avoids a
305  *  lot of code duplication and possible bugs.  The only exception is
306  *  VectorSYSCALL.
307  *  Trap is called with interrupts (and events) disabled via interrupt-gates.
308  */
309 void
310 trap(Ureg* ureg)
311 {
312         int clockintr, i, vno, user;
313         char buf[ERRMAX];
314         Vctl *ctl, *v;
315         Mach *mach;
316
317         TRAPLOG(dprint("trap ureg %lux %lux\n", (ulong*)ureg, ureg->trap);)
318         m->perf.intrts = perfticks();
319         user = (ureg->cs & 0xFFFF) == UESEL;
320         if(user){
321                 up->dbgreg = ureg;
322                 cycles(&up->kentry);
323         }
324
325         clockintr = 0;
326
327         vno = ureg->trap;
328         if(vno < 0 || vno >= 256)
329                 panic("bad interrupt number %d\n", vno);
330         TRAPLOG(dprint("trap: vno is 0x%x, vctl[%d] is %p\n", vno, vno, vctl[vno]);)
331         if(ctl = vctl[vno]){
332                 INTRLOG(dprint("ctl is %p, isintr is %d\n", ctl, ctl->isintr);)
333                 if(ctl->isintr){
334                         m->intr++;
335                         if(vno >= VectorPIC && vno != VectorSYSCALL)
336                                 m->lastintr = ctl->irq;
337                 }
338
339                 INTRLOG(dprint("ctl %p, isr %p\n", ctl, ctl->isr);)
340                 if(ctl->isr)
341                         ctl->isr(vno);
342                 for(v = ctl; v != nil; v = v->next){
343                         INTRLOG(dprint("ctl %p, f is %p\n", v, v->f);)
344                         if(v->f)
345                                 v->f(ureg, v->a);
346                 }
347                 INTRLOG(dprint("ctl %p, eoi %p\n", ctl, ctl->eoi);)
348                 if(ctl->eoi)
349                         ctl->eoi(vno);
350
351                 if(ctl->isintr){
352                         intrtime(m, vno);
353
354                         //if(ctl->irq == IrqCLOCK || ctl->irq == IrqTIMER)
355                         if (ctl->tbdf != BUSUNKNOWN && ctl->irq == VIRQ_TIMER)
356                                 clockintr = 1;
357
358                         if(up && !clockintr)
359                                 preempted();
360                 }
361         }
362         else if(vno <= nelem(excname) && user){
363                 spllo();
364                 sprint(buf, "sys: trap: %s", excname[vno]);
365                 postnote(up, 1, buf, NDebug);
366         }
367         else if(vno >= VectorPIC && vno != VectorSYSCALL){
368                 /*
369                  * An unknown interrupt.
370                  * Check for a default IRQ7. This can happen when
371                  * the IRQ input goes away before the acknowledge.
372                  * In this case, a 'default IRQ7' is generated, but
373                  * the corresponding bit in the ISR isn't set.
374                  * In fact, just ignore all such interrupts.
375                  */
376
377                 /* call all interrupt routines, just in case */
378                 for(i = VectorPIC; i <= MaxIrqLAPIC; i++){
379                         ctl = vctl[i];
380                         if(ctl == nil)
381                                 continue;
382                         if(!ctl->isintr)
383                                 continue;
384                         for(v = ctl; v != nil; v = v->next){
385                                 if(v->f)
386                                         v->f(ureg, v->a);
387                         }
388                         /* should we do this? */
389                         if(ctl->eoi)
390                                 ctl->eoi(i);
391                 }
392
393                 iprint("cpu%d: spurious interrupt %d, last %d\n",
394                         m->machno, vno, m->lastintr);
395                 if(0)if(conf.nmach > 1){
396                         for(i = 0; i < MAXMACH; i++){
397                                 if(active.machs[i] == 0)
398                                         continue;
399                                 mach = MACHP(i);
400                                 if(m->machno == mach->machno)
401                                         continue;
402                                 print(" cpu%d: last %d",
403                                         mach->machno, mach->lastintr);
404                         }
405                         print("\n");
406                 }
407                 m->spuriousintr++;
408                 if(user)
409                         kexit(ureg);
410                 return;
411         }
412         else{
413                 if(vno == VectorNMI){
414                         nmienable();
415                         if(m->machno != 0){
416                                 print("cpu%d: PC %8.8luX\n",
417                                         m->machno, ureg->pc);
418                                 for(;;);
419                         }
420                 }
421                 dumpregs(ureg);
422                 if(!user){
423                         ureg->sp = (ulong)&ureg->sp;
424                         _dumpstack(ureg);
425                 }
426                 if(vno < nelem(excname))
427                         panic("%s", excname[vno]);
428                 panic("unknown trap/intr: %d\n", vno);
429         }
430         splhi();
431
432         /* delaysched set because we held a lock or because our quantum ended */
433         if(up && up->delaysched && clockintr){
434                 INTRLOG(dprint("calling sched in trap? \n");)
435                 sched();
436                 INTRLOG(dprint("Back from calling sched in trap?\n");)
437                 splhi();
438         }
439
440         if(user){
441                 if(up->procctl || up->nnote)
442                         notify(ureg);
443                 kexit(ureg);
444         }
445
446         if (ureg->trap == 0xe) {
447                 /*
448                   * on page fault, we need to restore the old spl
449                   * Xen won't do it for us.
450                   * XXX verify this.
451                   */
452                 if (ureg->flags & 0x200)
453                         spllo();
454         }
455 }
456
457 void
458 dumpregs2(Ureg* ureg)
459 {
460         if(up)
461                 print("cpu%d: registers for %s %lud\n",
462                         m->machno, up->text, up->pid);
463         else
464                 print("cpu%d: registers for kernel\n", m->machno);
465         print("FLAGS=%luX TRAP=%luX ECODE=%luX PC=%luX",
466                 ureg->flags, ureg->trap, ureg->ecode, ureg->pc);
467         print(" SS=%4.4luX USP=%luX\n", ureg->ss & 0xFFFF, ureg->usp);
468         print("  AX %8.8luX  BX %8.8luX  CX %8.8luX  DX %8.8luX\n",
469                 ureg->ax, ureg->bx, ureg->cx, ureg->dx);
470         print("  SI %8.8luX  DI %8.8luX  BP %8.8luX\n",
471                 ureg->si, ureg->di, ureg->bp);
472         print("  CS %4.4luX  DS %4.4luX  ES %4.4luX  FS %4.4luX  GS %4.4luX\n",
473                 ureg->cs & 0xFFFF, ureg->ds & 0xFFFF, ureg->es & 0xFFFF,
474                 ureg->fs & 0xFFFF, ureg->gs & 0xFFFF);
475 }
476
477 void
478 dumpregs(Ureg* ureg)
479 {
480         extern ulong etext;
481
482         dumpregs2(ureg);
483
484         /*
485          * Processor control registers.
486          * If machine check exception, time stamp counter, page size extensions
487          * or enhanced virtual 8086 mode extensions are supported, there is a
488          * CR4. If there is a CR4 and machine check extensions, read the machine
489          * check address and machine check type registers if RDMSR supported.
490          */
491         print("SKIPPING get of crx and other such stuff.\n");/* */
492 #ifdef NOT
493         print("  CR0 %8.8lux CR2 %8.8lux CR3 %8.8lux",
494                 getcr0(), getcr2(), getcr3());
495         if(m->cpuiddx & 0x9A){
496                 print(" CR4 %8.8lux", getcr4());
497                 if((m->cpuiddx & 0xA0) == 0xA0){
498                         rdmsr(0x00, &mca);
499                         rdmsr(0x01, &mct);
500                         print("\n  MCA %8.8llux MCT %8.8llux", mca, mct);
501                 }
502         }
503 #endif
504         print("\n  ur %lux up %lux\n", (ulong)ureg, (ulong)up);
505 }
506
507
508 /*
509  * Fill in enough of Ureg to get a stack trace, and call a function.
510  * Used by debugging interface rdb.
511  */
512 void
513 callwithureg(void (*fn)(Ureg*))
514 {
515         Ureg ureg;
516         ureg.pc = getcallerpc(&fn);
517         ureg.sp = (ulong)&fn;
518         fn(&ureg);
519 }
520
521 static void
522 _dumpstack(Ureg *ureg)
523 {
524         ulong l, v, i, estack;
525         extern ulong etext;
526         int x;
527
528         if(getconf("*nodumpstack")){
529                 iprint("dumpstack disabled\n");
530                 return;
531         }
532         iprint("dumpstack\n");
533         x = 0;
534         x += print("ktrace /kernel/path %.8lux %.8lux <<EOF\n", ureg->pc, ureg->sp);
535         i = 0;
536         if(up
537         && (ulong)&l >= (ulong)up->kstack
538         && (ulong)&l <= (ulong)up->kstack+KSTACK)
539                 estack = (ulong)up->kstack+KSTACK;
540         else if((ulong)&l >= (ulong)m->stack
541         && (ulong)&l <= (ulong)m+BY2PG)
542                 estack = (ulong)m+MACHSIZE;
543         else
544                 return;
545         x += print("estackx %.8lux\n", estack);
546
547         for(l=(ulong)&l; l<estack; l+=4){
548                 v = *(ulong*)l;
549                 if((KTZERO < v && v < (ulong)&etext) || estack-l<32){
550                         /*
551                          * we could Pick off general CALL (((uchar*)v)[-5] == 0xE8)
552                          * and CALL indirect through AX (((uchar*)v)[-2] == 0xFF && ((uchar*)v)[-2] == 0xD0),
553                          * but this is too clever and misses faulting address.
554                          */
555                         x += print("%.8lux=%.8lux ", l, v);
556                         i++;
557                 }
558                 if(i == 4){
559                         i = 0;
560                         x += print("\n");
561                 }
562         }
563         if(i)
564                 print("\n");
565         print("EOF\n");
566 }
567
/*
 * Print a stack trace of the caller: callwithureg() builds a
 * Ureg for the current call site and hands it to _dumpstack().
 */
void
dumpstack(void)
{
        callwithureg(_dumpstack);
}
573
574 static void
575 debugbpt(Ureg* ureg, void*)
576 {
577         char buf[ERRMAX];
578         print("debugbpt\n");
579         if(up == 0)
580                 panic("kernel bpt");
581         /* restore pc to instruction that caused the trap */
582         ureg->pc--;
583         sprint(buf, "sys: breakpoint");
584         postnote(up, 1, buf, NDebug);
585         print("debugbpt for proc %lud\n", up->pid);
586 }
587
/* Double fault handler (registered in trapinit): always panics. */
static void
doublefault(Ureg*, void*)
{
        panic("double fault");
}
593
/* Handler for Vector15 (registered in trapinit): report the trap number and carry on. */
static void
unexpected(Ureg* ureg, void*)
{
        print("unexpected trap %lud; ignoring\n", ureg->trap);
}
599
600 static void
601 fault386(Ureg* ureg, void* )
602 {
603         ulong addr;
604         int read, user, n, insyscall;
605         char buf[ERRMAX];
606
607         addr = HYPERVISOR_shared_info->vcpu_info[m->machno].arch.cr2;
608         if (faultpanic) {
609                 dprint("cr2 is 0x%lx\n", addr);
610                 //dumpregs(ureg);
611                 dumpstack();
612                 panic("fault386");
613                 exit(1);
614         }
615         
616         user = (ureg->cs & 0xFFFF) == UESEL;
617         if(!user && mmukmapsync(addr))
618                 return;
619         read = !(ureg->ecode & 2);
620         if(up == nil)
621                 panic("fault but up is zero; pc 0x%8.8lux addr 0x%8.8lux\n", ureg->pc, addr);
622         insyscall = up->insyscall;
623         up->insyscall = 1;
624         n = fault(addr, read);
625         if(n < 0){
626                 if(!user){
627                         dumpregs(ureg);
628                         panic("fault: 0x%lux\n", addr);
629                 }
630                 sprint(buf, "sys: trap: fault %s addr=0x%lux",
631                         read? "read" : "write", addr);
632                 dprint("Posting %s to %lud\n", buf, up->pid);
633                 postnote(up, 1, buf, NDebug);
634         }
635         up->insyscall = insyscall;
636         FAULTLOG(dprint("fault386: all done\n");)
637 }
638
639 /*
640  *  system calls
641  */
642 #include "../port/systab.h"
643
/*
 *  Syscall is called directly from assembler without going through trap().
 *
 *  The system call number is taken from ureg->ax and the arguments
 *  from the user stack at ureg->usp.  The result (or -1 on error)
 *  is stored back into ureg->ax.  Pending notes and process control
 *  requests are handled before returning to user space.
 */
void
syscall(Ureg* ureg)
{
        char *e;
        ulong   sp;
        long    ret;
        int     i, s;
        ulong scallnr;

        SYSCALLLOG(dprint("%d: syscall ...#%ld(%s)\n", 
                        up->pid, ureg->ax, sysctab[ureg->ax]);)
        
        /* system calls may only come from user mode */
        if((ureg->cs & 0xFFFF) != UESEL)
                panic("syscall: cs 0x%4.4luX\n", ureg->cs);

        cycles(&up->kentry);

        m->syscall++;
        up->insyscall = 1;
        up->pc = ureg->pc;
        up->dbgreg = ureg;

        /* syscall tracing: stop on entry */
        if(up->procctl == Proc_tracesyscall){
                up->procctl = Proc_stopme;
                procctl();
        }

        scallnr = ureg->ax;
        up->scallnr = scallnr;
        /* save live floating point state before an rfork */
        if(scallnr == RFORK && up->fpstate == FPactive){
                fpsave(up->fpsave);
                up->fpstate = FPinactive;
        }
        spllo();

        sp = ureg->usp;
        up->nerrlab = 0;
        ret = -1;
        if(!waserror()){
                if(scallnr >= nsyscall || systab[scallnr] == 0){
                        pprint("bad sys call number %lud pc %lux\n",
                                scallnr, ureg->pc);
                        postnote(up, 1, "sys: bad sys call", NDebug);
                        error(Ebadarg);
                }

                /* validate the argument area unless it lies within the top page of the user stack */
                if(sp<(USTKTOP-BY2PG) || sp>(USTKTOP-sizeof(Sargs)-BY2WD))
                        validaddr(sp, sizeof(Sargs)+BY2WD, 0);

                /* arguments start one word above the user stack pointer */
                up->s = *((Sargs*)(sp+BY2WD));
                up->psstate = sysctab[scallnr];

                ret = systab[scallnr]((va_list)up->s.args);
                poperror();
        }else{
                /* failure: save the error buffer for errstr */
                e = up->syserrstr;
                up->syserrstr = up->errstr;
                up->errstr = e;
                if(0 && up->pid == 1)
                        print("syscall %lud error %s\n", scallnr, up->syserrstr);
        }
        /* the error stack must be empty again at this point */
        if(up->nerrlab){
                print("bad errstack [%lud]: %d extra\n", scallnr, up->nerrlab);
                for(i = 0; i < NERR; i++)
                        print("sp=%lux pc=%lux\n",
                                up->errlab[i].sp, up->errlab[i].pc);
                panic("error stack");
        }

        SYSCALLLOG(dprint("%d: Syscall %d returns %d, ureg %p\n", up->pid, scallnr, ret, ureg);)
        /*
         *  Put return value in frame.  On the x86 the syscall is
         *  just another trap and the return value from syscall is
         *  ignored.  On other machines the return value is put into
         *  the results register by caller of syscall.
         */
        ureg->ax = ret;

        /* syscall tracing: stop again on exit */
        if(up->procctl == Proc_tracesyscall){
                s = splhi();
                up->procctl = Proc_stopme;
                procctl();
                splx(s);
        }

        up->insyscall = 0;
        up->psstate = 0;
        INTRLOG(dprint("cleared insyscall\n");)
        /* noted() takes its argument from the first word of the argument area */
        if(scallnr == NOTED)
                noted(ureg, *(ulong*)(sp+BY2WD));

        if(scallnr!=RFORK && (up->procctl || up->nnote)){
                splhi();
                notify(ureg);
        }
        /* if we delayed sched because we held a lock, sched now */
        if(up->delaysched)
                sched();
        INTRLOG(dprint("before kexit\n");)
        kexit(ureg);
}
749
750 /*
751  *  Call user, if necessary, with note.
752  *  Pass user the Ureg struct and the note on his stack.
753  */
754 int
755 notify(Ureg* ureg)
756 {
757         int l;
758         ulong s, sp;
759         Note *n;
760
761         if(up->procctl)
762                 procctl();
763         if(up->nnote == 0)
764                 return 0;
765
766         if(up->fpstate == FPactive){
767                 fpsave(up->fpsave);
768                 up->fpstate = FPinactive;
769         }
770         up->fpstate |= FPillegal;
771
772         s = spllo();
773         qlock(&up->debug);
774         up->notepending = 0;
775         n = &up->note[0];
776         if(strncmp(n->msg, "sys:", 4) == 0){
777                 l = strlen(n->msg);
778                 if(l > ERRMAX-15)       /* " pc=0x12345678\0" */
779                         l = ERRMAX-15;
780                 sprint(n->msg+l, " pc=0x%.8lux", ureg->pc);
781         }
782
783         if(n->flag!=NUser && (up->notified || up->notify==0)){
784                 if(n->flag == NDebug)
785                         pprint("suicide: %s\n", n->msg);
786                 qunlock(&up->debug);
787                 pexit(n->msg, n->flag!=NDebug);
788         }
789
790         if(up->notified){
791                 qunlock(&up->debug);
792                 splhi();
793                 return 0;
794         }
795                 
796         if(!up->notify){
797                 qunlock(&up->debug);
798                 pexit(n->msg, n->flag!=NDebug);
799         }
800         sp = ureg->usp;
801         sp -= sizeof(Ureg);
802
803         if(!okaddr((ulong)up->notify, 1, 0)
804         || !okaddr(sp-ERRMAX-4*BY2WD, sizeof(Ureg)+ERRMAX+4*BY2WD, 1)){
805                 pprint("suicide: bad address in notify\n");
806                 qunlock(&up->debug);
807                 pexit("Suicide", 0);
808         }
809
810         up->ureg = (void*)sp;
811         memmove((Ureg*)sp, ureg, sizeof(Ureg));
812         *(Ureg**)(sp-BY2WD) = up->ureg; /* word under Ureg is old up->ureg */
813         up->ureg = (void*)sp;
814         sp -= BY2WD+ERRMAX;
815         memmove((char*)sp, up->note[0].msg, ERRMAX);
816         sp -= 3*BY2WD;
817         *(ulong*)(sp+2*BY2WD) = sp+3*BY2WD;             /* arg 2 is string */
818         *(ulong*)(sp+1*BY2WD) = (ulong)up->ureg;        /* arg 1 is ureg* */
819         *(ulong*)(sp+0*BY2WD) = 0;                      /* arg 0 is pc */
820         ureg->usp = sp;
821         ureg->pc = (ulong)up->notify;
822         up->notified = 1;
823         up->nnote--;
824         memmove(&up->lastnote, &up->note[0], sizeof(Note));
825         memmove(&up->note[0], &up->note[1], up->nnote*sizeof(Note));
826
827         qunlock(&up->debug);
828         splx(s);
829         return 1;
830 }
831
832 /*
833  *   Return user to state before notify()
834  */
835 void
836 noted(Ureg* ureg, ulong arg0)
837 {
838         Ureg *nureg;
839         ulong oureg, sp;
840
841         qlock(&up->debug);
842         if(arg0!=NRSTR && !up->notified) {
843                 qunlock(&up->debug);
844                 pprint("call to noted() when not notified\n");
845                 pexit("Suicide", 0);
846         }
847         up->notified = 0;
848
849         nureg = up->ureg;       /* pointer to user returned Ureg struct */
850
851         up->fpstate &= ~FPillegal;
852
853         /* sanity clause */
854         oureg = (ulong)nureg;
855         if(!okaddr((ulong)oureg-BY2WD, BY2WD+sizeof(Ureg), 0)){
856                 pprint("bad ureg in noted or call to noted when not notified\n");
857                 qunlock(&up->debug);
858                 pexit("Suicide", 0);
859         }
860
861         /*
862          * Check the segment selectors are all valid, otherwise
863          * a fault will be taken on attempting to return to the
864          * user process.
865          * Take care with the comparisons as different processor
866          * generations push segment descriptors in different ways.
867          */
868         if((nureg->cs & 0xFFFF) != UESEL || (nureg->ss & 0xFFFF) != UDSEL
869           || (nureg->ds & 0xFFFF) != UDSEL || (nureg->es & 0xFFFF) != UDSEL
870           || (nureg->fs & 0xFFFF) != UDSEL || (nureg->gs & 0xFFFF) != UDSEL){
871                 pprint("bad segment selector in noted\n");
872                 pprint("cs is %#lux, wanted %#ux\n", nureg->cs, UESEL);
873                 pprint("ds is %#lux, wanted %#ux\n", nureg->ds, UDSEL);
874                 pprint("es is %#lux, fs is %#lux, gs %#lux, wanted %#ux\n", 
875                         ureg->es, ureg->fs, ureg->gs, UDSEL);
876                 pprint("ss is %#lux, wanted %#ux\n", nureg->ss, UDSEL);
877                 qunlock(&up->debug);
878                 pexit("Suicide", 0);
879         }
880
881         /* don't let user change system flags */
882         nureg->flags = (ureg->flags & ~0xCD5) | (nureg->flags & 0xCD5);
883
884         memmove(ureg, nureg, sizeof(Ureg));
885
886         switch(arg0){
887         case NCONT:
888         case NRSTR:
889                 if(!okaddr(nureg->pc, 1, 0) || !okaddr(nureg->usp, BY2WD, 0)){
890                         qunlock(&up->debug);
891                         pprint("suicide: trap in noted\n");
892                         pexit("Suicide", 0);
893                 }
894                 up->ureg = (Ureg*)(*(ulong*)(oureg-BY2WD));
895                 qunlock(&up->debug);
896                 break;
897
898         case NSAVE:
899                 if(!okaddr(nureg->pc, BY2WD, 0)
900                 || !okaddr(nureg->usp, BY2WD, 0)){
901                         qunlock(&up->debug);
902                         pprint("suicide: trap in noted\n");
903                         pexit("Suicide", 0);
904                 }
905                 qunlock(&up->debug);
906                 sp = oureg-4*BY2WD-ERRMAX;
907                 splhi();
908                 ureg->sp = sp;
909                 ((ulong*)sp)[1] = oureg;        /* arg 1 0(FP) is ureg* */
910                 ((ulong*)sp)[0] = 0;            /* arg 0 is pc */
911                 break;
912
913         default:
914                 pprint("unknown noted arg 0x%lux\n", arg0);
915                 up->lastnote.flag = NDebug;
916                 /* fall through */
917                 
918         case NDFLT:
919                 if(up->lastnote.flag == NDebug){ 
920                         qunlock(&up->debug);
921                         pprint("suicide: %s\n", up->lastnote.msg);
922                 } else
923                         qunlock(&up->debug);
924                 pexit(up->lastnote.msg, up->lastnote.flag!=NDebug);
925         }
926 }
927
928 uintptr
929 execregs(uintptr entry, ulong ssize, ulong nargs)
930 {
931         ulong *sp;
932         Ureg *ureg;
933
934         up->fpstate = FPinit;
935         fpoff();
936
937         sp = (ulong*)(USTKTOP - ssize);
938         *--sp = nargs;
939
940         ureg = up->dbgreg;
941         ureg->usp = (ulong)sp;
942         ureg->pc = entry;
943 //      print("execregs returns 0x%x\n", USTKTOP-sizeof(Tos));
944         return USTKTOP-sizeof(Tos);             /* address of kernel/user shared data */
945 }
946
947 /*
948  *  return the userpc the last exception happened at
949  */
950 ulong
951 userpc(void)
952 {
953         Ureg *ureg;
954
955         ureg = (Ureg*)up->dbgreg;
956         return ureg->pc;
957 }
958
959 /* This routine must save the values of registers the user is not permitted
960  * to write from devproc and then restore the saved values before returning.
961  */
962 void
963 setregisters(Ureg* ureg, char* pureg, char* uva, int n)
964 {
965         ulong flags;
966         ulong cs;
967         ulong ss;
968
969         flags = ureg->flags;
970         cs = ureg->cs;
971         ss = ureg->ss;
972         memmove(pureg, uva, n);
973         ureg->flags = (ureg->flags & 0x00FF) | (flags & 0xFF00);
974         ureg->cs = cs;
975         ureg->ss = ss;
976 }
977
978 static void
979 linkproc(void)
980 {
981         spllo();
982         up->kpfun(up->kparg);
983         pexit("kproc dying", 0);
984 }
985
986 void
987 kprocchild(Proc* p, void (*func)(void*), void* arg)
988 {
989         /*
990          * gotolabel() needs a word on the stack in
991          * which to place the return PC used to jump
992          * to linkproc().
993          */
994         p->sched.pc = (ulong)linkproc;
995         p->sched.sp = (ulong)p->kstack+KSTACK-BY2WD;
996
997         p->kpfun = func;
998         p->kparg = arg;
999 }
1000
1001 void
1002 forkchild(Proc *p, Ureg *ureg)
1003 {
1004         Ureg *cureg;
1005
1006         /*
1007          * Add 2*BY2WD to the stack to account for
1008          *  - the return PC
1009          *  - trap's argument (ur)
1010          */
1011         p->sched.sp = (ulong)p->kstack+KSTACK-(sizeof(Ureg)+2*BY2WD);
1012         p->sched.pc = (ulong)forkret;
1013
1014         cureg = (Ureg*)(p->sched.sp+2*BY2WD);
1015         memmove(cureg, ureg, sizeof(Ureg));
1016         /* return value of syscall in child */
1017         cureg->ax = 0;
1018
1019         /* Things from bottom of syscall which were never executed */
1020         p->psstate = 0;
1021         p->insyscall = 0;
1022 }
1023
1024 /* Give enough context in the ureg to produce a kernel stack for
1025  * a sleeping process
1026  */
1027 void
1028 setkernur(Ureg* ureg, Proc* p)
1029 {
1030         ureg->pc = p->sched.pc;
1031         ureg->sp = p->sched.sp+4;
1032 }
1033
1034 ulong
1035 dbgpc(Proc *p)
1036 {
1037         Ureg *ureg;
1038
1039         ureg = p->dbgreg;
1040         if(ureg == 0)
1041                 return 0;
1042
1043         return ureg->pc;
1044 }
1045
1046 /*
1047  * install_safe_pf_handler / install_normal_pf_handler:
1048  * 
1049  * These are used within the failsafe_callback handler in entry.S to avoid
1050  * taking a full page fault when reloading FS and GS. This is because FS and 
1051  * GS could be invalid at pretty much any point while Xenolinux executes (we 
1052  * don't set them to safe values on entry to the kernel). At *any* point Xen 
1053  * may be entered due to a hardware interrupt --- on exit from Xen an invalid 
1054  * FS/GS will cause our failsafe_callback to be executed. This could occur, 
1055  * for example, while the mmu_update_queue is in an inconsistent state. This
1056  * is disastrous because the normal page-fault handler touches the update
1057  * queue!
1058  * 
1059  * Fortunately, within the failsafe handler it is safe to force DS/ES/FS/GS
1060  * to zero if they cannot be reloaded -- at this point executing a normal
1061  * page fault would not change this effect. The safe page-fault handler
1062  * ensures this end result (blow away the selector value) without the dangers
1063  * of the normal page-fault handler.
1064  * 
1065  * NB. Perhaps this can all go away after we have implemented writeable
1066  * page tables. :-)
1067  */
1068 static void
1069 safe_fault386(Ureg* , void* ) {
1070         panic("DO SAFE PAGE FAULT!\n");
1071
1072
1073    
1074 }
1075
1076 unsigned long install_safe_pf_handler(void)
1077 {
1078         dprint("called from failsafe callback\n");
1079         trapenable(VectorPF, safe_fault386, 0, "safe_fault386");
1080         return 0;
1081 }
1082
1083 void install_normal_pf_handler(unsigned long)
1084 {
1085         trapenable(VectorPF, fault386, 0, "fault386");
1086 }