git.lizzy.rs Git - plan9front.git/blob - sys/src/9/pc/devvmx.c
kernel: cleanup the software mouse cursor mess
[plan9front.git] / sys / src / 9 / pc / devvmx.c
1 #include "u.h"
2 #include "../port/lib.h"
3 #include "mem.h"
4 #include "dat.h"
5 #include "fns.h"
6 #include "../port/error.h"
7 #include "ureg.h"
8
/*
 * Wrappers around the VMX instructions.  They are only declared here;
 * the definitions live outside this file.  The callers in this file
 * treat a negative return value as failure.
 */
extern int vmxon(u64int);
extern int vmxoff(void);
extern int vmclear(u64int);
extern int vmptrld(u64int);
extern int vmlaunch(Ureg *, int);
extern int vmread(u32int, uintptr *);
extern int vmwrite(u32int, uintptr);
extern int invept(u32int, uvlong, uvlong);
extern int invvpid(u32int, uvlong, uvlong);

/* set to 1 by vmxreset() once every VMX feature check has passed */
static int gotvmx;
/* raw values of the processor-based / pin-based control capability MSRs, filled in by vmcsinit() */
static vlong procb_ctls, pinb_ctls;
21
/*
 * Numeric constants used with rdmsr() (the VMX_*_MSR capability
 * registers) and with vmcsread()/vmcswrite() (VMCS field encodings),
 * together with the control bits that live inside some of those fields.
 */
enum {
        /* VMX capability MSRs */
        VMX_BASIC_MSR = 0x480,
        VMX_PINB_CTLS_MSR = 0x481,
        VMX_PROCB_CTLS_MSR = 0x482,
        VMX_VMEXIT_CTLS_MSR = 0x483,
        VMX_VMENTRY_CTLS_MSR = 0x484,
        VMX_MISC_MSR = 0x485,
        VMX_CR0_FIXED0 = 0x486,
        VMX_CR0_FIXED1 = 0x487,
        VMX_CR4_FIXED0 = 0x488,
        VMX_CR4_FIXED1 = 0x489,
        VMX_VMCS_ENUM = 0x48A,
        VMX_PROCB_CTLS2_MSR = 0x48B,
        VMX_TRUE_PINB_CTLS_MSR = 0x48D,
        VMX_TRUE_PROCB_CTLS_MSR = 0x48E,
        VMX_TRUE_EXIT_CTLS_MSR = 0x48F,
        VMX_TRUE_ENTRY_CTLS_MSR = 0x490,
        VMX_VMFUNC_MSR = 0x491,

        /* pin-based execution controls: VMCS field, then its bits */
        PINB_CTLS = 0x4000,
        PINB_EXITIRQ = 1<<0,
        PINB_EXITNMI = 1<<3,

        /* primary processor-based execution controls: VMCS field, then its bits */
        PROCB_CTLS = 0x4002,
        PROCB_IRQWIN = 1<<2,
        PROCB_EXITHLT = 1<<7,
        PROCB_EXITINVLPG = 1<<9,
        PROCB_EXITMWAIT = 1<<10,
        PROCB_EXITRDPMC = 1<<11,
        PROCB_EXITRDTSC = 1<<12,
        PROCB_EXITCR3LD = 1<<15,
        PROCB_EXITCR3ST = 1<<16,
        PROCB_EXITCR8LD = 1<<19,
        PROCB_EXITCR8ST = 1<<20,
        PROCB_EXITMOVDR = 1<<23,
        PROCB_EXITIO = 1<<24,
        PROCB_MONTRAP = 1<<27,
        PROCB_MSRBITMAP = 1<<28,
        PROCB_EXITMONITOR = 1<<29,
        PROCB_EXITPAUSE = 1<<30,
        PROCB_USECTLS2 = 1<<31,

        /* secondary processor-based execution controls: VMCS field, then its bits */
        PROCB_CTLS2 = 0x401E,
        PROCB_EPT = 1<<1,
        PROCB_EXITGDT = 1<<2,
        PROCB_VPID = 1<<5,
        PROCB_UNRESTR = 1<<7,

        /* exception bitmap, page-fault filter, CR3 target count, MSR bitmap address */
        EXC_BITMAP = 0x4004,
        PFAULT_MASK = 0x4006,
        PFAULT_MATCH = 0x4008,
        CR3_TARGCNT = 0x400a,
        MSR_BITMAP = 0x2004,

        /* VM-exit controls: VMCS field, then its bits */
        VMEXIT_CTLS = 0x400c,
        VMEXIT_ST_DEBUG = 1<<2,
        VMEXIT_HOST64 = 1<<9,
        VMEXIT_LD_IA32_PERF_GLOBAL_CTRL = 1<<12,
        VMEXIT_ST_IA32_PAT = 1<<18,
        VMEXIT_LD_IA32_PAT = 1<<19,
        VMEXIT_ST_IA32_EFER = 1<<20,
        VMEXIT_LD_IA32_EFER = 1<<21,

        /* counts and addresses of the MSR store/load areas (counts are kept in sync by vmxaddmsr()) */
        VMEXIT_MSRSTCNT = 0x400e,
        VMEXIT_MSRLDCNT = 0x4010,
        VMEXIT_MSRSTADDR = 0x2006,
        VMEXIT_MSRLDADDR = 0x2008,
        VMENTRY_MSRLDADDR = 0x200A,

        /* VM-entry controls: VMCS field, then its bits */
        VMENTRY_CTLS = 0x4012,
        VMENTRY_LD_DEBUG = 1<<2,
        VMENTRY_GUEST64 = 1<<9,
        VMENTRY_LD_IA32_PERF_GLOBAL_CTRL = 1<<13,
        VMENTRY_LD_IA32_PAT = 1<<14,
        VMENTRY_LD_IA32_EFER = 1<<15,

        /* VM-entry MSR-load count and event-injection fields */
        VMENTRY_MSRLDCNT = 0x4014,
        VMENTRY_INTRINFO = 0x4016,
        VMENTRY_INTRCODE = 0x4018,
        VMENTRY_INTRILEN = 0x401a,

        /* VMCS link pointer; vmcsinit() writes -1 to it */
        VMCS_LINK = 0x2800,

        /* guest-state fields */
        GUEST_ES = 0x800,
        GUEST_CS = 0x802,
        GUEST_SS = 0x804,
        GUEST_DS = 0x806,
        GUEST_FS = 0x808,
        GUEST_GS = 0x80A,
        GUEST_LDTR = 0x80C,
        GUEST_TR = 0x80E,
        GUEST_CR0 = 0x6800,
        GUEST_CR3 = 0x6802,
        GUEST_CR4 = 0x6804,
        GUEST_ESLIMIT = 0x4800,
        GUEST_CSLIMIT = 0x4802,
        GUEST_SSLIMIT = 0x4804,
        GUEST_DSLIMIT = 0x4806,
        GUEST_FSLIMIT = 0x4808,
        GUEST_GSLIMIT = 0x480A,
        GUEST_LDTRLIMIT = 0x480C,
        GUEST_TRLIMIT = 0x480E,
        GUEST_GDTRLIMIT = 0x4810,
        GUEST_IDTRLIMIT = 0x4812,
        GUEST_ESPERM = 0x4814,
        GUEST_CSPERM = 0x4816,
        GUEST_SSPERM = 0x4818,
        GUEST_DSPERM = 0x481A,
        GUEST_FSPERM = 0x481C,
        GUEST_GSPERM = 0x481E,
        GUEST_LDTRPERM = 0x4820,
        GUEST_TRPERM = 0x4822,
        GUEST_CR0MASK = 0x6000,
        GUEST_CR4MASK = 0x6002,
        GUEST_CR0SHADOW = 0x6004,
        GUEST_CR4SHADOW = 0x6006,
        GUEST_ESBASE = 0x6806,
        GUEST_CSBASE = 0x6808,
        GUEST_SSBASE = 0x680A,
        GUEST_DSBASE = 0x680C,
        GUEST_FSBASE = 0x680E,
        GUEST_GSBASE = 0x6810,
        GUEST_LDTRBASE = 0x6812,
        GUEST_TRBASE = 0x6814,
        GUEST_GDTRBASE = 0x6816,
        GUEST_IDTRBASE = 0x6818,
        GUEST_DR7 = 0x681A,
        GUEST_RSP = 0x681C,
        GUEST_RIP = 0x681E,
        GUEST_RFLAGS = 0x6820,
        GUEST_IA32_DEBUGCTL = 0x2802,
        GUEST_IA32_PAT = 0x2804,
        GUEST_IA32_EFER = 0x2806,
        GUEST_IA32_PERF_GLOBAL_CTRL = 0x2808,

        /* host-state fields, loaded by vmcsinit() from the running kernel */
        HOST_ES = 0xC00,
        HOST_CS = 0xC02,
        HOST_SS = 0xC04,
        HOST_DS = 0xC06,
        HOST_FS = 0xC08,
        HOST_GS = 0xC0A,
        HOST_TR = 0xC0C,
        HOST_CR0 = 0x6C00,
        HOST_CR3 = 0x6C02,
        HOST_CR4 = 0x6C04,
        HOST_FSBASE = 0x6C06,
        HOST_GSBASE = 0x6C08,
        HOST_TRBASE = 0x6C0A,
        HOST_GDTR = 0x6C0C,
        HOST_IDTR = 0x6C0E,
        HOST_RSP = 0x6C14,
        HOST_RIP = 0x6C16,
        HOST_IA32_PAT = 0x2C00,
        HOST_IA32_EFER = 0x2C02,
        HOST_IA32_PERF_GLOBAL_CTRL = 0x2C04,

        /* NOTE(review): presumably the guest interruptibility state -- confirm against the users of this field */
        GUEST_CANINTR = 0x4824,

        /* information fields describing the last VM exit / instruction error */
        VM_INSTRERR = 0x4400,
        VM_EXREASON = 0x4402,
        VM_EXINTRINFO = 0x4404,
        VM_EXINTRCODE = 0x4406,
        VM_IDTVECINFO = 0x4408,
        VM_IDTVECCODE = 0x440A,
        VM_EXINSTRLEN = 0x440C,
        VM_EXINSTRINFO = 0x440E,
        VM_EXQUALIF = 0x6400,
        VM_IORCX = 0x6402,
        VM_IORSI = 0x6404,
        VM_IORDI = 0x6406,
        VM_IORIP = 0x6408,
        VM_GUESTVA = 0x640A,
        VM_GUESTPA = 0x2400,

        /* VPID and EPTP-index fields */
        VM_VPID = 0x000,
        VM_EPTPIDX = 0x0004,

        /* EPT pointer and EPTP-list address fields */
        VM_EPTP = 0x201A,
        VM_EPTPLA = 0x2024,

        /* NOTE(review): presumably the invalidation type passed to invept()/invvpid() -- confirm at the call sites */
        INVLOCAL = 1,
};
204
/*
 * Control-register bits the kernel keeps for itself.
 * CR0KERNEL/CR4KERNEL are the bits of the guest's real CR0/CR4 that
 * cr0realwrite()/cr4realwrite() never let the user change and that
 * cr0maskwrite()/cr4maskwrite() always force into the guest/host mask.
 * The upper 32 bits are always included.
 */
enum {
        CR0RSVD = 0x1ffaffc0,
        CR4RSVD = 0xff889000,
        CR4MCE = 1<<6,
        CR4VMXE = 1<<13,
        CR4SMXE = 1<<14,
        CR4PKE = 1<<22,

        CR0KERNEL = CR0RSVD | 0x30 | (uintptr)0xFFFFFFFF00000000ULL,
        CR4KERNEL = CR4RSVD | CR4VMXE | CR4SMXE | CR4MCE | CR4PKE | (uintptr)0xFFFFFFFF00000000ULL
};
216
/*
 * Capacity, in (MSR number, value) pairs, of Vmx.msrhost and
 * Vmx.msrguest; vmxaddmsr() refuses to go beyond it.
 */
enum {
        MAXMSR = 512,
};
220
/* forward declarations; the structures are defined below */
typedef struct VmxMach VmxMach;
typedef struct Vmx Vmx;
typedef struct VmCmd VmCmd;
typedef struct VmMem VmMem;
typedef struct VmIntr VmIntr;
226
227 struct VmMem {
228         uvlong addr;
229         Segment *seg;
230         uintptr off;
231         char *name;
232         VmMem *next, *prev;
233         u16int attr;
234 };
235
/*
 * Per-processor VMX state.  vmxreset() allocates one per Mach,
 * aligned to 4096 bytes, and stores it in the Mach's vmx pointer.
 */
struct VmxMach {
        char vmxon[4096]; /* has to be at the start for alignment */
        QLock;
        int vms;
        Vmx *active;    /* VM whose VMCS was last loaded here by vmxprocrestore(); nil if none */
};
/* VmxMach of the current processor, and of processor number n */
#define vmxmach ((VmxMach*)((m)->vmx))
#define vmxmachp(n) ((VmxMach*)(MACHP(n)->vmx))
244
245 struct VmIntr {
246         u32int info, code, ilen;
247 };
248
/*
 * State of one virtual machine.  The leading members must keep their
 * order and sizes: the alignment noted on each depends on the structure
 * itself being page aligned and on everything before it being a whole
 * number of pages.
 */
struct Vmx {
        uchar vmcs[4096]; /* page aligned */    /* its physical address is handed to vmptrld() */
        uvlong pml4[512]; /* page aligned */    /* top-level EPT table walked by eptwalk() */
        u32int msrbits[1024]; /* page aligned */        /* MSR bitmap maintained by vmxtrapmsr() */
        FPsave fp; /* page aligned */
        u64int msrhost[MAXMSR*2]; /* 16 byte aligned */ /* (MSR number, host value) pairs, see vmxaddmsr() */
        u64int msrguest[MAXMSR*2]; /* 16 byte aligned */        /* (MSR number, guest value) pairs */

        /* life-cycle state; statenames[] holds the printable names */
        enum {
                VMXINIT,
                VMXREADY,
                VMXRUNNING,
                VMXDEAD,
                VMXENDING,
        } state;
        int index, machno;
        char errstr[ERRMAX];
        Ureg ureg;      /* guest general registers, exposed through the UREG() entries of guestregs[] */
        uintptr cr2;
        uintptr dr[8]; /* DR7 is also kept in VMCS */
        u8int launched; /* cleared by vmcsinit() */
        u8int vpid;
        /* work to be done at the next VM entry; FLUSHEPT is set whenever the EPT tables are modified */
        enum {
                FLUSHVPID = 1,
                FLUSHEPT = 2,
                STEP = 4,
                POSTEX = 8,
                POSTIRQ = 16,
        } onentry;

        /* command queue; NOTE(review): the code that uses these is outside this chunk */
        Rendez cmdwait;
        Lock cmdlock;
        VmCmd *firstcmd, **lastcmd;
        VmCmd *postponed;
        VmMem mem;      /* sentinel head of the circular guest memory map list */

        enum {
                GOTEXIT = 1,
                GOTIRQACK = 2,
        } got;
        VmIntr exc, irq, irqack;

        int nmsr;       /* number of pairs in use in msrhost[]/msrguest[] */
};
293
/*
 * One request to be executed against a VM.  The handlers visible in
 * this file (cmdgetmeminfo() and friends) have the signature of the
 * cmd member and find their VM through the vmx member.
 * NOTE(review): the code that queues, runs and completes commands is
 * outside this chunk; the meaning of the flags and of scratched should
 * be confirmed there.
 */
struct VmCmd {
        enum {
                CMDFDONE = 1,
                CMDFFAIL = 2,
                CMDFPOSTP = 4,
        } flags;
        u8int scratched;
        Rendez;
        Lock;
        int (*cmd)(VmCmd *, va_list);   /* handler; receives this command and its arguments */
        int retval;
        char *errstr;
        va_list va;
        VmCmd *next;
        Vmx *vmx;       /* VM the command applies to */
};
310
311 static char Equit[] = "vmx: ending";
312
313 static char *statenames[] = {
314         [VMXINIT] "init",
315         [VMXREADY] "ready",
316         [VMXRUNNING] "running",
317         [VMXDEAD] "dead",
318         [VMXENDING]"ending"
319 };
320
/*
 * File-wide bookkeeping for the set of virtual machines.
 * NOTE(review): none of these is used in the part of the file visible
 * here; presumably vmxtab/nvmxtab is the table of existing VMs guarded
 * by vmxtablock and moribund a VM awaiting teardown -- confirm.
 */
static Vmx *moribund;
static QLock vmxtablock;
static Vmx **vmxtab;
static int nvmxtab;
325
326 void
327 vmxprocrestore(Proc *p)
328 {
329         int s;
330         Vmx *vmx;
331         
332         s = splhi();
333         vmx = p->vmx;
334         if(vmxmach->active != vmx){
335                 if(vmx != nil && vmptrld(PADDR(vmx->vmcs)) < 0)
336                         panic("VMPTRLD(%p) failed", vmx->vmcs);
337                 vmxmach->active = vmx;
338         }
339         splx(s);
340 }
341
342 static u64int
343 vmcsread(u32int addr)
344 {
345         int rc;
346         u64int val;
347
348         val = 0;
349         rc = vmread(addr, (uintptr *) &val);
350         if(rc >= 0 && sizeof(uintptr) == 4 && (addr & 0x6000) == 0x2000)
351                 rc = vmread(addr | 1, (uintptr *) &val + 1);
352         if(rc < 0){
353                 char errbuf[128];
354                 snprint(errbuf, sizeof(errbuf), "vmcsread failed (%#.4ux)", addr);
355                 error(errbuf);
356         }
357         return val;
358 }
359
360 static void
361 vmcswrite(u32int addr, u64int val)
362 {
363         int rc;
364         
365         rc = vmwrite(addr, val);
366         if(rc >= 0 && sizeof(uintptr) == 4 && (addr & 0x6000) == 0x2000)
367                 rc = vmwrite(addr | 1, val >> 32);
368         if(rc < 0){
369                 char errbuf[128];
370                 snprint(errbuf, sizeof(errbuf), "vmcswrite failed (%#.4ux = %#.16ullx)", addr, val);
371                 error(errbuf);
372         }
373 }
374
375 static uvlong
376 parseval(char *s)
377 {
378         uvlong v;
379         char *p;
380
381         v = strtoull(s, &p, 0);
382         if(p == s || *p != 0) error("invalid value");
383         return v;
384 }
385
386 static char *
387 cr0fakeread(Vmx *, char *p, char *e)
388 {
389         uvlong guest, mask, shadow;
390         
391         guest = vmcsread(GUEST_CR0);
392         mask = vmcsread(GUEST_CR0MASK);
393         shadow = vmcsread(GUEST_CR0SHADOW);
394         return seprint(p, e, "%#.*ullx", sizeof(uintptr) * 2, guest & ~mask | shadow & mask);
395 }
396
397 static char *
398 cr4fakeread(Vmx *, char *p, char *e)
399 {
400         uvlong guest, mask, shadow;
401         
402         guest = vmcsread(GUEST_CR4);
403         mask = vmcsread(GUEST_CR4MASK);
404         shadow = vmcsread(GUEST_CR4SHADOW);
405         return seprint(p, e, "%#.*ullx", sizeof(uintptr) * 2, guest & ~mask | shadow & mask);
406 }
407
408 static void
409 updatelma(void)
410 {
411         uvlong cr0, efer, nefer, ectrl;
412
413         if(sizeof(uintptr) != 8) return;
414         cr0 = vmcsread(GUEST_CR0);
415         efer = vmcsread(GUEST_IA32_EFER);
416         nefer = efer & ~0x400 | efer << 2 & cr0 >> 21 & 0x400;
417         if(efer == nefer) return;
418         vmcswrite(GUEST_IA32_EFER, nefer);
419         ectrl = vmcsread(VMENTRY_CTLS);
420         ectrl = ectrl & ~0x200 | nefer >> 1 & 0x200;
421         vmcswrite(VMENTRY_CTLS, ectrl);
422 }
423
424 static int
425 cr0realwrite(Vmx *, char *s)
426 {
427         uvlong v;
428         
429         v = parseval(s);
430         vmcswrite(GUEST_CR0, vmcsread(GUEST_CR0) & CR0KERNEL | v & ~CR0KERNEL);
431         updatelma();
432         return 0;
433 }
434
435 static int
436 cr0maskwrite(Vmx *, char *s)
437 {
438         uvlong v;
439         
440         v = parseval(s);
441         vmcswrite(GUEST_CR0MASK, v | CR0KERNEL);
442         return 0;
443 }
444
445 static int
446 eferwrite(Vmx *, char *s)
447 {
448         uvlong v;
449         
450         v = parseval(s);
451         vmcswrite(GUEST_IA32_EFER, v);
452         updatelma();
453         return 0;
454 }
455
456 static int
457 cr4realwrite(Vmx *, char *s)
458 {
459         uvlong v;
460         
461         v = parseval(s);
462         vmcswrite(GUEST_CR4, vmcsread(GUEST_CR4) & CR4KERNEL | v & ~CR4KERNEL);
463         return 0;
464 }
465
466 static int
467 cr4maskwrite(Vmx *, char *s)
468 {
469         uvlong v;
470         
471         v = parseval(s);
472         vmcswrite(GUEST_CR4MASK, v | CR4KERNEL);
473         return 0;
474 }
475
476 static int
477 dr7write(Vmx *vmx, char *s)
478 {
479         uvlong v;
480         
481         v = (u32int) parseval(s);
482         vmcswrite(GUEST_DR7, vmx->dr[7] = (u32int) v);
483         return 0;
484 }
485
/*
 * guestregs[] write hook for registers that must not be written:
 * ignores both arguments and always returns -1.
 */
static int
readonly(Vmx *, char *)
{
        return -1;
}
491
492 static int
493 dr6write(Vmx *vmx, char *s)
494 {
495         uvlong v;
496         
497         v = parseval(s);
498         vmx->dr[6] = (u32int) v;
499         return 0;
500 }
501
/*
 * Description of one named guest register.
 *
 * offset is either a VMCS field encoding (non-negative) or, for state
 * that lives in the Vmx structure, the bitwise complement of the
 * member's byte offset as produced by VMXVAR()/UREG().
 * NOTE(review): the code that walks this table is outside this chunk;
 * presumably it tells the two cases apart by the sign of offset and
 * falls back to a plain read/write when a hook is nil -- confirm.
 */
typedef struct GuestReg GuestReg;
struct GuestReg {
        int offset;
        u8int size; /* in bytes; 0 means == uintptr */
        char *name;
        char *(*read)(Vmx *, char *, char *);   /* optional: format the value into the given buffer range */
        int (*write)(Vmx *, char *);            /* optional: parse and store the value */
};
/* complement of the byte offset of member x inside Vmx */
#define VMXVAR(x) ~(ulong)&(((Vmx*)0)->x)
/* same, for a member of the saved guest Ureg */
#define UREG(x) VMXVAR(ureg.x)
static GuestReg guestregs[] = {
        {GUEST_RIP, 0, "pc"},
        {GUEST_RSP, 0, "sp"},
        {GUEST_RFLAGS, 0, "flags"},
        {UREG(ax), 0, "ax"},
        {UREG(bx), 0, "bx"},
        {UREG(cx), 0, "cx"},
        {UREG(dx), 0, "dx"},
        {UREG(bp), 0, "bp"},
        {UREG(si), 0, "si"},
        {UREG(di), 0, "di"},
#ifdef RMACH
        {UREG(r8), 0, "r8"},
        {UREG(r9), 0, "r9"},
        {UREG(r10), 0, "r10"},
        {UREG(r11), 0, "r11"},
        {UREG(r12), 0, "r12"},
        {UREG(r13), 0, "r13"},
        {UREG(r14), 0, "r14"},
        {UREG(r15), 0, "r15"},
#endif
        {GUEST_GDTRBASE, 0, "gdtrbase"},
        {GUEST_GDTRLIMIT, 4, "gdtrlimit"},
        {GUEST_IDTRBASE, 0, "idtrbase"},
        {GUEST_IDTRLIMIT, 4, "idtrlimit"},
        {GUEST_CS, 2, "cs"},
        {GUEST_CSBASE, 0, "csbase"},
        {GUEST_CSLIMIT, 4, "cslimit"},
        {GUEST_CSPERM, 4, "csperm"},
        {GUEST_DS, 2, "ds"},
        {GUEST_DSBASE, 0, "dsbase"},
        {GUEST_DSLIMIT, 4, "dslimit"},
        {GUEST_DSPERM, 4, "dsperm"},
        {GUEST_ES, 2, "es"},
        {GUEST_ESBASE, 0, "esbase"},
        {GUEST_ESLIMIT, 4, "eslimit"},
        {GUEST_ESPERM, 4, "esperm"},
        {GUEST_FS, 2, "fs"},
        {GUEST_FSBASE, 0, "fsbase"},
        {GUEST_FSLIMIT, 4, "fslimit"},
        {GUEST_FSPERM, 4, "fsperm"},
        {GUEST_GS, 2, "gs"},
        {GUEST_GSBASE, 0, "gsbase"},
        {GUEST_GSLIMIT, 4, "gslimit"},
        {GUEST_GSPERM, 4, "gsperm"},
        {GUEST_SS, 2, "ss"},
        {GUEST_SSBASE, 0, "ssbase"},
        {GUEST_SSLIMIT, 4, "sslimit"},
        {GUEST_SSPERM, 4, "ssperm"},
        {GUEST_TR, 2, "tr"},
        {GUEST_TRBASE, 0, "trbase"},
        {GUEST_TRLIMIT, 4, "trlimit"},
        {GUEST_TRPERM, 4, "trperm"},
        {GUEST_LDTR, 2, "ldtr"},
        {GUEST_LDTRBASE, 0, "ldtrbase"},
        {GUEST_LDTRLIMIT, 4, "ldtrlimit"},
        {GUEST_LDTRPERM, 4, "ldtrperm"},
        /* the control registers have a real value, a guest-visible shadow and an ownership mask */
        {GUEST_CR0, 0, "cr0real", nil, cr0realwrite},
        {GUEST_CR0SHADOW, 0, "cr0fake", cr0fakeread},
        {GUEST_CR0MASK, 0, "cr0mask", nil, cr0maskwrite},
        {VMXVAR(cr2), 0, "cr2"},
        {GUEST_CR3, 0, "cr3"},
        {GUEST_CR4, 0, "cr4real", nil, cr4realwrite},
        {GUEST_CR4SHADOW, 0, "cr4fake", cr4fakeread},
        {GUEST_CR4MASK, 0, "cr4mask", nil, cr4maskwrite},
        {GUEST_IA32_PAT, 8, "pat"},
        {GUEST_IA32_EFER, 8, "efer", nil, eferwrite},
        {VMXVAR(dr[0]), 0, "dr0"},
        {VMXVAR(dr[1]), 0, "dr1"},
        {VMXVAR(dr[2]), 0, "dr2"},
        {VMXVAR(dr[3]), 0, "dr3"},
        {VMXVAR(dr[6]), 0, "dr6", nil, dr6write},
        {GUEST_DR7, 0, "dr7", nil, dr7write},
        /* exit information: the write hook readonly() rejects every write */
        {VM_INSTRERR, 4, "instructionerror", nil, readonly},
        {VM_EXREASON, 4, "exitreason", nil, readonly},
        {VM_EXQUALIF, 0, "exitqualification", nil, readonly},
        {VM_EXINTRINFO, 4, "exitinterruptinfo", nil, readonly},
        {VM_EXINTRCODE, 4, "exitinterruptcode", nil, readonly},
        {VM_EXINSTRLEN, 4, "exitinstructionlen", nil, readonly},
        {VM_EXINSTRINFO, 4, "exitinstructioninfo", nil, readonly},
        {VM_GUESTVA, 0, "exitva", nil, readonly},
        {VM_GUESTPA, 0, "exitpa", nil, readonly},
        {VM_IDTVECINFO, 4, "idtinterruptinfo", nil, readonly},
        {VM_IDTVECCODE, 4, "idtinterruptcode", nil, readonly},
};
597
598 static int
599 vmokpage(u64int addr)
600 {
601         return (addr & 0xfff) == 0 && addr >> 48 == 0;
602 }
603
604 static uvlong *
605 eptwalk(Vmx *vmx, uvlong addr)
606 {
607         uvlong *tab, *nt;
608         uvlong v;
609         int i;
610         
611         tab = vmx->pml4;
612         if(tab == nil) error(Egreg);
613         for(i = 3; i >= 1; i--){
614                 tab += addr >> 12 + 9 * i & 0x1ff;
615                 v = *tab;
616                 if((v & 3) == 0){
617                         nt = mallocalign(BY2PG, BY2PG, 0, 0);
618                         if(nt == nil) error(Enomem);
619                         memset(nt, 0, BY2PG);
620                         v = PADDR(nt) | 0x407;
621                         *tab = v;
622                 }
623                 tab = KADDR(v & ~0xfff);
624         }
625         return tab + (addr >> 12 & 0x1ff);
626 }
627
628 static void
629 eptfree(uvlong *tab, int level)
630 {
631         int i;
632         uvlong v, *t;
633         
634         if(tab == nil) error(Egreg);
635         if(level < 3){
636                 for(i = 0; i < 512; i++){
637                         v = tab[i];
638                         if((v & 3) == 0) continue;
639                         t = KADDR(v & ~0xfff);
640                         eptfree(t, level + 1);
641                         tab[i] = 0;
642                 }
643         }
644         if(level > 0)
645                 free(tab);              
646 }
647
648 static void
649 epttranslate(Vmx *vmx, VmMem *mp, uvlong end)
650 {
651         uvlong p, v;
652
653         if((mp->addr & 0xfff) != 0 || (end & 0xfff) != 0 || (uint)mp->attr >= 0x1000)
654                 error(Egreg);
655         if(mp->seg != nil){
656                 switch(mp->seg->type & SG_TYPE){
657                 default:
658                         error(Egreg);
659                 case SG_FIXED:
660                 case SG_STICKY:
661                         break;
662                 }
663                 if(mp->seg->base + mp->off + (end - mp->addr) > mp->seg->top)
664                         error(Egreg);
665                 for(p = mp->addr, v = mp->off; p != end; p += BY2PG, v += BY2PG)
666                         *eptwalk(vmx, p) = mp->seg->map[v/PTEMAPMEM]->pages[(v & PTEMAPMEM-1)/BY2PG]->pa | mp->attr;
667         }else {
668                 for(p = mp->addr; p != end; p += BY2PG)
669                         *eptwalk(vmx, p) = 0;
670         }
671         vmx->onentry |= FLUSHEPT;
672 }
673
/*
 * Two-letter names of the eight memory-type codes kept in bits 3-5 of
 * VmMem.attr; the array index is the code.
 */
static char *mtype[] = {"uc", "wc", "02", "03", "wt", "wp", "wb", "07"};
675
676 static int
677 cmdgetmeminfo(VmCmd *cmd, va_list va)
678 {
679         VmMem *mp;
680         char *p0, *e, *p;
681         char attr[4];
682         char mt[4];
683         
684         p0 = va_arg(va, char *);
685         e = va_arg(va, char *);
686         p = p0;
687         if(p < e) *p = 0;
688         for(mp = cmd->vmx->mem.next; mp != &cmd->vmx->mem; mp = mp->next){
689                 if(mp->seg == nil)
690                         continue;
691                 attr[0] = (mp->attr & 1) != 0 ? 'r' : '-';
692                 attr[1] = (mp->attr & 2) != 0 ? 'w' : '-';
693                 attr[2] = (mp->attr & 4) != 0 ? 'x' : '-';
694                 attr[3] = 0;
695                 *(ushort*)mt = *(u16int*)mtype[mp->attr >> 3 & 7];
696                 mt[2] = (mp->attr & 0x40) != 0 ? '!' : 0;
697                 mt[3] = 0;
698                 p = seprint(p, e, "%s %s %#llux %#llux %s %#llux\n", attr, mt, mp->addr, mp->next->addr, mp->name, (uvlong)mp->off);
699         }
700         return p - p0;
701 }
702
703 static void
704 vmmeminsert(VmMem *l, VmMem *p)
705 {
706         p->prev = l->prev;
707         p->next = l;
708         p->prev->next = p;
709         p->next->prev = p;
710 }
711
712 static VmMem *
713 vmmemremove(VmMem *p)
714 {
715         VmMem *r;
716         
717         r = p->next;
718         p->next->prev = p->prev;
719         p->prev->next = p->next;
720         free(p->name);
721         putseg(p->seg);
722         free(p);
723         return r;
724 }
725
726 static int
727 cmdclearmeminfo(VmCmd *cmd, va_list)
728 {
729         VmMem *mp;
730         Vmx *vmx;
731
732         vmx = cmd->vmx;
733         eptfree(cmd->vmx->pml4, 0);
734         for(mp = vmx->mem.next; mp != &vmx->mem; )
735                 mp = vmmemremove(mp);
736         vmx->mem.prev = &vmx->mem;
737         vmx->mem.next = &vmx->mem;
738         vmx->onentry |= FLUSHEPT;
739         return 0;
740 }
741
742
743 static void
744 vmmemupdate(Vmx *vmx, VmMem *mp, uvlong end)
745 {
746         VmMem *p, *q;
747         
748         for(p = vmx->mem.prev; p != &vmx->mem; p = p->prev)
749                 if(p->addr <= end || end == 0)
750                         break;
751         if(p == &vmx->mem || p->addr < mp->addr){
752                 q = smalloc(sizeof(VmMem));
753                 *q = *p;
754                 if(p->seg != nil){
755                         incref(q->seg);
756                         kstrdup(&q->name, p->name);
757                 }
758                 vmmeminsert(p->next, q);
759         }else
760                 q = p;
761         if(q->seg != nil)
762                 q->off += end - q->addr;
763         q->addr = end;
764         for(p = vmx->mem.next; p != &vmx->mem; p = p->next)
765                 if(p->addr >= mp->addr)
766                         break;
767         vmmeminsert(p, mp);
768         while(p != q)
769                 p = vmmemremove(p);
770         for(p = vmx->mem.next; p != &vmx->mem; )
771                 if(p->seg == p->prev->seg && (p->seg == nil || p->addr - p->prev->addr == p->off - p->prev->off))
772                         p = vmmemremove(p);
773                 else
774                         p = p->next;
775 }
776
777 extern Segment* (*_globalsegattach)(char*);
778
779 static int
780 cmdsetmeminfo(VmCmd *cmd, va_list va)
781 {
782         char *p0, *p, *q, *r;
783         int j;
784         char *f[10];
785         VmMem *mp;
786         int rc;
787         uvlong end;
788         
789         p0 = va_arg(va, char *);
790         p = p0;
791         mp = nil;
792         for(;;){
793                 q = strchr(p, '\n');
794                 if(q == 0) break;
795                 *q = 0;
796                 if(mp == nil){
797                         mp = malloc(sizeof(VmMem));
798                         if(mp == nil)
799                                 error(Enomem);
800                 }
801                 memset(mp, 0, sizeof(VmMem));
802                 if(waserror()){
803                         putseg(mp->seg);
804                         free(mp->name);
805                         free(mp);
806                         nexterror();
807                 }
808                 rc = tokenize(p, f, nelem(f));
809                 p = q + 1;
810                 if(rc == 0){
811                         poperror();
812                         continue;
813                 }
814                 if(rc != 4 && rc != 6) error("number of fields wrong");
815                 for(q = f[0]; *q != 0; q++)
816                         switch(*q){
817                         case 'r': if((mp->attr & 1) != 0) goto tinval; mp->attr |= 1; break;
818                         case 'w': if((mp->attr & 2) != 0) goto tinval; mp->attr |= 2; break;
819                         case 'x': if((mp->attr & 4) != 0) goto tinval; mp->attr |= 0x404; break;
820                         case '-': break;
821                         default: tinval: error("invalid access field");
822                         }
823                 for(j = 0; j < 8; j++)
824                         if(strncmp(mtype[j], f[1], 2) == 0){
825                                 mp->attr |= j << 3;
826                                 break;
827                         }
828                 if(j == 8 || strlen(f[1]) > 3) error("invalid memory type");
829                 if(f[1][2] == '!') mp->attr |= 0x40;
830                 else if(f[1][2] != 0) error("invalid memory type");
831                 mp->addr = strtoull(f[2], &r, 0);
832                 if(*r != 0 || !vmokpage(mp->addr)) error("invalid low guest physical address");
833                 end = strtoull(f[3], &r, 0);
834                 if(*r != 0 || !vmokpage(end) || end <= mp->addr) error("invalid high guest physical address");
835                 if((mp->attr & 7) != 0){
836                         if(rc != 6) error("number of fields wrong");
837                         mp->seg = _globalsegattach(f[4]);
838                         if(mp->seg == nil) error("no such segment");
839                         if(mp->seg->base + mp->off + (end - mp->addr) > mp->seg->top) error("out of bounds");
840                         kstrdup(&mp->name, f[4]);
841                         mp->off = strtoull(f[5], &r, 0);
842                         if(*r != 0 || !vmokpage(mp->off)) error("invalid offset");
843                 }
844                 poperror();
845                 epttranslate(cmd->vmx, mp, end);
846                 vmmemupdate(cmd->vmx, mp, end);
847                 mp = nil;
848         }
849         free(mp);
850         return p - p0;
851 }
852
853 static void
854 vmxreset(void)
855 {
856         ulong regs[4];
857         vlong msr;
858         int i;
859
860         cpuid(1, regs);
861         if((regs[2] & 1<<5) == 0) return;
862         /* check if disabled by BIOS */
863         if(rdmsr(0x3a, &msr) < 0) return;
864         if((msr & 5) != 5){
865                 if((msr & 1) == 0){ /* msr still unlocked */
866                         wrmsr(0x3a, msr | 5);
867                         if(rdmsr(0x3a, &msr) < 0)
868                                 return;
869                 }
870                 if((msr & 5) != 5)
871                         return;
872         }
873         if(rdmsr(VMX_PROCB_CTLS_MSR, &msr) < 0) return;
874         if((vlong)msr >= 0) return;
875         if(rdmsr(VMX_PROCB_CTLS2_MSR, &msr) < 0) return;
876         if((msr >> 32 & PROCB_EPT) == 0 || (msr >> 32 & PROCB_VPID) == 0) return;
877         for(i = 0; i < conf.nmach; i++){
878                 MACHP(i)->vmx = mallocalign(sizeof(VmxMach), 4096, 0, 0);
879                 if(vmxmachp(i) == nil)
880                         error(Enomem);
881         }
882         gotvmx = 1;
883 }
884
885 static void
886 vmxaddmsr(Vmx *vmx, u32int msr, u64int gval)
887 {
888         int i;
889
890         if(vmx->nmsr >= MAXMSR)
891                 error("too many MSRs");
892         i = 2 * vmx->nmsr++;
893         vmx->msrhost[i] = msr;
894         rdmsr(msr, (vlong *) &vmx->msrhost[i+1]);
895         vmx->msrguest[i] = msr;
896         vmx->msrguest[i+1] = gval;
897         vmcswrite(VMENTRY_MSRLDCNT, vmx->nmsr);
898         vmcswrite(VMEXIT_MSRSTCNT, vmx->nmsr);
899         vmcswrite(VMEXIT_MSRLDCNT, vmx->nmsr);
900 }
901
902 static void
903 vmxtrapmsr(Vmx *vmx, u32int msr, enum { TRAPRD = 1, TRAPWR = 2 } state)
904 {
905         u32int m;
906         
907         if(msr >= 0x2000 && (u32int)(msr - 0xc0000000) >= 0x2000)
908                 return;
909         msr = msr & 0x1fff | msr >> 18 & 0x2000;
910         m = 1<<(msr & 31);
911         if((state & TRAPRD) != 0)
912                 vmx->msrbits[msr / 32] |= m;
913         else
914                 vmx->msrbits[msr / 32] &= ~m;
915         if((state & TRAPWR) != 0)
916                 vmx->msrbits[msr / 32 + 512] |= m;
917         else
918                 vmx->msrbits[msr / 32 + 512] &= ~m;
919 }
920
921 static void
922 vmcsinit(Vmx *vmx)
923 {
924         vlong msr;
925         u32int x;
926         
927         memset(&vmx->ureg, 0, sizeof(vmx->ureg));
928         vmx->launched = 0;
929         vmx->onentry = 0;       
930         
931         if(rdmsr(VMX_BASIC_MSR, &msr) < 0) error("rdmsr(VMX_BASIC_MSR) failed");
932         if((msr & 1ULL<<55) != 0){
933                 if(rdmsr(VMX_TRUE_PROCB_CTLS_MSR, &procb_ctls) < 0) error("rdmsr(VMX_TRUE_PROCB_CTLS_MSR) failed");
934                 if(rdmsr(VMX_TRUE_PINB_CTLS_MSR, &pinb_ctls) < 0) error("rdmsr(VMX_TRUE_PINB_CTLS_MSR) failed");
935         }else{
936                 if(rdmsr(VMX_PROCB_CTLS_MSR, &procb_ctls) < 0) error("rdmsr(VMX_PROCB_CTLS_MSR) failed");
937                 if(rdmsr(VMX_PINB_CTLS_MSR, &pinb_ctls) < 0) error("rdmsr(VMX_PINB_CTLS_MSR) failed");
938         }
939
940         if(rdmsr(VMX_PINB_CTLS_MSR, &msr) < 0) error("rdmsr(VMX_PINB_CTLS_MSR failed");
941         x = (u32int)pinb_ctls | 1<<1 | 1<<2 | 1<<4; /* currently reserved default1 bits */
942         x |= PINB_EXITIRQ | PINB_EXITNMI;
943         x &= pinb_ctls >> 32;
944         vmcswrite(PINB_CTLS, x);
945         
946         if(rdmsr(VMX_PROCB_CTLS_MSR, &msr) < 0) error("rdmsr(VMX_PROCB_CTLS_MSR failed");
947         x = (u32int)procb_ctls | 1<<1 | 7<<4 | 1<<8 | 1<<13 | 1<<14 | 1<<26; /* currently reserved default1 bits */
948         x |= PROCB_EXITHLT | PROCB_EXITMWAIT;
949         x |= PROCB_EXITMOVDR | PROCB_EXITIO | PROCB_EXITMONITOR | PROCB_MSRBITMAP;
950         x |= PROCB_USECTLS2;
951         x &= msr >> 32;
952         vmcswrite(PROCB_CTLS, x);
953         
954         if(rdmsr(VMX_PROCB_CTLS2_MSR, &msr) < 0) error("rdmsr(VMX_PROCB_CTLS2_MSR failed");
955         x = PROCB_EPT | PROCB_VPID | PROCB_UNRESTR;
956         x &= msr >> 32;
957         vmcswrite(PROCB_CTLS2, x);
958         
959         if(rdmsr(VMX_VMEXIT_CTLS_MSR, &msr) < 0) error("rdmsr(VMX_VMEXIT_CTLS_MSR failed");
960         x = (u32int)msr;
961         if(sizeof(uintptr) == 8) x |= VMEXIT_HOST64;
962         x |= VMEXIT_LD_IA32_PAT | VMEXIT_LD_IA32_EFER | VMEXIT_ST_DEBUG | VMEXIT_ST_IA32_EFER;
963         x &= msr >> 32;
964         vmcswrite(VMEXIT_CTLS, x);
965         
966         if(rdmsr(VMX_VMENTRY_CTLS_MSR, &msr) < 0) error("rdmsr(VMX_VMENTRY_CTLS_MSR failed");
967         x = (u32int)msr;
968         x |= VMENTRY_LD_IA32_PAT | VMENTRY_LD_IA32_EFER | VMENTRY_LD_DEBUG;
969         x &= msr >> 32;
970         vmcswrite(VMENTRY_CTLS, x);
971         
972         vmcswrite(CR3_TARGCNT, 0);
973         vmcswrite(VMENTRY_INTRINFO, 0);
974         vmcswrite(VMCS_LINK, -1);
975         
976         vmcswrite(HOST_CS, KESEL);
977         vmcswrite(HOST_DS, KDSEL);
978         vmcswrite(HOST_ES, KDSEL);
979         vmcswrite(HOST_FS, KDSEL);
980         vmcswrite(HOST_GS, KDSEL);
981         vmcswrite(HOST_SS, KDSEL);
982         vmcswrite(HOST_TR, TSSSEL);
983         vmcswrite(HOST_CR0, getcr0() & ~0xe);
984         vmcswrite(HOST_CR3, getcr3());
985         vmcswrite(HOST_CR4, getcr4());
986         rdmsr(FSbase, &msr);
987         vmcswrite(HOST_FSBASE, msr);
988         rdmsr(GSbase, &msr);
989         vmcswrite(HOST_GSBASE, msr);
990         vmcswrite(HOST_TRBASE, (uintptr) m->tss);
991         vmcswrite(HOST_GDTR, (uintptr) m->gdt);
992         vmcswrite(HOST_IDTR, IDTADDR);
993         if(rdmsr(0x277, &msr) < 0) error("rdmsr(IA32_PAT) failed");
994         vmcswrite(HOST_IA32_PAT, msr);
995         if(rdmsr(Efer, &msr) < 0) error("rdmsr(IA32_EFER) failed");
996         vmcswrite(HOST_IA32_EFER, msr);
997         
998         vmcswrite(EXC_BITMAP, 1<<18|1<<1);
999         vmcswrite(PFAULT_MASK, 0);
1000         vmcswrite(PFAULT_MATCH, 0);
1001         
1002         vmcswrite(GUEST_CSBASE, 0);
1003         vmcswrite(GUEST_DSBASE, 0);
1004         vmcswrite(GUEST_ESBASE, 0);
1005         vmcswrite(GUEST_FSBASE, 0);
1006         vmcswrite(GUEST_GSBASE, 0);
1007         vmcswrite(GUEST_SSBASE, 0);
1008         vmcswrite(GUEST_CSLIMIT, -1);
1009         vmcswrite(GUEST_DSLIMIT, -1);
1010         vmcswrite(GUEST_ESLIMIT, -1);
1011         vmcswrite(GUEST_FSLIMIT, -1);
1012         vmcswrite(GUEST_GSLIMIT, -1);
1013         vmcswrite(GUEST_SSLIMIT, -1);
1014         vmcswrite(GUEST_CSPERM, (SEGG|SEGD|SEGP|SEGPL(0)|SEGEXEC|SEGR) >> 8 | 1);
1015         vmcswrite(GUEST_DSPERM, (SEGG|SEGB|SEGP|SEGPL(0)|SEGDATA|SEGW) >> 8 | 1);
1016         vmcswrite(GUEST_ESPERM, (SEGG|SEGB|SEGP|SEGPL(0)|SEGDATA|SEGW) >> 8 | 1);
1017         vmcswrite(GUEST_FSPERM, (SEGG|SEGB|SEGP|SEGPL(0)|SEGDATA|SEGW) >> 8 | 1);
1018         vmcswrite(GUEST_GSPERM, (SEGG|SEGB|SEGP|SEGPL(0)|SEGDATA|SEGW) >> 8 | 1);
1019         vmcswrite(GUEST_SSPERM, (SEGG|SEGB|SEGP|SEGPL(0)|SEGDATA|SEGW) >> 8 | 1);
1020         vmcswrite(GUEST_LDTRPERM, 1<<16);
1021
1022         vmcswrite(GUEST_CR0MASK, CR0KERNEL);
1023         vmcswrite(GUEST_CR4MASK, CR4KERNEL);
1024         vmcswrite(GUEST_CR0, getcr0() & CR0KERNEL | 0x31);
1025         vmcswrite(GUEST_CR3, 0);
1026         vmcswrite(GUEST_CR4, getcr4() & CR4KERNEL);
1027         vmcswrite(GUEST_CR0SHADOW, getcr0() & CR0KERNEL | 0x31);
1028         vmcswrite(GUEST_CR4SHADOW, getcr4() & ~CR4VMXE & CR4KERNEL);
1029         
1030         vmcswrite(GUEST_IA32_PAT, 0x0007040600070406ULL);
1031         vmcswrite(GUEST_IA32_EFER, 0);
1032         
1033         vmcswrite(GUEST_TRBASE, 0);
1034         vmcswrite(GUEST_TRLIMIT, 0xffff);
1035         vmcswrite(GUEST_TRPERM, (SEGTSS|SEGPL(0)|SEGP) >> 8 | 2);
1036
1037         vmcswrite(VM_EPTP, PADDR(vmx->pml4) | 3<<3);
1038         vmx->vpid = 1;
1039         vmcswrite(VM_VPID, vmx->vpid);
1040         
1041         vmcswrite(GUEST_RFLAGS, 2);
1042         
1043         vmx->onentry = FLUSHVPID | FLUSHEPT;
1044         fpinit();
1045         fpsave(&vmx->fp);
1046         
1047         memset(vmx->msrbits, -1, 4096);
1048         vmxtrapmsr(vmx, Efer, 0);
1049         vmcswrite(VMENTRY_MSRLDADDR, PADDR(vmx->msrguest));
1050         vmcswrite(VMEXIT_MSRSTADDR, PADDR(vmx->msrguest));
1051         vmcswrite(VMEXIT_MSRLDADDR, PADDR(vmx->msrhost));
1052         vmcswrite(MSR_BITMAP, PADDR(vmx->msrbits));
1053         
1054         if(sizeof(uintptr) == 8){
1055                 vmxaddmsr(vmx, Star, 0);
1056                 vmxaddmsr(vmx, Lstar, 0);
1057                 vmxaddmsr(vmx, Cstar, 0);
1058                 vmxaddmsr(vmx, Sfmask, 0);
1059                 vmxaddmsr(vmx, KernelGSbase, 0);
1060                 vmxtrapmsr(vmx, Star, 0);
1061                 vmxtrapmsr(vmx, Lstar, 0);
1062                 vmxtrapmsr(vmx, Cstar, 0);
1063                 vmxtrapmsr(vmx, Sfmask, 0);
1064                 vmxtrapmsr(vmx, FSbase, 0);
1065                 vmxtrapmsr(vmx, GSbase, 0);
1066                 vmxtrapmsr(vmx, KernelGSbase, 0);
1067         }
1068 }
1069
1070 static void
1071 vmxstart(Vmx *vmx)
1072 {
1073         vlong msr, msr2;
1074         uintptr cr;
1075         vlong x;
1076
1077         putcr4(getcr4() | 0x2000); /* set VMXE */
1078         putcr0(getcr0() | 0x20); /* set NE */
1079         cr = getcr0();
1080         if(rdmsr(VMX_CR0_FIXED0, &msr) < 0) error("rdmsr(VMX_CR0_FIXED0) failed");
1081         if(rdmsr(VMX_CR0_FIXED1, &msr2) < 0) error("rdmsr(VMX_CR0_FIXED1) failed");
1082         if((cr & ~msr & ~msr2 | ~cr & msr & msr2) != 0) error("invalid CR0 value");
1083         cr = getcr4();
1084         if(rdmsr(VMX_CR4_FIXED0, &msr) < 0) error("rdmsr(VMX_CR4_FIXED0) failed");
1085         if(rdmsr(VMX_CR4_FIXED1, &msr2) < 0) error("rdmsr(VMX_CR4_FIXED1) failed");
1086         if((cr & ~msr & ~msr2 | ~cr & msr & msr2) != 0) error("invalid CR4 value");
1087         
1088         rdmsr(VMX_BASIC_MSR, &x);
1089         qlock(vmxmach);
1090         if(waserror()){
1091                 qunlock(vmxmach);
1092                 nexterror();
1093         }
1094         if(vmxmach->vms == 0){
1095                 memset(vmxmach->vmxon, 0, sizeof(vmxmach->vmxon));
1096                 *(ulong*)vmxmach->vmxon = x;
1097                 if(vmxon(PADDR(vmxmach->vmxon)) < 0)
1098                         error("vmxon failed");
1099         }
1100         vmxmach->vms++;
1101         qunlock(vmxmach);
1102         poperror();
1103
1104         memset(vmx->vmcs, 0, sizeof(vmx->vmcs));
1105         *(ulong*)vmx->vmcs = x;
1106         if(vmclear(PADDR(vmx->vmcs)) < 0)
1107                 error("vmclear failed");
1108         up->vmx = vmx;
1109         vmxprocrestore(up);
1110         vmcsinit(vmx);
1111 }
1112
1113 static void
1114 cmdrelease(VmCmd *p, int f)
1115 {
1116         lock(p);
1117         p->flags |= CMDFDONE | f;
1118         wakeup(p);
1119         unlock(p);
1120 }
1121
1122 static void
1123 killcmds(Vmx *vmx, VmCmd *notme)
1124 {
1125         VmCmd *p, *pn;
1126         
1127         for(p = vmx->postponed; p != nil; p = pn){
1128                 pn = p->next;
1129                 p->next = nil;
1130                 if(p == notme) continue;
1131                 kstrcpy(p->errstr, Equit, ERRMAX);
1132                 cmdrelease(p, CMDFFAIL);
1133         }
1134         vmx->postponed = nil;
1135         ilock(&vmx->cmdlock);
1136         for(p = vmx->firstcmd; p != nil; p = pn){
1137                 pn = p->next;
1138                 p->next = nil;
1139                 if(p == notme) continue;
1140                 kstrcpy(p->errstr, Equit, ERRMAX);
1141                 cmdrelease(p, CMDFFAIL);
1142         }
1143         vmx->firstcmd = nil;
1144         vmx->lastcmd = &vmx->firstcmd;
1145         iunlock(&vmx->cmdlock);
1146 }
1147
1148 static int
1149 cmdquit(VmCmd *p, va_list va)
1150 {
1151         Vmx *vmx;
1152         
1153         vmx = p->vmx;
1154         vmx->state = VMXENDING;
1155         killcmds(vmx, p);
1156
1157         cmdclearmeminfo(p, va);
1158         
1159         up->vmx = nil;
1160         vmxprocrestore(up);
1161         vmclear(PADDR(vmx->vmcs));
1162         
1163         qlock(vmxmach);
1164         if(--vmxmach->vms == 0)
1165                 vmxoff();
1166         qunlock(vmxmach);
1167         
1168         qlock(&vmxtablock);
1169         if(moribund == vmx)
1170                 moribund = nil;
1171         vmxtab[vmx->index] = nil;
1172         qunlock(&vmxtablock);
1173         free(vmx);
1174         
1175         cmdrelease(p, 0);
1176         pexit(Equit, 1);
1177         return 0;
1178 }
1179
1180 static void
1181 processexit(Vmx *vmx)
1182 {
1183         u32int reason;
1184         
1185         reason = vmcsread(VM_EXREASON);
1186         if((reason & 1<<31) == 0)
1187                 switch(reason & 0xffff){
1188                 case 1: /* external interrupt */
1189                 case 3: /* INIT */
1190                 case 4: /* SIPI */
1191                 case 5: /* IO SMI */
1192                 case 6: /* SMI */
1193                 case 7: /* IRQ window */
1194                 case 8: /* NMI window */
1195                         return;
1196                 }
1197         vmx->state = VMXREADY;
1198         vmx->got |= GOTEXIT;
1199         vmx->onentry &= ~STEP;
1200 }
1201
1202 static int
1203 cmdgetregs(VmCmd *cmd, va_list va)
1204 {
1205         char *p0, *e;
1206         GuestReg *r;
1207         uvlong val;
1208         int s;
1209         char *p;
1210         
1211         p0 = va_arg(va, char *);
1212         e = va_arg(va, char *);
1213         p = p0;
1214         for(r = guestregs; r < guestregs + nelem(guestregs); r++)
1215                 if(r->read != nil){
1216                         p = seprint(p, e, "%s ", r->name);
1217                         p = r->read(cmd->vmx, p, e);
1218                         p = strecpy(p, e, "\n");
1219                 }else{
1220                         if(r->offset >= 0)
1221                                 val = vmcsread(r->offset);
1222                         else
1223                                 val = *(uintptr*)((uchar*)cmd->vmx + ~r->offset);
1224                         s = r->size;
1225                         if(s == 0) s = sizeof(uintptr);
1226                         p = seprint(p, e, "%s %#.*llux\n", r->name, s * 2, val);
1227                 }
1228         return p - p0;
1229 }
1230
1231 static int
1232 setregs(Vmx *vmx, char *p0, char rs, char *fs)
1233 {
1234         char *p, *q, *rp;
1235         char *f[10];
1236         GuestReg *r;
1237         uvlong val;
1238         int sz;
1239         int rc;
1240
1241         p = p0;
1242         for(;;){
1243                 q = strchr(p, rs);
1244                 if(q == 0) break;
1245                 *q = 0;
1246                 rc = getfields(p, f, nelem(f), 1, fs);
1247                 p = q + 1;
1248                 if(rc == 0) continue;
1249                 if(rc != 2) error("number of fields wrong");
1250                 
1251                 for(r = guestregs; r < guestregs + nelem(guestregs); r++)
1252                         if(strcmp(r->name, f[0]) == 0)
1253                                 break;
1254                 if(r == guestregs + nelem(guestregs))
1255                         error("unknown register");
1256                 if(r->write != nil){
1257                         r->write(vmx, f[1]);
1258                         continue;
1259                 }
1260                 val = strtoull(f[1], &rp, 0);
1261                 sz = r->size;
1262                 if(sz == 0) sz = sizeof(uintptr);
1263                 if(rp == f[1] || *rp != 0) error("invalid value");
1264                 if(r->offset >= 0)
1265                         vmcswrite(r->offset, val);
1266                 else{
1267                         assert((u32int)~r->offset + sz <= sizeof(Vmx)); 
1268                         switch(sz){
1269                         case 1: *(u8int*)((u8int*)vmx + (u32int)~r->offset) = val; break;
1270                         case 2: *(u16int*)((u8int*)vmx + (u32int)~r->offset) = val; break;
1271                         case 4: *(u32int*)((u8int*)vmx + (u32int)~r->offset) = val; break;
1272                         case 8: *(u64int*)((u8int*)vmx + (u32int)~r->offset) = val; break;
1273                         default: error(Egreg);
1274                         }
1275                 }
1276         }
1277         return p - p0;
1278 }
1279
1280 static int
1281 cmdsetregs(VmCmd *cmd, va_list va)
1282 {
1283         return setregs(cmd->vmx, va_arg(va, char *), '\n', " \t");
1284 }
1285
1286 static int
1287 cmdgetfpregs(VmCmd *cmd, va_list va)
1288 {
1289         uchar *p;
1290         
1291         p = va_arg(va, uchar *);
1292         memmove(p, &cmd->vmx->fp, sizeof(FPsave));
1293         return sizeof(FPsave);
1294 }
1295
1296 static int
1297 cmdsetfpregs(VmCmd *cmd, va_list va)
1298 {
1299         uchar *p;
1300         ulong n;
1301         vlong off;
1302         
1303         p = va_arg(va, uchar *);
1304         n = va_arg(va, ulong);
1305         off = va_arg(va, vlong);
1306         if(off < 0 || off >= sizeof(FPsave)) n = 0;
1307         else if(off + n > sizeof(FPsave)) n = sizeof(FPsave) - n;
1308         memmove((uchar*)&cmd->vmx->fp + off, p, n);
1309         return n;
1310 }
1311
1312 static int
1313 cmdgo(VmCmd *cmd, va_list va)
1314 {
1315         int step;
1316         char *r;
1317         Vmx *vmx;
1318         
1319         vmx = cmd->vmx;
1320         if(vmx->state != VMXREADY)
1321                 error("VM not ready");
1322         step = va_arg(va, int);
1323         r = va_arg(va, char *);
1324         if(r != nil) setregs(vmx, r, ';', "=");
1325         if(step) vmx->onentry |= STEP;
1326         vmx->state = VMXRUNNING;
1327         return 0;
1328 }
1329
1330 static int
1331 cmdstop(VmCmd *cmd, va_list)
1332 {
1333         Vmx *vmx;
1334         
1335         vmx = cmd->vmx;
1336         if(vmx->state != VMXREADY && vmx->state != VMXRUNNING)
1337                 error("VM not ready or running");
1338         vmx->state = VMXREADY;
1339         return 0;
1340 }
1341
1342 static int
1343 cmdstatus(VmCmd *cmd, va_list va)
1344 {       
1345         kstrcpy(va_arg(va, char *), cmd->vmx->errstr, ERRMAX);
1346         return cmd->vmx->state;
1347 }
1348
/*
 * Names for the basic VM exit reasons, indexed by reason number.
 * Indices without an entry (35, 38, 42) stay nil and are printed
 * numerically by cmdwait.
 * NOTE(review): the leading '.' seems to mark exits caused by the
 * guest executing the named instruction; confirm against vmx(3).
 */
static char *exitreasons[] = {
	[0] "exc",		[1] "extirq",		[2] "triplef",		[3] "initsig",
	[4] "sipi",		[5] "smiio",		[6] "smiother",		[7] "irqwin",
	[8] "nmiwin",		[9] "taskswitch",	[10] ".cpuid",		[11] ".getsec",
	[12] ".hlt",		[13] ".invd",		[14] ".invlpg",		[15] ".rdpmc",
	[16] ".rdtsc",		[17] ".rsm",		[18] ".vmcall",		[19] ".vmclear",
	[20] ".vmlaunch",	[21] ".vmptrld",	[22] ".vmptrst",	[23] ".vmread",
	[24] ".vmresume",	[25] ".vmwrite",	[26] ".vmxoff",		[27] ".vmxon",
	[28] "movcr",		[29] ".movdr",		[30] "io",		[31] ".rdmsr",
	[32] ".wrmsr",		[33] "entrystate",	[34] "entrymsr",	[36] ".mwait",
	[37] "monitortrap",	[39] ".monitor",	[40] ".pause",		[41] "mcheck",
	[43] "tpr",		[44] "apicacc",		[45] "eoi",		[46] "gdtr_idtr",
	[47] "ldtr_tr",		[48] "eptfault",	[49] "eptinval",	[50] ".invept",
	[51] ".rdtscp",		[52] "preempt",		[53] ".invvpid",	[54] ".wbinvd",
	[55] ".xsetbv",		[56] "apicwrite",	[57] ".rdrand",		[58] ".invpcid",
	[59] ".vmfunc",		[60] ".encls",		[61] ".rdseed",		[62] "pmlfull",
	[63] ".xsaves",		[64] ".xrstors",
};
1360
/*
 * Names of the processor exceptions, indexed by vector number.
 * Vectors without an entry (2, 9, 15) stay nil.  Used by cmdwait for
 * printing and by eventparse for parsing.
 */
static char *except[] = {
	[0] "#de",	[1] "#db",	[3] "#bp",	[4] "#of",
	[5] "#br",	[6] "#ud",	[7] "#nm",	[8] "#df",
	[10] "#ts",	[11] "#np",	[12] "#ss",	[13] "#gp",
	[14] "#pf",	[16] "#mf",	[17] "#ac",	[18] "#mc",
	[19] "#xm",	[20] "#ve",
};
1366
1367 static int
1368 cmdwait(VmCmd *cp, va_list va)
1369 {
1370         char *p, *p0, *e;
1371         u32int reason, intr;
1372         uvlong qual;
1373         u16int rno;
1374         Vmx *vmx;
1375
1376         if(cp->scratched)
1377                 error(Eintr);
1378         vmx = cp->vmx;
1379         p0 = p = va_arg(va, char *);
1380         e = va_arg(va, char *);
1381         if((vmx->got & GOTIRQACK) != 0){
1382                 p = seprint(p, e, "*ack %d\n", vmx->irqack.info & 0xff);
1383                 vmx->got &= ~GOTIRQACK;
1384                 return p - p0;
1385         }
1386         if((vmx->got & GOTEXIT) == 0){
1387                 cp->flags |= CMDFPOSTP;
1388                 return -1;
1389         }
1390         vmx->got &= ~GOTEXIT;
1391         reason = vmcsread(VM_EXREASON);
1392         qual = vmcsread(VM_EXQUALIF);
1393         rno = reason;
1394         intr = vmcsread(VM_EXINTRINFO);
1395         if((reason & 1<<31) != 0)
1396                 p = seprint(p, e, "!");
1397         if(rno == 0 && (intr & 1<<31) != 0){
1398                 if((intr & 0xff) >= nelem(except) || except[intr & 0xff] == nil)
1399                         p = seprint(p, e, "#%d ", intr & 0xff);
1400                 else
1401                         p = seprint(p, e, "%s ", except[intr & 0xff]);
1402         }else if(rno >= nelem(exitreasons) || exitreasons[rno] == nil)
1403                 p = seprint(p, e, "?%d ", rno);
1404         else
1405                 p = seprint(p, e, "%s ", exitreasons[rno]);
1406         p = seprint(p, e, "%#ullx pc %#ullx sp %#ullx ilen %#ullx iinfo %#ullx", qual, vmcsread(GUEST_RIP), vmcsread(GUEST_RSP), vmcsread(VM_EXINSTRLEN), vmcsread(VM_EXINSTRINFO));
1407         if((intr & 1<<11) != 0) p = seprint(p, e, " excode %#ullx", vmcsread(VM_EXINTRCODE));
1408         if(rno == 48 && (qual & 0x80) != 0) p = seprint(p, e, " va %#ullx", vmcsread(VM_GUESTVA));
1409         if(rno == 48 || rno == 49) p = seprint(p, e, " pa %#ullx", vmcsread(VM_GUESTPA));
1410         if(rno == 30) p = seprint(p, e, " ax %#ullx", (uvlong)vmx->ureg.ax);
1411         p = seprint(p, e, "\n");
1412         return p - p0;
1413 }
1414
1415 static void
1416 eventparse(char *p, VmIntr *vi)
1417 {
1418         char *q, *r;
1419         int i;
1420         
1421         memset(vi, 0, sizeof(VmIntr));
1422         q = nil;
1423         kstrdup(&q, p);
1424         if(waserror()){
1425                 free(q);
1426                 memset(vi, 0, sizeof(VmIntr));
1427                 nexterror();
1428         }
1429         vi->info = 1<<31;
1430         r = strchr(q, ',');
1431         if(r != nil) *r++ = 0;
1432         for(i = 0; i < nelem(except); i++)
1433                 if(except[i] != nil && strcmp(except[i], q) == 0)
1434                         break;
1435         if(*q == '#'){
1436                 q++;
1437                 vi->info |= 3 << 8;
1438         }
1439         if(i == nelem(except)){
1440                 i = strtoul(q, &q, 10);
1441                 if(*q != 0 || i > 255) error(Ebadctl);
1442         }
1443         vi->info |= i;
1444         if((vi->info & 0x7ff) == 3 || (vi->info & 0x7ff) == 4)
1445                 vi->info += 3 << 8;
1446         if(r == nil) goto out;
1447         if(*r != ','){
1448                 vi->code = strtoul(r, &r, 0);
1449                 vi->info |= 1<<11;
1450         }else r++;
1451         if(*r == ',')
1452                 vi->ilen = strtoul(r + 1, &r, 0);
1453         if(*r != 0) error(Ebadctl);
1454 out:
1455         poperror();
1456         free(q);
1457 }
1458
1459 static int
1460 cmdexcept(VmCmd *cp, va_list va)
1461 {
1462         Vmx *vmx;
1463         
1464         vmx = cp->vmx;
1465         if(cp->scratched) error(Eintr);
1466         if((vmx->onentry & POSTEX) != 0){
1467                 cp->flags |= CMDFPOSTP;
1468                 return 0;
1469         }
1470         eventparse(va_arg(va, char *), &vmx->exc);
1471         vmx->onentry |= POSTEX;
1472         return 0;
1473 }
1474
1475 static int
1476 cmdirq(VmCmd *cmd, va_list va)
1477 {
1478         char *p;
1479         VmIntr vi;
1480         Vmx *vmx;
1481         
1482         vmx = cmd->vmx;
1483         p = va_arg(va, char *);
1484         if(p == nil)
1485                 vmx->onentry &= ~POSTIRQ;
1486         else{
1487                 eventparse(p, &vi);
1488                 vmx->irq = vi;
1489                 vmx->onentry |= POSTIRQ;
1490         }
1491         return 0;
1492 }
1493
1494 static int
1495 cmdextrap(VmCmd *, va_list va)
1496 {
1497         char *p, *q;
1498         u32int v;
1499         
1500         p = va_arg(va, char *);
1501         v = strtoul(p, &q, 0);
1502         if(q == p || *q != 0) error(Ebadarg);
1503         vmcswrite(EXC_BITMAP, v);
1504         return 0;
1505 }
1506
1507 static int
1508 gotcmd(void *vmxp)
1509 {
1510         int rc;
1511         Vmx *vmx;
1512
1513         vmx = vmxp;
1514         ilock(&vmx->cmdlock);
1515         rc = vmx->firstcmd != nil;
1516         iunlock(&vmx->cmdlock);
1517         return rc;
1518 }
1519
1520 static void
1521 markcmddone(VmCmd *p, VmCmd ***pp)
1522 {
1523         if((p->flags & (CMDFFAIL|CMDFPOSTP)) == CMDFPOSTP){
1524                 **pp = p;
1525                 *pp = &p->next;
1526         }else{
1527                 p->flags = p->flags & ~CMDFPOSTP;
1528                 cmdrelease(p, 0);
1529         }
1530 }
1531
1532 static VmCmd **
1533 markppcmddone(VmCmd **pp)
1534 {
1535         VmCmd *p;
1536         
1537         p = *pp;
1538         if((p->flags & (CMDFFAIL|CMDFPOSTP)) == CMDFPOSTP)
1539                 return &p->next;
1540         *pp = p->next;
1541         p->next = nil;
1542         p->flags = p->flags & ~CMDFPOSTP;
1543         cmdrelease(p, 0);
1544         return pp;
1545 }
1546
1547
1548 static void
1549 runcmd(Vmx *vmx)
1550 {
1551         VmCmd *p, **pp;
1552         
1553         for(pp = &vmx->postponed; p = *pp, p != nil; ){
1554                 if(waserror()){
1555                         kstrcpy(p->errstr, up->errstr, ERRMAX);
1556                         p->flags |= CMDFFAIL;
1557                         pp = markppcmddone(pp);
1558                         continue;
1559                 }
1560                 p->flags &= ~CMDFPOSTP;
1561                 p->retval = p->cmd(p, p->va);
1562                 poperror();
1563                 pp = markppcmddone(pp);
1564         }
1565         for(;;){
1566                 ilock(&vmx->cmdlock);
1567                 p = vmx->firstcmd;
1568                 if(p == nil){
1569                         iunlock(&vmx->cmdlock);
1570                         break;
1571                 }
1572                 vmx->firstcmd = p->next;
1573                 if(vmx->lastcmd == &p->next)
1574                         vmx->lastcmd = &vmx->firstcmd;
1575                 iunlock(&vmx->cmdlock);
1576                 p->next = nil;
1577                 if(waserror()){
1578                         kstrcpy(p->errstr, up->errstr, ERRMAX);
1579                         p->flags |= CMDFFAIL;
1580                         markcmddone(p, &pp);
1581                         continue;
1582                 }
1583                 if(p->scratched) error(Eintr);
1584                 p->retval = p->cmd(p, p->va);
1585                 poperror();
1586                 markcmddone(p, &pp);
1587         }
1588 }
1589
1590 static void
1591 vmxproc(void *vmxp)
1592 {
1593         int init, rc, x;
1594         u32int procbctls, defprocbctls;
1595         vlong v;
1596         Vmx *vmx;
1597
1598         vmx = vmxp;
1599         procwired(up, vmx->machno);
1600         sched();
1601         init = 0;
1602         defprocbctls = 0;
1603         while(waserror()){
1604                 kstrcpy(vmx->errstr, up->errstr, ERRMAX);
1605                 vmx->state = VMXDEAD;
1606         }
1607         for(;;){
1608                 if(!init){
1609                         init = 1;
1610                         vmxstart(vmx);
1611                         vmx->state = VMXREADY;
1612                         defprocbctls = vmcsread(PROCB_CTLS);
1613                 }
1614                 runcmd(vmx);
1615                 if(vmx->state == VMXRUNNING){
1616                         procbctls = defprocbctls;
1617                         if((vmx->onentry & STEP) != 0)
1618                                 defprocbctls |= PROCB_MONTRAP;
1619                         if((vmx->onentry & POSTEX) != 0){
1620                                 vmcswrite(VMENTRY_INTRINFO, vmx->exc.info);
1621                                 vmcswrite(VMENTRY_INTRCODE, vmx->exc.code);
1622                                 vmcswrite(VMENTRY_INTRILEN, vmx->exc.ilen);
1623                                 vmx->onentry &= ~POSTEX;
1624                         }
1625                         if((vmx->onentry & POSTIRQ) != 0 && (vmx->onentry & STEP) == 0){
1626                                 if((vmx->onentry & POSTEX) == 0 && (vmcsread(GUEST_RFLAGS) & 1<<9) != 0 && (vmcsread(GUEST_CANINTR) & 3) == 0){
1627                                         vmcswrite(VMENTRY_INTRINFO, vmx->irq.info);
1628                                         vmcswrite(VMENTRY_INTRCODE, vmx->irq.code);
1629                                         vmcswrite(VMENTRY_INTRILEN, vmx->irq.ilen);
1630                                         vmx->onentry &= ~POSTIRQ;
1631                                         vmx->got |= GOTIRQACK;
1632                                         vmx->irqack = vmx->irq;
1633                                 }else
1634                                         procbctls |= PROCB_IRQWIN;
1635                         }
1636                         if((vmx->onentry & FLUSHVPID) != 0){
1637                                 if(invvpid(INVLOCAL, vmx->vpid, 0) < 0)
1638                                         error("invvpid failed");
1639                                 vmx->onentry &= ~FLUSHVPID;
1640                         }
1641                         if((vmx->onentry & FLUSHEPT) != 0){
1642                                 if(invept(INVLOCAL, PADDR(vmx->pml4) | 3<<3, 0) < 0)
1643                                         error("invept failed");
1644                                 vmx->onentry &= ~FLUSHEPT;
1645                         }
1646                         vmcswrite(PROCB_CTLS, procbctls);
1647                         vmx->got &= ~GOTEXIT;
1648                         
1649                         x = splhi();
1650                         if(sizeof(uintptr) == 8){
1651                                 rdmsr(FSbase, &v);
1652                                 vmwrite(HOST_FSBASE, v);
1653                         }
1654                         if((vmx->dr[7] & ~0xd400) != 0)
1655                                 putdr01236(vmx->dr);
1656                         fpsserestore(&vmx->fp);
1657                         putcr2(vmx->cr2);
1658                         rc = vmlaunch(&vmx->ureg, vmx->launched);
1659                         vmx->cr2 = getcr2();
1660                         fpssesave(&vmx->fp);
1661                         splx(x);
1662                         if(rc < 0)
1663                                 error("vmlaunch failed");
1664                         vmx->launched = 1;
1665                         processexit(vmx);
1666                 }else{
1667                         up->psstate = "Idle";
1668                         sleep(&vmx->cmdwait, gotcmd, vmx);
1669                         up->psstate = nil;
1670                 }
1671         }
1672 }
1673
/* file found directly in the top-level (Qdir) directory */
enum {
	Qclone = 1,	/* the "clone" file generated by vmxgen */
};
1678
/*
 * File part (low 8 bits) of a qid path.  Qdir stands for a
 * directory; the other values are the files described by vmxdir.
 */
enum {
	Qdir = 0,
	Qctl,
	Qregs,
	Qstatus,
	Qmap,
	Qwait,
	Qfpregs,
};
1688
1689 static Dirtab vmxdir[] = {
1690         "ctl",          { Qctl, 0, 0 },         0,              0660,
1691         "regs",         { Qregs, 0, 0 },        0,              0660,
1692         "status",       { Qstatus, 0, 0 },      0,              0440,
1693         "map",          { Qmap, 0, 0 },         0,              0660,
1694         "wait",         { Qwait, 0, 0 },        0,              0440,
1695         "fpregs",       { Qfpregs, 0, 0 },      0,              0660,
1696 };
1697
/* indices of the control messages listed in vmxctlmsg */
enum {
	CMquit = 0,
	CMgo,
	CMstop,
	CMstep,
	CMexc,
	CMirq,
	CMextrap,
};
1707
1708 static Cmdtab vmxctlmsg[] = {
1709         CMquit,         "quit",         1,
1710         CMgo,           "go",           0,
1711         CMstop,         "stop",         1,
1712         CMstep,         "step",         0,
1713         CMexc,          "exc",          2,
1714         CMirq,          "irq",          0,
1715         CMextrap,       "extrap",       2,
1716 };
1717
/* NOTE(review): buffer size in bytes; its users are outside this part of the file */
enum {
	AUXSIZE = 4096,
};
1719
1720 static Vmx *
1721 vmxlook(vlong n)
1722 {
1723         if(n < 0) return nil;
1724         if(n >= nvmxtab) return nil;
1725         return vmxtab[n];
1726 }
/*
 * Layout of qid.path: the low 8 bits hold the file (a Q* value), the
 * bits above hold the table slot plus one, so that slot -1 denotes
 * the top-level directory.
 */
#define QIDPATH(q,e) ((((q) + 1) << 8) | (e))
#define SLOT(q) (((vlong)((q).path >> 8)) - 1)
#define FILE(q) (((int)(q).path) & 0xff)
1730 static Vmx *
1731 vmxent(Qid q)
1732 {
1733         Vmx *vmx;
1734
1735         eqlock(&vmxtablock);
1736         if(waserror()){
1737                 qunlock(&vmxtablock);
1738                 nexterror();
1739         }
1740         vmx = vmxlook(SLOT(q));
1741         qunlock(&vmxtablock);
1742         poperror();
1743         return vmx;
1744 }
1745
1746 static int
1747 iscmddone(void *cp)
1748 {
1749         return (((VmCmd*)cp)->flags & CMDFDONE) != 0;
1750 }
1751
1752 static int
1753 vmxcmd(Vmx *vmx, int (*f)(VmCmd *, va_list), ...)
1754 {
1755         VmCmd cmd;
1756
1757         if(vmx->state == VMXENDING)
1758         ending:
1759                 error(Equit);
1760         memset(&cmd, 0, sizeof(VmCmd));
1761         cmd.vmx = vmx;
1762         cmd.errstr = up->errstr;
1763         cmd.cmd = f;
1764         va_start(cmd.va, f);
1765          
1766         ilock(&vmx->cmdlock);
1767         if(vmx->state == VMXENDING){
1768                 iunlock(&vmx->cmdlock);
1769                 goto ending;
1770         }
1771         *vmx->lastcmd = &cmd;
1772         vmx->lastcmd = &cmd.next;
1773         iunlock(&vmx->cmdlock);
1774         
1775         while(waserror())
1776                 cmd.scratched = 1;
1777         wakeup(&vmx->cmdwait);
1778         do
1779                 sleep(&cmd, iscmddone, &cmd);
1780         while(!iscmddone(&cmd));
1781         poperror();
1782         lock(&cmd);
1783         unlock(&cmd);
1784         if((cmd.flags & CMDFFAIL) != 0)
1785                 error(up->errstr);
1786         return cmd.retval;
1787 }
1788
1789 static Vmx *
1790 vmxnew(void)
1791 {
1792         Vmx *vmx;
1793         Vmx **newtab;
1794         int i, mi, mv;
1795         
1796         vmx = mallocalign(sizeof(Vmx), 4096, 0, 0);
1797         if(waserror()){
1798
1799                 free(vmx);
1800                 nexterror();
1801         }
1802         vmx->state = VMXINIT;
1803         vmx->lastcmd = &vmx->firstcmd;
1804         vmx->mem.next = &vmx->mem;
1805         vmx->mem.prev = &vmx->mem;
1806         vmx->index = -1;
1807         
1808         eqlock(&vmxtablock);
1809         if(waserror()){
1810                 if(vmx->index >= 0)
1811                         vmxtab[vmx->index] = 0;
1812                 qunlock(&vmxtablock);
1813                 nexterror();
1814         }
1815         for(i = 0; i < nvmxtab; i++)
1816                 if(vmxtab[i] == nil){
1817                         vmxtab[i] = vmx;
1818                         vmx->index = i;
1819                         break;
1820                 }
1821         if(i == nvmxtab){
1822                 newtab = realloc(vmxtab, (nvmxtab + 1) * sizeof(Vmx *));
1823                 if(newtab == nil)
1824                         error(Enomem);
1825                 vmxtab = newtab;
1826                 vmxtab[nvmxtab] = vmx;
1827                 vmx->index = nvmxtab++;
1828         }
1829         kproc("kvmx", vmxproc, vmx);
1830         qunlock(&vmxtablock);
1831         poperror();
1832         poperror();
1833         mi = 0;
1834         mv = 0x7fffffff;
1835         for(i = 0; i < conf.nmach; i++)
1836                 if(vmxmachp(i)->vms < mv){
1837                         mi = i;
1838                         mv = vmxmachp(i)->vms;
1839                 }
1840         vmx->machno = mi;
1841         if(vmxcmd(vmx, cmdstatus, up->errstr) == VMXDEAD)
1842                 error(up->errstr);
1843         return vmx;
1844 }
1845
1846 void
1847 vmxshutdown(void)
1848 {
1849         int i;
1850         
1851         for(i = 0; i < nvmxtab; i++)
1852                 if(vmxtab[i] != nil)
1853                         vmxcmd(vmxtab[i], cmdquit);
1854 }
1855
1856 static Chan *
1857 vmxattach(char *spec)
1858 {
1859         if(!gotvmx) error(Enodev);
1860         return devattach('X', spec);
1861 }
1862
1863 static int
1864 vmxgen(Chan *c, char *, Dirtab *, int, int s, Dir *dp)
1865 {
1866         Dirtab *tab;
1867         int path;
1868
1869         if(s == DEVDOTDOT){
1870                 devdir(c, (Qid){Qdir, 0, QTDIR}, "#X", 0, eve, 0555, dp);
1871                 return 1;
1872         }
1873         if(c->qid.path == Qdir){
1874                 if(s-- == 0) goto clone;
1875                 if(s >= nvmxtab)
1876                         return -1;
1877                 if(vmxlook(s) == nil)
1878                         return 0;
1879                 sprint(up->genbuf, "%d", s);
1880                 devdir(c, (Qid){QIDPATH(s, Qdir), 0, QTDIR}, up->genbuf, 0, eve, DMDIR|0555, dp);
1881                 return 1;
1882         }
1883         if(c->qid.path == Qclone){
1884         clone:
1885                 strcpy(up->genbuf, "clone");
1886                 devdir(c, (Qid){Qclone, 0, QTFILE}, up->genbuf, 0, eve, 0444, dp);
1887                 return 1;
1888         }
1889         if(s >= nelem(vmxdir))
1890                 return -1;
1891         tab = &vmxdir[s];
1892         path = QIDPATH(SLOT(c->qid), 0);
1893         devdir(c, (Qid){tab->qid.path|path, tab->qid.vers, tab->qid.type}, tab->name, tab->length, eve, tab->perm, dp);
1894         return 1;
1895 }
1896
1897 static Walkqid*
1898 vmxwalk(Chan *c, Chan *nc, char **name, int nname)
1899 {
1900         Walkqid *rc;
1901
1902         eqlock(&vmxtablock);
1903         if(waserror()){
1904                 qunlock(&vmxtablock);
1905                 nexterror();
1906         }
1907         rc = devwalk(c, nc, name, nname, nil, 0, vmxgen);
1908         qunlock(&vmxtablock);
1909         poperror();
1910         return rc;
1911 }
1912
1913 static int
1914 vmxstat(Chan *c, uchar *dp, int n)
1915 {
1916         int rc;
1917         
1918         eqlock(&vmxtablock);
1919         if(waserror()){
1920                 qunlock(&vmxtablock);
1921                 nexterror();
1922         }
1923         rc = devstat(c, dp, n, nil, 0, vmxgen);
1924         qunlock(&vmxtablock);
1925         poperror();
1926         return rc;
1927 }
1928
1929 static Chan*
1930 vmxopen(Chan* c, int omode)
1931 {
1932         Chan *ch;
1933         Vmx *vmx;
1934
1935         if(c->qid.path == Qclone){
1936                 if(!iseve()) error(Eperm);
1937                 vmx = vmxnew();
1938                 c->qid.path = QIDPATH(vmx->index, Qctl);
1939         }
1940         eqlock(&vmxtablock);
1941         if(waserror()){
1942                 qunlock(&vmxtablock);
1943                 nexterror();
1944         }
1945         vmx = vmxlook(SLOT(c->qid));
1946         if(SLOT(c->qid) >= 0 && vmx == nil) error(Enonexist);
1947         if(FILE(c->qid) != Qdir && !iseve()) error(Eperm);
1948         ch = devopen(c, omode, nil, 0, vmxgen);
1949         qunlock(&vmxtablock);
1950         poperror();
1951         ch->aux = smalloc(AUXSIZE);
1952         if(SLOT(ch->qid) >= 0 && FILE(ch->qid) == Qmap){
1953                 if((omode & OTRUNC) != 0)
1954                         vmxcmd(vmx, cmdclearmeminfo);
1955         }
1956         return ch;
1957 }
1958
1959 static void
1960 vmxclunk(Chan *ch)
1961 {
1962         free(ch->aux);
1963         ch->aux = nil;
1964 }
1965
1966 static void
1967 vmxremove(Chan *ch)
1968 {
1969         Vmx *vmx, *old;
1970
1971         vmxclunk(ch);
1972         if(SLOT(ch->qid) == -1 || FILE(ch->qid) != Qctl)
1973                 error(Eperm);
1974         vmx = vmxent(ch->qid);
1975         if(vmx == nil)
1976                 error(Enonexist);
1977         qlock(&vmxtablock);
1978         old = moribund;
1979         moribund = vmx;
1980         qunlock(&vmxtablock);
1981         if(old != nil)
1982                 vmxcmd(old, cmdquit);
1983 }
1984
1985 static void
1986 vmxclose(Chan *ch)
1987 {
1988         if((ch->flag & CRCLOSE) != 0)
1989                 vmxremove(ch);
1990         else
1991                 vmxclunk(ch);
1992 }
1993
1994
1995 static long
1996 vmxread(Chan* c, void* a, long n, vlong off)
1997 {
1998         int rc;
1999         Vmx *vmx;
2000
2001         if(SLOT(c->qid) == -1){
2002                 switch((int)c->qid.path){
2003                 case Qdir:
2004                 dirread:
2005                         eqlock(&vmxtablock);
2006                         if(waserror()){
2007                                 qunlock(&vmxtablock);
2008                                 nexterror();
2009                         }
2010                         rc = devdirread(c, a, n, nil, 0, vmxgen);
2011                         qunlock(&vmxtablock);
2012                         poperror();
2013                         return rc;
2014                 default:
2015                         error(Egreg);
2016                 }
2017         }
2018         vmx = vmxent(c->qid);
2019         if(vmx == nil) error(Enonexist);
2020         switch(FILE(c->qid)){
2021         case Qdir:
2022                 goto dirread;
2023         case Qctl:
2024                 {
2025                         char buf[20];
2026                         
2027                         sprint(buf, "%d", vmx->index);
2028                         return readstr(off, a, n, buf);
2029                 }
2030         case Qregs:
2031                 if(off == 0)
2032                         vmxcmd(vmx, cmdgetregs, c->aux, (char *) c->aux + AUXSIZE);
2033                 return readstr(off, a, n, c->aux);
2034         case Qmap:
2035                 if(off == 0)
2036                         vmxcmd(vmx, cmdgetmeminfo, c->aux, (char *) c->aux + AUXSIZE);
2037                 return readstr(off, a, n, c->aux);
2038         case Qstatus:
2039                 {
2040                         char buf[ERRMAX+128];
2041                         char errbuf[ERRMAX];
2042                         int status;
2043                         
2044                         status = vmx->state;
2045                         if(status == VMXDEAD){
2046                                 vmxcmd(vmx, cmdstatus, errbuf);
2047                                 snprint(buf, sizeof(buf), "%s %#q\n", statenames[status], errbuf);
2048                         }else if(status >= 0 && status < nelem(statenames))
2049                                 snprint(buf, sizeof(buf), "%s\n", statenames[status]);
2050                         else
2051                                 snprint(buf, sizeof(buf), "%d\n", status);
2052                         return readstr(off, a, n, buf);
2053                 }
2054         case Qwait:
2055                 {
2056                         char buf[512];
2057                         
2058                         rc = vmxcmd(vmx, cmdwait, buf, buf + sizeof(buf));
2059                         if(rc > n) rc = n;
2060                         if(rc > 0) memmove(a, buf, rc);
2061                         return rc;
2062                 }
2063         case Qfpregs:
2064                 {
2065                         char buf[sizeof(FPsave)];
2066                         
2067                         vmxcmd(vmx, cmdgetfpregs, buf);
2068                         if(n < 0 || off < 0 || off >= sizeof(buf)) n = 0;
2069                         else if(off + n > sizeof(buf)) n = sizeof(buf) - off;
2070                         if(n != 0) memmove(a, buf + off, n);
2071                         return n;
2072                 }
2073         default:
2074                 error(Egreg);
2075                 break;
2076         }
2077         return 0;
2078 }
2079
2080 static long
2081 vmxwrite(Chan* c, void* a, long n, vlong off)
2082 {
2083         Cmdbuf *cb;
2084         Cmdtab *ct;
2085         char *s;
2086         int rc;
2087         Vmx *vmx;
2088
2089         if(SLOT(c->qid) == -1){
2090                 switch((int)c->qid.path){
2091                 case Qdir:
2092                         error(Eperm);
2093                 default:
2094                         error(Egreg);
2095                 }
2096         }
2097         vmx = vmxent(c->qid);
2098         if(vmx == nil) error(Enonexist);
2099         switch(FILE(c->qid)){
2100         case Qdir:
2101                 error(Eperm);
2102         case Qctl:
2103                 cb = parsecmd(a, n);
2104                 if(waserror()){
2105                         free(cb);
2106                         nexterror();
2107                 }
2108                 ct = lookupcmd(cb, vmxctlmsg, nelem(vmxctlmsg));
2109                 switch(ct->index){
2110                 case CMquit:
2111                         vmxcmd(vmx, cmdquit);
2112                         break;
2113                 case CMgo:
2114                 case CMstep:
2115                         s = nil;
2116                         if(cb->nf == 2) kstrdup(&s, cb->f[1]);
2117                         else if(cb->nf != 1) error(Ebadarg);
2118                         if(waserror()){
2119                                 free(s);
2120                                 nexterror();
2121                         }
2122                         vmxcmd(vmx, cmdgo, ct->index == CMstep, s);
2123                         poperror();
2124                         free(s);
2125                         break;
2126                 case CMstop:
2127                         vmxcmd(vmx, cmdstop);
2128                         break;
2129                 case CMexc:
2130                         s = nil;
2131                         kstrdup(&s, cb->f[1]);
2132                         if(waserror()){
2133                                 free(s);
2134                                 nexterror();
2135                         }
2136                         vmxcmd(vmx, cmdexcept, s);
2137                         poperror();
2138                         free(s);
2139                         break;
2140                 case CMirq:
2141                         s = nil;
2142                         if(cb->nf == 2)
2143                                 kstrdup(&s, cb->f[1]);
2144                         if(waserror()){
2145                                 free(s);
2146                                 nexterror();
2147                         }
2148                         vmxcmd(vmx, cmdirq, s);
2149                         poperror();
2150                         free(s);
2151                         break;
2152                 case CMextrap:
2153                         s = nil;
2154                         kstrdup(&s, cb->f[1]);
2155                         if(waserror()){
2156                                 free(s);
2157                                 nexterror();
2158                         }
2159                         vmxcmd(vmx, cmdextrap, s);
2160                         poperror();
2161                         free(s);
2162                         break;
2163
2164                 default:
2165                         error(Egreg);
2166                 }
2167                 poperror();
2168                 free(cb);
2169                 break;
2170         case Qmap:
2171         case Qregs:
2172                 s = malloc(n+1);
2173                 if(s == nil) error(Enomem);
2174                 if(waserror()){
2175                         free(s);
2176                         nexterror();
2177                 }
2178                 memmove(s, a, n);
2179                 s[n] = 0;
2180                 rc = vmxcmd(vmx, FILE(c->qid) == Qregs ? cmdsetregs : cmdsetmeminfo, s);
2181                 poperror();
2182                 free(s);
2183                 return rc;
2184         case Qfpregs:
2185                 {
2186                         char buf[sizeof(FPsave)];
2187                         
2188                         if(n > sizeof(FPsave)) n = sizeof(FPsave);
2189                         memmove(buf, a, n);
2190                         return vmxcmd(vmx, cmdsetfpregs, buf, n, off);
2191                 }
2192         default:
2193                 error(Egreg);
2194                 break;
2195         }
2196         return n;
2197 }
2198
2199 Dev vmxdevtab = {
2200         'X',
2201         "vmx",
2202         
2203         vmxreset,
2204         devinit,
2205         devshutdown,
2206         vmxattach,
2207         vmxwalk,
2208         vmxstat,
2209         vmxopen,
2210         devcreate,
2211         vmxclose,
2212         vmxread,
2213         devbread,
2214         vmxwrite,
2215         devbwrite,
2216         vmxremove,
2217         devwstat,
2218 };