]> git.lizzy.rs Git - plan9front.git/blob - sys/src/9/pc/devvmx.c
devvmx: add support for extrap command to configure halting on exceptions
[plan9front.git] / sys / src / 9 / pc / devvmx.c
1 #include "u.h"
2 #include "../port/lib.h"
3 #include "mem.h"
4 #include "dat.h"
5 #include "fns.h"
6 #include "../port/error.h"
7 #include "ureg.h"
8
9 extern int vmxon(u64int);
10 extern int vmxoff(void);
11 extern int vmclear(u64int);
12 extern int vmptrld(u64int);
13 extern int vmlaunch(Ureg *, int);
14 extern int vmread(u32int, uintptr *);
15 extern int vmwrite(u32int, uintptr);
16 extern int invept(u32int, uvlong, uvlong);
17 extern int invvpid(u32int, uvlong, uvlong);
18
19 static vlong procb_ctls, pinb_ctls;
20
/*
 * VMX constants: MSR numbers, the field numbers handed to
 * vmcsread/vmcswrite, and the bits stored in the control fields.
 */
enum {
	/* MSR numbers */
	VMX_BASIC_MSR = 0x480,
	VMX_PINB_CTLS_MSR = 0x481,
	VMX_PROCB_CTLS_MSR = 0x482,
	VMX_VMEXIT_CTLS_MSR = 0x483,
	VMX_VMENTRY_CTLS_MSR = 0x484,
	VMX_MISC_MSR = 0x485,
	VMX_CR0_FIXED0 = 0x486,
	VMX_CR0_FIXED1 = 0x487,
	VMX_CR4_FIXED0 = 0x488,
	VMX_CR4_FIXED1 = 0x489,
	VMX_VMCS_ENUM = 0x48A,
	VMX_PROCB_CTLS2_MSR = 0x48B,
	VMX_TRUE_PINB_CTLS_MSR = 0x48D,
	VMX_TRUE_PROCB_CTLS_MSR = 0x48E,
	VMX_TRUE_EXIT_CTLS_MSR = 0x48F,
	VMX_TRUE_ENTRY_CTLS_MSR = 0x490,
	VMX_VMFUNC_MSR = 0x491,

	/* PINB_CTLS field and its bits */
	PINB_CTLS = 0x4000,
	PINB_EXITIRQ = 1 << 0,
	PINB_EXITNMI = 1 << 3,

	/* PROCB_CTLS field and its bits */
	PROCB_CTLS = 0x4002,
	PROCB_IRQWIN = 1 << 2,
	PROCB_EXITHLT = 1 << 7,
	PROCB_EXITINVLPG = 1 << 9,
	PROCB_EXITMWAIT = 1 << 10,
	PROCB_EXITRDPMC = 1 << 11,
	PROCB_EXITRDTSC = 1 << 12,
	PROCB_EXITCR3LD = 1 << 15,
	PROCB_EXITCR3ST = 1 << 16,
	PROCB_EXITCR8LD = 1 << 19,
	PROCB_EXITCR8ST = 1 << 20,
	PROCB_EXITMOVDR = 1 << 23,
	PROCB_EXITIO = 1 << 24,
	PROCB_MONTRAP = 1 << 27,
	PROCB_MSRBITMAP = 1 << 28,
	PROCB_EXITMONITOR = 1 << 29,
	PROCB_EXITPAUSE = 1 << 30,
	PROCB_USECTLS2 = 1 << 31,

	/* PROCB_CTLS2 field and its bits */
	PROCB_CTLS2 = 0x401E,
	PROCB_EPT = 1 << 1,
	PROCB_EXITGDT = 1 << 2,
	PROCB_VPID = 1 << 5,
	PROCB_UNRESTR = 1 << 7,

	EXC_BITMAP = 0x4004,
	PFAULT_MASK = 0x4006,
	PFAULT_MATCH = 0x4008,
	CR3_TARGCNT = 0x400a,
	MSR_BITMAP = 0x2004,

	/* VMEXIT_CTLS field and its bits */
	VMEXIT_CTLS = 0x400c,
	VMEXIT_ST_DEBUG = 1 << 2,
	VMEXIT_HOST64 = 1 << 9,
	VMEXIT_LD_IA32_PERF_GLOBAL_CTRL = 1 << 12,
	VMEXIT_ST_IA32_PAT = 1 << 18,
	VMEXIT_LD_IA32_PAT = 1 << 19,
	VMEXIT_ST_IA32_EFER = 1 << 20,
	VMEXIT_LD_IA32_EFER = 1 << 21,

	/* counts and addresses of the MSR store/load lists */
	VMEXIT_MSRSTCNT = 0x400e,
	VMEXIT_MSRLDCNT = 0x4010,
	VMEXIT_MSRSTADDR = 0x2006,
	VMEXIT_MSRLDADDR = 0x2008,
	VMENTRY_MSRLDADDR = 0x200A,

	/* VMENTRY_CTLS field and its bits */
	VMENTRY_CTLS = 0x4012,
	VMENTRY_LD_DEBUG = 1 << 2,
	VMENTRY_GUEST64 = 1 << 9,
	VMENTRY_LD_IA32_PERF_GLOBAL_CTRL = 1 << 13,
	VMENTRY_LD_IA32_PAT = 1 << 14,
	VMENTRY_LD_IA32_EFER = 1 << 15,

	VMENTRY_MSRLDCNT = 0x4014,
	VMENTRY_INTRINFO = 0x4016,
	VMENTRY_INTRCODE = 0x4018,
	VMENTRY_INTRILEN = 0x401a,

	VMCS_LINK = 0x2800,

	/* guest state fields */
	GUEST_ES = 0x800,
	GUEST_CS = 0x802,
	GUEST_SS = 0x804,
	GUEST_DS = 0x806,
	GUEST_FS = 0x808,
	GUEST_GS = 0x80A,
	GUEST_LDTR = 0x80C,
	GUEST_TR = 0x80E,
	GUEST_CR0 = 0x6800,
	GUEST_CR3 = 0x6802,
	GUEST_CR4 = 0x6804,
	GUEST_ESLIMIT = 0x4800,
	GUEST_CSLIMIT = 0x4802,
	GUEST_SSLIMIT = 0x4804,
	GUEST_DSLIMIT = 0x4806,
	GUEST_FSLIMIT = 0x4808,
	GUEST_GSLIMIT = 0x480A,
	GUEST_LDTRLIMIT = 0x480C,
	GUEST_TRLIMIT = 0x480E,
	GUEST_GDTRLIMIT = 0x4810,
	GUEST_IDTRLIMIT = 0x4812,
	GUEST_ESPERM = 0x4814,
	GUEST_CSPERM = 0x4816,
	GUEST_SSPERM = 0x4818,
	GUEST_DSPERM = 0x481A,
	GUEST_FSPERM = 0x481C,
	GUEST_GSPERM = 0x481E,
	GUEST_LDTRPERM = 0x4820,
	GUEST_TRPERM = 0x4822,
	GUEST_CR0MASK = 0x6000,
	GUEST_CR4MASK = 0x6002,
	GUEST_CR0SHADOW = 0x6004,
	GUEST_CR4SHADOW = 0x6006,
	GUEST_ESBASE = 0x6806,
	GUEST_CSBASE = 0x6808,
	GUEST_SSBASE = 0x680A,
	GUEST_DSBASE = 0x680C,
	GUEST_FSBASE = 0x680E,
	GUEST_GSBASE = 0x6810,
	GUEST_LDTRBASE = 0x6812,
	GUEST_TRBASE = 0x6814,
	GUEST_GDTRBASE = 0x6816,
	GUEST_IDTRBASE = 0x6818,
	GUEST_DR7 = 0x681A,
	GUEST_RSP = 0x681C,
	GUEST_RIP = 0x681E,
	GUEST_RFLAGS = 0x6820,
	GUEST_IA32_DEBUGCTL = 0x2802,
	GUEST_IA32_PAT = 0x2804,
	GUEST_IA32_EFER = 0x2806,
	GUEST_IA32_PERF_GLOBAL_CTRL = 0x2808,

	/* host state fields */
	HOST_ES = 0xC00,
	HOST_CS = 0xC02,
	HOST_SS = 0xC04,
	HOST_DS = 0xC06,
	HOST_FS = 0xC08,
	HOST_GS = 0xC0A,
	HOST_TR = 0xC0C,
	HOST_CR0 = 0x6C00,
	HOST_CR3 = 0x6C02,
	HOST_CR4 = 0x6C04,
	HOST_FSBASE = 0x6C06,
	HOST_GSBASE = 0x6C08,
	HOST_TRBASE = 0x6C0A,
	HOST_GDTR = 0x6C0C,
	HOST_IDTR = 0x6C0E,
	HOST_RSP = 0x6C14,
	HOST_RIP = 0x6C16,
	HOST_IA32_PAT = 0x2C00,
	HOST_IA32_EFER = 0x2C02,
	HOST_IA32_PERF_GLOBAL_CTRL = 0x2C04,

	GUEST_CANINTR = 0x4824,

	/* exit information fields; marked readonly in guestregs */
	VM_INSTRERR = 0x4400,
	VM_EXREASON = 0x4402,
	VM_EXINTRINFO = 0x4404,
	VM_EXINTRCODE = 0x4406,
	VM_IDTVECINFO = 0x4408,
	VM_IDTVECCODE = 0x440A,
	VM_EXINSTRLEN = 0x440C,
	VM_EXINSTRINFO = 0x440E,
	VM_EXQUALIF = 0x6400,
	VM_IORCX = 0x6402,
	VM_IORSI = 0x6404,
	VM_IORDI = 0x6406,
	VM_IORIP = 0x6408,
	VM_GUESTVA = 0x640A,
	VM_GUESTPA = 0x2400,

	VM_VPID = 0x000,
	VM_EPTPIDX = 0x0004,

	VM_EPTP = 0x201A,
	VM_EPTPLA = 0x2024,

	INVLOCAL = 1,
};
203
204 enum {
205         CR0RSVD = 0x1ffaffc0,
206         CR4RSVD = 0xff889000,
207         CR4MCE = 1<<6,
208         CR4VMXE = 1<<13,
209         CR4SMXE = 1<<14,
210         CR4PKE = 1<<22,
211         
212         CR0KERNEL = CR0RSVD | 0x30 | (uintptr)0xFFFFFFFF00000000ULL,
213         CR4KERNEL = CR4RSVD | CR4VMXE | CR4SMXE | CR4MCE | CR4PKE | (uintptr)0xFFFFFFFF00000000ULL
214 };
215
enum {
	/* upper bound on vmx.nmsr, enforced by vmxaddmsr */
	MAXMSR = 512,
};
219
220 typedef struct Vmx Vmx;
221 typedef struct VmCmd VmCmd;
222 typedef struct VmMem VmMem;
223 typedef struct VmIntr VmIntr;
224
225 struct VmMem {
226         uvlong lo, hi;
227         Segment *seg;
228         uintptr off;
229         char *name;
230         VmMem *next, *prev;
231         u16int attr;
232 };
233
234 struct VmIntr {
235         u32int info, code, ilen;
236 };
237
238 struct Vmx {
239         enum {
240                 NOVMX,
241                 VMXINACTIVE,
242                 VMXINIT,
243                 VMXREADY,
244                 VMXRUNNING,
245                 VMXDEAD,
246                 VMXENDING,
247         } state;
248         char errstr[ERRMAX];
249         Ureg ureg;
250         uintptr cr2;
251         uintptr dr[8]; /* DR7 is also kept in VMCS */
252         FPsave *fp;
253         u8int launched;
254         u8int on;
255         u8int vpid;
256         enum {
257                 FLUSHVPID = 1,
258                 FLUSHEPT = 2,
259                 STEP = 4,
260                 POSTEX = 8,
261                 POSTIRQ = 16,
262         } onentry;
263         
264         Rendez cmdwait;
265         Lock cmdlock;
266         VmCmd *firstcmd, **lastcmd;
267         VmCmd *postponed;
268         uvlong *pml4;
269         VmMem mem;
270         
271         enum {
272                 GOTEXIT = 1,
273                 GOTIRQACK = 2,
274                 GOTSTEP = 4,
275                 GOTSTEPERR = 8,
276         } got;
277         VmMem *stepmap;
278         VmIntr exc, irq, irqack;
279         
280         u64int *msrhost, *msrguest;
281         u32int *msrbits;
282         int nmsr;
283 };
284
285 struct VmCmd {
286         enum {
287                 CMDFDONE = 1,
288                 CMDFFAIL = 2,
289                 CMDFPOSTP = 4,
290         } flags;
291         u8int scratched;
292         Rendez;
293         Lock;
294         int (*cmd)(VmCmd *, va_list);
295         int retval;
296         char *errstr;
297         va_list va;
298         VmCmd *next;
299 };
300
301 static char Equit[] = "vmx: ending";
302
303 static char *statenames[] = {
304         [NOVMX] "novmx",
305         [VMXINACTIVE] "inactive",
306         [VMXINIT] "init",
307         [VMXREADY] "ready",
308         [VMXRUNNING] "running",
309         [VMXDEAD] "dead",
310         [VMXENDING]"ending"
311 };
312
313 static Vmx vmx;
314
315 static u64int
316 vmcsread(u32int addr)
317 {
318         int rc;
319         u64int val;
320
321         val = 0;
322         rc = vmread(addr, (uintptr *) &val);
323         if(rc >= 0 && sizeof(uintptr) == 4 && (addr & 0x6000) == 0x2000)
324                 rc = vmread(addr | 1, (uintptr *) &val + 1);
325         if(rc < 0){
326                 char errbuf[128];
327                 snprint(errbuf, sizeof(errbuf), "vmcsread failed (%#.4ux)", addr);
328                 error(errbuf);
329         }
330         return val;
331 }
332
333 static void
334 vmcswrite(u32int addr, u64int val)
335 {
336         int rc;
337         
338         rc = vmwrite(addr, val);
339         if(rc >= 0 && sizeof(uintptr) == 4 && (addr & 0x6000) == 0x2000)
340                 rc = vmwrite(addr | 1, val >> 32);
341         if(rc < 0){
342                 char errbuf[128];
343                 snprint(errbuf, sizeof(errbuf), "vmcswrite failed (%#.4ux = %#.16ullx)", addr, val);
344                 error(errbuf);
345         }
346 }
347
348 static uvlong
349 parseval(char *s)
350 {
351         uvlong v;
352         char *p;
353
354         v = strtoull(s, &p, 0);
355         if(p == s || *p != 0) error("invalid value");
356         return v;
357 }
358
359 static char *
360 cr0fakeread(char *p, char *e)
361 {
362         uvlong guest, mask, shadow;
363         
364         guest = vmcsread(GUEST_CR0);
365         mask = vmcsread(GUEST_CR0MASK);
366         shadow = vmcsread(GUEST_CR0SHADOW);
367         return seprint(p, e, "%#.*ullx", sizeof(uintptr) * 2, guest & ~mask | shadow & mask);
368 }
369
370 static char *
371 cr4fakeread(char *p, char *e)
372 {
373         uvlong guest, mask, shadow;
374         
375         guest = vmcsread(GUEST_CR4);
376         mask = vmcsread(GUEST_CR4MASK);
377         shadow = vmcsread(GUEST_CR4SHADOW);
378         return seprint(p, e, "%#.*ullx", sizeof(uintptr) * 2, guest & ~mask | shadow & mask);
379 }
380
381 static void
382 updatelma(void)
383 {
384         uvlong cr0, efer, nefer, ectrl;
385
386         if(sizeof(uintptr) != 8) return;
387         cr0 = vmcsread(GUEST_CR0);
388         efer = vmcsread(GUEST_IA32_EFER);
389         nefer = efer & ~0x400 | efer << 2 & cr0 >> 21 & 0x400;
390         if(efer == nefer) return;
391         vmcswrite(GUEST_IA32_EFER, nefer);
392         ectrl = vmcsread(VMENTRY_CTLS);
393         ectrl = ectrl & ~0x200 | nefer >> 1 & 0x200;
394         vmcswrite(VMENTRY_CTLS, ectrl);
395 }
396
397 static int
398 cr0realwrite(char *s)
399 {
400         uvlong v;
401         
402         v = parseval(s);
403         vmcswrite(GUEST_CR0, vmcsread(GUEST_CR0) & CR0KERNEL | v & ~CR0KERNEL);
404         updatelma();
405         return 0;
406 }
407
408 static int
409 cr0maskwrite(char *s)
410 {
411         uvlong v;
412         
413         v = parseval(s);
414         vmcswrite(GUEST_CR0MASK, v | CR0KERNEL);
415         return 0;
416 }
417
418 static int
419 eferwrite(char *s)
420 {
421         uvlong v;
422         
423         v = parseval(s);
424         vmcswrite(GUEST_IA32_EFER, v);
425         updatelma();
426         return 0;
427 }
428
429 static int
430 cr4realwrite(char *s)
431 {
432         uvlong v;
433         
434         v = parseval(s);
435         vmcswrite(GUEST_CR4, vmcsread(GUEST_CR4) & CR4KERNEL | v & ~CR4KERNEL);
436         return 0;
437 }
438
439 static int
440 cr4maskwrite(char *s)
441 {
442         uvlong v;
443         
444         v = parseval(s);
445         vmcswrite(GUEST_CR4MASK, v | CR4KERNEL);
446         return 0;
447 }
448
449 static int
450 dr7write(char *s)
451 {
452         uvlong v;
453         
454         v = (u32int) parseval(s);
455         vmcswrite(GUEST_DR7, vmx.dr[7] = (u32int) v);
456         return 0;
457 }
458
/*
 * Write handler for registers that must not be written:
 * ignores its argument and reports failure with -1.
 */
static int
readonly(char *)
{
	return -1;
}
464
465 static int
466 dr6write(char *s)
467 {
468         uvlong v;
469         
470         v = parseval(s);
471         vmx.dr[6] = (u32int) v;
472         return 0;
473 }
474
475 typedef struct GuestReg GuestReg;
476 struct GuestReg {
477         int offset;
478         u8int size; /* in bytes; 0 means == uintptr */
479         char *name;
480         char *(*read)(char *, char *);
481         int (*write)(char *);
482 };
483 #define VMXVAR(x) ~(ulong)&(((Vmx*)0)->x)
484 #define UREG(x) VMXVAR(ureg.x)
485 static GuestReg guestregs[] = {
486         {GUEST_RIP, 0, "pc"},
487         {GUEST_RSP, 0, "sp"},
488         {GUEST_RFLAGS, 0, "flags"},
489         {UREG(ax), 0, "ax"},
490         {UREG(bx), 0, "bx"},
491         {UREG(cx), 0, "cx"},
492         {UREG(dx), 0, "dx"},
493         {UREG(bp), 0, "bp"},
494         {UREG(si), 0, "si"},
495         {UREG(di), 0, "di"},
496 #ifdef RMACH
497         {UREG(r8), 0, "r8"},
498         {UREG(r9), 0, "r9"},
499         {UREG(r10), 0, "r10"},
500         {UREG(r11), 0, "r11"},
501         {UREG(r12), 0, "r12"},
502         {UREG(r13), 0, "r13"},
503         {UREG(r14), 0, "r14"},
504         {UREG(r15), 0, "r15"},
505 #endif
506         {GUEST_GDTRBASE, 0, "gdtrbase"},
507         {GUEST_GDTRLIMIT, 4, "gdtrlimit"},
508         {GUEST_IDTRBASE, 0, "idtrbase"},
509         {GUEST_IDTRLIMIT, 4, "idtrlimit"},
510         {GUEST_CS, 2, "cs"},
511         {GUEST_CSBASE, 0, "csbase"},
512         {GUEST_CSLIMIT, 4, "cslimit"},
513         {GUEST_CSPERM, 4, "csperm"},
514         {GUEST_DS, 2, "ds"},
515         {GUEST_DSBASE, 0, "dsbase"},
516         {GUEST_DSLIMIT, 4, "dslimit"},
517         {GUEST_DSPERM, 4, "dsperm"},
518         {GUEST_ES, 2, "es"},
519         {GUEST_ESBASE, 0, "esbase"},
520         {GUEST_ESLIMIT, 4, "eslimit"},
521         {GUEST_ESPERM, 4, "esperm"},
522         {GUEST_FS, 2, "fs"},
523         {GUEST_FSBASE, 0, "fsbase"},
524         {GUEST_FSLIMIT, 4, "fslimit"},
525         {GUEST_FSPERM, 4, "fsperm"},
526         {GUEST_GS, 2, "gs"},
527         {GUEST_GSBASE, 0, "gsbase"},
528         {GUEST_GSLIMIT, 4, "gslimit"},
529         {GUEST_GSPERM, 4, "gsperm"},
530         {GUEST_SS, 2, "ss"},
531         {GUEST_SSBASE, 0, "ssbase"},
532         {GUEST_SSLIMIT, 4, "sslimit"},
533         {GUEST_SSPERM, 4, "ssperm"},
534         {GUEST_TR, 2, "tr"},
535         {GUEST_TRBASE, 0, "trbase"},
536         {GUEST_TRLIMIT, 4, "trlimit"},
537         {GUEST_TRPERM, 4, "trperm"},
538         {GUEST_LDTR, 2, "ldtr"},
539         {GUEST_LDTRBASE, 0, "ldtrbase"},
540         {GUEST_LDTRLIMIT, 4, "ldtrlimit"},
541         {GUEST_LDTRPERM, 4, "ldtrperm"},
542         {GUEST_CR0, 0, "cr0real", nil, cr0realwrite},
543         {GUEST_CR0SHADOW, 0, "cr0fake", cr0fakeread},
544         {GUEST_CR0MASK, 0, "cr0mask", nil, cr0maskwrite},
545         {VMXVAR(cr2), 0, "cr2"},
546         {GUEST_CR3, 0, "cr3"},
547         {GUEST_CR4, 0, "cr4real", nil, cr4realwrite},
548         {GUEST_CR4SHADOW, 0, "cr4fake", cr4fakeread},
549         {GUEST_CR4MASK, 0, "cr4mask", nil, cr4maskwrite},
550         {GUEST_IA32_PAT, 8, "pat"},
551         {GUEST_IA32_EFER, 8, "efer", nil, eferwrite},
552         {VMXVAR(dr[0]), 0, "dr0"},
553         {VMXVAR(dr[1]), 0, "dr1"},
554         {VMXVAR(dr[2]), 0, "dr2"},
555         {VMXVAR(dr[3]), 0, "dr3"},
556         {VMXVAR(dr[6]), 0, "dr6", nil, dr6write},
557         {GUEST_DR7, 0, "dr7", nil, dr7write},
558         {VM_INSTRERR, 4, "instructionerror", nil, readonly},
559         {VM_EXREASON, 4, "exitreason", nil, readonly},
560         {VM_EXQUALIF, 0, "exitqualification", nil, readonly},
561         {VM_EXINTRINFO, 4, "exitinterruptinfo", nil, readonly},
562         {VM_EXINTRCODE, 4, "exitinterruptcode", nil, readonly},
563         {VM_EXINSTRLEN, 4, "exitinstructionlen", nil, readonly},
564         {VM_EXINSTRINFO, 4, "exitinstructioninfo", nil, readonly},
565         {VM_GUESTVA, 0, "exitva", nil, readonly},
566         {VM_GUESTPA, 0, "exitpa", nil, readonly},
567         {VM_IDTVECINFO, 4, "idtinterruptinfo", nil, readonly},
568         {VM_IDTVECCODE, 4, "idtinterruptcode", nil, readonly},
569 };
570
571 static int
572 vmokpage(u64int addr)
573 {
574         return (addr & 0xfff) == 0 && addr >> 48 == 0;
575 }
576
577 static uvlong *
578 eptwalk(uvlong addr)
579 {
580         uvlong *tab, *nt;
581         uvlong v;
582         int i;
583         
584         tab = vmx.pml4;
585         if(tab == nil) error(Egreg);
586         for(i = 3; i >= 1; i--){
587                 tab += addr >> 12 + 9 * i & 0x1ff;
588                 v = *tab;
589                 if((v & 3) == 0){
590                         nt = mallocalign(BY2PG, BY2PG, 0, 0);
591                         if(nt == nil) error(Enomem);
592                         memset(nt, 0, BY2PG);
593                         v = PADDR(nt) | 0x407;
594                         *tab = v;
595                 }
596                 tab = KADDR(v & ~0xfff);
597         }
598         return tab + (addr >> 12 & 0x1ff);
599 }
600
601 static void
602 eptfree(uvlong *tab, int level)
603 {
604         int i;
605         uvlong v, *t;
606         
607         if(tab == nil) error(Egreg);
608         if(level < 3){
609                 for(i = 0; i < 512; i++){
610                         v = tab[i];
611                         if((v & 3) == 0) continue;
612                         t = KADDR(v & ~0xfff);
613                         eptfree(t, level + 1);
614                         tab[i] = 0;
615                 }
616         }
617         if(level > 0)
618                 free(tab);              
619 }
620
621 static void
622 epttranslate(VmMem *mp)
623 {
624         uvlong p, hpa;
625
626         if(mp->seg != nil && (mp->seg->type & SG_TYPE) != SG_FIXED || (mp->lo & 0xfff) != 0 || (mp->hi & 0xfff) != 0 || (uint)mp->attr >= 0x1000)
627                 error(Egreg);
628         if(mp->seg != nil){
629                 if(mp->seg->base + mp->off + (mp->hi - mp->lo) > mp->seg->top)
630                         error(Egreg);
631                 hpa = mp->seg->map[0]->pages[0]->pa + mp->off;
632         }else
633                 hpa = 0;
634         for(p = mp->lo; p < mp->hi; p += BY2PG)
635                 *eptwalk(p) = hpa + (p - mp->lo) + mp->attr;
636         vmx.onentry |= FLUSHEPT;
637 }
638
639 static char *mtype[] = {"uc", "wc", "02", "03", "wt", "wp", "wb", "07"};
640
641 static int
642 cmdgetmeminfo(VmCmd *, va_list va)
643 {
644         VmMem *mp;
645         char *p0, *e, *p;
646         char attr[4];
647         char mt[4];
648         
649         p0 = va_arg(va, char *);
650         e = va_arg(va, char *);
651         p = p0;
652         for(mp = vmx.mem.next; mp != &vmx.mem; mp = mp->next){
653                 attr[0] = (mp->attr & 1) != 0 ? 'r' : '-';
654                 attr[1] = (mp->attr & 2) != 0 ? 'w' : '-';
655                 attr[2] = (mp->attr & 4) != 0 ? 'x' : '-';
656                 attr[3] = 0;
657                 *(ushort*)mt = *(u16int*)mtype[mp->attr >> 3 & 7];
658                 mt[2] = (mp->attr & 0x40) != 0 ? '!' : 0;
659                 mt[3] = 0;
660                 if(mp->name == nil)
661                         p = seprint(p, e, "%s %s %#llux %#llux\n", attr, mt, mp->lo, mp->hi);
662                 else
663                         p = seprint(p, e, "%s %s %#llux %#llux %s %#llux\n", attr, mt, mp->lo, mp->hi, mp->name, (uvlong)mp->off);
664         }
665         return p - p0;
666 }
667
668 static int
669 cmdclearmeminfo(VmCmd *, va_list)
670 {
671         VmMem *mp, *mn;
672
673         eptfree(vmx.pml4, 0);
674         for(mp = vmx.mem.next; mp != &vmx.mem; mp = mn){
675                 free(mp->name);
676                 putseg(mp->seg);
677                 mn = mp->next;
678                 free(mp);
679         }
680         vmx.mem.prev = &vmx.mem;
681         vmx.mem.next = &vmx.mem;
682         vmx.onentry |= FLUSHEPT;
683         return 0;
684 }
685
686 extern Segment* (*_globalsegattach)(char*);
687
688 static int
689 cmdsetmeminfo(VmCmd *, va_list va)
690 {
691         char *p0, *p, *q, *r;
692         int j;
693         char *f[10];
694         VmMem *mp;
695         int rc;
696
697         if(vmx.pml4 == nil)
698                 error(Egreg);   
699         p0 = va_arg(va, char *);
700         p = p0;
701         mp = nil;
702         for(;;){
703                 q = strchr(p, '\n');
704                 if(q == 0) break;
705                 *q = 0;
706                 if(mp == nil){
707                         mp = malloc(sizeof(VmMem));
708                         if(mp == nil)
709                                 error(Enomem);
710                 }
711                 memset(mp, 0, sizeof(VmMem));
712                 if(waserror()){
713                         putseg(mp->seg);
714                         free(mp->name);
715                         free(mp);
716                         nexterror();
717                 }
718                 rc = tokenize(p, f, nelem(f));
719                 p = q + 1;
720                 if(rc == 0) goto next;
721                 if(rc != 4 && rc != 6) error("number of fields wrong");
722                 for(q = f[0]; *q != 0; q++)
723                         switch(*q){
724                         case 'r': if((mp->attr & 1) != 0) goto tinval; mp->attr |= 1; break;
725                         case 'w': if((mp->attr & 2) != 0) goto tinval; mp->attr |= 2; break;
726                         case 'x': if((mp->attr & 4) != 0) goto tinval; mp->attr |= 0x404; break;
727                         case '-': break;
728                         default: tinval: error("invalid access field");
729                         }
730                 for(j = 0; j < 8; j++)
731                         if(strncmp(mtype[j], f[1], 2) == 0){
732                                 mp->attr |= j << 3;
733                                 break;
734                         }
735                 if(j == 8 || strlen(f[1]) > 3) error("invalid memory type");
736                 if(f[1][2] == '!') mp->attr |= 0x40;
737                 else if(f[1][2] != 0) error("invalid memory type");
738                 mp->lo = strtoull(f[2], &r, 0);
739                 if(*r != 0 || !vmokpage(mp->lo)) error("invalid low guest physical address");
740                 mp->hi = strtoull(f[3], &r, 0);
741                 if(*r != 0 || !vmokpage(mp->hi) || mp->hi <= mp->lo) error("invalid high guest physical address");
742                 mp->off = strtoull(f[5], &r, 0);
743                 if(*r != 0 || !vmokpage(mp->off)) error("invalid offset");
744                 if((mp->attr & 7) != 0){
745                         if(rc != 6) error("number of fields wrong");
746                         mp->seg = _globalsegattach(f[4]);
747                         if(mp->seg == nil) error("no such segment");
748                         if(mp->seg->base + mp->off + (mp->hi - mp->lo) > mp->seg->top) error("out of bounds");
749                         kstrdup(&mp->name, f[4]);
750                 }
751                 epttranslate(mp);
752                 mp->prev = vmx.mem.prev;
753                 mp->next = &vmx.mem;
754                 mp->prev->next = mp;
755                 mp->next->prev = mp;
756                 mp = nil;
757         next:
758                 poperror();
759         }
760         free(mp);
761         return p - p0;
762 }
763
764 static void
765 vmxreset(void)
766 {
767         ulong regs[4];
768         vlong msr;
769
770         cpuid(1, regs);
771         if((regs[2] & 1<<5) == 0) return;
772         /* check if disabled by BIOS */
773         if(rdmsr(0x3a, &msr) < 0) return;
774         if((msr & 5) != 5){
775                 if((msr & 1) == 0){ /* msr still unlocked */
776                         wrmsr(0x3a, msr | 5);
777                         if(rdmsr(0x3a, &msr) < 0)
778                                 return;
779                 }
780                 if((msr & 5) != 5)
781                         return;
782         }
783         if(rdmsr(VMX_PROCB_CTLS_MSR, &msr) < 0) return;
784         if((vlong)msr >= 0) return;
785         if(rdmsr(VMX_PROCB_CTLS2_MSR, &msr) < 0) return;
786         if((msr >> 32 & PROCB_EPT) == 0 || (msr >> 32 & PROCB_VPID) == 0) return;
787         vmx.state = VMXINACTIVE;
788         vmx.lastcmd = &vmx.firstcmd;
789         vmx.mem.next = &vmx.mem;
790         vmx.mem.prev = &vmx.mem;
791 }
792
793 static void
794 vmxshutdown(void)
795 {
796         if(vmx.on){
797                 vmxoff();
798                 vmx.on = 0;
799         }
800 }
801
802 static void
803 vmxaddmsr(u32int msr, u64int gval)
804 {
805         int i;
806
807         if(vmx.nmsr >= MAXMSR)
808                 error("too many MSRs");
809         i = 2 * vmx.nmsr++;
810         vmx.msrhost[i] = msr;
811         rdmsr(msr, (vlong *) &vmx.msrhost[i+1]);
812         vmx.msrguest[i] = msr;
813         vmx.msrguest[i+1] = gval;
814         vmcswrite(VMENTRY_MSRLDCNT, vmx.nmsr);
815         vmcswrite(VMEXIT_MSRSTCNT, vmx.nmsr);
816         vmcswrite(VMEXIT_MSRLDCNT, vmx.nmsr);
817 }
818
819 static void
820 vmxtrapmsr(u32int msr, enum { TRAPRD = 1, TRAPWR = 2 } state)
821 {
822         u32int m;
823         
824         if(msr >= 0x2000 && (u32int)(msr - 0xc0000000) >= 0x2000)
825                 return;
826         msr = msr & 0x1fff | msr >> 18 & 0x2000;
827         m = 1<<(msr & 31);
828         if((state & TRAPRD) != 0)
829                 vmx.msrbits[msr / 32] |= m;
830         else
831                 vmx.msrbits[msr / 32] &= ~m;
832         if((state & TRAPWR) != 0)
833                 vmx.msrbits[msr / 32 + 512] |= m;
834         else
835                 vmx.msrbits[msr / 32 + 512] &= ~m;
836 }
837
838 static void
839 vmcsinit(void)
840 {
841         vlong msr;
842         u32int x;
843         
844         memset(&vmx.ureg, 0, sizeof(vmx.ureg));
845         vmx.launched = 0;
846         vmx.onentry = 0;        
847         
848         if(rdmsr(VMX_BASIC_MSR, &msr) < 0) error("rdmsr(VMX_BASIC_MSR) failed");
849         if((msr & 1ULL<<55) != 0){
850                 if(rdmsr(VMX_TRUE_PROCB_CTLS_MSR, &procb_ctls) < 0) error("rdmsr(VMX_TRUE_PROCB_CTLS_MSR) failed");
851                 if(rdmsr(VMX_TRUE_PINB_CTLS_MSR, &pinb_ctls) < 0) error("rdmsr(VMX_TRUE_PINB_CTLS_MSR) failed");
852         }else{
853                 if(rdmsr(VMX_PROCB_CTLS_MSR, &procb_ctls) < 0) error("rdmsr(VMX_PROCB_CTLS_MSR) failed");
854                 if(rdmsr(VMX_PINB_CTLS_MSR, &pinb_ctls) < 0) error("rdmsr(VMX_PINB_CTLS_MSR) failed");
855         }
856
857         if(rdmsr(VMX_PINB_CTLS_MSR, &msr) < 0) error("rdmsr(VMX_PINB_CTLS_MSR failed");
858         x = (u32int)pinb_ctls | 1<<1 | 1<<2 | 1<<4; /* currently reserved default1 bits */
859         x |= PINB_EXITIRQ | PINB_EXITNMI;
860         x &= pinb_ctls >> 32;
861         vmcswrite(PINB_CTLS, x);
862         
863         if(rdmsr(VMX_PROCB_CTLS_MSR, &msr) < 0) error("rdmsr(VMX_PROCB_CTLS_MSR failed");
864         x = (u32int)procb_ctls | 1<<1 | 7<<4 | 1<<8 | 1<<13 | 1<<14 | 1<<26; /* currently reserved default1 bits */
865         x |= PROCB_EXITHLT | PROCB_EXITMWAIT;
866         x |= PROCB_EXITMOVDR | PROCB_EXITIO | PROCB_EXITMONITOR | PROCB_MSRBITMAP;
867         x |= PROCB_USECTLS2;
868         x &= msr >> 32;
869         vmcswrite(PROCB_CTLS, x);
870         
871         if(rdmsr(VMX_PROCB_CTLS2_MSR, &msr) < 0) error("rdmsr(VMX_PROCB_CTLS2_MSR failed");
872         x = PROCB_EPT | PROCB_VPID | PROCB_UNRESTR;
873         x &= msr >> 32;
874         vmcswrite(PROCB_CTLS2, x);
875         
876         if(rdmsr(VMX_VMEXIT_CTLS_MSR, &msr) < 0) error("rdmsr(VMX_VMEXIT_CTLS_MSR failed");
877         x = (u32int)msr;
878         if(sizeof(uintptr) == 8) x |= VMEXIT_HOST64;
879         x |= VMEXIT_LD_IA32_PAT | VMEXIT_LD_IA32_EFER | VMEXIT_ST_DEBUG | VMEXIT_ST_IA32_EFER;
880         x &= msr >> 32;
881         vmcswrite(VMEXIT_CTLS, x);
882         
883         if(rdmsr(VMX_VMENTRY_CTLS_MSR, &msr) < 0) error("rdmsr(VMX_VMENTRY_CTLS_MSR failed");
884         x = (u32int)msr;
885         x |= VMENTRY_LD_IA32_PAT | VMENTRY_LD_IA32_EFER | VMENTRY_LD_DEBUG;
886         x &= msr >> 32;
887         vmcswrite(VMENTRY_CTLS, x);
888         
889         vmcswrite(CR3_TARGCNT, 0);
890         vmcswrite(VMENTRY_INTRINFO, 0);
891         vmcswrite(VMCS_LINK, -1);
892         
893         vmcswrite(HOST_CS, KESEL);
894         vmcswrite(HOST_DS, KDSEL);
895         vmcswrite(HOST_ES, KDSEL);
896         vmcswrite(HOST_FS, KDSEL);
897         vmcswrite(HOST_GS, KDSEL);
898         vmcswrite(HOST_SS, KDSEL);
899         vmcswrite(HOST_TR, TSSSEL);
900         vmcswrite(HOST_CR0, getcr0() & ~0xe);
901         vmcswrite(HOST_CR3, getcr3());
902         vmcswrite(HOST_CR4, getcr4());
903         rdmsr(FSbase, &msr);
904         vmcswrite(HOST_FSBASE, msr);
905         rdmsr(GSbase, &msr);
906         vmcswrite(HOST_GSBASE, msr);
907         vmcswrite(HOST_TRBASE, (uintptr) m->tss);
908         vmcswrite(HOST_GDTR, (uintptr) m->gdt);
909         vmcswrite(HOST_IDTR, IDTADDR);
910         if(rdmsr(0x277, &msr) < 0) error("rdmsr(IA32_PAT) failed");
911         vmcswrite(HOST_IA32_PAT, msr);
912         if(rdmsr(Efer, &msr) < 0) error("rdmsr(IA32_EFER) failed");
913         vmcswrite(HOST_IA32_EFER, msr);
914         
915         vmcswrite(EXC_BITMAP, 1<<18|1<<1);
916         vmcswrite(PFAULT_MASK, 0);
917         vmcswrite(PFAULT_MATCH, 0);
918         
919         vmcswrite(GUEST_CSBASE, 0);
920         vmcswrite(GUEST_DSBASE, 0);
921         vmcswrite(GUEST_ESBASE, 0);
922         vmcswrite(GUEST_FSBASE, 0);
923         vmcswrite(GUEST_GSBASE, 0);
924         vmcswrite(GUEST_SSBASE, 0);
925         vmcswrite(GUEST_CSLIMIT, -1);
926         vmcswrite(GUEST_DSLIMIT, -1);
927         vmcswrite(GUEST_ESLIMIT, -1);
928         vmcswrite(GUEST_FSLIMIT, -1);
929         vmcswrite(GUEST_GSLIMIT, -1);
930         vmcswrite(GUEST_SSLIMIT, -1);
931         vmcswrite(GUEST_CSPERM, (SEGG|SEGD|SEGP|SEGPL(0)|SEGEXEC|SEGR) >> 8 | 1);
932         vmcswrite(GUEST_DSPERM, (SEGG|SEGB|SEGP|SEGPL(0)|SEGDATA|SEGW) >> 8 | 1);
933         vmcswrite(GUEST_ESPERM, (SEGG|SEGB|SEGP|SEGPL(0)|SEGDATA|SEGW) >> 8 | 1);
934         vmcswrite(GUEST_FSPERM, (SEGG|SEGB|SEGP|SEGPL(0)|SEGDATA|SEGW) >> 8 | 1);
935         vmcswrite(GUEST_GSPERM, (SEGG|SEGB|SEGP|SEGPL(0)|SEGDATA|SEGW) >> 8 | 1);
936         vmcswrite(GUEST_SSPERM, (SEGG|SEGB|SEGP|SEGPL(0)|SEGDATA|SEGW) >> 8 | 1);
937         vmcswrite(GUEST_LDTRPERM, 1<<16);
938
939         vmcswrite(GUEST_CR0MASK, CR0KERNEL);
940         vmcswrite(GUEST_CR4MASK, CR4KERNEL);
941         vmcswrite(GUEST_CR0, getcr0() & CR0KERNEL | 0x31);
942         vmcswrite(GUEST_CR3, 0);
943         vmcswrite(GUEST_CR4, getcr4() & CR4KERNEL);
944         vmcswrite(GUEST_CR0SHADOW, getcr0() & CR0KERNEL | 0x31);
945         vmcswrite(GUEST_CR4SHADOW, getcr4() & ~CR4VMXE & CR4KERNEL);
946         
947         vmcswrite(GUEST_IA32_PAT, 0x0007040600070406ULL);
948         vmcswrite(GUEST_IA32_EFER, 0);
949         
950         vmcswrite(GUEST_TRBASE, 0);
951         vmcswrite(GUEST_TRLIMIT, 0xffff);
952         vmcswrite(GUEST_TRPERM, (SEGTSS|SEGPL(0)|SEGP) >> 8 | 2);
953         
954         vmx.pml4 = mallocalign(BY2PG, BY2PG, 0, 0);
955         memset(vmx.pml4, 0, BY2PG);
956         vmcswrite(VM_EPTP, PADDR(vmx.pml4) | 3<<3);
957         vmx.vpid = 1;
958         vmcswrite(VM_VPID, vmx.vpid);
959         
960         vmcswrite(GUEST_RFLAGS, 2);
961         
962         vmx.onentry = FLUSHVPID | FLUSHEPT;
963         
964         vmx.fp = mallocalign(512, 512, 0, 0);
965         if(vmx.fp == nil)
966                 error(Enomem);
967         fpinit();
968         fpsave(vmx.fp);
969         
970         vmx.msrhost = mallocalign(MAXMSR*16, 16, 0, 0);
971         vmx.msrguest = mallocalign(MAXMSR*16, 16, 0, 0);
972         vmx.msrbits = mallocalign(4096, 4096, 0, 0);
973         if(vmx.msrhost == nil || vmx.msrguest == nil || vmx.msrbits == nil)
974                 error(Enomem);
975         memset(vmx.msrbits, -1, 4096);
976         vmxtrapmsr(Efer, 0);
977         vmcswrite(VMENTRY_MSRLDADDR, PADDR(vmx.msrguest));
978         vmcswrite(VMEXIT_MSRSTADDR, PADDR(vmx.msrguest));
979         vmcswrite(VMEXIT_MSRLDADDR, PADDR(vmx.msrhost));
980         vmcswrite(MSR_BITMAP, PADDR(vmx.msrbits));
981         
982         if(sizeof(uintptr) == 8){
983                 vmxaddmsr(Star, 0);
984                 vmxaddmsr(Lstar, 0);
985                 vmxaddmsr(Cstar, 0);
986                 vmxaddmsr(Sfmask, 0);
987                 vmxaddmsr(KernelGSbase, 0);
988                 vmxtrapmsr(Star, 0);
989                 vmxtrapmsr(Lstar, 0);
990                 vmxtrapmsr(Cstar, 0);
991                 vmxtrapmsr(Sfmask, 0);
992                 vmxtrapmsr(FSbase, 0);
993                 vmxtrapmsr(GSbase, 0);
994                 vmxtrapmsr(KernelGSbase, 0);
995         }
996 }
997
998 static void
999 vmxstart(void)
1000 {
1001         static uchar *vmcs; /* also vmxon region */
1002         vlong msr, msr2;
1003         uintptr cr;
1004         vlong x;
1005
1006         putcr4(getcr4() | 0x2000); /* set VMXE */
1007         putcr0(getcr0() | 0x20); /* set NE */
1008         cr = getcr0();
1009         if(rdmsr(VMX_CR0_FIXED0, &msr) < 0) error("rdmsr(VMX_CR0_FIXED0) failed");
1010         if(rdmsr(VMX_CR0_FIXED1, &msr2) < 0) error("rdmsr(VMX_CR0_FIXED1) failed");
1011         if((cr & ~msr & ~msr2 | ~cr & msr & msr2) != 0) error("invalid CR0 value");
1012         cr = getcr4();
1013         if(rdmsr(VMX_CR4_FIXED0, &msr) < 0) error("rdmsr(VMX_CR4_FIXED0) failed");
1014         if(rdmsr(VMX_CR4_FIXED1, &msr2) < 0) error("rdmsr(VMX_CR4_FIXED1) failed");
1015         if((cr & ~msr & ~msr2 | ~cr & msr & msr2) != 0) error("invalid CR4 value");
1016
1017         if(vmcs == nil){
1018                 vmcs = mallocalign(8192, 4096, 0, 0);
1019                 if(vmcs == nil)
1020                         error(Enomem);
1021         }
1022         memset(vmcs, 0, 8192);
1023         rdmsr(VMX_BASIC_MSR, &x);
1024         *(ulong*)vmcs = x;
1025         *(ulong*)&vmcs[4096] = x;
1026         if(vmxon(PADDR(vmcs + 4096)) < 0)
1027                 error("vmxon failed");
1028         vmx.on = 1;
1029         if(vmclear(PADDR(vmcs)) < 0)
1030                 error("vmclear failed");
1031         if(vmptrld(PADDR(vmcs)) < 0)
1032                 error("vmptrld failed");
1033         vmcsinit();
1034 }
1035
1036 static void
1037 cmdrelease(VmCmd *p, int f)
1038 {
1039         lock(p);
1040         p->flags |= CMDFDONE | f;
1041         wakeup(p);
1042         unlock(p);
1043 }
1044
1045 static void
1046 killcmds(VmCmd *notme)
1047 {
1048         VmCmd *p, *pn;
1049         
1050         for(p = vmx.postponed; p != nil; p = pn){
1051                 pn = p->next;
1052                 p->next = nil;
1053                 if(p == notme) continue;
1054                 kstrcpy(p->errstr, Equit, ERRMAX);
1055                 cmdrelease(p, CMDFFAIL);
1056         }
1057         vmx.postponed = nil;
1058         ilock(&vmx.cmdlock);
1059         for(p = vmx.firstcmd; p != nil; p = pn){
1060                 pn = p->next;
1061                 p->next = nil;
1062                 if(p == notme) continue;
1063                 kstrcpy(p->errstr, Equit, ERRMAX);
1064                 cmdrelease(p, CMDFFAIL);
1065         }
1066         vmx.firstcmd = nil;
1067         vmx.lastcmd = &vmx.firstcmd;
1068         iunlock(&vmx.cmdlock);
1069 }
1070
1071 static int
1072 cmdquit(VmCmd *p, va_list va)
1073 {
1074         vmx.state = VMXENDING;
1075         killcmds(p);
1076
1077         if(vmx.pml4 != nil){
1078                 cmdclearmeminfo(p, va);
1079                 free(vmx.pml4);
1080                 vmx.pml4 = nil;
1081         }
1082         vmx.got = 0;
1083         vmx.onentry = 0;
1084         vmx.stepmap = nil;
1085         
1086         free(vmx.msrhost);
1087         free(vmx.msrguest);
1088         vmx.msrhost = nil;
1089         vmx.msrguest = nil;
1090         vmx.nmsr = 0;
1091
1092         if(vmx.on)
1093                 vmxoff();
1094         vmx.state = VMXINACTIVE;
1095         cmdrelease(p, 0);
1096         pexit(Equit, 1);
1097         return 0;
1098 }
1099
1100 static void
1101 processexit(void)
1102 {
1103         u32int reason;
1104         
1105         reason = vmcsread(VM_EXREASON);
1106         if((reason & 1<<31) == 0)
1107                 switch(reason & 0xffff){
1108                 case 1: /* external interrupt */
1109                 case 3: /* INIT */
1110                 case 4: /* SIPI */
1111                 case 5: /* IO SMI */
1112                 case 6: /* SMI */
1113                 case 7: /* IRQ window */
1114                 case 8: /* NMI window */
1115                         return;
1116                 case 37:
1117                         if((vmx.onentry & STEP) != 0){
1118                                 vmx.state = VMXREADY;
1119                                 vmx.got |= GOTSTEP;
1120                                 vmx.onentry &= ~STEP;
1121                                 return;
1122                         }
1123                         break;
1124                 }
1125         if((vmx.onentry & STEP) != 0){
1126                 print("VMX: exit reason %#x when expected step...\n", reason & 0xffff);
1127                 vmx.onentry &= ~STEP;
1128                 vmx.got |= GOTSTEP|GOTSTEPERR;
1129         }
1130         vmx.state = VMXREADY;
1131         vmx.got |= GOTEXIT;
1132 }
1133
1134 static int
1135 cmdgetregs(VmCmd *, va_list va)
1136 {
1137         char *p0, *e;
1138         GuestReg *r;
1139         uvlong val;
1140         int s;
1141         char *p;
1142         
1143         p0 = va_arg(va, char *);
1144         e = va_arg(va, char *);
1145         p = p0;
1146         for(r = guestregs; r < guestregs + nelem(guestregs); r++)
1147                 if(r->read != nil){
1148                         p = seprint(p, e, "%s ", r->name);
1149                         p = r->read(p, e);
1150                         p = strecpy(p, e, "\n");
1151                 }else{
1152                         if(r->offset >= 0)
1153                                 val = vmcsread(r->offset);
1154                         else
1155                                 val = *(uintptr*)((uchar*)&vmx + ~r->offset);
1156                         s = r->size;
1157                         if(s == 0) s = sizeof(uintptr);
1158                         p = seprint(p, e, "%s %#.*llux\n", r->name, s * 2, val);
1159                 }
1160         return p - p0;
1161 }
1162
1163 static int
1164 setregs(char *p0, char rs, char *fs)
1165 {
1166         char *p, *q, *rp;
1167         char *f[10];
1168         GuestReg *r;
1169         uvlong val;
1170         int sz;
1171         int rc;
1172
1173         p = p0;
1174         for(;;){
1175                 q = strchr(p, rs);
1176                 if(q == 0) break;
1177                 *q = 0;
1178                 rc = getfields(p, f, nelem(f), 1, fs);
1179                 p = q + 1;
1180                 if(rc == 0) continue;
1181                 if(rc != 2) error("number of fields wrong");
1182                 
1183                 for(r = guestregs; r < guestregs + nelem(guestregs); r++)
1184                         if(strcmp(r->name, f[0]) == 0)
1185                                 break;
1186                 if(r == guestregs + nelem(guestregs))
1187                         error("unknown register");
1188                 if(r->write != nil){
1189                         r->write(f[1]);
1190                         continue;
1191                 }
1192                 val = strtoull(f[1], &rp, 0);
1193                 sz = r->size;
1194                 if(sz == 0) sz = sizeof(uintptr);
1195                 if(rp == f[1] || *rp != 0) error("invalid value");
1196                 if(r->offset >= 0)
1197                         vmcswrite(r->offset, val);
1198                 else{
1199                         assert((u32int)~r->offset + sz <= sizeof(Vmx)); 
1200                         switch(sz){
1201                         case 1: *(u8int*)((u8int*)&vmx + (u32int)~r->offset) = val; break;
1202                         case 2: *(u16int*)((u8int*)&vmx + (u32int)~r->offset) = val; break;
1203                         case 4: *(u32int*)((u8int*)&vmx + (u32int)~r->offset) = val; break;
1204                         case 8: *(u64int*)((u8int*)&vmx + (u32int)~r->offset) = val; break;
1205                         default: error(Egreg);
1206                         }
1207                 }
1208         }
1209         return p - p0;
1210 }
1211
1212 static int
1213 cmdsetregs(VmCmd *, va_list va)
1214 {
1215         return setregs(va_arg(va, char *), '\n', " \t");
1216 }
1217
1218 static int
1219 cmdgetfpregs(VmCmd *, va_list va)
1220 {
1221         uchar *p;
1222         
1223         p = va_arg(va, uchar *);
1224         memmove(p, vmx.fp, sizeof(FPsave));
1225         return sizeof(FPsave);
1226 }
1227
1228 static int
1229 cmdsetfpregs(VmCmd *, va_list va)
1230 {
1231         uchar *p;
1232         ulong n;
1233         vlong off;
1234         
1235         p = va_arg(va, uchar *);
1236         n = va_arg(va, ulong);
1237         off = va_arg(va, vlong);
1238         if(off < 0 || off >= sizeof(FPsave)) n = 0;
1239         else if(off + n > sizeof(FPsave)) n = sizeof(FPsave) - n;
1240         memmove((uchar*)vmx.fp + off, p, n);
1241         return n;
1242 }
1243
1244 static int
1245 cmdgo(VmCmd *, va_list va)
1246 {
1247         char *r;
1248
1249         if(vmx.state != VMXREADY)
1250                 error("VM not ready");
1251         r = va_arg(va, char *);
1252         if(r != nil) setregs(r, ';', "=");
1253         vmx.state = VMXRUNNING;
1254         return 0;
1255 }
1256
1257 static int
1258 cmdstop(VmCmd *, va_list)
1259 {
1260         if(vmx.state != VMXREADY && vmx.state != VMXRUNNING)
1261                 error("VM not ready or running");
1262         vmx.state = VMXREADY;
1263         return 0;
1264 }
1265
1266 static int
1267 cmdstatus(VmCmd *, va_list va)
1268 {       
1269         kstrcpy(va_arg(va, char *), vmx.errstr, ERRMAX);
1270         return vmx.state;
1271 }
1272
/*
 * Names reported by cmdwait(), indexed by the low 16 bits of the VM
 * exit reason.  Indices without an entry are nil and are reported
 * numerically.
 * NOTE(review): a leading '.' appears to mark exits caused by
 * executing the named instruction -- confirm against the vmx tools.
 */
static char *exitreasons[] = {
	[0] "exc",
	[1] "extirq",
	[2] "triplef",
	[3] "initsig",
	[4] "sipi",
	[5] "smiio",
	[6] "smiother",
	[7] "irqwin",
	[8] "nmiwin",
	[9] "taskswitch",
	[10] ".cpuid",
	[11] ".getsec",
	[12] ".hlt",
	[13] ".invd",
	[14] ".invlpg",
	[15] ".rdpmc",
	[16] ".rdtsc",
	[17] ".rsm",
	[18] ".vmcall",
	[19] ".vmclear",
	[20] ".vmlaunch",
	[21] ".vmptrld",
	[22] ".vmptrst",
	[23] ".vmread",
	[24] ".vmresume",
	[25] ".vmwrite",
	[26] ".vmxoff",
	[27] ".vmxon",
	[28] "movcr",
	[29] ".movdr",
	[30] "io",
	[31] ".rdmsr",
	[32] ".wrmsr",
	[33] "entrystate",
	[34] "entrymsr",
	[36] ".mwait",
	[37] "monitortrap",
	[39] ".monitor",
	[40] ".pause",
	[41] "mcheck",
	[43] "tpr",
	[44] "apicacc",
	[45] "eoi",
	[46] "gdtr_idtr",
	[47] "ldtr_tr",
	[48] "eptfault",
	[49] "eptinval",
	[50] ".invept",
	[51] ".rdtscp",
	[52] "preempt",
	[53] ".invvpid",
	[54] ".wbinvd",
	[55] ".xsetbv",
	[56] "apicwrite",
	[57] ".rdrand",
	[58] ".invpcid",
	[59] ".vmfunc",
	[60] ".encls",
	[61] ".rdseed",
	[62] "pmlfull",
	[63] ".xsaves",
	[64] ".xrstors",
};
1284
/*
 * Exception mnemonics indexed by vector number.  cmdwait() uses the
 * table to name an exception exit, eventparse() to translate a name
 * back into a vector.  Vectors without an entry are nil.
 */
static char *except[] = {
	[0] "#de",
	[1] "#db",
	[3] "#bp",
	[4] "#of",
	[5] "#br",
	[6] "#ud",
	[7] "#nm",
	[8] "#df",
	[10] "#ts",
	[11] "#np",
	[12] "#ss",
	[13] "#gp",
	[14] "#pf",
	[16] "#mf",
	[17] "#ac",
	[18] "#mc",
	[19] "#xm",
	[20] "#ve",
};
1290
1291 static int
1292 cmdwait(VmCmd *cp, va_list va)
1293 {
1294         char *p, *p0, *e;
1295         u32int reason, intr;
1296         uvlong qual;
1297         u16int rno;
1298
1299         if(cp->scratched)
1300                 error(Eintr);
1301         p0 = p = va_arg(va, char *);
1302         e = va_arg(va, char *);
1303         if((vmx.got & GOTIRQACK) != 0){
1304                 p = seprint(p, e, "*ack %d\n", vmx.irqack.info & 0xff);
1305                 vmx.got &= ~GOTIRQACK;
1306                 return p - p0;
1307         }
1308         if((vmx.got & GOTEXIT) == 0){
1309                 cp->flags |= CMDFPOSTP;
1310                 return -1;
1311         }
1312         vmx.got &= ~GOTEXIT;
1313         reason = vmcsread(VM_EXREASON);
1314         qual = vmcsread(VM_EXQUALIF);
1315         rno = reason;
1316         intr = vmcsread(VM_EXINTRINFO);
1317         if((reason & 1<<31) != 0)
1318                 p = seprint(p, e, "!");
1319         if(rno == 0 && (intr & 1<<31) != 0){
1320                 if((intr & 0xff) >= nelem(except) || except[intr & 0xff] == nil)
1321                         p = seprint(p, e, "#%d ", intr & 0xff);
1322                 else
1323                         p = seprint(p, e, "%s ", except[intr & 0xff]);
1324         }else if(rno >= nelem(exitreasons) || exitreasons[rno] == nil)
1325                 p = seprint(p, e, "?%d ", rno);
1326         else
1327                 p = seprint(p, e, "%s ", exitreasons[rno]);
1328         p = seprint(p, e, "%#ullx pc %#ullx sp %#ullx ilen %#ullx iinfo %#ullx", qual, vmcsread(GUEST_RIP), vmcsread(GUEST_RSP), vmcsread(VM_EXINSTRLEN), vmcsread(VM_EXINSTRINFO));
1329         if((intr & 1<<11) != 0) p = seprint(p, e, " excode %#ullx", vmcsread(VM_EXINTRCODE));
1330         if(rno == 48 && (qual & 0x80) != 0) p = seprint(p, e, " va %#ullx", vmcsread(VM_GUESTVA));
1331         if(rno == 48 || rno == 49) p = seprint(p, e, " pa %#ullx", vmcsread(VM_GUESTPA));
1332         if(rno == 30) p = seprint(p, e, " ax %#ullx", (uvlong)vmx.ureg.ax);
1333         p = seprint(p, e, "\n");
1334         return p - p0;
1335 }
1336
1337 static int
1338 cmdstep(VmCmd *cp, va_list va)
1339 {
1340         switch(cp->retval){
1341         case 0:
1342                 if((vmx.got & GOTSTEP) != 0 || (vmx.onentry & STEP) != 0)
1343                         error(Einuse);
1344                 if(vmx.state != VMXREADY){
1345                         print("pre-step in state %s\n", statenames[vmx.state]);
1346                         error("not ready");
1347                 }
1348                 vmx.stepmap = va_arg(va, VmMem *);
1349                 vmx.onentry |= STEP;
1350                 vmx.state = VMXRUNNING;
1351                 cp->flags |= CMDFPOSTP;
1352                 return 1;
1353         case 1:
1354                 if(vmx.state != VMXREADY){
1355                         print("post-step in state %s\n", statenames[vmx.state]);
1356                         vmx.onentry &= ~STEP;
1357                         vmx.got &= ~(GOTSTEP|GOTSTEPERR);
1358                         error("not ready");
1359                 }
1360                 if((vmx.got & GOTSTEP) == 0){
1361                         cp->flags |= CMDFPOSTP;
1362                         return 1;
1363                 }
1364                 if((vmx.got & GOTSTEPERR) != 0){
1365                         vmx.got &= ~(GOTSTEP|GOTSTEPERR);
1366                         error("step failed");
1367                 }
1368                 vmx.got &= ~(GOTSTEP|GOTSTEPERR);
1369                 return 1;
1370         }
1371         return 0;
1372 }
1373
1374 static void
1375 eventparse(char *p, VmIntr *vi)
1376 {
1377         char *q, *r;
1378         int i;
1379         
1380         memset(vi, 0, sizeof(VmIntr));
1381         q = nil;
1382         kstrdup(&q, p);
1383         if(waserror()){
1384                 free(q);
1385                 memset(vi, 0, sizeof(VmIntr));
1386                 nexterror();
1387         }
1388         vi->info = 1<<31;
1389         r = strchr(q, ',');
1390         if(r != nil) *r++ = 0;
1391         for(i = 0; i < nelem(except); i++)
1392                 if(except[i] != nil && strcmp(except[i], q) == 0)
1393                         break;
1394         if(*q == '#'){
1395                 q++;
1396                 vi->info |= 3 << 8;
1397         }
1398         if(i == nelem(except)){
1399                 i = strtoul(q, &q, 10);
1400                 if(*q != 0 || i > 255) error(Ebadctl);
1401         }
1402         vi->info |= i;
1403         if((vi->info & 0x7ff) == 3 || (vi->info & 0x7ff) == 4)
1404                 vi->info += 3 << 8;
1405         if(r == nil) goto out;
1406         if(*r != ','){
1407                 vi->code = strtoul(r, &r, 0);
1408                 vi->info |= 1<<11;
1409         }else r++;
1410         if(*r == ',')
1411                 vi->ilen = strtoul(r + 1, &r, 0);
1412         if(*r != 0) error(Ebadctl);
1413 out:
1414         poperror();
1415         free(q);
1416 }
1417
1418 static int
1419 cmdexcept(VmCmd *cp, va_list va)
1420 {
1421         if(cp->scratched) error(Eintr);
1422         if((vmx.onentry & POSTEX) != 0){
1423                 cp->flags |= CMDFPOSTP;
1424                 return 0;
1425         }
1426         eventparse(va_arg(va, char *), &vmx.exc);
1427         vmx.onentry |= POSTEX;
1428         return 0;
1429 }
1430
1431 static int
1432 cmdirq(VmCmd *, va_list va)
1433 {
1434         char *p;
1435         VmIntr vi;
1436         
1437         p = va_arg(va, char *);
1438         if(p == nil)
1439                 vmx.onentry &= ~POSTIRQ;
1440         else{
1441                 eventparse(p, &vi);
1442                 vmx.irq = vi;
1443                 vmx.onentry |= POSTIRQ;
1444         }
1445         return 0;
1446 }
1447
1448 static int
1449 cmdextrap(VmCmd *, va_list va)
1450 {
1451         char *p, *q;
1452         u32int v;
1453         
1454         p = va_arg(va, char *);
1455         v = strtoul(p, &q, 0);
1456         if(q == p || *q != 0) error(Ebadarg);
1457         vmcswrite(EXC_BITMAP, v);
1458         return 0;
1459 }
1460
1461 static int
1462 gotcmd(void *)
1463 {
1464         int rc;
1465
1466         ilock(&vmx.cmdlock);
1467         rc = vmx.firstcmd != nil;
1468         iunlock(&vmx.cmdlock);
1469         return rc;
1470 }
1471
1472 static void
1473 markcmddone(VmCmd *p, VmCmd ***pp)
1474 {
1475         if((p->flags & (CMDFFAIL|CMDFPOSTP)) == CMDFPOSTP){
1476                 **pp = p;
1477                 *pp = &p->next;
1478         }else{
1479                 p->flags = p->flags & ~CMDFPOSTP;
1480                 cmdrelease(p, 0);
1481         }
1482 }
1483
1484 static VmCmd **
1485 markppcmddone(VmCmd **pp)
1486 {
1487         VmCmd *p;
1488         
1489         p = *pp;
1490         if((p->flags & (CMDFFAIL|CMDFPOSTP)) == CMDFPOSTP)
1491                 return &p->next;
1492         *pp = p->next;
1493         p->next = nil;
1494         p->flags = p->flags & ~CMDFPOSTP;
1495         cmdrelease(p, 0);
1496         return pp;
1497 }
1498
1499
/*
 * Execute pending commands in the context of the VM process.
 *
 * First every command on the postponed list is run again, then the
 * queue of newly submitted commands is drained.  A command that
 * raises an error gets the error string copied into its errstr and
 * CMDFFAIL set.  Commands that ask to be postponed are kept on
 * (respectively appended to) the postponed list, all others are
 * released to their issuer.
 */
static void
runcmd(void)
{
	VmCmd *p, **pp;
	
	/* pass 1: retry postponed commands; pp is the link that refers to p */
	for(pp = &vmx.postponed; p = *pp, p != nil; ){
		if(waserror()){
			kstrcpy(p->errstr, up->errstr, ERRMAX);
			p->flags |= CMDFFAIL;
			pp = markppcmddone(pp);
			continue;
		}
		/* the handler sets CMDFPOSTP again if it still cannot finish */
		p->flags &= ~CMDFPOSTP;
		p->retval = p->cmd(p, p->va);
		poperror();
		pp = markppcmddone(pp);
	}
	/*
	 * pass 2: drain the submit queue.  The loop above ended with
	 * *pp == nil, so pp is the tail link of the postponed list and
	 * markcmddone() appends newly postponed commands there.
	 */
	for(;;){
		/* unlink the head under the lock, run it without the lock */
		ilock(&vmx.cmdlock);
		p = vmx.firstcmd;
		if(p == nil){
			iunlock(&vmx.cmdlock);
			break;
		}
		vmx.firstcmd = p->next;
		/* p was the last element: the queue is empty again */
		if(vmx.lastcmd == &p->next)
			vmx.lastcmd = &vmx.firstcmd;
		iunlock(&vmx.cmdlock);
		p->next = nil;
		if(waserror()){
			kstrcpy(p->errstr, up->errstr, ERRMAX);
			p->flags |= CMDFFAIL;
			markcmddone(p, &pp);
			continue;
		}
		/* the issuer was interrupted while waiting (see vmxcmd): do not run it */
		if(p->scratched) error(Eintr);
		p->retval = p->cmd(p, p->va);
		poperror();
		markcmddone(p, &pp);
	}
}
1541
1542 static void
1543 dostep(int setup)
1544 {
1545         static uvlong oldmap;
1546         static uvlong *mapptr;
1547
1548         if(setup){
1549                 if(vmx.stepmap != nil){
1550                         mapptr = eptwalk(vmx.stepmap->lo);
1551                         oldmap = *mapptr;
1552                         epttranslate(vmx.stepmap);
1553                 }
1554         }else{
1555                 vmcswrite(PROCB_CTLS, vmcsread(PROCB_CTLS) & ~(uvlong)PROCB_MONTRAP);
1556                 if(vmx.stepmap != nil){
1557                         *mapptr = oldmap;
1558                         vmx.stepmap = nil;
1559                         vmx.onentry |= FLUSHEPT;
1560                 }
1561         }
1562 }
1563
1564 static void
1565 vmxproc(void *)
1566 {
1567         int init, rc, x;
1568         u32int procbctls, defprocbctls;
1569         vlong v;
1570
1571         procwired(up, 0);
1572         sched();
1573         init = 0;
1574         defprocbctls = 0;
1575         while(waserror()){
1576                 kstrcpy(vmx.errstr, up->errstr, ERRMAX);
1577                 vmx.state = VMXDEAD;
1578         }
1579         for(;;){
1580                 if(!init){
1581                         init = 1;
1582                         vmxstart();
1583                         vmx.state = VMXREADY;
1584                         defprocbctls = vmcsread(PROCB_CTLS);
1585                 }
1586                 runcmd();
1587                 if(vmx.state == VMXRUNNING){
1588                         procbctls = defprocbctls;
1589                         if((vmx.onentry & STEP) != 0){
1590                                 procbctls |= PROCB_MONTRAP;
1591                                 dostep(1);
1592                                 if(waserror()){
1593                                         dostep(0);
1594                                         nexterror();
1595                                 }
1596                         }
1597                         if((vmx.onentry & POSTEX) != 0){
1598                                 vmcswrite(VMENTRY_INTRINFO, vmx.exc.info);
1599                                 vmcswrite(VMENTRY_INTRCODE, vmx.exc.code);
1600                                 vmcswrite(VMENTRY_INTRILEN, vmx.exc.ilen);
1601                                 vmx.onentry &= ~POSTEX;
1602                         }
1603                         if((vmx.onentry & POSTIRQ) != 0 && (vmx.onentry & STEP) == 0){
1604                                 if((vmx.onentry & POSTEX) == 0 && (vmcsread(GUEST_RFLAGS) & 1<<9) != 0 && (vmcsread(GUEST_CANINTR) & 3) == 0){
1605                                         vmcswrite(VMENTRY_INTRINFO, vmx.irq.info);
1606                                         vmcswrite(VMENTRY_INTRCODE, vmx.irq.code);
1607                                         vmcswrite(VMENTRY_INTRILEN, vmx.irq.ilen);
1608                                         vmx.onentry &= ~POSTIRQ;
1609                                         vmx.got |= GOTIRQACK;
1610                                         vmx.irqack = vmx.irq;
1611                                 }else
1612                                         procbctls |= PROCB_IRQWIN;
1613                         }
1614                         if((vmx.onentry & FLUSHVPID) != 0){
1615                                 if(invvpid(INVLOCAL, vmx.vpid, 0) < 0)
1616                                         error("invvpid failed");
1617                                 vmx.onentry &= ~FLUSHVPID;
1618                         }
1619                         if((vmx.onentry & FLUSHEPT) != 0){
1620                                 if(invept(INVLOCAL, PADDR(vmx.pml4) | 3<<3, 0) < 0)
1621                                         error("invept failed");
1622                                 vmx.onentry &= ~FLUSHEPT;
1623                         }
1624                         vmcswrite(PROCB_CTLS, procbctls);
1625                         vmx.got &= ~GOTEXIT;
1626                         
1627                         x = splhi();
1628                         if(sizeof(uintptr) == 8){
1629                                 rdmsr(FSbase, &v);
1630                                 vmwrite(HOST_FSBASE, v);
1631                         }
1632                         if((vmx.dr[7] & ~0xd400) != 0)
1633                                 putdr01236(vmx.dr);
1634                         fpsserestore(vmx.fp);
1635                         putcr2(vmx.cr2);
1636                         rc = vmlaunch(&vmx.ureg, vmx.launched);
1637                         vmx.cr2 = getcr2();
1638                         fpssesave(vmx.fp);
1639                         splx(x);
1640                         if(rc < 0)
1641                                 error("vmlaunch failed");
1642                         vmx.launched = 1;
1643                         if((vmx.onentry & STEP) != 0){
1644                                 dostep(0);
1645                                 poperror();
1646                         }
1647                         processexit();
1648                 }else{
1649                         up->psstate = "Idle";
1650                         sleep(&vmx.cmdwait, gotcmd, nil);
1651                         up->psstate = nil;
1652                 }
1653         }
1654 }
1655
/* qid paths of the files served by this device */
enum {
	Qdir = 0,
	Qctl = 1,
	Qregs = 2,
	Qstatus = 3,
	Qmap = 4,
	Qwait = 5,
	Qfpregs = 6,
};
1665
1666 static Dirtab vmxdir[] = {
1667         ".",            { Qdir, 0, QTDIR },     0,              0550,
1668         "ctl",          { Qctl, 0, 0 },         0,              0660,
1669         "regs",         { Qregs, 0, 0 },        0,              0660,
1670         "status",       { Qstatus, 0, 0 },      0,              0440,
1671         "map",          { Qmap, 0, 0 },         0,              0660,
1672         "wait",         { Qwait, 0, 0 },        0,              0440,
1673         "fpregs",       { Qfpregs, 0, 0 },      0,              0660,
1674 };
1675
/* indices of the messages accepted on the ctl file, see vmxctlmsg */
enum {
	CMinit = 0,
	CMquit = 1,
	CMgo = 2,
	CMstop = 3,
	CMstep = 4,
	CMexc = 5,
	CMirq = 6,
	CMextrap = 7,
};
1686
1687 static Cmdtab vmxctlmsg[] = {
1688         CMinit,         "init",         1,
1689         CMquit,         "quit",         1,
1690         CMgo,           "go",           0,
1691         CMstop,         "stop",         1,
1692         CMstep,         "step",         0,
1693         CMexc,          "exc",          2,
1694         CMirq,          "irq",          0,
1695         CMextrap,       "extrap",       2,
1696 };
1697
1698 static int
1699 iscmddone(void *cp)
1700 {
1701         return (((VmCmd*)cp)->flags & CMDFDONE) != 0;
1702 }
1703
/*
 * Have the VM process execute f(cmd, args...) and wait for the result.
 *
 * The command descriptor lives on the caller's stack; it is appended
 * to the submit queue and the caller sleeps until the VM process has
 * released it.  Raises "no VM" when no VM exists, Equit when the VM
 * is shutting down, and the command's own error if it failed.
 * Returns the value returned by f.
 */
static int
vmxcmd(int (*f)(VmCmd *, va_list), ...)
{
	VmCmd cmd;
	
	if(vmx.state == VMXINACTIVE)
		error("no VM");
	if(vmx.state == VMXENDING)
	ending:	/* also reached by goto from the locked re-check below */
		error(Equit);
	memset(&cmd, 0, sizeof(VmCmd));
	/* errors of the command are reported in the caller's error string */
	cmd.errstr = up->errstr;
	cmd.cmd = f;
	va_start(cmd.va, f);
	 
	ilock(&vmx.cmdlock);
	/* check again under the lock that killcmds() takes to empty the queue */
	if(vmx.state == VMXENDING){
		iunlock(&vmx.cmdlock);
		goto ending;
	}
	*vmx.lastcmd = &cmd;
	vmx.lastcmd = &cmd.next;
	iunlock(&vmx.cmdlock);
	
	/*
	 * cmd is on this stack frame and still linked into the queue,
	 * so an error raised while waiting must not unwind past here:
	 * flag the command as scratched and go on waiting for its release.
	 */
	while(waserror())
		cmd.scratched = 1;
	wakeup(&vmx.cmdwait);
	do
		sleep(&cmd, iscmddone, &cmd);
	while(!iscmddone(&cmd));
	poperror();
	/* cmdrelease() sets CMDFDONE under this lock; wait until it has let go of cmd */
	lock(&cmd);
	unlock(&cmd);
	if((cmd.flags & CMDFFAIL) != 0)
		error(up->errstr);
	return cmd.retval;
}
1741
1742 static Chan *
1743 vmxattach(char *spec)
1744 {
1745         if(vmx.state == NOVMX) error(Enodev);
1746         return devattach('X', spec);
1747 }
1748
1749 static Walkqid*
1750 vmxwalk(Chan *c, Chan *nc, char **name, int nname)
1751 {
1752         return devwalk(c, nc, name, nname, vmxdir, nelem(vmxdir), devgen);
1753 }
1754
1755 static int
1756 vmxstat(Chan *c, uchar *dp, int n)
1757 {
1758         return devstat(c, dp, n, vmxdir, nelem(vmxdir), devgen);
1759 }
1760
1761 static Chan*
1762 vmxopen(Chan* c, int omode)
1763 {
1764         Chan *ch;
1765
1766         if(c->qid.path != Qdir && !iseve()) error(Eperm);
1767         ch = devopen(c, omode, vmxdir, nelem(vmxdir), devgen);
1768         if(ch->qid.path == Qmap){
1769                 if((omode & OTRUNC) != 0)
1770                         vmxcmd(cmdclearmeminfo);
1771         }
1772         return ch;
1773 }
1774
/* Close hook of the device: nothing to do. */
static void
vmxclose(Chan*)
{
}
1779
1780 static long
1781 vmxread(Chan* c, void* a, long n, vlong off)
1782 {
1783         static char regbuf[4096];
1784         static char membuf[4096];
1785         int rc;
1786
1787         switch((ulong)c->qid.path){
1788         case Qdir:
1789                 return devdirread(c, a, n, vmxdir, nelem(vmxdir), devgen);
1790         case Qregs:
1791                 if(off == 0)
1792                         vmxcmd(cmdgetregs, regbuf, regbuf + sizeof(regbuf));
1793                 return readstr(off, a, n, regbuf);
1794         case Qmap:
1795                 if(off == 0)
1796                         vmxcmd(cmdgetmeminfo, membuf, membuf + sizeof(membuf));
1797                 return readstr(off, a, n, membuf);
1798         case Qstatus:
1799                 {
1800                         char buf[ERRMAX+128];
1801                         char errbuf[ERRMAX];
1802                         int status;
1803                         
1804                         status = vmx.state;
1805                         if(status == VMXDEAD){
1806                                 vmxcmd(cmdstatus, errbuf);
1807                                 snprint(buf, sizeof(buf), "%s %#q\n", statenames[status], errbuf);
1808                         }else if(status >= 0 && status < nelem(statenames))
1809                                 snprint(buf, sizeof(buf), "%s\n", statenames[status]);
1810                         else
1811                                 snprint(buf, sizeof(buf), "%d\n", status);
1812                         return readstr(off, a, n, buf);
1813                 }
1814         case Qwait:
1815                 {
1816                         char buf[512];
1817                         
1818                         rc = vmxcmd(cmdwait, buf, buf + sizeof(buf));
1819                         if(rc > n) rc = n;
1820                         if(rc > 0) memmove(a, buf, rc);
1821                         return rc;
1822                 }
1823         case Qfpregs:
1824                 {
1825                         char buf[sizeof(FPsave)];
1826                         
1827                         vmxcmd(cmdgetfpregs, buf);
1828                         if(n < 0 || off < 0 || off >= sizeof(buf)) n = 0;
1829                         else if(off + n > sizeof(buf)) n = sizeof(buf) - off;
1830                         if(n != 0) memmove(a, buf + off, n);
1831                         return n;
1832                 }
1833         default:
1834                 error(Egreg);
1835                 break;
1836         }
1837         return 0;
1838 }
1839
1840 static long
1841 vmxwrite(Chan* c, void* a, long n, vlong off)
1842 {
1843         static QLock initlock;
1844         Cmdbuf *cb;
1845         Cmdtab *ct;
1846         char *s;
1847         int rc;
1848         int i;
1849         VmMem tmpmem;
1850
1851         switch((ulong)c->qid.path){
1852         case Qdir:
1853                 error(Eperm);
1854         case Qctl:
1855                 cb = parsecmd(a, n);
1856                 if(waserror()){
1857                         free(cb);
1858                         nexterror();
1859                 }
1860                 ct = lookupcmd(cb, vmxctlmsg, nelem(vmxctlmsg));
1861                 switch(ct->index){
1862                 case CMinit:
1863                         qlock(&initlock);
1864                         if(waserror()){
1865                                 qunlock(&initlock);
1866                                 nexterror();
1867                         }
1868                         if(vmx.state != VMXINACTIVE)
1869                                 error("vmx already active");
1870                         vmx.state = VMXINIT;
1871                         kproc("kvmx", vmxproc, nil);
1872                         poperror();
1873                         qunlock(&initlock);
1874                         if(vmxcmd(cmdstatus, up->errstr) == VMXDEAD)
1875                                 error(up->errstr);
1876                         break;
1877                 case CMquit:
1878                         vmxcmd(cmdquit);
1879                         break;
1880                 case CMgo:
1881                         s = nil;
1882                         if(cb->nf == 2) kstrdup(&s, cb->f[1]);
1883                         else if(cb->nf != 1) error(Ebadarg);
1884                         if(waserror()){
1885                                 free(s);
1886                                 nexterror();
1887                         }
1888                         vmxcmd(cmdgo, s);
1889                         poperror();
1890                         free(s);
1891                         break;
1892                 case CMstop:
1893                         vmxcmd(cmdstop);
1894                         break;
1895                 case CMstep:
1896                         rc = 0;
1897                         for(i = 1; i < cb->nf; i++)
1898                                 if(strcmp(cb->f[i], "-map") == 0){
1899                                         rc = 1;
1900                                         if(i+4 > cb->nf) error("missing argument");
1901                                         memset(&tmpmem, 0, sizeof(tmpmem));
1902                                         tmpmem.lo = strtoull(cb->f[i+1], &s, 0);
1903                                         if(*s != 0 || !vmokpage(tmpmem.lo)) error("invalid address");
1904                                         tmpmem.hi = tmpmem.lo + BY2PG;
1905                                         tmpmem.attr = 0x407;
1906                                         tmpmem.seg = _globalsegattach(cb->f[i+2]);
1907                                         if(tmpmem.seg == nil) error("unknown segment");
1908                                         tmpmem.off = strtoull(cb->f[i+3], &s, 0);
1909                                         if(*s != 0 || !vmokpage(tmpmem.off)) error("invalid offset");
1910                                         i += 3;
1911                                 }else
1912                                         error(Ebadctl);
1913                         vmxcmd(cmdstep, rc ? &tmpmem : nil);
1914                         break;
1915                 case CMexc:
1916                         s = nil;
1917                         kstrdup(&s, cb->f[1]);
1918                         if(waserror()){
1919                                 free(s);
1920                                 nexterror();
1921                         }
1922                         vmxcmd(cmdexcept, s);
1923                         poperror();
1924                         free(s);
1925                         break;
1926                 case CMirq:
1927                         s = nil;
1928                         if(cb->nf == 2)
1929                                 kstrdup(&s, cb->f[1]);
1930                         if(waserror()){
1931                                 free(s);
1932                                 nexterror();
1933                         }
1934                         vmxcmd(cmdirq, s);
1935                         poperror();
1936                         free(s);
1937                         break;
1938                 case CMextrap:
1939                         s = nil;
1940                         kstrdup(&s, cb->f[1]);
1941                         if(waserror()){
1942                                 free(s);
1943                                 nexterror();
1944                         }
1945                         vmxcmd(cmdextrap, s);
1946                         poperror();
1947                         free(s);
1948                         break;
1949
1950                 default:
1951                         error(Egreg);
1952                 }
1953                 poperror();
1954                 free(cb);
1955                 break;
1956         case Qmap:
1957         case Qregs:
1958                 s = malloc(n+1);
1959                 if(s == nil) error(Enomem);
1960                 if(waserror()){
1961                         free(s);
1962                         nexterror();
1963                 }
1964                 memmove(s, a, n);
1965                 s[n] = 0;
1966                 rc = vmxcmd((ulong)c->qid.path == Qregs ? cmdsetregs : cmdsetmeminfo, s);
1967                 poperror();
1968                 free(s);
1969                 return rc;
1970         case Qfpregs:
1971                 {
1972                         char buf[sizeof(FPsave)];
1973                         
1974                         if(n > sizeof(FPsave)) n = sizeof(FPsave);
1975                         memmove(buf, a, n);
1976                         return vmxcmd(cmdsetfpregs, buf, n, off);
1977                 }
1978         default:
1979                 error(Egreg);
1980                 break;
1981         }
1982         return n;
1983 }
1984
1985 Dev vmxdevtab = {
1986         'X',
1987         "vmx",
1988         
1989         vmxreset,
1990         devinit,
1991         vmxshutdown,
1992         vmxattach,
1993         vmxwalk,
1994         vmxstat,
1995         vmxopen,
1996         devcreate,
1997         vmxclose,
1998         vmxread,
1999         devbread,
2000         vmxwrite,
2001         devbwrite,
2002         devremove,
2003         devwstat,
2004 };