]> git.lizzy.rs Git - plan9front.git/blob - sys/src/9/pc/devvmx.c
devvmx: error handling in clearmeminfo
[plan9front.git] / sys / src / 9 / pc / devvmx.c
1 #include "u.h"
2 #include "../port/lib.h"
3 #include "mem.h"
4 #include "dat.h"
5 #include "fns.h"
6 #include "../port/error.h"
7 #include "ureg.h"
8
9 extern int vmxon(u64int);
10 extern int vmxoff(void);
11 extern int vmclear(u64int);
12 extern int vmptrld(u64int);
13 extern int vmlaunch(Ureg *, int);
14 extern int vmread(u32int, uintptr *);
15 extern int vmwrite(u32int, uintptr);
16 extern int invept(u32int, uvlong, uvlong);
17 extern int invvpid(u32int, uvlong, uvlong);
18
19 static vlong procb_ctls, pinb_ctls;
20
/*
 * VMX constants: capability MSR numbers, VMCS field numbers (the addr
 * argument of vmcsread()/vmcswrite()) and the bit masks stored in them.
 */
enum {
	/* capability MSRs */
	VMX_BASIC_MSR = 0x480,
	VMX_PINB_CTLS_MSR = 0x481,
	VMX_PROCB_CTLS_MSR = 0x482,
	VMX_VMEXIT_CTLS_MSR = 0x483,
	VMX_VMENTRY_CTLS_MSR = 0x484,
	VMX_MISC_MSR = 0x485,
	VMX_CR0_FIXED0 = 0x486,
	VMX_CR0_FIXED1 = 0x487,
	VMX_CR4_FIXED0 = 0x488,
	VMX_CR4_FIXED1 = 0x489,
	VMX_VMCS_ENUM = 0x48A,
	VMX_PROCB_CTLS2_MSR = 0x48B,
	VMX_TRUE_PINB_CTLS_MSR = 0x48D,
	VMX_TRUE_PROCB_CTLS_MSR = 0x48E,
	VMX_TRUE_EXIT_CTLS_MSR = 0x48F,
	VMX_TRUE_ENTRY_CTLS_MSR = 0x490,
	VMX_VMFUNC_MSR = 0x491,

	/* pin-based controls field and its bits */
	PINB_CTLS = 0x4000,
	PINB_EXITIRQ = 1 << 0,
	PINB_EXITNMI = 1 << 3,

	/* primary processor-based controls field and its bits */
	PROCB_CTLS = 0x4002,
	PROCB_IRQWIN = 1 << 2,
	PROCB_EXITHLT = 1 << 7,
	PROCB_EXITINVLPG = 1 << 9,
	PROCB_EXITMWAIT = 1 << 10,
	PROCB_EXITRDPMC = 1 << 11,
	PROCB_EXITRDTSC = 1 << 12,
	PROCB_EXITCR3LD = 1 << 15,
	PROCB_EXITCR3ST = 1 << 16,
	PROCB_EXITCR8LD = 1 << 19,
	PROCB_EXITCR8ST = 1 << 20,
	PROCB_EXITMOVDR = 1 << 23,
	PROCB_EXITIO = 1 << 24,
	PROCB_MONTRAP = 1 << 27,
	PROCB_MSRBITMAP = 1 << 28,
	PROCB_EXITMONITOR = 1 << 29,
	PROCB_EXITPAUSE = 1 << 30,
	PROCB_USECTLS2 = 1 << 31,

	/* secondary processor-based controls field and its bits */
	PROCB_CTLS2 = 0x401E,
	PROCB_EPT = 1 << 1,
	PROCB_EXITGDT = 1 << 2,
	PROCB_VPID = 1 << 5,
	PROCB_UNRESTR = 1 << 7,

	EXC_BITMAP = 0x4004,
	PFAULT_MASK = 0x4006,
	PFAULT_MATCH = 0x4008,
	CR3_TARGCNT = 0x400a,
	MSR_BITMAP = 0x2004,

	/* VM-exit controls field and its bits */
	VMEXIT_CTLS = 0x400c,
	VMEXIT_ST_DEBUG = 1 << 2,
	VMEXIT_HOST64 = 1 << 9,
	VMEXIT_LD_IA32_PERF_GLOBAL_CTRL = 1 << 12,
	VMEXIT_ST_IA32_PAT = 1 << 18,
	VMEXIT_LD_IA32_PAT = 1 << 19,
	VMEXIT_ST_IA32_EFER = 1 << 20,
	VMEXIT_LD_IA32_EFER = 1 << 21,

	/* MSR store/load area counts and addresses */
	VMEXIT_MSRSTCNT = 0x400e,
	VMEXIT_MSRLDCNT = 0x4010,
	VMEXIT_MSRSTADDR = 0x2006,
	VMEXIT_MSRLDADDR = 0x2008,
	VMENTRY_MSRLDADDR = 0x200A,

	/* VM-entry controls field and its bits */
	VMENTRY_CTLS = 0x4012,
	VMENTRY_LD_DEBUG = 1 << 2,
	VMENTRY_GUEST64 = 1 << 9,
	VMENTRY_LD_IA32_PERF_GLOBAL_CTRL = 1 << 13,
	VMENTRY_LD_IA32_PAT = 1 << 14,
	VMENTRY_LD_IA32_EFER = 1 << 15,

	VMENTRY_MSRLDCNT = 0x4014,
	VMENTRY_INTRINFO = 0x4016,
	VMENTRY_INTRCODE = 0x4018,
	VMENTRY_INTRILEN = 0x401a,

	VMCS_LINK = 0x2800,

	/* guest state fields */
	GUEST_ES = 0x800,
	GUEST_CS = 0x802,
	GUEST_SS = 0x804,
	GUEST_DS = 0x806,
	GUEST_FS = 0x808,
	GUEST_GS = 0x80A,
	GUEST_LDTR = 0x80C,
	GUEST_TR = 0x80E,
	GUEST_CR0 = 0x6800,
	GUEST_CR3 = 0x6802,
	GUEST_CR4 = 0x6804,
	GUEST_ESLIMIT = 0x4800,
	GUEST_CSLIMIT = 0x4802,
	GUEST_SSLIMIT = 0x4804,
	GUEST_DSLIMIT = 0x4806,
	GUEST_FSLIMIT = 0x4808,
	GUEST_GSLIMIT = 0x480A,
	GUEST_LDTRLIMIT = 0x480C,
	GUEST_TRLIMIT = 0x480E,
	GUEST_GDTRLIMIT = 0x4810,
	GUEST_IDTRLIMIT = 0x4812,
	GUEST_ESPERM = 0x4814,
	GUEST_CSPERM = 0x4816,
	GUEST_SSPERM = 0x4818,
	GUEST_DSPERM = 0x481A,
	GUEST_FSPERM = 0x481C,
	GUEST_GSPERM = 0x481E,
	GUEST_LDTRPERM = 0x4820,
	GUEST_TRPERM = 0x4822,
	GUEST_CR0MASK = 0x6000,
	GUEST_CR4MASK = 0x6002,
	GUEST_CR0SHADOW = 0x6004,
	GUEST_CR4SHADOW = 0x6006,
	GUEST_ESBASE = 0x6806,
	GUEST_CSBASE = 0x6808,
	GUEST_SSBASE = 0x680A,
	GUEST_DSBASE = 0x680C,
	GUEST_FSBASE = 0x680E,
	GUEST_GSBASE = 0x6810,
	GUEST_LDTRBASE = 0x6812,
	GUEST_TRBASE = 0x6814,
	GUEST_GDTRBASE = 0x6816,
	GUEST_IDTRBASE = 0x6818,
	GUEST_DR7 = 0x681A,
	GUEST_RSP = 0x681C,
	GUEST_RIP = 0x681E,
	GUEST_RFLAGS = 0x6820,
	GUEST_IA32_DEBUGCTL = 0x2802,
	GUEST_IA32_PAT = 0x2804,
	GUEST_IA32_EFER = 0x2806,
	GUEST_IA32_PERF_GLOBAL_CTRL = 0x2808,

	/* host state fields */
	HOST_ES = 0xC00,
	HOST_CS = 0xC02,
	HOST_SS = 0xC04,
	HOST_DS = 0xC06,
	HOST_FS = 0xC08,
	HOST_GS = 0xC0A,
	HOST_TR = 0xC0C,
	HOST_CR0 = 0x6C00,
	HOST_CR3 = 0x6C02,
	HOST_CR4 = 0x6C04,
	HOST_FSBASE = 0x6C06,
	HOST_GSBASE = 0x6C08,
	HOST_TRBASE = 0x6C0A,
	HOST_GDTR = 0x6C0C,
	HOST_IDTR = 0x6C0E,
	HOST_RSP = 0x6C14,
	HOST_RIP = 0x6C16,
	HOST_IA32_PAT = 0x2C00,
	HOST_IA32_EFER = 0x2C02,
	HOST_IA32_PERF_GLOBAL_CTRL = 0x2C04,

	GUEST_CANINTR = 0x4824,

	/* exit information fields */
	VM_INSTRERR = 0x4400,
	VM_EXREASON = 0x4402,
	VM_EXINTRINFO = 0x4404,
	VM_EXINTRCODE = 0x4406,
	VM_IDTVECINFO = 0x4408,
	VM_IDTVECCODE = 0x440A,
	VM_EXINSTRLEN = 0x440C,
	VM_EXINSTRINFO = 0x440E,
	VM_EXQUALIF = 0x6400,
	VM_IORCX = 0x6402,
	VM_IORSI = 0x6404,
	VM_IORDI = 0x6406,
	VM_IORIP = 0x6408,
	VM_GUESTVA = 0x640A,
	VM_GUESTPA = 0x2400,

	VM_VPID = 0x000,
	VM_EPTPIDX = 0x0004,

	VM_EPTP = 0x201A,
	VM_EPTPLA = 0x2024,

	INVLOCAL = 1,
};
203
204 enum {
205         CR0RSVD = 0x1ffaffc0,
206         CR4RSVD = 0xff889000,
207         CR4MCE = 1<<6,
208         CR4VMXE = 1<<13,
209         CR4SMXE = 1<<14,
210         CR4PKE = 1<<22,
211         
212         CR0KERNEL = CR0RSVD | 0x30 | (uintptr)0xFFFFFFFF00000000ULL,
213         CR4KERNEL = CR4RSVD | CR4VMXE | CR4SMXE | CR4MCE | CR4PKE | (uintptr)0xFFFFFFFF00000000ULL
214 };
215
/* upper bound on vmx.nmsr, enforced by vmxaddmsr() */
enum { MAXMSR = 512 };
219
/* forward declarations so the structures below can refer to each other */
typedef struct VmMem VmMem;
typedef struct VmIntr VmIntr;
typedef struct VmCmd VmCmd;
typedef struct Vmx Vmx;
224
225 struct VmMem {
226         uvlong lo, hi;
227         Segment *seg;
228         uintptr off;
229         char *name;
230         VmMem *next, *prev;
231         u16int attr;
232 };
233
234 struct VmIntr {
235         u32int info, code, ilen;
236 };
237
238 struct Vmx {
239         enum {
240                 NOVMX,
241                 VMXINACTIVE,
242                 VMXINIT,
243                 VMXREADY,
244                 VMXRUNNING,
245                 VMXDEAD,
246                 VMXENDING,
247         } state;
248         char errstr[ERRMAX];
249         Ureg ureg;
250         uintptr cr2;
251         uintptr dr[8]; /* DR7 is also kept in VMCS */
252         FPsave *fp;
253         u8int launched;
254         u8int on;
255         u8int vpid;
256         enum {
257                 FLUSHVPID = 1,
258                 FLUSHEPT = 2,
259                 STEP = 4,
260                 POSTEX = 8,
261                 POSTIRQ = 16,
262         } onentry;
263         
264         Rendez cmdwait;
265         Lock cmdlock;
266         VmCmd *firstcmd, **lastcmd;
267         VmCmd *postponed;
268         uvlong *pml4;
269         VmMem mem;
270         
271         enum {
272                 GOTEXIT = 1,
273                 GOTIRQACK = 2,
274                 GOTSTEP = 4,
275                 GOTSTEPERR = 8,
276         } got;
277         VmMem *stepmap;
278         VmIntr exc, irq, irqack;
279         
280         u64int *msrhost, *msrguest;
281         u32int *msrbits;
282         int nmsr;
283 };
284
285 struct VmCmd {
286         enum {
287                 CMDFDONE = 1,
288                 CMDFFAIL = 2,
289                 CMDFPOSTP = 4,
290         } flags;
291         u8int scratched;
292         Rendez;
293         Lock;
294         int (*cmd)(VmCmd *, va_list);
295         int retval;
296         char *errstr;
297         va_list va;
298         VmCmd *next;
299 };
300
301 static char Equit[] = "vmx: ending";
302
303 static char *statenames[] = {
304         [NOVMX] "novmx",
305         [VMXINACTIVE] "inactive",
306         [VMXINIT] "init",
307         [VMXREADY] "ready",
308         [VMXRUNNING] "running",
309         [VMXDEAD] "dead",
310         [VMXENDING]"ending"
311 };
312
313 static Vmx vmx;
314
315 static u64int
316 vmcsread(u32int addr)
317 {
318         int rc;
319         u64int val;
320
321         val = 0;
322         rc = vmread(addr, (uintptr *) &val);
323         if(rc >= 0 && sizeof(uintptr) == 4 && (addr & 0x6000) == 0x2000)
324                 rc = vmread(addr | 1, (uintptr *) &val + 1);
325         if(rc < 0){
326                 char errbuf[128];
327                 snprint(errbuf, sizeof(errbuf), "vmcsread failed (%#.4ux)", addr);
328                 error(errbuf);
329         }
330         return val;
331 }
332
333 static void
334 vmcswrite(u32int addr, u64int val)
335 {
336         int rc;
337         
338         rc = vmwrite(addr, val);
339         if(rc >= 0 && sizeof(uintptr) == 4 && (addr & 0x6000) == 0x2000)
340                 rc = vmwrite(addr | 1, val >> 32);
341         if(rc < 0){
342                 char errbuf[128];
343                 snprint(errbuf, sizeof(errbuf), "vmcswrite failed (%#.4ux = %#.16ullx)", addr, val);
344                 error(errbuf);
345         }
346 }
347
348 static uvlong
349 parseval(char *s)
350 {
351         uvlong v;
352         char *p;
353
354         v = strtoull(s, &p, 0);
355         if(p == s || *p != 0) error("invalid value");
356         return v;
357 }
358
359 static char *
360 cr0fakeread(char *p, char *e)
361 {
362         uvlong guest, mask, shadow;
363         
364         guest = vmcsread(GUEST_CR0);
365         mask = vmcsread(GUEST_CR0MASK);
366         shadow = vmcsread(GUEST_CR0SHADOW);
367         return seprint(p, e, "%#.*ullx", sizeof(uintptr) * 2, guest & ~mask | shadow & mask);
368 }
369
370 static char *
371 cr4fakeread(char *p, char *e)
372 {
373         uvlong guest, mask, shadow;
374         
375         guest = vmcsread(GUEST_CR4);
376         mask = vmcsread(GUEST_CR4MASK);
377         shadow = vmcsread(GUEST_CR4SHADOW);
378         return seprint(p, e, "%#.*ullx", sizeof(uintptr) * 2, guest & ~mask | shadow & mask);
379 }
380
381 static void
382 updatelma(void)
383 {
384         uvlong cr0, efer, nefer, ectrl;
385
386         if(sizeof(uintptr) != 8) return;
387         cr0 = vmcsread(GUEST_CR0);
388         efer = vmcsread(GUEST_IA32_EFER);
389         nefer = efer & ~0x400 | efer << 2 & cr0 >> 21 & 0x400;
390         if(efer == nefer) return;
391         vmcswrite(GUEST_IA32_EFER, nefer);
392         ectrl = vmcsread(VMENTRY_CTLS);
393         ectrl = ectrl & ~0x200 | nefer >> 1 & 0x200;
394         vmcswrite(VMENTRY_CTLS, ectrl);
395 }
396
397 static int
398 cr0realwrite(char *s)
399 {
400         uvlong v;
401         
402         v = parseval(s);
403         vmcswrite(GUEST_CR0, vmcsread(GUEST_CR0) & CR0KERNEL | v & ~CR0KERNEL);
404         updatelma();
405         return 0;
406 }
407
408 static int
409 cr0maskwrite(char *s)
410 {
411         uvlong v;
412         
413         v = parseval(s);
414         vmcswrite(GUEST_CR0MASK, v | CR0KERNEL);
415         return 0;
416 }
417
418 static int
419 eferwrite(char *s)
420 {
421         uvlong v;
422         
423         v = parseval(s);
424         vmcswrite(GUEST_IA32_EFER, v);
425         updatelma();
426         return 0;
427 }
428
429 static int
430 cr4realwrite(char *s)
431 {
432         uvlong v;
433         
434         v = parseval(s);
435         vmcswrite(GUEST_CR4, vmcsread(GUEST_CR4) & CR4KERNEL | v & ~CR4KERNEL);
436         return 0;
437 }
438
439 static int
440 cr4maskwrite(char *s)
441 {
442         uvlong v;
443         
444         v = parseval(s);
445         vmcswrite(GUEST_CR4MASK, v | CR4KERNEL);
446         return 0;
447 }
448
449 static int
450 dr7write(char *s)
451 {
452         uvlong v;
453         
454         v = (u32int) parseval(s);
455         vmcswrite(GUEST_DR7, vmx.dr[7] = (u32int) v);
456         return 0;
457 }
458
/*
 * Write handler installed in guestregs[] for registers that must not
 * be modified: ignores its argument and always returns -1.
 */
static int
readonly(char *)
{
	return -1;
}
464
465 static int
466 dr6write(char *s)
467 {
468         uvlong v;
469         
470         v = parseval(s);
471         vmx.dr[6] = (u32int) v;
472         return 0;
473 }
474
475 typedef struct GuestReg GuestReg;
476 struct GuestReg {
477         int offset;
478         u8int size; /* in bytes; 0 means == uintptr */
479         char *name;
480         char *(*read)(char *, char *);
481         int (*write)(char *);
482 };
483 #define VMXVAR(x) ~(ulong)&(((Vmx*)0)->x)
484 #define UREG(x) VMXVAR(ureg.x)
485 static GuestReg guestregs[] = {
486         {GUEST_RIP, 0, "pc"},
487         {GUEST_RSP, 0, "sp"},
488         {GUEST_RFLAGS, 0, "flags"},
489         {UREG(ax), 0, "ax"},
490         {UREG(bx), 0, "bx"},
491         {UREG(cx), 0, "cx"},
492         {UREG(dx), 0, "dx"},
493         {UREG(bp), 0, "bp"},
494         {UREG(si), 0, "si"},
495         {UREG(di), 0, "di"},
496 #ifdef RMACH
497         {UREG(r8), 0, "r8"},
498         {UREG(r9), 0, "r9"},
499         {UREG(r10), 0, "r10"},
500         {UREG(r11), 0, "r11"},
501         {UREG(r12), 0, "r12"},
502         {UREG(r13), 0, "r13"},
503         {UREG(r14), 0, "r14"},
504         {UREG(r15), 0, "r15"},
505 #endif
506         {GUEST_GDTRBASE, 0, "gdtrbase"},
507         {GUEST_GDTRLIMIT, 4, "gdtrlimit"},
508         {GUEST_IDTRBASE, 0, "idtrbase"},
509         {GUEST_IDTRLIMIT, 4, "idtrlimit"},
510         {GUEST_CS, 2, "cs"},
511         {GUEST_CSBASE, 0, "csbase"},
512         {GUEST_CSLIMIT, 4, "cslimit"},
513         {GUEST_CSPERM, 4, "csperm"},
514         {GUEST_DS, 2, "ds"},
515         {GUEST_DSBASE, 0, "dsbase"},
516         {GUEST_DSLIMIT, 4, "dslimit"},
517         {GUEST_DSPERM, 4, "dsperm"},
518         {GUEST_ES, 2, "es"},
519         {GUEST_ESBASE, 0, "esbase"},
520         {GUEST_ESLIMIT, 4, "eslimit"},
521         {GUEST_ESPERM, 4, "esperm"},
522         {GUEST_FS, 2, "fs"},
523         {GUEST_FSBASE, 0, "fsbase"},
524         {GUEST_FSLIMIT, 4, "fslimit"},
525         {GUEST_FSPERM, 4, "fsperm"},
526         {GUEST_GS, 2, "gs"},
527         {GUEST_GSBASE, 0, "gsbase"},
528         {GUEST_GSLIMIT, 4, "gslimit"},
529         {GUEST_GSPERM, 4, "gsperm"},
530         {GUEST_SS, 2, "ss"},
531         {GUEST_SSBASE, 0, "ssbase"},
532         {GUEST_SSLIMIT, 4, "sslimit"},
533         {GUEST_SSPERM, 4, "ssperm"},
534         {GUEST_TR, 2, "tr"},
535         {GUEST_TRBASE, 0, "trbase"},
536         {GUEST_TRLIMIT, 4, "trlimit"},
537         {GUEST_TRPERM, 4, "trperm"},
538         {GUEST_LDTR, 2, "ldtr"},
539         {GUEST_LDTRBASE, 0, "ldtrbase"},
540         {GUEST_LDTRLIMIT, 4, "ldtrlimit"},
541         {GUEST_LDTRPERM, 4, "ldtrperm"},
542         {GUEST_CR0, 0, "cr0real", nil, cr0realwrite},
543         {GUEST_CR0SHADOW, 0, "cr0fake", cr0fakeread},
544         {GUEST_CR0MASK, 0, "cr0mask", nil, cr0maskwrite},
545         {VMXVAR(cr2), 0, "cr2"},
546         {GUEST_CR3, 0, "cr3"},
547         {GUEST_CR4, 0, "cr4real", nil, cr4realwrite},
548         {GUEST_CR4SHADOW, 0, "cr4fake", cr4fakeread},
549         {GUEST_CR4MASK, 0, "cr4mask", nil, cr4maskwrite},
550         {GUEST_IA32_PAT, 8, "pat"},
551         {GUEST_IA32_EFER, 8, "efer", nil, eferwrite},
552         {VMXVAR(dr[0]), 0, "dr0"},
553         {VMXVAR(dr[1]), 0, "dr1"},
554         {VMXVAR(dr[2]), 0, "dr2"},
555         {VMXVAR(dr[3]), 0, "dr3"},
556         {VMXVAR(dr[6]), 0, "dr6", nil, dr6write},
557         {GUEST_DR7, 0, "dr7", nil, dr7write},
558         {VM_INSTRERR, 4, "instructionerror", nil, readonly},
559         {VM_EXREASON, 4, "exitreason", nil, readonly},
560         {VM_EXQUALIF, 0, "exitqualification", nil, readonly},
561         {VM_EXINTRINFO, 4, "exitinterruptinfo", nil, readonly},
562         {VM_EXINTRCODE, 4, "exitinterruptcode", nil, readonly},
563         {VM_EXINSTRLEN, 4, "exitinstructionlen", nil, readonly},
564         {VM_EXINSTRINFO, 4, "exitinstructioninfo", nil, readonly},
565         {VM_GUESTVA, 0, "exitva", nil, readonly},
566         {VM_GUESTPA, 0, "exitpa", nil, readonly},
567         {VM_IDTVECINFO, 4, "idtinterruptinfo", nil, readonly},
568         {VM_IDTVECCODE, 4, "idtinterruptcode", nil, readonly},
569 };
570
571 static int
572 vmokpage(u64int addr)
573 {
574         return (addr & 0xfff) == 0 && addr >> 48 == 0;
575 }
576
577 static uvlong *
578 eptwalk(uvlong addr)
579 {
580         uvlong *tab, *nt;
581         uvlong v;
582         int i;
583         
584         tab = vmx.pml4;
585         if(tab == nil) error(Egreg);
586         for(i = 3; i >= 1; i--){
587                 tab += addr >> 12 + 9 * i & 0x1ff;
588                 v = *tab;
589                 if((v & 3) == 0){
590                         nt = mallocalign(BY2PG, BY2PG, 0, 0);
591                         if(nt == nil) error(Enomem);
592                         memset(nt, 0, BY2PG);
593                         v = PADDR(nt) | 0x407;
594                         *tab = v;
595                 }
596                 tab = KADDR(v & ~0xfff);
597         }
598         return tab + (addr >> 12 & 0x1ff);
599 }
600
601 static void
602 eptfree(uvlong *tab, int level)
603 {
604         int i;
605         uvlong v, *t;
606         
607         if(tab == nil) error(Egreg);
608         if(level < 3){
609                 for(i = 0; i < 512; i++){
610                         v = tab[i];
611                         if((v & 3) == 0) continue;
612                         t = KADDR(v & ~0xfff);
613                         eptfree(t, level + 1);
614                         tab[i] = 0;
615                 }
616         }
617         if(level > 0)
618                 free(tab);              
619 }
620
621 static void
622 epttranslate(VmMem *mp)
623 {
624         uvlong p, hpa;
625
626         if(mp->seg != nil && (mp->seg->type & SG_TYPE) != SG_FIXED || (mp->lo & 0xfff) != 0 || (mp->hi & 0xfff) != 0 || (uint)mp->attr >= 0x1000)
627                 error(Egreg);
628         if(mp->seg != nil){
629                 if(mp->seg->base + mp->off + (mp->hi - mp->lo) > mp->seg->top)
630                         error(Egreg);
631                 hpa = mp->seg->map[0]->pages[0]->pa + mp->off;
632         }else
633                 hpa = 0;
634         for(p = mp->lo; p < mp->hi; p += BY2PG)
635                 *eptwalk(p) = hpa + (p - mp->lo) + mp->attr;
636         vmx.onentry |= FLUSHEPT;
637 }
638
/* two-letter memory type names, indexed by bits 3-5 of VmMem.attr */
static char *mtype[] = {
	"uc",
	"wc",
	"02",
	"03",
	"wt",
	"wp",
	"wb",
	"07",
};
640
641 static int
642 cmdgetmeminfo(VmCmd *, va_list va)
643 {
644         VmMem *mp;
645         char *p0, *e, *p;
646         char attr[4];
647         char mt[4];
648         
649         p0 = va_arg(va, char *);
650         e = va_arg(va, char *);
651         p = p0;
652         for(mp = vmx.mem.next; mp != &vmx.mem; mp = mp->next){
653                 attr[0] = (mp->attr & 1) != 0 ? 'r' : '-';
654                 attr[1] = (mp->attr & 2) != 0 ? 'w' : '-';
655                 attr[2] = (mp->attr & 4) != 0 ? 'x' : '-';
656                 attr[3] = 0;
657                 *(ushort*)mt = *(u16int*)mtype[mp->attr >> 3 & 7];
658                 mt[2] = (mp->attr & 0x40) != 0 ? '!' : 0;
659                 mt[3] = 0;
660                 if(mp->name == nil)
661                         p = seprint(p, e, "%s %s %#llux %#llux\n", attr, mt, mp->lo, mp->hi);
662                 else
663                         p = seprint(p, e, "%s %s %#llux %#llux %s %#llux\n", attr, mt, mp->lo, mp->hi, mp->name, (uvlong)mp->off);
664         }
665         return p - p0;
666 }
667
668 static int
669 cmdclearmeminfo(VmCmd *, va_list)
670 {
671         VmMem *mp, *mn;
672
673         eptfree(vmx.pml4, 0);
674         for(mp = vmx.mem.next; mp != &vmx.mem; mp = mn){
675                 free(mp->name);
676                 putseg(mp->seg);
677                 mn = mp->next;
678                 free(mp);
679         }
680         vmx.mem.prev = &vmx.mem;
681         vmx.mem.next = &vmx.mem;
682         vmx.onentry |= FLUSHEPT;
683         return 0;
684 }
685
686 extern Segment* (*_globalsegattach)(char*);
687
688 static int
689 cmdsetmeminfo(VmCmd *, va_list va)
690 {
691         char *p0, *p, *q, *r;
692         int j;
693         char *f[10];
694         VmMem *mp;
695         int rc;
696
697         if(vmx.pml4 == nil)
698                 error(Egreg);   
699         p0 = va_arg(va, char *);
700         p = p0;
701         mp = nil;
702         for(;;){
703                 q = strchr(p, '\n');
704                 if(q == 0) break;
705                 *q = 0;
706                 if(mp == nil){
707                         mp = malloc(sizeof(VmMem));
708                         if(mp == nil)
709                                 error(Enomem);
710                 }
711                 memset(mp, 0, sizeof(VmMem));
712                 if(waserror()){
713                         putseg(mp->seg);
714                         free(mp->name);
715                         free(mp);
716                         nexterror();
717                 }
718                 rc = tokenize(p, f, nelem(f));
719                 p = q + 1;
720                 if(rc == 0) goto next;
721                 if(rc != 4 && rc != 6) error("number of fields wrong");
722                 for(q = f[0]; *q != 0; q++)
723                         switch(*q){
724                         case 'r': if((mp->attr & 1) != 0) goto tinval; mp->attr |= 1; break;
725                         case 'w': if((mp->attr & 2) != 0) goto tinval; mp->attr |= 2; break;
726                         case 'x': if((mp->attr & 4) != 0) goto tinval; mp->attr |= 0x404; break;
727                         case '-': break;
728                         default: tinval: error("invalid access field");
729                         }
730                 for(j = 0; j < 8; j++)
731                         if(strncmp(mtype[j], f[1], 2) == 0){
732                                 mp->attr |= j << 3;
733                                 break;
734                         }
735                 if(j == 8 || strlen(f[1]) > 3) error("invalid memory type");
736                 if(f[1][2] == '!') mp->attr |= 0x40;
737                 else if(f[1][2] != 0) error("invalid memory type");
738                 mp->lo = strtoull(f[2], &r, 0);
739                 if(*r != 0 || !vmokpage(mp->lo)) error("invalid low guest physical address");
740                 mp->hi = strtoull(f[3], &r, 0);
741                 if(*r != 0 || !vmokpage(mp->hi) || mp->hi <= mp->lo) error("invalid high guest physical address");
742                 mp->off = strtoull(f[5], &r, 0);
743                 if(*r != 0 || !vmokpage(mp->off)) error("invalid offset");
744                 if((mp->attr & 7) != 0){
745                         if(rc != 6) error("number of fields wrong");
746                         mp->seg = _globalsegattach(f[4]);
747                         if(mp->seg == nil) error("no such segment");
748                         if(mp->seg->base + mp->off + (mp->hi - mp->lo) > mp->seg->top) error("out of bounds");
749                         kstrdup(&mp->name, f[4]);
750                 }
751                 epttranslate(mp);
752                 mp->prev = vmx.mem.prev;
753                 mp->next = &vmx.mem;
754                 mp->prev->next = mp;
755                 mp->next->prev = mp;
756                 mp = nil;
757         next:
758                 poperror();
759         }
760         free(mp);
761         return p - p0;
762 }
763
764 static void
765 vmxreset(void)
766 {
767         ulong regs[4];
768         vlong msr;
769
770         cpuid(1, regs);
771         if((regs[2] & 1<<5) == 0) return;
772         /* check if disabled by BIOS */
773         if(rdmsr(0x3a, &msr) < 0) return;
774         if((msr & 5) != 5){
775                 if((msr & 1) == 0){ /* msr still unlocked */
776                         wrmsr(0x3a, msr | 5);
777                         if(rdmsr(0x3a, &msr) < 0)
778                                 return;
779                 }
780                 if((msr & 5) != 5)
781                         return;
782         }
783         if(rdmsr(VMX_PROCB_CTLS_MSR, &msr) < 0) return;
784         if((vlong)msr >= 0) return;
785         if(rdmsr(VMX_PROCB_CTLS2_MSR, &msr) < 0) return;
786         if((msr >> 32 & PROCB_EPT) == 0 || (msr >> 32 & PROCB_VPID) == 0) return;
787         vmx.state = VMXINACTIVE;
788         vmx.lastcmd = &vmx.firstcmd;
789         vmx.mem.next = &vmx.mem;
790         vmx.mem.prev = &vmx.mem;
791 }
792
793 static void
794 vmxshutdown(void)
795 {
796         if(vmx.on){
797                 vmxoff();
798                 vmx.on = 0;
799         }
800 }
801
802 static void
803 vmxaddmsr(u32int msr, u64int gval)
804 {
805         int i;
806
807         if(vmx.nmsr >= MAXMSR)
808                 error("too many MSRs");
809         i = 2 * vmx.nmsr++;
810         vmx.msrhost[i] = msr;
811         rdmsr(msr, (vlong *) &vmx.msrhost[i+1]);
812         vmx.msrguest[i] = msr;
813         vmx.msrguest[i+1] = gval;
814         vmcswrite(VMENTRY_MSRLDCNT, vmx.nmsr);
815         vmcswrite(VMEXIT_MSRSTCNT, vmx.nmsr);
816         vmcswrite(VMEXIT_MSRLDCNT, vmx.nmsr);
817 }
818
819 static void
820 vmxtrapmsr(u32int msr, enum { TRAPRD = 1, TRAPWR = 2 } state)
821 {
822         u32int m;
823         
824         if(msr >= 0x2000 && (u32int)(msr - 0xc0000000) >= 0x2000)
825                 return;
826         msr = msr & 0x1fff | msr >> 18 & 0x2000;
827         m = 1<<(msr & 31);
828         if((state & TRAPRD) != 0)
829                 vmx.msrbits[msr / 32] |= m;
830         else
831                 vmx.msrbits[msr / 32] &= ~m;
832         if((state & TRAPWR) != 0)
833                 vmx.msrbits[msr / 32 + 512] |= m;
834         else
835                 vmx.msrbits[msr / 32 + 512] &= ~m;
836 }
837
838 static void
839 vmcsinit(void)
840 {
841         vlong msr;
842         u32int x;
843         
844         memset(&vmx.ureg, 0, sizeof(vmx.ureg));
845         vmx.launched = 0;
846         vmx.onentry = 0;        
847         
848         if(rdmsr(VMX_BASIC_MSR, &msr) < 0) error("rdmsr(VMX_BASIC_MSR) failed");
849         if((msr & 1ULL<<55) != 0){
850                 if(rdmsr(VMX_TRUE_PROCB_CTLS_MSR, &procb_ctls) < 0) error("rdmsr(VMX_TRUE_PROCB_CTLS_MSR) failed");
851                 if(rdmsr(VMX_TRUE_PINB_CTLS_MSR, &pinb_ctls) < 0) error("rdmsr(VMX_TRUE_PINB_CTLS_MSR) failed");
852         }else{
853                 if(rdmsr(VMX_PROCB_CTLS_MSR, &procb_ctls) < 0) error("rdmsr(VMX_PROCB_CTLS_MSR) failed");
854                 if(rdmsr(VMX_PINB_CTLS_MSR, &pinb_ctls) < 0) error("rdmsr(VMX_PINB_CTLS_MSR) failed");
855         }
856
857         if(rdmsr(VMX_PINB_CTLS_MSR, &msr) < 0) error("rdmsr(VMX_PINB_CTLS_MSR failed");
858         x = (u32int)pinb_ctls | 1<<1 | 1<<2 | 1<<4; /* currently reserved default1 bits */
859         x |= PINB_EXITIRQ | PINB_EXITNMI;
860         x &= pinb_ctls >> 32;
861         vmcswrite(PINB_CTLS, x);
862         
863         if(rdmsr(VMX_PROCB_CTLS_MSR, &msr) < 0) error("rdmsr(VMX_PROCB_CTLS_MSR failed");
864         x = (u32int)procb_ctls | 1<<1 | 7<<4 | 1<<8 | 1<<13 | 1<<14 | 1<<26; /* currently reserved default1 bits */
865         x |= PROCB_EXITHLT | PROCB_EXITMWAIT;
866         x |= PROCB_EXITMOVDR | PROCB_EXITIO | PROCB_EXITMONITOR | PROCB_MSRBITMAP;
867         x |= PROCB_USECTLS2;
868         x &= msr >> 32;
869         vmcswrite(PROCB_CTLS, x);
870         
871         if(rdmsr(VMX_PROCB_CTLS2_MSR, &msr) < 0) error("rdmsr(VMX_PROCB_CTLS2_MSR failed");
872         x = PROCB_EPT | PROCB_VPID | PROCB_UNRESTR;
873         x &= msr >> 32;
874         vmcswrite(PROCB_CTLS2, x);
875         
876         if(rdmsr(VMX_VMEXIT_CTLS_MSR, &msr) < 0) error("rdmsr(VMX_VMEXIT_CTLS_MSR failed");
877         x = (u32int)msr;
878         if(sizeof(uintptr) == 8) x |= VMEXIT_HOST64;
879         x |= VMEXIT_LD_IA32_PAT | VMEXIT_LD_IA32_EFER | VMEXIT_ST_DEBUG | VMEXIT_ST_IA32_EFER;
880         x &= msr >> 32;
881         vmcswrite(VMEXIT_CTLS, x);
882         
883         if(rdmsr(VMX_VMENTRY_CTLS_MSR, &msr) < 0) error("rdmsr(VMX_VMENTRY_CTLS_MSR failed");
884         x = (u32int)msr;
885         x |= VMENTRY_LD_IA32_PAT | VMENTRY_LD_IA32_EFER | VMENTRY_LD_DEBUG;
886         x &= msr >> 32;
887         vmcswrite(VMENTRY_CTLS, x);
888         
889         vmcswrite(CR3_TARGCNT, 0);
890         vmcswrite(VMENTRY_INTRINFO, 0);
891         vmcswrite(VMCS_LINK, -1);
892         
893         vmcswrite(HOST_CS, KESEL);
894         vmcswrite(HOST_DS, KDSEL);
895         vmcswrite(HOST_ES, KDSEL);
896         vmcswrite(HOST_FS, KDSEL);
897         vmcswrite(HOST_GS, KDSEL);
898         vmcswrite(HOST_SS, KDSEL);
899         vmcswrite(HOST_TR, TSSSEL);
900         vmcswrite(HOST_CR0, getcr0() & ~0xe);
901         vmcswrite(HOST_CR3, getcr3());
902         vmcswrite(HOST_CR4, getcr4());
903         rdmsr(FSbase, &msr);
904         vmcswrite(HOST_FSBASE, msr);
905         rdmsr(GSbase, &msr);
906         vmcswrite(HOST_GSBASE, msr);
907         vmcswrite(HOST_TRBASE, (uintptr) m->tss);
908         vmcswrite(HOST_GDTR, (uintptr) m->gdt);
909         vmcswrite(HOST_IDTR, IDTADDR);
910         if(rdmsr(0x277, &msr) < 0) error("rdmsr(IA32_PAT) failed");
911         vmcswrite(HOST_IA32_PAT, msr);
912         if(rdmsr(Efer, &msr) < 0) error("rdmsr(IA32_EFER) failed");
913         vmcswrite(HOST_IA32_EFER, msr);
914         
915         vmcswrite(EXC_BITMAP, 1<<18|1<<1);
916         vmcswrite(PFAULT_MASK, 0);
917         vmcswrite(PFAULT_MATCH, 0);
918         
919         vmcswrite(GUEST_CSBASE, 0);
920         vmcswrite(GUEST_DSBASE, 0);
921         vmcswrite(GUEST_ESBASE, 0);
922         vmcswrite(GUEST_FSBASE, 0);
923         vmcswrite(GUEST_GSBASE, 0);
924         vmcswrite(GUEST_SSBASE, 0);
925         vmcswrite(GUEST_CSLIMIT, -1);
926         vmcswrite(GUEST_DSLIMIT, -1);
927         vmcswrite(GUEST_ESLIMIT, -1);
928         vmcswrite(GUEST_FSLIMIT, -1);
929         vmcswrite(GUEST_GSLIMIT, -1);
930         vmcswrite(GUEST_SSLIMIT, -1);
931         vmcswrite(GUEST_CSPERM, (SEGG|SEGD|SEGP|SEGPL(0)|SEGEXEC|SEGR) >> 8 | 1);
932         vmcswrite(GUEST_DSPERM, (SEGG|SEGB|SEGP|SEGPL(0)|SEGDATA|SEGW) >> 8 | 1);
933         vmcswrite(GUEST_ESPERM, (SEGG|SEGB|SEGP|SEGPL(0)|SEGDATA|SEGW) >> 8 | 1);
934         vmcswrite(GUEST_FSPERM, (SEGG|SEGB|SEGP|SEGPL(0)|SEGDATA|SEGW) >> 8 | 1);
935         vmcswrite(GUEST_GSPERM, (SEGG|SEGB|SEGP|SEGPL(0)|SEGDATA|SEGW) >> 8 | 1);
936         vmcswrite(GUEST_SSPERM, (SEGG|SEGB|SEGP|SEGPL(0)|SEGDATA|SEGW) >> 8 | 1);
937         vmcswrite(GUEST_LDTRPERM, 1<<16);
938
939         vmcswrite(GUEST_CR0MASK, CR0KERNEL);
940         vmcswrite(GUEST_CR4MASK, CR4KERNEL);
941         vmcswrite(GUEST_CR0, getcr0() & CR0KERNEL | 0x31);
942         vmcswrite(GUEST_CR3, 0);
943         vmcswrite(GUEST_CR4, getcr4() & CR4KERNEL);
944         vmcswrite(GUEST_CR0SHADOW, getcr0() & CR0KERNEL | 0x31);
945         vmcswrite(GUEST_CR4SHADOW, getcr4() & ~CR4VMXE & CR4KERNEL);
946         
947         vmcswrite(GUEST_IA32_PAT, 0x0007040600070406ULL);
948         vmcswrite(GUEST_IA32_EFER, 0);
949         
950         vmcswrite(GUEST_TRBASE, 0);
951         vmcswrite(GUEST_TRLIMIT, 0xffff);
952         vmcswrite(GUEST_TRPERM, (SEGTSS|SEGPL(0)|SEGP) >> 8 | 2);
953         
954         vmx.pml4 = mallocalign(BY2PG, BY2PG, 0, 0);
955         memset(vmx.pml4, 0, BY2PG);
956         vmcswrite(VM_EPTP, PADDR(vmx.pml4) | 3<<3);
957         vmx.vpid = 1;
958         vmcswrite(VM_VPID, vmx.vpid);
959         
960         vmcswrite(GUEST_RFLAGS, 2);
961         
962         vmx.onentry = FLUSHVPID | FLUSHEPT;
963         
964         vmx.fp = mallocalign(512, 512, 0, 0);
965         if(vmx.fp == nil)
966                 error(Enomem);
967         fpinit();
968         fpsave(vmx.fp);
969         
970         vmx.msrhost = mallocalign(MAXMSR*16, 16, 0, 0);
971         vmx.msrguest = mallocalign(MAXMSR*16, 16, 0, 0);
972         vmx.msrbits = mallocalign(4096, 4096, 0, 0);
973         if(vmx.msrhost == nil || vmx.msrguest == nil || vmx.msrbits == nil)
974                 error(Enomem);
975         memset(vmx.msrbits, -1, 4096);
976         vmxtrapmsr(Efer, 0);
977         vmcswrite(VMENTRY_MSRLDADDR, PADDR(vmx.msrguest));
978         vmcswrite(VMEXIT_MSRSTADDR, PADDR(vmx.msrguest));
979         vmcswrite(VMEXIT_MSRLDADDR, PADDR(vmx.msrhost));
980         vmcswrite(MSR_BITMAP, PADDR(vmx.msrbits));
981         
982         if(sizeof(uintptr) == 8){
983                 vmxaddmsr(Star, 0);
984                 vmxaddmsr(Lstar, 0);
985                 vmxaddmsr(Cstar, 0);
986                 vmxaddmsr(Sfmask, 0);
987                 vmxaddmsr(KernelGSbase, 0);
988                 vmxtrapmsr(Star, 0);
989                 vmxtrapmsr(Lstar, 0);
990                 vmxtrapmsr(Cstar, 0);
991                 vmxtrapmsr(Sfmask, 0);
992                 vmxtrapmsr(FSbase, 0);
993                 vmxtrapmsr(GSbase, 0);
994                 vmxtrapmsr(KernelGSbase, 0);
995         }
996 }
997
998 static void
999 vmxstart(void)
1000 {
1001         static uchar *vmcs; /* also vmxon region */
1002         vlong msr, msr2;
1003         uintptr cr;
1004         vlong x;
1005
1006         putcr4(getcr4() | 0x2000); /* set VMXE */
1007         putcr0(getcr0() | 0x20); /* set NE */
1008         cr = getcr0();
1009         if(rdmsr(VMX_CR0_FIXED0, &msr) < 0) error("rdmsr(VMX_CR0_FIXED0) failed");
1010         if(rdmsr(VMX_CR0_FIXED1, &msr2) < 0) error("rdmsr(VMX_CR0_FIXED1) failed");
1011         if((cr & ~msr & ~msr2 | ~cr & msr & msr2) != 0) error("invalid CR0 value");
1012         cr = getcr4();
1013         if(rdmsr(VMX_CR4_FIXED0, &msr) < 0) error("rdmsr(VMX_CR4_FIXED0) failed");
1014         if(rdmsr(VMX_CR4_FIXED1, &msr2) < 0) error("rdmsr(VMX_CR4_FIXED1) failed");
1015         if((cr & ~msr & ~msr2 | ~cr & msr & msr2) != 0) error("invalid CR4 value");
1016
1017         if(vmcs == nil){
1018                 vmcs = mallocalign(8192, 4096, 0, 0);
1019                 if(vmcs == nil)
1020                         error(Enomem);
1021         }
1022         memset(vmcs, 0, 8192);
1023         rdmsr(VMX_BASIC_MSR, &x);
1024         *(ulong*)vmcs = x;
1025         *(ulong*)&vmcs[4096] = x;
1026         if(vmxon(PADDR(vmcs + 4096)) < 0)
1027                 error("vmxon failed");
1028         vmx.on = 1;
1029         if(vmclear(PADDR(vmcs)) < 0)
1030                 error("vmclear failed");
1031         if(vmptrld(PADDR(vmcs)) < 0)
1032                 error("vmptrld failed");
1033         vmcsinit();
1034 }
1035
1036 static void
1037 cmdrelease(VmCmd *p, int f)
1038 {
1039         lock(p);
1040         p->flags |= CMDFDONE | f;
1041         wakeup(p);
1042         unlock(p);
1043 }
1044
1045 static void
1046 killcmds(VmCmd *notme)
1047 {
1048         VmCmd *p, *pn;
1049         
1050         for(p = vmx.postponed; p != nil; p = pn){
1051                 pn = p->next;
1052                 p->next = nil;
1053                 if(p == notme) continue;
1054                 kstrcpy(p->errstr, Equit, ERRMAX);
1055                 cmdrelease(p, CMDFFAIL);
1056         }
1057         vmx.postponed = nil;
1058         ilock(&vmx.cmdlock);
1059         for(p = vmx.firstcmd; p != nil; p = pn){
1060                 pn = p->next;
1061                 p->next = nil;
1062                 if(p == notme) continue;
1063                 kstrcpy(p->errstr, Equit, ERRMAX);
1064                 cmdrelease(p, CMDFFAIL);
1065         }
1066         vmx.firstcmd = nil;
1067         vmx.lastcmd = &vmx.firstcmd;
1068         iunlock(&vmx.cmdlock);
1069 }
1070
1071 static int
1072 cmdquit(VmCmd *p, va_list va)
1073 {
1074         vmx.state = VMXENDING;
1075         killcmds(p);
1076
1077         if(vmx.pml4 != nil){
1078                 cmdclearmeminfo(p, va);
1079                 free(vmx.pml4);
1080                 vmx.pml4 = nil;
1081         }
1082         vmx.got = 0;
1083         vmx.onentry = 0;
1084         vmx.stepmap = nil;
1085         
1086         free(vmx.msrhost);
1087         free(vmx.msrguest);
1088         vmx.msrhost = nil;
1089         vmx.msrguest = nil;
1090         vmx.nmsr = 0;
1091
1092         if(vmx.on)
1093                 vmxoff();
1094         vmx.state = VMXINACTIVE;
1095         cmdrelease(p, 0);
1096         pexit(Equit, 1);
1097         return 0;
1098 }
1099
1100 static void
1101 processexit(void)
1102 {
1103         u32int reason;
1104         
1105         reason = vmcsread(VM_EXREASON);
1106         if((reason & 1<<31) == 0)
1107                 switch(reason & 0xffff){
1108                 case 1: /* external interrupt */
1109                 case 3: /* INIT */
1110                 case 4: /* SIPI */
1111                 case 5: /* IO SMI */
1112                 case 6: /* SMI */
1113                 case 7: /* IRQ window */
1114                 case 8: /* NMI window */
1115                         return;
1116                 case 37:
1117                         if((vmx.onentry & STEP) != 0){
1118                                 vmx.state = VMXREADY;
1119                                 vmx.got |= GOTSTEP;
1120                                 vmx.onentry &= ~STEP;
1121                                 return;
1122                         }
1123                         break;
1124                 }
1125         if((vmx.onentry & STEP) != 0){
1126                 print("VMX: exit reason %#x when expected step...\n", reason & 0xffff);
1127                 vmx.onentry &= ~STEP;
1128                 vmx.got |= GOTSTEP|GOTSTEPERR;
1129         }
1130         vmx.state = VMXREADY;
1131         vmx.got |= GOTEXIT;
1132 }
1133
1134 static int
1135 cmdgetregs(VmCmd *, va_list va)
1136 {
1137         char *p0, *e;
1138         GuestReg *r;
1139         uvlong val;
1140         int s;
1141         char *p;
1142         
1143         p0 = va_arg(va, char *);
1144         e = va_arg(va, char *);
1145         p = p0;
1146         for(r = guestregs; r < guestregs + nelem(guestregs); r++)
1147                 if(r->read != nil){
1148                         p = seprint(p, e, "%s ", r->name);
1149                         p = r->read(p, e);
1150                         p = strecpy(p, e, "\n");
1151                 }else{
1152                         if(r->offset >= 0)
1153                                 val = vmcsread(r->offset);
1154                         else
1155                                 val = *(uintptr*)((uchar*)&vmx + ~r->offset);
1156                         s = r->size;
1157                         if(s == 0) s = sizeof(uintptr);
1158                         p = seprint(p, e, "%s %#.*llux\n", r->name, s * 2, val);
1159                 }
1160         return p - p0;
1161 }
1162
1163 static int
1164 setregs(char *p0, char rs, char *fs)
1165 {
1166         char *p, *q, *rp;
1167         char *f[10];
1168         GuestReg *r;
1169         uvlong val;
1170         int sz;
1171         int rc;
1172
1173         p = p0;
1174         for(;;){
1175                 q = strchr(p, rs);
1176                 if(q == 0) break;
1177                 *q = 0;
1178                 rc = getfields(p, f, nelem(f), 1, fs);
1179                 p = q + 1;
1180                 if(rc == 0) continue;
1181                 if(rc != 2) error("number of fields wrong");
1182                 
1183                 for(r = guestregs; r < guestregs + nelem(guestregs); r++)
1184                         if(strcmp(r->name, f[0]) == 0)
1185                                 break;
1186                 if(r == guestregs + nelem(guestregs))
1187                         error("unknown register");
1188                 if(r->write != nil){
1189                         r->write(f[1]);
1190                         continue;
1191                 }
1192                 val = strtoull(f[1], &rp, 0);
1193                 sz = r->size;
1194                 if(sz == 0) sz = sizeof(uintptr);
1195                 if(rp == f[1] || *rp != 0) error("invalid value");
1196                 if(r->offset >= 0)
1197                         vmcswrite(r->offset, val);
1198                 else{
1199                         assert((u32int)~r->offset + sz <= sizeof(Vmx)); 
1200                         switch(sz){
1201                         case 1: *(u8int*)((u8int*)&vmx + (u32int)~r->offset) = val; break;
1202                         case 2: *(u16int*)((u8int*)&vmx + (u32int)~r->offset) = val; break;
1203                         case 4: *(u32int*)((u8int*)&vmx + (u32int)~r->offset) = val; break;
1204                         case 8: *(u64int*)((u8int*)&vmx + (u32int)~r->offset) = val; break;
1205                         default: error(Egreg);
1206                         }
1207                 }
1208         }
1209         return p - p0;
1210 }
1211
1212 static int
1213 cmdsetregs(VmCmd *, va_list va)
1214 {
1215         return setregs(va_arg(va, char *), '\n', " \t");
1216 }
1217
1218 static int
1219 cmdgetfpregs(VmCmd *, va_list va)
1220 {
1221         uchar *p;
1222         
1223         p = va_arg(va, uchar *);
1224         memmove(p, vmx.fp, sizeof(FPsave));
1225         return sizeof(FPsave);
1226 }
1227
1228 static int
1229 cmdsetfpregs(VmCmd *, va_list va)
1230 {
1231         uchar *p;
1232         ulong n;
1233         vlong off;
1234         
1235         p = va_arg(va, uchar *);
1236         n = va_arg(va, ulong);
1237         off = va_arg(va, vlong);
1238         if(off < 0 || off >= sizeof(FPsave)) n = 0;
1239         else if(off + n > sizeof(FPsave)) n = sizeof(FPsave) - n;
1240         memmove((uchar*)vmx.fp + off, p, n);
1241         return n;
1242 }
1243
1244 static int
1245 cmdgo(VmCmd *, va_list va)
1246 {
1247         char *r;
1248
1249         if(vmx.state != VMXREADY)
1250                 error("VM not ready");
1251         r = va_arg(va, char *);
1252         if(r != nil) setregs(r, ';', "=");
1253         vmx.state = VMXRUNNING;
1254         return 0;
1255 }
1256
1257 static int
1258 cmdstop(VmCmd *, va_list)
1259 {
1260         if(vmx.state != VMXREADY && vmx.state != VMXRUNNING)
1261                 error("VM not ready or running");
1262         vmx.state = VMXREADY;
1263         return 0;
1264 }
1265
1266 static int
1267 cmdstatus(VmCmd *, va_list va)
1268 {       
1269         kstrcpy(va_arg(va, char *), vmx.errstr, ERRMAX);
1270         return vmx.state;
1271 }
1272
/*
 * Printable names of VM exits, indexed by the low 16 bits of the exit
 * reason.  cmdwait() prints "?N" for numbers that are beyond the table
 * or whose slot is nil (35, 38 and 42 are left out here).
 * NOTE(review): the names starting with '.' look like exits caused by
 * the guest executing that instruction -- confirm with the consumers.
 */
1273 static char *exitreasons[] = {
1274         [0] "exc", [1] "extirq", [2] "triplef", [3] "initsig", [4] "sipi", [5] "smiio", [6] "smiother", [7] "irqwin",
1275         [8] "nmiwin", [9] "taskswitch", [10] ".cpuid", [11] ".getsec", [12] ".hlt", [13] ".invd", [14] ".invlpg", [15] ".rdpmc",
1276         [16] ".rdtsc", [17] ".rsm", [18] ".vmcall", [19] ".vmclear", [20] ".vmlaunch", [21] ".vmptrld", [22] ".vmptrst", [23] ".vmread",
1277         [24] ".vmresume", [25] ".vmwrite", [26] ".vmxoff", [27] ".vmxon", [28] "movcr", [29] ".movdr", [30] "io", [31] ".rdmsr",
1278         [32] ".wrmsr", [33] "entrystate", [34] "entrymsr", [36] ".mwait", [37] "monitortrap", [39] ".monitor",
1279         [40] ".pause", [41] "mcheck", [43] "tpr", [44] "apicacc", [45] "eoi", [46] "gdtr_idtr", [47] "ldtr_tr",
1280         [48] "eptfault", [49] "eptinval", [50] ".invept", [51] ".rdtscp", [52] "preempt", [53] ".invvpid", [54] ".wbinvd", [55] ".xsetbv",
1281         [56] "apicwrite", [57] ".rdrand", [58] ".invpcid", [59] ".vmfunc", [60] ".encls", [61] ".rdseed", [62] "pmlfull", [63] ".xsaves",
1282         [64] ".xrstors", 
1283 };
1284
/*
 * Names of exception vectors, indexed by vector number; vectors
 * without an entry are nil.  cmdwait() uses the table to print an
 * exception exit, eventparse() to turn a name back into a vector.
 */
1285 static char *except[] = {
1286         [0] "#de", [1] "#db", [3] "#bp", [4] "#of", [5] "#br", [6] "#ud", [7] "#nm",
1287         [8] "#df", [10] "#ts", [11] "#np", [12] "#ss", [13] "#gp", [14] "#pf",
1288         [16] "#mf", [17] "#ac", [18] "#mc", [19] "#xm", [20] "#ve",
1289 };
1290
1291 static int
1292 cmdwait(VmCmd *cp, va_list va)
1293 {
1294         char *p, *p0, *e;
1295         u32int reason, intr;
1296         uvlong qual;
1297         u16int rno;
1298
1299         if(cp->scratched)
1300                 error(Eintr);
1301         p0 = p = va_arg(va, char *);
1302         e = va_arg(va, char *);
1303         if((vmx.got & GOTIRQACK) != 0){
1304                 p = seprint(p, e, "*ack %d\n", vmx.irqack.info & 0xff);
1305                 vmx.got &= ~GOTIRQACK;
1306                 return p - p0;
1307         }
1308         if((vmx.got & GOTEXIT) == 0){
1309                 cp->flags |= CMDFPOSTP;
1310                 return -1;
1311         }
1312         vmx.got &= ~GOTEXIT;
1313         reason = vmcsread(VM_EXREASON);
1314         qual = vmcsread(VM_EXQUALIF);
1315         rno = reason;
1316         intr = vmcsread(VM_EXINTRINFO);
1317         if((reason & 1<<31) != 0)
1318                 p = seprint(p, e, "!");
1319         if(rno == 0 && (intr & 1<<31) != 0){
1320                 if((intr & 0xff) >= nelem(except) || except[intr & 0xff] == nil)
1321                         p = seprint(p, e, "#%d ", intr & 0xff);
1322                 else
1323                         p = seprint(p, e, "%s ", except[intr & 0xff]);
1324         }else if(rno >= nelem(exitreasons) || exitreasons[rno] == nil)
1325                 p = seprint(p, e, "?%d ", rno);
1326         else
1327                 p = seprint(p, e, "%s ", exitreasons[rno]);
1328         p = seprint(p, e, "%#ullx pc %#ullx sp %#ullx ilen %#ullx iinfo %#ullx", qual, vmcsread(GUEST_RIP), vmcsread(GUEST_RSP), vmcsread(VM_EXINSTRLEN), vmcsread(VM_EXINSTRINFO));
1329         if((intr & 1<<11) != 0) p = seprint(p, e, " excode %#ullx", vmcsread(VM_EXINTRCODE));
1330         if(rno == 48 && (qual & 0x80) != 0) p = seprint(p, e, " va %#ullx", vmcsread(VM_GUESTVA));
1331         if(rno == 48 || rno == 49) p = seprint(p, e, " pa %#ullx", vmcsread(VM_GUESTPA));
1332         if(rno == 30) p = seprint(p, e, " ax %#ullx", (uvlong)vmx.ureg.ax);
1333         p = seprint(p, e, "\n");
1334         return p - p0;
1335 }
1336
1337 static int
1338 cmdstep(VmCmd *cp, va_list va)
1339 {
1340         switch(cp->retval){
1341         case 0:
1342                 if((vmx.got & GOTSTEP) != 0 || (vmx.onentry & STEP) != 0)
1343                         error(Einuse);
1344                 if(vmx.state != VMXREADY){
1345                         print("pre-step in state %s\n", statenames[vmx.state]);
1346                         error("not ready");
1347                 }
1348                 vmx.stepmap = va_arg(va, VmMem *);
1349                 vmx.onentry |= STEP;
1350                 vmx.state = VMXRUNNING;
1351                 cp->flags |= CMDFPOSTP;
1352                 return 1;
1353         case 1:
1354                 if(vmx.state != VMXREADY){
1355                         print("post-step in state %s\n", statenames[vmx.state]);
1356                         vmx.onentry &= ~STEP;
1357                         vmx.got &= ~(GOTSTEP|GOTSTEPERR);
1358                         error("not ready");
1359                 }
1360                 if((vmx.got & GOTSTEP) == 0){
1361                         cp->flags |= CMDFPOSTP;
1362                         return 1;
1363                 }
1364                 if((vmx.got & GOTSTEPERR) != 0){
1365                         vmx.got &= ~(GOTSTEP|GOTSTEPERR);
1366                         error("step failed");
1367                 }
1368                 vmx.got &= ~(GOTSTEP|GOTSTEPERR);
1369                 return 1;
1370         }
1371         return 0;
1372 }
1373
1374 static void
1375 eventparse(char *p, VmIntr *vi)
1376 {
1377         char *q, *r;
1378         int i;
1379         
1380         memset(vi, 0, sizeof(VmIntr));
1381         q = nil;
1382         kstrdup(&q, p);
1383         if(waserror()){
1384                 free(q);
1385                 memset(vi, 0, sizeof(VmIntr));
1386                 nexterror();
1387         }
1388         vi->info = 1<<31;
1389         r = strchr(q, ',');
1390         if(r != nil) *r++ = 0;
1391         for(i = 0; i < nelem(except); i++)
1392                 if(except[i] != nil && strcmp(except[i], q) == 0)
1393                         break;
1394         if(*q == '#'){
1395                 q++;
1396                 vi->info |= 3 << 8;
1397         }
1398         if(i == nelem(except)){
1399                 i = strtoul(q, &q, 10);
1400                 if(*q != 0 || i > 255) error(Ebadctl);
1401         }
1402         vi->info |= i;
1403         if((vi->info & 0x7ff) == 3 || (vi->info & 0x7ff) == 4)
1404                 vi->info += 3 << 8;
1405         if(r == nil) goto out;
1406         if(*r != ','){
1407                 vi->code = strtoul(r, &r, 0);
1408                 vi->info |= 1<<11;
1409         }else r++;
1410         if(*r == ',')
1411                 vi->ilen = strtoul(r + 1, &r, 0);
1412         if(*r != 0) error(Ebadctl);
1413 out:
1414         poperror();
1415         free(q);
1416 }
1417
1418 static int
1419 cmdexcept(VmCmd *cp, va_list va)
1420 {
1421         if(cp->scratched) error(Eintr);
1422         if((vmx.onentry & POSTEX) != 0){
1423                 cp->flags |= CMDFPOSTP;
1424                 return 0;
1425         }
1426         eventparse(va_arg(va, char *), &vmx.exc);
1427         vmx.onentry |= POSTEX;
1428         return 0;
1429 }
1430
1431 static int
1432 cmdirq(VmCmd *, va_list va)
1433 {
1434         char *p;
1435         VmIntr vi;
1436         
1437         p = va_arg(va, char *);
1438         if(p == nil)
1439                 vmx.onentry &= ~POSTIRQ;
1440         else{
1441                 eventparse(p, &vi);
1442                 vmx.irq = vi;
1443                 vmx.onentry |= POSTIRQ;
1444         }
1445         return 0;
1446 }
1447
1448
1449 static int
1450 gotcmd(void *)
1451 {
1452         int rc;
1453
1454         ilock(&vmx.cmdlock);
1455         rc = vmx.firstcmd != nil;
1456         iunlock(&vmx.cmdlock);
1457         return rc;
1458 }
1459
1460 static void
1461 markcmddone(VmCmd *p, VmCmd ***pp)
1462 {
1463         if((p->flags & (CMDFFAIL|CMDFPOSTP)) == CMDFPOSTP){
1464                 **pp = p;
1465                 *pp = &p->next;
1466         }else{
1467                 p->flags = p->flags & ~CMDFPOSTP;
1468                 cmdrelease(p, 0);
1469         }
1470 }
1471
1472 static VmCmd **
1473 markppcmddone(VmCmd **pp)
1474 {
1475         VmCmd *p;
1476         
1477         p = *pp;
1478         if((p->flags & (CMDFFAIL|CMDFPOSTP)) == CMDFPOSTP)
1479                 return &p->next;
1480         *pp = p->next;
1481         p->next = nil;
1482         p->flags = p->flags & ~CMDFPOSTP;
1483         cmdrelease(p, 0);
1484         return pp;
1485 }
1486
1487
/*
 * Run all commands that are currently due.
 *
 * First every command on the postponed list is retried, then new
 * commands are taken off the queue one by one.  A command that raises
 * an error gets up->errstr copied into its errstr and CMDFFAIL set.
 * Commands that set CMDFPOSTP without failing are kept on (or appended
 * to) the postponed list; all others are released to their issuer.
 */
1488 static void
1489 runcmd(void)
1490 {
1491         VmCmd *p, **pp;
1492         
             /*
              * pp walks the links of the postponed list; markppcmddone()
              * either steps over the command or unlinks it.
              */
1493         for(pp = &vmx.postponed; p = *pp, p != nil; ){
1494                 if(waserror()){
1495                         kstrcpy(p->errstr, up->errstr, ERRMAX);
1496                         p->flags |= CMDFFAIL;
1497                         pp = markppcmddone(pp);
1498                         continue;
1499                 }
                     /* the command sets CMDFPOSTP again if it is still not done */
1500                 p->flags &= ~CMDFPOSTP;
1501                 p->retval = p->cmd(p, p->va);
1502                 poperror();
1503                 pp = markppcmddone(pp);
1504         }
             /*
              * Here pp is the terminating link of the postponed list, so
              * markcmddone() below appends newly postponed commands there.
              */
1505         for(;;){
                     /* dequeue the first command; only the queue itself needs cmdlock */
1506                 ilock(&vmx.cmdlock);
1507                 p = vmx.firstcmd;
1508                 if(p == nil){
1509                         iunlock(&vmx.cmdlock);
1510                         break;
1511                 }
1512                 vmx.firstcmd = p->next;
                     /* p was the last element: the tail link is the head again */
1513                 if(vmx.lastcmd == &p->next)
1514                         vmx.lastcmd = &vmx.firstcmd;
1515                 iunlock(&vmx.cmdlock);
1516                 p->next = nil;
1517                 if(waserror()){
1518                         kstrcpy(p->errstr, up->errstr, ERRMAX);
1519                         p->flags |= CMDFFAIL;
1520                         markcmddone(p, &pp);
1521                         continue;
1522                 }
                     /* the issuer was interrupted while waiting (see vmxcmd) */
1523                 if(p->scratched) error(Eintr);
1524                 p->retval = p->cmd(p, p->va);
1525                 poperror();
1526                 markcmddone(p, &pp);
1527         }
1528 }
1529
1530 static void
1531 dostep(int setup)
1532 {
1533         static uvlong oldmap;
1534         static uvlong *mapptr;
1535
1536         if(setup){
1537                 if(vmx.stepmap != nil){
1538                         mapptr = eptwalk(vmx.stepmap->lo);
1539                         oldmap = *mapptr;
1540                         epttranslate(vmx.stepmap);
1541                 }
1542         }else{
1543                 vmcswrite(PROCB_CTLS, vmcsread(PROCB_CTLS) & ~(uvlong)PROCB_MONTRAP);
1544                 if(vmx.stepmap != nil){
1545                         *mapptr = oldmap;
1546                         vmx.stepmap = nil;
1547                         vmx.onentry |= FLUSHEPT;
1548                 }
1549         }
1550 }
1551
/*
 * Body of the "kvmx" kernel process that owns the virtual machine.
 *
 * After a one-time vmxstart() it loops forever: run the queued
 * commands, then either enter the guest (state VMXRUNNING) and digest
 * the resulting exit, or sleep until a command arrives.  An error
 * that reaches the outer handler is recorded in vmx.errstr and puts
 * the VM into VMXDEAD; the loop then keeps serving commands.
 * The process only ends through a command (see cmdquit).
 */
1552 static void
1553 vmxproc(void *)
1554 {
1555         int init, rc, x;
1556         u32int procbctls, defprocbctls;
1557         vlong v;
1558
             /* NOTE(review): presumably ties this process to processor 0 -- confirm procwired() */
1559         procwired(up, 0);
1560         sched();
1561         init = 0;
1562         defprocbctls = 0;
             /*
              * waserror() is called again after each error, so the handler
              * stays armed for the whole life of the loop below.
              */
1563         while(waserror()){
1564                 kstrcpy(vmx.errstr, up->errstr, ERRMAX);
1565                 vmx.state = VMXDEAD;
1566         }
1567         for(;;){
                     /* init is set first: a failed vmxstart() is not retried */
1568                 if(!init){
1569                         init = 1;
1570                         vmxstart();
1571                         vmx.state = VMXREADY;
1572                         defprocbctls = vmcsread(PROCB_CTLS);
1573                 }
1574                 runcmd();
1575                 if(vmx.state == VMXRUNNING){
                             /* start from the controls found after vmxstart() */
1576                         procbctls = defprocbctls;
1577                         if((vmx.onentry & STEP) != 0){
1578                                 procbctls |= PROCB_MONTRAP;
1579                                 dostep(1);
                                     /* undo the step setup if anything fails before the entry */
1580                                 if(waserror()){
1581                                         dostep(0);
1582                                         nexterror();
1583                                 }
1584                         }
                             /* inject the exception requested by cmdexcept() */
1585                         if((vmx.onentry & POSTEX) != 0){
1586                                 vmcswrite(VMENTRY_INTRINFO, vmx.exc.info);
1587                                 vmcswrite(VMENTRY_INTRCODE, vmx.exc.code);
1588                                 vmcswrite(VMENTRY_INTRILEN, vmx.exc.ilen);
1589                                 vmx.onentry &= ~POSTEX;
1590                         }
                             /*
                              * Inject the pending interrupt if bit 9 of the guest flags
                              * is set and the low two bits of GUEST_CANINTR are clear;
                              * otherwise set PROCB_IRQWIN for this entry.
                              * Not done while stepping.
                              * NOTE(review): POSTEX was cleared just above, so the
                              * POSTEX test below is always true here and an interrupt
                              * can overwrite the exception written above -- confirm
                              * whether that is intended.
                              */
1591                         if((vmx.onentry & POSTIRQ) != 0 && (vmx.onentry & STEP) == 0){
1592                                 if((vmx.onentry & POSTEX) == 0 && (vmcsread(GUEST_RFLAGS) & 1<<9) != 0 && (vmcsread(GUEST_CANINTR) & 3) == 0){
1593                                         vmcswrite(VMENTRY_INTRINFO, vmx.irq.info);
1594                                         vmcswrite(VMENTRY_INTRCODE, vmx.irq.code);
1595                                         vmcswrite(VMENTRY_INTRILEN, vmx.irq.ilen);
1596                                         vmx.onentry &= ~POSTIRQ;
                                             /* cmdwait() reports this as "*ack" */
1597                                         vmx.got |= GOTIRQACK;
1598                                         vmx.irqack = vmx.irq;
1599                                 }else
1600                                         procbctls |= PROCB_IRQWIN;
1601                         }
                             /* invalidations requested since the last entry */
1602                         if((vmx.onentry & FLUSHVPID) != 0){
1603                                 if(invvpid(INVLOCAL, vmx.vpid, 0) < 0)
1604                                         error("invvpid failed");
1605                                 vmx.onentry &= ~FLUSHVPID;
1606                         }
1607                         if((vmx.onentry & FLUSHEPT) != 0){
1608                                 if(invept(INVLOCAL, PADDR(vmx.pml4) | 3<<3, 0) < 0)
1609                                         error("invept failed");
1610                                 vmx.onentry &= ~FLUSHEPT;
1611                         }
1612                         vmcswrite(PROCB_CTLS, procbctls);
1613                         vmx.got &= ~GOTEXIT;
1614                         
                             /*
                              * From here until splx() interrupts are off while the
                              * processor carries guest state.
                              */
1615                         x = splhi();
                             /* 64-bit kernels refresh the host FS base before each entry */
1616                         if(sizeof(uintptr) == 8){
1617                                 rdmsr(FSbase, &v);
1618                                 vmwrite(HOST_FSBASE, v);
1619                         }
                             /* load the guest debug registers only if dr7 has bits outside 0xd400 */
1620                         if((vmx.dr[7] & ~0xd400) != 0)
1621                                 putdr01236(vmx.dr);
1622                         fpsserestore(vmx.fp);
1623                         putcr2(vmx.cr2);
                             /* returns at the next VM exit, or with rc < 0 on failure */
1624                         rc = vmlaunch(&vmx.ureg, vmx.launched);
1625                         vmx.cr2 = getcr2();
1626                         fpssesave(vmx.fp);
1627                         splx(x);
1628                         if(rc < 0)
1629                                 error("vmlaunch failed");
1630                         vmx.launched = 1;
                             /* STEP is still set here; processexit() clears it */
1631                         if((vmx.onentry & STEP) != 0){
1632                                 dostep(0);
1633                                 poperror();
1634                         }
1635                         processexit();
1636                 }else{
                             /* nothing to run: wait until vmxcmd() queues a command */
1637                         up->psstate = "Idle";
1638                         sleep(&vmx.cmdwait, gotcmd, nil);
1639                         up->psstate = nil;
1640                 }
1641         }
1642 }
1643
/*
 * Qid paths of the files of this device.  vmxdir[] binds them to file
 * names; vmxopen(), vmxread() and vmxwrite() dispatch on c->qid.path.
 */
1644 enum {
1645         Qdir,
1646         Qctl,
1647         Qregs,
1648         Qstatus,
1649         Qmap,
1650         Qwait,
1651         Qfpregs,
1652 };
1653
/*
 * Directory of the device, passed together with devgen to devwalk(),
 * devstat(), devopen() and devdirread().
 * NOTE(review): the columns look like name, qid, length and
 * permissions -- confirm against the Dirtab declaration.
 */
1654 static Dirtab vmxdir[] = {
1655         ".",            { Qdir, 0, QTDIR },     0,              0550,
1656         "ctl",          { Qctl, 0, 0 },         0,              0660,
1657         "regs",         { Qregs, 0, 0 },        0,              0660,
1658         "status",       { Qstatus, 0, 0 },      0,              0440,
1659         "map",          { Qmap, 0, 0 },         0,              0660,
1660         "wait",         { Qwait, 0, 0 },        0,              0440,
1661         "fpregs",       { Qfpregs, 0, 0 },      0,              0660,
1662 };
1663
/*
 * Indices of the messages accepted by the ctl file; vmxctlmsg[] binds
 * them to the command words and vmxwrite() switches on ct->index.
 */
1664 enum {
1665         CMinit,
1666         CMquit,
1667         CMgo,
1668         CMstop,
1669         CMstep,
1670         CMexc,
1671         CMirq,
1672 };
1673
/*
 * Table of ctl messages, searched by lookupcmd() in vmxwrite().
 * NOTE(review): the columns look like index, command word and number
 * of fields including the word, with 0 meaning "not checked" --
 * confirm against the Cmdtab declaration and lookupcmd().
 */
1674 static Cmdtab vmxctlmsg[] = {
1675         CMinit,         "init",         1,
1676         CMquit,         "quit",         1,
1677         CMgo,           "go",           0,
1678         CMstop,         "stop",         1,
1679         CMstep,         "step",         0,
1680         CMexc,          "exc",          2,
1681         CMirq,          "irq",          0,
1682 };
1683
1684 static int
1685 iscmddone(void *cp)
1686 {
1687         return (((VmCmd*)cp)->flags & CMDFDONE) != 0;
1688 }
1689
/*
 * Have the VM process execute f with the given arguments and wait
 * for the result.
 *
 * The command record lives on the caller's stack.  It is appended to
 * the queue under vmx.cmdlock, the VM process is woken, and the
 * caller sleeps until the command is marked CMDFDONE.  Raises
 * "no VM" when the VM is inactive, Equit when it is shutting down,
 * and the command's error string when the command failed; otherwise
 * returns the command's return value.
 */
1690 static int
1691 vmxcmd(int (*f)(VmCmd *, va_list), ...)
1692 {
1693         VmCmd cmd;
1694         
1695         if(vmx.state == VMXINACTIVE)
1696                 error("no VM");
             /* the label lets the second check, made under the lock, share this error */
1697         if(vmx.state == VMXENDING)
1698         ending:
1699                 error(Equit);
1700         memset(&cmd, 0, sizeof(VmCmd));
             /* a failing command leaves its message directly in our errstr */
1701         cmd.errstr = up->errstr;
1702         cmd.cmd = f;
1703         va_start(cmd.va, f);
1704          
             /* check again under the lock, then append to the queue */
1705         ilock(&vmx.cmdlock);
1706         if(vmx.state == VMXENDING){
1707                 iunlock(&vmx.cmdlock);
1708                 goto ending;
1709         }
1710         *vmx.lastcmd = &cmd;
1711         vmx.lastcmd = &cmd.next;
1712         iunlock(&vmx.cmdlock);
1713         
             /*
              * An error raised while waiting (presumably an interrupted
              * sleep) must not unwind this frame, because the queue still
              * points at cmd.  The command is flagged as scratched instead
              * and the wait goes on until it has been released.
              */
1714         while(waserror())
1715                 cmd.scratched = 1;
1716         wakeup(&vmx.cmdwait);
1717         do
1718                 sleep(&cmd, iscmddone, &cmd);
1719         while(!iscmddone(&cmd));
1720         poperror();
             /*
              * cmdrelease() sets CMDFDONE and calls wakeup() with cmd
              * locked; taking the lock once here presumably makes sure it
              * is finished with cmd before this stack frame goes away.
              */
1721         lock(&cmd);
1722         unlock(&cmd);
1723         if((cmd.flags & CMDFFAIL) != 0)
1724                 error(up->errstr);
1725         return cmd.retval;
1726 }
1727
1728 static Chan *
1729 vmxattach(char *spec)
1730 {
1731         if(vmx.state == NOVMX) error(Enodev);
1732         return devattach('X', spec);
1733 }
1734
1735 static Walkqid*
1736 vmxwalk(Chan *c, Chan *nc, char **name, int nname)
1737 {
1738         return devwalk(c, nc, name, nname, vmxdir, nelem(vmxdir), devgen);
1739 }
1740
1741 static int
1742 vmxstat(Chan *c, uchar *dp, int n)
1743 {
1744         return devstat(c, dp, n, vmxdir, nelem(vmxdir), devgen);
1745 }
1746
1747 static Chan*
1748 vmxopen(Chan* c, int omode)
1749 {
1750         Chan *ch;
1751
1752         if(c->qid.path != Qdir && !iseve()) error(Eperm);
1753         ch = devopen(c, omode, vmxdir, nelem(vmxdir), devgen);
1754         if(ch->qid.path == Qmap){
1755                 if((omode & OTRUNC) != 0)
1756                         vmxcmd(cmdclearmeminfo);
1757         }
1758         return ch;
1759 }
1760
/*
 * Close a file of the device.  Does nothing.
 */
1761 static void
1762 vmxclose(Chan*)
1763 {
1764 }
1765
1766 static long
1767 vmxread(Chan* c, void* a, long n, vlong off)
1768 {
1769         static char regbuf[4096];
1770         static char membuf[4096];
1771         int rc;
1772
1773         switch((ulong)c->qid.path){
1774         case Qdir:
1775                 return devdirread(c, a, n, vmxdir, nelem(vmxdir), devgen);
1776         case Qregs:
1777                 if(off == 0)
1778                         vmxcmd(cmdgetregs, regbuf, regbuf + sizeof(regbuf));
1779                 return readstr(off, a, n, regbuf);
1780         case Qmap:
1781                 if(off == 0)
1782                         vmxcmd(cmdgetmeminfo, membuf, membuf + sizeof(membuf));
1783                 return readstr(off, a, n, membuf);
1784         case Qstatus:
1785                 {
1786                         char buf[ERRMAX+128];
1787                         char errbuf[ERRMAX];
1788                         int status;
1789                         
1790                         status = vmx.state;
1791                         if(status == VMXDEAD){
1792                                 vmxcmd(cmdstatus, errbuf);
1793                                 snprint(buf, sizeof(buf), "%s %#q\n", statenames[status], errbuf);
1794                         }else if(status >= 0 && status < nelem(statenames))
1795                                 snprint(buf, sizeof(buf), "%s\n", statenames[status]);
1796                         else
1797                                 snprint(buf, sizeof(buf), "%d\n", status);
1798                         return readstr(off, a, n, buf);
1799                 }
1800         case Qwait:
1801                 {
1802                         char buf[512];
1803                         
1804                         rc = vmxcmd(cmdwait, buf, buf + sizeof(buf));
1805                         if(rc > n) rc = n;
1806                         if(rc > 0) memmove(a, buf, rc);
1807                         return rc;
1808                 }
1809         case Qfpregs:
1810                 {
1811                         char buf[sizeof(FPsave)];
1812                         
1813                         vmxcmd(cmdgetfpregs, buf);
1814                         if(n < 0 || off < 0 || off >= sizeof(buf)) n = 0;
1815                         else if(off + n > sizeof(buf)) n = sizeof(buf) - off;
1816                         if(n != 0) memmove(a, buf + off, n);
1817                         return n;
1818                 }
1819         default:
1820                 error(Egreg);
1821                 break;
1822         }
1823         return 0;
1824 }
1825
1826 static long
1827 vmxwrite(Chan* c, void* a, long n, vlong off)
1828 {
1829         static QLock initlock;
1830         Cmdbuf *cb;
1831         Cmdtab *ct;
1832         char *s;
1833         int rc;
1834         int i;
1835         VmMem tmpmem;
1836
1837         switch((ulong)c->qid.path){
1838         case Qdir:
1839                 error(Eperm);
1840         case Qctl:
1841                 cb = parsecmd(a, n);
1842                 if(waserror()){
1843                         free(cb);
1844                         nexterror();
1845                 }
1846                 ct = lookupcmd(cb, vmxctlmsg, nelem(vmxctlmsg));
1847                 switch(ct->index){
1848                 case CMinit:
1849                         qlock(&initlock);
1850                         if(waserror()){
1851                                 qunlock(&initlock);
1852                                 nexterror();
1853                         }
1854                         if(vmx.state != VMXINACTIVE)
1855                                 error("vmx already active");
1856                         vmx.state = VMXINIT;
1857                         kproc("kvmx", vmxproc, nil);
1858                         poperror();
1859                         qunlock(&initlock);
1860                         if(vmxcmd(cmdstatus, up->errstr) == VMXDEAD)
1861                                 error(up->errstr);
1862                         break;
1863                 case CMquit:
1864                         vmxcmd(cmdquit);
1865                         break;
1866                 case CMgo:
1867                         s = nil;
1868                         if(cb->nf == 2) kstrdup(&s, cb->f[1]);
1869                         else if(cb->nf != 1) error(Ebadarg);
1870                         if(waserror()){
1871                                 free(s);
1872                                 nexterror();
1873                         }
1874                         vmxcmd(cmdgo, s);
1875                         poperror();
1876                         free(s);
1877                         break;
1878                 case CMstop:
1879                         vmxcmd(cmdstop);
1880                         break;
1881                 case CMstep:
1882                         rc = 0;
1883                         for(i = 1; i < cb->nf; i++)
1884                                 if(strcmp(cb->f[i], "-map") == 0){
1885                                         rc = 1;
1886                                         if(i+4 > cb->nf) error("missing argument");
1887                                         memset(&tmpmem, 0, sizeof(tmpmem));
1888                                         tmpmem.lo = strtoull(cb->f[i+1], &s, 0);
1889                                         if(*s != 0 || !vmokpage(tmpmem.lo)) error("invalid address");
1890                                         tmpmem.hi = tmpmem.lo + BY2PG;
1891                                         tmpmem.attr = 0x407;
1892                                         tmpmem.seg = _globalsegattach(cb->f[i+2]);
1893                                         if(tmpmem.seg == nil) error("unknown segment");
1894                                         tmpmem.off = strtoull(cb->f[i+3], &s, 0);
1895                                         if(*s != 0 || !vmokpage(tmpmem.off)) error("invalid offset");
1896                                         i += 3;
1897                                 }else
1898                                         error(Ebadctl);
1899                         vmxcmd(cmdstep, rc ? &tmpmem : nil);
1900                         break;
1901                 case CMexc:
1902                         s = nil;
1903                         kstrdup(&s, cb->f[1]);
1904                         if(waserror()){
1905                                 free(s);
1906                                 nexterror();
1907                         }
1908                         vmxcmd(cmdexcept, s);
1909                         poperror();
1910                         free(s);
1911                         break;
1912                 case CMirq:
1913                         s = nil;
1914                         if(cb->nf == 2)
1915                                 kstrdup(&s, cb->f[1]);
1916                         if(waserror()){
1917                                 free(s);
1918                                 nexterror();
1919                         }
1920                         vmxcmd(cmdirq, s);
1921                         poperror();
1922                         free(s);
1923                         break;
1924                 default:
1925                         error(Egreg);
1926                 }
1927                 poperror();
1928                 free(cb);
1929                 break;
1930         case Qmap:
1931         case Qregs:
1932                 s = malloc(n+1);
1933                 if(s == nil) error(Enomem);
1934                 if(waserror()){
1935                         free(s);
1936                         nexterror();
1937                 }
1938                 memmove(s, a, n);
1939                 s[n] = 0;
1940                 rc = vmxcmd((ulong)c->qid.path == Qregs ? cmdsetregs : cmdsetmeminfo, s);
1941                 poperror();
1942                 free(s);
1943                 return rc;
1944         case Qfpregs:
1945                 {
1946                         char buf[sizeof(FPsave)];
1947                         
1948                         if(n > sizeof(FPsave)) n = sizeof(FPsave);
1949                         memmove(buf, a, n);
1950                         return vmxcmd(cmdsetfpregs, buf, n, off);
1951                 }
1952         default:
1953                 error(Egreg);
1954                 break;
1955         }
1956         return n;
1957 }
1958
1959 Dev vmxdevtab = {
1960         'X',
1961         "vmx",
1962         
1963         vmxreset,
1964         devinit,
1965         vmxshutdown,
1966         vmxattach,
1967         vmxwalk,
1968         vmxstat,
1969         vmxopen,
1970         devcreate,
1971         vmxclose,
1972         vmxread,
1973         devbread,
1974         vmxwrite,
1975         devbwrite,
1976         devremove,
1977         devwstat,
1978 };