]> git.lizzy.rs Git - plan9front.git/blob - sys/src/9/pc/devvmx.c
pc, pc64: adapt devvmx to work on pc64
[plan9front.git] / sys / src / 9 / pc / devvmx.c
1 #include "u.h"
2 #include "../port/lib.h"
3 #include "mem.h"
4 #include "dat.h"
5 #include "fns.h"
6 #include "../port/error.h"
7 #include "ureg.h"
8
/*
 * VMX instruction wrappers defined outside this file.
 * vmcsread and vmcswrite below treat a negative return value
 * from vmread/vmwrite as failure.
 */
9 extern int vmxon(u64int);
10 extern int vmxoff(void);
11 extern int vmclear(u64int);
12 extern int vmptrld(u64int);
13 extern int vmlaunch(Ureg *, int);
14 extern int vmread(u32int, uintptr *);
15 extern int vmwrite(u32int, uintptr);
16 extern int invept(u32int, uvlong, uvlong);
17 extern int invvpid(u32int, uvlong, uvlong);
18
/*
 * Raw 64-bit values of the processor-based and pin-based control
 * capability MSRs, filled in by vmcsinit (from the TRUE variants
 * when VMX_BASIC_MSR bit 55 is set).
 */
19 static vlong procb_ctls, pinb_ctls;
20
/*
 * Numeric constants used throughout this file.
 * The VMX_* names are MSR numbers (several are passed to rdmsr
 * below).  Most of the remaining names are VMCS field encodings
 * handed to vmcsread/vmcswrite; a field that carries flag bits is
 * followed by the bit definitions used with it.
 */
21 enum {
22         VMX_BASIC_MSR = 0x480,	/* MSR numbers */
23         VMX_PINB_CTLS_MSR = 0x481,
24         VMX_PROCB_CTLS_MSR = 0x482,
25         VMX_VMEXIT_CTLS_MSR = 0x483,
26         VMX_VMENTRY_CTLS_MSR = 0x484,
27         VMX_MISC_MSR = 0x485,
28         VMX_CR0_FIXED0 = 0x486,
29         VMX_CR0_FIXED1 = 0x487,
30         VMX_CR4_FIXED0 = 0x488,
31         VMX_CR4_FIXED1 = 0x489,
32         VMX_VMCS_ENUM = 0x48A,
33         VMX_PROCB_CTLS2_MSR = 0x48B,
34         VMX_TRUE_PINB_CTLS_MSR = 0x48D,
35         VMX_TRUE_PROCB_CTLS_MSR = 0x48E,
36         VMX_TRUE_EXIT_CTLS_MSR = 0x48F,
37         VMX_TRUE_ENTRY_CTLS_MSR = 0x490,
38         VMX_VMFUNC_MSR = 0x491,
39         
40         PINB_CTLS = 0x4000,	/* VMCS field; PINB_* below are its bits */
41         PINB_EXITIRQ = 1<<0,
42         PINB_EXITNMI = 1<<3,
43         
44         PROCB_CTLS = 0x4002,	/* VMCS field; bits follow up to PROCB_USECTLS2 */
45         PROCB_IRQWIN = 1<<2,
46         PROCB_EXITHLT = 1<<7,
47         PROCB_EXITINVLPG = 1<<9,
48         PROCB_EXITMWAIT = 1<<10,
49         PROCB_EXITRDPMC = 1<<11,
50         PROCB_EXITRDTSC = 1<<12,
51         PROCB_EXITCR3LD = 1<<15,
52         PROCB_EXITCR3ST = 1<<16,
53         PROCB_EXITCR8LD = 1<<19,
54         PROCB_EXITCR8ST = 1<<20,
55         PROCB_EXITMOVDR = 1<<23,
56         PROCB_EXITIO = 1<<24,
57         PROCB_MONTRAP = 1<<27,
58         PROCB_MSRBITMAP = 1<<28,
59         PROCB_EXITMONITOR = 1<<29,
60         PROCB_EXITPAUSE = 1<<30,
61         PROCB_USECTLS2 = 1<<31,
62         
63         PROCB_CTLS2 = 0x401E,	/* VMCS field; the four bits below are written to it in vmcsinit */
64         PROCB_EPT = 1<<1,
65         PROCB_EXITGDT = 1<<2,
66         PROCB_VPID = 1<<5,
67         PROCB_UNRESTR = 1<<7,
68
69         EXC_BITMAP = 0x4004,
70         PFAULT_MASK = 0x4006,
71         PFAULT_MATCH = 0x4008,
72         CR3_TARGCNT = 0x400a,
73         MSR_BITMAP = 0x2004,	/* receives PADDR(vmx.msrbits) in vmcsinit */
74         
75         VMEXIT_CTLS = 0x400c,	/* VMCS field; VMEXIT_* bits follow */
76         VMEXIT_ST_DEBUG = 1<<2,
77         VMEXIT_HOST64 = 1<<9,
78         VMEXIT_LD_IA32_PERF_GLOBAL_CTRL = 1<<12,
79         VMEXIT_ST_IA32_PAT = 1<<18,
80         VMEXIT_LD_IA32_PAT = 1<<19,
81         VMEXIT_ST_IA32_EFER = 1<<20,
82         VMEXIT_LD_IA32_EFER = 1<<21,    
83         
84         VMEXIT_MSRSTCNT = 0x400e,	/* MSR list counts and addresses, see vmxaddmsr/vmcsinit */
85         VMEXIT_MSRLDCNT = 0x4010,
86         VMEXIT_MSRSTADDR = 0x2006,
87         VMEXIT_MSRLDADDR = 0x2008,
88         VMENTRY_MSRLDADDR = 0x200A,
89         
90         VMENTRY_CTLS = 0x4012,	/* VMCS field; VMENTRY_* bits follow */
91         VMENTRY_LD_DEBUG = 1<<2,
92         VMENTRY_GUEST64 = 1<<9,
93         VMENTRY_LD_IA32_PERF_GLOBAL_CTRL = 1<<13,
94         VMENTRY_LD_IA32_PAT = 1<<14,
95         VMENTRY_LD_IA32_EFER = 1<<15,
96         
97         VMENTRY_MSRLDCNT = 0x4014,
98         VMENTRY_INTRINFO = 0x4016,
99         VMENTRY_INTRCODE = 0x4018,
100         VMENTRY_INTRILEN = 0x401a,
101         
102         VMCS_LINK = 0x2800,	/* set to -1 in vmcsinit */
103         
104         GUEST_ES = 0x800,	/* guest state fields (exposed by name in guestregs) */
105         GUEST_CS = 0x802,
106         GUEST_SS = 0x804,
107         GUEST_DS = 0x806,
108         GUEST_FS = 0x808,
109         GUEST_GS = 0x80A,
110         GUEST_LDTR = 0x80C,
111         GUEST_TR = 0x80E,
112         GUEST_CR0 = 0x6800,
113         GUEST_CR3 = 0x6802,
114         GUEST_CR4 = 0x6804,
115         GUEST_ESLIMIT = 0x4800,
116         GUEST_CSLIMIT = 0x4802,
117         GUEST_SSLIMIT = 0x4804,
118         GUEST_DSLIMIT = 0x4806,
119         GUEST_FSLIMIT = 0x4808,
120         GUEST_GSLIMIT = 0x480A,
121         GUEST_LDTRLIMIT = 0x480C,
122         GUEST_TRLIMIT = 0x480E,
123         GUEST_GDTRLIMIT = 0x4810,
124         GUEST_IDTRLIMIT = 0x4812,
125         GUEST_ESPERM = 0x4814,
126         GUEST_CSPERM = 0x4816,
127         GUEST_SSPERM = 0x4818,
128         GUEST_DSPERM = 0x481A,
129         GUEST_FSPERM = 0x481C,
130         GUEST_GSPERM = 0x481E,
131         GUEST_LDTRPERM = 0x4820,
132         GUEST_TRPERM = 0x4822,
133         GUEST_CR0MASK = 0x6000,	/* mask/shadow pairs, see cr0fakeread/cr4fakeread */
134         GUEST_CR4MASK = 0x6002,
135         GUEST_CR0SHADOW = 0x6004,
136         GUEST_CR4SHADOW = 0x6006,
137         GUEST_ESBASE = 0x6806,
138         GUEST_CSBASE = 0x6808,
139         GUEST_SSBASE = 0x680A,
140         GUEST_DSBASE = 0x680C,
141         GUEST_FSBASE = 0x680E,
142         GUEST_GSBASE = 0x6810,
143         GUEST_LDTRBASE = 0x6812,
144         GUEST_TRBASE = 0x6814,
145         GUEST_GDTRBASE = 0x6816,
146         GUEST_IDTRBASE = 0x6818,
147         GUEST_DR7 = 0x681A,
148         GUEST_RSP = 0x681C,
149         GUEST_RIP = 0x681E,
150         GUEST_RFLAGS = 0x6820,
151         GUEST_IA32_DEBUGCTL = 0x2802,
152         GUEST_IA32_PAT = 0x2804,
153         GUEST_IA32_EFER = 0x2806,
154         GUEST_IA32_PERF_GLOBAL_CTRL = 0x2808,
155         
156         HOST_ES = 0xC00,	/* host state fields, loaded in vmcsinit */
157         HOST_CS = 0xC02,
158         HOST_SS = 0xC04,
159         HOST_DS = 0xC06,
160         HOST_FS = 0xC08,
161         HOST_GS = 0xC0A,
162         HOST_TR = 0xC0C,
163         HOST_CR0 = 0x6C00,
164         HOST_CR3 = 0x6C02,
165         HOST_CR4 = 0x6C04,
166         HOST_FSBASE = 0x6C06,
167         HOST_GSBASE = 0x6C08,
168         HOST_TRBASE = 0x6C0A,
169         HOST_GDTR = 0x6C0C,
170         HOST_IDTR = 0x6C0E,
171         HOST_RSP = 0x6C14,
172         HOST_RIP = 0x6C16,
173         HOST_IA32_PAT = 0x2C00,
174         HOST_IA32_EFER = 0x2C02,
175         HOST_IA32_PERF_GLOBAL_CTRL = 0x2C04,
176         
177         GUEST_CANINTR = 0x4824,
178         
179         VM_INSTRERR = 0x4400,	/* instruction error / exit information (read-only in guestregs) */
180         VM_EXREASON = 0x4402,
181         VM_EXINTRINFO = 0x4404,
182         VM_EXINTRCODE = 0x4406,
183         VM_IDTVECINFO = 0x4408,
184         VM_IDTVECCODE = 0x440A,
185         VM_EXINSTRLEN = 0x440C,
186         VM_EXINSTRINFO = 0x440E,
187         VM_EXQUALIF = 0x6400,
188         VM_IORCX = 0x6402,
189         VM_IORSI = 0x6404,
190         VM_IORDI = 0x6406,
191         VM_IORIP = 0x6408,
192         VM_GUESTVA = 0x640A,
193         VM_GUESTPA = 0x2400,
194         
195         VM_VPID = 0x000,	/* receives vmx.vpid in vmcsinit */
196         VM_EPTPIDX = 0x0004,
197         
198         VM_EPTP = 0x201A,	/* receives PADDR(vmx.pml4) | 3<<3 in vmcsinit */
199         VM_EPTPLA = 0x2024,
200         
201         INVLOCAL = 1,	/* presumably the invalidation type for invept/invvpid -- confirm at call sites */
202 };
203
/*
 * Control register bits that stay under kernel control.
 * CR0KERNEL and CR4KERNEL are always OR-ed into the guest CR0/CR4
 * masks (cr0maskwrite, cr4maskwrite, vmcsinit), and cr0realwrite /
 * cr4realwrite keep those bits from the current VMCS value instead
 * of taking them from the user.  The (uintptr) cast adds the upper
 * 32 bits only when uintptr is 64 bits wide.
 */
204 enum {
205         CR0RSVD = 0x1ffaffc0,
206         CR4RSVD = 0xff889000,
207         CR4MCE = 1<<6,
208         CR4VMXE = 1<<13,
209         CR4SMXE = 1<<14,
210         CR4PKE = 1<<22,
211         
212         CR0KERNEL = CR0RSVD | 0x30 | (uintptr)0xFFFFFFFF00000000ULL,
213         CR4KERNEL = CR4RSVD | CR4VMXE | CR4SMXE | CR4MCE | CR4PKE | (uintptr)0xFFFFFFFF00000000ULL
214 };
215
/*
 * Upper bound on vmx.nmsr enforced by vmxaddmsr; vmcsinit sizes
 * the msrhost and msrguest arrays as MAXMSR 16-byte entries.
 */
216 enum {
217         MAXMSR = 512,
218 };
219
220 typedef struct Vmx Vmx;
221 typedef struct VmCmd VmCmd;
222 typedef struct VmMem VmMem;
223 typedef struct VmIntr VmIntr;
224
/*
 * One guest physical memory region.  Regions are kept on a
 * circular doubly-linked list whose head is vmx.mem.
 */
225 struct VmMem {
226         uvlong lo, hi;	/* guest physical range; epttranslate maps pages p with lo <= p < hi */
227         Segment *seg;	/* backing segment, or nil for a region without backing */
228         uintptr off;	/* offset of the region within seg */
229         VmMem *next, *prev;
230         u16int attr;	/* added into each EPT entry: 1=r, 2=w, 4=x, bits 3-5 index mtype, 0x40 shown as '!' */
231 };
232
/*
 * An event description as three 32-bit words.  The member names
 * mirror VMENTRY_INTRINFO, VMENTRY_INTRCODE and VMENTRY_INTRILEN;
 * presumably they are written to those fields -- confirm at the users.
 */
233 struct VmIntr {
234         u32int info, code, ilen;
235 };
236
/*
 * State of the virtual machine; this file has a single
 * static instance, vmx.
 */
237 struct Vmx {
238         enum {
239                 NOVMX,
240                 VMXINACTIVE,	/* set by vmxreset once the CPU passes its capability checks */
241                 VMXINIT,
242                 VMXREADY,
243                 VMXRUNNING,
244                 VMXDEAD,
245                 VMXENDING,
246         } state;	/* printable names are in statenames[] */
247         char errstr[ERRMAX];
248         Ureg ureg;	/* guest general registers (see UREG entries in guestregs); zeroed by vmcsinit */
249         uintptr cr2;
250         uintptr dr[8]; /* DR7 is also kept in VMCS */
251         FPsave *fp;	/* 512-byte FP save area allocated in vmcsinit */
252         u8int launched;	/* cleared by vmcsinit */
253         u8int on;	/* when set, vmxshutdown calls vmxoff */
254         u8int vpid;	/* written to VM_VPID; vmcsinit uses 1 */
255         enum {
256                 FLUSHVPID = 1,
257                 FLUSHEPT = 2,	/* set whenever the EPT is modified */
258                 STEP = 4,
259                 POSTEX = 8,
260                 POSTIRQ = 16,
261         } onentry;	/* flag bits; presumably work pending for the next VM entry -- confirm */
262         
263         Rendez cmdwait;
264         Lock cmdlock;
265         VmCmd *firstcmd, **lastcmd;	/* command list; vmxreset points lastcmd at firstcmd */
266         VmCmd *postponed;
267         uvlong *pml4;	/* top-level EPT table, page-aligned; its physical address goes into VM_EPTP */
268         VmMem mem;	/* head of the circular list of memory regions */
269         
270         enum {
271                 GOTEXIT = 1,
272                 GOTIRQACK = 2,
273                 GOTSTEP = 4,
274                 GOTSTEPERR = 8,
275         } got;
276         VmMem *stepmap;
277         VmIntr exc, irq, irqack;
278         
279         u64int *msrhost, *msrguest;	/* MSR lists of (index, value) pairs filled by vmxaddmsr */
280         u32int *msrbits;	/* 4096-byte MSR bitmap edited by vmxtrapmsr */
281         int nmsr;	/* number of entries in msrhost/msrguest, at most MAXMSR */
282 };
283
/*
 * A command linked into a list through next (Vmx.firstcmd,
 * Vmx.lastcmd and Vmx.postponed are VmCmd pointers).  The
 * handler has the signature used by the cmd* functions in this
 * file.  The code that queues and runs commands is not visible
 * in this part of the file.
 */
284 struct VmCmd {
285         enum {
286                 CMDFDONE = 1,
287                 CMDFFAIL = 2,
288                 CMDFPOSTP = 4,
289         } flags;
290         u8int scratched;
291         Rendez;
292         Lock;
293         int (*cmd)(VmCmd *, va_list);	/* handler, e.g. cmdgetmeminfo */
294         int retval;
295         char *errstr;
296         va_list va;	/* arguments handed to cmd */
297         VmCmd *next;
298 };
299
/* error string for a VM that is ending (cf. VMXENDING) */
300 static char Equit[] = "vmx: ending";
301
/* printable names indexed by the Vmx.state enumeration */
302 static char *statenames[] = {
303         [NOVMX] "novmx",
304         [VMXINACTIVE] "inactive",
305         [VMXINIT] "init",
306         [VMXREADY] "ready",
307         [VMXRUNNING] "running",
308         [VMXDEAD] "dead",
309         [VMXENDING]"ending"
310 };
311
/* the single virtual machine instance managed by this file */
312 static Vmx vmx;
313
314 static u64int
315 vmcsread(u32int addr)
316 {
317         int rc;
318         u64int val;
319
320         val = 0;
321         rc = vmread(addr, (uintptr *) &val);
322         if(rc >= 0 && sizeof(uintptr) == 4 && (addr & 0x6000) == 0x2000)
323                 rc = vmread(addr | 1, (uintptr *) &val + 1);
324         if(rc < 0){
325                 char errbuf[128];
326                 snprint(errbuf, sizeof(errbuf), "vmcsread failed (%#.4ux)", addr);
327                 error(errbuf);
328         }
329         return val;
330 }
331
332 static void
333 vmcswrite(u32int addr, u64int val)
334 {
335         int rc;
336         
337         rc = vmwrite(addr, val);
338         if(rc >= 0 && sizeof(uintptr) == 4 && (addr & 0x6000) == 0x2000)
339                 rc = vmwrite(addr | 1, val >> 32);
340         if(rc < 0){
341                 char errbuf[128];
342                 snprint(errbuf, sizeof(errbuf), "vmcswrite failed (%#.4ux = %#.16ullx)", addr, val);
343                 error(errbuf);
344         }
345 }
346
347 static uvlong
348 parseval(char *s)
349 {
350         uvlong v;
351         char *p;
352
353         v = strtoull(s, &p, 0);
354         if(p == s || *p != 0) error("invalid value");
355         return v;
356 }
357
358 static char *
359 cr0fakeread(char *p, char *e)
360 {
361         uvlong guest, mask, shadow;
362         
363         guest = vmcsread(GUEST_CR0);
364         mask = vmcsread(GUEST_CR0MASK);
365         shadow = vmcsread(GUEST_CR0SHADOW);
366         return seprint(p, e, "%#.*ullx", sizeof(uintptr) * 2, guest & ~mask | shadow & mask);
367 }
368
369 static char *
370 cr4fakeread(char *p, char *e)
371 {
372         uvlong guest, mask, shadow;
373         
374         guest = vmcsread(GUEST_CR4);
375         mask = vmcsread(GUEST_CR4MASK);
376         shadow = vmcsread(GUEST_CR4SHADOW);
377         return seprint(p, e, "%#.*ullx", sizeof(uintptr) * 2, guest & ~mask | shadow & mask);
378 }
379
380 static void
381 updatelma(void)
382 {
383         uvlong cr0, efer, nefer, ectrl;
384
385         if(sizeof(uintptr) != 8) return;
386         cr0 = vmcsread(GUEST_CR0);
387         efer = vmcsread(GUEST_IA32_EFER);
388         nefer = efer & ~0x400 | efer << 2 & cr0 >> 21 & 0x400;
389         if(efer == nefer) return;
390         vmcswrite(GUEST_IA32_EFER, nefer);
391         ectrl = vmcsread(VMENTRY_CTLS);
392         ectrl = ectrl & ~0x200 | nefer >> 1 & 0x200;
393         vmcswrite(VMENTRY_CTLS, ectrl);
394 }
395
396 static int
397 cr0realwrite(char *s)
398 {
399         uvlong v;
400         
401         v = parseval(s);
402         vmcswrite(GUEST_CR0, vmcsread(GUEST_CR0) & CR0KERNEL | v & ~CR0KERNEL);
403         updatelma();
404         return 0;
405 }
406
407 static int
408 cr0maskwrite(char *s)
409 {
410         uvlong v;
411         
412         v = parseval(s);
413         vmcswrite(GUEST_CR0MASK, v | CR0KERNEL);
414         return 0;
415 }
416
417 static int
418 eferwrite(char *s)
419 {
420         uvlong v;
421         
422         v = parseval(s);
423         vmcswrite(GUEST_IA32_EFER, v);
424         updatelma();
425         return 0;
426 }
427
428 static int
429 cr4realwrite(char *s)
430 {
431         uvlong v;
432         
433         v = parseval(s);
434         vmcswrite(GUEST_CR4, vmcsread(GUEST_CR4) & CR4KERNEL | v & ~CR4KERNEL);
435         return 0;
436 }
437
438 static int
439 cr4maskwrite(char *s)
440 {
441         uvlong v;
442         
443         v = parseval(s);
444         vmcswrite(GUEST_CR4MASK, v | CR4KERNEL);
445         return 0;
446 }
447
448 static int
449 dr7write(char *s)
450 {
451         uvlong v;
452         
453         v = (u32int) parseval(s);
454         vmcswrite(GUEST_DR7, vmx.dr[7] = (u32int) v);
455         return 0;
456 }
457
/*
 * Write handler installed for the read-only entries of guestregs:
 * ignores its argument and always returns -1.
 */
458 static int
459 readonly(char *)
460 {
461         return -1;
462 }
463
464 static int
465 dr6write(char *s)
466 {
467         uvlong v;
468         
469         v = parseval(s);
470         vmx.dr[6] = (u32int) v;
471         return 0;
472 }
473
/*
 * Description of one named guest register.
 * offset is either a VMCS field encoding or, for entries built
 * with VMXVAR/UREG, the bitwise complement of the member's byte
 * offset inside Vmx.  The complement presumably lets the consumer
 * tell the two cases apart by sign; that code is not visible here.
 * read and write are optional handlers; entries that omit them
 * leave the pointers nil.
 */
474 typedef struct GuestReg GuestReg;
475 struct GuestReg {
476         int offset;
477         u8int size; /* in bytes; 0 means == uintptr */
478         char *name;
479         char *(*read)(char *, char *);	/* formats into [p, e), e.g. cr0fakeread */
480         int (*write)(char *);	/* parses and stores a value, e.g. cr0realwrite */
481 };
/* complemented byte offset of member x within Vmx */
482 #define VMXVAR(x) ~(ulong)&(((Vmx*)0)->x)
/* same, for a member of the saved guest Ureg */
483 #define UREG(x) VMXVAR(ureg.x)
/* table of all guest registers known by name */
484 static GuestReg guestregs[] = {
485         {GUEST_RIP, 0, "pc"},
486         {GUEST_RSP, 0, "sp"},
487         {GUEST_RFLAGS, 0, "flags"},
488         {UREG(ax), 0, "ax"},
489         {UREG(bx), 0, "bx"},
490         {UREG(cx), 0, "cx"},
491         {UREG(dx), 0, "dx"},
492         {UREG(bp), 0, "bp"},
493         {UREG(si), 0, "si"},
494         {UREG(di), 0, "di"},
/* r8-r15 are listed only when RMACH is defined */
495 #ifdef RMACH
496         {UREG(r8), 0, "r8"},
497         {UREG(r9), 0, "r9"},
498         {UREG(r10), 0, "r10"},
499         {UREG(r11), 0, "r11"},
500         {UREG(r12), 0, "r12"},
501         {UREG(r13), 0, "r13"},
502         {UREG(r14), 0, "r14"},
503         {UREG(r15), 0, "r15"},
504 #endif
505         {GUEST_GDTRBASE, 0, "gdtrbase"},
506         {GUEST_GDTRLIMIT, 4, "gdtrlimit"},
507         {GUEST_IDTRBASE, 0, "idtrbase"},
508         {GUEST_IDTRLIMIT, 4, "idtrlimit"},
509         {GUEST_CS, 2, "cs"},
510         {GUEST_CSBASE, 0, "csbase"},
511         {GUEST_CSLIMIT, 4, "cslimit"},
512         {GUEST_CSPERM, 4, "csperm"},
513         {GUEST_DS, 2, "ds"},
514         {GUEST_DSBASE, 0, "dsbase"},
515         {GUEST_DSLIMIT, 4, "dslimit"},
516         {GUEST_DSPERM, 4, "dsperm"},
517         {GUEST_ES, 2, "es"},
518         {GUEST_ESBASE, 0, "esbase"},
519         {GUEST_ESLIMIT, 4, "eslimit"},
520         {GUEST_ESPERM, 4, "esperm"},
521         {GUEST_FS, 2, "fs"},
522         {GUEST_FSBASE, 0, "fsbase"},
523         {GUEST_FSLIMIT, 4, "fslimit"},
524         {GUEST_FSPERM, 4, "fsperm"},
525         {GUEST_GS, 2, "gs"},
526         {GUEST_GSBASE, 0, "gsbase"},
527         {GUEST_GSLIMIT, 4, "gslimit"},
528         {GUEST_GSPERM, 4, "gsperm"},
529         {GUEST_SS, 2, "ss"},
530         {GUEST_SSBASE, 0, "ssbase"},
531         {GUEST_SSLIMIT, 4, "sslimit"},
532         {GUEST_SSPERM, 4, "ssperm"},
533         {GUEST_TR, 2, "tr"},
534         {GUEST_TRBASE, 0, "trbase"},
535         {GUEST_TRLIMIT, 4, "trlimit"},
536         {GUEST_TRPERM, 4, "trperm"},
537         {GUEST_LDTR, 2, "ldtr"},
538         {GUEST_LDTRBASE, 0, "ldtrbase"},
539         {GUEST_LDTRLIMIT, 4, "ldtrlimit"},
540         {GUEST_LDTRPERM, 4, "ldtrperm"},
/* control registers: "real" is the VMCS value, "fake" the mask/shadow view */
541         {GUEST_CR0, 0, "cr0real", nil, cr0realwrite},
542         {GUEST_CR0SHADOW, 0, "cr0fake", cr0fakeread},
543         {GUEST_CR0MASK, 0, "cr0mask", nil, cr0maskwrite},
544         {VMXVAR(cr2), 0, "cr2"},
545         {GUEST_CR3, 0, "cr3"},
546         {GUEST_CR4, 0, "cr4real", nil, cr4realwrite},
547         {GUEST_CR4SHADOW, 0, "cr4fake", cr4fakeread},
548         {GUEST_CR4MASK, 0, "cr4mask", nil, cr4maskwrite},
549         {GUEST_IA32_PAT, 8, "pat"},
550         {GUEST_IA32_EFER, 8, "efer", nil, eferwrite},
551         {VMXVAR(dr[0]), 0, "dr0"},
552         {VMXVAR(dr[1]), 0, "dr1"},
553         {VMXVAR(dr[2]), 0, "dr2"},
554         {VMXVAR(dr[3]), 0, "dr3"},
555         {VMXVAR(dr[6]), 0, "dr6", nil, dr6write},
556         {GUEST_DR7, 0, "dr7", nil, dr7write},
/* the remaining entries use readonly as their write handler */
557         {VM_INSTRERR, 4, "instructionerror", nil, readonly},
558         {VM_EXREASON, 4, "exitreason", nil, readonly},
559         {VM_EXQUALIF, 0, "exitqualification", nil, readonly},
560         {VM_EXINTRINFO, 4, "exitinterruptinfo", nil, readonly},
561         {VM_EXINTRCODE, 4, "exitinterruptcode", nil, readonly},
562         {VM_EXINSTRLEN, 4, "exitinstructionlen", nil, readonly},
563         {VM_EXINSTRINFO, 4, "exitinstructioninfo", nil, readonly},
564         {VM_GUESTVA, 0, "exitva", nil, readonly},
565         {VM_GUESTPA, 0, "exitpa", nil, readonly},
566         {VM_IDTVECINFO, 4, "idtinterruptinfo", nil, readonly},
567         {VM_IDTVECCODE, 4, "idtinterruptcode", nil, readonly},
568 };
569
570 static int
571 vmokpage(u64int addr)
572 {
573         return (addr & 0xfff) == 0 && addr >> 48 == 0;
574 }
575
576 static uvlong *
577 eptwalk(uvlong addr)
578 {
579         uvlong *tab, *nt;
580         uvlong v;
581         int i;
582         
583         tab = vmx.pml4;
584         if(tab == nil) error(Egreg);
585         for(i = 3; i >= 1; i--){
586                 tab += addr >> 12 + 9 * i & 0x1ff;
587                 v = *tab;
588                 if((v & 3) == 0){
589                         nt = mallocalign(BY2PG, BY2PG, 0, 0);
590                         if(nt == nil) error(Enomem);
591                         memset(nt, 0, BY2PG);
592                         v = PADDR(nt) | 0x407;
593                         *tab = v;
594                 }
595                 tab = KADDR(v & ~0xfff);
596         }
597         return tab + (addr >> 12 & 0x1ff);
598 }
599
600 static void
601 eptfree(uvlong *tab, int level)
602 {
603         int i;
604         uvlong v, *t;
605         
606         if(tab == nil) error(Egreg);
607         if(level < 3){
608                 for(i = 0; i < 512; i++){
609                         v = tab[i];
610                         if((v & 3) == 0) continue;
611                         t = KADDR(v & ~0xfff);
612                         eptfree(t, level + 1);
613                         tab[i] = 0;
614                 }
615         }
616         if(level > 0)
617                 free(tab);              
618 }
619
620 static void
621 epttranslate(VmMem *mp)
622 {
623         uvlong p, hpa;
624
625         if(mp->seg != nil && (mp->seg->type & SG_TYPE) != SG_FIXED || (mp->lo & 0xfff) != 0 || (mp->hi & 0xfff) != 0 || (uint)mp->attr >= 0x1000)
626                 error(Egreg);
627         if(mp->seg != nil){
628                 if(mp->seg->base + mp->off + (mp->hi - mp->lo) > mp->seg->top)
629                         error(Egreg);
630                 hpa = mp->seg->map[0]->pages[0]->pa + mp->off;
631         }else
632                 hpa = 0;
633         for(p = mp->lo; p < mp->hi; p += BY2PG)
634                 *eptwalk(p) = hpa + (p - mp->lo) + mp->attr;
635         vmx.onentry |= FLUSHEPT;
636 }
637
638 static char *mtype[] = {"uc", "wc", "02", "03", "wt", "wp", "wb", "07"};
639
640 static int
641 cmdgetmeminfo(VmCmd *, va_list va)
642 {
643         VmMem *mp;
644         char *p0, *e, *p;
645         char attr[4];
646         char mt[4];
647         
648         p0 = va_arg(va, char *);
649         e = va_arg(va, char *);
650         p = p0;
651         for(mp = vmx.mem.next; mp != &vmx.mem; mp = mp->next){
652                 attr[0] = (mp->attr & 1) != 0 ? 'r' : '-';
653                 attr[1] = (mp->attr & 2) != 0 ? 'w' : '-';
654                 attr[2] = (mp->attr & 4) != 0 ? 'x' : '-';
655                 attr[3] = 0;
656                 *(ushort*)mt = *(u16int*)mtype[mp->attr >> 3 & 7];
657                 mt[2] = (mp->attr & 0x40) != 0 ? '!' : 0;
658                 mt[3] = 0;
659                 p = seprint(p, e, "%s %s %#llux %#llux %p %#llux\n", attr, mt, mp->lo, mp->hi, mp->seg, (uvlong)mp->off);
660         }
661         return p - p0;
662 }
663
664 static int
665 cmdclearmeminfo(VmCmd *, va_list)
666 {
667         VmMem *mp, *mn;
668
669         eptfree(vmx.pml4, 0);
670         for(mp = vmx.mem.next; mp != &vmx.mem; mp = mn){
671                 mn = mp->next;
672                 free(mp);
673         }
674         vmx.mem.prev = &vmx.mem;
675         vmx.mem.next = &vmx.mem;
676         vmx.onentry |= FLUSHEPT;
677         return 0;
678 }
679
680 extern Segment* (*_globalsegattach)(char*);
681
682 static int
683 cmdsetmeminfo(VmCmd *, va_list va)
684 {
685         char *p0, *p, *q, *r;
686         int j;
687         char *f[10];
688         VmMem *mp;
689         int rc;
690
691         if(vmx.pml4 == nil)
692                 error(Egreg);   
693         p0 = va_arg(va, char *);
694         p = p0;
695         mp = nil;
696         for(;;){
697                 q = strchr(p, '\n');
698                 if(q == 0) break;
699                 *q = 0;
700                 if(mp == nil)
701                         mp = malloc(sizeof(VmMem));
702                 if(waserror()){
703                         free(mp);
704                         nexterror();
705                 }
706                 rc = tokenize(p, f, nelem(f));
707                 p = q + 1;
708                 if(rc == 0) goto next;
709                 if(rc != 4 && rc != 6) error("number of fields wrong");
710                 memset(mp, 0, sizeof(VmMem));
711                 for(q = f[0]; *q != 0; q++)
712                         switch(*q){
713                         case 'r': if((mp->attr & 1) != 0) goto tinval; mp->attr |= 1; break;
714                         case 'w': if((mp->attr & 2) != 0) goto tinval; mp->attr |= 2; break;
715                         case 'x': if((mp->attr & 4) != 0) goto tinval; mp->attr |= 0x404; break;
716                         case '-': break;
717                         default: tinval: error("invalid access field");
718                         }
719                 for(j = 0; j < 8; j++)
720                         if(strncmp(mtype[j], f[1], 2) == 0){
721                                 mp->attr |= j << 3;
722                                 break;
723                         }
724                 if(j == 8 || strlen(f[1]) > 3) error("invalid memory type");
725                 if(f[1][2] == '!') mp->attr |= 0x40;
726                 else if(f[1][2] != 0) error("invalid memory type");
727                 mp->lo = strtoull(f[2], &r, 0);
728                 if(*r != 0 || !vmokpage(mp->lo)) error("invalid low guest physical address");
729                 mp->hi = strtoull(f[3], &r, 0);
730                 if(*r != 0 || !vmokpage(mp->hi) || mp->hi <= mp->lo) error("invalid high guest physical address");
731                 mp->off = strtoull(f[5], &r, 0);
732                 if(*r != 0 || !vmokpage(mp->off)) error("invalid offset");
733                 if((mp->attr & 7) != 0){
734                         if(rc != 6) error("number of fields wrong");
735                         mp->seg = _globalsegattach(f[4]);
736                         if(mp->seg == nil) error("no such segment");
737                         if(mp->seg->base + mp->off + (mp->hi - mp->lo) > mp->seg->top) error("out of bounds");
738                 }
739                 epttranslate(mp);
740                 mp->prev = vmx.mem.prev;
741                 mp->next = &vmx.mem;
742                 mp->prev->next = mp;
743                 mp->next->prev = mp;
744                 mp = nil;
745         next:
746                 poperror();
747         }
748         free(mp);
749         return p - p0;
750 }
751
752 static void
753 vmxreset(void)
754 {
755         ulong regs[4];
756         vlong msr;
757
758         cpuid(1, regs);
759         if((regs[2] & 1<<5) == 0) return;
760         /* check if disabled by BIOS */
761         if(rdmsr(0x3a, &msr) < 0) return;
762         if((msr & 5) != 5){
763                 if((msr & 1) == 0){ /* msr still unlocked */
764                         wrmsr(0x3a, msr | 5);
765                         if(rdmsr(0x3a, &msr) < 0)
766                                 return;
767                 }
768                 if((msr & 5) != 5)
769                         return;
770         }
771         if(rdmsr(VMX_PROCB_CTLS_MSR, &msr) < 0) return;
772         if((vlong)msr >= 0) return;
773         if(rdmsr(VMX_PROCB_CTLS2_MSR, &msr) < 0) return;
774         if((msr >> 32 & PROCB_EPT) == 0 || (msr >> 32 & PROCB_VPID) == 0) return;
775         vmx.state = VMXINACTIVE;
776         vmx.lastcmd = &vmx.firstcmd;
777         vmx.mem.next = &vmx.mem;
778         vmx.mem.prev = &vmx.mem;
779 }
780
781 static void
782 vmxshutdown(void)
783 {
784         if(vmx.on){
785                 vmxoff();
786                 vmx.on = 0;
787         }
788 }
789
790 static void
791 vmxaddmsr(u32int msr, u64int gval)
792 {
793         int i;
794
795         if(vmx.nmsr >= MAXMSR)
796                 error("too many MSRs");
797         i = 2 * vmx.nmsr++;
798         vmx.msrhost[i] = msr;
799         rdmsr(msr, (vlong *) &vmx.msrhost[i+1]);
800         vmx.msrguest[i] = msr;
801         vmx.msrguest[i+1] = gval;
802         vmcswrite(VMENTRY_MSRLDCNT, vmx.nmsr);
803         vmcswrite(VMEXIT_MSRSTCNT, vmx.nmsr);
804         vmcswrite(VMEXIT_MSRLDCNT, vmx.nmsr);
805 }
806
807 static void
808 vmxtrapmsr(u32int msr, enum { TRAPRD = 1, TRAPWR = 2 } state)
809 {
810         u32int m;
811         
812         if(msr >= 0x2000 && (u32int)(msr - 0xc0000000) >= 0x2000)
813                 return;
814         msr = msr & 0x1fff | msr >> 18 & 0x2000;
815         m = 1<<(msr & 31);
816         if((state & TRAPRD) != 0)
817                 vmx.msrbits[msr / 32] |= m;
818         else
819                 vmx.msrbits[msr / 32] &= ~m;
820         if((state & TRAPWR) != 0)
821                 vmx.msrbits[msr / 32 + 512] |= m;
822         else
823                 vmx.msrbits[msr / 32 + 512] &= ~m;
824 }
825
826 static void
827 vmcsinit(void)
828 {
829         vlong msr;
830         u32int x;
831         
832         memset(&vmx.ureg, 0, sizeof(vmx.ureg));
833         vmx.launched = 0;
834         vmx.onentry = 0;        
835         
836         if(rdmsr(VMX_BASIC_MSR, &msr) < 0) error("rdmsr(VMX_BASIC_MSR) failed");
837         if((msr & 1ULL<<55) != 0){
838                 if(rdmsr(VMX_TRUE_PROCB_CTLS_MSR, &procb_ctls) < 0) error("rdmsr(VMX_TRUE_PROCB_CTLS_MSR) failed");
839                 if(rdmsr(VMX_TRUE_PINB_CTLS_MSR, &pinb_ctls) < 0) error("rdmsr(VMX_TRUE_PINB_CTLS_MSR) failed");
840         }else{
841                 if(rdmsr(VMX_PROCB_CTLS_MSR, &procb_ctls) < 0) error("rdmsr(VMX_PROCB_CTLS_MSR) failed");
842                 if(rdmsr(VMX_PINB_CTLS_MSR, &pinb_ctls) < 0) error("rdmsr(VMX_PINB_CTLS_MSR) failed");
843         }
844
845         if(rdmsr(VMX_PINB_CTLS_MSR, &msr) < 0) error("rdmsr(VMX_PINB_CTLS_MSR failed");
846         x = (u32int)pinb_ctls | 1<<1 | 1<<2 | 1<<4; /* currently reserved default1 bits */
847         x |= PINB_EXITIRQ | PINB_EXITNMI;
848         x &= pinb_ctls >> 32;
849         vmcswrite(PINB_CTLS, x);
850         
851         if(rdmsr(VMX_PROCB_CTLS_MSR, &msr) < 0) error("rdmsr(VMX_PROCB_CTLS_MSR failed");
852         x = (u32int)procb_ctls | 1<<1 | 7<<4 | 1<<8 | 1<<13 | 1<<14 | 1<<26; /* currently reserved default1 bits */
853         x |= PROCB_EXITHLT | PROCB_EXITMWAIT;
854         x |= PROCB_EXITMOVDR | PROCB_EXITIO | PROCB_EXITMONITOR | PROCB_MSRBITMAP;
855         x |= PROCB_USECTLS2;
856         x &= msr >> 32;
857         vmcswrite(PROCB_CTLS, x);
858         
859         if(rdmsr(VMX_PROCB_CTLS2_MSR, &msr) < 0) error("rdmsr(VMX_PROCB_CTLS2_MSR failed");
860         x = PROCB_EPT | PROCB_VPID | PROCB_UNRESTR;
861         x &= msr >> 32;
862         vmcswrite(PROCB_CTLS2, x);
863         
864         if(rdmsr(VMX_VMEXIT_CTLS_MSR, &msr) < 0) error("rdmsr(VMX_VMEXIT_CTLS_MSR failed");
865         x = (u32int)msr;
866         if(sizeof(uintptr) == 8) x |= VMEXIT_HOST64;
867         x |= VMEXIT_LD_IA32_PAT | VMEXIT_LD_IA32_EFER | VMEXIT_ST_DEBUG | VMEXIT_ST_IA32_EFER;
868         x &= msr >> 32;
869         vmcswrite(VMEXIT_CTLS, x);
870         
871         if(rdmsr(VMX_VMENTRY_CTLS_MSR, &msr) < 0) error("rdmsr(VMX_VMENTRY_CTLS_MSR failed");
872         x = (u32int)msr;
873         x |= VMENTRY_LD_IA32_PAT | VMENTRY_LD_IA32_EFER | VMENTRY_LD_DEBUG;
874         x &= msr >> 32;
875         vmcswrite(VMENTRY_CTLS, x);
876         
877         vmcswrite(CR3_TARGCNT, 0);
878         vmcswrite(VMENTRY_INTRINFO, 0);
879         vmcswrite(VMCS_LINK, -1);
880         
881         vmcswrite(HOST_CS, KESEL);
882         vmcswrite(HOST_DS, KDSEL);
883         vmcswrite(HOST_ES, KDSEL);
884         vmcswrite(HOST_FS, KDSEL);
885         vmcswrite(HOST_GS, KDSEL);
886         vmcswrite(HOST_SS, KDSEL);
887         vmcswrite(HOST_TR, TSSSEL);
888         vmcswrite(HOST_CR0, getcr0() & ~0xe);
889         vmcswrite(HOST_CR3, getcr3());
890         vmcswrite(HOST_CR4, getcr4());
891         rdmsr(FSbase, &msr);
892         vmcswrite(HOST_FSBASE, msr);
893         rdmsr(GSbase, &msr);
894         vmcswrite(HOST_GSBASE, msr);
895         vmcswrite(HOST_TRBASE, (uintptr) m->tss);
896         vmcswrite(HOST_GDTR, (uintptr) m->gdt);
897         vmcswrite(HOST_IDTR, IDTADDR);
898         if(rdmsr(0x277, &msr) < 0) error("rdmsr(IA32_PAT) failed");
899         vmcswrite(HOST_IA32_PAT, msr);
900         if(rdmsr(Efer, &msr) < 0) error("rdmsr(IA32_EFER) failed");
901         vmcswrite(HOST_IA32_EFER, msr);
902         
903         vmcswrite(EXC_BITMAP, 1<<18|1<<1);
904         vmcswrite(PFAULT_MASK, 0);
905         vmcswrite(PFAULT_MATCH, 0);
906         
907         vmcswrite(GUEST_CSBASE, 0);
908         vmcswrite(GUEST_DSBASE, 0);
909         vmcswrite(GUEST_ESBASE, 0);
910         vmcswrite(GUEST_FSBASE, 0);
911         vmcswrite(GUEST_GSBASE, 0);
912         vmcswrite(GUEST_SSBASE, 0);
913         vmcswrite(GUEST_CSLIMIT, -1);
914         vmcswrite(GUEST_DSLIMIT, -1);
915         vmcswrite(GUEST_ESLIMIT, -1);
916         vmcswrite(GUEST_FSLIMIT, -1);
917         vmcswrite(GUEST_GSLIMIT, -1);
918         vmcswrite(GUEST_SSLIMIT, -1);
919         vmcswrite(GUEST_CSPERM, (SEGG|SEGD|SEGP|SEGPL(0)|SEGEXEC|SEGR) >> 8 | 1);
920         vmcswrite(GUEST_DSPERM, (SEGG|SEGB|SEGP|SEGPL(0)|SEGDATA|SEGW) >> 8 | 1);
921         vmcswrite(GUEST_ESPERM, (SEGG|SEGB|SEGP|SEGPL(0)|SEGDATA|SEGW) >> 8 | 1);
922         vmcswrite(GUEST_FSPERM, (SEGG|SEGB|SEGP|SEGPL(0)|SEGDATA|SEGW) >> 8 | 1);
923         vmcswrite(GUEST_GSPERM, (SEGG|SEGB|SEGP|SEGPL(0)|SEGDATA|SEGW) >> 8 | 1);
924         vmcswrite(GUEST_SSPERM, (SEGG|SEGB|SEGP|SEGPL(0)|SEGDATA|SEGW) >> 8 | 1);
925         vmcswrite(GUEST_LDTRPERM, 1<<16);
926
927         vmcswrite(GUEST_CR0MASK, CR0KERNEL);
928         vmcswrite(GUEST_CR4MASK, CR4KERNEL);
929         vmcswrite(GUEST_CR0, getcr0() & CR0KERNEL | 0x31);
930         vmcswrite(GUEST_CR3, 0);
931         vmcswrite(GUEST_CR4, getcr4() & CR4KERNEL);
932         vmcswrite(GUEST_CR0SHADOW, getcr0() & CR0KERNEL | 0x31);
933         vmcswrite(GUEST_CR4SHADOW, getcr4() & ~CR4VMXE & CR4KERNEL);
934         
935         vmcswrite(GUEST_IA32_PAT, 0x0007040600070406ULL);
936         vmcswrite(GUEST_IA32_EFER, 0);
937         
938         vmcswrite(GUEST_TRBASE, 0);
939         vmcswrite(GUEST_TRLIMIT, 0xffff);
940         vmcswrite(GUEST_TRPERM, (SEGTSS|SEGPL(0)|SEGP) >> 8 | 2);
941         
942         vmx.pml4 = mallocalign(BY2PG, BY2PG, 0, 0);
943         memset(vmx.pml4, 0, BY2PG);
944         vmcswrite(VM_EPTP, PADDR(vmx.pml4) | 3<<3);
945         vmx.vpid = 1;
946         vmcswrite(VM_VPID, vmx.vpid);
947         
948         vmcswrite(GUEST_RFLAGS, 2);
949         
950         vmx.onentry = FLUSHVPID | FLUSHEPT;
951         
952         vmx.fp = mallocalign(512, 512, 0, 0);
953         if(vmx.fp == nil)
954                 error(Enomem);
955         fpinit();
956         fpsave(vmx.fp);
957         
958         vmx.msrhost = mallocalign(MAXMSR*16, 16, 0, 0);
959         vmx.msrguest = mallocalign(MAXMSR*16, 16, 0, 0);
960         vmx.msrbits = mallocalign(4096, 4096, 0, 0);
961         if(vmx.msrhost == nil || vmx.msrguest == nil || vmx.msrbits == nil)
962                 error(Enomem);
963         memset(vmx.msrbits, -1, 4096);
964         vmxtrapmsr(Efer, 0);
965         vmcswrite(VMENTRY_MSRLDADDR, PADDR(vmx.msrguest));
966         vmcswrite(VMEXIT_MSRSTADDR, PADDR(vmx.msrguest));
967         vmcswrite(VMEXIT_MSRLDADDR, PADDR(vmx.msrhost));
968         vmcswrite(MSR_BITMAP, PADDR(vmx.msrbits));
969         
970         if(sizeof(uintptr) == 8){
971                 vmxaddmsr(Star, 0);
972                 vmxaddmsr(Lstar, 0);
973                 vmxaddmsr(Cstar, 0);
974                 vmxaddmsr(Sfmask, 0);
975                 vmxaddmsr(KernelGSbase, 0);
976                 vmxtrapmsr(Star, 0);
977                 vmxtrapmsr(Lstar, 0);
978                 vmxtrapmsr(Cstar, 0);
979                 vmxtrapmsr(Sfmask, 0);
980                 vmxtrapmsr(FSbase, 0);
981                 vmxtrapmsr(GSbase, 0);
982                 vmxtrapmsr(KernelGSbase, 0);
983         }
984 }
985
986 static void
987 vmxstart(void)
988 {
989         static uchar *vmcs; /* also vmxon region */
990         vlong msr, msr2;
991         uintptr cr;
992         vlong x;
993
994         putcr4(getcr4() | 0x2000); /* set VMXE */
995         putcr0(getcr0() | 0x20); /* set NE */
996         cr = getcr0();
997         if(rdmsr(VMX_CR0_FIXED0, &msr) < 0) error("rdmsr(VMX_CR0_FIXED0) failed");
998         if(rdmsr(VMX_CR0_FIXED1, &msr2) < 0) error("rdmsr(VMX_CR0_FIXED1) failed");
999         if((cr & ~msr & ~msr2 | ~cr & msr & msr2) != 0) error("invalid CR0 value");
1000         cr = getcr4();
1001         if(rdmsr(VMX_CR4_FIXED0, &msr) < 0) error("rdmsr(VMX_CR4_FIXED0) failed");
1002         if(rdmsr(VMX_CR4_FIXED1, &msr2) < 0) error("rdmsr(VMX_CR4_FIXED1) failed");
1003         if((cr & ~msr & ~msr2 | ~cr & msr & msr2) != 0) error("invalid CR4 value");
1004
1005         if(vmcs == nil){
1006                 vmcs = mallocalign(8192, 4096, 0, 0);
1007                 if(vmcs == nil)
1008                         error(Enomem);
1009         }
1010         memset(vmcs, 0, 8192);
1011         rdmsr(VMX_BASIC_MSR, &x);
1012         *(ulong*)vmcs = x;
1013         *(ulong*)&vmcs[4096] = x;
1014         if(vmxon(PADDR(vmcs + 4096)) < 0)
1015                 error("vmxon failed");
1016         vmx.on = 1;
1017         if(vmclear(PADDR(vmcs)) < 0)
1018                 error("vmclear failed");
1019         if(vmptrld(PADDR(vmcs)) < 0)
1020                 error("vmptrld failed");
1021         vmcsinit();
1022 }
1023
1024 static void
1025 cmdrelease(VmCmd *p, int f)
1026 {
1027         lock(p);
1028         p->flags |= CMDFDONE | f;
1029         wakeup(p);
1030         unlock(p);
1031 }
1032
1033 static void
1034 killcmds(VmCmd *notme)
1035 {
1036         VmCmd *p, *pn;
1037         
1038         for(p = vmx.postponed; p != nil; p = pn){
1039                 pn = p->next;
1040                 p->next = nil;
1041                 if(p == notme) continue;
1042                 kstrcpy(p->errstr, Equit, ERRMAX);
1043                 cmdrelease(p, CMDFFAIL);
1044         }
1045         vmx.postponed = nil;
1046         ilock(&vmx.cmdlock);
1047         for(p = vmx.firstcmd; p != nil; p = pn){
1048                 pn = p->next;
1049                 p->next = nil;
1050                 if(p == notme) continue;
1051                 kstrcpy(p->errstr, Equit, ERRMAX);
1052                 cmdrelease(p, CMDFFAIL);
1053         }
1054         vmx.firstcmd = nil;
1055         vmx.lastcmd = &vmx.firstcmd;
1056         iunlock(&vmx.cmdlock);
1057 }
1058
1059 static int
1060 cmdquit(VmCmd *p, va_list va)
1061 {
1062         vmx.state = VMXENDING;
1063         killcmds(p);
1064
1065         if(vmx.pml4 != nil){
1066                 cmdclearmeminfo(p, va);
1067                 free(vmx.pml4);
1068                 vmx.pml4 = nil;
1069         }
1070         vmx.got = 0;
1071         vmx.onentry = 0;
1072         vmx.stepmap = nil;
1073         
1074         free(vmx.msrhost);
1075         free(vmx.msrguest);
1076         vmx.msrhost = nil;
1077         vmx.msrguest = nil;
1078         vmx.nmsr = 0;
1079
1080         if(vmx.on)
1081                 vmxoff();
1082         vmx.state = VMXINACTIVE;
1083         cmdrelease(p, 0);
1084         pexit(Equit, 1);
1085         return 0;
1086 }
1087
1088 static void
1089 processexit(void)
1090 {
1091         u32int reason;
1092         
1093         reason = vmcsread(VM_EXREASON);
1094         if((reason & 1<<31) == 0)
1095                 switch(reason & 0xffff){
1096                 case 1: /* external interrupt */
1097                 case 3: /* INIT */
1098                 case 4: /* SIPI */
1099                 case 5: /* IO SMI */
1100                 case 6: /* SMI */
1101                 case 7: /* IRQ window */
1102                 case 8: /* NMI window */
1103                         return;
1104                 case 37:
1105                         if((vmx.onentry & STEP) != 0){
1106                                 vmx.state = VMXREADY;
1107                                 vmx.got |= GOTSTEP;
1108                                 vmx.onentry &= ~STEP;
1109                                 return;
1110                         }
1111                         break;
1112                 }
1113         if((vmx.onentry & STEP) != 0){
1114                 print("VMX: exit reason %#x when expected step...\n", reason & 0xffff);
1115                 vmx.onentry &= ~STEP;
1116                 vmx.got |= GOTSTEP|GOTSTEPERR;
1117         }
1118         vmx.state = VMXREADY;
1119         vmx.got |= GOTEXIT;
1120 }
1121
1122 static int
1123 cmdgetregs(VmCmd *, va_list va)
1124 {
1125         char *p0, *e;
1126         GuestReg *r;
1127         uvlong val;
1128         int s;
1129         char *p;
1130         
1131         p0 = va_arg(va, char *);
1132         e = va_arg(va, char *);
1133         p = p0;
1134         for(r = guestregs; r < guestregs + nelem(guestregs); r++)
1135                 if(r->read != nil){
1136                         p = seprint(p, e, "%s ", r->name);
1137                         p = r->read(p, e);
1138                         p = strecpy(p, e, "\n");
1139                 }else{
1140                         if(r->offset >= 0)
1141                                 val = vmcsread(r->offset);
1142                         else
1143                                 val = *(uintptr*)((uchar*)&vmx + ~r->offset);
1144                         s = r->size;
1145                         if(s == 0) s = sizeof(uintptr);
1146                         p = seprint(p, e, "%s %#.*llux\n", r->name, s * 2, val);
1147                 }
1148         return p - p0;
1149 }
1150
1151 static int
1152 setregs(char *p0, char rs, char *fs)
1153 {
1154         char *p, *q, *rp;
1155         char *f[10];
1156         GuestReg *r;
1157         uvlong val;
1158         int sz;
1159         int rc;
1160
1161         p = p0;
1162         for(;;){
1163                 q = strchr(p, rs);
1164                 if(q == 0) break;
1165                 *q = 0;
1166                 rc = getfields(p, f, nelem(f), 1, fs);
1167                 p = q + 1;
1168                 if(rc == 0) continue;
1169                 if(rc != 2) error("number of fields wrong");
1170                 
1171                 for(r = guestregs; r < guestregs + nelem(guestregs); r++)
1172                         if(strcmp(r->name, f[0]) == 0)
1173                                 break;
1174                 if(r == guestregs + nelem(guestregs))
1175                         error("unknown register");
1176                 if(r->write != nil){
1177                         r->write(f[1]);
1178                         continue;
1179                 }
1180                 val = strtoull(f[1], &rp, 0);
1181                 sz = r->size;
1182                 if(sz == 0) sz = sizeof(uintptr);
1183                 if(rp == f[1] || *rp != 0) error("invalid value");
1184                 if(r->offset >= 0)
1185                         vmcswrite(r->offset, val);
1186                 else{
1187                         assert((u32int)~r->offset + sz <= sizeof(Vmx)); 
1188                         switch(sz){
1189                         case 1: *(u8int*)((u8int*)&vmx + (u32int)~r->offset) = val; break;
1190                         case 2: *(u16int*)((u8int*)&vmx + (u32int)~r->offset) = val; break;
1191                         case 4: *(u32int*)((u8int*)&vmx + (u32int)~r->offset) = val; break;
1192                         case 8: *(u64int*)((u8int*)&vmx + (u32int)~r->offset) = val; break;
1193                         default: error(Egreg);
1194                         }
1195                 }
1196         }
1197         return p - p0;
1198 }
1199
1200 static int
1201 cmdsetregs(VmCmd *, va_list va)
1202 {
1203         return setregs(va_arg(va, char *), '\n', " \t");
1204 }
1205
1206 static int
1207 cmdgetfpregs(VmCmd *, va_list va)
1208 {
1209         uchar *p;
1210         
1211         p = va_arg(va, uchar *);
1212         memmove(p, vmx.fp, sizeof(FPsave));
1213         return sizeof(FPsave);
1214 }
1215
1216 static int
1217 cmdsetfpregs(VmCmd *, va_list va)
1218 {
1219         uchar *p;
1220         ulong n;
1221         vlong off;
1222         
1223         p = va_arg(va, uchar *);
1224         n = va_arg(va, ulong);
1225         off = va_arg(va, vlong);
1226         if(off < 0 || off >= sizeof(FPsave)) n = 0;
1227         else if(off + n > sizeof(FPsave)) n = sizeof(FPsave) - n;
1228         memmove((uchar*)vmx.fp + off, p, n);
1229         return n;
1230 }
1231
1232 static int
1233 cmdgo(VmCmd *, va_list va)
1234 {
1235         char *r;
1236
1237         if(vmx.state != VMXREADY)
1238                 error("VM not ready");
1239         r = va_arg(va, char *);
1240         if(r != nil) setregs(r, ';', "=");
1241         vmx.state = VMXRUNNING;
1242         return 0;
1243 }
1244
1245 static int
1246 cmdstop(VmCmd *, va_list)
1247 {
1248         if(vmx.state != VMXREADY && vmx.state != VMXRUNNING)
1249                 error("VM not ready or running");
1250         vmx.state = VMXREADY;
1251         return 0;
1252 }
1253
1254 static int
1255 cmdstatus(VmCmd *, va_list va)
1256 {       
1257         kstrcpy(va_arg(va, char *), vmx.errstr, ERRMAX);
1258         return vmx.state;
1259 }
1260
/*
 * Names of VM exit reasons, indexed by the low 16 bits of the exit
 * reason (see cmdwait).  Holes in the table are nil and are printed
 * by number instead.
 * NOTE(review): names starting with '.' appear to mark exits caused
 * by executing the named instruction -- confirm with the consumer of
 * the wait file.
 */
static char *exitreasons[] = {
        [0] "exc", [1] "extirq", [2] "triplef", [3] "initsig", [4] "sipi", [5] "smiio", [6] "smiother", [7] "irqwin",
        [8] "nmiwin", [9] "taskswitch", [10] ".cpuid", [11] ".getsec", [12] ".hlt", [13] ".invd", [14] ".invlpg", [15] ".rdpmc",
        [16] ".rdtsc", [17] ".rsm", [18] ".vmcall", [19] ".vmclear", [20] ".vmlaunch", [21] ".vmptrld", [22] ".vmptrst", [23] ".vmread",
        [24] ".vmresume", [25] ".vmwrite", [26] ".vmxoff", [27] ".vmxon", [28] "movcr", [29] ".movdr", [30] "io", [31] ".rdmsr",
        [32] ".wrmsr", [33] "entrystate", [34] "entrymsr", [36] ".mwait", [37] "monitortrap", [39] ".monitor",
        [40] ".pause", [41] "mcheck", [43] "tpr", [44] "apicacc", [45] "eoi", [46] "gdtr_idtr", [47] "ldtr_tr",
        [48] "eptfault", [49] "eptinval", [50] ".invept", [51] ".rdtscp", [52] "preempt", [53] ".invvpid", [54] ".wbinvd", [55] ".xsetbv",
        [56] "apicwrite", [57] ".rdrand", [58] ".invpcid", [59] ".vmfunc", [60] ".encls", [61] ".rdseed", [62] "pmlfull", [63] ".xsaves",
        [64] ".xrstors", 
};
1272
/*
 * Names of exceptions, indexed by vector number.  Used by cmdwait to
 * print the vector of an exception exit and by eventparse to turn a
 * name back into a vector.  Holes in the table are nil.
 */
static char *except[] = {
        [0] "#de", [1] "#db", [3] "#bp", [4] "#of", [5] "#br", [6] "#ud", [7] "#nm",
        [8] "#df", [10] "#ts", [11] "#np", [12] "#ss", [13] "#gp", [14] "#pf",
        [16] "#mf", [17] "#ac", [18] "#mc", [19] "#xm", [20] "#ve",
};
1278
/*
 * Produce the next event message for a reader of the wait file.
 * va holds the start and the end of the output buffer.
 *
 * A pending interrupt acknowledgement is reported first, as
 * "*ack vector".  Otherwise, if no VM exit has been recorded, the
 * command postpones itself (CMDFPOSTP) and returns -1.  Otherwise
 * the recorded exit is consumed and described on one line.
 *
 * Returns the length of the message; raises Eintr if the waiting
 * process was interrupted (cp->scratched).
 */
static int
cmdwait(VmCmd *cp, va_list va)
{
        char *p, *p0, *e;
        u32int reason, intr;
        uvlong qual;
        u16int rno;

        if(cp->scratched)
                error(Eintr);
        p0 = p = va_arg(va, char *);
        e = va_arg(va, char *);
        if((vmx.got & GOTIRQACK) != 0){
                p = seprint(p, e, "*ack %d\n", vmx.irqack.info & 0xff);
                vmx.got &= ~GOTIRQACK;
                return p - p0;
        }
        if((vmx.got & GOTEXIT) == 0){
                /* nothing to report yet; runcmd will call us again */
                cp->flags |= CMDFPOSTP;
                return -1;
        }
        vmx.got &= ~GOTEXIT;
        reason = vmcsread(VM_EXREASON);
        qual = vmcsread(VM_EXQUALIF);
        rno = reason;   /* keeps only the low 16 bits */
        intr = vmcsread(VM_EXINTRINFO);
        /* bit 31 of the exit reason set: message is prefixed with '!' */
        if((reason & 1<<31) != 0)
                p = seprint(p, e, "!");
        /* exception exits are named after the vector, others after the reason */
        if(rno == 0 && (intr & 1<<31) != 0){
                if((intr & 0xff) >= nelem(except) || except[intr & 0xff] == nil)
                        p = seprint(p, e, "#%d ", intr & 0xff);
                else
                        p = seprint(p, e, "%s ", except[intr & 0xff]);
        }else if(rno >= nelem(exitreasons) || exitreasons[rno] == nil)
                p = seprint(p, e, "?%d ", rno);
        else
                p = seprint(p, e, "%s ", exitreasons[rno]);
        p = seprint(p, e, "%#ullx pc %#ullx sp %#ullx ilen %#ullx iinfo %#ullx", qual, vmcsread(GUEST_RIP), vmcsread(GUEST_RSP), vmcsread(VM_EXINSTRLEN), vmcsread(VM_EXINSTRINFO));
        /* optional fields, depending on the kind of exit */
        if((intr & 1<<11) != 0) p = seprint(p, e, " excode %#ullx", vmcsread(VM_EXINTRCODE));
        if(rno == 48 && (qual & 0x80) != 0) p = seprint(p, e, " va %#ullx", vmcsread(VM_GUESTVA));
        if(rno == 48 || rno == 49) p = seprint(p, e, " pa %#ullx", vmcsread(VM_GUESTPA));
        if(rno == 30) p = seprint(p, e, " ax %#ullx", (uvlong)vmx.ureg.ax);
        p = seprint(p, e, "\n");
        return p - p0;
}
1324
1325 static int
1326 cmdstep(VmCmd *cp, va_list va)
1327 {
1328         switch(cp->retval){
1329         case 0:
1330                 if((vmx.got & GOTSTEP) != 0 || (vmx.onentry & STEP) != 0)
1331                         error(Einuse);
1332                 if(vmx.state != VMXREADY){
1333                         print("pre-step in state %s\n", statenames[vmx.state]);
1334                         error("not ready");
1335                 }
1336                 vmx.stepmap = va_arg(va, VmMem *);
1337                 vmx.onentry |= STEP;
1338                 vmx.state = VMXRUNNING;
1339                 cp->flags |= CMDFPOSTP;
1340                 return 1;
1341         case 1:
1342                 if(vmx.state != VMXREADY){
1343                         print("post-step in state %s\n", statenames[vmx.state]);
1344                         vmx.onentry &= ~STEP;
1345                         vmx.got &= ~(GOTSTEP|GOTSTEPERR);
1346                         error("not ready");
1347                 }
1348                 if((vmx.got & GOTSTEP) == 0){
1349                         cp->flags |= CMDFPOSTP;
1350                         return 1;
1351                 }
1352                 if((vmx.got & GOTSTEPERR) != 0){
1353                         vmx.got &= ~(GOTSTEP|GOTSTEPERR);
1354                         error("step failed");
1355                 }
1356                 vmx.got &= ~(GOTSTEP|GOTSTEPERR);
1357                 return 1;
1358         }
1359         return 0;
1360 }
1361
1362 static void
1363 eventparse(char *p, VmIntr *vi)
1364 {
1365         char *q, *r;
1366         int i;
1367         
1368         memset(vi, 0, sizeof(VmIntr));
1369         q = nil;
1370         kstrdup(&q, p);
1371         if(waserror()){
1372                 free(q);
1373                 memset(vi, 0, sizeof(VmIntr));
1374                 nexterror();
1375         }
1376         vi->info = 1<<31;
1377         r = strchr(q, ',');
1378         if(r != nil) *r++ = 0;
1379         for(i = 0; i < nelem(except); i++)
1380                 if(except[i] != nil && strcmp(except[i], q) == 0)
1381                         break;
1382         if(*q == '#'){
1383                 q++;
1384                 vi->info |= 3 << 8;
1385         }
1386         if(i == nelem(except)){
1387                 i = strtoul(q, &q, 10);
1388                 if(*q != 0 || i > 255) error(Ebadctl);
1389         }
1390         vi->info |= i;
1391         if((vi->info & 0x7ff) == 3 || (vi->info & 0x7ff) == 4)
1392                 vi->info += 3 << 8;
1393         if(r == nil) goto out;
1394         if(*r != ','){
1395                 vi->code = strtoul(r, &r, 0);
1396                 vi->info |= 1<<11;
1397         }else r++;
1398         if(*r == ',')
1399                 vi->ilen = strtoul(r + 1, &r, 0);
1400         if(*r != 0) error(Ebadctl);
1401 out:
1402         poperror();
1403         free(q);
1404 }
1405
1406 static int
1407 cmdexcept(VmCmd *cp, va_list va)
1408 {
1409         if(cp->scratched) error(Eintr);
1410         if((vmx.onentry & POSTEX) != 0){
1411                 cp->flags |= CMDFPOSTP;
1412                 return 0;
1413         }
1414         eventparse(va_arg(va, char *), &vmx.exc);
1415         vmx.onentry |= POSTEX;
1416         return 0;
1417 }
1418
1419 static int
1420 cmdirq(VmCmd *, va_list va)
1421 {
1422         char *p;
1423         VmIntr vi;
1424         
1425         p = va_arg(va, char *);
1426         if(p == nil)
1427                 vmx.onentry &= ~POSTIRQ;
1428         else{
1429                 eventparse(p, &vi);
1430                 vmx.irq = vi;
1431                 vmx.onentry |= POSTIRQ;
1432         }
1433         return 0;
1434 }
1435
1436
1437 static int
1438 gotcmd(void *)
1439 {
1440         int rc;
1441
1442         ilock(&vmx.cmdlock);
1443         rc = vmx.firstcmd != nil;
1444         iunlock(&vmx.cmdlock);
1445         return rc;
1446 }
1447
1448 static void
1449 markcmddone(VmCmd *p, VmCmd ***pp)
1450 {
1451         if((p->flags & (CMDFFAIL|CMDFPOSTP)) == CMDFPOSTP){
1452                 **pp = p;
1453                 *pp = &p->next;
1454         }else{
1455                 p->flags = p->flags & ~CMDFPOSTP;
1456                 cmdrelease(p, 0);
1457         }
1458 }
1459
1460 static VmCmd **
1461 markppcmddone(VmCmd **pp)
1462 {
1463         VmCmd *p;
1464         
1465         p = *pp;
1466         if((p->flags & (CMDFFAIL|CMDFPOSTP)) == CMDFPOSTP)
1467                 return &p->next;
1468         *pp = p->next;
1469         p->next = nil;
1470         p->flags = p->flags & ~CMDFPOSTP;
1471         cmdrelease(p, 0);
1472         return pp;
1473 }
1474
1475
/*
 * Run all outstanding commands in the context of the VM kproc.
 *
 * First every postponed command is tried again; those that postpone
 * themselves once more stay on vmx.postponed.  Then the queue of
 * newly submitted commands is drained, taking one command at a time
 * off the queue with vmx.cmdlock held; commands that postpone
 * themselves are appended to the postponed list through pp, which
 * after the first loop points at that list's tail link.
 *
 * A command that raises an error is completed with CMDFFAIL and the
 * error text copied to its errstr.
 */
static void
runcmd(void)
{
        VmCmd *p, **pp;
        
        for(pp = &vmx.postponed; p = *pp, p != nil; ){
                if(waserror()){
                        kstrcpy(p->errstr, up->errstr, ERRMAX);
                        p->flags |= CMDFFAIL;
                        pp = markppcmddone(pp);
                        continue;
                }
                /* the command sets CMDFPOSTP again if it is still not done */
                p->flags &= ~CMDFPOSTP;
                p->retval = p->cmd(p, p->va);
                poperror();
                pp = markppcmddone(pp);
        }
        for(;;){
                ilock(&vmx.cmdlock);
                p = vmx.firstcmd;
                if(p == nil){
                        iunlock(&vmx.cmdlock);
                        break;
                }
                vmx.firstcmd = p->next;
                /* p was the last entry: the queue is empty again */
                if(vmx.lastcmd == &p->next)
                        vmx.lastcmd = &vmx.firstcmd;
                iunlock(&vmx.cmdlock);
                p->next = nil;
                if(waserror()){
                        kstrcpy(p->errstr, up->errstr, ERRMAX);
                        p->flags |= CMDFFAIL;
                        markcmddone(p, &pp);
                        continue;
                }
                /* the issuer was interrupted before we got to its command */
                if(p->scratched) error(Eintr);
                p->retval = p->cmd(p, p->va);
                poperror();
                markcmddone(p, &pp);
        }
}
1517
1518 static void
1519 dostep(int setup)
1520 {
1521         static uvlong oldmap;
1522         static uvlong *mapptr;
1523
1524         if(setup){
1525                 if(vmx.stepmap != nil){
1526                         mapptr = eptwalk(vmx.stepmap->lo);
1527                         oldmap = *mapptr;
1528                         epttranslate(vmx.stepmap);
1529                 }
1530         }else{
1531                 vmcswrite(PROCB_CTLS, vmcsread(PROCB_CTLS) & ~(uvlong)PROCB_MONTRAP);
1532                 if(vmx.stepmap != nil){
1533                         *mapptr = oldmap;
1534                         vmx.stepmap = nil;
1535                         vmx.onentry |= FLUSHEPT;
1536                 }
1537         }
1538 }
1539
/*
 * Body of the "kvmx" kernel process, which owns the VM.
 *
 * It initializes VMX once, then loops forever: run the outstanding
 * commands, and either enter the guest (state VMXRUNNING) and digest
 * the resulting exit, or sleep until a command arrives.
 *
 * Any error raised inside the loop ends up in the while(waserror())
 * handler below, which records the message in vmx.errstr, marks the
 * VM VMXDEAD and resumes the loop, so commands are still served.
 */
static void
vmxproc(void *)
{
        int init, rc, x;
        u32int procbctls, defprocbctls;
        vlong v;

        /* NOTE(review): presumably pins this process to processor 0 -- confirm */
        procwired(up, 0);
        sched();
        init = 0;
        defprocbctls = 0;
        while(waserror()){
                kstrcpy(vmx.errstr, up->errstr, ERRMAX);
                vmx.state = VMXDEAD;
        }
        for(;;){
                if(!init){
                        /* set first, so a failing vmxstart is not retried */
                        init = 1;
                        vmxstart();
                        vmx.state = VMXREADY;
                        defprocbctls = vmcsread(PROCB_CTLS);
                }
                runcmd();
                if(vmx.state == VMXRUNNING){
                        procbctls = defprocbctls;
                        if((vmx.onentry & STEP) != 0){
                                procbctls |= PROCB_MONTRAP;
                                dostep(1);
                                /* undo the step setup if the entry below fails */
                                if(waserror()){
                                        dostep(0);
                                        nexterror();
                                }
                        }
                        if((vmx.onentry & POSTEX) != 0){
                                vmcswrite(VMENTRY_INTRINFO, vmx.exc.info);
                                vmcswrite(VMENTRY_INTRCODE, vmx.exc.code);
                                vmcswrite(VMENTRY_INTRILEN, vmx.exc.ilen);
                                vmx.onentry &= ~POSTEX;
                        }
                        /* interrupts are not posted while stepping */
                        if((vmx.onentry & POSTIRQ) != 0 && (vmx.onentry & STEP) == 0){
                                /*
                                 * inject now if the guest has bit 9 of RFLAGS set and
                                 * the low two bits of GUEST_CANINTR clear; otherwise
                                 * request an exit when the interrupt window opens.
                                 */
                                if((vmx.onentry & POSTEX) == 0 && (vmcsread(GUEST_RFLAGS) & 1<<9) != 0 && (vmcsread(GUEST_CANINTR) & 3) == 0){
                                        vmcswrite(VMENTRY_INTRINFO, vmx.irq.info);
                                        vmcswrite(VMENTRY_INTRCODE, vmx.irq.code);
                                        vmcswrite(VMENTRY_INTRILEN, vmx.irq.ilen);
                                        vmx.onentry &= ~POSTIRQ;
                                        vmx.got |= GOTIRQACK;
                                        vmx.irqack = vmx.irq;
                                }else
                                        procbctls |= PROCB_IRQWIN;
                        }
                        if((vmx.onentry & FLUSHVPID) != 0){
                                if(invvpid(INVLOCAL, vmx.vpid, 0) < 0)
                                        error("invvpid failed");
                                vmx.onentry &= ~FLUSHVPID;
                        }
                        if((vmx.onentry & FLUSHEPT) != 0){
                                if(invept(INVLOCAL, PADDR(vmx.pml4) | 3<<3, 0) < 0)
                                        error("invept failed");
                                vmx.onentry &= ~FLUSHEPT;
                        }
                        vmcswrite(PROCB_CTLS, procbctls);
                        vmx.got &= ~GOTEXIT;
                        
                        /* swap in guest state and enter the guest at splhi */
                        x = splhi();
                        if(sizeof(uintptr) == 8){
                                rdmsr(FSbase, &v);
                                vmwrite(HOST_FSBASE, v);
                        }
                        if((vmx.dr[7] & ~0xd400) != 0)
                                putdr01236(vmx.dr);
                        fpsserestore(vmx.fp);
                        putcr2(vmx.cr2);
                        rc = vmlaunch(&vmx.ureg, vmx.launched);
                        vmx.cr2 = getcr2();
                        fpssesave(vmx.fp);
                        splx(x);
                        if(rc < 0)
                                error("vmlaunch failed");
                        vmx.launched = 1;
                        if((vmx.onentry & STEP) != 0){
                                dostep(0);
                                poperror();
                        }
                        processexit();
                }else{
                        up->psstate = "Idle";
                        sleep(&vmx.cmdwait, gotcmd, nil);
                        up->psstate = nil;
                }
        }
}
1631
/* qid paths of the files served by this device (see vmxdir) */
enum {
        Qdir,
        Qctl,
        Qregs,
        Qstatus,
        Qmap,
        Qwait,
        Qfpregs,
};
1641
/*
 * Directory of the device, handed to devwalk/devstat/devopen/devdirread.
 * NOTE(review): columns are presumably name, qid, length and
 * permissions, as in other Dirtab tables -- confirm against Dirtab.
 */
static Dirtab vmxdir[] = {
        ".",            { Qdir, 0, QTDIR },     0,              0550,
        "ctl",          { Qctl, 0, 0 },         0,              0660,
        "regs",         { Qregs, 0, 0 },        0,              0660,
        "status",       { Qstatus, 0, 0 },      0,              0440,
        "map",          { Qmap, 0, 0 },         0,              0660,
        "wait",         { Qwait, 0, 0 },        0,              0440,
        "fpregs",       { Qfpregs, 0, 0 },      0,              0660,
};
1651
/* indices of the messages accepted by the ctl file (see vmxctlmsg) */
enum {
        CMinit,
        CMquit,
        CMgo,
        CMstop,
        CMstep,
        CMexc,
        CMirq,
};
1661
/*
 * Messages understood by the ctl file, looked up with lookupcmd().
 * NOTE(review): the third column is presumably the required number
 * of fields including the message name, with 0 meaning "any" --
 * confirm against Cmdtab/lookupcmd.
 */
static Cmdtab vmxctlmsg[] = {
        CMinit,         "init",         1,
        CMquit,         "quit",         1,
        CMgo,           "go",           0,
        CMstop,         "stop",         1,
        CMstep,         "step",         0,
        CMexc,          "exc",          2,
        CMirq,          "irq",          0,
};
1671
1672 static int
1673 iscmddone(void *cp)
1674 {
1675         return (((VmCmd*)cp)->flags & CMDFDONE) != 0;
1676 }
1677
/*
 * Have the VM kproc execute f with the remaining arguments and wait
 * for the result.
 *
 * The command lives on the caller's stack; it is appended to the
 * queue under vmx.cmdlock, the kproc is woken up, and the caller
 * sleeps until the command is marked done.  If the sleep is
 * interrupted the command is flagged as scratched and the caller
 * goes on waiting, because the kproc still refers to the command.
 *
 * Returns the command's return value.  Raises "no VM" when no VM
 * exists, Equit when the VM is shutting down, and the command's own
 * error (found in up->errstr) when it failed.
 */
static int
vmxcmd(int (*f)(VmCmd *, va_list), ...)
{
        VmCmd cmd;
        
        if(vmx.state == VMXINACTIVE)
                error("no VM");
        if(vmx.state == VMXENDING)
        ending:         /* also reached by goto from the locked re-check below */
                error(Equit);
        memset(&cmd, 0, sizeof(VmCmd));
        cmd.errstr = up->errstr;
        cmd.cmd = f;
        va_start(cmd.va, f);
         
        ilock(&vmx.cmdlock);
        /* check again, now that nobody can change the queue under us */
        if(vmx.state == VMXENDING){
                iunlock(&vmx.cmdlock);
                goto ending;
        }
        *vmx.lastcmd = &cmd;
        vmx.lastcmd = &cmd.next;
        iunlock(&vmx.cmdlock);
        
        while(waserror())
                cmd.scratched = 1;
        wakeup(&vmx.cmdwait);
        do
                sleep(&cmd, iscmddone, &cmd);
        while(!iscmddone(&cmd));
        poperror();
        /*
         * cmdrelease sets the flags and calls wakeup with cmd locked;
         * taking the lock once here presumably makes sure it has let
         * go of cmd before this stack frame disappears.
         */
        lock(&cmd);
        unlock(&cmd);
        if((cmd.flags & CMDFFAIL) != 0)
                error(up->errstr);
        return cmd.retval;
}
1715
1716 static Chan *
1717 vmxattach(char *spec)
1718 {
1719         if(vmx.state == NOVMX) error(Enodev);
1720         return devattach('X', spec);
1721 }
1722
1723 static Walkqid*
1724 vmxwalk(Chan *c, Chan *nc, char **name, int nname)
1725 {
1726         return devwalk(c, nc, name, nname, vmxdir, nelem(vmxdir), devgen);
1727 }
1728
1729 static int
1730 vmxstat(Chan *c, uchar *dp, int n)
1731 {
1732         return devstat(c, dp, n, vmxdir, nelem(vmxdir), devgen);
1733 }
1734
1735 static Chan*
1736 vmxopen(Chan* c, int omode)
1737 {
1738         Chan *ch;
1739
1740         if(c->qid.path != Qdir && !iseve()) error(Eperm);
1741         ch = devopen(c, omode, vmxdir, nelem(vmxdir), devgen);
1742         if(ch->qid.path == Qmap){
1743                 if((omode & OTRUNC) != 0)
1744                         vmxcmd(cmdclearmeminfo);
1745         }
1746         return ch;
1747 }
1748
/* Closing a file of the device needs no action. */
static void
vmxclose(Chan*)
{
}
1753
/*
 * Read from a file of the device.
 *
 * regs and map are regenerated into static buffers when read at
 * offset 0 and served from those buffers at other offsets.
 * status yields the name of the VM state, followed by the quoted
 * error string when the VM is dead.  wait blocks until an event is
 * available and returns one message, cut to n bytes.  fpregs yields
 * the saved guest floating point state.
 *
 * Returns the number of bytes read; raises Egreg for an unknown file.
 */
static long
vmxread(Chan* c, void* a, long n, vlong off)
{
        /* NOTE(review): shared by all readers without a lock -- confirm that is acceptable */
        static char regbuf[4096];
        static char membuf[4096];
        int rc;

        switch((ulong)c->qid.path){
        case Qdir:
                return devdirread(c, a, n, vmxdir, nelem(vmxdir), devgen);
        case Qregs:
                if(off == 0)
                        vmxcmd(cmdgetregs, regbuf, regbuf + sizeof(regbuf));
                return readstr(off, a, n, regbuf);
        case Qmap:
                if(off == 0)
                        vmxcmd(cmdgetmeminfo, membuf, membuf + sizeof(membuf));
                return readstr(off, a, n, membuf);
        case Qstatus:
                {
                        char buf[ERRMAX+128];
                        char errbuf[ERRMAX];
                        int status;
                        
                        status = vmx.state;
                        if(status == VMXDEAD){
                                /* fetch the error text from the VM kproc */
                                vmxcmd(cmdstatus, errbuf);
                                snprint(buf, sizeof(buf), "%s %#q\n", statenames[status], errbuf);
                        }else if(status >= 0 && status < nelem(statenames))
                                snprint(buf, sizeof(buf), "%s\n", statenames[status]);
                        else
                                snprint(buf, sizeof(buf), "%d\n", status);
                        return readstr(off, a, n, buf);
                }
        case Qwait:
                {
                        char buf[512];
                        
                        rc = vmxcmd(cmdwait, buf, buf + sizeof(buf));
                        if(rc > n) rc = n;
                        if(rc > 0) memmove(a, buf, rc);
                        return rc;
                }
        case Qfpregs:
                {
                        char buf[sizeof(FPsave)];
                        
                        vmxcmd(cmdgetfpregs, buf);
                        /* clip the request to the bytes available from off */
                        if(n < 0 || off < 0 || off >= sizeof(buf)) n = 0;
                        else if(off + n > sizeof(buf)) n = sizeof(buf) - off;
                        if(n != 0) memmove(a, buf + off, n);
                        return n;
                }
        default:
                error(Egreg);
                break;
        }
        return 0;
}
1813
1814 static long
1815 vmxwrite(Chan* c, void* a, long n, vlong off)
1816 {
1817         static QLock initlock;
1818         Cmdbuf *cb;
1819         Cmdtab *ct;
1820         char *s;
1821         int rc;
1822         int i;
1823         VmMem tmpmem;
1824
1825         switch((ulong)c->qid.path){
1826         case Qdir:
1827                 error(Eperm);
1828         case Qctl:
1829                 cb = parsecmd(a, n);
1830                 if(waserror()){
1831                         free(cb);
1832                         nexterror();
1833                 }
1834                 ct = lookupcmd(cb, vmxctlmsg, nelem(vmxctlmsg));
1835                 switch(ct->index){
1836                 case CMinit:
1837                         qlock(&initlock);
1838                         if(waserror()){
1839                                 qunlock(&initlock);
1840                                 nexterror();
1841                         }
1842                         if(vmx.state != VMXINACTIVE)
1843                                 error("vmx already active");
1844                         vmx.state = VMXINIT;
1845                         kproc("kvmx", vmxproc, nil);
1846                         poperror();
1847                         qunlock(&initlock);
1848                         if(vmxcmd(cmdstatus, up->errstr) == VMXDEAD)
1849                                 error(up->errstr);
1850                         break;
1851                 case CMquit:
1852                         vmxcmd(cmdquit);
1853                         break;
1854                 case CMgo:
1855                         s = nil;
1856                         if(cb->nf == 2) kstrdup(&s, cb->f[1]);
1857                         else if(cb->nf != 1) error(Ebadarg);
1858                         if(waserror()){
1859                                 free(s);
1860                                 nexterror();
1861                         }
1862                         vmxcmd(cmdgo, s);
1863                         poperror();
1864                         free(s);
1865                         break;
1866                 case CMstop:
1867                         vmxcmd(cmdstop);
1868                         break;
1869                 case CMstep:
1870                         rc = 0;
1871                         for(i = 1; i < cb->nf; i++)
1872                                 if(strcmp(cb->f[i], "-map") == 0){
1873                                         rc = 1;
1874                                         if(i+4 > cb->nf) error("missing argument");
1875                                         memset(&tmpmem, 0, sizeof(tmpmem));
1876                                         tmpmem.lo = strtoull(cb->f[i+1], &s, 0);
1877                                         if(*s != 0 || !vmokpage(tmpmem.lo)) error("invalid address");
1878                                         tmpmem.hi = tmpmem.lo + BY2PG;
1879                                         tmpmem.attr = 0x407;
1880                                         tmpmem.seg = _globalsegattach(cb->f[i+2]);
1881                                         if(tmpmem.seg == nil) error("unknown segment");
1882                                         tmpmem.off = strtoull(cb->f[i+3], &s, 0);
1883                                         if(*s != 0 || !vmokpage(tmpmem.off)) error("invalid offset");
1884                                         i += 3;
1885                                 }else
1886                                         error(Ebadctl);
1887                         vmxcmd(cmdstep, rc ? &tmpmem : nil);
1888                         break;
1889                 case CMexc:
1890                         s = nil;
1891                         kstrdup(&s, cb->f[1]);
1892                         if(waserror()){
1893                                 free(s);
1894                                 nexterror();
1895                         }
1896                         vmxcmd(cmdexcept, s);
1897                         poperror();
1898                         free(s);
1899                         break;
1900                 case CMirq:
1901                         s = nil;
1902                         if(cb->nf == 2)
1903                                 kstrdup(&s, cb->f[1]);
1904                         if(waserror()){
1905                                 free(s);
1906                                 nexterror();
1907                         }
1908                         vmxcmd(cmdirq, s);
1909                         poperror();
1910                         free(s);
1911                         break;
1912                 default:
1913                         error(Egreg);
1914                 }
1915                 poperror();
1916                 free(cb);
1917                 break;
1918         case Qmap:
1919         case Qregs:
1920                 s = malloc(n+1);
1921                 if(s == nil) error(Enomem);
1922                 if(waserror()){
1923                         free(s);
1924                         nexterror();
1925                 }
1926                 memmove(s, a, n);
1927                 s[n] = 0;
1928                 rc = vmxcmd((ulong)c->qid.path == Qregs ? cmdsetregs : cmdsetmeminfo, s);
1929                 poperror();
1930                 free(s);
1931                 return rc;
1932         case Qfpregs:
1933                 {
1934                         char buf[sizeof(FPsave)];
1935                         
1936                         if(n > sizeof(FPsave)) n = sizeof(FPsave);
1937                         memmove(buf, a, n);
1938                         return vmxcmd(cmdsetfpregs, buf, n, off);
1939                 }
1940         default:
1941                 error(Egreg);
1942                 break;
1943         }
1944         return n;
1945 }
1946
/*
 * Device table entry for the vmx device.
 * This is a positional initializer, so the order of the entries must
 * match the member order of Dev (declared outside this file; the slot
 * descriptions below are inferred from the handler names -- confirm
 * against the Dev declaration).
 */
1947 Dev vmxdevtab = {
1948         'X',	/* presumably the device character -- confirm */
1949         "vmx",	/* presumably the device name -- confirm */
1950         
1951         vmxreset,
1952         devinit,
1953         vmxshutdown,
1954         vmxattach,
1955         vmxwalk,
1956         vmxstat,
1957         vmxopen,
1958         devcreate,
1959         vmxclose,
1960         vmxread,	/* read handler defined in this file */
1961         devbread,
1962         vmxwrite,	/* write handler defined in this file */
1963         devbwrite,
1964         devremove,
1965         devwstat,
1966 };