2 #include "../port/lib.h"
6 #include "../port/error.h"
9 extern int vmxon(u64int);
10 extern int vmxoff(void);
11 extern int vmclear(u64int);
12 extern int vmptrld(u64int);
13 extern int vmlaunch(Ureg *, int);
14 extern int vmread(u32int, uintptr *);
15 extern int vmwrite(u32int, uintptr);
16 extern int invept(u32int, uvlong, uvlong);
17 extern int invvpid(u32int, uvlong, uvlong);
20 static vlong procb_ctls, pinb_ctls;
23 VMX_BASIC_MSR = 0x480,
24 VMX_PINB_CTLS_MSR = 0x481,
25 VMX_PROCB_CTLS_MSR = 0x482,
26 VMX_VMEXIT_CTLS_MSR = 0x483,
27 VMX_VMENTRY_CTLS_MSR = 0x484,
29 VMX_CR0_FIXED0 = 0x486,
30 VMX_CR0_FIXED1 = 0x487,
31 VMX_CR4_FIXED0 = 0x488,
32 VMX_CR4_FIXED1 = 0x489,
33 VMX_VMCS_ENUM = 0x48A,
34 VMX_PROCB_CTLS2_MSR = 0x48B,
35 VMX_TRUE_PINB_CTLS_MSR = 0x48D,
36 VMX_TRUE_PROCB_CTLS_MSR = 0x48E,
37 VMX_TRUE_EXIT_CTLS_MSR = 0x48F,
38 VMX_TRUE_ENTRY_CTLS_MSR = 0x490,
39 VMX_VMFUNC_MSR = 0x491,
48 PROCB_EXITINVLPG = 1<<9,
49 PROCB_EXITMWAIT = 1<<10,
50 PROCB_EXITRDPMC = 1<<11,
51 PROCB_EXITRDTSC = 1<<12,
52 PROCB_EXITCR3LD = 1<<15,
53 PROCB_EXITCR3ST = 1<<16,
54 PROCB_EXITCR8LD = 1<<19,
55 PROCB_EXITCR8ST = 1<<20,
56 PROCB_EXITMOVDR = 1<<23,
58 PROCB_MONTRAP = 1<<27,
59 PROCB_MSRBITMAP = 1<<28,
60 PROCB_EXITMONITOR = 1<<29,
61 PROCB_EXITPAUSE = 1<<30,
62 PROCB_USECTLS2 = 1<<31,
72 PFAULT_MATCH = 0x4008,
77 VMEXIT_ST_DEBUG = 1<<2,
79 VMEXIT_LD_IA32_PERF_GLOBAL_CTRL = 1<<12,
80 VMEXIT_ST_IA32_PAT = 1<<18,
81 VMEXIT_LD_IA32_PAT = 1<<19,
82 VMEXIT_ST_IA32_EFER = 1<<20,
83 VMEXIT_LD_IA32_EFER = 1<<21,
85 VMEXIT_MSRSTCNT = 0x400e,
86 VMEXIT_MSRLDCNT = 0x4010,
87 VMEXIT_MSRSTADDR = 0x2006,
88 VMEXIT_MSRLDADDR = 0x2008,
89 VMENTRY_MSRLDADDR = 0x200A,
91 VMENTRY_CTLS = 0x4012,
92 VMENTRY_LD_DEBUG = 1<<2,
93 VMENTRY_GUEST64 = 1<<9,
94 VMENTRY_LD_IA32_PERF_GLOBAL_CTRL = 1<<13,
95 VMENTRY_LD_IA32_PAT = 1<<14,
96 VMENTRY_LD_IA32_EFER = 1<<15,
98 VMENTRY_MSRLDCNT = 0x4014,
99 VMENTRY_INTRINFO = 0x4016,
100 VMENTRY_INTRCODE = 0x4018,
101 VMENTRY_INTRILEN = 0x401a,
116 GUEST_ESLIMIT = 0x4800,
117 GUEST_CSLIMIT = 0x4802,
118 GUEST_SSLIMIT = 0x4804,
119 GUEST_DSLIMIT = 0x4806,
120 GUEST_FSLIMIT = 0x4808,
121 GUEST_GSLIMIT = 0x480A,
122 GUEST_LDTRLIMIT = 0x480C,
123 GUEST_TRLIMIT = 0x480E,
124 GUEST_GDTRLIMIT = 0x4810,
125 GUEST_IDTRLIMIT = 0x4812,
126 GUEST_ESPERM = 0x4814,
127 GUEST_CSPERM = 0x4816,
128 GUEST_SSPERM = 0x4818,
129 GUEST_DSPERM = 0x481A,
130 GUEST_FSPERM = 0x481C,
131 GUEST_GSPERM = 0x481E,
132 GUEST_LDTRPERM = 0x4820,
133 GUEST_TRPERM = 0x4822,
134 GUEST_CR0MASK = 0x6000,
135 GUEST_CR4MASK = 0x6002,
136 GUEST_CR0SHADOW = 0x6004,
137 GUEST_CR4SHADOW = 0x6006,
138 GUEST_ESBASE = 0x6806,
139 GUEST_CSBASE = 0x6808,
140 GUEST_SSBASE = 0x680A,
141 GUEST_DSBASE = 0x680C,
142 GUEST_FSBASE = 0x680E,
143 GUEST_GSBASE = 0x6810,
144 GUEST_LDTRBASE = 0x6812,
145 GUEST_TRBASE = 0x6814,
146 GUEST_GDTRBASE = 0x6816,
147 GUEST_IDTRBASE = 0x6818,
151 GUEST_RFLAGS = 0x6820,
152 GUEST_IA32_DEBUGCTL = 0x2802,
153 GUEST_IA32_PAT = 0x2804,
154 GUEST_IA32_EFER = 0x2806,
155 GUEST_IA32_PERF_GLOBAL_CTRL = 0x2808,
167 HOST_FSBASE = 0x6C06,
168 HOST_GSBASE = 0x6C08,
169 HOST_TRBASE = 0x6C0A,
174 HOST_IA32_PAT = 0x2C00,
175 HOST_IA32_EFER = 0x2C02,
176 HOST_IA32_PERF_GLOBAL_CTRL = 0x2C04,
178 GUEST_CANINTR = 0x4824,
180 VM_INSTRERR = 0x4400,
181 VM_EXREASON = 0x4402,
182 VM_EXINTRINFO = 0x4404,
183 VM_EXINTRCODE = 0x4406,
184 VM_IDTVECINFO = 0x4408,
185 VM_IDTVECCODE = 0x440A,
186 VM_EXINSTRLEN = 0x440C,
187 VM_EXINSTRINFO = 0x440E,
188 VM_EXQUALIF = 0x6400,
206 CR0RSVD = 0x1ffaffc0,
207 CR4RSVD = 0xff889000,
213 CR0KERNEL = CR0RSVD | 0x30 | (uintptr)0xFFFFFFFF00000000ULL,
214 CR4KERNEL = CR4RSVD | CR4VMXE | CR4SMXE | CR4MCE | CR4PKE | (uintptr)0xFFFFFFFF00000000ULL
221 typedef struct VmxMach VmxMach;
222 typedef struct Vmx Vmx;
223 typedef struct VmCmd VmCmd;
224 typedef struct VmMem VmMem;
225 typedef struct VmIntr VmIntr;
237 char vmxon[4096]; /* has to be at the start for alignment */
242 #define vmxmach ((VmxMach*)((m)->vmx))
243 #define vmxmachp(n) ((VmxMach*)(MACHP(n)->vmx))
246 u32int info, code, ilen;
250 uchar vmcs[4096]; /* page aligned */
251 uvlong pml4[512]; /* page aligned */
252 u32int msrbits[1024]; /* page aligned */
253 FPsave fp; /* page aligned */
254 u64int msrhost[MAXMSR*2]; /* 16 byte aligned */
255 u64int msrguest[MAXMSR*2]; /* 16 byte aligned */
268 uintptr dr[8]; /* DR7 is also kept in VMCS */
281 VmCmd *firstcmd, **lastcmd;
289 VmIntr exc, irq, irqack;
303 int (*cmd)(VmCmd *, va_list);
311 static char Equit[] = "vmx: ending";
313 static char *statenames[] = {
316 [VMXRUNNING] "running",
321 static Vmx *moribund;
322 static QLock vmxtablock;
/*
 * Reload the current-VMCS pointer on this CPU when the active Vmx changes.
 * vmx is assigned on lines not shown in this excerpt
 * (presumably derived from p -- TODO confirm).
 */
327 vmxprocrestore(Proc *p)
/* vmxmach->active remembers which Vmx was last recorded for this CPU; nothing to do if unchanged */
334 if(vmxmach->active != vmx){
/* a nil vmx skips the vmptrld; a failing vmptrld is treated as fatal */
335 if(vmx != nil && vmptrld(PADDR(vmx->vmcs)) < 0)
336 panic("VMPTRLD(%p) failed", vmx->vmcs);
337 vmxmach->active = vmx;
/*
 * Read the VMCS field whose encoding is addr into val
 * (val, rc and errbuf are declared on lines not shown in this excerpt).
 */
343 vmcsread(u32int addr)
349 rc = vmread(addr, (uintptr *) &val);
/*
 * When uintptr is 4 bytes and (addr & 0x6000) == 0x2000, a second vmread
 * of addr|1 fills the next uintptr-sized slot of val.
 * NOTE(review): per the Intel SDM this bit pattern marks 64-bit fields and
 * encoding|1 is their high-half access -- confirm against the manual.
 */
350 if(rc >= 0 && sizeof(uintptr) == 4 && (addr & 0x6000) == 0x2000)
351 rc = vmread(addr | 1, (uintptr *) &val + 1);
/* builds the failure message naming the encoding; the guarding condition and how errbuf is reported are not shown */
354 snprint(errbuf, sizeof(errbuf), "vmcsread failed (%#.4ux)", addr);
/*
 * Write val to the VMCS field whose encoding is addr
 * (rc and errbuf are declared on lines not shown in this excerpt).
 */
361 vmcswrite(u32int addr, u64int val)
365 rc = vmwrite(addr, val);
/*
 * When uintptr is 4 bytes and (addr & 0x6000) == 0x2000, the upper 32 bits
 * of val are written separately through addr|1.
 * NOTE(review): per the Intel SDM this bit pattern marks 64-bit fields --
 * confirm against the manual.
 */
366 if(rc >= 0 && sizeof(uintptr) == 4 && (addr & 0x6000) == 0x2000)
367 rc = vmwrite(addr | 1, val >> 32);
/* builds the failure message with encoding and value; the guarding condition and how errbuf is reported are not shown */
370 snprint(errbuf, sizeof(errbuf), "vmcswrite failed (%#.4ux = %#.16ullx)", addr, val);
381 v = strtoull(s, &p, 0);
382 if(p == s || *p != 0) error("invalid value");
/*
 * Format the "cr0fake" register into the buffer [p, e) and return seprint's result.
 * The value combines GUEST_CR0 and GUEST_CR0SHADOW under GUEST_CR0MASK.
 * The Vmx argument is unnamed and unused.
 */
387 cr0fakeread(Vmx *, char *p, char *e)
389 uvlong guest, mask, shadow;
391 guest = vmcsread(GUEST_CR0);
392 mask = vmcsread(GUEST_CR0MASK);
393 shadow = vmcsread(GUEST_CR0SHADOW);
/*
 * Bits set in mask are taken from the shadow, all others from the real
 * register; the precision argument prints two hex digits per byte of uintptr.
 */
394 return seprint(p, e, "%#.*ullx", sizeof(uintptr) * 2, guest & ~mask | shadow & mask);
/*
 * Format the "cr4fake" register into the buffer [p, e) and return seprint's result.
 * The value combines GUEST_CR4 and GUEST_CR4SHADOW under GUEST_CR4MASK.
 * The Vmx argument is unnamed and unused.
 */
398 cr4fakeread(Vmx *, char *p, char *e)
400 uvlong guest, mask, shadow;
402 guest = vmcsread(GUEST_CR4);
403 mask = vmcsread(GUEST_CR4MASK);
404 shadow = vmcsread(GUEST_CR4SHADOW);
/*
 * Bits set in mask are taken from the shadow, all others from the real
 * register; the precision argument prints two hex digits per byte of uintptr.
 */
405 return seprint(p, e, "%#.*ullx", sizeof(uintptr) * 2, guest & ~mask | shadow & mask);
411 uvlong cr0, efer, nefer, ectrl;
413 if(sizeof(uintptr) != 8) return;
414 cr0 = vmcsread(GUEST_CR0);
415 efer = vmcsread(GUEST_IA32_EFER);
416 nefer = efer & ~0x400 | efer << 2 & cr0 >> 21 & 0x400;
417 if(efer == nefer) return;
418 vmcswrite(GUEST_IA32_EFER, nefer);
419 ectrl = vmcsread(VMENTRY_CTLS);
420 ectrl = ectrl & ~0x200 | nefer >> 1 & 0x200;
421 vmcswrite(VMENTRY_CTLS, ectrl);
425 cr0realwrite(Vmx *, char *s)
430 vmcswrite(GUEST_CR0, vmcsread(GUEST_CR0) & CR0KERNEL | v & ~CR0KERNEL);
436 cr0maskwrite(Vmx *, char *s)
441 vmcswrite(GUEST_CR0MASK, v | CR0KERNEL);
446 eferwrite(Vmx *, char *s)
451 vmcswrite(GUEST_IA32_EFER, v);
457 cr4realwrite(Vmx *, char *s)
462 vmcswrite(GUEST_CR4, vmcsread(GUEST_CR4) & CR4KERNEL | v & ~CR4KERNEL);
467 cr4maskwrite(Vmx *, char *s)
472 vmcswrite(GUEST_CR4MASK, v | CR4KERNEL);
477 dr7write(Vmx *vmx, char *s)
481 v = (u32int) parseval(s);
482 vmcswrite(GUEST_DR7, vmx->dr[7] = (u32int) v);
487 readonly(Vmx *, char *)
493 dr6write(Vmx *vmx, char *s)
498 vmx->dr[6] = (u32int) v;
502 typedef struct GuestReg GuestReg;
505 u8int size; /* in bytes; 0 means == uintptr */
507 char *(*read)(Vmx *, char *, char *);
508 int (*write)(Vmx *, char *);
510 #define VMXVAR(x) ~(ulong)&(((Vmx*)0)->x)
511 #define UREG(x) VMXVAR(ureg.x)
512 static GuestReg guestregs[] = {
513 {GUEST_RIP, 0, "pc"},
514 {GUEST_RSP, 0, "sp"},
515 {GUEST_RFLAGS, 0, "flags"},
526 {UREG(r10), 0, "r10"},
527 {UREG(r11), 0, "r11"},
528 {UREG(r12), 0, "r12"},
529 {UREG(r13), 0, "r13"},
530 {UREG(r14), 0, "r14"},
531 {UREG(r15), 0, "r15"},
533 {GUEST_GDTRBASE, 0, "gdtrbase"},
534 {GUEST_GDTRLIMIT, 4, "gdtrlimit"},
535 {GUEST_IDTRBASE, 0, "idtrbase"},
536 {GUEST_IDTRLIMIT, 4, "idtrlimit"},
538 {GUEST_CSBASE, 0, "csbase"},
539 {GUEST_CSLIMIT, 4, "cslimit"},
540 {GUEST_CSPERM, 4, "csperm"},
542 {GUEST_DSBASE, 0, "dsbase"},
543 {GUEST_DSLIMIT, 4, "dslimit"},
544 {GUEST_DSPERM, 4, "dsperm"},
546 {GUEST_ESBASE, 0, "esbase"},
547 {GUEST_ESLIMIT, 4, "eslimit"},
548 {GUEST_ESPERM, 4, "esperm"},
550 {GUEST_FSBASE, 0, "fsbase"},
551 {GUEST_FSLIMIT, 4, "fslimit"},
552 {GUEST_FSPERM, 4, "fsperm"},
554 {GUEST_GSBASE, 0, "gsbase"},
555 {GUEST_GSLIMIT, 4, "gslimit"},
556 {GUEST_GSPERM, 4, "gsperm"},
558 {GUEST_SSBASE, 0, "ssbase"},
559 {GUEST_SSLIMIT, 4, "sslimit"},
560 {GUEST_SSPERM, 4, "ssperm"},
562 {GUEST_TRBASE, 0, "trbase"},
563 {GUEST_TRLIMIT, 4, "trlimit"},
564 {GUEST_TRPERM, 4, "trperm"},
565 {GUEST_LDTR, 2, "ldtr"},
566 {GUEST_LDTRBASE, 0, "ldtrbase"},
567 {GUEST_LDTRLIMIT, 4, "ldtrlimit"},
568 {GUEST_LDTRPERM, 4, "ldtrperm"},
569 {GUEST_CR0, 0, "cr0real", nil, cr0realwrite},
570 {GUEST_CR0SHADOW, 0, "cr0fake", cr0fakeread},
571 {GUEST_CR0MASK, 0, "cr0mask", nil, cr0maskwrite},
572 {VMXVAR(cr2), 0, "cr2"},
573 {GUEST_CR3, 0, "cr3"},
574 {GUEST_CR4, 0, "cr4real", nil, cr4realwrite},
575 {GUEST_CR4SHADOW, 0, "cr4fake", cr4fakeread},
576 {GUEST_CR4MASK, 0, "cr4mask", nil, cr4maskwrite},
577 {GUEST_IA32_PAT, 8, "pat"},
578 {GUEST_IA32_EFER, 8, "efer", nil, eferwrite},
579 {VMXVAR(dr[0]), 0, "dr0"},
580 {VMXVAR(dr[1]), 0, "dr1"},
581 {VMXVAR(dr[2]), 0, "dr2"},
582 {VMXVAR(dr[3]), 0, "dr3"},
583 {VMXVAR(dr[6]), 0, "dr6", nil, dr6write},
584 {GUEST_DR7, 0, "dr7", nil, dr7write},
585 {VM_INSTRERR, 4, "instructionerror", nil, readonly},
586 {VM_EXREASON, 4, "exitreason", nil, readonly},
587 {VM_EXQUALIF, 0, "exitqualification", nil, readonly},
588 {VM_EXINTRINFO, 4, "exitinterruptinfo", nil, readonly},
589 {VM_EXINTRCODE, 4, "exitinterruptcode", nil, readonly},
590 {VM_EXINSTRLEN, 4, "exitinstructionlen", nil, readonly},
591 {VM_EXINSTRINFO, 4, "exitinstructioninfo", nil, readonly},
592 {VM_GUESTVA, 0, "exitva", nil, readonly},
593 {VM_GUESTPA, 0, "exitpa", nil, readonly},
594 {VM_IDTVECINFO, 4, "idtinterruptinfo", nil, readonly},
595 {VM_IDTVECCODE, 4, "idtinterruptcode", nil, readonly},
/*
 * Return nonzero when addr has its low 12 bits clear (4096-byte aligned)
 * and no bits set at or above bit 48; zero otherwise.
 */
599 vmokpage(u64int addr)
601 return (addr & 0xfff) == 0 && addr >> 48 == 0;
/*
 * Return a pointer to the last-level table entry for guest-physical address
 * addr, descending through three upper levels.
 * tab's starting value and the test that decides when a new table is
 * allocated are on lines not shown in this excerpt.
 */
605 eptwalk(Vmx *vmx, uvlong addr)
612 if(tab == nil) error(Egreg);
/* levels 3, 2, 1; the final level is indexed in the return statement below */
613 for(i = 3; i >= 1; i--){
/* + binds tighter than >> and & loosest: index = (addr >> (12 + 9*i)) & 0x1ff, i.e. 9 bits per level */
614 tab += addr >> 12 + 9 * i & 0x1ff;
/* allocation path: a page-sized, page-aligned table, zeroed before use; raises Enomem if unavailable */
617 nt = mallocalign(BY2PG, BY2PG, 0, 0);
618 if(nt == nil) error(Enomem);
619 memset(nt, 0, BY2PG);
/* entry value: physical address of the new table plus flag bits 0x407 (meaning of the bits not documented here) */
620 v = PADDR(nt) | 0x407;
/* follow the entry: strip the low 12 flag bits and map back to a kernel virtual address */
623 tab = KADDR(v & ~0xfff);
/* slot for addr in the last-level table (address bits 12..20) */
625 return tab + (addr >> 12 & 0x1ff);
/*
 * Recursively visit the 512-entry table tab; level is the depth of tab
 * (callers in this excerpt pass 0 for the root pml4).
 * The lines that read each entry into v, bound the recursion and release
 * memory are not shown in this excerpt.
 */
629 eptfree(uvlong *tab, int level)
634 if(tab == nil) error(Egreg);
636 for(i = 0; i < 512; i++){
/* entries with both low bits clear are skipped */
638 if((v & 3) == 0) continue;
/* the entry's address part (low 12 bits stripped) names the next-level table */
639 t = KADDR(v & ~0xfff);
640 eptfree(t, level + 1);
/*
 * Install or clear table entries for the guest-physical range [mp->addr, end),
 * one BY2PG page at a time, then request an EPT flush on the next entry.
 * The branch structure selecting between the two loops, and the actions
 * taken when the checks below fail, are on lines not shown in this excerpt.
 */
649 epttranslate(Vmx *vmx, VmMem *mp, uvlong end)
/* sanity check: both ends must be 4096-byte aligned and attr must fit in 12 bits */
653 if((mp->addr & 0xfff) != 0 || (end & 0xfff) != 0 || (uint)mp->attr >= 0x1000)
656 switch(mp->seg->type & SG_TYPE){
/* the mapped window, starting mp->off bytes into the segment, must not run past the segment's top */
663 if(mp->seg->base + mp->off + (end - mp->addr) > mp->seg->top)
/* map: p walks guest-physical pages, v walks the matching byte offsets into the segment */
665 for(p = mp->addr, v = mp->off; p != end; p += BY2PG, v += BY2PG)
/* entry = physical address of the segment page backing offset v, ORed with the attribute bits */
666 *eptwalk(vmx, p) = mp->seg->map[v/PTEMAPMEM]->pages[(v & PTEMAPMEM-1)/BY2PG]->pa | mp->attr;
/* unmap: zero every entry in the range */
668 for(p = mp->addr; p != end; p += BY2PG)
669 *eptwalk(vmx, p) = 0;
/* vmxproc-side code issues invept when it sees FLUSHEPT in onentry */
671 vmx->onentry |= FLUSHEPT;
674 static char *mtype[] = {"uc", "wc", "02", "03", "wt", "wp", "wb", "07"};
677 cmdgetmeminfo(VmCmd *cmd, va_list va)
684 p0 = va_arg(va, char *);
685 e = va_arg(va, char *);
688 for(mp = cmd->vmx->mem.next; mp != &cmd->vmx->mem; mp = mp->next){
691 attr[0] = (mp->attr & 1) != 0 ? 'r' : '-';
692 attr[1] = (mp->attr & 2) != 0 ? 'w' : '-';
693 attr[2] = (mp->attr & 4) != 0 ? 'x' : '-';
695 *(ushort*)mt = *(u16int*)mtype[mp->attr >> 3 & 7];
696 mt[2] = (mp->attr & 0x40) != 0 ? '!' : 0;
698 p = seprint(p, e, "%s %s %#llux %#llux %s %#llux\n", attr, mt, mp->addr, mp->next->addr, mp->name, (uvlong)mp->off);
704 vmmeminsert(VmMem *l, VmMem *p)
713 vmmemremove(VmMem *p)
718 p->next->prev = p->prev;
719 p->prev->next = p->next;
/*
 * VM command: discard the whole guest memory map.
 * The va_list argument is unnamed and unused; vmx is assigned on a line
 * not shown in this excerpt (the first call uses cmd->vmx directly).
 */
727 cmdclearmeminfo(VmCmd *cmd, va_list)
/* walk the table tree from the root (level 0) */
733 eptfree(cmd->vmx->pml4, 0);
/* vmmemremove's return value drives the iteration, so the loop has no increment clause */
734 for(mp = vmx->mem.next; mp != &vmx->mem; )
735 mp = vmmemremove(mp);
/* reset the list head to the empty circular list */
736 vmx->mem.prev = &vmx->mem;
737 vmx->mem.next = &vmx->mem;
/* request an EPT flush before the next guest entry */
738 vmx->onentry |= FLUSHEPT;
744 vmmemupdate(Vmx *vmx, VmMem *mp, uvlong end)
748 for(p = vmx->mem.prev; p != &vmx->mem; p = p->prev)
749 if(p->addr <= end || end == 0)
751 if(p == &vmx->mem || p->addr < mp->addr){
752 q = smalloc(sizeof(VmMem));
756 kstrdup(&q->name, p->name);
758 vmmeminsert(p->next, q);
762 q->off += end - q->addr;
764 for(p = vmx->mem.next; p != &vmx->mem; p = p->next)
765 if(p->addr >= mp->addr)
770 for(p = vmx->mem.next; p != &vmx->mem; )
771 if(p->seg == p->prev->seg && (p->seg == nil || p->addr - p->prev->addr == p->off - p->prev->off))
777 extern Segment* (*_globalsegattach)(char*);
/*
 * VM command: parse memory-map lines and apply each one to the VM.
 * Each line is tokenized into 4 or 6 fields:
 *   access  type  low-address  high-address  [segment-name  offset]
 * access is built from 'r', 'w', 'x' (each at most once); type is one of the
 * two-letter mtype names, optionally followed by '!'.  Mappings with any
 * access bit set require the segment name and offset.
 * Line splitting, error recovery and the remaining declarations are on lines
 * not shown in this excerpt.
 */
780 cmdsetmeminfo(VmCmd *cmd, va_list va)
782 char *p0, *p, *q, *r;
789 p0 = va_arg(va, char *);
797 mp = malloc(sizeof(VmMem));
/* start from an all-zero descriptor: attr == 0, off == 0, seg == nil */
801 memset(mp, 0, sizeof(VmMem));
808 rc = tokenize(p, f, nelem(f));
814 if(rc != 4 && rc != 6) error("number of fields wrong");
/* access field: bits 1, 2, 4 for r, w, x; a repeated letter is rejected */
815 for(q = f[0]; *q != 0; q++)
817 case 'r': if((mp->attr & 1) != 0) goto tinval; mp->attr |= 1; break;
818 case 'w': if((mp->attr & 2) != 0) goto tinval; mp->attr |= 2; break;
/* 'x' also sets bit 0x400 in addition to bit 4 */
819 case 'x': if((mp->attr & 4) != 0) goto tinval; mp->attr |= 0x404; break;
821 default: tinval: error("invalid access field");
/* memory type: match the first two characters against the mtype table */
823 for(j = 0; j < 8; j++)
824 if(strncmp(mtype[j], f[1], 2) == 0){
828 if(j == 8 || strlen(f[1]) > 3) error("invalid memory type");
/* optional third character '!' sets attribute bit 0x40 */
829 if(f[1][2] == '!') mp->attr |= 0x40;
830 else if(f[1][2] != 0) error("invalid memory type");
831 mp->addr = strtoull(f[2], &r, 0);
832 if(*r != 0 || !vmokpage(mp->addr)) error("invalid low guest physical address");
833 end = strtoull(f[3], &r, 0);
834 if(*r != 0 || !vmokpage(end) || end <= mp->addr) error("invalid high guest physical address");
/* any access bit set: the range is backed by a named global segment */
835 if((mp->attr & 7) != 0){
836 if(rc != 6) error("number of fields wrong");
837 mp->seg = _globalsegattach(f[4]);
838 if(mp->seg == nil) error("no such segment");
/*
 * Fix: parse and validate the offset BEFORE the bounds check.  Previously
 * the check ran while mp->off was still 0 from the memset above, so it only
 * ever verified the range against offset 0.
 */
839 mp->off = strtoull(f[5], &r, 0);
840 if(*r != 0 || !vmokpage(mp->off)) error("invalid offset");
841 if(mp->seg->base + mp->off + (end - mp->addr) > mp->seg->top) error("out of bounds");
/* take the name copy last so no error path above runs after the allocation */
842 kstrdup(&mp->name, f[4]);
/* apply: update the translation tables first, then the VM's region list */
845 epttranslate(cmd->vmx, mp, end);
846 vmmemupdate(cmd->vmx, mp, end);
861 if((regs[2] & 1<<5) == 0) return;
862 /* check if disabled by BIOS */
863 if(rdmsr(0x3a, &msr) < 0) return;
865 if((msr & 1) == 0){ /* msr still unlocked */
866 wrmsr(0x3a, msr | 5);
867 if(rdmsr(0x3a, &msr) < 0)
873 if(rdmsr(VMX_PROCB_CTLS_MSR, &msr) < 0) return;
874 if((vlong)msr >= 0) return;
875 if(rdmsr(VMX_PROCB_CTLS2_MSR, &msr) < 0) return;
876 if((msr >> 32 & PROCB_EPT) == 0 || (msr >> 32 & PROCB_VPID) == 0) return;
877 for(i = 0; i < conf.nmach; i++){
878 MACHP(i)->vmx = mallocalign(sizeof(VmxMach), 4096, 0, 0);
879 if(vmxmachp(i) == nil)
/*
 * Register msr in the VM's host and guest MSR areas, with gval as the
 * guest-side value.
 * Each area entry occupies two u64int slots: [i] = MSR index, [i+1] = value.
 * The line computing i and advancing vmx->nmsr is not shown in this excerpt.
 */
886 vmxaddmsr(Vmx *vmx, u32int msr, u64int gval)
/* both areas hold MAXMSR entries (MAXMSR*2 slots) */
890 if(vmx->nmsr >= MAXMSR)
891 error("too many MSRs");
893 vmx->msrhost[i] = msr;
/* host-side value is the MSR's current contents; NOTE(review): rdmsr's return value is not checked here */
894 rdmsr(msr, (vlong *) &vmx->msrhost[i+1]);
895 vmx->msrguest[i] = msr;
896 vmx->msrguest[i+1] = gval;
/* all three VMCS count fields are set to the same entry count */
897 vmcswrite(VMENTRY_MSRLDCNT, vmx->nmsr);
898 vmcswrite(VMEXIT_MSRSTCNT, vmx->nmsr);
899 vmcswrite(VMEXIT_MSRLDCNT, vmx->nmsr);
/*
 * Set or clear the read and write bits for msr in vmx->msrbits.
 * state is a mask of TRAPRD and TRAPWR: a set flag sets the corresponding
 * bit, a clear flag clears it (callers in this excerpt pass 0 to clear both).
 * The bit mask m and the else keywords are on lines not shown in this excerpt.
 */
903 vmxtrapmsr(Vmx *vmx, u32int msr, enum { TRAPRD = 1, TRAPWR = 2 } state)
/* true for MSRs outside both 0..0x1fff and 0xc0000000..0xc0001fff; the action taken is not shown */
907 if(msr >= 0x2000 && (u32int)(msr - 0xc0000000) >= 0x2000)
/* fold into a 14-bit index: low 13 bits of the MSR number, plus 0x2000 for the 0xc0000000 range */
909 msr = msr & 0x1fff | msr >> 18 & 0x2000;
/* read bits occupy words [0, 512) of msrbits */
911 if((state & TRAPRD) != 0)
912 vmx->msrbits[msr / 32] |= m;
914 vmx->msrbits[msr / 32] &= ~m;
/* write bits occupy words [512, 1024) */
915 if((state & TRAPWR) != 0)
916 vmx->msrbits[msr / 32 + 512] |= m;
918 vmx->msrbits[msr / 32 + 512] &= ~m;
927 memset(&vmx->ureg, 0, sizeof(vmx->ureg));
931 if(rdmsr(VMX_BASIC_MSR, &msr) < 0) error("rdmsr(VMX_BASIC_MSR) failed");
932 if((msr & 1ULL<<55) != 0){
933 if(rdmsr(VMX_TRUE_PROCB_CTLS_MSR, &procb_ctls) < 0) error("rdmsr(VMX_TRUE_PROCB_CTLS_MSR) failed");
934 if(rdmsr(VMX_TRUE_PINB_CTLS_MSR, &pinb_ctls) < 0) error("rdmsr(VMX_TRUE_PINB_CTLS_MSR) failed");
936 if(rdmsr(VMX_PROCB_CTLS_MSR, &procb_ctls) < 0) error("rdmsr(VMX_PROCB_CTLS_MSR) failed");
937 if(rdmsr(VMX_PINB_CTLS_MSR, &pinb_ctls) < 0) error("rdmsr(VMX_PINB_CTLS_MSR) failed");
940 if(rdmsr(VMX_PINB_CTLS_MSR, &msr) < 0) error("rdmsr(VMX_PINB_CTLS_MSR failed");
941 x = (u32int)pinb_ctls | 1<<1 | 1<<2 | 1<<4; /* currently reserved default1 bits */
942 x |= PINB_EXITIRQ | PINB_EXITNMI;
943 x &= pinb_ctls >> 32;
944 vmcswrite(PINB_CTLS, x);
946 if(rdmsr(VMX_PROCB_CTLS_MSR, &msr) < 0) error("rdmsr(VMX_PROCB_CTLS_MSR failed");
947 x = (u32int)procb_ctls | 1<<1 | 7<<4 | 1<<8 | 1<<13 | 1<<14 | 1<<26; /* currently reserved default1 bits */
948 x |= PROCB_EXITHLT | PROCB_EXITMWAIT;
949 x |= PROCB_EXITMOVDR | PROCB_EXITIO | PROCB_EXITMONITOR | PROCB_MSRBITMAP;
952 vmcswrite(PROCB_CTLS, x);
954 if(rdmsr(VMX_PROCB_CTLS2_MSR, &msr) < 0) error("rdmsr(VMX_PROCB_CTLS2_MSR failed");
955 x = PROCB_EPT | PROCB_VPID | PROCB_UNRESTR;
957 vmcswrite(PROCB_CTLS2, x);
959 if(rdmsr(VMX_VMEXIT_CTLS_MSR, &msr) < 0) error("rdmsr(VMX_VMEXIT_CTLS_MSR failed");
961 if(sizeof(uintptr) == 8) x |= VMEXIT_HOST64;
962 x |= VMEXIT_LD_IA32_PAT | VMEXIT_LD_IA32_EFER | VMEXIT_ST_DEBUG | VMEXIT_ST_IA32_EFER;
964 vmcswrite(VMEXIT_CTLS, x);
966 if(rdmsr(VMX_VMENTRY_CTLS_MSR, &msr) < 0) error("rdmsr(VMX_VMENTRY_CTLS_MSR failed");
968 x |= VMENTRY_LD_IA32_PAT | VMENTRY_LD_IA32_EFER | VMENTRY_LD_DEBUG;
970 vmcswrite(VMENTRY_CTLS, x);
972 vmcswrite(CR3_TARGCNT, 0);
973 vmcswrite(VMENTRY_INTRINFO, 0);
974 vmcswrite(VMCS_LINK, -1);
976 vmcswrite(HOST_CS, KESEL);
977 vmcswrite(HOST_DS, KDSEL);
978 vmcswrite(HOST_ES, KDSEL);
979 vmcswrite(HOST_FS, KDSEL);
980 vmcswrite(HOST_GS, KDSEL);
981 vmcswrite(HOST_SS, KDSEL);
982 vmcswrite(HOST_TR, TSSSEL);
983 vmcswrite(HOST_CR0, getcr0() & ~0xe);
984 vmcswrite(HOST_CR3, getcr3());
985 vmcswrite(HOST_CR4, getcr4());
987 vmcswrite(HOST_FSBASE, msr);
989 vmcswrite(HOST_GSBASE, msr);
990 vmcswrite(HOST_TRBASE, (uintptr) m->tss);
991 vmcswrite(HOST_GDTR, (uintptr) m->gdt);
992 vmcswrite(HOST_IDTR, IDTADDR);
993 if(rdmsr(0x277, &msr) < 0) error("rdmsr(IA32_PAT) failed");
994 vmcswrite(HOST_IA32_PAT, msr);
995 if(rdmsr(Efer, &msr) < 0) error("rdmsr(IA32_EFER) failed");
996 vmcswrite(HOST_IA32_EFER, msr);
998 vmcswrite(EXC_BITMAP, 1<<18|1<<1);
999 vmcswrite(PFAULT_MASK, 0);
1000 vmcswrite(PFAULT_MATCH, 0);
1002 vmcswrite(GUEST_CSBASE, 0);
1003 vmcswrite(GUEST_DSBASE, 0);
1004 vmcswrite(GUEST_ESBASE, 0);
1005 vmcswrite(GUEST_FSBASE, 0);
1006 vmcswrite(GUEST_GSBASE, 0);
1007 vmcswrite(GUEST_SSBASE, 0);
1008 vmcswrite(GUEST_CSLIMIT, -1);
1009 vmcswrite(GUEST_DSLIMIT, -1);
1010 vmcswrite(GUEST_ESLIMIT, -1);
1011 vmcswrite(GUEST_FSLIMIT, -1);
1012 vmcswrite(GUEST_GSLIMIT, -1);
1013 vmcswrite(GUEST_SSLIMIT, -1);
1014 vmcswrite(GUEST_CSPERM, (SEGG|SEGD|SEGP|SEGPL(0)|SEGEXEC|SEGR) >> 8 | 1);
1015 vmcswrite(GUEST_DSPERM, (SEGG|SEGB|SEGP|SEGPL(0)|SEGDATA|SEGW) >> 8 | 1);
1016 vmcswrite(GUEST_ESPERM, (SEGG|SEGB|SEGP|SEGPL(0)|SEGDATA|SEGW) >> 8 | 1);
1017 vmcswrite(GUEST_FSPERM, (SEGG|SEGB|SEGP|SEGPL(0)|SEGDATA|SEGW) >> 8 | 1);
1018 vmcswrite(GUEST_GSPERM, (SEGG|SEGB|SEGP|SEGPL(0)|SEGDATA|SEGW) >> 8 | 1);
1019 vmcswrite(GUEST_SSPERM, (SEGG|SEGB|SEGP|SEGPL(0)|SEGDATA|SEGW) >> 8 | 1);
1020 vmcswrite(GUEST_LDTRPERM, 1<<16);
1022 vmcswrite(GUEST_CR0MASK, CR0KERNEL);
1023 vmcswrite(GUEST_CR4MASK, CR4KERNEL);
1024 vmcswrite(GUEST_CR0, getcr0() & CR0KERNEL | 0x31);
1025 vmcswrite(GUEST_CR3, 0);
1026 vmcswrite(GUEST_CR4, getcr4() & CR4KERNEL);
1027 vmcswrite(GUEST_CR0SHADOW, getcr0() & CR0KERNEL | 0x31);
1028 vmcswrite(GUEST_CR4SHADOW, getcr4() & ~CR4VMXE & CR4KERNEL);
1030 vmcswrite(GUEST_IA32_PAT, 0x0007040600070406ULL);
1031 vmcswrite(GUEST_IA32_EFER, 0);
1033 vmcswrite(GUEST_TRBASE, 0);
1034 vmcswrite(GUEST_TRLIMIT, 0xffff);
1035 vmcswrite(GUEST_TRPERM, (SEGTSS|SEGPL(0)|SEGP) >> 8 | 2);
1037 vmcswrite(VM_EPTP, PADDR(vmx->pml4) | 3<<3);
1039 vmcswrite(VM_VPID, vmx->vpid);
1041 vmcswrite(GUEST_RFLAGS, 2);
1043 vmx->onentry = FLUSHVPID | FLUSHEPT;
1047 memset(vmx->msrbits, -1, 4096);
1048 vmxtrapmsr(vmx, Efer, 0);
1049 vmcswrite(VMENTRY_MSRLDADDR, PADDR(vmx->msrguest));
1050 vmcswrite(VMEXIT_MSRSTADDR, PADDR(vmx->msrguest));
1051 vmcswrite(VMEXIT_MSRLDADDR, PADDR(vmx->msrhost));
1052 vmcswrite(MSR_BITMAP, PADDR(vmx->msrbits));
1054 if(sizeof(uintptr) == 8){
1055 vmxaddmsr(vmx, Star, 0);
1056 vmxaddmsr(vmx, Lstar, 0);
1057 vmxaddmsr(vmx, Cstar, 0);
1058 vmxaddmsr(vmx, Sfmask, 0);
1059 vmxaddmsr(vmx, KernelGSbase, 0);
1060 vmxtrapmsr(vmx, Star, 0);
1061 vmxtrapmsr(vmx, Lstar, 0);
1062 vmxtrapmsr(vmx, Cstar, 0);
1063 vmxtrapmsr(vmx, Sfmask, 0);
1064 vmxtrapmsr(vmx, FSbase, 0);
1065 vmxtrapmsr(vmx, GSbase, 0);
1066 vmxtrapmsr(vmx, KernelGSbase, 0);
1077 putcr4(getcr4() | 0x2000); /* set VMXE */
1078 putcr0(getcr0() | 0x20); /* set NE */
1080 if(rdmsr(VMX_CR0_FIXED0, &msr) < 0) error("rdmsr(VMX_CR0_FIXED0) failed");
1081 if(rdmsr(VMX_CR0_FIXED1, &msr2) < 0) error("rdmsr(VMX_CR0_FIXED1) failed");
1082 if((cr & ~msr & ~msr2 | ~cr & msr & msr2) != 0) error("invalid CR0 value");
1084 if(rdmsr(VMX_CR4_FIXED0, &msr) < 0) error("rdmsr(VMX_CR4_FIXED0) failed");
1085 if(rdmsr(VMX_CR4_FIXED1, &msr2) < 0) error("rdmsr(VMX_CR4_FIXED1) failed");
1086 if((cr & ~msr & ~msr2 | ~cr & msr & msr2) != 0) error("invalid CR4 value");
1088 rdmsr(VMX_BASIC_MSR, &x);
1094 if(vmxmach->vms == 0){
1095 memset(vmxmach->vmxon, 0, sizeof(vmxmach->vmxon));
1096 *(ulong*)vmxmach->vmxon = x;
1097 if(vmxon(PADDR(vmxmach->vmxon)) < 0)
1098 error("vmxon failed");
1104 memset(vmx->vmcs, 0, sizeof(vmx->vmcs));
1105 *(ulong*)vmx->vmcs = x;
1106 if(vmclear(PADDR(vmx->vmcs)) < 0)
1107 error("vmclear failed");
/*
 * Mark command p as finished: sets CMDFDONE together with any extra flag
 * bits in f (killcmds passes CMDFFAIL).
 * Any further steps (e.g. waking the waiter) are on lines not shown in this excerpt.
 */
1114 cmdrelease(VmCmd *p, int f)
1117 p->flags |= CMDFDONE | f;
/*
 * Fail every command on the VM's postponed list and on its pending queue
 * with the Equit message, except notme, then empty both lists.
 * pn (the saved successor) is assigned on lines not shown in this excerpt.
 */
1123 killcmds(Vmx *vmx, VmCmd *notme)
/* postponed list: walked without taking cmdlock */
1127 for(p = vmx->postponed; p != nil; p = pn){
1130 if(p == notme) continue;
1131 kstrcpy(p->errstr, Equit, ERRMAX);
1132 cmdrelease(p, CMDFFAIL);
1134 vmx->postponed = nil;
/* pending queue: walked and reset under cmdlock */
1135 ilock(&vmx->cmdlock);
1136 for(p = vmx->firstcmd; p != nil; p = pn){
1139 if(p == notme) continue;
1140 kstrcpy(p->errstr, Equit, ERRMAX);
1141 cmdrelease(p, CMDFFAIL);
/* empty queue: lastcmd points back at the head pointer */
1143 vmx->firstcmd = nil;
1144 vmx->lastcmd = &vmx->firstcmd;
1145 iunlock(&vmx->cmdlock);
1149 cmdquit(VmCmd *p, va_list va)
1154 vmx->state = VMXENDING;
1157 cmdclearmeminfo(p, va);
1161 vmclear(PADDR(vmx->vmcs));
1164 if(--vmxmach->vms == 0)
1171 vmxtab[vmx->index] = nil;
1172 qunlock(&vmxtablock);
/*
 * Classify a VM exit by its exit reason and, for exits that reach the end of
 * the function, stop the VM and flag the exit for reporting.
 * What the listed case labels do (the statements after them) is on lines not
 * shown in this excerpt.
 */
1181 processexit(Vmx *vmx)
1185 reason = vmcsread(VM_EXREASON);
/* the switch is only consulted when bit 31 of the reason is clear */
1186 if((reason & 1<<31) == 0)
/* low 16 bits select the case */
1187 switch(reason & 0xffff){
1188 case 1: /* external interrupt */
1191 case 5: /* IO SMI */
1193 case 7: /* IRQ window */
1194 case 8: /* NMI window */
/* leave the running state, record that an exit is available (cmdwait tests GOTEXIT), and drop any single-step request */
1197 vmx->state = VMXREADY;
1198 vmx->got |= GOTEXIT;
1199 vmx->onentry &= ~STEP;
1203 cmdgetregs(VmCmd *cmd, va_list va)
1211 p0 = va_arg(va, char *);
1212 e = va_arg(va, char *);
1214 for(r = guestregs; r < guestregs + nelem(guestregs); r++)
1216 p = seprint(p, e, "%s ", r->name);
1217 p = r->read(cmd->vmx, p, e);
1218 p = strecpy(p, e, "\n");
1221 val = vmcsread(r->offset);
1223 val = *(uintptr*)((uchar*)cmd->vmx + ~r->offset);
1225 if(s == 0) s = sizeof(uintptr);
1226 p = seprint(p, e, "%s %#.*llux\n", r->name, s * 2, val);
1232 setregs(Vmx *vmx, char *p0, char rs, char *fs)
1246 rc = getfields(p, f, nelem(f), 1, fs);
1248 if(rc == 0) continue;
1249 if(rc != 2) error("number of fields wrong");
1251 for(r = guestregs; r < guestregs + nelem(guestregs); r++)
1252 if(strcmp(r->name, f[0]) == 0)
1254 if(r == guestregs + nelem(guestregs))
1255 error("unknown register");
1256 if(r->write != nil){
1257 r->write(vmx, f[1]);
1260 val = strtoull(f[1], &rp, 0);
1262 if(sz == 0) sz = sizeof(uintptr);
1263 if(rp == f[1] || *rp != 0) error("invalid value");
1265 vmcswrite(r->offset, val);
1267 assert((u32int)~r->offset + sz <= sizeof(Vmx));
1269 case 1: *(u8int*)((u8int*)vmx + (u32int)~r->offset) = val; break;
1270 case 2: *(u16int*)((u8int*)vmx + (u32int)~r->offset) = val; break;
1271 case 4: *(u32int*)((u8int*)vmx + (u32int)~r->offset) = val; break;
1272 case 8: *(u64int*)((u8int*)vmx + (u32int)~r->offset) = val; break;
1273 default: error(Egreg);
1281 cmdsetregs(VmCmd *cmd, va_list va)
1283 return setregs(cmd->vmx, va_arg(va, char *), '\n', " \t");
/*
 * VM command: copy the VM's entire saved FP state (cmd->vmx->fp) to the
 * buffer passed as the single uchar * argument in va.
 * The caller's buffer must hold at least sizeof(FPsave) bytes; no length is
 * passed or checked here.
 * Returns sizeof(FPsave).
 */
1287 cmdgetfpregs(VmCmd *cmd, va_list va)
1291 p = va_arg(va, uchar *);
1292 memmove(p, &cmd->vmx->fp, sizeof(FPsave));
1293 return sizeof(FPsave);
/*
 * VM command: copy caller data into the VM's saved FP state (cmd->vmx->fp).
 * va carries, in order: uchar *p (source buffer), ulong n (byte count),
 * vlong off (byte offset into the FPsave area).
 * The copy is clipped so it never extends past the end of the FPsave area.
 * The local declarations and the return statement are on lines not shown in
 * this excerpt.
 */
1297 cmdsetfpregs(VmCmd *cmd, va_list va)
1303 p = va_arg(va, uchar *);
1304 n = va_arg(va, ulong);
1305 off = va_arg(va, vlong);
/* an offset outside the FPsave area copies nothing */
1306 if(off < 0 || off >= sizeof(FPsave)) n = 0;
/*
 * Fix: clamp to the bytes remaining after off.  The previous expression,
 * sizeof(FPsave) - n, subtracted the length instead of the offset; for
 * n > sizeof(FPsave) it wrapped to a huge unsigned value, and in general it
 * let the memmove below write beyond the end of vmx->fp.
 */
1307 else if(off + n > sizeof(FPsave)) n = sizeof(FPsave) - off;
1308 memmove((uchar*)&cmd->vmx->fp + off, p, n);
/*
 * VM command: start (or single-step) the guest.
 * va carries, in order: int step (nonzero requests a single step) and
 * char *r (optional register assignments, may be nil).
 * Raises "VM not ready" unless the VM is in state VMXREADY.
 * vmx is assigned on a line not shown in this excerpt.
 */
1313 cmdgo(VmCmd *cmd, va_list va)
1320 if(vmx->state != VMXREADY)
1321 error("VM not ready");
1322 step = va_arg(va, int);
1323 r = va_arg(va, char *);
/* register list uses ';' between records and '=' between name and value */
1324 if(r != nil) setregs(vmx, r, ';', "=");
1325 if(step) vmx->onentry |= STEP;
1326 vmx->state = VMXRUNNING;
/*
 * VM command: put the VM into state VMXREADY.
 * Allowed only from VMXREADY or VMXRUNNING; any other state raises
 * "VM not ready or running".  The va_list argument is unnamed and unused.
 * vmx is assigned on a line not shown in this excerpt.
 */
1331 cmdstop(VmCmd *cmd, va_list)
1336 if(vmx->state != VMXREADY && vmx->state != VMXRUNNING)
1337 error("VM not ready or running");
1338 vmx->state = VMXREADY;
/*
 * VM command: report the VM's status.
 * Copies the VM's error string into the char * buffer passed in va
 * (at most ERRMAX bytes, per kstrcpy's limit argument) and returns the
 * VM's current state value.
 */
1343 cmdstatus(VmCmd *cmd, va_list va)
1345 kstrcpy(va_arg(va, char *), cmd->vmx->errstr, ERRMAX);
1346 return cmd->vmx->state;
1349 static char *exitreasons[] = {
1350 [0] "exc", [1] "extirq", [2] "triplef", [3] "initsig", [4] "sipi", [5] "smiio", [6] "smiother", [7] "irqwin",
1351 [8] "nmiwin", [9] "taskswitch", [10] ".cpuid", [11] ".getsec", [12] ".hlt", [13] ".invd", [14] ".invlpg", [15] ".rdpmc",
1352 [16] ".rdtsc", [17] ".rsm", [18] ".vmcall", [19] ".vmclear", [20] ".vmlaunch", [21] ".vmptrld", [22] ".vmptrst", [23] ".vmread",
1353 [24] ".vmresume", [25] ".vmwrite", [26] ".vmxoff", [27] ".vmxon", [28] "movcr", [29] ".movdr", [30] "io", [31] ".rdmsr",
1354 [32] ".wrmsr", [33] "entrystate", [34] "entrymsr", [36] ".mwait", [37] "monitortrap", [39] ".monitor",
1355 [40] ".pause", [41] "mcheck", [43] "tpr", [44] "apicacc", [45] "eoi", [46] "gdtr_idtr", [47] "ldtr_tr",
1356 [48] "eptfault", [49] "eptinval", [50] ".invept", [51] ".rdtscp", [52] "preempt", [53] ".invvpid", [54] ".wbinvd", [55] ".xsetbv",
1357 [56] "apicwrite", [57] ".rdrand", [58] ".invpcid", [59] ".vmfunc", [60] ".encls", [61] ".rdseed", [62] "pmlfull", [63] ".xsaves",
1361 static char *except[] = {
1362 [0] "#de", [1] "#db", [3] "#bp", [4] "#of", [5] "#br", [6] "#ud", [7] "#nm",
1363 [8] "#df", [10] "#ts", [11] "#np", [12] "#ss", [13] "#gp", [14] "#pf",
1364 [16] "#mf", [17] "#ac", [18] "#mc", [19] "#xm", [20] "#ve",
1368 cmdwait(VmCmd *cp, va_list va)
1371 u32int reason, intr;
1379 p0 = p = va_arg(va, char *);
1380 e = va_arg(va, char *);
1381 if((vmx->got & GOTIRQACK) != 0){
1382 p = seprint(p, e, "*ack %d\n", vmx->irqack.info & 0xff);
1383 vmx->got &= ~GOTIRQACK;
1386 if((vmx->got & GOTEXIT) == 0){
1387 cp->flags |= CMDFPOSTP;
1390 vmx->got &= ~GOTEXIT;
1391 reason = vmcsread(VM_EXREASON);
1392 qual = vmcsread(VM_EXQUALIF);
1394 intr = vmcsread(VM_EXINTRINFO);
1395 if((reason & 1<<31) != 0)
1396 p = seprint(p, e, "!");
1397 if(rno == 0 && (intr & 1<<31) != 0){
1398 if((intr & 0xff) >= nelem(except) || except[intr & 0xff] == nil)
1399 p = seprint(p, e, "#%d ", intr & 0xff);
1401 p = seprint(p, e, "%s ", except[intr & 0xff]);
1402 }else if(rno >= nelem(exitreasons) || exitreasons[rno] == nil)
1403 p = seprint(p, e, "?%d ", rno);
1405 p = seprint(p, e, "%s ", exitreasons[rno]);
1406 p = seprint(p, e, "%#ullx pc %#ullx sp %#ullx ilen %#ullx iinfo %#ullx", qual, vmcsread(GUEST_RIP), vmcsread(GUEST_RSP), vmcsread(VM_EXINSTRLEN), vmcsread(VM_EXINSTRINFO));
1407 if((intr & 1<<11) != 0) p = seprint(p, e, " excode %#ullx", vmcsread(VM_EXINTRCODE));
1408 if(rno == 48 && (qual & 0x80) != 0) p = seprint(p, e, " va %#ullx", vmcsread(VM_GUESTVA));
1409 if(rno == 48 || rno == 49) p = seprint(p, e, " pa %#ullx", vmcsread(VM_GUESTPA));
1410 if(rno == 30) p = seprint(p, e, " ax %#ullx", (uvlong)vmx->ureg.ax);
1411 p = seprint(p, e, "\n");
1416 eventparse(char *p, VmIntr *vi)
1421 memset(vi, 0, sizeof(VmIntr));
1426 memset(vi, 0, sizeof(VmIntr));
1431 if(r != nil) *r++ = 0;
1432 for(i = 0; i < nelem(except); i++)
1433 if(except[i] != nil && strcmp(except[i], q) == 0)
1439 if(i == nelem(except)){
1440 i = strtoul(q, &q, 10);
1441 if(*q != 0 || i > 255) error(Ebadctl);
1444 if((vi->info & 0x7ff) == 3 || (vi->info & 0x7ff) == 4)
1446 if(r == nil) goto out;
1448 vi->code = strtoul(r, &r, 0);
1452 vi->ilen = strtoul(r + 1, &r, 0);
1453 if(*r != 0) error(Ebadctl);
/*
 * VM command: queue an exception for injection on the next guest entry.
 * The char * argument in va is parsed by eventparse into vmx->exc.
 * vmx is assigned on a line not shown in this excerpt.
 */
1460 cmdexcept(VmCmd *cp, va_list va)
/* a command flagged as scratched is abandoned with Eintr */
1465 if(cp->scratched) error(Eintr);
/* an exception is already queued: mark this command postponed (the lines that follow are not shown) */
1466 if((vmx->onentry & POSTEX) != 0){
1467 cp->flags |= CMDFPOSTP;
1470 eventparse(va_arg(va, char *), &vmx->exc);
/* the entry path in the VM process consumes and clears POSTEX */
1471 vmx->onentry |= POSTEX;
1476 cmdirq(VmCmd *cmd, va_list va)
1483 p = va_arg(va, char *);
1485 vmx->onentry &= ~POSTIRQ;
1489 vmx->onentry |= POSTIRQ;
/*
 * VM command: set the VMCS EXC_BITMAP field from a numeric string.
 * The char * argument in va is parsed with strtoul (base auto-detected);
 * an empty or partially numeric string raises Ebadarg.
 * The VmCmd argument is unnamed and unused.
 */
1495 cmdextrap(VmCmd *, va_list va)
1500 p = va_arg(va, char *);
1501 v = strtoul(p, &q, 0);
/* q == p: no digits consumed; *q != 0: trailing garbage */
1502 if(q == p || *q != 0) error(Ebadarg);
1503 vmcswrite(EXC_BITMAP, v);
1514 ilock(&vmx->cmdlock);
1515 rc = vmx->firstcmd != nil;
1516 iunlock(&vmx->cmdlock);
1521 markcmddone(VmCmd *p, VmCmd ***pp)
1523 if((p->flags & (CMDFFAIL|CMDFPOSTP)) == CMDFPOSTP){
1527 p->flags = p->flags & ~CMDFPOSTP;
1533 markppcmddone(VmCmd **pp)
1538 if((p->flags & (CMDFFAIL|CMDFPOSTP)) == CMDFPOSTP)
1542 p->flags = p->flags & ~CMDFPOSTP;
1553 for(pp = &vmx->postponed; p = *pp, p != nil; ){
1555 kstrcpy(p->errstr, up->errstr, ERRMAX);
1556 p->flags |= CMDFFAIL;
1557 pp = markppcmddone(pp);
1560 p->flags &= ~CMDFPOSTP;
1561 p->retval = p->cmd(p, p->va);
1563 pp = markppcmddone(pp);
1566 ilock(&vmx->cmdlock);
1569 iunlock(&vmx->cmdlock);
1572 vmx->firstcmd = p->next;
1573 if(vmx->lastcmd == &p->next)
1574 vmx->lastcmd = &vmx->firstcmd;
1575 iunlock(&vmx->cmdlock);
1578 kstrcpy(p->errstr, up->errstr, ERRMAX);
1579 p->flags |= CMDFFAIL;
1580 markcmddone(p, &pp);
1583 if(p->scratched) error(Eintr);
1584 p->retval = p->cmd(p, p->va);
1586 markcmddone(p, &pp);
/*
 * Visible part of the per-VM kernel process (the function started with
 * kproc("kvmx", vmxproc, vmx)); its header, loop structure and several
 * statements are on lines not shown in this excerpt.
 * While the VM is in state VMXRUNNING each pass prepares the VMCS from the
 * pending vmx->onentry requests, enters the guest with vmlaunch, and saves
 * the guest's cr2 and FP state afterwards.
 */
1594 u32int procbctls, defprocbctls;
1599 procwired(up, vmx->machno);
/* error path: record the error text and mark the VM dead */
1604 kstrcpy(vmx->errstr, up->errstr, ERRMAX);
1605 vmx->state = VMXDEAD;
1611 vmx->state = VMXREADY;
/* defprocbctls: baseline PROCB_CTLS value, sampled once here */
1612 defprocbctls = vmcsread(PROCB_CTLS);
1615 if(vmx->state == VMXRUNNING){
/* procbctls: per-entry working copy; one-shot bits are ORed into it below and it is written back before entry */
1616 procbctls = defprocbctls;
1617 if((vmx->onentry & STEP) != 0)
/*
 * Fix: set the monitor-trap bit in the per-entry copy.  The original ORed it
 * into defprocbctls after procbctls had already been copied, so the requested
 * step did not apply to this entry and the bit then stayed set in the
 * baseline for every later entry.
 */
1618 procbctls |= PROCB_MONTRAP;
/* queued exception: program the entry interruption fields and consume the request */
1619 if((vmx->onentry & POSTEX) != 0){
1620 vmcswrite(VMENTRY_INTRINFO, vmx->exc.info);
1621 vmcswrite(VMENTRY_INTRCODE, vmx->exc.code);
1622 vmcswrite(VMENTRY_INTRILEN, vmx->exc.ilen);
1623 vmx->onentry &= ~POSTEX;
/* queued interrupt: not delivered while single-stepping */
1625 if((vmx->onentry & POSTIRQ) != 0 && (vmx->onentry & STEP) == 0){
/* deliver now only if no exception is pending, guest RFLAGS bit 9 is set and the low two GUEST_CANINTR bits are clear */
1626 if((vmx->onentry & POSTEX) == 0 && (vmcsread(GUEST_RFLAGS) & 1<<9) != 0 && (vmcsread(GUEST_CANINTR) & 3) == 0){
1627 vmcswrite(VMENTRY_INTRINFO, vmx->irq.info);
1628 vmcswrite(VMENTRY_INTRCODE, vmx->irq.code);
1629 vmcswrite(VMENTRY_INTRILEN, vmx->irq.ilen);
1630 vmx->onentry &= ~POSTIRQ;
/* remember what was delivered so cmdwait can report "*ack" */
1631 vmx->got |= GOTIRQACK;
1632 vmx->irqack = vmx->irq;
/* request an IRQ-window exit (the branch this belongs to is on a line not shown) */
1634 procbctls |= PROCB_IRQWIN;
/* pending TLB invalidations requested through onentry */
1636 if((vmx->onentry & FLUSHVPID) != 0){
1637 if(invvpid(INVLOCAL, vmx->vpid, 0) < 0)
1638 error("invvpid failed");
1639 vmx->onentry &= ~FLUSHVPID;
1641 if((vmx->onentry & FLUSHEPT) != 0){
/* same EPT pointer value as written to VM_EPTP: pml4 physical address | 3<<3 */
1642 if(invept(INVLOCAL, PADDR(vmx->pml4) | 3<<3, 0) < 0)
1643 error("invept failed");
1644 vmx->onentry &= ~FLUSHEPT;
/* commit this entry's control bits; the baseline in defprocbctls is left untouched */
1646 vmcswrite(PROCB_CTLS, procbctls);
1647 vmx->got &= ~GOTEXIT;
1650 if(sizeof(uintptr) == 8){
/* v is loaded on a line not shown; NOTE(review): vmwrite's return value is not checked here */
1652 vmwrite(HOST_FSBASE, v);
/* load the guest's debug registers only when DR7 has bits set outside 0xd400 */
1654 if((vmx->dr[7] & ~0xd400) != 0)
1655 putdr01236(vmx->dr);
1656 fpsserestore(&vmx->fp);
/* enter the guest; vmx->launched is passed through to vmlaunch */
1658 rc = vmlaunch(&vmx->ureg, vmx->launched);
/* capture guest cr2 and FP state right after the exit */
1659 vmx->cr2 = getcr2();
1660 fpssesave(&vmx->fp);
1663 error("vmlaunch failed");
/* not running: wait until gotcmd reports a queued command */
1667 up->psstate = "Idle";
1668 sleep(&vmx->cmdwait, gotcmd, vmx);
1689 static Dirtab vmxdir[] = {
1690 "ctl", { Qctl, 0, 0 }, 0, 0660,
1691 "regs", { Qregs, 0, 0 }, 0, 0660,
1692 "status", { Qstatus, 0, 0 }, 0, 0440,
1693 "map", { Qmap, 0, 0 }, 0, 0660,
1694 "wait", { Qwait, 0, 0 }, 0, 0440,
1695 "fpregs", { Qfpregs, 0, 0 }, 0, 0660,
1708 static Cmdtab vmxctlmsg[] = {
1715 CMextrap, "extrap", 2,
1718 enum { AUXSIZE = 4096 };
1723 if(n < 0) return nil;
1724 if(n >= nvmxtab) return nil;
/*
 * Qid path layout: the low 8 bits hold the file number, the bits above hold slot + 1.
 * QIDPATH builds a path from slot q and file e; + binds tighter than <<,
 * and << tighter than |, so this is (((q) + 1) << 8) | (e).
 */
1727 #define QIDPATH(q,e) ((q) + 1 << 8 | (e))
/* SLOT recovers the slot number; a path whose upper part is 0 yields -1 */
1728 #define SLOT(q) ((vlong)((q).path >> 8) - 1)
/* FILE recovers the low 8 bits; the (int) cast applies to (q).path before the mask */
1729 #define FILE(q) ((int)(q).path & 0xff)
1735 eqlock(&vmxtablock);
1737 qunlock(&vmxtablock);
1740 vmx = vmxlook(SLOT(q));
1741 qunlock(&vmxtablock);
1749 return (((VmCmd*)cp)->flags & CMDFDONE) != 0;
1753 vmxcmd(Vmx *vmx, int (*f)(VmCmd *, va_list), ...)
1757 if(vmx->state == VMXENDING)
1760 memset(&cmd, 0, sizeof(VmCmd));
1762 cmd.errstr = up->errstr;
1764 va_start(cmd.va, f);
1766 ilock(&vmx->cmdlock);
1767 if(vmx->state == VMXENDING){
1768 iunlock(&vmx->cmdlock);
1771 *vmx->lastcmd = &cmd;
1772 vmx->lastcmd = &cmd.next;
1773 iunlock(&vmx->cmdlock);
1777 wakeup(&vmx->cmdwait);
1779 sleep(&cmd, iscmddone, &cmd);
1780 while(!iscmddone(&cmd));
1784 if((cmd.flags & CMDFFAIL) != 0)
1796 vmx = mallocalign(sizeof(Vmx), 4096, 0, 0);
1802 vmx->state = VMXINIT;
1803 vmx->lastcmd = &vmx->firstcmd;
1804 vmx->mem.next = &vmx->mem;
1805 vmx->mem.prev = &vmx->mem;
1808 eqlock(&vmxtablock);
1811 vmxtab[vmx->index] = 0;
1812 qunlock(&vmxtablock);
1815 for(i = 0; i < nvmxtab; i++)
1816 if(vmxtab[i] == nil){
1822 newtab = realloc(vmxtab, (nvmxtab + 1) * sizeof(Vmx *));
1826 vmxtab[nvmxtab] = vmx;
1827 vmx->index = nvmxtab++;
1829 kproc("kvmx", vmxproc, vmx);
1830 qunlock(&vmxtablock);
1835 for(i = 0; i < conf.nmach; i++)
1836 if(vmxmachp(i)->vms < mv){
1838 mv = vmxmachp(i)->vms;
1841 if(vmxcmd(vmx, cmdstatus, up->errstr) == VMXDEAD)
1851 for(i = 0; i < nvmxtab; i++)
1852 if(vmxtab[i] != nil)
1853 vmxcmd(vmxtab[i], cmdquit);
1857 vmxattach(char *spec)
1859 if(!gotvmx) error(Enodev);
1860 return devattach('X', spec);
1864 vmxgen(Chan *c, char *, Dirtab *, int, int s, Dir *dp)
1870 devdir(c, (Qid){Qdir, 0, QTDIR}, "#X", 0, eve, 0555, dp);
1873 if(c->qid.path == Qdir){
1874 if(s-- == 0) goto clone;
1877 if(vmxlook(s) == nil)
1879 sprint(up->genbuf, "%d", s);
1880 devdir(c, (Qid){QIDPATH(s, Qdir), 0, QTDIR}, up->genbuf, 0, eve, DMDIR|0555, dp);
1883 if(c->qid.path == Qclone){
1885 strcpy(up->genbuf, "clone");
1886 devdir(c, (Qid){Qclone, 0, QTFILE}, up->genbuf, 0, eve, 0444, dp);
1889 if(s >= nelem(vmxdir))
1892 path = QIDPATH(SLOT(c->qid), 0);
1893 devdir(c, (Qid){tab->qid.path|path, tab->qid.vers, tab->qid.type}, tab->name, tab->length, eve, tab->perm, dp);
1898 vmxwalk(Chan *c, Chan *nc, char **name, int nname)
1902 eqlock(&vmxtablock);
1904 qunlock(&vmxtablock);
1907 rc = devwalk(c, nc, name, nname, nil, 0, vmxgen);
1908 qunlock(&vmxtablock);
1914 vmxstat(Chan *c, uchar *dp, int n)
1918 eqlock(&vmxtablock);
1920 qunlock(&vmxtablock);
1923 rc = devstat(c, dp, n, nil, 0, vmxgen);
1924 qunlock(&vmxtablock);
1930 vmxopen(Chan* c, int omode)
1935 if(c->qid.path == Qclone){
1936 if(!iseve()) error(Eperm);
1938 c->qid.path = QIDPATH(vmx->index, Qctl);
1940 eqlock(&vmxtablock);
1942 qunlock(&vmxtablock);
1945 vmx = vmxlook(SLOT(c->qid));
1946 if(SLOT(c->qid) >= 0 && vmx == nil) error(Enonexist);
1947 if(FILE(c->qid) != Qdir && !iseve()) error(Eperm);
1948 ch = devopen(c, omode, nil, 0, vmxgen);
1949 qunlock(&vmxtablock);
1951 ch->aux = smalloc(AUXSIZE);
1952 if(SLOT(ch->qid) >= 0 && FILE(ch->qid) == Qmap){
1953 if((omode & OTRUNC) != 0)
1954 vmxcmd(vmx, cmdclearmeminfo);
1972 if(SLOT(ch->qid) == -1 || FILE(ch->qid) != Qctl)
1974 vmx = vmxent(ch->qid);
1980 qunlock(&vmxtablock);
1982 vmxcmd(old, cmdquit);
1988 if((ch->flag & CRCLOSE) != 0)
1996 vmxread(Chan* c, void* a, long n, vlong off)
2001 if(SLOT(c->qid) == -1){
2002 switch((int)c->qid.path){
2005 eqlock(&vmxtablock);
2007 qunlock(&vmxtablock);
2010 rc = devdirread(c, a, n, nil, 0, vmxgen);
2011 qunlock(&vmxtablock);
2018 vmx = vmxent(c->qid);
2019 if(vmx == nil) error(Enonexist);
2020 switch(FILE(c->qid)){
2027 sprint(buf, "%d", vmx->index);
2028 return readstr(off, a, n, buf);
2032 vmxcmd(vmx, cmdgetregs, c->aux, (char *) c->aux + AUXSIZE);
2033 return readstr(off, a, n, c->aux);
2036 vmxcmd(vmx, cmdgetmeminfo, c->aux, (char *) c->aux + AUXSIZE);
2037 return readstr(off, a, n, c->aux);
2040 char buf[ERRMAX+128];
2041 char errbuf[ERRMAX];
2044 status = vmx->state;
2045 if(status == VMXDEAD){
2046 vmxcmd(vmx, cmdstatus, errbuf);
2047 snprint(buf, sizeof(buf), "%s %#q\n", statenames[status], errbuf);
2048 }else if(status >= 0 && status < nelem(statenames))
2049 snprint(buf, sizeof(buf), "%s\n", statenames[status]);
2051 snprint(buf, sizeof(buf), "%d\n", status);
2052 return readstr(off, a, n, buf);
2058 rc = vmxcmd(vmx, cmdwait, buf, buf + sizeof(buf));
2060 if(rc > 0) memmove(a, buf, rc);
2065 char buf[sizeof(FPsave)];
2067 vmxcmd(vmx, cmdgetfpregs, buf);
2068 if(n < 0 || off < 0 || off >= sizeof(buf)) n = 0;
2069 else if(off + n > sizeof(buf)) n = sizeof(buf) - off;
2070 if(n != 0) memmove(a, buf + off, n);
2081 vmxwrite(Chan* c, void* a, long n, vlong off)
2089 if(SLOT(c->qid) == -1){
2090 switch((int)c->qid.path){
2097 vmx = vmxent(c->qid);
2098 if(vmx == nil) error(Enonexist);
2099 switch(FILE(c->qid)){
2103 cb = parsecmd(a, n);
2108 ct = lookupcmd(cb, vmxctlmsg, nelem(vmxctlmsg));
2111 vmxcmd(vmx, cmdquit);
2116 if(cb->nf == 2) kstrdup(&s, cb->f[1]);
2117 else if(cb->nf != 1) error(Ebadarg);
2122 vmxcmd(vmx, cmdgo, ct->index == CMstep, s);
2127 vmxcmd(vmx, cmdstop);
2131 kstrdup(&s, cb->f[1]);
2136 vmxcmd(vmx, cmdexcept, s);
2143 kstrdup(&s, cb->f[1]);
2148 vmxcmd(vmx, cmdirq, s);
2154 kstrdup(&s, cb->f[1]);
2159 vmxcmd(vmx, cmdextrap, s);
2173 if(s == nil) error(Enomem);
2180 rc = vmxcmd(vmx, FILE(c->qid) == Qregs ? cmdsetregs : cmdsetmeminfo, s);
2186 char buf[sizeof(FPsave)];
2188 if(n > sizeof(FPsave)) n = sizeof(FPsave);
2190 return vmxcmd(vmx, cmdsetfpregs, buf, n, off);