#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "io.h"

/*
 * Simple segment descriptors with no translation.
 */
#define	EXECSEGM(p)	{ 0, SEGL|SEGP|SEGPL(p)|SEGEXEC }
#define	DATASEGM(p)	{ 0, SEGB|SEGG|SEGP|SEGPL(p)|SEGDATA|SEGW }
#define	EXEC32SEGM(p)	{ 0xFFFF, SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
#define	DATA32SEGM(p)	{ 0xFFFF, SEGB|SEGG|(0xF<<16)|SEGP|SEGPL(p)|SEGDATA|SEGW }
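
/*
 * The selector order matters: SYSCALL loads CS/SS from
 * consecutive GDT slots starting at KESEL, and SYSRET expects
 * UDSEL and UESEL to follow UE32SEL (see the STAR set-up in
 * mmuinit below).
 */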
Segdesc gdt[NGDT] =
{
[NULLSEG]	{ 0, 0},		/* null descriptor */
[KESEG]		EXECSEGM(0),		/* kernel code */
[KDSEG]		DATASEGM(0),		/* kernel data */
[UE32SEG]	EXEC32SEGM(3),		/* user code, 32-bit */
[UDSEG]		DATA32SEGM(3),		/* user data/stack */
[UESEG]		EXECSEGM(3),		/* user code, 64-bit */
};

static int didmmuinit = 0;	/* set by mmuinit(); kernel page tables use malloc from then on */

enum {
	/* page-table levels */
	PML4E	= 2,
	PDPE	= 1,
	PDE	= 0,

	MAPBITS	= 8*sizeof(m->mmumap[0]),	/* bits per mmumap word */
};
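
/*
 * loadptr builds the 10-byte pseudo-descriptor (16-bit limit
 * followed by 64-bit base) that LGDT and LIDT expect, and
 * hands it to the given loader, lgdt or lidt below.
 */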
static void
loadptr(u16int lim, uintptr off, void (*load)(void*))
{
	u64int b[2], *o;
	u16int *s;

	o = &b[1];
	s = ((u16int*)o)-1;

	*s = lim;
	*o = off;

	(*load)(s);
}
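
/*
 * taskswitch points the Tss at a new kernel stack.  The Tss
 * fields are arrays of u32int, so the 64-bit stack pointer is
 * stored as two halves; rsp1 and rsp2 get the same value as
 * rsp0 since rings 1 and 2 are never used.
 */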
static void
taskswitch(uintptr stack)
{
	Tss *tss;

	tss = m->tss;
	tss->rsp0[0] = (u32int)stack;
	tss->rsp0[1] = stack >> 32;
	tss->rsp1[0] = (u32int)stack;
	tss->rsp1[1] = stack >> 32;
	tss->rsp2[0] = (u32int)stack;
	tss->rsp2[1] = stack >> 32;
	mmuflushtlb();
}
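
/*
 * mmuinit: per-processor MMU initialisation.  Sets up the Tss,
 * copies the prototype gdt, loads the GDT, IDT and task
 * register, and programs the syscall MSRs.
 */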
void
mmuinit(void)
{
	uintptr x;
	vlong v;
	int i;

	didmmuinit = 1;

	/* zap double map done by l.s */
	m->pml4[512] = 0;
	m->pml4[0] = 0;

	m->tss = mallocz(sizeof(Tss), 1);
	if(m->tss == nil)
		panic("mmuinit: no memory for Tss");
	m->tss->iomap = 0xDFFF;	/* I/O map base beyond the TSS limit: no user I/O */
	for(i = 0; i < 14; i += 2){
		x = (uintptr)m + MACHSIZE;
		m->tss->ist[i] = x;
		m->tss->ist[i+1] = x>>32;
	}

	/*
	 * We used to keep the GDT in the Mach structure, but it
	 * turns out that that slows down access to the rest of the
	 * page.  Since the Mach structure is accessed quite often,
	 * it pays off anywhere from a factor of 1.25 to 2 on real
	 * hardware to separate them (the AMDs are more sensitive
	 * than Intels in this regard).  Under VMware it pays off
	 * a factor of about 10 to 100.
	 */
	memmove(m->gdt, gdt, sizeof gdt);

	/* a 64-bit TSS descriptor spans two GDT slots */
	x = (uintptr)m->tss;
	m->gdt[TSSSEG+0].d0 = (x<<16)|(sizeof(Tss)-1);
	m->gdt[TSSSEG+0].d1 = (x&0xFF000000)|((x>>16)&0xFF)|SEGTSS|SEGPL(0)|SEGP;
	m->gdt[TSSSEG+1].d0 = x>>32;
	m->gdt[TSSSEG+1].d1 = 0;

	loadptr(sizeof(gdt)-1, (uintptr)m->gdt, lgdt);
	loadptr(sizeof(Segdesc)*512-1, (uintptr)IDTADDR, lidt);	/* 256 16-byte gates */
	taskswitch((uintptr)m + MACHSIZE);
	ltr(TSSSEL);

	wrmsr(0xc0000100, 0ull);	/* 64 bit fsbase */
	wrmsr(0xc0000101, (uvlong)&machp[m->machno]);	/* 64 bit gsbase */
	wrmsr(0xc0000102, 0ull);	/* kernel gs base */

	/* enable syscall extension */
	rdmsr(0xc0000080, &v);
	v |= 1ull;	/* EFER.SCE */
	wrmsr(0xc0000080, v);
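
	/*
	 * 0xc0000081 is IA32_STAR: bits 47:32 hold the selector
	 * base SYSCALL loads (CS = KESEL, SS = KESEL+8), bits
	 * 63:48 the base SYSRET uses (SS = UE32SEL+8, 64-bit
	 * CS = UE32SEL+16).
	 */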
	wrmsr(0xc0000081, ((uvlong)UE32SEL << 48) | ((uvlong)KESEL << 32));

	/* 0xc0000082 is IA32_LSTAR: the 64-bit SYSCALL entry point */
	wrmsr(0xc0000082, (uvlong)syscallentry);

	/* SYSCALL flags mask: clear IF (0x200) so entry runs with interrupts off */
	wrmsr(0xc0000084, 0x200);
}

/*
 * These could go back to being macros once the kernel is debugged,
 * but the extra checking is nice to have.
 */
void*
kaddr(uintptr pa)
{
	if(pa > (uintptr)-KZERO)
		panic("kaddr: pa=%#p pc=%#p", pa, getcallerpc(&pa));
	return (void*)(pa+KZERO);
}

uintptr
paddr(void *v)
{
	uintptr va;

	va = (uintptr)v;
	if(va >= KZERO)
		return va-KZERO;
	if(va >= VMAP)
		return va-(VMAP-(-KZERO));	/* invert the vmap() translation below */
	panic("paddr: va=%#p pc=%#p", va, getcallerpc(&v));
	return 0;
}
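
/*
 * MMU structures (a page-table page plus bookkeeping) come
 * from a per-processor free list, m->mmufree, refilled in
 * batches so page tables need not be allocated one at a time.
 */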
	p = malloc(n * sizeof(MMU));
	if(p == nil)
		panic("mmualloc: out of memory for MMU");
	p->page = mallocalign(n * PTSZ, BY2PG, 0, 0);
	if(p->page == nil)
		panic("mmualloc: out of memory for MMU pages");
	for(i = 1; i < n; i++){
		p[i].page = p[i-1].page + (1<<PTSHIFT);
		p[i-1].next = &p[i];
	}

	m->mmufree = p->next;
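
/*
 * mmuwalk returns the address of the pte for va at the given
 * level (0 = 4K pages, 1 = 2M pages), walking down from the
 * top-level table and, when create is set, allocating any
 * missing intermediate tables.  Tables for user addresses come
 * from mmualloc and are linked onto the process's
 * mmuhead/mmutail list so mmuswitch and mmurelease can find
 * them; kernel tables are allocated with mallocalign once
 * mmuinit has run.
 */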
uintptr*
mmuwalk(uintptr* table, uintptr va, int level, int create)
{
	uintptr pte, *page;
	int i, x;

	x = PTLX(va, 3);
	for(i = 2; i >= level; i--){
		pte = table[x];
		if(pte & PTEVALID){
			if(pte & PTESIZE)
				return 0;	/* a large page is already mapped here */
			table = KADDR(PPN(pte));
		} else {
			if(!create)
				return 0;
			pte = PTEWRITE|PTEVALID;
			if(va < VMAP){
				MMU *p;

				p = mmualloc();
				p->index = x;
				p->level = i;
				if(i == PML4E){
					/* PML4 entries linked to head */
					p->next = up->mmuhead;
					if(p->next == nil)
						up->mmutail = p;
					up->mmuhead = p;
					if(p->index <= PTLX(TSTKTOP, 3))
						m->mmumap[p->index/MAPBITS] |= 1ull<<(p->index%MAPBITS);
				} else {
					/* PDP and PD entries linked to tail */
					up->mmutail->next = p;
					up->mmutail = p;
				}
				up->mmucount++;
				page = p->page;
				pte |= PTEUSER;
			} else if(didmmuinit) {
				page = mallocalign(PTSZ, BY2PG, 0, 0);
			} else
				page = rampage();	/* boot-time allocator */
			memset(page, 0, PTSZ);
			table[x] = PADDR(page) | pte;
			table = page;
		}
		x = PTLX(va, i);
	}
	return &table[x];
}
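
/*
 * ptecount gives the number of ptes from va's slot to the end
 * of its page table at the given level, i.e. how many entries
 * pmap can fill before it has to walk again.
 */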
static int
ptecount(uintptr va, int level)
{
	return (1<<PTSHIFT) - ((va & (PGLSZ(level+1)-1)) / PGLSZ(level));
}
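
/*
 * pmap enters kernel mappings for [va, va+size), with the
 * attribute bits or'd into pa, using 2M PTESIZE pages whenever
 * alignment and size allow and 4K pages otherwise.
 */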
static void
pmap(uintptr *pml4, uintptr pa, uintptr va, int size)
{
	uintptr *pte, *ptee, flags;
	int z, l;

	if(size <= 0 || va < VMAP)
		panic("pmap: pa=%#p va=%#p size=%d", pa, va, size);
	flags = pa;
	pa = PPN(pa);
	flags -= pa;
	while(size > 0){
		if(size >= PGLSZ(1) && (va % PGLSZ(1)) == 0)
			flags |= PTESIZE;
		l = (flags & PTESIZE) != 0;
		z = PGLSZ(l);
		pte = mmuwalk(pml4, va, l, 1);
		if(pte == 0){
			pte = mmuwalk(pml4, va, ++l, 0);
			if(pte && (*pte & PTESIZE)){
				/* part of an existing large page: back up and redo */
				flags |= PTESIZE;
				z = va & (PGLSZ(l)-1);
				va -= z;
				pa -= z;
				size += z;
				continue;
			}
			panic("pmap: pa=%#p va=%#p size=%d", pa, va, size);
		}
		ptee = pte + ptecount(va, l);
		while(size > 0 && pte < ptee){
			*pte++ = pa | flags;
			pa += z;
			va += z;
			size -= z;
		}
	}
}
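
/*
 * mmuzap clears the user PML4 entries this processor has
 * installed, consulting m->mmumap, a bitmap of live slots, so
 * it need not scan the whole table; the UTZERO (text) and
 * TSTKTOP (stack) slots are cleared unconditionally as the
 * common case.  The KMAP slot is cleared as well.
 */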
static void
mmuzap(void)
{
	uintptr *pte;
	u64int w;
	int i, x;

	pte = m->pml4;
	pte[PTLX(KMAP, 3)] = 0;

	/* common case: the text and stack slots */
	pte[PTLX(UTZERO, 3)] = 0;
	pte[PTLX(TSTKTOP, 3)] = 0;
	m->mmumap[PTLX(UTZERO, 3)/MAPBITS] &= ~(1ull<<(PTLX(UTZERO, 3)%MAPBITS));
	m->mmumap[PTLX(TSTKTOP, 3)/MAPBITS] &= ~(1ull<<(PTLX(TSTKTOP, 3)%MAPBITS));

	for(i = 0; i < nelem(m->mmumap); pte += MAPBITS, i++){
		w = m->mmumap[i];
		if(w == 0)
			continue;
		for(x = 0; w != 0; x++, w >>= 1)
			if(w & 1)
				pte[x] = 0;
		m->mmumap[i] = 0;
	}
}
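
/*
 * mmufree puts a process's page-table pages back on the
 * per-processor free list; the whole mmuhead list is spliced
 * on in one go.
 */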
static void
mmufree(Proc *proc)
{
	MMU *p;

	p = proc->mmutail;
	if(p == nil)
		return;
	p->next = m->mmufree;
	m->mmufree = proc->mmuhead;
	proc->mmuhead = proc->mmutail = nil;
	m->mmucount += proc->mmucount;
	proc->mmucount = 0;
}
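
/*
 * mmuswitch, called when switching to a process, reinstalls
 * the process's PML4 entries and points the Tss at its kernel
 * stack.
 */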
void
mmuswitch(Proc *proc)
{
	MMU *p;
	uintptr pte;

	mmuzap();
	if(proc->newtlb){
		mmufree(proc);
		proc->newtlb = 0;
	}
	for(p = proc->mmuhead; p && p->level==PML4E; p = p->next){
		pte = PADDR(p->page) | PTEWRITE|PTEVALID;
		if(p->index <= PTLX(TSTKTOP, 3)){
			m->mmumap[p->index/MAPBITS] |= 1ull<<(p->index%MAPBITS);
			pte |= PTEUSER;
		}
		m->pml4[p->index] = pte;
	}
	taskswitch((uintptr)proc->kstack+KSTACK);
}

void
mmurelease(Proc *proc)
{
	mmuzap();
	mmufree(proc);
	taskswitch((uintptr)m+MACHSIZE);
}
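
/*
 * putmmu enters a single 4K user mapping; attribute bits such
 * as PTEWRITE arrive or'd into pa.
 */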
void
putmmu(uintptr va, uintptr pa, Page *)
{
	uintptr *pte;
	int x;

	x = splhi();
	pte = mmuwalk(m->pml4, va, 0, 1);
	if(pte == 0)
		panic("putmmu: bug: va=%#p pa=%#p", va, pa);
	*pte = pa | PTEVALID|PTEUSER;
	splx(x);
}

void
checkmmu(uintptr va, uintptr pa)

void
countpagerefs(ulong *ref, int print)
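
/*
 * kmap makes a temporary kernel mapping for page frames that
 * lie beyond the reach of the KZERO direct map; frames below
 * that are returned via KADDR unchanged.  Mappings come from a
 * small rotating window of virtual addresses at KMAP.
 */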
KMap*
kmap(Page *page)
{
	uintptr *pte, pa, va;
	int x;

	pa = page->pa;
	if(cankaddr(pa) != 0)
		return (KMap*)KADDR(pa);

	x = splhi();
	va = KMAP + ((uintptr)m->kmapindex << PGSHIFT);
	pte = mmuwalk(m->pml4, va, 0, 1);
	if(pte == 0 || *pte & PTEVALID)
		panic("kmap: pa=%#p va=%#p", pa, va);
	*pte = pa | PTEWRITE|PTEVALID;
	m->kmapindex = (m->kmapindex + 1) % (1<<PTSHIFT);
	if(m->kmapindex == 0)
		mmuflushtlb();	/* the window wrapped: flush stale kmap ptes */
	splx(x);
	return (KMap*)va;
}

	/* kunmap: undo a kmap translation */
	pte = mmuwalk(m->pml4, va, 0, 0);
	if(pte == 0 || (*pte & PTEVALID) == 0)
		panic("kunmap: va=%#p", va);
	*pte = 0;

/*
 * Add a device mapping to the vmap range.
 */
void*
vmap(uintptr pa, int size)
{
	uintptr va;
	int o;

	if(size <= 0 || pa >= -VMAP)
		panic("vmap: pa=%#p size=%d pc=%#p", pa, size, getcallerpc(&pa));
	if(cankaddr(pa) >= size)
		va = pa+KZERO;
	else
		va = pa+(VMAP-(-KZERO));
	/*
	 * might be asking for less than a page.
	 */
	o = pa & (BY2PG-1);
	pa -= o;
	va -= o;
	size += o;
	pmap(m->pml4, pa | PTEUNCACHED|PTEWRITE|PTEVALID, va, size);
	return (void*)(va+o);
}
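
/*
 * vunmap only sanity-checks its argument: vmap mappings live
 * in a permanent window, so there is nothing to tear down.
 */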
void
vunmap(void *v, int)
{
	paddr(v);	/* will panic on error */
}

/*
 * vmapsync() is currently unused as the VMAP and KZERO PDPs
 * are shared between processors. (see mpstartap)
 */
int
vmapsync(uintptr va)
{
	uintptr *pte1, *pte2;
	int level;

	if(va < VMAP || m->machno == 0)
		return 0;

	for(level=0; level<2; level++){
		pte1 = mmuwalk(MACHP(0)->pml4, va, level, 0);
		if(pte1 && *pte1 & PTEVALID){
			pte2 = mmuwalk(m->pml4, va, level, 1);
			if(pte2 == 0)
				break;
			*pte2 = *pte1;
			return 1;
		}
	}
	return 0;
}