#include "../port/lib.h"
/*
 * Simple segment descriptors with no translation.
 */
#define EXECSEGM(p)	{ 0, SEGL|SEGP|SEGPL(p)|SEGEXEC }
#define DATASEGM(p)	{ 0, SEGB|SEGG|SEGP|SEGPL(p)|SEGDATA|SEGW }
#define EXEC32SEGM(p)	{ 0xFFFF, SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
#define DATA32SEGM(p)	{ 0xFFFF, SEGB|SEGG|(0xF<<16)|SEGP|SEGPL(p)|SEGDATA|SEGW }
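/*
 * EXECSEGM/DATASEGM are 64-bit (long mode) descriptors: SEGL selects
 * long mode, where segment base and limit are ignored. The *32SEGM
 * variants carry an explicit 4GB limit (0xFFFF plus 0xF<<16, made
 * page-granular by SEGG) and SEGD for 32-bit compatibility mode.
 */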
Segdesc gdt[NGDT] =
{
[NULLSEG]	{ 0, 0},		/* null descriptor */
[KESEG]		EXECSEGM(0),		/* kernel code */
[KDSEG]		DATASEGM(0),		/* kernel data */
[UE32SEG]	EXEC32SEGM(3),		/* user code 32 bit */
[UDSEG]		DATA32SEGM(3),		/* user data/stack */
[UESEG]		EXECSEGM(3),		/* user code */
};
enum {
	MAPBITS = 8*sizeof(m->mmumap[0]),

	/* PAT entry used for write combining */
	PATWC	= 7,
};
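/*
 * Load a descriptor-table register: lgdt and lidt take a pseudo-
 * descriptor, a 16-bit limit followed by the 64-bit base address,
 * which loadptr assembles on the stack before calling the loader.
 */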
static void
loadptr(u16int lim, uintptr off, void (*load)(void*))
{
	u64int b[2], *o;
	u16int *s;

	o = &b[1];
	s = ((u16int*)o)-1;

	*s = lim;
	*o = off;

	(*load)(s);
}
static void
taskswitch(uintptr stack)
{
	Tss *tss;

	tss = m->tss;
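	/*
	 * the Tss stack fields are pairs of u32ints rather than
	 * uvlongs, so each stack pointer is stored in two halves;
	 * rings 1 and 2 are unused by Plan 9 but get the same stack.
	 */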
	tss->rsp0[0] = (u32int)stack;
	tss->rsp0[1] = stack >> 32;
	tss->rsp1[0] = (u32int)stack;
	tss->rsp1[1] = stack >> 32;
	tss->rsp2[0] = (u32int)stack;
	tss->rsp2[1] = stack >> 32;
	mmuflushtlb();
}
void
mmuinit(void)
{
	uintptr x;
	vlong v;
	int i;

	/* zap double map done by l.s */
	m->pml4[512] = 0;
	m->pml4[0] = 0;

	m->tss = mallocz(sizeof(Tss), 1);
	if(m->tss == nil)
		panic("mmuinit: no memory for Tss");
	m->tss->iomap = 0xDFFF;
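	/*
	 * an I/O-map base offset beyond the TSS limit means there is
	 * no I/O permission bitmap, so any port access from user mode
	 * faults.
	 */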
	for(i = 0; i < 14; i += 2){
		x = (uintptr)m + MACHSIZE;
		m->tss->ist[i] = x;
		m->tss->ist[i+1] = x>>32;
	}
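	/* the seven IST slots filled above all share the Mach stack top */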
	/*
	 * We used to keep the GDT in the Mach structure, but it
	 * turns out that that slows down access to the rest of the
	 * page. Since the Mach structure is accessed quite often,
	 * it pays off anywhere from a factor of 1.25 to 2 on real
	 * hardware to separate them (the AMDs are more sensitive
	 * than Intels in this regard). Under VMware it pays off
	 * a factor of about 10 to 100.
	 */
	memmove(m->gdt, gdt, sizeof gdt);

	x = (uintptr)m->tss;
	m->gdt[TSSSEG+0].d0 = (x<<16)|(sizeof(Tss)-1);
	m->gdt[TSSSEG+0].d1 = (x&0xFF000000)|((x>>16)&0xFF)|SEGTSS|SEGPL(0)|SEGP;
	m->gdt[TSSSEG+1].d0 = x>>32;
	m->gdt[TSSSEG+1].d1 = 0;
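	/*
	 * a long mode TSS descriptor is 16 bytes, so it spans two
	 * 8-byte GDT slots; the second slot holds bits 63:32 of the
	 * base address.
	 */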
	loadptr(sizeof(gdt)-1, (uintptr)m->gdt, lgdt);
	loadptr(sizeof(Segdesc)*512-1, (uintptr)IDTADDR, lidt);
	taskswitch((uintptr)m + MACHSIZE);
	ltr(TSSSEL);
	wrmsr(0xc0000100, 0ull);	/* 64 bit fsbase */
	wrmsr(0xc0000101, (uvlong)&machp[m->machno]);	/* 64 bit gsbase */
	wrmsr(0xc0000102, 0ull);	/* kernel gs base */
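	/*
	 * in long mode the CS/DS/ES/SS bases are fixed at 0; only FS
	 * and GS keep a programmable base, set via these MSRs. gsbase
	 * points at this CPU's machp[] slot so the kernel can always
	 * find its own Mach.
	 */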
	/* enable syscall extension */
	rdmsr(0xc0000080, &v);
	v |= 1ull;		/* EFER.SCE */
	wrmsr(0xc0000080, v);
	/* IA32_STAR */
	wrmsr(0xc0000081, ((uvlong)UE32SEL << 48) | ((uvlong)KESEL << 32));
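	/*
	 * SYSCALL loads CS from STAR[47:32] (KESEL) and SS from the
	 * next GDT slot; SYSRET to 64-bit mode loads CS from
	 * STAR[63:48]+16 and SS from STAR[63:48]+8, which is why
	 * UE32SEG, UDSEG and UESEG are consecutive in the GDT above.
	 */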
	/* IA32_LSTAR */
	wrmsr(0xc0000082, (uvlong)syscallentry);
	/* SYSCALL flags mask */
	wrmsr(0xc0000084, 0x200);
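	/*
	 * 0x200 is RFLAGS.IF: bits set in the mask are cleared on
	 * SYSCALL, so the kernel is entered with interrupts disabled.
	 */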
	/* IA32_PAT write combining */
	if((MACHP(0)->cpuiddx & Pat) != 0
	&& rdmsr(0x277, &v) != -1){
		v &= ~(255LL<<(PATWC*8));
		v |= 1LL<<(PATWC*8);	/* 01 = write combining */
		wrmsr(0x277, v);
	}
}
/*
 * These could go back to being macros once the kernel is debugged,
 * but the extra checking is nice to have.
 */
void*
kaddr(uintptr pa)
{
	if(pa >= (uintptr)-KZERO)
		panic("kaddr: pa=%#p pc=%#p", pa, getcallerpc(&pa));
	return (void*)(pa+KZERO);
}
168 panic("paddr: va=%#p pc=%#p", va, getcallerpc(&v));
static MMU*
mmualloc(void)
{
	MMU *p;
	int i, n;

	p = m->mmufree;
	if(p != nil){
		m->mmufree = p->next;
		m->mmucount--;
	} else {
		lock(&mmupool);
		p = mmupool.free;
		if(p != nil){
			mmupool.free = p->next;
			mmupool.nfree--;
		} else {
			unlock(&mmupool);

			n = 256;
			p = malloc(n * sizeof(MMU));
			if(p == nil)
				panic("mmualloc: out of memory for MMU");
			p->page = mallocalign(n * PTSZ, BY2PG, 0, 0);
			if(p->page == nil)
				panic("mmualloc: out of memory for MMU pages");
			for(i = 1; i < n; i++){
				p[i].page = p[i-1].page + (1<<PTSHIFT);
				p[i-1].next = &p[i];
			}

			lock(&mmupool);
			p[n-1].next = mmupool.free;
			mmupool.free = p->next;
			mmupool.nfree += n-1;
		}
		unlock(&mmupool);
	}
	p->next = nil;
	return p;
}
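/*
 * MMU page tables are allocated in batches of 256: one malloc for the
 * headers and one page-aligned slab for the tables themselves. Each
 * CPU serves requests from its private free list (m->mmufree) and
 * takes the lock on the shared pool only when that runs dry.
 */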
static uintptr*
mmucreate(uintptr *table, uintptr va, int level, int index)
{
	uintptr *page, flags;
	MMU *p;

	flags = PTEWRITE|PTEVALID;
	if(va < VMAP){
		assert(up != nil);
		assert((va < TSTKTOP) || (va >= KMAP && va < KMAP+KMAPSIZE));

		p = mmualloc();
		p->index = index;
		p->level = level;
		if(va < TSTKTOP){
			flags |= PTEUSER;
			if(level == PML4E){
				if((p->next = up->mmuhead) == nil)
					up->mmutail = p;
				up->mmuhead = p;
				m->mmumap[index/MAPBITS] |= 1ull<<(index%MAPBITS);
			} else {
				up->mmutail->next = p;
				up->mmutail = p;
			}
			up->mmucount++;
		} else {
			if(level == PML4E){
				up->kmaptail = p;
				up->kmaphead = p;
			} else {
				up->kmaptail->next = p;
				up->kmaptail = p;
			}
			up->kmapcount++;
		}
		page = p->page;
	} else if(conf.mem[0].npage != 0){
		page = mallocalign(PTSZ, BY2PG, 0, 0);
	} else {
		page = rampage();
	}
	memset(page, 0, PTSZ);
	table[index] = PADDR(page) | flags;
	return page;
}
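/*
 * Walk the page-table hierarchy for va down to the given level
 * (2 = PDP .. 0 = PTE), creating missing intermediate tables when
 * create is set. Returns a pointer to the entry, or 0 when the walk
 * fails or hits a large-page mapping above the requested level.
 */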
uintptr*
mmuwalk(uintptr* table, uintptr va, int level, int create)
{
	uintptr pte;
	int i, x;

	x = PTLX(va, 3);
	for(i = 2; i >= level; i--){
		pte = table[x];
		if(pte & PTEVALID){
			if(pte & PTESIZE)
				return 0;
			table = KADDR(PPN(pte));
		} else {
			if(!create)
				return 0;
			table = mmucreate(table, va, i, x);
		}
		x = PTLX(va, i);
	}
	return &table[x];
}
static int
ptecount(uintptr va, int level)
{
	return (1<<PTSHIFT) - (va & PGLSZ(level+1)-1) / PGLSZ(level);
}
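/*
 * Worked example, assuming the usual PTSHIFT of 9 (512-entry tables):
 * at level 0 (4K pages), a va that is 5 pages past a 2MB boundary
 * yields 512 - 5 = 507, the number of entries left before the next
 * level-1 boundary. Note that C precedence makes the expression
 * (va & (PGLSZ(level+1)-1)) / PGLSZ(level).
 */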
static void
pmap(uintptr *pml4, uintptr pa, uintptr va, vlong size)
{
	uintptr *pte, *ptee, flags;
	int z, l;

	if(size <= 0 || va < VMAP)
		panic("pmap: pa=%#p va=%#p size=%lld", pa, va, size);
	flags = pa;
	pa = PPN(pa);
	flags -= pa;
	if(va >= KZERO)
		flags |= PTEGLOBAL;
	while(size > 0){
		if(size >= PGLSZ(1) && (va % PGLSZ(1)) == 0)
			flags |= PTESIZE;
		l = (flags & PTESIZE) != 0;
		z = PGLSZ(l);
		pte = mmuwalk(pml4, va, l, 1);
		if(pte == 0){
			pte = mmuwalk(pml4, va, ++l, 0);
			if(pte && (*pte & PTESIZE)){
				flags |= PTESIZE;
				z = va & (PGLSZ(l)-1);
				pa -= z;
				va -= z;
				size += z;
				continue;
			}
			panic("pmap: pa=%#p va=%#p size=%lld", pa, va, size);
		}
		ptee = pte + ptecount(va, l);
		while(size > 0 && pte < ptee){
			*pte++ = pa | flags;
			pa += z;
			va += z;
			size -= z;
		}
	}
}
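/*
 * pmap prefers 2MB (PGLSZ(1)) mappings when the virtual address is
 * 2MB-aligned and at least 2MB remain; when the 4K walk fails because
 * an existing large page already covers va, it backs va down to the
 * large-page boundary and remaps the whole large page instead.
 */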
static void
mmuzap(void)
{
	uintptr *pte;
	u64int w;
	int i, x;

	pte = m->pml4;
	pte[PTLX(KMAP, 3)] = 0;

	/* common case */
	pte[PTLX(UTZERO, 3)] = 0;
	pte[PTLX(TSTKTOP, 3)] = 0;
	m->mmumap[PTLX(UTZERO, 3)/MAPBITS] &= ~(1ull<<(PTLX(UTZERO, 3)%MAPBITS));
	m->mmumap[PTLX(TSTKTOP, 3)/MAPBITS] &= ~(1ull<<(PTLX(TSTKTOP, 3)%MAPBITS));
	for(i = 0; i < nelem(m->mmumap); pte += MAPBITS, i++){
		if((w = m->mmumap[i]) == 0)
			continue;
		m->mmumap[i] = 0;
		for(x = 0; w != 0; w >>= 1, x++){
			if(w & 1)
				pte[x] = 0;
		}
	}
}
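/*
 * m->mmumap is a bitmap with one bit per user PML4 slot that is
 * currently populated, so mmuzap clears just those entries instead of
 * scanning all 512 each time; the UTZERO (text) and TSTKTOP (stack)
 * slots are cleared unconditionally as the common case.
 */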
static void
mmufree(Proc *proc)
{
	MMU *p;

	p = proc->mmutail;
	if(p == nil)
		return;
	if(m->mmucount+proc->mmucount < 256){
		p->next = m->mmufree;
		m->mmufree = proc->mmuhead;
		m->mmucount += proc->mmucount;
	} else {
		lock(&mmupool);
		p->next = mmupool.free;
		mmupool.free = proc->mmuhead;
		mmupool.nfree += proc->mmucount;
		unlock(&mmupool);
	}
	proc->mmuhead = proc->mmutail = nil;
	proc->mmucount = 0;
}
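/*
 * freed page tables stay cached: they go back to the local CPU while
 * it holds fewer than 256, and to the shared pool otherwise, so
 * process teardown rarely forces fresh allocations later.
 */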
void
mmuswitch(Proc *proc)
{
	MMU *p;

	mmuzap();
	if(proc->newtlb){
		mmufree(proc);
		proc->newtlb = 0;
	}
	if((p = proc->kmaphead) != nil)
		m->pml4[PTLX(KMAP, 3)] = PADDR(p->page) | PTEWRITE|PTEVALID;
	for(p = proc->mmuhead; p != nil && p->level == PML4E; p = p->next){
		m->mmumap[p->index/MAPBITS] |= 1ull<<(p->index%MAPBITS);
		m->pml4[p->index] = PADDR(p->page) | PTEUSER|PTEWRITE|PTEVALID;
	}
	taskswitch((uintptr)proc->kstack+KSTACK);
}
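/*
 * only the top-level PML4 entries are rebuilt on a process switch;
 * the lower tables stay linked on the process's MMU list, and
 * taskswitch ends with a CR3 reload (mmuflushtlb) that flushes the
 * non-global TLB entries.
 */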
void
mmurelease(Proc *proc)
{
	MMU *p;

	mmuzap();
	if((p = proc->kmaptail) != nil){
		if((p->next = proc->mmuhead) == nil)
			proc->mmutail = p;
		proc->mmuhead = proc->kmaphead;
		proc->mmucount += proc->kmapcount;

		proc->kmaphead = proc->kmaptail = nil;
		proc->kmapcount = proc->kmapindex = 0;
	}
	mmufree(proc);
	taskswitch((uintptr)m+MACHSIZE);
}
void
putmmu(uintptr va, uintptr pa, Page *)
{
	uintptr *pte, old;
	int x;

	x = splhi();
	pte = mmuwalk(m->pml4, va, 0, 1);
	if(pte == 0)
		panic("putmmu: bug: va=%#p pa=%#p", va, pa);
	old = *pte;
	*pte = pa | PTEVALID|PTEUSER;
	splx(x);
	if(old & PTEVALID)
		flushpg(va);
}
/*
 * Double-check the user MMU.
 * Error checking only.
 */
void
checkmmu(uintptr va, uintptr pa)
{
	uintptr *pte;

	pte = mmuwalk(m->pml4, va, 0, 0);
	if(pte != 0 && (*pte & PTEVALID) != 0 && PPN(*pte) != pa)
		print("%ld %s: va=%#p pa=%#p pte=%#p\n",
			up->pid, up->text, va, pa, *pte);
}
void
countpagerefs(ulong *ref, int print)
{
	USED(ref, print);
}
KMap*
kmap(Page *page)
{
	uintptr *pte, pa, va;
	int x;

	pa = page->pa;
	if(cankaddr(pa) != 0)
		return (KMap*)KADDR(pa);

	x = splhi();
	va = KMAP + ((uintptr)up->kmapindex << PGSHIFT);
	pte = mmuwalk(m->pml4, va, 0, 1);
	if(pte == 0 || *pte & PTEVALID)
		panic("kmap: pa=%#p va=%#p", pa, va);
	*pte = pa | PTEWRITE|PTEVALID;
	up->kmapindex = (up->kmapindex + 1) % (1<<PTSHIFT);
	if(up->kmapindex == 0)
		flushmmu();
	splx(x);
	return (KMap*)va;
}
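/*
 * the KMAP window holds 1<<PTSHIFT (one page table's worth of)
 * mappings handed out round-robin; when the index wraps, stale
 * translations may still sit in the TLB, hence the flushmmu().
 */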
void
kunmap(KMap *k)
{
	uintptr *pte, va;
	int x;

	va = (uintptr)k;
	if(va >= KZERO)
		return;

	x = splhi();
	pte = mmuwalk(m->pml4, va, 0, 0);
	if(pte == 0 || (*pte & PTEVALID) == 0)
		panic("kunmap: va=%#p", va);
	*pte = 0;
	splx(x);
}
/*
 * Add a device mapping to the vmap range.
 * Note that the VMAP and KZERO PDPs are shared
 * between processors (see mpstartap), so no
 * synchronization is being done.
 */
void*
vmap(uintptr pa, int size)
{
	uintptr va;
	int o;

	if(pa+size > VMAPSIZE)
		return 0;
	va = pa+VMAP;
	/*
	 * might be asking for less than a page.
	 */
	o = pa & (BY2PG-1);
	pa -= o;
	va -= o;
	size += o;
	pmap(m->pml4, pa | PTEUNCACHED|PTEWRITE|PTEVALID, va, size);
	return (void*)(va+o);
}
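/*
 * A typical caller (hypothetical sketch; the Pcidev field names are
 * assumed, not taken from this file) maps a PCI memory BAR for
 * register access:
 *
 *	regs = vmap(p->mem[0].bar & ~0xF, p->mem[0].size);
 *
 * PTEUNCACHED keeps device registers out of the cache; patwc() below
 * can then relax framebuffer ranges to write combining.
 */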
void
vunmap(void *v, int)
{
	paddr(v);	/* will panic on error */
}
/*
 * mark pages as write combining (used for framebuffer)
 */
void
patwc(void *a, int n)
{
	uintptr *pte, mask, attr, va;
	vlong v;
	int z, l;
	/* check if pat is usable */
	if((MACHP(0)->cpuiddx & Pat) == 0
	|| rdmsr(0x277, &v) == -1
	|| ((v >> PATWC*8) & 7) != 1)
		return;
	/* set the bits for all pages in range */
	for(va = (uintptr)a; n > 0; n -= z, va += z){
		l = 0;
		pte = mmuwalk(m->pml4, va, l, 0);
		if(pte == 0)
			pte = mmuwalk(m->pml4, va, ++l, 0);
		if(pte == 0 || (*pte & PTEVALID) == 0)
			panic("patwc: va=%#p", va);
		z = PGLSZ(l);
		z -= va & (z-1);
		mask = l == 0 ? 3<<3 | 1<<7 : 3<<3 | 1<<12;
		attr = (((PATWC&3)<<3) | ((PATWC&4)<<5) | ((PATWC&4)<<10));
		*pte = (*pte & ~mask) | (attr & mask);
	}
}
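/*
 * The PAT index for a page is a 3-bit value scattered across the PTE:
 * bit 0 goes to PWT (bit 3), bit 1 to PCD (bit 4), and bit 2 to the
 * PAT bit (bit 7 for 4K pages, bit 12 for large pages) - hence the
 * two masks above and the shifts that place the bits of PATWC.
 */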