[UESEG] EXECSEGM(3), /* user code */
};
-static int didmmuinit = 0;
-
static struct {
	Lock;	/* NOTE(review): presumably guards free/nalloc/nfree — confirm against mmualloc() */
	MMU *free;	/* linked list of spare MMU structures (see preallocpages) */
-	int nalloc;
-	int nfree;
+	ulong nalloc;	/* pool size; both counters start at nt in preallocpages() */
+	ulong nfree;
} mmupool;
-/* level */
enum {
+	/* page table level, as passed to mmuwalk() */
	PML4E = 2,
	PDPE = 1,
	PDE = 0,
	MAPBITS = 8*sizeof(m->mmumap[0]),	/* bits per m->mmumap word */
+
+	/* PAT entry used for write combining */
+	PATWC = 7,
};
static void
mmuflushtlb();
}
+static void kernelro(void);
+
void
mmuinit(void)
{
	vlong v;
	int i;
-	didmmuinit = 1;
-
	/* zap double map done by l.s */
	m->pml4[512] = 0;
	m->pml4[0] = 0;
+	/* boot processor write-protects kernel text once for everyone */
+	if(m->machno == 0)
+		kernelro();
+
	m->tss = mallocz(sizeof(Tss), 1);
	if(m->tss == nil)
		panic("mmuinit: no memory for Tss");
	taskswitch((uintptr)m + MACHSIZE);
	ltr(TSSSEL);
-	wrmsr(0xc0000100, 0ull);	/* 64 bit fsbase */
-	wrmsr(0xc0000101, (uvlong)&machp[m->machno]);	/* 64 bit gsbase */
-	wrmsr(0xc0000102, 0ull);	/* kernel gs base */
+	wrmsr(FSbase, 0ull);
+	wrmsr(GSbase, (uvlong)&machp[m->machno]);	/* per-processor Mach pointer */
+	wrmsr(KernelGSbase, 0ull);
	/* enable syscall extension */
-	rdmsr(0xc0000080, &v);
+	rdmsr(Efer, &v);
	v |= 1ull;	/* Efer.SCE */
-	wrmsr(0xc0000080, v);
-
-	/* IA32_STAR */
-	wrmsr(0xc0000081, ((uvlong)UE32SEL << 48) | ((uvlong)KESEL << 32));
-
-	/* IA32_LSTAR */
-	wrmsr(0xc0000082, (uvlong)syscallentry);
-
-	/* SYSCALL flags mask */
-	wrmsr(0xc0000084, 0x200);
+	wrmsr(Star, ((uvlong)UE32SEL << 48) | ((uvlong)KESEL << 32));	/* syscall/sysret selectors */
+	wrmsr(Lstar, (uvlong)syscallentry);	/* 64-bit syscall entry */
+	wrmsr(Sfmask, 0x200);	/* mask IF (0x200) on syscall entry */
+
+	/* IA32_PAT write combining: program PAT entry PATWC as WC */
+	if((MACHP(0)->cpuiddx & Pat) != 0
+	&& rdmsr(0x277, &v) != -1){
+		v &= ~(255LL<<(PATWC*8));
+		v |= 1LL<<(PATWC*8);	/* WC */
+		wrmsr(0x277, v);	/* 0x277 = IA32_PAT */
+	}
}
/*
void*
kaddr(uintptr pa)
{
+	/* only the low -KZERO bytes are direct-mapped; pa+KZERO must not wrap */
-	if(pa > (uintptr)-KZERO)
+	if(pa >= (uintptr)-KZERO)
		panic("kaddr: pa=%#p pc=%#p", pa, getcallerpc(&pa));
	return (void*)(pa+KZERO);
}
flags = PTEWRITE|PTEVALID;
if(va < VMAP){
assert(up != nil);
- assert((va < TSTKTOP) || (va >= KMAP && va < KMAP+KMAPSIZE));
+ assert((va < USTKTOP) || (va >= KMAP && va < KMAP+KMAPSIZE));
p = mmualloc();
p->index = index;
p->level = level;
- if(va < TSTKTOP){
+ if(va < USTKTOP){
flags |= PTEUSER;
if(level == PML4E){
if((p->next = up->mmuhead) == nil)
up->kmapcount++;
}
page = p->page;
- } else if(didmmuinit) {
- page = mallocalign(PTSZ, BY2PG, 0, 0);
} else {
page = rampage();
}
if(pte & PTEVALID){
if(pte & PTESIZE)
return 0;
- table = KADDR(PPN(pte));
+ pte = PPN(pte);
+ if(pte >= (uintptr)-KZERO)
+ table = (void*)(pte + VMAP);
+ else
+ table = (void*)(pte + KZERO);
} else {
if(!create)
return 0;
return (1<<PTSHIFT) - (va & PGLSZ(level+1)-1) / PGLSZ(level);
}
+/*
+ * Split the 2MB page mapping covering va into a freshly
+ * allocated table of 4K pages so that individual page
+ * permissions can be changed (see kernelro() and punmap()).
+ * A no-op when va is not covered by a valid large page or
+ * lies exactly on a 2MB boundary.
+ */
+static void
+ptesplit(uintptr* table, uintptr va)
+{
+	uintptr *pte, pa, off;
+
+	pte = mmuwalk(table, va, 1, 0);
+	if(pte == nil || (*pte & PTESIZE) == 0 || (va & PGLSZ(1)-1) == 0)
+		return;
+	table = rampage();
+	if(table == nil)
+		panic("ptesplit: out of memory");	/* no \n: panic() adds its own, matching other panics here */
+	va &= -PGLSZ(1);
+	pa = *pte & ~PTESIZE;
+	/* replicate the large page mapping into 4K entries */
+	for(off = 0; off < PGLSZ(1); off += PGLSZ(0))
+		table[PTLX(va + off, 0)] = pa + off;
+	*pte = PADDR(table) | PTEVALID|PTEWRITE;
+	invlpg(va);
+}
+
+/*
+ * map kernel text segment readonly
+ * and everything else no-execute.
+ */
+static void
+kernelro(void)
+{
+	uintptr *pte, psz, va;
+
+	/* split large pages at the boundaries where permissions change */
+	ptesplit(m->pml4, APBOOTSTRAP);
+	ptesplit(m->pml4, KTZERO);
+	ptesplit(m->pml4, (uintptr)etext-1);
+
+	/* walk the whole KZERO window; va wraps to 0 at the top of memory */
+	for(va = KZERO; va != 0; va += psz){
+		psz = PGLSZ(0);
+		pte = mmuwalk(m->pml4, va, 0, 0);
+		if(pte == nil){
+			/* no 4K table here; try the 2MB level when aligned */
+			if(va & PGLSZ(1)-1)
+				continue;
+			pte = mmuwalk(m->pml4, va, 1, 0);
+			if(pte == nil)
+				continue;
+			psz = PGLSZ(1);
+		}
+		if((*pte & PTEVALID) == 0)
+			continue;
+		if(va >= KTZERO && va < (uintptr)etext)
+			*pte &= ~PTEWRITE;	/* kernel text: read-only */
+		else if(va != (APBOOTSTRAP & -BY2PG))
+			*pte |= PTENOEXEC;	/* everything else: no-execute, except AP bootstrap code */
+		invlpg(va);
+	}
+}
+
void
-pmap(uintptr *pml4, uintptr pa, uintptr va, vlong size)
+pmap(uintptr pa, uintptr va, vlong size)
{
uintptr *pte, *ptee, flags;
int z, l;
- if((size <= 0) || va < VMAP)
+ if(size <= 0 || va < VMAP)
panic("pmap: pa=%#p va=%#p size=%lld", pa, va, size);
flags = pa;
pa = PPN(pa);
flags |= PTESIZE;
l = (flags & PTESIZE) != 0;
z = PGLSZ(l);
- pte = mmuwalk(pml4, va, l, 1);
- if(pte == 0){
- pte = mmuwalk(pml4, va, ++l, 0);
+ pte = mmuwalk(m->pml4, va, l, 1);
+ if(pte == nil){
+ pte = mmuwalk(m->pml4, va, ++l, 0);
if(pte && (*pte & PTESIZE)){
flags |= PTESIZE;
z = va & (PGLSZ(l)-1);
}
}
+/*
+ * Remove mappings from the kernel page tables,
+ * splitting 2MB pages at the edges so partial
+ * ranges can be unmapped at 4K granularity.
+ */
+void
+punmap(uintptr va, vlong size)
+{
+	uintptr *pte;
+	int l;
+
+	va = PPN(va);
+	while(size > 0){
+		/* unaligned start or short remainder: need 4K granularity */
+		if((va % PGLSZ(1)) != 0 || size < PGLSZ(1))
+			ptesplit(m->pml4, va);
+		l = 0;
+		pte = mmuwalk(m->pml4, va, l, 0);
+		if(pte == nil && (va % PGLSZ(1)) == 0 && size >= PGLSZ(1))
+			pte = mmuwalk(m->pml4, va, ++l, 0);	/* clear a whole 2MB entry */
+		if(pte){
+			*pte = 0;
+			invlpg(va);
+		}
+		va += PGLSZ(l);
+		size -= PGLSZ(l);
+	}
+}
+
static void
mmuzap(void)
{
/* common case */
pte[PTLX(UTZERO, 3)] = 0;
- pte[PTLX(TSTKTOP, 3)] = 0;
+ pte[PTLX(USTKTOP-1, 3)] = 0;
m->mmumap[PTLX(UTZERO, 3)/MAPBITS] &= ~(1ull<<(PTLX(UTZERO, 3)%MAPBITS));
- m->mmumap[PTLX(TSTKTOP, 3)/MAPBITS] &= ~(1ull<<(PTLX(TSTKTOP, 3)%MAPBITS));
+ m->mmumap[PTLX(USTKTOP-1, 3)/MAPBITS] &= ~(1ull<<(PTLX(USTKTOP-1, 3)%MAPBITS));
for(i = 0; i < nelem(m->mmumap); pte += MAPBITS, i++){
if((w = m->mmumap[i]) == 0)
if(pte == 0)
panic("putmmu: bug: va=%#p pa=%#p", va, pa);
old = *pte;
- *pte = pa | PTEVALID|PTEUSER;
+ *pte = pa | PTEUSER;
splx(x);
if(old & PTEVALID)
invlpg(va);
}
+/*
+ * Double-check the user MMU.
+ * Error checking only.
+ */
void
checkmmu(uintptr va, uintptr pa)
{
-	USED(va, pa);
+	uintptr *pte;
+
+	pte = mmuwalk(m->pml4, va, 0, 0);
+	/* warn (but do not repair) when a valid leaf pte disagrees with pa */
+	if(pte != 0 && (*pte & PTEVALID) != 0 && PPN(*pte) != pa)
+		print("%ld %s: va=%#p pa=%#p pte=%#p\n",
+			up->pid, up->text, va, pa, *pte);
}
uintptr
return -KZERO - pa;
}
-void
-countpagerefs(ulong *ref, int print)
-{
- USED(ref, print);
-}
-
KMap*
kmap(Page *page)
{
return (KMap*)KADDR(pa);
x = splhi();
- va = KMAP + ((uintptr)up->kmapindex << PGSHIFT);
+ va = KMAP + (((uintptr)up->kmapindex++ << PGSHIFT) & (KMAPSIZE-1));
pte = mmuwalk(m->pml4, va, 0, 1);
- if(pte == 0 || *pte & PTEVALID)
+ if(pte == 0 || (*pte & PTEVALID) != 0)
panic("kmap: pa=%#p va=%#p", pa, va);
- *pte = pa | PTEWRITE|PTEVALID;
- up->kmapindex = (up->kmapindex + 1) % (1<<PTSHIFT);
- if(up->kmapindex == 0)
- mmuflushtlb();
+ *pte = pa | PTEWRITE|PTENOEXEC|PTEVALID;
splx(x);
+ invlpg(va);
return (KMap*)va;
}
/*
* Add a device mapping to the vmap range.
+ * note that the VMAP and KZERO PDPs are shared
+ * between processors (see mpstartap) so no
+ * synchronization is being done.
*/
void*
vmap(uintptr pa, int size)
uintptr va;
int o;
+ if(pa+size > VMAPSIZE)
+ return 0;
va = pa+VMAP;
/*
* might be asking for less than a page.
pa -= o;
va -= o;
size += o;
- pmap(m->pml4, pa | PTEUNCACHED|PTEWRITE|PTEVALID, va, size);
+ pmap(pa | PTEUNCACHED|PTEWRITE|PTENOEXEC|PTEVALID, va, size);
return (void*)(va+o);
}
}
/*
- * vmapsync() is currently unused as the VMAP and KZERO PDPs
- * are shared between processors. (see mpstartap)
+ * mark pages as write combining (used for framebuffer)
 */
-int
-vmapsync(uintptr va)
+void
+patwc(void *a, int n)
{
-	uintptr *pte1, *pte2;
-	int level;
+	uintptr *pte, mask, attr, va;
+	int z, l;
+	vlong v;
-	if(va < VMAP || m->machno == 0)
-		return 0;
+	/* check if pat is usable: PATWC entry must be WC (01), as set by mmuinit() */
+	if((MACHP(0)->cpuiddx & Pat) == 0
+	|| rdmsr(0x277, &v) == -1
+	|| ((v >> PATWC*8) & 7) != 1)
+		return;
+
+	/* set the bits for all pages in range */
+	for(va = (uintptr)a; n > 0; n -= z, va += z){
+		l = 0;
+		pte = mmuwalk(m->pml4, va, l, 0);
+		if(pte == 0)
+			pte = mmuwalk(m->pml4, va, ++l, 0);	/* fall back to the 2MB entry */
+		if(pte == 0 || (*pte & PTEVALID) == 0)
+			panic("patwc: va=%#p", va);
+		z = PGLSZ(l);
+		z -= va & (z-1);	/* bytes left in this page */
+		/* PAT-index pte bits: PWT(3), PCD(4), PAT(7 for 4K pages, 12 for 2MB pages) */
+		mask = l == 0 ? 3<<3 | 1<<7 : 3<<3 | 1<<12;
+		attr = (((PATWC&3)<<3) | ((PATWC&4)<<5) | ((PATWC&4)<<10));	/* scatter PATWC index into those positions */
+		*pte = (*pte & ~mask) | (attr & mask);
+	}
+}
+
+/*
+ * The palloc.pages array and mmupool can be a large chunk
+ * out of the 2GB window above KZERO, so we allocate from
+ * upages and map in the VMAP window before pageinit()
+ */
+void
+preallocpages(void)
+{
+	Pallocmem *pm;
+	uintptr va, base, top;
+	vlong tsize, psize;
+	ulong np, nt;
+	int i;
-	for(level=0; level<2; level++){
-		pte1 = mmuwalk(MACHP(0)->pml4, va, level, 0);
-		if(pte1 && *pte1 & PTEVALID){
-			pte2 = mmuwalk(m->pml4, va, level, 1);
-			if(pte2 == 0)
-				break;
-			if(pte1 != pte2)
-				*pte2 = *pte1;
-			return 1;
+	/* total user pages across all banks */
+	np = 0;
+	for(i=0; i<nelem(palloc.mem); i++){
+		pm = &palloc.mem[i];
+		np += pm->npage;
+	}
+	nt = np / 50;	/* 2% for mmupool */
+	np -= nt;
+
+	/* convert the mmupool budget into whole MMU+page-table units */
+	nt = (uvlong)nt*BY2PG / (sizeof(MMU)+PTSZ);
+	tsize = (uvlong)nt * (sizeof(MMU)+PTSZ);
+
+	/* space for the Page structures describing np pages */
+	psize = (uvlong)np * BY2PG;
+	psize += sizeof(Page) + BY2PG;	/* round up */
+	psize = (psize / (sizeof(Page)+BY2PG)) * sizeof(Page);
+
+	psize += tsize;
+	psize = ROUND(psize, PGLSZ(1));	/* 2MB aligned for pmap */
+
+	/* find a bank with room and steal psize bytes from its start */
+	for(i=0; i<nelem(palloc.mem); i++){
+		pm = &palloc.mem[i];
+		base = ROUND(pm->base, PGLSZ(1));
+		top = pm->base + (uvlong)pm->npage * BY2PG;
+		if((base + psize) <= VMAPSIZE && (vlong)(top - base) >= psize){
+			pm->base = base + psize;
+			pm->npage = (top - pm->base)/BY2PG;
+
+			va = base + VMAP;
+			pmap(base | PTEGLOBAL|PTEWRITE|PTENOEXEC|PTEVALID, va, psize);
+
+			/* layout: nt page tables, then nt MMU structs, then palloc.pages */
+			palloc.pages = (void*)(va + tsize);
+
+			mmupool.nfree = mmupool.nalloc = nt;
+			mmupool.free = (void*)(va + (uvlong)nt*PTSZ);
+			for(i=0; i<nt; i++){
+				mmupool.free[i].page = (uintptr*)va;
+				mmupool.free[i].next = &mmupool.free[i+1];
+				va += PTSZ;
+			}
+			mmupool.free[i-1].next = nil;	/* terminate the free list */
+
+			break;
		}
	}
-	return 0;
}