2 #include "../port/lib.h"
14 * Where configuration info is left for the loaded programme.
15 * This will turn into a structure as more is done by the boot loader
16 * (e.g. why parse the .ini file twice?).
17 * There are 3584 bytes available at CONFADDR.
19 #define BOOTLINE ((char*)CONFADDR)	/* boot line: occupies the first BOOTLINELEN bytes at CONFADDR */
20 #define BOOTLINELEN 64	/* bytes reserved for BOOTLINE, including the terminating NUL */
21 #define BOOTARGS ((char*)(CONFADDR+BOOTLINELEN))	/* configuration text, placed directly after the boot line */
22 #define BOOTARGSLEN (4096-0x200-BOOTLINELEN)	/* the 3584 bytes at CONFADDR less the boot line */
26 char *confname[MAXCONF];	/* names of the parsed name=value configuration entries */
27 char *confval[MAXCONF];	/* confval[i] is the value paired with confname[i] */
32 uchar *sp; /* user stack of init proc */
34 extern void (*i8237alloc)(void);
39 extern ulong multibootptr;
47 multiboot = (ulong*)KADDR(multibootptr);
49 if((multiboot[0] & (1<<2)) != 0)
50 strncpy(BOOTLINE, KADDR(multiboot[4]), BOOTLINELEN-1);
53 ep = cp + BOOTARGSLEN-1;
56 if((multiboot[0] & (1<<6)) != 0 && (l = multiboot[11]) >= 24){
57 cp = seprint(cp, ep, "*e820=");
58 m = KADDR(multiboot[12]);
59 while(m[0] >= 20 && m[0] <= l-4){
62 base = ((uvlong)m[0] | (uvlong)m[1]<<32);
63 size = ((uvlong)m[2] | (uvlong)m[3]<<32);
64 cp = seprint(cp, ep, "%.1lux %.16llux %.16llux ",
65 m[4] & 0xF, base, base+size);
67 m = (ulong*)((uintptr)m + m[-1]);
72 /* plan9.ini passed as the first module */
73 if((multiboot[0] & (1<<3)) != 0 && multiboot[5] > 0){
74 m = KADDR(multiboot[6]);
89 char *cp, *line[MAXCONF], *p, *q;
94 * parse configuration args from dos file plan9.ini
96 cp = BOOTARGS; /* where b.com leaves its config */
97 cp[BOOTARGSLEN-1] = 0;
100 * Strip out '\r', change '\t' -> ' '.
103 for(q = cp; *q; q++){
112 n = getfields(cp, line, MAXCONF, 1, "\n");
113 for(i = 0; i < n; i++){
116 cp = strchr(line[i], '=');
120 confname[nconf] = line[i];
131 for(i = 0; i < nconf; i++)
132 if(cistrcmp(confname[i], name) == 0)
150 /* convert to name=value\n format */
159 error("kernel configuration too large");
160 memset(BOOTLINE, 0, BOOTLINELEN);
161 memmove(BOOTARGS, p, n);
173 if(p = getconf("service")){
174 if(strcmp(p, "cpu") == 0)
176 else if(strcmp(p,"terminal") == 0)
180 if(p = getconf("*kernelpercent"))
181 userpcnt = 100 - strtol(p, 0, 0);
186 for(i=0; i<nelem(conf.mem); i++)
187 conf.npage += conf.mem[i].npage;
189 conf.nproc = 100 + ((conf.npage*BY2PG)/MB)*5;
192 if(conf.nproc > 2000)
195 conf.nswap = conf.nproc*80;
201 kpages = conf.npage - (conf.npage*userpcnt)/100;
204 if(conf.npage*BY2PG < 16*MB)
209 kpages = conf.npage - (conf.npage*userpcnt)/100;
212 * Make sure terminals with low memory get at least
213 * 4MB on the first Image chunk allocation.
215 if(conf.npage*BY2PG < 16*MB)
216 imagmem->minarena = 4*MB;
220 * can't go past the end of virtual memory.
222 if(kpages > ((uintptr)-KZERO)/BY2PG)
223 kpages = ((uintptr)-KZERO)/BY2PG;
225 conf.upages = conf.npage - kpages;
226 conf.ialloc = (kpages/2)*BY2PG;
229 * Guess how much is taken by the large permanent
230 * datastructures. Mntcache and Mntrpc are not accounted for
234 kpages -= conf.nproc*sizeof(Proc)
235 + conf.nimage*sizeof(Image)
237 + conf.nswppo*sizeof(Page*);
238 mainmem->maxsize = kpages;
241 * the dynamic allocation will balance the load properly,
242 * hopefully. be careful with 32-bit overflow.
244 imagmem->maxsize = kpages - (kpages/10);
245 if(p = getconf("*imagemaxmb")){
246 imagmem->maxsize = strtol(p, nil, 0)*MB;
247 if(imagmem->maxsize > mainmem->maxsize)
248 imagmem->maxsize = mainmem->maxsize;
253 * The palloc.pages array can be a large chunk out of the 2GB
254 * window above KZERO, so we allocate the array from
255 * upages and map in the VMAP window before pageinit()
261 uintptr va, base, top;
267 for(i=0; i<nelem(palloc.mem); i++){
271 size = (uvlong)np * BY2PG;
272 size += sizeof(Page) + BY2PG; /* round up */
273 size = (size / (sizeof(Page) + BY2PG)) * sizeof(Page);
274 size = ROUND(size, PGLSZ(1));
276 for(i=0; i<nelem(palloc.mem); i++){
278 base = ROUND(pm->base, PGLSZ(1));
279 top = pm->base + (uvlong)pm->npage * BY2PG;
280 if((base + size) <= VMAPSIZE && (vlong)(top - base) >= size){
282 pmap(m->pml4, base | PTEGLOBAL|PTEWRITE|PTEVALID, va, size);
283 palloc.pages = (Page*)va;
284 pm->base = base + size;
285 pm->npage = (top - pm->base)/BY2PG;
301 memset(m, 0, sizeof(Mach));
308 * For polled uart output at boot, need
309 * a default delay constant. 100000 should
310 * be enough for a while. Cpuidentify will
311 * calculate the real value later.
313 m->loopconst = 100000;
321 MACHP(0) = (Mach*)CPU0MACH;
324 m->pml4 = (u64int*)CPU0PML4;
325 m->gdt = (Segdesc*)CPU0GDT;
354 sp = (uchar*)base + BY2PG - sizeof(Tos);
357 av[ac++] = pusharg("boot");
359 /* when boot is changed to only use rc, this code can go away */
360 cp[BOOTLINELEN-1] = 0;
362 if(strncmp(cp, "fd", 2) == 0){
363 sprint(buf, "local!#f/fd%lddisk", strtol(cp+2, 0, 0));
364 av[ac++] = pusharg(buf);
365 } else if(strncmp(cp, "sd", 2) == 0){
366 sprint(buf, "local!#S/sd%c%c/fs", *(cp+2), *(cp+3));
367 av[ac++] = pusharg(buf);
368 } else if(strncmp(cp, "ether", 5) == 0)
369 av[ac++] = pusharg("-n");
371 /* 8 byte word align stack */
372 sp = (uchar*)((uintptr)sp & ~7);
374 /* build argc, argv on stack */
375 sp -= (ac+1)*sizeof(sp);
377 for(i = 0; i < ac; i++)
378 lsp[i] = av[i] + ((uintptr)(USTKTOP - BY2PG) - (uintptr)base);
380 sp += (uintptr)(USTKTOP - BY2PG) - (uintptr)base;
388 char buf[2*KNAMELEN];
395 * These are o.k. because rootinit is null.
396 * Then early kproc's will have a root and dot.
398 up->slash = namec("#/", Atodir, 0, 0);
399 pathclose(up->slash->path);
400 up->slash->path = newpath("/");
401 up->dot = cclone(up->slash);
406 snprint(buf, sizeof(buf), "%s %s", arch->id, conffile);
407 ksetenv("terminal", buf, 0);
408 ksetenv("cputype", "amd64", 0);
410 ksetenv("service", "cpu", 0);
412 ksetenv("service", "terminal", 0);
413 for(i = 0; i < nconf; i++){
414 if(confname[i][0] != '*')
415 ksetenv(confname[i], confval[i], 0);
416 ksetenv(confname[i], confval[i], 1);
420 kproc("alarm", alarmkproc, 0);
435 p->egrp = smalloc(sizeof(Egrp));
437 p->fgrp = dupfgrp(nil);
442 kstrdup(&p->text, "*init*");
443 kstrdup(&p->user, eve);
450 * N.B. make sure there's enough space for syscall to check
452 * 8 bytes for gotolabel's return PC
454 p->sched.pc = (uintptr)init0;
455 p->sched.sp = (uintptr)p->kstack+KSTACK-(sizeof(Sargs)+BY2WD);
457 /* temporarily set up for kmap() */
463 s = newseg(SG_STACK, USTKTOP-USTKSIZE, USTKSIZE/BY2PG);
465 pg = newpage(0, 0, USTKTOP-BY2PG);
475 s = newseg(SG_TEXT, UTZERO, 1);
478 pg = newpage(0, 0, UTZERO);
479 memset(pg->cachectl, PG_TXTFLUSH, sizeof(pg->cachectl));
483 memmove(v, initcode, sizeof initcode);
511 if(i8237alloc != nil)
522 if(arch->clockenable)
537 active.thunderbirdsarego = 1;
542 shutdown(int ispanic)
548 active.ispanic = ispanic;
549 else if(m->machno == 0 && (active.machs & (1<<m->machno)) == 0)
551 once = active.machs & (1<<m->machno);
553 * setting exiting will make hzclock() on each processor call exit(0),
554 * which calls shutdown(0) and arch->reset(), which on mp systems is
555 * mpshutdown, from which there is no return: the processor is idled
556 * or initiates a reboot. clearing our bit in machs avoids calling
557 * exit(0) from hzclock() on this processor.
559 active.machs &= ~(1<<m->machno);
564 iprint("cpu%d: exiting\n", m->machno);
566 /* wait for any other processors to shut down */
568 for(ms = 5*1000; ms > 0; ms -= TK2MS(2)){
570 if(active.machs == 0 && consactive() == 0)
578 if(getconf("*debug"))
593 reboot(void *entry, void *code, ulong size)
595 void (*f)(uintptr, uintptr, ulong);
600 * the boot processor is cpu0. execute this function on it
601 * so that the new kernel has the same cpu0. this only matters
602 * because the hardware has a notion of which processor was the
603 * boot processor and we look at it at start up.
605 if (m->machno != 0) {
611 iprint("shutting down...\n");
616 /* turn off buffered serial console */
619 /* shutdown devices */
624 * This allows the reboot code to turn off the page mapping
626 *mmuwalk(m->pml4, 0, 3, 0) = *mmuwalk(m->pml4, KZERO, 3, 0);
627 *mmuwalk(m->pml4, 0, 2, 0) = *mmuwalk(m->pml4, KZERO, 2, 0);
630 /* setup reboot trampoline function */
631 f = (void*)REBOOTADDR;
632 memmove(f, rebootcode, sizeof(rebootcode));
634 /* off we go - never to return */
636 (*f)((uintptr)entry & ~0xF0000000UL, (uintptr)PADDR(code), size);
640 * SIMD Floating Point.
641 * Assembler support to get at the individual instructions
643 * There are opportunities to be lazier about saving and
644 * restoring the state and allocating the storage needed.
646 extern void _clts(void);
647 extern void _fldcw(u16int);
648 extern void _fnclex(void);
649 extern void _fninit(void);
650 extern void _fxrstor(Fxsave*);
651 extern void _fxsave(Fxsave*);
652 extern void _fwait(void);
653 extern void _ldmxcsr(u32int);
654 extern void _stts(void);
657 * not used, AMD64 mandated SSE
664 fpx87restore(FPsave*)
669 fpssesave(FPsave *fps)
671 Fxsave *fx = (Fxsave*)ROUND(((uintptr)fps), FPalign);
675 if(fx != (Fxsave*)fps)
676 memmove((Fxsave*)fps, fx, sizeof(Fxsave));
679 fpsserestore(FPsave *fps)
681 Fxsave *fx = (Fxsave*)ROUND(((uintptr)fps), FPalign);
683 if(fx != (Fxsave*)fps)
684 memmove(fx, (Fxsave*)fps, sizeof(Fxsave));
689 static char* mathmsg[] =
691 nil, /* handled below */
692 "denormalized operand",
700 mathnote(ulong status, uintptr pc)
702 char *msg, note[ERRMAX];
706 * Some attention should probably be paid here to the
707 * exception masks and error summary.
709 msg = "unknown exception";
710 for(i = 1; i <= 5; i++){
711 if(!((1<<i) & status))
719 msg = "stack overflow";
721 msg = "stack underflow";
723 msg = "invalid operation";
725 snprint(note, sizeof note, "sys: fp: %s fppc=%#p status=0x%lux",
727 postnote(up, 1, note, NDebug);
731 * math coprocessor error
734 matherror(Ureg*, void*)
737 * Save FPU state to check out the error.
740 up->fpstate = FPinactive;
741 mathnote(up->fpsave.fsw, up->fpsave.rip);
748 simderror(Ureg *ureg, void*)
751 up->fpstate = FPinactive;
752 mathnote(up->fpsave.mxcsr & 0x3f, ureg->pc);
756 * math coprocessor emulation fault
759 mathemu(Ureg *ureg, void*)
761 ulong status, control;
763 if(up->fpstate & FPillegal){
764 /* someone did floating point in a note handler */
765 postnote(up, 1, "sys: floating point in note handler", NDebug);
771 * A process tries to use the FPU for the
772 * first time and generates a 'device not available'
774 * Turn the FPU on and initialise it for use.
775 * Set the precision and mask the exceptions
776 * we don't care about from the generic Mach value.
783 up->fpstate = FPactive;
787 * Before restoring the state, check for any pending
788 * exceptions, there's no way to restore the state without
789 * generating an unmasked exception.
790 * More attention should probably be paid here to the
791 * exception masks and error summary.
793 status = up->fpsave.fsw;
794 control = up->fpsave.fcw;
795 if((status & ~control) & 0x07F){
796 mathnote(status, up->fpsave.rip);
799 fprestore(&up->fpsave);
800 up->fpstate = FPactive;
803 panic("math emu pid %ld %s pc %#p",
804 up->pid, up->text, ureg->pc);
810 * math coprocessor segment overrun
813 mathover(Ureg*, void*)
815 pexit("math overrun", 0);
821 trapenable(VectorCERR, matherror, 0, "matherror");
822 if(X86FAMILY(m->cpuidax) == 3)
823 intrenable(IrqIRQ13, matherror, 0, BUSUNKNOWN, "matherror");
824 trapenable(VectorCNA, mathemu, 0, "mathemu");
825 trapenable(VectorCSO, mathover, 0, "mathover");
826 trapenable(VectorSIMD, simderror, 0, "simderror");
835 p->pcycles = -p->kentry;
843 p->kentry = up->kentry;
844 p->pcycles = -p->kentry;
846 /* save floating point state */
848 switch(up->fpstate & ~FPillegal){
851 up->fpstate = FPinactive;
853 p->fpsave = up->fpsave;
854 p->fpstate = FPinactive;
882 if(p->fpstate == FPactive){
883 if(p->state == Moribund){
890 * Fpsave() stores without handling pending
891 * unmasked exceptions. Postnote() can't be called
892 * here as sleep() already has up->rlock, so
893 * the handling of pending exceptions is delayed
894 * until the process runs again and generates an
895 * emulation fault to activate the FPU.
899 p->fpstate = FPinactive;
903 * While this processor is in the scheduler, the process could run
904 * on another processor and exit, returning the page tables to
905 * the free list where they could be reallocated and overwritten.
906 * When this processor eventually has to get an entry from the
907 * trashed page tables it will crash.
909 * If there's only one processor, this can't happen.
910 * You might think it would be a win not to do this in that case,
911 * especially on VMware, but it turns out not to matter.