2 #include "../port/lib.h"
10 #include "apbootstrap.h"
14 static Bus* mpbuslast;
15 static int mpisabus = -1;
16 static int mpeisabus = -1;
17 extern int i8259elcr; /* mask of level-triggered interrupts */
18 static Apic mpapic[MaxAPICNO+1];
19 static int machno2apicno[MaxAPICNO+1]; /* inverse map: machno -> APIC ID */
20 static int mpapicremap[MaxAPICNO+1];
21 static int mpmachno = 1;
22 static Lock mpphysidlock;
25 static char* buses[] = {
48 mkprocessor(PCMPprocessor* p)
54 if(!(p->flags & PcmpEN) || apicno > MaxAPICNO)
57 apic = &mpapic[apicno];
58 apic->type = PcmpPROCESSOR;
59 apic->apicno = apicno;
60 apic->flags = p->flags;
61 apic->lintr[0] = ApicIMASK;
62 apic->lintr[1] = ApicIMASK;
64 if(p->flags & PcmpBP){
65 machno2apicno[0] = apicno;
69 machno2apicno[mpmachno] = apicno;
70 apic->machno = mpmachno;
83 for(i = 0; buses[i]; i++){
84 if(strncmp(buses[i], p->string, sizeof(p->string)) == 0)
90 if((bus = xalloc(sizeof(Bus))) == nil)
91 panic("mkbus: no memory for Bus");
93 mpbuslast->next = bus;
99 bus->busno = p->busno;
100 if(bus->type == BusEISA){
104 print("mkbus: more than one EISA bus\n");
105 mpeisabus = bus->busno;
107 else if(bus->type == BusPCI){
111 else if(bus->type == BusISA){
115 print("mkbus: more than one ISA bus\n");
116 mpisabus = bus->busno;
131 for(bus = mpbus; bus; bus = bus->next){
132 if(bus->busno == busno)
135 print("mpgetbus: can't find bus %d\n", busno);
145 for(i = 0; i < MaxAPICNO+1; i++)
146 if(mpapic[i].flags == 0)
152 mkioapic(PCMPioapic* p)
159 if(!(p->flags & PcmpEN) || apicno > MaxAPICNO)
165 if((va = vmap(p->addr, 1024)) == nil)
168 apic = &mpapic[apicno];
169 if(apic->flags != 0) {
172 print("mkioapic: out of APIC IDs\n");
174 mpapicremap[p->apicno] = new;
175 print("mkioapic: APIC ID conflict at %d, remapping to %d\n", p->apicno, new);
176 p->apicno = apicno = new;
177 apic = &mpapic[apicno];
180 mpapicremap[p->apicno] = p->apicno;
181 apic->type = PcmpIOAPIC;
182 apic->apicno = apicno;
184 apic->paddr = p->addr;
185 apic->flags = p->flags;
191 mkiointr(PCMPintr* p)
198 * According to the MultiProcessor Specification, a destination
199 * I/O APIC of 0xFF means the signal is routed to all I/O APICs.
200 * It's unclear how that can possibly be correct so treat it as
203 if(p->apicno > MaxAPICNO)
206 if(mpapicremap[p->apicno] < 0) {
207 print("iointr: non-existing IOAPIC %d\n", p->apicno);
210 p->apicno = mpapicremap[p->apicno];
211 if((bus = mpgetbus(p->busno)) == 0)
214 if((aintr = xalloc(sizeof(Aintr))) == nil)
215 panic("iointr: no memory for Aintr");
219 print("iointr: type %d intr type %d flags %#o "
220 "bus %d irq %d apicno %d intin %d\n",
221 p->type, p->intr, p->flags,
222 p->busno, p->irq, p->apicno, p->intin);
224 * Hack for Intel SR1520ML motherboard, whose BIOS describes
225 * the i82575 dual ethernet controllers incorrectly.
227 if(memcmp(mppcmp->product, "INTEL X38MLST ", 20) == 0){
228 if(p->busno == 1 && p->intin == 16 && p->irq == 1){
229 if((pcmpintr = xalloc(sizeof(PCMPintr))) == nil)
230 panic("iointr: no memory for PCMPintr");
231 memmove(pcmpintr, p, sizeof(PCMPintr));
232 print("mkiointr: %20.20s bus %d intin %d irq %d\n",
233 (char*)mppcmp->product,
234 pcmpintr->busno, pcmpintr->intin,
236 pcmpintr->intin = 17;
237 aintr->intr = pcmpintr;
240 aintr->apic = &mpapic[p->apicno];
241 aintr->next = bus->aintr;
248 mpintrinit(Bus* bus, PCMPintr* intr, int vno, int /*irq*/)
253 * Parse an I/O or Local APIC interrupt table entry and
254 * return the encoded vector.
258 po = intr->flags & PcmpPOMASK;
259 el = intr->flags & PcmpELMASK;
263 default: /* PcmpINT */
264 v |= ApicFIXED; /* no-op */
280 * The AMI Goliath doesn't boot successfully with its LINTR0
281 * entry which decodes to low+level. The PPro manual says ExtINT
282 * should be level, whereas the Pentium is edge. Setting the
283 * Goliath to edge+high seems to cure the problem. Other PPro
284 * MP tables (e.g. ASUS P/I-P65UP5) have an entry which decodes
285 * to edge+high, so who knows.
286 * Perhaps it would be best just to not set an ExtINT entry at
287 * all, it shouldn't be needed for SMP mode.
296 if(bus->type == BusEISA && !po && !el /*&& !(i8259elcr & (1<<irq))*/){
304 else if(po != PcmpHIGH){
305 print("mpintrinit: bad polarity 0x%uX\n", po);
313 else if(el != PcmpEDGE){
314 print("mpintrinit: bad trigger 0x%uX\n", el);
329 * The offsets of vectors for LINT[01] are known to be
330 * 0 and 1 from the local APIC vector space at VectorLAPIC.
332 if((bus = mpgetbus(p->busno)) == 0)
337 * Pentium Pros have problems if LINT[01] are set to ExtINT
338 * so just bag it, SMP mode shouldn't need ExtINT anyway.
340 if(p->intr == PcmpExtINT || p->intr == PcmpNMI)
343 v = mpintrinit(bus, p, VectorLAPIC+intin, p->irq);
345 if(p->apicno == 0xFF){
346 for(apic = mpapic; apic <= &mpapic[MaxAPICNO]; apic++){
347 if((apic->flags & PcmpEN)
348 && apic->type == PcmpPROCESSOR)
349 apic->lintr[intin] = v;
353 apic = &mpapic[p->apicno];
354 if((apic->flags & PcmpEN) && apic->type == PcmpPROCESSOR)
355 apic->lintr[intin] = v;
368 * If there are MTRR registers, snarf them for validation.
370 if(!(m->cpuiddx & 0x1000))
373 rdmsr(0x0FE, &m->mtrrcap);
374 rdmsr(0x2FF, &m->mtrrdef);
375 if(m->mtrrcap & 0x0100){
376 rdmsr(0x250, &m->mtrrfix[0]);
377 rdmsr(0x258, &m->mtrrfix[1]);
378 rdmsr(0x259, &m->mtrrfix[2]);
379 for(i = 0; i < 8; i++)
380 rdmsr(0x268+i, &m->mtrrfix[(i+3)]);
382 vcnt = m->mtrrcap & 0x00FF;
383 if(vcnt > nelem(m->mtrrvar))
384 vcnt = nelem(m->mtrrvar);
385 for(i = 0; i < vcnt; i++)
386 rdmsr(0x200+i, &m->mtrrvar[i]);
389 * If not the bootstrap processor, compare.
395 if(mach0->mtrrcap != m->mtrrcap)
396 print("mtrrcap%d: %lluX %lluX\n",
397 m->machno, mach0->mtrrcap, m->mtrrcap);
398 if(mach0->mtrrdef != m->mtrrdef)
399 print("mtrrdef%d: %lluX %lluX\n",
400 m->machno, mach0->mtrrdef, m->mtrrdef);
401 for(i = 0; i < 11; i++){
402 if(mach0->mtrrfix[i] != m->mtrrfix[i])
403 print("mtrrfix%d: i%d: %lluX %lluX\n",
404 m->machno, i, mach0->mtrrfix[i], m->mtrrfix[i]);
406 for(i = 0; i < vcnt; i++){
407 if(mach0->mtrrvar[i] != m->mtrrvar[i])
408 print("mtrrvar%d: i%d: %lluX %lluX\n",
409 m->machno, i, mach0->mtrrvar[i], m->mtrrvar[i]);
416 // iprint("Hello Squidboy\n");
435 active.machs |= 1<<m->machno;
438 while(!active.thunderbirdsarego)
445 mpstartap(Apic* apic)
447 ulong *apbootp, *pdb, *pte;
455 * Initialise the AP page-tables and Mach structure. The page-tables
456 * are the same as for the bootstrap processor with the exception of
457 * the PTE for the Mach structure.
458 * Xspanalloc will panic if an allocation can't be made.
460 p = xspanalloc(4*BY2PG, BY2PG, 0);
462 memmove(pdb, mach0->pdb, BY2PG);
465 if((pte = mmuwalk(pdb, MACHADDR, 1, 0)) == nil)
467 memmove(p, KADDR(PPN(*pte)), BY2PG);
468 *pte = PADDR(p)|PTEWRITE|PTEVALID;
474 if((pte = mmuwalk(pdb, MACHADDR, 2, 0)) == nil)
476 *pte = PADDR(mach)|PTEWRITE|PTEVALID;
481 machno = apic->machno;
482 MACHP(machno) = mach;
483 mach->machno = machno;
485 mach->gdt = (Segdesc*)p; /* filled by mmuinit */
488 * Tell the AP where its kernel vector and pdb are.
489 * The offsets are known in the AP bootstrap code.
491 apbootp = (ulong*)(APBOOTSTRAP+0x08);
492 *apbootp++ = (ulong)squidboy;
493 *apbootp++ = PADDR(pdb);
494 *apbootp = (ulong)apic;
497 * Universal Startup Algorithm.
500 *p++ = PADDR(APBOOTSTRAP);
501 *p++ = PADDR(APBOOTSTRAP)>>8;
502 i = (PADDR(APBOOTSTRAP) & ~0xFFFF)/16;
503 /* code assumes i==0 */
505 print("mp: bad APBOOTSTRAP\n");
509 nvramwrite(0x0F, 0x0A);
510 lapicstartap(apic, PADDR(APBOOTSTRAP));
511 for(i = 0; i < 1000; i++){
516 nvramwrite(0x0F, 0x00);
520 dumpmp(uchar *p, uchar *e)
524 for(i = 0; p < e; p++) {
525 if((i % 16) == 0) print("*mp%d=", i/16);
527 if((++i % 16) == 0) print("\n");
529 if((i % 16) != 0) print("\n");
533 mpoverride(uchar** newp, uchar** e)
540 size = atoi(getconf("*mp"));
541 if(size == 0) panic("mpoverride: invalid size in *mp");
542 *newp = p = xalloc(size);
543 if(p == nil) panic("mpoverride: can't allocate memory");
546 snprint(buf, sizeof buf, "*mp%d", i);
550 j = strtol(s, &s, 16);
551 if(*s && *s != ' ' || j < 0 || j > 0xff) panic("mpoverride: invalid entry in %s", buf);
552 if(p >= *e) panic("mpoverride: overflow in %s", buf);
556 if(p != *e) panic("mpoverride: size doesn't match");
574 pcmp = KADDR(_mp_->physaddr);
577 * Map the local APIC.
579 if((va = vmap(pcmp->lapicbase, 1024)) == nil)
582 print("LAPIC: %.8lux %.8lux\n", pcmp->lapicbase, (ulong)va);
586 for(i = 0; i <= MaxAPICNO; i++)
590 * Run through the table saving information needed for starting
591 * application processors and initialising any I/O APICs. The table
592 * is guaranteed to be in order such that only one pass is necessary.
594 p = ((uchar*)pcmp)+sizeof(PCMP);
595 e = ((uchar*)pcmp)+pcmp->length;
596 if(getconf("*dumpmp") != nil)
598 if(getconf("*mp") != nil)
600 while(p < e) switch(*p){
603 print("mpinit: unknown PCMP type 0x%uX (e-p 0x%luX)\n",
612 if(apic = mkprocessor((PCMPprocessor*)p)){
614 * Must take a note of bootstrap processor APIC
615 * now as it will be needed in order to start the
616 * application processors later and there's no
617 * guarantee that the bootstrap processor appears
618 * first in the table before the others.
621 apic->paddr = pcmp->lapicbase;
622 if(apic->flags & PcmpBP)
625 p += sizeof(PCMPprocessor);
630 p += sizeof(PCMPbus);
634 if(apic = mkioapic((PCMPioapic*)p))
635 ioapicinit(apic, ((PCMPioapic*)p)->apicno);
636 p += sizeof(PCMPioapic);
640 mkiointr((PCMPintr*)p);
641 p += sizeof(PCMPintr);
645 mklintr((PCMPintr*)p);
646 p += sizeof(PCMPintr);
651 * No bootstrap processor, no need to go further.
660 * These interrupts are local to the processor
661 * and do not appear in the I/O APIC so it is OK
664 intrenable(IrqTIMER, lapicclock, 0, BUSUNKNOWN, "clock");
665 intrenable(IrqERROR, lapicerror, 0, BUSUNKNOWN, "lapicerror");
666 intrenable(IrqSPURIOUS, lapicspurious, 0, BUSUNKNOWN, "lapicspurious");
672 * Initialise the application processors.
674 if(cp = getconf("*ncpu")){
675 ncpu = strtol(cp, 0, 0);
678 else if(ncpu > MAXMACH)
683 memmove((void*)APBOOTSTRAP, apbootstrap, sizeof(apbootstrap));
684 for(apic = mpapic; apic <= &mpapic[MaxAPICNO]; apic++){
687 if((apic->flags & (PcmpBP|PcmpEN)) == PcmpEN
688 && apic->type == PcmpPROCESSOR){
696 * we don't really know the number of processors till
699 * set conf.copymode here if nmach > 1.
700 * Should look for an ExtINT line and enable it.
702 if(X86FAMILY(m->cpuidax) == 3 || conf.nmach > 1)
712 * The bulk of this code was written ~1995, when there was
713 * one architecture and one generation of hardware, the number
714 * of CPUs was up to 4(8) and the choices for interrupt routing
715 * were physical, or flat logical (optionally with lowest
716 * priority interrupt). Logical mode hasn't scaled well with
717 * the increasing number of packages/cores/threads, so the
718 * fall-back is to physical mode, which works across all processor
719 * generations, both AMD and Intel, using the APIC and xAPIC.
721 * Interrupt routing policy can be set here.
722 * Currently, just assign each interrupt to a different CPU on
723 * a round-robin basis. Some idea of the packages/cores/thread
724 * topology would be useful here, e.g. to not assign interrupts
725 * to more than one thread in a core, or to use a "noise" core.
726 * But, as usual, Intel make that an onerous task.
731 if(mpphysid >= MaxAPICNO+1)
736 unlock(&mpphysidlock);
738 return mpapic[i].apicno;
741 /* hardcoded VectorAPIC and stuff. bad. */
745 static int round = 0, num = 1;
751 if(++round >= 8) round = 0;
754 vno = 64 + num++ * 8 + round;
760 mpintrenablex(Vctl* v, int tbdf)
766 int bno, dno, hi, irq, lo, n, type, vno;
771 type = BUSTYPE(tbdf);
776 for(bus = mpbus; bus != nil; bus = bus->next){
777 if(bus->type != type)
779 if(bus->busno == bno)
783 print("ioapicirq: can't find bus type %d, number %d\n", type, bno);
788 * For PCI devices the interrupt pin (INT[ABCD]) and device
789 * number are encoded into the entry irq field, so create something
790 * to match on. The interrupt pin used by the device has to be
791 * obtained from the PCI config space.
793 if(bus->type == BusPCI){
794 pcidev = pcimatchtbdf(tbdf);
795 if(pcidev != nil && (n = pcicfgr8(pcidev, PciINTP)) != 0)
796 irq = (dno<<2)|(n-1);
799 //print("pcidev %uX: irq %uX v->irq %uX\n", tbdf, irq, v->irq);
805 * Find a matching interrupt entry from the list of interrupts
806 * attached to this bus.
808 for(aintr = bus->aintr; aintr; aintr = aintr->next){
809 if(aintr->intr->irq != irq)
812 PCMPintr* p = aintr->intr;
814 print("mpintrenablex: bus %d intin %d irq %d\n",
815 p->busno, p->intin, p->irq);
818 * Check if already enabled. Multifunction devices may share
819 * INT[A-D]# so, if already enabled, check the polarity matches
820 * and the trigger is level.
822 * Should check the devices differ only in the function number,
823 * but that can wait for the planned enable/disable rewrite.
824 * The RDT read here is safe for now as currently interrupts
825 * are never disabled once enabled.
828 ioapicrdtr(apic, aintr->intr->intin, 0, &lo);
829 if(!(lo & ApicIMASK)){
831 //print("%s vector %d (!imask)\n", v->name, vno);
832 n = mpintrinit(bus, aintr->intr, vno, v->irq);
833 n |= ApicPHYSICAL; /* no-op */
834 lo &= ~(ApicRemoteIRR|ApicDELIVS);
835 if(n != lo || !(n & ApicLEVEL)){
836 print("mpintrenable: multiple botch irq%d, tbdf %uX, lo %8.8uX, n %8.8uX\n",
837 v->irq, tbdf, lo, n);
848 * With the APIC a unique vector can be assigned to each
849 * request to enable an interrupt. There are two reasons this
851 * 1) to prevent lost interrupts, no more than 2 interrupts
852 * should be assigned per block of 16 vectors (there is an
853 * in-service entry and a holding entry for each priority
854 * level and there is one priority level per block of 16
856 * 2) each input pin on the IOAPIC will receive a different
857 * vector regardless of whether the devices on that pin use
858 * the same IRQ as devices on another pin.
861 hi = mpintrcpu()<<24;
862 lo = mpintrinit(bus, aintr->intr, vno, v->irq);
863 //print("lo 0x%uX: busno %d intr %d vno %d irq %d elcr 0x%uX\n",
864 // lo, bus->busno, aintr->intr->irq, vno,
865 // v->irq, i8259elcr);
869 lo |= ApicPHYSICAL; /* no-op */
871 if((apic->flags & PcmpEN) && apic->type == PcmpIOAPIC)
872 ioapicrdtw(apic, aintr->intr->intin, hi, lo);
874 // print("lo not enabled 0x%uX %d\n",
875 // apic->flags, apic->type);
887 MSICtrl = 0x02, /* message control register (16 bit) */
888 MSIAddr = 0x04, /* message address register (64 bit) */
889 MSIData32 = 0x08, /* message data register for 32 bit MSI (16 bit) */
890 MSIData64 = 0x0C, /* message data register for 64 bit MSI (16 bit) */
894 msiintrenable(Vctl *v)
896 int tbdf, vno, cap, cpu, ok64;
899 if(getconf("*msi") == nil)
902 if(tbdf == BUSUNKNOWN || BUSTYPE(tbdf) != BusPCI)
904 pci = pcimatchtbdf(tbdf);
906 print("msiintrenable: could not find Pcidev for tbdf %.8x\n", tbdf);
909 cap = pcicap(pci, PciCapMSI);
914 ok64 = (pcicfgr16(pci, cap + MSICtrl) & (1<<7)) != 0;
915 pcicfgw32(pci, cap + MSIAddr, (0xFEE << 20) | (cpu << 12));
916 if(ok64) pcicfgw32(pci, cap + MSIAddr + 4, 0);
917 pcicfgw16(pci, cap + (ok64 ? MSIData64 : MSIData32), vno | (1<<14));
918 pcicfgw16(pci, cap + MSICtrl, 1);
919 print("msiintrenable: success with tbdf %.8x, vector %d, cpu %d\n", tbdf, vno, cpu);
926 mpintrenable(Vctl* v)
930 vno = msiintrenable(v);
935 * If the bus is known, try it.
936 * BUSUNKNOWN is given both by [E]ISA devices and by
937 * interrupts local to the processor (local APIC, coprocessor
938 * breakpoint and page-fault).
941 if(tbdf != BUSUNKNOWN && (vno = mpintrenablex(v, tbdf)) != -1)
945 if(irq >= IrqLINT0 && irq <= MaxIrqLAPIC){
946 if(irq != IrqSPURIOUS)
948 return VectorPIC+irq;
950 if(irq < 0 || irq > MaxIrqPIC){
951 print("mpintrenable: irq %d out of range\n", irq);
956 * Either didn't find it or have to try the default buses
957 * (ISA and EISA). This hack is due to either over-zealousness
958 * or laziness on the part of some manufacturers.
960 * The MP configuration table on some older systems
961 * (e.g. ASUS PCI/E-P54NP4) has an entry for the EISA bus
962 * but none for ISA. It also has the interrupt type and
963 * polarity set to 'default for this bus' which wouldn't
964 * be compatible with ISA.
967 vno = mpintrenablex(v, MKBUS(BusEISA, 0, 0, 0));
972 vno = mpintrenablex(v, MKBUS(BusISA, 0, 0, 0));
976 print("mpintrenable: out of choices eisa %d isa %d tbdf %#ux irq %d\n",
977 mpeisabus, mpisabus, v->tbdf, v->irq);
981 static Lock mpshutdownlock;
989 if(!canlock(&mpshutdownlock)){
991 * If this processor received the CTRL-ALT-DEL from
992 * the keyboard, acknowledge it. Send an INIT to self.
995 if(lapicisr(VectorKBD))
997 #endif /* FIX THIS */
1002 print("apshutdown: active = %#8.8ux\n", active.machs);
1007 * INIT all excluding self.
1009 lapicicrw(0, 0x000C0000|ApicINIT);
1015 * Often the BIOS hangs during restart if a conventional 8042
1016 * warm-boot sequence is tried. The following is Intel specific and
1017 * seems to perform a cold-boot, but at least it comes back.
1018 * And sometimes there is no keyboard...
1020 * The reset register (0xcf9) is usually in one of the bridge
1021 * chips. The actual location and sequence could be extracted from
1022 * ACPI but why bother, this is the end of the line anyway.
1024 print("no kbd; trying bios warm boot...");
1025 *(ushort*)KADDR(0x472) = 0x1234; /* BIOS warm-boot flag */
1029 print("can't reset\n");