/*
 * tegra 2 SoC machine assist
 * dual arm cortex-a9 processors
 *
 * ARM v7 arch. ref. man. §B1.3.3 says that we don't need barriers
 * around writes to CPSR.
 *
 * LDREX/STREX use an exclusive monitor, which is part of the data cache unit
 * for the L1 cache, so they won't work right if the L1 cache is disabled.
 */
#define LDREX(fp,t)	WORD $(0xe<<28|0x01900f9f | (fp)<<16 | (t)<<12)
/* `The order of operands is from left to right in dataflow order' - asm man */
#define STREX(f,tp,r)	WORD $(0xe<<28|0x01800f90 | (tp)<<16 | (r)<<12 | (f)<<0)
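/*
 * the assembler has no mnemonics for LDREX/STREX, so they are
 * hand-assembled above.  as a worked example (assuming the standard
 * ARMv7 encodings): LDREX(0,1) emits 0xe1901f9f, i.e. LDREX R1, [R0];
 * STREX(2,5,4) emits 0xe1854f92, i.e. STREX R4, R2, [R5]:
 * store R2 at (R5), with the success status in R4.
 */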
#define MAXMB	(KiB-1)				/* last MB has vectors */
#define TMPSTACK (DRAMSIZE - 64*MiB)		/* used only during cpu startup */
/* tas/cas strex debugging limits; started at 10000 */
/*
 * Entered here from Das U-Boot or another Plan 9 kernel with MMU disabled.
 * Until the MMU is enabled it is OK to call functions provided
 * they are within ±32MiB relative and do not require any
 * local variables or more than one argument (i.e. there is no stack).
 */
TEXT _start(SB), 1, $-4
	CPSID					/* interrupts off */
	SETEND(0)				/* little-endian */

	/* invalidate i-cache and branch-target cache */
	MTCP	CpSC, 0, PC, C(CpCACHE), C(CpCACHEinvi), CpCACHEall

	/* put cpus other than 0 to sleep until cpu 0 is ready */
	MOVW	cpus_proceed(SB), R1

	DELAY(printloopret, 1)
	/* disable the PL310 L2 cache on cpu0 */
	MOVW	$(PHYSL2BAG+0x100), R1
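	/*
	 * in the usual pl310 register layout, offset 0x100 is the
	 * control register; presumably a zero is then stored there
	 * to turn the l2 cache off.
	 */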
	/*
	 * disable my MMU & caches
	 */
	MFCP	CpSC, 0, R1, C(CpCONTROL), C(0), CpMainctl
	BIC	$(CpCsbz|CpCmmu|CpCdcache|CpCicache|CpCpredict), R1
	MTCP	CpSC, 0, R1, C(CpCONTROL), C(0), CpMainctl

	/* cortex-a9 model-specific initial configuration */
	MTCP	CpSC, 0, R1, C(CpCONTROL), C(0), CpAuxctl

	MOVW	$testmem-KZERO(SB), R0
	/* clear Mach for cpu 0 */
	MOVW	$PADDR(MACHADDR), R4		/* address of Mach for cpu 0 */
	CMP.S	$PADDR(L1+L1X(0)), R4		/* end at top-level page table */
	/*
	 * set up the MMU page table for cpu 0
	 */

	/* clear all PTEs first, to provide a default */
//	MOVW	$PADDR(L1+L1X(0)), R4		/* address of PTE for 0 */
	CMP.S	$PADDR(L1+16*KiB), R4
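	/*
	 * the clearing loop is roughly, in C (a sketch):
	 *	for(pte = (ulong *)PADDR(L1); pte < (ulong *)PADDR(L1+16*KiB); pte++)
	 *		*pte = 0;		// descriptor type 0: translation fault
	 */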
	/*
	 * set up double map of PHYSDRAM, KZERO to PHYSDRAM for first few MBs,
	 * but only if KZERO and PHYSDRAM differ.
	 */
	MOVW	$PTEDRAM, R2			/* PTE bits */
	MOVW	$PHYSDRAM, R3			/* pa */
	MOVW	$PADDR(L1+L1X(PHYSDRAM)), R4	/* address of PTE for PHYSDRAM */
	MOVW	$DOUBLEMAPMBS, R5
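	/*
	 * the identity map keeps the PC, still a physical address at this
	 * point, valid at the instant the MMU comes on; _r15warp then
	 * moves the PC into the KZERO segment and the double map is
	 * undone below.
	 */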
	/*
	 * back up and fill in PTEs for memory at KZERO.
	 * trimslice has 1 bank of 1GB at PHYSDRAM.
	 */
	MOVW	$PTEDRAM, R2			/* PTE bits */
	MOVW	$PADDR(L1+L1X(KZERO)), R4	/* start with PTE for KZERO */
	MOVW	$MAXMB, R5			/* inner loop count (MBs) */
_ptekrw:					/* set PTEs */
	SUB.S	$1, R5				/* decrement inner loop count */
	/*
	 * back up and fill in PTEs for MMIO
	 */
	MOVW	$PTEIO, R2			/* PTE bits */
	MOVW	$PADDR(L1+L1X(VIRTIO)), R4	/* start with PTE for VIRTIO */
	CMP.S	$PADDR(L1+L1X(PHYSIOEND)), R4
	/* mmu.c sets up the trap vectors later */

	MOVW	$(PHYSDRAM | TMPSTACK), SP
	/*
	 * learn l1 cache characteristics (on cpu 0 only).
	 */
	MOVW	$(1-1), R0			/* l1 */
	SLL	$1, R0				/* R0 = (cache - 1) << 1 */
	MTCP	CpSC, CpIDcssel, R0, C(CpID), C(CpIDid), 0 /* select l1 cache */
	MFCP	CpSC, CpIDcsize, R0, C(CpID), C(CpIDid), 0 /* get sets & ways */

	/* get log2linelen into l1setsh */

	/* l1 & l2 must have same cache line size, thus same set shift */
	MOVW	R1, 4(R8)			/* +4 = l1setsh */
	MOVW	R1, 12(R8)			/* +12 = l2setsh */

	/* get nways in R1 */

	/* get log2(nways) in R2 (assume nways is 2^n) */
	MOVW	$(BI2BY*BY2WD - 1), R2
	SUB.S	R1, R2				/* R2 = 31 - clz(nways) */
//	MOVW	R2, R3				/* print log2(nways): 2 */
	SUB	R2, R1				/* R1 = 32 - log2(nways) */
	MOVW	R1, 0(R8)			/* +0 = l1waysh */
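	/*
	 * in C the computation is roughly (a sketch; putcssel and getcsize
	 * are illustrative names, field layout per the ARMv7 CSSELR/CCSIDR
	 * definitions):
	 *	putcssel((1-1) << 1);			// select l1 data/unified cache
	 *	csize = getcsize();
	 *	l1setsh = (csize & 7) + 2 + 2;		// log2(line length in bytes)
	 *	l2setsh = l1setsh;
	 *	nways = ((csize >> 3) & 0x3ff) + 1;
	 *	l1waysh = 32 - log2(nways);		// way index goes in the top bits
	 */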
	MOVW	$testmem-KZERO(SB), R0

	/*
	 * the mpcore manual says invalidate d-cache, scu, pl310 in that order,
	 * but says nothing about when to disable them.
	 */

	/* invalidate my caches before enabling */
	MTCP	CpSC, 0, PC, C(CpCACHE), C(CpCACHEinvi), CpCACHEall

	/*
	 * the mpcore manual says enable scu, d-cache, pl310, smp mode
	 * in that order.  we have to reverse the last two; see main().
	 */
	/*
	 * turn my L1 cache on; need it for tas below.
	 */
	MFCP	CpSC, 0, R1, C(CpCONTROL), C(0), CpMainctl
	ORR	$(CpCdcache|CpCicache|CpCalign|CpCpredict), R1
	MTCP	CpSC, 0, R1, C(CpCONTROL), C(0), CpMainctl

	/* cortex-a9 model-specific configuration */
	MTCP	CpSC, 0, R1, C(CpCONTROL), C(0), CpAuxctl

	/* we're supposed to wait until l1 & l2 are on before calling smpon */
	/* set the domain access control */

	/* set the translation table base */

	/*
	 * the little dance to turn the MMU on
	 */

	/* warp the PC into the virtual map */

	/*
	 * cpu 0 is now running at KZERO+something!
	 */
	MOVW	$setR12(SB), R12		/* reload kernel SB */
	MOVW	$(KZERO | TMPSTACK), SP
	MOVW	$PHYSDRAM, R3			/* pa */

	/* undo double map of PHYSDRAM, KZERO & first few MBs */
	MOVW	$(L1+L1X(PHYSDRAM)), R4		/* addr. of PTE for PHYSDRAM */
	MOVW	$DOUBLEMAPMBS, R5
	/*
	 * pass Mach to main and set up the stack in it
	 */
	MOVW	$MACHADDR, R0			/* cpu 0 Mach */
	MOVW	R0, R(MACH)			/* m = MACHADDR */
	ADD	$(MACHSIZE-4), R0, SP		/* leave space for link register */
	BL	main(SB)			/* main(m) */
	BL	_div(SB)			/* hack to load _div, etc. */
/*
 * called on cpu(s) other than 0, to start them, from _vrst
 * (reset vector) in lexception.s, with interrupts disabled
 * and in SVC mode, running in the zero segment (pc is in lower 256MB).
 * SB is set for the zero segment.
 */
TEXT cpureset(SB), 1, $-4
	MOVW	$(PHYSDRAM | TMPSTACK), SP	/* stack for cache ops */
	/* paranoia: turn my mmu and caches off. */
	MFCP	CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl
	BIC	$(CpCsbz|CpCmmu|CpCdcache|CpCicache|CpCpredict), R0
	MTCP	CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl

	/* cortex-a9 model-specific initial configuration */
	MTCP	CpSC, 0, R1, C(CpCONTROL), C(0), CpAuxctl

	/* invalidate my caches before enabling */
	MTCP	CpSC, 0, PC, C(CpCACHE), C(CpCACHEinvi), CpCACHEall
	/*
	 * turn my L1 cache on; need it (and mmu) for tas below.
	 * need branch prediction to make delay() timing right.
	 */
	MFCP	CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl
	ORR	$(CpCdcache|CpCicache|CpCalign|CpCpredict), R0
	MTCP	CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl

	/* enable l1 cache coherency, at minimum for ldrex/strex. */

	/*
	 * we used to write to PHYSEVP here; now we do it in C, which offers
	 * more assurance that we're up and won't go off the rails.
	 */

	/* set the domain access control */
	/*
	 * redo double map of PHYSDRAM, KZERO in this cpu's ptes.
	 * mmuinit will undo this later.
	 */

	/* launchinit set m->mmul1 to a copy of cpu0's l1 page table */
	MOVW	12(R(MACH)), R0			/* m->mmul1 (virtual addr) */
	BL	k2paddr(SB)			/* R0 = PADDR(m->mmul1) */
	ADD	$L1X(PHYSDRAM), R0, R4		/* R4 = address of PHYSDRAM's PTE */
	MOVW	$PTEDRAM, R2			/* PTE bits */
	MOVW	$DOUBLEMAPMBS, R5
	ORR	R3, R2, R1			/* first identity-map 0 to 0, etc. */
	ADD	$4, R4				/* bump PTE address */
	ADD	$MiB, R3			/* bump pa */
	/* set the translation table base to PADDR(m->mmul1) */
	MOVW	12(R(MACH)), R0			/* m->mmul1 */
	BL	k2paddr(SB)			/* R0 = PADDR(m->mmul1) */
	/*
	 * the little dance to turn the MMU on
	 */

	/*
	 * mmu is now on, with l1 pt at m->mmul1.
	 */

	/* warp the PC into the virtual map */

	/*
	 * now running at KZERO+something!
	 */
	MOVW	$setR12(SB), R12		/* reload kernel's SB */
	MOVW	$(KZERO | TMPSTACK), SP		/* stack for cache ops */
	ADD	$(MACHSIZE-4), R(MACH), SP	/* leave space for link register */
/*
 * converts virtual address in R0 to a physical address.
 */
TEXT k2paddr(SB), 1, $-4

/*
 * converts physical address in R0 to a virtual address.
 */
TEXT p2kaddr(SB), 1, $-4

/*
 * converts address in R0 to the current segment, as defined by the PC.
 */
TEXT addr2pcseg(SB), 1, $-4
	AND	$KSEGM, R1			/* segment PC is in */
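	/*
	 * i.e., roughly addr2pcseg(a) = (a & ~KSEGM) | (pc & KSEGM):
	 * strip a's segment bits and substitute the PC's.
	 */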
/* sets R(MACH), preserves other registers */
TEXT setmach(SB), 1, $-4
	MOVM.DB.W [R14], (R13)
	MOVM.DB.W [R0-R2], (R13)

	SLL	$2, R2				/* convert to word index */
	MOVW	$machaddr(SB), R0
	ADD	R2, R0				/* R0 = &machaddr[cpuid] */
	MOVW	(R0), R0			/* R0 = machaddr[cpuid] */
	MOVW.EQ	$MACHADDR, R0			/* paranoia: use MACHADDR if 0 */
	MOVW	R0, R(MACH)			/* m = machaddr[cpuid] */

	MOVM.IA.W (R13), [R0-R2]
	MOVM.IA.W (R13), [R14]
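	/*
	 * in C, roughly (a sketch):
	 *	m = machaddr[cpuid()];
	 *	if(m == 0)
	 *		m = (Mach *)MACHADDR;	// paranoia
	 */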
/*
 * tests word at (R0); modifies R7 and R8
 */
TEXT memdiag(SB), 1, $-4
	BNE	mbuggery			/* broken memory */

	BNE	mbuggery			/* broken memory */
/* modifies R0, R3-R6 */
TEXT printhex(SB), 1, $-4
	MOVW	$(32-4), R5			/* bits to shift right */

	BLE	nothex				/* if R4 <= 9, jump */
	ADD	$('a'-('9'+1)), R4
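	/*
	 * the ADD above is the usual nibble-to-ascii adjustment; in C,
	 * roughly (a sketch):
	 *	c = (val >> shift) & 0xf;
	 *	c += '0';
	 *	if(c > '9')
	 *		c += 'a' - ('9' + 1);	// map 10..15 to 'a'..'f'
	 */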
	MOVW	$membmsg(SB), R0
	MOVW	R14, R1				/* get R14's segment ... */
	BIC	$KSEGM, R0			/* strip segment from address */
	ORR	R1, R0				/* combine them */

DATA	membmsg+0(SB)/8, $"memory b"
DATA	membmsg+8(SB)/6, $"roken\z"
GLOBL	membmsg(SB), $14
TEXT _r15warp(SB), 1, $-4
	BIC	$KSEGM, R14			/* link reg, will become PC */
/*
 * `single-element' cache operations.
 * in arm arch v7, they operate on all architected cache levels, so separate
 * l2 functions are usually unnecessary.
 */
TEXT cachedwbse(SB), $-4			/* D writeback SE */
	BARRIERS				/* force outstanding stores to cache */
	ADD	R0, R1				/* R1 is end address */
	BIC	$(CACHELINESZ-1), R0		/* cache line start */
	MTCP	CpSC, 0, R0, C(CpCACHE), C(CpCACHEwb), CpCACHEse
TEXT cachedwbinvse(SB), $-4			/* D writeback+invalidate SE */
	BARRIERS				/* force outstanding stores to cache */
	ADD	R0, R1				/* R1 is end address */
	BIC	$(CACHELINESZ-1), R0		/* cache line start */
	MTCP	CpSC, 0, R0, C(CpCACHE), C(CpCACHEwbi), CpCACHEse

_wait:						/* drain write buffer */
	MOVW	R3, CPSR			/* splx */
TEXT cachedinvse(SB), $-4			/* D invalidate SE */
	BARRIERS				/* force outstanding stores to cache */
	ADD	R0, R1				/* R1 is end address */

	/*
	 * if start & end addresses are not on cache-line boundaries,
	 * flush first & last cache lines before invalidating.
	 */
	AND.S	$(CACHELINESZ-1), R0, R4
	BIC	$(CACHELINESZ-1), R0, R4	/* cache line start */
	MTCP	CpSC, 0, R4, C(CpCACHE), C(CpCACHEwb), CpCACHEse

	AND.S	$(CACHELINESZ-1), R1, R4
	BIC	$(CACHELINESZ-1), R1, R4	/* cache line start */
	MTCP	CpSC, 0, R4, C(CpCACHE), C(CpCACHEwb), CpCACHEse
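	/*
	 * the write-backs above matter: invalidating a line that also
	 * holds live data from outside [R0, R1) would silently discard
	 * that data, so partial lines are written back first.
	 */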
	BIC	$(CACHELINESZ-1), R0		/* cache line start */
	MTCP	CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvd), CpCACHEse
/*
 * enable mmu and high vectors
 */
TEXT mmuenable(SB), 1, $-4
	MFCP	CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl
	MTCP	CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl

TEXT mmudisable(SB), 1, $-4
	MFCP	CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl
	MTCP	CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl
/*
 * If one of these MCR instructions crashes or hangs the machine,
 * check your Level 1 page table (at TTB) closely.
 */
TEXT mmuinvalidate(SB), $-4			/* invalidate all */
	CPSID					/* interrupts off */
	MTCP	CpSC, 0, PC, C(CpTLB), C(CpTLBinvu), CpTLBinv
	MOVW	R2, CPSR			/* interrupts restored */

TEXT mmuinvalidateaddr(SB), $-4			/* invalidate single entry */
	MTCP	CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinvse
TEXT cpidget(SB), 1, $-4			/* main ID */
	MFCP	CpSC, 0, R0, C(CpID), C(CpIDidct), CpIDid

TEXT cpctget(SB), 1, $-4			/* cache type */
	MFCP	CpSC, 0, R0, C(CpID), C(CpIDidct), CpIDct

TEXT controlget(SB), 1, $-4			/* system control (sctlr) */
	MFCP	CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl

TEXT ttbget(SB), 1, $-4				/* translation table base */
	MFCP	CpSC, 0, R0, C(CpTTB), C(0), CpTTB0

TEXT ttbput(SB), 1, $-4				/* translation table base */
	BARRIERS				/* finish prior accesses before changing ttb */
	MTCP	CpSC, 0, R1, C(CpTTB), C(0), CpTTB0
	MTCP	CpSC, 0, R1, C(CpTTB), C(0), CpTTB1	/* non-secure too */
	MTCP	CpSC, 0, R0, C(CpTTB), C(0), CpTTBctl
TEXT dacget(SB), 1, $-4				/* domain access control */
	MFCP	CpSC, 0, R0, C(CpDAC), C(0)

TEXT dacput(SB), 1, $-4				/* domain access control */
	MTCP	CpSC, 0, R1, C(CpDAC), C(0)

TEXT fsrget(SB), 1, $-4				/* fault status */
	MFCP	CpSC, 0, R0, C(CpFSR), C(0), CpDFSR

TEXT farget(SB), 1, $-4				/* fault address */
	MFCP	CpSC, 0, R0, C(CpFAR), C(0), CpDFAR

TEXT getpsr(SB), 1, $-4

TEXT getscr(SB), 1, $-4				/* secure configuration */
	MFCP	CpSC, 0, R0, C(CpCONTROL), C(CpCONTROLscr), CpSCRscr

TEXT pidget(SB), 1, $-4				/* address translation pid */
	MFCP	CpSC, 0, R0, C(CpPID), C(0)

TEXT pidput(SB), 1, $-4				/* address translation pid */
	MTCP	CpSC, 0, R0, C(CpPID), C(0), 0	/* pid; deprecated in v7a */
	MTCP	CpSC, 0, R0, C(CpPID), C(0), 1	/* context id, for erratum 754322 */
/*
 * access to yet more coprocessor registers
 */
TEXT getauxctl(SB), 1, $-4			/* get cortex-a9 aux. ctl. */
	MFCP	CpSC, 0, R0, C(CpCONTROL), C(0), CpAuxctl

TEXT putauxctl(SB), 1, $-4			/* put cortex-a9 aux. ctl. */
	MTCP	CpSC, 0, R0, C(CpCONTROL), C(0), CpAuxctl

TEXT getclvlid(SB), 1, $-4
	MFCP	CpSC, CpIDcsize, R0, C(CpID), C(CpIDidct), CpIDclvlid

TEXT getcyc(SB), 1, $-4
	MFCP	CpSC, 0, R0, C(CpCLD), C(CpCLDcyc), 0

TEXT getdebug(SB), 1, $-4			/* get cortex-a9 debug enable register */
	MFCP	CpSC, 0, R0, C(1), C(1), 1
TEXT getpc(SB), 1, $-4

TEXT getsb(SB), 1, $-4

TEXT setsp(SB), 1, $-4
TEXT splhi(SB), 1, $-4
	MOVW	CPSR, R0			/* return old CPSR */
	CPSID					/* turn off interrupts */
	MOVW.NE	R14, 4(R(MACH))			/* save caller pc in m->splpc */

TEXT spllo(SB), 1, $-4				/* start marker for devkprof.c */
	MOVW	CPSR, R0			/* return old CPSR */
	MOVW.NE	R1, 4(R(MACH))			/* clear m->splpc */

TEXT splx(SB), 1, $-4
	MOVW	CPSR, R3			/* must return old CPSR */
	MOVW.NE	R14, 4(R(MACH))			/* save caller pc in m->splpc */
	MOVW	R0, CPSR			/* reset interrupt level */
	MOVW	R3, R0				/* must return old CPSR */
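/*
 * in C, the spl functions are roughly (a sketch; getcpsr, putcpsr and
 * intrsoff are illustrative names, and the MOVW.NE stores make the
 * m->splpc update conditional on m being set):
 *	int splhi(void)  { s = getcpsr(); intrsoff(); m->splpc = getcallerpc(); return s; }
 *	void splx(int s) { m->splpc = getcallerpc(); putcpsr(s); }
 */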
TEXT spldone(SB), 1, $0				/* end marker for devkprof.c */

TEXT islo(SB), 1, $-4

	CLZ(0, 0)				/* 0 is R0 */
TEXT setlabel(SB), 1, $-4
	MOVW	R14, 4(R0)			/* pc */

TEXT gotolabel(SB), 1, $-4
	MOVW	4(R0), R14			/* pc */
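	/*
	 * a Label holds {sp, pc}.  setlabel saves the caller's context and
	 * returns 0; gotolabel reloads it, so the original setlabel call
	 * appears to return a second time, this time with a non-zero value.
	 */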
TEXT getcallerpc(SB), 1, $-4
/*
 * an interrupt should break us out of wfi.  masking interrupts
 * slows interrupt response slightly but prevents recursion.
 */

TEXT coherence(SB), $-4

GLOBL cpus_proceed+0(SB), $4

#include "cache.v7.s"
TEXT cmpswap(SB), 0, $12			/* r0 holds p */
	/* STREX	0(R0),R2,R4 */
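	/*
	 * in C, cmpswap is roughly the usual load-locked/store-conditional
	 * pattern (a sketch; ldrex/strex stand for the macros above):
	 *	int cmpswap(long *p, long old, long new) {
	 *		do {
	 *			if(ldrex(p) != old)
	 *				return 0;	// mismatch: fail
	 *		} while(strex(new, p));		// retry if reservation lost
	 *		return 1;
	 *	}
	 */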
TEXT tas(SB), $-4				/* _tas(ulong *) */
	/* returns old (R0) after modifying (R0) */
	MOVW	$1, R2				/* new value of (R0) */
tas1:
	LDREX(5,7)				/* LDREX 0(R5),R7 */
	CMP.S	$0, R7				/* old value non-zero (lock taken)? */
	BNE	lockbusy			/* we lose */
	STREX(2,5,4)				/* STREX R2,(R5),R4 */
	BNE	tas1				/* strex failed? try again */
	MOVW	R7, R0				/* return old value */
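	/*
	 * in C, the loop above is roughly (a sketch; ldrex/strex stand for
	 * the macros above):
	 *	ulong tas(ulong *p) {
	 *		do {
	 *			if((old = ldrex(p)) != 0)
	 *				break;		// lock already held
	 *		} while(strex(1, p));		// retry if reservation lost
	 *		return old;
	 *	}
	 */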