12 r = alloc(sizeof(*r));
21 rcmp(void *a1, void *a2)
32 return p2->varno - p1->varno;
41 long initpc, val, npc;
55 for(z=0; z<BITS; z++) {
64 * build aux data structure
66 * find use and set of variables
68 val = 5L * 5L * 5L * 5L * 5L;
78 for(; p != P; p = p->link) {
116 switch(r1->prog->as) {
126 * left side always read
128 bit = mkvar(&p->from, p->as==AMOVW || p->as == AMOVWU || p->as == AMOV);
129 for(z=0; z<BITS; z++)
130 r->use1.b[z] |= bit.b[z];
133 * right side depends on opcode
135 bit = mkvar(&p->to, 0);
139 diag(Z, "reg: unknown asop: %A", p->as);
158 for(z=0; z<BITS; z++)
159 r->set.b[z] |= bit.b[z];
166 for(z=0; z<BITS; z++)
167 addrs.b[z] |= bit.b[z];
179 * turn branch references to pointers
180 * build back pointers
182 for(r = firstr; r != R; r = r->link) {
184 if(p->to.type == D_BRANCH) {
185 val = p->to.offset - initpc;
189 if(r2 != R && val >= r2->pc) {
199 diag(Z, "ref not found\n%P", p);
204 diag(Z, "ref to self\n%P", p);
214 print("\n%L %D\n", p->lineno, &p->from);
219 * find looping structure
221 for(r = firstr; r != R; r = r->link)
228 * iterate propagating usage
229 * back until flow graph is complete
233 for(r = firstr; r != R; r = r->link)
235 for(r = firstr; r != R; r = r->link)
236 if(r->prog->as == ARET || r->prog->as == ARETURN)
237 prop(r, zbits, zbits);
239 /* pick up unreachable code */
241 for(r = firstr; r != R; r = r1) {
243 if(r1 && r1->active && !r->active) {
244 prop(r, zbits, zbits);
256 * iterate propagating register/variable synchrony
257 * forward until graph is complete
261 for(r = firstr; r != R; r = r->link)
263 synch(firstr, zbits);
269 if(debug['R'] && debug['v']) {
270 print("\nprop structure:\n");
271 for(r = firstr; r != R; r = r->link) {
272 print("%ld:%P", r->loop, r->prog);
273 for(z=0; z<BITS; z++)
274 bit.b[z] = r->set.b[z] |
275 r->refahead.b[z] | r->calahead.b[z] |
276 r->refbehind.b[z] | r->calbehind.b[z] |
277 r->use1.b[z] | r->use2.b[z];
281 print(" u1=%B", r->use1);
283 print(" u2=%B", r->use2);
285 print(" st=%B", r->set);
286 if(bany(&r->refahead))
287 print(" ra=%B", r->refahead);
288 if(bany(&r->calahead))
289 print(" ca=%B", r->calahead);
290 if(bany(&r->refbehind))
291 print(" rb=%B", r->refbehind);
292 if(bany(&r->calbehind))
293 print(" cb=%B", r->calbehind);
302 * calculate costs (paint1)
306 for(z=0; z<BITS; z++)
307 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
308 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
310 nearln = r->prog->lineno;
311 warn(Z, "used and not set: %B", bit);
312 if(debug['R'] && !debug['w'])
313 print("used and not set: %B\n", bit);
317 for(r = firstr; r != R; r = r->link)
321 for(r = firstr; r != R; r = r->link) {
322 for(z=0; z<BITS; z++)
323 bit.b[z] = r->set.b[z] &
324 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
326 nearln = r->prog->lineno;
327 warn(Z, "set and not used: %B", bit);
329 print("set and not used: %B\n", bit);
332 for(z=0; z<BITS; z++)
333 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
339 if(debug['R'] && debug['v'])
342 bit.b[i/32] &= ~(1L<<(i%32));
345 print("%L $%d: %B\n",
346 r->prog->lineno, change, blsh(i));
351 if(nregion >= NRGN) {
352 warn(Z, "too many regions");
359 qsort(region, nregion, sizeof(region[0]), rcmp);
363 * determine used registers (paint2)
364 * replace code (paint3)
367 for(i=0; i<nregion; i++) {
368 bit = blsh(rgp->varno);
369 vreg = paint2(rgp->enter, rgp->varno);
370 vreg = allreg(vreg, rgp);
372 if(rgp->regno >= NREG)
373 print("%L $%d F%d: %B\n",
374 rgp->enter->prog->lineno,
379 print("%L $%d R%d: %B\n",
380 rgp->enter->prog->lineno,
386 paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
391 * peep-hole on basic block
393 if(!debug['R'] || debug['P'])
401 for(r = firstr; r != R; r = r1) {
408 for(; p != p1; p = p->link) {
430 print("addrs: %B\n", addrs);
433 for(r = firstr; r != R; r = r->link) {
435 if(p->to.type == D_BRANCH)
436 p->to.offset = r->s2->pc;
443 * free aux structures
445 for(p = firstr->prog; p != P; p = p->link){
446 while(p->link && p->link->as == ANOP)
447 p->link = p->link->link;
462 for(r = firstr; r != R; r = r->link) {
465 if(r->prog->as == ABL)
467 for(r1 = r->p2; r1 != R; r1 = r1->p2link) {
470 for(z=0; z<BITS; z++)
471 bit.b[z] = r1->calbehind.b[z] &
472 (r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) &
473 ~(r->calahead.b[z] & addrs.b[z]);
476 bit.b[i/32] &= ~(1L << (i%32));
487 addmove(Reg *r, int bn, int rn, int f)
493 p1 = alloc(sizeof(*p1));
499 p1->lineno = p->lineno;
506 a->offset = v->offset;
509 if(a->etype == TARRAY || a->sym == S)
513 if(v->etype == TCHAR || v->etype == TUCHAR)
515 if(v->etype == TSHORT || v->etype == TUSHORT)
517 if(v->etype == TVLONG || v->etype == TUVLONG || v->etype == TIND)
519 if(v->etype == TFLOAT)
521 if(v->etype == TDOUBLE)
524 p1->from.type = D_REG;
527 p1->from.type = D_FREG;
528 p1->from.reg = rn-NREG;
539 if(v->etype == TUCHAR)
541 if(v->etype == TUSHORT)
543 if(v->etype == TUINT || v->etype == TULONG)
547 print("%P\t.a%P\n", p, p1);
551 mkvar(Adr *a, int docon)
560 if(t == D_REG && a->reg != NREG)
561 regbits |= RtoB(a->reg);
562 if(t == D_FREG && a->reg != NREG)
563 regbits |= FtoB(a->reg);
568 if(t != D_CONST || !docon || a->reg != NREG)
573 if(s == S && sval(o))
579 for(i=0; i<nvar; i++) {
587 if(s->name[0] == '.')
590 if(debug['w'] > 1 && s)
591 warn(Z, "variable not optimized: %s", s->name);
602 print("bit=%2d et=%2d %D\n", i, et, a);
605 if(n == D_EXTERN || n == D_STATIC)
606 for(z=0; z<BITS; z++)
607 externs.b[z] |= bit.b[z];
609 for(z=0; z<BITS; z++)
610 params.b[z] |= bit.b[z];
611 if(v->etype != et || !(typechlpfd[et] || typev[et])) /* funny punning */
612 for(z=0; z<BITS; z++)
613 addrs.b[z] |= bit.b[z];
616 for(z=0; z<BITS; z++)
617 consts.b[z] |= bit.b[z];
621 for(z=0; z<BITS; z++)
622 addrs.b[z] |= bit.b[z];
623 for(z=0; z<BITS; z++)
624 params.b[z] |= bit.b[z];
635 prop(Reg *r, Bits ref, Bits cal)
640 for(r1 = r; r1 != R; r1 = r1->p1) {
641 for(z=0; z<BITS; z++) {
642 ref.b[z] |= r1->refahead.b[z];
643 if(ref.b[z] != r1->refahead.b[z]) {
644 r1->refahead.b[z] = ref.b[z];
647 cal.b[z] |= r1->calahead.b[z];
648 if(cal.b[z] != r1->calahead.b[z]) {
649 r1->calahead.b[z] = cal.b[z];
653 switch(r1->prog->as) {
655 for(z=0; z<BITS; z++) {
656 cal.b[z] |= ref.b[z] | externs.b[z];
662 for(z=0; z<BITS; z++) {
670 for(z=0; z<BITS; z++) {
671 cal.b[z] = externs.b[z];
675 for(z=0; z<BITS; z++) {
676 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
677 r1->use1.b[z] | r1->use2.b[z];
678 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
679 r1->refbehind.b[z] = ref.b[z];
680 r1->calbehind.b[z] = cal.b[z];
686 for(; r != r1; r = r->p1)
687 for(r2 = r->p2; r2 != R; r2 = r2->p2link)
688 prop(r2, r->refbehind, r->calbehind);
692 * find looping structure
694 * 1) find reverse postordering
695 * 2) find approximate dominators,
696 * the actual dominators if the flow graph is reducible
697 * otherwise, dominators plus some other non-dominators.
698 * See Matthew S. Hecht and Jeffrey D. Ullman,
699 * "Analysis of a Simple Algorithm for Global Data Flow Problems",
700 * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
701 * Oct. 1-3, 1973, pp. 207-217.
702 * 3) find all nodes with a predecessor dominated by the current node.
703 * such a node is a loop head.
704 * recursively, all preds with a greater rpo number are in the loop
707 postorder(Reg *r, Reg **rpo2r, long n)
714 n = postorder(r1, rpo2r, n);
717 n = postorder(r1, rpo2r, n);
724 rpolca(long *idom, long rpo1, long rpo2)
739 fatal(Z, "bad idom");
747 doms(long *idom, long r, long s)
755 loophead(long *idom, Reg *r)
760 if(r->p1 != R && doms(idom, src, r->p1->rpo))
762 for(r = r->p2; r != R; r = r->p2link)
763 if(doms(idom, src, r->rpo))
769 loopmark(Reg **rpo2r, long head, Reg *r)
771 if(r->rpo < head || r->active == head)
776 loopmark(rpo2r, head, r->p1);
777 for(r = r->p2; r != R; r = r->p2link)
778 loopmark(rpo2r, head, r);
782 loopit(Reg *r, long nr)
788 rpo2r = alloc(nr * sizeof(Reg*));
789 idom = alloc(nr * sizeof(long));
793 d = postorder(r, rpo2r, 0);
795 fatal(Z, "too many reg nodes");
797 for(i = 0; i < nr / 2; i++){
799 rpo2r[i] = rpo2r[nr - 1 - i];
800 rpo2r[nr - 1 - i] = r1;
802 for(i = 0; i < nr; i++)
806 for(i = 0; i < nr; i++){
810 if(r1->p1 != R && r1->p1->rpo < me)
812 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
814 d = rpolca(idom, d, r1->rpo);
818 for(i = 0; i < nr; i++){
821 if(r1->p2 != R && loophead(idom, r1))
822 loopmark(rpo2r, i, r1);
827 synch(Reg *r, Bits dif)
832 for(r1 = r; r1 != R; r1 = r1->s1) {
833 for(z=0; z<BITS; z++) {
834 dif.b[z] = (dif.b[z] &
835 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
836 r1->set.b[z] | r1->regdiff.b[z];
837 if(dif.b[z] != r1->regdiff.b[z]) {
838 r1->regdiff.b[z] = dif.b[z];
845 for(z=0; z<BITS; z++)
846 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
853 allreg(ulong b, Rgn *r)
863 diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
879 if(i && r->cost > 0) {
888 if(i && r->cost > 0) {
898 paint1(Reg *r, int bn)
910 if(!(r->refbehind.b[z] & bb))
915 if(!(r1->refahead.b[z] & bb))
917 if(r1->act.b[z] & bb)
922 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) {
923 change -= CLOAD * r->loop;
924 if(debug['R'] && debug['v'])
925 print("%ld%P\tld %B $%d\n", r->loop,
926 r->prog, blsh(bn), change);
932 if(r->use1.b[z] & bb) {
933 change += CREF * r->loop;
934 if(p->to.type == D_FREG && (p->as == AMOVW || p->as == AMOV))
935 change = -CINF; /* cant go Rreg to Freg */
936 if(debug['R'] && debug['v'])
937 print("%ld%P\tu1 %B $%d\n", r->loop,
938 p, blsh(bn), change);
941 if((r->use2.b[z]|r->set.b[z]) & bb) {
942 change += CREF * r->loop;
943 if(p->from.type == D_FREG && (p->as == AMOVW || p->as == AMOV))
944 change = -CINF; /* cant go Rreg to Freg */
945 if(debug['R'] && debug['v'])
946 print("%ld%P\tu2 %B $%d\n", r->loop,
947 p, blsh(bn), change);
950 if(STORE(r) & r->regdiff.b[z] & bb) {
951 change -= CLOAD * r->loop;
952 if(debug['R'] && debug['v'])
953 print("%ld%P\tst %B $%d\n", r->loop,
954 p, blsh(bn), change);
957 if(r->refbehind.b[z] & bb)
958 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
959 if(r1->refahead.b[z] & bb)
962 if(!(r->refahead.b[z] & bb))
966 if(r1->refbehind.b[z] & bb)
973 if(!(r->refbehind.b[z] & bb))
979 paint2(Reg *r, int bn)
988 if(!(r->act.b[z] & bb))
991 if(!(r->refbehind.b[z] & bb))
996 if(!(r1->refahead.b[z] & bb))
998 if(!(r1->act.b[z] & bb))
1007 if(r->refbehind.b[z] & bb)
1008 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1009 if(r1->refahead.b[z] & bb)
1010 vreg |= paint2(r1, bn);
1012 if(!(r->refahead.b[z] & bb))
1016 if(r1->refbehind.b[z] & bb)
1017 vreg |= paint2(r1, bn);
1021 if(!(r->act.b[z] & bb))
1023 if(!(r->refbehind.b[z] & bb))
1030 paint3(Reg *r, int bn, long rb, int rn)
1039 if(r->act.b[z] & bb)
1042 if(!(r->refbehind.b[z] & bb))
1047 if(!(r1->refahead.b[z] & bb))
1049 if(r1->act.b[z] & bb)
1054 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1055 addmove(r, bn, rn, 0);
1060 if(r->use1.b[z] & bb) {
1063 addreg(&p->from, rn);
1065 print("\t.c%P\n", p);
1067 if((r->use2.b[z]|r->set.b[z]) & bb) {
1072 print("\t.c%P\n", p);
1075 if(STORE(r) & r->regdiff.b[z] & bb)
1076 addmove(r, bn, rn, 1);
1079 if(r->refbehind.b[z] & bb)
1080 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1081 if(r1->refahead.b[z] & bb)
1082 paint3(r1, bn, rb, rn);
1084 if(!(r->refahead.b[z] & bb))
1088 if(r1->refbehind.b[z] & bb)
1089 paint3(r1, bn, rb, rn);
1093 if(r->act.b[z] & bb)
1095 if(!(r->refbehind.b[z] & bb))
1101 addreg(Adr *a, int rn)
1124 if(r >= REGMIN && r <= REGMAX)
1125 return 1L << (r-REGMIN);
1135 return bitno(b) + REGMIN;
1148 if(f < FREGMIN || f > FREGEXT)
1150 return 1L << (f - FREGMIN + 22);
1160 return bitno(b) - 22 + FREGMIN;