10 r = alloc(sizeof(*r));
19 rcmp(void *a1, void *a2)
30 return p2->varno - p1->varno;
39 long initpc, val, npc;
53 for(z=0; z<BITS; z++) {
62 * build aux data structure
64 * find use and set of variables
66 val = 5L * 5L * 5L * 5L * 5L;
76 for(; p != P; p = p->link) {
114 switch(r1->prog->as) {
123 * left side always read
125 bit = mkvar(&p->from, p->as==AMOVW);
126 for(z=0; z<BITS; z++)
127 r->use1.b[z] |= bit.b[z];
130 * right side depends on opcode
132 bit = mkvar(&p->to, 0);
136 diag(Z, "reg: unknown asop: %A", p->as);
160 for(z=0; z<BITS; z++)
161 r->set.b[z] |= bit.b[z];
168 for(z=0; z<BITS; z++)
169 addrs.b[z] |= bit.b[z];
180 * turn branch references to pointers
181 * build back pointers
183 for(r = firstr; r != R; r = r->link) {
185 if(p->to.type == D_BRANCH) {
186 val = p->to.offset - initpc;
190 if(r2 != R && val >= r2->pc) {
200 diag(Z, "ref not found\n%P", p);
205 diag(Z, "ref to self\n%P", p);
215 print("\n%L %D\n", p->lineno, &p->from);
220 * find looping structure
222 for(r = firstr; r != R; r = r->link)
226 if(debug['R'] && debug['v']) {
227 print("\nlooping structure:\n");
228 for(r = firstr; r != R; r = r->link) {
229 print("%ld:%P", r->loop, r->prog);
230 for(z=0; z<BITS; z++)
231 bit.b[z] = r->use1.b[z] |
232 r->use2.b[z] | r->set.b[z];
236 print(" u1=%B", r->use1);
238 print(" u2=%B", r->use2);
240 print(" st=%B", r->set);
248 * iterate propagating usage
249 * back until flow graph is complete
253 for(r = firstr; r != R; r = r->link)
255 for(r = firstr; r != R; r = r->link)
256 if(r->prog->as == ARETURN)
257 prop(r, zbits, zbits);
259 /* pick up unreachable code */
261 for(r = firstr; r != R; r = r1) {
263 if(r1 && r1->active && !r->active) {
264 prop(r, zbits, zbits);
276 * iterate propagating register/variable synchrony
277 * forward until graph is complete
281 for(r = firstr; r != R; r = r->link)
283 synch(firstr, zbits);
291 * calculate costs (paint1)
295 for(z=0; z<BITS; z++)
296 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
297 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
299 nearln = r->prog->lineno;
300 warn(Z, "used and not set: %B", bit);
301 if(debug['R'] && !debug['w'])
302 print("used and not set: %B\n", bit);
305 if(debug['R'] && debug['v'])
306 print("\nprop structure:\n");
307 for(r = firstr; r != R; r = r->link)
311 for(r = firstr; r != R; r = r->link) {
312 if(debug['R'] && debug['v'])
313 print("%P\n set = %B; rah = %B; cal = %B\n",
314 r->prog, r->set, r->refahead, r->calahead);
315 for(z=0; z<BITS; z++)
316 bit.b[z] = r->set.b[z] &
317 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
319 nearln = r->prog->lineno;
320 warn(Z, "assignment not used: %B", bit);
322 print("set an not used: %B\n", bit);
325 for(z=0; z<BITS; z++)
326 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
332 if(debug['R'] && debug['v'])
335 bit.b[i/32] &= ~(1L<<(i%32));
339 r->prog->lineno, change, blsh(i));
344 if(nregion >= NRGN) {
345 warn(Z, "too many regions");
352 qsort(region, nregion, sizeof(region[0]), rcmp);
356 * determine used registers (paint2)
357 * replace code (paint3)
360 for(i=0; i<nregion; i++) {
361 bit = blsh(rgp->varno);
362 vreg = paint2(rgp->enter, rgp->varno);
363 vreg = allreg(vreg, rgp);
365 if(rgp->regno >= NREG)
366 print("%L$%d F%d: %B\n",
367 rgp->enter->prog->lineno,
372 print("%L$%d R%d: %B\n",
373 rgp->enter->prog->lineno,
379 paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
384 * peep-hole on basic block
386 if(!debug['R'] || debug['P'])
394 for(r = firstr; r != R; r = r1) {
401 for(; p != p1; p = p->link) {
423 print("addrs: %B\n", addrs);
426 for(r = firstr; r != R; r = r->link) {
428 if(p->to.type == D_BRANCH)
429 p->to.offset = r->s2->pc;
436 * free aux structures
438 for(p = firstr->prog; p != P; p = p->link){
439 while(p->link && p->link->as == ANOP)
440 p->link = p->link->link;
453 addmove(Reg *r, int bn, int rn, int f)
459 p1 = alloc(sizeof(*p1));
465 p1->lineno = p->lineno;
472 a->offset = v->offset;
475 if(a->etype == TARRAY || a->sym == S)
479 if(v->etype == TCHAR || v->etype == TUCHAR)
481 if(v->etype == TSHORT || v->etype == TUSHORT)
483 if(v->etype == TFLOAT)
485 if(v->etype == TDOUBLE)
488 p1->from.type = D_REG;
491 p1->from.type = D_FREG;
492 p1->from.reg = rn-NREG;
503 if(v->etype == TUCHAR)
505 if(v->etype == TUSHORT)
509 print("%P\t.a%P\n", p, p1);
513 mkvar(Adr *a, int docon)
522 if(t == D_REG && a->reg != NREG)
523 regbits |= RtoB(a->reg);
524 if(t == D_FREG && a->reg != NREG)
525 regbits |= FtoB(a->reg);
530 if(t != D_CONST || !docon || a->reg != NREG)
535 if(s == S && sval(o))
540 for(i=0; i<nvar; i++) {
548 if(s->name[0] == '.')
551 if(debug['w'] > 1 && s)
552 warn(Z, "variable not optimized: %s", s->name);
563 print("bit=%2d et=%2d %D\n", i, et, a);
566 if(n == D_EXTERN || n == D_STATIC)
567 for(z=0; z<BITS; z++)
568 externs.b[z] |= bit.b[z];
570 for(z=0; z<BITS; z++)
571 params.b[z] |= bit.b[z];
572 if(v->etype != et || !typechlpfd[et]) /* funny punning */
573 for(z=0; z<BITS; z++)
574 addrs.b[z] |= bit.b[z];
577 for(z=0; z<BITS; z++)
578 consts.b[z] |= bit.b[z];
582 for(z=0; z<BITS; z++)
583 addrs.b[z] |= bit.b[z];
584 for(z=0; z<BITS; z++)
585 params.b[z] |= bit.b[z];
596 prop(Reg *r, Bits ref, Bits cal)
601 for(r1 = r; r1 != R; r1 = r1->p1) {
602 for(z=0; z<BITS; z++) {
603 ref.b[z] |= r1->refahead.b[z];
604 if(ref.b[z] != r1->refahead.b[z]) {
605 r1->refahead.b[z] = ref.b[z];
608 cal.b[z] |= r1->calahead.b[z];
609 if(cal.b[z] != r1->calahead.b[z]) {
610 r1->calahead.b[z] = cal.b[z];
614 switch(r1->prog->as) {
616 for(z=0; z<BITS; z++) {
617 cal.b[z] |= ref.b[z] | externs.b[z];
623 for(z=0; z<BITS; z++) {
630 for(z=0; z<BITS; z++) {
631 cal.b[z] = externs.b[z];
635 for(z=0; z<BITS; z++) {
636 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
637 r1->use1.b[z] | r1->use2.b[z];
638 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
639 r1->refbehind.b[z] = ref.b[z];
640 r1->calbehind.b[z] = cal.b[z];
646 for(; r != r1; r = r->p1)
647 for(r2 = r->p2; r2 != R; r2 = r2->p2link)
648 prop(r2, r->refbehind, r->calbehind);
652 * find looping structure
654 * 1) find reverse postordering
655 * 2) find approximate dominators,
656 * the actual dominators if the flow graph is reducible
657 * otherwise, dominators plus some other non-dominators.
658 * See Matthew S. Hecht and Jeffrey D. Ullman,
659 * "Analysis of a Simple Algorithm for Global Data Flow Problems",
660 * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
661 * Oct. 1-3, 1973, pp. 207-217.
662 * 3) find all nodes with a predecessor dominated by the current node.
663 * such a node is a loop head.
664 * recursively, all preds with a greater rpo number are in the loop
667 postorder(Reg *r, Reg **rpo2r, long n)
674 n = postorder(r1, rpo2r, n);
677 n = postorder(r1, rpo2r, n);
684 rpolca(long *idom, long rpo1, long rpo2)
699 fatal(Z, "bad idom");
707 doms(long *idom, long r, long s)
715 loophead(long *idom, Reg *r)
720 if(r->p1 != R && doms(idom, src, r->p1->rpo))
722 for(r = r->p2; r != R; r = r->p2link)
723 if(doms(idom, src, r->rpo))
729 loopmark(Reg **rpo2r, long head, Reg *r)
731 if(r->rpo < head || r->active == head)
736 loopmark(rpo2r, head, r->p1);
737 for(r = r->p2; r != R; r = r->p2link)
738 loopmark(rpo2r, head, r);
742 loopit(Reg *r, long nr)
748 rpo2r = alloc(nr * sizeof(Reg*));
749 idom = alloc(nr * sizeof(long));
753 d = postorder(r, rpo2r, 0);
755 fatal(Z, "too many reg nodes");
757 for(i = 0; i < nr / 2; i++){
759 rpo2r[i] = rpo2r[nr - 1 - i];
760 rpo2r[nr - 1 - i] = r1;
762 for(i = 0; i < nr; i++)
766 for(i = 0; i < nr; i++){
770 if(r1->p1 != R && r1->p1->rpo < me)
772 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
774 d = rpolca(idom, d, r1->rpo);
778 for(i = 0; i < nr; i++){
781 if(r1->p2 != R && loophead(idom, r1))
782 loopmark(rpo2r, i, r1);
787 synch(Reg *r, Bits dif)
792 for(r1 = r; r1 != R; r1 = r1->s1) {
793 for(z=0; z<BITS; z++) {
794 dif.b[z] = (dif.b[z] &
795 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
796 r1->set.b[z] | r1->regdiff.b[z];
797 if(dif.b[z] != r1->regdiff.b[z]) {
798 r1->regdiff.b[z] = dif.b[z];
805 for(z=0; z<BITS; z++)
806 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
813 allreg(ulong b, Rgn *r)
823 diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
837 if(i && r->cost > 0) {
846 if(i && r->cost > 0) {
856 paint1(Reg *r, int bn)
868 if(!(r->refbehind.b[z] & bb))
873 if(!(r1->refahead.b[z] & bb))
875 if(r1->act.b[z] & bb)
880 if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
881 change -= CLOAD * r->loop;
882 if(debug['R'] && debug['v'])
883 print("%ld%P\tld %B $%d\n", r->loop,
884 r->prog, blsh(bn), change);
890 if(r->use1.b[z] & bb) {
891 change += CREF * r->loop;
892 if(p->to.type == D_FREG && p->as == AMOVW)
893 change = -CINF; /* cant go Rreg to Freg */
894 if(debug['R'] && debug['v'])
895 print("%ld%P\tu1 %B $%d\n", r->loop,
896 p, blsh(bn), change);
899 if((r->use2.b[z]|r->set.b[z]) & bb) {
900 change += CREF * r->loop;
901 if(p->from.type == D_FREG && p->as == AMOVW)
902 change = -CINF; /* cant go Rreg to Freg */
903 if(debug['R'] && debug['v'])
904 print("%ld%P\tu2 %B $%d\n", r->loop,
905 p, blsh(bn), change);
908 if(STORE(r) & r->regdiff.b[z] & bb) {
909 change -= CLOAD * r->loop;
910 if(debug['R'] && debug['v'])
911 print("%ld%P\tst %B $%d\n", r->loop,
912 p, blsh(bn), change);
915 if(r->refbehind.b[z] & bb)
916 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
917 if(r1->refahead.b[z] & bb)
920 if(!(r->refahead.b[z] & bb))
924 if(r1->refbehind.b[z] & bb)
931 if(!(r->refbehind.b[z] & bb))
937 paint2(Reg *r, int bn)
946 if(!(r->act.b[z] & bb))
949 if(!(r->refbehind.b[z] & bb))
954 if(!(r1->refahead.b[z] & bb))
956 if(!(r1->act.b[z] & bb))
965 if(r->refbehind.b[z] & bb)
966 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
967 if(r1->refahead.b[z] & bb)
968 vreg |= paint2(r1, bn);
970 if(!(r->refahead.b[z] & bb))
974 if(r1->refbehind.b[z] & bb)
975 vreg |= paint2(r1, bn);
979 if(!(r->act.b[z] & bb))
981 if(!(r->refbehind.b[z] & bb))
988 paint3(Reg *r, int bn, long rb, int rn)
1000 if(!(r->refbehind.b[z] & bb))
1005 if(!(r1->refahead.b[z] & bb))
1007 if(r1->act.b[z] & bb)
1012 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1013 addmove(r, bn, rn, 0);
1018 if(r->use1.b[z] & bb) {
1021 addreg(&p->from, rn);
1023 print("\t.c%P\n", p);
1025 if((r->use2.b[z]|r->set.b[z]) & bb) {
1030 print("\t.c%P\n", p);
1033 if(STORE(r) & r->regdiff.b[z] & bb)
1034 addmove(r, bn, rn, 1);
1037 if(r->refbehind.b[z] & bb)
1038 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1039 if(r1->refahead.b[z] & bb)
1040 paint3(r1, bn, rb, rn);
1042 if(!(r->refahead.b[z] & bb))
1046 if(r1->refbehind.b[z] & bb)
1047 paint3(r1, bn, rb, rn);
1051 if(r->act.b[z] & bb)
1053 if(!(r->refbehind.b[z] & bb))
1059 addreg(Adr *a, int rn)
1073 * track register variables including external registers:
1084 if(r >= REGMIN && r <= REGMAX)
1085 return 1L << (r-REGMIN);
1095 return bitno(b) + REGMIN;
1108 if(f < FREGMIN || f > FREGEXT)
1110 return 1L << (f - FREGMIN + 22);
1120 return bitno(b) - 22 + FREGMIN;