10 r = alloc(sizeof(*r));
19 rcmp(const void *a1, const void *a2)
34 return p2->varno - p1->varno;
43 long val, initpc, npc;
56 for(z=0; z<BITS; z++) {
61 regbits = RtoB(0) | /* return reg */
62 AtoB(6) | AtoB(7) | /* sp and sb */
63 FtoB(0) | FtoB(1); /* floating return reg */
64 for(i=0; i<NREG; i++) {
75 * build aux data structure
77 * find use and set of variables
79 val = 5L * 5L * 5L * 5L * 5L;
89 for(; p != P; p = p->link) {
127 switch(r1->prog->as) {
135 bit = mkvar(&p->from, AGOK);
139 if(!(mvbits & B_INDIR))
140 for(z=0; z<BITS; z++)
141 addrs.b[z] |= bit.b[z];
145 for(z=0; z<BITS; z++)
146 addrs.b[z] |= bit.b[z];
147 for(z=0; z<BITS; z++)
148 r->use1.b[z] |= bit.b[z];
151 bit = mkvar(&p->to, p->as);
154 case ABSR: /* funny */
155 for(z=0; z<BITS; z++)
156 addrs.b[z] |= bit.b[z];
160 if(!(mvbits & B_INDIR))
161 for(z=0; z<BITS; z++)
162 addrs.b[z] |= bit.b[z];
165 case ACMPB: case ACMPW: case ACMPL:
166 case AFCMPF: case AFCMPD:
167 case ATSTB: case ATSTW: case ATSTL:
168 case AFTSTF: case AFTSTD:
169 case ABFEXTU: case ABFEXTS:
171 for(z=0; z<BITS; z++)
172 addrs.b[z] |= bit.b[z];
173 for(z=0; z<BITS; z++)
174 r->use2.b[z] |= bit.b[z];
178 diag(Z, "reg: unknown asop: %A", p->as);
180 case AADDB: case AADDW: case AADDL:
181 case ASUBB: case ASUBW: case ASUBL:
182 case AANDB: case AANDW: case AANDL:
183 case AORB: case AORW: case AORL:
184 case AEORB: case AEORW: case AEORL:
186 for(z=0; z<BITS; z++)
187 r->use2.b[z] |= bit.b[z];
190 case AMOVB: case AMOVW: case AMOVL:
191 case AFMOVEB: case AFMOVEW: case AFMOVEL:
192 case ACLRB: case ACLRW: case ACLRL:
193 case AFMOVEF: case AFMOVED:
195 for(z=0; z<BITS; z++)
196 r->use2.b[z] |= bit.b[z];
198 for(z=0; z<BITS; z++)
199 r->set.b[z] |= bit.b[z];
211 * turn branch references to pointers
212 * build back pointers
214 for(r = firstr; r != R; r = r->link) {
216 if(p->to.type == D_BRANCH) {
217 val = p->to.offset - initpc;
221 if(r2 != R && val >= r2->pc) {
230 diag(Z, "ref not found\n%L:%P", p->lineno, p);
234 diag(Z, "ref to self");
243 print("\n%L %D\n", firstr->prog->lineno, &firstr->prog->from);
247 * find looping structure
249 for(r = firstr; r != R; r = r->link)
253 if(debug['R'] && debug['v']) {
254 print("\nlooping structure:\n");
255 for(r = firstr; r != R; r = r->link) {
256 print("%ld:%P", r->loop, r->prog);
257 for(z=0; z<BITS; z++)
258 bit.b[z] = r->use1.b[z] |
259 r->use2.b[z] | r->set.b[z];
263 print(" u1=%B", r->use1);
265 print(" u2=%B", r->use2);
267 print(" st=%B", r->set);
275 * iterate propagating usage
276 * back until flow graph is complete
280 for(r = firstr; r != R; r = r->link)
282 for(r = firstr; r != R; r = r->link)
283 if(r->prog->as == ARTS)
284 prop(r, zbits, zbits);
286 /* pick up unreachable code */
288 for(r = firstr; r != R; r = r1) {
290 if(r1 && r1->active && !r->active) {
291 prop(r, zbits, zbits);
302 * iterate propagating register/variable synchrony
303 * forward until graph is complete
307 for(r = firstr; r != R; r = r->link)
309 synch(firstr, zbits);
317 * calculate costs (paint1)
321 for(z=0; z<BITS; z++)
322 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
323 ~(externs.b[z] | params.b[z] | addrs.b[z]);
325 nearln = r->prog->lineno;
326 warn(Z, "used and not set: %B", bit);
327 if(debug['R'] && !debug['w'])
328 print("used and not set: %B\n", bit);
332 * load of a denormalized fp will trap
336 bit.b[i/32] &= ~(1L << (i%32));
338 if(v->type == D_AUTO) {
339 r->set.b[i/32] |= (1L << (i%32));
341 addmove(r, i, NREG+NREG, 1);
346 if(debug['R'] && debug['v'])
347 print("\nprop structure:\n");
348 for(r = firstr; r != R; r = r->link) {
349 if(debug['R'] && debug['v'])
350 print("%P\n set = %B; rah = %B; cal = %B\n",
351 r->prog, r->set, r->refahead, r->calahead);
356 for(r = firstr; r != R; r = r->link) {
357 for(z=0; z<BITS; z++)
358 bit.b[z] = r->set.b[z] &
359 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
361 nearln = r->prog->lineno;
362 warn(Z, "set and not used: %B", bit);
364 print("set an not used: %B\n", bit);
367 for(z=0; z<BITS; z++)
368 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
375 if(debug['R'] && debug['v'])
378 bit.b[i/32] &= ~(1L<<(i%32));
379 if(changer <= 0 && changea <= 0) {
381 print("%L$%d.%d: %B\n",
383 changer, changea, blsh(i));
386 rgp->costr = changer;
387 rgp->costa = changea;
389 if(nregion >= NRGN) {
390 warn(Z, "too many regions");
397 qsort(region, nregion, sizeof(region[0]), rcmp);
401 * determine used registers (paint2)
402 * replace code (paint3)
405 for(i=0; i<nregion; i++) {
406 bit = blsh(rgp->varno);
407 vreg = paint2(rgp->enter, rgp->varno);
408 vreg = allreg(vreg, rgp);
410 print("%L$%d.%d %R: %B\n",
411 rgp->enter->prog->lineno,
412 rgp->costr, rgp->costa,
415 if(rgp->regno != D_NONE)
416 paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
421 * peep-hole on basic block
423 if(!debug['R'] || debug['P'])
431 for(r = firstr; r != R; r = r1) {
438 for(; p != p1; p = p->link) {
460 print("addrs: %B\n", addrs);
463 for(r = firstr; r != R; r = r->link) {
465 if(p->to.type == D_BRANCH)
466 p->to.offset = r->s2->pc;
473 * free aux structures
475 for(p = firstr->prog; p != P; p = p->link){
476 while(p->link && p->link->as == ANOP)
477 p->link = p->link->link;
490 addmove(Reg *r, int bn, int rn, int f)
502 if(p1->from.type == D_CCR)
521 p1->lineno = p->lineno;
523 p1->from.type = D_CCR;
532 p1->lineno = p->lineno;
535 p1->from.sym = v->sym;
536 p1->from.type = v->type;
537 p1->from.offset = v->offset;
538 p1->from.etype = v->etype;
542 p1->from = zprog.from;
545 p1->as = opxt[OAS][v->etype];
551 p1->lineno = p->lineno;
553 p1->from.type = D_TOS;
558 print("%P\t.a%P\n", p, p1);
562 mkvar(Adr *a, int as)
571 t = a->type & D_MASK;
575 if(t >= D_R0 && t < D_R0+NREG) {
576 regbits |= RtoB(t-D_R0);
577 if(as == ADIVUL || as == ADIVSL)
578 regbits |= RtoB(t-D_R0+1);
580 if(t >= D_A0 && t < D_A0+NREG)
581 regbits |= AtoB(t-D_A0);
582 if(t >= D_F0 && t < D_F0+NREG)
583 regbits |= FtoB(t-D_F0);
596 if((a->type & I_MASK) == I_ADDR)
601 for(i=0; i<nvar; i++) {
609 if(s->name[0] == '.')
612 if(debug['w'] > 1 && s)
613 warn(Z, "variable not optimized: %s", s->name);
624 print("bit=%2d et=%2d %s (%p,%d,%ld)\n",
625 i, a->etype, s->name,
626 v->sym, v->type, v->offset);
630 if(t == D_EXTERN || t == D_STATIC)
631 for(z=0; z<BITS; z++)
632 externs.b[z] |= bit.b[z];
634 for(z=0; z<BITS; z++)
635 params.b[z] |= bit.b[z];
636 if(a->etype != v->etype || !typechlpfd[a->etype])
637 for(z=0; z<BITS; z++)
638 addrs.b[z] |= bit.b[z]; /* funny punning */
646 prop(Reg *r, Bits ref, Bits cal)
651 for(r1 = r; r1 != R; r1 = r1->p1) {
652 for(z=0; z<BITS; z++) {
653 ref.b[z] |= r1->refahead.b[z];
654 if(ref.b[z] != r1->refahead.b[z]) {
655 r1->refahead.b[z] = ref.b[z];
658 cal.b[z] |= r1->calahead.b[z];
659 if(cal.b[z] != r1->calahead.b[z]) {
660 r1->calahead.b[z] = cal.b[z];
664 switch(r1->prog->as) {
666 for(z=0; z<BITS; z++) {
667 cal.b[z] |= ref.b[z] | externs.b[z];
673 for(z=0; z<BITS; z++) {
680 for(z=0; z<BITS; z++) {
681 cal.b[z] = externs.b[z];
685 for(z=0; z<BITS; z++) {
686 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
687 r1->use1.b[z] | r1->use2.b[z];
688 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
689 r1->refbehind.b[z] = ref.b[z];
690 r1->calbehind.b[z] = cal.b[z];
696 for(; r != r1; r = r->p1)
697 for(r2 = r->p2; r2 != R; r2 = r2->p2link)
698 prop(r2, r->refbehind, r->calbehind);
702 * find looping structure
704 * 1) find reverse postordering
705 * 2) find approximate dominators,
706 * the actual dominators if the flow graph is reducible
707 * otherwise, dominators plus some other non-dominators.
708 * See Matthew S. Hecht and Jeffrey D. Ullman,
709 * "Analysis of a Simple Algorithm for Global Data Flow Problems",
710 * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
711 * Oct. 1-3, 1973, pp. 207-217.
712 * 3) find all nodes with a predecessor dominated by the current node.
713 * such a node is a loop head.
714 * recursively, all preds with a greater rpo number are in the loop
717 postorder(Reg *r, Reg **rpo2r, long n)
724 n = postorder(r1, rpo2r, n);
727 n = postorder(r1, rpo2r, n);
734 rpolca(long *idom, long rpo1, long rpo2)
749 fatal(Z, "bad idom");
757 doms(long *idom, long r, long s)
765 loophead(long *idom, Reg *r)
770 if(r->p1 != R && doms(idom, src, r->p1->rpo))
772 for(r = r->p2; r != R; r = r->p2link)
773 if(doms(idom, src, r->rpo))
779 loopmark(Reg **rpo2r, long head, Reg *r)
781 if(r->rpo < head || r->active == head)
786 loopmark(rpo2r, head, r->p1);
787 for(r = r->p2; r != R; r = r->p2link)
788 loopmark(rpo2r, head, r);
792 loopit(Reg *r, long nr)
798 rpo2r = alloc(nr * sizeof(Reg*));
799 idom = alloc(nr * sizeof(long));
803 d = postorder(r, rpo2r, 0);
805 fatal(Z, "too many reg nodes");
807 for(i = 0; i < nr / 2; i++){
809 rpo2r[i] = rpo2r[nr - 1 - i];
810 rpo2r[nr - 1 - i] = r1;
812 for(i = 0; i < nr; i++)
816 for(i = 0; i < nr; i++){
820 if(r1->p1 != R && r1->p1->rpo < me)
822 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
824 d = rpolca(idom, d, r1->rpo);
828 for(i = 0; i < nr; i++){
831 if(r1->p2 != R && loophead(idom, r1))
832 loopmark(rpo2r, i, r1);
837 synch(Reg *r, Bits dif)
842 for(r1 = r; r1 != R; r1 = r1->s1) {
843 for(z=0; z<BITS; z++) {
844 dif.b[z] = (dif.b[z] &
845 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
846 r1->set.b[z] | r1->regdiff.b[z];
847 if(dif.b[z] != r1->regdiff.b[z]) {
848 r1->regdiff.b[z] = dif.b[z];
855 for(z=0; z<BITS; z++)
856 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
863 allreg(ulong b, Rgn *r)
873 diag(Z, "unknown etype");
887 if(r->costa == r->costr)
890 if(j < NREG && r->costa > 0)
891 if(r->costa > r->costr || i >= NREG) {
895 if(i < NREG && r->costr > 0) {
914 paint1(Reg *r, int bn)
927 if(!(r->refbehind.b[z] & bb))
932 if(!(r1->refahead.b[z] & bb))
934 if(r1->act.b[z] & bb)
938 if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
939 changer -= CLOAD * r->loop;
940 changea -= CLOAD * r->loop;
941 if(debug['R'] && debug['v'])
942 print("%ld%P\tld %B $%d.%d\n", r->loop,
943 r->prog, blsh(bn), changer, changea);
949 if(r->use1.b[z] & bb) {
950 changer += CREF * r->loop;
951 changea += CREF * r->loop;
963 if(x >= D_R0 && x < D_R0+NREG)
965 if(x >= D_A0 && x < D_A0+NREG)
968 if(debug['R'] && debug['v'])
969 print("%ld%P\tu1 %B $%d.%d\n", r->loop,
970 p, blsh(bn), changer, changea);
972 if((r->use2.b[z]|r->set.b[z]) & bb) {
973 changer += CREF * r->loop;
974 changea += CREF * r->loop;
983 case ACLRL: /* can be faked */
984 case ATSTL: /* can be faked */
989 if(x >= D_R0 && x < D_R0+NREG)
991 if(x >= D_A0 && x < D_A0+NREG)
994 if(debug['R'] && debug['v'])
995 print("%ld%P\tu2 %B $%d.%d\n", r->loop,
996 p, blsh(bn), changer, changea);
998 if(STORE(r) & r->regdiff.b[z] & bb) {
999 changer -= CLOAD * r->loop;
1000 changea -= CLOAD * r->loop;
1001 if(debug['R'] && debug['v'])
1002 print("%ld%P\tst %B $%d.%d\n", r->loop,
1003 p, blsh(bn), changer, changea);
1006 if(r->refbehind.b[z] & bb)
1007 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1008 if(r1->refahead.b[z] & bb)
1011 if(!(r->refahead.b[z] & bb))
1015 if(r1->refbehind.b[z] & bb)
1020 if(r->act.b[z] & bb)
1022 if(!(r->refbehind.b[z] & bb))
1028 paint2(Reg *r, int bn)
1037 if(!(r->act.b[z] & bb))
1040 if(!(r->refbehind.b[z] & bb))
1045 if(!(r1->refahead.b[z] & bb))
1047 if(!(r1->act.b[z] & bb))
1056 if(r->refbehind.b[z] & bb)
1057 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1058 if(r1->refahead.b[z] & bb)
1059 vreg |= paint2(r1, bn);
1061 if(!(r->refahead.b[z] & bb))
1065 if(r1->refbehind.b[z] & bb)
1066 vreg |= paint2(r1, bn);
1070 if(!(r->act.b[z] & bb))
1072 if(!(r->refbehind.b[z] & bb))
1079 paint3(Reg *r, int bn, ulong rb, int rn)
1088 if(r->act.b[z] & bb)
1091 if(!(r->refbehind.b[z] & bb))
1096 if(!(r1->refahead.b[z] & bb))
1098 if(r1->act.b[z] & bb)
1102 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1103 addmove(r, bn, rn, 0);
1108 if(r->use1.b[z] & bb) {
1111 addreg(&p->from, rn);
1113 print("\t.c%P\n", p);
1115 if((r->use2.b[z]|r->set.b[z]) & bb) {
1120 print("\t.c%P\n", p);
1122 if(STORE(r) & r->regdiff.b[z] & bb)
1123 addmove(r, bn, rn, 1);
1126 if(r->refbehind.b[z] & bb)
1127 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1128 if(r1->refahead.b[z] & bb)
1129 paint3(r1, bn, rb, rn);
1131 if(!(r->refahead.b[z] & bb))
1135 if(r1->refbehind.b[z] & bb)
1136 paint3(r1, bn, rb, rn);
1140 if(r->act.b[z] & bb)
1142 if(!(r->refbehind.b[z] & bb))
1148 addreg(Adr *a, int rn)
1151 if(rn >= D_R0 && rn < D_R0+NREG)
1153 a->type = rn | (a->type & I_INDIR);
1157 a->type = rn | (a->type & I_INDIR);
1170 if(r < 0 || r >= NREG)
1172 return 1L << (r + 0);
1182 return bitno(b) - 0;
1189 if(a < 0 || a >= NREG)
1191 return 1L << (a + NREG);
1201 return bitno(b) - NREG;
1208 if(f < 0 || f >= NREG)
1210 return 1L << (f + NREG+NREG);
1220 return bitno(b) - NREG-NREG;