10 r = alloc(sizeof(*r));
19 rcmp(const void *a1, const void *a2)
30 return p2->varno - p1->varno;
39 long initpc, val, npc;
53 for(z=0; z<BITS; z++) {
62 * build aux data structure
64 * find use and set of variables
66 val = 5L * 5L * 5L * 5L * 5L;
76 for(; p != P; p = p->link) {
114 switch(r1->prog->as) {
123 * left side always read
125 bit = mkvar(&p->from, p->as==AMOVW);
126 for(z=0; z<BITS; z++)
127 r->use1.b[z] |= bit.b[z];
130 * right side depends on opcode
132 bit = mkvar(&p->to, 0);
136 diag(Z, "reg: unknown asop: %A", p->as);
150 for(z=0; z<BITS; z++)
151 r->set.b[z] |= bit.b[z];
158 for(z=0; z<BITS; z++)
159 addrs.b[z] |= bit.b[z];
170 * turn branch references to pointers
171 * build back pointers
173 for(r = firstr; r != R; r = r->link) {
175 if(p->to.type == D_BRANCH) {
176 val = p->to.offset - initpc;
180 if(r2 != R && val >= r2->pc) {
190 diag(Z, "ref not found\n%P", p);
195 diag(Z, "ref to self\n%P", p);
205 print("\n%L %D\n", p->lineno, &p->from);
210 * find looping structure
212 for(r = firstr; r != R; r = r->link)
216 if(debug['R'] && debug['v']) {
217 print("\nlooping structure:\n");
218 for(r = firstr; r != R; r = r->link) {
219 print("%ld:%P", r->loop, r->prog);
220 for(z=0; z<BITS; z++)
221 bit.b[z] = r->use1.b[z] |
222 r->use2.b[z] | r->set.b[z];
226 print(" u1=%B", r->use1);
228 print(" u2=%B", r->use2);
230 print(" st=%B", r->set);
238 * iterate propagating usage
239 * back until flow graph is complete
243 for(r = firstr; r != R; r = r->link)
245 for(r = firstr; r != R; r = r->link)
246 if(r->prog->as == ARETURN)
247 prop(r, zbits, zbits);
249 /* pick up unreachable code */
251 for(r = firstr; r != R; r = r1) {
253 if(r1 && r1->active && !r->active) {
254 prop(r, zbits, zbits);
266 * iterate propagating register/variable synchrony
267 * forward until graph is complete
271 for(r = firstr; r != R; r = r->link)
273 synch(firstr, zbits);
281 * calculate costs (paint1)
285 for(z=0; z<BITS; z++)
286 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
287 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
289 nearln = r->prog->lineno;
290 warn(Z, "used and not set: %B", bit);
291 if(debug['R'] && !debug['w'])
292 print("used and not set: %B\n", bit);
295 if(debug['R'] && debug['v'])
296 print("\nprop structure:\n");
297 for(r = firstr; r != R; r = r->link)
301 for(r = firstr; r != R; r = r->link) {
302 if(debug['R'] && debug['v'])
303 print("%P\n set = %B; rah = %B; cal = %B\n",
304 r->prog, r->set, r->refahead, r->calahead);
305 for(z=0; z<BITS; z++)
306 bit.b[z] = r->set.b[z] &
307 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
309 nearln = r->prog->lineno;
310 warn(Z, "set and not used: %B", bit);
312 print("set an not used: %B\n", bit);
315 for(z=0; z<BITS; z++)
316 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
322 if(debug['R'] && debug['v'])
325 bit.b[i/32] &= ~(1L<<(i%32));
329 r->prog->lineno, change, blsh(i));
334 if(nregion >= NRGN) {
335 warn(Z, "too many regions");
342 qsort(region, nregion, sizeof(region[0]), rcmp);
346 * determine used registers (paint2)
347 * replace code (paint3)
350 for(i=0; i<nregion; i++) {
351 bit = blsh(rgp->varno);
352 vreg = paint2(rgp->enter, rgp->varno);
353 vreg = allreg(vreg, rgp);
355 if(rgp->regno >= NREG)
356 print("%L$%d F%d: %B\n",
357 rgp->enter->prog->lineno,
362 print("%L$%d R%d: %B\n",
363 rgp->enter->prog->lineno,
369 paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
374 * peep-hole on basic block
376 if(!debug['R'] || debug['P'])
384 for(r = firstr; r != R; r = r1) {
391 for(; p != p1; p = p->link) {
413 print("addrs: %B\n", addrs);
416 for(r = firstr; r != R; r = r->link) {
418 if(p->to.type == D_BRANCH)
419 p->to.offset = r->s2->pc;
426 * free aux structures
428 for(p = firstr->prog; p != P; p = p->link){
429 while(p->link && p->link->as == ANOP)
430 p->link = p->link->link;
443 addmove(Reg *r, int bn, int rn, int f)
449 p1 = alloc(sizeof(*p1));
455 p1->lineno = p->lineno;
462 a->offset = v->offset;
465 if(a->etype == TARRAY || a->sym == S)
469 if(v->etype == TCHAR || v->etype == TUCHAR)
471 if(v->etype == TSHORT || v->etype == TUSHORT)
473 if(v->etype == TFLOAT)
475 if(v->etype == TDOUBLE)
478 p1->from.type = D_REG;
481 p1->from.type = D_FREG;
482 p1->from.reg = rn-NREG;
493 if(v->etype == TUCHAR)
495 if(v->etype == TUSHORT)
499 print("%P\t.a%P\n", p, p1);
503 mkvar(Adr *a, int docon)
512 if(t == D_REG && a->reg != NREG)
513 regbits |= RtoB(a->reg);
514 if(t == D_FREG && a->reg != NREG)
515 regbits |= FtoB(a->reg);
520 if(t != D_CONST || !docon || a->reg != NREG)
525 if(s == S && sval(o))
530 for(i=0; i<nvar; i++) {
538 if(s->name[0] == '.')
541 if(debug['w'] > 1 && s)
542 warn(Z, "variable not optimized: %s", s->name);
553 print("bit=%2d et=%2d %D\n", i, et, a);
556 if(n == D_EXTERN || n == D_STATIC)
557 for(z=0; z<BITS; z++)
558 externs.b[z] |= bit.b[z];
560 for(z=0; z<BITS; z++)
561 params.b[z] |= bit.b[z];
562 if(v->etype != et || !typechlpfd[et]) /* funny punning */
563 for(z=0; z<BITS; z++)
564 addrs.b[z] |= bit.b[z];
567 for(z=0; z<BITS; z++)
568 consts.b[z] |= bit.b[z];
572 for(z=0; z<BITS; z++)
573 addrs.b[z] |= bit.b[z];
574 for(z=0; z<BITS; z++)
575 params.b[z] |= bit.b[z];
586 prop(Reg *r, Bits ref, Bits cal)
591 for(r1 = r; r1 != R; r1 = r1->p1) {
592 for(z=0; z<BITS; z++) {
593 ref.b[z] |= r1->refahead.b[z];
594 if(ref.b[z] != r1->refahead.b[z]) {
595 r1->refahead.b[z] = ref.b[z];
598 cal.b[z] |= r1->calahead.b[z];
599 if(cal.b[z] != r1->calahead.b[z]) {
600 r1->calahead.b[z] = cal.b[z];
604 switch(r1->prog->as) {
606 for(z=0; z<BITS; z++) {
607 cal.b[z] |= ref.b[z] | externs.b[z];
613 for(z=0; z<BITS; z++) {
620 for(z=0; z<BITS; z++) {
621 cal.b[z] = externs.b[z];
625 for(z=0; z<BITS; z++) {
626 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
627 r1->use1.b[z] | r1->use2.b[z];
628 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
629 r1->refbehind.b[z] = ref.b[z];
630 r1->calbehind.b[z] = cal.b[z];
636 for(; r != r1; r = r->p1)
637 for(r2 = r->p2; r2 != R; r2 = r2->p2link)
638 prop(r2, r->refbehind, r->calbehind);
643 * find looping structure
645 * 1) find reverse postordering
646 * 2) find approximate dominators,
647 * the actual dominators if the flow graph is reducible
648 * otherwise, dominators plus some other non-dominators.
649 * See Matthew S. Hecht and Jeffrey D. Ullman,
650 * "Analysis of a Simple Algorithm for Global Data Flow Problems",
651 * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
652 * Oct. 1-3, 1973, pp. 207-217.
653 * 3) find all nodes with a predecessor dominated by the current node.
654 * such a node is a loop head.
655 * recursively, all preds with a greater rpo number are in the loop
658 postorder(Reg *r, Reg **rpo2r, long n)
665 n = postorder(r1, rpo2r, n);
668 n = postorder(r1, rpo2r, n);
675 rpolca(long *idom, long rpo1, long rpo2)
690 fatal(Z, "bad idom");
698 doms(long *idom, long r, long s)
706 loophead(long *idom, Reg *r)
711 if(r->p1 != R && doms(idom, src, r->p1->rpo))
713 for(r = r->p2; r != R; r = r->p2link)
714 if(doms(idom, src, r->rpo))
720 loopmark(Reg **rpo2r, long head, Reg *r)
722 if(r->rpo < head || r->active == head)
727 loopmark(rpo2r, head, r->p1);
728 for(r = r->p2; r != R; r = r->p2link)
729 loopmark(rpo2r, head, r);
733 loopit(Reg *r, long nr)
739 rpo2r = alloc(nr * sizeof(Reg*));
740 idom = alloc(nr * sizeof(long));
744 d = postorder(r, rpo2r, 0);
746 fatal(Z, "too many reg nodes");
748 for(i = 0; i < nr / 2; i++){
750 rpo2r[i] = rpo2r[nr - 1 - i];
751 rpo2r[nr - 1 - i] = r1;
753 for(i = 0; i < nr; i++)
757 for(i = 0; i < nr; i++){
761 if(r1->p1 != R && r1->p1->rpo < me)
763 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
765 d = rpolca(idom, d, r1->rpo);
769 for(i = 0; i < nr; i++){
772 if(r1->p2 != R && loophead(idom, r1))
773 loopmark(rpo2r, i, r1);
778 synch(Reg *r, Bits dif)
783 for(r1 = r; r1 != R; r1 = r1->s1) {
784 for(z=0; z<BITS; z++) {
785 dif.b[z] = (dif.b[z] &
786 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
787 r1->set.b[z] | r1->regdiff.b[z];
788 if(dif.b[z] != r1->regdiff.b[z]) {
789 r1->regdiff.b[z] = dif.b[z];
796 for(z=0; z<BITS; z++)
797 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
804 allreg(ulong b, Rgn *r)
814 diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
828 if(i && r->cost > 0) {
837 if(i && r->cost > 0) {
847 paint1(Reg *r, int bn)
859 if(!(r->refbehind.b[z] & bb))
864 if(!(r1->refahead.b[z] & bb))
866 if(r1->act.b[z] & bb)
871 if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
872 change -= CLOAD * r->loop;
873 if(debug['R'] && debug['v'])
874 print("%ld%P\tld %B $%d\n", r->loop,
875 r->prog, blsh(bn), change);
881 if(r->use1.b[z] & bb) {
882 change += CREF * r->loop;
883 if(p->to.type == D_FREG && p->as == AMOVW)
884 change = -CINF; /* cant go Rreg to Freg */
885 if(debug['R'] && debug['v'])
886 print("%ld%P\tu1 %B $%d\n", r->loop,
887 p, blsh(bn), change);
890 if((r->use2.b[z]|r->set.b[z]) & bb) {
891 change += CREF * r->loop;
892 if(p->from.type == D_FREG && p->as == AMOVW)
893 change = -CINF; /* cant go Rreg to Freg */
894 if(debug['R'] && debug['v'])
895 print("%ld%P\tu2 %B $%d\n", r->loop,
896 p, blsh(bn), change);
899 if(STORE(r) & r->regdiff.b[z] & bb) {
900 change -= CLOAD * r->loop;
901 if(debug['R'] && debug['v'])
902 print("%ld%P\tst %B $%d\n", r->loop,
903 p, blsh(bn), change);
906 if(r->refbehind.b[z] & bb)
907 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
908 if(r1->refahead.b[z] & bb)
911 if(!(r->refahead.b[z] & bb))
915 if(r1->refbehind.b[z] & bb)
922 if(!(r->refbehind.b[z] & bb))
928 paint2(Reg *r, int bn)
937 if(!(r->act.b[z] & bb))
940 if(!(r->refbehind.b[z] & bb))
945 if(!(r1->refahead.b[z] & bb))
947 if(!(r1->act.b[z] & bb))
956 if(r->refbehind.b[z] & bb)
957 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
958 if(r1->refahead.b[z] & bb)
959 vreg |= paint2(r1, bn);
961 if(!(r->refahead.b[z] & bb))
965 if(r1->refbehind.b[z] & bb)
966 vreg |= paint2(r1, bn);
970 if(!(r->act.b[z] & bb))
972 if(!(r->refbehind.b[z] & bb))
979 paint3(Reg *r, int bn, long rb, int rn)
991 if(!(r->refbehind.b[z] & bb))
996 if(!(r1->refahead.b[z] & bb))
998 if(r1->act.b[z] & bb)
1003 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1004 addmove(r, bn, rn, 0);
1009 if(r->use1.b[z] & bb) {
1012 addreg(&p->from, rn);
1014 print("\t.c%P\n", p);
1016 if((r->use2.b[z]|r->set.b[z]) & bb) {
1021 print("\t.c%P\n", p);
1024 if(STORE(r) & r->regdiff.b[z] & bb)
1025 addmove(r, bn, rn, 1);
1028 if(r->refbehind.b[z] & bb)
1029 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1030 if(r1->refahead.b[z] & bb)
1031 paint3(r1, bn, rb, rn);
1033 if(!(r->refahead.b[z] & bb))
1037 if(r1->refbehind.b[z] & bb)
1038 paint3(r1, bn, rb, rn);
1042 if(r->act.b[z] & bb)
1044 if(!(r->refbehind.b[z] & bb))
1050 addreg(Adr *a, int rn)
1077 if(r >= 9 && r <= 13)
1079 if(r >= 16 && r <= 31)
1080 return 1L << (r-11);
1109 if(f < 4 || f > 22 || (f&1))
1111 return 1L << (f/2 + 20);
1120 return bitno(b)*2 - 40;