]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/qc/reg.c
bfd275862eccf2deddd491b57e9788dda32bd287
[plan9front.git] / sys / src / cmd / qc / reg.c
1 #include "gc.h"
2
3 Reg*
4 rega(void)
5 {
6         Reg *r;
7
8         r = freer;
9         if(r == R) {
10                 r = alloc(sizeof(*r));
11         } else
12                 freer = r->link;
13
14         *r = zreg;
15         return r;
16 }
17
18 int
19 rcmp(void *a1, void *a2)
20 {
21         Rgn *p1, *p2;
22         int c1, c2;
23
24         p1 = a1;
25         p2 = a2;
26         c1 = p2->cost;
27         c2 = p1->cost;
28         if(c1 -= c2)
29                 return c1;
30         return p2->varno - p1->varno;
31 }
32
33 void
34 regopt(Prog *p)
35 {
36         Reg *r, *r1, *r2;
37         Prog *p1;
38         int i, z;
39         long initpc, val, npc;
40         ulong vreg;
41         Bits bit;
42         struct
43         {
44                 long    m;
45                 long    c;
46                 Reg*    p;
47         } log5[6], *lp;
48
49         firstr = R;
50         lastr = R;
51         nvar = 0;
52         regbits = 0;
53         for(z=0; z<BITS; z++) {
54                 externs.b[z] = 0;
55                 params.b[z] = 0;
56                 consts.b[z] = 0;
57                 addrs.b[z] = 0;
58         }
59
60         /*
61          * pass 1
62          * build aux data structure
63          * allocate pcs
64          * find use and set of variables
65          */
66         val = 5L * 5L * 5L * 5L * 5L;
67         lp = log5;
68         for(i=0; i<5; i++) {
69                 lp->m = val;
70                 lp->c = 0;
71                 lp->p = R;
72                 val /= 5L;
73                 lp++;
74         }
75         val = 0;
76         for(; p != P; p = p->link) {
77                 switch(p->as) {
78                 case ADATA:
79                 case AGLOBL:
80                 case ANAME:
81                 case ASIGNAME:
82                         continue;
83                 }
84                 r = rega();
85                 if(firstr == R) {
86                         firstr = r;
87                         lastr = r;
88                 } else {
89                         lastr->link = r;
90                         r->p1 = lastr;
91                         lastr->s1 = r;
92                         lastr = r;
93                 }
94                 r->prog = p;
95                 r->pc = val;
96                 val++;
97
98                 lp = log5;
99                 for(i=0; i<5; i++) {
100                         lp->c--;
101                         if(lp->c <= 0) {
102                                 lp->c = lp->m;
103                                 if(lp->p != R)
104                                         lp->p->log5 = r;
105                                 lp->p = r;
106                                 (lp+1)->c = 0;
107                                 break;
108                         }
109                         lp++;
110                 }
111
112                 r1 = r->p1;
113                 if(r1 != R)
114                 switch(r1->prog->as) {
115                 case ARETURN:
116                 case ABR:
117                 case ARFI:
118                         r->p1 = R;
119                         r1->s1 = R;
120                 }
121
122                 /*
123                  * left side always read
124                  */
125                 bit = mkvar(&p->from, p->as==AMOVW);
126                 for(z=0; z<BITS; z++)
127                         r->use1.b[z] |= bit.b[z];
128
129                 /*
130                  * right side depends on opcode
131                  */
132                 bit = mkvar(&p->to, 0);
133                 if(bany(&bit))
134                 switch(p->as) {
135                 default:
136                         diag(Z, "reg: unknown asop: %A", p->as);
137                         break;
138
139                 /*
140                  * right side write
141                  */
142                 case ANOP:
143                 case AMOVB:
144                 case AMOVBU:
145                 case AMOVBZ:
146                 case AMOVBZU:
147                 case AMOVH:
148                 case AMOVHBR:
149                 case AMOVHU:
150                 case AMOVHZ:
151                 case AMOVHZU:
152                 case AMOVW:
153                 case AMOVWU:
154                 case AFMOVD:
155                 case AFMOVDCC:
156                 case AFMOVDU:
157                 case AFMOVS:
158                 case AFMOVSU:
159                 case AFRSP:
160                         for(z=0; z<BITS; z++)
161                                 r->set.b[z] |= bit.b[z];
162                         break;
163
164                 /*
165                  * funny
166                  */
167                 case ABL:
168                         for(z=0; z<BITS; z++)
169                                 addrs.b[z] |= bit.b[z];
170                         break;
171                 }
172         }
173         if(firstr == R)
174                 return;
175         initpc = pc - val;
176         npc = val;
177
178         /*
179          * pass 2
180          * turn branch references to pointers
181          * build back pointers
182          */
183         for(r = firstr; r != R; r = r->link) {
184                 p = r->prog;
185                 if(p->to.type == D_BRANCH) {
186                         val = p->to.offset - initpc;
187                         r1 = firstr;
188                         while(r1 != R) {
189                                 r2 = r1->log5;
190                                 if(r2 != R && val >= r2->pc) {
191                                         r1 = r2;
192                                         continue;
193                                 }
194                                 if(r1->pc == val)
195                                         break;
196                                 r1 = r1->link;
197                         }
198                         if(r1 == R) {
199                                 nearln = p->lineno;
200                                 diag(Z, "ref not found\n%P", p);
201                                 continue;
202                         }
203                         if(r1 == r) {
204                                 nearln = p->lineno;
205                                 diag(Z, "ref to self\n%P", p);
206                                 continue;
207                         }
208                         r->s2 = r1;
209                         r->p2link = r1->p2;
210                         r1->p2 = r;
211                 }
212         }
213         if(debug['R']) {
214                 p = firstr->prog;
215                 print("\n%L %D\n", p->lineno, &p->from);
216         }
217
218         /*
219          * pass 2.5
220          * find looping structure
221          */
222         for(r = firstr; r != R; r = r->link)
223                 r->active = 0;
224         change = 0;
225         loopit(firstr, npc);
226         if(debug['R'] && debug['v']) {
227                 print("\nlooping structure:\n");
228                 for(r = firstr; r != R; r = r->link) {
229                         print("%ld:%P", r->loop, r->prog);
230                         for(z=0; z<BITS; z++)
231                                 bit.b[z] = r->use1.b[z] |
232                                         r->use2.b[z] | r->set.b[z];
233                         if(bany(&bit)) {
234                                 print("\t");
235                                 if(bany(&r->use1))
236                                         print(" u1=%B", r->use1);
237                                 if(bany(&r->use2))
238                                         print(" u2=%B", r->use2);
239                                 if(bany(&r->set))
240                                         print(" st=%B", r->set);
241                         }
242                         print("\n");
243                 }
244         }
245
246         /*
247          * pass 3
248          * iterate propagating usage
249          *      back until flow graph is complete
250          */
251 loop1:
252         change = 0;
253         for(r = firstr; r != R; r = r->link)
254                 r->active = 0;
255         for(r = firstr; r != R; r = r->link)
256                 if(r->prog->as == ARETURN)
257                         prop(r, zbits, zbits);
258 loop11:
259         /* pick up unreachable code */
260         i = 0;
261         for(r = firstr; r != R; r = r1) {
262                 r1 = r->link;
263                 if(r1 && r1->active && !r->active) {
264                         prop(r, zbits, zbits);
265                         i = 1;
266                 }
267         }
268         if(i)
269                 goto loop11;
270         if(change)
271                 goto loop1;
272
273
274         /*
275          * pass 4
276          * iterate propagating register/variable synchrony
277          *      forward until graph is complete
278          */
279 loop2:
280         change = 0;
281         for(r = firstr; r != R; r = r->link)
282                 r->active = 0;
283         synch(firstr, zbits);
284         if(change)
285                 goto loop2;
286
287
288         /*
289          * pass 5
290          * isolate regions
291          * calculate costs (paint1)
292          */
293         r = firstr;
294         if(r) {
295                 for(z=0; z<BITS; z++)
296                         bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
297                           ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
298                 if(bany(&bit)) {
299                         nearln = r->prog->lineno;
300                         warn(Z, "used and not set: %B", bit);
301                         if(debug['R'] && !debug['w'])
302                                 print("used and not set: %B\n", bit);
303                 }
304         }
305         if(debug['R'] && debug['v'])
306                 print("\nprop structure:\n");
307         for(r = firstr; r != R; r = r->link)
308                 r->act = zbits;
309         rgp = region;
310         nregion = 0;
311         for(r = firstr; r != R; r = r->link) {
312                 if(debug['R'] && debug['v'])
313                         print("%P\n     set = %B; rah = %B; cal = %B\n",
314                                 r->prog, r->set, r->refahead, r->calahead);
315                 for(z=0; z<BITS; z++)
316                         bit.b[z] = r->set.b[z] &
317                           ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
318                 if(bany(&bit)) {
319                         nearln = r->prog->lineno;
320                         warn(Z, "assignment not used: %B", bit);
321                         if(debug['R'])
322                                 print("set an not used: %B\n", bit);
323                         excise(r);
324                 }
325                 for(z=0; z<BITS; z++)
326                         bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
327                 while(bany(&bit)) {
328                         i = bnum(bit);
329                         rgp->enter = r;
330                         rgp->varno = i;
331                         change = 0;
332                         if(debug['R'] && debug['v'])
333                                 print("\n");
334                         paint1(r, i);
335                         bit.b[i/32] &= ~(1L<<(i%32));
336                         if(change <= 0) {
337                                 if(debug['R'])
338                                         print("%L$%d: %B\n",
339                                                 r->prog->lineno, change, blsh(i));
340                                 continue;
341                         }
342                         rgp->cost = change;
343                         nregion++;
344                         if(nregion >= NRGN) {
345                                 warn(Z, "too many regions");
346                                 goto brk;
347                         }
348                         rgp++;
349                 }
350         }
351 brk:
352         qsort(region, nregion, sizeof(region[0]), rcmp);
353
354         /*
355          * pass 6
356          * determine used registers (paint2)
357          * replace code (paint3)
358          */
359         rgp = region;
360         for(i=0; i<nregion; i++) {
361                 bit = blsh(rgp->varno);
362                 vreg = paint2(rgp->enter, rgp->varno);
363                 vreg = allreg(vreg, rgp);
364                 if(debug['R']) {
365                         if(rgp->regno >= NREG)
366                                 print("%L$%d F%d: %B\n",
367                                         rgp->enter->prog->lineno,
368                                         rgp->cost,
369                                         rgp->regno-NREG,
370                                         bit);
371                         else
372                                 print("%L$%d R%d: %B\n",
373                                         rgp->enter->prog->lineno,
374                                         rgp->cost,
375                                         rgp->regno,
376                                         bit);
377                 }
378                 if(rgp->regno != 0)
379                         paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
380                 rgp++;
381         }
382         /*
383          * pass 7
384          * peep-hole on basic block
385          */
386         if(!debug['R'] || debug['P'])
387                 peep();
388
389         /*
390          * pass 8
391          * recalculate pc
392          */
393         val = initpc;
394         for(r = firstr; r != R; r = r1) {
395                 r->pc = val;
396                 p = r->prog;
397                 p1 = P;
398                 r1 = r->link;
399                 if(r1 != R)
400                         p1 = r1->prog;
401                 for(; p != p1; p = p->link) {
402                         switch(p->as) {
403                         default:
404                                 val++;
405                                 break;
406
407                         case ANOP:
408                         case ADATA:
409                         case AGLOBL:
410                         case ANAME:
411                         case ASIGNAME:
412                                 break;
413                         }
414                 }
415         }
416         pc = val;
417
418         /*
419          * fix up branches
420          */
421         if(debug['R'])
422                 if(bany(&addrs))
423                         print("addrs: %B\n", addrs);
424
425         r1 = 0; /* set */
426         for(r = firstr; r != R; r = r->link) {
427                 p = r->prog;
428                 if(p->to.type == D_BRANCH)
429                         p->to.offset = r->s2->pc;
430                 r1 = r;
431         }
432
433         /*
434          * last pass
435          * eliminate nops
436          * free aux structures
437          */
438         for(p = firstr->prog; p != P; p = p->link){
439                 while(p->link && p->link->as == ANOP)
440                         p->link = p->link->link;
441         }
442         if(r1 != R) {
443                 r1->link = freer;
444                 freer = firstr;
445         }
446 }
447
448 /*
449  * add mov b,rn
450  * just after r
451  */
452 void
453 addmove(Reg *r, int bn, int rn, int f)
454 {
455         Prog *p, *p1;
456         Adr *a;
457         Var *v;
458
459         p1 = alloc(sizeof(*p1));
460         *p1 = zprog;
461         p = r->prog;
462
463         p1->link = p->link;
464         p->link = p1;
465         p1->lineno = p->lineno;
466
467         v = var + bn;
468
469         a = &p1->to;
470         a->sym = v->sym;
471         a->name = v->name;
472         a->offset = v->offset;
473         a->etype = v->etype;
474         a->type = D_OREG;
475         if(a->etype == TARRAY || a->sym == S)
476                 a->type = D_CONST;
477
478         p1->as = AMOVW;
479         if(v->etype == TCHAR || v->etype == TUCHAR)
480                 p1->as = AMOVB;
481         if(v->etype == TSHORT || v->etype == TUSHORT)
482                 p1->as = AMOVH;
483         if(v->etype == TFLOAT)
484                 p1->as = AFMOVS;
485         if(v->etype == TDOUBLE)
486                 p1->as = AFMOVD;
487
488         p1->from.type = D_REG;
489         p1->from.reg = rn;
490         if(rn >= NREG) {
491                 p1->from.type = D_FREG;
492                 p1->from.reg = rn-NREG;
493         }
494         if(!f) {
495                 p1->from = *a;
496                 *a = zprog.from;
497                 a->type = D_REG;
498                 a->reg = rn;
499                 if(rn >= NREG) {
500                         a->type = D_FREG;
501                         a->reg = rn-NREG;
502                 }
503                 if(v->etype == TUCHAR)
504                         p1->as = AMOVBZ;
505                 if(v->etype == TUSHORT)
506                         p1->as = AMOVHZ;
507         }
508         if(debug['R'])
509                 print("%P\t.a%P\n", p, p1);
510 }
511
512 Bits
513 mkvar(Adr *a, int docon)
514 {
515         Var *v;
516         int i, t, n, et, z;
517         long o;
518         Bits bit;
519         Sym *s;
520
521         t = a->type;
522         if(t == D_REG && a->reg != NREG)
523                 regbits |= RtoB(a->reg);
524         if(t == D_FREG && a->reg != NREG)
525                 regbits |= FtoB(a->reg);
526         s = a->sym;
527         o = a->offset;
528         et = a->etype;
529         if(s == S) {
530                 if(t != D_CONST || !docon || a->reg != NREG)
531                         goto none;
532                 et = TLONG;
533         }
534         if(t == D_CONST) {
535                 if(s == S && sval(o))
536                         goto none;
537         }
538         n = a->name;
539         v = var;
540         for(i=0; i<nvar; i++) {
541                 if(s == v->sym)
542                 if(n == v->name)
543                 if(o == v->offset)
544                         goto out;
545                 v++;
546         }
547         if(s)
548                 if(s->name[0] == '.')
549                         goto none;
550         if(nvar >= NVAR) {
551                 if(debug['w'] > 1 && s)
552                         warn(Z, "variable not optimized: %s", s->name);
553                 goto none;
554         }
555         i = nvar;
556         nvar++;
557         v = &var[i];
558         v->sym = s;
559         v->offset = o;
560         v->etype = et;
561         v->name = n;
562         if(debug['R'])
563                 print("bit=%2d et=%2d %D\n", i, et, a);
564 out:
565         bit = blsh(i);
566         if(n == D_EXTERN || n == D_STATIC)
567                 for(z=0; z<BITS; z++)
568                         externs.b[z] |= bit.b[z];
569         if(n == D_PARAM)
570                 for(z=0; z<BITS; z++)
571                         params.b[z] |= bit.b[z];
572         if(v->etype != et || !typechlpfd[et])   /* funny punning */
573                 for(z=0; z<BITS; z++)
574                         addrs.b[z] |= bit.b[z];
575         if(t == D_CONST) {
576                 if(s == S) {
577                         for(z=0; z<BITS; z++)
578                                 consts.b[z] |= bit.b[z];
579                         return bit;
580                 }
581                 if(et != TARRAY)
582                         for(z=0; z<BITS; z++)
583                                 addrs.b[z] |= bit.b[z];
584                 for(z=0; z<BITS; z++)
585                         params.b[z] |= bit.b[z];
586                 return bit;
587         }
588         if(t == D_OREG)
589                 return bit;
590
591 none:
592         return zbits;
593 }
594
595 void
596 prop(Reg *r, Bits ref, Bits cal)
597 {
598         Reg *r1, *r2;
599         int z;
600
601         for(r1 = r; r1 != R; r1 = r1->p1) {
602                 for(z=0; z<BITS; z++) {
603                         ref.b[z] |= r1->refahead.b[z];
604                         if(ref.b[z] != r1->refahead.b[z]) {
605                                 r1->refahead.b[z] = ref.b[z];
606                                 change++;
607                         }
608                         cal.b[z] |= r1->calahead.b[z];
609                         if(cal.b[z] != r1->calahead.b[z]) {
610                                 r1->calahead.b[z] = cal.b[z];
611                                 change++;
612                         }
613                 }
614                 switch(r1->prog->as) {
615                 case ABL:
616                         for(z=0; z<BITS; z++) {
617                                 cal.b[z] |= ref.b[z] | externs.b[z];
618                                 ref.b[z] = 0;
619                         }
620                         break;
621
622                 case ATEXT:
623                         for(z=0; z<BITS; z++) {
624                                 cal.b[z] = 0;
625                                 ref.b[z] = 0;
626                         }
627                         break;
628
629                 case ARETURN:
630                         for(z=0; z<BITS; z++) {
631                                 cal.b[z] = externs.b[z];
632                                 ref.b[z] = 0;
633                         }
634                 }
635                 for(z=0; z<BITS; z++) {
636                         ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
637                                 r1->use1.b[z] | r1->use2.b[z];
638                         cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
639                         r1->refbehind.b[z] = ref.b[z];
640                         r1->calbehind.b[z] = cal.b[z];
641                 }
642                 if(r1->active)
643                         break;
644                 r1->active = 1;
645         }
646         for(; r != r1; r = r->p1)
647                 for(r2 = r->p2; r2 != R; r2 = r2->p2link)
648                         prop(r2, r->refbehind, r->calbehind);
649 }
650
651 /*
652  * find looping structure
653  *
654  * 1) find reverse postordering
655  * 2) find approximate dominators,
656  *      the actual dominators if the flow graph is reducible
657  *      otherwise, dominators plus some other non-dominators.
658  *      See Matthew S. Hecht and Jeffrey D. Ullman,
659  *      "Analysis of a Simple Algorithm for Global Data Flow Problems",
660  *      Conf.  Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
661  *      Oct. 1-3, 1973, pp.  207-217.
662  * 3) find all nodes with a predecessor dominated by the current node.
663  *      such a node is a loop head.
664  *      recursively, all preds with a greater rpo number are in the loop
665  */
666 long
667 postorder(Reg *r, Reg **rpo2r, long n)
668 {
669         Reg *r1;
670
671         r->rpo = 1;
672         r1 = r->s1;
673         if(r1 && !r1->rpo)
674                 n = postorder(r1, rpo2r, n);
675         r1 = r->s2;
676         if(r1 && !r1->rpo)
677                 n = postorder(r1, rpo2r, n);
678         rpo2r[n] = r;
679         n++;
680         return n;
681 }
682
683 long
684 rpolca(long *idom, long rpo1, long rpo2)
685 {
686         long t;
687
688         if(rpo1 == -1)
689                 return rpo2;
690         while(rpo1 != rpo2){
691                 if(rpo1 > rpo2){
692                         t = rpo2;
693                         rpo2 = rpo1;
694                         rpo1 = t;
695                 }
696                 while(rpo1 < rpo2){
697                         t = idom[rpo2];
698                         if(t >= rpo2)
699                                 fatal(Z, "bad idom");
700                         rpo2 = t;
701                 }
702         }
703         return rpo1;
704 }
705
706 int
707 doms(long *idom, long r, long s)
708 {
709         while(s > r)
710                 s = idom[s];
711         return s == r;
712 }
713
714 int
715 loophead(long *idom, Reg *r)
716 {
717         long src;
718
719         src = r->rpo;
720         if(r->p1 != R && doms(idom, src, r->p1->rpo))
721                 return 1;
722         for(r = r->p2; r != R; r = r->p2link)
723                 if(doms(idom, src, r->rpo))
724                         return 1;
725         return 0;
726 }
727
728 void
729 loopmark(Reg **rpo2r, long head, Reg *r)
730 {
731         if(r->rpo < head || r->active == head)
732                 return;
733         r->active = head;
734         r->loop += LOOP;
735         if(r->p1 != R)
736                 loopmark(rpo2r, head, r->p1);
737         for(r = r->p2; r != R; r = r->p2link)
738                 loopmark(rpo2r, head, r);
739 }
740
741 void
742 loopit(Reg *r, long nr)
743 {
744         Reg *r1;
745         long i, d, me;
746
747         if(nr > maxnr) {
748                 rpo2r = alloc(nr * sizeof(Reg*));
749                 idom = alloc(nr * sizeof(long));
750                 maxnr = nr;
751         }
752
753         d = postorder(r, rpo2r, 0);
754         if(d > nr)
755                 fatal(Z, "too many reg nodes");
756         nr = d;
757         for(i = 0; i < nr / 2; i++){
758                 r1 = rpo2r[i];
759                 rpo2r[i] = rpo2r[nr - 1 - i];
760                 rpo2r[nr - 1 - i] = r1;
761         }
762         for(i = 0; i < nr; i++)
763                 rpo2r[i]->rpo = i;
764
765         idom[0] = 0;
766         for(i = 0; i < nr; i++){
767                 r1 = rpo2r[i];
768                 me = r1->rpo;
769                 d = -1;
770                 if(r1->p1 != R && r1->p1->rpo < me)
771                         d = r1->p1->rpo;
772                 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
773                         if(r1->rpo < me)
774                                 d = rpolca(idom, d, r1->rpo);
775                 idom[i] = d;
776         }
777
778         for(i = 0; i < nr; i++){
779                 r1 = rpo2r[i];
780                 r1->loop++;
781                 if(r1->p2 != R && loophead(idom, r1))
782                         loopmark(rpo2r, i, r1);
783         }
784 }
785
786 void
787 synch(Reg *r, Bits dif)
788 {
789         Reg *r1;
790         int z;
791
792         for(r1 = r; r1 != R; r1 = r1->s1) {
793                 for(z=0; z<BITS; z++) {
794                         dif.b[z] = (dif.b[z] &
795                                 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
796                                         r1->set.b[z] | r1->regdiff.b[z];
797                         if(dif.b[z] != r1->regdiff.b[z]) {
798                                 r1->regdiff.b[z] = dif.b[z];
799                                 change++;
800                         }
801                 }
802                 if(r1->active)
803                         break;
804                 r1->active = 1;
805                 for(z=0; z<BITS; z++)
806                         dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
807                 if(r1->s2 != R)
808                         synch(r1->s2, dif);
809         }
810 }
811
812 ulong
813 allreg(ulong b, Rgn *r)
814 {
815         Var *v;
816         int i;
817
818         v = var + r->varno;
819         r->regno = 0;
820         switch(v->etype) {
821
822         default:
823                 diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
824                 break;
825
826         case TCHAR:
827         case TUCHAR:
828         case TSHORT:
829         case TUSHORT:
830         case TINT:
831         case TUINT:
832         case TLONG:
833         case TULONG:
834         case TIND:
835         case TARRAY:
836                 i = BtoR(~b);
837                 if(i && r->cost > 0) {
838                         r->regno = i;
839                         return RtoB(i);
840                 }
841                 break;
842
843         case TDOUBLE:
844         case TFLOAT:
845                 i = BtoF(~b);
846                 if(i && r->cost > 0) {
847                         r->regno = i+NREG;
848                         return FtoB(i);
849                 }
850                 break;
851         }
852         return 0;
853 }
854
855 void
856 paint1(Reg *r, int bn)
857 {
858         Reg *r1;
859         Prog *p;
860         int z;
861         ulong bb;
862
863         z = bn/32;
864         bb = 1L<<(bn%32);
865         if(r->act.b[z] & bb)
866                 return;
867         for(;;) {
868                 if(!(r->refbehind.b[z] & bb))
869                         break;
870                 r1 = r->p1;
871                 if(r1 == R)
872                         break;
873                 if(!(r1->refahead.b[z] & bb))
874                         break;
875                 if(r1->act.b[z] & bb)
876                         break;
877                 r = r1;
878         }
879
880         if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
881                 change -= CLOAD * r->loop;
882                 if(debug['R'] && debug['v'])
883                         print("%ld%P\tld %B $%d\n", r->loop,
884                                 r->prog, blsh(bn), change);
885         }
886         for(;;) {
887                 r->act.b[z] |= bb;
888                 p = r->prog;
889
890                 if(r->use1.b[z] & bb) {
891                         change += CREF * r->loop;
892                         if(p->to.type == D_FREG && p->as == AMOVW)
893                                 change = -CINF;         /* cant go Rreg to Freg */
894                         if(debug['R'] && debug['v'])
895                                 print("%ld%P\tu1 %B $%d\n", r->loop,
896                                         p, blsh(bn), change);
897                 }
898
899                 if((r->use2.b[z]|r->set.b[z]) & bb) {
900                         change += CREF * r->loop;
901                         if(p->from.type == D_FREG && p->as == AMOVW)
902                                 change = -CINF;         /* cant go Rreg to Freg */
903                         if(debug['R'] && debug['v'])
904                                 print("%ld%P\tu2 %B $%d\n", r->loop,
905                                         p, blsh(bn), change);
906                 }
907
908                 if(STORE(r) & r->regdiff.b[z] & bb) {
909                         change -= CLOAD * r->loop;
910                         if(debug['R'] && debug['v'])
911                                 print("%ld%P\tst %B $%d\n", r->loop,
912                                         p, blsh(bn), change);
913                 }
914
915                 if(r->refbehind.b[z] & bb)
916                         for(r1 = r->p2; r1 != R; r1 = r1->p2link)
917                                 if(r1->refahead.b[z] & bb)
918                                         paint1(r1, bn);
919
920                 if(!(r->refahead.b[z] & bb))
921                         break;
922                 r1 = r->s2;
923                 if(r1 != R)
924                         if(r1->refbehind.b[z] & bb)
925                                 paint1(r1, bn);
926                 r = r->s1;
927                 if(r == R)
928                         break;
929                 if(r->act.b[z] & bb)
930                         break;
931                 if(!(r->refbehind.b[z] & bb))
932                         break;
933         }
934 }
935
936 ulong
937 paint2(Reg *r, int bn)
938 {
939         Reg *r1;
940         int z;
941         ulong bb, vreg;
942
943         z = bn/32;
944         bb = 1L << (bn%32);
945         vreg = regbits;
946         if(!(r->act.b[z] & bb))
947                 return vreg;
948         for(;;) {
949                 if(!(r->refbehind.b[z] & bb))
950                         break;
951                 r1 = r->p1;
952                 if(r1 == R)
953                         break;
954                 if(!(r1->refahead.b[z] & bb))
955                         break;
956                 if(!(r1->act.b[z] & bb))
957                         break;
958                 r = r1;
959         }
960         for(;;) {
961                 r->act.b[z] &= ~bb;
962
963                 vreg |= r->regu;
964
965                 if(r->refbehind.b[z] & bb)
966                         for(r1 = r->p2; r1 != R; r1 = r1->p2link)
967                                 if(r1->refahead.b[z] & bb)
968                                         vreg |= paint2(r1, bn);
969
970                 if(!(r->refahead.b[z] & bb))
971                         break;
972                 r1 = r->s2;
973                 if(r1 != R)
974                         if(r1->refbehind.b[z] & bb)
975                                 vreg |= paint2(r1, bn);
976                 r = r->s1;
977                 if(r == R)
978                         break;
979                 if(!(r->act.b[z] & bb))
980                         break;
981                 if(!(r->refbehind.b[z] & bb))
982                         break;
983         }
984         return vreg;
985 }
986
987 void
988 paint3(Reg *r, int bn, long rb, int rn)
989 {
990         Reg *r1;
991         Prog *p;
992         int z;
993         ulong bb;
994
995         z = bn/32;
996         bb = 1L << (bn%32);
997         if(r->act.b[z] & bb)
998                 return;
999         for(;;) {
1000                 if(!(r->refbehind.b[z] & bb))
1001                         break;
1002                 r1 = r->p1;
1003                 if(r1 == R)
1004                         break;
1005                 if(!(r1->refahead.b[z] & bb))
1006                         break;
1007                 if(r1->act.b[z] & bb)
1008                         break;
1009                 r = r1;
1010         }
1011
1012         if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1013                 addmove(r, bn, rn, 0);
1014         for(;;) {
1015                 r->act.b[z] |= bb;
1016                 p = r->prog;
1017
1018                 if(r->use1.b[z] & bb) {
1019                         if(debug['R'])
1020                                 print("%P", p);
1021                         addreg(&p->from, rn);
1022                         if(debug['R'])
1023                                 print("\t.c%P\n", p);
1024                 }
1025                 if((r->use2.b[z]|r->set.b[z]) & bb) {
1026                         if(debug['R'])
1027                                 print("%P", p);
1028                         addreg(&p->to, rn);
1029                         if(debug['R'])
1030                                 print("\t.c%P\n", p);
1031                 }
1032
1033                 if(STORE(r) & r->regdiff.b[z] & bb)
1034                         addmove(r, bn, rn, 1);
1035                 r->regu |= rb;
1036
1037                 if(r->refbehind.b[z] & bb)
1038                         for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1039                                 if(r1->refahead.b[z] & bb)
1040                                         paint3(r1, bn, rb, rn);
1041
1042                 if(!(r->refahead.b[z] & bb))
1043                         break;
1044                 r1 = r->s2;
1045                 if(r1 != R)
1046                         if(r1->refbehind.b[z] & bb)
1047                                 paint3(r1, bn, rb, rn);
1048                 r = r->s1;
1049                 if(r == R)
1050                         break;
1051                 if(r->act.b[z] & bb)
1052                         break;
1053                 if(!(r->refbehind.b[z] & bb))
1054                         break;
1055         }
1056 }
1057
1058 void
1059 addreg(Adr *a, int rn)
1060 {
1061
1062         a->sym = 0;
1063         a->name = D_NONE;
1064         a->type = D_REG;
1065         a->reg = rn;
1066         if(rn >= NREG) {
1067                 a->type = D_FREG;
1068                 a->reg = rn-NREG;
1069         }
1070 }
1071
1072 /*
1073  * track register variables including external registers:
1074  *      bit     reg
1075  *      0       R7
1076  *      1       R8
1077  *      ...     ...
1078  *      21      R28
1079  */
1080 long
1081 RtoB(int r)
1082 {
1083
1084         if(r >= REGMIN && r <= REGMAX)
1085                 return 1L << (r-REGMIN);
1086         return 0;
1087 }
1088
1089 int
1090 BtoR(long b)
1091 {
1092         b &= 0x001fffffL;
1093         if(b == 0)
1094                 return 0;
1095         return bitno(b) + REGMIN;
1096 }
1097
1098 /*
1099  *      bit     reg
1100  *      22      F17
1101  *      23      F18
1102  *      ...     ...
1103  *      31      F26
1104  */
1105 long
1106 FtoB(int f)
1107 {
1108         if(f < FREGMIN || f > FREGEXT)
1109                 return 0;
1110         return 1L << (f - FREGMIN + 22);
1111 }
1112
1113 int
1114 BtoF(long b)
1115 {
1116
1117         b &= 0xffc00000L;
1118         if(b == 0)
1119                 return 0;
1120         return bitno(b) - 22 + FREGMIN;
1121 }