]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/kc/reg.c
abaco: cleanup, handle image/x-icon, don't use backspace as a hotkey, and remove...
[plan9front.git] / sys / src / cmd / kc / reg.c
1 #include "gc.h"
2
3 Reg*
4 rega(void)
5 {
6         Reg *r;
7
8         r = freer;
9         if(r == R) {
10                 r = alloc(sizeof(*r));
11         } else
12                 freer = r->link;
13
14         *r = zreg;
15         return r;
16 }
17
18 int
19 rcmp(const void *a1, const void *a2)
20 {
21         Rgn *p1, *p2;
22         int c1, c2;
23
24         p1 = (Rgn*)a1;
25         p2 = (Rgn*)a2;
26         c1 = p2->cost;
27         c2 = p1->cost;
28         if(c1 -= c2)
29                 return c1;
30         return p2->varno - p1->varno;
31 }
32
33 void
34 regopt(Prog *p)
35 {
36         Reg *r, *r1, *r2;
37         Prog *p1;
38         int i, z;
39         long initpc, val, npc;
40         ulong vreg;
41         Bits bit;
42         struct
43         {
44                 long    m;
45                 long    c;
46                 Reg*    p;
47         } log5[6], *lp;
48
49         firstr = R;
50         lastr = R;
51         nvar = 0;
52         regbits = 0;
53         for(z=0; z<BITS; z++) {
54                 externs.b[z] = 0;
55                 params.b[z] = 0;
56                 consts.b[z] = 0;
57                 addrs.b[z] = 0;
58         }
59
60         /*
61          * pass 1
62          * build aux data structure
63          * allocate pcs
64          * find use and set of variables
65          */
66         val = 5L * 5L * 5L * 5L * 5L;
67         lp = log5;
68         for(i=0; i<5; i++) {
69                 lp->m = val;
70                 lp->c = 0;
71                 lp->p = R;
72                 val /= 5L;
73                 lp++;
74         }
75         val = 0;
76         for(; p != P; p = p->link) {
77                 switch(p->as) {
78                 case ADATA:
79                 case AGLOBL:
80                 case ANAME:
81                 case ASIGNAME:
82                         continue;
83                 }
84                 r = rega();
85                 if(firstr == R) {
86                         firstr = r;
87                         lastr = r;
88                 } else {
89                         lastr->link = r;
90                         r->p1 = lastr;
91                         lastr->s1 = r;
92                         lastr = r;
93                 }
94                 r->prog = p;
95                 r->pc = val;
96                 val++;
97
98                 lp = log5;
99                 for(i=0; i<5; i++) {
100                         lp->c--;
101                         if(lp->c <= 0) {
102                                 lp->c = lp->m;
103                                 if(lp->p != R)
104                                         lp->p->log5 = r;
105                                 lp->p = r;
106                                 (lp+1)->c = 0;
107                                 break;
108                         }
109                         lp++;
110                 }
111
112                 r1 = r->p1;
113                 if(r1 != R)
114                 switch(r1->prog->as) {
115                 case ARETURN:
116                 case AJMP:
117                 case ARETT:
118                         r->p1 = R;
119                         r1->s1 = R;
120                 }
121
122                 /*
123                  * left side always read
124                  */
125                 bit = mkvar(&p->from, p->as==AMOVW);
126                 for(z=0; z<BITS; z++)
127                         r->use1.b[z] |= bit.b[z];
128
129                 /*
130                  * right side depends on opcode
131                  */
132                 bit = mkvar(&p->to, 0);
133                 if(bany(&bit))
134                 switch(p->as) {
135                 default:
136                         diag(Z, "reg: unknown asop: %A", p->as);
137                         break;
138
139                 /*
140                  * right side write
141                  */
142                 case ANOP:
143                 case AMOVB:
144                 case AMOVBU:
145                 case AMOVH:
146                 case AMOVHU:
147                 case AMOVW:
148                 case AFMOVF:
149                 case AFMOVD:
150                         for(z=0; z<BITS; z++)
151                                 r->set.b[z] |= bit.b[z];
152                         break;
153
154                 /*
155                  * funny
156                  */
157                 case AJMPL:
158                         for(z=0; z<BITS; z++)
159                                 addrs.b[z] |= bit.b[z];
160                         break;
161                 }
162         }
163         if(firstr == R)
164                 return;
165         initpc = pc - val;
166         npc = val;
167
168         /*
169          * pass 2
170          * turn branch references to pointers
171          * build back pointers
172          */
173         for(r = firstr; r != R; r = r->link) {
174                 p = r->prog;
175                 if(p->to.type == D_BRANCH) {
176                         val = p->to.offset - initpc;
177                         r1 = firstr;
178                         while(r1 != R) {
179                                 r2 = r1->log5;
180                                 if(r2 != R && val >= r2->pc) {
181                                         r1 = r2;
182                                         continue;
183                                 }
184                                 if(r1->pc == val)
185                                         break;
186                                 r1 = r1->link;
187                         }
188                         if(r1 == R) {
189                                 nearln = p->lineno;
190                                 diag(Z, "ref not found\n%P", p);
191                                 continue;
192                         }
193                         if(r1 == r) {
194                                 nearln = p->lineno;
195                                 diag(Z, "ref to self\n%P", p);
196                                 continue;
197                         }
198                         r->s2 = r1;
199                         r->p2link = r1->p2;
200                         r1->p2 = r;
201                 }
202         }
203         if(debug['R']) {
204                 p = firstr->prog;
205                 print("\n%L %D\n", p->lineno, &p->from);
206         }
207
208         /*
209          * pass 2.5
210          * find looping structure
211          */
212         for(r = firstr; r != R; r = r->link)
213                 r->active = 0;
214         change = 0;
215         loopit(firstr, npc);
216         if(debug['R'] && debug['v']) {
217                 print("\nlooping structure:\n");
218                 for(r = firstr; r != R; r = r->link) {
219                         print("%ld:%P", r->loop, r->prog);
220                         for(z=0; z<BITS; z++)
221                                 bit.b[z] = r->use1.b[z] |
222                                         r->use2.b[z] | r->set.b[z];
223                         if(bany(&bit)) {
224                                 print("\t");
225                                 if(bany(&r->use1))
226                                         print(" u1=%B", r->use1);
227                                 if(bany(&r->use2))
228                                         print(" u2=%B", r->use2);
229                                 if(bany(&r->set))
230                                         print(" st=%B", r->set);
231                         }
232                         print("\n");
233                 }
234         }
235
236         /*
237          * pass 3
238          * iterate propagating usage
239          *      back until flow graph is complete
240          */
241 loop1:
242         change = 0;
243         for(r = firstr; r != R; r = r->link)
244                 r->active = 0;
245         for(r = firstr; r != R; r = r->link)
246                 if(r->prog->as == ARETURN)
247                         prop(r, zbits, zbits);
248 loop11:
249         /* pick up unreachable code */
250         i = 0;
251         for(r = firstr; r != R; r = r1) {
252                 r1 = r->link;
253                 if(r1 && r1->active && !r->active) {
254                         prop(r, zbits, zbits);
255                         i = 1;
256                 }
257         }
258         if(i)
259                 goto loop11;
260         if(change)
261                 goto loop1;
262
263
264         /*
265          * pass 4
266          * iterate propagating register/variable synchrony
267          *      forward until graph is complete
268          */
269 loop2:
270         change = 0;
271         for(r = firstr; r != R; r = r->link)
272                 r->active = 0;
273         synch(firstr, zbits);
274         if(change)
275                 goto loop2;
276
277
278         /*
279          * pass 5
280          * isolate regions
281          * calculate costs (paint1)
282          */
283         r = firstr;
284         if(r) {
285                 for(z=0; z<BITS; z++)
286                         bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
287                           ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
288                 if(bany(&bit)) {
289                         nearln = r->prog->lineno;
290                         warn(Z, "used and not set: %B", bit);
291                         if(debug['R'] && !debug['w'])
292                                 print("used and not set: %B\n", bit);
293                 }
294         }
295         if(debug['R'] && debug['v'])
296                 print("\nprop structure:\n");
297         for(r = firstr; r != R; r = r->link)
298                 r->act = zbits;
299         rgp = region;
300         nregion = 0;
301         for(r = firstr; r != R; r = r->link) {
302                 if(debug['R'] && debug['v'])
303                         print("%P\n     set = %B; rah = %B; cal = %B\n",
304                                 r->prog, r->set, r->refahead, r->calahead);
305                 for(z=0; z<BITS; z++)
306                         bit.b[z] = r->set.b[z] &
307                           ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
308                 if(bany(&bit)) {
309                         nearln = r->prog->lineno;
310                         warn(Z, "set and not used: %B", bit);
311                         if(debug['R'])
312                                 print("set an not used: %B\n", bit);
313                         excise(r);
314                 }
315                 for(z=0; z<BITS; z++)
316                         bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
317                 while(bany(&bit)) {
318                         i = bnum(bit);
319                         rgp->enter = r;
320                         rgp->varno = i;
321                         change = 0;
322                         if(debug['R'] && debug['v'])
323                                 print("\n");
324                         paint1(r, i);
325                         bit.b[i/32] &= ~(1L<<(i%32));
326                         if(change <= 0) {
327                                 if(debug['R'])
328                                         print("%L$%d: %B\n",
329                                                 r->prog->lineno, change, blsh(i));
330                                 continue;
331                         }
332                         rgp->cost = change;
333                         nregion++;
334                         if(nregion >= NRGN) {
335                                 warn(Z, "too many regions");
336                                 goto brk;
337                         }
338                         rgp++;
339                 }
340         }
341 brk:
342         qsort(region, nregion, sizeof(region[0]), rcmp);
343
344         /*
345          * pass 6
346          * determine used registers (paint2)
347          * replace code (paint3)
348          */
349         rgp = region;
350         for(i=0; i<nregion; i++) {
351                 bit = blsh(rgp->varno);
352                 vreg = paint2(rgp->enter, rgp->varno);
353                 vreg = allreg(vreg, rgp);
354                 if(debug['R']) {
355                         if(rgp->regno >= NREG)
356                                 print("%L$%d F%d: %B\n",
357                                         rgp->enter->prog->lineno,
358                                         rgp->cost,
359                                         rgp->regno-NREG,
360                                         bit);
361                         else
362                                 print("%L$%d R%d: %B\n",
363                                         rgp->enter->prog->lineno,
364                                         rgp->cost,
365                                         rgp->regno,
366                                         bit);
367                 }
368                 if(rgp->regno != 0)
369                         paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
370                 rgp++;
371         }
372         /*
373          * pass 7
374          * peep-hole on basic block
375          */
376         if(!debug['R'] || debug['P'])
377                 peep();
378
379         /*
380          * pass 8
381          * recalculate pc
382          */
383         val = initpc;
384         for(r = firstr; r != R; r = r1) {
385                 r->pc = val;
386                 p = r->prog;
387                 p1 = P;
388                 r1 = r->link;
389                 if(r1 != R)
390                         p1 = r1->prog;
391                 for(; p != p1; p = p->link) {
392                         switch(p->as) {
393                         default:
394                                 val++;
395                                 break;
396
397                         case ANOP:
398                         case ADATA:
399                         case AGLOBL:
400                         case ANAME:
401                         case ASIGNAME:
402                                 break;
403                         }
404                 }
405         }
406         pc = val;
407
408         /*
409          * fix up branches
410          */
411         if(debug['R'])
412                 if(bany(&addrs))
413                         print("addrs: %B\n", addrs);
414
415         r1 = 0; /* set */
416         for(r = firstr; r != R; r = r->link) {
417                 p = r->prog;
418                 if(p->to.type == D_BRANCH)
419                         p->to.offset = r->s2->pc;
420                 r1 = r;
421         }
422
423         /*
424          * last pass
425          * eliminate nops
426          * free aux structures
427          */
428         for(p = firstr->prog; p != P; p = p->link){
429                 while(p->link && p->link->as == ANOP)
430                         p->link = p->link->link;
431         }
432         if(r1 != R) {
433                 r1->link = freer;
434                 freer = firstr;
435         }
436 }
437
438 /*
439  * add mov b,rn
440  * just after r
441  */
442 void
443 addmove(Reg *r, int bn, int rn, int f)
444 {
445         Prog *p, *p1;
446         Adr *a;
447         Var *v;
448
449         p1 = alloc(sizeof(*p1));
450         *p1 = zprog;
451         p = r->prog;
452
453         p1->link = p->link;
454         p->link = p1;
455         p1->lineno = p->lineno;
456
457         v = var + bn;
458
459         a = &p1->to;
460         a->sym = v->sym;
461         a->name = v->name;
462         a->offset = v->offset;
463         a->etype = v->etype;
464         a->type = D_OREG;
465         if(a->etype == TARRAY || a->sym == S)
466                 a->type = D_CONST;
467
468         p1->as = AMOVW;
469         if(v->etype == TCHAR || v->etype == TUCHAR)
470                 p1->as = AMOVB;
471         if(v->etype == TSHORT || v->etype == TUSHORT)
472                 p1->as = AMOVH;
473         if(v->etype == TFLOAT)
474                 p1->as = AFMOVF;
475         if(v->etype == TDOUBLE)
476                 p1->as = AFMOVD;
477
478         p1->from.type = D_REG;
479         p1->from.reg = rn;
480         if(rn >= NREG) {
481                 p1->from.type = D_FREG;
482                 p1->from.reg = rn-NREG;
483         }
484         if(!f) {
485                 p1->from = *a;
486                 *a = zprog.from;
487                 a->type = D_REG;
488                 a->reg = rn;
489                 if(rn >= NREG) {
490                         a->type = D_FREG;
491                         a->reg = rn-NREG;
492                 }
493                 if(v->etype == TUCHAR)
494                         p1->as = AMOVBU;
495                 if(v->etype == TUSHORT)
496                         p1->as = AMOVHU;
497         }
498         if(debug['R'])
499                 print("%P\t.a%P\n", p, p1);
500 }
501
502 Bits
503 mkvar(Adr *a, int docon)
504 {
505         Var *v;
506         int i, t, n, et, z;
507         long o;
508         Bits bit;
509         Sym *s;
510
511         t = a->type;
512         if(t == D_REG && a->reg != NREG)
513                 regbits |= RtoB(a->reg);
514         if(t == D_FREG && a->reg != NREG)
515                 regbits |= FtoB(a->reg);
516         s = a->sym;
517         o = a->offset;
518         et = a->etype;
519         if(s == S) {
520                 if(t != D_CONST || !docon || a->reg != NREG)
521                         goto none;
522                 et = TLONG;
523         }
524         if(t == D_CONST) {
525                 if(s == S && sval(o))
526                         goto none;
527         }
528         n = a->name;
529         v = var;
530         for(i=0; i<nvar; i++) {
531                 if(s == v->sym)
532                 if(n == v->name)
533                 if(o == v->offset)
534                         goto out;
535                 v++;
536         }
537         if(s)
538                 if(s->name[0] == '.')
539                         goto none;
540         if(nvar >= NVAR) {
541                 if(debug['w'] > 1 && s)
542                         warn(Z, "variable not optimized: %s", s->name);
543                 goto none;
544         }
545         i = nvar;
546         nvar++;
547         v = &var[i];
548         v->sym = s;
549         v->offset = o;
550         v->etype = et;
551         v->name = n;
552         if(debug['R'])
553                 print("bit=%2d et=%2d %D\n", i, et, a);
554 out:
555         bit = blsh(i);
556         if(n == D_EXTERN || n == D_STATIC)
557                 for(z=0; z<BITS; z++)
558                         externs.b[z] |= bit.b[z];
559         if(n == D_PARAM)
560                 for(z=0; z<BITS; z++)
561                         params.b[z] |= bit.b[z];
562         if(v->etype != et || !typechlpfd[et])   /* funny punning */
563                 for(z=0; z<BITS; z++)
564                         addrs.b[z] |= bit.b[z];
565         if(t == D_CONST) {
566                 if(s == S) {
567                         for(z=0; z<BITS; z++)
568                                 consts.b[z] |= bit.b[z];
569                         return bit;
570                 }
571                 if(et != TARRAY)
572                         for(z=0; z<BITS; z++)
573                                 addrs.b[z] |= bit.b[z];
574                 for(z=0; z<BITS; z++)
575                         params.b[z] |= bit.b[z];
576                 return bit;
577         }
578         if(t == D_OREG)
579                 return bit;
580
581 none:
582         return zbits;
583 }
584
585 void
586 prop(Reg *r, Bits ref, Bits cal)
587 {
588         Reg *r1, *r2;
589         int z;
590
591         for(r1 = r; r1 != R; r1 = r1->p1) {
592                 for(z=0; z<BITS; z++) {
593                         ref.b[z] |= r1->refahead.b[z];
594                         if(ref.b[z] != r1->refahead.b[z]) {
595                                 r1->refahead.b[z] = ref.b[z];
596                                 change++;
597                         }
598                         cal.b[z] |= r1->calahead.b[z];
599                         if(cal.b[z] != r1->calahead.b[z]) {
600                                 r1->calahead.b[z] = cal.b[z];
601                                 change++;
602                         }
603                 }
604                 switch(r1->prog->as) {
605                 case AJMPL:
606                         for(z=0; z<BITS; z++) {
607                                 cal.b[z] |= ref.b[z] | externs.b[z];
608                                 ref.b[z] = 0;
609                         }
610                         break;
611
612                 case ATEXT:
613                         for(z=0; z<BITS; z++) {
614                                 cal.b[z] = 0;
615                                 ref.b[z] = 0;
616                         }
617                         break;
618
619                 case ARETURN:
620                         for(z=0; z<BITS; z++) {
621                                 cal.b[z] = externs.b[z];
622                                 ref.b[z] = 0;
623                         }
624                 }
625                 for(z=0; z<BITS; z++) {
626                         ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
627                                 r1->use1.b[z] | r1->use2.b[z];
628                         cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
629                         r1->refbehind.b[z] = ref.b[z];
630                         r1->calbehind.b[z] = cal.b[z];
631                 }
632                 if(r1->active)
633                         break;
634                 r1->active = 1;
635         }
636         for(; r != r1; r = r->p1)
637                 for(r2 = r->p2; r2 != R; r2 = r2->p2link)
638                         prop(r2, r->refbehind, r->calbehind);
639 }
640
641
642 /*
643  * find looping structure
644  *
645  * 1) find reverse postordering
646  * 2) find approximate dominators,
647  *      the actual dominators if the flow graph is reducible
648  *      otherwise, dominators plus some other non-dominators.
649  *      See Matthew S. Hecht and Jeffrey D. Ullman,
650  *      "Analysis of a Simple Algorithm for Global Data Flow Problems",
651  *      Conf.  Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
652  *      Oct. 1-3, 1973, pp.  207-217.
653  * 3) find all nodes with a predecessor dominated by the current node.
654  *      such a node is a loop head.
655  *      recursively, all preds with a greater rpo number are in the loop
656  */
657 long
658 postorder(Reg *r, Reg **rpo2r, long n)
659 {
660         Reg *r1;
661
662         r->rpo = 1;
663         r1 = r->s1;
664         if(r1 && !r1->rpo)
665                 n = postorder(r1, rpo2r, n);
666         r1 = r->s2;
667         if(r1 && !r1->rpo)
668                 n = postorder(r1, rpo2r, n);
669         rpo2r[n] = r;
670         n++;
671         return n;
672 }
673
674 long
675 rpolca(long *idom, long rpo1, long rpo2)
676 {
677         long t;
678
679         if(rpo1 == -1)
680                 return rpo2;
681         while(rpo1 != rpo2){
682                 if(rpo1 > rpo2){
683                         t = rpo2;
684                         rpo2 = rpo1;
685                         rpo1 = t;
686                 }
687                 while(rpo1 < rpo2){
688                         t = idom[rpo2];
689                         if(t >= rpo2)
690                                 fatal(Z, "bad idom");
691                         rpo2 = t;
692                 }
693         }
694         return rpo1;
695 }
696
697 int
698 doms(long *idom, long r, long s)
699 {
700         while(s > r)
701                 s = idom[s];
702         return s == r;
703 }
704
705 int
706 loophead(long *idom, Reg *r)
707 {
708         long src;
709
710         src = r->rpo;
711         if(r->p1 != R && doms(idom, src, r->p1->rpo))
712                 return 1;
713         for(r = r->p2; r != R; r = r->p2link)
714                 if(doms(idom, src, r->rpo))
715                         return 1;
716         return 0;
717 }
718
719 void
720 loopmark(Reg **rpo2r, long head, Reg *r)
721 {
722         if(r->rpo < head || r->active == head)
723                 return;
724         r->active = head;
725         r->loop += LOOP;
726         if(r->p1 != R)
727                 loopmark(rpo2r, head, r->p1);
728         for(r = r->p2; r != R; r = r->p2link)
729                 loopmark(rpo2r, head, r);
730 }
731
732 void
733 loopit(Reg *r, long nr)
734 {
735         Reg *r1;
736         long i, d, me;
737
738         if(nr > maxnr) {
739                 rpo2r = alloc(nr * sizeof(Reg*));
740                 idom = alloc(nr * sizeof(long));
741                 maxnr = nr;
742         }
743
744         d = postorder(r, rpo2r, 0);
745         if(d > nr)
746                 fatal(Z, "too many reg nodes");
747         nr = d;
748         for(i = 0; i < nr / 2; i++){
749                 r1 = rpo2r[i];
750                 rpo2r[i] = rpo2r[nr - 1 - i];
751                 rpo2r[nr - 1 - i] = r1;
752         }
753         for(i = 0; i < nr; i++)
754                 rpo2r[i]->rpo = i;
755
756         idom[0] = 0;
757         for(i = 0; i < nr; i++){
758                 r1 = rpo2r[i];
759                 me = r1->rpo;
760                 d = -1;
761                 if(r1->p1 != R && r1->p1->rpo < me)
762                         d = r1->p1->rpo;
763                 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
764                         if(r1->rpo < me)
765                                 d = rpolca(idom, d, r1->rpo);
766                 idom[i] = d;
767         }
768
769         for(i = 0; i < nr; i++){
770                 r1 = rpo2r[i];
771                 r1->loop++;
772                 if(r1->p2 != R && loophead(idom, r1))
773                         loopmark(rpo2r, i, r1);
774         }
775 }
776
777 void
778 synch(Reg *r, Bits dif)
779 {
780         Reg *r1;
781         int z;
782
783         for(r1 = r; r1 != R; r1 = r1->s1) {
784                 for(z=0; z<BITS; z++) {
785                         dif.b[z] = (dif.b[z] &
786                                 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
787                                         r1->set.b[z] | r1->regdiff.b[z];
788                         if(dif.b[z] != r1->regdiff.b[z]) {
789                                 r1->regdiff.b[z] = dif.b[z];
790                                 change++;
791                         }
792                 }
793                 if(r1->active)
794                         break;
795                 r1->active = 1;
796                 for(z=0; z<BITS; z++)
797                         dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
798                 if(r1->s2 != R)
799                         synch(r1->s2, dif);
800         }
801 }
802
803 ulong
804 allreg(ulong b, Rgn *r)
805 {
806         Var *v;
807         int i;
808
809         v = var + r->varno;
810         r->regno = 0;
811         switch(v->etype) {
812
813         default:
814                 diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
815                 break;
816
817         case TCHAR:
818         case TUCHAR:
819         case TSHORT:
820         case TUSHORT:
821         case TINT:
822         case TUINT:
823         case TLONG:
824         case TULONG:
825         case TIND:
826         case TARRAY:
827                 i = BtoR(~b);
828                 if(i && r->cost > 0) {
829                         r->regno = i;
830                         return RtoB(i);
831                 }
832                 break;
833
834         case TDOUBLE:
835         case TFLOAT:
836                 i = BtoF(~b);
837                 if(i && r->cost > 0) {
838                         r->regno = i+NREG;
839                         return FtoB(i);
840                 }
841                 break;
842         }
843         return 0;
844 }
845
846 void
847 paint1(Reg *r, int bn)
848 {
849         Reg *r1;
850         Prog *p;
851         int z;
852         ulong bb;
853
854         z = bn/32;
855         bb = 1L<<(bn%32);
856         if(r->act.b[z] & bb)
857                 return;
858         for(;;) {
859                 if(!(r->refbehind.b[z] & bb))
860                         break;
861                 r1 = r->p1;
862                 if(r1 == R)
863                         break;
864                 if(!(r1->refahead.b[z] & bb))
865                         break;
866                 if(r1->act.b[z] & bb)
867                         break;
868                 r = r1;
869         }
870
871         if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
872                 change -= CLOAD * r->loop;
873                 if(debug['R'] && debug['v'])
874                         print("%ld%P\tld %B $%d\n", r->loop,
875                                 r->prog, blsh(bn), change);
876         }
877         for(;;) {
878                 r->act.b[z] |= bb;
879                 p = r->prog;
880
881                 if(r->use1.b[z] & bb) {
882                         change += CREF * r->loop;
883                         if(p->to.type == D_FREG && p->as == AMOVW)
884                                 change = -CINF;         /* cant go Rreg to Freg */
885                         if(debug['R'] && debug['v'])
886                                 print("%ld%P\tu1 %B $%d\n", r->loop,
887                                         p, blsh(bn), change);
888                 }
889
890                 if((r->use2.b[z]|r->set.b[z]) & bb) {
891                         change += CREF * r->loop;
892                         if(p->from.type == D_FREG && p->as == AMOVW)
893                                 change = -CINF;         /* cant go Rreg to Freg */
894                         if(debug['R'] && debug['v'])
895                                 print("%ld%P\tu2 %B $%d\n", r->loop,
896                                         p, blsh(bn), change);
897                 }
898
899                 if(STORE(r) & r->regdiff.b[z] & bb) {
900                         change -= CLOAD * r->loop;
901                         if(debug['R'] && debug['v'])
902                                 print("%ld%P\tst %B $%d\n", r->loop,
903                                         p, blsh(bn), change);
904                 }
905
906                 if(r->refbehind.b[z] & bb)
907                         for(r1 = r->p2; r1 != R; r1 = r1->p2link)
908                                 if(r1->refahead.b[z] & bb)
909                                         paint1(r1, bn);
910
911                 if(!(r->refahead.b[z] & bb))
912                         break;
913                 r1 = r->s2;
914                 if(r1 != R)
915                         if(r1->refbehind.b[z] & bb)
916                                 paint1(r1, bn);
917                 r = r->s1;
918                 if(r == R)
919                         break;
920                 if(r->act.b[z] & bb)
921                         break;
922                 if(!(r->refbehind.b[z] & bb))
923                         break;
924         }
925 }
926
927 ulong
928 paint2(Reg *r, int bn)
929 {
930         Reg *r1;
931         int z;
932         ulong bb, vreg;
933
934         z = bn/32;
935         bb = 1L << (bn%32);
936         vreg = regbits;
937         if(!(r->act.b[z] & bb))
938                 return vreg;
939         for(;;) {
940                 if(!(r->refbehind.b[z] & bb))
941                         break;
942                 r1 = r->p1;
943                 if(r1 == R)
944                         break;
945                 if(!(r1->refahead.b[z] & bb))
946                         break;
947                 if(!(r1->act.b[z] & bb))
948                         break;
949                 r = r1;
950         }
951         for(;;) {
952                 r->act.b[z] &= ~bb;
953
954                 vreg |= r->regu;
955
956                 if(r->refbehind.b[z] & bb)
957                         for(r1 = r->p2; r1 != R; r1 = r1->p2link)
958                                 if(r1->refahead.b[z] & bb)
959                                         vreg |= paint2(r1, bn);
960
961                 if(!(r->refahead.b[z] & bb))
962                         break;
963                 r1 = r->s2;
964                 if(r1 != R)
965                         if(r1->refbehind.b[z] & bb)
966                                 vreg |= paint2(r1, bn);
967                 r = r->s1;
968                 if(r == R)
969                         break;
970                 if(!(r->act.b[z] & bb))
971                         break;
972                 if(!(r->refbehind.b[z] & bb))
973                         break;
974         }
975         return vreg;
976 }
977
978 void
979 paint3(Reg *r, int bn, long rb, int rn)
980 {
981         Reg *r1;
982         Prog *p;
983         int z;
984         ulong bb;
985
986         z = bn/32;
987         bb = 1L << (bn%32);
988         if(r->act.b[z] & bb)
989                 return;
990         for(;;) {
991                 if(!(r->refbehind.b[z] & bb))
992                         break;
993                 r1 = r->p1;
994                 if(r1 == R)
995                         break;
996                 if(!(r1->refahead.b[z] & bb))
997                         break;
998                 if(r1->act.b[z] & bb)
999                         break;
1000                 r = r1;
1001         }
1002
1003         if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1004                 addmove(r, bn, rn, 0);
1005         for(;;) {
1006                 r->act.b[z] |= bb;
1007                 p = r->prog;
1008
1009                 if(r->use1.b[z] & bb) {
1010                         if(debug['R'])
1011                                 print("%P", p);
1012                         addreg(&p->from, rn);
1013                         if(debug['R'])
1014                                 print("\t.c%P\n", p);
1015                 }
1016                 if((r->use2.b[z]|r->set.b[z]) & bb) {
1017                         if(debug['R'])
1018                                 print("%P", p);
1019                         addreg(&p->to, rn);
1020                         if(debug['R'])
1021                                 print("\t.c%P\n", p);
1022                 }
1023
1024                 if(STORE(r) & r->regdiff.b[z] & bb)
1025                         addmove(r, bn, rn, 1);
1026                 r->regu |= rb;
1027
1028                 if(r->refbehind.b[z] & bb)
1029                         for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1030                                 if(r1->refahead.b[z] & bb)
1031                                         paint3(r1, bn, rb, rn);
1032
1033                 if(!(r->refahead.b[z] & bb))
1034                         break;
1035                 r1 = r->s2;
1036                 if(r1 != R)
1037                         if(r1->refbehind.b[z] & bb)
1038                                 paint3(r1, bn, rb, rn);
1039                 r = r->s1;
1040                 if(r == R)
1041                         break;
1042                 if(r->act.b[z] & bb)
1043                         break;
1044                 if(!(r->refbehind.b[z] & bb))
1045                         break;
1046         }
1047 }
1048
1049 void
1050 addreg(Adr *a, int rn)
1051 {
1052
1053         a->sym = 0;
1054         a->name = D_NONE;
1055         a->type = D_REG;
1056         a->reg = rn;
1057         if(rn >= NREG) {
1058                 a->type = D_FREG;
1059                 a->reg = rn-NREG;
1060         }
1061 }
1062
1063 /*
1064  *      bit     reg
1065  *      0       R9
1066  *      1       R10
1067  *      ...     ...
1068  *      4       R13
1069  *      5       R16
1070  *      ...     ...
1071  *      20      R31
1072  */
1073 long
1074 RtoB(int r)
1075 {
1076
1077         if(r >= 9 && r <= 13)
1078                 return 1L << (r-9);
1079         if(r >= 16 && r <= 31)
1080                 return 1L << (r-11);
1081         return 0;
1082 }
1083
1084 int
1085 BtoR(long b)
1086 {
1087         int r;
1088
1089         b &= 0x001fffffL;
1090         if(b == 0)
1091                 return 0;
1092         r = bitno(b) + 9;
1093         if(r >= 14)
1094                 r += 2;
1095         return r;
1096 }
1097
1098 /*
1099  *      bit     reg
1100  *      22      F4
1101  *      23      F6
1102  *      ...     ...
1103  *      31      F22
1104  */
1105 long
1106 FtoB(int f)
1107 {
1108
1109         if(f < 4 || f > 22 || (f&1))
1110                 return 0;
1111         return 1L << (f/2 + 20);
1112 }
1113
1114 BtoF(long b)
1115 {
1116
1117         b &= 0xffc00000L;
1118         if(b == 0)
1119                 return 0;
1120         return bitno(b)*2 - 40;
1121 }