]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/5c/reg.c
a27b7148a713dfef23a9f1538c67b43f7883abd3
[plan9front.git] / sys / src / cmd / 5c / reg.c
1 #include "gc.h"
2
3 void    addsplits(void);
4
5 Reg*
6 rega(void)
7 {
8         Reg *r;
9
10         r = freer;
11         if(r == R) {
12                 r = alloc(sizeof(*r));
13         } else
14                 freer = r->link;
15
16         *r = zreg;
17         return r;
18 }
19
20 int
21 rcmp(const void *a1, const void *a2)
22 {
23         Rgn *p1, *p2;
24         int c1, c2;
25
26         p1 = (Rgn*)a1;
27         p2 = (Rgn*)a2;
28         c1 = p2->cost;
29         c2 = p1->cost;
30         if(c1 -= c2)
31                 return c1;
32         return p2->varno - p1->varno;
33 }
34
35 void
36 regopt(Prog *p)
37 {
38         Reg *r, *r1, *r2;
39         Prog *p1;
40         int i, z;
41         long initpc, val, npc;
42         ulong vreg;
43         Bits bit;
44         struct
45         {
46                 long    m;
47                 long    c;
48                 Reg*    p;
49         } log5[6], *lp;
50
51         firstr = R;
52         lastr = R;
53         nvar = 0;
54         regbits = 0;
55         for(z=0; z<BITS; z++) {
56                 externs.b[z] = 0;
57                 params.b[z] = 0;
58                 consts.b[z] = 0;
59                 addrs.b[z] = 0;
60         }
61
62         /*
63          * pass 1
64          * build aux data structure
65          * allocate pcs
66          * find use and set of variables
67          */
68         val = 5L * 5L * 5L * 5L * 5L;
69         lp = log5;
70         for(i=0; i<5; i++) {
71                 lp->m = val;
72                 lp->c = 0;
73                 lp->p = R;
74                 val /= 5L;
75                 lp++;
76         }
77         val = 0;
78         for(; p != P; p = p->link) {
79                 switch(p->as) {
80                 case ADATA:
81                 case AGLOBL:
82                 case ANAME:
83                 case ASIGNAME:
84                         continue;
85                 }
86                 r = rega();
87                 if(firstr == R) {
88                         firstr = r;
89                         lastr = r;
90                 } else {
91                         lastr->link = r;
92                         r->p1 = lastr;
93                         lastr->s1 = r;
94                         lastr = r;
95                 }
96                 r->prog = p;
97                 r->pc = val;
98                 val++;
99
100                 lp = log5;
101                 for(i=0; i<5; i++) {
102                         lp->c--;
103                         if(lp->c <= 0) {
104                                 lp->c = lp->m;
105                                 if(lp->p != R)
106                                         lp->p->log5 = r;
107                                 lp->p = r;
108                                 (lp+1)->c = 0;
109                                 break;
110                         }
111                         lp++;
112                 }
113
114                 r1 = r->p1;
115                 if(r1 != R)
116                 switch(r1->prog->as) {
117                 case ARET:
118                 case AB:
119                 case ARFE:
120                         r->p1 = R;
121                         r1->s1 = R;
122                 }
123
124                 /*
125                  * left side always read
126                  */
127                 bit = mkvar(&p->from, p->as==AMOVW);
128                 for(z=0; z<BITS; z++)
129                         r->use1.b[z] |= bit.b[z];
130
131                 /*
132                  * right side depends on opcode
133                  */
134                 bit = mkvar(&p->to, 0);
135                 if(bany(&bit))
136                 switch(p->as) {
137                 default:
138                         diag(Z, "reg: unknown asop: %A", p->as);
139                         break;
140
141                 /*
142                  * right side write
143                  */
144                 case ANOP:
145                 case AMOVB:
146                 case AMOVBU:
147                 case AMOVH:
148                 case AMOVHU:
149                 case AMOVW:
150                 case AMOVF:
151                 case AMOVD:
152                         for(z=0; z<BITS; z++)
153                                 r->set.b[z] |= bit.b[z];
154                         break;
155
156                 /*
157                  * funny
158                  */
159                 case ABL:
160                         for(z=0; z<BITS; z++)
161                                 addrs.b[z] |= bit.b[z];
162                         break;
163                 }
164
165                 if(p->as == AMOVM) {
166                         if(p->from.type == D_CONST)
167                                 z = p->from.offset;
168                         else
169                                 z = p->to.offset;
170                         for(i=0; z; i++) {
171                                 if(z&1)
172                                         regbits |= RtoB(i);
173                                 z >>= 1;
174                         }
175                 }
176         }
177         if(firstr == R)
178                 return;
179         initpc = pc - val;
180         npc = val;
181
182         /*
183          * pass 2
184          * turn branch references to pointers
185          * build back pointers
186          */
187         for(r = firstr; r != R; r = r->link) {
188                 p = r->prog;
189                 if(p->to.type == D_BRANCH) {
190                         val = p->to.offset - initpc;
191                         r1 = firstr;
192                         while(r1 != R) {
193                                 r2 = r1->log5;
194                                 if(r2 != R && val >= r2->pc) {
195                                         r1 = r2;
196                                         continue;
197                                 }
198                                 if(r1->pc == val)
199                                         break;
200                                 r1 = r1->link;
201                         }
202                         if(r1 == R) {
203                                 nearln = p->lineno;
204                                 diag(Z, "ref not found\n%P", p);
205                                 continue;
206                         }
207                         if(r1 == r) {
208                                 nearln = p->lineno;
209                                 diag(Z, "ref to self\n%P", p);
210                                 continue;
211                         }
212                         r->s2 = r1;
213                         r->p2link = r1->p2;
214                         r1->p2 = r;
215                 }
216         }
217         if(debug['R']) {
218                 p = firstr->prog;
219                 print("\n%L %D\n", p->lineno, &p->from);
220         }
221
222         /*
223          * pass 2.5
224          * find looping structure
225          */
226         for(r = firstr; r != R; r = r->link)
227                 r->active = 0;
228         change = 0;
229         loopit(firstr, npc);
230
231         /*
232          * pass 3
233          * iterate propagating usage
234          *      back until flow graph is complete
235          */
236 loop1:
237         change = 0;
238         for(r = firstr; r != R; r = r->link)
239                 r->active = 0;
240         for(r = firstr; r != R; r = r->link)
241                 if(r->prog->as == ARET)
242                         prop(r, zbits, zbits);
243 loop11:
244         /* pick up unreachable code */
245         i = 0;
246         for(r = firstr; r != R; r = r1) {
247                 r1 = r->link;
248                 if(r1 && r1->active && !r->active) {
249                         prop(r, zbits, zbits);
250                         i = 1;
251                 }
252         }
253         if(i)
254                 goto loop11;
255         if(change)
256                 goto loop1;
257
258
259         /*
260          * pass 4
261          * iterate propagating register/variable synchrony
262          *      forward until graph is complete
263          */
264 loop2:
265         change = 0;
266         for(r = firstr; r != R; r = r->link)
267                 r->active = 0;
268         synch(firstr, zbits);
269         if(change)
270                 goto loop2;
271
272         addsplits();
273
274         if(debug['R'] && debug['v']) {
275                 print("\nprop structure:\n");
276                 for(r = firstr; r != R; r = r->link) {
277                         print("%ld:%P", r->loop, r->prog);
278                         for(z=0; z<BITS; z++)
279                                 bit.b[z] = r->set.b[z] |
280                                         r->refahead.b[z] | r->calahead.b[z] |
281                                         r->refbehind.b[z] | r->calbehind.b[z] |
282                                         r->use1.b[z] | r->use2.b[z];
283                         if(bany(&bit)) {
284                                 print("\t");
285                                 if(bany(&r->use1))
286                                         print(" u1=%B", r->use1);
287                                 if(bany(&r->use2))
288                                         print(" u2=%B", r->use2);
289                                 if(bany(&r->set))
290                                         print(" st=%B", r->set);
291                                 if(bany(&r->refahead))
292                                         print(" ra=%B", r->refahead);
293                                 if(bany(&r->calahead))
294                                         print(" ca=%B", r->calahead);
295                                 if(bany(&r->refbehind))
296                                         print(" rb=%B", r->refbehind);
297                                 if(bany(&r->calbehind))
298                                         print(" cb=%B", r->calbehind);
299                         }
300                         print("\n");
301                 }
302         }
303
304         /*
305          * pass 5
306          * isolate regions
307          * calculate costs (paint1)
308          */
309         r = firstr;
310         if(r) {
311                 for(z=0; z<BITS; z++)
312                         bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
313                           ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
314                 if(bany(&bit)) {
315                         nearln = r->prog->lineno;
316                         warn(Z, "used and not set: %B", bit);
317                         if(debug['R'] && !debug['w'])
318                                 print("used and not set: %B\n", bit);
319                 }
320         }
321
322         for(r = firstr; r != R; r = r->link)
323                 r->act = zbits;
324         rgp = region;
325         nregion = 0;
326         for(r = firstr; r != R; r = r->link) {
327                 for(z=0; z<BITS; z++)
328                         bit.b[z] = r->set.b[z] &
329                           ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
330                 if(bany(&bit)) {
331                         nearln = r->prog->lineno;
332                         warn(Z, "set and not used: %B", bit);
333                         if(debug['R'])
334                                 print("set and not used: %B\n", bit);
335                         excise(r);
336                 }
337                 for(z=0; z<BITS; z++)
338                         bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
339                 while(bany(&bit)) {
340                         i = bnum(bit);
341                         rgp->enter = r;
342                         rgp->varno = i;
343                         change = 0;
344                         if(debug['R'] && debug['v'])
345                                 print("\n");
346                         paint1(r, i);
347                         bit.b[i/32] &= ~(1L<<(i%32));
348                         if(change <= 0) {
349                                 if(debug['R'])
350                                         print("%L $%d: %B\n",
351                                                 r->prog->lineno, change, blsh(i));
352                                 continue;
353                         }
354                         rgp->cost = change;
355                         nregion++;
356                         if(nregion >= NRGN) {
357                                 warn(Z, "too many regions");
358                                 goto brk;
359                         }
360                         rgp++;
361                 }
362         }
363 brk:
364         qsort(region, nregion, sizeof(region[0]), rcmp);
365
366         /*
367          * pass 6
368          * determine used registers (paint2)
369          * replace code (paint3)
370          */
371         rgp = region;
372         for(i=0; i<nregion; i++) {
373                 bit = blsh(rgp->varno);
374                 vreg = paint2(rgp->enter, rgp->varno);
375                 vreg = allreg(vreg, rgp);
376                 if(debug['R']) {
377                         if(rgp->regno >= NREG)
378                                 print("%L $%d F%d: %B\n",
379                                         rgp->enter->prog->lineno,
380                                         rgp->cost,
381                                         rgp->regno-NREG,
382                                         bit);
383                         else
384                                 print("%L $%d R%d: %B\n",
385                                         rgp->enter->prog->lineno,
386                                         rgp->cost,
387                                         rgp->regno,
388                                         bit);
389                 }
390                 if(rgp->regno != 0)
391                         paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
392                 rgp++;
393         }
394         /*
395          * pass 7
396          * peep-hole on basic block
397          */
398         if(!debug['R'] || debug['P'])
399                 peep();
400
401         /*
402          * pass 8
403          * recalculate pc
404          */
405         val = initpc;
406         for(r = firstr; r != R; r = r1) {
407                 r->pc = val;
408                 p = r->prog;
409                 p1 = P;
410                 r1 = r->link;
411                 if(r1 != R)
412                         p1 = r1->prog;
413                 for(; p != p1; p = p->link) {
414                         switch(p->as) {
415                         default:
416                                 val++;
417                                 break;
418
419                         case ANOP:
420                         case ADATA:
421                         case AGLOBL:
422                         case ANAME:
423                         case ASIGNAME:
424                                 break;
425                         }
426                 }
427         }
428         pc = val;
429
430         /*
431          * fix up branches
432          */
433         if(debug['R'])
434                 if(bany(&addrs))
435                         print("addrs: %B\n", addrs);
436
437         r1 = 0; /* set */
438         for(r = firstr; r != R; r = r->link) {
439                 p = r->prog;
440                 if(p->to.type == D_BRANCH)
441                         p->to.offset = r->s2->pc;
442                 r1 = r;
443         }
444
445         /*
446          * last pass
447          * eliminate nops
448          * free aux structures
449          */
450         for(p = firstr->prog; p != P; p = p->link){
451                 while(p->link && p->link->as == ANOP)
452                         p->link = p->link->link;
453         }
454         if(r1 != R) {
455                 r1->link = freer;
456                 freer = firstr;
457         }
458 }
459
460 void
461 addsplits(void)
462 {
463         Reg *r, *r1;
464         int z, i;
465         Bits bit;
466
467         for(r = firstr; r != R; r = r->link) {
468                 if(r->loop > 1)
469                         continue;
470                 if(r->prog->as == ABL)
471                         continue;
472                 for(r1 = r->p2; r1 != R; r1 = r1->p2link) {
473                         if(r1->loop <= 1)
474                                 continue;
475                         for(z=0; z<BITS; z++)
476                                 bit.b[z] = r1->calbehind.b[z] &
477                                         (r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) &
478                                         ~(r->calahead.b[z] & addrs.b[z]);
479                         while(bany(&bit)) {
480                                 i = bnum(bit);
481                                 bit.b[i/32] &= ~(1L << (i%32));
482                         }
483                 }
484         }
485 }
486
487 /*
488  * add mov b,rn
489  * just after r
490  */
491 void
492 addmove(Reg *r, int bn, int rn, int f)
493 {
494         Prog *p, *p1;
495         Adr *a;
496         Var *v;
497
498         p1 = alloc(sizeof(*p1));
499         *p1 = zprog;
500         p = r->prog;
501
502         p1->link = p->link;
503         p->link = p1;
504         p1->lineno = p->lineno;
505
506         v = var + bn;
507
508         a = &p1->to;
509         a->sym = v->sym;
510         a->name = v->name;
511         a->offset = v->offset;
512         a->etype = v->etype;
513         a->type = D_OREG;
514         if(a->etype == TARRAY || a->sym == S)
515                 a->type = D_CONST;
516
517         p1->as = AMOVW;
518         if(v->etype == TCHAR || v->etype == TUCHAR)
519                 p1->as = AMOVB;
520         if(v->etype == TSHORT || v->etype == TUSHORT)
521                 p1->as = AMOVH;
522         if(v->etype == TFLOAT)
523                 p1->as = AMOVF;
524         if(v->etype == TDOUBLE)
525                 p1->as = AMOVD;
526
527         p1->from.type = D_REG;
528         p1->from.reg = rn;
529         if(rn >= NREG) {
530                 p1->from.type = D_FREG;
531                 p1->from.reg = rn-NREG;
532         }
533         if(!f) {
534                 p1->from = *a;
535                 *a = zprog.from;
536                 a->type = D_REG;
537                 a->reg = rn;
538                 if(rn >= NREG) {
539                         a->type = D_FREG;
540                         a->reg = rn-NREG;
541                 }
542                 if(v->etype == TUCHAR)
543                         p1->as = AMOVBU;
544                 if(v->etype == TUSHORT)
545                         p1->as = AMOVHU;
546         }
547         if(debug['R'])
548                 print("%P\t.a%P\n", p, p1);
549 }
550
551 Bits
552 mkvar(Adr *a, int docon)
553 {
554         Var *v;
555         int i, t, n, et, z;
556         long o;
557         Bits bit;
558         Sym *s;
559
560         t = a->type;
561         if(t == D_REG && a->reg != NREG)
562                 regbits |= RtoB(a->reg);
563         if(t == D_FREG && a->reg != NREG)
564                 regbits |= FtoB(a->reg);
565         s = a->sym;
566         o = a->offset;
567         et = a->etype;
568         if(s == S) {
569                 if(t != D_CONST || !docon || a->reg != NREG)
570                         goto none;
571                 et = TLONG;
572         }
573         if(t == D_CONST) {
574                 if(s == S && sval(o))
575                         goto none;
576         }
577
578         n = a->name;
579         v = var;
580         for(i=0; i<nvar; i++) {
581                 if(s == v->sym)
582                 if(n == v->name)
583                 if(o == v->offset)
584                         goto out;
585                 v++;
586         }
587         if(s)
588                 if(s->name[0] == '.')
589                         goto none;
590         if(nvar >= NVAR) {
591                 if(debug['w'] > 1 && s)
592                         warn(Z, "variable not optimized: %s", s->name);
593                 goto none;
594         }
595         i = nvar;
596         nvar++;
597         v = &var[i];
598         v->sym = s;
599         v->offset = o;
600         v->etype = et;
601         v->name = n;
602         if(debug['R'])
603                 print("bit=%2d et=%2d %D\n", i, et, a);
604 out:
605         bit = blsh(i);
606         if(n == D_EXTERN || n == D_STATIC)
607                 for(z=0; z<BITS; z++)
608                         externs.b[z] |= bit.b[z];
609         if(n == D_PARAM)
610                 for(z=0; z<BITS; z++)
611                         params.b[z] |= bit.b[z];
612         if(v->etype != et || !typechlpfd[et])   /* funny punning */
613                 for(z=0; z<BITS; z++)
614                         addrs.b[z] |= bit.b[z];
615         if(t == D_CONST) {
616                 if(s == S) {
617                         for(z=0; z<BITS; z++)
618                                 consts.b[z] |= bit.b[z];
619                         return bit;
620                 }
621                 if(et != TARRAY)
622                         for(z=0; z<BITS; z++)
623                                 addrs.b[z] |= bit.b[z];
624                 for(z=0; z<BITS; z++)
625                         params.b[z] |= bit.b[z];
626                 return bit;
627         }
628         if(t == D_OREG)
629                 return bit;
630
631 none:
632         return zbits;
633 }
634
635 void
636 prop(Reg *r, Bits ref, Bits cal)
637 {
638         Reg *r1, *r2;
639         int z;
640
641         for(r1 = r; r1 != R; r1 = r1->p1) {
642                 for(z=0; z<BITS; z++) {
643                         ref.b[z] |= r1->refahead.b[z];
644                         if(ref.b[z] != r1->refahead.b[z]) {
645                                 r1->refahead.b[z] = ref.b[z];
646                                 change++;
647                         }
648                         cal.b[z] |= r1->calahead.b[z];
649                         if(cal.b[z] != r1->calahead.b[z]) {
650                                 r1->calahead.b[z] = cal.b[z];
651                                 change++;
652                         }
653                 }
654                 switch(r1->prog->as) {
655                 case ABL:
656                         for(z=0; z<BITS; z++) {
657                                 cal.b[z] |= ref.b[z] | externs.b[z];
658                                 ref.b[z] = 0;
659                         }
660                         break;
661
662                 case ATEXT:
663                         for(z=0; z<BITS; z++) {
664                                 cal.b[z] = 0;
665                                 ref.b[z] = 0;
666                         }
667                         break;
668
669                 case ARET:
670                         for(z=0; z<BITS; z++) {
671                                 cal.b[z] = externs.b[z];
672                                 ref.b[z] = 0;
673                         }
674                 }
675                 for(z=0; z<BITS; z++) {
676                         ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
677                                 r1->use1.b[z] | r1->use2.b[z];
678                         cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
679                         r1->refbehind.b[z] = ref.b[z];
680                         r1->calbehind.b[z] = cal.b[z];
681                 }
682                 if(r1->active)
683                         break;
684                 r1->active = 1;
685         }
686         for(; r != r1; r = r->p1)
687                 for(r2 = r->p2; r2 != R; r2 = r2->p2link)
688                         prop(r2, r->refbehind, r->calbehind);
689 }
690
691 /*
692  * find looping structure
693  *
694  * 1) find reverse postordering
695  * 2) find approximate dominators,
696  *      the actual dominators if the flow graph is reducible
697  *      otherwise, dominators plus some other non-dominators.
698  *      See Matthew S. Hecht and Jeffrey D. Ullman,
699  *      "Analysis of a Simple Algorithm for Global Data Flow Problems",
700  *      Conf.  Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
701  *      Oct. 1-3, 1973, pp.  207-217.
702  * 3) find all nodes with a predecessor dominated by the current node.
703  *      such a node is a loop head.
704  *      recursively, all preds with a greater rpo number are in the loop
705  */
706 long
707 postorder(Reg *r, Reg **rpo2r, long n)
708 {
709         Reg *r1;
710
711         r->rpo = 1;
712         r1 = r->s1;
713         if(r1 && !r1->rpo)
714                 n = postorder(r1, rpo2r, n);
715         r1 = r->s2;
716         if(r1 && !r1->rpo)
717                 n = postorder(r1, rpo2r, n);
718         rpo2r[n] = r;
719         n++;
720         return n;
721 }
722
723 long
724 rpolca(long *idom, long rpo1, long rpo2)
725 {
726         long t;
727
728         if(rpo1 == -1)
729                 return rpo2;
730         while(rpo1 != rpo2){
731                 if(rpo1 > rpo2){
732                         t = rpo2;
733                         rpo2 = rpo1;
734                         rpo1 = t;
735                 }
736                 while(rpo1 < rpo2){
737                         t = idom[rpo2];
738                         if(t >= rpo2)
739                                 fatal(Z, "bad idom");
740                         rpo2 = t;
741                 }
742         }
743         return rpo1;
744 }
745
746 int
747 doms(long *idom, long r, long s)
748 {
749         while(s > r)
750                 s = idom[s];
751         return s == r;
752 }
753
754 int
755 loophead(long *idom, Reg *r)
756 {
757         long src;
758
759         src = r->rpo;
760         if(r->p1 != R && doms(idom, src, r->p1->rpo))
761                 return 1;
762         for(r = r->p2; r != R; r = r->p2link)
763                 if(doms(idom, src, r->rpo))
764                         return 1;
765         return 0;
766 }
767
768 void
769 loopmark(Reg **rpo2r, long head, Reg *r)
770 {
771         if(r->rpo < head || r->active == head)
772                 return;
773         r->active = head;
774         r->loop += LOOP;
775         if(r->p1 != R)
776                 loopmark(rpo2r, head, r->p1);
777         for(r = r->p2; r != R; r = r->p2link)
778                 loopmark(rpo2r, head, r);
779 }
780
781 void
782 loopit(Reg *r, long nr)
783 {
784         Reg *r1;
785         long i, d, me;
786
787         if(nr > maxnr) {
788                 rpo2r = alloc(nr * sizeof(Reg*));
789                 idom = alloc(nr * sizeof(long));
790                 maxnr = nr;
791         }
792
793         d = postorder(r, rpo2r, 0);
794         if(d > nr)
795                 fatal(Z, "too many reg nodes");
796         nr = d;
797         for(i = 0; i < nr / 2; i++){
798                 r1 = rpo2r[i];
799                 rpo2r[i] = rpo2r[nr - 1 - i];
800                 rpo2r[nr - 1 - i] = r1;
801         }
802         for(i = 0; i < nr; i++)
803                 rpo2r[i]->rpo = i;
804
805         idom[0] = 0;
806         for(i = 0; i < nr; i++){
807                 r1 = rpo2r[i];
808                 me = r1->rpo;
809                 d = -1;
810                 if(r1->p1 != R && r1->p1->rpo < me)
811                         d = r1->p1->rpo;
812                 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
813                         if(r1->rpo < me)
814                                 d = rpolca(idom, d, r1->rpo);
815                 idom[i] = d;
816         }
817
818         for(i = 0; i < nr; i++){
819                 r1 = rpo2r[i];
820                 r1->loop++;
821                 if(r1->p2 != R && loophead(idom, r1))
822                         loopmark(rpo2r, i, r1);
823         }
824 }
825
826 void
827 synch(Reg *r, Bits dif)
828 {
829         Reg *r1;
830         int z;
831
832         for(r1 = r; r1 != R; r1 = r1->s1) {
833                 for(z=0; z<BITS; z++) {
834                         dif.b[z] = (dif.b[z] &
835                                 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
836                                         r1->set.b[z] | r1->regdiff.b[z];
837                         if(dif.b[z] != r1->regdiff.b[z]) {
838                                 r1->regdiff.b[z] = dif.b[z];
839                                 change++;
840                         }
841                 }
842                 if(r1->active)
843                         break;
844                 r1->active = 1;
845                 for(z=0; z<BITS; z++)
846                         dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
847                 if(r1->s2 != R)
848                         synch(r1->s2, dif);
849         }
850 }
851
852 ulong
853 allreg(ulong b, Rgn *r)
854 {
855         Var *v;
856         int i;
857
858         v = var + r->varno;
859         r->regno = 0;
860         switch(v->etype) {
861
862         default:
863                 diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
864                 break;
865
866         case TCHAR:
867         case TUCHAR:
868         case TSHORT:
869         case TUSHORT:
870         case TINT:
871         case TUINT:
872         case TLONG:
873         case TULONG:
874         case TIND:
875         case TARRAY:
876                 i = BtoR(~b);
877                 if(i && r->cost >= 0) {
878                         r->regno = i;
879                         return RtoB(i);
880                 }
881                 break;
882
883         case TVLONG:
884         case TDOUBLE:
885         case TFLOAT:
886                 i = BtoF(~b);
887                 if(i && r->cost >= 0) {
888                         r->regno = i+NREG;
889                         return FtoB(i);
890                 }
891                 break;
892         }
893         return 0;
894 }
895
896 void
897 paint1(Reg *r, int bn)
898 {
899         Reg *r1;
900         Prog *p;
901         int z;
902         ulong bb;
903
904         z = bn/32;
905         bb = 1L<<(bn%32);
906         if(r->act.b[z] & bb)
907                 return;
908         for(;;) {
909                 if(!(r->refbehind.b[z] & bb))
910                         break;
911                 r1 = r->p1;
912                 if(r1 == R)
913                         break;
914                 if(!(r1->refahead.b[z] & bb))
915                         break;
916                 if(r1->act.b[z] & bb)
917                         break;
918                 r = r1;
919         }
920
921         if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) {
922                 change -= CLOAD * r->loop;
923                 if(debug['R'] && debug['v'])
924                         print("%ld%P\tld %B $%d\n", r->loop,
925                                 r->prog, blsh(bn), change);
926         }
927         for(;;) {
928                 r->act.b[z] |= bb;
929                 p = r->prog;
930
931                 if(r->use1.b[z] & bb) {
932                         change += CREF * r->loop;
933                         if(debug['R'] && debug['v'])
934                                 print("%ld%P\tu1 %B $%d\n", r->loop,
935                                         p, blsh(bn), change);
936                 }
937
938                 if((r->use2.b[z]|r->set.b[z]) & bb) {
939                         change += CREF * r->loop;
940                         if(debug['R'] && debug['v'])
941                                 print("%ld%P\tu2 %B $%d\n", r->loop,
942                                         p, blsh(bn), change);
943                 }
944
945                 if(STORE(r) & r->regdiff.b[z] & bb) {
946                         change -= CLOAD * r->loop;
947                         if(debug['R'] && debug['v'])
948                                 print("%ld%P\tst %B $%d\n", r->loop,
949                                         p, blsh(bn), change);
950                 }
951
952                 if(r->refbehind.b[z] & bb)
953                         for(r1 = r->p2; r1 != R; r1 = r1->p2link)
954                                 if(r1->refahead.b[z] & bb)
955                                         paint1(r1, bn);
956
957                 if(!(r->refahead.b[z] & bb))
958                         break;
959                 r1 = r->s2;
960                 if(r1 != R)
961                         if(r1->refbehind.b[z] & bb)
962                                 paint1(r1, bn);
963                 r = r->s1;
964                 if(r == R)
965                         break;
966                 if(r->act.b[z] & bb)
967                         break;
968                 if(!(r->refbehind.b[z] & bb))
969                         break;
970         }
971 }
972
973 ulong
974 paint2(Reg *r, int bn)
975 {
976         Reg *r1;
977         int z;
978         ulong bb, vreg;
979
980         z = bn/32;
981         bb = 1L << (bn%32);
982         vreg = regbits;
983         if(!(r->act.b[z] & bb))
984                 return vreg;
985         for(;;) {
986                 if(!(r->refbehind.b[z] & bb))
987                         break;
988                 r1 = r->p1;
989                 if(r1 == R)
990                         break;
991                 if(!(r1->refahead.b[z] & bb))
992                         break;
993                 if(!(r1->act.b[z] & bb))
994                         break;
995                 r = r1;
996         }
997         for(;;) {
998                 r->act.b[z] &= ~bb;
999
1000                 vreg |= r->regu;
1001
1002                 if(r->refbehind.b[z] & bb)
1003                         for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1004                                 if(r1->refahead.b[z] & bb)
1005                                         vreg |= paint2(r1, bn);
1006
1007                 if(!(r->refahead.b[z] & bb))
1008                         break;
1009                 r1 = r->s2;
1010                 if(r1 != R)
1011                         if(r1->refbehind.b[z] & bb)
1012                                 vreg |= paint2(r1, bn);
1013                 r = r->s1;
1014                 if(r == R)
1015                         break;
1016                 if(!(r->act.b[z] & bb))
1017                         break;
1018                 if(!(r->refbehind.b[z] & bb))
1019                         break;
1020         }
1021         return vreg;
1022 }
1023
1024 void
1025 paint3(Reg *r, int bn, long rb, int rn)
1026 {
1027         Reg *r1;
1028         Prog *p;
1029         int z;
1030         ulong bb;
1031
1032         z = bn/32;
1033         bb = 1L << (bn%32);
1034         if(r->act.b[z] & bb)
1035                 return;
1036         for(;;) {
1037                 if(!(r->refbehind.b[z] & bb))
1038                         break;
1039                 r1 = r->p1;
1040                 if(r1 == R)
1041                         break;
1042                 if(!(r1->refahead.b[z] & bb))
1043                         break;
1044                 if(r1->act.b[z] & bb)
1045                         break;
1046                 r = r1;
1047         }
1048
1049         if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1050                 addmove(r, bn, rn, 0);
1051         for(;;) {
1052                 r->act.b[z] |= bb;
1053                 p = r->prog;
1054
1055                 if(r->use1.b[z] & bb) {
1056                         if(debug['R'])
1057                                 print("%P", p);
1058                         addreg(&p->from, rn);
1059                         if(debug['R'])
1060                                 print("\t.c%P\n", p);
1061                 }
1062                 if((r->use2.b[z]|r->set.b[z]) & bb) {
1063                         if(debug['R'])
1064                                 print("%P", p);
1065                         addreg(&p->to, rn);
1066                         if(debug['R'])
1067                                 print("\t.c%P\n", p);
1068                 }
1069
1070                 if(STORE(r) & r->regdiff.b[z] & bb)
1071                         addmove(r, bn, rn, 1);
1072                 r->regu |= rb;
1073
1074                 if(r->refbehind.b[z] & bb)
1075                         for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1076                                 if(r1->refahead.b[z] & bb)
1077                                         paint3(r1, bn, rb, rn);
1078
1079                 if(!(r->refahead.b[z] & bb))
1080                         break;
1081                 r1 = r->s2;
1082                 if(r1 != R)
1083                         if(r1->refbehind.b[z] & bb)
1084                                 paint3(r1, bn, rb, rn);
1085                 r = r->s1;
1086                 if(r == R)
1087                         break;
1088                 if(r->act.b[z] & bb)
1089                         break;
1090                 if(!(r->refbehind.b[z] & bb))
1091                         break;
1092         }
1093 }
1094
1095 void
1096 addreg(Adr *a, int rn)
1097 {
1098
1099         a->sym = 0;
1100         a->name = D_NONE;
1101         a->type = D_REG;
1102         a->reg = rn;
1103         if(rn >= NREG) {
1104                 a->type = D_FREG;
1105                 a->reg = rn-NREG;
1106         }
1107 }
1108
1109 /*
1110  *      bit     reg
1111  *      0       R0
1112  *      1       R1
1113  *      ...     ...
1114  *      10      R10
1115  */
1116 long
1117 RtoB(int r)
1118 {
1119
1120         if(r >= REGMIN && r <= REGMAX)
1121                 return 1L << r;
1122         return 0;
1123 }
1124
1125 int
1126 BtoR(long b)
1127 {
1128         b &= 0x01fcL;   // excluded R9 and R10 for m and g
1129         if(b == 0)
1130                 return 0;
1131         return bitno(b);
1132 }
1133
1134 /*
1135  *      bit     reg
1136  *      18      F2
1137  *      19      F3
1138  *      ...     ...
1139  *      23      F7
1140  */
1141 long
1142 FtoB(int f)
1143 {
1144
1145         if(f < 2 || f > NFREG-1)
1146                 return 0;
1147         return 1L << (f + 16);
1148 }
1149
1150 int
1151 BtoF(long b)
1152 {
1153
1154         b &= 0xfc0000L;
1155         if(b == 0)
1156                 return 0;
1157         return bitno(b) - 16;
1158 }