]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/7c/reg.c
merge
[plan9front.git] / sys / src / cmd / 7c / reg.c
1 #include "gc.h"
2
3 void    addsplits(void);
4
5 Reg*
6 rega(void)
7 {
8         Reg *r;
9
10         r = freer;
11         if(r == R) {
12                 r = alloc(sizeof(*r));
13         } else
14                 freer = r->link;
15
16         *r = zreg;
17         return r;
18 }
19
20 int
21 rcmp(void *a1, void *a2)
22 {
23         Rgn *p1, *p2;
24         int c1, c2;
25
26         p1 = a1;
27         p2 = a2;
28         c1 = p2->cost;
29         c2 = p1->cost;
30         if(c1 -= c2)
31                 return c1;
32         return p2->varno - p1->varno;
33 }
34
35 void
36 regopt(Prog *p)
37 {
38         Reg *r, *r1, *r2;
39         Prog *p1;
40         int i, z;
41         long initpc, val, npc;
42         ulong vreg;
43         Bits bit;
44         struct
45         {
46                 long    m;
47                 long    c;
48                 Reg*    p;
49         } log5[6], *lp;
50
51         firstr = R;
52         lastr = R;
53         nvar = 0;
54         regbits = 0;
55         for(z=0; z<BITS; z++) {
56                 externs.b[z] = 0;
57                 params.b[z] = 0;
58                 consts.b[z] = 0;
59                 addrs.b[z] = 0;
60         }
61
62         /*
63          * pass 1
64          * build aux data structure
65          * allocate pcs
66          * find use and set of variables
67          */
68         val = 5L * 5L * 5L * 5L * 5L;
69         lp = log5;
70         for(i=0; i<5; i++) {
71                 lp->m = val;
72                 lp->c = 0;
73                 lp->p = R;
74                 val /= 5L;
75                 lp++;
76         }
77         val = 0;
78         for(; p != P; p = p->link) {
79                 switch(p->as) {
80                 case ADATA:
81                 case AGLOBL:
82                 case ANAME:
83                         continue;
84                 }
85                 r = rega();
86                 if(firstr == R) {
87                         firstr = r;
88                         lastr = r;
89                 } else {
90                         lastr->link = r;
91                         r->p1 = lastr;
92                         lastr->s1 = r;
93                         lastr = r;
94                 }
95                 r->prog = p;
96                 r->pc = val;
97                 val++;
98
99                 lp = log5;
100                 for(i=0; i<5; i++) {
101                         lp->c--;
102                         if(lp->c <= 0) {
103                                 lp->c = lp->m;
104                                 if(lp->p != R)
105                                         lp->p->log5 = r;
106                                 lp->p = r;
107                                 (lp+1)->c = 0;
108                                 break;
109                         }
110                         lp++;
111                 }
112
113                 r1 = r->p1;
114                 if(r1 != R)
115                 switch(r1->prog->as) {
116                 case ARET:
117                 case AJMP:
118                 case AREI:
119                         r->p1 = R;
120                         r1->s1 = R;
121                 }
122
123                 /*
124                  * left side always read
125                  */
126                 bit = mkvar(&p->from, p->as==AMOVQ || p->as==AMOVL);
127                 for(z=0; z<BITS; z++)
128                         r->use1.b[z] |= bit.b[z];
129
130                 /*
131                  * right side depends on opcode
132                  */
133                 bit = mkvar(&p->to, 0);
134                 if(bany(&bit))
135                 switch(p->as) {
136                 default:
137                         diag(Z, "reg: unknown asop: %ud", p->as);
138                         break;
139
140                 /*
141                  * right side write
142                  */
143                 case ANOP:
144                 case AMOVB:
145                 case AMOVBU:
146                 case AMOVW:
147                 case AMOVWU:
148                 case AMOVL:
149                 case AMOVQ:
150                 case AMOVS:
151                 case AMOVT:
152                         for(z=0; z<BITS; z++)
153                                 r->set.b[z] |= bit.b[z];
154                         break;
155
156                 /*
157                  * funny
158                  */
159                 case AJSR:
160                         for(z=0; z<BITS; z++)
161                                 addrs.b[z] |= bit.b[z];
162                         break;
163                 }
164         }
165         if(firstr == R)
166                 return;
167         initpc = pc - val;
168         npc = val;
169
170         /*
171          * pass 2
172          * turn branch references to pointers
173          * build back pointers
174          */
175         for(r = firstr; r != R; r = r->link) {
176                 p = r->prog;
177                 if(p->to.type == D_BRANCH) {
178                         val = p->to.offset - initpc;
179                         r1 = firstr;
180                         while(r1 != R) {
181                                 r2 = r1->log5;
182                                 if(r2 != R && val >= r2->pc) {
183                                         r1 = r2;
184                                         continue;
185                                 }
186                                 if(r1->pc == val)
187                                         break;
188                                 r1 = r1->link;
189                         }
190                         if(r1 == R) {
191                                 nearln = p->lineno;
192                                 diag(Z, "ref not found\n%P", p);
193                                 continue;
194                         }
195                         if(r1 == r) {
196                                 nearln = p->lineno;
197                                 diag(Z, "ref to self\n%P", p);
198                                 continue;
199                         }
200                         r->s2 = r1;
201                         r->p2link = r1->p2;
202                         r1->p2 = r;
203                 }
204         }
205         if(debug['R']) {
206                 p = firstr->prog;
207                 print("\n%L %D\n", p->lineno, &p->from);
208         }
209
210         /*
211          * pass 2.5
212          * find looping structure
213          */
214         for(r = firstr; r != R; r = r->link)
215                 r->active = 0;
216         change = 0;
217         loopit(firstr, npc);
218
219         /*
220          * pass 3
221          * iterate propagating usage
222          *      back until flow graph is complete
223          */
224 loop1:
225         change = 0;
226         for(r = firstr; r != R; r = r->link)
227                 r->active = 0;
228         for(r = firstr; r != R; r = r->link)
229                 if(r->prog->as == ARET)
230                         prop(r, zbits, zbits);
231 loop11:
232         /* pick up unreachable code */
233         i = 0;
234         for(r = firstr; r != R; r = r1) {
235                 r1 = r->link;
236                 if(r1 && r1->active && !r->active) {
237                         prop(r, zbits, zbits);
238                         i = 1;
239                 }
240         }
241         if(i)
242                 goto loop11;
243         if(change)
244                 goto loop1;
245
246
247         /*
248          * pass 4
249          * iterate propagating register/variable synchrony
250          *      forward until graph is complete
251          */
252 loop2:
253         change = 0;
254         for(r = firstr; r != R; r = r->link)
255                 r->active = 0;
256         synch(firstr, zbits);
257         if(change)
258                 goto loop2;
259
260         addsplits();
261
262         if(debug['R'] && debug['v']) {
263                 print("\nprop structure:\n");
264                 for(r = firstr; r != R; r = r->link) {
265                         print("%ld:%P", r->loop, r->prog);
266                         for(z=0; z<BITS; z++)
267                                 bit.b[z] = r->set.b[z] |
268                                         r->refahead.b[z] | r->calahead.b[z] |
269                                         r->refbehind.b[z] | r->calbehind.b[z] |
270                                         r->use1.b[z] | r->use2.b[z];
271                         if(bany(&bit)) {
272                                 print("\t");
273                                 if(bany(&r->use1))
274                                         print(" u1=%B", r->use1);
275                                 if(bany(&r->use2))
276                                         print(" u2=%B", r->use2);
277                                 if(bany(&r->set))
278                                         print(" st=%B", r->set);
279                                 if(bany(&r->refahead))
280                                         print(" ra=%B", r->refahead);
281                                 if(bany(&r->calahead))
282                                         print(" ca=%B", r->calahead);
283                                 if(bany(&r->refbehind))
284                                         print(" rb=%B", r->refbehind);
285                                 if(bany(&r->calbehind))
286                                         print(" cb=%B", r->calbehind);
287                         }
288                         print("\n");
289                 }
290         }
291
292         /*
293          * pass 5
294          * isolate regions
295          * calculate costs (paint1)
296          */
297         r = firstr;
298         if(r) {
299                 for(z=0; z<BITS; z++)
300                         bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
301                           ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
302                 if(bany(&bit)) {
303                         nearln = r->prog->lineno;
304                         warn(Z, "used and not set: %B", bit);
305                         if(debug['R'] && !debug['w'])
306                                 print("used and not set: %B\n", bit);
307                 }
308         }
309
310         for(r = firstr; r != R; r = r->link)
311                 r->act = zbits;
312         rgp = region;
313         nregion = 0;
314         for(r = firstr; r != R; r = r->link) {
315                 for(z=0; z<BITS; z++)
316                         bit.b[z] = r->set.b[z] &
317                           ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
318                 if(bany(&bit)) {
319                         nearln = r->prog->lineno;
320                         warn(Z, "set and not used: %B", bit);
321                         if(debug['R'])
322                                 print("set and not used: %B\n", bit);
323                         excise(r);
324                 }
325                 for(z=0; z<BITS; z++)
326                         bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
327                 while(bany(&bit)) {
328                         i = bnum(bit);
329                         rgp->enter = r;
330                         rgp->varno = i;
331                         change = 0;
332                         if(debug['R'] && debug['v'])
333                                 print("\n");
334                         paint1(r, i);
335                         bit.b[i/32] &= ~(1L<<(i%32));
336                         if(change <= 0) {
337                                 if(debug['R'])
338                                         print("%L $%d: %B\n",
339                                                 r->prog->lineno, change, blsh(i));
340                                 continue;
341                         }
342                         rgp->cost = change;
343                         nregion++;
344                         if(nregion >= NRGN) {
345                                 warn(Z, "too many regions");
346                                 goto brk;
347                         }
348                         rgp++;
349                 }
350         }
351 brk:
352         qsort(region, nregion, sizeof(region[0]), rcmp);
353
354         /*
355          * pass 6
356          * determine used registers (paint2)
357          * replace code (paint3)
358          */
359         rgp = region;
360         for(i=0; i<nregion; i++) {
361                 bit = blsh(rgp->varno);
362                 vreg = paint2(rgp->enter, rgp->varno);
363                 vreg = allreg(vreg, rgp);
364                 if(debug['R']) {
365                         if(rgp->regno >= NREG)
366                                 print("%L $%d F%d: %B\n",
367                                         rgp->enter->prog->lineno,
368                                         rgp->cost,
369                                         rgp->regno-NREG,
370                                         bit);
371                         else
372                                 print("%L $%d R%d: %B\n",
373                                         rgp->enter->prog->lineno,
374                                         rgp->cost,
375                                         rgp->regno,
376                                         bit);
377                 }
378                 if(rgp->regno != 0)
379                         paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
380                 rgp++;
381         }
382         /*
383          * pass 7
384          * peep-hole on basic block
385          */
386         if(!debug['R'] || debug['P'])
387                 peep();
388
389         /*
390          * pass 8
391          * recalculate pc
392          */
393         val = initpc;
394         for(r = firstr; r != R; r = r1) {
395                 r->pc = val;
396                 p = r->prog;
397                 p1 = P;
398                 r1 = r->link;
399                 if(r1 != R)
400                         p1 = r1->prog;
401                 for(; p != p1; p = p->link) {
402                         switch(p->as) {
403                         default:
404                                 val++;
405                                 break;
406
407                         case ANOP:
408                         case ADATA:
409                         case AGLOBL:
410                         case ANAME:
411                                 break;
412                         }
413                 }
414         }
415         pc = val;
416
417         /*
418          * fix up branches
419          */
420         if(debug['R'])
421                 if(bany(&addrs))
422                         print("addrs: %B\n", addrs);
423
424         r1 = 0; /* set */
425         for(r = firstr; r != R; r = r->link) {
426                 p = r->prog;
427                 if(p->to.type == D_BRANCH)
428                         p->to.offset = r->s2->pc;
429                 r1 = r;
430         }
431
432         /*
433          * last pass
434          * eliminate nops
435          * free aux structures
436          */
437         for(p = firstr->prog; p != P; p = p->link){
438                 while(p->link && p->link->as == ANOP)
439                         p->link = p->link->link;
440         }
441         if(r1 != R) {
442                 r1->link = freer;
443                 freer = firstr;
444         }
445 }
446
447 void
448 addsplits(void)
449 {
450         Reg *r, *r1;
451         int z, i;
452         Bits bit;
453
454         for(r = firstr; r != R; r = r->link) {
455                 if(r->loop > 1)
456                         continue;
457                 if(r->prog->as == AJSR)
458                         continue;
459                 for(r1 = r->p2; r1 != R; r1 = r1->p2link) {
460                         if(r1->loop <= 1)
461                                 continue;
462                         for(z=0; z<BITS; z++)
463                                 bit.b[z] = r1->calbehind.b[z] &
464                                         (r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) &
465                                         ~(r->calahead.b[z] & addrs.b[z]);
466                         while(bany(&bit)) {
467                                 i = bnum(bit);
468                                 bit.b[i/32] &= ~(1L << (i%32));
469                         }
470                 }
471         }
472 }
473
474 /*
475  * add mov b,rn
476  * just after r
477  */
478 void
479 addmove(Reg *r, int bn, int rn, int f)
480 {
481         Prog *p, *p1;
482         Adr *a;
483         Var *v;
484
485         p1 = alloc(sizeof(*p1));
486         *p1 = zprog;
487         p = r->prog;
488
489         p1->link = p->link;
490         p->link = p1;
491         p1->lineno = p->lineno;
492
493         v = var + bn;
494
495         a = &p1->to;
496         a->sym = v->sym;
497         a->name = v->name;
498         a->offset = v->offset;
499         a->etype = v->etype;
500         a->type = D_OREG;
501         if(a->etype == TARRAY || a->sym == S)
502                 a->type = D_CONST;
503
504         p1->as = AMOVL;
505         if(v->etype == TCHAR || v->etype == TUCHAR)             /* TODO: optimise locals? */
506                 p1->as = AMOVB;
507         if(v->etype == TSHORT || v->etype == TUSHORT)
508                 p1->as = AMOVW;
509         if(v->etype == TVLONG || v->etype == TUVLONG)
510                 p1->as = AMOVQ;
511         if(v->etype == TFLOAT)
512                 p1->as = AMOVS;
513         if(v->etype == TDOUBLE)
514                 p1->as = AMOVT;
515
516         p1->from.type = D_REG;
517         p1->from.reg = rn;
518         if(rn >= NREG) {
519                 p1->from.type = D_FREG;
520                 p1->from.reg = rn-NREG;
521         }
522         if(!f) {
523                 p1->from = *a;
524                 *a = zprog.from;
525                 a->type = D_REG;
526                 a->reg = rn;
527                 if(rn >= NREG) {
528                         a->type = D_FREG;
529                         a->reg = rn-NREG;
530                 }
531                 if(v->etype == TUCHAR)
532                         p1->as = AMOVBU;
533                 if(v->etype == TUSHORT)
534                         p1->as = AMOVWU;
535         }
536         if(debug['R'])
537                 print("%P\t.a%P\n", p, p1);
538 }
539
540 Bits
541 mkvar(Adr *a, int docon)
542 {
543         Var *v;
544         int i, t, n, et, z;
545         vlong o;
546         Bits bit;
547         Sym *s;
548
549         t = a->type;
550         if(t == D_REG && a->reg != NREG)
551                 regbits |= RtoB(a->reg);
552         if(t == D_FREG && a->reg != NREG)
553                 regbits |= FtoB(a->reg);
554         s = a->sym;
555         o = a->offset;
556         et = a->etype;
557         if(s == S) {
558                 if(t != D_CONST || !docon || a->reg != NREG)
559                         goto none;
560                 et = TLONG;
561                 if (o > 0x7FFFFFFFLL || o < -0x80000000LL)
562                         et = TVLONG;
563         }
564         if(t == D_CONST) {
565                 if(s == S && sval(o))
566                         goto none;
567         }
568
569         n = a->name;
570         v = var;
571         for(i=0; i<nvar; i++) {
572                 if(s == v->sym)
573                 if(n == v->name)
574                 if(o == v->offset)
575                         goto out;
576                 v++;
577         }
578         if(s)
579                 if(s->name[0] == '.')
580                         goto none;
581         if(nvar >= NVAR) {
582                 if(debug['w'] > 1 && s)
583                         warn(Z, "variable not optimized: %s", s->name);
584                 goto none;
585         }
586         i = nvar;
587         nvar++;
588         v = &var[i];
589         v->sym = s;
590         v->offset = o;
591         v->etype = et;
592         v->name = n;
593         if(debug['R'])
594                 print("bit=%2d et=%2d %D\n", i, et, a);
595 out:
596         bit = blsh(i);
597         if(n == D_EXTERN || n == D_STATIC)
598                 for(z=0; z<BITS; z++)
599                         externs.b[z] |= bit.b[z];
600         if(n == D_PARAM)
601                 for(z=0; z<BITS; z++)
602                         params.b[z] |= bit.b[z];
603         if(v->etype != et || (!typechlpfd[et] && !typev[et]))   /* funny punning? */
604                 for(z=0; z<BITS; z++)
605                         addrs.b[z] |= bit.b[z];
606         if(t == D_CONST) {
607                 if(s == S) {
608                         for(z=0; z<BITS; z++)
609                                 consts.b[z] |= bit.b[z];
610                         return bit;
611                 }
612                 if(et != TARRAY)
613                         for(z=0; z<BITS; z++)
614                                 addrs.b[z] |= bit.b[z];
615                 for(z=0; z<BITS; z++)
616                         params.b[z] |= bit.b[z];
617                 return bit;
618         }
619         if(t == D_OREG)
620                 return bit;
621
622 none:
623         return zbits;
624 }
625
626 void
627 prop(Reg *r, Bits ref, Bits cal)
628 {
629         Reg *r1, *r2;
630         int z;
631
632         for(r1 = r; r1 != R; r1 = r1->p1) {
633                 for(z=0; z<BITS; z++) {
634                         ref.b[z] |= r1->refahead.b[z];
635                         if(ref.b[z] != r1->refahead.b[z]) {
636                                 r1->refahead.b[z] = ref.b[z];
637                                 change++;
638                         }
639                         cal.b[z] |= r1->calahead.b[z];
640                         if(cal.b[z] != r1->calahead.b[z]) {
641                                 r1->calahead.b[z] = cal.b[z];
642                                 change++;
643                         }
644                 }
645                 switch(r1->prog->as) {
646                 case AJSR:
647                         for(z=0; z<BITS; z++) {
648                                 cal.b[z] |= ref.b[z] | externs.b[z];
649                                 ref.b[z] = 0;
650                         }
651                         break;
652
653                 case ATEXT:
654                         for(z=0; z<BITS; z++) {
655                                 cal.b[z] = 0;
656                                 ref.b[z] = 0;
657                         }
658                         break;
659
660                 case ARET:
661                         for(z=0; z<BITS; z++) {
662                                 cal.b[z] = externs.b[z];
663                                 ref.b[z] = 0;
664                         }
665                 }
666                 for(z=0; z<BITS; z++) {
667                         ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
668                                 r1->use1.b[z] | r1->use2.b[z];
669                         cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
670                         r1->refbehind.b[z] = ref.b[z];
671                         r1->calbehind.b[z] = cal.b[z];
672                 }
673                 if(r1->active)
674                         break;
675                 r1->active = 1;
676         }
677         for(; r != r1; r = r->p1)
678                 for(r2 = r->p2; r2 != R; r2 = r2->p2link)
679                         prop(r2, r->refbehind, r->calbehind);
680 }
681
682 /*
683  * find looping structure
684  *
685  * 1) find reverse postordering
686  * 2) find approximate dominators,
687  *      the actual dominators if the flow graph is reducible
688  *      otherwise, dominators plus some other non-dominators.
689  *      See Matthew S. Hecht and Jeffrey D. Ullman,
690  *      "Analysis of a Simple Algorithm for Global Data Flow Problems",
691  *      Conf.  Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
692  *      Oct. 1-3, 1973, pp.  207-217.
693  * 3) find all nodes with a predecessor dominated by the current node.
694  *      such a node is a loop head.
695  *      recursively, all preds with a greater rpo number are in the loop
696  */
697 long
698 postorder(Reg *r, Reg **rpo2r, long n)
699 {
700         Reg *r1;
701
702         r->rpo = 1;
703         r1 = r->s1;
704         if(r1 && !r1->rpo)
705                 n = postorder(r1, rpo2r, n);
706         r1 = r->s2;
707         if(r1 && !r1->rpo)
708                 n = postorder(r1, rpo2r, n);
709         rpo2r[n] = r;
710         n++;
711         return n;
712 }
713
714 long
715 rpolca(long *idom, long rpo1, long rpo2)
716 {
717         long t;
718
719         if(rpo1 == -1)
720                 return rpo2;
721         while(rpo1 != rpo2){
722                 if(rpo1 > rpo2){
723                         t = rpo2;
724                         rpo2 = rpo1;
725                         rpo1 = t;
726                 }
727                 while(rpo1 < rpo2){
728                         t = idom[rpo2];
729                         if(t >= rpo2)
730                                 fatal(Z, "bad idom");
731                         rpo2 = t;
732                 }
733         }
734         return rpo1;
735 }
736
737 int
738 doms(long *idom, long r, long s)
739 {
740         while(s > r)
741                 s = idom[s];
742         return s == r;
743 }
744
745 int
746 loophead(long *idom, Reg *r)
747 {
748         long src;
749
750         src = r->rpo;
751         if(r->p1 != R && doms(idom, src, r->p1->rpo))
752                 return 1;
753         for(r = r->p2; r != R; r = r->p2link)
754                 if(doms(idom, src, r->rpo))
755                         return 1;
756         return 0;
757 }
758
759 void
760 loopmark(Reg **rpo2r, long head, Reg *r)
761 {
762         if(r->rpo < head || r->active == head)
763                 return;
764         r->active = head;
765         r->loop += LOOP;
766         if(r->p1 != R)
767                 loopmark(rpo2r, head, r->p1);
768         for(r = r->p2; r != R; r = r->p2link)
769                 loopmark(rpo2r, head, r);
770 }
771
772 void
773 loopit(Reg *r, long nr)
774 {
775         Reg *r1;
776         long i, d, me;
777
778         if(nr > maxnr) {
779                 rpo2r = alloc(nr * sizeof(Reg*));
780                 idom = alloc(nr * sizeof(long));
781                 maxnr = nr;
782         }
783
784         d = postorder(r, rpo2r, 0);
785         if(d > nr)
786                 fatal(Z, "too many reg nodes");
787         nr = d;
788         for(i = 0; i < nr / 2; i++){
789                 r1 = rpo2r[i];
790                 rpo2r[i] = rpo2r[nr - 1 - i];
791                 rpo2r[nr - 1 - i] = r1;
792         }
793         for(i = 0; i < nr; i++)
794                 rpo2r[i]->rpo = i;
795
796         idom[0] = 0;
797         for(i = 0; i < nr; i++){
798                 r1 = rpo2r[i];
799                 me = r1->rpo;
800                 d = -1;
801                 if(r1->p1 != R && r1->p1->rpo < me)
802                         d = r1->p1->rpo;
803                 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
804                         if(r1->rpo < me)
805                                 d = rpolca(idom, d, r1->rpo);
806                 idom[i] = d;
807         }
808
809         for(i = 0; i < nr; i++){
810                 r1 = rpo2r[i];
811                 r1->loop++;
812                 if(r1->p2 != R && loophead(idom, r1))
813                         loopmark(rpo2r, i, r1);
814         }
815 }
816
817 void
818 synch(Reg *r, Bits dif)
819 {
820         Reg *r1;
821         int z;
822
823         for(r1 = r; r1 != R; r1 = r1->s1) {
824                 for(z=0; z<BITS; z++) {
825                         dif.b[z] = (dif.b[z] &
826                                 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
827                                         r1->set.b[z] | r1->regdiff.b[z];
828                         if(dif.b[z] != r1->regdiff.b[z]) {
829                                 r1->regdiff.b[z] = dif.b[z];
830                                 change++;
831                         }
832                 }
833                 if(r1->active)
834                         break;
835                 r1->active = 1;
836                 for(z=0; z<BITS; z++)
837                         dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
838                 if(r1->s2 != R)
839                         synch(r1->s2, dif);
840         }
841 }
842
843 ulong
844 allreg(ulong b, Rgn *r)
845 {
846         Var *v;
847         int i;
848
849         v = var + r->varno;
850         r->regno = 0;
851         switch(v->etype) {
852
853         default:
854                 diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
855                 break;
856
857         case TCHAR:
858         case TUCHAR:
859         case TSHORT:
860         case TUSHORT:
861         case TINT:
862         case TUINT:
863         case TLONG:
864         case TULONG:
865         case TVLONG:
866         case TUVLONG:
867         case TIND:
868         case TARRAY:
869                 i = BtoR(~b);
870                 if(i && r->cost >= 0) {
871                         r->regno = i;
872                         return RtoB(i);
873                 }
874                 break;
875
876         case TDOUBLE:
877         case TFLOAT:
878                 i = BtoF(~b);
879                 if(i && r->cost >= 0) {
880                         r->regno = i+NREG;
881                         return FtoB(i);
882                 }
883                 break;
884         }
885         return 0;
886 }
887
888 void
889 paint1(Reg *r, int bn)
890 {
891         Reg *r1;
892         Prog *p;
893         int z;
894         ulong bb;
895
896         z = bn/32;
897         bb = 1L<<(bn%32);
898         if(r->act.b[z] & bb)
899                 return;
900         for(;;) {
901                 if(!(r->refbehind.b[z] & bb))
902                         break;
903                 r1 = r->p1;
904                 if(r1 == R)
905                         break;
906                 if(!(r1->refahead.b[z] & bb))
907                         break;
908                 if(r1->act.b[z] & bb)
909                         break;
910                 r = r1;
911         }
912
913         if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) {
914                 change -= CLOAD * r->loop;
915                 if(debug['R'] && debug['v'])
916                         print("%ld%P\tld %B $%d\n", r->loop,
917                                 r->prog, blsh(bn), change);
918         }
919         for(;;) {
920                 r->act.b[z] |= bb;
921                 p = r->prog;
922
923                 if(r->use1.b[z] & bb) {
924                         change += CREF * r->loop;
925                         if(debug['R'] && debug['v'])
926                                 print("%ld%P\tu1 %B $%d\n", r->loop,
927                                         p, blsh(bn), change);
928                 }
929
930                 if((r->use2.b[z]|r->set.b[z]) & bb) {
931                         change += CREF * r->loop;
932                         if(debug['R'] && debug['v'])
933                                 print("%ld%P\tu2 %B $%d\n", r->loop,
934                                         p, blsh(bn), change);
935                 }
936
937                 if(STORE(r) & r->regdiff.b[z] & bb) {
938                         change -= CLOAD * r->loop;
939                         if(debug['R'] && debug['v'])
940                                 print("%ld%P\tst %B $%d\n", r->loop,
941                                         p, blsh(bn), change);
942                 }
943
944                 if(r->refbehind.b[z] & bb)
945                         for(r1 = r->p2; r1 != R; r1 = r1->p2link)
946                                 if(r1->refahead.b[z] & bb)
947                                         paint1(r1, bn);
948
949                 if(!(r->refahead.b[z] & bb))
950                         break;
951                 r1 = r->s2;
952                 if(r1 != R)
953                         if(r1->refbehind.b[z] & bb)
954                                 paint1(r1, bn);
955                 r = r->s1;
956                 if(r == R)
957                         break;
958                 if(r->act.b[z] & bb)
959                         break;
960                 if(!(r->refbehind.b[z] & bb))
961                         break;
962         }
963 }
964
965 ulong
966 paint2(Reg *r, int bn)
967 {
968         Reg *r1;
969         int z;
970         ulong bb, vreg;
971
972         z = bn/32;
973         bb = 1L << (bn%32);
974         vreg = regbits;
975         if(!(r->act.b[z] & bb))
976                 return vreg;
977         for(;;) {
978                 if(!(r->refbehind.b[z] & bb))
979                         break;
980                 r1 = r->p1;
981                 if(r1 == R)
982                         break;
983                 if(!(r1->refahead.b[z] & bb))
984                         break;
985                 if(!(r1->act.b[z] & bb))
986                         break;
987                 r = r1;
988         }
989         for(;;) {
990                 r->act.b[z] &= ~bb;
991
992                 vreg |= r->regu;
993
994                 if(r->refbehind.b[z] & bb)
995                         for(r1 = r->p2; r1 != R; r1 = r1->p2link)
996                                 if(r1->refahead.b[z] & bb)
997                                         vreg |= paint2(r1, bn);
998
999                 if(!(r->refahead.b[z] & bb))
1000                         break;
1001                 r1 = r->s2;
1002                 if(r1 != R)
1003                         if(r1->refbehind.b[z] & bb)
1004                                 vreg |= paint2(r1, bn);
1005                 r = r->s1;
1006                 if(r == R)
1007                         break;
1008                 if(!(r->act.b[z] & bb))
1009                         break;
1010                 if(!(r->refbehind.b[z] & bb))
1011                         break;
1012         }
1013         return vreg;
1014 }
1015
1016 void
1017 paint3(Reg *r, int bn, long rb, int rn)
1018 {
1019         Reg *r1;
1020         Prog *p;
1021         int z;
1022         ulong bb;
1023
1024         z = bn/32;
1025         bb = 1L << (bn%32);
1026         if(r->act.b[z] & bb)
1027                 return;
1028         for(;;) {
1029                 if(!(r->refbehind.b[z] & bb))
1030                         break;
1031                 r1 = r->p1;
1032                 if(r1 == R)
1033                         break;
1034                 if(!(r1->refahead.b[z] & bb))
1035                         break;
1036                 if(r1->act.b[z] & bb)
1037                         break;
1038                 r = r1;
1039         }
1040
1041         if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1042                 addmove(r, bn, rn, 0);
1043         for(;;) {
1044                 r->act.b[z] |= bb;
1045                 p = r->prog;
1046
1047                 if(r->use1.b[z] & bb) {
1048                         if(debug['R'])
1049                                 print("%P", p);
1050                         addreg(&p->from, rn);
1051                         if(p->as == AMOVL && typechlp[var[bn].etype])
1052                                 p->as = AMOVQ;
1053                         if(debug['R'])
1054                                 print("\t.c%P\n", p);
1055                 }
1056                 if((r->use2.b[z]|r->set.b[z]) & bb) {
1057                         if(debug['R'])
1058                                 print("%P", p);
1059                         addreg(&p->to, rn);
1060                         if(p->as == AMOVL && typechlp[var[bn].etype])
1061                                 p->as = AMOVQ;
1062                         if(debug['R'])
1063                                 print("\t.c%P\n", p);
1064                 }
1065
1066                 if(STORE(r) & r->regdiff.b[z] & bb)
1067                         addmove(r, bn, rn, 1);
1068                 r->regu |= rb;
1069
1070                 if(r->refbehind.b[z] & bb)
1071                         for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1072                                 if(r1->refahead.b[z] & bb)
1073                                         paint3(r1, bn, rb, rn);
1074
1075                 if(!(r->refahead.b[z] & bb))
1076                         break;
1077                 r1 = r->s2;
1078                 if(r1 != R)
1079                         if(r1->refbehind.b[z] & bb)
1080                                 paint3(r1, bn, rb, rn);
1081                 r = r->s1;
1082                 if(r == R)
1083                         break;
1084                 if(r->act.b[z] & bb)
1085                         break;
1086                 if(!(r->refbehind.b[z] & bb))
1087                         break;
1088         }
1089 }
1090
1091 void
1092 addreg(Adr *a, int rn)
1093 {
1094
1095         a->sym = 0;
1096         a->name = D_NONE;
1097         a->type = D_REG;
1098         a->reg = rn;
1099         if(rn >= NREG) {
1100                 a->type = D_FREG;
1101                 a->reg = rn-NREG;
1102         }
1103 }
1104
1105 /*
1106  *      bit     reg
1107  *      0       R2
1108  *      1       R3
1109  *      ...     ...
1110  *      10  R12
1111  *      11  R16
1112  *      ...     ...
1113  *      19      R24
1114  *      20      R25
1115  */
1116 long
1117 RtoB(int r)
1118 {
1119
1120         if(r >= 2 && r <= 12)
1121                 return 1L << (r-2);
1122         if(r >= 16 && r <= 25)
1123                 return 1L << (r-5);
1124         return 0;
1125 }
1126
1127 BtoR(long b)
1128 {
1129         int r;
1130
1131         b &= 0x001fffffL;
1132         if(b == 0)
1133                 return 0;
1134         r = bitno(b) + 2;
1135         if (r >= 13)
1136                 r += 3;
1137         return r;
1138 }
1139
1140 /*
1141  *      bit     reg
1142  *      21      F1
1143  *      22      F2
1144  *      ...     ...
1145  *      31      F11
1146  */
1147 long
1148 FtoB(int f)
1149 {
1150
1151         if(f < 1 || f > 11)
1152                 return 0;
1153         return 1L << (f + 20);
1154 }
1155
1156 int
1157 BtoF(long b)
1158 {
1159
1160         b &= 0xffe00000L;
1161         if(b == 0)
1162                 return 0;
1163         return bitno(b) - 20;
1164 }