]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/8c/reg.c
merge
[plan9front.git] / sys / src / cmd / 8c / reg.c
1 #include "gc.h"
2
3 Reg*
4 rega(void)
5 {
6         Reg *r;
7
8         r = freer;
9         if(r == R) {
10                 r = alloc(sizeof(*r));
11         } else
12                 freer = r->link;
13
14         *r = zreg;
15         return r;
16 }
17
18 int
19 rcmp(const void *a1, const void *a2)
20 {
21         Rgn *p1, *p2;
22         int c1, c2;
23
24         p1 = (Rgn*)a1;
25         p2 = (Rgn*)a2;
26         c1 = p2->cost;
27         c2 = p1->cost;
28         if(c1 -= c2)
29                 return c1;
30         return p2->varno - p1->varno;
31 }
32
33 void
34 regopt(Prog *p)
35 {
36         Reg *r, *r1, *r2;
37         Prog *p1;
38         int i, z;
39         long initpc, val, npc;
40         ulong vreg;
41         Bits bit;
42         struct
43         {
44                 long    m;
45                 long    c;
46                 Reg*    p;
47         } log5[6], *lp;
48
49         firstr = R;
50         lastr = R;
51         nvar = 0;
52         regbits = RtoB(D_SP) | RtoB(D_AX);
53         for(z=0; z<BITS; z++) {
54                 externs.b[z] = 0;
55                 params.b[z] = 0;
56                 consts.b[z] = 0;
57                 addrs.b[z] = 0;
58         }
59
60         /*
61          * pass 1
62          * build aux data structure
63          * allocate pcs
64          * find use and set of variables
65          */
66         val = 5L * 5L * 5L * 5L * 5L;
67         lp = log5;
68         for(i=0; i<5; i++) {
69                 lp->m = val;
70                 lp->c = 0;
71                 lp->p = R;
72                 val /= 5L;
73                 lp++;
74         }
75         val = 0;
76         for(; p != P; p = p->link) {
77                 switch(p->as) {
78                 case ADATA:
79                 case AGLOBL:
80                 case ANAME:
81                 case ASIGNAME:
82                         continue;
83                 }
84                 r = rega();
85                 if(firstr == R) {
86                         firstr = r;
87                         lastr = r;
88                 } else {
89                         lastr->link = r;
90                         r->p1 = lastr;
91                         lastr->s1 = r;
92                         lastr = r;
93                 }
94                 r->prog = p;
95                 r->pc = val;
96                 val++;
97
98                 lp = log5;
99                 for(i=0; i<5; i++) {
100                         lp->c--;
101                         if(lp->c <= 0) {
102                                 lp->c = lp->m;
103                                 if(lp->p != R)
104                                         lp->p->log5 = r;
105                                 lp->p = r;
106                                 (lp+1)->c = 0;
107                                 break;
108                         }
109                         lp++;
110                 }
111
112                 r1 = r->p1;
113                 if(r1 != R)
114                 switch(r1->prog->as) {
115                 case ARET:
116                 case AJMP:
117                 case AIRETL:
118                         r->p1 = R;
119                         r1->s1 = R;
120                 }
121
122                 bit = mkvar(r, &p->from);
123                 if(bany(&bit))
124                 switch(p->as) {
125                 /*
126                  * funny
127                  */
128                 case ALEAL:
129                         for(z=0; z<BITS; z++)
130                                 addrs.b[z] |= bit.b[z];
131                         break;
132
133                 /*
134                  * left side read
135                  */
136                 default:
137                         for(z=0; z<BITS; z++)
138                                 r->use1.b[z] |= bit.b[z];
139                         break;
140                 }
141
142                 bit = mkvar(r, &p->to);
143                 if(bany(&bit))
144                 switch(p->as) {
145                 default:
146                         diag(Z, "reg: unknown op: %A", p->as);
147                         break;
148
149                 /*
150                  * right side read
151                  */
152                 case ACMPB:
153                 case ACMPL:
154                 case ACMPW:
155                         for(z=0; z<BITS; z++)
156                                 r->use2.b[z] |= bit.b[z];
157                         break;
158
159                 /*
160                  * right side write
161                  */
162                 case ANOP:
163                 case AMOVL:
164                 case AMOVB:
165                 case AMOVW:
166                 case AMOVBLSX:
167                 case AMOVBLZX:
168                 case AMOVWLSX:
169                 case AMOVWLZX:
170                         for(z=0; z<BITS; z++)
171                                 r->set.b[z] |= bit.b[z];
172                         break;
173
174                 /*
175                  * right side read+write
176                  */
177                 case AADDB:
178                 case AADDL:
179                 case AADDW:
180                 case AANDB:
181                 case AANDL:
182                 case AANDW:
183                 case ASUBB:
184                 case ASUBL:
185                 case ASUBW:
186                 case AORB:
187                 case AORL:
188                 case AORW:
189                 case AXORB:
190                 case AXORL:
191                 case AXORW:
192                 case ASALB:
193                 case ASALL:
194                 case ASALW:
195                 case ASARB:
196                 case ASARL:
197                 case ASARW:
198                 case AROLB:
199                 case AROLL:
200                 case AROLW:
201                 case ARORB:
202                 case ARORL:
203                 case ARORW:
204                 case ASHLB:
205                 case ASHLL:
206                 case ASHLW:
207                 case ASHRB:
208                 case ASHRL:
209                 case ASHRW:
210                 case AIMULL:
211                 case AIMULW:
212                 case ANEGL:
213                 case ANOTL:
214                 case AADCL:
215                 case ASBBL:
216                         for(z=0; z<BITS; z++) {
217                                 r->set.b[z] |= bit.b[z];
218                                 r->use2.b[z] |= bit.b[z];
219                         }
220                         break;
221
222                 /*
223                  * funny
224                  */
225                 case AFMOVDP:
226                 case AFMOVFP:
227                 case AFMOVVP:
228                 case ACALL:
229                         for(z=0; z<BITS; z++)
230                                 addrs.b[z] |= bit.b[z];
231                         break;
232                 }
233
234                 switch(p->as) {
235                 case AIMULL:
236                 case AIMULW:
237                         if(p->to.type != D_NONE)
238                                 break;
239
240                 case AIDIVB:
241                 case AIDIVL:
242                 case AIDIVW:
243                 case AIMULB:
244                 case ADIVB:
245                 case ADIVL:
246                 case ADIVW:
247                 case AMULB:
248                 case AMULL:
249                 case AMULW:
250
251                 case ACWD:
252                 case ACDQ:
253                         r->regu |= RtoB(D_AX) | RtoB(D_DX);
254                         break;
255
256                 case AREP:
257                 case AREPN:
258                 case ALOOP:
259                 case ALOOPEQ:
260                 case ALOOPNE:
261                         r->regu |= RtoB(D_CX);
262                         break;
263
264                 case AMOVSB:
265                 case AMOVSL:
266                 case AMOVSW:
267                 case ACMPSB:
268                 case ACMPSL:
269                 case ACMPSW:
270                         r->regu |= RtoB(D_SI) | RtoB(D_DI);
271                         break;
272
273                 case ASTOSB:
274                 case ASTOSL:
275                 case ASTOSW:
276                 case ASCASB:
277                 case ASCASL:
278                 case ASCASW:
279                         r->regu |= RtoB(D_AX) | RtoB(D_DI);
280                         break;
281
282                 case AINSB:
283                 case AINSL:
284                 case AINSW:
285                 case AOUTSB:
286                 case AOUTSL:
287                 case AOUTSW:
288                         r->regu |= RtoB(D_DI) | RtoB(D_DX);
289                         break;
290
291                 case AFSTSW:
292                 case ASAHF:
293                         r->regu |= RtoB(D_AX);
294                         break;
295                 }
296         }
297         if(firstr == R)
298                 return;
299         initpc = pc - val;
300         npc = val;
301
302         /*
303          * pass 2
304          * turn branch references to pointers
305          * build back pointers
306          */
307         for(r = firstr; r != R; r = r->link) {
308                 p = r->prog;
309                 if(p->to.type == D_BRANCH) {
310                         val = p->to.offset - initpc;
311                         r1 = firstr;
312                         while(r1 != R) {
313                                 r2 = r1->log5;
314                                 if(r2 != R && val >= r2->pc) {
315                                         r1 = r2;
316                                         continue;
317                                 }
318                                 if(r1->pc == val)
319                                         break;
320                                 r1 = r1->link;
321                         }
322                         if(r1 == R) {
323                                 nearln = p->lineno;
324                                 diag(Z, "ref not found\n%P", p);
325                                 continue;
326                         }
327                         if(r1 == r) {
328                                 nearln = p->lineno;
329                                 diag(Z, "ref to self\n%P", p);
330                                 continue;
331                         }
332                         r->s2 = r1;
333                         r->p2link = r1->p2;
334                         r1->p2 = r;
335                 }
336         }
337         if(debug['R']) {
338                 p = firstr->prog;
339                 print("\n%L %D\n", p->lineno, &p->from);
340         }
341
342         /*
343          * pass 2.5
344          * find looping structure
345          */
346         for(r = firstr; r != R; r = r->link)
347                 r->active = 0;
348         change = 0;
349         loopit(firstr, npc);
350         if(debug['R'] && debug['v']) {
351                 print("\nlooping structure:\n");
352                 for(r = firstr; r != R; r = r->link) {
353                         print("%ld:%P", r->loop, r->prog);
354                         for(z=0; z<BITS; z++)
355                                 bit.b[z] = r->use1.b[z] |
356                                            r->use2.b[z] |
357                                            r->set.b[z];
358                         if(bany(&bit)) {
359                                 print("\t");
360                                 if(bany(&r->use1))
361                                         print(" u1=%B", r->use1);
362                                 if(bany(&r->use2))
363                                         print(" u2=%B", r->use2);
364                                 if(bany(&r->set))
365                                         print(" st=%B", r->set);
366                         }
367                         print("\n");
368                 }
369         }
370
371         /*
372          * pass 3
373          * iterate propagating usage
374          *      back until flow graph is complete
375          */
376 loop1:
377         change = 0;
378         for(r = firstr; r != R; r = r->link)
379                 r->active = 0;
380         for(r = firstr; r != R; r = r->link)
381                 if(r->prog->as == ARET)
382                         prop(r, zbits, zbits);
383 loop11:
384         /* pick up unreachable code */
385         i = 0;
386         for(r = firstr; r != R; r = r1) {
387                 r1 = r->link;
388                 if(r1 && r1->active && !r->active) {
389                         prop(r, zbits, zbits);
390                         i = 1;
391                 }
392         }
393         if(i)
394                 goto loop11;
395         if(change)
396                 goto loop1;
397
398
399         /*
400          * pass 4
401          * iterate propagating register/variable synchrony
402          *      forward until graph is complete
403          */
404 loop2:
405         change = 0;
406         for(r = firstr; r != R; r = r->link)
407                 r->active = 0;
408         synch(firstr, zbits);
409         if(change)
410                 goto loop2;
411
412
413         /*
414          * pass 5
415          * isolate regions
416          * calculate costs (paint1)
417          */
418         r = firstr;
419         if(r) {
420                 for(z=0; z<BITS; z++)
421                         bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
422                           ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
423                 if(bany(&bit)) {
424                         nearln = r->prog->lineno;
425                         warn(Z, "used and not set: %B", bit);
426                         if(debug['R'] && !debug['w'])
427                                 print("used and not set: %B\n", bit);
428                 }
429         }
430         if(debug['R'] && debug['v'])
431                 print("\nprop structure:\n");
432         for(r = firstr; r != R; r = r->link)
433                 r->act = zbits;
434         rgp = region;
435         nregion = 0;
436         for(r = firstr; r != R; r = r->link) {
437                 if(debug['R'] && debug['v']) {
438                         print("%P\t", r->prog);
439                         if(bany(&r->set))
440                                 print("s:%B ", r->set);
441                         if(bany(&r->refahead))
442                                 print("ra:%B ", r->refahead);
443                         if(bany(&r->calahead))
444                                 print("ca:%B ", r->calahead);
445                         print("\n");
446                 }
447                 for(z=0; z<BITS; z++)
448                         bit.b[z] = r->set.b[z] &
449                           ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
450                 if(bany(&bit)) {
451                         nearln = r->prog->lineno;
452                         warn(Z, "set and not used: %B", bit);
453                         if(debug['R'])
454                                 print("set and not used: %B\n", bit);
455                         excise(r);
456                 }
457                 for(z=0; z<BITS; z++)
458                         bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
459                 while(bany(&bit)) {
460                         i = bnum(bit);
461                         rgp->enter = r;
462                         rgp->varno = i;
463                         change = 0;
464                         if(debug['R'] && debug['v'])
465                                 print("\n");
466                         paint1(r, i);
467                         bit.b[i/32] &= ~(1L<<(i%32));
468                         if(change <= 0) {
469                                 if(debug['R'])
470                                         print("%L$%d: %B\n",
471                                                 r->prog->lineno, change, blsh(i));
472                                 continue;
473                         }
474                         rgp->cost = change;
475                         nregion++;
476                         if(nregion >= NRGN) {
477                                 warn(Z, "too many regions");
478                                 goto brk;
479                         }
480                         rgp++;
481                 }
482         }
483 brk:
484         qsort(region, nregion, sizeof(region[0]), rcmp);
485
486         /*
487          * pass 6
488          * determine used registers (paint2)
489          * replace code (paint3)
490          */
491         rgp = region;
492         for(i=0; i<nregion; i++) {
493                 bit = blsh(rgp->varno);
494                 vreg = paint2(rgp->enter, rgp->varno);
495                 vreg = allreg(vreg, rgp);
496                 if(debug['R']) {
497                         print("%L$%d %R: %B\n",
498                                 rgp->enter->prog->lineno,
499                                 rgp->cost,
500                                 rgp->regno,
501                                 bit);
502                 }
503                 if(rgp->regno != 0)
504                         paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
505                 rgp++;
506         }
507         /*
508          * pass 7
509          * peep-hole on basic block
510          */
511         if(!debug['R'] || debug['P'])
512                 peep();
513
514         /*
515          * pass 8
516          * recalculate pc
517          */
518         val = initpc;
519         for(r = firstr; r != R; r = r1) {
520                 r->pc = val;
521                 p = r->prog;
522                 p1 = P;
523                 r1 = r->link;
524                 if(r1 != R)
525                         p1 = r1->prog;
526                 for(; p != p1; p = p->link) {
527                         switch(p->as) {
528                         default:
529                                 val++;
530                                 break;
531
532                         case ANOP:
533                         case ADATA:
534                         case AGLOBL:
535                         case ANAME:
536                         case ASIGNAME:
537                                 break;
538                         }
539                 }
540         }
541         pc = val;
542
543         /*
544          * fix up branches
545          */
546         if(debug['R'])
547                 if(bany(&addrs))
548                         print("addrs: %B\n", addrs);
549
550         r1 = 0; /* set */
551         for(r = firstr; r != R; r = r->link) {
552                 p = r->prog;
553                 if(p->to.type == D_BRANCH)
554                         p->to.offset = r->s2->pc;
555                 r1 = r;
556         }
557
558         /*
559          * last pass
560          * eliminate nops
561          * free aux structures
562          */
563         for(p = firstr->prog; p != P; p = p->link){
564                 while(p->link && p->link->as == ANOP)
565                         p->link = p->link->link;
566         }
567         if(r1 != R) {
568                 r1->link = freer;
569                 freer = firstr;
570         }
571 }
572
573 /*
574  * add mov b,rn
575  * just after r
576  */
577 void
578 addmove(Reg *r, int bn, int rn, int f)
579 {
580         Prog *p, *p1;
581         Adr *a;
582         Var *v;
583
584         p1 = alloc(sizeof(*p1));
585         *p1 = zprog;
586         p = r->prog;
587
588         p1->link = p->link;
589         p->link = p1;
590         p1->lineno = p->lineno;
591
592         v = var + bn;
593
594         a = &p1->to;
595         a->sym = v->sym;
596         a->offset = v->offset;
597         a->etype = v->etype;
598         a->type = v->name;
599
600         p1->as = AMOVL;
601         if(v->etype == TCHAR || v->etype == TUCHAR)
602                 p1->as = AMOVB;
603         if(v->etype == TSHORT || v->etype == TUSHORT)
604                 p1->as = AMOVW;
605
606         p1->from.type = rn;
607         if(!f) {
608                 p1->from = *a;
609                 *a = zprog.from;
610                 a->type = rn;
611                 if(v->etype == TUCHAR)
612                         p1->as = AMOVB;
613                 if(v->etype == TUSHORT)
614                         p1->as = AMOVW;
615         }
616         if(debug['R'])
617                 print("%P\t.a%P\n", p, p1);
618 }
619
620 ulong
621 doregbits(int r)
622 {
623         ulong b;
624
625         b = 0;
626         if(r >= D_INDIR)
627                 r -= D_INDIR;
628         if(r >= D_AX && r <= D_DI)
629                 b |= RtoB(r);
630         else
631         if(r >= D_AL && r <= D_BL)
632                 b |= RtoB(r-D_AL+D_AX);
633         else
634         if(r >= D_AH && r <= D_BH)
635                 b |= RtoB(r-D_AH+D_AX);
636         return b;
637 }
638
639 Bits
640 mkvar(Reg *r, Adr *a)
641 {
642         Var *v;
643         int i, t, n, et, z;
644         long o;
645         Bits bit;
646         Sym *s;
647
648         /*
649          * mark registers used
650          */
651         t = a->type;
652         r->regu |= doregbits(t);
653         r->regu |= doregbits(a->index);
654
655         switch(t) {
656         default:
657                 goto none;
658         case D_ADDR:
659                 a->type = a->index;
660                 bit = mkvar(r, a);
661                 for(z=0; z<BITS; z++)
662                         addrs.b[z] |= bit.b[z];
663                 a->type = t;
664                 goto none;
665         case D_EXTERN:
666         case D_STATIC:
667         case D_PARAM:
668         case D_AUTO:
669                 n = t;
670                 break;
671         }
672         s = a->sym;
673         if(s == S)
674                 goto none;
675         if(s->name[0] == '.')
676                 goto none;
677         et = a->etype;
678         o = a->offset;
679         v = var;
680         for(i=0; i<nvar; i++) {
681                 if(s == v->sym)
682                 if(n == v->name)
683                 if(o == v->offset)
684                         goto out;
685                 v++;
686         }
687         if(nvar >= NVAR) {
688                 if(debug['w'] > 1 && s)
689                         warn(Z, "variable not optimized: %s", s->name);
690                 goto none;
691         }
692         i = nvar;
693         nvar++;
694         v = &var[i];
695         v->sym = s;
696         v->offset = o;
697         v->name = n;
698         v->etype = et;
699         if(debug['R'])
700                 print("bit=%2d et=%2d %D\n", i, et, a);
701
702 out:
703         bit = blsh(i);
704         if(n == D_EXTERN || n == D_STATIC)
705                 for(z=0; z<BITS; z++)
706                         externs.b[z] |= bit.b[z];
707         if(n == D_PARAM)
708                 for(z=0; z<BITS; z++)
709                         params.b[z] |= bit.b[z];
710         if(v->etype != et || !typechlpfd[et])   /* funny punning */
711                 for(z=0; z<BITS; z++)
712                         addrs.b[z] |= bit.b[z];
713         return bit;
714
715 none:
716         return zbits;
717 }
718
719 void
720 prop(Reg *r, Bits ref, Bits cal)
721 {
722         Reg *r1, *r2;
723         int z;
724
725         for(r1 = r; r1 != R; r1 = r1->p1) {
726                 for(z=0; z<BITS; z++) {
727                         ref.b[z] |= r1->refahead.b[z];
728                         if(ref.b[z] != r1->refahead.b[z]) {
729                                 r1->refahead.b[z] = ref.b[z];
730                                 change++;
731                         }
732                         cal.b[z] |= r1->calahead.b[z];
733                         if(cal.b[z] != r1->calahead.b[z]) {
734                                 r1->calahead.b[z] = cal.b[z];
735                                 change++;
736                         }
737                 }
738                 switch(r1->prog->as) {
739                 case ACALL:
740                         for(z=0; z<BITS; z++) {
741                                 cal.b[z] |= ref.b[z] | externs.b[z];
742                                 ref.b[z] = 0;
743                         }
744                         break;
745
746                 case ATEXT:
747                         for(z=0; z<BITS; z++) {
748                                 cal.b[z] = 0;
749                                 ref.b[z] = 0;
750                         }
751                         break;
752
753                 case ARET:
754                         for(z=0; z<BITS; z++) {
755                                 cal.b[z] = externs.b[z];
756                                 ref.b[z] = 0;
757                         }
758                 }
759                 for(z=0; z<BITS; z++) {
760                         ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
761                                 r1->use1.b[z] | r1->use2.b[z];
762                         cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
763                         r1->refbehind.b[z] = ref.b[z];
764                         r1->calbehind.b[z] = cal.b[z];
765                 }
766                 if(r1->active)
767                         break;
768                 r1->active = 1;
769         }
770         for(; r != r1; r = r->p1)
771                 for(r2 = r->p2; r2 != R; r2 = r2->p2link)
772                         prop(r2, r->refbehind, r->calbehind);
773 }
774
775 /*
776  * find looping structure
777  *
778  * 1) find reverse postordering
779  * 2) find approximate dominators,
780  *      the actual dominators if the flow graph is reducible
781  *      otherwise, dominators plus some other non-dominators.
782  *      See Matthew S. Hecht and Jeffrey D. Ullman,
783  *      "Analysis of a Simple Algorithm for Global Data Flow Problems",
784  *      Conf.  Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
785  *      Oct. 1-3, 1973, pp.  207-217.
786  * 3) find all nodes with a predecessor dominated by the current node.
787  *      such a node is a loop head.
788  *      recursively, all preds with a greater rpo number are in the loop
789  */
790 long
791 postorder(Reg *r, Reg **rpo2r, long n)
792 {
793         Reg *r1;
794
795         r->rpo = 1;
796         r1 = r->s1;
797         if(r1 && !r1->rpo)
798                 n = postorder(r1, rpo2r, n);
799         r1 = r->s2;
800         if(r1 && !r1->rpo)
801                 n = postorder(r1, rpo2r, n);
802         rpo2r[n] = r;
803         n++;
804         return n;
805 }
806
807 long
808 rpolca(long *idom, long rpo1, long rpo2)
809 {
810         long t;
811
812         if(rpo1 == -1)
813                 return rpo2;
814         while(rpo1 != rpo2){
815                 if(rpo1 > rpo2){
816                         t = rpo2;
817                         rpo2 = rpo1;
818                         rpo1 = t;
819                 }
820                 while(rpo1 < rpo2){
821                         t = idom[rpo2];
822                         if(t >= rpo2)
823                                 fatal(Z, "bad idom");
824                         rpo2 = t;
825                 }
826         }
827         return rpo1;
828 }
829
830 int
831 doms(long *idom, long r, long s)
832 {
833         while(s > r)
834                 s = idom[s];
835         return s == r;
836 }
837
838 int
839 loophead(long *idom, Reg *r)
840 {
841         long src;
842
843         src = r->rpo;
844         if(r->p1 != R && doms(idom, src, r->p1->rpo))
845                 return 1;
846         for(r = r->p2; r != R; r = r->p2link)
847                 if(doms(idom, src, r->rpo))
848                         return 1;
849         return 0;
850 }
851
852 void
853 loopmark(Reg **rpo2r, long head, Reg *r)
854 {
855         if(r->rpo < head || r->active == head)
856                 return;
857         r->active = head;
858         r->loop += LOOP;
859         if(r->p1 != R)
860                 loopmark(rpo2r, head, r->p1);
861         for(r = r->p2; r != R; r = r->p2link)
862                 loopmark(rpo2r, head, r);
863 }
864
865 void
866 loopit(Reg *r, long nr)
867 {
868         Reg *r1;
869         long i, d, me;
870
871         if(nr > maxnr) {
872                 rpo2r = alloc(nr * sizeof(Reg*));
873                 idom = alloc(nr * sizeof(long));
874                 maxnr = nr;
875         }
876
877         d = postorder(r, rpo2r, 0);
878         if(d > nr)
879                 fatal(Z, "too many reg nodes");
880         nr = d;
881         for(i = 0; i < nr / 2; i++){
882                 r1 = rpo2r[i];
883                 rpo2r[i] = rpo2r[nr - 1 - i];
884                 rpo2r[nr - 1 - i] = r1;
885         }
886         for(i = 0; i < nr; i++)
887                 rpo2r[i]->rpo = i;
888
889         idom[0] = 0;
890         for(i = 0; i < nr; i++){
891                 r1 = rpo2r[i];
892                 me = r1->rpo;
893                 d = -1;
894                 if(r1->p1 != R && r1->p1->rpo < me)
895                         d = r1->p1->rpo;
896                 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
897                         if(r1->rpo < me)
898                                 d = rpolca(idom, d, r1->rpo);
899                 idom[i] = d;
900         }
901
902         for(i = 0; i < nr; i++){
903                 r1 = rpo2r[i];
904                 r1->loop++;
905                 if(r1->p2 != R && loophead(idom, r1))
906                         loopmark(rpo2r, i, r1);
907         }
908 }
909
910 void
911 synch(Reg *r, Bits dif)
912 {
913         Reg *r1;
914         int z;
915
916         for(r1 = r; r1 != R; r1 = r1->s1) {
917                 for(z=0; z<BITS; z++) {
918                         dif.b[z] = (dif.b[z] &
919                                 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
920                                         r1->set.b[z] | r1->regdiff.b[z];
921                         if(dif.b[z] != r1->regdiff.b[z]) {
922                                 r1->regdiff.b[z] = dif.b[z];
923                                 change++;
924                         }
925                 }
926                 if(r1->active)
927                         break;
928                 r1->active = 1;
929                 for(z=0; z<BITS; z++)
930                         dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
931                 if(r1->s2 != R)
932                         synch(r1->s2, dif);
933         }
934 }
935
936 ulong
937 allreg(ulong b, Rgn *r)
938 {
939         Var *v;
940         int i;
941
942         v = var + r->varno;
943         r->regno = 0;
944         switch(v->etype) {
945
946         default:
947                 diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
948                 break;
949
950         case TCHAR:
951         case TUCHAR:
952         case TSHORT:
953         case TUSHORT:
954         case TINT:
955         case TUINT:
956         case TLONG:
957         case TULONG:
958         case TIND:
959         case TARRAY:
960                 i = BtoR(~b);
961                 if(i && r->cost > 0) {
962                         r->regno = i;
963                         return RtoB(i);
964                 }
965                 break;
966
967         case TDOUBLE:
968         case TFLOAT:
969                 break;
970         }
971         return 0;
972 }
973
974 void
975 paint1(Reg *r, int bn)
976 {
977         Reg *r1;
978         Prog *p;
979         int z;
980         ulong bb;
981
982         z = bn/32;
983         bb = 1L<<(bn%32);
984         if(r->act.b[z] & bb)
985                 return;
986         for(;;) {
987                 if(!(r->refbehind.b[z] & bb))
988                         break;
989                 r1 = r->p1;
990                 if(r1 == R)
991                         break;
992                 if(!(r1->refahead.b[z] & bb))
993                         break;
994                 if(r1->act.b[z] & bb)
995                         break;
996                 r = r1;
997         }
998
999         if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
1000                 change -= CLOAD * r->loop;
1001                 if(debug['R'] && debug['v'])
1002                         print("%ld%P\tld %B $%d\n", r->loop,
1003                                 r->prog, blsh(bn), change);
1004         }
1005         for(;;) {
1006                 r->act.b[z] |= bb;
1007                 p = r->prog;
1008
1009                 if(r->use1.b[z] & bb) {
1010                         change += CREF * r->loop;
1011                         if(p->as == AFMOVL)
1012                                 if(BtoR(bb) != D_F0)
1013                                         change = -CINF;
1014                         if(debug['R'] && debug['v'])
1015                                 print("%ld%P\tu1 %B $%d\n", r->loop,
1016                                         p, blsh(bn), change);
1017                 }
1018
1019                 if((r->use2.b[z]|r->set.b[z]) & bb) {
1020                         change += CREF * r->loop;
1021                         if(p->as == AFMOVL)
1022                                 if(BtoR(bb) != D_F0)
1023                                         change = -CINF;
1024                         if(debug['R'] && debug['v'])
1025                                 print("%ld%P\tu2 %B $%d\n", r->loop,
1026                                         p, blsh(bn), change);
1027                 }
1028
1029                 if(STORE(r) & r->regdiff.b[z] & bb) {
1030                         change -= CLOAD * r->loop;
1031                         if(p->as == AFMOVL)
1032                                 if(BtoR(bb) != D_F0)
1033                                         change = -CINF;
1034                         if(debug['R'] && debug['v'])
1035                                 print("%ld%P\tst %B $%d\n", r->loop,
1036                                         p, blsh(bn), change);
1037                 }
1038
1039                 if(r->refbehind.b[z] & bb)
1040                         for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1041                                 if(r1->refahead.b[z] & bb)
1042                                         paint1(r1, bn);
1043
1044                 if(!(r->refahead.b[z] & bb))
1045                         break;
1046                 r1 = r->s2;
1047                 if(r1 != R)
1048                         if(r1->refbehind.b[z] & bb)
1049                                 paint1(r1, bn);
1050                 r = r->s1;
1051                 if(r == R)
1052                         break;
1053                 if(r->act.b[z] & bb)
1054                         break;
1055                 if(!(r->refbehind.b[z] & bb))
1056                         break;
1057         }
1058 }
1059
1060 ulong
1061 regset(Reg *r, ulong bb)
1062 {
1063         ulong b, set;
1064         Adr v;
1065         int c;
1066
1067         set = 0;
1068         v = zprog.from;
1069         while(b = bb & ~(bb-1)) {
1070                 v.type = BtoR(b);
1071                 c = copyu(r->prog, &v, A);
1072                 if(c == 3)
1073                         set |= b;
1074                 bb &= ~b;
1075         }
1076         return set;
1077 }
1078
1079 ulong
1080 reguse(Reg *r, ulong bb)
1081 {
1082         ulong b, set;
1083         Adr v;
1084         int c;
1085
1086         set = 0;
1087         v = zprog.from;
1088         while(b = bb & ~(bb-1)) {
1089                 v.type = BtoR(b);
1090                 c = copyu(r->prog, &v, A);
1091                 if(c == 1 || c == 2 || c == 4)
1092                         set |= b;
1093                 bb &= ~b;
1094         }
1095         return set;
1096 }
1097
1098 ulong
1099 paint2(Reg *r, int bn)
1100 {
1101         Reg *r1;
1102         int z;
1103         ulong bb, vreg, x;
1104
1105         z = bn/32;
1106         bb = 1L << (bn%32);
1107         vreg = regbits;
1108         if(!(r->act.b[z] & bb))
1109                 return vreg;
1110         for(;;) {
1111                 if(!(r->refbehind.b[z] & bb))
1112                         break;
1113                 r1 = r->p1;
1114                 if(r1 == R)
1115                         break;
1116                 if(!(r1->refahead.b[z] & bb))
1117                         break;
1118                 if(!(r1->act.b[z] & bb))
1119                         break;
1120                 r = r1;
1121         }
1122         for(;;) {
1123                 r->act.b[z] &= ~bb;
1124
1125                 vreg |= r->regu;
1126
1127                 if(r->refbehind.b[z] & bb)
1128                         for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1129                                 if(r1->refahead.b[z] & bb)
1130                                         vreg |= paint2(r1, bn);
1131
1132                 if(!(r->refahead.b[z] & bb))
1133                         break;
1134                 r1 = r->s2;
1135                 if(r1 != R)
1136                         if(r1->refbehind.b[z] & bb)
1137                                 vreg |= paint2(r1, bn);
1138                 r = r->s1;
1139                 if(r == R)
1140                         break;
1141                 if(!(r->act.b[z] & bb))
1142                         break;
1143                 if(!(r->refbehind.b[z] & bb))
1144                         break;
1145         }
1146
1147         bb = vreg;
1148         for(; r; r=r->s1) {
1149                 x = r->regu & ~bb;
1150                 if(x) {
1151                         vreg |= reguse(r, x);
1152                         bb |= regset(r, x);
1153                 }
1154         }
1155         return vreg;
1156 }
1157
1158 void
1159 paint3(Reg *r, int bn, long rb, int rn)
1160 {
1161         Reg *r1;
1162         Prog *p;
1163         int z;
1164         ulong bb;
1165
1166         z = bn/32;
1167         bb = 1L << (bn%32);
1168         if(r->act.b[z] & bb)
1169                 return;
1170         for(;;) {
1171                 if(!(r->refbehind.b[z] & bb))
1172                         break;
1173                 r1 = r->p1;
1174                 if(r1 == R)
1175                         break;
1176                 if(!(r1->refahead.b[z] & bb))
1177                         break;
1178                 if(r1->act.b[z] & bb)
1179                         break;
1180                 r = r1;
1181         }
1182
1183         if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1184                 addmove(r, bn, rn, 0);
1185         for(;;) {
1186                 r->act.b[z] |= bb;
1187                 p = r->prog;
1188
1189                 if(r->use1.b[z] & bb) {
1190                         if(debug['R'])
1191                                 print("%P", p);
1192                         addreg(&p->from, rn);
1193                         if(debug['R'])
1194                                 print("\t.c%P\n", p);
1195                 }
1196                 if((r->use2.b[z]|r->set.b[z]) & bb) {
1197                         if(debug['R'])
1198                                 print("%P", p);
1199                         addreg(&p->to, rn);
1200                         if(debug['R'])
1201                                 print("\t.c%P\n", p);
1202                 }
1203
1204                 if(STORE(r) & r->regdiff.b[z] & bb)
1205                         addmove(r, bn, rn, 1);
1206                 r->regu |= rb;
1207
1208                 if(r->refbehind.b[z] & bb)
1209                         for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1210                                 if(r1->refahead.b[z] & bb)
1211                                         paint3(r1, bn, rb, rn);
1212
1213                 if(!(r->refahead.b[z] & bb))
1214                         break;
1215                 r1 = r->s2;
1216                 if(r1 != R)
1217                         if(r1->refbehind.b[z] & bb)
1218                                 paint3(r1, bn, rb, rn);
1219                 r = r->s1;
1220                 if(r == R)
1221                         break;
1222                 if(r->act.b[z] & bb)
1223                         break;
1224                 if(!(r->refbehind.b[z] & bb))
1225                         break;
1226         }
1227 }
1228
1229 void
1230 addreg(Adr *a, int rn)
1231 {
1232
1233         a->sym = 0;
1234         a->offset = 0;
1235         a->type = rn;
1236 }
1237
1238 long
1239 RtoB(int r)
1240 {
1241
1242         if(r < D_AX || r > D_DI)
1243                 return 0;
1244         return 1L << (r-D_AX);
1245 }
1246
1247 int
1248 BtoR(long b)
1249 {
1250
1251         b &= 0xffL;
1252         if(b == 0)
1253                 return 0;
1254         return bitno(b) + D_AX;
1255 }