]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/2c/reg.c
Import sources from 2011-03-30 iso image
[plan9front.git] / sys / src / cmd / 2c / reg.c
1 #include "gc.h"
2
3 Reg*
4 rega(void)
5 {
6         Reg *r;
7
8         r = freer;
9         if(r == R) {
10                 r = alloc(sizeof(*r));
11         } else
12                 freer = r->link;
13
14         *r = zreg;
15         return r;
16 }
17
18 int
19 rcmp(const void *a1, const void *a2)
20 {
21         Rgn *p1, *p2;
22         int c1, c2;
23
24         p1 = (Rgn*)a1;
25         p2 = (Rgn*)a2;
26         c1 = p2->costr;
27         if(p2->costa > c1)
28                 c1 = p2->costa;
29         c2 = p1->costr;
30         if(p1->costa > c2)
31                 c2 = p1->costa;
32         if(c1 -= c2)
33                 return c1;
34         return p2->varno - p1->varno;
35 }
36
37 void
38 regopt(Prog *p)
39 {
40         Reg *r, *r1, *r2;
41         Prog *p1;
42         int i, z;
43         long val, initpc, npc;
44         ulong vreg;
45         Bits bit;
46         Var *v;
47         struct {
48                 long    m;
49                 long    c;
50                 Reg*    p;
51         } log5[6], *lp;
52
53         firstr = R;
54         lastr = R;
55         nvar = 0;
56         for(z=0; z<BITS; z++) {
57                 externs.b[z] = 0;
58                 params.b[z] = 0;
59                 addrs.b[z] = 0;
60         }
61         regbits = RtoB(0) |             /* return reg */
62                 AtoB(6) | AtoB(7) |     /* sp and sb */
63                 FtoB(0) | FtoB(1);      /* floating return reg */
64         for(i=0; i<NREG; i++) {
65                 if(regused[i])
66                         regbits |= RtoB(i);
67                 if(fregused[i])
68                         regbits |= FtoB(i);
69                 if(aregused[i])
70                         regbits |= AtoB(i);
71         }
72
73         /*
74          * pass 1
75          * build aux data structure
76          * allocate pcs
77          * find use and set of variables
78          */
79         val = 5L * 5L * 5L * 5L * 5L;
80         lp = log5;
81         for(i=0; i<5; i++) {
82                 lp->m = val;
83                 lp->c = 0;
84                 lp->p = R;
85                 val /= 5L;
86                 lp++;
87         }
88         val = 0;
89         for(; p != P; p = p->link) {
90                 switch(p->as) {
91                 case ADATA:
92                 case AGLOBL:
93                 case ANAME:
94                 case ASIGNAME:
95                         continue;
96                 }
97                 r = rega();
98                 if(firstr == R) {
99                         firstr = r;
100                         lastr = r;
101                 } else {
102                         lastr->link = r;
103                         r->p1 = lastr;
104                         lastr->s1 = r;
105                         lastr = r;
106                 }
107                 r->prog = p;
108                 r->pc = val;
109                 val++;
110
111                 lp = log5;
112                 for(i=0; i<5; i++) {
113                         lp->c--;
114                         if(lp->c <= 0) {
115                                 lp->c = lp->m;
116                                 if(lp->p != R)
117                                         lp->p->log5 = r;
118                                 lp->p = r;
119                                 (lp+1)->c = 0;
120                                 break;
121                         }
122                         lp++;
123                 }
124
125                 r1 = r->p1;
126                 if(r1 != R)
127                 switch(r1->prog->as) {
128                 case ABRA:
129                 case ARTS:
130                 case ARTE:
131                         r->p1 = R;
132                         r1->s1 = R;
133                 }
134
135                 bit = mkvar(&p->from, AGOK);
136                 if(bany(&bit))
137                 switch(p->as) {
138                 case ALEA:
139                         if(!(mvbits & B_INDIR))
140                         for(z=0; z<BITS; z++)
141                                 addrs.b[z] |= bit.b[z];
142
143                 default:
144                         if(mvbits & B_ADDR)
145                         for(z=0; z<BITS; z++)
146                                 addrs.b[z] |= bit.b[z];
147                         for(z=0; z<BITS; z++)
148                                 r->use1.b[z] |= bit.b[z];
149                 }
150
151                 bit = mkvar(&p->to, p->as);
152                 if(bany(&bit))
153                 switch(p->as) {
154                 case ABSR:      /* funny */
155                         for(z=0; z<BITS; z++)
156                                 addrs.b[z] |= bit.b[z];
157                         goto def;
158
159                 case APEA:
160                         if(!(mvbits & B_INDIR))
161                         for(z=0; z<BITS; z++)
162                                 addrs.b[z] |= bit.b[z];
163
164                 def:
165                 case ACMPB: case ACMPW: case ACMPL:
166                 case AFCMPF: case AFCMPD:
167                 case ATSTB: case ATSTW: case ATSTL:
168                 case AFTSTF: case AFTSTD:
169                 case ABFEXTU: case ABFEXTS:
170                         if(mvbits & B_ADDR)
171                         for(z=0; z<BITS; z++)
172                                 addrs.b[z] |= bit.b[z];
173                         for(z=0; z<BITS; z++)
174                                 r->use2.b[z] |= bit.b[z];
175                         break;
176
177                 default:
178                         diag(Z, "reg: unknown asop: %A", p->as);
179
180                 case AADDB: case AADDW: case AADDL:
181                 case ASUBB: case ASUBW: case ASUBL:
182                 case AANDB: case AANDW: case AANDL:
183                 case AORB: case AORW: case AORL:
184                 case AEORB: case AEORW: case AEORL:
185                 case ABFINS:
186                         for(z=0; z<BITS; z++)
187                                 r->use2.b[z] |= bit.b[z];
188
189                 case ANOP:
190                 case AMOVB: case AMOVW: case AMOVL:
191                 case AFMOVEB: case AFMOVEW: case AFMOVEL:
192                 case ACLRB: case ACLRW: case ACLRL:
193                 case AFMOVEF: case AFMOVED:
194                         if(mvbits & B_INDIR)
195                         for(z=0; z<BITS; z++)
196                                 r->use2.b[z] |= bit.b[z];
197                         else
198                         for(z=0; z<BITS; z++)
199                                 r->set.b[z] |= bit.b[z];
200                         break;
201
202                 }
203         }
204         if(firstr == R)
205                 return;
206         initpc = pc - val;
207         npc = val;
208
209         /*
210          * pass 2
211          * turn branch references to pointers
212          * build back pointers
213          */
214         for(r = firstr; r != R; r = r->link) {
215                 p = r->prog;
216                 if(p->to.type == D_BRANCH) {
217                         val = p->to.offset - initpc;
218                         r1 = firstr;
219                         while(r1 != R) {
220                                 r2 = r1->log5;
221                                 if(r2 != R && val >= r2->pc) {
222                                         r1 = r2;
223                                         continue;
224                                 }
225                                 if(r1->pc == val)
226                                         break;
227                                 r1 = r1->link;
228                         }
229                         if(r1 == R) {
230                                 diag(Z, "ref not found\n%L:%P", p->lineno, p);
231                                 continue;
232                         }
233                         if(r1 == r) {
234                                 diag(Z, "ref to self");
235                                 continue;
236                         }
237                         r->s2 = r1;
238                         r->p2link = r1->p2;
239                         r1->p2 = r;
240                 }
241         }
242         if(debug['R'])
243                 print("\n%L %D\n", firstr->prog->lineno, &firstr->prog->from);
244
245         /*
246          * pass 2.5
247          * find looping structure
248          */
249         for(r = firstr; r != R; r = r->link)
250                 r->active = 0;
251         changer = 0;
252         loopit(firstr, npc);
253         if(debug['R'] && debug['v']) {
254                 print("\nlooping structure:\n");
255                 for(r = firstr; r != R; r = r->link) {
256                         print("%ld:%P", r->loop, r->prog);
257                         for(z=0; z<BITS; z++)
258                                 bit.b[z] = r->use1.b[z] |
259                                         r->use2.b[z] | r->set.b[z];
260                         if(bany(&bit)) {
261                                 print("\t");
262                                 if(bany(&r->use1))
263                                         print(" u1=%B", r->use1);
264                                 if(bany(&r->use2))
265                                         print(" u2=%B", r->use2);
266                                 if(bany(&r->set))
267                                         print(" st=%B", r->set);
268                         }
269                         print("\n");
270                 }
271         }
272
273         /*
274          * pass 3
275          * iterate propagating usage
276          *      back until flow graph is complete
277          */
278 loop1:
279         changer = 0;
280         for(r = firstr; r != R; r = r->link)
281                 r->active = 0;
282         for(r = firstr; r != R; r = r->link)
283                 if(r->prog->as == ARTS)
284                         prop(r, zbits, zbits);
285 loop11:
286         /* pick up unreachable code */
287         i = 0;
288         for(r = firstr; r != R; r = r1) {
289                 r1 = r->link;
290                 if(r1 && r1->active && !r->active) {
291                         prop(r, zbits, zbits);
292                         i = 1;
293                 }
294         }
295         if(i)
296                 goto loop11;
297         if(changer)
298                 goto loop1;
299
300         /*
301          * pass 4
302          * iterate propagating register/variable synchrony
303          *      forward until graph is complete
304          */
305 loop2:
306         changer = 0;
307         for(r = firstr; r != R; r = r->link)
308                 r->active = 0;
309         synch(firstr, zbits);
310         if(changer)
311                 goto loop2;
312
313
314         /*
315          * pass 5
316          * isolate regions
317          * calculate costs (paint1)
318          */
319         r = firstr;
320         if(r) {
321                 for(z=0; z<BITS; z++)
322                         bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
323                           ~(externs.b[z] | params.b[z] | addrs.b[z]);
324                 if(bany(&bit)) {
325                         nearln = r->prog->lineno;
326                         warn(Z, "used and not set: %B", bit);
327                         if(debug['R'] && !debug['w'])
328                                 print("used and not set: %B\n", bit);
329
330                         /*
331                          * 68040 'feature':
332                          *      load of a denormalized fp will trap
333                          */
334                         while(bany(&bit)) {
335                                 i = bnum(bit);
336                                 bit.b[i/32] &= ~(1L << (i%32));
337                                 v = var + i;
338                                 if(v->type == D_AUTO) {
339                                         r->set.b[i/32] |= (1L << (i%32));
340                                         if(typefd[v->etype])
341                                                 addmove(r, i, NREG+NREG, 1);
342                                 }
343                         }
344                 }
345         }
346         if(debug['R'] && debug['v'])
347                 print("\nprop structure:\n");
348         for(r = firstr; r != R; r = r->link) {
349                 if(debug['R'] && debug['v'])
350                         print("%P\n     set = %B; rah = %B; cal = %B\n",
351                                 r->prog, r->set, r->refahead, r->calahead);
352                 r->act = zbits;
353         }
354         rgp = region;
355         nregion = 0;
356         for(r = firstr; r != R; r = r->link) {
357                 for(z=0; z<BITS; z++)
358                         bit.b[z] = r->set.b[z] &
359                           ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
360                 if(bany(&bit)) {
361                         nearln = r->prog->lineno;
362                         warn(Z, "set and not used: %B", bit);
363                         if(debug['R'])
364                                 print("set an not used: %B\n", bit);
365                         excise(r);
366                 }
367                 for(z=0; z<BITS; z++)
368                         bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
369                 while(bany(&bit)) {
370                         i = bnum(bit);
371                         rgp->enter = r;
372                         rgp->varno = i;
373                         changer = 0;
374                         changea = 0;
375                         if(debug['R'] && debug['v'])
376                                 print("\n");
377                         paint1(r, i);
378                         bit.b[i/32] &= ~(1L<<(i%32));
379                         if(changer <= 0 && changea <= 0) {
380                                 if(debug['R'])
381                                         print("%L$%d.%d: %B\n",
382                                                 r->prog->lineno,
383                                                 changer, changea, blsh(i));
384                                 continue;
385                         }
386                         rgp->costr = changer;
387                         rgp->costa = changea;
388                         nregion++;
389                         if(nregion >= NRGN) {
390                                 warn(Z, "too many regions");
391                                 goto brk;
392                         }
393                         rgp++;
394                 }
395         }
396 brk:
397         qsort(region, nregion, sizeof(region[0]), rcmp);
398
399         /*
400          * pass 6
401          * determine used registers (paint2)
402          * replace code (paint3)
403          */
404         rgp = region;
405         for(i=0; i<nregion; i++) {
406                 bit = blsh(rgp->varno);
407                 vreg = paint2(rgp->enter, rgp->varno);
408                 vreg = allreg(vreg, rgp);
409                 if(debug['R'])
410                         print("%L$%d.%d %R: %B\n",
411                                 rgp->enter->prog->lineno,
412                                 rgp->costr, rgp->costa,
413                                 rgp->regno,
414                                 bit);
415                 if(rgp->regno != D_NONE)
416                         paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
417                 rgp++;
418         }
419         /*
420          * pass 7
421          * peep-hole on basic block
422          */
423         if(!debug['R'] || debug['P'])
424                 peep();
425
426         /*
427          * pass 8
428          * recalculate pc
429          */
430         val = initpc;
431         for(r = firstr; r != R; r = r1) {
432                 r->pc = val;
433                 p = r->prog;
434                 p1 = P;
435                 r1 = r->link;
436                 if(r1 != R)
437                         p1 = r1->prog;
438                 for(; p != p1; p = p->link) {
439                         switch(p->as) {
440                         default:
441                                 val++;
442                                 break;
443
444                         case ANOP:
445                         case ADATA:
446                         case AGLOBL:
447                         case ANAME:
448                         case ASIGNAME:
449                                 break;
450                         }
451                 }
452         }
453         pc = val;
454
455         /*
456          * fix up branches
457          */
458         if(debug['R'])
459                 if(bany(&addrs))
460                         print("addrs: %B\n", addrs);
461
462         r1 = 0; /* set */
463         for(r = firstr; r != R; r = r->link) {
464                 p = r->prog;
465                 if(p->to.type == D_BRANCH)
466                         p->to.offset = r->s2->pc;
467                 r1 = r;
468         }
469
470         /*
471          * last pass
472          * eliminate nops
473          * free aux structures
474          */
475         for(p = firstr->prog; p != P; p = p->link){
476                 while(p->link && p->link->as == ANOP)
477                         p->link = p->link->link;
478         }
479         if(r1 != R) {
480                 r1->link = freer;
481                 freer = firstr;
482         }
483 }
484
485 /*
486  * add mov b,rn
487  * just after r
488  */
489 void
490 addmove(Reg *r, int bn, int rn, int f)
491 {
492         Prog *p, *p1;
493         Var *v;
494         int badccr;
495
496         badccr = 0;
497         p = r->prog;
498         p1 = p->link;
499         if(p1)
500         switch(p1->as) {
501         case AMOVW:
502                 if(p1->from.type == D_CCR)
503                         p = p1;
504                 break;
505
506         case ABEQ:
507         case ABNE:
508         case ABLE:
509         case ABLS:
510         case ABLT:
511         case ABMI:
512         case ABGE:
513         case ABPL:
514         case ABGT:
515         case ABHI:
516         case ABCC:
517         case ABCS:
518                 p1 = prg();
519                 p1->link = p->link;
520                 p->link = p1;
521                 p1->lineno = p->lineno;
522
523                 p1->from.type = D_CCR;
524                 p1->to.type = D_TOS;
525                 p1->as = AMOVW;
526                 p = p1;
527                 badccr = 1;
528         }
529         p1 = prg();
530         p1->link = p->link;
531         p->link = p1;
532         p1->lineno = p->lineno;
533
534         v = var + bn;
535         p1->from.sym = v->sym;
536         p1->from.type = v->type;
537         p1->from.offset = v->offset;
538         p1->from.etype = v->etype;
539         p1->to.type = rn;
540         if(f) {
541                 p1->to = p1->from;
542                 p1->from = zprog.from;
543                 p1->from.type = rn;
544         }
545         p1->as = opxt[OAS][v->etype];
546         if(badccr) {
547                 p = p1;
548                 p1 = prg();
549                 p1->link = p->link;
550                 p->link = p1;
551                 p1->lineno = p->lineno;
552
553                 p1->from.type = D_TOS;
554                 p1->to.type = D_CCR;
555                 p1->as = AMOVW;
556         }
557         if(debug['R'])
558                 print("%P\t.a%P\n", p, p1);
559 }
560
561 Bits
562 mkvar(Adr *a, int as)
563 {
564         Var *v;
565         int i, t, z;
566         long o;
567         Bits bit;
568         Sym *s;
569
570         mvbits = 0;
571         t = a->type & D_MASK;
572         switch(t) {
573
574         default:
575                 if(t >= D_R0 && t < D_R0+NREG) {
576                         regbits |= RtoB(t-D_R0);
577                         if(as == ADIVUL || as == ADIVSL)
578                                 regbits |= RtoB(t-D_R0+1);
579                 }
580                 if(t >= D_A0 && t < D_A0+NREG)
581                         regbits |= AtoB(t-D_A0);
582                 if(t >= D_F0 && t < D_F0+NREG)
583                         regbits |= FtoB(t-D_F0);
584                 goto none;
585
586         case D_EXTERN:
587         case D_STATIC:
588         case D_AUTO:
589         case D_PARAM:
590                 break;
591         }
592         s = a->sym;
593         if(s == S)
594                 goto none;
595
596         if((a->type & I_MASK) == I_ADDR)
597                 mvbits |= B_ADDR;
598
599         switch(a->index & I_MASK) {
600         case I_INDEX1:
601                 mvbits |= B_ADDR;
602                 break;
603
604         case I_INDEX2:
605         case I_INDEX3:
606                 mvbits |= B_INDIR;
607                 break;
608         }
609
610         o = a->offset;
611         v = var;
612         for(i=0; i<nvar; i++) {
613                 if(s == v->sym)
614                 if(t == v->type)
615                 if(o == v->offset)
616                         goto out;
617                 v++;
618         }
619         if(s)
620                 if(s->name[0] == '.')
621                         goto none;
622         if(nvar >= NVAR) {
623                 if(debug['w'] > 1 && s)
624                         warn(Z, "variable not optimized: %s", s->name);
625                 goto none;
626         }
627         i = nvar;
628         nvar++;
629         v = &var[i];
630         v->sym = s;
631         v->offset = o;
632         v->etype = a->etype;
633         v->type = t;
634         if(debug['R'])
635                 print("bit=%2d et=%2d %s (%p,%d,%ld)\n",
636                         i, a->etype, s->name,
637                         v->sym, v->type, v->offset);
638
639 out:
640         bit = blsh(i);
641         if(t == D_EXTERN || t == D_STATIC)
642                 for(z=0; z<BITS; z++)
643                         externs.b[z] |= bit.b[z];
644         if(t == D_PARAM)
645                 for(z=0; z<BITS; z++)
646                         params.b[z] |= bit.b[z];
647         if(a->etype != v->etype || !typechlpfd[a->etype])
648                 for(z=0; z<BITS; z++)
649                         addrs.b[z] |= bit.b[z]; /* funny punning */
650         return bit;
651
652 none:
653         return zbits;
654 }
655
656 void
657 prop(Reg *r, Bits ref, Bits cal)
658 {
659         Reg *r1, *r2;
660         int z;
661
662         for(r1 = r; r1 != R; r1 = r1->p1) {
663                 for(z=0; z<BITS; z++) {
664                         ref.b[z] |= r1->refahead.b[z];
665                         if(ref.b[z] != r1->refahead.b[z]) {
666                                 r1->refahead.b[z] = ref.b[z];
667                                 changer++;
668                         }
669                         cal.b[z] |= r1->calahead.b[z];
670                         if(cal.b[z] != r1->calahead.b[z]) {
671                                 r1->calahead.b[z] = cal.b[z];
672                                 changer++;
673                         }
674                 }
675                 switch(r1->prog->as) {
676                 case ABSR:
677                         for(z=0; z<BITS; z++) {
678                                 cal.b[z] |= ref.b[z] | externs.b[z];
679                                 ref.b[z] = 0;
680                         }
681                         break;
682
683                 case ATEXT:
684                         for(z=0; z<BITS; z++) {
685                                 cal.b[z] = 0;
686                                 ref.b[z] = 0;
687                         }
688                         break;
689
690                 case ARTS:
691                         for(z=0; z<BITS; z++) {
692                                 cal.b[z] = externs.b[z];
693                                 ref.b[z] = 0;
694                         }
695                 }
696                 for(z=0; z<BITS; z++) {
697                         ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
698                                 r1->use1.b[z] | r1->use2.b[z];
699                         cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
700                         r1->refbehind.b[z] = ref.b[z];
701                         r1->calbehind.b[z] = cal.b[z];
702                 }
703                 if(r1->active)
704                         break;
705                 r1->active = 1;
706         }
707         for(; r != r1; r = r->p1)
708                 for(r2 = r->p2; r2 != R; r2 = r2->p2link)
709                         prop(r2, r->refbehind, r->calbehind);
710 }
711
712 /*
713  * find looping structure
714  *
715  * 1) find reverse postordering
716  * 2) find approximate dominators,
717  *      the actual dominators if the flow graph is reducible
718  *      otherwise, dominators plus some other non-dominators.
719  *      See Matthew S. Hecht and Jeffrey D. Ullman,
720  *      "Analysis of a Simple Algorithm for Global Data Flow Problems",
721  *      Conf.  Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
722  *      Oct. 1-3, 1973, pp.  207-217.
723  * 3) find all nodes with a predecessor dominated by the current node.
724  *      such a node is a loop head.
725  *      recursively, all preds with a greater rpo number are in the loop
726  */
727 long
728 postorder(Reg *r, Reg **rpo2r, long n)
729 {
730         Reg *r1;
731
732         r->rpo = 1;
733         r1 = r->s1;
734         if(r1 && !r1->rpo)
735                 n = postorder(r1, rpo2r, n);
736         r1 = r->s2;
737         if(r1 && !r1->rpo)
738                 n = postorder(r1, rpo2r, n);
739         rpo2r[n] = r;
740         n++;
741         return n;
742 }
743
744 long
745 rpolca(long *idom, long rpo1, long rpo2)
746 {
747         long t;
748
749         if(rpo1 == -1)
750                 return rpo2;
751         while(rpo1 != rpo2){
752                 if(rpo1 > rpo2){
753                         t = rpo2;
754                         rpo2 = rpo1;
755                         rpo1 = t;
756                 }
757                 while(rpo1 < rpo2){
758                         t = idom[rpo2];
759                         if(t >= rpo2)
760                                 fatal(Z, "bad idom");
761                         rpo2 = t;
762                 }
763         }
764         return rpo1;
765 }
766
767 int
768 doms(long *idom, long r, long s)
769 {
770         while(s > r)
771                 s = idom[s];
772         return s == r;
773 }
774
775 int
776 loophead(long *idom, Reg *r)
777 {
778         long src;
779
780         src = r->rpo;
781         if(r->p1 != R && doms(idom, src, r->p1->rpo))
782                 return 1;
783         for(r = r->p2; r != R; r = r->p2link)
784                 if(doms(idom, src, r->rpo))
785                         return 1;
786         return 0;
787 }
788
789 void
790 loopmark(Reg **rpo2r, long head, Reg *r)
791 {
792         if(r->rpo < head || r->active == head)
793                 return;
794         r->active = head;
795         r->loop += LOOP;
796         if(r->p1 != R)
797                 loopmark(rpo2r, head, r->p1);
798         for(r = r->p2; r != R; r = r->p2link)
799                 loopmark(rpo2r, head, r);
800 }
801
802 void
803 loopit(Reg *r, long nr)
804 {
805         Reg *r1;
806         long i, d, me;
807
808         if(nr > maxnr) {
809                 rpo2r = alloc(nr * sizeof(Reg*));
810                 idom = alloc(nr * sizeof(long));
811                 maxnr = nr;
812         }
813
814         d = postorder(r, rpo2r, 0);
815         if(d > nr)
816                 fatal(Z, "too many reg nodes");
817         nr = d;
818         for(i = 0; i < nr / 2; i++){
819                 r1 = rpo2r[i];
820                 rpo2r[i] = rpo2r[nr - 1 - i];
821                 rpo2r[nr - 1 - i] = r1;
822         }
823         for(i = 0; i < nr; i++)
824                 rpo2r[i]->rpo = i;
825
826         idom[0] = 0;
827         for(i = 0; i < nr; i++){
828                 r1 = rpo2r[i];
829                 me = r1->rpo;
830                 d = -1;
831                 if(r1->p1 != R && r1->p1->rpo < me)
832                         d = r1->p1->rpo;
833                 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
834                         if(r1->rpo < me)
835                                 d = rpolca(idom, d, r1->rpo);
836                 idom[i] = d;
837         }
838
839         for(i = 0; i < nr; i++){
840                 r1 = rpo2r[i];
841                 r1->loop++;
842                 if(r1->p2 != R && loophead(idom, r1))
843                         loopmark(rpo2r, i, r1);
844         }
845 }
846
847 void
848 synch(Reg *r, Bits dif)
849 {
850         Reg *r1;
851         int z;
852
853         for(r1 = r; r1 != R; r1 = r1->s1) {
854                 for(z=0; z<BITS; z++) {
855                         dif.b[z] = (dif.b[z] &
856                                 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
857                                         r1->set.b[z] | r1->regdiff.b[z];
858                         if(dif.b[z] != r1->regdiff.b[z]) {
859                                 r1->regdiff.b[z] = dif.b[z];
860                                 changer++;
861                         }
862                 }
863                 if(r1->active)
864                         break;
865                 r1->active = 1;
866                 for(z=0; z<BITS; z++)
867                         dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
868                 if(r1->s2 != R)
869                         synch(r1->s2, dif);
870         }
871 }
872
873 ulong
874 allreg(ulong b, Rgn *r)
875 {
876         Var *v;
877         int i, j;
878
879         v = var + r->varno;
880         r->regno = D_NONE;
881         switch(v->etype) {
882
883         default:
884                 diag(Z, "unknown etype");
885                 break;
886
887         case TCHAR:
888         case TUCHAR:
889         case TSHORT:
890         case TUSHORT:
891         case TINT:
892         case TUINT:
893         case TLONG:
894         case TULONG:
895         case TIND:
896                 i = BtoR(~b);
897                 j = BtoA(~b);
898                 if(r->costa == r->costr)
899                         if(i > j)
900                                 i = NREG;
901                 if(j < NREG && r->costa > 0)
902                 if(r->costa > r->costr || i >= NREG) {
903                         r->regno = D_A0 + j;
904                         return AtoB(j);
905                 }
906                 if(i < NREG && r->costr > 0) {
907                         r->regno = D_R0 + i;
908                         return RtoB(i);
909                 }
910                 break;
911
912         case TDOUBLE:
913         case TFLOAT:
914                 i = BtoF(~b);
915                 if(i < NREG) {
916                         r->regno = D_F0 + i;
917                         return FtoB(i);
918                 }
919                 break;
920         }
921         return 0;
922 }
923
924 void
925 paint1(Reg *r, int bn)
926 {
927         Reg *r1;
928         Prog *p;
929         int z;
930         ulong bb;
931         int x;
932
933         z = bn/32;
934         bb = 1L<<(bn%32);
935         if(r->act.b[z] & bb)
936                 return;
937         for(;;) {
938                 if(!(r->refbehind.b[z] & bb))
939                         break;
940                 r1 = r->p1;
941                 if(r1 == R)
942                         break;
943                 if(!(r1->refahead.b[z] & bb))
944                         break;
945                 if(r1->act.b[z] & bb)
946                         break;
947                 r = r1;
948         }
949         if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
950                 changer -= CLOAD * r->loop;
951                 changea -= CLOAD * r->loop;
952                 if(debug['R'] && debug['v'])
953                         print("%ld%P\tld %B $%d.%d\n", r->loop,
954                                 r->prog, blsh(bn), changer, changea);
955         }
956         for(;;) {
957                 r->act.b[z] |= bb;
958                 p = r->prog;
959
960                 if(r->use1.b[z] & bb) {
961                         changer += CREF * r->loop;
962                         changea += CREF * r->loop;
963                         x = p->from.index;
964                         if(x == D_NONE) {
965                                 switch(p->as) {
966                                 default:
967                                         changea = -CINF;
968                                 case AADDL:
969                                 case ASUBL:
970                                 case AMOVL:
971                                 case ACMPL:
972                                         break;
973                                 }
974                         } else {
975                                 changer += (CXREF-CREF) * r->loop;
976                                 if(x != (I_INDEX3|D_NONE))
977                                         changer = -CINF;
978                                 if((x&I_MASK) == I_INDEX1)
979                                         changea = -CINF;
980                         }
981                         if(p->as == AMOVL) {
982                                 x = p->to.type;
983                                 if(x >= D_R0 && x < D_R0+NREG)
984                                         changer += r->loop;
985                                 if(x >= D_A0 && x < D_A0+NREG)
986                                         changea += r->loop;
987                         }
988                         if(debug['R'] && debug['v'])
989                                 print("%ld%P\tu1 %B $%d.%d\n", r->loop,
990                                         p, blsh(bn), changer, changea);
991                 }
992                 if((r->use2.b[z]|r->set.b[z]) & bb) {
993                         changer += CREF * r->loop;
994                         changea += CREF * r->loop;
995                         x = p->to.index;
996                         if(x == D_NONE)
997                                 switch(p->as) {
998                                 default:
999                                         changea = -CINF;
1000                                         break;
1001                                 case AMOVL:
1002                                 case AADDL:
1003                                 case ACMPL:
1004                                 case ASUBL:
1005                                 case ACLRL:     /* can be faked */
1006                                 case ATSTL:     /* can be faked */
1007                                         break;
1008                                 }
1009                         else {
1010                                 changer += (CXREF-CREF) * r->loop;
1011                                 if(x != (I_INDEX3|D_NONE))
1012                                         changer = -CINF;
1013                                 if((x&I_MASK) == I_INDEX1)
1014                                         changea = -CINF;
1015                         }
1016                         if(p->as == AMOVL) {
1017                                 x = p->from.type;
1018                                 if(x >= D_R0 && x < D_R0+NREG)
1019                                         changer += r->loop;
1020                                 if(x >= D_A0 && x < D_A0+NREG)
1021                                         changea += r->loop;
1022                         }
1023                         if(debug['R'] && debug['v'])
1024                                 print("%ld%P\tu2 %B $%d.%d\n", r->loop,
1025                                         p, blsh(bn), changer, changea);
1026                 }
1027                 if(STORE(r) & r->regdiff.b[z] & bb) {
1028                         changer -= CLOAD * r->loop;
1029                         changea -= CLOAD * r->loop;
1030                         if(debug['R'] && debug['v'])
1031                                 print("%ld%P\tst %B $%d.%d\n", r->loop,
1032                                         p, blsh(bn), changer, changea);
1033                 }
1034
1035                 if(r->refbehind.b[z] & bb)
1036                         for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1037                                 if(r1->refahead.b[z] & bb)
1038                                         paint1(r1, bn);
1039
1040                 if(!(r->refahead.b[z] & bb))
1041                         break;
1042                 r1 = r->s2;
1043                 if(r1 != R)
1044                         if(r1->refbehind.b[z] & bb)
1045                                 paint1(r1, bn);
1046                 r = r->s1;
1047                 if(r == R)
1048                         break;
1049                 if(r->act.b[z] & bb)
1050                         break;
1051                 if(!(r->refbehind.b[z] & bb))
1052                         break;
1053         }
1054 }
1055
1056 ulong
1057 paint2(Reg *r, int bn)
1058 {
1059         Reg *r1;
1060         int z;
1061         ulong bb, vreg;
1062
1063         z = bn/32;
1064         bb = 1L << (bn%32);
1065         vreg = regbits;
1066         if(!(r->act.b[z] & bb))
1067                 return vreg;
1068         for(;;) {
1069                 if(!(r->refbehind.b[z] & bb))
1070                         break;
1071                 r1 = r->p1;
1072                 if(r1 == R)
1073                         break;
1074                 if(!(r1->refahead.b[z] & bb))
1075                         break;
1076                 if(!(r1->act.b[z] & bb))
1077                         break;
1078                 r = r1;
1079         }
1080         for(;;) {
1081                 r->act.b[z] &= ~bb;
1082
1083                 vreg |= r->regu;
1084
1085                 if(r->refbehind.b[z] & bb)
1086                         for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1087                                 if(r1->refahead.b[z] & bb)
1088                                         vreg |= paint2(r1, bn);
1089
1090                 if(!(r->refahead.b[z] & bb))
1091                         break;
1092                 r1 = r->s2;
1093                 if(r1 != R)
1094                         if(r1->refbehind.b[z] & bb)
1095                                 vreg |= paint2(r1, bn);
1096                 r = r->s1;
1097                 if(r == R)
1098                         break;
1099                 if(!(r->act.b[z] & bb))
1100                         break;
1101                 if(!(r->refbehind.b[z] & bb))
1102                         break;
1103         }
1104         return vreg;
1105 }
1106
1107 void
1108 paint3(Reg *r, int bn, ulong rb, int rn)
1109 {
1110         Reg *r1;
1111         Prog *p;
1112         int z;
1113         ulong bb;
1114
1115         z = bn/32;
1116         bb = 1L << (bn%32);
1117         if(r->act.b[z] & bb)
1118                 return;
1119         for(;;) {
1120                 if(!(r->refbehind.b[z] & bb))
1121                         break;
1122                 r1 = r->p1;
1123                 if(r1 == R)
1124                         break;
1125                 if(!(r1->refahead.b[z] & bb))
1126                         break;
1127                 if(r1->act.b[z] & bb)
1128                         break;
1129                 r = r1;
1130         }
1131         if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1132                 addmove(r, bn, rn, 0);
1133         for(;;) {
1134                 r->act.b[z] |= bb;
1135                 p = r->prog;
1136
1137                 if(r->use1.b[z] & bb) {
1138                         if(debug['R'])
1139                                 print("%P", p);
1140                         addreg(&p->from, rn);
1141                         if(debug['R'])
1142                                 print("\t.c%P\n", p);
1143                 }
1144                 if((r->use2.b[z]|r->set.b[z]) & bb) {
1145                         if(debug['R'])
1146                                 print("%P", p);
1147                         addreg(&p->to, rn);
1148                         if(debug['R'])
1149                                 print("\t.c%P\n", p);
1150                 }
1151                 if(STORE(r) & r->regdiff.b[z] & bb)
1152                         addmove(r, bn, rn, 1);
1153                 r->regu |= rb;
1154
1155                 if(r->refbehind.b[z] & bb)
1156                         for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1157                                 if(r1->refahead.b[z] & bb)
1158                                         paint3(r1, bn, rb, rn);
1159
1160                 if(!(r->refahead.b[z] & bb))
1161                         break;
1162                 r1 = r->s2;
1163                 if(r1 != R)
1164                         if(r1->refbehind.b[z] & bb)
1165                                 paint3(r1, bn, rb, rn);
1166                 r = r->s1;
1167                 if(r == R)
1168                         break;
1169                 if(r->act.b[z] & bb)
1170                         break;
1171                 if(!(r->refbehind.b[z] & bb))
1172                         break;
1173         }
1174 }
1175
1176 void
1177 addreg(Adr *a, int rn)
1178 {
1179         int x;
1180
1181         a->sym = 0;
1182         x = a->index;
1183         if(rn >= D_R0 && rn < D_R0+NREG)
1184                 goto addr;
1185         if(x == (I_INDEX3|D_NONE)) {
1186                 a->type = rn | I_INDIR;
1187                 a->index = D_NONE;
1188                 a->offset = a->displace;
1189                 a->displace = 0;
1190                 return;
1191         }
1192         if(x != D_NONE) {
1193                 a->type = rn | I_INDIR;
1194                 a->index += I_INDEX1 - I_INDEX2;
1195                 a->offset = a->displace;
1196                 a->displace = 0;
1197                 return;
1198         }
1199         a->type = rn | (a->type & I_INDIR);
1200         return;
1201
1202 addr:
1203         if(x == (I_INDEX3|D_NONE)) {
1204                 a->type = D_NONE|I_INDIR;
1205                 a->index += I_INDEX1 + rn - D_NONE - I_INDEX3;
1206                 a->scale = 4;   /* .L*1 */
1207                 a->offset = a->displace;
1208                 a->displace = 0;
1209                 return;
1210         }
1211         a->type = rn | (a->type & I_INDIR);
1212 }
1213
1214 /*
1215  *      bit     reg
1216  *      0-7     R0-R7
1217  *      8-15    A0-A7
1218  *      16-23   F0-F7
1219  */
1220 ulong
1221 RtoB(int r)
1222 {
1223
1224         if(r < 0 || r >= NREG)
1225                 return 0;
1226         return 1L << (r + 0);
1227 }
1228
1229 int
1230 BtoR(ulong b)
1231 {
1232
1233         b &= 0x0000ffL;
1234         if(b == 0)
1235                 return NREG;
1236         return bitno(b) - 0;
1237 }
1238
1239 ulong
1240 AtoB(int a)
1241 {
1242
1243         if(a < 0 || a >= NREG)
1244                 return 0;
1245         return 1L << (a + NREG);
1246 }
1247
1248 int
1249 BtoA(ulong b)
1250 {
1251
1252         b &= 0x00ff00L;
1253         if(b == 0)
1254                 return NREG;
1255         return bitno(b) - NREG;
1256 }
1257
1258 ulong
1259 FtoB(int f)
1260 {
1261
1262         if(f < 0 || f >= NREG)
1263                 return 0;
1264         return 1L << (f + NREG+NREG);
1265 }
1266
1267 int
1268 BtoF(ulong b)
1269 {
1270
1271         b &= 0xff0000L;
1272         if(b == 0)
1273                 return NREG;
1274         return bitno(b) - NREG-NREG;
1275 }