]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/1c/reg.c
9bootfat: rename open() to fileinit and make it static as its really a internal funct...
[plan9front.git] / sys / src / cmd / 1c / reg.c
1 #include "gc.h"
2
3 Reg*
4 rega(void)
5 {
6         Reg *r;
7
8         r = freer;
9         if(r == R) {
10                 r = alloc(sizeof(*r));
11         } else
12                 freer = r->link;
13
14         *r = zreg;
15         return r;
16 }
17
18 int
19 rcmp(const void *a1, const void *a2)
20 {
21         Rgn *p1, *p2;
22         int c1, c2;
23
24         p1 = (Rgn*)a1;
25         p2 = (Rgn*)a2;
26         c1 = p2->costr;
27         if(p2->costa > c1)
28                 c1 = p2->costa;
29         c2 = p1->costr;
30         if(p1->costa > c2)
31                 c2 = p1->costa;
32         if(c1 -= c2)
33                 return c1;
34         return p2->varno - p1->varno;
35 }
36
37 void
38 regopt(Prog *p)
39 {
40         Reg *r, *r1, *r2;
41         Prog *p1;
42         int i, z;
43         long val, initpc, npc;
44         ulong vreg;
45         Bits bit;
46         Var *v;
47         struct {
48                 long    m;
49                 long    c;
50                 Reg*    p;
51         } log5[6], *lp;
52
53         firstr = R;
54         lastr = R;
55         nvar = 0;
56         for(z=0; z<BITS; z++) {
57                 externs.b[z] = 0;
58                 params.b[z] = 0;
59                 addrs.b[z] = 0;
60         }
61         regbits = RtoB(0) |             /* return reg */
62                 AtoB(6) | AtoB(7) |     /* sp and sb */
63                 FtoB(0) | FtoB(1);      /* floating return reg */
64         for(i=0; i<NREG; i++) {
65                 if(regused[i])
66                         regbits |= RtoB(i);
67                 if(fregused[i])
68                         regbits |= FtoB(i);
69                 if(aregused[i])
70                         regbits |= AtoB(i);
71         }
72
73         /*
74          * pass 1
75          * build aux data structure
76          * allocate pcs
77          * find use and set of variables
78          */
79         val = 5L * 5L * 5L * 5L * 5L;
80         lp = log5;
81         for(i=0; i<5; i++) {
82                 lp->m = val;
83                 lp->c = 0;
84                 lp->p = R;
85                 val /= 5L;
86                 lp++;
87         }
88         val = 0;
89         for(; p != P; p = p->link) {
90                 switch(p->as) {
91                 case ADATA:
92                 case AGLOBL:
93                 case ANAME:
94                 case ASIGNAME:
95                         continue;
96                 }
97                 r = rega();
98                 if(firstr == R) {
99                         firstr = r;
100                         lastr = r;
101                 } else {
102                         lastr->link = r;
103                         r->p1 = lastr;
104                         lastr->s1 = r;
105                         lastr = r;
106                 }
107                 r->prog = p;
108                 r->pc = val;
109                 val++;
110
111                 lp = log5;
112                 for(i=0; i<5; i++) {
113                         lp->c--;
114                         if(lp->c <= 0) {
115                                 lp->c = lp->m;
116                                 if(lp->p != R)
117                                         lp->p->log5 = r;
118                                 lp->p = r;
119                                 (lp+1)->c = 0;
120                                 break;
121                         }
122                         lp++;
123                 }
124
125                 r1 = r->p1;
126                 if(r1 != R)
127                 switch(r1->prog->as) {
128                 case ABRA:
129                 case ARTS:
130                 case ARTE:
131                         r->p1 = R;
132                         r1->s1 = R;
133                 }
134
135                 bit = mkvar(&p->from, AGOK);
136                 if(bany(&bit))
137                 switch(p->as) {
138                 case ALEA:
139                         if(!(mvbits & B_INDIR))
140                         for(z=0; z<BITS; z++)
141                                 addrs.b[z] |= bit.b[z];
142
143                 default:
144                         if(mvbits & B_ADDR)
145                         for(z=0; z<BITS; z++)
146                                 addrs.b[z] |= bit.b[z];
147                         for(z=0; z<BITS; z++)
148                                 r->use1.b[z] |= bit.b[z];
149                 }
150
151                 bit = mkvar(&p->to, p->as);
152                 if(bany(&bit))
153                 switch(p->as) {
154                 case ABSR:      /* funny */
155                         for(z=0; z<BITS; z++)
156                                 addrs.b[z] |= bit.b[z];
157                         goto def;
158
159                 case APEA:
160                         if(!(mvbits & B_INDIR))
161                         for(z=0; z<BITS; z++)
162                                 addrs.b[z] |= bit.b[z];
163
164                 def:
165                 case ACMPB: case ACMPW: case ACMPL:
166                 case AFCMPF: case AFCMPD:
167                 case ATSTB: case ATSTW: case ATSTL:
168                 case AFTSTF: case AFTSTD:
169                 case ABFEXTU: case ABFEXTS:
170                         if(mvbits & B_ADDR)
171                         for(z=0; z<BITS; z++)
172                                 addrs.b[z] |= bit.b[z];
173                         for(z=0; z<BITS; z++)
174                                 r->use2.b[z] |= bit.b[z];
175                         break;
176
177                 default:
178                         diag(Z, "reg: unknown asop: %A", p->as);
179
180                 case AADDB: case AADDW: case AADDL:
181                 case ASUBB: case ASUBW: case ASUBL:
182                 case AANDB: case AANDW: case AANDL:
183                 case AORB: case AORW: case AORL:
184                 case AEORB: case AEORW: case AEORL:
185                 case ABFINS:
186                         for(z=0; z<BITS; z++)
187                                 r->use2.b[z] |= bit.b[z];
188
189                 case ANOP:
190                 case AMOVB: case AMOVW: case AMOVL:
191                 case AFMOVEB: case AFMOVEW: case AFMOVEL:
192                 case ACLRB: case ACLRW: case ACLRL:
193                 case AFMOVEF: case AFMOVED:
194                         if(mvbits & B_INDIR)
195                         for(z=0; z<BITS; z++)
196                                 r->use2.b[z] |= bit.b[z];
197                         else
198                         for(z=0; z<BITS; z++)
199                                 r->set.b[z] |= bit.b[z];
200                         break;
201
202                 }
203         }
204         if(firstr == R)
205                 return;
206         initpc = pc - val;
207         npc = val;
208
209         /*
210          * pass 2
211          * turn branch references to pointers
212          * build back pointers
213          */
214         for(r = firstr; r != R; r = r->link) {
215                 p = r->prog;
216                 if(p->to.type == D_BRANCH) {
217                         val = p->to.offset - initpc;
218                         r1 = firstr;
219                         while(r1 != R) {
220                                 r2 = r1->log5;
221                                 if(r2 != R && val >= r2->pc) {
222                                         r1 = r2;
223                                         continue;
224                                 }
225                                 if(r1->pc == val)
226                                         break;
227                                 r1 = r1->link;
228                         }
229                         if(r1 == R) {
230                                 diag(Z, "ref not found\n%L:%P", p->lineno, p);
231                                 continue;
232                         }
233                         if(r1 == r) {
234                                 diag(Z, "ref to self");
235                                 continue;
236                         }
237                         r->s2 = r1;
238                         r->p2link = r1->p2;
239                         r1->p2 = r;
240                 }
241         }
242         if(debug['R'])
243                 print("\n%L %D\n", firstr->prog->lineno, &firstr->prog->from);
244
245         /*
246          * pass 2.5
247          * find looping structure
248          */
249         for(r = firstr; r != R; r = r->link)
250                 r->active = 0;
251         changer = 0;
252         loopit(firstr, npc);
253         if(debug['R'] && debug['v']) {
254                 print("\nlooping structure:\n");
255                 for(r = firstr; r != R; r = r->link) {
256                         print("%ld:%P", r->loop, r->prog);
257                         for(z=0; z<BITS; z++)
258                                 bit.b[z] = r->use1.b[z] |
259                                         r->use2.b[z] | r->set.b[z];
260                         if(bany(&bit)) {
261                                 print("\t");
262                                 if(bany(&r->use1))
263                                         print(" u1=%B", r->use1);
264                                 if(bany(&r->use2))
265                                         print(" u2=%B", r->use2);
266                                 if(bany(&r->set))
267                                         print(" st=%B", r->set);
268                         }
269                         print("\n");
270                 }
271         }
272
273         /*
274          * pass 3
275          * iterate propagating usage
276          *      back until flow graph is complete
277          */
278 loop1:
279         changer = 0;
280         for(r = firstr; r != R; r = r->link)
281                 r->active = 0;
282         for(r = firstr; r != R; r = r->link)
283                 if(r->prog->as == ARTS)
284                         prop(r, zbits, zbits);
285 loop11:
286         /* pick up unreachable code */
287         i = 0;
288         for(r = firstr; r != R; r = r1) {
289                 r1 = r->link;
290                 if(r1 && r1->active && !r->active) {
291                         prop(r, zbits, zbits);
292                         i = 1;
293                 }
294         }
295         if(i)
296                 goto loop11;
297         if(changer)
298                 goto loop1;
299
300         /*
301          * pass 4
302          * iterate propagating register/variable synchrony
303          *      forward until graph is complete
304          */
305 loop2:
306         changer = 0;
307         for(r = firstr; r != R; r = r->link)
308                 r->active = 0;
309         synch(firstr, zbits);
310         if(changer)
311                 goto loop2;
312
313
314         /*
315          * pass 5
316          * isolate regions
317          * calculate costs (paint1)
318          */
319         r = firstr;
320         if(r) {
321                 for(z=0; z<BITS; z++)
322                         bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
323                           ~(externs.b[z] | params.b[z] | addrs.b[z]);
324                 if(bany(&bit)) {
325                         nearln = r->prog->lineno;
326                         warn(Z, "used and not set: %B", bit);
327                         if(debug['R'] && !debug['w'])
328                                 print("used and not set: %B\n", bit);
329
330                         /*
331                          * 68040 'feature':
332                          *      load of a denormalized fp will trap
333                          */
334                         while(bany(&bit)) {
335                                 i = bnum(bit);
336                                 bit.b[i/32] &= ~(1L << (i%32));
337                                 v = var + i;
338                                 if(v->type == D_AUTO) {
339                                         r->set.b[i/32] |= (1L << (i%32));
340                                         if(typefd[v->etype])
341                                                 addmove(r, i, NREG+NREG, 1);
342                                 }
343                         }
344                 }
345         }
346         if(debug['R'] && debug['v'])
347                 print("\nprop structure:\n");
348         for(r = firstr; r != R; r = r->link) {
349                 if(debug['R'] && debug['v'])
350                         print("%P\n     set = %B; rah = %B; cal = %B\n",
351                                 r->prog, r->set, r->refahead, r->calahead);
352                 r->act = zbits;
353         }
354         rgp = region;
355         nregion = 0;
356         for(r = firstr; r != R; r = r->link) {
357                 for(z=0; z<BITS; z++)
358                         bit.b[z] = r->set.b[z] &
359                           ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
360                 if(bany(&bit)) {
361                         nearln = r->prog->lineno;
362                         warn(Z, "set and not used: %B", bit);
363                         if(debug['R'])
364                                 print("set an not used: %B\n", bit);
365                         excise(r);
366                 }
367                 for(z=0; z<BITS; z++)
368                         bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
369                 while(bany(&bit)) {
370                         i = bnum(bit);
371                         rgp->enter = r;
372                         rgp->varno = i;
373                         changer = 0;
374                         changea = 0;
375                         if(debug['R'] && debug['v'])
376                                 print("\n");
377                         paint1(r, i);
378                         bit.b[i/32] &= ~(1L<<(i%32));
379                         if(changer <= 0 && changea <= 0) {
380                                 if(debug['R'])
381                                         print("%L$%d.%d: %B\n",
382                                                 r->prog->lineno,
383                                                 changer, changea, blsh(i));
384                                 continue;
385                         }
386                         rgp->costr = changer;
387                         rgp->costa = changea;
388                         nregion++;
389                         if(nregion >= NRGN) {
390                                 warn(Z, "too many regions");
391                                 goto brk;
392                         }
393                         rgp++;
394                 }
395         }
396 brk:
397         qsort(region, nregion, sizeof(region[0]), rcmp);
398
399         /*
400          * pass 6
401          * determine used registers (paint2)
402          * replace code (paint3)
403          */
404         rgp = region;
405         for(i=0; i<nregion; i++) {
406                 bit = blsh(rgp->varno);
407                 vreg = paint2(rgp->enter, rgp->varno);
408                 vreg = allreg(vreg, rgp);
409                 if(debug['R'])
410                         print("%L$%d.%d %R: %B\n",
411                                 rgp->enter->prog->lineno,
412                                 rgp->costr, rgp->costa,
413                                 rgp->regno,
414                                 bit);
415                 if(rgp->regno != D_NONE)
416                         paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
417                 rgp++;
418         }
419         /*
420          * pass 7
421          * peep-hole on basic block
422          */
423         if(!debug['R'] || debug['P'])
424                 peep();
425
426         /*
427          * pass 8
428          * recalculate pc
429          */
430         val = initpc;
431         for(r = firstr; r != R; r = r1) {
432                 r->pc = val;
433                 p = r->prog;
434                 p1 = P;
435                 r1 = r->link;
436                 if(r1 != R)
437                         p1 = r1->prog;
438                 for(; p != p1; p = p->link) {
439                         switch(p->as) {
440                         default:
441                                 val++;
442                                 break;
443
444                         case ANOP:
445                         case ADATA:
446                         case AGLOBL:
447                         case ANAME:
448                         case ASIGNAME:
449                                 break;
450                         }
451                 }
452         }
453         pc = val;
454
455         /*
456          * fix up branches
457          */
458         if(debug['R'])
459                 if(bany(&addrs))
460                         print("addrs: %B\n", addrs);
461
462         r1 = 0; /* set */
463         for(r = firstr; r != R; r = r->link) {
464                 p = r->prog;
465                 if(p->to.type == D_BRANCH)
466                         p->to.offset = r->s2->pc;
467                 r1 = r;
468         }
469
470         /*
471          * last pass
472          * eliminate nops
473          * free aux structures
474          */
475         for(p = firstr->prog; p != P; p = p->link){
476                 while(p->link && p->link->as == ANOP)
477                         p->link = p->link->link;
478         }
479         if(r1 != R) {
480                 r1->link = freer;
481                 freer = firstr;
482         }
483 }
484
485 /*
486  * add mov b,rn
487  * just after r
488  */
489 void
490 addmove(Reg *r, int bn, int rn, int f)
491 {
492         Prog *p, *p1;
493         Var *v;
494         int badccr;
495
496         badccr = 0;
497         p = r->prog;
498         p1 = p->link;
499         if(p1)
500         switch(p1->as) {
501         case AMOVW:
502                 if(p1->from.type == D_CCR)
503                         p = p1;
504                 break;
505
506         case ABEQ:
507         case ABNE:
508         case ABLE:
509         case ABLS:
510         case ABLT:
511         case ABMI:
512         case ABGE:
513         case ABPL:
514         case ABGT:
515         case ABHI:
516         case ABCC:
517         case ABCS:
518                 p1 = prg();
519                 p1->link = p->link;
520                 p->link = p1;
521                 p1->lineno = p->lineno;
522
523                 p1->from.type = D_CCR;
524                 p1->to.type = D_TOS;
525                 p1->as = AMOVW;
526                 p = p1;
527                 badccr = 1;
528         }
529         p1 = prg();
530         p1->link = p->link;
531         p->link = p1;
532         p1->lineno = p->lineno;
533
534         v = var + bn;
535         p1->from.sym = v->sym;
536         p1->from.type = v->type;
537         p1->from.offset = v->offset;
538         p1->from.etype = v->etype;
539         p1->to.type = rn;
540         if(f) {
541                 p1->to = p1->from;
542                 p1->from = zprog.from;
543                 p1->from.type = rn;
544         }
545         p1->as = opxt[OAS][v->etype];
546         if(badccr) {
547                 p = p1;
548                 p1 = prg();
549                 p1->link = p->link;
550                 p->link = p1;
551                 p1->lineno = p->lineno;
552
553                 p1->from.type = D_TOS;
554                 p1->to.type = D_CCR;
555                 p1->as = AMOVW;
556         }
557         if(debug['R'])
558                 print("%P\t.a%P\n", p, p1);
559 }
560
561 Bits
562 mkvar(Adr *a, int as)
563 {
564         Var *v;
565         int i, t, z;
566         long o;
567         Bits bit;
568         Sym *s;
569
570         mvbits = 0;
571         t = a->type & D_MASK;
572         switch(t) {
573
574         default:
575                 if(t >= D_R0 && t < D_R0+NREG) {
576                         regbits |= RtoB(t-D_R0);
577                         if(as == ADIVUL || as == ADIVSL)
578                                 regbits |= RtoB(t-D_R0+1);
579                 }
580                 if(t >= D_A0 && t < D_A0+NREG)
581                         regbits |= AtoB(t-D_A0);
582                 if(t >= D_F0 && t < D_F0+NREG)
583                         regbits |= FtoB(t-D_F0);
584                 goto none;
585
586         case D_EXTERN:
587         case D_STATIC:
588         case D_AUTO:
589         case D_PARAM:
590                 break;
591         }
592         s = a->sym;
593         if(s == S)
594                 goto none;
595
596         if((a->type & I_MASK) == I_ADDR)
597                 mvbits |= B_ADDR;
598
599         o = a->offset;
600         v = var;
601         for(i=0; i<nvar; i++) {
602                 if(s == v->sym)
603                 if(t == v->type)
604                 if(o == v->offset)
605                         goto out;
606                 v++;
607         }
608         if(s)
609                 if(s->name[0] == '.')
610                         goto none;
611         if(nvar >= NVAR) {
612                 if(debug['w'] > 1 && s)
613                         warn(Z, "variable not optimized: %s", s->name);
614                 goto none;
615         }
616         i = nvar;
617         nvar++;
618         v = &var[i];
619         v->sym = s;
620         v->offset = o;
621         v->etype = a->etype;
622         v->type = t;
623         if(debug['R'])
624                 print("bit=%2d et=%2d %s (%p,%d,%ld)\n",
625                         i, a->etype, s->name,
626                         v->sym, v->type, v->offset);
627
628 out:
629         bit = blsh(i);
630         if(t == D_EXTERN || t == D_STATIC)
631                 for(z=0; z<BITS; z++)
632                         externs.b[z] |= bit.b[z];
633         if(t == D_PARAM)
634                 for(z=0; z<BITS; z++)
635                         params.b[z] |= bit.b[z];
636         if(a->etype != v->etype || !typechlpfd[a->etype])
637                 for(z=0; z<BITS; z++)
638                         addrs.b[z] |= bit.b[z]; /* funny punning */
639         return bit;
640
641 none:
642         return zbits;
643 }
644
645 void
646 prop(Reg *r, Bits ref, Bits cal)
647 {
648         Reg *r1, *r2;
649         int z;
650
651         for(r1 = r; r1 != R; r1 = r1->p1) {
652                 for(z=0; z<BITS; z++) {
653                         ref.b[z] |= r1->refahead.b[z];
654                         if(ref.b[z] != r1->refahead.b[z]) {
655                                 r1->refahead.b[z] = ref.b[z];
656                                 changer++;
657                         }
658                         cal.b[z] |= r1->calahead.b[z];
659                         if(cal.b[z] != r1->calahead.b[z]) {
660                                 r1->calahead.b[z] = cal.b[z];
661                                 changer++;
662                         }
663                 }
664                 switch(r1->prog->as) {
665                 case ABSR:
666                         for(z=0; z<BITS; z++) {
667                                 cal.b[z] |= ref.b[z] | externs.b[z];
668                                 ref.b[z] = 0;
669                         }
670                         break;
671
672                 case ATEXT:
673                         for(z=0; z<BITS; z++) {
674                                 cal.b[z] = 0;
675                                 ref.b[z] = 0;
676                         }
677                         break;
678
679                 case ARTS:
680                         for(z=0; z<BITS; z++) {
681                                 cal.b[z] = externs.b[z];
682                                 ref.b[z] = 0;
683                         }
684                 }
685                 for(z=0; z<BITS; z++) {
686                         ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
687                                 r1->use1.b[z] | r1->use2.b[z];
688                         cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
689                         r1->refbehind.b[z] = ref.b[z];
690                         r1->calbehind.b[z] = cal.b[z];
691                 }
692                 if(r1->active)
693                         break;
694                 r1->active = 1;
695         }
696         for(; r != r1; r = r->p1)
697                 for(r2 = r->p2; r2 != R; r2 = r2->p2link)
698                         prop(r2, r->refbehind, r->calbehind);
699 }
700
701 /*
702  * find looping structure
703  *
704  * 1) find reverse postordering
705  * 2) find approximate dominators,
706  *      the actual dominators if the flow graph is reducible
707  *      otherwise, dominators plus some other non-dominators.
708  *      See Matthew S. Hecht and Jeffrey D. Ullman,
709  *      "Analysis of a Simple Algorithm for Global Data Flow Problems",
710  *      Conf.  Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
711  *      Oct. 1-3, 1973, pp.  207-217.
712  * 3) find all nodes with a predecessor dominated by the current node.
713  *      such a node is a loop head.
714  *      recursively, all preds with a greater rpo number are in the loop
715  */
716 long
717 postorder(Reg *r, Reg **rpo2r, long n)
718 {
719         Reg *r1;
720
721         r->rpo = 1;
722         r1 = r->s1;
723         if(r1 && !r1->rpo)
724                 n = postorder(r1, rpo2r, n);
725         r1 = r->s2;
726         if(r1 && !r1->rpo)
727                 n = postorder(r1, rpo2r, n);
728         rpo2r[n] = r;
729         n++;
730         return n;
731 }
732
733 long
734 rpolca(long *idom, long rpo1, long rpo2)
735 {
736         long t;
737
738         if(rpo1 == -1)
739                 return rpo2;
740         while(rpo1 != rpo2){
741                 if(rpo1 > rpo2){
742                         t = rpo2;
743                         rpo2 = rpo1;
744                         rpo1 = t;
745                 }
746                 while(rpo1 < rpo2){
747                         t = idom[rpo2];
748                         if(t >= rpo2)
749                                 fatal(Z, "bad idom");
750                         rpo2 = t;
751                 }
752         }
753         return rpo1;
754 }
755
756 int
757 doms(long *idom, long r, long s)
758 {
759         while(s > r)
760                 s = idom[s];
761         return s == r;
762 }
763
764 int
765 loophead(long *idom, Reg *r)
766 {
767         long src;
768
769         src = r->rpo;
770         if(r->p1 != R && doms(idom, src, r->p1->rpo))
771                 return 1;
772         for(r = r->p2; r != R; r = r->p2link)
773                 if(doms(idom, src, r->rpo))
774                         return 1;
775         return 0;
776 }
777
778 void
779 loopmark(Reg **rpo2r, long head, Reg *r)
780 {
781         if(r->rpo < head || r->active == head)
782                 return;
783         r->active = head;
784         r->loop += LOOP;
785         if(r->p1 != R)
786                 loopmark(rpo2r, head, r->p1);
787         for(r = r->p2; r != R; r = r->p2link)
788                 loopmark(rpo2r, head, r);
789 }
790
791 void
792 loopit(Reg *r, long nr)
793 {
794         Reg *r1;
795         long i, d, me;
796
797         if(nr > maxnr) {
798                 rpo2r = alloc(nr * sizeof(Reg*));
799                 idom = alloc(nr * sizeof(long));
800                 maxnr = nr;
801         }
802
803         d = postorder(r, rpo2r, 0);
804         if(d > nr)
805                 fatal(Z, "too many reg nodes");
806         nr = d;
807         for(i = 0; i < nr / 2; i++){
808                 r1 = rpo2r[i];
809                 rpo2r[i] = rpo2r[nr - 1 - i];
810                 rpo2r[nr - 1 - i] = r1;
811         }
812         for(i = 0; i < nr; i++)
813                 rpo2r[i]->rpo = i;
814
815         idom[0] = 0;
816         for(i = 0; i < nr; i++){
817                 r1 = rpo2r[i];
818                 me = r1->rpo;
819                 d = -1;
820                 if(r1->p1 != R && r1->p1->rpo < me)
821                         d = r1->p1->rpo;
822                 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
823                         if(r1->rpo < me)
824                                 d = rpolca(idom, d, r1->rpo);
825                 idom[i] = d;
826         }
827
828         for(i = 0; i < nr; i++){
829                 r1 = rpo2r[i];
830                 r1->loop++;
831                 if(r1->p2 != R && loophead(idom, r1))
832                         loopmark(rpo2r, i, r1);
833         }
834 }
835
836 void
837 synch(Reg *r, Bits dif)
838 {
839         Reg *r1;
840         int z;
841
842         for(r1 = r; r1 != R; r1 = r1->s1) {
843                 for(z=0; z<BITS; z++) {
844                         dif.b[z] = (dif.b[z] &
845                                 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
846                                         r1->set.b[z] | r1->regdiff.b[z];
847                         if(dif.b[z] != r1->regdiff.b[z]) {
848                                 r1->regdiff.b[z] = dif.b[z];
849                                 changer++;
850                         }
851                 }
852                 if(r1->active)
853                         break;
854                 r1->active = 1;
855                 for(z=0; z<BITS; z++)
856                         dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
857                 if(r1->s2 != R)
858                         synch(r1->s2, dif);
859         }
860 }
861
862 ulong
863 allreg(ulong b, Rgn *r)
864 {
865         Var *v;
866         int i, j;
867
868         v = var + r->varno;
869         r->regno = D_NONE;
870         switch(v->etype) {
871
872         default:
873                 diag(Z, "unknown etype");
874                 break;
875
876         case TCHAR:
877         case TUCHAR:
878         case TSHORT:
879         case TUSHORT:
880         case TINT:
881         case TUINT:
882         case TLONG:
883         case TULONG:
884         case TIND:
885                 i = BtoR(~b);
886                 j = BtoA(~b);
887                 if(r->costa == r->costr)
888                         if(i > j)
889                                 i = NREG;
890                 if(j < NREG && r->costa > 0)
891                 if(r->costa > r->costr || i >= NREG) {
892                         r->regno = D_A0 + j;
893                         return AtoB(j);
894                 }
895                 if(i < NREG && r->costr > 0) {
896                         r->regno = D_R0 + i;
897                         return RtoB(i);
898                 }
899                 break;
900
901         case TDOUBLE:
902         case TFLOAT:
903                 i = BtoF(~b);
904                 if(i < NREG) {
905                         r->regno = D_F0 + i;
906                         return FtoB(i);
907                 }
908                 break;
909         }
910         return 0;
911 }
912
913 void
914 paint1(Reg *r, int bn)
915 {
916         Reg *r1;
917         Prog *p;
918         int z;
919         ulong bb;
920         int x;
921
922         z = bn/32;
923         bb = 1L<<(bn%32);
924         if(r->act.b[z] & bb)
925                 return;
926         for(;;) {
927                 if(!(r->refbehind.b[z] & bb))
928                         break;
929                 r1 = r->p1;
930                 if(r1 == R)
931                         break;
932                 if(!(r1->refahead.b[z] & bb))
933                         break;
934                 if(r1->act.b[z] & bb)
935                         break;
936                 r = r1;
937         }
938         if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
939                 changer -= CLOAD * r->loop;
940                 changea -= CLOAD * r->loop;
941                 if(debug['R'] && debug['v'])
942                         print("%ld%P\tld %B $%d.%d\n", r->loop,
943                                 r->prog, blsh(bn), changer, changea);
944         }
945         for(;;) {
946                 r->act.b[z] |= bb;
947                 p = r->prog;
948
949                 if(r->use1.b[z] & bb) {
950                         changer += CREF * r->loop;
951                         changea += CREF * r->loop;
952                         switch(p->as) {
953                         default:
954                                 changea = -CINF;
955                         case AADDL:
956                         case ASUBL:
957                         case AMOVL:
958                         case ACMPL:
959                                 break;
960                         }
961                         if(p->as == AMOVL) {
962                                 x = p->to.type;
963                                 if(x >= D_R0 && x < D_R0+NREG)
964                                         changer += r->loop;
965                                 if(x >= D_A0 && x < D_A0+NREG)
966                                         changea += r->loop;
967                         }
968                         if(debug['R'] && debug['v'])
969                                 print("%ld%P\tu1 %B $%d.%d\n", r->loop,
970                                         p, blsh(bn), changer, changea);
971                 }
972                 if((r->use2.b[z]|r->set.b[z]) & bb) {
973                         changer += CREF * r->loop;
974                         changea += CREF * r->loop;
975                         switch(p->as) {
976                         default:
977                                 changea = -CINF;
978                                 break;
979                         case AMOVL:
980                         case AADDL:
981                         case ACMPL:
982                         case ASUBL:
983                         case ACLRL:     /* can be faked */
984                         case ATSTL:     /* can be faked */
985                                 break;
986                         }
987                         if(p->as == AMOVL) {
988                                 x = p->from.type;
989                                 if(x >= D_R0 && x < D_R0+NREG)
990                                         changer += r->loop;
991                                 if(x >= D_A0 && x < D_A0+NREG)
992                                         changea += r->loop;
993                         }
994                         if(debug['R'] && debug['v'])
995                                 print("%ld%P\tu2 %B $%d.%d\n", r->loop,
996                                         p, blsh(bn), changer, changea);
997                 }
998                 if(STORE(r) & r->regdiff.b[z] & bb) {
999                         changer -= CLOAD * r->loop;
1000                         changea -= CLOAD * r->loop;
1001                         if(debug['R'] && debug['v'])
1002                                 print("%ld%P\tst %B $%d.%d\n", r->loop,
1003                                         p, blsh(bn), changer, changea);
1004                 }
1005
1006                 if(r->refbehind.b[z] & bb)
1007                         for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1008                                 if(r1->refahead.b[z] & bb)
1009                                         paint1(r1, bn);
1010
1011                 if(!(r->refahead.b[z] & bb))
1012                         break;
1013                 r1 = r->s2;
1014                 if(r1 != R)
1015                         if(r1->refbehind.b[z] & bb)
1016                                 paint1(r1, bn);
1017                 r = r->s1;
1018                 if(r == R)
1019                         break;
1020                 if(r->act.b[z] & bb)
1021                         break;
1022                 if(!(r->refbehind.b[z] & bb))
1023                         break;
1024         }
1025 }
1026
1027 ulong
1028 paint2(Reg *r, int bn)
1029 {
1030         Reg *r1;
1031         int z;
1032         ulong bb, vreg;
1033
1034         z = bn/32;
1035         bb = 1L << (bn%32);
1036         vreg = regbits;
1037         if(!(r->act.b[z] & bb))
1038                 return vreg;
1039         for(;;) {
1040                 if(!(r->refbehind.b[z] & bb))
1041                         break;
1042                 r1 = r->p1;
1043                 if(r1 == R)
1044                         break;
1045                 if(!(r1->refahead.b[z] & bb))
1046                         break;
1047                 if(!(r1->act.b[z] & bb))
1048                         break;
1049                 r = r1;
1050         }
1051         for(;;) {
1052                 r->act.b[z] &= ~bb;
1053
1054                 vreg |= r->regu;
1055
1056                 if(r->refbehind.b[z] & bb)
1057                         for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1058                                 if(r1->refahead.b[z] & bb)
1059                                         vreg |= paint2(r1, bn);
1060
1061                 if(!(r->refahead.b[z] & bb))
1062                         break;
1063                 r1 = r->s2;
1064                 if(r1 != R)
1065                         if(r1->refbehind.b[z] & bb)
1066                                 vreg |= paint2(r1, bn);
1067                 r = r->s1;
1068                 if(r == R)
1069                         break;
1070                 if(!(r->act.b[z] & bb))
1071                         break;
1072                 if(!(r->refbehind.b[z] & bb))
1073                         break;
1074         }
1075         return vreg;
1076 }
1077
1078 void
1079 paint3(Reg *r, int bn, ulong rb, int rn)
1080 {
1081         Reg *r1;
1082         Prog *p;
1083         int z;
1084         ulong bb;
1085
1086         z = bn/32;
1087         bb = 1L << (bn%32);
1088         if(r->act.b[z] & bb)
1089                 return;
1090         for(;;) {
1091                 if(!(r->refbehind.b[z] & bb))
1092                         break;
1093                 r1 = r->p1;
1094                 if(r1 == R)
1095                         break;
1096                 if(!(r1->refahead.b[z] & bb))
1097                         break;
1098                 if(r1->act.b[z] & bb)
1099                         break;
1100                 r = r1;
1101         }
1102         if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1103                 addmove(r, bn, rn, 0);
1104         for(;;) {
1105                 r->act.b[z] |= bb;
1106                 p = r->prog;
1107
1108                 if(r->use1.b[z] & bb) {
1109                         if(debug['R'])
1110                                 print("%P", p);
1111                         addreg(&p->from, rn);
1112                         if(debug['R'])
1113                                 print("\t.c%P\n", p);
1114                 }
1115                 if((r->use2.b[z]|r->set.b[z]) & bb) {
1116                         if(debug['R'])
1117                                 print("%P", p);
1118                         addreg(&p->to, rn);
1119                         if(debug['R'])
1120                                 print("\t.c%P\n", p);
1121                 }
1122                 if(STORE(r) & r->regdiff.b[z] & bb)
1123                         addmove(r, bn, rn, 1);
1124                 r->regu |= rb;
1125
1126                 if(r->refbehind.b[z] & bb)
1127                         for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1128                                 if(r1->refahead.b[z] & bb)
1129                                         paint3(r1, bn, rb, rn);
1130
1131                 if(!(r->refahead.b[z] & bb))
1132                         break;
1133                 r1 = r->s2;
1134                 if(r1 != R)
1135                         if(r1->refbehind.b[z] & bb)
1136                                 paint3(r1, bn, rb, rn);
1137                 r = r->s1;
1138                 if(r == R)
1139                         break;
1140                 if(r->act.b[z] & bb)
1141                         break;
1142                 if(!(r->refbehind.b[z] & bb))
1143                         break;
1144         }
1145 }
1146
1147 void
1148 addreg(Adr *a, int rn)
1149 {
1150         a->sym = 0;
1151         if(rn >= D_R0 && rn < D_R0+NREG)
1152                 goto addr;
1153         a->type = rn | (a->type & I_INDIR);
1154         return;
1155
1156 addr:
1157         a->type = rn | (a->type & I_INDIR);
1158 }
1159
1160 /*
1161  *      bit     reg
1162  *      0-7     R0-R7
1163  *      8-15    A0-A7
1164  *      16-23   F0-F7
1165  */
1166 ulong
1167 RtoB(int r)
1168 {
1169
1170         if(r < 0 || r >= NREG)
1171                 return 0;
1172         return 1L << (r + 0);
1173 }
1174
1175 int
1176 BtoR(ulong b)
1177 {
1178
1179         b &= 0x0000ffL;
1180         if(b == 0)
1181                 return NREG;
1182         return bitno(b) - 0;
1183 }
1184
1185 ulong
1186 AtoB(int a)
1187 {
1188
1189         if(a < 0 || a >= NREG)
1190                 return 0;
1191         return 1L << (a + NREG);
1192 }
1193
1194 int
1195 BtoA(ulong b)
1196 {
1197
1198         b &= 0x00ff00L;
1199         if(b == 0)
1200                 return NREG;
1201         return bitno(b) - NREG;
1202 }
1203
1204 ulong
1205 FtoB(int f)
1206 {
1207
1208         if(f < 0 || f >= NREG)
1209                 return 0;
1210         return 1L << (f + NREG+NREG);
1211 }
1212
1213 int
1214 BtoF(ulong b)
1215 {
1216
1217         b &= 0xff0000L;
1218         if(b == 0)
1219                 return NREG;
1220         return bitno(b) - NREG-NREG;
1221 }