]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/6c/reg.c
upas/fs: remove useless loop in rf822()
[plan9front.git] / sys / src / cmd / 6c / reg.c
1 #include "gc.h"
2
3 Reg*
4 rega(void)
5 {
6         Reg *r;
7
8         r = freer;
9         if(r == R) {
10                 r = alloc(sizeof(*r));
11         } else
12                 freer = r->link;
13
14         *r = zreg;
15         return r;
16 }
17
18 int
19 rcmp(const void *a1, const void *a2)
20 {
21         Rgn *p1, *p2;
22         int c1, c2;
23
24         p1 = (Rgn*)a1;
25         p2 = (Rgn*)a2;
26         c1 = p2->cost;
27         c2 = p1->cost;
28         if(c1 -= c2)
29                 return c1;
30         return p2->varno - p1->varno;
31 }
32
33 void
34 regopt(Prog *p)
35 {
36         Reg *r, *r1, *r2;
37         Prog *p1;
38         int i, z;
39         long initpc, val, npc;
40         ulong vreg;
41         Bits bit;
42         struct
43         {
44                 long    m;
45                 long    c;
46                 Reg*    p;
47         } log5[6], *lp;
48
49         firstr = R;
50         lastr = R;
51         nvar = 0;
52         regbits = RtoB(D_SP) | RtoB(D_AX) | RtoB(D_X0);
53         if(REGEXT)
54                 regbits |= RtoB(REGEXT) | RtoB(REGEXT-1);
55         for(z=0; z<BITS; z++) {
56                 externs.b[z] = 0;
57                 params.b[z] = 0;
58                 consts.b[z] = 0;
59                 addrs.b[z] = 0;
60         }
61
62         /*
63          * pass 1
64          * build aux data structure
65          * allocate pcs
66          * find use and set of variables
67          */
68         val = 5L * 5L * 5L * 5L * 5L;
69         lp = log5;
70         for(i=0; i<5; i++) {
71                 lp->m = val;
72                 lp->c = 0;
73                 lp->p = R;
74                 val /= 5L;
75                 lp++;
76         }
77         val = 0;
78         for(; p != P; p = p->link) {
79                 switch(p->as) {
80                 case ADATA:
81                 case AGLOBL:
82                 case ANAME:
83                 case ASIGNAME:
84                         continue;
85                 }
86                 r = rega();
87                 if(firstr == R) {
88                         firstr = r;
89                         lastr = r;
90                 } else {
91                         lastr->link = r;
92                         r->p1 = lastr;
93                         lastr->s1 = r;
94                         lastr = r;
95                 }
96                 r->prog = p;
97                 r->pc = val;
98                 val++;
99
100                 lp = log5;
101                 for(i=0; i<5; i++) {
102                         lp->c--;
103                         if(lp->c <= 0) {
104                                 lp->c = lp->m;
105                                 if(lp->p != R)
106                                         lp->p->log5 = r;
107                                 lp->p = r;
108                                 (lp+1)->c = 0;
109                                 break;
110                         }
111                         lp++;
112                 }
113
114                 r1 = r->p1;
115                 if(r1 != R)
116                 switch(r1->prog->as) {
117                 case ARET:
118                 case AJMP:
119                 case AIRETL:
120                 case AIRETQ:
121                         r->p1 = R;
122                         r1->s1 = R;
123                 }
124
125                 bit = mkvar(r, &p->from);
126                 if(bany(&bit))
127                 switch(p->as) {
128                 /*
129                  * funny
130                  */
131                 case ALEAL:
132                 case ALEAQ:
133                         for(z=0; z<BITS; z++)
134                                 addrs.b[z] |= bit.b[z];
135                         break;
136
137                 /*
138                  * left side read
139                  */
140                 default:
141                         for(z=0; z<BITS; z++)
142                                 r->use1.b[z] |= bit.b[z];
143                         break;
144                 }
145
146                 bit = mkvar(r, &p->to);
147                 if(bany(&bit))
148                 switch(p->as) {
149                 default:
150                         diag(Z, "reg: unknown op: %A", p->as);
151                         break;
152
153                 /*
154                  * right side read
155                  */
156                 case ACMPB:
157                 case ACMPL:
158                 case ACMPQ:
159                 case ACMPW:
160                 case ACOMISS:
161                 case ACOMISD:
162                 case AUCOMISS:
163                 case AUCOMISD:
164                         for(z=0; z<BITS; z++)
165                                 r->use2.b[z] |= bit.b[z];
166                         break;
167
168                 /*
169                  * right side write
170                  */
171                 case ANOP:
172                 case AMOVL:
173                 case AMOVQ:
174                 case AMOVB:
175                 case AMOVW:
176                 case AMOVBLSX:
177                 case AMOVBLZX:
178                 case AMOVBQSX:
179                 case AMOVBQZX:
180                 case AMOVLQSX:
181                 case AMOVLQZX:
182                 case AMOVWLSX:
183                 case AMOVWLZX:
184                 case AMOVWQSX:
185                 case AMOVWQZX:
186                 case AMOVQL:
187
188                 case AMOVSS:
189                 case AMOVSD:
190                 case ACVTSD2SL:
191                 case ACVTSD2SQ:
192                 case ACVTSD2SS:
193                 case ACVTSL2SD:
194                 case ACVTSL2SS:
195                 case ACVTSQ2SD:
196                 case ACVTSQ2SS:
197                 case ACVTSS2SD:
198                 case ACVTSS2SL:
199                 case ACVTSS2SQ:
200                 case ACVTTSD2SL:
201                 case ACVTTSD2SQ:
202                 case ACVTTSS2SL:
203                 case ACVTTSS2SQ:
204                         for(z=0; z<BITS; z++)
205                                 r->set.b[z] |= bit.b[z];
206                         break;
207
208                 /*
209                  * right side read+write
210                  */
211                 case AADDB:
212                 case AADDL:
213                 case AADDQ:
214                 case AADDW:
215                 case AANDB:
216                 case AANDL:
217                 case AANDQ:
218                 case AANDW:
219                 case ASUBB:
220                 case ASUBL:
221                 case ASUBQ:
222                 case ASUBW:
223                 case AORB:
224                 case AORL:
225                 case AORQ:
226                 case AORW:
227                 case AXORB:
228                 case AXORL:
229                 case AXORQ:
230                 case AXORW:
231                 case ASALB:
232                 case ASALL:
233                 case ASALQ:
234                 case ASALW:
235                 case ASARB:
236                 case ASARL:
237                 case ASARQ:
238                 case ASARW:
239                 case AROLB:
240                 case AROLL:
241                 case AROLQ:
242                 case AROLW:
243                 case ARORB:
244                 case ARORL:
245                 case ARORQ:
246                 case ARORW:
247                 case ASHLB:
248                 case ASHLL:
249                 case ASHLQ:
250                 case ASHLW:
251                 case ASHRB:
252                 case ASHRL:
253                 case ASHRQ:
254                 case ASHRW:
255                 case AIMULL:
256                 case AIMULQ:
257                 case AIMULW:
258                 case ANEGL:
259                 case ANEGQ:
260                 case ANOTL:
261                 case ANOTQ:
262                 case AADCL:
263                 case AADCQ:
264                 case ASBBL:
265                 case ASBBQ:
266
267                 case AADDSD:
268                 case AADDSS:
269                 case ACMPSD:
270                 case ACMPSS:
271                 case ADIVSD:
272                 case ADIVSS:
273                 case AMAXSD:
274                 case AMAXSS:
275                 case AMINSD:
276                 case AMINSS:
277                 case AMULSD:
278                 case AMULSS:
279                 case ARCPSS:
280                 case ARSQRTSS:
281                 case ASQRTSD:
282                 case ASQRTSS:
283                 case ASUBSD:
284                 case ASUBSS:
285                 case AXORPD:
286                         for(z=0; z<BITS; z++) {
287                                 r->set.b[z] |= bit.b[z];
288                                 r->use2.b[z] |= bit.b[z];
289                         }
290                         break;
291
292                 /*
293                  * funny
294                  */
295                 case ACALL:
296                         for(z=0; z<BITS; z++)
297                                 addrs.b[z] |= bit.b[z];
298                         break;
299                 }
300
301                 switch(p->as) {
302                 case AIMULL:
303                 case AIMULQ:
304                 case AIMULW:
305                         if(p->to.type != D_NONE)
306                                 break;
307
308                 case AIDIVB:
309                 case AIDIVL:
310                 case AIDIVQ:
311                 case AIDIVW:
312                 case AIMULB:
313                 case ADIVB:
314                 case ADIVL:
315                 case ADIVQ:
316                 case ADIVW:
317                 case AMULB:
318                 case AMULL:
319                 case AMULQ:
320                 case AMULW:
321
322                 case ACWD:
323                 case ACDQ:
324                 case ACQO:
325                         r->regu |= RtoB(D_AX) | RtoB(D_DX);
326                         break;
327
328                 case AREP:
329                 case AREPN:
330                 case ALOOP:
331                 case ALOOPEQ:
332                 case ALOOPNE:
333                         r->regu |= RtoB(D_CX);
334                         break;
335
336                 case AMOVSB:
337                 case AMOVSL:
338                 case AMOVSQ:
339                 case AMOVSW:
340                 case ACMPSB:
341                 case ACMPSL:
342                 case ACMPSQ:
343                 case ACMPSW:
344                         r->regu |= RtoB(D_SI) | RtoB(D_DI);
345                         break;
346
347                 case ASTOSB:
348                 case ASTOSL:
349                 case ASTOSQ:
350                 case ASTOSW:
351                 case ASCASB:
352                 case ASCASL:
353                 case ASCASQ:
354                 case ASCASW:
355                         r->regu |= RtoB(D_AX) | RtoB(D_DI);
356                         break;
357
358                 case AINSB:
359                 case AINSL:
360                 case AINSW:
361                 case AOUTSB:
362                 case AOUTSL:
363                 case AOUTSW:
364                         r->regu |= RtoB(D_DI) | RtoB(D_DX);
365                         break;
366                 }
367         }
368         if(firstr == R)
369                 return;
370         initpc = pc - val;
371         npc = val;
372
373         /*
374          * pass 2
375          * turn branch references to pointers
376          * build back pointers
377          */
378         for(r = firstr; r != R; r = r->link) {
379                 p = r->prog;
380                 if(p->to.type == D_BRANCH) {
381                         val = p->to.offset - initpc;
382                         r1 = firstr;
383                         while(r1 != R) {
384                                 r2 = r1->log5;
385                                 if(r2 != R && val >= r2->pc) {
386                                         r1 = r2;
387                                         continue;
388                                 }
389                                 if(r1->pc == val)
390                                         break;
391                                 r1 = r1->link;
392                         }
393                         if(r1 == R) {
394                                 nearln = p->lineno;
395                                 diag(Z, "ref not found\n%P", p);
396                                 continue;
397                         }
398                         if(r1 == r) {
399                                 nearln = p->lineno;
400                                 diag(Z, "ref to self\n%P", p);
401                                 continue;
402                         }
403                         r->s2 = r1;
404                         r->p2link = r1->p2;
405                         r1->p2 = r;
406                 }
407         }
408         if(debug['R']) {
409                 p = firstr->prog;
410                 print("\n%L %D\n", p->lineno, &p->from);
411         }
412
413         /*
414          * pass 2.5
415          * find looping structure
416          */
417         for(r = firstr; r != R; r = r->link)
418                 r->active = 0;
419         change = 0;
420         loopit(firstr, npc);
421         if(debug['R'] && debug['v']) {
422                 print("\nlooping structure:\n");
423                 for(r = firstr; r != R; r = r->link) {
424                         print("%ld:%P", r->loop, r->prog);
425                         for(z=0; z<BITS; z++)
426                                 bit.b[z] = r->use1.b[z] |
427                                            r->use2.b[z] |
428                                            r->set.b[z];
429                         if(bany(&bit)) {
430                                 print("\t");
431                                 if(bany(&r->use1))
432                                         print(" u1=%B", r->use1);
433                                 if(bany(&r->use2))
434                                         print(" u2=%B", r->use2);
435                                 if(bany(&r->set))
436                                         print(" st=%B", r->set);
437                         }
438                         print("\n");
439                 }
440         }
441
442         /*
443          * pass 3
444          * iterate propagating usage
445          *      back until flow graph is complete
446          */
447 loop1:
448         change = 0;
449         for(r = firstr; r != R; r = r->link)
450                 r->active = 0;
451         for(r = firstr; r != R; r = r->link)
452                 if(r->prog->as == ARET)
453                         prop(r, zbits, zbits);
454 loop11:
455         /* pick up unreachable code */
456         i = 0;
457         for(r = firstr; r != R; r = r1) {
458                 r1 = r->link;
459                 if(r1 && r1->active && !r->active) {
460                         prop(r, zbits, zbits);
461                         i = 1;
462                 }
463         }
464         if(i)
465                 goto loop11;
466         if(change)
467                 goto loop1;
468
469
470         /*
471          * pass 4
472          * iterate propagating register/variable synchrony
473          *      forward until graph is complete
474          */
475 loop2:
476         change = 0;
477         for(r = firstr; r != R; r = r->link)
478                 r->active = 0;
479         synch(firstr, zbits);
480         if(change)
481                 goto loop2;
482
483
484         /*
485          * pass 5
486          * isolate regions
487          * calculate costs (paint1)
488          */
489         r = firstr;
490         if(r) {
491                 for(z=0; z<BITS; z++)
492                         bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
493                           ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
494                 if(bany(&bit)) {
495                         nearln = r->prog->lineno;
496                         warn(Z, "used and not set: %B", bit);
497                         if(debug['R'] && !debug['w'])
498                                 print("used and not set: %B\n", bit);
499                 }
500         }
501         if(debug['R'] && debug['v'])
502                 print("\nprop structure:\n");
503         for(r = firstr; r != R; r = r->link)
504                 r->act = zbits;
505         rgp = region;
506         nregion = 0;
507         for(r = firstr; r != R; r = r->link) {
508                 if(debug['R'] && debug['v']) {
509                         print("%P\t", r->prog);
510                         if(bany(&r->set))
511                                 print("s:%B ", r->set);
512                         if(bany(&r->refahead))
513                                 print("ra:%B ", r->refahead);
514                         if(bany(&r->calahead))
515                                 print("ca:%B ", r->calahead);
516                         print("\n");
517                 }
518                 for(z=0; z<BITS; z++)
519                         bit.b[z] = r->set.b[z] &
520                           ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
521                 if(bany(&bit)) {
522                         nearln = r->prog->lineno;
523                         warn(Z, "set and not used: %B", bit);
524                         if(debug['R'])
525                                 print("set and not used: %B\n", bit);
526                         excise(r);
527                 }
528                 for(z=0; z<BITS; z++)
529                         bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
530                 while(bany(&bit)) {
531                         i = bnum(bit);
532                         rgp->enter = r;
533                         rgp->varno = i;
534                         change = 0;
535                         if(debug['R'] && debug['v'])
536                                 print("\n");
537                         paint1(r, i);
538                         bit.b[i/32] &= ~(1L<<(i%32));
539                         if(change <= 0) {
540                                 if(debug['R'])
541                                         print("%L$%d: %B\n",
542                                                 r->prog->lineno, change, blsh(i));
543                                 continue;
544                         }
545                         rgp->cost = change;
546                         nregion++;
547                         if(nregion >= NRGN) {
548                                 warn(Z, "too many regions");
549                                 goto brk;
550                         }
551                         rgp++;
552                 }
553         }
554 brk:
555         qsort(region, nregion, sizeof(region[0]), rcmp);
556
557         /*
558          * pass 6
559          * determine used registers (paint2)
560          * replace code (paint3)
561          */
562         rgp = region;
563         for(i=0; i<nregion; i++) {
564                 bit = blsh(rgp->varno);
565                 vreg = paint2(rgp->enter, rgp->varno);
566                 vreg = allreg(vreg, rgp);
567                 if(debug['R']) {
568                         print("%L$%d %R: %B\n",
569                                 rgp->enter->prog->lineno,
570                                 rgp->cost,
571                                 rgp->regno,
572                                 bit);
573                 }
574                 if(rgp->regno != 0)
575                         paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
576                 rgp++;
577         }
578         /*
579          * pass 7
580          * peep-hole on basic block
581          */
582         if(!debug['R'] || debug['P'])
583                 peep();
584
585         /*
586          * pass 8
587          * recalculate pc
588          */
589         val = initpc;
590         for(r = firstr; r != R; r = r1) {
591                 r->pc = val;
592                 p = r->prog;
593                 p1 = P;
594                 r1 = r->link;
595                 if(r1 != R)
596                         p1 = r1->prog;
597                 for(; p != p1; p = p->link) {
598                         switch(p->as) {
599                         default:
600                                 val++;
601                                 break;
602
603                         case ANOP:
604                         case ADATA:
605                         case AGLOBL:
606                         case ANAME:
607                         case ASIGNAME:
608                                 break;
609                         }
610                 }
611         }
612         pc = val;
613
614         /*
615          * fix up branches
616          */
617         if(debug['R'])
618                 if(bany(&addrs))
619                         print("addrs: %B\n", addrs);
620
621         r1 = 0; /* set */
622         for(r = firstr; r != R; r = r->link) {
623                 p = r->prog;
624                 if(p->to.type == D_BRANCH)
625                         p->to.offset = r->s2->pc;
626                 r1 = r;
627         }
628
629         /*
630          * last pass
631          * eliminate nops
632          * free aux structures
633          */
634         for(p = firstr->prog; p != P; p = p->link){
635                 while(p->link && p->link->as == ANOP)
636                         p->link = p->link->link;
637         }
638         if(r1 != R) {
639                 r1->link = freer;
640                 freer = firstr;
641         }
642 }
643
644 /*
645  * add mov b,rn
646  * just after r
647  */
648 void
649 addmove(Reg *r, int bn, int rn, int f)
650 {
651         Prog *p, *p1;
652         Adr *a;
653         Var *v;
654
655         p1 = alloc(sizeof(*p1));
656         *p1 = zprog;
657         p = r->prog;
658
659         p1->link = p->link;
660         p->link = p1;
661         p1->lineno = p->lineno;
662
663         v = var + bn;
664
665         a = &p1->to;
666         a->sym = v->sym;
667         a->offset = v->offset;
668         a->etype = v->etype;
669         a->type = v->name;
670
671         p1->as = AMOVL;
672         if(v->etype == TCHAR || v->etype == TUCHAR)
673                 p1->as = AMOVB;
674         if(v->etype == TSHORT || v->etype == TUSHORT)
675                 p1->as = AMOVW;
676         if(v->etype == TVLONG || v->etype == TUVLONG || v->etype == TIND)
677                 p1->as = AMOVQ;
678         if(v->etype == TFLOAT)
679                 p1->as = AMOVSS;
680         if(v->etype == TDOUBLE)
681                 p1->as = AMOVSD;
682
683         p1->from.type = rn;
684         if(!f) {
685                 p1->from = *a;
686                 *a = zprog.from;
687                 a->type = rn;
688                 if(v->etype == TUCHAR)
689                         p1->as = AMOVB;
690                 if(v->etype == TUSHORT)
691                         p1->as = AMOVW;
692         }
693         if(debug['R'])
694                 print("%P\t.a%P\n", p, p1);
695 }
696
697 ulong
698 doregbits(int r)
699 {
700         ulong b;
701
702         b = 0;
703         if(r >= D_INDIR)
704                 r -= D_INDIR;
705         if(r >= D_AX && r <= D_R15)
706                 b |= RtoB(r);
707         else
708         if(r >= D_AL && r <= D_R15B)
709                 b |= RtoB(r-D_AL+D_AX);
710         else
711         if(r >= D_AH && r <= D_BH)
712                 b |= RtoB(r-D_AH+D_AX);
713         else
714         if(r >= D_X0 && r <= D_X0+15)
715                 b |= FtoB(r);
716         return b;
717 }
718
719 Bits
720 mkvar(Reg *r, Adr *a)
721 {
722         Var *v;
723         int i, t, n, et, z;
724         long o;
725         Bits bit;
726         Sym *s;
727
728         /*
729          * mark registers used
730          */
731         t = a->type;
732         r->regu |= doregbits(t);
733         r->regu |= doregbits(a->index);
734
735         switch(t) {
736         default:
737                 goto none;
738         case D_ADDR:
739                 a->type = a->index;
740                 bit = mkvar(r, a);
741                 for(z=0; z<BITS; z++)
742                         addrs.b[z] |= bit.b[z];
743                 a->type = t;
744                 goto none;
745         case D_EXTERN:
746         case D_STATIC:
747         case D_PARAM:
748         case D_AUTO:
749                 n = t;
750                 break;
751         }
752         s = a->sym;
753         if(s == S)
754                 goto none;
755         if(s->name[0] == '.')
756                 goto none;
757         et = a->etype;
758         o = a->offset;
759         v = var;
760         for(i=0; i<nvar; i++) {
761                 if(s == v->sym)
762                 if(n == v->name)
763                 if(o == v->offset)
764                         goto out;
765                 v++;
766         }
767         if(nvar >= NVAR) {
768                 if(debug['w'] > 1 && s)
769                         warn(Z, "variable not optimized: %s", s->name);
770                 goto none;
771         }
772         i = nvar;
773         nvar++;
774         v = &var[i];
775         v->sym = s;
776         v->offset = o;
777         v->name = n;
778         v->etype = et;
779         if(debug['R'])
780                 print("bit=%2d et=%2d %D\n", i, et, a);
781
782 out:
783         bit = blsh(i);
784         if(n == D_EXTERN || n == D_STATIC)
785                 for(z=0; z<BITS; z++)
786                         externs.b[z] |= bit.b[z];
787         if(n == D_PARAM)
788                 for(z=0; z<BITS; z++)
789                         params.b[z] |= bit.b[z];
790         if(v->etype != et || !(typechlpfd[et] || typev[et]))    /* funny punning */
791                 for(z=0; z<BITS; z++)
792                         addrs.b[z] |= bit.b[z];
793         return bit;
794
795 none:
796         return zbits;
797 }
798
799 void
800 prop(Reg *r, Bits ref, Bits cal)
801 {
802         Reg *r1, *r2;
803         int z;
804
805         for(r1 = r; r1 != R; r1 = r1->p1) {
806                 for(z=0; z<BITS; z++) {
807                         ref.b[z] |= r1->refahead.b[z];
808                         if(ref.b[z] != r1->refahead.b[z]) {
809                                 r1->refahead.b[z] = ref.b[z];
810                                 change++;
811                         }
812                         cal.b[z] |= r1->calahead.b[z];
813                         if(cal.b[z] != r1->calahead.b[z]) {
814                                 r1->calahead.b[z] = cal.b[z];
815                                 change++;
816                         }
817                 }
818                 switch(r1->prog->as) {
819                 case ACALL:
820                         for(z=0; z<BITS; z++) {
821                                 cal.b[z] |= ref.b[z] | externs.b[z];
822                                 ref.b[z] = 0;
823                         }
824                         break;
825
826                 case ATEXT:
827                         for(z=0; z<BITS; z++) {
828                                 cal.b[z] = 0;
829                                 ref.b[z] = 0;
830                         }
831                         break;
832
833                 case ARET:
834                         for(z=0; z<BITS; z++) {
835                                 cal.b[z] = externs.b[z];
836                                 ref.b[z] = 0;
837                         }
838                 }
839                 for(z=0; z<BITS; z++) {
840                         ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
841                                 r1->use1.b[z] | r1->use2.b[z];
842                         cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
843                         r1->refbehind.b[z] = ref.b[z];
844                         r1->calbehind.b[z] = cal.b[z];
845                 }
846                 if(r1->active)
847                         break;
848                 r1->active = 1;
849         }
850         for(; r != r1; r = r->p1)
851                 for(r2 = r->p2; r2 != R; r2 = r2->p2link)
852                         prop(r2, r->refbehind, r->calbehind);
853 }
854
855 /*
856  * find looping structure
857  *
858  * 1) find reverse postordering
859  * 2) find approximate dominators,
860  *      the actual dominators if the flow graph is reducible
861  *      otherwise, dominators plus some other non-dominators.
862  *      See Matthew S. Hecht and Jeffrey D. Ullman,
863  *      "Analysis of a Simple Algorithm for Global Data Flow Problems",
864  *      Conf.  Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
865  *      Oct. 1-3, 1973, pp.  207-217.
866  * 3) find all nodes with a predecessor dominated by the current node.
867  *      such a node is a loop head.
868  *      recursively, all preds with a greater rpo number are in the loop
869  */
870 long
871 postorder(Reg *r, Reg **rpo2r, long n)
872 {
873         Reg *r1;
874
875         r->rpo = 1;
876         r1 = r->s1;
877         if(r1 && !r1->rpo)
878                 n = postorder(r1, rpo2r, n);
879         r1 = r->s2;
880         if(r1 && !r1->rpo)
881                 n = postorder(r1, rpo2r, n);
882         rpo2r[n] = r;
883         n++;
884         return n;
885 }
886
887 long
888 rpolca(long *idom, long rpo1, long rpo2)
889 {
890         long t;
891
892         if(rpo1 == -1)
893                 return rpo2;
894         while(rpo1 != rpo2){
895                 if(rpo1 > rpo2){
896                         t = rpo2;
897                         rpo2 = rpo1;
898                         rpo1 = t;
899                 }
900                 while(rpo1 < rpo2){
901                         t = idom[rpo2];
902                         if(t >= rpo2)
903                                 fatal(Z, "bad idom");
904                         rpo2 = t;
905                 }
906         }
907         return rpo1;
908 }
909
910 int
911 doms(long *idom, long r, long s)
912 {
913         while(s > r)
914                 s = idom[s];
915         return s == r;
916 }
917
918 int
919 loophead(long *idom, Reg *r)
920 {
921         long src;
922
923         src = r->rpo;
924         if(r->p1 != R && doms(idom, src, r->p1->rpo))
925                 return 1;
926         for(r = r->p2; r != R; r = r->p2link)
927                 if(doms(idom, src, r->rpo))
928                         return 1;
929         return 0;
930 }
931
932 void
933 loopmark(Reg **rpo2r, long head, Reg *r)
934 {
935         if(r->rpo < head || r->active == head)
936                 return;
937         r->active = head;
938         r->loop += LOOP;
939         if(r->p1 != R)
940                 loopmark(rpo2r, head, r->p1);
941         for(r = r->p2; r != R; r = r->p2link)
942                 loopmark(rpo2r, head, r);
943 }
944
945 void
946 loopit(Reg *r, long nr)
947 {
948         Reg *r1;
949         long i, d, me;
950
951         if(nr > maxnr) {
952                 rpo2r = alloc(nr * sizeof(Reg*));
953                 idom = alloc(nr * sizeof(long));
954                 maxnr = nr;
955         }
956
957         d = postorder(r, rpo2r, 0);
958         if(d > nr)
959                 fatal(Z, "too many reg nodes");
960         nr = d;
961         for(i = 0; i < nr / 2; i++){
962                 r1 = rpo2r[i];
963                 rpo2r[i] = rpo2r[nr - 1 - i];
964                 rpo2r[nr - 1 - i] = r1;
965         }
966         for(i = 0; i < nr; i++)
967                 rpo2r[i]->rpo = i;
968
969         idom[0] = 0;
970         for(i = 0; i < nr; i++){
971                 r1 = rpo2r[i];
972                 me = r1->rpo;
973                 d = -1;
974                 if(r1->p1 != R && r1->p1->rpo < me)
975                         d = r1->p1->rpo;
976                 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
977                         if(r1->rpo < me)
978                                 d = rpolca(idom, d, r1->rpo);
979                 idom[i] = d;
980         }
981
982         for(i = 0; i < nr; i++){
983                 r1 = rpo2r[i];
984                 r1->loop++;
985                 if(r1->p2 != R && loophead(idom, r1))
986                         loopmark(rpo2r, i, r1);
987         }
988 }
989
990 void
991 synch(Reg *r, Bits dif)
992 {
993         Reg *r1;
994         int z;
995
996         for(r1 = r; r1 != R; r1 = r1->s1) {
997                 for(z=0; z<BITS; z++) {
998                         dif.b[z] = (dif.b[z] &
999                                 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
1000                                         r1->set.b[z] | r1->regdiff.b[z];
1001                         if(dif.b[z] != r1->regdiff.b[z]) {
1002                                 r1->regdiff.b[z] = dif.b[z];
1003                                 change++;
1004                         }
1005                 }
1006                 if(r1->active)
1007                         break;
1008                 r1->active = 1;
1009                 for(z=0; z<BITS; z++)
1010                         dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
1011                 if(r1->s2 != R)
1012                         synch(r1->s2, dif);
1013         }
1014 }
1015
1016 ulong
1017 allreg(ulong b, Rgn *r)
1018 {
1019         Var *v;
1020         int i;
1021
1022         v = var + r->varno;
1023         r->regno = 0;
1024         switch(v->etype) {
1025
1026         default:
1027                 diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
1028                 break;
1029
1030         case TCHAR:
1031         case TUCHAR:
1032         case TSHORT:
1033         case TUSHORT:
1034         case TINT:
1035         case TUINT:
1036         case TLONG:
1037         case TULONG:
1038         case TVLONG:
1039         case TUVLONG:
1040         case TIND:
1041         case TARRAY:
1042                 i = BtoR(~b);
1043                 if(i && r->cost > 0) {
1044                         r->regno = i;
1045                         return RtoB(i);
1046                 }
1047                 break;
1048
1049         case TDOUBLE:
1050         case TFLOAT:
1051                 i = BtoF(~b);
1052                 if(i && r->cost > 0) {
1053                         r->regno = i;
1054                         return FtoB(i);
1055                 }
1056                 break;
1057         }
1058         return 0;
1059 }
1060
1061 void
1062 paint1(Reg *r, int bn)
1063 {
1064         Reg *r1;
1065         Prog *p;
1066         int z;
1067         ulong bb;
1068
1069         z = bn/32;
1070         bb = 1L<<(bn%32);
1071         if(r->act.b[z] & bb)
1072                 return;
1073         for(;;) {
1074                 if(!(r->refbehind.b[z] & bb))
1075                         break;
1076                 r1 = r->p1;
1077                 if(r1 == R)
1078                         break;
1079                 if(!(r1->refahead.b[z] & bb))
1080                         break;
1081                 if(r1->act.b[z] & bb)
1082                         break;
1083                 r = r1;
1084         }
1085
1086         if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
1087                 change -= CLOAD * r->loop;
1088                 if(debug['R'] && debug['v'])
1089                         print("%ld%P\tld %B $%d\n", r->loop,
1090                                 r->prog, blsh(bn), change);
1091         }
1092         for(;;) {
1093                 r->act.b[z] |= bb;
1094                 p = r->prog;
1095
1096                 if(r->use1.b[z] & bb) {
1097                         change += CREF * r->loop;
1098                         if(debug['R'] && debug['v'])
1099                                 print("%ld%P\tu1 %B $%d\n", r->loop,
1100                                         p, blsh(bn), change);
1101                 }
1102
1103                 if((r->use2.b[z]|r->set.b[z]) & bb) {
1104                         change += CREF * r->loop;
1105                         if(debug['R'] && debug['v'])
1106                                 print("%ld%P\tu2 %B $%d\n", r->loop,
1107                                         p, blsh(bn), change);
1108                 }
1109
1110                 if(STORE(r) & r->regdiff.b[z] & bb) {
1111                         change -= CLOAD * r->loop;
1112                         if(debug['R'] && debug['v'])
1113                                 print("%ld%P\tst %B $%d\n", r->loop,
1114                                         p, blsh(bn), change);
1115                 }
1116
1117                 if(r->refbehind.b[z] & bb)
1118                         for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1119                                 if(r1->refahead.b[z] & bb)
1120                                         paint1(r1, bn);
1121
1122                 if(!(r->refahead.b[z] & bb))
1123                         break;
1124                 r1 = r->s2;
1125                 if(r1 != R)
1126                         if(r1->refbehind.b[z] & bb)
1127                                 paint1(r1, bn);
1128                 r = r->s1;
1129                 if(r == R)
1130                         break;
1131                 if(r->act.b[z] & bb)
1132                         break;
1133                 if(!(r->refbehind.b[z] & bb))
1134                         break;
1135         }
1136 }
1137
1138 ulong
1139 regset(Reg *r, ulong bb)
1140 {
1141         ulong b, set;
1142         Adr v;
1143         int c;
1144
1145         set = 0;
1146         v = zprog.from;
1147         while(b = bb & ~(bb-1)) {
1148                 v.type = b & 0xFFFF? BtoR(b): BtoF(b);
1149                 if(v.type == 0)
1150                         diag(Z, "zero v.type for %#lux", b);
1151                 c = copyu(r->prog, &v, A);
1152                 if(c == 3)
1153                         set |= b;
1154                 bb &= ~b;
1155         }
1156         return set;
1157 }
1158
1159 ulong
1160 reguse(Reg *r, ulong bb)
1161 {
1162         ulong b, set;
1163         Adr v;
1164         int c;
1165
1166         set = 0;
1167         v = zprog.from;
1168         while(b = bb & ~(bb-1)) {
1169                 v.type = b & 0xFFFF? BtoR(b): BtoF(b);
1170                 c = copyu(r->prog, &v, A);
1171                 if(c == 1 || c == 2 || c == 4)
1172                         set |= b;
1173                 bb &= ~b;
1174         }
1175         return set;
1176 }
1177
1178 ulong
1179 paint2(Reg *r, int bn)
1180 {
1181         Reg *r1;
1182         int z;
1183         ulong bb, vreg, x;
1184
1185         z = bn/32;
1186         bb = 1L << (bn%32);
1187         vreg = regbits;
1188         if(!(r->act.b[z] & bb))
1189                 return vreg;
1190         for(;;) {
1191                 if(!(r->refbehind.b[z] & bb))
1192                         break;
1193                 r1 = r->p1;
1194                 if(r1 == R)
1195                         break;
1196                 if(!(r1->refahead.b[z] & bb))
1197                         break;
1198                 if(!(r1->act.b[z] & bb))
1199                         break;
1200                 r = r1;
1201         }
1202         for(;;) {
1203                 r->act.b[z] &= ~bb;
1204
1205                 vreg |= r->regu;
1206
1207                 if(r->refbehind.b[z] & bb)
1208                         for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1209                                 if(r1->refahead.b[z] & bb)
1210                                         vreg |= paint2(r1, bn);
1211
1212                 if(!(r->refahead.b[z] & bb))
1213                         break;
1214                 r1 = r->s2;
1215                 if(r1 != R)
1216                         if(r1->refbehind.b[z] & bb)
1217                                 vreg |= paint2(r1, bn);
1218                 r = r->s1;
1219                 if(r == R)
1220                         break;
1221                 if(!(r->act.b[z] & bb))
1222                         break;
1223                 if(!(r->refbehind.b[z] & bb))
1224                         break;
1225         }
1226
1227         bb = vreg;
1228         for(; r; r=r->s1) {
1229                 x = r->regu & ~bb;
1230                 if(x) {
1231                         vreg |= reguse(r, x);
1232                         bb |= regset(r, x);
1233                 }
1234         }
1235         return vreg;
1236 }
1237
1238 void
1239 paint3(Reg *r, int bn, long rb, int rn)
1240 {
1241         Reg *r1;
1242         Prog *p;
1243         int z;
1244         ulong bb;
1245
1246         z = bn/32;
1247         bb = 1L << (bn%32);
1248         if(r->act.b[z] & bb)
1249                 return;
1250         for(;;) {
1251                 if(!(r->refbehind.b[z] & bb))
1252                         break;
1253                 r1 = r->p1;
1254                 if(r1 == R)
1255                         break;
1256                 if(!(r1->refahead.b[z] & bb))
1257                         break;
1258                 if(r1->act.b[z] & bb)
1259                         break;
1260                 r = r1;
1261         }
1262
1263         if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1264                 addmove(r, bn, rn, 0);
1265         for(;;) {
1266                 r->act.b[z] |= bb;
1267                 p = r->prog;
1268
1269                 if(r->use1.b[z] & bb) {
1270                         if(debug['R'])
1271                                 print("%P", p);
1272                         addreg(&p->from, rn);
1273                         if(debug['R'])
1274                                 print("\t.c%P\n", p);
1275                 }
1276                 if((r->use2.b[z]|r->set.b[z]) & bb) {
1277                         if(debug['R'])
1278                                 print("%P", p);
1279                         addreg(&p->to, rn);
1280                         if(debug['R'])
1281                                 print("\t.c%P\n", p);
1282                 }
1283
1284                 if(STORE(r) & r->regdiff.b[z] & bb)
1285                         addmove(r, bn, rn, 1);
1286                 r->regu |= rb;
1287
1288                 if(r->refbehind.b[z] & bb)
1289                         for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1290                                 if(r1->refahead.b[z] & bb)
1291                                         paint3(r1, bn, rb, rn);
1292
1293                 if(!(r->refahead.b[z] & bb))
1294                         break;
1295                 r1 = r->s2;
1296                 if(r1 != R)
1297                         if(r1->refbehind.b[z] & bb)
1298                                 paint3(r1, bn, rb, rn);
1299                 r = r->s1;
1300                 if(r == R)
1301                         break;
1302                 if(r->act.b[z] & bb)
1303                         break;
1304                 if(!(r->refbehind.b[z] & bb))
1305                         break;
1306         }
1307 }
1308
1309 void
1310 addreg(Adr *a, int rn)
1311 {
1312
1313         a->sym = 0;
1314         a->offset = 0;
1315         a->type = rn;
1316 }
1317
1318 long
1319 RtoB(int r)
1320 {
1321
1322         if(r < D_AX || r > D_R15)
1323                 return 0;
1324         return 1L << (r-D_AX);
1325 }
1326
1327 int
1328 BtoR(long b)
1329 {
1330
1331         b &= 0xffffL;
1332         if(b == 0)
1333                 return 0;
1334         return bitno(b) + D_AX;
1335 }
1336
1337 /*
1338  *      bit     reg
1339  *      16      X5
1340  *      17      X6
1341  *      18      X7
1342  */
1343 long
1344 FtoB(int f)
1345 {
1346         if(f < FREGMIN || f > FREGEXT)
1347                 return 0;
1348         return 1L << (f - FREGMIN + 16);
1349 }
1350
1351 int
1352 BtoF(long b)
1353 {
1354
1355         b &= 0x70000L;
1356         if(b == 0)
1357                 return 0;
1358         return bitno(b) - 16 + FREGMIN;
1359 }