]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/8c/reg.c
ndb/dnsquery, ndb/csquery: handle long lines
[plan9front.git] / sys / src / cmd / 8c / reg.c
1 #include "gc.h"
2
3 Reg*
4 rega(void)
5 {
6         Reg *r;
7
8         r = freer;
9         if(r == R) {
10                 r = alloc(sizeof(*r));
11         } else
12                 freer = r->link;
13
14         *r = zreg;
15         return r;
16 }
17
18 int
19 rcmp(const void *a1, const void *a2)
20 {
21         Rgn *p1, *p2;
22         int c1, c2;
23
24         p1 = (Rgn*)a1;
25         p2 = (Rgn*)a2;
26         c1 = p2->cost;
27         c2 = p1->cost;
28         if(c1 -= c2)
29                 return c1;
30         return p2->varno - p1->varno;
31 }
32
33 void
34 regopt(Prog *p)
35 {
36         Reg *r, *r1, *r2;
37         Prog *p1;
38         int i, z;
39         long initpc, val, npc;
40         ulong vreg;
41         Bits bit;
42         struct
43         {
44                 long    m;
45                 long    c;
46                 Reg*    p;
47         } log5[6], *lp;
48
49         firstr = R;
50         lastr = R;
51         nvar = 0;
52         regbits = RtoB(D_SP) | RtoB(D_AX);
53         for(z=0; z<BITS; z++) {
54                 externs.b[z] = 0;
55                 params.b[z] = 0;
56                 consts.b[z] = 0;
57                 addrs.b[z] = 0;
58         }
59
60         /*
61          * pass 1
62          * build aux data structure
63          * allocate pcs
64          * find use and set of variables
65          */
66         val = 5L * 5L * 5L * 5L * 5L;
67         lp = log5;
68         for(i=0; i<5; i++) {
69                 lp->m = val;
70                 lp->c = 0;
71                 lp->p = R;
72                 val /= 5L;
73                 lp++;
74         }
75         val = 0;
76         for(; p != P; p = p->link) {
77                 switch(p->as) {
78                 case ADATA:
79                 case AGLOBL:
80                 case ANAME:
81                 case ASIGNAME:
82                         continue;
83                 }
84                 r = rega();
85                 if(firstr == R) {
86                         firstr = r;
87                         lastr = r;
88                 } else {
89                         lastr->link = r;
90                         r->p1 = lastr;
91                         lastr->s1 = r;
92                         lastr = r;
93                 }
94                 r->prog = p;
95                 r->pc = val;
96                 val++;
97
98                 lp = log5;
99                 for(i=0; i<5; i++) {
100                         lp->c--;
101                         if(lp->c <= 0) {
102                                 lp->c = lp->m;
103                                 if(lp->p != R)
104                                         lp->p->log5 = r;
105                                 lp->p = r;
106                                 (lp+1)->c = 0;
107                                 break;
108                         }
109                         lp++;
110                 }
111
112                 r1 = r->p1;
113                 if(r1 != R)
114                 switch(r1->prog->as) {
115                 case ARET:
116                 case AJMP:
117                 case AIRETL:
118                         r->p1 = R;
119                         r1->s1 = R;
120                 }
121
122                 bit = mkvar(r, &p->from, p->as==AMOVL);
123                 if(bany(&bit))
124                 switch(p->as) {
125                 /*
126                  * funny
127                  */
128                 case ALEAL:
129                         for(z=0; z<BITS; z++)
130                                 addrs.b[z] |= bit.b[z];
131                         break;
132
133                 /*
134                  * left side read
135                  */
136                 default:
137                         for(z=0; z<BITS; z++)
138                                 r->use1.b[z] |= bit.b[z];
139                         break;
140                 }
141
142                 bit = mkvar(r, &p->to, 0);
143                 if(bany(&bit))
144                 switch(p->as) {
145                 default:
146                         diag(Z, "reg: unknown op: %A", p->as);
147                         break;
148
149                 /*
150                  * right side read
151                  */
152                 case ACMPB:
153                 case ACMPL:
154                 case ACMPW:
155                         for(z=0; z<BITS; z++)
156                                 r->use2.b[z] |= bit.b[z];
157                         break;
158
159                 /*
160                  * right side write
161                  */
162                 case ANOP:
163                 case AMOVL:
164                 case AMOVB:
165                 case AMOVW:
166                 case AMOVBLSX:
167                 case AMOVBLZX:
168                 case AMOVWLSX:
169                 case AMOVWLZX:
170                         for(z=0; z<BITS; z++)
171                                 r->set.b[z] |= bit.b[z];
172                         break;
173
174                 /*
175                  * right side read+write
176                  */
177                 case AADDB:
178                 case AADDL:
179                 case AADDW:
180                 case AANDB:
181                 case AANDL:
182                 case AANDW:
183                 case ASUBB:
184                 case ASUBL:
185                 case ASUBW:
186                 case AORB:
187                 case AORL:
188                 case AORW:
189                 case AXORB:
190                 case AXORL:
191                 case AXORW:
192                 case ASALB:
193                 case ASALL:
194                 case ASALW:
195                 case ASARB:
196                 case ASARL:
197                 case ASARW:
198                 case AROLB:
199                 case AROLL:
200                 case AROLW:
201                 case ARORB:
202                 case ARORL:
203                 case ARORW:
204                 case ASHLB:
205                 case ASHLL:
206                 case ASHLW:
207                 case ASHRB:
208                 case ASHRL:
209                 case ASHRW:
210                 case AIMULL:
211                 case AIMULW:
212                 case ANEGL:
213                 case ANOTL:
214                 case AADCL:
215                 case ASBBL:
216                         for(z=0; z<BITS; z++) {
217                                 r->set.b[z] |= bit.b[z];
218                                 r->use2.b[z] |= bit.b[z];
219                         }
220                         break;
221
222                 /*
223                  * funny
224                  */
225                 case AFMOVDP:
226                 case AFMOVFP:
227                 case AFMOVLP:
228                 case AFMOVVP:
229                 case AFMOVWP:
230                 case ACALL:
231                         for(z=0; z<BITS; z++)
232                                 addrs.b[z] |= bit.b[z];
233                         break;
234                 }
235
236                 switch(p->as) {
237                 case AIMULL:
238                 case AIMULW:
239                         if(p->to.type != D_NONE)
240                                 break;
241
242                 case AIDIVB:
243                 case AIDIVL:
244                 case AIDIVW:
245                 case AIMULB:
246                 case ADIVB:
247                 case ADIVL:
248                 case ADIVW:
249                 case AMULB:
250                 case AMULL:
251                 case AMULW:
252
253                 case ACWD:
254                 case ACDQ:
255                         r->regu |= RtoB(D_AX) | RtoB(D_DX);
256                         break;
257
258                 case AREP:
259                 case AREPN:
260                 case ALOOP:
261                 case ALOOPEQ:
262                 case ALOOPNE:
263                         r->regu |= RtoB(D_CX);
264                         break;
265
266                 case AMOVSB:
267                 case AMOVSL:
268                 case AMOVSW:
269                 case ACMPSB:
270                 case ACMPSL:
271                 case ACMPSW:
272                         r->regu |= RtoB(D_SI) | RtoB(D_DI);
273                         break;
274
275                 case ASTOSB:
276                 case ASTOSL:
277                 case ASTOSW:
278                 case ASCASB:
279                 case ASCASL:
280                 case ASCASW:
281                         r->regu |= RtoB(D_AX) | RtoB(D_DI);
282                         break;
283
284                 case AINSB:
285                 case AINSL:
286                 case AINSW:
287                 case AOUTSB:
288                 case AOUTSL:
289                 case AOUTSW:
290                         r->regu |= RtoB(D_DI) | RtoB(D_DX);
291                         break;
292
293                 case AFSTSW:
294                 case ASAHF:
295                         r->regu |= RtoB(D_AX);
296                         break;
297                 }
298         }
299         if(firstr == R)
300                 return;
301         initpc = pc - val;
302         npc = val;
303
304         /*
305          * pass 2
306          * turn branch references to pointers
307          * build back pointers
308          */
309         for(r = firstr; r != R; r = r->link) {
310                 p = r->prog;
311                 if(p->to.type == D_BRANCH) {
312                         val = p->to.offset - initpc;
313                         r1 = firstr;
314                         while(r1 != R) {
315                                 r2 = r1->log5;
316                                 if(r2 != R && val >= r2->pc) {
317                                         r1 = r2;
318                                         continue;
319                                 }
320                                 if(r1->pc == val)
321                                         break;
322                                 r1 = r1->link;
323                         }
324                         if(r1 == R) {
325                                 nearln = p->lineno;
326                                 diag(Z, "ref not found\n%P", p);
327                                 continue;
328                         }
329                         if(r1 == r) {
330                                 nearln = p->lineno;
331                                 diag(Z, "ref to self\n%P", p);
332                                 continue;
333                         }
334                         r->s2 = r1;
335                         r->p2link = r1->p2;
336                         r1->p2 = r;
337                 }
338         }
339         if(debug['R']) {
340                 p = firstr->prog;
341                 print("\n%L %D\n", p->lineno, &p->from);
342         }
343
344         /*
345          * pass 2.5
346          * find looping structure
347          */
348         for(r = firstr; r != R; r = r->link)
349                 r->active = 0;
350         change = 0;
351         loopit(firstr, npc);
352         if(debug['R'] && debug['v']) {
353                 print("\nlooping structure:\n");
354                 for(r = firstr; r != R; r = r->link) {
355                         print("%ld:%P", r->loop, r->prog);
356                         for(z=0; z<BITS; z++)
357                                 bit.b[z] = r->use1.b[z] |
358                                            r->use2.b[z] |
359                                            r->set.b[z];
360                         if(bany(&bit)) {
361                                 print("\t");
362                                 if(bany(&r->use1))
363                                         print(" u1=%B", r->use1);
364                                 if(bany(&r->use2))
365                                         print(" u2=%B", r->use2);
366                                 if(bany(&r->set))
367                                         print(" st=%B", r->set);
368                         }
369                         print("\n");
370                 }
371         }
372
373         /*
374          * pass 3
375          * iterate propagating usage
376          *      back until flow graph is complete
377          */
378 loop1:
379         change = 0;
380         for(r = firstr; r != R; r = r->link)
381                 r->active = 0;
382         for(r = firstr; r != R; r = r->link)
383                 if(r->prog->as == ARET)
384                         prop(r, zbits, zbits);
385 loop11:
386         /* pick up unreachable code */
387         i = 0;
388         for(r = firstr; r != R; r = r1) {
389                 r1 = r->link;
390                 if(r1 && r1->active && !r->active) {
391                         prop(r, zbits, zbits);
392                         i = 1;
393                 }
394         }
395         if(i)
396                 goto loop11;
397         if(change)
398                 goto loop1;
399
400
401         /*
402          * pass 4
403          * iterate propagating register/variable synchrony
404          *      forward until graph is complete
405          */
406 loop2:
407         change = 0;
408         for(r = firstr; r != R; r = r->link)
409                 r->active = 0;
410         synch(firstr, zbits);
411         if(change)
412                 goto loop2;
413
414
415         /*
416          * pass 5
417          * isolate regions
418          * calculate costs (paint1)
419          */
420         r = firstr;
421         if(r) {
422                 for(z=0; z<BITS; z++)
423                         bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
424                           ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
425                 if(bany(&bit)) {
426                         nearln = r->prog->lineno;
427                         warn(Z, "used and not set: %B", bit);
428                         if(debug['R'] && !debug['w'])
429                                 print("used and not set: %B\n", bit);
430                 }
431         }
432         if(debug['R'] && debug['v'])
433                 print("\nprop structure:\n");
434         for(r = firstr; r != R; r = r->link)
435                 r->act = zbits;
436         rgp = region;
437         nregion = 0;
438         for(r = firstr; r != R; r = r->link) {
439                 if(debug['R'] && debug['v']) {
440                         print("%P\t", r->prog);
441                         if(bany(&r->set))
442                                 print("s:%B ", r->set);
443                         if(bany(&r->refahead))
444                                 print("ra:%B ", r->refahead);
445                         if(bany(&r->calahead))
446                                 print("ca:%B ", r->calahead);
447                         print("\n");
448                 }
449                 for(z=0; z<BITS; z++)
450                         bit.b[z] = r->set.b[z] &
451                           ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
452                 if(bany(&bit)) {
453                         nearln = r->prog->lineno;
454                         warn(Z, "set and not used: %B", bit);
455                         if(debug['R'])
456                                 print("set and not used: %B\n", bit);
457                         excise(r);
458                 }
459                 for(z=0; z<BITS; z++)
460                         bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
461                 while(bany(&bit)) {
462                         i = bnum(bit);
463                         rgp->enter = r;
464                         rgp->varno = i;
465                         change = 0;
466                         if(debug['R'] && debug['v'])
467                                 print("\n");
468                         paint1(r, i);
469                         bit.b[i/32] &= ~(1L<<(i%32));
470                         if(change <= 0) {
471                                 if(debug['R'])
472                                         print("%L$%d: %B\n",
473                                                 r->prog->lineno, change, blsh(i));
474                                 continue;
475                         }
476                         rgp->cost = change;
477                         nregion++;
478                         if(nregion >= NRGN) {
479                                 warn(Z, "too many regions");
480                                 goto brk;
481                         }
482                         rgp++;
483                 }
484         }
485 brk:
486         qsort(region, nregion, sizeof(region[0]), rcmp);
487
488         /*
489          * pass 6
490          * determine used registers (paint2)
491          * replace code (paint3)
492          */
493         rgp = region;
494         for(i=0; i<nregion; i++) {
495                 bit = blsh(rgp->varno);
496                 vreg = paint2(rgp->enter, rgp->varno);
497                 vreg = allreg(vreg, rgp);
498                 if(debug['R']) {
499                         print("%L$%d %R: %B\n",
500                                 rgp->enter->prog->lineno,
501                                 rgp->cost,
502                                 rgp->regno,
503                                 bit);
504                 }
505                 if(rgp->regno != 0)
506                         paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
507                 rgp++;
508         }
509         /*
510          * pass 7
511          * peep-hole on basic block
512          */
513         if(!debug['R'] || debug['P'])
514                 peep();
515
516         /*
517          * pass 8
518          * recalculate pc
519          */
520         val = initpc;
521         for(r = firstr; r != R; r = r1) {
522                 r->pc = val;
523                 p = r->prog;
524                 p1 = P;
525                 r1 = r->link;
526                 if(r1 != R)
527                         p1 = r1->prog;
528                 for(; p != p1; p = p->link) {
529                         switch(p->as) {
530                         default:
531                                 val++;
532                                 break;
533
534                         case ANOP:
535                         case ADATA:
536                         case AGLOBL:
537                         case ANAME:
538                         case ASIGNAME:
539                                 break;
540                         }
541                 }
542         }
543         pc = val;
544
545         /*
546          * fix up branches
547          */
548         if(debug['R'])
549                 if(bany(&addrs))
550                         print("addrs: %B\n", addrs);
551
552         r1 = 0; /* set */
553         for(r = firstr; r != R; r = r->link) {
554                 p = r->prog;
555                 if(p->to.type == D_BRANCH)
556                         p->to.offset = r->s2->pc;
557                 r1 = r;
558         }
559
560         /*
561          * last pass
562          * eliminate nops
563          * free aux structures
564          */
565         for(p = firstr->prog; p != P; p = p->link){
566                 while(p->link && p->link->as == ANOP)
567                         p->link = p->link->link;
568         }
569         if(r1 != R) {
570                 r1->link = freer;
571                 freer = firstr;
572         }
573 }
574
575 /*
576  * add mov b,rn
577  * just after r
578  */
579 void
580 addmove(Reg *r, int bn, int rn, int f)
581 {
582         Prog *p, *p1;
583         Adr *a;
584         Var *v;
585
586         p1 = alloc(sizeof(*p1));
587         *p1 = zprog;
588         p = r->prog;
589
590         p1->link = p->link;
591         p->link = p1;
592         p1->lineno = p->lineno;
593
594         v = var + bn;
595
596         a = &p1->to;
597         a->sym = v->sym;
598         a->offset = v->offset;
599         a->etype = v->etype;
600         a->type = v->name;
601
602         p1->as = AMOVL;
603         if(v->etype == TCHAR || v->etype == TUCHAR)
604                 p1->as = AMOVB;
605         if(v->etype == TSHORT || v->etype == TUSHORT)
606                 p1->as = AMOVW;
607
608         p1->from.type = rn;
609         if(!f) {
610                 p1->from = *a;
611                 *a = zprog.from;
612                 a->type = rn;
613                 if(v->etype == TUCHAR)
614                         p1->as = AMOVB;
615                 if(v->etype == TUSHORT)
616                         p1->as = AMOVW;
617         }
618         if(debug['R'])
619                 print("%P\t.a%P\n", p, p1);
620 }
621
622 ulong
623 doregbits(int r)
624 {
625         ulong b;
626
627         b = 0;
628         if(r >= D_INDIR)
629                 r -= D_INDIR;
630         if(r >= D_AX && r <= D_DI)
631                 b |= RtoB(r);
632         else
633         if(r >= D_AL && r <= D_BL)
634                 b |= RtoB(r-D_AL+D_AX);
635         else
636         if(r >= D_AH && r <= D_BH)
637                 b |= RtoB(r-D_AH+D_AX);
638         return b;
639 }
640
641 Bits
642 mkvar(Reg *r, Adr *a, int isro)
643 {
644         Var *v;
645         int i, t, n, et, z;
646         long o;
647         Bits bit;
648         Sym *s;
649
650         /*
651          * mark registers used
652          */
653         t = a->type;
654         r->regu |= doregbits(t);
655         r->regu |= doregbits(a->index);
656         et = a->etype;
657
658         switch(t) {
659         default:
660                 goto none;
661         case D_INDIR+D_GS:
662                 if(!isro || 1)
663                         goto none;
664                 n = t;
665                 {static Sym er; a->sym = &er;}
666                 a->sym->name = "$extreg";
667                 break;
668         case D_ADDR:
669                 a->type = a->index;
670                 bit = mkvar(r, a, 0);
671                 for(z=0; z<BITS; z++)
672                         addrs.b[z] |= bit.b[z];
673                 a->type = t;
674                 goto none;
675         case D_EXTERN:
676         case D_STATIC:
677         case D_PARAM:
678         case D_AUTO:
679                 n = t;
680                 break;
681         }
682         s = a->sym;
683         if(s == S)
684                 goto none;
685         if(s->name[0] == '.')
686                 goto none;
687         o = a->offset;
688         v = var;
689         for(i=0; i<nvar; i++) {
690                 if(s == v->sym)
691                 if(n == v->name)
692                 if(o == v->offset)
693                         goto out;
694                 v++;
695         }
696         if(nvar >= NVAR) {
697                 if(debug['w'] > 1 && s)
698                         warn(Z, "variable not optimized: %s", s->name);
699                 goto none;
700         }
701         i = nvar;
702         nvar++;
703         v = &var[i];
704         v->sym = s;
705         v->offset = o;
706         v->name = n;
707         v->etype = et;
708         if(debug['R'])
709                 print("bit=%2d et=%2d %D\n", i, et, a);
710
711 out:
712         bit = blsh(i);
713         if(n == D_EXTERN || n == D_STATIC)
714                 for(z=0; z<BITS; z++)
715                         externs.b[z] |= bit.b[z];
716         if(n == D_PARAM)
717                 for(z=0; z<BITS; z++)
718                         params.b[z] |= bit.b[z];
719         if(v->etype != et || !typechlpfd[et])   /* funny punning */
720                 for(z=0; z<BITS; z++)
721                         addrs.b[z] |= bit.b[z];
722         return bit;
723
724 none:
725         return zbits;
726 }
727
728 void
729 prop(Reg *r, Bits ref, Bits cal)
730 {
731         Reg *r1, *r2;
732         int z;
733
734         for(r1 = r; r1 != R; r1 = r1->p1) {
735                 for(z=0; z<BITS; z++) {
736                         ref.b[z] |= r1->refahead.b[z];
737                         if(ref.b[z] != r1->refahead.b[z]) {
738                                 r1->refahead.b[z] = ref.b[z];
739                                 change++;
740                         }
741                         cal.b[z] |= r1->calahead.b[z];
742                         if(cal.b[z] != r1->calahead.b[z]) {
743                                 r1->calahead.b[z] = cal.b[z];
744                                 change++;
745                         }
746                 }
747                 switch(r1->prog->as) {
748                 case ACALL:
749                         for(z=0; z<BITS; z++) {
750                                 cal.b[z] |= ref.b[z] | externs.b[z];
751                                 ref.b[z] = 0;
752                         }
753                         break;
754
755                 case ATEXT:
756                         for(z=0; z<BITS; z++) {
757                                 cal.b[z] = 0;
758                                 ref.b[z] = 0;
759                         }
760                         break;
761
762                 case ARET:
763                         for(z=0; z<BITS; z++) {
764                                 cal.b[z] = externs.b[z];
765                                 ref.b[z] = 0;
766                         }
767                 }
768                 for(z=0; z<BITS; z++) {
769                         ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
770                                 r1->use1.b[z] | r1->use2.b[z];
771                         cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
772                         r1->refbehind.b[z] = ref.b[z];
773                         r1->calbehind.b[z] = cal.b[z];
774                 }
775                 if(r1->active)
776                         break;
777                 r1->active = 1;
778         }
779         for(; r != r1; r = r->p1)
780                 for(r2 = r->p2; r2 != R; r2 = r2->p2link)
781                         prop(r2, r->refbehind, r->calbehind);
782 }
783
784 /*
785  * find looping structure
786  *
787  * 1) find reverse postordering
788  * 2) find approximate dominators,
789  *      the actual dominators if the flow graph is reducible
790  *      otherwise, dominators plus some other non-dominators.
791  *      See Matthew S. Hecht and Jeffrey D. Ullman,
792  *      "Analysis of a Simple Algorithm for Global Data Flow Problems",
793  *      Conf.  Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
794  *      Oct. 1-3, 1973, pp.  207-217.
795  * 3) find all nodes with a predecessor dominated by the current node.
796  *      such a node is a loop head.
797  *      recursively, all preds with a greater rpo number are in the loop
798  */
799 long
800 postorder(Reg *r, Reg **rpo2r, long n)
801 {
802         Reg *r1;
803
804         r->rpo = 1;
805         r1 = r->s1;
806         if(r1 && !r1->rpo)
807                 n = postorder(r1, rpo2r, n);
808         r1 = r->s2;
809         if(r1 && !r1->rpo)
810                 n = postorder(r1, rpo2r, n);
811         rpo2r[n] = r;
812         n++;
813         return n;
814 }
815
816 long
817 rpolca(long *idom, long rpo1, long rpo2)
818 {
819         long t;
820
821         if(rpo1 == -1)
822                 return rpo2;
823         while(rpo1 != rpo2){
824                 if(rpo1 > rpo2){
825                         t = rpo2;
826                         rpo2 = rpo1;
827                         rpo1 = t;
828                 }
829                 while(rpo1 < rpo2){
830                         t = idom[rpo2];
831                         if(t >= rpo2)
832                                 fatal(Z, "bad idom");
833                         rpo2 = t;
834                 }
835         }
836         return rpo1;
837 }
838
839 int
840 doms(long *idom, long r, long s)
841 {
842         while(s > r)
843                 s = idom[s];
844         return s == r;
845 }
846
847 int
848 loophead(long *idom, Reg *r)
849 {
850         long src;
851
852         src = r->rpo;
853         if(r->p1 != R && doms(idom, src, r->p1->rpo))
854                 return 1;
855         for(r = r->p2; r != R; r = r->p2link)
856                 if(doms(idom, src, r->rpo))
857                         return 1;
858         return 0;
859 }
860
861 void
862 loopmark(Reg **rpo2r, long head, Reg *r)
863 {
864         if(r->rpo < head || r->active == head)
865                 return;
866         r->active = head;
867         r->loop += LOOP;
868         if(r->p1 != R)
869                 loopmark(rpo2r, head, r->p1);
870         for(r = r->p2; r != R; r = r->p2link)
871                 loopmark(rpo2r, head, r);
872 }
873
874 void
875 loopit(Reg *r, long nr)
876 {
877         Reg *r1;
878         long i, d, me;
879
880         if(nr > maxnr) {
881                 rpo2r = alloc(nr * sizeof(Reg*));
882                 idom = alloc(nr * sizeof(long));
883                 maxnr = nr;
884         }
885
886         d = postorder(r, rpo2r, 0);
887         if(d > nr)
888                 fatal(Z, "too many reg nodes");
889         nr = d;
890         for(i = 0; i < nr / 2; i++){
891                 r1 = rpo2r[i];
892                 rpo2r[i] = rpo2r[nr - 1 - i];
893                 rpo2r[nr - 1 - i] = r1;
894         }
895         for(i = 0; i < nr; i++)
896                 rpo2r[i]->rpo = i;
897
898         idom[0] = 0;
899         for(i = 0; i < nr; i++){
900                 r1 = rpo2r[i];
901                 me = r1->rpo;
902                 d = -1;
903                 if(r1->p1 != R && r1->p1->rpo < me)
904                         d = r1->p1->rpo;
905                 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
906                         if(r1->rpo < me)
907                                 d = rpolca(idom, d, r1->rpo);
908                 idom[i] = d;
909         }
910
911         for(i = 0; i < nr; i++){
912                 r1 = rpo2r[i];
913                 r1->loop++;
914                 if(r1->p2 != R && loophead(idom, r1))
915                         loopmark(rpo2r, i, r1);
916         }
917 }
918
919 void
920 synch(Reg *r, Bits dif)
921 {
922         Reg *r1;
923         int z;
924
925         for(r1 = r; r1 != R; r1 = r1->s1) {
926                 for(z=0; z<BITS; z++) {
927                         dif.b[z] = (dif.b[z] &
928                                 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
929                                         r1->set.b[z] | r1->regdiff.b[z];
930                         if(dif.b[z] != r1->regdiff.b[z]) {
931                                 r1->regdiff.b[z] = dif.b[z];
932                                 change++;
933                         }
934                 }
935                 if(r1->active)
936                         break;
937                 r1->active = 1;
938                 for(z=0; z<BITS; z++)
939                         dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
940                 if(r1->s2 != R)
941                         synch(r1->s2, dif);
942         }
943 }
944
945 ulong
946 allreg(ulong b, Rgn *r)
947 {
948         Var *v;
949         int i;
950
951         v = var + r->varno;
952         r->regno = 0;
953         switch(v->etype) {
954
955         default:
956                 diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
957                 break;
958
959         case TCHAR:
960         case TUCHAR:
961         case TSHORT:
962         case TUSHORT:
963         case TINT:
964         case TUINT:
965         case TLONG:
966         case TULONG:
967         case TIND:
968         case TARRAY:
969                 i = BtoR(~b);
970                 if(i && r->cost > 0) {
971                         r->regno = i;
972                         return RtoB(i);
973                 }
974                 break;
975
976         case TDOUBLE:
977         case TFLOAT:
978                 break;
979         }
980         return 0;
981 }
982
983 void
984 paint1(Reg *r, int bn)
985 {
986         Reg *r1;
987         Prog *p;
988         int z;
989         ulong bb;
990
991         z = bn/32;
992         bb = 1L<<(bn%32);
993         if(r->act.b[z] & bb)
994                 return;
995         for(;;) {
996                 if(!(r->refbehind.b[z] & bb))
997                         break;
998                 r1 = r->p1;
999                 if(r1 == R)
1000                         break;
1001                 if(!(r1->refahead.b[z] & bb))
1002                         break;
1003                 if(r1->act.b[z] & bb)
1004                         break;
1005                 r = r1;
1006         }
1007
1008         if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
1009                 change -= CLOAD * r->loop;
1010                 if(debug['R'] && debug['v'])
1011                         print("%ld%P\tld %B $%d\n", r->loop,
1012                                 r->prog, blsh(bn), change);
1013         }
1014         for(;;) {
1015                 r->act.b[z] |= bb;
1016                 p = r->prog;
1017
1018                 if(r->use1.b[z] & bb) {
1019                         change += CREF * r->loop;
1020                         if(p->as == AFMOVL || p->as == AFMOVW)
1021                                 if(BtoR(bb) != D_F0)
1022                                         change = -CINF;
1023                         if(debug['R'] && debug['v'])
1024                                 print("%ld%P\tu1 %B $%d\n", r->loop,
1025                                         p, blsh(bn), change);
1026                 }
1027
1028                 if((r->use2.b[z]|r->set.b[z]) & bb) {
1029                         change += CREF * r->loop;
1030                         if(p->as == AFMOVL || p->as == AFMOVW)
1031                                 if(BtoR(bb) != D_F0)
1032                                         change = -CINF;
1033                         if(debug['R'] && debug['v'])
1034                                 print("%ld%P\tu2 %B $%d\n", r->loop,
1035                                         p, blsh(bn), change);
1036                 }
1037
1038                 if(STORE(r) & r->regdiff.b[z] & bb) {
1039                         change -= CLOAD * r->loop;
1040                         if(p->as == AFMOVL || p->as == AFMOVW)
1041                                 if(BtoR(bb) != D_F0)
1042                                         change = -CINF;
1043                         if(debug['R'] && debug['v'])
1044                                 print("%ld%P\tst %B $%d\n", r->loop,
1045                                         p, blsh(bn), change);
1046                 }
1047
1048                 if(r->refbehind.b[z] & bb)
1049                         for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1050                                 if(r1->refahead.b[z] & bb)
1051                                         paint1(r1, bn);
1052
1053                 if(!(r->refahead.b[z] & bb))
1054                         break;
1055                 r1 = r->s2;
1056                 if(r1 != R)
1057                         if(r1->refbehind.b[z] & bb)
1058                                 paint1(r1, bn);
1059                 r = r->s1;
1060                 if(r == R)
1061                         break;
1062                 if(r->act.b[z] & bb)
1063                         break;
1064                 if(!(r->refbehind.b[z] & bb))
1065                         break;
1066         }
1067 }
1068
1069 ulong
1070 regset(Reg *r, ulong bb)
1071 {
1072         ulong b, set;
1073         Adr v;
1074         int c;
1075
1076         set = 0;
1077         v = zprog.from;
1078         while(b = bb & ~(bb-1)) {
1079                 v.type = BtoR(b);
1080                 c = copyu(r->prog, &v, A);
1081                 if(c == 3)
1082                         set |= b;
1083                 bb &= ~b;
1084         }
1085         return set;
1086 }
1087
1088 ulong
1089 reguse(Reg *r, ulong bb)
1090 {
1091         ulong b, set;
1092         Adr v;
1093         int c;
1094
1095         set = 0;
1096         v = zprog.from;
1097         while(b = bb & ~(bb-1)) {
1098                 v.type = BtoR(b);
1099                 c = copyu(r->prog, &v, A);
1100                 if(c == 1 || c == 2 || c == 4)
1101                         set |= b;
1102                 bb &= ~b;
1103         }
1104         return set;
1105 }
1106
1107 ulong
1108 paint2(Reg *r, int bn)
1109 {
1110         Reg *r1;
1111         int z;
1112         ulong bb, vreg, x;
1113
1114         z = bn/32;
1115         bb = 1L << (bn%32);
1116         vreg = regbits;
1117         if(!(r->act.b[z] & bb))
1118                 return vreg;
1119         for(;;) {
1120                 if(!(r->refbehind.b[z] & bb))
1121                         break;
1122                 r1 = r->p1;
1123                 if(r1 == R)
1124                         break;
1125                 if(!(r1->refahead.b[z] & bb))
1126                         break;
1127                 if(!(r1->act.b[z] & bb))
1128                         break;
1129                 r = r1;
1130         }
1131         for(;;) {
1132                 r->act.b[z] &= ~bb;
1133
1134                 vreg |= r->regu;
1135
1136                 if(r->refbehind.b[z] & bb)
1137                         for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1138                                 if(r1->refahead.b[z] & bb)
1139                                         vreg |= paint2(r1, bn);
1140
1141                 if(!(r->refahead.b[z] & bb))
1142                         break;
1143                 r1 = r->s2;
1144                 if(r1 != R)
1145                         if(r1->refbehind.b[z] & bb)
1146                                 vreg |= paint2(r1, bn);
1147                 r = r->s1;
1148                 if(r == R)
1149                         break;
1150                 if(!(r->act.b[z] & bb))
1151                         break;
1152                 if(!(r->refbehind.b[z] & bb))
1153                         break;
1154         }
1155
1156         bb = vreg;
1157         for(; r; r=r->s1) {
1158                 x = r->regu & ~bb;
1159                 if(x) {
1160                         vreg |= reguse(r, x);
1161                         bb |= regset(r, x);
1162                 }
1163         }
1164         return vreg;
1165 }
1166
1167 void
1168 paint3(Reg *r, int bn, long rb, int rn)
1169 {
1170         Reg *r1;
1171         Prog *p;
1172         int z;
1173         ulong bb;
1174
1175         z = bn/32;
1176         bb = 1L << (bn%32);
1177         if(r->act.b[z] & bb)
1178                 return;
1179         for(;;) {
1180                 if(!(r->refbehind.b[z] & bb))
1181                         break;
1182                 r1 = r->p1;
1183                 if(r1 == R)
1184                         break;
1185                 if(!(r1->refahead.b[z] & bb))
1186                         break;
1187                 if(r1->act.b[z] & bb)
1188                         break;
1189                 r = r1;
1190         }
1191
1192         if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1193                 addmove(r, bn, rn, 0);
1194         for(;;) {
1195                 r->act.b[z] |= bb;
1196                 p = r->prog;
1197
1198                 if(r->use1.b[z] & bb) {
1199                         if(debug['R'])
1200                                 print("%P", p);
1201                         addreg(&p->from, rn);
1202                         if(debug['R'])
1203                                 print("\t.c%P\n", p);
1204                 }
1205                 if((r->use2.b[z]|r->set.b[z]) & bb) {
1206                         if(debug['R'])
1207                                 print("%P", p);
1208                         addreg(&p->to, rn);
1209                         if(debug['R'])
1210                                 print("\t.c%P\n", p);
1211                 }
1212
1213                 if(STORE(r) & r->regdiff.b[z] & bb)
1214                         addmove(r, bn, rn, 1);
1215                 r->regu |= rb;
1216
1217                 if(r->refbehind.b[z] & bb)
1218                         for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1219                                 if(r1->refahead.b[z] & bb)
1220                                         paint3(r1, bn, rb, rn);
1221
1222                 if(!(r->refahead.b[z] & bb))
1223                         break;
1224                 r1 = r->s2;
1225                 if(r1 != R)
1226                         if(r1->refbehind.b[z] & bb)
1227                                 paint3(r1, bn, rb, rn);
1228                 r = r->s1;
1229                 if(r == R)
1230                         break;
1231                 if(r->act.b[z] & bb)
1232                         break;
1233                 if(!(r->refbehind.b[z] & bb))
1234                         break;
1235         }
1236 }
1237
1238 void
1239 addreg(Adr *a, int rn)
1240 {
1241
1242         a->sym = 0;
1243         a->offset = 0;
1244         a->type = rn;
1245 }
1246
1247 long
1248 RtoB(int r)
1249 {
1250
1251         if(r < D_AX || r > D_DI)
1252                 return 0;
1253         return 1L << (r-D_AX);
1254 }
1255
1256 int
1257 BtoR(long b)
1258 {
1259
1260         b &= 0xffL;
1261         if(b == 0)
1262                 return 0;
1263         return bitno(b) + D_AX;
1264 }