]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/vc/reg.c
ip/cifsd: dont return garbage in upper 32 bit of unix extension stat fields
[plan9front.git] / sys / src / cmd / vc / reg.c
1 #include "gc.h"
2
3 void    addsplits(void);
4
5 Reg*
6 rega(void)
7 {
8         Reg *r;
9
10         r = freer;
11         if(r == R) {
12                 r = alloc(sizeof(*r));
13         } else
14                 freer = r->link;
15
16         *r = zreg;
17         return r;
18 }
19
20 int
21 rcmp(const void *a1, const void *a2)
22 {
23         Rgn *p1, *p2;
24         int c1, c2;
25
26         p1 = (Rgn*)a1;
27         p2 = (Rgn*)a2;
28         c1 = p2->cost;
29         c2 = p1->cost;
30         if(c1 -= c2)
31                 return c1;
32         return p2->varno - p1->varno;
33 }
34
35 void
36 regopt(Prog *p)
37 {
38         Reg *r, *r1, *r2;
39         Prog *p1;
40         int i, z;
41         long initpc, val, npc;
42         ulong vreg;
43         Bits bit;
44         struct
45         {
46                 long    m;
47                 long    c;
48                 Reg*    p;
49         } log5[6], *lp;
50
51         firstr = R;
52         lastr = R;
53         nvar = 0;
54         regbits = 0;
55         for(z=0; z<BITS; z++) {
56                 externs.b[z] = 0;
57                 params.b[z] = 0;
58                 consts.b[z] = 0;
59                 addrs.b[z] = 0;
60         }
61
62         /*
63          * pass 1
64          * build aux data structure
65          * allocate pcs
66          * find use and set of variables
67          */
68         val = 5L * 5L * 5L * 5L * 5L;
69         lp = log5;
70         for(i=0; i<5; i++) {
71                 lp->m = val;
72                 lp->c = 0;
73                 lp->p = R;
74                 val /= 5L;
75                 lp++;
76         }
77         val = 0;
78         for(; p != P; p = p->link) {
79                 switch(p->as) {
80                 case ADATA:
81                 case AGLOBL:
82                 case ANAME:
83                 case ASIGNAME:
84                         continue;
85                 }
86                 r = rega();
87                 if(firstr == R) {
88                         firstr = r;
89                         lastr = r;
90                 } else {
91                         lastr->link = r;
92                         r->p1 = lastr;
93                         lastr->s1 = r;
94                         lastr = r;
95                 }
96                 r->prog = p;
97                 r->pc = val;
98                 val++;
99
100                 lp = log5;
101                 for(i=0; i<5; i++) {
102                         lp->c--;
103                         if(lp->c <= 0) {
104                                 lp->c = lp->m;
105                                 if(lp->p != R)
106                                         lp->p->log5 = r;
107                                 lp->p = r;
108                                 (lp+1)->c = 0;
109                                 break;
110                         }
111                         lp++;
112                 }
113
114                 r1 = r->p1;
115                 if(r1 != R)
116                 switch(r1->prog->as) {
117                 case ARET:
118                 case AJMP:
119                 case ARFE:
120                         r->p1 = R;
121                         r1->s1 = R;
122                 }
123
124                 /*
125                  * left side always read
126                  */
127                 bit = mkvar(&p->from, p->as==AMOVW);
128                 for(z=0; z<BITS; z++)
129                         r->use1.b[z] |= bit.b[z];
130
131                 /*
132                  * right side depends on opcode
133                  */
134                 bit = mkvar(&p->to, 0);
135                 if(bany(&bit))
136                 switch(p->as) {
137                 default:
138                         diag(Z, "reg: unknown asop: %A", p->as);
139                         break;
140
141                 /*
142                  * right side write
143                  */
144                 case ANOP:
145                 case AMOVB:
146                 case AMOVBU:
147                 case AMOVH:
148                 case AMOVHU:
149                 case AMOVW:
150                 case AMOVF:
151                 case AMOVD:
152                         for(z=0; z<BITS; z++)
153                                 r->set.b[z] |= bit.b[z];
154                         break;
155
156                 /*
157                  * funny
158                  */
159                 case AJAL:
160                         for(z=0; z<BITS; z++)
161                                 addrs.b[z] |= bit.b[z];
162                         break;
163                 }
164         }
165         if(firstr == R)
166                 return;
167         initpc = pc - val;
168         npc = val;
169
170         /*
171          * pass 2
172          * turn branch references to pointers
173          * build back pointers
174          */
175         for(r = firstr; r != R; r = r->link) {
176                 p = r->prog;
177                 if(p->to.type == D_BRANCH) {
178                         val = p->to.offset - initpc;
179                         r1 = firstr;
180                         while(r1 != R) {
181                                 r2 = r1->log5;
182                                 if(r2 != R && val >= r2->pc) {
183                                         r1 = r2;
184                                         continue;
185                                 }
186                                 if(r1->pc == val)
187                                         break;
188                                 r1 = r1->link;
189                         }
190                         if(r1 == R) {
191                                 nearln = p->lineno;
192                                 diag(Z, "ref not found\n%P", p);
193                                 continue;
194                         }
195                         if(r1 == r) {
196                                 nearln = p->lineno;
197                                 diag(Z, "ref to self\n%P", p);
198                                 continue;
199                         }
200                         r->s2 = r1;
201                         r->p2link = r1->p2;
202                         r1->p2 = r;
203                 }
204         }
205         if(debug['R']) {
206                 p = firstr->prog;
207                 print("\n%L %D\n", p->lineno, &p->from);
208         }
209
210         /*
211          * pass 2.5
212          * find looping structure
213          */
214         for(r = firstr; r != R; r = r->link)
215                 r->active = 0;
216         change = 0;
217         loopit(firstr, npc);
218
219         /*
220          * pass 3
221          * iterate propagating usage
222          *      back until flow graph is complete
223          */
224 loop1:
225         change = 0;
226         for(r = firstr; r != R; r = r->link)
227                 r->active = 0;
228         for(r = firstr; r != R; r = r->link)
229                 if(r->prog->as == ARET)
230                         prop(r, zbits, zbits);
231 loop11:
232         /* pick up unreachable code */
233         i = 0;
234         for(r = firstr; r != R; r = r1) {
235                 r1 = r->link;
236                 if(r1 && r1->active && !r->active) {
237                         prop(r, zbits, zbits);
238                         i = 1;
239                 }
240         }
241         if(i)
242                 goto loop11;
243         if(change)
244                 goto loop1;
245
246
247         /*
248          * pass 4
249          * iterate propagating register/variable synchrony
250          *      forward until graph is complete
251          */
252 loop2:
253         change = 0;
254         for(r = firstr; r != R; r = r->link)
255                 r->active = 0;
256         synch(firstr, zbits);
257         if(change)
258                 goto loop2;
259
260         addsplits();
261
262         if(debug['R'] && debug['v']) {
263                 print("\nprop structure:\n");
264                 for(r = firstr; r != R; r = r->link) {
265                         print("%ld:%P", r->loop, r->prog);
266                         for(z=0; z<BITS; z++)
267                                 bit.b[z] = r->set.b[z] |
268                                         r->refahead.b[z] | r->calahead.b[z] |
269                                         r->refbehind.b[z] | r->calbehind.b[z] |
270                                         r->use1.b[z] | r->use2.b[z];
271                         if(bany(&bit)) {
272                                 print("\t");
273                                 if(bany(&r->use1))
274                                         print(" u1=%B", r->use1);
275                                 if(bany(&r->use2))
276                                         print(" u2=%B", r->use2);
277                                 if(bany(&r->set))
278                                         print(" st=%B", r->set);
279                                 if(bany(&r->refahead))
280                                         print(" ra=%B", r->refahead);
281                                 if(bany(&r->calahead))
282                                         print(" ca=%B", r->calahead);
283                                 if(bany(&r->refbehind))
284                                         print(" rb=%B", r->refbehind);
285                                 if(bany(&r->calbehind))
286                                         print(" cb=%B", r->calbehind);
287                                 if(bany(&r->regdiff))
288                                         print(" rd=%B", r->regdiff);
289                         }
290                         print("\n");
291                 }
292         }
293
294         /*
295          * pass 5
296          * isolate regions
297          * calculate costs (paint1)
298          */
299         r = firstr;
300         if(r) {
301                 for(z=0; z<BITS; z++)
302                         bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
303                           ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
304                 if(bany(&bit)) {
305                         nearln = r->prog->lineno;
306                         warn(Z, "used and not set: %B", bit);
307                         if(debug['R'] && !debug['w'])
308                                 print("used and not set: %B\n", bit);
309                 }
310         }
311
312         for(r = firstr; r != R; r = r->link)
313                 r->act = zbits;
314         rgp = region;
315         nregion = 0;
316         for(r = firstr; r != R; r = r->link) {
317                 for(z=0; z<BITS; z++)
318                         bit.b[z] = r->set.b[z] &
319                           ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
320                 if(bany(&bit)) {
321                         nearln = r->prog->lineno;
322                         warn(Z, "set and not used: %B", bit);
323                         if(debug['R'])
324                                 print("set and not used: %B\n", bit);
325                         excise(r);
326                 }
327                 for(z=0; z<BITS; z++)
328                         bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
329                 while(bany(&bit)) {
330                         i = bnum(bit);
331                         rgp->enter = r;
332                         rgp->varno = i;
333                         change = 0;
334                         if(debug['R'] && debug['v'])
335                                 print("\n");
336                         paint1(r, i);
337                         bit.b[i/32] &= ~(1L<<(i%32));
338                         if(change <= 0) {
339                                 if(debug['R'])
340                                         print("%L $%d: %B\n",
341                                                 r->prog->lineno, change, blsh(i));
342                                 continue;
343                         }
344                         rgp->cost = change;
345                         nregion++;
346                         if(nregion >= NRGN) {
347                                 warn(Z, "too many regions");
348                                 goto brk;
349                         }
350                         rgp++;
351                 }
352         }
353 brk:
354         qsort(region, nregion, sizeof(region[0]), rcmp);
355
356         /*
357          * pass 6
358          * determine used registers (paint2)
359          * replace code (paint3)
360          */
361         rgp = region;
362         for(i=0; i<nregion; i++) {
363                 bit = blsh(rgp->varno);
364                 vreg = paint2(rgp->enter, rgp->varno);
365                 vreg = allreg(vreg, rgp);
366                 if(debug['R']) {
367                         if(rgp->regno >= NREG)
368                                 print("%L $%d F%d: %B\n",
369                                         rgp->enter->prog->lineno,
370                                         rgp->cost,
371                                         rgp->regno-NREG,
372                                         bit);
373                         else
374                                 print("%L $%d R%d: %B\n",
375                                         rgp->enter->prog->lineno,
376                                         rgp->cost,
377                                         rgp->regno,
378                                         bit);
379                 }
380                 if(rgp->regno != 0)
381                         paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
382                 rgp++;
383         }
384         /*
385          * pass 7
386          * peep-hole on basic block
387          */
388         if(!debug['R'] || debug['P'])
389                 peep();
390
391         /*
392          * pass 8
393          * recalculate pc
394          */
395         val = initpc;
396         for(r = firstr; r != R; r = r1) {
397                 r->pc = val;
398                 p = r->prog;
399                 p1 = P;
400                 r1 = r->link;
401                 if(r1 != R)
402                         p1 = r1->prog;
403                 for(; p != p1; p = p->link) {
404                         switch(p->as) {
405                         default:
406                                 val++;
407                                 break;
408
409                         case ANOP:
410                         case ADATA:
411                         case AGLOBL:
412                         case ANAME:
413                         case ASIGNAME:
414                                 break;
415                         }
416                 }
417         }
418         pc = val;
419
420         /*
421          * fix up branches
422          */
423         if(debug['R'])
424                 if(bany(&addrs))
425                         print("addrs: %B\n", addrs);
426
427         r1 = 0; /* set */
428         for(r = firstr; r != R; r = r->link) {
429                 p = r->prog;
430                 if(p->to.type == D_BRANCH)
431                         p->to.offset = r->s2->pc;
432                 r1 = r;
433         }
434
435         /*
436          * last pass
437          * eliminate nops
438          * free aux structures
439          */
440         for(p = firstr->prog; p != P; p = p->link){
441                 while(p->link && p->link->as == ANOP)
442                         p->link = p->link->link;
443         }
444         if(r1 != R) {
445                 r1->link = freer;
446                 freer = firstr;
447         }
448 }
449
450 void
451 addsplits(void)
452 {
453         Reg *r, *r1;
454         int z, i;
455         Bits bit;
456
457         for(r = firstr; r != R; r = r->link) {
458                 if(r->loop > 1)
459                         continue;
460                 if(r->prog->as == AJAL)
461                         continue;
462                 for(r1 = r->p2; r1 != R; r1 = r1->p2link) {
463                         if(r1->loop <= 1)
464                                 continue;
465                         for(z=0; z<BITS; z++)
466                                 bit.b[z] = r1->calbehind.b[z] &
467                                         (r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) &
468                                         ~(r->calahead.b[z] & addrs.b[z]);
469                         while(bany(&bit)) {
470                                 i = bnum(bit);
471                                 bit.b[i/32] &= ~(1L << (i%32));
472                         }
473                 }
474         }
475 }
476
477 /*
478  * add mov b,rn
479  * just after r
480  */
481 void
482 addmove(Reg *r, int bn, int rn, int f)
483 {
484         Prog *p, *p1;
485         Adr *a;
486         Var *v;
487
488         p1 = alloc(sizeof(*p1));
489         *p1 = zprog;
490         p = r->prog;
491
492         p1->link = p->link;
493         p->link = p1;
494         p1->lineno = p->lineno;
495
496         v = var + bn;
497
498         a = &p1->to;
499         a->sym = v->sym;
500         a->name = v->name;
501         a->offset = v->offset;
502         a->etype = v->etype;
503         a->type = D_OREG;
504         if(a->etype == TARRAY || a->sym == S)
505                 a->type = D_CONST;
506
507         p1->as = AMOVW;
508         if(v->etype == TCHAR || v->etype == TUCHAR)
509                 p1->as = AMOVB;
510         if(v->etype == TSHORT || v->etype == TUSHORT)
511                 p1->as = AMOVH;
512         if(v->etype == TFLOAT)
513                 p1->as = AMOVF;
514         if(v->etype == TDOUBLE)
515                 p1->as = AMOVD;
516
517         p1->from.type = D_REG;
518         p1->from.reg = rn;
519         if(rn >= NREG) {
520                 p1->from.type = D_FREG;
521                 p1->from.reg = rn-NREG;
522         }
523         if(!f) {
524                 p1->from = *a;
525                 *a = zprog.from;
526                 a->type = D_REG;
527                 a->reg = rn;
528                 if(rn >= NREG) {
529                         a->type = D_FREG;
530                         a->reg = rn-NREG;
531                 }
532                 if(v->etype == TUCHAR)
533                         p1->as = AMOVBU;
534                 if(v->etype == TUSHORT)
535                         p1->as = AMOVHU;
536         }
537         if(debug['R'])
538                 print("%P\t.a%P\n", p, p1);
539 }
540
541 Bits
542 mkvar(Adr *a, int docon)
543 {
544         Var *v;
545         int i, t, n, et, z;
546         long o;
547         Bits bit;
548         Sym *s;
549
550         t = a->type;
551         if(t == D_REG && a->reg != NREG)
552                 regbits |= RtoB(a->reg);
553         if(t == D_FREG && a->reg != NREG)
554                 regbits |= FtoB(a->reg);
555         s = a->sym;
556         o = a->offset;
557         et = a->etype;
558         if(s == S) {
559                 if(t != D_CONST || !docon || a->reg != NREG)
560                         goto none;
561                 et = TLONG;
562         }
563         if(t == D_CONST) {
564                 if(s == S && sval(o))
565                         goto none;
566         }
567
568         n = a->name;
569         v = var;
570         for(i=0; i<nvar; i++) {
571                 if(s == v->sym)
572                 if(n == v->name)
573                 if(o == v->offset)
574                         goto out;
575                 v++;
576         }
577         if(s)
578                 if(s->name[0] == '.')
579                         goto none;
580         if(nvar >= NVAR) {
581                 if(debug['w'] > 1 && s)
582                         warn(Z, "variable not optimized: %s", s->name);
583                 goto none;
584         }
585         i = nvar;
586         nvar++;
587         v = &var[i];
588         v->sym = s;
589         v->offset = o;
590         v->etype = et;
591         v->name = n;
592         if(debug['R'])
593                 print("bit=%2d et=%2d %D\n", i, et, a);
594 out:
595         bit = blsh(i);
596         if(n == D_EXTERN || n == D_STATIC)
597                 for(z=0; z<BITS; z++)
598                         externs.b[z] |= bit.b[z];
599         if(n == D_PARAM)
600                 for(z=0; z<BITS; z++)
601                         params.b[z] |= bit.b[z];
602         if(v->etype != et || !typechlpfd[et])   /* funny punning */
603                 for(z=0; z<BITS; z++)
604                         addrs.b[z] |= bit.b[z];
605         if(t == D_CONST) {
606                 if(s == S) {
607                         for(z=0; z<BITS; z++)
608                                 consts.b[z] |= bit.b[z];
609                         return bit;
610                 }
611                 if(et != TARRAY)
612                         for(z=0; z<BITS; z++)
613                                 addrs.b[z] |= bit.b[z];
614                 for(z=0; z<BITS; z++)
615                         params.b[z] |= bit.b[z];
616                 return bit;
617         }
618         if(t == D_OREG)
619                 return bit;
620
621 none:
622         return zbits;
623 }
624
625 void
626 prop(Reg *r, Bits ref, Bits cal)
627 {
628         Reg *r1, *r2;
629         int z;
630
631         for(r1 = r; r1 != R; r1 = r1->p1) {
632                 for(z=0; z<BITS; z++) {
633                         ref.b[z] |= r1->refahead.b[z];
634                         if(ref.b[z] != r1->refahead.b[z]) {
635                                 r1->refahead.b[z] = ref.b[z];
636                                 change++;
637                         }
638                         cal.b[z] |= r1->calahead.b[z];
639                         if(cal.b[z] != r1->calahead.b[z]) {
640                                 r1->calahead.b[z] = cal.b[z];
641                                 change++;
642                         }
643                 }
644                 switch(r1->prog->as) {
645                 case AJAL:
646                         for(z=0; z<BITS; z++) {
647                                 cal.b[z] |= ref.b[z] | externs.b[z];
648                                 ref.b[z] = 0;
649                         }
650                         break;
651
652                 case ATEXT:
653                         for(z=0; z<BITS; z++) {
654                                 cal.b[z] = 0;
655                                 ref.b[z] = 0;
656                         }
657                         break;
658
659                 case ARET:
660                         for(z=0; z<BITS; z++) {
661                                 cal.b[z] = externs.b[z];
662                                 ref.b[z] = 0;
663                         }
664                 }
665                 for(z=0; z<BITS; z++) {
666                         ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
667                                 r1->use1.b[z] | r1->use2.b[z];
668                         cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
669                         r1->refbehind.b[z] = ref.b[z];
670                         r1->calbehind.b[z] = cal.b[z];
671                 }
672                 if(r1->active)
673                         break;
674                 r1->active = 1;
675         }
676         for(; r != r1; r = r->p1)
677                 for(r2 = r->p2; r2 != R; r2 = r2->p2link)
678                         prop(r2, r->refbehind, r->calbehind);
679 }
680
681 /*
682  * find looping structure
683  *
684  * 1) find reverse postordering
685  * 2) find approximate dominators,
686  *      the actual dominators if the flow graph is reducible
687  *      otherwise, dominators plus some other non-dominators.
688  *      See Matthew S. Hecht and Jeffrey D. Ullman,
689  *      "Analysis of a Simple Algorithm for Global Data Flow Problems",
690  *      Conf.  Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
691  *      Oct. 1-3, 1973, pp.  207-217.
692  * 3) find all nodes with a predecessor dominated by the current node.
693  *      such a node is a loop head.
694  *      recursively, all preds with a greater rpo number are in the loop
695  */
696 long
697 postorder(Reg *r, Reg **rpo2r, long n)
698 {
699         Reg *r1;
700
701         r->rpo = 1;
702         r1 = r->s1;
703         if(r1 && !r1->rpo)
704                 n = postorder(r1, rpo2r, n);
705         r1 = r->s2;
706         if(r1 && !r1->rpo)
707                 n = postorder(r1, rpo2r, n);
708         rpo2r[n] = r;
709         n++;
710         return n;
711 }
712
713 long
714 rpolca(long *idom, long rpo1, long rpo2)
715 {
716         long t;
717
718         if(rpo1 == -1)
719                 return rpo2;
720         while(rpo1 != rpo2){
721                 if(rpo1 > rpo2){
722                         t = rpo2;
723                         rpo2 = rpo1;
724                         rpo1 = t;
725                 }
726                 while(rpo1 < rpo2){
727                         t = idom[rpo2];
728                         if(t >= rpo2)
729                                 fatal(Z, "bad idom");
730                         rpo2 = t;
731                 }
732         }
733         return rpo1;
734 }
735
736 int
737 doms(long *idom, long r, long s)
738 {
739         while(s > r)
740                 s = idom[s];
741         return s == r;
742 }
743
744 int
745 loophead(long *idom, Reg *r)
746 {
747         long src;
748
749         src = r->rpo;
750         if(r->p1 != R && doms(idom, src, r->p1->rpo))
751                 return 1;
752         for(r = r->p2; r != R; r = r->p2link)
753                 if(doms(idom, src, r->rpo))
754                         return 1;
755         return 0;
756 }
757
758 void
759 loopmark(Reg **rpo2r, long head, Reg *r)
760 {
761         if(r->rpo < head || r->active == head)
762                 return;
763         r->active = head;
764         r->loop += LOOP;
765         if(r->p1 != R)
766                 loopmark(rpo2r, head, r->p1);
767         for(r = r->p2; r != R; r = r->p2link)
768                 loopmark(rpo2r, head, r);
769 }
770
771 void
772 loopit(Reg *r, long nr)
773 {
774         Reg *r1;
775         long i, d, me;
776
777         if(nr > maxnr) {
778                 rpo2r = alloc(nr * sizeof(Reg*));
779                 idom = alloc(nr * sizeof(long));
780                 maxnr = nr;
781         }
782
783         d = postorder(r, rpo2r, 0);
784         if(d > nr)
785                 fatal(Z, "too many reg nodes");
786         nr = d;
787         for(i = 0; i < nr / 2; i++){
788                 r1 = rpo2r[i];
789                 rpo2r[i] = rpo2r[nr - 1 - i];
790                 rpo2r[nr - 1 - i] = r1;
791         }
792         for(i = 0; i < nr; i++)
793                 rpo2r[i]->rpo = i;
794
795         idom[0] = 0;
796         for(i = 0; i < nr; i++){
797                 r1 = rpo2r[i];
798                 me = r1->rpo;
799                 d = -1;
800                 if(r1->p1 != R && r1->p1->rpo < me)
801                         d = r1->p1->rpo;
802                 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
803                         if(r1->rpo < me)
804                                 d = rpolca(idom, d, r1->rpo);
805                 idom[i] = d;
806         }
807
808         for(i = 0; i < nr; i++){
809                 r1 = rpo2r[i];
810                 r1->loop++;
811                 if(r1->p2 != R && loophead(idom, r1))
812                         loopmark(rpo2r, i, r1);
813         }
814 }
815
816 void
817 synch(Reg *r, Bits dif)
818 {
819         Reg *r1;
820         int z;
821
822         for(r1 = r; r1 != R; r1 = r1->s1) {
823                 for(z=0; z<BITS; z++) {
824                         dif.b[z] = (dif.b[z] &
825                                 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
826                                         r1->set.b[z] | r1->regdiff.b[z];
827                         if(dif.b[z] != r1->regdiff.b[z]) {
828                                 r1->regdiff.b[z] = dif.b[z];
829                                 change++;
830                         }
831                 }
832                 if(r1->active)
833                         break;
834                 r1->active = 1;
835                 for(z=0; z<BITS; z++)
836                         dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
837                 if(r1->s2 != R)
838                         synch(r1->s2, dif);
839         }
840 }
841
842 ulong
843 allreg(ulong b, Rgn *r)
844 {
845         Var *v;
846         int i;
847
848         v = var + r->varno;
849         r->regno = 0;
850         switch(v->etype) {
851
852         default:
853                 diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
854                 break;
855
856         case TCHAR:
857         case TUCHAR:
858         case TSHORT:
859         case TUSHORT:
860         case TINT:
861         case TUINT:
862         case TLONG:
863         case TULONG:
864         case TIND:
865         case TARRAY:
866                 i = BtoR(~b);
867                 if(i && r->cost >= 0) {
868                         r->regno = i;
869                         return RtoB(i);
870                 }
871                 break;
872
873         case TDOUBLE:
874         case TFLOAT:
875                 i = BtoF(~b);
876                 if(i && r->cost >= 0) {
877                         r->regno = i+NREG;
878                         return FtoB(i);
879                 }
880                 break;
881         }
882         return 0;
883 }
884
885 void
886 paint1(Reg *r, int bn)
887 {
888         Reg *r1;
889         Prog *p;
890         int z;
891         ulong bb;
892
893         z = bn/32;
894         bb = 1L<<(bn%32);
895         if(r->act.b[z] & bb)
896                 return;
897         for(;;) {
898                 if(!(r->refbehind.b[z] & bb))
899                         break;
900                 r1 = r->p1;
901                 if(r1 == R)
902                         break;
903                 if(!(r1->refahead.b[z] & bb))
904                         break;
905                 if(r1->act.b[z] & bb)
906                         break;
907                 r = r1;
908         }
909
910         if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) {
911                 change -= CLOAD * r->loop;
912                 if(debug['R'] && debug['v'])
913                         print("%ld%P\tld %B $%d\n", r->loop,
914                                 r->prog, blsh(bn), change);
915         }
916         for(;;) {
917                 r->act.b[z] |= bb;
918                 p = r->prog;
919
920                 if(r->use1.b[z] & bb) {
921                         change += CREF * r->loop;
922                         if(debug['R'] && debug['v'])
923                                 print("%ld%P\tu1 %B $%d\n", r->loop,
924                                         p, blsh(bn), change);
925                 }
926
927                 if((r->use2.b[z]|r->set.b[z]) & bb) {
928                         change += CREF * r->loop;
929                         if(debug['R'] && debug['v'])
930                                 print("%ld%P\tu2 %B $%d\n", r->loop,
931                                         p, blsh(bn), change);
932                 }
933
934                 if(STORE(r) & r->regdiff.b[z] & bb) {
935                         change -= CLOAD * r->loop;
936                         if(debug['R'] && debug['v'])
937                                 print("%ld%P\tst %B $%d\n", r->loop,
938                                         p, blsh(bn), change);
939                 }
940
941                 if(r->refbehind.b[z] & bb)
942                         for(r1 = r->p2; r1 != R; r1 = r1->p2link)
943                                 if(r1->refahead.b[z] & bb)
944                                         paint1(r1, bn);
945
946                 if(!(r->refahead.b[z] & bb))
947                         break;
948                 r1 = r->s2;
949                 if(r1 != R)
950                         if(r1->refbehind.b[z] & bb)
951                                 paint1(r1, bn);
952                 r = r->s1;
953                 if(r == R)
954                         break;
955                 if(r->act.b[z] & bb)
956                         break;
957                 if(!(r->refbehind.b[z] & bb))
958                         break;
959         }
960 }
961
962 ulong
963 paint2(Reg *r, int bn)
964 {
965         Reg *r1;
966         int z;
967         ulong bb, vreg;
968
969         z = bn/32;
970         bb = 1L << (bn%32);
971         vreg = regbits;
972         if(!(r->act.b[z] & bb))
973                 return vreg;
974         for(;;) {
975                 if(!(r->refbehind.b[z] & bb))
976                         break;
977                 r1 = r->p1;
978                 if(r1 == R)
979                         break;
980                 if(!(r1->refahead.b[z] & bb))
981                         break;
982                 if(!(r1->act.b[z] & bb))
983                         break;
984                 r = r1;
985         }
986         for(;;) {
987                 r->act.b[z] &= ~bb;
988
989                 vreg |= r->regu;
990
991                 if(r->refbehind.b[z] & bb)
992                         for(r1 = r->p2; r1 != R; r1 = r1->p2link)
993                                 if(r1->refahead.b[z] & bb)
994                                         vreg |= paint2(r1, bn);
995
996                 if(!(r->refahead.b[z] & bb))
997                         break;
998                 r1 = r->s2;
999                 if(r1 != R)
1000                         if(r1->refbehind.b[z] & bb)
1001                                 vreg |= paint2(r1, bn);
1002                 r = r->s1;
1003                 if(r == R)
1004                         break;
1005                 if(!(r->act.b[z] & bb))
1006                         break;
1007                 if(!(r->refbehind.b[z] & bb))
1008                         break;
1009         }
1010         return vreg;
1011 }
1012
1013 void
1014 paint3(Reg *r, int bn, long rb, int rn)
1015 {
1016         Reg *r1;
1017         Prog *p;
1018         int z;
1019         ulong bb;
1020
1021         z = bn/32;
1022         bb = 1L << (bn%32);
1023         if(r->act.b[z] & bb)
1024                 return;
1025         for(;;) {
1026                 if(!(r->refbehind.b[z] & bb))
1027                         break;
1028                 r1 = r->p1;
1029                 if(r1 == R)
1030                         break;
1031                 if(!(r1->refahead.b[z] & bb))
1032                         break;
1033                 if(r1->act.b[z] & bb)
1034                         break;
1035                 r = r1;
1036         }
1037
1038         if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1039                 addmove(r, bn, rn, 0);
1040         for(;;) {
1041                 r->act.b[z] |= bb;
1042                 p = r->prog;
1043
1044                 if(r->use1.b[z] & bb) {
1045                         if(debug['R'])
1046                                 print("%P", p);
1047                         addreg(&p->from, rn);
1048                         if(debug['R'])
1049                                 print("\t.c%P\n", p);
1050                 }
1051                 if((r->use2.b[z]|r->set.b[z]) & bb) {
1052                         if(debug['R'])
1053                                 print("%P", p);
1054                         addreg(&p->to, rn);
1055                         if(debug['R'])
1056                                 print("\t.c%P\n", p);
1057                 }
1058
1059                 if(STORE(r) & r->regdiff.b[z] & bb)
1060                         addmove(r, bn, rn, 1);
1061                 r->regu |= rb;
1062
1063                 if(r->refbehind.b[z] & bb)
1064                         for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1065                                 if(r1->refahead.b[z] & bb)
1066                                         paint3(r1, bn, rb, rn);
1067
1068                 if(!(r->refahead.b[z] & bb))
1069                         break;
1070                 r1 = r->s2;
1071                 if(r1 != R)
1072                         if(r1->refbehind.b[z] & bb)
1073                                 paint3(r1, bn, rb, rn);
1074                 r = r->s1;
1075                 if(r == R)
1076                         break;
1077                 if(r->act.b[z] & bb)
1078                         break;
1079                 if(!(r->refbehind.b[z] & bb))
1080                         break;
1081         }
1082 }
1083
1084 void
1085 addreg(Adr *a, int rn)
1086 {
1087
1088         a->sym = 0;
1089         a->name = D_NONE;
1090         a->type = D_REG;
1091         a->reg = rn;
1092         if(rn >= NREG) {
1093                 a->type = D_FREG;
1094                 a->reg = rn-NREG;
1095         }
1096 }
1097
1098 /*
1099  *      bit     reg
1100  *      0       R3
1101  *      1       R4
1102  *      ...     ...
1103  *      19      R22
1104  *      20      R23
1105  */
1106 long
1107 RtoB(int r)
1108 {
1109
1110         if(r < 3 || r > 23)
1111                 return 0;
1112         return 1L << (r-3);
1113 }
1114
1115 BtoR(long b)
1116 {
1117
1118         b &= 0x001fffffL;
1119         if(b == 0)
1120                 return 0;
1121         return bitno(b) + 3;
1122 }
1123
1124 /*
1125  *      bit     reg
1126  *      22      F4
1127  *      23      F6
1128  *      ...     ...
1129  *      31      F22
1130  */
1131 long
1132 FtoB(int f)
1133 {
1134
1135         if(f < 4 || f > 22 || (f&1))
1136                 return 0;
1137         return 1L << (f/2 + 20);
1138 }
1139
1140 int
1141 BtoF(long b)
1142 {
1143
1144         b &= 0xffc00000L;
1145         if(b == 0)
1146                 return 0;
1147         return bitno(b)*2 - 40;
1148 }