]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/7c/reg.c
Libflac: Tell it that we have stdint.h so it finds SIZE_MAX
[plan9front.git] / sys / src / cmd / 7c / reg.c
1 #include "gc.h"
2
3 void    addsplits(void);
4
5 Reg*
6 rega(void)
7 {
8         Reg *r;
9
10         r = freer;
11         if(r == R) {
12                 r = alloc(sizeof(*r));
13         } else
14                 freer = r->link;
15
16         *r = zreg;
17         return r;
18 }
19
20 int
21 rcmp(void *a1, void *a2)
22 {
23         Rgn *p1, *p2;
24         int c1, c2;
25
26         p1 = (Rgn*)a1;
27         p2 = (Rgn*)a2;
28         c1 = p2->cost;
29         c2 = p1->cost;
30         if(c1 -= c2)
31                 return c1;
32         return p2->varno - p1->varno;
33 }
34
35 void
36 regopt(Prog *p)
37 {
38         Reg *r, *r1, *r2;
39         Prog *p1;
40         int i, z;
41         long initpc, val, npc;
42         ulong vreg;
43         Bits bit;
44         struct
45         {
46                 long    m;
47                 long    c;
48                 Reg*    p;
49         } log5[6], *lp;
50
51         firstr = R;
52         lastr = R;
53         nvar = 0;
54         regbits = 0;
55         for(z=0; z<BITS; z++) {
56                 externs.b[z] = 0;
57                 params.b[z] = 0;
58                 consts.b[z] = 0;
59                 addrs.b[z] = 0;
60         }
61
62         /*
63          * pass 1
64          * build aux data structure
65          * allocate pcs
66          * find use and set of variables
67          */
68         val = 5L * 5L * 5L * 5L * 5L;
69         lp = log5;
70         for(i=0; i<5; i++) {
71                 lp->m = val;
72                 lp->c = 0;
73                 lp->p = R;
74                 val /= 5L;
75                 lp++;
76         }
77         val = 0;
78         for(; p != P; p = p->link) {
79                 switch(p->as) {
80                 case ADATA:
81                 case AGLOBL:
82                 case ANAME:
83                 case ASIGNAME:
84                         continue;
85                 }
86                 r = rega();
87                 if(firstr == R) {
88                         firstr = r;
89                         lastr = r;
90                 } else {
91                         lastr->link = r;
92                         r->p1 = lastr;
93                         lastr->s1 = r;
94                         lastr = r;
95                 }
96                 r->prog = p;
97                 r->pc = val;
98                 val++;
99
100                 lp = log5;
101                 for(i=0; i<5; i++) {
102                         lp->c--;
103                         if(lp->c <= 0) {
104                                 lp->c = lp->m;
105                                 if(lp->p != R)
106                                         lp->p->log5 = r;
107                                 lp->p = r;
108                                 (lp+1)->c = 0;
109                                 break;
110                         }
111                         lp++;
112                 }
113
114                 r1 = r->p1;
115                 if(r1 != R)
116                 switch(r1->prog->as) {
117                 case ARETURN:
118                 case ARET:
119                 case AB:
120                 case AERET:
121                         r->p1 = R;
122                         r1->s1 = R;
123                 }
124
125                 /*
126                  * left side always read
127                  */
128                 bit = mkvar(&p->from, p->as==AMOVW || p->as == AMOVWU || p->as == AMOV);
129                 for(z=0; z<BITS; z++)
130                         r->use1.b[z] |= bit.b[z];
131
132                 /*
133                  * right side depends on opcode
134                  */
135                 bit = mkvar(&p->to, 0);
136                 if(bany(&bit))
137                 switch(p->as) {
138                 default:
139                         diag(Z, "reg: unknown asop: %A", p->as);
140                         break;
141
142                 /*
143                  * right side write
144                  */
145                 case ANOP:
146                 case AMOV:
147                 case AMOVB:
148                 case AMOVBU:
149                 case AMOVH:
150                 case AMOVHU:
151                 case AMOVW:
152                 case AMOVWU:
153                 case ASXTW:
154                 case AFMOVS:
155                 case AFCVTSD:
156                 case AFMOVD:
157                 case AFCVTDS:
158                         for(z=0; z<BITS; z++)
159                                 r->set.b[z] |= bit.b[z];
160                         break;
161
162                 /*
163                  * funny
164                  */
165                 case ABL:
166                         for(z=0; z<BITS; z++)
167                                 addrs.b[z] |= bit.b[z];
168                         break;
169                 }
170
171         }
172         if(firstr == R)
173                 return;
174         initpc = pc - val;
175         npc = val;
176
177         /*
178          * pass 2
179          * turn branch references to pointers
180          * build back pointers
181          */
182         for(r = firstr; r != R; r = r->link) {
183                 p = r->prog;
184                 if(p->to.type == D_BRANCH) {
185                         val = p->to.offset - initpc;
186                         r1 = firstr;
187                         while(r1 != R) {
188                                 r2 = r1->log5;
189                                 if(r2 != R && val >= r2->pc) {
190                                         r1 = r2;
191                                         continue;
192                                 }
193                                 if(r1->pc == val)
194                                         break;
195                                 r1 = r1->link;
196                         }
197                         if(r1 == R) {
198                                 nearln = p->lineno;
199                                 diag(Z, "ref not found\n%P", p);
200                                 continue;
201                         }
202                         if(r1 == r) {
203                                 nearln = p->lineno;
204                                 diag(Z, "ref to self\n%P", p);
205                                 continue;
206                         }
207                         r->s2 = r1;
208                         r->p2link = r1->p2;
209                         r1->p2 = r;
210                 }
211         }
212         if(debug['R']) {
213                 p = firstr->prog;
214                 print("\n%L %D\n", p->lineno, &p->from);
215         }
216
217         /*
218          * pass 2.5
219          * find looping structure
220          */
221         for(r = firstr; r != R; r = r->link)
222                 r->active = 0;
223         change = 0;
224         loopit(firstr, npc);
225
226         /*
227          * pass 3
228          * iterate propagating usage
229          *      back until flow graph is complete
230          */
231 loop1:
232         change = 0;
233         for(r = firstr; r != R; r = r->link)
234                 r->active = 0;
235         for(r = firstr; r != R; r = r->link)
236                 if(r->prog->as == ARET || r->prog->as == ARETURN)
237                         prop(r, zbits, zbits);
238 loop11:
239         /* pick up unreachable code */
240         i = 0;
241         for(r = firstr; r != R; r = r1) {
242                 r1 = r->link;
243                 if(r1 && r1->active && !r->active) {
244                         prop(r, zbits, zbits);
245                         i = 1;
246                 }
247         }
248         if(i)
249                 goto loop11;
250         if(change)
251                 goto loop1;
252
253
254         /*
255          * pass 4
256          * iterate propagating register/variable synchrony
257          *      forward until graph is complete
258          */
259 loop2:
260         change = 0;
261         for(r = firstr; r != R; r = r->link)
262                 r->active = 0;
263         synch(firstr, zbits);
264         if(change)
265                 goto loop2;
266
267         addsplits();
268
269         if(debug['R'] && debug['v']) {
270                 print("\nprop structure:\n");
271                 for(r = firstr; r != R; r = r->link) {
272                         print("%ld:%P", r->loop, r->prog);
273                         for(z=0; z<BITS; z++)
274                                 bit.b[z] = r->set.b[z] |
275                                         r->refahead.b[z] | r->calahead.b[z] |
276                                         r->refbehind.b[z] | r->calbehind.b[z] |
277                                         r->use1.b[z] | r->use2.b[z];
278                         if(bany(&bit)) {
279                                 print("\t");
280                                 if(bany(&r->use1))
281                                         print(" u1=%B", r->use1);
282                                 if(bany(&r->use2))
283                                         print(" u2=%B", r->use2);
284                                 if(bany(&r->set))
285                                         print(" st=%B", r->set);
286                                 if(bany(&r->refahead))
287                                         print(" ra=%B", r->refahead);
288                                 if(bany(&r->calahead))
289                                         print(" ca=%B", r->calahead);
290                                 if(bany(&r->refbehind))
291                                         print(" rb=%B", r->refbehind);
292                                 if(bany(&r->calbehind))
293                                         print(" cb=%B", r->calbehind);
294                         }
295                         print("\n");
296                 }
297         }
298
299         /*
300          * pass 5
301          * isolate regions
302          * calculate costs (paint1)
303          */
304         r = firstr;
305         if(r) {
306                 for(z=0; z<BITS; z++)
307                         bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
308                           ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
309                 if(bany(&bit)) {
310                         nearln = r->prog->lineno;
311                         warn(Z, "used and not set: %B", bit);
312                         if(debug['R'] && !debug['w'])
313                                 print("used and not set: %B\n", bit);
314                 }
315         }
316
317         for(r = firstr; r != R; r = r->link)
318                 r->act = zbits;
319         rgp = region;
320         nregion = 0;
321         for(r = firstr; r != R; r = r->link) {
322                 for(z=0; z<BITS; z++)
323                         bit.b[z] = r->set.b[z] &
324                           ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
325                 if(bany(&bit)) {
326                         nearln = r->prog->lineno;
327                         warn(Z, "set and not used: %B", bit);
328                         if(debug['R'])
329                                 print("set and not used: %B\n", bit);
330                         excise(r);
331                 }
332                 for(z=0; z<BITS; z++)
333                         bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
334                 while(bany(&bit)) {
335                         i = bnum(bit);
336                         rgp->enter = r;
337                         rgp->varno = i;
338                         change = 0;
339                         if(debug['R'] && debug['v'])
340                                 print("\n");
341                         paint1(r, i);
342                         bit.b[i/32] &= ~(1L<<(i%32));
343                         if(change <= 0) {
344                                 if(debug['R'])
345                                         print("%L $%d: %B\n",
346                                                 r->prog->lineno, change, blsh(i));
347                                 continue;
348                         }
349                         rgp->cost = change;
350                         nregion++;
351                         if(nregion >= NRGN) {
352                                 warn(Z, "too many regions");
353                                 goto brk;
354                         }
355                         rgp++;
356                 }
357         }
358 brk:
359         qsort(region, nregion, sizeof(region[0]), rcmp);
360
361         /*
362          * pass 6
363          * determine used registers (paint2)
364          * replace code (paint3)
365          */
366         rgp = region;
367         for(i=0; i<nregion; i++) {
368                 bit = blsh(rgp->varno);
369                 vreg = paint2(rgp->enter, rgp->varno);
370                 vreg = allreg(vreg, rgp);
371                 if(debug['R']) {
372                         if(rgp->regno >= NREG)
373                                 print("%L $%d F%d: %B\n",
374                                         rgp->enter->prog->lineno,
375                                         rgp->cost,
376                                         rgp->regno-NREG,
377                                         bit);
378                         else
379                                 print("%L $%d R%d: %B\n",
380                                         rgp->enter->prog->lineno,
381                                         rgp->cost,
382                                         rgp->regno,
383                                         bit);
384                 }
385                 if(rgp->regno != 0)
386                         paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
387                 rgp++;
388         }
389         /*
390          * pass 7
391          * peep-hole on basic block
392          */
393         if(!debug['R'] || debug['P'])
394                 peep();
395
396         /*
397          * pass 8
398          * recalculate pc
399          */
400         val = initpc;
401         for(r = firstr; r != R; r = r1) {
402                 r->pc = val;
403                 p = r->prog;
404                 p1 = P;
405                 r1 = r->link;
406                 if(r1 != R)
407                         p1 = r1->prog;
408                 for(; p != p1; p = p->link) {
409                         switch(p->as) {
410                         default:
411                                 val++;
412                                 break;
413
414                         case ANOP:
415                         case ADATA:
416                         case AGLOBL:
417                         case ANAME:
418                         case ASIGNAME:
419                                 break;
420                         }
421                 }
422         }
423         pc = val;
424
425         /*
426          * fix up branches
427          */
428         if(debug['R'])
429                 if(bany(&addrs))
430                         print("addrs: %B\n", addrs);
431
432         r1 = 0; /* set */
433         for(r = firstr; r != R; r = r->link) {
434                 p = r->prog;
435                 if(p->to.type == D_BRANCH)
436                         p->to.offset = r->s2->pc;
437                 r1 = r;
438         }
439
440         /*
441          * last pass
442          * eliminate nops
443          * free aux structures
444          */
445         for(p = firstr->prog; p != P; p = p->link){
446                 while(p->link && p->link->as == ANOP)
447                         p->link = p->link->link;
448         }
449         if(r1 != R) {
450                 r1->link = freer;
451                 freer = firstr;
452         }
453 }
454
455 void
456 addsplits(void)
457 {
458         Reg *r, *r1;
459         int z, i;
460         Bits bit;
461
462         for(r = firstr; r != R; r = r->link) {
463                 if(r->loop > 1)
464                         continue;
465                 if(r->prog->as == ABL)
466                         continue;
467                 for(r1 = r->p2; r1 != R; r1 = r1->p2link) {
468                         if(r1->loop <= 1)
469                                 continue;
470                         for(z=0; z<BITS; z++)
471                                 bit.b[z] = r1->calbehind.b[z] &
472                                         (r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) &
473                                         ~(r->calahead.b[z] & addrs.b[z]);
474                         while(bany(&bit)) {
475                                 i = bnum(bit);
476                                 bit.b[i/32] &= ~(1L << (i%32));
477                         }
478                 }
479         }
480 }
481
482 /*
483  * add mov b,rn
484  * just after r
485  */
486 void
487 addmove(Reg *r, int bn, int rn, int f)
488 {
489         Prog *p, *p1;
490         Adr *a;
491         Var *v;
492
493         p1 = alloc(sizeof(*p1));
494         *p1 = zprog;
495         p = r->prog;
496
497         p1->link = p->link;
498         p->link = p1;
499         p1->lineno = p->lineno;
500
501         v = var + bn;
502
503         a = &p1->to;
504         a->sym = v->sym;
505         a->name = v->name;
506         a->offset = v->offset;
507         a->etype = v->etype;
508         a->type = D_OREG;
509         if(a->etype == TARRAY || a->sym == S)
510                 a->type = D_CONST;
511
512         p1->as = AMOVW;
513         if(v->etype == TCHAR || v->etype == TUCHAR)
514                 p1->as = AMOVB;
515         if(v->etype == TSHORT || v->etype == TUSHORT)
516                 p1->as = AMOVH;
517         if(v->etype == TVLONG || v->etype == TUVLONG || v->etype == TIND)
518                 p1->as = AMOV;
519         if(v->etype == TFLOAT)
520                 p1->as = AFMOVS;
521         if(v->etype == TDOUBLE)
522                 p1->as = AFMOVD;
523
524         p1->from.type = D_REG;
525         p1->from.reg = rn;
526         if(rn >= NREG) {
527                 p1->from.type = D_FREG;
528                 p1->from.reg = rn-NREG;
529         }
530         if(!f) {
531                 p1->from = *a;
532                 *a = zprog.from;
533                 a->type = D_REG;
534                 a->reg = rn;
535                 if(rn >= NREG) {
536                         a->type = D_FREG;
537                         a->reg = rn-NREG;
538                 }
539                 if(v->etype == TUCHAR)
540                         p1->as = AMOVBU;
541                 if(v->etype == TUSHORT)
542                         p1->as = AMOVHU;
543                 if(v->etype == TUINT || v->etype == TULONG)
544                         p1->as = AMOVWU;
545         }
546         if(debug['R'])
547                 print("%P\t.a%P\n", p, p1);
548 }
549
550 Bits
551 mkvar(Adr *a, int docon)
552 {
553         Var *v;
554         int i, t, n, et, z;
555         vlong o;
556         Bits bit;
557         Sym *s;
558
559         t = a->type;
560         if(t == D_REG && a->reg != NREG)
561                 regbits |= RtoB(a->reg);
562         if(t == D_FREG && a->reg != NREG)
563                 regbits |= FtoB(a->reg);
564         s = a->sym;
565         o = a->offset;
566         et = a->etype;
567         if(s == S) {
568                 if(t != D_CONST || !docon || a->reg != NREG)
569                         goto none;
570                 // et = TLONG;
571         }
572         if(t == D_CONST) {
573                 if(s == S && sval(o))
574                         goto none;
575         }
576
577         n = a->name;
578         v = var;
579         for(i=0; i<nvar; i++) {
580                 if(s == v->sym)
581                 if(n == v->name)
582                 if(o == v->offset)
583                         goto out;
584                 v++;
585         }
586         if(s)
587                 if(s->name[0] == '.')
588                         goto none;
589         if(nvar >= NVAR) {
590                 if(debug['w'] > 1 && s)
591                         warn(Z, "variable not optimized: %s", s->name);
592                 goto none;
593         }
594         i = nvar;
595         nvar++;
596         v = &var[i];
597         v->sym = s;
598         v->offset = o;
599         v->etype = et;
600         v->name = n;
601         if(debug['R'])
602                 print("bit=%2d et=%2d %D\n", i, et, a);
603 out:
604         bit = blsh(i);
605         if(n == D_EXTERN || n == D_STATIC)
606                 for(z=0; z<BITS; z++)
607                         externs.b[z] |= bit.b[z];
608         if(n == D_PARAM)
609                 for(z=0; z<BITS; z++)
610                         params.b[z] |= bit.b[z];
611         if(v->etype != et || !(typechlpfd[et] || typev[et]))    /* funny punning */
612                 for(z=0; z<BITS; z++)
613                         addrs.b[z] |= bit.b[z];
614         if(t == D_CONST) {
615                 if(s == S) {
616                         for(z=0; z<BITS; z++)
617                                 consts.b[z] |= bit.b[z];
618                         return bit;
619                 }
620                 if(et != TARRAY)
621                         for(z=0; z<BITS; z++)
622                                 addrs.b[z] |= bit.b[z];
623                 for(z=0; z<BITS; z++)
624                         params.b[z] |= bit.b[z];
625                 return bit;
626         }
627         if(t == D_OREG)
628                 return bit;
629
630 none:
631         return zbits;
632 }
633
634 void
635 prop(Reg *r, Bits ref, Bits cal)
636 {
637         Reg *r1, *r2;
638         int z;
639
640         for(r1 = r; r1 != R; r1 = r1->p1) {
641                 for(z=0; z<BITS; z++) {
642                         ref.b[z] |= r1->refahead.b[z];
643                         if(ref.b[z] != r1->refahead.b[z]) {
644                                 r1->refahead.b[z] = ref.b[z];
645                                 change++;
646                         }
647                         cal.b[z] |= r1->calahead.b[z];
648                         if(cal.b[z] != r1->calahead.b[z]) {
649                                 r1->calahead.b[z] = cal.b[z];
650                                 change++;
651                         }
652                 }
653                 switch(r1->prog->as) {
654                 case ABL:
655                         for(z=0; z<BITS; z++) {
656                                 cal.b[z] |= ref.b[z] | externs.b[z];
657                                 ref.b[z] = 0;
658                         }
659                         break;
660
661                 case ATEXT:
662                         for(z=0; z<BITS; z++) {
663                                 cal.b[z] = 0;
664                                 ref.b[z] = 0;
665                         }
666                         break;
667
668                 case ARET:
669                 case ARETURN:
670                         for(z=0; z<BITS; z++) {
671                                 cal.b[z] = externs.b[z];
672                                 ref.b[z] = 0;
673                         }
674                 }
675                 for(z=0; z<BITS; z++) {
676                         ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
677                                 r1->use1.b[z] | r1->use2.b[z];
678                         cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
679                         r1->refbehind.b[z] = ref.b[z];
680                         r1->calbehind.b[z] = cal.b[z];
681                 }
682                 if(r1->active)
683                         break;
684                 r1->active = 1;
685         }
686         for(; r != r1; r = r->p1)
687                 for(r2 = r->p2; r2 != R; r2 = r2->p2link)
688                         prop(r2, r->refbehind, r->calbehind);
689 }
690
691 /*
692  * find looping structure
693  *
694  * 1) find reverse postordering
695  * 2) find approximate dominators,
696  *      the actual dominators if the flow graph is reducible
697  *      otherwise, dominators plus some other non-dominators.
698  *      See Matthew S. Hecht and Jeffrey D. Ullman,
699  *      "Analysis of a Simple Algorithm for Global Data Flow Problems",
700  *      Conf.  Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
701  *      Oct. 1-3, 1973, pp.  207-217.
702  * 3) find all nodes with a predecessor dominated by the current node.
703  *      such a node is a loop head.
704  *      recursively, all preds with a greater rpo number are in the loop
705  */
706 long
707 postorder(Reg *r, Reg **rpo2r, long n)
708 {
709         Reg *r1;
710
711         r->rpo = 1;
712         r1 = r->s1;
713         if(r1 && !r1->rpo)
714                 n = postorder(r1, rpo2r, n);
715         r1 = r->s2;
716         if(r1 && !r1->rpo)
717                 n = postorder(r1, rpo2r, n);
718         rpo2r[n] = r;
719         n++;
720         return n;
721 }
722
723 long
724 rpolca(long *idom, long rpo1, long rpo2)
725 {
726         long t;
727
728         if(rpo1 == -1)
729                 return rpo2;
730         while(rpo1 != rpo2){
731                 if(rpo1 > rpo2){
732                         t = rpo2;
733                         rpo2 = rpo1;
734                         rpo1 = t;
735                 }
736                 while(rpo1 < rpo2){
737                         t = idom[rpo2];
738                         if(t >= rpo2)
739                                 fatal(Z, "bad idom");
740                         rpo2 = t;
741                 }
742         }
743         return rpo1;
744 }
745
746 int
747 doms(long *idom, long r, long s)
748 {
749         while(s > r)
750                 s = idom[s];
751         return s == r;
752 }
753
754 int
755 loophead(long *idom, Reg *r)
756 {
757         long src;
758
759         src = r->rpo;
760         if(r->p1 != R && doms(idom, src, r->p1->rpo))
761                 return 1;
762         for(r = r->p2; r != R; r = r->p2link)
763                 if(doms(idom, src, r->rpo))
764                         return 1;
765         return 0;
766 }
767
768 void
769 loopmark(Reg **rpo2r, long head, Reg *r)
770 {
771         if(r->rpo < head || r->active == head)
772                 return;
773         r->active = head;
774         r->loop += LOOP;
775         if(r->p1 != R)
776                 loopmark(rpo2r, head, r->p1);
777         for(r = r->p2; r != R; r = r->p2link)
778                 loopmark(rpo2r, head, r);
779 }
780
781 void
782 loopit(Reg *r, long nr)
783 {
784         Reg *r1;
785         long i, d, me;
786
787         if(nr > maxnr) {
788                 rpo2r = alloc(nr * sizeof(Reg*));
789                 idom = alloc(nr * sizeof(long));
790                 maxnr = nr;
791         }
792
793         d = postorder(r, rpo2r, 0);
794         if(d > nr)
795                 fatal(Z, "too many reg nodes");
796         nr = d;
797         for(i = 0; i < nr / 2; i++){
798                 r1 = rpo2r[i];
799                 rpo2r[i] = rpo2r[nr - 1 - i];
800                 rpo2r[nr - 1 - i] = r1;
801         }
802         for(i = 0; i < nr; i++)
803                 rpo2r[i]->rpo = i;
804
805         idom[0] = 0;
806         for(i = 0; i < nr; i++){
807                 r1 = rpo2r[i];
808                 me = r1->rpo;
809                 d = -1;
810                 if(r1->p1 != R && r1->p1->rpo < me)
811                         d = r1->p1->rpo;
812                 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
813                         if(r1->rpo < me)
814                                 d = rpolca(idom, d, r1->rpo);
815                 idom[i] = d;
816         }
817
818         for(i = 0; i < nr; i++){
819                 r1 = rpo2r[i];
820                 r1->loop++;
821                 if(r1->p2 != R && loophead(idom, r1))
822                         loopmark(rpo2r, i, r1);
823         }
824 }
825
826 void
827 synch(Reg *r, Bits dif)
828 {
829         Reg *r1;
830         int z;
831
832         for(r1 = r; r1 != R; r1 = r1->s1) {
833                 for(z=0; z<BITS; z++) {
834                         dif.b[z] = (dif.b[z] &
835                                 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
836                                         r1->set.b[z] | r1->regdiff.b[z];
837                         if(dif.b[z] != r1->regdiff.b[z]) {
838                                 r1->regdiff.b[z] = dif.b[z];
839                                 change++;
840                         }
841                 }
842                 if(r1->active)
843                         break;
844                 r1->active = 1;
845                 for(z=0; z<BITS; z++)
846                         dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
847                 if(r1->s2 != R)
848                         synch(r1->s2, dif);
849         }
850 }
851
852 ulong
853 allreg(ulong b, Rgn *r)
854 {
855         Var *v;
856         int i;
857
858         v = var + r->varno;
859         r->regno = 0;
860         switch(v->etype) {
861
862         default:
863                 diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
864                 break;
865
866         case TCHAR:
867         case TUCHAR:
868         case TSHORT:
869         case TUSHORT:
870         case TINT:
871         case TUINT:
872         case TLONG:
873         case TULONG:
874         case TIND:
875         case TVLONG:
876         case TUVLONG:
877         case TARRAY:
878                 i = BtoR(~b);
879                 if(i && r->cost > 0) {
880                         r->regno = i;
881                         return RtoB(i);
882                 }
883                 break;
884
885         case TDOUBLE:
886         case TFLOAT:
887                 i = BtoF(~b);
888                 if(i && r->cost > 0) {
889                         r->regno = i+NREG;
890                         return FtoB(i);
891                 }
892                 break;
893         }
894         return 0;
895 }
896
897 void
898 paint1(Reg *r, int bn)
899 {
900         Reg *r1;
901         Prog *p;
902         int z;
903         ulong bb;
904
905         z = bn/32;
906         bb = 1L<<(bn%32);
907         if(r->act.b[z] & bb)
908                 return;
909         for(;;) {
910                 if(!(r->refbehind.b[z] & bb))
911                         break;
912                 r1 = r->p1;
913                 if(r1 == R)
914                         break;
915                 if(!(r1->refahead.b[z] & bb))
916                         break;
917                 if(r1->act.b[z] & bb)
918                         break;
919                 r = r1;
920         }
921
922         if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) {
923                 change -= CLOAD * r->loop;
924                 if(debug['R'] && debug['v'])
925                         print("%ld%P\tld %B $%d\n", r->loop,
926                                 r->prog, blsh(bn), change);
927         }
928         for(;;) {
929                 r->act.b[z] |= bb;
930                 p = r->prog;
931
932                 if(r->use1.b[z] & bb) {
933                         change += CREF * r->loop;
934                         if(p->to.type == D_FREG && (p->as == AMOVW || p->as == AMOV))
935                                 change = -CINF;         /* cant go Rreg to Freg */
936                         if(debug['R'] && debug['v'])
937                                 print("%ld%P\tu1 %B $%d\n", r->loop,
938                                         p, blsh(bn), change);
939                 }
940
941                 if((r->use2.b[z]|r->set.b[z]) & bb) {
942                         change += CREF * r->loop;
943                         if(p->from.type == D_FREG && (p->as == AMOVW || p->as == AMOV))
944                                 change = -CINF;         /* cant go Rreg to Freg */
945                         if(debug['R'] && debug['v'])
946                                 print("%ld%P\tu2 %B $%d\n", r->loop,
947                                         p, blsh(bn), change);
948                 }
949
950                 if(STORE(r) & r->regdiff.b[z] & bb) {
951                         change -= CLOAD * r->loop;
952                         if(debug['R'] && debug['v'])
953                                 print("%ld%P\tst %B $%d\n", r->loop,
954                                         p, blsh(bn), change);
955                 }
956
957                 if(r->refbehind.b[z] & bb)
958                         for(r1 = r->p2; r1 != R; r1 = r1->p2link)
959                                 if(r1->refahead.b[z] & bb)
960                                         paint1(r1, bn);
961
962                 if(!(r->refahead.b[z] & bb))
963                         break;
964                 r1 = r->s2;
965                 if(r1 != R)
966                         if(r1->refbehind.b[z] & bb)
967                                 paint1(r1, bn);
968                 r = r->s1;
969                 if(r == R)
970                         break;
971                 if(r->act.b[z] & bb)
972                         break;
973                 if(!(r->refbehind.b[z] & bb))
974                         break;
975         }
976 }
977
978 ulong
979 paint2(Reg *r, int bn)
980 {
981         Reg *r1;
982         int z;
983         ulong bb, vreg;
984
985         z = bn/32;
986         bb = 1L << (bn%32);
987         vreg = regbits;
988         if(!(r->act.b[z] & bb))
989                 return vreg;
990         for(;;) {
991                 if(!(r->refbehind.b[z] & bb))
992                         break;
993                 r1 = r->p1;
994                 if(r1 == R)
995                         break;
996                 if(!(r1->refahead.b[z] & bb))
997                         break;
998                 if(!(r1->act.b[z] & bb))
999                         break;
1000                 r = r1;
1001         }
1002         for(;;) {
1003                 r->act.b[z] &= ~bb;
1004
1005                 vreg |= r->regu;
1006
1007                 if(r->refbehind.b[z] & bb)
1008                         for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1009                                 if(r1->refahead.b[z] & bb)
1010                                         vreg |= paint2(r1, bn);
1011
1012                 if(!(r->refahead.b[z] & bb))
1013                         break;
1014                 r1 = r->s2;
1015                 if(r1 != R)
1016                         if(r1->refbehind.b[z] & bb)
1017                                 vreg |= paint2(r1, bn);
1018                 r = r->s1;
1019                 if(r == R)
1020                         break;
1021                 if(!(r->act.b[z] & bb))
1022                         break;
1023                 if(!(r->refbehind.b[z] & bb))
1024                         break;
1025         }
1026         return vreg;
1027 }
1028
1029 void
1030 paint3(Reg *r, int bn, long rb, int rn)
1031 {
1032         Reg *r1;
1033         Prog *p;
1034         int z;
1035         ulong bb;
1036
1037         z = bn/32;
1038         bb = 1L << (bn%32);
1039         if(r->act.b[z] & bb)
1040                 return;
1041         for(;;) {
1042                 if(!(r->refbehind.b[z] & bb))
1043                         break;
1044                 r1 = r->p1;
1045                 if(r1 == R)
1046                         break;
1047                 if(!(r1->refahead.b[z] & bb))
1048                         break;
1049                 if(r1->act.b[z] & bb)
1050                         break;
1051                 r = r1;
1052         }
1053
1054         if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1055                 addmove(r, bn, rn, 0);
1056         for(;;) {
1057                 r->act.b[z] |= bb;
1058                 p = r->prog;
1059
1060                 if(r->use1.b[z] & bb) {
1061                         if(debug['R'])
1062                                 print("%P", p);
1063                         addreg(&p->from, rn);
1064                         if(debug['R'])
1065                                 print("\t.c%P\n", p);
1066                 }
1067                 if((r->use2.b[z]|r->set.b[z]) & bb) {
1068                         if(debug['R'])
1069                                 print("%P", p);
1070                         addreg(&p->to, rn);
1071                         if(debug['R'])
1072                                 print("\t.c%P\n", p);
1073                 }
1074
1075                 if(STORE(r) & r->regdiff.b[z] & bb)
1076                         addmove(r, bn, rn, 1);
1077                 r->regu |= rb;
1078
1079                 if(r->refbehind.b[z] & bb)
1080                         for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1081                                 if(r1->refahead.b[z] & bb)
1082                                         paint3(r1, bn, rb, rn);
1083
1084                 if(!(r->refahead.b[z] & bb))
1085                         break;
1086                 r1 = r->s2;
1087                 if(r1 != R)
1088                         if(r1->refbehind.b[z] & bb)
1089                                 paint3(r1, bn, rb, rn);
1090                 r = r->s1;
1091                 if(r == R)
1092                         break;
1093                 if(r->act.b[z] & bb)
1094                         break;
1095                 if(!(r->refbehind.b[z] & bb))
1096                         break;
1097         }
1098 }
1099
1100 void
1101 addreg(Adr *a, int rn)
1102 {
1103
1104         a->sym = 0;
1105         a->name = D_NONE;
1106         a->type = D_REG;
1107         a->reg = rn;
1108         if(rn >= NREG) {
1109                 a->type = D_FREG;
1110                 a->reg = rn-NREG;
1111         }
1112 }
1113
1114 /*
1115  *      bit     reg
1116  *      0       R9
1117  *      1       R10
1118  *      ...     ...
1119  *      6       R15
1120  */
1121 long
1122 RtoB(int r)
1123 {
1124         if(r >= REGMIN && r <= REGMAX)
1125                 return 1L << (r-REGMIN);
1126         return 0;
1127 }
1128
1129 int
1130 BtoR(long b)
1131 {
1132         b &= 0x07fL;
1133         if(b == 0)
1134                 return 0;
1135         return bitno(b) + REGMIN;
1136 }
1137
1138 /*
1139  *      bit     reg
1140  *      22      F7
1141  *      23      F8
1142  *      ...     ...
1143  *      29      F14
1144  */
1145 long
1146 FtoB(int f)
1147 {
1148         if(f < FREGMIN || f > FREGEXT)
1149                 return 0;
1150         return 1L << (f - FREGMIN + 22);
1151 }
1152
1153 int
1154 BtoF(long b)
1155 {
1156
1157         b &= 0x3fc00000L;
1158         if(b == 0)
1159                 return 0;
1160         return bitno(b) - 22 + FREGMIN;
1161 }