]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/5l/sched.c
9bootfat: rename open() to fileinit and make it static as its really a internal funct...
[plan9front.git] / sys / src / cmd / 5l / sched.c
1 #include        "l.h"
2
3 enum
4 {
5         E_PSR   = 1<<0,
6         E_MEM   = 1<<3,
7         E_MEMSP = 1<<4,         /* uses offset and size */
8         E_MEMSB = 1<<5,         /* uses offset and size */
9         ANYMEM  = E_MEM|E_MEMSP|E_MEMSB,
10         DELAY   = BRANCH|LOAD|FCMP,
11 };
12
13 typedef struct  Sch     Sch;
14 typedef struct  Dep     Dep;
15
16 struct  Dep
17 {
18         ulong   ireg;
19         ulong   freg;
20         ulong   cc;
21 };
22 struct  Sch
23 {
24         Prog    p;
25         Dep     set;
26         Dep     used;
27         long    offset;
28         char    size;
29         char    nop;
30         char    comp;
31 };
32
33 void    regused(Sch*, Prog*);
34 int     depend(Sch*, Sch*);
35 int     conflict(Sch*, Sch*);
36 int     offoverlap(Sch*, Sch*);
37 void    dumpbits(Sch*, Dep*);
38
39 void
40 sched(Prog *p0, Prog *pe)
41 {
42         Prog *p, *q;
43         Sch sch[NSCHED], *s, *t, *u, *se, stmp;
44
45         /*
46          * build side structure
47          */
48         s = sch;
49         for(p=p0;; p=p->link) {
50                 memset(s, 0, sizeof(*s));
51                 s->p = *p;
52                 regused(s, p);
53                 if(debug['X']) {
54                         Bprint(&bso, "%P\t\tset", &s->p);
55                         dumpbits(s, &s->set);
56                         Bprint(&bso, "; used");
57                         dumpbits(s, &s->used);
58                         if(s->comp)
59                                 Bprint(&bso, "; compound");
60                         if(s->p.mark & LOAD)
61                                 Bprint(&bso, "; load");
62                         if(s->p.mark & BRANCH)
63                                 Bprint(&bso, "; branch");
64                         if(s->p.mark & FCMP)
65                                 Bprint(&bso, "; fcmp");
66                         Bprint(&bso, "\n");
67                 }
68                 if(p == pe)
69                         break;
70                 s++;
71         }
72         se = s;
73
74         /*
75          * prepass to move things around
76          * does nothing, but tries to make
77          * the actual scheduler work better
78          */
79         for(s=sch; s<=se; s++) {
80                 if(!(s->p.mark & LOAD))
81                         continue;
82                 /* always good to put nonconflict loads together */
83                 for(t=s+1; t<=se; t++) {
84                         if(!(t->p.mark & LOAD))
85                                 continue;
86                         if(t->p.mark & BRANCH)
87                                 break;
88                         if(conflict(s, t))
89                                 break;
90                         for(u=t-1; u>s; u--)
91                                 if(depend(u, t))
92                                         goto no11;
93                         u = s+1;
94                         stmp = *t;
95                         memmove(s+2, u, (uchar*)t - (uchar*)u);
96                         *u = stmp;
97                         break;
98                 }
99         no11:
100
101                 /* put schedule fodder above load */
102                 for(t=s+1; t<=se; t++) {
103                         if(t->p.mark & BRANCH)
104                                 break;
105                         if(s > sch && conflict(s-1, t))
106                                 continue;
107                         for(u=t-1; u>=s; u--)
108                                 if(depend(t, u))
109                                         goto no1;
110                         stmp = *t;
111                         memmove(s+1, s, (uchar*)t - (uchar*)s);
112                         *s = stmp;
113                         if(!(s->p.mark & LOAD))
114                                 break;
115                 no1:;
116                 }
117         }
118
119         for(s=se; s>=sch; s--) {
120                 if(!(s->p.mark & DELAY))
121                         continue;
122                 if(s < se)
123                         if(!conflict(s, s+1))
124                                 goto out3;
125                 /*
126                  * s is load, s+1 is immediate use of result or end of block
127                  * t is the trial instruction to insert between s and s+1
128                  */
129                 if(!debug['Y'])
130                 for(t=s-1; t>=sch; t--) {
131                         if(t->comp)
132                                 if(s->p.mark & BRANCH)
133                                         goto no2;
134                         if(t->p.mark & DELAY)
135                                 if(s >= se || conflict(t, s+1))
136                                         goto no2;
137                         for(u=t+1; u<=s; u++)
138                                 if(depend(u, t))
139                                         goto no2;
140                         goto out2;
141                 no2:;
142                 }
143                 if(debug['X'])
144                         Bprint(&bso, "?l%P\n", &s->p);
145                 s->nop = 1;
146                 if(debug['v']) {
147                         if(s->p.mark & LOAD) {
148                                 nop.load.count++;
149                                 nop.load.outof++;
150                         }
151                         if(s->p.mark & BRANCH) {
152                                 nop.branch.count++;
153                                 nop.branch.outof++;
154                         }
155                         if(s->p.mark & FCMP) {
156                                 nop.fcmp.count++;
157                                 nop.fcmp.outof++;
158                         }
159                 }
160                 continue;
161
162         out2:
163                 if(debug['X']) {
164                         Bprint(&bso, "!l%P\n", &t->p);
165                         Bprint(&bso, "%P\n", &s->p);
166                 }
167                 stmp = *t;
168                 memmove(t, t+1, (uchar*)s - (uchar*)t);
169                 *s = stmp;
170                 s--;
171
172         out3:
173                 if(debug['v']) {
174                         if(s->p.mark & LOAD)
175                                 nop.load.outof++;
176                         if(s->p.mark & BRANCH)
177                                 nop.branch.outof++;
178                         if(s->p.mark & FCMP)
179                                 nop.fcmp.outof++;
180                 }
181         }
182
183         /* Avoid HI/LO use->set */
184         t = sch+1;
185         for(s=sch; s<se-1; s++, t++) {
186                 if((s->used.cc & E_PSR) == 0)
187                         continue;
188                 if(t->set.cc & E_PSR)
189                         s->nop = 2;
190         }
191
192         /*
193          * put it all back
194          */
195         for(s=sch, p=p0; s<=se; s++, p=q) {
196                 q = p->link;
197                 if(q != s->p.link) {
198                         *p = s->p;
199                         p->link = q;
200                 }
201                 while(s->nop--)
202                         addnop(p);
203         }
204         if(debug['X']) {
205                 Bprint(&bso, "\n");
206                 Bflush(&bso);
207         }
208 }
209
210 void
211 regused(Sch *s, Prog *realp)
212 {
213         int c, ar, ad, ld, sz;
214         ulong m;
215         Prog *p;
216
217         p = &s->p;
218         s->comp = compound(p);
219         s->nop = 0;
220         if(s->comp) {
221                 s->set.ireg |= 1<<REGTMP;
222                 s->used.ireg |= 1<<REGTMP;
223         }
224
225         ar = 0;         /* dest is really reference */
226         ad = 0;         /* source/dest is really address */
227         ld = 0;         /* opcode is load instruction */
228         sz = 20;        /* size of load/store for overlap computation */
229
230 /*
231  * flags based on opcode
232  */
233         switch(p->as) {
234         case ATEXT:
235                 curtext = realp;
236                 autosize = p->to.offset + 4;
237                 ad = 1;
238                 break;
239         case ABL:
240                 c = p->reg;
241                 if(c == NREG)
242                         c = REGLINK;
243                 s->set.ireg |= 1<<c;
244                 ar = 1;
245                 ad = 1;
246                 break;
247         case ACMP:
248         case ACMPF:
249         case ACMPD:
250                 ar = 1;
251                 s->set.cc |= E_PSR;
252                 p->mark |= FCMP;
253                 break;
254         case AB:
255                 ar = 1;
256                 ad = 1;
257                 break;
258         case AMOVB:
259         case AMOVBU:
260                 sz = 1;
261                 ld = 1;
262                 break;
263         case AMOVH:
264         case AMOVHU:
265                 sz = 2;
266                 ld = 1;
267                 break;
268         case AMOVF:
269         case AMOVW:
270                 sz = 4;
271                 ld = 1;
272                 break;
273         case AMOVD:
274                 sz = 8;
275                 ld = 1;
276                 break;
277         case ADIV:
278         case ADIVU:
279         case AMUL:
280         case AMULU:
281         case AMOD:
282         case AMODU:
283
284         case AADD:
285         case AAND:
286         case ANOR:
287         case AORR:
288         case ASLL:
289         case ASRA:
290         case ASRL:
291         case ASUB:
292         case AEOR:
293
294         case AADDD:
295         case AADDF:
296         case ASUBD:
297         case ASUBF:
298         case AMULF:
299         case AMULD:
300         case ADIVF:
301                 if(p->reg == NREG) {
302                         if(p->to.type == D_REG || p->to.type == D_FREG)
303                                 p->reg = p->to.reg;
304                         if(p->reg == NREG)
305                                 print("botch %P\n", p);
306                 }
307                 break;
308         }
309
310 /*
311  * flags based on 'to' field
312  */
313         c = p->to.class;
314         if(c == 0) {
315                 c = aclass(&p->to) + 1;
316                 p->to.class = c;
317         }
318         c--;
319         switch(c) {
320         default:
321                 print("unknown class %d %D\n", c, &p->to);
322
323         case C_ZCON:
324         case C_ICON:
325         case C_SCON:
326         case C_SICON:
327         case C_LCON:
328         case C_NONE:
329         case C_SBRA:
330         case C_LBRA:
331                 break;
332
333         case C_PSR:
334                 s->set.cc |= E_PSR;
335                 break;
336         case C_ZOREG:
337         case C_SOREG:
338         case C_LOREG:
339                 c = p->to.reg;
340                 s->used.ireg |= 1<<c;
341                 if(ad)
342                         break;
343                 s->size = sz;
344                 s->offset = regoff(&p->to);
345
346                 m = ANYMEM;
347                 if(c == REGSB)
348                         m = E_MEMSB;
349                 if(c == REGSP)
350                         m = E_MEMSP;
351
352                 if(ar)
353                         s->used.cc |= m;
354                 else
355                         s->set.cc |= m;
356                 break;
357         case C_SACON:
358         case C_LACON:
359                 s->used.ireg |= 1<<REGSP;
360                 break;
361         case C_SECON:
362         case C_LECON:
363                 s->used.ireg |= 1<<REGSB;
364                 break;
365         case C_REG:
366                 if(ar)
367                         s->used.ireg |= 1<<p->to.reg;
368                 else
369                         s->set.ireg |= 1<<p->to.reg;
370                 break;
371         case C_REGREG:
372                 if(ar){
373                         s->used.ireg |= 1<<p->to.reg;
374                         s->used.ireg |= 1<<p->to.offset;
375                 }else{
376                         s->set.ireg |= 1<<p->to.reg;
377                         s->set.ireg |= 1<<p->to.offset;
378                 }
379                 break;
380         case C_FREG:
381                 /* do better -- determine double prec */
382                 if(ar) {
383                         s->used.freg |= 1<<p->to.reg;
384                         s->used.freg |= 1<<(p->to.reg|1);
385                 } else {
386                         s->set.freg |= 1<<p->to.reg;
387                         s->set.freg |= 1<<(p->to.reg|1);
388                 }
389                 if(ld && p->from.type == D_REG)
390                         p->mark |= LOAD;
391                 break;
392         case C_SAUTO:
393         case C_LAUTO:
394                 s->used.ireg |= 1<<REGSP;
395                 if(ad)
396                         break;
397                 s->size = sz;
398                 s->offset = regoff(&p->to);
399
400                 if(ar)
401                         s->used.cc |= E_MEMSP;
402                 else
403                         s->set.cc |= E_MEMSP;
404                 break;
405         case C_SEXT:
406         case C_LEXT:
407                 s->used.ireg |= 1<<REGSB;
408                 if(ad)
409                         break;
410                 s->size = sz;
411                 s->offset = regoff(&p->to);
412
413                 if(ar)
414                         s->used.cc |= E_MEMSB;
415                 else
416                         s->set.cc |= E_MEMSB;
417                 break;
418         }
419
420 /*
421  * flags based on 'from' field
422  */
423         c = p->from.class;
424         if(c == 0) {
425                 c = aclass(&p->from) + 1;
426                 p->from.class = c;
427         }
428         c--;
429         switch(c) {
430         default:
431                 print("unknown class %d %D\n", c, &p->from);
432
433         case C_ZCON:
434         case C_ICON:
435         case C_SCON:
436         case C_SICON:
437         case C_LCON:
438         case C_NONE:
439         case C_SBRA:
440         case C_LBRA:
441                 break;
442         case C_PSR:
443                 s->used.cc |= E_PSR;
444                 break;
445         case C_ZOREG:
446         case C_SOREG:
447         case C_LOREG:
448                 c = p->from.reg;
449                 s->used.ireg |= 1<<c;
450                 if(ld)
451                         p->mark |= LOAD;
452                 s->size = sz;
453                 s->offset = regoff(&p->from);
454
455                 m = ANYMEM;
456                 if(c == REGSB)
457                         m = E_MEMSB;
458                 if(c == REGSP)
459                         m = E_MEMSP;
460
461                 s->used.cc |= m;
462                 break;
463         case C_SACON:
464         case C_LACON:
465                 s->used.ireg |= 1<<REGSP;
466                 break;
467         case C_SECON:
468         case C_LECON:
469                 s->used.ireg |= 1<<REGSB;
470                 break;
471         case C_REG:
472                 s->used.ireg |= 1<<p->from.reg;
473                 break;
474         case C_REGREG:
475                 s->used.ireg |= 1<<p->from.reg;
476                 s->used.ireg |= 1<<p->from.offset;
477                 break;
478         case C_FREG:
479                 /* do better -- determine double prec */
480                 s->used.freg |= 1<<p->from.reg;
481                 s->used.freg |= 1<<(p->from.reg|1);
482                 if(ld && p->to.type == D_REG)
483                         p->mark |= LOAD;
484                 break;
485         case C_SAUTO:
486         case C_LAUTO:
487                 s->used.ireg |= 1<<REGSP;
488                 if(ld)
489                         p->mark |= LOAD;
490                 if(ad)
491                         break;
492                 s->size = sz;
493                 s->offset = regoff(&p->from);
494
495                 s->used.cc |= E_MEMSP;
496                 break;
497         case C_SEXT:
498         case C_LEXT:
499                 s->used.ireg |= 1<<REGSB;
500                 if(ld)
501                         p->mark |= LOAD;
502                 if(ad)
503                         break;
504                 s->size = sz;
505                 s->offset = regoff(&p->from);
506
507                 s->used.cc |= E_MEMSB;
508                 break;
509         }
510
511         c = p->reg;
512         if(c != NREG) {
513                 if(p->from.type == D_FREG || p->to.type == D_FREG) {
514                         s->used.freg |= 1<<c;
515                         s->used.freg |= 1<<(c|1);
516                 } else
517                         s->used.ireg |= 1<<c;
518         }
519 }
520
521 /*
522  * test to see if 2 instrictions can be
523  * interchanged without changing semantics
524  */
525 int
526 depend(Sch *sa, Sch *sb)
527 {
528         ulong x;
529
530         if(sa->set.ireg & (sb->set.ireg|sb->used.ireg))
531                 return 1;
532         if(sb->set.ireg & sa->used.ireg)
533                 return 1;
534
535         if(sa->set.freg & (sb->set.freg|sb->used.freg))
536                 return 1;
537         if(sb->set.freg & sa->used.freg)
538                 return 1;
539
540         /*
541          * special case.
542          * loads from same address cannot pass.
543          * this is for hardware fifo's and the like
544          */
545         if(sa->used.cc & sb->used.cc & E_MEM)
546                 if(sa->p.reg == sb->p.reg)
547                 if(regoff(&sa->p.from) == regoff(&sb->p.from))
548                         return 1;
549
550         x = (sa->set.cc & (sb->set.cc|sb->used.cc)) |
551                 (sb->set.cc & sa->used.cc);
552         if(x) {
553                 /*
554                  * allow SB and SP to pass each other.
555                  * allow SB to pass SB iff doffsets are ok
556                  * anything else conflicts
557                  */
558                 if(x != E_MEMSP && x != E_MEMSB)
559                         return 1;
560                 x = sa->set.cc | sb->set.cc |
561                         sa->used.cc | sb->used.cc;
562                 if(x & E_MEM)
563                         return 1;
564                 if(offoverlap(sa, sb))
565                         return 1;
566         }
567
568         return 0; 
569 }
570
571 int
572 offoverlap(Sch *sa, Sch *sb)
573 {
574
575         if(sa->offset < sb->offset) {
576                 if(sa->offset+sa->size > sb->offset)
577                         return 1;
578                 return 0;
579         }
580         if(sb->offset+sb->size > sa->offset)
581                 return 1;
582         return 0;
583 }
584
585 /*
586  * test 2 adjacent instructions
587  * and find out if inserted instructions
588  * are desired to prevent stalls.
589  */
590 int
591 conflict(Sch *sa, Sch *sb)
592 {
593
594         if(sa->set.ireg & sb->used.ireg)
595                 return 1;
596         if(sa->set.freg & sb->used.freg)
597                 return 1;
598         if(sa->set.cc & sb->used.cc)
599                 return 1;
600
601         return 0;
602 }
603
604 int
605 compound(Prog *p)
606 {
607         Optab *o;
608
609         o = oplook(p);
610         if(o->size != 4)
611                 return 1;
612         if(p->to.type == D_REG && p->to.reg == REGSB)
613                 return 1;
614         return 0;
615 }
616
617 void
618 dumpbits(Sch *s, Dep *d)
619 {
620         int i;
621
622         for(i=0; i<32; i++)
623                 if(d->ireg & (1<<i))
624                         Bprint(&bso, " R%d", i);
625         for(i=0; i<32; i++)
626                 if(d->freg & (1<<i))
627                         Bprint(&bso, " F%d", i);
628         for(i=0; i<32; i++)
629                 switch(d->cc & (1<<i)) {
630                 default:
631                         break;
632                 case E_PSR:
633                         Bprint(&bso, " PSR");
634                         break;
635                 case E_MEM:
636                         Bprint(&bso, " MEM%d", s->size);
637                         break;
638                 case E_MEMSB:
639                         Bprint(&bso, " SB%d", s->size);
640                         break;
641                 case E_MEMSP:
642                         Bprint(&bso, " SP%d", s->size);
643                         break;
644                 }
645 }