]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/vl/sched.c
usb lib: add maxpkt and ntds to Altc struct
[plan9front.git] / sys / src / cmd / vl / sched.c
1 #include        "l.h"
2
3 enum
4 {
5         E_HILO  = 1<<0,
6         E_FCR   = 1<<1,
7         E_MCR   = 1<<2,
8         E_MEM   = 1<<3,
9         E_MEMSP = 1<<4, /* uses offset and size */
10         E_MEMSB = 1<<5, /* uses offset and size */
11         ANYMEM  = E_MEM|E_MEMSP|E_MEMSB,
12         DELAY   = BRANCH|LOAD|FCMP,
13 };
14
15 typedef struct  Sch     Sch;
16 typedef struct  Dep     Dep;
17
18 struct  Dep
19 {
20         ulong   ireg;
21         ulong   freg;
22         ulong   cc;
23 };
24 struct  Sch
25 {
26         Prog    p;
27         Dep     set;
28         Dep     used;
29         long    soffset;
30         char    size;
31         char    nop;
32         char    comp;
33 };
34
35 void    regsused(Sch*, Prog*);
36 int     depend(Sch*, Sch*);
37 int     conflict(Sch*, Sch*);
38 int     offoverlap(Sch*, Sch*);
39 void    dumpbits(Sch*, Dep*);
40
41 void
42 sched(Prog *p0, Prog *pe)
43 {
44         Prog *p, *q;
45         Sch sch[NSCHED], *s, *t, *u, *se, stmp;
46
47         /*
48          * build side structure
49          */
50         s = sch;
51         for(p=p0;; p=p->link) {
52                 memset(s, 0, sizeof(*s));
53                 s->p = *p;
54                 regsused(s, p);
55                 if(debug['X']) {
56                         Bprint(&bso, "%P\t\tset", &s->p);
57                         dumpbits(s, &s->set);
58                         Bprint(&bso, "; used");
59                         dumpbits(s, &s->used);
60                         if(s->comp)
61                                 Bprint(&bso, "; compound");
62                         if(s->p.mark & LOAD)
63                                 Bprint(&bso, "; load");
64                         if(s->p.mark & BRANCH)
65                                 Bprint(&bso, "; branch");
66                         if(s->p.mark & FCMP)
67                                 Bprint(&bso, "; fcmp");
68                         Bprint(&bso, "\n");
69                 }
70                 if(p == pe)
71                         break;
72                 s++;
73         }
74         se = s;
75
76         /*
77          * prepass to move things around
78          * does nothing, but tries to make
79          * the actual scheduler work better
80          */
81         for(s=sch; s<=se; s++) {
82                 if(!(s->p.mark & LOAD))
83                         continue;
84                 /* always good to put nonconflict loads together */
85                 for(t=s+1; t<=se; t++) {
86                         if(!(t->p.mark & LOAD))
87                                 continue;
88                         if(t->p.mark & BRANCH || t->set.ireg & (1<<REGSP))
89                                 break;
90                         if(conflict(s, t))
91                                 break;
92                         for(u=t-1; u>s; u--)
93                                 if(depend(u, t))
94                                         goto no11;
95                         u = s+1;
96                         stmp = *t;
97                         memmove(s+2, u, (uchar*)t - (uchar*)u);
98                         *u = stmp;
99                         break;
100                 }
101         no11:
102
103                 /* put schedule fodder above load */
104                 for(t=s+1; t<=se; t++) {
105                         if(t->p.mark & BRANCH || t->set.ireg & (1<<REGSP))
106                                 break;
107                         if(s > sch && conflict(s-1, t))
108                                 continue;
109                         for(u=t-1; u>=s; u--)
110                                 if(depend(t, u))
111                                         goto no1;
112                         stmp = *t;
113                         memmove(s+1, s, (uchar*)t - (uchar*)s);
114                         *s = stmp;
115                         if(!(s->p.mark & LOAD))
116                                 break;
117                 no1:;
118                 }
119         }
120
121         for(s=se; s>=sch; s--) {
122                 if(!(s->p.mark & DELAY))
123                         continue;
124                 if(s < se)
125                         if(!conflict(s, s+1))
126                                 goto out3;
127                 /*
128                  * s is load, s+1 is immediate use of result or end of block
129                  * t is the trial instruction to insert between s and s+1
130                  */
131                 if(!debug['Y'])
132                 for(t=s-1; t>=sch; t--) {
133                         if(t->comp)
134                                 if(s->p.mark & BRANCH)
135                                         goto no2;
136                         if(t->p.mark & DELAY)
137                                 if(s >= se || conflict(t, s+1))
138                                         goto no2;
139                         for(u=t+1; u<=s; u++)
140                                 if(depend(u, t))
141                                         goto no2;
142                         goto out2;
143                 no2:;
144                 }
145                 if(debug['X'])
146                         Bprint(&bso, "?l%P\n", &s->p);
147                 s->nop = 1;
148                 if(debug['v']) {
149                         if(s->p.mark & LOAD) {
150                                 nop.load.count++;
151                                 nop.load.outof++;
152                         }
153                         if(s->p.mark & BRANCH) {
154                                 nop.branch.count++;
155                                 nop.branch.outof++;
156                         }
157                         if(s->p.mark & FCMP) {
158                                 nop.fcmp.count++;
159                                 nop.fcmp.outof++;
160                         }
161                 }
162                 continue;
163
164         out2:
165                 if(debug['X']) {
166                         Bprint(&bso, "!l%P\n", &t->p);
167                         Bprint(&bso, "%P\n", &s->p);
168                 }
169                 stmp = *t;
170                 memmove(t, t+1, (uchar*)s - (uchar*)t);
171                 *s = stmp;
172                 s--;
173
174         out3:
175                 if(debug['v']) {
176                         if(s->p.mark & LOAD)
177                                 nop.load.outof++;
178                         if(s->p.mark & BRANCH)
179                                 nop.branch.outof++;
180                         if(s->p.mark & FCMP)
181                                 nop.fcmp.outof++;
182                 }
183         }
184
185         /* Avoid HI/LO use->set */
186         t = sch+1;
187         for(s=sch; s<se-1; s++, t++) {
188                 if((s->used.cc & E_HILO) == 0)
189                         continue;
190                 if(t->set.cc & E_HILO)
191                         s->nop = 2;
192         }
193
194         /*
195          * put it all back
196          */
197         for(s=sch, p=p0; s<=se; s++, p=q) {
198                 q = p->link;
199                 if(q != s->p.link) {
200                         *p = s->p;
201                         p->link = q;
202                 }
203                 while(s->nop--)
204                         addnop(p);
205         }
206         if(debug['X']) {
207                 Bprint(&bso, "\n");
208                 Bflush(&bso);
209         }
210 }
211
212 void
213 regsused(Sch *s, Prog *realp)
214 {
215         int c, ar, ad, ld, sz;
216         ulong m;
217         Prog *p;
218
219         p = &s->p;
220         s->comp = compound(p);
221         s->nop = 0;
222         if(s->comp) {
223                 s->set.ireg |= 1<<REGTMP;
224                 s->used.ireg |= 1<<REGTMP;
225         }
226
227         ar = 0;         /* dest is really reference */
228         ad = 0;         /* source/dest is really address */
229         ld = 0;         /* opcode is load instruction */
230         sz = 20;        /* size of load/store for overlap computation */
231
232 /*
233  * flags based on opcode
234  */
235         switch(p->as) {
236         case ATEXT:
237                 curtext = realp;
238                 autosize = p->to.offset + 4;
239                 ad = 1;
240                 break;
241         case AJAL:
242                 c = p->reg;
243                 if(c == NREG)
244                         c = REGLINK;
245                 s->set.ireg |= 1<<c;
246                 ar = 1;
247                 ad = 1;
248                 break;
249         case ABGEZAL:
250         case ABLTZAL:
251                 s->set.ireg |= 1<<REGLINK;
252         case ABEQ:
253         case ABGEZ:
254         case ABGTZ:
255         case ABLEZ:
256         case ABLTZ:
257         case ABNE:
258                 ar = 1;
259                 ad = 1;
260                 break;
261         case ABFPT:
262         case ABFPF:
263                 ad = 1;
264                 s->used.cc |= E_FCR;
265                 break;
266         case ACMPEQD:
267         case ACMPEQF:
268         case ACMPGED:
269         case ACMPGEF:
270         case ACMPGTD:
271         case ACMPGTF:
272                 ar = 1;
273                 s->set.cc |= E_FCR;
274                 p->mark |= FCMP;
275                 break;
276         case AJMP:
277                 ar = 1;
278                 ad = 1;
279                 break;
280         case AMOVB:
281         case AMOVBU:
282                 sz = 1;
283                 ld = 1;
284                 break;
285         case AMOVH:
286         case AMOVHU:
287                 sz = 2;
288                 ld = 1;
289                 break;
290         case AMOVF:
291         case AMOVW:
292         case AMOVWL:
293         case AMOVWR:
294                 sz = 4;
295                 ld = 1;
296                 break;
297         case AMOVD:
298         case AMOVV:
299         case AMOVVL:
300         case AMOVVR:
301                 sz = 8;
302                 ld = 1;
303                 break;
304         case ADIV:
305         case ADIVU:
306         case AMUL:
307         case AMULU:
308         case AREM:
309         case AREMU:
310                 s->set.cc = E_HILO;
311         case AADD:
312         case AADDU:
313         case AAND:
314         case ANOR:
315         case AOR:
316         case ASGT:
317         case ASGTU:
318         case ASLL:
319         case ASRA:
320         case ASRL:
321         case ASUB:
322         case ASUBU:
323         case AXOR:
324
325         case AADDD:
326         case AADDF:
327         case AADDW:
328         case ASUBD:
329         case ASUBF:
330         case ASUBW:
331         case AMULF:
332         case AMULD:
333         case AMULW:
334         case ADIVF:
335         case ADIVD:
336         case ADIVW:
337                 if(p->reg == NREG) {
338                         if(p->to.type == D_REG || p->to.type == D_FREG)
339                                 p->reg = p->to.reg;
340                         if(p->reg == NREG)
341                                 print("botch %P\n", p);
342                 }
343                 break;
344         }
345
346 /*
347  * flags based on 'to' field
348  */
349         c = p->to.class;
350         if(c == 0) {
351                 c = aclass(&p->to) + 1;
352                 p->to.class = c;
353         }
354         c--;
355         switch(c) {
356         default:
357                 print("unknown class %d %D\n", c, &p->to);
358
359         case C_ZCON:
360         case C_SCON:
361         case C_ADD0CON:
362         case C_AND0CON:
363         case C_ADDCON:
364         case C_ANDCON:
365         case C_UCON:
366         case C_LCON:
367         case C_NONE:
368         case C_SBRA:
369         case C_LBRA:
370                 break;
371
372         case C_HI:
373         case C_LO:
374                 s->set.cc |= E_HILO;
375                 break;
376         case C_FCREG:
377                 s->set.cc |= E_FCR;
378                 break;
379         case C_MREG:
380                 s->set.cc |= E_MCR;
381                 break;
382         case C_ZOREG:
383         case C_SOREG:
384         case C_LOREG:
385                 c = p->to.reg;
386                 s->used.ireg |= 1<<c;
387                 if(ad)
388                         break;
389                 s->size = sz;
390                 s->soffset = regoff(&p->to);
391
392                 m = ANYMEM;
393                 if(c == REGSB)
394                         m = E_MEMSB;
395                 if(c == REGSP)
396                         m = E_MEMSP;
397
398                 if(ar)
399                         s->used.cc |= m;
400                 else
401                         s->set.cc |= m;
402                 break;
403         case C_SACON:
404         case C_LACON:
405                 s->used.ireg |= 1<<REGSP;
406                 break;
407         case C_SECON:
408         case C_LECON:
409                 s->used.ireg |= 1<<REGSB;
410                 break;
411         case C_REG:
412                 if(ar)
413                         s->used.ireg |= 1<<p->to.reg;
414                 else
415                         s->set.ireg |= 1<<p->to.reg;
416                 break;
417         case C_FREG:
418                 /* do better -- determine double prec */
419                 if(ar) {
420                         s->used.freg |= 1<<p->to.reg;
421                         s->used.freg |= 1<<(p->to.reg|1);
422                 } else {
423                         s->set.freg |= 1<<p->to.reg;
424                         s->set.freg |= 1<<(p->to.reg|1);
425                 }
426                 if(ld && p->from.type == D_REG)
427                         p->mark |= LOAD;
428                 break;
429         case C_SAUTO:
430         case C_LAUTO:
431                 s->used.ireg |= 1<<REGSP;
432                 if(ad)
433                         break;
434                 s->size = sz;
435                 s->soffset = regoff(&p->to);
436
437                 if(ar)
438                         s->used.cc |= E_MEMSP;
439                 else
440                         s->set.cc |= E_MEMSP;
441                 break;
442         case C_SEXT:
443         case C_LEXT:
444                 s->used.ireg |= 1<<REGSB;
445                 if(ad)
446                         break;
447                 s->size = sz;
448                 s->soffset = regoff(&p->to);
449
450                 if(ar)
451                         s->used.cc |= E_MEMSB;
452                 else
453                         s->set.cc |= E_MEMSB;
454                 break;
455         }
456
457 /*
458  * flags based on 'from' field
459  */
460         c = p->from.class;
461         if(c == 0) {
462                 c = aclass(&p->from) + 1;
463                 p->from.class = c;
464         }
465         c--;
466         switch(c) {
467         default:
468                 print("unknown class %d %D\n", c, &p->from);
469
470         case C_ZCON:
471         case C_SCON:
472         case C_ADD0CON:
473         case C_AND0CON:
474         case C_ADDCON:
475         case C_ANDCON:
476         case C_UCON:
477         case C_LCON:
478         case C_NONE:
479         case C_SBRA:
480         case C_LBRA:
481                 break;
482         case C_HI:
483         case C_LO:
484                 s->used.cc |= E_HILO;
485                 break;
486         case C_FCREG:
487                 s->used.cc |= E_FCR;
488                 break;
489         case C_MREG:
490                 s->used.cc |= E_MCR;
491                 break;
492         case C_ZOREG:
493         case C_SOREG:
494         case C_LOREG:
495                 c = p->from.reg;
496                 s->used.ireg |= 1<<c;
497                 if(ld)
498                         p->mark |= LOAD;
499                 s->size = sz;
500                 s->soffset = regoff(&p->from);
501
502                 m = ANYMEM;
503                 if(c == REGSB)
504                         m = E_MEMSB;
505                 if(c == REGSP)
506                         m = E_MEMSP;
507
508                 s->used.cc |= m;
509                 break;
510         case C_SACON:
511         case C_LACON:
512                 s->used.ireg |= 1<<REGSP;
513                 break;
514         case C_SECON:
515         case C_LECON:
516                 s->used.ireg |= 1<<REGSB;
517                 break;
518         case C_REG:
519                 s->used.ireg |= 1<<p->from.reg;
520                 break;
521         case C_FREG:
522                 /* do better -- determine double prec */
523                 s->used.freg |= 1<<p->from.reg;
524                 s->used.freg |= 1<<(p->from.reg|1);
525                 if(ld && p->to.type == D_REG)
526                         p->mark |= LOAD;
527                 break;
528         case C_SAUTO:
529         case C_LAUTO:
530                 s->used.ireg |= 1<<REGSP;
531                 if(ld)
532                         p->mark |= LOAD;
533                 if(ad)
534                         break;
535                 s->size = sz;
536                 s->soffset = regoff(&p->from);
537
538                 s->used.cc |= E_MEMSP;
539                 break;
540         case C_SEXT:
541         case C_LEXT:
542                 s->used.ireg |= 1<<REGSB;
543                 if(ld)
544                         p->mark |= LOAD;
545                 if(ad)
546                         break;
547                 s->size = sz;
548                 s->soffset = regoff(&p->from);
549
550                 s->used.cc |= E_MEMSB;
551                 break;
552         }
553
554         c = p->reg;
555         if(c != NREG) {
556                 if(p->from.type == D_FREG || p->to.type == D_FREG) {
557                         s->used.freg |= 1<<c;
558                         s->used.freg |= 1<<(c|1);
559                 } else
560                         s->used.ireg |= 1<<c;
561         }
562         s->set.ireg &= ~(1<<REGZERO);           /* R0 cant be set */
563 }
564
565 /*
566  * test to see if 2 instrictions can be
567  * interchanged without changing semantics
568  */
569 int
570 depend(Sch *sa, Sch *sb)
571 {
572         ulong x;
573
574         if(sa->set.ireg & (sb->set.ireg|sb->used.ireg))
575                 return 1;
576         if(sb->set.ireg & sa->used.ireg)
577                 return 1;
578
579         if(sa->set.freg & (sb->set.freg|sb->used.freg))
580                 return 1;
581         if(sb->set.freg & sa->used.freg)
582                 return 1;
583
584         /*
585          * special case.
586          * loads from same address cannot pass.
587          * this is for hardware fifo's and the like
588          */
589         if(sa->used.cc & sb->used.cc & E_MEM)
590                 if(sa->p.reg == sb->p.reg)
591                 if(regoff(&sa->p.from) == regoff(&sb->p.from))
592                         return 1;
593
594         x = (sa->set.cc & (sb->set.cc|sb->used.cc)) |
595                 (sb->set.cc & sa->used.cc);
596         if(x) {
597                 /*
598                  * allow SB and SP to pass each other.
599                  * allow SB to pass SB iff doffsets are ok
600                  * anything else conflicts
601                  */
602                 if(x != E_MEMSP && x != E_MEMSB)
603                         return 1;
604                 x = sa->set.cc | sb->set.cc |
605                         sa->used.cc | sb->used.cc;
606                 if(x & E_MEM)
607                         return 1;
608                 if(offoverlap(sa, sb))
609                         return 1;
610         }
611
612         return 0; 
613 }
614
615 int
616 offoverlap(Sch *sa, Sch *sb)
617 {
618
619         if(sa->soffset < sb->soffset) {
620                 if(sa->soffset+sa->size > sb->soffset)
621                         return 1;
622                 return 0;
623         }
624         if(sb->soffset+sb->size > sa->soffset)
625                 return 1;
626         return 0;
627 }
628
629 /*
630  * test 2 adjacent instructions
631  * and find out if inserted instructions
632  * are desired to prevent stalls.
633  */
634 int
635 conflict(Sch *sa, Sch *sb)
636 {
637         if(sa->set.ireg & sb->used.ireg)
638                 return 1;
639         if(sa->set.freg & sb->used.freg)
640                 return 1;
641         if(sa->set.cc & sb->used.cc)
642                 return 1;
643
644         return 0;
645 }
646
647 int
648 compound(Prog *p)
649 {
650         Optab *o;
651
652         o = oplook(p);
653         if(o->size != 4)
654                 return 1;
655         if(p->to.type == D_REG && p->to.reg == REGSB)
656                 return 1;
657         return 0;
658 }
659
660 void
661 dumpbits(Sch *s, Dep *d)
662 {
663         int i;
664
665         for(i=0; i<32; i++)
666                 if(d->ireg & (1<<i))
667                         Bprint(&bso, " R%d", i);
668         for(i=0; i<32; i++)
669                 if(d->freg & (1<<i))
670                         Bprint(&bso, " F%d", i);
671         for(i=0; i<32; i++)
672                 switch(d->cc & (1<<i)) {
673                 default:
674                         break;
675                 case E_HILO:
676                         Bprint(&bso, " HILO");
677                         break;
678                 case E_FCR:
679                         Bprint(&bso, " FCR");
680                         break;
681                 case E_MCR:
682                         Bprint(&bso, " MCR");
683                         break;
684                 case E_MEM:
685                         Bprint(&bso, " MEM%d", s->size);
686                         break;
687                 case E_MEMSB:
688                         Bprint(&bso, " SB%d", s->size);
689                         break;
690                 case E_MEMSP:
691                         Bprint(&bso, " SP%d", s->size);
692                         break;
693                 }
694 }