]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/ql/sched.c
Import sources from 2011-03-30 iso image
[plan9front.git] / sys / src / cmd / ql / sched.c
1 #include        "l.h"
2
3 enum
4 {
5         E_ICC   = 1<<0,
6         E_FCC   = 1<<1,
7         E_MEM   = 1<<2,
8         E_MEMSP = 1<<3, /* uses offset and size */
9         E_MEMSB = 1<<4, /* uses offset and size */
10         E_LR    = 1<<5,
11         E_CR = 1<<6,
12         E_CTR = 1<<7,
13         E_XER = 1<<8,
14
15         E_CR0 = 0xF<<0,
16         E_CR1 = 0xF<<4,
17
18         ANYMEM  = E_MEM|E_MEMSP|E_MEMSB,
19         ALL     = ~0,
20 };
21
22 typedef struct  Sch     Sch;
23 typedef struct  Dep     Dep;
24
25 struct  Dep
26 {
27         ulong   ireg;
28         ulong   freg;
29         ulong   cc;
30         ulong   cr;
31 };
32 struct  Sch
33 {
34         Prog    p;
35         Dep     set;
36         Dep     used;
37         long    soffset;
38         char    size;
39         char    comp;
40 };
41
42 void    regused(Sch*, Prog*);
43 int     depend(Sch*, Sch*);
44 int     conflict(Sch*, Sch*);
45 int     offoverlap(Sch*, Sch*);
46 void    dumpbits(Sch*, Dep*);
47
48 void
49 sched(Prog *p0, Prog *pe)
50 {
51         Prog *p, *q;
52         Sch sch[NSCHED], *s, *t, *u, *se, stmp;
53
54         if(!debug['Q'])
55                 return;
56         /*
57          * build side structure
58          */
59         s = sch;
60         for(p=p0;; p=p->link) {
61                 memset(s, 0, sizeof(*s));
62                 s->p = *p;
63                 regused(s, p);
64                 if(debug['X']) {
65                         Bprint(&bso, "%P\tset", &s->p);
66                         dumpbits(s, &s->set);
67                         Bprint(&bso, "; used");
68                         dumpbits(s, &s->used);
69                         if(s->comp)
70                                 Bprint(&bso, "; compound");
71                         if(s->p.mark & LOAD)
72                                 Bprint(&bso, "; load");
73                         if(s->p.mark & BRANCH)
74                                 Bprint(&bso, "; branch");
75                         if(s->p.mark & FCMP)
76                                 Bprint(&bso, "; fcmp");
77                         Bprint(&bso, "\n");
78                 }
79                 s++;
80                 if(p == pe)
81                         break;
82         }
83         se = s;
84
85         for(s=se-1; s>=sch; s--) {
86
87                 /*
88                  * load delay. interlocked.
89                  */
90                 if(s->p.mark & LOAD) {
91                         if(s >= se-1)
92                                 continue;
93                         if(!conflict(s, (s+1)))
94                                 continue;
95                         /*
96                          * s is load, s+1 is immediate use of result
97                          * t is the trial instruction to insert between s and s+1
98                          */
99                         for(t=s-1; t>=sch; t--) {
100                                 if(t->p.mark & BRANCH)
101                                         goto no2;
102                                 if(t->p.mark & FCMP)
103                                         if((s+1)->p.mark & BRANCH)
104                                                 goto no2;
105                                 if(t->p.mark & LOAD)
106                                         if(conflict(t, (s+1)))
107                                                 goto no2;
108                                 for(u=t+1; u<=s; u++)
109                                         if(depend(u, t))
110                                                 goto no2;
111                                 goto out2;
112                         no2:;
113                         }
114                         if(debug['X'])
115                                 Bprint(&bso, "?l%P\n", &s->p);
116                         continue;
117                 out2:
118                         if(debug['X']) {
119                                 Bprint(&bso, "!l%P\n", &t->p);
120                                 Bprint(&bso, "%P\n", &s->p);
121                         }
122                         stmp = *t;
123                         memmove(t, t+1, (uchar*)s - (uchar*)t);
124                         *s = stmp;
125                         s--;
126                         continue;
127                 }
128
129                 /*
130                  * fop2 delay.
131                  */
132                 if(s->p.mark & FCMP) {
133                         if(s >= se-1)
134                                 continue;
135                         if(!((s+1)->p.mark & BRANCH))
136                                 continue;
137                         /* t is the trial instruction to use */
138                         for(t=s-1; t>=sch; t--) {
139                                 for(u=t+1; u<=s; u++)
140                                         if(depend(u, t))
141                                                 goto no3;
142                                 goto out3;
143                         no3:;
144                         }
145                         if(debug['X'])
146                                 Bprint(&bso, "?f%P\n", &s->p);
147                         continue;
148                 out3:
149                         if(debug['X']) {
150                                 Bprint(&bso, "!f%P\n", &t->p);
151                                 Bprint(&bso, "%P\n", &s->p);
152                         }
153                         stmp = *t;
154                         memmove(t, t+1, (uchar*)s - (uchar*)t);
155                         *s = stmp;
156                         s--;
157                         continue;
158                 }
159         }
160
161         /*
162          * put it all back
163          */
164         for(s=sch, p=p0; s<se; s++, p=q) {
165                 q = p->link;
166                 if(q != s->p.link) {
167                         *p = s->p;
168                         p->link = q;
169                 }
170         }
171         if(debug['X'])
172                 Bprint(&bso, "\n");
173 }
174
175 void
176 regused(Sch *s, Prog *realp)
177 {
178         int c, ar, ad, ld, sz, nr, upd;
179         ulong m;
180         Prog *p;
181
182         p = &s->p;
183         s->comp = compound(p);
184         if(s->comp) {
185                 s->set.ireg |= 1<<REGTMP;
186                 s->used.ireg |= 1<<REGTMP;
187         }
188         ar = 0;         /* dest is really reference */
189         ad = 0;         /* source/dest is really address */
190         ld = 0;         /* opcode is load instruction */
191         sz = 32*4;              /* size of load/store for overlap computation */
192         nr = 0; /* source/dest is not really reg */
193         upd = 0;        /* move with update; changes reg */
194
195 /*
196  * flags based on opcode
197  */
198         switch(p->as) {
199         case ATEXT:
200                 curtext = realp;
201                 autosize = p->to.offset + 4;
202                 ad = 1;
203                 break;
204         case ABL:
205                 s->set.cc |= E_LR;
206                 ar = 1;
207                 ad = 1;
208                 break;
209         case ABR:
210                 ar = 1;
211                 ad = 1;
212                 break;
213         case ACMP:
214                 s->set.cc |= E_ICC;
215                 if(p->reg == 0)
216                         s->set.cr |= E_CR0;
217                 else
218                         s->set.cr |= (0xF<<((p->reg&7)*4));
219                 ar = 1;
220                 break;
221         case AFCMPO:
222         case AFCMPU:
223                 s->set.cc |= E_FCC;
224                 if(p->reg == 0)
225                         s->set.cr |= E_CR0;
226                 else
227                         s->set.cr |= (0xF<<((p->reg&7)*4));
228                 ar = 1;
229                 break;
230         case ACRAND:
231         case ACRANDN:
232         case ACREQV:
233         case ACRNAND:
234         case ACRNOR:
235         case ACROR:
236         case ACRORN:
237         case ACRXOR:
238                 s->used.cr |= 1<<p->from.reg;
239                 s->set.cr |= 1<<p->to.reg;
240                 nr = 1;
241                 break;
242         case ABCL:      /* tricky */
243                 s->used.cc |= E_FCC|E_ICC;
244                 s->used.cr = ALL;
245                 s->set.cc |= E_LR;
246                 ar = 1;
247                 break;
248         case ABC:               /* tricky */
249                 s->used.cc |= E_FCC|E_ICC;
250                 s->used.cr = ALL;
251                 ar = 1;
252                 break;
253         case ABEQ:
254         case ABGE:
255         case ABGT:
256         case ABLE:
257         case ABLT:
258         case ABNE:
259         case ABVC:
260         case ABVS:
261                 s->used.cc |= E_ICC;
262                 s->used.cr |= E_CR0;
263                 ar = 1;
264                 break;
265         case ALSW:
266         case AMOVMW:
267                 /* could do better */
268                 sz = 32*4;
269                 ld = 1;
270                 break;
271         case AMOVBU:
272         case AMOVBZU:
273                 upd = 1;
274                 sz = 1;
275                 ld = 1;
276                 break;
277         case AMOVB:
278         case AMOVBZ:
279                 sz = 1;
280                 ld = 1;
281                 break;
282         case AMOVHU:
283         case AMOVHZU:
284                 upd = 1;
285                 sz = 2;
286                 ld = 1;
287                 break;
288         case AMOVH:
289         case AMOVHBR:
290         case AMOVHZ:
291                 sz = 2;
292                 ld = 1;
293                 break;
294         case AFMOVSU:
295         case AMOVWU:
296                 upd = 1;
297                 sz = 4;
298                 ld = 1;
299                 break;
300         case AFMOVS:
301         case AMOVW:
302         case AMOVWBR:
303         case ALWAR:
304                 sz = 4;
305                 ld = 1;
306                 break;
307         case AFMOVDU:
308                 upd = 1;
309                 sz = 8;
310                 ld = 1;
311                 break;
312         case AFMOVD:
313                 sz = 8;
314                 ld = 1;
315                 break;
316         case AFMOVDCC:
317                 sz = 8;
318                 ld = 1;
319                 s->set.cc |= E_FCC;
320                 s->set.cr |= E_CR1;
321                 break;
322         case AMOVFL:
323         case AMOVCRFS:
324         case AMTFSB0:
325         case AMTFSB0CC:
326         case AMTFSB1:
327         case AMTFSB1CC:
328                 s->set.ireg = ALL;
329                 s->set.freg = ALL;
330                 s->set.cc = ALL;
331                 s->set.cr = ALL;
332                 break;
333         case AADDCC:
334         case AADDVCC:
335         case AADDCCC:
336         case AADDCVCC:
337         case AADDMECC:
338         case AADDMEVCC:
339         case AADDECC:
340         case AADDEVCC:
341         case AADDZECC:
342         case AADDZEVCC:
343         case AANDCC:
344         case AANDNCC:
345         case ACNTLZWCC:
346         case ADIVWCC:
347         case ADIVWVCC:
348         case ADIVWUCC:
349         case ADIVWUVCC:
350         case AEQVCC:
351         case AEXTSBCC:
352         case AEXTSHCC:
353         case AMULHWCC:
354         case AMULHWUCC:
355         case AMULLWCC:
356         case AMULLWVCC:
357         case ANANDCC:
358         case ANEGCC:
359         case ANEGVCC:
360         case ANORCC:
361         case AORCC:
362         case AORNCC:
363         case AREMCC:
364         case AREMVCC:
365         case AREMUCC:
366         case AREMUVCC:
367         case ARLWMICC:
368         case ARLWNMCC:
369         case ASLWCC:
370         case ASRAWCC:
371         case ASRWCC:
372         case ASTWCCC:
373         case ASUBCC:
374         case ASUBVCC:
375         case ASUBCCC:
376         case ASUBCVCC:
377         case ASUBMECC:
378         case ASUBMEVCC:
379         case ASUBECC:
380         case ASUBEVCC:
381         case ASUBZECC:
382         case ASUBZEVCC:
383         case AXORCC:
384                 s->set.cc |= E_ICC;
385                 s->set.cr |= E_CR0;
386                 break;
387         case AFABSCC:
388         case AFADDCC:
389         case AFADDSCC:
390         case AFCTIWCC:
391         case AFCTIWZCC:
392         case AFDIVCC:
393         case AFDIVSCC:
394         case AFMADDCC:
395         case AFMADDSCC:
396         case AFMSUBCC:
397         case AFMSUBSCC:
398         case AFMULCC:
399         case AFMULSCC:
400         case AFNABSCC:
401         case AFNEGCC:
402         case AFNMADDCC:
403         case AFNMADDSCC:
404         case AFNMSUBCC:
405         case AFNMSUBSCC:
406         case AFRSPCC:
407         case AFSUBCC:
408         case AFSUBSCC:
409                 s->set.cc |= E_FCC;
410                 s->set.cr |= E_CR1;
411                 break;
412         }
413
414 /*
415  * flags based on 'to' field
416  */
417         c = p->to.class;
418         if(c == 0) {
419                 c = aclass(&p->to) + 1;
420                 p->to.class = c;
421         }
422         c--;
423         switch(c) {
424         default:
425                 print("unknown class %d %D\n", c, &p->to);
426
427         case C_NONE:
428         case C_ZCON:
429         case C_SCON:
430         case C_UCON:
431         case C_LCON:
432         case C_ADDCON:
433         case C_ANDCON:
434         case C_SBRA:
435         case C_LBRA:
436                 break;
437         case C_CREG:
438                 c = p->to.reg;
439                 if(c == NREG)
440                         s->set.cr = ALL;
441                 else
442                         s->set.cr |= (0xF << ((p->from.reg&7)*4));
443                 s->set.cc = ALL;
444                 break;
445         case C_SPR:
446         case C_SREG:
447         case C_FPSCR:
448         case C_MSR:
449         case C_XER:
450                 s->set.ireg = ALL;
451                 s->set.freg = ALL;
452                 s->set.cc = ALL;
453                 s->set.cr = ALL;
454                 break;
455         case C_LR:
456                 s->set.cc |= E_LR;
457                 break;
458         case C_CTR:
459                 s->set.cc |= E_CTR;
460                 break;
461         case C_ZOREG:
462         case C_SOREG:
463         case C_LOREG:
464                 c = p->to.reg;
465                 s->used.ireg |= 1<<c;
466                 if(upd)
467                         s->set.ireg |= 1<<c;
468                 if(ad)
469                         break;
470                 s->size = sz;
471                 s->soffset = regoff(&p->to);
472
473                 m = ANYMEM;
474                 if(c == REGSB)
475                         m = E_MEMSB;
476                 if(c == REGSP)
477                         m = E_MEMSP;
478
479                 if(ar)
480                         s->used.cc |= m;
481                 else
482                         s->set.cc |= m;
483                 break;
484         case C_SACON:
485         case C_LACON:
486                 s->used.ireg |= 1<<REGSP;
487                 if(upd)
488                         s->set.ireg |= 1<<c;
489                 break;
490         case C_SECON:
491         case C_LECON:
492                 s->used.ireg |= 1<<REGSB;
493                 if(upd)
494                         s->set.ireg |= 1<<c;
495                 break;
496         case C_REG:
497                 if(nr)
498                         break;
499                 if(ar)
500                         s->used.ireg |= 1<<p->to.reg;
501                 else
502                         s->set.ireg |= 1<<p->to.reg;
503                 break;
504         case C_FREG:
505                 if(ar)
506                         s->used.freg |= 1<<p->to.reg;
507                 else
508                         s->set.freg |= 1<<p->to.reg;
509                 break;
510         case C_SAUTO:
511         case C_LAUTO:
512                 s->used.ireg |= 1<<REGSP;
513                 if(upd)
514                         s->set.ireg |= 1<<c;
515                 if(ad)
516                         break;
517                 s->size = sz;
518                 s->soffset = regoff(&p->to);
519
520                 if(ar)
521                         s->used.cc |= E_MEMSP;
522                 else
523                         s->set.cc |= E_MEMSP;
524                 break;
525         case C_SEXT:
526         case C_LEXT:
527                 s->used.ireg |= 1<<REGSB;
528                 if(upd)
529                         s->set.ireg |= 1<<c;
530                 if(ad)
531                         break;
532                 s->size = sz;
533                 s->soffset = regoff(&p->to);
534
535                 if(ar)
536                         s->used.cc |= E_MEMSB;
537                 else
538                         s->set.cc |= E_MEMSB;
539                 break;
540         }
541
542 /*
543  * flags based on 'from' field
544  */
545         c = p->from.class;
546         if(c == 0) {
547                 c = aclass(&p->from) + 1;
548                 p->from.class = c;
549         }
550         c--;
551         switch(c) {
552         default:
553                 print("unknown class %d %D\n", c, &p->from);
554
555         case C_NONE:
556         case C_ZCON:
557         case C_SCON:
558         case C_UCON:
559         case C_LCON:
560         case C_ADDCON:
561         case C_ANDCON:
562         case C_SBRA:
563         case C_LBRA:
564                 c = p->from.reg;
565                 if(c != NREG)
566                         s->used.ireg |= 1<<c;
567                 break;
568         case C_CREG:
569                 c = p->from.reg;
570                 if(c == NREG)
571                         s->used.cr = ALL;
572                 else
573                         s->used.cr |= (0xF << ((p->from.reg&7)*4));
574                 s->used.cc = ALL;
575                 break;
576         case C_SPR:
577         case C_SREG:
578         case C_FPSCR:
579         case C_MSR:
580         case C_XER:
581                 s->set.ireg = ALL;
582                 s->set.freg = ALL;
583                 s->set.cc = ALL;
584                 s->set.cr = ALL;
585                 break;
586         case C_LR:
587                 s->used.cc |= E_LR;
588                 break;
589         case C_CTR:
590                 s->used.cc |= E_CTR;
591                 break;
592         case C_ZOREG:
593         case C_SOREG:
594         case C_LOREG:
595                 c = p->from.reg;
596                 s->used.ireg |= 1<<c;
597                 if(ld)
598                         p->mark |= LOAD;
599                 if(ad)
600                         break;
601                 s->size = sz;
602                 s->soffset = regoff(&p->from);
603
604                 m = ANYMEM;
605                 if(c == REGSB)
606                         m = E_MEMSB;
607                 if(c == REGSP)
608                         m = E_MEMSP;
609
610                 s->used.cc |= m;
611                 break;
612         case C_SACON:
613         case C_LACON:
614                 s->used.ireg |= 1<<REGSP;
615                 break;
616         case C_SECON:
617         case C_LECON:
618                 s->used.ireg |= 1<<REGSB;
619                 break;
620         case C_REG:
621                 if(nr)
622                         break;
623                 s->used.ireg |= 1<<p->from.reg;
624                 break;
625         case C_FREG:
626                 s->used.freg |= 1<<p->from.reg;
627                 break;
628         case C_SAUTO:
629         case C_LAUTO:
630                 s->used.ireg |= 1<<REGSP;
631                 if(ld)
632                         p->mark |= LOAD;
633                 if(ad)
634                         break;
635                 s->size = sz;
636                 s->soffset = regoff(&p->from);
637
638                 s->used.cc |= E_MEMSP;
639                 break;
640         case C_SEXT:
641         case C_LEXT:
642                 s->used.ireg |= 1<<REGSB;
643                 if(ld)
644                         p->mark |= LOAD;
645                 if(ad)
646                         break;
647                 s->size = sz;
648                 s->soffset = regoff(&p->from);
649
650                 s->used.cc |= E_MEMSB;
651                 break;
652         }
653         
654         c = p->reg;
655         if(c != NREG) {
656                 if(p->from.type == D_FREG || p->to.type == D_FREG)
657                         s->used.freg |= 1<<c;
658                 else
659                         s->used.ireg |= 1<<c;
660         }
661 }
662
663 /*
664  * test to see if 2 instrictions can be
665  * interchanged without changing semantics
666  */
667 int
668 depend(Sch *sa, Sch *sb)
669 {
670         ulong x;
671
672         if(sa->set.ireg & (sb->set.ireg|sb->used.ireg))
673                 return 1;
674         if(sb->set.ireg & sa->used.ireg)
675                 return 1;
676
677         if(sa->set.freg & (sb->set.freg|sb->used.freg))
678                 return 1;
679         if(sb->set.freg & sa->used.freg)
680                 return 1;
681
682         if(sa->set.cr & (sb->set.cr|sb->used.cr))
683                 return 1;
684         if(sb->set.cr & sa->used.cr)
685                 return 1;
686
687
688         x = (sa->set.cc & (sb->set.cc|sb->used.cc)) |
689                 (sb->set.cc & sa->used.cc);
690         if(x) {
691                 /*
692                  * allow SB and SP to pass each other.
693                  * allow SB to pass SB iff doffsets are ok
694                  * anything else conflicts
695                  */
696                 if(x != E_MEMSP && x != E_MEMSB)
697                         return 1;
698                 x = sa->set.cc | sb->set.cc |
699                         sa->used.cc | sb->used.cc;
700                 if(x & E_MEM)
701                         return 1;
702                 if(offoverlap(sa, sb))
703                         return 1;
704         }
705
706         return 0; 
707 }
708
709 int
710 offoverlap(Sch *sa, Sch *sb)
711 {
712
713         if(sa->soffset < sb->soffset) {
714                 if(sa->soffset+sa->size > sb->soffset)
715                         return 1;
716                 return 0;
717         }
718         if(sb->soffset+sb->size > sa->soffset)
719                 return 1;
720         return 0;
721 }
722
723 /*
724  * test 2 adjacent instructions
725  * and find out if inserted instructions
726  * are desired to prevent stalls.
727  * first instruction is a load instruction.
728  */
729 int
730 conflict(Sch *sa, Sch *sb)
731 {
732
733         if(sa->set.ireg & sb->used.ireg)
734                 return 1;
735         if(sa->set.freg & sb->used.freg)
736                 return 1;
737         if(sa->set.cr & sb->used.cr)
738                 return 1;
739         return 0;
740 }
741
742 int
743 compound(Prog *p)
744 {
745         Optab *o;
746
747         o = oplook(p);
748         if(o->size != 4)
749                 return 1;
750         if(p->to.type == D_REG && p->to.reg == REGSB)
751                 return 1;
752         return 0;
753 }
754
755 void
756 dumpbits(Sch *s, Dep *d)
757 {
758         int i;
759
760         for(i=0; i<32; i++)
761                 if(d->ireg & (1<<i))
762                         Bprint(&bso, " R%d", i);
763         for(i=0; i<32; i++)
764                 if(d->freg & (1<<i))
765                         Bprint(&bso, " F%d", i);
766         for(i=0; i<32; i++)
767                 if(d->cr & (1<<i))
768                         Bprint(&bso, " C%d", i);
769         for(i=0; i<32; i++)
770                 switch(d->cc & (1<<i)) {
771                 default:
772                         break;
773                 case E_ICC:
774                         Bprint(&bso, " ICC");
775                         break;
776                 case E_FCC:
777                         Bprint(&bso, " FCC");
778                         break;
779                 case E_LR:
780                         Bprint(&bso, " LR");
781                         break;
782                 case E_CR:
783                         Bprint(&bso, " CR");
784                         break;
785                 case E_CTR:
786                         Bprint(&bso, " CTR");
787                         break;
788                 case E_XER:
789                         Bprint(&bso, " XER");
790                         break;
791                 case E_MEM:
792                         Bprint(&bso, " MEM%d", s->size);
793                         break;
794                 case E_MEMSB:
795                         Bprint(&bso, " SB%d", s->size);
796                         break;
797                 case E_MEMSP:
798                         Bprint(&bso, " SP%d", s->size);
799                         break;
800                 }
801 }