]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/kl/sched.c
Import sources from 2011-03-30 iso image
[plan9front.git] / sys / src / cmd / kl / sched.c
1 #include        "l.h"
2
3 enum
4 {
5         E_ICC   = 1<<0,
6         E_FCC   = 1<<1,
7         E_MEM   = 1<<2,
8         E_MEMSP = 1<<3, /* uses offset and size */
9         E_MEMSB = 1<<4, /* uses offset and size */
10         ANYMEM  = E_MEM|E_MEMSP|E_MEMSB,
11         ALL     = ~0
12 };
13
14 typedef struct  Sch     Sch;
15 typedef struct  Dep     Dep;
16
17 struct  Dep
18 {
19         ulong   ireg;
20         ulong   freg;
21         ulong   cc;
22 };
23 struct  Sch
24 {
25         Prog    p;
26         Dep     set;
27         Dep     used;
28         long    soffset;
29         char    size;
30         char    nop;
31         char    comp;
32 };
33
34 void    regsused(Sch*, Prog*);
35 int     depend(Sch*, Sch*);
36 int     conflict(Sch*, Sch*);
37 int     offoverlap(Sch*, Sch*);
38 void    dumpbits(Sch*, Dep*);
39
40 void
41 sched(Prog *p0, Prog *pe)
42 {
43         Prog *p, *q;
44         Sch sch[NSCHED], *s, *t, *u, *se, stmp;
45
46         /*
47          * build side structure
48          */
49         s = sch;
50         for(p=p0;; p=p->link) {
51                 memset(s, 0, sizeof(*s));
52                 s->p = *p;
53                 regsused(s, p);
54                 if(debug['X']) {
55                         Bprint(&bso, "%P\tset", &s->p);
56                         dumpbits(s, &s->set);
57                         Bprint(&bso, "; used");
58                         dumpbits(s, &s->used);
59                         if(s->comp)
60                                 Bprint(&bso, "; compound");
61                         if(s->p.mark & LOAD)
62                                 Bprint(&bso, "; load");
63                         if(s->p.mark & BRANCH)
64                                 Bprint(&bso, "; branch");
65                         if(s->p.mark & FCMP)
66                                 Bprint(&bso, "; fcmp");
67                         Bprint(&bso, "\n");
68                 }
69                 s++;
70                 if(p == pe)
71                         break;
72         }
73         se = s;
74
75         for(s=se-1; s>=sch; s--) {
76                 /*
77                  * branch delay slot
78                  */
79                 if(s->p.mark & BRANCH) {
80                         /* t is the trial instruction to use */
81                         for(t=s-1; t>=sch; t--) {
82                                 if(t->comp || (t->p.mark & FCMP))
83                                         goto no1;
84                                 for(u=t+1; u<=s; u++)
85                                         if(depend(u, t))
86                                                 goto no1;
87                                 goto out1;
88                         no1:;
89                         }
90                         if(debug['X'])
91                                 Bprint(&bso, "?b%P\n", &s->p);
92                         s->nop = 1;
93                         continue;
94
95                 out1:
96                         if(debug['X']) {
97                                 Bprint(&bso, "!b%P\n", &t->p);
98                                 Bprint(&bso, "%P\n", &s->p);
99                         }
100                         stmp = *t;
101                         memmove(t, t+1, (uchar*)s - (uchar*)t);
102                         *s = stmp;
103                         s--;
104                         continue;
105                 }
106
107                 /*
108                  * load delay. interlocked.
109                  */
110                 if(s->p.mark & LOAD) {
111                         if(s >= se-1)
112                                 continue;
113                         if(!conflict(s, (s+1)))
114                                 continue;
115                         /*
116                          * s is load, s+1 is immediate use of result
117                          * t is the trial instruction to insert between s and s+1
118                          */
119                         for(t=s-1; t>=sch; t--) {
120                                 if(t->p.mark & BRANCH)
121                                         goto no2;
122                                 if(t->p.mark & FCMP)
123                                         if((s+1)->p.mark & BRANCH)
124                                                 goto no2;
125                                 if(t->p.mark & LOAD)
126                                         if(conflict(t, (s+1)))
127                                                 goto no2;
128                                 for(u=t+1; u<=s; u++)
129                                         if(depend(u, t))
130                                                 goto no2;
131                                 goto out2;
132                         no2:;
133                         }
134                         if(debug['X'])
135                                 Bprint(&bso, "?l%P\n", &s->p);
136                         continue;
137                 out2:
138                         if(debug['X']) {
139                                 Bprint(&bso, "!l%P\n", &t->p);
140                                 Bprint(&bso, "%P\n", &s->p);
141                         }
142                         stmp = *t;
143                         memmove(t, t+1, (uchar*)s - (uchar*)t);
144                         *s = stmp;
145                         s--;
146                         continue;
147                 }
148
149                 /*
150                  * fop2 delay.
151                  */
152                 if(s->p.mark & FCMP) {
153                         if(s >= se-1) {
154                                 s->nop = 1;
155                                 continue;
156                         }
157                         if(!((s+1)->p.mark & BRANCH))
158                                 continue;
159                         /* t is the trial instruction to use */
160                         for(t=s-1; t>=sch; t--) {
161                                 for(u=t+1; u<=s; u++)
162                                         if(depend(u, t))
163                                                 goto no3;
164                                 goto out3;
165                         no3:;
166                         }
167                         if(debug['X'])
168                                 Bprint(&bso, "?f%P\n", &s->p);
169                         s->nop = 1;
170                         continue;
171                 out3:
172                         if(debug['X']) {
173                                 Bprint(&bso, "!f%P\n", &t->p);
174                                 Bprint(&bso, "%P\n", &s->p);
175                         }
176                         stmp = *t;
177                         memmove(t, t+1, (uchar*)s - (uchar*)t);
178                         *s = stmp;
179                         s--;
180                         continue;
181                 }
182         }
183
184         /*
185          * put it all back
186          */
187         for(s=sch, p=p0; s<se; s++, p=q) {
188                 q = p->link;
189                 if(q != s->p.link) {
190                         *p = s->p;
191                         p->link = q;
192                 }
193                 if(s->nop)
194                         addnop(p);
195         }
196         if(debug['X'])
197                 Bprint(&bso, "\n");
198 }
199
200 void
201 regsused(Sch *s, Prog *realp)
202 {
203         int c, ar, ad, ld, sz;
204         ulong m;
205         Prog *p;
206
207         p = &s->p;
208         s->comp = compound(p);
209         s->nop = 0;
210         if(s->comp) {
211                 s->set.ireg |= 1<<REGTMP;
212                 s->used.ireg |= 1<<REGTMP;
213         }
214         ar = 0;         /* dest is really reference */
215         ad = 0;         /* source/dest is really address */
216         ld = 0;         /* opcode is load instruction */
217         sz = 20;                /* size of load/store for overlap computation */
218
219 /*
220  * flags based on opcode
221  */
222         switch(p->as) {
223         case ATEXT:
224                 curtext = realp;
225                 autosize = p->to.offset + 4;
226                 ad = 1;
227                 break;
228         case AJMPL:
229                 c = p->reg;
230                 if(c == NREG)
231                         c = REGLINK;
232                 s->set.ireg |= 1<<c;
233                 ar = 1;
234                 ad = 1;
235                 break;
236         case AJMP:
237                 ar = 1;
238                 ad = 1;
239                 break;
240         case ACMP:
241                 s->set.cc |= E_ICC;
242                 ar = 1;
243                 break;
244         case AFCMPD:
245         case AFCMPED:
246         case AFCMPEF:
247         case AFCMPEX:
248         case AFCMPF:
249         case AFCMPX:
250                 s->set.cc |= E_FCC;
251                 ar = 1;
252                 break;
253         case ABE:
254         case ABNE:
255         case ABLE:
256         case ABG:
257         case ABL:
258         case ABGE:
259         case ABLEU:
260         case ABGU:
261         case ABCS:
262         case ABCC:
263         case ABNEG:
264         case ABPOS:
265         case ABVC:
266         case ABVS:
267                 s->used.cc |= E_ICC;
268                 ar = 1;
269                 break;
270         case AFBE:
271         case AFBLG:
272         case AFBG:
273         case AFBLE:
274         case AFBGE:
275         case AFBL:
276                 s->used.cc |= E_FCC;
277                 ar = 1;
278                 break;
279         case AMOVB:
280         case AMOVBU:
281                 sz = 1;
282                 ld = 1;
283                 break;
284         case AMOVH:
285         case AMOVHU:
286                 sz = 2;
287                 ld = 1;
288                 break;
289         case AFMOVF:
290         case AMOVW:
291                 sz = 4;
292                 ld = 1;
293                 break;
294         case AMOVD:
295         case AFMOVD:
296                 sz = 8;
297                 ld = 1;
298                 break;
299         case AFMOVX:    /* gok */
300                 sz = 16;
301                 ld = 1;
302                 break;
303         case AADDCC:
304         case AADDXCC:
305         case AANDCC:
306         case AANDNCC:
307         case AORCC:
308         case AORNCC:
309         case ASUBCC:
310         case ASUBXCC:
311         case ATADDCC:
312         case ATADDCCTV:
313         case ATSUBCC:
314         case ATSUBCCTV:
315         case AXNORCC:
316         case AXORCC:
317                 s->set.cc |= E_ICC;
318                 break;
319         case ADIV:
320         case ADIVL:
321         case AMOD:
322         case AMODL:
323         case AMUL:
324         case AMULSCC:
325         case ATAS:
326                 s->set.ireg = ALL;
327                 s->set.freg = ALL;
328                 s->set.cc = ALL;
329                 break;
330         }
331
332 /*
333  * flags based on 'to' field
334  */
335         c = p->to.class;
336         if(c == 0) {
337                 c = aclass(&p->to) + 1;
338                 p->to.class = c;
339         }
340         c--;
341         switch(c) {
342         default:
343                 print("unknown class %d %D\n", c, &p->to);
344
345         case C_ZCON:
346         case C_SCON:
347         case C_UCON:
348         case C_LCON:
349         case C_NONE:
350         case C_SBRA:
351         case C_LBRA:
352                 break;
353         case C_CREG:
354         case C_FSR:
355         case C_FQ:
356         case C_PREG:
357                 s->set.ireg = ALL;
358                 s->set.freg = ALL;
359                 s->set.cc = ALL;
360                 break;
361         case C_ZOREG:
362         case C_SOREG:
363         case C_LOREG:
364         case C_ASI:
365                 c = p->to.reg;
366                 s->used.ireg |= 1<<c;
367                 if(ad)
368                         break;
369                 s->size = sz;
370                 s->soffset = regoff(&p->to);
371
372                 m = ANYMEM;
373                 if(c == REGSB)
374                         m = E_MEMSB;
375                 if(c == REGSP)
376                         m = E_MEMSP;
377
378                 if(ar)
379                         s->used.cc |= m;
380                 else
381                         s->set.cc |= m;
382                 break;
383         case C_SACON:
384         case C_LACON:
385                 s->used.ireg |= 1<<REGSP;
386                 break;
387         case C_SECON:
388         case C_LECON:
389                 s->used.ireg |= 1<<REGSB;
390                 break;
391         case C_REG:
392                 if(ar)
393                         s->used.ireg |= 1<<p->to.reg;
394                 else
395                         s->set.ireg |= 1<<p->to.reg;
396                 break;
397         case C_FREG:
398                 /* do better -- determine double prec */
399                 if(ar) {
400                         s->used.freg |= 1<<p->to.reg;
401                         s->used.freg |= 1<<(p->to.reg|1);
402                 } else {
403                         s->set.freg |= 1<<p->to.reg;
404                         s->set.freg |= 1<<(p->to.reg|1);
405                 }
406                 break;
407         case C_SAUTO:
408         case C_LAUTO:
409         case C_ESAUTO:
410         case C_OSAUTO:
411         case C_ELAUTO:
412         case C_OLAUTO:
413                 s->used.ireg |= 1<<REGSP;
414                 if(ad)
415                         break;
416                 s->size = sz;
417                 s->soffset = regoff(&p->to);
418
419                 if(ar)
420                         s->used.cc |= E_MEMSP;
421                 else
422                         s->set.cc |= E_MEMSP;
423                 break;
424         case C_SEXT:
425         case C_LEXT:
426         case C_ESEXT:
427         case C_OSEXT:
428         case C_ELEXT:
429         case C_OLEXT:
430                 s->used.ireg |= 1<<REGSB;
431                 if(ad)
432                         break;
433                 s->size = sz;
434                 s->soffset = regoff(&p->to);
435
436                 if(ar)
437                         s->used.cc |= E_MEMSB;
438                 else
439                         s->set.cc |= E_MEMSB;
440                 break;
441         }
442
443 /*
444  * flags based on 'from' field
445  */
446         c = p->from.class;
447         if(c == 0) {
448                 c = aclass(&p->from) + 1;
449                 p->from.class = c;
450         }
451         c--;
452         switch(c) {
453         default:
454                 print("unknown class %d %D\n", c, &p->from);
455
456         case C_ZCON:
457         case C_SCON:
458         case C_UCON:
459         case C_LCON:
460         case C_NONE:
461         case C_SBRA:
462         case C_LBRA:
463                 c = p->from.reg;
464                 if(c != NREG)
465                         s->used.ireg |= 1<<c;
466                 break;
467         case C_CREG:
468         case C_FSR:
469         case C_FQ:
470         case C_PREG:
471                 s->set.ireg = ALL;
472                 s->set.freg = ALL;
473                 s->set.cc = ALL;
474                 break;
475         case C_ZOREG:
476         case C_SOREG:
477         case C_LOREG:
478         case C_ASI:
479                 c = p->from.reg;
480                 s->used.ireg |= 1<<c;
481                 if(ld)
482                         p->mark |= LOAD;
483                 if(ad)
484                         break;
485                 s->size = sz;
486                 s->soffset = regoff(&p->from);
487
488                 m = ANYMEM;
489                 if(c == REGSB)
490                         m = E_MEMSB;
491                 if(c == REGSP)
492                         m = E_MEMSP;
493
494                 s->used.cc |= m;
495                 break;
496         case C_SACON:
497         case C_LACON:
498                 s->used.ireg |= 1<<REGSP;
499                 break;
500         case C_SECON:
501         case C_LECON:
502                 s->used.ireg |= 1<<REGSB;
503                 break;
504         case C_REG:
505                 s->used.ireg |= 1<<p->from.reg;
506                 break;
507         case C_FREG:
508                 /* do better -- determine double prec */
509                 s->used.freg |= 1<<p->from.reg;
510                 s->used.freg |= 1<<(p->from.reg|1);
511                 break;
512         case C_SAUTO:
513         case C_LAUTO:
514         case C_ESAUTO:
515         case C_ELAUTO:
516         case C_OSAUTO:
517         case C_OLAUTO:
518                 s->used.ireg |= 1<<REGSP;
519                 if(ld)
520                         p->mark |= LOAD;
521                 if(ad)
522                         break;
523                 s->size = sz;
524                 s->soffset = regoff(&p->from);
525
526                 s->used.cc |= E_MEMSP;
527                 break;
528         case C_SEXT:
529         case C_LEXT:
530         case C_ESEXT:
531         case C_ELEXT:
532         case C_OSEXT:
533         case C_OLEXT:
534                 s->used.ireg |= 1<<REGSB;
535                 if(ld)
536                         p->mark |= LOAD;
537                 if(ad)
538                         break;
539                 s->size = sz;
540                 s->soffset = regoff(&p->from);
541
542                 s->used.cc |= E_MEMSB;
543                 break;
544         }
545         
546         c = p->reg;
547         if(c != NREG) {
548                 if(p->from.type == D_FREG || p->to.type == D_FREG) {
549                         s->used.freg |= 1<<c;
550                         s->used.freg |= 1<<(c|1);
551                 } else
552                         s->used.ireg |= 1<<c;
553         }
554         s->set.ireg &= ~(1<<0);         /* R0 cant be set */
555 }
556
557 /*
558  * test to see if 2 instrictions can be
559  * interchanged without changing semantics
560  */
561 int
562 depend(Sch *sa, Sch *sb)
563 {
564         ulong x;
565
566         if(sa->set.ireg & (sb->set.ireg|sb->used.ireg))
567                 return 1;
568         if(sb->set.ireg & sa->used.ireg)
569                 return 1;
570
571         if(sa->set.freg & (sb->set.freg|sb->used.freg))
572                 return 1;
573         if(sb->set.freg & sa->used.freg)
574                 return 1;
575
576         x = (sa->set.cc & (sb->set.cc|sb->used.cc)) |
577                 (sb->set.cc & sa->used.cc);
578         if(x) {
579                 /*
580                  * allow SB and SP to pass each other.
581                  * allow SB to pass SB iff doffsets are ok
582                  * anything else conflicts
583                  */
584                 if(x != E_MEMSP && x != E_MEMSB)
585                         return 1;
586                 x = sa->set.cc | sb->set.cc |
587                         sa->used.cc | sb->used.cc;
588                 if(x & E_MEM)
589                         return 1;
590                 if(offoverlap(sa, sb))
591                         return 1;
592         }
593
594         return 0; 
595 }
596
597 int
598 offoverlap(Sch *sa, Sch *sb)
599 {
600
601         if(sa->soffset < sb->soffset) {
602                 if(sa->soffset+sa->size > sb->soffset)
603                         return 1;
604                 return 0;
605         }
606         if(sb->soffset+sb->size > sa->soffset)
607                 return 1;
608         return 0;
609 }
610
611 /*
612  * test 2 adjacent instructions
613  * and find out if inserted instructions
614  * are desired to prevent stalls.
615  * first instruction is a load instruction.
616  */
617 int
618 conflict(Sch *sa, Sch *sb)
619 {
620
621         if(sa->set.ireg & sb->used.ireg)
622                 return 1;
623         if(sa->set.freg & sb->used.freg)
624                 return 1;
625         return 0;
626 }
627
628 int
629 compound(Prog *p)
630 {
631         Optab *o;
632
633         o = oplook(p);
634         if(o->size != 4)
635                 return 1;
636         if(p->to.type == D_REG && p->to.reg == REGSB)
637                 return 1;
638         return 0;
639 }
640
641 void
642 dumpbits(Sch *s, Dep *d)
643 {
644         int i;
645
646         for(i=0; i<32; i++)
647                 if(d->ireg & (1<<i))
648                         Bprint(&bso, " R%d", i);
649         for(i=0; i<32; i++)
650                 if(d->freg & (1<<i))
651                         Bprint(&bso, " F%d", i);
652         for(i=0; i<32; i++)
653                 switch(d->cc & (1<<i)) {
654                 default:
655                         break;
656                 case E_ICC:
657                         Bprint(&bso, " ICC");
658                         break;
659                 case E_FCC:
660                         Bprint(&bso, " FCC");
661                         break;
662                 case E_MEM:
663                         Bprint(&bso, " MEM%d", s->size);
664                         break;
665                 case E_MEMSB:
666                         Bprint(&bso, " SB%d", s->size);
667                         break;
668                 case E_MEMSP:
669                         Bprint(&bso, " SP%d", s->size);
670                         break;
671                 }
672 }