/*
 * Forward declarations for the divide/modulo-by-constant helpers that are
 * defined further down in this file.  Their definitions name the parameters
 * (p, y, num, quot, bits): y is unsigned (uvlong) for divuconst and signed
 * (vlong) for divconst and modconst.  Each returns a Prog *.
 */
3 Prog *divuconst(Prog *, uvlong, int, int, int);
4 Prog *divconst(Prog *, vlong, int, int, int);
5 Prog *modconst(Prog *, vlong, int, int, int);
11 Prog *p, *p1, *q, *q1, *q2;
12 int o, curframe, curbecome, maxbecome, shift;
15 * find leaf subroutines
19 * expand RET and other macros
20 * expand BECOME pseudo
21 * use conditional moves where appropriate
25 Bprint(&bso, "%5.2f noops\n", cputime());
34 for(p = firstp; p != P; p = p->link) {
36 /* find out how much arg space is used in this TEXT */
37 if(p->to.type == D_OREG && p->to.reg == REGSP)
38 if(p->to.offset > curframe)
39 curframe = p->to.offset;
43 if(curtext && curtext->from.sym) {
44 curtext->from.sym->frame = curframe;
45 curtext->from.sym->become = curbecome;
46 if(curbecome > maxbecome)
47 maxbecome = curbecome;
52 p->mark |= LABEL|LEAF|SYNC;
54 p->link->mark |= LABEL;
58 /* don't mess with what we don't understand */
63 for(q1=p->link; q1 != P; q1 = q1->link) {
65 if(q1->as != AXORNOT) /* used as NOP in PALcode */
71 /* special form of RET is BECOME */
72 if(p->from.type == D_CONST)
73 if(p->from.offset > curbecome)
74 curbecome = p->from.offset;
77 p->link->mark |= LABEL;
82 q->link = q1; /* q is non-nop */
88 curtext->mark &= ~LEAF;
107 while(q1->as == ANOP) {
111 if(!(q1->mark & LEAF)) {
112 if (q1->mark & LABEL)
121 if (q1->mark & LABEL)
127 p->mark |= LABEL; /* ??? */
138 if(p->from.type == D_CONST /*&& !debug['d']*/)
143 curtext->mark &= ~LEAF;
149 if(curtext && curtext->from.sym) {
150 curtext->from.sym->frame = curframe;
151 curtext->from.sym->become = curbecome;
152 if(curbecome > maxbecome)
153 maxbecome = curbecome;
157 print("max become = %d\n", maxbecome);
158 xdefine("ALEFbecome", STEXT, maxbecome);
161 for(p = firstp; p != P; p = p->link) {
167 if(curtext != P && curtext->from.sym != S && curtext->to.offset >= 0) {
168 o = maxbecome - curtext->from.sym->frame;
171 /* calling a become or calling a variable */
172 if(p->to.sym == S || p->to.sym->become) {
173 curtext->to.offset += o;
176 print("%D calling %D increase %d\n",
177 &curtext->from, &p->to, o);
185 for(p = firstp; p != P; p = p->link) {
190 autosize = p->to.offset + 8;
192 if(curtext->mark & LEAF) {
201 q = genIRR(p, ASUBQ, autosize, NREG, REGSP);
202 else if(!(curtext->mark & LEAF)) {
204 Bprint(&bso, "save suppressed in: %s\n",
205 curtext->from.sym->name);
207 curtext->mark |= LEAF;
210 if(curtext->mark & LEAF) {
211 if(curtext->from.sym)
212 curtext->from.sym->type = SLEAF;
216 genstore(q, AMOVL, REGLINK, 0LL, REGSP);
221 if(p->from.type == D_CONST)
223 if(curtext->mark & LEAF) {
234 p->from.type = D_CONST;
235 p->from.offset = autosize;
252 p->from.type = D_OREG;
260 q = genIRR(p, AADDQ, autosize, NREG, REGSP);
265 q1->to.type = D_OREG;
267 q1->to.reg = REGLINK;
275 if(curtext->mark & LEAF) {
289 p->from.type = D_CONST;
290 p->from.offset = autosize;
307 q = genIRR(p, AADDQ, autosize, NREG, REGSP);
311 p->from.type = D_OREG;
321 /* All I wanted was a MOVB... */
324 /* rewrite sign extend; could use v3 extension in asmout case 1 */
325 if (p->to.type == D_REG) {
327 shift = (p->as == AMOVB) ? (64-8) : (64-16);
328 if (p->from.type == D_REG) {
330 p->reg = p->from.reg;
331 p->from.type = D_CONST;
332 p->from.offset = shift;
333 q = genIRR(p, ASRAQ, shift, p->to.reg, p->to.reg);
337 p->as = (p->as == AMOVB) ? AMOVBU : AMOVWU;
338 q = genIRR(p, ASLLQ, shift, p->to.reg, p->to.reg);
339 q = genIRR(q, ASRAQ, shift, p->to.reg, p->to.reg);
342 /* fall through... */
346 break; /* use BWX extension */
349 if (p->from.type == D_OREG) {
350 if (p->to.type != D_REG)
353 q = genXXX(p, AEXTBL, &p->to, REGTMP2, &p->to);
354 if (o == AMOVW || o == AMOVWU)
357 if ((p->from.offset & 7) != 0 || aclass(&p->from) != C_SOREG) {
358 q1 = genXXX(p, AMOVA, &p->from, NREG, &q->to);
359 q1->from.offset &= 7;
363 q->from.reg = p->from.reg;
364 if (o == AMOVB || o == AMOVW)
365 genXXX(q, o, &q->to, NREG, &q->to);
367 else if (p->to.type == D_OREG) {
368 if (aclass(&p->from) == C_ZCON) {
369 p->from.type = D_REG;
370 p->from.reg = REGZERO;
372 else if (p->from.type != D_REG)
375 q = genRRR(p, AMSKBL, p->to.reg, REGTMP2, REGTMP2);
376 q1 = genRRR(q, AINSBL, p->to.reg, p->from.reg, REGTMP);
377 if (o == AMOVW || o == AMOVWU) {
381 q2 = genXXX(q1, AOR, &q->to, REGTMP, &q->to);
382 genXXX(q2, AMOVQU, &q->to, NREG, &p->to);
385 if ((p->from.offset & 7) != 0 || aclass(&p->from) != C_SOREG) {
386 q->from.reg = REGTMP;
387 q1->from.reg = REGTMP;
388 q = genXXX(p, AMOVA, &p->from, NREG, &q->from);
396 p = genXXX(p, AADDL, &p->to, REGZERO, &p->to);
400 if (p->to.type != D_REG) {
401 diag("illegal dest type in %P", p);
407 q = genXXX(p, ASRLQ, &p->from, REGTMP, &p->to);
410 p->from.type = D_CONST;
411 p->from.offset = 0xf0;
415 p = genXXX(p, AADDL, &p->to, REGZERO, &p->to);
432 if(p->to.type != D_REG)
434 /*if(debug['d'] && p->from.type == D_CONST) {
435 q = genRRR(p, p->as, REGTMP, p->reg, p->to.reg);
441 if(p->from.type == D_CONST) {
446 q = divconst(p, p->from.offset, p->reg, p->to.reg, 64);
449 q = divuconst(p, p->from.offset, p->reg, p->to.reg, 64);
452 q = modconst(p, p->from.offset, p->reg, p->to.reg, 64);
455 q = divuconst(p, p->from.offset, p->reg, REGTMP2, 64);
456 q = genIRR(q, AMULQ, p->from.offset, REGTMP2, REGTMP2);
457 q = genRRR(q, ASUBQ, REGTMP2, p->reg, p->to.reg);
460 q = divconst(p, p->from.offset, p->reg, p->to.reg, 32);
463 q = divuconst(p, p->from.offset, p->reg, p->to.reg, 32);
466 q = modconst(p, p->from.offset, p->reg, p->to.reg, 32);
469 q = divuconst(p, p->from.offset, p->reg, REGTMP2, 32);
470 q = genIRR(q, AMULQ, p->from.offset, REGTMP2, REGTMP2);
471 q = genRRR(q, ASUBQ, REGTMP2, p->reg, p->to.reg);
478 if(p->from.type != D_REG){
479 diag("bad instruction %P", p);
483 q = genIRR(p, ASUBQ, 16LL, NREG, REGSP);
484 q = genstore(q, AMOVQ, p->from.reg, 8LL, REGSP);
/*
 * Select all four L-suffixed divide/modulo opcodes.  ADIVLU is assumed to be
 * the unsigned counterpart of ADIVL, as AMODLU is of AMODL (the _divlu
 * subroutine looked up elsewhere in this file suggests it exists) --
 * NOTE(review): confirm against the opcode table.
 */
485 if (o == ADIVL || o == ADIVLU || o == AMODL || o == AMODLU)
488 q = genRRR(q, AMOVQ, p->reg, NREG, REGTMP);
490 q->from.reg = p->to.reg;
492 /* CALL appropriate */
499 q1->to.type = D_BRANCH;
500 q1->cond = divsubr(o);
504 q = genRRR(q, AMOVQ, REGTMP, NREG, p->to.reg);
505 q = genIRR(q, AADDQ, 16LL, NREG, REGSP);
510 /* Attempt to replace {cond. branch, mov} with a cmov */
511 /* XXX warning: this is all a bit experimental */
524 if (q1 != p->cond || q1 == P)
526 /*print("%P\n", q); /* */
527 if (q->to.type != D_REG)
529 if (q->from.type != D_REG && (q->from.type != D_CONST || q->from.name != D_NONE))
533 /* print("%P\n", q); /* */
534 if (q->as != AMOVQ) /* XXX can handle more than this! */
536 q->as = (p->as^1) + ACMOVEQ-ABEQ; /* sleazy hack */
537 q->reg = p->from.reg; /* XXX check CMOVx operand order! */
538 excise(p); /* XXX p's LABEL? */
539 if (!(q1->mark&LABEL2))
552 if (q1 != p->cond || q1 == P)
554 if (q->from.type != D_FREG || q->to.type != D_FREG)
556 /* print("%P\n", q); /* */
559 if (q->as != AMOVT) /* XXX can handle more than this! */
561 q->as = (p->as^1) + AFCMOVEQ-AFBEQ; /* sleazy hack */
562 q->reg = p->from.reg; /* XXX check CMOVx operand order! */
563 excise(p); /* XXX p's LABEL? */
564 if (!(q1->mark&LABEL2))
572 q1 = firstp; /* top of block */
573 o = 0; /* count of instructions */
574 for(p = firstp; p != P; p = p1) {
577 if(p->mark & NOSCHED){
581 for(; p != P; p = p->link){
582 if(!(p->mark & NOSCHED))
591 if(p->mark & (LABEL|SYNC)) {
597 if(p->mark & (BRANCH|SYNC)) {
619 /* XXX use of this may lose important LABEL flags, check that this isn't happening (or fix) */
632 Sym *s1, *s2, *s3, *s4, *s5, *s6, *s7, *s8;
635 s1 = lookup("_divq", 0);
636 s2 = lookup("_divqu", 0);
637 s3 = lookup("_modq", 0);
638 s4 = lookup("_modqu", 0);
639 s5 = lookup("_divl", 0);
640 s6 = lookup("_divlu", 0);
641 s7 = lookup("_modl", 0);
642 s8 = lookup("_modlu", 0);
643 for(p = firstp; p != P; p = p->link)
645 if(p->from.sym == s1)
647 if(p->from.sym == s2)
649 if(p->from.sym == s3)
651 if(p->from.sym == s4)
653 if(p->from.sym == s5)
655 if(p->from.sym == s6)
657 if(p->from.sym == s7)
659 if(p->from.sym == s8)
663 diag("undefined: %s", s1->name);
666 if(prog_divqu == P) {
667 diag("undefined: %s", s2->name);
668 prog_divqu = curtext;
671 diag("undefined: %s", s3->name);
674 if(prog_modqu == P) {
675 diag("undefined: %s", s4->name);
676 prog_modqu = curtext;
679 diag("undefined: %s", s5->name);
682 if(prog_divlu == P) {
683 diag("undefined: %s", s6->name);
684 prog_divlu = curtext;
687 diag("undefined: %s", s7->name);
690 if(prog_modlu == P) {
691 diag("undefined: %s", s8->name);
692 prog_modlu = curtext;
717 diag("bad op %A in divsubr", o);
723 divuconst(Prog *p, uvlong y, int num, int quot, int bits)
726 uvlong k, m, n, mult, tmp, msb;
731 diag("division by zero");
735 return genRRR(p, AMOVQ, num, NREG, quot);
737 if(num == REGTMP || quot == REGTMP)
738 diag("bad register in divuconst");
741 for(logy = -1; tmp != 0; logy++)
744 msb = (1LL << (bits-1));
745 if((y & (y-1)) == 0) /* power of 2 */
746 return genIRR(p, ASRLQ, logy, num, quot);
748 return genIRR(p, ACMPUGE, y, num, quot);
750 /* k = (-2^(bits+logy)) % y */
754 Bprint(&bso, "divuconst: y=%lld msb=%lld m=%lld n=%lld\n",
756 for(i = 0; i <= logy; i++) {
765 Bprint(&bso, "divuconst: y=%lld msb=%lld m=%lld n=%lld\n",
768 if(k > (1LL << logy)) {
776 Bprint(&bso, "divuconst: y=%lld mult=%lld shift=%d bits=%d k=%lld\n",
777 y, mult, shift, bits, k);
779 p = genIRR(p, AMOVQ, mult, NREG, REGTMP);
780 p = genRRR(p, AEXTLL, REGZERO, num, quot);
781 p = genRRR(p, AMULQ, REGTMP, quot, quot);
782 p = genIRR(p, ASRLQ, shift, quot, quot);
783 p = genRRR(p, AADDL, quot, REGZERO, quot);
791 p = genIRR(p, AMOVQ, mult, NREG, REGTMP);
792 p = genRRR(p, AEXTLL, REGZERO, num, quot);
793 p = genRRR(p, AUMULH, REGTMP, quot, quot);
795 p = genIRR(p, ASRLQ, shift-64, quot, quot);
796 p = genRRR(p, AADDL, quot, REGZERO, quot);
804 p = genIRR(p, AMOVQ, mult, NREG, REGTMP);
805 p = genRRR(p, AUMULH, REGTMP, num, quot);
807 p = genIRR(p, ASRLQ, shift-64, quot, quot);
811 p = genIRR(p, AMOVQ, mult, NREG, REGTMP);
812 p = genRRR(p, AUMULH, REGTMP, num, REGTMP);
813 p = genRRR(p, AADDQ, num, REGTMP, quot);
814 p = genRRR(p, ACMPUGT, REGTMP, quot, REGTMP);
815 p = genIRR(p, ASLLQ, 128-shift, REGTMP, REGTMP);
816 p = genIRR(p, ASRLQ, shift-64, quot, quot);
817 p = genRRR(p, AADDQ, REGTMP, quot, quot);
822 divconst(Prog *p, vlong y, int num, int quot, int bits)
830 q = genRRR(p, ASUBQ, num, REGZERO, REGTMP2);
832 q = genRRR(q, AMOVQ, num, NREG, quot);
833 q = genRRR(q, ACMOVGT, REGTMP2, REGTMP2, quot);
834 q = divuconst(q, yabs, quot, quot, bits-1);
835 q = genRRR(q, ASUBQ, quot, REGZERO, REGTMP);
836 q = genRRR(q, (y < 0)? ACMOVLT: ACMOVGT, REGTMP, REGTMP2, quot);
841 modconst(Prog *p, vlong y, int num, int quot, int bits)
849 q = genRRR(p, ASUBQ, num, REGZERO, REGTMP2);
850 q = genRRR(q, ACMOVLT, num, REGTMP2, REGTMP2);
851 q = divuconst(q, yabs, REGTMP2, REGTMP2, bits-1);
852 q = genRRR(q, ASUBQ, REGTMP2, REGZERO, REGTMP);
853 q = genRRR(q, ACMOVLT, REGTMP, num, REGTMP2);
854 q = genIRR(q, AMULQ, yabs, REGTMP2, REGTMP2);
855 q = genRRR(q, ASUBQ, REGTMP2, num, quot);
860 genXXX(Prog *q, int op, Adr *from, int reg, Adr *to)
876 genRRR(Prog *q, int op, int from, int reg, int to)
883 p->from.type = D_REG;
894 genIRR(Prog *q, int op, vlong v, int reg, int to)
901 p->from.type = D_CONST;
912 genstore(Prog *q, int op, int from, vlong offset, int to)
919 p->from.type = D_REG;
923 p->to.offset = offset;
931 genload(Prog *q, int op, vlong offset, int from, int to)
938 p->from.type = D_OREG;
939 p->from.offset = offset;