/* BIT(n): a uvlong value with only bit n set (1 shifted left by n, computed in uvlong width). */
3 #define BIT(n) ((uvlong)1<<(n))
10 static void checkpool(Prog*, int);
11 static void flushpool(Prog*, int);
12 static int ispcdisp(long);
/*
 * oprange: one Oprang entry per opcode (array sized ALAST, indexed by opcode).
 * Each entry's start/stop fields are set to positions in optab (optab+i) and
 * read back when an instruction's optab entry is looked up.
 */
15 static Oprang oprange[ALAST];
27 Bprint(&bso, "%5.2f span\n", cputime());
33 for(p = firstp; p != P; p = p->link) {
34 if(p->as == ADWORD && (c&7) != 0)
42 autosize = p->to.offset + PCSZ;
44 p->from.sym->value = c;
45 /* need passes to resolve branches */
51 diag("zero-width instruction\n%P", p);
54 switch(o->flag & (LFROM|LTO)) {
62 if(p->as == AB || p->as == ARET || p->as == AERET || p->as == ARETURN) /* TO DO: other unconditional operations */
70 * if any procedure is large enough to
71 * generate a large SBRA branch, then
72 * generate extra passes putting branches
73 * around jmps to fix. this is rare.
77 Bprint(&bso, "%5.2f span1\n", cputime());
80 for(p = firstp; p != P; p = p->link) {
81 if(p->as == ADWORD && (c&7) != 0)
85 /* very large branches
86 if(o->type == 6 && p->cond) {
87 otxt = p->cond->pc - c;
90 if(otxt >= (1L<<17) - 10) {
95 q->to.type = D_BRANCH;
102 q->to.type = D_BRANCH;
103 q->cond = q->link->link;
112 autosize = p->to.offset + PCSZ;
114 p->from.sym->value = c;
117 diag("zero-width instruction\n%P", p);
126 * add strings to text segment
129 for(i=0; i<NHASH; i++)
130 for(s = hash[i]; s != S; s = s->link) {
131 if(s->type != SSTRING)
143 setext = lookup("etext", 0);
146 textsize = c - INITTEXT;
149 INITDAT = rnd(c, INITRND);
151 Bprint(&bso, "tsize = %#llux\n", textsize);
156 * when the first reference to the literal pool threatens
157 * to go out of range of a 1Mb PC-relative offset
158 * drop the pool now, and branch round it.
161 checkpool(Prog *p, int skip)
163 if(pool.size >= 0xffff0 || !ispcdisp(p->pc+4+pool.size - pool.start+8))
165 else if(p->link == P)
170 flushpool(Prog *p, int skip)
176 if(debug['v'] && skip == 1)
177 print("note: flush literal pool at %#llux: len=%lud ref=%lux\n", p->pc+4, pool.size, pool.start);
180 q->to.type = D_BRANCH;
185 else if(p->pc+pool.size-pool.start < 1024*1024)
187 elitrl->link = p->link;
189 blitrl = 0; /* BUG: should refer back to values until out-of-range */
200 addpool(Prog *p, Adr *a)
211 if(p->as == AMOV && (a->name == D_EXTERN || a->name == D_STATIC)
212 || (a->offset >> 32) != 0 && (a->offset >> 31) != -1){
239 if((instoffset >> 32) != 0 && (instoffset >> 31) != -1)
240 diag("offset too large\n%P", p);
242 t.to.offset = instoffset;
246 for(q = blitrl; q != P; q = q->link) /* could hash on t.to.offset */
247 if(memcmp(&q->to, &t.to, sizeof(t.to)) == 0) {
262 pool.size = rnd(pool.size, sz);
272 case ABEQ: return ABNE;
273 case ABNE: return ABEQ;
274 case ABCS: return ABCC;
275 case ABHS: return ABLO;
276 case ABCC: return ABCS;
277 case ABLO: return ABHS;
278 case ABMI: return ABPL;
279 case ABPL: return ABMI;
280 case ABVS: return ABVC;
281 case ABVC: return ABVS;
282 case ABHI: return ABLS;
283 case ABLS: return ABHI;
284 case ABGE: return ABLT;
285 case ABLT: return ABGE;
286 case ABGT: return ABLE;
287 case ABLE: return ABGT;
289 diag("unknown relation: %A", a);
294 xdefine(char *p, int t, long v)
299 if(s->type == 0 || s->type == SXREF) {
317 /* pc-relative addressing will reach? */
318 return v >= -0xfffff && v <= 0xfffff && (v&3) == 0;
324 /* uimm12 or uimm24? */
335 return findmask(v) != nil;
341 return (v >> 32) == 0 && findmask(v | v<<32) != nil;
364 for(i = 0; x != 0; i++)
370 findrotl(uvlong x, int *l)
374 for(i = 0; (x&1) == 0 || (x&BIT(63)) != 0; i++)
375 x = (x<<1) | ((x&BIT(63))!=0);
381 findmask64(Mask *m, uvlong v)
384 int i, lr, l0, l1, e;
386 if(v == 0 || v == ~(uvlong)0)
388 x = findrotl(v, &lr);
392 if(e == 0 || l1 == 64 || l0 == 64 || 64%e != 0)
399 for(i = 0; i < 64; i += e)
400 if(((x>>i) & fm) != f)
403 print("%#llux %#llux 1:%d 0:%d r:%d\n", v, x, l1, l0, lr%e);
412 * internal class codes for different constant classes:
413 * they partition the constant/offset range into disjoint ranges that
414 * are somehow treated specially by one or more load/store instructions.
/*
 * Operand-class lookup tables, each with 11 entries and each indexed by the
 * value returned from constclass(instoffset):
 *   autoclass - classes returned for stack (autosize-relative) offsets
 *   oregclass - classes returned for plain a->offset register offsets
 *   sextclass - classes returned for symbol-relative (s->value + a->offset) offsets
 * The three tables are parallel: entry i in each corresponds to the same
 * constclass() result, so entries must not be reordered independently.
 * NOTE(review): autoclass[0] is C_PSAUTO where oregclass[0] is C_ZOREG;
 * presumably there is no dedicated zero-offset automatic class -- confirm.
 */
416 static int autoclass[] = {C_PSAUTO, C_NSAUTO, C_NPAUTO, C_PSAUTO, C_PPAUTO, C_UAUTO4K, C_UAUTO8K, C_UAUTO16K, C_UAUTO32K, C_UAUTO64K, C_LAUTO};
417 static int oregclass[] = {C_ZOREG, C_NSOREG, C_NPOREG, C_PSOREG, C_PPOREG, C_UOREG4K, C_UOREG8K, C_UOREG16K, C_UOREG32K, C_UOREG64K, C_LOREG};
418 static int sextclass[] = {C_SEXT1, C_LEXT, C_LEXT, C_SEXT1, C_SEXT1, C_SEXT1, C_SEXT2, C_SEXT4, C_SEXT8, C_SEXT16, C_LEXT};
421 * return appropriate index into tables above
431 if(l >= -512 && (l&7) == 0)
437 if(l <= 504 && (l&7) == 0)
441 if(l <= 8190 && (l&1) == 0)
443 if(l <= 16380 && (l&3) == 0)
445 if(l <= 32760 && (l&7) == 0)
447 if(l <= 65520 && (l&0xF) == 0)
453 * given an offset v and a class c (see above)
454 * return the offset value to use in the instruction,
455 * scaled if necessary
458 offsetshift(vlong v, int c)
462 static int shifts[] = {0, 1, 2, 3, 4};
465 if(c >= C_SEXT1 && c <= C_SEXT16)
466 s = shifts[c-C_SEXT1];
467 else if(c >= C_UAUTO4K && c <= C_UAUTO64K)
468 s = shifts[c-C_UAUTO4K];
469 else if(c >= C_UOREG4K && c <= C_UOREG64K)
470 s = shifts[c-C_UOREG4K];
473 diag("odd offset: %lld\n%P", v, curp);
478 * if v contains a single 16-bit value aligned
479 * on a 16-bit field, and thus suitable for movk/movn,
480 * return the field index 0 to 3; otherwise return -1
487 for(s = 0; s < 64; s += 16)
488 if((v & ~((uvlong)0xFFFF<<s)) == 0)
539 if(a->sym == 0 || a->sym->name == 0) {
540 print("null sym external\n");
546 if(t == 0 || t == SXREF) {
547 diag("undefined external: %s in %s",
554 instoffset = s->value + a->offset + INITDAT;
561 instoffset = s->value + a->offset;
566 instoffset = s->value + a->offset;
568 return sextclass[constclass(instoffset)];
572 instoffset = autosize + a->offset;
573 return autoclass[constclass(instoffset)];
576 instoffset = autosize + a->offset + PCSZ;
577 return autoclass[constclass(instoffset)];
580 instoffset = a->offset;
581 return oregclass[constclass(instoffset)];
594 if(t == 0 || t == SXREF) {
595 diag("undefined external: %s in %s",
599 instoffset = s->value + a->offset + INITDAT;
600 if(s->type == STEXT || s->type == SLEAF || s->type == SUNDEF)
601 instoffset = s->value + a->offset;
613 instoffset = a->offset;
614 if(a->reg != NREG && a->reg != REGZERO)
656 diag("undefined external: %s in %s",
665 instoffset = s->value + a->offset;
669 instoffset = s->value + a->offset;
670 if(instoffset != 0 && isaddcon(instoffset))
673 instoffset = s->value + a->offset + INITDAT;
677 instoffset = autosize + a->offset;
681 instoffset = autosize + a->offset + PCSZ;
683 if(isaddcon(instoffset))
707 a1 = aclass(&p->from) + 1;
713 a3 = aclass(&p->to) + 1;
721 o = oprange[r].start;
723 a1 = opcross[repop[r]][a1][a2][a3];
728 o = oprange[r].stop; /* just generate an error */
731 print("oplook %A %d %d %d\n",
732 (int)p->as, a1, a2, a3);
733 print(" %d %d\n", p->from.type, p->to.type);
740 if(o->a2 == a2 || c2[o->a2])
744 print("%P\t-> %d (%d %d %d)\n", p, o->type,
745 o->a1, o->a2, o->a3);
746 p->optab = (o-optab)+1;
749 diag("illegal combination %A %R %R %R",
781 if(b == C_ZCON || b == C_ADDCON0 || b == C_ABCON)
791 if(b == C_ABCON || b == C_MBCON)
796 if(b == C_MBCON || b == C_ZCON || b == C_ADDCON0)
801 if(b == C_ZCON || b == C_BITCON || b == C_BITCON32 || b == C_BITCON64 || b == C_ADDCON || b == C_ADDCON0 || b == C_ABCON || b == C_MBCON || b == C_MOVCON)
806 return cmp(C_LCON, b);
819 if(b == C_SEXT1 || b == C_SEXT2)
824 if(b >= C_SEXT1 && b <= C_SEXT4)
829 if(b >= C_SEXT1 && b <= C_SEXT8)
834 if(b >= C_SEXT1 && b <= C_SEXT16)
844 if(b == C_PSAUTO || b == C_PPAUTO)
849 return cmp(C_UAUTO4K, b);
852 return cmp(C_UAUTO8K, b);
855 return cmp(C_UAUTO16K, b);
858 return cmp(C_UAUTO32K, b);
861 return cmp(C_NSAUTO, b);
864 return cmp(C_NPAUTO, b) || cmp(C_UAUTO64K, b);
872 if(b == C_ZOREG || b == C_PSOREG)
877 if(b == C_ZOREG || b == C_PSAUTO || b == C_PSOREG || b == C_PPAUTO || b == C_PPOREG)
882 return cmp(C_UOREG4K, b);
885 return cmp(C_UOREG8K, b);
888 return cmp(C_UOREG16K, b);
891 return cmp(C_UOREG32K, b);
894 return cmp(C_NSOREG, b);
897 return cmp(C_NPOREG, b) || cmp(C_UOREG64K, b);
908 ocmp(const void *a1, const void *a2)
936 for(i=0; i<C_GOK; i++)
937 for(n=0; n<C_GOK; n++)
938 xcmp[i][n] = cmp(n, i);
939 for(n=0; optab[n].as != AXXX; n++)
942 qsort(optab, n, sizeof(optab[0]), ocmp);
945 oprange[r].start = optab+i;
946 while(optab[i].as == r)
948 oprange[r].stop = optab+i;
954 diag("unknown op in build: %A", r);
967 case AAND: /* logical immediate, logical shifted register */
978 case ABIC: /* only logical shifted register */
994 case AADC: /* rn=Rd */
1001 oprange[ASBCSW] = t;
1003 case ANGC: /* rn=REGZERO */
1006 oprange[ANGCSW] = t;
1017 /* register/register, and shifted */
1021 oprange[AMOVKW] = t;
1023 oprange[AMOVNW] = t;
1025 oprange[AMOVZW] = t;
1058 oprange[ARBITW] = t;
1061 oprange[AREV16] = t;
1062 oprange[AREV16W] = t;
1063 oprange[AREV32] = t;
1066 oprange[ASDIVW] = t;
1068 oprange[AUDIVW] = t;
1069 oprange[ACRC32B] = t;
1070 oprange[ACRC32CB] = t;
1071 oprange[ACRC32CH] = t;
1072 oprange[ACRC32CW] = t;
1073 oprange[ACRC32CX] = t;
1074 oprange[ACRC32H] = t;
1075 oprange[ACRC32W] = t;
1076 oprange[ACRC32X] = t;
1079 oprange[AMADDW] = t;
1081 oprange[AMSUBW] = t;
1082 oprange[ASMADDL] = t;
1083 oprange[ASMSUBL] = t;
1084 oprange[AUMADDL] = t;
1085 oprange[AUMSUBL] = t;
1090 oprange[AUREMW] = t;
1095 oprange[AMNEGW] = t;
1096 oprange[ASMNEGL] = t;
1097 oprange[ASMULL] = t;
1098 oprange[ASMULH] = t;
1099 oprange[AUMNEGL] = t;
1100 oprange[AUMULH] = t;
1101 oprange[AUMULL] = t;
1104 oprange[AMOVHU] = t;
1107 oprange[AMOVWU] = t;
1112 oprange[ASBFMW] = t;
1114 oprange[AUBFMW] = t;
1118 oprange[ABFXIL] = t;
1119 oprange[ABFXILW] = t;
1120 oprange[ASBFIZ] = t;
1121 oprange[ASBFIZW] = t;
1123 oprange[ASBFXW] = t;
1124 oprange[AUBFIZ] = t;
1125 oprange[AUBFIZW] = t;
1127 oprange[AUBFXW] = t;
1130 oprange[AEXTRW] = t;
1133 oprange[ASXTBW] = t;
1135 oprange[ASXTHW] = t;
1140 oprange[AUXTBW] = t;
1141 oprange[AUXTHW] = t;
1144 oprange[ACCMNW] = t;
1146 oprange[ACCMPW] = t;
1149 oprange[ACSELW] = t;
1150 oprange[ACSINC] = t;
1151 oprange[ACSINCW] = t;
1152 oprange[ACSINV] = t;
1153 oprange[ACSINVW] = t;
1154 oprange[ACSNEG] = t;
1155 oprange[ACSNEGW] = t;
1156 // aliases Rm=Rn, !cond
1158 oprange[ACINCW] = t;
1160 oprange[ACINVW] = t;
1162 oprange[ACNEGW] = t;
1165 // aliases, Rm=Rn=REGZERO, !cond
1166 oprange[ACSETW] = t;
1167 oprange[ACSETM] = t;
1168 oprange[ACSETMW] = t;
1186 oprange[AYIELD] = t;
1194 oprange[ACBNZW] = t;
1209 oprange[ADCPS1] = t;
1210 oprange[ADCPS2] = t;
1211 oprange[ADCPS3] = t;
1214 oprange[AFADDD] = t;
1215 oprange[AFSUBS] = t;
1216 oprange[AFSUBD] = t;
1217 oprange[AFMULS] = t;
1218 oprange[AFMULD] = t;
1219 oprange[AFNMULS] = t;
1220 oprange[AFNMULD] = t;
1221 oprange[AFDIVS] = t;
1222 oprange[AFMAXD] = t;
1223 oprange[AFMAXS] = t;
1224 oprange[AFMIND] = t;
1225 oprange[AFMINS] = t;
1226 oprange[AFMAXNMD] = t;
1227 oprange[AFMAXNMS] = t;
1228 oprange[AFMINNMD] = t;
1229 oprange[AFMINNMS] = t;
1230 oprange[AFDIVD] = t;
1233 oprange[AFCVTDS] = t;
1234 oprange[AFABSD] = t;
1235 oprange[AFABSS] = t;
1236 oprange[AFNEGD] = t;
1237 oprange[AFNEGS] = t;
1238 oprange[AFSQRTD] = t;
1239 oprange[AFSQRTS] = t;
1240 oprange[AFRINTNS] = t;
1241 oprange[AFRINTND] = t;
1242 oprange[AFRINTPS] = t;
1243 oprange[AFRINTPD] = t;
1244 oprange[AFRINTMS] = t;
1245 oprange[AFRINTMD] = t;
1246 oprange[AFRINTZS] = t;
1247 oprange[AFRINTZD] = t;
1248 oprange[AFRINTAS] = t;
1249 oprange[AFRINTAD] = t;
1250 oprange[AFRINTXS] = t;
1251 oprange[AFRINTXD] = t;
1252 oprange[AFRINTIS] = t;
1253 oprange[AFRINTID] = t;
1254 oprange[AFCVTDH] = t;
1255 oprange[AFCVTHS] = t;
1256 oprange[AFCVTHD] = t;
1257 oprange[AFCVTSH] = t;
1260 oprange[AFCMPD] = t;
1261 oprange[AFCMPES] = t;
1262 oprange[AFCMPED] = t;
1265 oprange[AFCCMPD] = t;
1266 oprange[AFCCMPES] = t;
1267 oprange[AFCCMPED] = t;
1270 oprange[AFCSELS] = t;
1278 oprange[AFCVTZSDW] = t;
1279 oprange[AFCVTZSS] = t;
1280 oprange[AFCVTZSSW] = t;
1281 oprange[AFCVTZUD] = t;
1282 oprange[AFCVTZUDW] = t;
1283 oprange[AFCVTZUS] = t;
1284 oprange[AFCVTZUSW] = t;
1287 oprange[ASCVTFS] = t;
1288 oprange[ASCVTFWD] = t;
1289 oprange[ASCVTFWS] = t;
1290 oprange[AUCVTFD] = t;
1291 oprange[AUCVTFS] = t;
1292 oprange[AUCVTFWD] = t;
1293 oprange[AUCVTFWS] = t;
1317 oprange[ALDXRB] = t;
1318 oprange[ALDXRH] = t;
1319 oprange[ALDXRW] = t;
1322 oprange[ALDXPW] = t;
1325 oprange[ASTXRB] = t;
1326 oprange[ASTXRH] = t;
1327 oprange[ASTXRW] = t;
1330 oprange[ASTXPW] = t;
1335 oprange[AAESMC] = t;
1336 oprange[AAESIMC] = t;
1337 oprange[ASHA1H] = t;
1338 oprange[ASHA1SU1] = t;
1339 oprange[ASHA256SU0] = t;
1343 oprange[ASHA1P] = t;
1344 oprange[ASHA1M] = t;
1345 oprange[ASHA1SU0] = t;
1346 oprange[ASHA256H] = t;
1347 oprange[ASHA256H2] = t;
1348 oprange[ASHA256SU1] = t;
1352 oprange[AMOVPW] = t;
1353 oprange[AMOVPSW] = t;