From: cinap_lenrek Date: Sun, 26 May 2013 20:41:40 +0000 (+0200) Subject: keep fpregs always in sse (FXSAVE) format, adapt libmach and acid files for new format X-Git-Url: https://git.lizzy.rs/?a=commitdiff_plain;h=257c7e958ef125ec12cc218519ce56f767eb112d;p=plan9front.git keep fpregs always in sse (FXSAVE) format, adapt libmach and acid files for new format we now always use the new FXSAVE format in FPsave structure and fpregs file, converting back and forth in fpx87save() and fpx87restore(). document that fprestore() is a destructive operation now. change fp register definition in libmach and adapt fpr() acid function. avoid unnecessary copy of fpstate and fpsave in sysrfork(). functions including syscalls do not preserve the fp registers and copying fpstate from the current process would mean we had to fpsave(&up->fpsave); first. simply not doing it, new process starts in FPinit state. --- diff --git a/sys/lib/acid/386 b/sys/lib/acid/386 index 2aa319daa..5ae46fd83 100644 --- a/sys/lib/acid/386 +++ b/sys/lib/acid/386 @@ -69,22 +69,14 @@ defn fpr() print("F5\t", *F5, "\n"); print("F6\t", *F6, "\n"); print("F7\t", *F7, "\n"); - print("control\t", *fmt(E0, 'x'), "\n"); - print("status\t", *fmt(E1, 'x'), "\n"); - print("tag\t", *fmt(E2, 'x'), "\n"); - print("ip offset\t", *fmt(E3, 'X'), "\n"); - print("cs selector\t", *fmt(E4, 'x'), "\n"); - print("opcode\t", *fmt(E4>>8, 'x'), "\n"); - print("data operand offset\t", *fmt(E5, 'x'), "\n"); - print("operand selector\t", *fmt(E6, 'x'), "\n"); -} - -defn mmregs() -{ - print("MM0\t", *MM0, " MM1\t", *MM1, "\n"); - print("MM2\t", *MM2, " MM3\t", *MM3, "\n"); - print("MM4\t", *MM4, " MM5\t", *MM5, "\n"); - print("MM6\t", *MM6, " MM7\t", *MM7, "\n"); + print("control\t", *FCW, "\n"); + print("status\t", *FSW, "\n"); + print("tag\t", *FTW, "\n"); + print("ip\t", *FIP, "\n"); + print("cs selector\t", *FCS, "\n"); + print("opcode\t", *FOP, "\n"); + print("data operand\t", *FDP, "\n"); + print("operand selector\t", *FDS, "\n"); } 
defn pstop(pid) diff --git a/sys/src/9/pc/dat.h b/sys/src/9/pc/dat.h index e8d422414..862dc344a 100644 --- a/sys/src/9/pc/dat.h +++ b/sys/src/9/pc/dat.h @@ -76,10 +76,10 @@ struct FPstate ushort r3; ulong pc; ushort selector; - ushort r4; + ushort opcode; ulong operand; ushort oselector; - ushort r5; + ushort r4; uchar regs[80]; /* floating point registers */ }; @@ -91,10 +91,10 @@ struct FPssestate /* SSE fp state */ ushort fop; /* opcode */ ulong fpuip; /* pc */ ushort cs; /* pc segment */ - ushort r1; /* reserved */ + ushort rsrvd1; /* reserved */ ulong fpudp; /* data pointer */ ushort ds; /* data pointer segment */ - ushort r2; + ushort rsrvd2; ulong mxcsr; /* MXCSR register state */ ulong mxcsr_mask; /* MXCSR mask register */ uchar xregs[480]; /* extended registers */ diff --git a/sys/src/9/pc/fns.h b/sys/src/9/pc/fns.h index a066c44ea..176b36aae 100644 --- a/sys/src/9/pc/fns.h +++ b/sys/src/9/pc/fns.h @@ -41,7 +41,9 @@ void fpssesave(FPsave*); void fpssesave0(FPsave*); ulong fpstatus(void); void fpx87restore(FPsave*); +void fpx87restore0(FPsave*); void fpx87save(FPsave*); +void fpx87save0(FPsave*); ulong getcr0(void); ulong getcr2(void); ulong getcr3(void); diff --git a/sys/src/9/pc/l.s b/sys/src/9/pc/l.s index 4087b3f1a..e86b22449 100644 --- a/sys/src/9/pc/l.s +++ b/sys/src/9/pc/l.s @@ -657,13 +657,13 @@ TEXT fpinit(SB), $0 /* enable and init */ WAIT RET -TEXT fpx87save(SB), $0 /* save state and disable */ +TEXT fpx87save0(SB), $0 /* save state and disable */ MOVL p+0(FP), AX FSAVE 0(AX) /* no WAIT */ FPOFF(l2) RET -TEXT fpx87restore(SB), $0 /* enable and restore state */ +TEXT fpx87restore0(SB), $0 /* enable and restore state */ FPON MOVL p+0(FP), AX FRSTOR 0(AX) diff --git a/sys/src/9/pc/main.c b/sys/src/9/pc/main.c index 35cfccf01..538bec15b 100644 --- a/sys/src/9/pc/main.c +++ b/sys/src/9/pc/main.c @@ -469,6 +469,121 @@ confinit(void) } } +/* + * we keep FPsave structure in sse format emulating FXSAVE / FXRSTOR + * instructions for legacy x87 fpu. 
+ * + * Note that fpx87restore() and fpsserestore() do modify the FPsave + * data structure for conversion / realignment shuffling. this means + * that p->fpsave is only valid when p->fpstate == FPinactive. + */ +void +fpx87save(FPsave *fps) +{ + fpx87save0(fps); + + /* NOP fps->fcw = fps->control; */ + fps->fsw = fps->status; + fps->ftw = fps->tag; + fps->fop = fps->opcode; + fps->fpuip = fps->pc; + fps->cs = fps->selector; + fps->fpudp = fps->operand; + fps->ds = fps->oselector; + +#define MOVA(d,s) \ + *((ushort*)(d+8)) = *((ushort*)(s+8)), \ + *((ulong*)(d+4)) = *((ulong*)(s+4)), \ + *((ulong*)(d)) = *((ulong*)(s)) + + MOVA(fps->xregs+0x70, fps->regs+70); + MOVA(fps->xregs+0x60, fps->regs+60); + MOVA(fps->xregs+0x50, fps->regs+50); + MOVA(fps->xregs+0x40, fps->regs+40); + MOVA(fps->xregs+0x30, fps->regs+30); + MOVA(fps->xregs+0x20, fps->regs+20); + MOVA(fps->xregs+0x10, fps->regs+10); + MOVA(fps->xregs+0x00, fps->regs+00); + +#undef MOVA + +#define CLR6(d) \ + *((ulong*)(d)) = 0, \ + *((ushort*)(d+4)) = 0 + + CLR6(fps->xregs+0x70+10); + CLR6(fps->xregs+0x60+10); + CLR6(fps->xregs+0x50+10); + CLR6(fps->xregs+0x40+10); + CLR6(fps->xregs+0x30+10); + CLR6(fps->xregs+0x20+10); + CLR6(fps->xregs+0x10+10); + CLR6(fps->xregs+0x00+10); + +#undef CLR6 + + fps->rsrvd1 = fps->rsrvd2 = fps->mxcsr = fps->mxcsr_mask = 0; +} + +void +fpx87restore(FPsave *fps) +{ +#define MOVA(d,s) \ + *((ulong*)(d)) = *((ulong*)(s)), \ + *((ulong*)(d+4)) = *((ulong*)(s+4)), \ + *((ushort*)(d+8)) = *((ushort*)(s+8)) + + MOVA(fps->regs+00, fps->xregs+0x00); + MOVA(fps->regs+10, fps->xregs+0x10); + MOVA(fps->regs+20, fps->xregs+0x20); + MOVA(fps->regs+30, fps->xregs+0x30); + MOVA(fps->regs+40, fps->xregs+0x40); + MOVA(fps->regs+50, fps->xregs+0x50); + MOVA(fps->regs+60, fps->xregs+0x60); + MOVA(fps->regs+70, fps->xregs+0x70); + +#undef MOVA + + fps->oselector = fps->ds; + fps->operand = fps->fpudp; + fps->opcode = (fps->fop & 0x7ff); + fps->selector = fps->cs; + fps->pc = fps->fpuip; + fps->tag 
= fps->ftw; + fps->status = fps->fsw; + /* NOP fps->control = fps->fcw; */ + + fps->r1 = fps->r2 = fps->r3 = fps->r4 = 0; + + fpx87restore0(fps); +} + +/* + * sse fp save and restore buffers have to be 16-byte (FPalign) aligned, + * so we shuffle the data up and down as needed or make copies. + */ +void +fpssesave(FPsave *fps) +{ + FPsave *afps; + + afps = (FPsave *)ROUND(((uintptr)fps), FPalign); + fpssesave0(afps); + if(fps != afps) /* not aligned? shuffle down from aligned buffer */ + memmove(fps, afps, sizeof(FPssestate) - FPalign); +} + +void +fpsserestore(FPsave *fps) +{ + FPsave *afps; + + afps = (FPsave *)ROUND(((uintptr)fps), FPalign); + if(fps != afps) /* shuffle up to make aligned */ + memmove(afps, fps, sizeof(FPssestate) - FPalign); + fpsserestore0(afps); +} + static char* mathmsg[] = { nil, /* handled below */ @@ -510,61 +625,6 @@ mathnote(ulong status, ulong pc) postnote(up, 1, note, NDebug); } -/* - * sse fp save and restore buffers have to be 16-byte (FPalign) aligned, - * so we shuffle the data up and down as needed or make copies. - */ -void -fpssesave(FPsave *fps) -{ - FPsave *afps; - - afps = (FPsave *)ROUND(((uintptr)fps), FPalign); - fpssesave0(afps); - if(fps != afps) /* not aligned? shuffle down from aligned buffer */ - memmove(fps, afps, sizeof(FPssestate) - FPalign); -} - -void -fpsserestore(FPsave *fps) -{ - FPsave *afps; - - afps = (FPsave *)ROUND(((uintptr)fps), FPalign); - if(fps != afps) /* shuffle up to make aligned */ - memmove(afps, fps, sizeof(FPssestate) - FPalign); - fpsserestore0(afps); - if(fps != afps) /* shuffle regs back down when unaligned */ - memmove(fps, afps, sizeof(FPssestate) - FPalign); -} - -/* - * extract control, status and fppc from process - * floating point state independent of format. 
- */ -static void -mathstate(ulong *stsp, ulong *pcp, ulong *ctlp) -{ - ulong sts, fpc, ctl; - FPsave *f = &up->fpsave; - - if(fpsave == fpx87save){ - sts = f->status; - fpc = f->pc; - ctl = f->control; - } else { - sts = f->fsw; - fpc = f->fpuip; - ctl = f->fcw; - } - if(stsp) - *stsp = sts; - if(pcp) - *pcp = fpc; - if(ctlp) - *ctlp = ctl; -} - /* * math coprocessor error */ @@ -591,7 +651,7 @@ matherror(Ureg*, void*) static void mathemu(Ureg *ureg, void*) { - ulong status, control, pc; + ulong status, control; if(up->fpstate & FPillegal){ /* someone did floating point in a note handler */ @@ -611,9 +671,10 @@ mathemu(Ureg *ureg, void*) * More attention should probably be paid here to the * exception masks and error summary. */ - mathstate(&status, &pc, &control); + status = up->fpsave.fsw; + control = up->fpsave.fcw; if((status & ~control) & 0x07F){ - mathnote(status, pc); + mathnote(status, up->fpsave.fpuip); break; } fprestore(&up->fpsave); diff --git a/sys/src/9/port/sysproc.c b/sys/src/9/port/sysproc.c index 0676dffd0..9c5c8899e 100644 --- a/sys/src/9/port/sysproc.c +++ b/sys/src/9/port/sysproc.c @@ -84,7 +84,6 @@ sysrfork(ulong *arg) p = newproc(); - p->fpsave = up->fpsave; p->scallnr = up->scallnr; p->s = up->s; p->nerrlab = 0; @@ -180,7 +179,6 @@ sysrfork(ulong *arg) if((flag&RFNOTEG) == 0) p->noteid = up->noteid; - p->fpstate = up->fpstate; pid = p->pid; memset(p->time, 0, sizeof(p->time)); p->time[TReal] = MACHP(0)->ticks; diff --git a/sys/src/libmach/8.c b/sys/src/libmach/8.c index 5ec20a581..5e23cca71 100644 --- a/sys/src/libmach/8.c +++ b/sys/src/libmach/8.c @@ -14,9 +14,11 @@ #define AX REGOFF(ax) #define REGSIZE sizeof(struct Ureg) +#define FP_CTLS(x) (REGSIZE+2*(x)) #define FP_CTL(x) (REGSIZE+4*(x)) -#define FP_REG(x) (FP_CTL(7)+10*(x)) -#define FPREGSIZE (7*4+8*10) +#define FP_REG(x) (FP_CTL(8)+16*(x)) +#define XM_REG(x) (FP_CTL(8)+8*16+16*(x)) +#define FPREGSIZE 512 Reglist i386reglist[] = { {"DI", REGOFF(di), RINT, 'X'}, @@ -38,21 +40,44 @@ 
Reglist i386reglist[] = { {"SP", SP, RINT, 'X'}, {"SS", REGOFF(ss), RINT, 'X'}, - {"E0", FP_CTL(0), RFLT, 'X'}, - {"E1", FP_CTL(1), RFLT, 'X'}, - {"E2", FP_CTL(2), RFLT, 'X'}, - {"E3", FP_CTL(3), RFLT, 'X'}, - {"E4", FP_CTL(4), RFLT, 'X'}, - {"E5", FP_CTL(5), RFLT, 'X'}, - {"E6", FP_CTL(6), RFLT, 'X'}, - {"F0", FP_REG(0), RFLT, '3'}, - {"F1", FP_REG(1), RFLT, '3'}, - {"F2", FP_REG(2), RFLT, '3'}, - {"F3", FP_REG(3), RFLT, '3'}, - {"F4", FP_REG(4), RFLT, '3'}, - {"F5", FP_REG(5), RFLT, '3'}, - {"F6", FP_REG(6), RFLT, '3'}, - {"F7", FP_REG(7), RFLT, '3'}, + {"FCW", FP_CTLS(0), RFLT, 'x'}, + {"FSW", FP_CTLS(1), RFLT, 'x'}, + {"FTW", FP_CTLS(2), RFLT, 'b'}, + {"FOP", FP_CTLS(3), RFLT, 'x'}, + {"FIP", FP_CTL(2), RFLT, 'X'}, + {"FCS", FP_CTLS(6), RFLT, 'x'}, + {"FDP", FP_CTL(4), RFLT, 'X'}, + {"FDS", FP_CTLS(10), RFLT, 'x'}, + {"MXCSR", FP_CTL(6), RFLT, 'X'}, + {"MXCSRMASK", FP_CTL(7), RFLT, 'X'}, + + {"M0", FP_REG(0), RFLT, 'F'}, /* assumes double */ + {"M1", FP_REG(1), RFLT, 'F'}, + {"M2", FP_REG(2), RFLT, 'F'}, + {"M3", FP_REG(3), RFLT, 'F'}, + {"M4", FP_REG(4), RFLT, 'F'}, + {"M5", FP_REG(5), RFLT, 'F'}, + {"M6", FP_REG(6), RFLT, 'F'}, + {"M7", FP_REG(7), RFLT, 'F'}, + + {"X0", XM_REG(0), RFLT, 'F'}, /* assumes double */ + {"X1", XM_REG(1), RFLT, 'F'}, + {"X2", XM_REG(2), RFLT, 'F'}, + {"X3", XM_REG(3), RFLT, 'F'}, + {"X4", XM_REG(4), RFLT, 'F'}, + {"X5", XM_REG(5), RFLT, 'F'}, + {"X6", XM_REG(6), RFLT, 'F'}, + {"X7", XM_REG(7), RFLT, 'F'}, + + {"F0", FP_REG(7), RFLT, '3'}, + {"F1", FP_REG(6), RFLT, '3'}, + {"F2", FP_REG(5), RFLT, '3'}, + {"F3", FP_REG(4), RFLT, '3'}, + {"F4", FP_REG(3), RFLT, '3'}, + {"F5", FP_REG(2), RFLT, '3'}, + {"F6", FP_REG(1), RFLT, '3'}, + {"F7", FP_REG(0), RFLT, '3'}, + { 0 } };