--- /dev/null
+#ifndef __FLOAT
+#define __FLOAT
+/* IEEE, default rounding */
+
+#define FLT_ROUNDS 1
+#define FLT_RADIX 2
+
+#define FLT_DIG 6
+#define FLT_EPSILON 1.19209290e-07
+#define FLT_MANT_DIG 24
+#define FLT_MAX 3.40282347e+38
+#define FLT_MAX_10_EXP 38
+#define FLT_MAX_EXP 128
+#define FLT_MIN 1.17549435e-38
+#define FLT_MIN_10_EXP -37
+#define FLT_MIN_EXP -125
+
+#define DBL_DIG 15
+#define DBL_EPSILON 2.2204460492503131e-16
+#define DBL_MANT_DIG 53
+#define DBL_MAX 1.797693134862315708145e+308
+#define DBL_MAX_10_EXP 308
+#define DBL_MAX_EXP 1024
+#define DBL_MIN 2.225073858507201383090233e-308
+#define DBL_MIN_10_EXP -307
+#define DBL_MIN_EXP -1021
+#define LDBL_MANT_DIG DBL_MANT_DIG
+#define LDBL_EPSILON DBL_EPSILON
+#define LDBL_DIG DBL_DIG
+#define LDBL_MIN_EXP DBL_MIN_EXP
+#define LDBL_MIN DBL_MIN
+#define LDBL_MIN_10_EXP DBL_MIN_10_EXP
+#define LDBL_MAX_EXP DBL_MAX_EXP
+#define LDBL_MAX DBL_MAX
+#define LDBL_MAX_10_EXP DBL_MAX_10_EXP
+
+typedef union FPdbleword FPdbleword;
+union FPdbleword
+{
+ double x;
+ struct { /* little endian */
+ long lo;
+ long hi;
+ };
+};
+
+#ifdef _RESEARCH_SOURCE
+/* define stuff needed for floating conversion */
+#define IEEE_8087 1
+#define Sudden_Underflow 1
+#endif
+#ifdef _PLAN9_SOURCE
+/* MXCSR */
+/* fcr */
+#define FPFTZ (1<<15) /* amd64 */
+#define FPINEX (1<<12)
+#define FPUNFL (1<<11)
+#define FPOVFL (1<<10)
+#define FPZDIV (1<<9)
+#define FPDNRM (1<<8) /* amd64 */
+#define FPINVAL (1<<7)
+#define FPDAZ (1<<6) /* amd64 */
+#define FPRNR (0<<13)
+#define FPRZ (3<<13)
+#define FPRPINF (2<<13)
+#define FPRNINF (1<<13)
+#define FPRMASK (3<<13)
+#define FPPEXT 0
+#define FPPSGL 0
+#define FPPDBL 0
+#define FPPMASK 0
+/* fsr */
+#define FPAINEX (1<<5)
+#define FPAUNFL (1<<4)
+#define FPAOVFL (1<<3)
+#define FPAZDIV (1<<2)
+#define FPADNRM (1<<1) /* not in plan 9 */
+#define FPAINVAL (1<<0)
+#endif
+#endif /* __FLOAT */
--- /dev/null
+#ifndef _SUSV2_SOURCE
+#error "inttypes.h is SUSV2"
+#endif
+
+#ifndef _INTTYPES_H_
+#define _INTTYPES_H_ 1
+
+
+typedef char int8_t;
+typedef short int16_t;
+typedef int int32_t;
+typedef long long int64_t;
+typedef unsigned char uint8_t;
+typedef unsigned short uint16_t;
+typedef unsigned int uint32_t;
+typedef unsigned long long uint64_t;
+
+typedef long long intptr_t;
+typedef unsigned long long uintptr_t;
+
+#endif
--- /dev/null
+#ifndef __MATH
+#define __MATH
+#pragma lib "/$M/lib/ape/libap.a"
+
+/* a HUGE_VAL appropriate for IEEE double-precision */
+/* the correct value, 1.797693134862316e+308, causes a ken overflow */
+#define HUGE_VAL 1.79769313486231e+308
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern double acos(double);
+extern double asin(double);
+extern double atan(double);
+extern double atan2(double, double);
+extern double cos(double);
+extern double hypot(double, double);
+extern double sin(double);
+extern double tan(double);
+extern double cosh(double);
+extern double sinh(double);
+extern double tanh(double);
+extern double exp(double);
+extern double frexp(double, int *);
+extern double ldexp(double, int);
+extern double log(double);
+extern double log10(double);
+extern double modf(double, double *);
+extern double pow(double, double);
+extern double sqrt(double);
+extern double ceil(double);
+extern double fabs(double);
+extern double floor(double);
+extern double fmod(double, double);
+extern double NaN(void);
+extern int isNaN(double);
+extern double Inf(int);
+extern int isInf(double, int);
+
+#ifdef _RESEARCH_SOURCE
+/* does >> treat left operand as unsigned ? */
+#define Unsigned_Shifts 1
+#define M_E 2.7182818284590452354 /* e */
+#define M_LOG2E 1.4426950408889634074 /* log 2e */
+#define M_LOG10E 0.43429448190325182765 /* log 10e */
+#define M_LN2 0.69314718055994530942 /* log e2 */
+#define M_LN10 2.30258509299404568402 /* log e10 */
+#define M_PI 3.14159265358979323846 /* pi */
+#define M_PI_2 1.57079632679489661923 /* pi/2 */
+#define M_PI_4 0.78539816339744830962 /* pi/4 */
+#define M_1_PI 0.31830988618379067154 /* 1/pi */
+#define M_2_PI 0.63661977236758134308 /* 2/pi */
+#define M_2_SQRTPI 1.12837916709551257390 /* 2/sqrt(pi) */
+#define M_SQRT2 1.41421356237309504880 /* sqrt(2) */
+#define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */
+
+extern double hypot(double, double);
+extern double erf(double);
+extern double erfc(double);
+extern double j0(double);
+extern double y0(double);
+extern double j1(double);
+extern double y1(double);
+extern double jn(int, double);
+extern double yn(int, double);
+
+#endif
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#define isnan(x) isNaN(x)
+#define isinf(x) isInf(x, 0)
+
+#endif /* __MATH */
--- /dev/null
+#ifndef __STDARG
+#define __STDARG
+
+typedef char *va_list;
+
+#define va_start(list, start) list = (sizeof(start)<8 ? (char *)((long long *)&(start)+1) : \
+(char *)(&(start)+1))
+#define va_end(list)
+#define va_arg(list, mode)\
+ ((sizeof(mode) == 1)?\
+ ((mode*)(list += 8))[-8]:\
+ (sizeof(mode) == 2)?\
+ ((mode*)(list += 8))[-4]:\
+ (sizeof(mode) == 4)?\
+ ((mode*)(list += 8))[-2]:\
+ ((mode*)(list += sizeof(mode)))[-1])
+
+#endif /* __STDARG */
--- /dev/null
+#ifndef __UREG_H
+#define __UREG_H
+#if !defined(_PLAN9_SOURCE)
+ This header file is an extension to ANSI/POSIX
+#endif
+
+struct Ureg {
+ unsigned long long ax;
+ unsigned long long bx;
+ unsigned long long cx;
+ unsigned long long dx;
+ unsigned long long si;
+ unsigned long long di;
+ unsigned long long bp;
+ unsigned long long r8;
+ unsigned long long r9;
+ unsigned long long r10;
+ unsigned long long r11;
+ unsigned long long r12;
+ unsigned long long r13;
+ unsigned long long r14;
+ unsigned long long r15;
+
+ unsigned short ds;
+ unsigned short es;
+ unsigned short fs;
+ unsigned short gs;
+
+ unsigned long long type;
+ unsigned long long error; /* error code (or zero) */
+ unsigned long long pc; /* pc */
+ unsigned long long cs; /* old context */
+ unsigned long long flags; /* old flags */
+ unsigned long long sp; /* sp */
+ unsigned long long ss; /* old stack segment */
+};
+
+#endif
--- /dev/null
+TEXT getcallerpc(SB), 1, $0
+ MOVQ -8(RARG), AX
+ RET
--- /dev/null
+
+TEXT setfcr(SB), $4
+ XORL $(0x3F<<7),RARG /* bits are cleared in csr to enable them */
+ ANDL $0xFFC0, RARG /* just the fcr bits */
+ WAIT /* is this needed? */
+ STMXCSR 0(SP)
+ MOVL 0(SP), AX
+ ANDL $~0x3F, AX
+ ORL RARG, AX
+ MOVL AX, 0(SP)
+ LDMXCSR 0(SP)
+ RET
+
+TEXT getfcr(SB), $4
+ WAIT
+ STMXCSR 0(SP)
+ MOVWLZX 0(SP), AX
+ ANDL $0xFFC0, AX
+ XORL $(0x3F<<7),AX
+ RET
+
+TEXT getfsr(SB), $4
+ WAIT
+ STMXCSR 0(SP)
+ MOVL 0(SP), AX
+ ANDL $0x3F, AX
+ RET
+
+TEXT setfsr(SB), $4
+ ANDL $0x3F, RARG
+ WAIT
+ STMXCSR 0(SP)
+ MOVL 0(SP), AX
+ ANDL $~0x3F, AX
+ ORL RARG, AX
+ MOVL AX, 0(SP)
+ LDMXCSR 0(SP)
+ RET
--- /dev/null
+extern long __SEEK(long long*, int, long long, int);
+
+long long
+_SEEK(int fd, long long o, int p)
+{
+ long long l;
+
+ if(__SEEK(&l, fd, o, p) < 0)
+ l = -1;
+ return l;
+}
--- /dev/null
+TEXT _cycles(SB),1,$0 /* time stamp counter; cycles since power up */
+ RDTSC
+ MOVL AX, 0(RARG) /* lo */
+ MOVL DX, 4(RARG) /* hi */
+ RET
--- /dev/null
+#define _LOCK_EXTENSION
+#include "../plan9/sys9.h"
+#include <lock.h>
+
+int tas(int*);
+
+void
+lock(Lock *lk)
+{
+ while(tas(&lk->val))
+ _SLEEP(0);
+}
+
+int
+canlock(Lock *lk)
+{
+ if(tas(&lk->val))
+ return 0;
+ return 1;
+}
+
+void
+unlock(Lock *lk)
+{
+ lk->val = 0;
+}
--- /dev/null
+#define NPRIVATES 16
+
+GLOBL _tos(SB), $8
+GLOBL _errnoloc(SB), $8
+GLOBL _privates(SB), $8
+GLOBL _nprivates(SB), $8
+
+TEXT _main(SB), 1, $(32+NPRIVATES*8)
+
+ /* _tos = arg */
+ MOVQ AX, _tos(SB)
+ LEAQ 24(SP), AX
+ MOVQ AX, _errnoloc(SB)
+ LEAQ 32(SP), AX
+ MOVQ AX, _privates(SB)
+ MOVQ $NPRIVATES, _nprivates(SB)
+ CALL _envsetup(SB)
+ MOVL inargc-8(FP), RARG
+ LEAQ inargv+0(FP), AX
+ MOVQ AX, 8(SP)
+ MOVQ environ(SB), AX
+ MOVQ AX, 16(SP)
+ CALL main(SB)
+ MOVQ AX, RARG
+ CALL exit(SB)
+ RET
--- /dev/null
+#define NPRIVATES 16
+
+GLOBL _tos(SB), $8
+GLOBL _privates(SB), $8
+GLOBL _nprivates(SB), $8
+
+TEXT _mainp(SB), 1, $(3*8+NPRIVATES*8)
+
+ /* _tos = arg */
+ MOVQ AX, _tos(SB)
+ LEAQ 8(SP), AX
+ MOVQ AX, _privates(SB)
+ MOVQ $NPRIVATES, _nprivates(SB)
+
+ /* _profmain(); */
+ CALL _profmain(SB)
+
+ /* _tos->prof.pp = _tos->prof.next; */
+ MOVQ _tos+0(SB),DX
+ MOVQ 4(DX),CX
+ MOVQ CX,(DX)
+
+ CALL _envsetup(SB)
+
+ /* main(argc, argv, environ); */
+ MOVL inargc-8(FP), RARG
+ LEAQ inargv+0(FP), AX
+ MOVQ AX, 8(SP)
+ MOVQ environ(SB), AX
+ MOVQ AX, 16(SP)
+ CALL main(SB)
+
+loop:
+ MOVL AX, RARG
+ CALL exit(SB)
+ MOVQ $_profin(SB), AX /* force loading of profile */
+ MOVL $0, AX
+ JMP loop
+
+TEXT _savearg(SB), 1, $0
+ RET
+
+TEXT _callpc(SB), 1, $0
+ MOVQ 8(RARG), AX
+ RET
--- /dev/null
+APE=/sys/src/ape
+objtype=amd64
+<$APE/config
+LIB=/$objtype/lib/ape/libap.a
+OFILES=\
+ _seek.$O\
+ cycles.$O\
+ lock.$O\
+ main9.$O\
+ main9p.$O\
+ notetramp.$O\
+ setjmp.$O\
+ strchr.$O\
+ strlen.$O\
+ tas.$O\
+
+</sys/src/cmd/mksyslib
+
+CFLAGS=-c -D_POSIX_SOURCE -D_PLAN9_SOURCE
+
--- /dev/null
+#include "../plan9/lib.h"
+#include "../plan9/sys9.h"
+#include <signal.h>
+#include <setjmp.h>
+
+/* A stack to hold pcs when signals nest */
+#define MAXSIGSTACK 20
+typedef struct Pcstack Pcstack;
+static struct Pcstack {
+ int sig;
+ void (*hdlr)(int, char*, Ureg*);
+ unsigned long long restorepc;
+ Ureg *u;
+} pcstack[MAXSIGSTACK];
+static int nstack = 0;
+
+static void notecont(Ureg*, char*);
+
+void
+_notetramp(int sig, void (*hdlr)(int, char*, Ureg*), Ureg *u)
+{
+ Pcstack *p;
+
+ if(nstack >= MAXSIGSTACK)
+ _NOTED(1); /* nesting too deep; just do system default */
+ p = &pcstack[nstack];
+ p->restorepc = u->pc;
+ p->sig = sig;
+ p->hdlr = hdlr;
+ p->u = u;
+ nstack++;
+ u->pc = (unsigned long long) notecont;
+ _NOTED(2); /* NSAVE: clear note but hold state */
+}
+
+static void
+notecont(Ureg *u, char *s)
+{
+ Pcstack *p;
+ void(*f)(int, char*, Ureg*);
+
+ p = &pcstack[nstack-1];
+ f = p->hdlr;
+ u->pc = p->restorepc;
+ nstack--;
+ (*f)(p->sig, s, u);
+ _NOTED(3); /* NRSTR */
+}
+
+#define JMPBUFPC 1
+#define JMPBUFSP 0
+
+extern sigset_t _psigblocked;
+
+typedef struct {
+ sigset_t set;
+ sigset_t blocked;
+ unsigned long long jmpbuf[2];
+} sigjmp_buf_amd64;
+
+void
+siglongjmp(sigjmp_buf j, int ret)
+{
+ struct Ureg *u;
+ sigjmp_buf_amd64 *jb;
+
+ jb = (sigjmp_buf_amd64*)j;
+
+ if(jb->set)
+ _psigblocked = jb->blocked;
+ if(nstack == 0 || pcstack[nstack-1].u->sp > jb->jmpbuf[JMPBUFSP])
+ longjmp((void*)jb->jmpbuf, ret);
+ u = pcstack[nstack-1].u;
+ nstack--;
+ u->ax = ret;
+ if(ret == 0)
+ u->ax = 1;
+ u->pc = jb->jmpbuf[JMPBUFPC];
+ u->sp = jb->jmpbuf[JMPBUFSP] + 8;
+ _NOTED(3); /* NRSTR */
+}
--- /dev/null
+TEXT longjmp(SB), $0
+ MOVL r+8(FP), AX
+ CMPL AX, $0
+ JNE ok /* ansi: "longjmp(0) => longjmp(1)" */
+ MOVL $1, AX /* bless their pointed heads */
+ok:
+ MOVQ 0(RARG), SP /* restore sp */
+ MOVQ 8(RARG), BX /* put return pc on the stack */
+ MOVQ BX, 0(SP)
+ RET
+
+TEXT setjmp(SB), $0
+ MOVQ SP, 0(RARG) /* store sp */
+ MOVQ 0(SP), BX /* store return pc */
+ MOVQ BX, 8(RARG)
+ MOVL $0, AX /* return 0 */
+ RET
+
+TEXT sigsetjmp(SB), $0
+ MOVL savemask+8(FP), BX
+ MOVL BX, 0(RARG)
+ MOVL $_psigblocked(SB), 4(RARG)
+ MOVQ SP, 8(RARG) /* store sp */
+ MOVQ 0(SP), BX /* store return pc */
+ MOVQ BX, 16(RARG)
+ MOVL $0, AX /* return 0 */
+ RET
--- /dev/null
+ TEXT strchr(SB), $0
+
+ MOVQ RARG, DI
+ MOVB c+8(FP), AX
+ CMPB AX, $0
+ JEQ l2 /**/
+
+/*
+ * char is not null
+ */
+l1:
+ MOVB (DI), BX
+ CMPB BX, $0
+ JEQ ret0
+ ADDQ $1, DI
+ CMPB AX, BX
+ JNE l1
+
+ MOVQ DI, AX
+ SUBQ $1, AX
+ RET
+
+/*
+ * char is null
+ */
+l2:
+ MOVQ $-1, CX
+ CLD
+
+ REPN; SCASB
+
+ MOVQ DI, AX
+ SUBQ $1, AX
+ RET
+
+ret0:
+ MOVQ $0, AX
+ RET
--- /dev/null
+ TEXT strlen(SB),$0
+
+ MOVL $0, AX
+ MOVQ $-1, CX
+ CLD
+/*
+ * look for end of string
+ */
+
+ MOVQ RARG, DI
+ REPN; SCASB
+
+ MOVQ DI, AX
+ SUBQ RARG, AX
+ SUBQ $1, AX
+ RET
--- /dev/null
+TEXT tas(SB),$0
+
+ MOVL $0xdeadead,AX
+ XCHGL AX,(RARG)
+ RET
--- /dev/null
+#include <u.h>
+#include <libc.h>
+
+extern int _seek(vlong*, int, vlong, int);
+
+vlong
+seek(int fd, vlong o, int p)
+{
+ vlong l;
+
+ if(_seek(&l, fd, o, p) < 0)
+ l = -1LL;
+ return l;
+}
--- /dev/null
+GLOBL argv0(SB), $8
+GLOBL _tos(SB), $8
+GLOBL _privates(SB), $8
+GLOBL _nprivates(SB), $4
--- /dev/null
+TEXT ainc(SB), 1, $0 /* int ainc(int *); */
+ainclp:
+ MOVL (RARG), AX /* exp */
+ MOVL AX, BX
+ INCL BX /* new */
+ LOCK; CMPXCHGL BX, (RARG)
+ JNZ ainclp
+ MOVL BX, AX
+ RET
+
+TEXT adec(SB), 1, $0 /* int adec(int*); */
+adeclp:
+ MOVL (RARG), AX
+ MOVL AX, BX
+ DECL BX
+ LOCK; CMPXCHGL BX, (RARG)
+ JNZ adeclp
+ MOVL BX, AX
+ RET
+
+/*
+ * int cas32(u32int *p, u32int ov, u32int nv);
+ * int cas(uint *p, int ov, int nv);
+ * int casl(ulong *p, ulong ov, ulong nv);
+ */
+
+TEXT cas32(SB), 1, $0
+TEXT cas(SB), 1, $0
+TEXT casul(SB), 1, $0
+TEXT casl(SB), 1, $0 /* back compat */
+ MOVL exp+8(FP), AX
+ MOVL new+16(FP), BX
+ LOCK; CMPXCHGL BX, (RARG)
+ MOVL $1, AX /* use CMOVLEQ etc. here? */
+ JNZ _cas32r0
+_cas32r1:
+ RET
+_cas32r0:
+ DECL AX
+ RET
+
+/*
+ * int cas64(u64int *p, u64int ov, u64int nv);
+ * int casp(void **p, void *ov, void *nv);
+ */
+
+TEXT cas64(SB), 1, $0
+TEXT casp(SB), 1, $0
+ MOVQ exp+8(FP), AX
+ MOVQ new+16(FP), BX
+ LOCK; CMPXCHGQ BX, (RARG)
+ MOVL $1, AX /* use CMOVLEQ etc. here? */
+ JNZ _cas64r0
+_cas64r1:
+ RET
+_cas64r0:
+ DECL AX
+ RET
+
+TEXT fas64(SB), 1, $-4
+TEXT fasp(SB), 1, $-4
+ MOVQ p+8(FP), AX
+ LOCK; XCHGQ AX, (RARG) /* */
+ RET
+
+TEXT fas32(SB), 1, $-4
+ MOVL p+8(FP), AX
+ LOCK; XCHGL AX, (RARG) /* */
+ RET
--- /dev/null
+TEXT cycles(SB),1,$0 /* time stamp counter; cycles since power up */
+ RDTSC
+ MOVL AX, 0(RARG) /* lo */
+ MOVL DX, 4(RARG) /* hi */
+ RET
--- /dev/null
+TEXT getcallerpc(SB), 1, $0
+ MOVQ -8(RARG), AX
+ RET
--- /dev/null
+
+TEXT setfcr(SB), $4
+ XORL $(0x3F<<7),RARG /* bits are cleared in csr to enable them */
+ ANDL $0xFFC0, RARG /* just the fcr bits */
+ WAIT /* is this needed? */
+ STMXCSR 0(SP)
+ MOVL 0(SP), AX
+ ANDL $~0x3F, AX
+ ORL RARG, AX
+ MOVL AX, 0(SP)
+ LDMXCSR 0(SP)
+ RET
+
+TEXT getfcr(SB), $4
+ WAIT
+ STMXCSR 0(SP)
+ MOVWLZX 0(SP), AX
+ ANDL $0xFFC0, AX
+ XORL $(0x3F<<7),AX
+ RET
+
+TEXT getfsr(SB), $4
+ WAIT
+ STMXCSR 0(SP)
+ MOVL 0(SP), AX
+ ANDL $0x3F, AX
+ RET
+
+TEXT setfsr(SB), $4
+ ANDL $0x3F, RARG
+ WAIT
+ STMXCSR 0(SP)
+ MOVL 0(SP), AX
+ ANDL $~0x3F, AX
+ ORL RARG, AX
+ MOVL AX, 0(SP)
+ LDMXCSR 0(SP)
+ RET
--- /dev/null
+#define NPRIVATES 16
+
+TEXT _main(SB), 1, $(2*8+NPRIVATES*8)
+ MOVQ AX, _tos(SB)
+ LEAQ 16(SP), AX
+ MOVQ AX, _privates(SB)
+ MOVL $NPRIVATES, _nprivates(SB)
+ MOVL inargc-8(FP), RARG
+ LEAQ inargv+0(FP), AX
+ MOVQ AX, 8(SP)
+ CALL main(SB)
+
+loop:
+ MOVQ $_exits<>(SB), RARG
+ CALL exits(SB)
+ JMP loop
+
+DATA _exits<>+0(SB)/4, $"main"
+GLOBL _exits<>+0(SB), $5
--- /dev/null
+#define NPRIVATES 16
+
+TEXT _mainp(SB), 1, $(2*8+NPRIVATES*8)
+ MOVQ AX, _tos(SB) /* _tos = arg */
+ LEAQ 16(SP), AX
+ MOVQ AX, _privates(SB)
+ MOVL $NPRIVATES, _nprivates(SB)
+
+ CALL _profmain(SB) /* _profmain(); */
+
+ MOVQ _tos+0(SB), DX /* _tos->prof.pp = _tos->prof.next; */
+ MOVQ 8(DX), CX
+ MOVQ CX, (DX)
+
+ MOVL inargc-8(FP), RARG /* main(argc, argv); */
+ LEAQ inargv+0(FP), AX
+ MOVQ AX, 8(SP)
+ CALL main(SB)
+
+loop:
+ MOVQ $_exits<>(SB), RARG
+ CALL exits(SB)
+ MOVQ $_profin(SB), AX /* force loading of profile */
+ JMP loop
+
+TEXT _savearg(SB), 1, $0
+ MOVQ RARG, AX
+ RET
+
+TEXT _saveret(SB), 1, $0
+ RET
+
+TEXT _restorearg(SB), 1, $0
+ RET /* we want RARG in RARG */
+
+TEXT _callpc(SB), 1, $0
+ MOVQ 8(RARG), AX
+ RET
+
+DATA _exits<>+0(SB)/4, $"main"
+GLOBL _exits<>+0(SB), $5
--- /dev/null
+ TEXT memccpy(SB),$0
+
+ MOVL n+24(FP), CX
+ CMPL CX, $0
+ JEQ none
+ MOVQ p2+8(FP), DI
+ MOVBLZX c+16(FP), AX
+ CLD
+/*
+ * find the character in the second string
+ */
+
+ REPN; SCASB
+ JEQ found
+
+/*
+ * if not found, set count to 'n'
+ */
+none:
+ MOVL $0, AX
+ MOVL n+24(FP), BX
+ JMP memcpy
+
+/*
+ * if found, set count to bytes thru character
+ */
+found:
+ MOVQ DI, AX
+ SUBQ p2+8(FP), AX
+ MOVQ AX, BX
+ ADDQ RARG, AX
+
+/*
+ * copy the memory
+ */
+
+memcpy:
+ MOVQ RARG, DI
+ MOVQ p2+8(FP), SI
+/*
+ * copy whole longs, if aligned
+ */
+ MOVQ DI, DX
+ ORQ SI, DX
+ ANDL $3, DX
+ JNE c3
+ MOVL BX, CX
+ SHRQ $2, CX
+ REP; MOVSL
+/*
+ * copy the rest, by bytes
+ */
+ ANDL $3, BX
+c3:
+ MOVL BX, CX
+ REP; MOVSB
+
+ RET
--- /dev/null
+ TEXT memchr(SB),$0
+
+ MOVL n+16(FP), CX
+ CMPL CX, $0
+ JEQ none
+ MOVQ RARG, DI
+ MOVBLZX c+8(FP), AX
+ CLD
+/*
+ * SCASB is memchr instruction
+ */
+
+ REPN; SCASB
+ JEQ found
+
+none:
+ MOVL $0, AX
+ RET
+
+found:
+ MOVQ DI, AX
+ SUBQ $1, AX
+ RET
--- /dev/null
+ TEXT memcmp(SB),$0
+
+ MOVL n+16(FP), BX
+ CMPL BX, $0
+ JEQ none
+ MOVQ RARG, DI
+ MOVQ p2+8(FP), SI
+ CLD
+ MOVQ DI, CX
+ ORQ SI, CX
+ ANDL $3, CX
+ JNE c3
+/*
+ * first by longs
+ */
+
+ MOVL BX, CX
+ SHRQ $2, CX
+
+ REP; CMPSL
+ JNE found
+
+/*
+ * then by bytes
+ */
+ ANDL $3, BX
+c3:
+ MOVL BX, CX
+ REP; CMPSB
+ JNE found1
+
+none:
+ MOVQ $0, AX
+ RET
+
+/*
+ * if long found,
+ * back up and look by bytes
+ */
+found:
+ MOVL $4, CX
+ SUBQ CX, DI
+ SUBQ CX, SI
+ REP; CMPSB
+
+found1:
+ JLS lt
+ MOVQ $-1, AX
+ RET
+lt:
+ MOVQ $1, AX
+ RET
--- /dev/null
+TEXT memcpy(SB), $0
+ MOVQ RARG, DI
+ MOVQ DI, AX /* return value */
+ MOVQ p2+8(FP), SI
+ MOVL n+16(FP), BX
+ CMPL BX, $0
+ JGT _ok
+ JEQ _return /* nothing to do if n == 0 */
+ MOVL $0, SI /* fault if n < 0 */
+
+/*
+ * check and set for backwards:
+ * (p2 < p1) && ((p2+n) > p1)
+ */
+_ok:
+ CMPQ SI, DI
+ JGT _forward
+ JEQ _return /* nothing to do if p2 == p1 */
+ MOVQ SI, DX
+ ADDQ BX, DX
+ CMPQ DX, DI
+ JGT _back
+
+/*
+ * copy whole longs if aligned
+ */
+_forward:
+ CLD
+ MOVQ SI, DX
+ ORQ DI, DX
+ ANDL $3, DX
+ JNE c3f
+ MOVQ BX, CX
+ SHRQ $2, CX
+ ANDL $3, BX
+ REP; MOVSL
+
+/*
+ * copy the rest, by bytes
+ */
+ JEQ _return /* flags set by above ANDL */
+c3f:
+ MOVL BX, CX
+ REP; MOVSB
+
+ RET
+
+/*
+ * whole thing backwards has
+ * adjusted addresses
+ */
+_back:
+ ADDQ BX, DI
+ ADDQ BX, SI
+ STD
+ SUBQ $4, DI
+ SUBQ $4, SI
+/*
+ * copy whole longs, if aligned
+ */
+ MOVQ DI, DX
+ ORQ SI, DX
+ ANDL $3, DX
+ JNE c3b
+ MOVL BX, CX
+ SHRQ $2, CX
+ ANDL $3, BX
+ REP; MOVSL
+/*
+ * copy the rest, by bytes
+ */
+ JEQ _return /* flags set by above ANDL */
+
+c3b:
+ ADDQ $3, DI
+ ADDQ $3, SI
+ MOVL BX, CX
+ REP; MOVSB
+
+_return:
+ RET
--- /dev/null
+TEXT memmove(SB), $0
+ MOVQ RARG, DI
+ MOVQ DI, AX /* return value */
+ MOVQ p2+8(FP), SI
+ MOVL n+16(FP), BX
+ CMPL BX, $0
+ JGT _ok
+ JEQ _return /* nothing to do if n == 0 */
+ MOVL $0, SI /* fault if n < 0 */
+
+/*
+ * check and set for backwards:
+ * (p2 < p1) && ((p2+n) > p1)
+ */
+_ok:
+ CMPQ SI, DI
+ JGT _forward
+ JEQ _return /* nothing to do if p2 == p1 */
+ MOVQ SI, DX
+ ADDQ BX, DX
+ CMPQ DX, DI
+ JGT _back
+
+/*
+ * copy whole longs if aligned
+ */
+_forward:
+ CLD
+ MOVQ SI, DX
+ ORQ DI, DX
+ ANDL $3, DX
+ JNE c3f
+ MOVQ BX, CX
+ SHRQ $2, CX
+ ANDL $3, BX
+ REP; MOVSL
+
+/*
+ * copy the rest, by bytes
+ */
+ JEQ _return /* flags set by above ANDL */
+c3f:
+ MOVL BX, CX
+ REP; MOVSB
+
+ RET
+
+/*
+ * whole thing backwards has
+ * adjusted addresses
+ */
+_back:
+ ADDQ BX, DI
+ ADDQ BX, SI
+ STD
+ SUBQ $4, DI
+ SUBQ $4, SI
+/*
+ * copy whole longs, if aligned
+ */
+ MOVQ DI, DX
+ ORQ SI, DX
+ ANDL $3, DX
+ JNE c3b
+ MOVL BX, CX
+ SHRQ $2, CX
+ ANDL $3, BX
+ REP; MOVSL
+/*
+ * copy the rest, by bytes
+ */
+ JEQ _return /* flags set by above ANDL */
+
+c3b:
+ ADDQ $3, DI
+ ADDQ $3, SI
+ MOVL BX, CX
+ REP; MOVSB
+
+_return:
+ RET
--- /dev/null
+ TEXT memset(SB),$0
+
+ CLD
+ MOVQ RARG, DI
+ MOVBLZX c+8(FP), AX
+ MOVL n+16(FP), BX
+/*
+ * if not enough bytes, just set bytes
+ */
+ CMPL BX, $9
+ JLS c3
+/*
+ * if not aligned, just set bytes
+ */
+ MOVQ RARG, CX
+ ANDL $3,CX
+ JNE c3
+/*
+ * build word in AX
+ */
+ MOVB AL, AH
+ MOVL AX, CX
+ SHLL $16, CX
+ ORL CX, AX
+/*
+ * set whole longs
+ */
+c1:
+ MOVQ BX, CX
+ SHRQ $2, CX
+ ANDL $3, BX
+ REP; STOSL
+/*
+ * set the rest, by bytes
+ */
+c3:
+ MOVL BX, CX
+ REP; STOSB
+ret:
+ MOVQ RARG,AX
+ RET
--- /dev/null
+objtype=amd64
+</$objtype/mkfile
+
+LIB=/$objtype/lib/libc.a
+SFILES=\
+ argv0.s\
+ atom.s\
+ cycles.s\
+ getfcr.s\
+ main9.s\
+ main9p.s\
+ memccpy.s\
+ memchr.s\
+ memcmp.s\
+ memcpy.s\
+ memmove.s\
+ memset.s\
+ muldiv.s\
+ setjmp.s\
+ sqrt.s\
+ strcat.s\
+ strchr.s\
+ strcpy.s\
+ strlen.s\
+ tas.s\
+
+CFILES=\
+ _seek.c\
+ getcallerpc.c\
+ notejmp.c\
+
+HFILES=/sys/include/libc.h
+
+OFILES=${CFILES:%.c=%.$O} ${SFILES:%.s=%.$O}
+
+UPDATE=mkfile\
+ $HFILES\
+ $CFILES\
+ $SFILES\
+
+</sys/src/cmd/mksyslib
--- /dev/null
+TEXT umuldiv(SB), $0
+ MOVL RARG, AX
+ MULL b+8(FP)
+ DIVL c+16(FP)
+ RET
+
+TEXT muldiv(SB), $0
+ MOVL RARG, AX
+ IMULL b+8(FP)
+ IDIVL c+16(FP)
+ RET
+ END
--- /dev/null
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+
+void
+notejmp(void *vr, jmp_buf j, int ret)
+{
+ struct Ureg *r = vr;
+
+ r->ax = ret;
+ if(ret == 0)
+ r->ax = 1;
+ r->pc = j[JMPBUFPC];
+ r->sp = j[JMPBUFSP] + 8;
+ noted(NCONT);
+}
--- /dev/null
+TEXT longjmp(SB), $0
+ MOVL r+8(FP), AX
+ CMPL AX, $0
+ JNE ok /* ansi: "longjmp(0) => longjmp(1)" */
+ MOVL $1, AX /* bless their pointed heads */
+ok:
+ MOVQ 0(RARG), SP /* restore sp */
+ MOVQ 8(RARG), BX /* put return pc on the stack */
+ MOVQ BX, 0(SP)
+ RET
+
+TEXT setjmp(SB), $0
+ MOVQ SP, 0(RARG) /* store sp */
+ MOVQ 0(SP), BX /* store return pc */
+ MOVQ BX, 8(RARG)
+ MOVL $0, AX /* return 0 */
+ RET
--- /dev/null
+TEXT sqrt(SB), $0
+ MOVSD a+0(FP), X0
+ SQRTSD X0, X0
+ RET
--- /dev/null
+ TEXT strcat(SB),$0
+
+ MOVL $0, AX
+ MOVQ $-1, CX
+ CLD
+
+/*
+ * find length of second string
+ */
+
+ MOVQ p2+8(FP), DI
+ REPN; SCASB
+
+ MOVQ DI, BX
+ SUBQ p2+8(FP), BX
+
+/*
+ * find end of first string
+ */
+
+ MOVQ RARG, DI
+ REPN; SCASB
+
+/*
+ * copy the memory
+ */
+ SUBQ $1, DI
+ MOVQ p2+8(FP), SI
+/*
+ * copy whole longs, if aligned
+ */
+ MOVQ DI, CX
+ ORQ SI, CX
+ ANDL $3, CX
+ JNE c3
+ MOVQ BX, CX
+ SHRQ $2, CX
+ REP; MOVSL
+/*
+ * copy the rest, by bytes
+ */
+ ANDL $3, BX
+c3:
+ MOVQ BX, CX
+ REP; MOVSB
+
+ MOVQ RARG, AX
+ RET
--- /dev/null
+ TEXT strchr(SB), $0
+
+ MOVQ RARG, DI
+ MOVB c+8(FP), AX
+ CMPB AX, $0
+ JEQ l2 /**/
+
+/*
+ * char is not null
+ */
+l1:
+ MOVB (DI), BX
+ CMPB BX, $0
+ JEQ ret0
+ ADDQ $1, DI
+ CMPB AX, BX
+ JNE l1
+
+ MOVQ DI, AX
+ SUBQ $1, AX
+ RET
+
+/*
+ * char is null
+ */
+l2:
+ MOVQ $-1, CX
+ CLD
+
+ REPN; SCASB
+
+ MOVQ DI, AX
+ SUBQ $1, AX
+ RET
+
+ret0:
+ MOVQ $0, AX
+ RET
--- /dev/null
+ TEXT strcpy(SB),$0
+
+ MOVL $0, AX
+ MOVQ $-1, CX
+ CLD
+/*
+ * find end of second string
+ */
+
+ MOVQ p2+8(FP), DI
+ REPN; SCASB
+
+ MOVQ DI, BX
+ SUBQ p2+8(FP), BX
+
+/*
+ * copy the memory
+ */
+ MOVQ RARG, DI
+ MOVQ p2+8(FP), SI
+/*
+ * copy whole longs, if aligned
+ */
+ MOVQ DI, CX
+ ORQ SI, CX
+ ANDL $3, CX
+ JNE c3
+ MOVQ BX, CX
+ SHRQ $2, CX
+ REP; MOVSL
+/*
+ * copy the rest, by bytes
+ */
+ ANDL $3, BX
+c3:
+ MOVL BX, CX
+ REP; MOVSB
+
+ MOVQ RARG, AX
+ RET
--- /dev/null
+ TEXT strlen(SB),$0
+
+ MOVL $0, AX
+ MOVQ $-1, CX
+ CLD
+/*
+ * look for end of string
+ */
+
+ MOVQ RARG, DI
+ REPN; SCASB
+
+ MOVQ DI, AX
+ SUBQ RARG, AX
+ SUBQ $1, AX
+ RET
--- /dev/null
+TEXT _tas(SB), 1, $0
+
+ MOVL $0xdeaddead,AX
+ XCHGL AX,(RARG)
+ RET
--- /dev/null
+objtype=amd64
+</$objtype/mkfile
+
+LIB=/$objtype/lib/libmp.a
+SFILES=\
+ mpdigdiv.s\
+ mpvecadd.s\
+ mpvecdigmuladd.s\
+ mpvecdigmulsub.s\
+ mpvecsub.s\
+
+HFILES=/$objtype/include/u.h /sys/include/mp.h ../port/dat.h
+
+OFILES=${SFILES:%.s=%.$O}
+
+UPDATE=mkfile\
+ $HFILES\
+ $SFILES\
+
+</sys/src/cmd/mksyslib
--- /dev/null
+TEXT mpdigdiv(SB),$0
+
+/* MOVL dividend+0(FP),BX */
+ MOVL 0(RARG),AX
+ MOVL 4(RARG),DX
+ MOVL divisor+8(FP),BX
+ MOVQ quotient+16(FP),DI
+ XORL CX,CX
+ CMPL DX,BX /* dividend >= 2^32 * divisor */
+ JHS _divovfl
+ CMPL BX,CX /* divisor == 0 */
+ JE _divovfl
+ DIVL BX /* AX = DX:AX/BX */
+ MOVL AX,0(DI)
+ RET
+
+ /* return all 1's */
+_divovfl:
+ NOTL CX
+ MOVL CX,0(DI)
+ RET
--- /dev/null
+/*
+ * mpvecadd(mpdigit *a, int alen, mpdigit *b, int blen, mpdigit *sum)
+ *
+ * sum[0:alen] = a[0:alen-1] + b[0:blen-1]
+ *
+ * prereq: alen >= blen, sum has room for alen+1 digits
+ */
+TEXT mpvecadd(SB),$0
+
+ MOVL alen+8(FP),DX
+ MOVL blen+24(FP),CX
+/* MOVL a+0(FP),SI */
+ MOVQ RARG, SI
+ MOVQ b+16(FP),BX
+ SUBL CX,DX
+ MOVQ sum+32(FP),DI
+ XORL BP,BP /* this also sets carry to 0 */
+
+ /* skip addition if b is zero */
+ TESTL CX,CX
+ JZ _add1
+
+ /* sum[0:blen-1],carry = a[0:blen-1] + b[0:blen-1] */
+_addloop1:
+ MOVL (SI)(BP*4), AX
+ ADCL (BX)(BP*4), AX
+ MOVL AX,(DI)(BP*4)
+ INCL BP
+ LOOP _addloop1
+
+_add1:
+ /* jump if alen > blen */
+ INCL DX
+ MOVL DX,CX
+ LOOP _addloop2
+
+ /* sum[alen] = carry */
+_addend:
+ JC _addcarry
+ MOVL $0,(DI)(BP*4)
+ RET
+_addcarry:
+ MOVL $1,(DI)(BP*4)
+ RET
+
+ /* sum[blen:alen-1],carry = a[blen:alen-1] + 0 */
+_addloop2:
+ MOVL (SI)(BP*4),AX
+ ADCL $0,AX
+ MOVL AX,(DI)(BP*4)
+ INCL BP
+ LOOP _addloop2
+ JMP _addend
+
--- /dev/null
+/*
+ * mpvecdigmul(mpdigit *b, int n, mpdigit m, mpdigit *p)
+ *
+ * p += b*m
+ *
+ * each step look like:
+ * hi,lo = m*b[i]
+ * lo += oldhi + carry
+ * hi += carry
+ * p[i] += lo
+ * oldhi = hi
+ *
+ * the registers are:
+ * hi = DX - constrained by hardware
+ * lo = AX - constrained by hardware
+ * b+n = SI - can't be BP
+ * p+n = DI - can't be BP
+ * i-n = BP
+ * m = BX
+ * oldhi = CX
+ *
+ */
+TEXT mpvecdigmuladd(SB),$0
+
+/* MOVQ b+0(FP),SI */
+ MOVQ RARG,SI
+ MOVL n+8(FP),CX
+ MOVL m+16(FP),BX
+ MOVQ p+24(FP),DI
+ MOVL CX,BP
+ NEGQ BP /* BP = -n */
+ SHLL $2,CX
+ ADDQ CX,SI /* SI = b + n */
+ ADDQ CX,DI /* DI = p + n */
+ XORL CX,CX
+_muladdloop:
+ MOVL (SI)(BP*4),AX /* lo = b[i] */
+ MULL BX /* hi, lo = b[i] * m */
+ ADDL CX,AX /* lo += oldhi */
+ JCC _muladdnocarry1
+ INCL DX /* hi += carry */
+_muladdnocarry1:
+ ADDL AX,(DI)(BP*4) /* p[i] += lo */
+ JCC _muladdnocarry2
+ INCL DX /* hi += carry */
+_muladdnocarry2:
+ MOVL DX,CX /* oldhi = hi */
+ INCQ BP /* i++ */
+ JNZ _muladdloop
+ XORL AX,AX
+ ADDL CX,(DI)(BP*4) /* p[n] + oldhi */
+ ADCL AX,AX /* return carry out of p[n] */
+ RET
--- /dev/null
+/*
+ * mpvecdigmulsub(mpdigit *b, int n, mpdigit m, mpdigit *p)
+ *
+ * p -= b*m
+ *
+ * each step look like:
+ * hi,lo = m*b[i]
+ * lo += oldhi + carry
+ * hi += carry
+ * p[i] += lo
+ * oldhi = hi
+ *
+ * the registers are:
+ * hi = DX - constrained by hardware
+ * lo = AX - constrained by hardware
+ * b = SI - can't be BP
+ * p = DI - can't be BP
+ * i = BP
+ * n = CX - constrained by LOOP instr
+ * m = BX
+ * oldhi = R8
+ *
+ */
+TEXT mpvecdigmulsub(SB),$0
+
+/* MOVL b+0(FP),SI */
+ MOVQ RARG,SI
+ MOVL n+8(FP),CX
+ MOVL m+16(FP),BX
+ MOVQ p+24(FP),DI
+ XORL BP,BP
+ MOVL BP,R8
+_mulsubloop:
+ MOVL (SI)(BP*4),AX /* lo = b[i] */
+ MULL BX /* hi, lo = b[i] * m */
+ ADDL R8,AX /* lo += oldhi */
+ JCC _mulsubnocarry1
+ INCL DX /* hi += carry */
+_mulsubnocarry1:
+ SUBL AX,(DI)(BP*4)
+ JCC _mulsubnocarry2
+ INCL DX /* hi += carry */
+_mulsubnocarry2:
+ MOVL DX,R8
+ INCL BP
+ LOOP _mulsubloop
+ SUBL R8,(DI)(BP*4)
+ JCC _mulsubnocarry3
+ MOVQ $-1,AX
+ RET
+_mulsubnocarry3:
+ MOVQ $1,AX
+ RET
--- /dev/null
+/*
+ * mpvecsub(mpdigit *a, int alen, mpdigit *b, int blen, mpdigit *diff)
+ *
+ * diff[0:alen-1] = a[0:alen-1] - b[0:blen-1]
+ *
+ * prereq: alen >= blen, diff has room for alen digits
+ */
+TEXT mpvecsub(SB),$0
+
+/* MOVQ a+0(FP),SI */
+ MOVQ RARG, SI
+ MOVQ b+16(FP),BX
+ MOVL alen+8(FP),DX
+ MOVL blen+24(FP),CX
+ MOVQ diff+32(FP),DI
+ SUBL CX,DX
+ XORL BP,BP /* this also sets carry to 0 */
+
+ /* skip subraction if b is zero */
+ TESTL CX,CX
+ JZ _sub1
+
+ /* diff[0:blen-1],borrow = a[0:blen-1] - b[0:blen-1] */
+_subloop1:
+ MOVL (SI)(BP*4),AX
+ SBBL (BX)(BP*4),AX
+ MOVL AX,(DI)(BP*4)
+ INCL BP
+ LOOP _subloop1
+
+_sub1:
+ INCL DX
+ MOVL DX,CX
+ LOOP _subloop2
+ RET
+
+ /* diff[blen:alen-1] = a[blen:alen-1] - 0 */
+_subloop2:
+ MOVL (SI)(BP*4),AX
+ SBBL $0,AX
+ MOVL AX,(DI)(BP*4)
+ INCL BP
+ LOOP _subloop2
+ RET
+
--- /dev/null
+/*
+ * rfc1321 requires that I include this. The code is new. The constants
+ * all come from the rfc (hence the copyright). We trade a table for the
+ * macros in rfc. The total size is a lot less. -- presotto
+ *
+ * Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
+ * rights reserved.
+ *
+ * License to copy and use this software is granted provided that it
+ * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
+ * Algorithm" in all material mentioning or referencing this software
+ * or this function.
+ *
+ * License is also granted to make and use derivative works provided
+ * that such works are identified as "derived from the RSA Data
+ * Security, Inc. MD5 Message-Digest Algorithm" in all material
+ * mentioning or referencing the derived work.
+ *
+ * RSA Data Security, Inc. makes no representations concerning either
+ * the merchantability of this software or the suitability of this
+ * software forany particular purpose. It is provided "as is"
+ * without express or implied warranty of any kind.
+ * These notices must be retained in any copies of any part of this
+ * documentation and/or software.
+ */
+#define S11 7
+#define S12 12
+#define S13 17
+#define S14 22
+
+#define S21 5
+#define S22 9
+#define S23 14
+#define S24 20
+
+#define S31 4
+#define S32 11
+#define S33 16
+#define S34 23
+
+#define S41 6
+#define S42 10
+#define S43 15
+#define S44 21
+
+/*
+ * SI is data
+ * a += FN(B,C,D);
+ * a += x[sh] + t[sh];
+ * a = (a << S11) | (a >> (32 - S11));
+ * a += b;
+ */
+
+#define BODY1(off,V,FN,SH,A,B,C,D)\
+ FN(B,C,D)\
+ LEAL V(A)(DI*1),A;\
+ ADDL (off)(BP),A;\
+ ROLL $SH,A;\
+ ADDL B,A;\
+
+#define BODY(off,V,FN,SH,A,B,C,D)\
+ FN(B,C,D)\
+ LEAL V(A)(DI*1),A;\
+ ADDL (off)(BP),A;\
+ ROLL $SH,A;\
+ ADDL B,A;\
+
+/*
+ * fn1 = ((c ^ d) & b) ^ d
+ */
+#define FN1(B,C,D)\
+ MOVL C,DI;\
+ XORL D,DI;\
+ ANDL B,DI;\
+ XORL D,DI;\
+
+/*
+ * fn2 = ((b ^ c) & d) ^ c;
+ */
+#define FN2(B,C,D)\
+ MOVL B,DI;\
+ XORL C,DI;\
+ ANDL D,DI;\
+ XORL C,DI;\
+
+/*
+ * fn3 = b ^ c ^ d;
+ */
+#define FN3(B,C,D)\
+ MOVL B,DI;\
+ XORL C,DI;\
+ XORL D,DI;\
+
+/*
+ * fn4 = c ^ (b | ~d);
+ */
+#define FN4(B,C,D)\
+ MOVL D,DI;\
+ XORL $-1,DI;\
+ ORL B,DI;\
+ XORL C,DI;\
+
+#define LEN 8
+#define STATE 16
+
+TEXT _md5block+0(SB), $0
+
+ MOVQ RARG,R8
+ MOVLQZX len+LEN(FP),BX
+ ADDQ BX,R8
+
+mainloop:
+ MOVQ state+STATE(FP),SI
+ MOVL (SI),AX
+ MOVL 4(SI),BX
+ MOVL 8(SI),CX
+ MOVL 12(SI),DX
+
+ BODY1( 0*4,0xd76aa478,FN1,S11,AX,BX,CX,DX)
+ BODY1( 1*4,0xe8c7b756,FN1,S12,DX,AX,BX,CX)
+ BODY1( 2*4,0x242070db,FN1,S13,CX,DX,AX,BX)
+ BODY1( 3*4,0xc1bdceee,FN1,S14,BX,CX,DX,AX)
+
+ BODY1( 4*4,0xf57c0faf,FN1,S11,AX,BX,CX,DX)
+ BODY1( 5*4,0x4787c62a,FN1,S12,DX,AX,BX,CX)
+ BODY1( 6*4,0xa8304613,FN1,S13,CX,DX,AX,BX)
+ BODY1( 7*4,0xfd469501,FN1,S14,BX,CX,DX,AX)
+
+ BODY1( 8*4,0x698098d8,FN1,S11,AX,BX,CX,DX)
+ BODY1( 9*4,0x8b44f7af,FN1,S12,DX,AX,BX,CX)
+ BODY1(10*4,0xffff5bb1,FN1,S13,CX,DX,AX,BX)
+ BODY1(11*4,0x895cd7be,FN1,S14,BX,CX,DX,AX)
+
+ BODY1(12*4,0x6b901122,FN1,S11,AX,BX,CX,DX)
+ BODY1(13*4,0xfd987193,FN1,S12,DX,AX,BX,CX)
+ BODY1(14*4,0xa679438e,FN1,S13,CX,DX,AX,BX)
+ BODY1(15*4,0x49b40821,FN1,S14,BX,CX,DX,AX)
+
+
+ BODY( 1*4,0xf61e2562,FN2,S21,AX,BX,CX,DX)
+ BODY( 6*4,0xc040b340,FN2,S22,DX,AX,BX,CX)
+ BODY(11*4,0x265e5a51,FN2,S23,CX,DX,AX,BX)
+ BODY( 0*4,0xe9b6c7aa,FN2,S24,BX,CX,DX,AX)
+
+ BODY( 5*4,0xd62f105d,FN2,S21,AX,BX,CX,DX)
+ BODY(10*4,0x02441453,FN2,S22,DX,AX,BX,CX)
+ BODY(15*4,0xd8a1e681,FN2,S23,CX,DX,AX,BX)
+ BODY( 4*4,0xe7d3fbc8,FN2,S24,BX,CX,DX,AX)
+
+ BODY( 9*4,0x21e1cde6,FN2,S21,AX,BX,CX,DX)
+ BODY(14*4,0xc33707d6,FN2,S22,DX,AX,BX,CX)
+ BODY( 3*4,0xf4d50d87,FN2,S23,CX,DX,AX,BX)
+ BODY( 8*4,0x455a14ed,FN2,S24,BX,CX,DX,AX)
+
+ BODY(13*4,0xa9e3e905,FN2,S21,AX,BX,CX,DX)
+ BODY( 2*4,0xfcefa3f8,FN2,S22,DX,AX,BX,CX)
+ BODY( 7*4,0x676f02d9,FN2,S23,CX,DX,AX,BX)
+ BODY(12*4,0x8d2a4c8a,FN2,S24,BX,CX,DX,AX)
+
+
+ BODY( 5*4,0xfffa3942,FN3,S31,AX,BX,CX,DX)
+ BODY( 8*4,0x8771f681,FN3,S32,DX,AX,BX,CX)
+ BODY(11*4,0x6d9d6122,FN3,S33,CX,DX,AX,BX)
+ BODY(14*4,0xfde5380c,FN3,S34,BX,CX,DX,AX)
+
+ BODY( 1*4,0xa4beea44,FN3,S31,AX,BX,CX,DX)
+ BODY( 4*4,0x4bdecfa9,FN3,S32,DX,AX,BX,CX)
+ BODY( 7*4,0xf6bb4b60,FN3,S33,CX,DX,AX,BX)
+ BODY(10*4,0xbebfbc70,FN3,S34,BX,CX,DX,AX)
+
+ BODY(13*4,0x289b7ec6,FN3,S31,AX,BX,CX,DX)
+ BODY( 0*4,0xeaa127fa,FN3,S32,DX,AX,BX,CX)
+ BODY( 3*4,0xd4ef3085,FN3,S33,CX,DX,AX,BX)
+ BODY( 6*4,0x04881d05,FN3,S34,BX,CX,DX,AX)
+
+ BODY( 9*4,0xd9d4d039,FN3,S31,AX,BX,CX,DX)
+ BODY(12*4,0xe6db99e5,FN3,S32,DX,AX,BX,CX)
+ BODY(15*4,0x1fa27cf8,FN3,S33,CX,DX,AX,BX)
+ BODY( 2*4,0xc4ac5665,FN3,S34,BX,CX,DX,AX)
+
+
+ BODY( 0*4,0xf4292244,FN4,S41,AX,BX,CX,DX)
+ BODY( 7*4,0x432aff97,FN4,S42,DX,AX,BX,CX)
+ BODY(14*4,0xab9423a7,FN4,S43,CX,DX,AX,BX)
+ BODY( 5*4,0xfc93a039,FN4,S44,BX,CX,DX,AX)
+
+ BODY(12*4,0x655b59c3,FN4,S41,AX,BX,CX,DX)
+ BODY( 3*4,0x8f0ccc92,FN4,S42,DX,AX,BX,CX)
+ BODY(10*4,0xffeff47d,FN4,S43,CX,DX,AX,BX)
+ BODY( 1*4,0x85845dd1,FN4,S44,BX,CX,DX,AX)
+
+ BODY( 8*4,0x6fa87e4f,FN4,S41,AX,BX,CX,DX)
+ BODY(15*4,0xfe2ce6e0,FN4,S42,DX,AX,BX,CX)
+ BODY( 6*4,0xa3014314,FN4,S43,CX,DX,AX,BX)
+ BODY(13*4,0x4e0811a1,FN4,S44,BX,CX,DX,AX)
+
+ BODY( 4*4,0xf7537e82,FN4,S41,AX,BX,CX,DX)
+ BODY(11*4,0xbd3af235,FN4,S42,DX,AX,BX,CX)
+ BODY( 2*4,0x2ad7d2bb,FN4,S43,CX,DX,AX,BX)
+ BODY( 9*4,0xeb86d391,FN4,S44,BX,CX,DX,AX)
+
+ ADDQ $(16*4),BP
+ MOVQ state+STATE(FP),DI
+ ADDL AX,0(DI)
+ ADDL BX,4(DI)
+ ADDL CX,8(DI)
+ ADDL DX,12(DI)
+
+ CMPQ BP,R8
+ JCS mainloop
+
+ RET
--- /dev/null
+objtype=amd64
+</$objtype/mkfile
+
+LIB=/$objtype/lib/libsec.a
+FILES=\
+ md5block\
+ sha1block\
+
+HFILES=/sys/include/libsec.h
+
+SFILES=${FILES:%=%.s}
+
+OFILES=${FILES:%=%.$O}
+
+UPDATE=mkfile\
+ $HFILES\
+ $SFILES\
+
+</sys/src/cmd/mksyslib
--- /dev/null
+/* x = (wp[off-f] ^ wp[off-8] ^ wp[off-14] ^ wp[off-16]) <<< 1;
+ * wp[off] = x;
+ * x += A <<< 5;
+ * E += 0xca62c1d6 + x;
+ * x = FN(B,C,D);
+ * E += x;
+ * B >>> 2
+ */
+#define BSWAPDI BYTE $0x0f; BYTE $0xcf;
+
+#define BODY(off,FN,V,A,B,C,D,E)\
+ MOVL (off-64)(BP),DI;\
+ XORL (off-56)(BP),DI;\
+ XORL (off-32)(BP),DI;\
+ XORL (off-12)(BP),DI;\
+ ROLL $1,DI;\
+ MOVL DI,off(BP);\
+ LEAL V(DI)(E*1),E;\
+ MOVL A,DI;\
+ ROLL $5,DI;\
+ ADDL DI,E;\
+ FN(B,C,D)\
+ ADDL DI,E;\
+ RORL $2,B;\
+
+#define BODY0(off,FN,V,A,B,C,D,E)\
+ MOVLQZX off(BX),DI;\
+ BSWAPDI;\
+ MOVL DI,off(BP);\
+ LEAL V(DI)(E*1),E;\
+ MOVL A,DI;\
+ ROLL $5,DI;\
+ ADDL DI,E;\
+ FN(B,C,D)\
+ ADDL DI,E;\
+ RORL $2,B;\
+
+/*
+ * fn1 = (((C^D)&B)^D);
+ */
+#define FN1(B,C,D)\
+ MOVL C,DI;\
+ XORL D,DI;\
+ ANDL B,DI;\
+ XORL D,DI;\
+
+/*
+ * fn24 = B ^ C ^ D
+ */
+#define FN24(B,C,D)\
+ MOVL B,DI;\
+ XORL C,DI;\
+ XORL D,DI;\
+
+/*
+ * fn3 = ((B ^ C) & (D ^= B)) ^ B
+ * D ^= B to restore D
+ */
+#define FN3(B,C,D)\
+ MOVL B,DI;\
+ XORL C,DI;\
+ XORL B,D;\
+ ANDL D,DI;\
+ XORL B,DI;\
+ XORL B,D;\
+
+/*
+ * stack offsets
+ * void sha1block(uchar *DATA, int LEN, ulong *STATE)
+ */
+#define DATA 0
+#define LEN 8
+#define STATE 16
+
+/*
+ * stack offsets for locals
+ * ulong w[80];
+ * uchar *edata;
+ * ulong *w15, *w40, *w60, *w80;
+ * register local
+ * ulong *wp = BP
+ * ulong a = eax, b = ebx, c = ecx, d = edx, e = esi
+ * ulong tmp = edi
+ */
+#define Rpdata R8
+#define WARRAY (-8-(80*4))
+#define TMP1 (-16-(80*4))
+#define TMP2 (-24-(80*4))
+#define W15 (-32-(80*4))
+#define W40 (-40-(80*4))
+#define W60 (-48-(80*4))
+#define W80 (-56-(80*4))
+#define EDATA (-64-(80*4))
+
+TEXT _sha1block+0(SB),$384
+
+ MOVQ RARG, Rpdata
+ MOVLQZX len+LEN(FP),BX
+ ADDQ BX, RARG
+ MOVQ RARG,edata+EDATA(SP)
+
+ LEAQ aw15+(WARRAY+15*4)(SP),DI
+ MOVQ DI,w15+W15(SP)
+ LEAQ aw40+(WARRAY+40*4)(SP),DX
+ MOVQ DX,w40+W40(SP)
+ LEAQ aw60+(WARRAY+60*4)(SP),CX
+ MOVQ CX,w60+W60(SP)
+ LEAQ aw80+(WARRAY+80*4)(SP),DI
+ MOVQ DI,w80+W80(SP)
+
+mainloop:
+ LEAQ warray+WARRAY(SP),BP
+
+ MOVQ state+STATE(FP),DI
+ MOVL (DI),AX
+ MOVL 4(DI),BX
+ MOVL BX,tmp1+TMP1(SP)
+ MOVL 8(DI),CX
+ MOVL 12(DI),DX
+ MOVL 16(DI),SI
+
+ MOVQ Rpdata,BX
+
+loop1:
+ BODY0(0,FN1,0x5a827999,AX,tmp1+TMP1(SP),CX,DX,SI)
+ MOVL SI,tmp2+TMP2(SP)
+ BODY0(4,FN1,0x5a827999,SI,AX,tmp1+TMP1(SP),CX,DX)
+ MOVL tmp1+TMP1(SP),SI
+ BODY0(8,FN1,0x5a827999,DX,tmp2+TMP2(SP),AX,SI,CX)
+ BODY0(12,FN1,0x5a827999,CX,DX,tmp2+TMP2(SP),AX,SI)
+ MOVL SI,tmp1+TMP1(SP)
+ BODY0(16,FN1,0x5a827999,SI,CX,DX,tmp2+TMP2(SP),AX)
+ MOVL tmp2+TMP2(SP),SI
+
+ ADDQ $20,BX
+ ADDQ $20,BP
+ CMPQ BP,w15+W15(SP)
+ JCS loop1
+
+ BODY0(0,FN1,0x5a827999,AX,tmp1+TMP1(SP),CX,DX,SI)
+ ADDQ $4,BX
+ MOVQ BX,R8
+ MOVQ tmp1+TMP1(SP),BX
+
+ BODY(4,FN1,0x5a827999,SI,AX,BX,CX,DX)
+ BODY(8,FN1,0x5a827999,DX,SI,AX,BX,CX)
+ BODY(12,FN1,0x5a827999,CX,DX,SI,AX,BX)
+ BODY(16,FN1,0x5a827999,BX,CX,DX,SI,AX)
+
+ ADDQ $20,BP
+
+loop2:
+ BODY(0,FN24,0x6ed9eba1,AX,BX,CX,DX,SI)
+ BODY(4,FN24,0x6ed9eba1,SI,AX,BX,CX,DX)
+ BODY(8,FN24,0x6ed9eba1,DX,SI,AX,BX,CX)
+ BODY(12,FN24,0x6ed9eba1,CX,DX,SI,AX,BX)
+ BODY(16,FN24,0x6ed9eba1,BX,CX,DX,SI,AX)
+
+ ADDQ $20,BP
+ CMPQ BP,w40+W40(SP)
+ JCS loop2
+
+loop3:
+ BODY(0,FN3,0x8f1bbcdc,AX,BX,CX,DX,SI)
+ BODY(4,FN3,0x8f1bbcdc,SI,AX,BX,CX,DX)
+ BODY(8,FN3,0x8f1bbcdc,DX,SI,AX,BX,CX)
+ BODY(12,FN3,0x8f1bbcdc,CX,DX,SI,AX,BX)
+ BODY(16,FN3,0x8f1bbcdc,BX,CX,DX,SI,AX)
+
+ ADDQ $20,BP
+ CMPQ BP,w60+W60(SP)
+ JCS loop3
+
+loop4:
+ BODY(0,FN24,0xca62c1d6,AX,BX,CX,DX,SI)
+ BODY(4,FN24,0xca62c1d6,SI,AX,BX,CX,DX)
+ BODY(8,FN24,0xca62c1d6,DX,SI,AX,BX,CX)
+ BODY(12,FN24,0xca62c1d6,CX,DX,SI,AX,BX)
+ BODY(16,FN24,0xca62c1d6,BX,CX,DX,SI,AX)
+
+ ADDQ $20,BP
+ CMPQ BP,w80+W80(SP)
+ JCS loop4
+
+ MOVQ state+STATE(FP),DI
+ ADDL AX,0(DI)
+ ADDL BX,4(DI)
+ ADDL CX,8(DI)
+ ADDL DX,12(DI)
+ ADDL SI,16(DI)
+
+ MOVQ edata+EDATA(SP),DI
+ CMPQ Rpdata,DI
+ JCS mainloop
+
+ RET
+ END