3 #include "../port/lib.h"
7 #include "../port/error.h"
12 int shargs(char*, int, char**);
14 extern void checkpages(void);
15 extern void checkpagerefs(void);
37 /* Check flags before we commit */
38 if((flag & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG))
40 if((flag & (RFNAMEG|RFCNAMEG)) == (RFNAMEG|RFCNAMEG))
42 if((flag & (RFENVG|RFCENVG)) == (RFENVG|RFCENVG))
45 if((flag&RFPROC) == 0) {
46 if(flag & (RFMEM|RFNOWAIT))
48 if(flag & (RFFDG|RFCFDG)) {
51 up->fgrp = dupfgrp(ofg);
53 up->fgrp = dupfgrp(nil);
56 if(flag & (RFNAMEG|RFCNAMEG)) {
60 pgrpcpy(up->pgrp, opg);
61 /* inherit noattach */
62 up->pgrp->noattach = opg->noattach;
66 up->pgrp->noattach = 1;
72 if(flag & (RFENVG|RFCENVG)) {
74 up->egrp = smalloc(sizeof(Egrp));
77 envcpy(up->egrp, oeg);
81 up->noteid = pidalloc(0);
87 p->fpsave = up->fpsave;
88 p->scallnr = up->scallnr;
95 memmove(p->note, up->note, sizeof(p->note));
96 p->privatemem = up->privatemem;
97 p->noswap = up->noswap;
100 p->lastnote = up->lastnote;
101 p->notify = up->notify;
105 /* Make a new set of memory segments */
109 qunlock(&p->seglock);
112 for(i = 0; i < NSEG; i++)
114 p->seg[i] = dupseg(up->seg, i, n);
115 qunlock(&p->seglock);
118 /* File descriptors */
119 if(flag & (RFFDG|RFCFDG)) {
121 p->fgrp = dupfgrp(up->fgrp);
123 p->fgrp = dupfgrp(nil);
131 if(flag & (RFNAMEG|RFCNAMEG)) {
134 pgrpcpy(p->pgrp, up->pgrp);
135 /* inherit noattach */
136 p->pgrp->noattach = up->pgrp->noattach;
143 p->pgrp->noattach = 1;
152 /* Environment group */
153 if(flag & (RFENVG|RFCENVG)) {
154 p->egrp = smalloc(sizeof(Egrp));
157 envcpy(p->egrp, up->egrp);
164 p->procmode = up->procmode;
165 if(up->procctl == Proc_tracesyscall)
166 p->procctl = Proc_tracesyscall;
168 /* Craft a return frame which will cause the child to pop out of
169 * the scheduler in user mode with the return register zero
171 forkchild(p, up->dbgreg);
174 if((flag&RFNOWAIT) == 0){
175 p->parentpid = up->pid;
180 if((flag&RFNOTEG) == 0)
181 p->noteid = up->noteid;
183 p->fpstate = up->fpstate;
185 memset(p->time, 0, sizeof(p->time));
186 p->time[TReal] = MACHP(0)->ticks;
188 kstrdup(&p->text, up->text);
189 kstrdup(&p->user, up->user);
194 * since the bss/data segments are now shareable,
195 * any mmu info about this process is now stale
196 * (i.e. has bad properties) and has to be discarded.
199 p->basepri = up->basepri;
200 p->priority = up->basepri;
201 p->fixedpri = up->fixedpri;
205 procwired(p, wm->machno);
217 return (cp[0]<<24) | (cp[1]<<16) | (cp[2]<<8) | cp[3];
228 char *a, *charp, *args, *file, *file0;
229 char *progarg[sizeof(Exec)/2+1], *elem, progelem[64];
230 ulong ssize, tstk, nargs, nbytes, n, bssend;
233 char line[sizeof(Exec)];
236 ulong magic, text, entry, data, bss;
242 validaddr(arg[0], 1, 0);
243 file0 = validnamedup((char*)arg[0], 1);
247 /* Disaster after commit */
249 pexit(up->errstr, 1);
254 tc = namec(file, Aopen, OEXEC, 0);
260 kstrdup(&elem, up->genbuf);
262 n = devtab[tc->type]->read(tc, &exec, sizeof(Exec), 0);
265 magic = l2be(exec.magic);
266 text = l2be(exec.text);
267 entry = l2be(exec.entry);
268 if(n==sizeof(Exec) && (magic == AOUT_MAGIC)){
269 if(text >= USTKTOP-UTZERO
270 || entry < UTZERO+sizeof(Exec)
271 || entry >= UTZERO+sizeof(Exec)+text)
273 break; /* for binary */
277 * Process #! /bin/sh args ...
279 memmove(line, &exec, sizeof(Exec));
280 if(indir || line[0]!='#' || line[1]!='!')
282 n = shargs(line, n, progarg);
287 * First arg becomes complete file name
291 validaddr(arg[1], BY2WD, 1);
294 if(strlen(elem) >= sizeof progelem)
296 strcpy(progelem, elem);
297 progarg[0] = progelem;
302 data = l2be(exec.data);
303 bss = l2be(exec.bss);
304 t = (UTZERO+sizeof(Exec)+text+(BY2PG-1)) & ~(BY2PG-1);
305 d = (t + data + (BY2PG-1)) & ~(BY2PG-1);
306 bssend = t + data + bss;
307 b = (bssend + (BY2PG-1)) & ~(BY2PG-1);
308 if(t >= KZERO || d >= KZERO || b >= KZERO)
312 * Args: pass 1: count
314 nbytes = sizeof(Tos); /* hole for profiling clock at top of stack (and more) */
320 nbytes += strlen(a) + 1;
325 argp = (char**)arg[1];
326 validaddr((ulong)argp, BY2WD, 0);
329 if(((ulong)argp&(BY2PG-1)) < BY2WD)
330 validaddr((ulong)argp, BY2WD, 0);
331 validaddr((ulong)a, 1, 0);
332 nbytes += ((char*)vmemchr(a, 0, 0x7FFFFFFF) - a) + 1;
335 ssize = BY2WD*(nargs+1) + ((nbytes+(BY2WD-1)) & ~(BY2WD-1));
338 * 8-byte align SP for those (e.g. sparc) that need it.
339 * execregs() will subtract another 4 bytes for argc.
344 if(PGROUND(ssize) >= USTKSIZE)
348 * Build the stack segment, putting it in kernel virtual for the moment
352 qunlock(&up->seglock);
361 } while((s = isoverlap(up, tstk-USTKSIZE, USTKSIZE)) != nil);
362 up->seg[ESEG] = newseg(SG_STACK, tstk-USTKSIZE, USTKSIZE/BY2PG);
365 * Args: pass 2: assemble; the pages will be faulted in
367 tos = (Tos*)(tstk - sizeof(Tos));
368 tos->cyclefreq = m->cyclefreq;
373 argv = (char**)(tstk - ssize);
374 charp = (char*)(tstk - nbytes);
379 argp = (char**)arg[1];
381 for(i=0; i<nargs; i++){
382 if(indir && *argp==0) {
384 argp = (char**)arg[1];
386 *argv++ = charp + (USTKTOP-tstk);
387 n = strlen(*argp) + 1;
388 memmove(charp, *argp++, n);
392 file0 = nil; /* so waserror() won't free file0 */
397 elem = nil; /* so waserror() won't free elem */
400 /* copy args; easiest from new process's stack */
402 if(n > 128) /* don't waste too much space on huge arg lists */
407 up->args = smalloc(n);
408 memmove(up->args, args, n);
409 if(n>0 && up->args[n-1]!='\0'){
410 /* make sure last arg is NUL-terminated */
411 /* put NUL at UTF-8 character boundary */
413 if(fullrune(up->args+i, n-i))
426 * Special segments are maintained across exec
428 for(i = SSEG; i <= BSEG; i++) {
430 /* prevent a second free if we have an error */
433 for(i = BSEG+1; i < NSEG; i++) {
435 if(s != 0 && (s->type&SG_CEXEC)) {
445 for(i=0; i<=f->maxfd; i++)
448 /* Text. Shared. Attaches to cache image if possible */
449 /* attachimage returns a locked cache image */
450 img = attachimage(SG_TEXT|SG_RONLY, tc, UTZERO, (t-UTZERO)>>PGSHIFT);
455 ts->flen = sizeof(Exec)+text;
459 s = newseg(SG_DATA, t, (d-t)>>PGSHIFT);
462 /* Attached by hand */
465 s->fstart = ts->fstart+ts->flen;
468 /* BSS. Zero fill on demand */
469 up->seg[BSEG] = newseg(SG_BSS, d, (b-d)>>PGSHIFT);
476 s->base = USTKTOP-USTKSIZE;
478 relocateseg(s, USTKTOP-tstk);
480 qunlock(&up->seglock);
481 poperror(); /* seglock */
484 * '/' processes are higher priority (hack to make /ip more responsive).
486 if(devtab[tc->type]->dc == L'/')
487 up->basepri = PriRoot;
488 up->priority = up->basepri;
491 poperror(); /* elem */
502 * At this point, the mmu contains info about the old address
503 * space and needs to be flushed
508 up->procctl = Proc_stopme;
509 return execregs(entry, ssize, nargs);
513 shargs(char *s, int n, char **ap)
518 n -= 2; /* skip #! */
519 for(i=0; s[i]!='\n'; i++)
526 while(*s==' ' || *s=='\t')
533 while(*s && *s!=' ' && *s!='\t')
557 if (up->edf && (up->edf->flags & Admitted))
565 tsleep(&up->sleep, return0, 0, n);
572 return procalarm(arg[0]);
579 char *inval = "invalid exit string";
582 status = (char*)arg[0];
587 validaddr((ulong)status, 1, 0);
588 if(vmemchr(status, 0, ERRMAX) == 0){
589 memmove(buf, status, ERRMAX);
598 return 0; /* not reached */
611 validaddr(arg[0], sizeof(OWaitmsg), 1);
615 ow = (OWaitmsg*)arg[0];
616 readnum(0, ow->pid, NUMSIZE, w.pid, NUMSIZE);
617 readnum(0, ow->time+TUser*NUMSIZE, NUMSIZE, w.time[TUser], NUMSIZE);
618 readnum(0, ow->time+TSys*NUMSIZE, NUMSIZE, w.time[TSys], NUMSIZE);
619 readnum(0, ow->time+TReal*NUMSIZE, NUMSIZE, w.time[TReal], NUMSIZE);
620 strncpy(ow->msg, w.msg, sizeof(ow->msg)-1);
621 ow->msg[sizeof(ow->msg)-1] = '\0';
635 validaddr(arg[0], n, 1);
639 i = snprint((char*)arg[0], n, "%d %lud %lud %lud %q",
641 w.time[TUser], w.time[TSys], w.time[TReal],
648 werrstr(char *fmt, ...)
656 vseprint(up->syserrstr, up->syserrstr+ERRMAX, fmt, va);
661 generrstr(char *buf, uint nbuf)
667 validaddr((ulong)buf, nbuf, 1);
668 if(nbuf > sizeof tmp)
670 memmove(tmp, buf, nbuf);
672 /* make sure it's NUL-terminated */
674 memmove(buf, up->syserrstr, nbuf);
676 memmove(up->syserrstr, tmp, nbuf);
681 syserrstr(ulong *arg)
683 return generrstr((char*)arg[0], arg[1]);
686 /* compatibility for old binaries */
688 sys_errstr(ulong *arg)
690 return generrstr((char*)arg[0], 64);
694 sysnotify(ulong *arg)
697 validaddr(arg[0], sizeof(ulong), 0);
698 up->notify = (int(*)(void*, char*))(arg[0]);
705 if(arg[0]!=NRSTR && !up->notified)
711 syssegbrk(ulong *arg)
718 for(i = 0; i < NSEG; i++) {
720 if(s == 0 || addr < s->base || addr >= s->top)
722 switch(s->type&SG_TYPE) {
728 return ibrk(arg[1], i);
733 return 0; /* not reached */
737 syssegattach(ulong *arg)
739 return segattach(up, arg[0], (char*)arg[1], arg[2], arg[3]);
743 syssegdetach(ulong *arg)
751 qunlock(&up->seglock);
757 for(i = 0; i < NSEG; i++)
760 if((addr >= s->base && addr < s->top) ||
761 (s->top == s->base && addr == s->base))
770 * Check we are not detaching the initial stack segment.
772 if(s == up->seg[SSEG]){
779 qunlock(&up->seglock);
782 /* Ensure we flush any entries from the lost segment */
788 syssegfree(ulong *arg)
794 s = seg(up, from, 1);
797 to = (from + arg[1]) & ~(BY2PG-1);
798 from = PGROUND(from);
805 mfreeseg(s, from, (to - from) / BY2PG);
812 /* For binary compatibility */
816 return ibrk(arg[0], BSEG);
820 sysrendezvous(ulong *arg)
826 l = &REND(up->rgrp, tag);
829 for(p = *l; p; p = p->rendhash) {
830 if(p->rendtag == tag) {
843 /* Going to sleep here */
845 up->rendval = arg[1];
848 up->state = Rendezvous;
857 * The implementation of semaphores is complicated by needing
858 * to avoid rescheduling in syssemrelease, so that it is safe
859 * to call from real-time processes. This means syssemrelease
860 * cannot acquire any qlocks, only spin locks.
862 * Semacquire and semrelease must both manipulate the semaphore
863 * wait list. Lock-free linked lists only exist in theory, not
864 * in practice, so the wait list is protected by a spin lock.
866 * The semaphore value *addr is stored in user memory, so it
867 * cannot be read or written while holding spin locks.
869 * Thus, we can access the list only when holding the lock, and
870 * we can access the semaphore only when not holding the lock.
871 * This makes things interesting. Note that sleep's condition function
872 * is called while holding two locks - r and up->rlock - so it cannot
873 * access the semaphore value either.
875 * An acquirer announces its intention to try for the semaphore
876 * by putting a Sema structure onto the wait list and then
877 * setting Sema.waiting. After one last check of the semaphore,
878 * the acquirer sleeps until Sema.waiting==0. A releaser of n
879 * must wake up n acquirers who have Sema.waiting set. It does
880 * this by clearing Sema.waiting and then calling wakeup.
882 * There are three interesting races here.
884 * The first is that in this particular sleep/wakeup usage, a single
885 * wakeup can rouse a process from two consecutive sleeps!
888 * (a) set Sema.waiting = 1
890 * (b) set Sema.waiting = 0
891 * (a) check Sema.waiting inside sleep, return w/o sleeping
892 * (a) try for semaphore, fail
893 * (a) set Sema.waiting = 1
898 * This is okay - semacquire will just go around the loop
899 * again. It does mean that at the top of the for(;;) loop in
900 * semacquire, phore.waiting might already be set to 1.
902 * The second is that a releaser might wake an acquirer who is
903 * interrupted before he can acquire the lock. Since
904 * release(n) issues only n wakeup calls -- only n can be used
905 * anyway -- if the interrupted process is not going to use his
906 * wakeup call he must pass it on to another acquirer.
908 * The third race is similar to the second but more subtle. An
909 * acquirer sets waiting=1 and then does a final canacquire()
910 * before going to sleep. The opposite order would result in
911 * missing wakeups that happen between canacquire and
912 * waiting=1. (In fact, the whole point of Sema.waiting is to
913 * avoid missing wakeups between canacquire() and sleep().) But
914 * there can be spurious wakeups between a successful
915 * canacquire() and the following semdequeue(). This wakeup is
916 * not useful to the acquirer, since he has already acquired
917 * the semaphore. As in the previous case, though, the
918 * acquirer must pass the wakeup call along.
920 * This is all rather subtle. The code below has been verified
921 * with the spin model /sys/src/9/port/semaphore.p. The
922 * original code anticipated the second race but not the first
923 * or third, which were caught only with spin. The first race
924 * is mentioned in /sys/doc/sleep.ps, but I'd forgotten about it.
925 * It was lucky that my abstract model of sleep/wakeup still managed
926 * to preserve that behavior.
928 * I remain slightly concerned about memory coherence
929 * outside of locks. The spin model does not take
930 * queued processor writes into account so we have to
931 * think hard. The only variables accessed outside locks
932 * are the semaphore value itself and the boolean flag
933 * Sema.waiting. The value is only accessed with cmpswap,
934 * whose job description includes doing the right thing as
935 * far as memory coherence across processors goes. That leaves
936 * Sema.waiting. To handle it, we call coherence() before each
937 * read and after each write. - rsc
940 /* Add semaphore p with addr a to list in seg. */
942 semqueue(Segment *s, long *a, Sema *p)
944 memset(p, 0, sizeof *p);
946 lock(&s->sema); /* uses s->sema.Rendez.Lock, but no one else is */
948 p->prev = s->sema.prev;
954 /* Remove semaphore p from list in seg. */
956 semdequeue(Segment *s, Sema *p)
959 p->next->prev = p->prev;
960 p->prev->next = p->next;
964 /* Wake up n waiters with addr a on list in seg. */
966 semwakeup(Segment *s, long *a, long n)
971 for(p=s->sema.next; p!=&s->sema && n>0; p=p->next){
972 if(p->addr == a && p->waiting){
982 /* Add delta to semaphore and wake up waiters as appropriate. */
984 semrelease(Segment *s, long *addr, long delta)
990 while(!cmpswap(addr, value, value+delta));
991 semwakeup(s, addr, delta);
995 /* Try to acquire semaphore using compare-and-swap */
997 canacquire(long *addr)
1001 while((value=*addr) > 0)
1002 if(cmpswap(addr, value, value-1))
1007 /* Should we wake up? */
1012 return !((Sema*)p)->waiting;
1015 /* Acquire semaphore (subtract 1). */
1017 semacquire(Segment *s, long *addr, int block)
1022 if(canacquire(addr))
1028 semqueue(s, addr, &phore);
1032 if(canacquire(addr)){
1038 sleep(&phore, semawoke, &phore);
1041 semdequeue(s, &phore);
1042 coherence(); /* not strictly necessary due to lock in semdequeue */
1044 semwakeup(s, addr, 1);
1050 /* Acquire semaphore or time-out */
1052 tsemacquire(Segment *s, long *addr, ulong ms)
1054 int acquired, timedout;
1058 if(canacquire(addr))
1062 acquired = timedout = 0;
1063 semqueue(s, addr, &phore);
1067 if(canacquire(addr)){
1074 tsleep(&phore, semawoke, &phore, ms);
1075 elms = TK2MS(m->ticks - t);
1083 semdequeue(s, &phore);
1084 coherence(); /* not strictly necessary due to lock in semdequeue */
1086 semwakeup(s, addr, 1);
1095 syssemacquire(ulong *arg)
1101 validaddr(arg[0], sizeof(long), 1);
1103 addr = (long*)arg[0];
1106 if((s = seg(up, (ulong)addr, 0)) == nil)
1110 return semacquire(s, addr, block);
1114 systsemacquire(ulong *arg)
1120 validaddr(arg[0], sizeof(long), 1);
1122 addr = (long*)arg[0];
1125 if((s = seg(up, (ulong)addr, 0)) == nil)
1129 return tsemacquire(s, addr, ms);
1133 syssemrelease(ulong *arg)
1138 validaddr(arg[0], sizeof(long), 1);
1140 addr = (long*)arg[0];
1143 if((s = seg(up, (ulong)addr, 0)) == nil)
1145 if(delta < 0 || *addr < 0)
1147 return semrelease(s, addr, arg[1]);