git.lizzy.rs Git - plan9front.git/commitdiff
exportfs: fix flush races, proc sweeping
author cinap_lenrek <cinap_lenrek@felloff.net>
Tue, 18 Feb 2014 21:15:06 +0000 (22:15 +0100)
committer cinap_lenrek <cinap_lenrek@felloff.net>
Tue, 18 Feb 2014 21:15:06 +0000 (22:15 +0100)
sys/src/cmd/exportfs/exportfs.c
sys/src/cmd/exportfs/exportfs.h
sys/src/cmd/exportfs/exportsrv.c

index c16220deeda26a8eba8d3847140af318fe368a00..628975dd708c8876fcbb73f8168ed333ad1a301f 100644 (file)
@@ -460,8 +460,8 @@ reply(Fcall *r, Fcall *t, char *err)
                fatal(Enomem);
        n = convS2M(t, data, messagesize);
        if(write(netfd, data, n)!=n){
-               syslog(0, "exportfs", "short write: %r");
-               fatal("mount write");
+               /* not fatal, might have got a note due to flush */
+               fprint(2, "exportfs: short write in reply: %r\n");
        }
        free(data);
 }
@@ -570,8 +570,6 @@ getsbuf(void)
                unlock(&sbufalloc);
                w = emallocz(sizeof(*w) + messagesize);
        }
-       w->pid = 0;
-       w->canint = 0;
        w->flushtag = NOTAG;
        return w;
 }
@@ -579,8 +577,6 @@ getsbuf(void)
 void
 putsbuf(Fsrpc *w)
 {
-       w->pid = 0;
-       w->canint = 0;
        w->flushtag = NOTAG;
        lock(&sbufalloc);
        w->next = sbufalloc.free;
index 77709d0d97add6831df80b8061960cc5194d79c9..b3587fab2ba4e3daacfc8376e6a5a9241449ebcf 100644 (file)
@@ -15,8 +15,6 @@ typedef struct Qidtab Qidtab;
 struct Fsrpc
 {
        Fsrpc   *next;          /* freelist */
-       uintptr pid;            /* Pid of slave process executing the rpc */
-       int     canint;         /* Interrupt gate */
        int     flushtag;       /* Tag on which to reply to flush */
        Fcall   work;           /* Plan 9 incoming Fcall */
        uchar   buf[];          /* Data buffer */
@@ -53,9 +51,10 @@ struct File
 
 struct Proc
 {
-       uintptr pid;
+       Lock;
        Fsrpc   *busy;
        Proc    *next;
+       int     pid;
 };
 
 struct Qidtab
@@ -70,7 +69,6 @@ struct Qidtab
 
 enum
 {
-       MAXPROC         = 50,
        FHASHSIZE       = 64,
        Fidchunk        = 1000,
        Npsmpt          = 32,
@@ -128,7 +126,7 @@ void        freefile(File*);
 void   slaveopen(Fsrpc*);
 void   slaveread(Fsrpc*);
 void   slavewrite(Fsrpc*);
-void   blockingslave(void);
+void   blockingslave(Proc*);
 void   reopen(Fid *f);
 void   noteproc(int, char*);
 void   flushaction(void*, char*);
index d7a0ad290374e21d9dfd44ae84ba6ed64eaaacda..fe76e81e467a59b78d09c962ef6892490adfd6b1 100644 (file)
@@ -64,14 +64,20 @@ Xflush(Fsrpc *t)
 
        for(m = Proclist; m; m = m->next){
                w = m->busy;
-               if(w != 0 && w->pid == m->pid && w->work.tag == t->work.oldtag) {
+               if(w == nil || w->work.tag != t->work.oldtag)
+                       continue;
+
+               lock(m);
+               w = m->busy;
+               if(w != nil && w->work.tag == t->work.oldtag) {
                        w->flushtag = t->work.tag;
                        DEBUG(DFD, "\tset flushtag %d\n", t->work.tag);
-                       if(w->canint)
-                               postnote(PNPROC, w->pid, "flush");
+                       postnote(PNPROC, m->pid, "flush");
+                       unlock(m);
                        putsbuf(t);
                        return;
                }
+               unlock(m);
        }
 
        reply(&t->work, &rhdr, 0);
@@ -459,10 +465,10 @@ procsetname(char *fmt, ...)
 void
 slave(Fsrpc *f)
 {
-       Proc *p;
-       uintptr pid;
-       Fcall rhdr;
        static int nproc;
+       Proc *p, **l;
+       Fcall rhdr;
+       int pid;
 
        if(readonly){
                switch(f->work.type){
@@ -479,30 +485,41 @@ slave(Fsrpc *f)
                }
        }
        for(;;) {
-               for(p = Proclist; p; p = p->next) {
-                       if(p->busy == 0) {
-                               f->pid = p->pid;
-                               p->busy = f;
-                               do {
-                                       pid = (uintptr)rendezvous((void*)p->pid, f);
-                               }
-                               while(pid == ~0);       /* Interrupted */
-                               if(pid != p->pid)
-                                       fatal("rendezvous sync fail");
-                               return;
-                       }       
+               for(l = &Proclist; (p = *l) != nil; l = &p->next) {
+                       if(p->busy != nil)
+                               continue;
+
+                       p->busy = f;
+                       while(rendezvous(p, f) == (void*)~0)
+                               ;
+
+                       /* swept a slave proc */
+                       if(f == nil){
+                               *l = p->next;
+                               free(p);
+                               nproc--;
+                               break;
+                       }
+                       f = nil;
+
+                       /*
+                        * as long as the number of slave procs
+                        * is small, dont bother sweeping.
+                        */
+                       if(nproc < 16)
+                               break;
                }
+               if(f == nil)
+                       return;
 
-               if(nproc >= MAXPROC){
+               p = emallocz(sizeof(Proc));
+               pid = rfork(RFPROC|RFMEM|RFNOWAIT);
+               switch(pid) {
+               case -1:
                        reply(&f->work, &rhdr, Enoprocs);
                        putsbuf(f);
+                       free(p);
                        return;
-               }
-               nproc++;
-               pid = rfork(RFPROC|RFMEM);
-               switch(pid) {
-               case -1:
-                       fatal("rfork");
 
                case 0:
                        if (local[0] != '\0')
@@ -511,44 +528,34 @@ slave(Fsrpc *f)
                                                local, remote);
                                else
                                        procsetname("%s -> %s", local, remote);
-                       blockingslave();
-                       fatal("slave");
+                       blockingslave(p);
+                       _exits(0);
 
                default:
-                       p = emallocz(sizeof(Proc));
-                       p->busy = 0;
                        p->pid = pid;
                        p->next = Proclist;
                        Proclist = p;
-                       while(rendezvous((void*)pid, p) == (void*)~0)
-                               ;
+                       nproc++;
                }
        }
 }
 
 void
-blockingslave(void)
+blockingslave(Proc *m)
 {
        Fsrpc *p;
        Fcall rhdr;
-       Proc *m;
-       uintptr pid;
 
        notify(flushaction);
 
-       pid = getpid();
-
-       do {
-               m = rendezvous((void*)pid, 0);
-       }
-       while(m == (void*)~0);  /* Interrupted */
-       
        for(;;) {
-               p = rendezvous((void*)pid, (void*)pid);
-               if(p == (void*)~0)                      /* Interrupted */
+               p = rendezvous(m, nil);
+               if(p == (void*)~0)      /* Interrupted */
                        continue;
+               if(p == nil)            /* Swept */
+                       break;
 
-               DEBUG(DFD, "\tslave: %p %F p %p\n", pid, &p->work, p->pid);
+               DEBUG(DFD, "\tslave: %d %F\n", m->pid, &p->work);
                if(p->flushtag != NOTAG)
                        goto flushme;
 
@@ -568,13 +575,17 @@ blockingslave(void)
                default:
                        reply(&p->work, &rhdr, "exportfs: slave type error");
                }
-               if(p->flushtag != NOTAG) {
 flushme:
+               lock(m);
+               m->busy = nil;
+               unlock(m);
+
+               /* no more flushes can come in now */
+               if(p->flushtag != NOTAG) {
                        p->work.type = Tflush;
                        p->work.tag = p->flushtag;
                        reply(&p->work, &rhdr, 0);
                }
-               m->busy = 0;
                putsbuf(p);
        }
 }
@@ -654,16 +665,8 @@ slaveopen(Fsrpc *p)
        
        path = makepath(f->f, "");
        DEBUG(DFD, "\topen: %s %d\n", path, work->mode);
-
-       p->canint = 1;
-       if(p->flushtag != NOTAG){
-               free(path);
-               return;
-       }
-       /* There is a race here I ignore because there are no locks */
        f->fid = open(path, work->mode);
        free(path);
-       p->canint = 0;
        if(f->fid < 0 || (d = dirfstat(f->fid)) == nil) {
        Error:
                errstr(err, sizeof err);
@@ -703,9 +706,6 @@ slaveread(Fsrpc *p)
        }
 
        n = (work->count > messagesize-IOHDRSZ) ? messagesize-IOHDRSZ : work->count;
-       p->canint = 1;
-       if(p->flushtag != NOTAG)
-               return;
        data = malloc(n);
        if(data == 0) {
                reply(work, &rhdr, Enomem);
@@ -717,14 +717,12 @@ slaveread(Fsrpc *p)
                r = preaddir(f, (uchar*)data, n, work->offset);
        else
                r = pread(f->fid, data, n, work->offset);
-       p->canint = 0;
        if(r < 0) {
                free(data);
                errstr(err, sizeof err);
                reply(work, &rhdr, err);
                return;
        }
-
        DEBUG(DFD, "\tread: fd=%d %d bytes\n", f->fid, r);
 
        rhdr.data = data;
@@ -750,11 +748,7 @@ slavewrite(Fsrpc *p)
        }
 
        n = (work->count > messagesize-IOHDRSZ) ? messagesize-IOHDRSZ : work->count;
-       p->canint = 1;
-       if(p->flushtag != NOTAG)
-               return;
        n = pwrite(f->fid, work->data, n, work->offset);
-       p->canint = 0;
        if(n < 0) {
                errstr(err, sizeof err);
                reply(work, &rhdr, err);