8 char* file(char*, char);	/* return a name starting with the given character */
9 void warning(char*, void*);	/* print a warning and continue */
10 void error(char*, void*);	/* print an error and die */
11 int returnmail(char**, char*, char*);	/* send back the mail with an error message; returns 0 if successful */
12 void logit(char*, char*, char**);	/* syslog "curdir/file: msg" followed by the quoted args */
19 int giveup = 2*24*60*60;	/* age in seconds (default 2 days) past which mail is returned with "Giveup"; an option resets it in hours */
23 /* the current directory */
29 char *runqlog = "runq";	/* log name handed to syslog() */
32 char **badsys; /* array of recalcitrant systems */
35 int sflag; /* single thread per directory */
36 int aflag; /* all directories */
37 int Eflag; /* ignore E.xxxxxx dates */
38 int Rflag; /* no giving up, ever */
43 fprint(2, "usage: runq [-adsER] [-q dir] [-l load] [-t time] [-r nfiles] [-n nprocs] q-root cmd\n");	/* option list includes R (never give up), which main accepts */
48 main(int argc, char **argv)
66 case 'R': /* no giving up -- just leave stuff in the queue */
82 giveup = 60*60*atoi(ARGF());
99 pidlist = malloc(npid*sizeof(*pidlist));
101 error("can't malloc", 0);
103 if(aflag == 0 && qdir == 0) {
106 error("unknown user", 0);
112 error("can't cd to %s", root);
130 fd = open(name, OREAD);
133 n = read(fd, buf, sizeof(buf));
137 fprint(2, "removing directory %s\n", name);
138 syslog(0, runqlog, "rmdir %s", name);
151 for(i = 0; i < npid; i++){
159 syslog(0, runqlog, "forkltd confused");
163 for(i = 0; i < npid; i++)
164 if(pidlist[i] == pid)
172 * run all user directories, must be bootes (or root on unix) to do this
182 fd = open(".", OREAD);
184 warning("reading %s", root);
187 n = dirreadall(fd, &db);
190 if(db[i].qid.type & QTDIR){
191 if(emptydir(db[i].name))
195 syslog(0, runqlog, "out of procs");
214 * cd to a user directory and run it
222 warning("cd to %s", name);
226 fprint(2, "running %s\n", name);
232 * run the current directory
241 fd = sysopenlocked(".", OREAD);
243 fd = open(".", OREAD);
245 warning("reading %s", name);
248 nfiles = dirreadall(fd, &dirbuf);
250 for(i=0; i<nfiles; i++){
251 if(dirbuf[i].name[0]!='C' || dirbuf[i].name[1]!='.')
264 * free files matching name in the current directory
271 syslog(0, runqlog, "removing %s/%s", curdir, name);
273 for(i=0; i<nfiles; i++){
274 if(strcmp(&dirbuf[i].name[1], &name[1]) == 0)
275 remove(dirbuf[i].name);
278 /* error file (may have) appeared after we read the directory */
279 /* stomp on data file in case of phase error */
280 remove(file(name, 'D'));
281 remove(file(name, 'E'));
285 * like trylock, but we've already got the lock on fd,
286 * and don't want an L. lock file.
289 keeplockalive(char *path, int fd)
294 l = malloc(sizeof(Mlock));
298 snprint(l->name, sizeof l->name, "%s", path);
300 /* fork process to keep lock alive until sysunlock(l) */
301 switch(l->pid = rfork(RFPROC)){
308 if(pread(fd, buf, 1, 0) < 0)
323 int dfd, ac, dtime, efd, pid, i, etime;
324 char *buf, *cp, **av;
330 fprint(2, "dofile %s\n", dp->name);
332 * if no data file or empty control or data file, just clean up
333 * the empty control file must be 15 minutes old, to minimize the
336 d = dirstat(file(dp->name, 'D'));
338 syslog(0, runqlog, "no data file for %s", dp->name);
343 if(time(0)-dp->mtime > 15*60){
344 syslog(0, runqlog, "empty ctl file for %s", dp->name);
353 * retry times depend on the age of the errors file
355 if(!Eflag && (d = dirstat(file(dp->name, 'E'))) != nil){
358 if(etime - dtime < 60*60){
359 /* up to the first hour, try every 15 minutes */
360 if(time(0) - etime < 15*60)
363 /* after the first hour, try once an hour */
364 if(time(0) - etime < 60*60)
371 * open control and data
373 b = sysopen(file(dp->name, 'C'), "rl", 0660);
376 fprint(2, "can't open %s: %r\n", file(dp->name, 'C'));
379 dfd = open(file(dp->name, 'D'), OREAD);
382 fprint(2, "can't open %s: %r\n", file(dp->name, 'D'));
384 sysunlockfile(Bfildes(b));
390 * - read args into (malloc'd) buffer
391 * - malloc a vector and copy pointers to args into it
393 buf = malloc(dp->length+1);
395 warning("buffer allocation", 0);
397 sysunlockfile(Bfildes(b));
401 if(Bread(b, buf, dp->length) != dp->length){
402 warning("reading control file %s\n", dp->name);
404 sysunlockfile(Bfildes(b));
410 av = malloc(2*sizeof(char*));
412 warning("argv allocation", 0);
416 sysunlockfile(Bfildes(b));
419 for(ac = 1, cp = buf; *cp; ac++){
425 av = realloc(av, (ac+2)*sizeof(char*));
427 warning("argv allocation", 0);
431 sysunlockfile(Bfildes(b));
435 while(*cp && !isspace(*cp)){
437 while(*cp && *cp != '"')
447 if(!Eflag &&time(0) - dtime > giveup){
448 if(returnmail(av, dp->name, "Giveup") != 0)
449 logit("returnmail failed", dp->name, av);
454 for(i = 0; i < nbad; i++){
455 if(strcmp(av[3], badsys[i]) == 0)
460 * Ken's fs, for example, gives us 5 minutes of inactivity before
461 * the lock goes stale, so we have to keep reading it.
463 l = keeplockalive(file(dp->name, 'C'), Bfildes(b));
472 sysunlockfile(Bfildes(b));
473 syslog(0, runqlog, "out of procs");
477 fprint(2, "Starting %s", cmd);
478 for(ac = 0; av[ac]; ac++)
479 fprint(2, " %s", av[ac]);
482 logit("execing", dp->name, av);
487 efd = open(file(dp->name, 'E'), OWRITE);
489 if(debug) syslog(0, "runq", "open %s as %s: %r", file(dp->name,'E'), getuser());
490 efd = create(file(dp->name, 'E'), OWRITE, 0666);
492 if(debug) syslog(0, "runq", "create %s as %s: %r", file(dp->name, 'E'), getuser());
493 exits("could not open error file - Retry");
498 error("can't exec %s", cmd);
504 error("wait failed: %r", "");
510 fprint(2, "wm->pid %d wm->msg == %s\n", wm->pid, wm->msg);
514 fprint(2, "[%d] wm->msg == %s\n", getpid(), wm->msg);
515 if(!Rflag && strstr(wm->msg, "Retry")==0){
516 /* return the message and remove it */
517 if(returnmail(av, dp->name, wm->msg) != 0)
518 logit("returnmail failed", dp->name, av);
521 /* add sys to bad list and try again later */
523 badsys = realloc(badsys, nbad*sizeof(char*));
524 badsys[nbad-1] = strdup(av[3]);
527 /* it worked; remove the message */
537 sysunlockfile(Bfildes(b));
545 * return a name starting with the given character
548 file(char *name, char type)
550 static char nname[Elemlen+1];
552 strncpy(nname, name, Elemlen);
559 * send back the mail with an error message
561 * return 0 if successful
564 returnmail(char **av, char *name, char *msg)
566 char buf[256], attachment[Pathlen], *sender;
572 if(av[1] == 0 || av[2] == 0){
573 logit("runq - dumping bad file", name, av);
577 s = unescapespecial(s_copy(av[2]));
580 if(!returnable(sender) || strcmp(sender, "postmaster") == 0) {
581 logit("runq - dumping p to p mail", name, av);
586 logit("runq - pipe failed", name, av);
590 switch(rfork(RFFDG|RFPROC|RFENVG)){
592 logit("runq - fork failed", name, av);
595 logit("returning", name, av);
600 putenv("upasname", "/dev/null");
601 snprint(buf, sizeof(buf), "%s/marshal", UPASBIN);
602 snprint(attachment, sizeof(attachment), "%s", file(name, 'D'));
603 execl(buf, "send", "-A", attachment, "-s", "permanent failure", sender, nil);
604 error("can't exec", 0);
611 fprint(pfd[1], "\n"); /* get out of headers */
613 fprint(pfd[1], "Your request ``%.20s ", av[1]);
614 for(n = 3; av[n]; n++)
615 fprint(pfd[1], "%s ", av[n]);
617 fprint(pfd[1], "'' failed (code %s).\nThe symptom was:\n\n", msg);
618 fd = open(file(name, 'E'), OREAD);
621 n = read(fd, buf, sizeof(buf));
624 if(write(pfd[1], buf, n) != n){
635 syslog(0, "runq", "wait: %r");
636 logit("wait failed", name, av);
642 syslog(0, "runq", "returnmail child: %s", wm->msg);
643 logit("returnmail child failed", name, av);
650 * print a warning and continue
653 warning(char *f, void *a)
658 rerrstr(err, sizeof(err));
659 snprint(buf, sizeof(buf), f, a);
660 fprint(2, "runq: %s: %s\n", buf, err);
664 * print an error and die
667 error(char *f, void *a)
672 rerrstr(err, sizeof(err));
673 snprint(buf, sizeof(buf), f, a);
674 fprint(2, "runq: %s: %s\n", buf, err);
679 logit(char *msg, char *file, char **av)
684 n = snprint(buf, sizeof(buf), "%s/%s: %s", curdir, file, msg);
687 if(n + m + 4 > sizeof(buf))
689 sprint(buf + n, " '%s'", *av);
692 syslog(0, runqlog, "%s", buf);
695 char *loadfile = ".runqload";
717 l = syslock(loadfile);
718 fd = open(loadfile, ORDWR);
720 fd = create(loadfile, ORDWR, 0666);	/* create(file, omode, perm): open mode comes before permissions */
728 /* get current load */
730 n = read(fd, buf, sizeof(buf)-1);
738 /* ignore load if file hasn't been changed in 30 minutes */
741 if(d->mtime + 30*60 < time(0))
746 /* if load already too high, give up */
747 if(start && i >= load){
752 /* increment/decrement load */
758 fprint(fd, "%d\n", i);