2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
11 #include <afs/param.h>
19 #include <sys/types.h>
26 #ifdef BOZO_SAVE_CORES
39 #include <afs/procmgmt.h> /* signal(), kill(), wait(), etc. */
41 #include <afs/audit.h>
42 #include <afs/afsutil.h>
43 #include <afs/fileutil.h>
46 #if defined(AFS_AIX_ENV) || defined(AFS_SUN4_ENV)
47 /* All known versions of AIX lack WCOREDUMP but this works */
48 #define WCOREDUMP(x) ((x) & 0x80)
51 #define BNODE_LWP_STACKSIZE (16 * 1024)
53 int bnode_waiting = 0;
54 static PROCESS bproc_pid; /* pid of waker-upper */
55 static struct bnode *allBnodes=0; /* list of all bnodes */
56 static struct bnode_proc *allProcs=0; /* list of all processes for which we're waiting */
57 static struct bnode_type *allTypes=0; /* list of registered type handlers */
59 static struct bnode_stats {
64 extern char **environ; /* env structure */
67 /* Remember the name of the process, if any, that failed last */
68 static void RememberProcName(register struct bnode_proc *ap)
70 register struct bnode *tbnodep;
73 if (tbnodep->lastErrorName) {
74 free(tbnodep->lastErrorName);
75 tbnodep->lastErrorName = NULL;
78 tbnodep->lastErrorName = (char *) malloc(strlen(ap->coreName)+1);
79 strcpy(tbnodep->lastErrorName, ap->coreName);
83 /* utility for use by BOP_HASCORE functions to determine where a core file might
86 int bnode_CoreName(register struct bnode *abnode, char *acoreName,
89 strcpy(abuffer, AFSDIR_SERVER_CORELOG_FILEPATH);
91 strcat(abuffer, acoreName);
94 strcat(abuffer, abnode->name);
98 /* save core file, if any */
99 static void SaveCore(register struct bnode *abnode, register struct bnode_proc
104 register afs_int32 code;
105 #ifdef BOZO_SAVE_CORES
106 struct timeval Start;
107 struct tm *TimeFields;
111 code = stat(AFSDIR_SERVER_CORELOG_FILEPATH, &tstat);
114 bnode_CoreName(abnode, aproc->coreName, tbuffer);
115 #ifdef BOZO_SAVE_CORES
116 TM_GetTimeOfDay(&Start, 0);
117 TimeFields = localtime(&Start.tv_sec);
118 sprintf(FileName,"%s.%d%02d%02d%02d%02d%02d", tbuffer,
119 TimeFields->tm_year, TimeFields->tm_mon + 1,
120 TimeFields->tm_mday, TimeFields->tm_hour, TimeFields->tm_min,
122 strcpy(tbuffer,FileName);
124 code = renamefile(AFSDIR_SERVER_CORELOG_FILEPATH, tbuffer);
127 int bnode_GetString(register struct bnode *abnode, register char *abuffer,
128 register afs_int32 alen)
130 return BOP_GETSTRING(abnode, abuffer, alen);
133 int bnode_GetParm(register struct bnode *abnode, register afs_int32 aindex,
134 register char *abuffer, afs_int32 alen)
136 return BOP_GETPARM(abnode, aindex, abuffer, alen);
139 int bnode_GetStat(register struct bnode *abnode, register afs_int32 *astatus)
141 return BOP_GETSTAT(abnode, astatus);
144 int bnode_RestartP(register struct bnode *abnode)
146 return BOP_RESTARTP(abnode);
149 static int bnode_Check(register struct bnode *abnode)
151 if (abnode->flags & BNODE_WAIT) {
152 abnode->flags &= ~BNODE_WAIT;
153 LWP_NoYieldSignal(abnode);
158 /* tell if an instance has a core file */
159 int bnode_HasCore(register struct bnode *abnode)
161 return BOP_HASCORE(abnode);
164 /* wait for all bnodes to stabilize */
165 int bnode_WaitAll() {
166 register struct bnode *tb;
167 register afs_int32 code;
171 for(tb = allBnodes; tb; tb=tb->next) {
173 code = BOP_GETSTAT(tb, &stat);
178 if (stat != tb->goal) {
179 tb->flags |= BNODE_WAIT;
189 /* wait until bnode status is correct */
190 int bnode_WaitStatus(register struct bnode *abnode, int astatus)
192 register afs_int32 code;
198 code = BOP_GETSTAT(abnode, &stat);
199 if (code) return code;
201 /* otherwise, check if we're done */
202 if (stat == astatus) {
203 bnode_Release(abnode);
206 if (astatus != abnode->goal) {
207 bnode_Release(abnode);
208 return -1; /* no longer our goal, don't keep waiting */
210 /* otherwise, block */
211 abnode->flags |= BNODE_WAIT;
212 LWP_WaitProcess(abnode);
216 int bnode_SetStat(register struct bnode *abnode, register int agoal)
218 abnode->goal = agoal;
220 BOP_SETSTAT(abnode, agoal);
221 abnode->flags &= ~BNODE_ERRORSTOP;
225 int bnode_SetGoal(register struct bnode *abnode, register int agoal)
227 abnode->goal = agoal;
232 int bnode_SetFileGoal(register struct bnode *abnode, register int agoal)
234 if (abnode->fileGoal == agoal) return 0; /* already done */
235 abnode->fileGoal = agoal;
240 /* apply a function to all bnodes in the system */
241 int bnode_ApplyInstance(int (*aproc)(), char *arock)
243 register struct bnode *tb, *nb;
244 register afs_int32 code;
246 for(tb = allBnodes; tb; tb=nb) {
248 code = (*aproc) (tb, arock);
249 if (code) return code;
254 struct bnode *bnode_FindInstance(register char *aname)
256 register struct bnode *tb;
258 for(tb=allBnodes;tb;tb=tb->next) {
259 if (!strcmp(tb->name, aname)) return tb;
264 static struct bnode_type *FindType(register char *aname)
266 register struct bnode_type *tt;
268 for(tt=allTypes;tt;tt=tt->next) {
269 if (!strcmp(tt->name, aname)) return tt;
271 return (struct bnode_type *) 0;
274 int bnode_Register(char *atype, struct bnode_ops *aprocs, int anparms)
276 register struct bnode_type *tt;
278 for(tt=allTypes;tt;tt=tt->next) {
279 if (!strcmp(tt->name, atype)) break;
282 tt = (struct bnode_type *) malloc(sizeof(struct bnode_type));
283 memset(tt, 0, sizeof(struct bnode_type));
292 afs_int32 bnode_Create(char *atype, char *ainstance, struct bnode **abp,
293 char *ap1, char *ap2, char *ap3, char *ap4,
294 char *ap5, char *notifier, int fileGoal)
296 struct bnode_type *type;
298 char *notifierpath = NULL;
301 if (bnode_FindInstance(ainstance)) return BZEXISTS;
302 type = FindType(atype);
303 if (!type) return BZBADTYPE;
305 if (notifier && strcmp(notifier, NONOTIFIER)) {
306 /* construct local path from canonical (wire-format) path */
307 if (ConstructLocalBinPath(notifier, &notifierpath)) {
308 bozo_Log("BNODE-Create: Notifier program path invalid '%s'\n", notifier);
312 if (stat(notifierpath, &tstat)) {
313 bozo_Log("BNODE-Create: Notifier program '%s' not found\n", notifierpath);
318 tb = (*type->ops->create)(ainstance, ap1, ap2, ap3, ap4, ap5);
323 tb->notifier = notifierpath;
327 /* The create op above (e.g. fs_create) calls bnode_InitBnode(), which always
328 ** sets fileGoal to BSTAT_NORMAL; overwrite it with the fileGoal value passed
329 ** into this function as a parameter. */
330 tb->fileGoal = fileGoal;
332 bnode_SetStat(tb, tb->goal); /* nudge it once */
337 int bnode_DeleteName(char *ainstance)
339 register struct bnode *tb;
341 tb = bnode_FindInstance(ainstance);
342 if (!tb) return BZNOENT;
344 return bnode_Delete(tb);
347 int bnode_Hold(register struct bnode *abnode)
353 int bnode_Release(register struct bnode *abnode)
356 if (abnode->refCount == 0 && abnode->flags & BNODE_DELETE) {
357 abnode->flags &= ~BNODE_DELETE; /* we're going for it */
358 bnode_Delete(abnode);
363 int bnode_Delete(register struct bnode *abnode)
365 register afs_int32 code;
366 register struct bnode **lb, *ub;
369 if (abnode->refCount != 0) {
370 abnode->flags |= BNODE_DELETE;
374 /* make sure the bnode is idle before zapping */
376 code = BOP_GETSTAT(abnode, &temp);
377 bnode_Release(abnode);
378 if (code) return code;
379 if (temp != BSTAT_SHUTDOWN) return BZBUSY;
381 /* all clear to zap */
382 for(lb = &allBnodes, ub = *lb; ub; lb= &ub->next, ub = *lb) {
384 /* unthread it from the list */
389 free(abnode->name); /* do this first, since bnode fields may be bad after BOP_DELETE */
390 code = BOP_DELETE(abnode); /* don't play games like holding over this one */
395 /* function to tell if there's a timeout coming up */
396 int bnode_PendingTimeout(register struct bnode *abnode)
398 return (abnode->flags & BNODE_NEEDTIMEOUT);
401 /* function called to set / clear periodic bnode wakeup times */
402 int bnode_SetTimeout(register struct bnode *abnode, afs_int32 atimeout)
405 abnode->nextTimeout = FT_ApproxTime() + atimeout;
406 abnode->flags |= BNODE_NEEDTIMEOUT;
407 abnode->period = atimeout;
408 IOMGR_Cancel(bproc_pid);
411 abnode->flags &= ~BNODE_NEEDTIMEOUT;
416 /* used by new bnode creation code to format bnode header */
417 int bnode_InitBnode (register struct bnode *abnode,
418 struct bnode_ops *abnodeops, char *aname)
420 struct bnode **lb, *nb;
422 /* format the bnode properly */
423 memset(abnode, 0, sizeof(struct bnode));
424 abnode->ops = abnodeops;
425 abnode->name = (char *) malloc(strlen(aname)+1);
428 strcpy(abnode->name, aname);
429 abnode->flags = BNODE_ACTIVE;
430 abnode->fileGoal = BSTAT_NORMAL;
431 abnode->goal = BSTAT_SHUTDOWN;
433 /* put the bnode at the end of the list so we write bnode file in same order */
434 for(lb = &allBnodes, nb = *lb; nb; lb = &nb->next, nb = *lb);
440 static int DeleteProc(register struct bnode_proc *abproc)
442 register struct bnode_proc **pb, *tb;
443 struct bnode_proc *nb;
445 for(pb = &allProcs,tb = *pb; tb; pb = &tb->next, tb=nb) {
456 /* bnode lwp executes this code repeatedly */
458 register afs_int32 code;
459 register struct bnode *tb;
460 register afs_int32 temp;
461 register struct bnode_proc *tp;
463 int options; /* must not be register */
469 /* first figure out how long to sleep for */
470 temp = 0x7fffffff; /* afs_int32 time; maxint doesn't work in select */
472 for(tb = allBnodes; tb; tb=tb->next) {
473 if (tb->flags & BNODE_NEEDTIMEOUT) {
474 if (tb->nextTimeout < temp) {
476 temp = tb->nextTimeout;
480 /* now temp has the time at which we should wakeup next */
483 if (setAny) temp -= FT_ApproxTime(); /* how many seconds until next event */
488 code = IOMGR_Select(0, 0, 0, 0, &tv);
490 else code = 0; /* fake timeout code */
492 /* figure out why we woke up; child exit or timeouts */
493 FT_GetTimeOfDay(&tv, 0); /* must do the real gettimeofday once in a while */
496 /* check all bnodes to see which ones need timeout events */
497 for(tb = allBnodes; tb; tb=nb) {
498 if ((tb->flags & BNODE_NEEDTIMEOUT) && temp > tb->nextTimeout) {
502 if (tb->flags & BNODE_NEEDTIMEOUT) { /* check again, BOP_TIMEOUT could change */
503 tb->nextTimeout = FT_ApproxTime() + tb->period;
506 bnode_Release(tb); /* delete may occur here */
512 /* signalled, probably by incoming signal */
515 bnode_waiting = options | 0x800000;
516 code = waitpid((pid_t)-1, &status, options);
518 if (code == 0 || code == -1) break; /* all done */
519 /* otherwise code has a process id, which we now search for */
520 for(tp=allProcs; tp; tp=tp->next)
521 if (tp->pid == code) break;
527 /* count restarts in last 30 seconds */
528 if (temp > tb->rsTime + 30) {
529 /* it's been 30 seconds we've been counting */
534 if (WIFSIGNALED(status) == 0) {
535 /* exited, not signalled */
536 tp->lastExit = WEXITSTATUS(status);
539 tb->errorCode = tp->lastExit;
540 tb->lastErrorExit = FT_ApproxTime();
541 RememberProcName(tp);
545 bozo_Log("%s:%s exited with code %d\n",
546 tb->name, tp->coreName, tp->lastExit);
548 bozo_Log("%s exited with code %d\n",
549 tb->name, tp->lastExit);
552 /* Signal occurred, perhaps spurious due to shutdown request.
553 * If due to a shutdown request, don't overwrite last error
556 tp->lastSignal = WTERMSIG(status);
558 if (tp->lastSignal != SIGQUIT && tp->lastSignal != SIGTERM
559 && tp->lastSignal != SIGKILL) {
560 tb->errorSignal = tp->lastSignal;
561 tb->lastErrorExit = FT_ApproxTime();
562 RememberProcName(tp);
565 bozo_Log("%s:%s exited on signal %d%s\n",
566 tb->name, tp->coreName, tp->lastSignal,
567 WCOREDUMP(status) ? " (core dumped)" : "");
569 bozo_Log("%s exited on signal %d%s\n",
570 tb->name, tp->lastSignal,
571 WCOREDUMP(status) ? " (core dumped)" : "");
574 tb->lastAnyExit = FT_ApproxTime();
577 bozo_Log("BNODE: Notifier %s will be called\n", tb->notifier);
580 BOP_PROCEXIT(tb, tp);
583 if (tb->rsCount++ > 10) {
584 /* more than 10 restarts within the 30-second window */
585 tb->flags |= BNODE_ERRORSTOP;
586 bnode_SetGoal(tb, BSTAT_SHUTDOWN);
587 bozo_Log("BNODE '%s' repeatedly failed to start, perhaps missing executable.\n",
590 bnode_Release(tb); /* bnode delete can happen here */
593 else bnode_stats.weirdPids++;
599 static afs_int32 SendNotifierData(register int fd,
600 register struct bnode_proc *tp)
602 register struct bnode *tb = tp->bnode;
603 char buffer[1000], *bufp = buffer, *buf1;
607 * First send out the bnode_proc struct
609 (void) sprintf(bufp, "BEGIN bnode_proc\n");
610 bufp += strlen(bufp);
611 (void) sprintf(bufp, "comLine: %s\n", tp->comLine);
612 bufp += strlen(bufp);
613 if (!(buf1 = tp->coreName))
615 (void) sprintf(bufp, "coreName: %s\n", buf1);
616 bufp += strlen(bufp);
617 (void) sprintf(bufp, "pid: %ld\n", tp->pid);
618 bufp += strlen(bufp);
619 (void) sprintf(bufp, "lastExit: %ld\n", tp->lastExit);
620 bufp += strlen(bufp);
622 (void) sprintf(bufp, "lastSignal: %ld\n", tp->lastSignal);
623 bufp += strlen(bufp);
625 (void) sprintf(bufp, "flags: %ld\n", tp->flags);
626 bufp += strlen(bufp);
627 (void) sprintf(bufp, "END bnode_proc\n");
628 bufp += strlen(bufp);
629 len =(int)(bufp-buffer);
630 if (write(fd, buffer, len) < 0) {
635 * Now send out the bnode struct
638 (void) sprintf(bufp, "BEGIN bnode\n");
639 bufp += strlen(bufp);
640 (void) sprintf(bufp, "name: %s\n", tb->name);
641 bufp += strlen(bufp);
642 (void) sprintf(bufp, "rsTime: %ld\n", tb->rsTime);
643 bufp += strlen(bufp);
644 (void) sprintf(bufp, "rsCount: %ld\n", tb->rsCount);
645 bufp += strlen(bufp);
646 (void) sprintf(bufp, "procStartTime: %ld\n", tb->procStartTime);
647 bufp += strlen(bufp);
648 (void) sprintf(bufp, "procStarts: %ld\n", tb->procStarts);
649 bufp += strlen(bufp);
650 (void) sprintf(bufp, "lastAnyExit: %ld\n", tb->lastAnyExit);
651 bufp += strlen(bufp);
652 (void) sprintf(bufp, "lastErrorExit: %ld\n", tb->lastErrorExit);
653 bufp += strlen(bufp);
654 (void) sprintf(bufp, "errorCode: %ld\n", tb->errorCode);
655 bufp += strlen(bufp);
656 (void) sprintf(bufp, "errorSignal: %ld\n", tb->errorSignal);
657 bufp += strlen(bufp);
659 (void) sprintf(bufp, "lastErrorName: %s\n", tb->lastErrorName);
660 bufp += strlen(bufp);
662 (void) sprintf(bufp, "goal: %d\n", tb->goal);
663 bufp += strlen(bufp);
664 (void) sprintf(bufp, "END bnode\n");
665 bufp += strlen(bufp);
666 len = (int)bufp-(int)buffer;
667 if (write(fd, buffer, len) < 0) {
672 int hdl_notifier(struct bnode_proc *tp)
674 #ifndef AFS_NT40_ENV /* NT notifier callout not yet implemented */
675 int code, pid, status;
678 if (stat(tp->bnode->notifier, &tstat)) {
679 bozo_Log("BNODE: Failed to find notifier '%s'; ignored\n", tp->bnode->notifier);
682 if ((pid = fork()) == 0) {
684 struct bnode *tb = tp->bnode;
687 #if defined(AFS_HPUX_ENV) || defined(AFS_SUN5_ENV) || defined(AFS_SGI51_ENV)
690 #ifdef AFS_LINUX20_ENV
696 fout = popen(tb->notifier, "w");
698 bozo_Log("BNODE: Failed to find notifier '%s'; ignored\n", tb->notifier);
699 perror(tb->notifier);
702 code = SendNotifierData(fileno(fout), tp);
705 } else if (pid < 0) {
706 bozo_Log("Failed to fork creating process to handle notifier '%s'\n", tp->bnode->notifier);
709 #endif /* AFS_NT40_ENV */
713 /* Called by IOMGR at low priority on IOMGR's stack shortly after a SIGCHLD
714 * occurs. Wakes up bproc to redo things */
715 int bnode_SoftInt(int asignal)
717 IOMGR_Cancel(bproc_pid);
721 /* Called at signal interrupt level; queues function to be called
722 * when IOMGR runs again.
724 void bnode_Int(int asignal)
726 extern void bozo_ShutdownAndExit();
728 if (asignal == SIGQUIT) {
729 IOMGR_SoftSig(bozo_ShutdownAndExit, (char *)asignal);
731 IOMGR_SoftSig(bnode_SoftInt, (char *)asignal);
736 /* initialize the whole system */
739 register afs_int32 code;
740 struct sigaction newaction;
743 if (initDone) return 0;
745 memset(&bnode_stats, 0, sizeof(bnode_stats));
746 LWP_InitializeProcessSupport(1, &junk); /* just in case */
748 code = LWP_CreateProcess(bproc, BNODE_LWP_STACKSIZE,
749 /* priority */ 1, (void *) /* parm */0,
750 "bnode-manager", &bproc_pid);
751 if (code) return code;
752 memset((char *)&newaction, 0, sizeof(newaction));
753 newaction.sa_handler = bnode_Int;
754 code = sigaction(SIGCHLD, &newaction, NULL);
755 if (code) return errno;
756 code = sigaction(SIGQUIT, &newaction, NULL);
757 if (code) return errno;
761 /* free token list returned by bnode_ParseLine */
762 int bnode_FreeTokens(register struct bnode_token *alist)
764 register struct bnode_token *nlist;
765 for(; alist; alist = nlist) {
773 static int space(int x)
775 if (x == 0 || x == ' ' || x == '\t' || x== '\n') return 1;
779 int bnode_ParseLine(char *aline, struct bnode_token **alist)
784 struct bnode_token *first, *last;
785 register struct bnode_token *ttok;
788 inToken = 0; /* not copying token chars at start */
789 first = (struct bnode_token *) 0;
790 last = (struct bnode_token *) 0;
793 if (tc == 0 || space(tc)) { /* terminating null gets us in here, too */
795 inToken = 0; /* end of this token */
797 ttok = (struct bnode_token *) malloc(sizeof(struct bnode_token));
798 ttok->next = (struct bnode_token *) 0;
799 ttok->key = (char *) malloc(strlen(tbuffer)+1);
800 strcpy(ttok->key, tbuffer);
806 if (!first) first = ttok;
810 /* an alpha character */
815 if (tptr - tbuffer >= sizeof(tbuffer)) return -1; /* token too long */
819 /* last token flushed 'cause space(0) --> true */
820 if (last) last->next = (struct bnode_token *) 0;
828 int bnode_NewProc(struct bnode *abnode, char *aexecString, char *coreName,
829 struct bnode_proc **aproc)
831 struct bnode_token *tlist, *tt;
833 struct bnode_proc *tp;
835 char *argv[MAXVARGS];
838 code = bnode_ParseLine(aexecString, &tlist); /* try parsing first */
839 if (code) return code;
840 tp = (struct bnode_proc *) malloc(sizeof(struct bnode_proc));
841 memset(tp, 0, sizeof(struct bnode_proc));
846 tp->comLine = aexecString;
847 tp->coreName = coreName; /* may be null */
848 abnode->procStartTime = FT_ApproxTime();
849 abnode->procStarts++;
851 /* convert linked list of tokens into argv structure */
852 for (tt = tlist, i = 0; i < (MAXVARGS - 1) && tt; tt = tt->next, i++) {
855 argv[i] = NULL; /* null-terminated */
857 cpid = spawnprocve(argv[0], argv, environ, -1);
858 osi_audit(BOSSpawnProcEvent, 0, AUD_STR, aexecString, AUD_END );
860 if (cpid == (pid_t)-1) {
861 bozo_Log("Failed to spawn process for bnode '%s'\n", abnode->name);
862 bnode_FreeTokens(tlist);
867 bnode_FreeTokens(tlist);
869 tp->flags = BPROC_STARTED;
870 tp->flags &= ~BPROC_EXITED;
875 int bnode_StopProc(register struct bnode_proc *aproc, int asignal)
878 if (!(aproc->flags & BPROC_STARTED) || (aproc->flags & BPROC_EXITED))
881 osi_audit( BOSStopProcEvent, 0, AUD_STR, (aproc ? aproc->comLine : NULL), AUD_END );
883 code = kill(aproc->pid, asignal);
884 bnode_Check(aproc->bnode);
888 int bnode_Deactivate(register struct bnode *abnode)
890 register struct bnode **pb, *tb;
892 if (!(abnode->flags & BNODE_ACTIVE)) return BZNOTACTIVE;
893 for(pb = &allBnodes,tb = *pb; tb; tb=nb) {
897 tb->flags &= ~BNODE_ACTIVE;