2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
11 #include <afs/param.h>
19 #include <sys/types.h>
26 #ifdef BOZO_SAVE_CORES
39 #include <afs/procmgmt.h> /* signal(), kill(), wait(), etc. */
41 #include <afs/audit.h>
42 #include <afs/afsutil.h>
43 #include <afs/fileutil.h>
47 /* All known versions of AIX lack WCOREDUMP but this works */
/* Fallback definition: yields non-zero when bit 0x80 of the wait status x is set.
 * NOTE(review): the preprocessor guard that presumably limits this to AIX is
 * not visible in this excerpt. */
48 #define WCOREDUMP(x) ((x) & 0x80)
/* Stack size handed to LWP_CreateProcess when the bnode manager LWP is created. */
51 #define BNODE_LWP_STACKSIZE (16 * 1024)
/* Set to (options | 0x800000) by the child-reaping loop just before waitpid. */
53 int bnode_waiting = 0;
54 static PROCESS bproc_pid; /* pid of waker-upper */
55 static struct bnode *allBnodes=0; /* list of all bnodes */
56 static struct bnode_proc *allProcs=0; /* list of all processes for which we're waiting */
57 static struct bnode_type *allTypes=0; /* list of registered type handlers */
/* Module counters: zeroed with memset during initialization; the weirdPids
 * member is incremented in the child-reaping loop. The member list itself is
 * not visible in this excerpt. */
59 static struct bnode_stats {
/* Forward declarations (old-style, no prototypes) for helpers defined later. */
63 static afs_int32 SendNotifierData();
64 static int DeleteProc();
67 extern char **environ; /* env structure */
70 /* Remember the name of the process, if any, that failed last */
/* ap: process record whose coreName is copied into tbnodep->lastErrorName.
 * Any previous lastErrorName is freed and reset to NULL before the copy.
 * The assignment of tbnodep (presumably ap->bnode) and any guard on
 * ap->coreName fall on lines not visible in this excerpt.
 * NOTE(review): the malloc result is passed to strcpy with no visible NULL
 * check. */
71 static void RememberProcName(ap)
72 register struct bnode_proc *ap; {
73 register struct bnode *tbnodep;
76 if (tbnodep->lastErrorName) {
77 free(tbnodep->lastErrorName);
78 tbnodep->lastErrorName = NULL;
81 tbnodep->lastErrorName = (char *) malloc(strlen(ap->coreName)+1);
82 strcpy(tbnodep->lastErrorName, ap->coreName);
/* bnode_CoreName: builds a core-file path in abuffer. The buffer is first
 * set to AFSDIR_SERVER_CORELOG_FILEPATH, then acoreName and/or abnode->name
 * are appended; the conditions choosing between them (and any separator) are
 * on lines not visible in this excerpt.
 * NOTE(review): strcpy/strcat are unbounded and abuffer's size is not passed
 * in, so the caller must supply a buffer large enough for the whole path. */
86 /* utility for use by BOP_HASCORE functions to determine where a core file might
89 bnode_CoreName(abnode, acoreName, abuffer)
90 register struct bnode *abnode;
93 strcpy(abuffer, AFSDIR_SERVER_CORELOG_FILEPATH);
95 strcat(abuffer, acoreName);
98 strcat(abuffer, abnode->name);
102 /* save core file, if any */
/* Stats AFSDIR_SERVER_CORELOG_FILEPATH, builds the destination name with
 * bnode_CoreName(abnode, aproc->coreName, ...), and renames the core file to
 * that name with renamefile(). When BOZO_SAVE_CORES is defined, a timestamp
 * suffix built from localtime() is appended to the destination name first.
 * The K&R parameter declarations below are listed in the opposite order to
 * the parameter list (abnode, aproc); that is legal and does not change the
 * call signature. */
103 static void SaveCore(abnode, aproc)
104 register struct bnode_proc *aproc;
105 register struct bnode *abnode; {
108 register afs_int32 code;
109 #ifdef BOZO_SAVE_CORES
110 struct timeval Start;
111 struct tm *TimeFields;
115 code = stat(AFSDIR_SERVER_CORELOG_FILEPATH, &tstat);
118 bnode_CoreName(abnode, aproc->coreName, tbuffer);
119 #ifdef BOZO_SAVE_CORES
120 TM_GetTimeOfDay(&Start, 0);
121 TimeFields = localtime(&Start.tv_sec);
/* NOTE(review): tm_year counts years since 1900 and is printed with plain %d
 * here (no + 1900), so the suffix carries e.g. "100" for the year 2000.
 * sprintf is unbounded; FileName's declaration is not visible in this
 * excerpt. */
122 sprintf(FileName,"%s.%d%02d%02d%02d%02d%02d", tbuffer,
123 TimeFields->tm_year, TimeFields->tm_mon + 1,
124 TimeFields->tm_mday, TimeFields->tm_hour, TimeFields->tm_min,
126 strcpy(tbuffer,FileName);
128 code = renamefile(AFSDIR_SERVER_CORELOG_FILEPATH, tbuffer);
/* Thin wrapper: forwards (abuffer, alen) to the bnode's BOP_GETSTRING
 * operation and returns whatever that operation returns. No return type is
 * written, so it is implicitly int (pre-C99 style). */
131 bnode_GetString(abnode, abuffer, alen)
132 register struct bnode *abnode;
133 register char *abuffer;
134 register afs_int32 alen;{
135 return BOP_GETSTRING(abnode, abuffer, alen);
/* Thin wrapper: forwards (aindex, abuffer, alen) to the bnode's BOP_GETPARM
 * operation and returns its result. The declaration of alen is on a line not
 * visible in this excerpt. */
138 bnode_GetParm(abnode, aindex, abuffer, alen)
139 register struct bnode *abnode;
140 register afs_int32 aindex;
141 register char *abuffer;
143 return BOP_GETPARM(abnode, aindex, abuffer, alen);
/* Thin wrapper: asks the bnode's BOP_GETSTAT operation to store the current
 * status through astatus and returns that operation's result code. */
146 bnode_GetStat(abnode, astatus)
147 register struct bnode *abnode;
148 register afs_int32 *astatus; {
149 return BOP_GETSTAT(abnode, astatus);
/* Thin wrapper: returns the result of the bnode's BOP_RESTARTP operation. */
152 bnode_RestartP(abnode)
153 register struct bnode *abnode; {
154 return BOP_RESTARTP(abnode);
/* If the bnode is flagged BNODE_WAIT, clears the flag and signals the bnode's
 * address with LWP_NoYieldSignal. BNODE_WAIT is set elsewhere in this file
 * immediately before LWP_WaitProcess(abnode), so this releases such a waiter.
 * Implicit-int return; the return statement is not visible in this excerpt. */
157 static bnode_Check(abnode)
158 register struct bnode *abnode; {
159 if (abnode->flags & BNODE_WAIT) {
160 abnode->flags &= ~BNODE_WAIT;
161 LWP_NoYieldSignal(abnode);
166 /* tell if an instance has a core file */
/* Thin wrapper: returns the result of the bnode's BOP_HASCORE operation. */
167 bnode_HasCore(abnode)
168 register struct bnode *abnode; {
169 return BOP_HASCORE(abnode);
172 /* wait for all bnodes to stabilize */
/* NOTE(review): the function header and most of the body are not visible in
 * this excerpt. The visible lines walk allBnodes, fetch each bnode's status
 * with BOP_GETSTAT, and set BNODE_WAIT on a bnode whose status differs from
 * its goal. */
174 register struct bnode *tb;
175 register afs_int32 code;
179 for(tb = allBnodes; tb; tb=tb->next) {
181 code = BOP_GETSTAT(tb, &stat);
186 if (stat != tb->goal) {
187 tb->flags |= BNODE_WAIT;
197 /* wait until bnode status is correct */
/* Blocks the calling LWP until abnode reports status astatus.
 * Visible exits: a non-zero BOP_GETSTAT code is returned as-is; reaching
 * astatus releases the bnode; if astatus is no longer the bnode's goal the
 * bnode is released and -1 is returned. Otherwise BNODE_WAIT is set and the
 * LWP sleeps on the bnode's address. The loop construct and the matching hold
 * are on lines not visible in this excerpt. */
198 bnode_WaitStatus(abnode, astatus)
200 register struct bnode *abnode; {
201 register afs_int32 code;
207 code = BOP_GETSTAT(abnode, &stat);
/* NOTE(review): this early return has no bnode_Release, unlike the two exits
 * below; confirm whether a reference is held at this point. */
208 if (code) return code;
210 /* otherwise, check if we're done */
211 if (stat == astatus) {
212 bnode_Release(abnode);
215 if (astatus != abnode->goal) {
216 bnode_Release(abnode);
217 return -1; /* no longer our goal, don't keep waiting */
219 /* otherwise, block */
220 abnode->flags |= BNODE_WAIT;
221 LWP_WaitProcess(abnode);
/* Records agoal as the bnode's goal, invokes the type-specific BOP_SETSTAT
 * operation with it, and then clears BNODE_ERRORSTOP. The return statement
 * and any hold/release pair are on lines not visible in this excerpt. */
225 bnode_SetStat(abnode, agoal)
226 register struct bnode *abnode;
227 register int agoal; {
228 abnode->goal = agoal;
230 BOP_SETSTAT(abnode, agoal);
231 abnode->flags &= ~BNODE_ERRORSTOP;
/* Stores agoal in abnode->goal. Unlike bnode_SetStat, the visible lines make
 * no BOP_SETSTAT call; the rest of the body is not visible in this excerpt. */
235 bnode_SetGoal(abnode, agoal)
236 register struct bnode *abnode;
237 register int agoal; {
238 abnode->goal = agoal;
/* Updates abnode->fileGoal to agoal. Returns 0 immediately when the value is
 * already agoal; what follows the assignment is not visible in this excerpt. */
243 bnode_SetFileGoal(abnode, agoal)
244 register struct bnode *abnode;
245 register int agoal; {
246 if (abnode->fileGoal == agoal) return 0; /* already done */
247 abnode->fileGoal = agoal;
252 /* apply a function to all bnodes in the system */
/* Calls (*aproc)(tb, arock) for each bnode on allBnodes. Iteration stops at
 * the first non-zero result, which is returned to the caller. The loop
 * advances through nb, which is assigned on a line not visible in this
 * excerpt (presumably tb->next captured before the callback). */
253 int bnode_ApplyInstance(aproc, arock)
256 register struct bnode *tb, *nb;
257 register afs_int32 code;
259 for(tb = allBnodes; tb; tb=nb) {
261 code = (*aproc) (tb, arock);
262 if (code) return code;
/* Linear search of allBnodes for a bnode whose name equals aname (strcmp).
 * Returns the matching bnode; the not-found return is on a line not visible
 * in this excerpt. */
267 struct bnode *bnode_FindInstance (aname)
268 register char *aname; {
269 register struct bnode *tb;
271 for(tb=allBnodes;tb;tb=tb->next) {
272 if (!strcmp(tb->name, aname)) return tb;
/* Linear search of allTypes for a registered type whose name equals aname.
 * Returns the matching bnode_type, or a null pointer when none matches. */
277 static struct bnode_type *FindType(aname)
278 register char *aname; {
279 register struct bnode_type *tt;
281 for(tt=allTypes;tt;tt=tt->next) {
282 if (!strcmp(tt->name, aname)) return tt;
284 return (struct bnode_type *) 0;
/* Registers a bnode type. The visible lines first look for an existing
 * allTypes entry named atype, and allocate and zero a new bnode_type; the
 * condition guarding the allocation and the code that fills in and links the
 * entry are not visible in this excerpt.
 * NOTE(review): the malloc result is passed to memset with no visible NULL
 * check. */
287 bnode_Register(atype, aprocs, anparms)
289 int anparms; /* number of parms to create */
290 struct bnode_ops *aprocs; {
291 register struct bnode_type *tt;
293 for(tt=allTypes;tt;tt=tt->next) {
294 if (!strcmp(tt->name, atype)) break;
297 tt = (struct bnode_type *) malloc(sizeof(struct bnode_type));
298 memset(tt, 0, sizeof(struct bnode_type));
/* Creates a bnode of registered type atype named ainstance.
 * Returns BZEXISTS if an instance with that name already exists and
 * BZBADTYPE if atype is not registered. When notifier is non-NULL and differs
 * from NONOTIFIER it is converted to a local path and checked with stat();
 * each failure is logged (the corresponding returns are not visible in this
 * excerpt). The type's create operation receives ainstance and ap1..ap5; the
 * new bnode then gets the notifier path and the caller's fileGoal, and is
 * nudged once with bnode_SetStat(tb, tb->goal). */
307 afs_int32 bnode_Create(atype, ainstance, abp, ap1, ap2, ap3, ap4, ap5, notifier,fileGoal)
311 char *ap1, *ap2, *ap3, *ap4, *ap5, *notifier;
313 struct bnode_type *type;
315 char *notifierpath = NULL;
318 if (bnode_FindInstance(ainstance)) return BZEXISTS;
319 type = FindType(atype);
320 if (!type) return BZBADTYPE;
322 if (notifier && strcmp(notifier, NONOTIFIER)) {
323 /* construct local path from canonical (wire-format) path */
/* NOTE(review): the second argument on the next line looks like an
 * encoding-mangled address-of expression on notifierpath; left untouched
 * here, confirm against the real file. */
324 if (ConstructLocalBinPath(notifier, ¬ifierpath)) {
325 bozo_Log("BNODE-Create: Notifier program path invalid '%s'\n", notifier);
329 if (stat(notifierpath, &tstat)) {
330 bozo_Log("BNODE-Create: Notifier program '%s' not found\n", notifierpath);
335 tb = (*type->ops->create)(ainstance, ap1, ap2, ap3, ap4, ap5);
340 tb->notifier = notifierpath;
344 /* The fs_create above calls bnode_InitBnode() which always sets the
345 ** fileGoal to BSTAT_NORMAL .... overwrite it with whatever is passed into
346 ** this function as a parameter... */
347 tb->fileGoal = fileGoal;
349 bnode_SetStat(tb, tb->goal); /* nudge it once */
/* Deletes the bnode named ainstance. Returns BZNOENT when no such instance
 * exists; otherwise returns the result of bnode_Delete on it. */
354 int bnode_DeleteName(ainstance)
356 register struct bnode *tb;
358 tb = bnode_FindInstance(ainstance);
359 if (!tb) return BZNOENT;
361 return bnode_Delete(tb);
365 register struct bnode *abnode; {
/* Drops a reference on abnode (the decrement itself is on a line not visible
 * in this excerpt). When the count is zero and BNODE_DELETE is set, the flag
 * is cleared and the deferred delete is carried out via bnode_Delete. */
370 bnode_Release(abnode)
371 register struct bnode *abnode; {
373 if (abnode->refCount == 0 && abnode->flags & BNODE_DELETE) {
374 abnode->flags &= ~BNODE_DELETE; /* we're going for it */
375 bnode_Delete(abnode);
/* Deletes abnode, or defers the delete while it is still referenced.
 * With a non-zero refCount only BNODE_DELETE is set (bnode_Release performs
 * the delete later). Otherwise the status is fetched with BOP_GETSTAT: an
 * error code is returned as-is, and any status other than BSTAT_SHUTDOWN
 * yields BZBUSY. An idle bnode is unthreaded from allBnodes, its name is
 * freed, and the type-specific BOP_DELETE operation is invoked. */
380 int bnode_Delete(abnode)
381 register struct bnode *abnode; {
382 register afs_int32 code;
383 register struct bnode **lb, *ub;
386 if (abnode->refCount != 0) {
387 abnode->flags |= BNODE_DELETE;
391 /* make sure the bnode is idle before zapping */
393 code = BOP_GETSTAT(abnode, &temp);
394 bnode_Release(abnode);
395 if (code) return code;
396 if (temp != BSTAT_SHUTDOWN) return BZBUSY;
398 /* all clear to zap */
/* lb trails ub by one link so the matching node can be spliced out in place. */
399 for(lb = &allBnodes, ub = *lb; ub; lb= &ub->next, ub = *lb) {
401 /* unthread it from the list */
406 free(abnode->name); /* do this first, since bnode fields may be bad after BOP_DELETE */
407 code = BOP_DELETE(abnode); /* don't play games like holding over this one */
412 /* function to tell if there's a timeout coming up */
/* Returns the BNODE_NEEDTIMEOUT bit of abnode->flags: non-zero when a
 * periodic timeout is armed (not normalized to 1), zero otherwise. */
413 int bnode_PendingTimeout(abnode)
414 register struct bnode *abnode; {
415 return (abnode->flags & BNODE_NEEDTIMEOUT);
418 /* function called to set / clear periodic bnode wakeup times */
/* Arming path: schedules the next timeout atimeout seconds from
 * FT_ApproxTime(), records atimeout as the period, sets BNODE_NEEDTIMEOUT, and
 * calls IOMGR_Cancel(bproc_pid) to interrupt the bnode LWP's select so it
 * picks up the new deadline. Clearing path: drops BNODE_NEEDTIMEOUT.
 * The test choosing between the two paths is not visible in this excerpt. */
419 int bnode_SetTimeout(abnode, atimeout)
420 register struct bnode *abnode;
421 afs_int32 atimeout; {
423 abnode->nextTimeout = FT_ApproxTime() + atimeout;
424 abnode->flags |= BNODE_NEEDTIMEOUT;
425 abnode->period = atimeout;
426 IOMGR_Cancel(bproc_pid);
429 abnode->flags &= ~BNODE_NEEDTIMEOUT;
434 /* used by new bnode creation code to format bnode header */
/* Zeroes *abnode, installs abnodeops, stores a malloc'd copy of aname, and
 * sets the initial state: flags BNODE_ACTIVE, fileGoal BSTAT_NORMAL, goal
 * BSTAT_SHUTDOWN. It then locates the tail of allBnodes so the bnode can be
 * appended there (the append itself is not visible in this excerpt).
 * NOTE(review): the malloc result is passed to strcpy with no visible NULL
 * check. */
435 int bnode_InitBnode (abnode, abnodeops, aname)
436 register struct bnode *abnode;
438 struct bnode_ops *abnodeops; {
439 struct bnode **lb, *nb;
441 /* format the bnode properly */
442 memset(abnode, 0, sizeof(struct bnode));
443 abnode->ops = abnodeops;
444 abnode->name = (char *) malloc(strlen(aname)+1);
445 strcpy(abnode->name, aname);
446 abnode->flags = BNODE_ACTIVE;
447 abnode->fileGoal = BSTAT_NORMAL;
448 abnode->goal = BSTAT_SHUTDOWN;
450 /* put the bnode at the end of the list so we write bnode file in same order */
/* The loop body is intentionally empty: on exit lb addresses the terminating
 * next pointer of the list. */
451 for(lb = &allBnodes, nb = *lb; nb; lb = &nb->next, nb = *lb);
457 /* bnode lwp executes this code repeatedly */
/* NOTE(review): the function header is not visible in this excerpt; this body
 * is taken to be the bproc routine that the initialization code passes to
 * LWP_CreateProcess.
 * Each pass of the visible code:
 *   1. finds the earliest nextTimeout among bnodes flagged BNODE_NEEDTIMEOUT
 *      and sleeps in IOMGR_Select until then;
 *   2. on wakeup, re-arms nextTimeout for bnodes whose deadline has passed
 *      (the timeout callback itself is on a line not visible here);
 *   3. reaps children with waitpid, matches each pid against allProcs,
 *      records exit code or signal, logs it, calls BOP_PROCEXIT, and shuts
 *      down bnodes that restart too often. */
459 register afs_int32 code;
460 register struct bnode *tb;
461 register afs_int32 temp;
462 register struct bnode_proc *tp;
464 int options; /* must not be register */
470 /* first figure out how long to sleep for */
471 temp = 0x7fffffff; /* afs_int32 time; maxint doesn't work in select */
473 for(tb = allBnodes; tb; tb=tb->next) {
474 if (tb->flags & BNODE_NEEDTIMEOUT) {
475 if (tb->nextTimeout < temp) {
477 temp = tb->nextTimeout;
481 /* now temp has the time at which we should wakeup next */
484 if (setAny) temp -= FT_ApproxTime(); /* how many seconds until next event */
489 code = IOMGR_Select(0, 0, 0, 0, &tv);
491 else code = 0; /* fake timeout code */
493 /* figure out why we woke up; child exit or timeouts */
494 FT_GetTimeOfDay(&tv, 0); /* must do the real gettimeofday once and a while */
497 /* check all bnodes to see which ones need timeout events */
498 for(tb = allBnodes; tb; tb=nb) {
499 if ((tb->flags & BNODE_NEEDTIMEOUT) && temp > tb->nextTimeout) {
503 if (tb->flags & BNODE_NEEDTIMEOUT) { /* check again, BOP_TIMEOUT could change */
504 tb->nextTimeout = FT_ApproxTime() + tb->period;
507 bnode_Release(tb); /* delete may occur here */
513 /* signalled, probably by incoming signal */
516 bnode_waiting = options | 0x800000;
/* pid -1 asks waitpid for any child; 0 or -1 back means nothing left to reap. */
517 code = waitpid((pid_t)-1, &status, options);
519 if (code == 0 || code == -1) break; /* all done */
520 /* otherwise code has a process id, which we now search for */
521 for(tp=allProcs; tp; tp=tp->next)
522 if (tp->pid == code) break;
528 /* count restarts in the recent window; NOTE(review): the comments here say 10 seconds but the test below uses rsTime + 30 */
529 if (temp > tb->rsTime + 30) {
530 /* the counting window has elapsed (written as "10 seconds" originally; the code uses 30) */
535 if (WIFSIGNALED(status) == 0) {
536 /* exited, not signalled */
537 tp->lastExit = WEXITSTATUS(status);
540 tb->errorCode = tp->lastExit;
541 tb->lastErrorExit = FT_ApproxTime();
542 RememberProcName(tp);
546 bozo_Log("%s:%s exited with code %d\n",
547 tb->name, tp->coreName, tp->lastExit);
549 bozo_Log("%s exited with code %d\n",
550 tb->name, tp->lastExit);
553 /* Signal occurred, perhaps spurious due to shutdown request.
554 * If due to a shutdown request, don't overwrite last error
557 tp->lastSignal = WTERMSIG(status);
559 if (tp->lastSignal != SIGQUIT && tp->lastSignal != SIGTERM
560 && tp->lastSignal != SIGKILL) {
561 tb->errorSignal = tp->lastSignal;
562 tb->lastErrorExit = FT_ApproxTime();
563 RememberProcName(tp);
566 bozo_Log("%s:%s exited on signal %d%s\n",
567 tb->name, tp->coreName, tp->lastSignal,
568 WCOREDUMP(status) ? " (core dumped)" : "");
570 bozo_Log("%s exited on signal %d%s\n",
571 tb->name, tp->lastSignal,
572 WCOREDUMP(status) ? " (core dumped)" : "");
575 tb->lastAnyExit = FT_ApproxTime();
578 bozo_Log("BNODE: Notifier %s will be called\n", tb->notifier);
581 BOP_PROCEXIT(tb, tp);
584 if (tb->rsCount++ > 10) {
585 /* 10 in 10 seconds */
586 tb->flags |= BNODE_ERRORSTOP;
587 bnode_SetGoal(tb, BSTAT_SHUTDOWN);
588 bozo_Log("BNODE '%s' repeatedly failed to start, perhaps missing executable.\n",
591 bnode_Release(tb); /* bnode delete can happen here */
/* Counts a pid reported by waitpid on the else branch of a test that is not
 * visible in this excerpt. */
594 else bnode_stats.weirdPids++;
604 signal(SIGPIPE, SIG_IGN);
605 bozo_Log("Notifier aborted prematurely");
/* NOTE(review): the function header is not visible in this excerpt.
 * Visible behaviour (compiled only when AFS_NT40_ENV is not defined): checks
 * with stat() that tp->bnode->notifier exists, then forks. The child opens
 * the notifier with popen(..., "w") and streams the process and bnode state
 * to it through SendNotifierData; the parent only logs when fork fails. */
613 struct bnode_proc *tp;
615 #ifndef AFS_NT40_ENV /* NT notifier callout not yet implemented */
616 int code, pid, status;
619 if (stat(tp->bnode->notifier, &tstat)) {
620 bozo_Log("BNODE: Failed to find notifier '%s'; ignored\n", tp->bnode->notifier);
623 if ((pid = fork()) == 0) {
625 struct bnode *tb = tp->bnode;
628 #if defined(AFS_HPUX_ENV) || defined(AFS_SUN5_ENV) || defined(AFS_SGI51_ENV)
631 #ifdef AFS_LINUX20_ENV
637 fout = popen(tb->notifier, "w");
/* NOTE(review): this message repeats the stat-failure text above; if it
 * reports a popen failure (the test is not visible in this excerpt), the two
 * cases cannot be told apart in the log. */
639 bozo_Log("BNODE: Failed to find notifier '%s'; ignored\n", tb->notifier);
640 perror(tb->notifier);
643 code = SendNotifierData(fileno(fout), tp);
646 } else if (pid < 0) {
647 bozo_Log("Failed to fork creating process to handle notifier '%s'\n", tp->bnode->notifier);
650 #endif /* AFS_NT40_ENV */
/* Writes two text records to fd: a "BEGIN bnode_proc" ... "END bnode_proc"
 * section built from *tp, then a "BEGIN bnode" ... "END bnode" section built
 * from tp->bnode. Each record is a sequence of "key: value" lines formatted
 * into a local 1000-byte buffer and sent with one write() call.
 * NOTE(review): every sprintf is unbounded, and comLine, coreName, name and
 * lastErrorName are arbitrary-length strings, so the fixed buffer can
 * overflow. Several fields are printed with %ld; their declared types are not
 * visible in this excerpt, so confirm they are long. */
655 static afs_int32 SendNotifierData(fd, tp)
657 register struct bnode_proc *tp;
659 register struct bnode *tb = tp->bnode;
660 char buffer[1000], *bufp = buffer, *buf1;
664 * First sent out the bnode_proc struct
666 (void) sprintf(bufp, "BEGIN bnode_proc\n");
667 bufp += strlen(bufp);
668 (void) sprintf(bufp, "comLine: %s\n", tp->comLine);
669 bufp += strlen(bufp);
/* A null coreName takes a fallback assigned on a line not visible in this
 * excerpt. */
670 if (!(buf1 = tp->coreName))
672 (void) sprintf(bufp, "coreName: %s\n", buf1);
673 bufp += strlen(bufp);
674 (void) sprintf(bufp, "pid: %ld\n", tp->pid);
675 bufp += strlen(bufp);
676 (void) sprintf(bufp, "lastExit: %ld\n", tp->lastExit);
677 bufp += strlen(bufp);
679 (void) sprintf(bufp, "lastSignal: %ld\n", tp->lastSignal);
680 bufp += strlen(bufp);
682 (void) sprintf(bufp, "flags: %ld\n", tp->flags);
683 bufp += strlen(bufp);
684 (void) sprintf(bufp, "END bnode_proc\n");
685 bufp += strlen(bufp);
/* Record length is the pointer difference, narrowed to int. */
686 len =(int)(bufp-buffer);
687 if (write(fd, buffer, len) < 0) {
692 * Now sent out the bnode struct
695 (void) sprintf(bufp, "BEGIN bnode\n");
696 bufp += strlen(bufp);
697 (void) sprintf(bufp, "name: %s\n", tb->name);
698 bufp += strlen(bufp);
699 (void) sprintf(bufp, "rsTime: %ld\n", tb->rsTime);
700 bufp += strlen(bufp);
701 (void) sprintf(bufp, "rsCount: %ld\n", tb->rsCount);
702 bufp += strlen(bufp);
703 (void) sprintf(bufp, "procStartTime: %ld\n", tb->procStartTime);
704 bufp += strlen(bufp);
705 (void) sprintf(bufp, "procStarts: %ld\n", tb->procStarts);
706 bufp += strlen(bufp);
707 (void) sprintf(bufp, "lastAnyExit: %ld\n", tb->lastAnyExit);
708 bufp += strlen(bufp);
709 (void) sprintf(bufp, "lastErrorExit: %ld\n", tb->lastErrorExit);
710 bufp += strlen(bufp);
711 (void) sprintf(bufp, "errorCode: %ld\n", tb->errorCode);
712 bufp += strlen(bufp);
713 (void) sprintf(bufp, "errorSignal: %ld\n", tb->errorSignal);
714 bufp += strlen(bufp);
716 (void) sprintf(bufp, "lastErrorName: %s\n", tb->lastErrorName);
717 bufp += strlen(bufp);
719 (void) sprintf(bufp, "goal: %d\n", tb->goal);
720 bufp += strlen(bufp);
721 (void) sprintf(bufp, "END bnode\n");
722 bufp += strlen(bufp);
/* NOTE(review): unlike the first record, this casts each pointer to int
 * before subtracting; where pointers are wider than int the casts truncate.
 * The (int)(bufp-buffer) form used above is the safe spelling. */
723 len = (int)bufp-(int)buffer;
724 if (write(fd, buffer, len) < 0) {
731 /* Called by IOMGR at low priority on IOMGR's stack shortly after a SIGCHLD
732 * occurs. Wakes up bproc to redo things */
/* The wakeup is done by cancelling the select of the LWP identified by
 * bproc_pid. asignal is not used in the visible lines. Implicit-int return. */
733 bnode_SoftInt(int asignal)
735 IOMGR_Cancel(bproc_pid);
/* bnode_Int: handler installed via sigaction for SIGCHLD and SIGQUIT by the
 * initialization code. It does no work at interrupt level; it queues a
 * soft-signal callback with IOMGR_SoftSig, passing the signal number cast to
 * a char pointer as the argument. SIGQUIT queues bozo_ShutdownAndExit; the
 * other visible call queues bnode_SoftInt (the branch structure between the
 * two calls is not fully visible in this excerpt).
 * NOTE(review): an int-to-pointer cast is implementation-defined; confirm the
 * callbacks convert the value back the same way. */
739 /* Called at signal interrupt level; queues function to be called
740 * when IOMGR runs again.
743 bnode_Int(int asignal)
745 extern void bozo_ShutdownAndExit();
747 if (asignal == SIGQUIT) {
748 IOMGR_SoftSig(bozo_ShutdownAndExit, (char *)asignal);
750 IOMGR_SoftSig(bnode_SoftInt, (char *)asignal);
755 /* initialize the whole system */
/* NOTE(review): the function header is not visible in this excerpt.
 * Visible behaviour: returns 0 at once when initDone is already set; zeroes
 * bnode_stats; initializes LWP support; creates the "bnode-manager" LWP
 * running bproc with stack size BNODE_LWP_STACKSIZE, storing its id in
 * bproc_pid; then installs bnode_Int as the handler for SIGCHLD and SIGQUIT.
 * An LWP creation failure returns that code; a sigaction failure returns
 * errno. */
758 register afs_int32 code;
759 struct sigaction newaction;
762 if (initDone) return 0;
764 memset(&bnode_stats, 0, sizeof(bnode_stats));
765 LWP_InitializeProcessSupport(1, &junk); /* just in case */
767 code = LWP_CreateProcess(bproc, BNODE_LWP_STACKSIZE,
768 /* priority */ 1, /* parm */0, "bnode-manager", &bproc_pid);
769 if (code) return code;
/* Zeroing newaction leaves sa_flags at 0 and sa_mask as all-zero bytes. */
770 memset((char *)&newaction, 0, sizeof(newaction));
771 newaction.sa_handler = bnode_Int;
772 code = sigaction(SIGCHLD, &newaction, NULL);
773 if (code) return errno;
774 code = sigaction(SIGQUIT, &newaction, NULL);
775 if (code) return errno;
779 /* free token list returned by parseLine */
/* Walks the list starting at alist, advancing through nlist; nlist is
 * assigned inside the loop body, which is not visible in this excerpt
 * (presumably alist->next saved before the node is freed). */
780 bnode_FreeTokens(alist)
781 register struct bnode_token *alist; {
782 register struct bnode_token *nlist;
783 for(; alist; alist = nlist) {
793 if (x == 0 || x == ' ' || x == '\t' || x== '\n') return 1;
/* Splits aline into whitespace-separated tokens and builds a linked list of
 * bnode_token nodes, each holding a malloc'd copy of one token in key.
 * Characters accumulate in tbuffer until a space or the terminating NUL ends
 * the token. Returns -1 when a token would overflow tbuffer; how the list is
 * handed back through alist is on lines not visible in this excerpt.
 * NOTE(review): the two malloc results are used with no visible NULL check,
 * and the -1 return shows no cleanup of tokens already allocated; confirm
 * against the full function. */
797 bnode_ParseLine(aline, alist)
799 struct bnode_token **alist; {
803 struct bnode_token *first, *last;
804 register struct bnode_token *ttok;
807 inToken = 0; /* not copying token chars at start */
808 first = (struct bnode_token *) 0;
809 last = (struct bnode_token *) 0;
812 if (tc == 0 || space(tc)) { /* terminating null gets us in here, too */
814 inToken = 0; /* end of this token */
816 ttok = (struct bnode_token *) malloc(sizeof(struct bnode_token));
817 ttok->next = (struct bnode_token *) 0;
818 ttok->key = (char *) malloc(strlen(tbuffer)+1);
819 strcpy(ttok->key, tbuffer);
825 if (!first) first = ttok;
829 /* an alpha character */
834 if (tptr - tbuffer >= sizeof(tbuffer)) return -1; /* token too long */
838 /* last token flushed 'cause space(0) --> true */
839 if (last) last->next = (struct bnode_token *) 0;
/* Starts a child process for abnode from the command line aexecString.
 * The line is tokenized first, and a parse error is returned before anything
 * is allocated. A zeroed bnode_proc is then set up, the bnode's
 * procStartTime and procStarts are updated, the tokens are copied into argv
 * (at most MAXVARGS - 1 entries plus the NULL terminator), and the program is
 * launched with spawnprocve using the process environment. The spawn is
 * audited; a failed spawn is logged. The token list is freed on both the
 * failure and the success path. */
847 int bnode_NewProc(abnode, aexecString, coreName, aproc)
848 struct bnode_proc **aproc;
850 struct bnode *abnode;
852 struct bnode_token *tlist, *tt;
854 struct bnode_proc *tp;
856 char *argv[MAXVARGS];
859 code = bnode_ParseLine(aexecString, &tlist); /* try parsing first */
860 if (code) return code;
/* NOTE(review): the malloc result is passed to memset with no visible NULL
 * check. */
861 tp = (struct bnode_proc *) malloc(sizeof(struct bnode_proc));
862 memset(tp, 0, sizeof(struct bnode_proc));
/* Both pointers are stored as given, not copied, so the caller's strings must
 * outlive tp. */
867 tp->comLine = aexecString;
868 tp->coreName = coreName; /* may be null */
869 abnode->procStartTime = FT_ApproxTime();
870 abnode->procStarts++;
872 /* convert linked list of tokens into argv structure */
873 for (tt = tlist, i = 0; i < (MAXVARGS - 1) && tt; tt = tt->next, i++) {
876 argv[i] = NULL; /* null-terminated */
878 cpid = spawnprocve(argv[0], argv, environ, -1);
879 osi_audit(BOSSpawnProcEvent, 0, AUD_STR, aexecString, AUD_END );
881 if (cpid == (pid_t)-1) {
882 bozo_Log("Failed to spawn process for bnode '%s'\n", abnode->name);
883 bnode_FreeTokens(tlist);
888 bnode_FreeTokens(tlist);
/* flags is assigned outright, so the mask on the following line only restates
 * that BPROC_EXITED is clear. */
890 tp->flags = BPROC_STARTED;
891 tp->flags &= ~BPROC_EXITED;
/* Sends asignal to the process described by aproc with kill(), audits the
 * stop request, and then calls bnode_Check on the owning bnode. The first
 * test covers a process that was never started or has already exited; the
 * action taken in that case is on a line not visible in this excerpt. */
896 int bnode_StopProc(aproc, asignal)
897 register struct bnode_proc *aproc;
900 if (!(aproc->flags & BPROC_STARTED) || (aproc->flags & BPROC_EXITED))
/* NOTE(review): aproc was already dereferenced in the test above, so the
 * null check inside this call cannot protect against a NULL aproc. */
903 osi_audit( BOSStopProcEvent, 0, AUD_STR, (aproc ? aproc->comLine : NULL), AUD_END );
905 code = kill(aproc->pid, asignal);
906 bnode_Check(aproc->bnode);
/* Marks a bnode inactive. Returns BZNOTACTIVE when BNODE_ACTIVE is already
 * clear. Otherwise allBnodes is walked and BNODE_ACTIVE is cleared on a
 * visited node; the match test, any list unlinking, and the declaration and
 * assignment of nb are on lines not visible in this excerpt. */
910 int bnode_Deactivate(abnode)
911 register struct bnode *abnode; {
912 register struct bnode **pb, *tb;
914 if (!(abnode->flags & BNODE_ACTIVE)) return BZNOTACTIVE;
915 for(pb = &allBnodes,tb = *pb; tb; tb=nb) {
919 tb->flags &= ~BNODE_ACTIVE;
926 static int DeleteProc(abproc)
927 register struct bnode_proc *abproc; {
928 register struct bnode_proc **pb, *tb;
929 struct bnode_proc *nb;
931 for(pb = &allProcs,tb = *pb; tb; pb = &tb->next, tb=nb) {