#include <afsconfig.h>
#include <afs/param.h>
+#include <afs/procmgmt.h>
+#include <roken.h>
#include <stddef.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <dirent.h>
-#include <errno.h>
-#include <sys/types.h>
-#ifdef HAVE_STDINT_H
-# include <stdint.h>
-#endif
-#ifdef AFS_NT40_ENV
-#include <io.h>
-#else
-#include <sys/file.h>
-#include <sys/time.h>
-#endif
-#ifdef BOZO_SAVE_CORES
-#include <time.h>
-#endif
-#include <sys/stat.h>
-#include <string.h>
-#include <afs/procmgmt.h> /* signal(), kill(), wait(), etc. */
#include <lwp.h>
#include <rx/rx.h>
#include <afs/audit.h>
#include <afs/afsutil.h>
#include <afs/fileutil.h>
+
#include "bnode.h"
#include "bosprototypes.h"
#endif
#define BNODE_LWP_STACKSIZE (16 * 1024)
+#define BNODE_ERROR_COUNT_MAX 16 /* maximum number of retries */
int bnode_waiting = 0;
static PROCESS bproc_pid; /* pid of waker-upper */
} bnode_stats;
extern const char *DoCore;
+extern const char *DoPidFiles;
#ifndef AFS_NT40_ENV
extern char **environ; /* env structure */
#endif
free(tbnodep->lastErrorName);
tbnodep->lastErrorName = NULL;
}
- if (ap->coreName) {
- tbnodep->lastErrorName = (char *)malloc(strlen(ap->coreName) + 1);
- strcpy(tbnodep->lastErrorName, ap->coreName);
- }
+ if (ap->coreName)
+ tbnodep->lastErrorName = strdup(ap->coreName);
}
/* utility for use by BOP_HASCORE functions to determine where a core file might
if (code) {
DIR *logdir;
struct dirent *file;
- size_t length;
unsigned long pid;
const char *coredir = AFSDIR_LOGS_DIR;
continue;
pid = atol(file->d_name + 5);
if (pid == aproc->pid) {
- length = strlen(coredir) + strlen(file->d_name) + 2;
- corefile = malloc(length);
+ asprintf(&corefile, "%s/%s", coredir, file->d_name);
if (corefile == NULL) {
closedir(logdir);
return;
}
- snprintf(corefile, length, "%s/%s", coredir, file->d_name);
code = 0;
break;
}
}
int
+bnode_ResetErrorCount(struct bnode *abnode) /* clear abnode's error-stop retry bookkeeping; always returns 0 */
+{
+ abnode->errorStopCount = 0; /* counter that the repeated-failure handling caps at BNODE_ERROR_COUNT_MAX */
+ abnode->errorStopDelay = 0; /* delay value that the repeated-failure handling starts at 1 and then doubles */
+ return 0;
+}
+
+int
bnode_SetStat(struct bnode *abnode, int agoal)
{
abnode->goal = agoal;
/* format the bnode properly */
memset(abnode, 0, sizeof(struct bnode));
abnode->ops = abnodeops;
- abnode->name = (char *)malloc(strlen(aname) + 1);
+ abnode->name = strdup(aname);
if (!abnode->name)
return ENOMEM;
- strcpy(abnode->name, aname);
abnode->flags = BNODE_ACTIVE;
abnode->fileGoal = BSTAT_NORMAL;
abnode->goal = BSTAT_SHUTDOWN;
tb = tp->bnode;
bnode_Hold(tb);
- /* count restarts in last 10 seconds */
+ /* count restarts in last 30 seconds */
if (temp > tb->rsTime + 30) {
- /* it's been 10 seconds we've been counting */
+ /* it's been 30 seconds we've been counting */
tb->rsTime = temp;
tb->rsCount = 0;
}
+
if (WIFSIGNALED(status) == 0) {
/* exited, not signalled */
tp->lastExit = WEXITSTATUS(status);
tb->notifier);
hdl_notifier(tp);
}
- BOP_PROCEXIT(tb, tp);
- bnode_Check(tb);
- if (tb->rsCount++ > 10) {
- /* 10 in 10 seconds */
+ if (tb->goal && tb->rsCount++ > 10) {
+ /* 10 in 30 seconds */
+ if (tb->errorStopCount >= BNODE_ERROR_COUNT_MAX) {
+ tb->errorStopDelay = 0; /* max reached, give up. */
+ } else {
+ tb->errorStopCount++;
+ if (!tb->errorStopDelay) {
+ tb->errorStopDelay = 1;
+ } else {
+ tb->errorStopDelay *= 2;
+ }
+ }
tb->flags |= BNODE_ERRORSTOP;
bnode_SetGoal(tb, BSTAT_SHUTDOWN);
bozo_Log
("BNODE '%s' repeatedly failed to start, perhaps missing executable.\n",
tb->name);
}
+ BOP_PROCEXIT(tb, tp);
+ bnode_Check(tb);
bnode_Release(tb); /* bnode delete can happen here */
DeleteProc(tp);
} else
hdl_notifier(struct bnode_proc *tp)
{
#ifndef AFS_NT40_ENV /* NT notifier callout not yet implemented */
- int code, pid;
+ int pid;
struct stat tstat;
if (stat(tp->bnode->notifier, &tstat)) {
if ((pid = fork()) == 0) {
FILE *fout;
struct bnode *tb = tp->bnode;
- int ec;
#if defined(AFS_HPUX_ENV) || defined(AFS_SUN5_ENV) || defined(AFS_SGI51_ENV)
- ec = setsid();
+ setsid();
#elif defined(AFS_DARWIN90_ENV)
- ec = setpgid(0, 0);
+ setpgid(0, 0);
#elif defined(AFS_LINUX20_ENV) || defined(AFS_AIX_ENV)
- ec = setpgrp();
+ setpgrp();
#else
- ec = setpgrp(0, 0);
+ setpgrp(0, 0);
#endif
fout = popen(tb->notifier, "w");
if (fout == NULL) {
perror(tb->notifier);
exit(1);
}
- code = SendNotifierData(fileno(fout), tp);
+ SendNotifierData(fileno(fout), tp);
pclose(fout);
exit(0);
} else if (pid < 0) {
ttok =
(struct bnode_token *)malloc(sizeof(struct bnode_token));
ttok->next = (struct bnode_token *)0;
- ttok->key = (char *)malloc(strlen(tbuffer) + 1);
- strcpy(ttok->key, tbuffer);
+ ttok->key = strdup(tbuffer);
if (last) {
last->next = ttok;
last = ttok;
return 0;
}
}
- return 0;
}
#define MAXVARGS 128
free(tp);
return errno;
}
+ bozo_Log("%s started pid %ld: %s\n", abnode->name, cpid, aexecString);
bnode_FreeTokens(tlist);
allProcs = tp;
tp->pid = cpid;
tp->flags = BPROC_STARTED;
tp->flags &= ~BPROC_EXITED;
+ BOP_PROCSTARTED(abnode, tp);
bnode_Check(abnode);
return 0;
}