2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
15 #include <sys/types.h>
19 #include <sys/socket.h>
20 #include <netinet/in.h>
23 #include <afs/com_err.h>
24 #include <afs/bubasics.h>
26 #include <afs/tcdata.h>
29 #include "error_macros.h"
30 #include "bucoord_internal.h"
31 #include "bucoord_prototypes.h"
/*
 * SET_FLAG ORs the given bits into curPollPtr->flags; CLEAR_FLAG masks the
 * given bits out of curPollPtr->flags. Both are multi-line macros and both
 * refer to a variable named curPollPtr that must be in scope at the point
 * of use (statusWatcher declares one).
 */
33 #define SET_FLAG(set) \
35 curPollPtr->flags |= (set); \
38 #define CLEAR_FLAG(clear) \
40 curPollPtr->flags &= ~(clear); \
/*
 * Defined elsewhere: statusWatcher passes bc_globalConfig and a port number
 * to bc_GetConn to obtain the rx connection it uses for the TC_* calls.
 */
43 extern struct bc_config *bc_globalConfig;
44 extern afs_int32 bc_GetConn(struct bc_config *aconfig, afs_int32 aport, struct rx_connection **tconn);
46 /* globals for backup coordinator status management */
/* statusHead doubles as the end marker: list walks stop when they reach &statusHead. */
48 dlqlinkT statusHead; /* chain of status blocks */
49 struct Lock statusQueueLock; /* access control for status chain */
50 struct Lock cmdLineLock; /* lock on the cmdLine */
52 afs_int32 lastTaskCode; /* Error code from task that last finished */
55 * get next item for status interrogation, if any.
58 nextItem(statusP linkPtr)
/*
 * linkPtr is the status node handed out by the previous call (statusWatcher
 * starts with 0). It is viewed as a generic dlq link for the walk and the
 * result is cast back to statusP.
 */
62 ptr = (dlqlinkP) linkPtr;
64 /* if last known item has terminated, reset ptr */
73 /* if we're back at the head again */
/* NOTE(review): statusWatcher treats a 0 result as "nothing to poll";
 * presumably that is what this head check yields -- confirm. */
74 if (ptr == &statusHead)
76 return ((statusP) ptr);
86 ptr = (dlqlinkP) linkPtr;
/*
 * NOTE(review): this follows the same cast / reset / head-check sequence as
 * nextItem above, except that the head check opens a braced block rather
 * than guarding a single statement. Confirm whether this variant is still
 * compiled in.
 */
88 /* if last known item has terminated, reset ptr */
97 /* if we're back at the head again */
98 if (ptr == &statusHead) {
101 return ((statusP) ptr);
/*
 * cmdDispatch
 *	Splits the command line held in the global cmdLine into an argv
 *	vector with cmd_ParseLine and passes the result to doDispatch.
 *	statusWatcher creates this task with LWP_CreateProcess after moving
 *	a scheduled job's command line into cmdLine. The argument is not
 *	used.
 */
108 cmdDispatch(void *unused)
111 char *targv[MAXV]; /*Ptr to parsed argv stuff */
112 afs_int32 targc; /*Num parsed arguments */
114 char *internalCmdLine;
/* Work from a private copy of the pointer rather than the global.
 * NOTE(review): statusWatcher takes lock_cmdLine() with the remark
 * "Will unlock in cmdDispatch"; confirm the unlock happens near here. */
116 internalCmdLine = cmdLine;
119 code = cmd_ParseLine(internalCmdLine, targv, &targc, MAXV);
121 printf("Couldn't parse line: '%s'", afs_error_message(code));
/* The string handed over by statusWatcher is released here.
 * NOTE(review): targv is still used below, so cmd_ParseLine presumably
 * copies each argument -- confirm. */
124 free(internalCmdLine);
127 * Because the "-at" option cannot be wildcarded, we cannot fall
128 * into recusive loop here by setting dispatchCount to 1.
130 doDispatch(targc, targv, 1);
/*
 * statusWatcher
 *	Services the nodes on the status chain one at a time, taking the
 *	next node from nextItem(). For the current node it:
 *	  - drops or neutralises a scheduled job aborted before it started;
 *	  - launches a scheduled dump whose start time has arrived by
 *	    handing its command line to a new cmdDispatch task;
 *	  - forwards a pending abort request to the TC (butc) at the job's
 *	    port with TC_RequestAbort;
 *	  - otherwise refreshes the local copy of the task's status from
 *	    TC_GetStatus, prints a completion message when the task is done,
 *	    and calls TC_EndStatus.
 *	Nodes flagged NOREMOVE are kept and marked instead of being deleted.
 *	When nextItem() returns 0 the task sleeps for 5 seconds.
 *	The argument is not used.
 */
136 statusWatcher(void *unused)
138 struct rx_connection *tconn = NULL;
139 statusP curPollPtr = 0;
141 struct tciStatusS statusPtr; /* filled in by TC_GetStatus (a struct, not a pointer) */
143 /* task information */
144 afs_uint32 taskFlags; /* flags reported by the TC */
145 afs_uint32 localTaskFlags; /* snapshot of curPollPtr->flags for this pass */
146 afs_uint32 temp; /* for flag manipulation */
159 rx_DestroyConnection(tconn);
163 curPollPtr = nextItem(curPollPtr);
165 if (curPollPtr == 0) {
166 #ifdef AFS_PTHREAD_ENV
/* Must be spelled delayTime: that is the name the three lines below use. */
167 struct timespec delayTime;
169 delayTime.tv_sec = 5;
170 delayTime.tv_nsec = 0;
171 pthread_delay_np(&delayTime);
174 IOMGR_Sleep(5); /* wait a while */
175 #endif /*else AFS_PTHREAD_ENV */
179 /* save useful information */
180 localTaskFlags = curPollPtr->flags;
181 taskId = curPollPtr->taskId;
182 port = curPollPtr->port;
183 atTime = curPollPtr->scheduledDump;
184 jobNumber = curPollPtr->jobNumber;
187 /* reset certain flags; local kill; */
/* Only the node's copy is cleared; the snapshot in localTaskFlags still
 * carries ABORT_LOCAL for the local-kill test further down. */
188 CLEAR_FLAG(ABORT_LOCAL);
190 /* An abort request before the command even started */
191 if (atTime && (localTaskFlags & ABORT_REQUEST)) {
192 if (localTaskFlags & NOREMOVE) {
193 curPollPtr->flags |= (STARTING | ABORT_DONE); /* Will ignore on other passes */
194 curPollPtr->scheduledDump = 0;
196 deleteStatusNode(curPollPtr);
202 /* A task not started yet - check its start time */
203 if (localTaskFlags & STARTING || atTime) {
205 * Start a timed dump if its time has come. When the job is
206 * started, it will allocate its own status structure so this
207 * one is no longer needed: delete it.
209 * Avoid multiple processes trouncing the cmdLine by placing
212 if (atTime && (atTime <= time(0))) {
213 lock_cmdLine(); /* Will unlock in cmdDispatch */
/* Hand the command string over to the global cmdLine and detach it from
 * the node; cmdDispatch reads cmdLine and frees the string. */
215 cmdLine = curPollPtr->cmdLine;
217 curPollPtr->cmdLine = 0;
220 printf("Starting scheduled dump: job %d\n", jobNumber);
221 printf("schedD> %s\n", cmdLine);
/* The (void *)2 argument is not examined by cmdDispatch. */
224 LWP_CreateProcess(cmdDispatch, 16384, LWP_NORMAL_PRIORITY,
225 (void *)2, "cmdDispatch", &dispatchPid);
230 printf("Couldn't create cmdDispatch task\n");
233 if (localTaskFlags & NOREMOVE) {
234 curPollPtr->flags |= STARTING; /* Will ignore on other passes */
/* NOTE(review): code is presumably the LWP_CreateProcess result -- confirm. */
235 curPollPtr->flags |= (code ? TASK_ERROR : TASK_DONE);
236 curPollPtr->scheduledDump = 0;
238 deleteStatusNode(curPollPtr);
245 if (localTaskFlags & ABORT_LOCAL) {
246 /* kill the local task */
247 if ((localTaskFlags & CONTACT_LOST) != 0) {
248 printf("Job %d: in contact with butc at port %d\n", jobNumber,
250 printf("Job %d cont: Local kill ignored - use normal kill\n",
/* Everything below talks to the TC for this job's port. */
255 code = (afs_int32) bc_GetConn(bc_globalConfig, port, &tconn);
257 SET_FLAG(CONTACT_LOST);
261 if (CheckTCVersion(tconn)) {
262 SET_FLAG(CONTACT_LOST);
266 /* Send abort request to TC if we have to */
267 if (localTaskFlags & ABORT_REQUEST) {
268 code = TC_RequestAbort(tconn, taskId);
270 afs_com_err("statusWatcher", code, "; Can't post abort request");
271 afs_com_err("statusWatcher", 0, "...Deleting job");
272 if (localTaskFlags & NOREMOVE) {
273 curPollPtr->flags |= (STARTING | TASK_ERROR);
274 curPollPtr->scheduledDump = 0;
276 deleteStatusNode(curPollPtr);
/* Request delivered: swap ABORT_REQUEST for ABORT_SENT on the node. */
282 curPollPtr->flags &= ~ABORT_REQUEST;
283 curPollPtr->flags |= ABORT_SENT;
288 /* otherwise just get the status */
289 code = TC_GetStatus(tconn, taskId, &statusPtr);
291 if (code == TC_NODENOTFOUND) {
292 printf("Job %d: %s - no such task on port %d, deleting\n",
293 jobNumber, curPollPtr->taskName, port);
295 if (localTaskFlags & NOREMOVE) {
296 curPollPtr->flags |= (STARTING | TASK_ERROR);
297 curPollPtr->scheduledDump = 0;
299 deleteStatusNode(curPollPtr); /* delete this status node */
305 SET_FLAG(CONTACT_LOST);
309 /* in case we previously lost contact or couldn't find */
310 CLEAR_FLAG(CONTACT_LOST);
312 /* extract useful status */
313 taskFlags = statusPtr.flags;
315 /* update local status */
318 /* remember some status flags in local struct */
320 (DRIVE_WAIT | OPR_WAIT | CALL_WAIT | TASK_DONE | ABORT_DONE |
322 curPollPtr->flags &= ~temp; /* clear */
323 curPollPtr->flags |= (taskFlags & temp); /* update */
325 curPollPtr->dbDumpId = statusPtr.dbDumpId;
326 curPollPtr->nKBytes = statusPtr.nKBytes;
/* NOTE(review): unbounded copy of a name supplied by the TC; confirm the
 * destination is at least as large as statusPtr.volumeName. */
327 strcpy(curPollPtr->volumeName, statusPtr.volumeName);
328 curPollPtr->volsFailed = statusPtr.volsFailed;
329 curPollPtr->lastPolled = statusPtr.lastPolled;
/* Completion reporting: aborted, failed, or finished normally. */
333 if (taskFlags & TASK_DONE) { /*done */
334 if (taskFlags & ABORT_DONE) {
335 if (curPollPtr->dbDumpId)
336 printf("Job %d: %s: DumpID %u Aborted", jobNumber,
337 curPollPtr->taskName, curPollPtr->dbDumpId);
339 printf("Job %d: %s Aborted", jobNumber,
340 curPollPtr->taskName);
342 if (taskFlags & TASK_ERROR)
343 printf(" with errors\n");
350 else if (taskFlags & TASK_ERROR) {
351 if (!(localTaskFlags & SILENT)) {
352 if (curPollPtr->dbDumpId)
353 printf("Job %d: DumpID %u Failed with errors\n",
354 jobNumber, curPollPtr->dbDumpId);
356 printf("Job %d Failed with errors\n", jobNumber);
362 if (!(localTaskFlags & SILENT)) {
363 if (curPollPtr->dbDumpId)
364 printf("Job %d: %s: DumpID %u finished", jobNumber,
365 curPollPtr->taskName, curPollPtr->dbDumpId);
367 printf("Job %d: %s finished", jobNumber,
368 curPollPtr->taskName);
370 if (curPollPtr->volsTotal) {
371 printf(". %d volumes dumped",
372 (curPollPtr->volsTotal -
373 curPollPtr->volsFailed));
374 if (curPollPtr->volsFailed)
375 printf(", %d failed", curPollPtr->volsFailed);
383 /* make call to destroy task on server */
384 code = TC_EndStatus(tconn, taskId);
386 printf("Job %d: %s, error in job termination cleanup\n",
387 jobNumber, curPollPtr->taskName);
389 if (localTaskFlags & NOREMOVE) {
390 curPollPtr->flags |= STARTING;
391 curPollPtr->scheduledDump = 0;
393 deleteStatusNode(curPollPtr); /* unlink and destroy local task */
402 * Allocate a job number. Computes the maximum of all the job numbers
403 * and then returns the maximum+1.
404 * If no jobs are found, returns 1.
410 afs_int32 retval = 0;
/* Visit every node on the chain, starting after the head and stopping when
 * the walk returns to &statusHead; retval tracks the largest jobNumber seen
 * and stays 0 if the chain is empty. */
413 ptr = statusHead.dlq_next;
414 while (ptr != &statusHead) {
415 /* compute max of all job numbers */
416 if (((statusP) ptr)->jobNumber > retval)
417 retval = ((statusP) ptr)->jobNumber;
426 * Wait for a specific task to finish and then return.
427 * Return the task's flags when it's done. If the job
428 * had been cleaned up, then just return 0.
431 waitForTask(afs_uint32 taskId)
/*
 * taskId identifies the status node to watch (looked up with findStatus).
 * The result code is the node's flags once any of TASK_DONE, ABORT_DONE or
 * TASK_ERROR is set, or 0 if the node can no longer be found.
 */
434 afs_int32 done = 0, rcode = 0, t;
/* t is the mask of flag bits that mean the task has reached a final state. */
436 t = (TASK_DONE | ABORT_DONE | TASK_ERROR);
438 /* Sleep 2 seconds */
439 #ifdef AFS_PTHREAD_ENV
/* Must be spelled delayTime: that is the name the three lines below use. */
440 struct timespec delayTime;
441 delayTime.tv_sec = 2;
442 delayTime.tv_nsec = 0;
443 pthread_delay_np(&delayTime);
446 #endif /*else AFS_PTHREAD_ENV */
448 /* Check if we are done */
450 ptr = findStatus(taskId);
/* A missing node counts as finished: it has already been cleaned up. */
451 if (!ptr || (ptr->flags & t)) {
452 rcode = (ptr ? ptr->flags : 0);