2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
11 #include <afs/param.h>
17 #include <sys/types.h>
23 #include <sys/socket.h>
24 #include <netinet/in.h>
27 #include <afs/com_err.h>
28 #include <afs/bubasics.h>
30 #include <afs/tcdata.h>
32 #include "error_macros.h"
/*
 * Flag-update helpers: SET_FLAG ORs bits into curPollPtr->flags and
 * CLEAR_FLAG masks bits out of it.  Both expand in place and rely on a
 * variable named curPollPtr being in scope at the point of use.
 * NOTE(review): every visible line of each definition ends in a
 * line-continuation backslash, so the macro bodies extend beyond what is
 * shown here - confirm the remaining line(s) against the full file.
 */
34 #define SET_FLAG(set) \
36 curPollPtr->flags |= (set); \
39 #define CLEAR_FLAG(clear) \
41 curPollPtr->flags &= ~(clear); \
44 extern struct bc_config *bc_globalConfig; /* defined elsewhere; handed to bc_GetConn() below */
/* Called by the polling code with a port number; hands back a connection through tconn. */
45 extern afs_int32 bc_GetConn(struct bc_config *aconfig, afs_int32 aport, struct rx_connection **tconn);
/* Looks up a status entry by task id; callers in this file check the result for null before use. */
46 extern statusP findStatus(afs_uint32 taskId);
48 /* globals for backup coordinator status management */
50 dlqlinkT statusHead; /* chain of status blocks */
51 struct Lock statusQueueLock; /* access control for status chain */
52 struct Lock cmdLineLock; /* lock on the cmdLine */
54 afs_int32 lastTaskCode; /* Error code from task that last finished */
57 * get next item for status interrogation, if any.
/*
 * List-traversal fragments (the function headers are outside the visible
 * lines).  Each casts the caller's entry to a dlqlinkP, and compares the
 * link it ends up with against the &statusHead sentinel before returning
 * it as a statusP.
 */
65 ptr = (dlqlinkP) linkPtr;
67 /* if last known item has terminated, reset ptr */
76 /* if we're back at the head again */
77 if (ptr == &statusHead)
79 return ((statusP) ptr);
/*
 * Second variant of the same traversal: identical cast and comments, but
 * its head-of-list check opens a braced block whose body is not visible
 * here.
 */
89 ptr = (dlqlinkP) linkPtr;
91 /* if last known item has terminated, reset ptr */
100 /* if we're back at the head again */
101 if (ptr == &statusHead) {
104 return ((statusP) ptr);
/*
 * Command-dispatch fragment (the function header is outside the visible
 * lines): the pending command line is parsed with cmd_ParseLine() and the
 * buffer it lived in is released with free().
 */
114 char **targv[MAXV]; /*Ptr to parsed argv stuff */
/* NOTE(review): targv is an array of char ** yet is passed to cmd_ParseLine() as the argv output - confirm the element type against that prototype. */
115 afs_int32 targc; /*Num parsed arguments */
117 char *internalCmdLine;
/* Local alias of cmdLine; this same pointer is what free() receives below. */
119 internalCmdLine = cmdLine;
122 code = cmd_ParseLine(internalCmdLine, targv, &targc, MAXV);
/* Prints error_message(code) (the error text, not the offending line); the format has no trailing newline. */
124 printf("Couldn't parse line: '%s'", error_message(code));
127 free(internalCmdLine);
130 * Because the "-at" option cannot be wildcarded, we cannot fall
131 * into a recursive loop here by setting dispatchCount to 1.
133 doDispatch(targc, targv, 1);
/*
 * Status-polling logic; the function header lies outside the visible lines.
 * Visible flow for one pass: reset the connection handle, pick the next
 * status entry with nextItem(), snapshot its fields, then handle in turn
 *   - an abort requested before a scheduled job started,
 *   - a job that is still starting or waiting for its scheduled time,
 *   - a local-abort request,
 *   - an abort request that has to be forwarded with TC_RequestAbort(),
 *   - a TC_GetStatus() fetch whose result is folded back into the entry,
 *     followed by completion reporting and TC_EndStatus() cleanup.
 * Entries flagged NOREMOVE are marked in place; deleteStatusNode() is the
 * other visible outcome at each of those points.
 */
140 struct rx_connection *tconn = (struct rx_connection *)0;
141 statusP curPollPtr = 0;
143 struct tciStatusS statusPtr; /* filled in by TC_GetStatus() below */
145 /* task information */
146 afs_uint32 taskFlags;
147 afs_uint32 localTaskFlags;
148 afs_uint32 temp; /* for flag manipulation */
/* Drop the connection handle and reset it; a fresh one is requested through bc_GetConn() further down. */
161 rx_DestroyConnection(tconn);
162 tconn = (struct rx_connection *)0;
165 curPollPtr = nextItem(curPollPtr);
/* No entry to poll: pause 5 seconds (pthread_delay_np under AFS_PTHREAD_ENV, IOMGR_Sleep otherwise). */
167 if (curPollPtr == 0) {
168 #ifdef AFS_PTHREAD_ENV
169 struct timespec delayTime;
171 delayTime.tv_sec = 5;
172 delayTime.tv_nsec = 0;
173 pthread_delay_np(&delayTime);
176 IOMGR_Sleep(5); /* wait a while */
177 #endif /*else AFS_PTHREAD_ENV */
181 /* save useful information */
182 localTaskFlags = curPollPtr->flags;
183 taskId = curPollPtr->taskId;
184 port = curPollPtr->port;
185 atTime = curPollPtr->scheduledDump;
186 jobNumber = curPollPtr->jobNumber;
189 /* reset certain flags; local kill; */
190 CLEAR_FLAG(ABORT_LOCAL);
192 /* An abort request before the command even started */
193 if (atTime && (localTaskFlags & ABORT_REQUEST)) {
194 if (localTaskFlags & NOREMOVE) {
195 curPollPtr->flags |= (STARTING | ABORT_DONE); /* Will ignore on other passes */
196 curPollPtr->scheduledDump = 0;
198 deleteStatusNode(curPollPtr);
204 /* A task not started yet - check its start time */
205 if (localTaskFlags & STARTING || atTime) {
207 * Start a timed dump if its time has come. When the job is
208 * started, it will allocate its own status structure so this
209 * one is no longer needed: delete it.
211 * Avoid multiple processes trouncing the cmdLine by placing
214 if (atTime && (atTime <= time(0))) {
215 lock_cmdLine(); /* Will unlock in cmdDispatch */
/* Take over the entry's command string and clear the entry's own pointer to it. */
217 cmdLine = curPollPtr->cmdLine;
219 curPollPtr->cmdLine = 0;
/* Announce the job, then start cmdDispatch through LWP_CreateProcess(). */
222 printf("Starting scheduled dump: job %d\n", jobNumber);
223 printf("schedD> %s\n", cmdLine);
226 LWP_CreateProcess(cmdDispatch, 16384, LWP_NORMAL_PRIORITY,
227 (void *)2, "cmdDispatch", &dispatchPid);
232 printf("Couldn't create cmdDispatch task\n");
/* A kept (NOREMOVE) entry records the outcome: TASK_ERROR when code is non-zero, TASK_DONE otherwise. */
235 if (localTaskFlags & NOREMOVE) {
236 curPollPtr->flags |= STARTING; /* Will ignore on other passes */
237 curPollPtr->flags |= (code ? TASK_ERROR : TASK_DONE);
238 curPollPtr->scheduledDump = 0;
240 deleteStatusNode(curPollPtr);
247 if (localTaskFlags & ABORT_LOCAL) {
248 /* kill the local task */
/* NOTE(review): the message below says contact exists, yet the test is for the CONTACT_LOST bit being set - confirm the intended condition. */
249 if ((localTaskFlags & CONTACT_LOST) != 0) {
250 printf("Job %d: in contact with butc at port %d\n", jobNumber,
252 printf("Job %d cont: Local kill ignored - use normal kill\n",
/* Obtain a connection for this entry's port; CONTACT_LOST is set on the entry in the lines that follow (the guarding condition for the first SET_FLAG is not visible here). */
257 code = (afs_int32) bc_GetConn(bc_globalConfig, port, &tconn);
259 SET_FLAG(CONTACT_LOST);
/* A non-zero result from CheckTCVersion() is likewise recorded as lost contact. */
263 if (CheckTCVersion(tconn)) {
264 SET_FLAG(CONTACT_LOST);
268 /* Send abort request to TC if we have to */
269 if (localTaskFlags & ABORT_REQUEST) {
270 code = TC_RequestAbort(tconn, taskId);
272 com_err("statusWatcher", code, "; Can't post abort request");
273 com_err("statusWatcher", 0, "...Deleting job");
274 if (localTaskFlags & NOREMOVE) {
275 curPollPtr->flags |= (STARTING | TASK_ERROR);
276 curPollPtr->scheduledDump = 0;
278 deleteStatusNode(curPollPtr);
/* Swap the request bit for the sent bit on the entry. */
284 curPollPtr->flags &= ~ABORT_REQUEST;
285 curPollPtr->flags |= ABORT_SENT;
290 /* otherwise just get the status */
291 code = TC_GetStatus(tconn, taskId, &statusPtr);
293 if (code == TC_NODENOTFOUND) {
294 printf("Job %d: %s - no such task on port %d, deleting\n",
295 jobNumber, curPollPtr->taskName, port);
297 if (localTaskFlags & NOREMOVE) {
298 curPollPtr->flags |= (STARTING | TASK_ERROR);
299 curPollPtr->scheduledDump = 0;
301 deleteStatusNode(curPollPtr); /* delete this status node */
307 SET_FLAG(CONTACT_LOST);
311 /* in case we previously lost contact or couldn't find */
312 CLEAR_FLAG(CONTACT_LOST);
314 /* extract useful status */
315 taskFlags = statusPtr.flags;
317 /* update local status */
320 /* remember some status flags in local struct */
322 (DRIVE_WAIT | OPR_WAIT | CALL_WAIT | TASK_DONE | ABORT_DONE |
324 curPollPtr->flags &= ~temp; /* clear */
325 curPollPtr->flags |= (taskFlags & temp); /* update */
327 curPollPtr->dbDumpId = statusPtr.dbDumpId;
328 curPollPtr->nKBytes = statusPtr.nKBytes;
/* NOTE(review): unbounded copy - relies on statusPtr.volumeName fitting in curPollPtr->volumeName; confirm the two buffer sizes. */
329 strcpy(curPollPtr->volumeName, statusPtr.volumeName);
330 curPollPtr->volsFailed = statusPtr.volsFailed;
331 curPollPtr->lastPolled = statusPtr.lastPolled;
/* Report the final state of a finished task: aborted, failed with errors, or finished. */
335 if (taskFlags & TASK_DONE) { /*done */
336 if (taskFlags & ABORT_DONE) {
337 if (curPollPtr->dbDumpId)
338 printf("Job %d: %s: DumpID %u Aborted", jobNumber,
339 curPollPtr->taskName, curPollPtr->dbDumpId);
341 printf("Job %d: %s Aborted", jobNumber,
342 curPollPtr->taskName);
344 if (taskFlags & TASK_ERROR)
345 printf(" with errors\n");
352 else if (taskFlags & TASK_ERROR) {
353 if (!(localTaskFlags & SILENT)) {
354 if (curPollPtr->dbDumpId)
355 printf("Job %d: DumpID %u Failed with errors\n",
356 jobNumber, curPollPtr->dbDumpId);
358 printf("Job %d Failed with errors\n", jobNumber);
364 if (!(localTaskFlags & SILENT)) {
365 if (curPollPtr->dbDumpId)
366 printf("Job %d: %s: DumpID %u finished", jobNumber,
367 curPollPtr->taskName, curPollPtr->dbDumpId);
369 printf("Job %d: %s finished", jobNumber,
370 curPollPtr->taskName);
/* Volume summary: dumped = volsTotal - volsFailed, plus the failure count when non-zero. */
372 if (curPollPtr->volsTotal) {
373 printf(". %d volumes dumped",
374 (curPollPtr->volsTotal -
375 curPollPtr->volsFailed));
376 if (curPollPtr->volsFailed)
377 printf(", %d failed", curPollPtr->volsFailed);
385 /* make call to destroy task on server */
386 code = TC_EndStatus(tconn, taskId);
388 printf("Job %d: %s, error in job termination cleanup\n",
389 jobNumber, curPollPtr->taskName);
391 if (localTaskFlags & NOREMOVE) {
392 curPollPtr->flags |= STARTING;
393 curPollPtr->scheduledDump = 0;
395 deleteStatusNode(curPollPtr); /* unlink and destroy local task */
403 * Allocate a job number. Computes the maximum of all the job numbers
404 * and then returns the maximum+1.
405 * If no jobs are found, returns 1.
411 afs_int32 retval = 0; /* running maximum of the job numbers seen so far */
/* Walk the chain from the first link after statusHead until the head is reached again. */
414 ptr = statusHead.dlq_next;
415 while (ptr != &statusHead) {
416 /* compute max of all job numbers */
417 if (((statusP) ptr)->jobNumber > retval)
418 retval = ((statusP) ptr)->jobNumber;
427 * Wait for a specific task to finish and then return.
428 * Return the task's flags when it's done. If the job
429 * had been cleaned up, then just return 0.
435 afs_int32 done = 0, rcode, t;
437 t = (TASK_DONE | ABORT_DONE | TASK_ERROR);
439 /* Sleep 2 seconds */
440 #ifdef AFS_PTHREAD_ENV
441 struct timespec delaytime;
442 delayTime.tv_sec = 2;
443 delayTime.tv_nsec = 0;
444 pthread_delay_np(&delayTime);
447 #endif /*else AFS_PTHREAD_ENV */
449 /* Check if we are done */
451 ptr = findStatus(taskId);
452 if (!ptr || (ptr->flags & t)) {
453 rcode = (ptr ? ptr->flags : 0);