/*
- * Copyright 2006, Sine Nomine Associates and others.
+ * Copyright 2006-2007, Sine Nomine Associates and others.
* All Rights Reserved.
*
* This software has been released under the terms of the IBM Public
#include <afsconfig.h>
#include <afs/param.h>
-RCSID
- ("$Header$");
#include <stdlib.h>
#include <stdio.h>
#include <sys/time.h>
#endif /* ITIMER_REAL */
#endif
-#if defined(AFS_AIX_ENV) || defined(AFS_SUN4_ENV)
-#define WCOREDUMP(x) (x & 0200)
+#ifndef WCOREDUMP
+#define WCOREDUMP(x) ((x) & 0200)
#endif
#include <rx/xdr.h>
#include <afs/afsint.h>
#include <afs/afsutil.h>
#include <afs/fileutil.h>
#include <afs/procmgmt.h> /* signal(), kill(), wait(), etc. */
+#include <afs/dir.h>
#ifndef AFS_NT40_ENV
#include <syslog.h>
#endif
#include "salvsync.h"
#include "viceinode.h"
#include "salvage.h"
-#include "volinodes.h" /* header magic number, etc. stuff */
#include "vol-salvage.h"
#ifdef AFS_NT40_ENV
#include <pthread.h>
static void * SalvageLogCleanupThread(void *);
static int SalvageLogCleanup(int pid);
+static void * SalvageLogScanningThread(void *);
+static void ScanLogs(struct rx_queue *log_watch_queue);
+
struct log_cleanup_node {
struct rx_queue q;
int pid;
#define DEFAULT_PARALLELISM 4 /* allow 4 parallel salvage workers by default */
static int
-handleit(struct cmd_syndesc *as)
+handleit(struct cmd_syndesc *as, void *arock)
{
register struct cmd_item *ti;
char pname[100], *temp;
- afs_int32 seenpart = 0, seenvol = 0, vid = 0, seenany = 0;
- struct DiskPartition *partP;
-
+ afs_int32 seenpart = 0, seenvol = 0, vid = 0;
#ifdef AFS_SGI_VNODE_GLUE
if (afs_init_kernel_config(-1) < 0) {
}
if ((ti = as->parms[15].items)) { /* -datelogs */
- TimeStampLogFile(AFSDIR_SERVER_SALSRVLOG_FILEPATH);
+ TimeStampLogFile((char *)AFSDIR_SERVER_SALSRVLOG_FILEPATH);
}
#endif
vid = atoi(ti->data);
}
+ if (ShowLog) {
+ printf("-showlog does not work with -client\n");
+ exit(-1);
+ }
+
if (!seenpart || !seenvol) {
printf("You must specify '-partition' and '-volumeid' with the '-client' option\n");
exit(-1);
int err = 0;
int i;
- extern char cml_version_number[];
#ifdef AFS_AIX32_ENV
/*
}
#endif
- ts = cmd_CreateSyntax("initcmd", handleit, 0, "initialize the program");
+ ts = cmd_CreateSyntax("initcmd", handleit, NULL, "initialize the program");
cmd_AddParm(ts, "-partition", CMD_SINGLE, CMD_OPTIONAL,
"Name of partition to salvage");
cmd_AddParm(ts, "-volumeid", CMD_SINGLE, CMD_OPTIONAL,
err = cmd_Dispatch(argc, argv);
Exit(err);
+ return 0; /* not reached */
}
static void
afs_int32 code;
SYNC_response res;
SALVSYNC_response_hdr sres;
+ VolumePackageOptions opts;
- VInitVolumePackage(volumeUtility, 5, 5, DONT_CONNECT_FS, 0);
+ VOptDefaults(volumeUtility, &opts);
+ VInitVolumePackage2(volumeUtility, &opts);
SALVSYNC_clientInit();
code = SALVSYNC_SalvageVolume(vid, pname, SALVSYNC_SALVAGE, SALVSYNC_OPERATOR, 0, NULL);
pthread_t tid;
pthread_attr_t attrs;
int slot;
+ VolumePackageOptions opts;
/* All entries to the log will be appended. Useful if there are
* multiple salvagers appending to the log.
*/
- CheckLogFile(AFSDIR_SERVER_SALSRVLOG_FILEPATH);
+ CheckLogFile((char *)AFSDIR_SERVER_SALSRVLOG_FILEPATH);
#ifndef AFS_NT40_ENV
#ifdef AFS_LINUX20_ENV
fcntl(fileno(logFile), F_SETFL, O_APPEND); /* Isn't this redundant? */
/* Get and hold a lock for the duration of the salvage to make sure
* that no other salvage runs at the same time. The routine
- * VInitVolumePackage (called below) makes sure that a file server or
+ * VInitVolumePackage2 (called below) makes sure that a file server or
* other volume utilities don't interfere with the salvage.
*/
child_slot = (int *) malloc(Parallel * sizeof(int));
assert(child_slot != NULL);
memset(child_slot, 0, Parallel * sizeof(int));
-
+
/* initialize things */
- VInitVolumePackage(salvageServer, 5, 5,
- 1, 0);
+ VOptDefaults(salvageServer, &opts);
+ VInitVolumePackage2(salvageServer, &opts);
DInit(10);
queue_Init(&pending_q);
queue_Init(&log_cleanup_queue);
&attrs,
&SalvageLogCleanupThread,
NULL) == 0);
+ assert(pthread_create(&tid,
+ &attrs,
+ &SalvageLogScanningThread,
+ NULL) == 0);
/* loop forever serving requests */
while (1) {
node = SALVSYNC_getWork();
assert(node != NULL);
+ Log("dispatching child to salvage volume %u...\n",
+ node->command.sop.parent);
+
VOL_LOCK;
/* find a slot */
for (slot = 0; slot < Parallel; slot++) {
}
assert (slot < Parallel);
+ do_fork:
pid = Fork();
if (pid == 0) {
VOL_UNLOCK;
ret = DoSalvageVolume(node, slot);
Exit(ret);
} else if (pid < 0) {
- VOL_UNLOCK;
- SALVSYNC_doneWork(node, 1);
+ Log("failed to fork child worker process\n");
+ sleep(1);
+ goto do_fork;
} else {
child_slot[slot] = pid;
node->pid = pid;
DoSalvageVolume(struct SalvageQueueNode * node, int slot)
{
char childLog[AFSDIR_PATH_MAX];
- int ret;
- struct DiskPartition * partP;
+ struct DiskPartition64 * partP;
- VChildProcReconnectFS();
+ /* do not allow further forking inside salvager */
+ canfork = 0;
/* do not attempt to close parent's logFile handle as
* another thread may have held the lock on the FILE
ShowLog = 0;
}
- if (node->command.sop.volume <= 0) {
+ if (node->command.sop.parent <= 0) {
Log("salvageServer: invalid volume id specified; salvage aborted\n");
return 1;
}
}
/* Salvage individual volume; don't notify fs */
- SalvageFileSys1(partP, node->command.sop.volume);
-
- VDisconnectFS();
+ SalvageFileSys1(partP, node->command.sop.parent);
fclose(logFile);
return 0;
static void *
SalvageChildReaperThread(void * args)
{
- int slot, pid, status, code, found;
- struct SalvageQueueNode *qp, *nqp;
+ int slot, pid, status;
struct log_cleanup_node * cleanup;
assert(pthread_mutex_lock(&worker_lock) == 0);
child_slot[slot] = 0;
VOL_UNLOCK;
+ SALVSYNC_doneWorkByPid(pid, status);
+
assert(pthread_mutex_lock(&worker_lock) == 0);
if (cleanup) {
/* ok, we've reaped a child */
current_workers--;
- SALVSYNC_doneWorkByPid(pid, 0);
assert(pthread_cond_broadcast(&worker_cv) == 0);
}
*pid = ret;
if (WCOREDUMP(*status))
Log("\"%s\" core dumped!\n", prog);
- if (WIFSIGNALED(*status) != 0 || WEXITSTATUS(*status) != 0)
+ if ((WIFSIGNALED(*status) != 0) ||
+ ((WEXITSTATUS(*status) != 0) &&
+ (WEXITSTATUS(*status) != SALSRV_EXIT_VOLGROUP_LINK)))
Log("\"%s\" (pid=%d) terminated abnormally!\n", prog, ret);
} else {
Log("wait returned -1\n");
return 0;
}
+
+/* wake up every five minutes to see if a non-child salvage has finished */
+#define SALVAGE_SCAN_POLL_INTERVAL 300
+
+/**
+ * Thread to look for SalvageLog.$pid files that are not from our child
+ * worker salvagers, and notify SalvageLogCleanupThread to clean them
+ * up. This can happen if we restart during salvages, or the
+ * salvageserver crashes or something.
+ *
+ * The log directory is read once, at thread start.  Every file whose name
+ * is the salvage log name followed by '.' and a positive decimal pid, and
+ * whose pid is not currently recorded in child_slot[], is put on a private
+ * watch queue.  The queue is then polled with ScanLogs() every
+ * SALVAGE_SCAN_POLL_INTERVAL seconds until it is empty.
+ *
+ * Names whose suffix is not a complete positive number (for example a
+ * ".old" suffix, an empty suffix, or digits followed by other characters)
+ * are ignored.  Such a suffix must never be treated as pid 0 or a negative
+ * pid: kill() with those values addresses process groups rather than one
+ * process, so the liveness probe in ScanLogs() would keep succeeding and
+ * the entry would stay on the watch queue forever.
+ *
+ * @param arg  unused
+ *
+ * @return always NULL
+ */
+static void *
+SalvageLogScanningThread(void * arg)
+{
+    struct rx_queue log_watch_queue;
+    DIR *dp;
+    struct dirent *dirp;
+    char prefix[AFSDIR_PATH_MAX];
+    size_t prefix_len;
+
+    queue_Init(&log_watch_queue);
+
+    afs_snprintf(prefix, sizeof(prefix), "%s.", AFSDIR_SLVGLOG_FILE);
+    prefix_len = strlen(prefix);
+
+    dp = opendir(AFSDIR_LOGS_DIR);
+    assert(dp);
+
+    while ((dirp = readdir(dp)) != NULL) {
+        const char *suffix;
+        char *end;
+        long val;
+        pid_t pid;
+        struct log_cleanup_node *cleanup;
+        int i;
+
+        if (strncmp(dirp->d_name, prefix, prefix_len) != 0) {
+            /* not a salvage logfile; skip */
+            continue;
+        }
+
+        suffix = dirp->d_name + prefix_len;
+
+        errno = 0;
+        val = strtol(suffix, &end, 10);
+
+        if (errno != 0 || end == suffix || *end != '\0' || val <= 0) {
+            /* file is SalvageLog.<something> but <something> isn't
+             * a pid (not a number, trailing junk, out of range, or
+             * not positive), so skip */
+            continue;
+        }
+
+        pid = (pid_t) val;
+        if ((long) pid != val) {
+            /* value does not fit in a pid_t, so it cannot be a pid; skip */
+            continue;
+        }
+
+        VOL_LOCK;
+        for (i = 0; i < Parallel; ++i) {
+            if (pid == child_slot[i]) {
+                break;
+            }
+        }
+        VOL_UNLOCK;
+        if (i < Parallel) {
+            /* this pid is one of our children, so the reaper thread
+             * will take care of it; skip */
+            continue;
+        }
+
+        cleanup =
+            (struct log_cleanup_node *) malloc(sizeof(struct log_cleanup_node));
+        /* same out-of-memory policy as the child_slot allocation */
+        assert(cleanup != NULL);
+        cleanup->pid = pid;
+
+        queue_Append(&log_watch_queue, cleanup);
+    }
+
+    closedir(dp);
+
+    /* first pass immediately; after that only wake up periodically, and
+     * exit once nothing is left to watch */
+    ScanLogs(&log_watch_queue);
+
+    while (queue_IsNotEmpty(&log_watch_queue)) {
+        sleep(SALVAGE_SCAN_POLL_INTERVAL);
+        ScanLogs(&log_watch_queue);
+    }
+
+    return NULL;
+}
+
+/**
+ * Walk log_watch_queue and move every entry whose process no longer
+ * exists onto log_cleanup_queue, signalling that queue's condition
+ * variable once per entry moved so that SalvageLogCleanupThread can
+ * process it.
+ *
+ * worker_lock is held for the whole walk.  An entry is handed off only
+ * when kill(pid, 0) fails with ESRCH; any other outcome leaves the entry
+ * on the watch queue for a later pass.
+ *
+ * @param log_watch_queue a queue of PIDs that we should clean up if
+ *                        that PID has died
+ */
+static void
+ScanLogs(struct rx_queue *log_watch_queue)
+{
+    struct log_cleanup_node *node, *nnode;
+
+    assert(pthread_mutex_lock(&worker_lock) == 0);
+
+    for (queue_Scan(log_watch_queue, node, nnode, log_cleanup_node)) {
+        if (kill(node->pid, 0) >= 0 || errno != ESRCH) {
+            /* the pid still refers to a process (or we could not tell);
+             * assume it is the salvage still going and keep waiting */
+            continue;
+        }
+
+        /* process is gone: hand its log over to the cleanup thread */
+        queue_Remove(node);
+        queue_Append(&log_cleanup_queue, node);
+        assert(pthread_cond_signal(&log_cleanup_queue.queue_change_cv) == 0);
+    }
+
+    assert(pthread_mutex_unlock(&worker_lock) == 0);
+}