DAFS: avoid shutdown hang during salvage

author Andrew Deason <adeason@sinenomine.net>

Wed, 29 Jul 2009 15:56:34 +0000 (11:56 -0400)

committer Derrick Brashear <shadow|account-1000005@unknown>

Tue, 1 Sep 2009 16:21:10 +0000 (09:21 -0700)
author Andrew Deason <adeason@sinenomine.net>
Wed, 29 Jul 2009 15:56:34 +0000 (11:56 -0400)
committer Derrick Brashear <shadow|account-1000005@unknown>
Tue, 1 Sep 2009 16:21:10 +0000 (09:21 -0700)
diff --git a/src/vol/salvaged.c b/src/vol/salvaged.c

index 6af6013..c382200 100644 (file)
--- a/src/vol/salvaged.c
+++ b/src/vol/salvaged.c
@@ -165,6 +165,9 @@ static int Reap_Child(char * prog, int * pid, int * status);
 static void * SalvageLogCleanupThread(void *);
 static int SalvageLogCleanup(int pid);
 
+static void * SalvageLogScanningThread(void *);
+static void ScanLogs(struct rx_queue *log_watch_queue);
+
 struct log_cleanup_node {
     struct rx_queue q;
     int pid;
@@ -524,6 +527,10 @@ SalvageServer(void)
                          &attrs, 
                          &SalvageLogCleanupThread,
                          NULL) == 0);
+    assert(pthread_create(&tid,
+                         &attrs,
+                         &SalvageLogScanningThread,
+                         NULL) == 0);
 
     /* loop forever serving requests */
     while (1) {
@@ -745,3 +752,115 @@ SalvageLogCleanup(int pid)
 
     return 0;
 }
+
+/* wake up every five minutes to see if a non-child salvage has finished */
+#define SALVAGE_SCAN_POLL_INTERVAL 300
+
+/**
+ * Thread to look for SalvageLog.$pid files that are not from our child
+ * worker salvagers, and notify SalvageLogCleanupThread to clean them
+ * up. This can happen if we restart during salvages, or the
+ * salvageserver crashes or something.
+ *
+ * The logs directory is read once when the thread starts. Every file
+ * named SalvageLog.<pid> whose pid is not currently in child_slot[] is
+ * put on a private watch queue, which is then handed to ScanLogs()
+ * every SALVAGE_SCAN_POLL_INTERVAL seconds until it is empty.
+ *
+ * Only suffixes that are a plain positive decimal pid are accepted.
+ * strtol() turns a non-numeric suffix (e.g. SalvageLog.old) into 0
+ * without setting errno, and kill() treats pid 0 and negative pids as
+ * process groups rather than a single process, so such an entry could
+ * otherwise sit on the watch queue indefinitely.
+ *
+ * @param arg  unused
+ *
+ * @return always NULL
+ */
+static void *
+SalvageLogScanningThread(void * arg)
+{
+    struct rx_queue log_watch_queue;
+
+    queue_Init(&log_watch_queue);
+
+    {
+       DIR *dp;
+       struct dirent *dirp;
+       char prefix[AFSDIR_PATH_MAX];
+       size_t prefix_len;
+
+       afs_snprintf(prefix, sizeof(prefix), "%s.", AFSDIR_SLVGLOG_FILE);
+       prefix_len = strlen(prefix);
+
+       dp = opendir(AFSDIR_LOGS_DIR);
+       assert(dp);
+
+       while ((dirp = readdir(dp)) != NULL) {
+           pid_t pid;
+           long parsed;
+           const char *suffix;
+           char *end;
+           struct log_cleanup_node *cleanup;
+           int i;
+
+           if (strncmp(dirp->d_name, prefix, prefix_len) != 0) {
+               /* not a salvage logfile; skip */
+               continue;
+           }
+
+           suffix = dirp->d_name + prefix_len;
+           errno = 0;
+           parsed = strtol(suffix, &end, 10);
+           pid = (pid_t) parsed;
+
+           if (errno != 0 || end == suffix || *end != '\0'
+               || parsed <= 0 || (long) pid != parsed) {
+               /* file is SalvageLog.<something> but <something> isn't
+                * a usable pid (not a number, trailing junk, out of
+                * range, or not positive), so skip */
+               continue;
+           }
+
+           VOL_LOCK;
+           for (i = 0; i < Parallel; ++i) {
+               if (pid == child_slot[i]) {
+                   break;
+               }
+           }
+           VOL_UNLOCK;
+           if (i < Parallel) {
+               /* this pid is one of our children, so the reaper thread
+                * will take care of it; skip */
+               continue;
+           }
+
+           cleanup =
+               (struct log_cleanup_node *) malloc(sizeof(struct log_cleanup_node));
+           /* out of memory is fatal here, like the other setup failures */
+           assert(cleanup != NULL);
+           cleanup->pid = pid;
+
+           queue_Append(&log_watch_queue, cleanup);
+       }
+
+       closedir(dp);
+    }
+
+    ScanLogs(&log_watch_queue);
+
+    while (queue_IsNotEmpty(&log_watch_queue)) {
+       sleep(SALVAGE_SCAN_POLL_INTERVAL);
+       ScanLogs(&log_watch_queue);
+    }
+
+    return NULL;
+}
+
+/**
+ * walk log_watch_queue and, for every entry whose process no longer
+ * exists (kill(pid, 0) fails with ESRCH), move the entry onto
+ * log_cleanup_queue and signal that queue's condition variable. All
+ * other entries stay on log_watch_queue for a later scan. The whole
+ * pass runs with worker_lock held.
+ *
+ * @param log_watch_queue  a queue of PIDs that we should clean up if
+ * that PID has died
+ */
+static void
+ScanLogs(struct rx_queue *log_watch_queue)
+{
+    struct log_cleanup_node *node, *nnode;
+
+    assert(pthread_mutex_lock(&worker_lock) == 0);
+
+    for (queue_Scan(log_watch_queue, node, nnode, log_cleanup_node)) {
+       /* anything other than "no such process" means the pid is still
+        * in use; assume it's the salvage process still going, and keep
+        * waiting for it */
+       if (kill(node->pid, 0) >= 0 || errno != ESRCH) {
+           continue;
+       }
+
+       /* process is gone: hand the node over to the cleanup queue */
+       queue_Remove(node);
+       queue_Append(&log_cleanup_queue, node);
+       assert(pthread_cond_signal(&log_cleanup_queue.queue_change_cv) == 0);
+    }
+
+    assert(pthread_mutex_unlock(&worker_lock) == 0);
+}
diff --git a/src/vol/salvsync-server.c b/src/vol/salvsync-server.c

index bdc8bd0..6eb46af 100644 (file)
--- a/src/vol/salvsync-server.c
+++ b/src/vol/salvsync-server.c
@@ -189,6 +189,9 @@ static struct QueueHead pendingQueue;  /* volumes being salvaged */
  */
 static int partition_salvaging[VOLMAXPARTS+1];
 
+static int HandlerFD[MAXHANDLERS];
+static void (*HandlerProc[MAXHANDLERS]) (int);
+
 #define VSHASH_SIZE 64
 #define VSHASH_MASK (VSHASH_SIZE-1)
 #define VSHASH(vid) ((vid)&VSHASH_MASK)
@@ -292,6 +295,21 @@ SALVSYNC_salvInit(void)
     assert(pthread_create(&tid, &tattr, SALVSYNC_syncThread, NULL) == 0);
 }
 
+/**
+ * drop every fd recorded in HandlerFD[] via SALVSYNC_Drop(), then close
+ * the salvsync server socket and mark it as closed (-1).
+ *
+ * Registered through pthread_atfork() as the child-side handler, so it
+ * runs in a freshly forked child.
+ */
+static void
+CleanFDs(void)
+{
+    int slot;
+
+    for (slot = 0; slot < MAXHANDLERS; ++slot) {
+       if (HandlerFD[slot] < 0) {
+           /* negative entry: presumably an unused handler slot */
+           continue;
+       }
+       SALVSYNC_Drop(HandlerFD[slot]);
+    }
+
+    /* just in case we were in AcceptOff mode, and thus this fd wouldn't
+     * have a handler */
+    close(salvsync_server_state.fd);
+    salvsync_server_state.fd = -1;
+}
 
 static fd_set SALVSYNC_readfds;
 
@@ -304,6 +322,11 @@ SALVSYNC_syncThread(void * args)
     int tid;
     SYNC_server_state_t * state = &salvsync_server_state;
 
+    /* when we fork, the child needs to close the salvsync server sockets,
+     * otherwise, it may get salvsync requests, instead of the parent
+     * salvageserver */
+    assert(pthread_atfork(NULL, NULL, CleanFDs) == 0);
+
     SYNC_getAddr(&state->endpoint, &state->addr);
     SYNC_cleanupSock(state);
 
@@ -362,6 +385,12 @@ SALVSYNC_com(osi_socket fd)
     SALVSYNC_command scom;
     SALVSYNC_response sres;
     SYNC_PROTO_BUF_DECL(buf);
+
+    /* start from zeroed request/response structures; each memset uses
+     * the size of the object it clears */
+    memset(&com, 0, sizeof(com));
+    memset(&res, 0, sizeof(res));
+    memset(&scom, 0, sizeof(scom));
+    memset(&sres, 0, sizeof(sres));
+    memset(&sres_hdr, 0, sizeof(sres_hdr));
     
     com.payload.buf = (void *)buf;
     com.payload.len = SYNC_PROTO_MAX_LEN;
@@ -758,9 +787,6 @@ AcceptOff(void)
 
 /* The multiple FD handling code. */
 
-static int HandlerFD[MAXHANDLERS];
-static void (*HandlerProc[MAXHANDLERS]) (int);
-
 static void
 InitHandler(void)
 {
diff --git a/src/vol/vol-salvage.c b/src/vol/vol-salvage.c

index 2f5866c..51bdd15 100644 (file)
--- a/src/vol/vol-salvage.c
+++ b/src/vol/vol-salvage.c
@@ -3237,6 +3237,44 @@ AskOffline(VolumeId volumeId, char * partition)
        Log("AskOffline:  request for fileserver to take volume offline failed; salvage aborting.\n");
        Abort("Salvage aborted\n");
     }
+
+#ifdef AFS_DEMAND_ATTACH_FS
+    /* set inUse = programType in the volume header. We do this in case
+     * the fileserver restarts/crashes while we are salvaging.
+     * Otherwise, the fileserver could attach the volume again on
+     * startup while we are salvaging, which would be very bad, or
+     * schedule another salvage while we are salvaging, which would be
+     * annoying. */
+    if (!Testing) {
+       int fd;
+       IHandle_t *h;
+       char name[VMAXPATHLEN];
+       struct VolumeHeader header;
+       struct VolumeDiskHeader diskHeader;
+       struct VolumeDiskData volHeader;
+
+       afs_snprintf(name, sizeof(name), "%s/" VFORMAT, fileSysPathName,
+           afs_printable_uint32_lu(volumeId));
+
+       fd = afs_open(name, O_RDONLY);
+       assert(fd >= 0);
+       assert(read(fd, &diskHeader, sizeof(diskHeader)) == sizeof(diskHeader));
+       assert(diskHeader.stamp.magic == VOLUMEHEADERMAGIC);
+       close(fd);
+
+       DiskToVolumeHeader(&header, &diskHeader);
+
+       IH_INIT(h, fileSysDevice, header.parent, header.volumeInfo);
+       assert(IH_IREAD(h, 0, (char*)&volHeader, sizeof(volHeader)) == sizeof(volHeader));
+       assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
+
+       volHeader.inUse = programType;
+
+       assert(IH_IWRITE(h, 0, (char*)&volHeader, sizeof(volHeader)) == sizeof(volHeader));
+
+       IH_RELEASE(h);
+    }
+#endif /* AFS_DEMAND_ATTACH_FS */
 }
 
 void
diff --git a/src/vol/volume.c b/src/vol/volume.c

index d5ed2fc..b440e9f 100644 (file)
--- a/src/vol/volume.c
+++ b/src/vol/volume.c
@@ -353,7 +353,6 @@ static void VVByPListWait_r(struct DiskPartition64 * dp);
 static int VCheckSalvage(register Volume * vp);
 static int VUpdateSalvagePriority_r(Volume * vp);
 static int VScheduleSalvage_r(Volume * vp);
-static int VCancelSalvage_r(Volume * vp, int reason);
 
 /* Volume hash table */
 static void VReorderHash_r(VolumeHashChainHead * head, Volume * pp, Volume * vp);
@@ -1344,9 +1343,10 @@ VShutdownVolume_r(Volume * vp)
     
     switch(V_attachState(vp)) {
     case VOL_STATE_SALVAGING:
-       /* make sure salvager knows we don't want
-        * the volume back */
-       VCancelSalvage_r(vp, SALVSYNC_SHUTDOWN);
+       /* Leave salvaging volumes alone. Any in-progress salvages will
+        * continue working after viced shuts down. This is intentional.
+        * Falls through to the VOL_STATE_UNATTACHED transition below. */
+
     case VOL_STATE_PREATTACHED:
     case VOL_STATE_ERROR:
        VChangeState_r(vp, VOL_STATE_UNATTACHED);
@@ -4008,6 +4008,35 @@ VRequestSalvage_r(Error * ec, Volume * vp, int reason, int flags)
        vp->salvage.requested = 1;
        vp->salvage.reason = reason;
        vp->stats.last_salvage = FT_ApproxTime();
+       if (VIsSalvager(V_inUse(vp))) {
+           Log("VRequestSalvage: volume %u appears to be salvaging, but we\n", vp->hashid);
+           Log("  didn't request a salvage. Forcing it offline waiting for the\n");
+           Log("  salvage to finish; if you are sure no salvage is running,\n");
+           Log("  run a salvage manually.\n");
+
+           /* make sure neither VScheduleSalvage_r nor
+            * VUpdateSalvagePriority_r try to schedule another salvage */
+           vp->salvage.requested = vp->salvage.scheduled = 0;
+
+           /* these stats aren't correct, but doing this makes them
+            * slightly closer to being correct */
+           vp->stats.salvages++;
+           vp->stats.last_salvage_req = FT_ApproxTime();
+           IncUInt64(&VStats.salvages);
+
+           VChangeState_r(vp, VOL_STATE_ERROR);
+           *ec = VSALVAGE;
+           code = 1;
+
+       } else if (vp->stats.salvages < SALVAGE_COUNT_MAX) {
+           VChangeState_r(vp, VOL_STATE_SALVAGING);
+           *ec = VSALVAGING;
+       } else {
+           Log("VRequestSalvage: volume %u online salvaged too many times; forced offline.\n", vp->hashid);
+           VChangeState_r(vp, VOL_STATE_ERROR);
+           *ec = VSALVAGE;
+           code = 1;
+       }
        if (flags & VOL_SALVAGE_INVALIDATE_HEADER) {
            /* Instead of ReleaseVolumeHeader, we do FreeVolumeHeader() 
                so that the the next VAttachVolumeByVp_r() invocation 
@@ -4017,15 +4046,6 @@ VRequestSalvage_r(Error * ec, Volume * vp, int reason, int flags)
            */
            FreeVolumeHeader(vp);
        }
-       if (vp->stats.salvages < SALVAGE_COUNT_MAX) {
-           VChangeState_r(vp, VOL_STATE_SALVAGING);
-           *ec = VSALVAGING;
-       } else {
-           Log("VRequestSalvage: volume %u online salvaged too many times; forced offline.\n", vp->hashid);
-           VChangeState_r(vp, VOL_STATE_ERROR);
-           *ec = VSALVAGE;
-           code = 1;
-       }
     }
     return code;
 }
@@ -4186,57 +4206,6 @@ VScheduleSalvage_r(Volume * vp)
     return ret;
 }
 
-/**
- * ask salvageserver to cancel a scheduled salvage operation.
- *
- * @param[in] vp      pointer to volume object
- * @param[in] reason  SALVSYNC protocol reason code
- *
- * @return operation status
- *    @retval 0 success
- *    @retval 1 request failed
- *
- * @pre VOL_LOCK is held.
- *
- * @post salvageserver is sent a request to cancel the volume salvage.
- *       volume is transitioned to a hard error state.
- *
- * @internal volume package internal use only.
- */
-static int
-VCancelSalvage_r(Volume * vp, int reason)
-{
-    int code, ret = 0;
-
-#ifdef SALVSYNC_BUILD_CLIENT
-    if (vp->salvage.scheduled) {
-       VChangeState_r(vp, VOL_STATE_SALVSYNC_REQ);
-       VOL_UNLOCK;
-
-       /* can't use V_id() since there's no guarantee
-        * we have the disk data header at this point */
-       code = SALVSYNC_SalvageVolume(vp->hashid,
-                                     VPartitionPath(vp->partition),
-                                     SALVSYNC_CANCEL,
-                                     reason,
-                                     0,
-                                     NULL);
-
-       VOL_LOCK;
-       VChangeState_r(vp, VOL_STATE_ERROR);
-
-       if (code == SYNC_OK) {
-           vp->salvage.scheduled = 0;
-           vp->salvage.requested = 0;
-       } else {
-           ret = 1;
-       }
-    }
-#endif /* SALVSYNC_BUILD_CLIENT */
-    return ret;
-}
-
-
 #ifdef SALVSYNC_BUILD_CLIENT
 /**
  * connect to the salvageserver SYNC service.
diff --git a/src/vol/volume_inline.h b/src/vol/volume_inline.h

index ef54b45..a26a6dc 100644 (file)
--- a/src/vol/volume_inline.h
+++ b/src/vol/volume_inline.h
@@ -11,6 +11,26 @@
 #define _AFS_VOL_VOLUME_INLINE_H 1
 
 #include "volume.h"
+/**
+ * report whether the given program type identifies one of the
+ * salvaging programs.
+ *
+ * @param type  program type enumeration
+ *
+ * @return whether type is a salvager
+ *   @retval 0  type is neither salvager nor salvageServer
+ *   @retval 1  type is salvager or salvageServer
+ */
+static_inline int
+VIsSalvager(ProgramType type)
+{
+    if (type == salvager || type == salvageServer) {
+       return 1;
+    }
+    return 0;
+}
 
 /***************************************************/
 /* demand attach fs state machine routines         */
author	Andrew Deason <adeason@sinenomine.net>
	Wed, 29 Jul 2009 15:56:34 +0000 (11:56 -0400)
committer	Derrick Brashear <shadow\|account-1000005@unknown>
	Tue, 1 Sep 2009 16:21:10 +0000 (09:21 -0700)
src/vol/salvaged.c		patch \| blob \| history
src/vol/salvsync-server.c		patch \| blob \| history
src/vol/vol-salvage.c		patch \| blob \| history
src/vol/volume.c		patch \| blob \| history
src/vol/volume_inline.h		patch \| blob \| history