Windows: preserve prior vlserver list on dns failure
author Jeffrey Altman <jaltman@your-file-system.com>
Wed, 10 Sep 2014 14:23:17 +0000 (10:23 -0400)
committer Jeffrey Altman <jaltman@your-file-system.com>
Wed, 24 Sep 2014 22:17:11 +0000 (18:17 -0400)
Do not destroy the existing vlserver list if the DNS resolver query
fails.  Continue using the prior vlserver values until a DNS response
is obtained.  This will result in repeated DNS queries and a delay
if there is continued failure, but it will permit VL RPCs to continue
to be issued in the face of a DNS failure or misconfiguration.

Change-Id: Icac97c2bebdef744cc316225c1a6b1888ceb2f6e
Reviewed-on: http://gerrit.openafs.org/11457
Tested-by: BuildBot <buildbot@rampaginggeek.com>
Reviewed-by: Jeffrey Altman <jaltman@your-file-system.com>

src/WINNT/afsd/cm_cell.c
src/WINNT/afsd/cm_config.c
src/WINNT/afsd/cm_server.c
src/WINNT/afsd/cm_server.h

index b552a3b..7d37fed 100644 (file)
@@ -747,16 +747,27 @@ cm_CreateCellWithInfo( char * cellname,
     rock.cellp = cm_GetCell(cellname, CM_FLAG_CREATE | CM_FLAG_NOPROBE);
     rock.flags = 0;
 
-    cm_FreeServerList(&rock.cellp->vlServersp, CM_FREESERVERLIST_DELETE);
-
     if (!(flags & CM_CELLFLAG_DNS)) {
+       int first = 1;
+
         for (i = 0; i < host_count; i++) {
             thp = gethostbyname(hostname[i]);
+           if (first) {
+               /*
+                * If at least one vlserver resolved, or the resolver returned an
+                * authoritative host-not-found response, destroy the prior list.
+                */
+               if (thp != NULL || WSAGetLastError() == WSAHOST_NOT_FOUND) {
+                   cm_FreeServerList(&rock.cellp->vlServersp, CM_FREESERVERLIST_DELETE);
+                   first = 0;
+               }
+           }
+
             if (thp) {
-                int foundAddr = 0;
+               if (thp->h_addrtype != AF_INET)
+                   continue;
+
                 for (j=0 ; thp->h_addr_list[j]; j++) {
-                    if (thp->h_addrtype != AF_INET)
-                        continue;
                     memcpy(&vlSockAddr.sin_addr.s_addr,
                            thp->h_addr_list[j],
                            sizeof(long));
@@ -770,6 +781,12 @@ cm_CreateCellWithInfo( char * cellname,
         _InterlockedAnd(&rock.cellp->flags, ~CM_CELLFLAG_DNS);
     } else if (cm_dnsEnabled) {
         int ttl;
+       cm_serverRef_t * vlServersp = NULL;
+
+       lock_ObtainWrite(&cm_serverLock);
+       vlServersp = rock.cellp->vlServersp;
+       rock.cellp->vlServersp = NULL;
+       lock_ReleaseWrite(&cm_serverLock);
 
         code = cm_SearchCellByDNS(rock.cellp->name, NULL, &ttl, cm_AddCellProc, &rock);
         lock_ObtainMutex(&rock.cellp->mx);
@@ -779,8 +796,29 @@ cm_CreateCellWithInfo( char * cellname,
 #ifdef DEBUG
             fprintf(stderr, "cell %s: ttl=%d\n", rock.cellp->name, ttl);
 #endif
-        }
+       } else {
+           lock_ObtainWrite(&cm_serverLock);
+           if (rock.cellp->vlServersp == NULL) {
+               rock.cellp->vlServersp = vlServersp;
+               vlServersp = NULL;
+           }
+           lock_ReleaseWrite(&cm_serverLock);
+       }
+
+       cm_FreeServerList(&vlServersp, CM_FREESERVERLIST_DELETE);
+       if (vlServersp != NULL) {
+           /*
+            * We moved the vlServer list out of the way and
+            * in the meantime it was replaced.  If the vlServersp
+            * list is non-empty after cm_FreeServerList was called
+            * it means that there are deleted entries with active
+            * references.  Must put them back onto the list to
+            * avoid leaking the memory.
+            */
+           cm_AppendServerList(rock.cellp->vlServersp, &vlServersp);
+       }
     } else {
+       cm_FreeServerList(&rock.cellp->vlServersp, CM_FREESERVERLIST_DELETE);
         lock_ObtainMutex(&rock.cellp->mx);
         rock.cellp->flags &= ~CM_CELLFLAG_DNS;
     }
index b071d6e..44db379 100644 (file)
@@ -919,7 +919,7 @@ long cm_SearchCellByDNS(char *cellNamep, char *newCellNamep, int *ttl,
     rc = getAFSServer("afs3-vlserver", "udp", cellNamep, htons(7003),
                       cellHostAddrs, cellHostNames, ports, adminRanks, &numServers, ttl);
     if (rc == 0 && numServers > 0) {     /* found the cell */
-        for (i = 0; i < numServers; i++) {
+       for (i = 0; i < numServers; i++) {
             memcpy(&vlSockAddr.sin_addr.s_addr, &cellHostAddrs[i],
                    sizeof(long));
             vlSockAddr.sin_port = ports[i];
index c3ada8a..44b928a 100644 (file)
@@ -1528,6 +1528,20 @@ int cm_IsServerListEmpty(cm_serverRef_t *serversp)
     return ( allDeleted ? CM_ERROR_EMPTY : 0 );
 }
 
+/*
+ * Append the list *src to the tail of the list headed by dest, then
+ * set *src to NULL so the appended entries are reachable only via dest.
+ *
+ * dest - head of the list that receives the entries.
+ * src  - address of the head pointer of the list to append.
+ *
+ * If dest, src, or *src is NULL the call does nothing.  In particular,
+ * because dest is passed by value an empty destination cannot receive
+ * entries; *src is left untouched in that case and remains the
+ * caller's responsibility.
+ *
+ * No lock is taken here.
+ * NOTE(review): callers presumably must hold cm_serverLock -- confirm.
+ */
+void cm_AppendServerList(cm_serverRef_t *dest, cm_serverRef_t **src)
+{
+    cm_serverRef_t *tail;
+
+    /* Logical OR throughout: short-circuits before *src is examined. */
+    if (dest == NULL || src == NULL || *src == NULL)
+        return;
+
+    /* Walk to the last entry of the destination list. */
+    tail = dest;
+    while (tail->next != NULL)
+        tail = tail->next;
+
+    tail->next = *src;
+
+    *src = NULL;
+}
+
 void cm_FreeServerList(cm_serverRef_t** list, afs_uint32 flags)
 {
     cm_serverRef_t  **current;
index c779db2..fc25d2e 100644 (file)
@@ -139,6 +139,8 @@ extern afs_uint32 cm_ServerListSize(cm_serverRef_t* list);
 
 extern void cm_FreeServerList(cm_serverRef_t** list, afs_uint32 flags);
 
+extern void cm_AppendServerList(cm_serverRef_t *dest, cm_serverRef_t **src);
+
 extern void cm_ForceNewConnectionsAllServers(void);
 
 extern void cm_SetServerNo64Bit(cm_server_t * serverp, int no64bit);