Windows: Checksum server lists on Volume Errors
[openafs.git] / src / WINNT / afsd / cm_volume.c
index fdab257..dd40e31 100644 (file)
@@ -17,6 +17,7 @@
 #include <winsock2.h>
 #include <nb30.h>
 #include <string.h>
+#include <strsafe.h>
 #include <malloc.h>
 #include "afsd.h"
 #include <osi.h>
@@ -75,6 +76,7 @@ cm_ShutdownVolume(void)
                 cm_VolumeStatusNotification(volp, volp->vol[volType].ID, volp->vol[volType].state, vl_alldown);
         }
         volp->cbExpiresRO = 0;
+        volp->cbIssuedRO = 0;
         volp->cbServerpRO = NULL;
         lock_FinalizeRWLock(&volp->rw);
     }
@@ -115,6 +117,7 @@ void cm_InitVolume(int newFile, long maxVols)
                         cm_VolumeStatusNotification(volp, volp->vol[volType].ID, vl_unknown, volp->vol[volType].state);
                 }
                 volp->cbExpiresRO = 0;
+                volp->cbIssuedRO = 0;
                 volp->cbServerpRO = NULL;
             }
         }
@@ -164,10 +167,76 @@ cm_VolNameIsID(char *aname)
  *    first, and fall back to successively older versions if you get
  *    RXGEN_OPCODE.
  */
-#define MULTIHOMED 1
+static long
+cm_GetEntryByName( struct cm_cell *cellp, const char *name,
+                   struct vldbentry *vldbEntryp,      /* out: filled by VL_GetEntryByNameO */
+                   struct nvldbentry *nvldbEntryp,    /* out: filled by VL_GetEntryByNameN */
+                   struct uvldbentry *uvldbEntryp,    /* out: filled by VL_GetEntryByNameU */
+                   int *methodp,                      /* out: 2, 1 or 0 for the U, N or O call made last */
+                   cm_user_t *userp,
+                   cm_req_t *reqp
+                   )
+{
+    long code;
+    cm_conn_t *connp;
+    struct rx_connection * rxconnp;
+    /* Look up "name" through cellp->vlServersp; returns the cm_MapVLRPCError()-mapped result of the last attempt. */
+    osi_Log2(afsd_logp, "CALL VL_GetEntryByName{UNO} name %s:%s",
+              osi_LogSaveString(afsd_logp,cellp->name),
+              osi_LogSaveString(afsd_logp,name));
+    do {
+        /* "continue" below jumps to the while condition, so cm_Analyze also sees connection failures. */
+        code = cm_ConnByMServers(cellp->vlServersp, FALSE, userp, reqp, &connp);
+        if (code)
+            continue;   /* *methodp is left untouched on this pass */
+        /* Try the newest RPC first; step down one version only when the call returns RXGEN_OPCODE. */
+        rxconnp = cm_GetRxConn(connp);
+        code = VL_GetEntryByNameU(rxconnp, name, uvldbEntryp);
+        *methodp = 2;
+        if ( code == RXGEN_OPCODE )
+        {
+            code = VL_GetEntryByNameN(rxconnp, name, nvldbEntryp);
+            *methodp = 1;
+        }
+        if ( code == RXGEN_OPCODE ) {
+            code = VL_GetEntryByNameO(rxconnp, name, vldbEntryp);
+            *methodp = 0;
+        }
+        rx_PutConnection(rxconnp);
+    } while (cm_Analyze(connp, userp, reqp, NULL, 0, NULL, cellp->vlServersp, NULL, code));   /* repeat while cm_Analyze returns non-zero */
+    code = cm_MapVLRPCError(code, reqp);
+    if ( code )
+        osi_Log3(afsd_logp, "CALL VL_GetEntryByName{UNO} name %s:%s FAILURE, code 0x%x",
+                  osi_LogSaveString(afsd_logp,cellp->name),
+                  osi_LogSaveString(afsd_logp,name), code);
+    else
+        osi_Log2(afsd_logp, "CALL VL_GetEntryByName{UNO} name %s:%s SUCCESS",
+                  osi_LogSaveString(afsd_logp,cellp->name),
+                  osi_LogSaveString(afsd_logp,name));
+    return code;   /* zero is the value logged as SUCCESS above */
+}
+
+static long
+cm_GetEntryByID( struct cm_cell *cellp, afs_uint32 id,
+                 struct vldbentry *vldbEntryp,
+                 struct nvldbentry *nvldbEntryp,
+                 struct uvldbentry *uvldbEntryp,
+                 int *methodp,
+                 cm_user_t *userp,
+                 cm_req_t *reqp
+                 )
+{
+    char name[64];
+    /* Render the numeric volume id as decimal text so the by-name lookup can be reused for it. */
+    StringCbPrintf(name, sizeof(name), "%u", id);
+    /* Out-parameters and the return value come straight from cm_GetEntryByName. */
+    return cm_GetEntryByName(cellp, name, vldbEntryp, nvldbEntryp, uvldbEntryp, methodp, userp, reqp);
+}
+
 long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *reqp,
                     cm_volume_t *volp)
 {
+    struct rx_connection *rxconnp;
     cm_conn_t *connp;
     int i;
     afs_uint32 j, k;
@@ -178,11 +247,10 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
     u_long tempAddr;
     struct vldbentry vldbEntry;
     struct nvldbentry nvldbEntry;
-#ifdef MULTIHOMED
     struct uvldbentry uvldbEntry;
-#endif
     int method = -1;
     int ROcount = 0;
+    int isMixed = 0;
     long code;
     enum volstatus rwNewstate = vl_online;
     enum volstatus roNewstate = vl_online;
@@ -192,6 +260,7 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
 #endif
     afs_uint32 volType;
     time_t now;
+    int replicated = 0;
 
     lock_AssertWrite(&volp->rw);
 
@@ -247,44 +316,12 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
         lock_ReleaseWrite(&volp->rw);
 
         if (cellp->flags & CM_CELLFLAG_VLSERVER_INVALID)
-            cm_UpdateCell(cellp, 0);
+             cm_UpdateCell(cellp, 0);
 
         /* now we have volume structure locked and held; make RPC to fill it */
-       osi_Log2(afsd_logp, "CALL VL_GetEntryByName{UNO} name %s:%s",
-                  osi_LogSaveString(afsd_logp,volp->cellp->name),
-                  osi_LogSaveString(afsd_logp,volp->namep));
-        do {
-            struct rx_connection * rxconnp;
-
-            code = cm_ConnByMServers(cellp->vlServersp, userp, reqp, &connp);
-            if (code)
-                continue;
-
-            rxconnp = cm_GetRxConn(connp);
-#ifdef MULTIHOMED
-            code = VL_GetEntryByNameU(rxconnp, volp->namep, &uvldbEntry);
-            method = 2;
-            if ( code == RXGEN_OPCODE )
-#endif
-            {
-                code = VL_GetEntryByNameN(rxconnp, volp->namep, &nvldbEntry);
-                method = 1;
-            }
-            if ( code == RXGEN_OPCODE ) {
-                code = VL_GetEntryByNameO(rxconnp, volp->namep, &vldbEntry);
-                method = 0;
-            }
-            rx_PutConnection(rxconnp);
-        } while (cm_Analyze(connp, userp, reqp, NULL, NULL, cellp->vlServersp, NULL, code));
-        code = cm_MapVLRPCError(code, reqp);
-       if ( code )
-           osi_Log3(afsd_logp, "CALL VL_GetEntryByName{UNO} name %s:%s FAILURE, code 0x%x",
-                     osi_LogSaveString(afsd_logp,volp->cellp->name),
-                      osi_LogSaveString(afsd_logp,volp->namep), code);
-       else
-           osi_Log2(afsd_logp, "CALL VL_GetEntryByName{UNO} name %s:%s SUCCESS",
-                     osi_LogSaveString(afsd_logp,volp->cellp->name),
-                      osi_LogSaveString(afsd_logp,volp->namep));
+        code = cm_GetEntryByName(cellp, volp->namep, &vldbEntry, &nvldbEntry,
+                                 &uvldbEntry,
+                                 &method, userp, reqp);
     }
 
     /* We can end up here with code == CM_ERROR_NOSUCHVOLUME if the base volume name
@@ -301,41 +338,26 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
         snprintf(name, VL_MAXNAMELEN, "%s.readonly", volp->namep);
 
         /* now we have volume structure locked and held; make RPC to fill it */
-       osi_Log2(afsd_logp, "CALL VL_GetEntryByName{UNO} name %s:%s",
-                 osi_LogSaveString(afsd_logp,volp->cellp->name),
-                 osi_LogSaveString(afsd_logp,name));
-        do {
-            struct rx_connection * rxconnp;
-
-            code = cm_ConnByMServers(cellp->vlServersp, userp, reqp, &connp);
-            if (code)
-                continue;
+        code = cm_GetEntryByName(cellp, name, &vldbEntry, &nvldbEntry,
+                                 &uvldbEntry,
+                                 &method, userp, reqp);
+    }
 
-            rxconnp = cm_GetRxConn(connp);
-#ifdef MULTIHOMED
-            code = VL_GetEntryByNameU(connp->rxconnp, name, &uvldbEntry);
-            method = 2;
-            if ( code == RXGEN_OPCODE )
-#endif
-            {
-                code = VL_GetEntryByNameN(connp->rxconnp, name, &nvldbEntry);
-                method = 1;
-            }
-            if ( code == RXGEN_OPCODE ) {
-                code = VL_GetEntryByNameO(connp->rxconnp, name, &vldbEntry);
-                method = 0;
-            }
-            rx_PutConnection(rxconnp);
-        } while (cm_Analyze(connp, userp, reqp, NULL, NULL, cellp->vlServersp, NULL, code));
-        code = cm_MapVLRPCError(code, reqp);
-       if ( code )
-           osi_Log3(afsd_logp, "CALL VL_GetEntryByName{UNO} name %s:%s FAILURE, code 0x%x",
-                    osi_LogSaveString(afsd_logp,volp->cellp->name),
-                     osi_LogSaveString(afsd_logp,name), code);
-       else
-           osi_Log2(afsd_logp, "CALL VL_GetEntryByName{UNO} name %s:%s SUCCESS",
-                    osi_LogSaveString(afsd_logp,volp->cellp->name),
-                     osi_LogSaveString(afsd_logp,name));
+    /*
+     * What if there was a volume rename?  The volume name no longer exists but the
+     * volume id might.  Try to refresh the volume location information based on
+     * one of the read-write or read-only volume ids.
+     */
+    if (code == CM_ERROR_NOSUCHVOLUME) {
+        if (volp->vol[RWVOL].ID != 0) {
+            code = cm_GetEntryByID(cellp, volp->vol[RWVOL].ID, &vldbEntry, &nvldbEntry,
+                                    &uvldbEntry,
+                                    &method, userp, reqp);
+        } else if (volp->vol[ROVOL].ID != 0) {
+            code = cm_GetEntryByID(cellp, volp->vol[ROVOL].ID, &vldbEntry, &nvldbEntry,
+                                    &uvldbEntry,
+                                    &method, userp, reqp);
+        }
     }
 
     lock_ObtainWrite(&volp->rw);
@@ -370,6 +392,7 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
         case 0:
             flags = vldbEntry.flags;
             nServers = vldbEntry.nServers;
+            replicated = (nServers > 0);
             rwID = vldbEntry.volumeId[0];
             roID = vldbEntry.volumeId[1];
             bkID = vldbEntry.volumeId[2];
@@ -383,6 +406,7 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
         case 1:
             flags = nvldbEntry.flags;
             nServers = nvldbEntry.nServers;
+            replicated = (nServers > 0);
             rwID = nvldbEntry.volumeId[0];
             roID = nvldbEntry.volumeId[1];
             bkID = nvldbEntry.volumeId[2];
@@ -393,15 +417,15 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
             strncpy(name, nvldbEntry.name, VL_MAXNAMELEN);
             name[VL_MAXNAMELEN - 1] = '\0';
             break;
-#ifdef MULTIHOMED
         case 2:
             flags = uvldbEntry.flags;
             nServers = uvldbEntry.nServers;
+            replicated = (nServers > 0);
             rwID = uvldbEntry.volumeId[0];
             roID = uvldbEntry.volumeId[1];
             bkID = uvldbEntry.volumeId[2];
             for ( i=0, j=0; code == 0 && i<nServers && j<NMAXNSERVERS; i++ ) {
-                if ( !(uvldbEntry.serverFlags[i] & VLSERVER_FLAG_UUID) ) {
+                if ( !(uvldbEntry.serverFlags[i] & VLSF_UUID) ) {
                     serverFlags[j] = uvldbEntry.serverFlags[i];
                     serverNumber[j] = uvldbEntry.serverNumber[i].time_low;
                     j++;
@@ -418,16 +442,14 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
                     memset(&addrs, 0, sizeof(addrs));
 
                     do {
-                        struct rx_connection *rxconnp;
-
-                        code = cm_ConnByMServers(cellp->vlServersp, userp, reqp, &connp);
+                        code = cm_ConnByMServers(cellp->vlServersp, FALSE, userp, reqp, &connp);
                         if (code)
                             continue;
 
                         rxconnp = cm_GetRxConn(connp);
                         code = VL_GetAddrsU(rxconnp, &attrs, &uuid, &unique, &nentries, &addrs);
                         rx_PutConnection(rxconnp);
-                    } while (cm_Analyze(connp, userp, reqp, NULL, NULL, cellp->vlServersp, NULL, code));
+                    } while (cm_Analyze(connp, userp, reqp, NULL, 0, NULL, cellp->vlServersp, NULL, code));
 
                     if ( code ) {
                         code = cm_MapVLRPCError(code, reqp);
@@ -454,12 +476,11 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
             strncpy(name, uvldbEntry.name, VL_MAXNAMELEN);
             name[VL_MAXNAMELEN - 1] = '\0';
             break;
-#endif
         }
 
         /* decode the response */
         lock_ObtainWrite(&cm_volumeLock);
-        if (cm_VolNameIsID(volp->namep)) {
+        if (!cm_VolNameIsID(volp->namep)) {
             size_t    len;
 
             len = strlen(name);
@@ -506,6 +527,10 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
                 volp->vol[ROVOL].ID = roID;
                 cm_AddVolumeToIDHashTable(volp, ROVOL);
             }
+            if (replicated)
+                _InterlockedOr(&volp->vol[ROVOL].flags, CM_VOL_STATE_FLAG_REPLICATED);
+            else
+                _InterlockedAnd(&volp->vol[ROVOL].flags, ~CM_VOL_STATE_FLAG_REPLICATED);
         } else {
             if (volp->vol[ROVOL].qflags & CM_VOLUME_QFLAG_IN_HASH)
                 cm_RemoveVolumeFromIDHashTable(volp, ROVOL);
@@ -524,6 +549,15 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
             volp->vol[BACKVOL].ID = 0;
         }
         lock_ReleaseWrite(&cm_volumeLock);
+
+        /* See if the replica sites are mixed versions */
+        for (i=0; i<nServers; i++) {
+            if (serverFlags[i] & VLSF_NEWREPSITE) {
+                isMixed = 1;
+                break;
+            }
+        }
+
         for (i=0; i<nServers; i++) {
             /* create a server entry */
             tflags = serverFlags[i];
@@ -534,7 +568,6 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
             tempAddr = htonl(serverNumber[i]);
             tsockAddr.sin_addr.s_addr = tempAddr;
             tsp = cm_FindServer(&tsockAddr, CM_SERVER_FILE, FALSE);
-#ifdef MULTIHOMED
             if (tsp && (method == 2) && (tsp->flags & CM_SERVERFLAG_UUID)) {
                 /*
                  * Check to see if the uuid of the server we know at this address
@@ -555,7 +588,6 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
                               osi_LogSaveString(afsd_logp, hoststr));
                 }
             }
-#endif
             if (!tsp) {
                 /*
                  * cm_NewServer will probe the file server which in turn will
@@ -595,7 +627,12 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
                 if (!(tsp->flags & CM_SERVERFLAG_DOWN))
                     rwServers_alldown = 0;
             }
-            if ((tflags & VLSF_ROVOL) && (flags & VLF_ROEXISTS)) {
+            /*
+             * If there are mixed versions of RO releases on the replica
+             * sites, skip the servers with the out of date versions.
+             */
+            if ((tflags & VLSF_ROVOL) && (flags & VLF_ROEXISTS) &&
+                (!isMixed || (tflags & VLSF_NEWREPSITE))) {
                 tsrp = cm_NewServerRef(tsp, roID);
                 cm_InsertServerList(&volp->vol[ROVOL].serversp, tsrp);
                 ROcount++;
@@ -661,6 +698,10 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
     }
 
     volp->lastUpdateTime = time(NULL);
+    if (isMixed)
+        _InterlockedOr(&volp->flags, CM_VOLUMEFLAG_RO_MIXED);
+    else
+        _InterlockedAnd(&volp->flags, ~CM_VOLUMEFLAG_RO_MIXED);
 
     if (code == 0)
         _InterlockedAnd(&volp->flags, ~CM_VOLUMEFLAG_RESET);
@@ -953,6 +994,7 @@ long cm_FindVolumeByName(struct cm_cell *cellp, char *volumeNamep,
             volp->vol[volType].flags = 0;
         }
         volp->cbExpiresRO = 0;
+        volp->cbIssuedRO = 0;
         volp->cbServerpRO = NULL;
         volp->creationDateRO = 0;
         cm_AddVolumeToNameHashTable(volp);
@@ -1188,9 +1230,16 @@ void cm_RefreshVolumes(int lifetime)
 
         if (!(volp->flags & CM_VOLUMEFLAG_RESET)) {
             lock_ObtainWrite(&volp->rw);
-            if (volp->lastUpdateTime + lifetime <= now) {
-                _InterlockedOr(&volp->flags, CM_VOLUMEFLAG_RESET);
-                volp->lastUpdateTime = 0;
+            if (volp->flags & CM_VOLUMEFLAG_RO_MIXED) {
+                if (volp->lastUpdateTime + 300 <= now) {
+                    _InterlockedOr(&volp->flags, CM_VOLUMEFLAG_RESET);
+                    volp->lastUpdateTime = 0;
+                }
+            } else {
+                if (volp->lastUpdateTime + lifetime <= now) {
+                    _InterlockedOr(&volp->flags, CM_VOLUMEFLAG_RESET);
+                    volp->lastUpdateTime = 0;
+                }
             }
             lock_ReleaseWrite(&volp->rw);
         }
@@ -1244,8 +1293,9 @@ cm_CheckOfflineVolumeState(cm_volume_t *volp, cm_vol_state_t *statep, afs_uint32
                     continue;
 
                 alldeleted = 0;
-                *onlinep = 1;
-                alldown = 0;
+
+                if (!(serversp->server->flags & CM_SERVERFLAG_DOWN))
+                    alldown = 0;
 
                 if (serversp->status == srv_busy || serversp->status == srv_offline)
                     serversp->status = srv_not_busy;
@@ -1273,7 +1323,7 @@ cm_CheckOfflineVolumeState(cm_volume_t *volp, cm_vol_state_t *statep, afs_uint32
                     code = RXAFS_GetVolumeStatus(rxconnp, statep->ID,
                                                  &volStat, &Name, &OfflineMsg, &MOTD);
                     rx_PutConnection(rxconnp);
-                } while (cm_Analyze(connp, cm_rootUserp, &req, &fid, NULL, NULL, NULL, code));
+                } while (cm_Analyze(connp, cm_rootUserp, &req, &fid, 0, NULL, NULL, NULL, code));
                 code = cm_MapRPCError(code, &req);
 
                 lock_ObtainWrite(&volp->rw);
@@ -1834,7 +1884,7 @@ cm_VolumeRenewROCallbacks(void)
             cm_InitReq(&req);
 
             lock_ReleaseRead(&cm_volumeLock);
-            if (cm_GetSCache(&fid, &scp, cm_rootUserp, &req) == 0) {
+            if (cm_GetSCache(&fid, NULL, &scp, cm_rootUserp, &req) == 0) {
                 lock_ObtainWrite(&scp->rw);
                 cm_GetCallback(scp, cm_rootUserp, &req, 1);
                 lock_ReleaseWrite(&scp->rw);
@@ -1895,3 +1945,19 @@ cm_VolumeType(cm_volume_t *volp, afs_uint32 id)
 
     return -1;
 }
+
+LONG_PTR   /* cm_ChecksumServerList() of the server list found for fidp; 0 if cm_GetServerList fails */
+cm_ChecksumVolumeServerList(struct cm_fid *fidp, cm_user_t *userp, cm_req_t *reqp)
+{
+    LONG_PTR cksum = 0;   /* returned unchanged when the lookup below reports an error */
+    long code;
+    afs_uint32 replicated;   /* out-parameter required by cm_GetServerList; its value is not used here */
+    cm_serverRef_t **serverspp;
+
+    code = cm_GetServerList(fidp, userp, reqp, &replicated, &serverspp);
+    if (code == 0) {
+        cksum = cm_ChecksumServerList(*serverspp);
+        cm_FreeServerList(serverspp, 0);   /* NOTE(review): presumably releases what cm_GetServerList handed out -- confirm */
+    }
+    return cksum;
+}