Windows: Checksum server lists on Volume Errors
[openafs.git] / src / WINNT / afsd / cm_volume.c
index a81f0ed..dd40e31 100644 (file)
@@ -76,6 +76,7 @@ cm_ShutdownVolume(void)
                 cm_VolumeStatusNotification(volp, volp->vol[volType].ID, volp->vol[volType].state, vl_alldown);
         }
         volp->cbExpiresRO = 0;
+        volp->cbIssuedRO = 0;
         volp->cbServerpRO = NULL;
         lock_FinalizeRWLock(&volp->rw);
     }
@@ -116,6 +117,7 @@ void cm_InitVolume(int newFile, long maxVols)
                         cm_VolumeStatusNotification(volp, volp->vol[volType].ID, vl_unknown, volp->vol[volType].state);
                 }
                 volp->cbExpiresRO = 0;
+                volp->cbIssuedRO = 0;
                 volp->cbServerpRO = NULL;
             }
         }
@@ -165,15 +167,11 @@ cm_VolNameIsID(char *aname)
  *    first, and fall back to successively older versions if you get
  *    RXGEN_OPCODE.
  */
-#define MULTIHOMED 1
-
 static long
 cm_GetEntryByName( struct cm_cell *cellp, const char *name,
                    struct vldbentry *vldbEntryp,
                    struct nvldbentry *nvldbEntryp,
-#ifdef MULTIHOMED
                    struct uvldbentry *uvldbEntryp,
-#endif
                    int *methodp,
                    cm_user_t *userp,
                    cm_req_t *reqp
@@ -188,16 +186,14 @@ cm_GetEntryByName( struct cm_cell *cellp, const char *name,
               osi_LogSaveString(afsd_logp,name));
     do {
 
-        code = cm_ConnByMServers(cellp->vlServersp, userp, reqp, &connp);
+        code = cm_ConnByMServers(cellp->vlServersp, FALSE, userp, reqp, &connp);
         if (code)
             continue;
 
         rxconnp = cm_GetRxConn(connp);
-#ifdef MULTIHOMED
         code = VL_GetEntryByNameU(rxconnp, name, uvldbEntryp);
         *methodp = 2;
         if ( code == RXGEN_OPCODE )
-#endif
         {
             code = VL_GetEntryByNameN(rxconnp, name, nvldbEntryp);
             *methodp = 1;
@@ -207,7 +203,7 @@ cm_GetEntryByName( struct cm_cell *cellp, const char *name,
             *methodp = 0;
         }
         rx_PutConnection(rxconnp);
-    } while (cm_Analyze(connp, userp, reqp, NULL, NULL, cellp->vlServersp, NULL, code));
+    } while (cm_Analyze(connp, userp, reqp, NULL, 0, NULL, cellp->vlServersp, NULL, code));
     code = cm_MapVLRPCError(code, reqp);
     if ( code )
         osi_Log3(afsd_logp, "CALL VL_GetEntryByName{UNO} name %s:%s FAILURE, code 0x%x",
@@ -224,9 +220,7 @@ static long
 cm_GetEntryByID( struct cm_cell *cellp, afs_uint32 id,
                  struct vldbentry *vldbEntryp,
                  struct nvldbentry *nvldbEntryp,
-#ifdef MULTIHOMED
                  struct uvldbentry *uvldbEntryp,
-#endif
                  int *methodp,
                  cm_user_t *userp,
                  cm_req_t *reqp
@@ -253,11 +247,10 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
     u_long tempAddr;
     struct vldbentry vldbEntry;
     struct nvldbentry nvldbEntry;
-#ifdef MULTIHOMED
     struct uvldbentry uvldbEntry;
-#endif
     int method = -1;
     int ROcount = 0;
+    int isMixed = 0;
     long code;
     enum volstatus rwNewstate = vl_online;
     enum volstatus roNewstate = vl_online;
@@ -267,6 +260,7 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
 #endif
     afs_uint32 volType;
     time_t now;
+    int replicated = 0;
 
     lock_AssertWrite(&volp->rw);
 
@@ -322,13 +316,11 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
         lock_ReleaseWrite(&volp->rw);
 
         if (cellp->flags & CM_CELLFLAG_VLSERVER_INVALID)
-            cm_UpdateCell(cellp, 0);
+             cm_UpdateCell(cellp, 0);
 
         /* now we have volume structure locked and held; make RPC to fill it */
         code = cm_GetEntryByName(cellp, volp->namep, &vldbEntry, &nvldbEntry,
-#ifdef MULTIHOMED
                                  &uvldbEntry,
-#endif
                                  &method, userp, reqp);
     }
 
@@ -347,9 +339,7 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
 
         /* now we have volume structure locked and held; make RPC to fill it */
         code = cm_GetEntryByName(cellp, name, &vldbEntry, &nvldbEntry,
-#ifdef MULTIHOMED
                                  &uvldbEntry,
-#endif
                                  &method, userp, reqp);
     }
 
@@ -361,15 +351,11 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
     if (code == CM_ERROR_NOSUCHVOLUME) {
         if (volp->vol[RWVOL].ID != 0) {
             code = cm_GetEntryByID(cellp, volp->vol[RWVOL].ID, &vldbEntry, &nvldbEntry,
-#ifdef MULTIHOMED
                                     &uvldbEntry,
-#endif
                                     &method, userp, reqp);
         } else if (volp->vol[ROVOL].ID != 0) {
             code = cm_GetEntryByID(cellp, volp->vol[ROVOL].ID, &vldbEntry, &nvldbEntry,
-#ifdef MULTIHOMED
                                     &uvldbEntry,
-#endif
                                     &method, userp, reqp);
         }
     }
@@ -406,6 +392,7 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
         case 0:
             flags = vldbEntry.flags;
             nServers = vldbEntry.nServers;
+            replicated = (nServers > 0);
             rwID = vldbEntry.volumeId[0];
             roID = vldbEntry.volumeId[1];
             bkID = vldbEntry.volumeId[2];
@@ -419,6 +406,7 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
         case 1:
             flags = nvldbEntry.flags;
             nServers = nvldbEntry.nServers;
+            replicated = (nServers > 0);
             rwID = nvldbEntry.volumeId[0];
             roID = nvldbEntry.volumeId[1];
             bkID = nvldbEntry.volumeId[2];
@@ -429,15 +417,15 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
             strncpy(name, nvldbEntry.name, VL_MAXNAMELEN);
             name[VL_MAXNAMELEN - 1] = '\0';
             break;
-#ifdef MULTIHOMED
         case 2:
             flags = uvldbEntry.flags;
             nServers = uvldbEntry.nServers;
+            replicated = (nServers > 0);
             rwID = uvldbEntry.volumeId[0];
             roID = uvldbEntry.volumeId[1];
             bkID = uvldbEntry.volumeId[2];
             for ( i=0, j=0; code == 0 && i<nServers && j<NMAXNSERVERS; i++ ) {
-                if ( !(uvldbEntry.serverFlags[i] & VLSERVER_FLAG_UUID) ) {
+                if ( !(uvldbEntry.serverFlags[i] & VLSF_UUID) ) {
                     serverFlags[j] = uvldbEntry.serverFlags[i];
                     serverNumber[j] = uvldbEntry.serverNumber[i].time_low;
                     j++;
@@ -454,14 +442,14 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
                     memset(&addrs, 0, sizeof(addrs));
 
                     do {
-                        code = cm_ConnByMServers(cellp->vlServersp, userp, reqp, &connp);
+                        code = cm_ConnByMServers(cellp->vlServersp, FALSE, userp, reqp, &connp);
                         if (code)
                             continue;
 
                         rxconnp = cm_GetRxConn(connp);
                         code = VL_GetAddrsU(rxconnp, &attrs, &uuid, &unique, &nentries, &addrs);
                         rx_PutConnection(rxconnp);
-                    } while (cm_Analyze(connp, userp, reqp, NULL, NULL, cellp->vlServersp, NULL, code));
+                    } while (cm_Analyze(connp, userp, reqp, NULL, 0, NULL, cellp->vlServersp, NULL, code));
 
                     if ( code ) {
                         code = cm_MapVLRPCError(code, reqp);
@@ -488,7 +476,6 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
             strncpy(name, uvldbEntry.name, VL_MAXNAMELEN);
             name[VL_MAXNAMELEN - 1] = '\0';
             break;
-#endif
         }
 
         /* decode the response */
@@ -540,6 +527,10 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
                 volp->vol[ROVOL].ID = roID;
                 cm_AddVolumeToIDHashTable(volp, ROVOL);
             }
+            if (replicated)
+                _InterlockedOr(&volp->vol[ROVOL].flags, CM_VOL_STATE_FLAG_REPLICATED);
+            else
+                _InterlockedAnd(&volp->vol[ROVOL].flags, ~CM_VOL_STATE_FLAG_REPLICATED);
         } else {
             if (volp->vol[ROVOL].qflags & CM_VOLUME_QFLAG_IN_HASH)
                 cm_RemoveVolumeFromIDHashTable(volp, ROVOL);
@@ -558,6 +549,15 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
             volp->vol[BACKVOL].ID = 0;
         }
         lock_ReleaseWrite(&cm_volumeLock);
+
+        /* See if the replica sites are mixed versions */
+        for (i=0; i<nServers; i++) {
+            if (serverFlags[i] & VLSF_NEWREPSITE) {
+                isMixed = 1;
+                break;
+            }
+        }
+
         for (i=0; i<nServers; i++) {
             /* create a server entry */
             tflags = serverFlags[i];
@@ -568,7 +568,6 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
             tempAddr = htonl(serverNumber[i]);
             tsockAddr.sin_addr.s_addr = tempAddr;
             tsp = cm_FindServer(&tsockAddr, CM_SERVER_FILE, FALSE);
-#ifdef MULTIHOMED
             if (tsp && (method == 2) && (tsp->flags & CM_SERVERFLAG_UUID)) {
                 /*
                  * Check to see if the uuid of the server we know at this address
@@ -589,7 +588,6 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
                               osi_LogSaveString(afsd_logp, hoststr));
                 }
             }
-#endif
             if (!tsp) {
                 /*
                  * cm_NewServer will probe the file server which in turn will
@@ -629,7 +627,12 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
                 if (!(tsp->flags & CM_SERVERFLAG_DOWN))
                     rwServers_alldown = 0;
             }
-            if ((tflags & VLSF_ROVOL) && (flags & VLF_ROEXISTS)) {
+            /*
+             * If there are mixed versions of RO releases on the replica
+             * sites, skip the servers with the out of date versions.
+             */
+            if ((tflags & VLSF_ROVOL) && (flags & VLF_ROEXISTS) &&
+                (!isMixed || (tflags & VLSF_NEWREPSITE))) {
                 tsrp = cm_NewServerRef(tsp, roID);
                 cm_InsertServerList(&volp->vol[ROVOL].serversp, tsrp);
                 ROcount++;
@@ -695,6 +698,10 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
     }
 
     volp->lastUpdateTime = time(NULL);
+    if (isMixed)
+        _InterlockedOr(&volp->flags, CM_VOLUMEFLAG_RO_MIXED);
+    else
+        _InterlockedAnd(&volp->flags, ~CM_VOLUMEFLAG_RO_MIXED);
 
     if (code == 0)
         _InterlockedAnd(&volp->flags, ~CM_VOLUMEFLAG_RESET);
@@ -987,6 +994,7 @@ long cm_FindVolumeByName(struct cm_cell *cellp, char *volumeNamep,
             volp->vol[volType].flags = 0;
         }
         volp->cbExpiresRO = 0;
+        volp->cbIssuedRO = 0;
         volp->cbServerpRO = NULL;
         volp->creationDateRO = 0;
         cm_AddVolumeToNameHashTable(volp);
@@ -1222,9 +1230,16 @@ void cm_RefreshVolumes(int lifetime)
 
         if (!(volp->flags & CM_VOLUMEFLAG_RESET)) {
             lock_ObtainWrite(&volp->rw);
-            if (volp->lastUpdateTime + lifetime <= now) {
-                _InterlockedOr(&volp->flags, CM_VOLUMEFLAG_RESET);
-                volp->lastUpdateTime = 0;
+            if (volp->flags & CM_VOLUMEFLAG_RO_MIXED) {
+                if (volp->lastUpdateTime + 300 <= now) {
+                    _InterlockedOr(&volp->flags, CM_VOLUMEFLAG_RESET);
+                    volp->lastUpdateTime = 0;
+                }
+            } else {
+                if (volp->lastUpdateTime + lifetime <= now) {
+                    _InterlockedOr(&volp->flags, CM_VOLUMEFLAG_RESET);
+                    volp->lastUpdateTime = 0;
+                }
             }
             lock_ReleaseWrite(&volp->rw);
         }
@@ -1308,7 +1323,7 @@ cm_CheckOfflineVolumeState(cm_volume_t *volp, cm_vol_state_t *statep, afs_uint32
                     code = RXAFS_GetVolumeStatus(rxconnp, statep->ID,
                                                  &volStat, &Name, &OfflineMsg, &MOTD);
                     rx_PutConnection(rxconnp);
-                } while (cm_Analyze(connp, cm_rootUserp, &req, &fid, NULL, NULL, NULL, code));
+                } while (cm_Analyze(connp, cm_rootUserp, &req, &fid, 0, NULL, NULL, NULL, code));
                 code = cm_MapRPCError(code, &req);
 
                 lock_ObtainWrite(&volp->rw);
@@ -1869,7 +1884,7 @@ cm_VolumeRenewROCallbacks(void)
             cm_InitReq(&req);
 
             lock_ReleaseRead(&cm_volumeLock);
-            if (cm_GetSCache(&fid, &scp, cm_rootUserp, &req) == 0) {
+            if (cm_GetSCache(&fid, NULL, &scp, cm_rootUserp, &req) == 0) {
                 lock_ObtainWrite(&scp->rw);
                 cm_GetCallback(scp, cm_rootUserp, &req, 1);
                 lock_ReleaseWrite(&scp->rw);
@@ -1930,3 +1945,19 @@ cm_VolumeType(cm_volume_t *volp, afs_uint32 id)
 
     return -1;
 }
+
+LONG_PTR
+cm_ChecksumVolumeServerList(struct cm_fid *fidp, cm_user_t *userp, cm_req_t *reqp)
+{
+    LONG_PTR cksum = 0;
+    long code;
+    afs_uint32 replicated;
+    cm_serverRef_t **serverspp;
+
+    code = cm_GetServerList(fidp, userp, reqp, &replicated, &serverspp);
+    if (code == 0) {
+        cksum = cm_ChecksumServerList(*serverspp);
+        cm_FreeServerList(serverspp, 0);
+    }
+    return cksum;
+}