#include <afs/voldefs.h>
#include <rx/xdr.h>
#include <rx/rx.h>
+#include <rx/rx_queue.h>
#include <afs/vlserver.h>
#include <afs/nfs.h>
#include <afs/cellconfig.h>
struct rx_connection *aconn;
afs_int32 code = 0;
- aconn = (struct rx_connection *)0;
aconn = UV_Bind(server, AFSCONF_VOLUMEPORT);
code = AFSVolPartitionInfo64(aconn, pname, partition);
if (code == RXGEN_OPCODE) {
struct volintInfo tstatus;
tid = 0;
- aconn = (struct rx_connection *)0;
error = 0;
init_volintInfo(&tstatus);
/* If caller specified RW id, but not RO/BK ids, have them be RW+1 and RW+2 */
lastid = *anewid;
if (aroid && *aroid != 0) {
- lastid = MAX(lastid, *aroid);
+ lastid = max(lastid, *aroid);
}
if (abkid && *abkid != 0) {
- lastid = MAX(lastid, *abkid);
+ lastid = max(lastid, *abkid);
}
if (aroid && *aroid == 0) {
*aroid = ++lastid;
code =
AFSVolTransCreate_retry(aconn, avolid, apart, ITOffline, &ttid);
+ /* return early and quietly for VNOVOL; don't continue the attempt to delete. */
+ if (code == VNOVOL) {
+ error = code;
+ goto dfail;
+ }
+
EGOTO2(dfail, code, "%sFailed to start transaction on %u\n",
prefix, avolid);
pntg = 1;
toconn = UV_Bind(atoserver, AFSCONF_VOLUMEPORT); /* get connections to the servers */
fromconn = UV_Bind(afromserver, AFSCONF_VOLUMEPORT);
- fromtid = totid = 0; /* initialize to uncreated */
+ totid = 0; /* initialize to uncreated */
/* ***
* clone the read/write volume locally.
/* Get a clone id */
VPRINT1("Allocating new volume id for clone of volume %u ...",
afromvol);
- newVol = tmpVol = 0;
+ tmpVol = 0;
vcode = ubik_VL_GetNewVolumeId(cstruct, 0, 1, &tmpVol);
newVol = tmpVol;
EGOTO1(mfail, vcode,
code = DoVolDelete(fromconn, newVol, afrompart,
"cloned", 0, NULL, NULL);
if (code) {
+ if (code == VNOVOL) {
+ EPRINT1(code, "Failed to start transaction on %u\n", newVol);
+ }
error = code;
goto mfail;
}
fflush(STDOUT);
}
- if (volid && toconn)
+ if (volid && toconn) {
code = DoVolDelete(toconn, volid, atopart,
"destination", 0, NULL, "Recovery:");
+ if (code == VNOVOL) {
+ EPRINT1(code, "Recovery: Failed to start transaction on %u\n", volid);
+ }
+ }
/* put source volume on-line */
if (fromconn) {
if (fromconn) {
code = DoVolDelete(fromconn, backupId, afrompart,
"backup", 0, NULL, "Recovery:");
+ if (code == VNOVOL) {
+ EPRINT1(code, "Recovery: Failed to start transaction on %u\n", backupId);
+ }
code = DoVolDelete(fromconn, afromvol, afrompart, "source",
(atoserver != afromserver)?atoserver:0,
- NULL, NULL);
+ NULL, NULL);
+ if (code == VNOVOL) {
+ EPRINT1(code, "Failed to start transaction on %u\n", afromvol);
+ }
}
}
/* common cleanup - delete local clone */
if (newVol) {
code = DoVolDelete(fromconn, newVol, afrompart,
- "clone", 0, NULL, "Recovery:");
+ "clone", 0, NULL, "Recovery:");
+ if (code == VNOVOL) {
+ EPRINT1(code, "Recovery: Failed to start transaction on %u\n", newVol);
+ }
}
/* unlock VLDB entry */
ubik_VL_ReleaseLock(cstruct, 0, afromvol, -1,
(LOCKREL_OPCODE | LOCKREL_AFSID | LOCKREL_TIMESTAMP));
VDONE;
- islocked = 0;
}
done: /* routine cleanup */
if (volName)
code = DoVolDelete(fromconn, cloneVol, afrompart,
"cloned", 0, NULL, NULL);
if (code) {
+ if (code == VNOVOL) {
+ EPRINT1(code, "Failed to start transaction on %u\n", cloneVol);
+ }
error = code;
goto mfail;
}
MapHostToNetwork(&entry);
/* common cleanup - delete local clone */
- if (cloneVol)
+ if (cloneVol) {
code = DoVolDelete(fromconn, cloneVol, afrompart,
- "clone", 0, NULL, "Recovery:");
+ "clone", 0, NULL, "Recovery:");
+ if (code == VNOVOL) {
+ EPRINT1(code, "Recovery: Failed to start transaction on %u\n", cloneVol);
+ }
+ }
done: /* routine cleanup */
if (fromconn)
/* If the volume does not exist, create it */
if (!volid || code) {
- char volname[64];
+ char volname[VL_MAXNAMELEN];
char hoststr[16];
if (volid && (code != VNOVOL)) {
goto fail;
}
- strcpy(volname, vldbEntryPtr->name);
+ strlcpy(volname, vldbEntryPtr->name, sizeof(volname));
+
+ if (strlcat(volname,
+ tmpVolId?".roclone":".readonly",
+ sizeof(volname)) >= sizeof(volname)) {
+ code = ENOMEM;
+ PrintError("Volume name is too long\n", code);
+ goto fail;
+ }
+
if (tmpVolId)
strcat(volname, ".roclone");
else
afs_uint32 cloneVolId = 0, roVolId;
struct replica *replicas = 0;
struct nvldbentry entry, storeEntry;
- int i, volcount = 0, m, fullrelease, vldbindex;
+ int i, volcount = 0, m, vldbindex;
int failure;
struct restoreCookie cookie;
struct rx_connection **toconns = 0;
manyDests tr;
manyResults results;
int rwindex, roindex, roclone, roexists;
- afs_uint32 rwcrdate = 0;
+ afs_uint32 rwcrdate = 0, rwupdate = 0;
afs_uint32 clcrdate;
struct rtime {
int validtime;
char hoststr[16];
afs_int32 origflags[NMAXNSERVERS];
struct volser_status orig_status;
+ int notreleased = 0; /* number of ro sites flagged RO_DONTUSE */
+ int tried_justnewsites = 0; /* set when a new-sites-only release was announced */
+ int justnewsites = 0; /* are we just trying to release to new RO sites? */
+ int sites = 0; /* number of ro sites */
+ int new_sites = 0; /* number of ro sites marked as new (NEW_REPSITE) */
+
+ typedef enum {
+ CR_RECOVER = 0x0000, /**< not complete: a recovery from a previous failed release */
+ CR_FORCED = 0x0001, /**< complete: forced by caller */
+ CR_LAST_OK = 0x0002, /**< complete: no sites have been marked as new release */
+ CR_ALL_NEW = 0x0004, /**< complete: all sites have been marked as new release */
+ CR_NEW_RW = 0x0008, /**< complete: read-write has changed */
+ CR_RO_MISSING = 0x0010, /**< complete: ro clone is missing */
+ } complete_release_t;
+
+ complete_release_t complete_release = CR_RECOVER; /* no reason bits yet; reasons are OR-ed in as they are found */
memset(remembertime, 0, sizeof(remembertime));
memset(&results, 0, sizeof(results));
ONERROR(vcode, entry.name, "Could not update vldb entry for %s.\n");
}
- /* Will we be completing a previously unfinished release. -force overrides */
- for (s = 0, m = 0, fullrelease=0, i=0; (i<entry.nServers); i++) {
+ /*
+ * Determine if this is to be a complete release or a recovery of a
+ * previous unfinished release. The previous release is considered to be
+ * unfinished when the clone was successfully distributed to at least one
+ * (but not all) of the read-only sites, as indicated by the NEW_REPSITE
+ * vldb flags.
+ *
+ * The caller can override the vldb flags check using the -force
+ * flag, to force this to be a complete release.
+ */
+ for (i = 0; i < entry.nServers; i++) {
if (entry.serverFlags[i] & ITSROVOL) {
- m++;
- if (entry.serverFlags[i] & NEW_REPSITE) s++;
+ sites++;
+ if (entry.serverFlags[i] & NEW_REPSITE)
+ new_sites++;
+ if (entry.serverFlags[i] & RO_DONTUSE)
+ notreleased++;
}
origflags[i] = entry.serverFlags[i];
}
- if ((forceflag && !fullrelease) || (s == m) || (s == 0))
- fullrelease = 1;
+
+ if (forceflag) {
+ complete_release |= CR_FORCED;
+ }
+
+ if (new_sites == 0) {
+ complete_release |= CR_LAST_OK;
+ } else if (new_sites == sites) {
+ complete_release |= CR_ALL_NEW;
+ }
+
+ if ((complete_release & (CR_LAST_OK | CR_ALL_NEW))
+ && !(complete_release & CR_FORCED)) {
+ if (notreleased && notreleased != sites) {
+ /* We have some new, unreleased sites. Try to release only to those,
+ * provided the RW has not changed. The caller can override with -force. */
+ justnewsites = 1;
+ }
+ }
/* Determine which volume id to use and see if it exists */
- cloneVolId =
- ((fullrelease
- || (entry.cloneId == 0)) ? entry.volumeId[ROVOL] : entry.cloneId);
+ cloneVolId = (complete_release || entry.cloneId == 0)
+ ? entry.volumeId[ROVOL] : entry.cloneId;
+
code = VolumeExists(afromserver, afrompart, cloneVolId);
roexists = ((code == ENODEV) ? 0 : 1);
ONERROR(-1, afromserver,
"Cannot establish connection with server 0x%x\n");
- if (!fullrelease) {
- if (!roexists)
- fullrelease = 1; /* Do a full release if RO clone does not exist */
- else {
+ if (!complete_release) {
+ if (!roexists) {
+ complete_release |= CR_RO_MISSING; /* Do a complete release if RO clone does not exist */
+ } else {
/* Begin transaction on RW and mark it busy while we query it */
code = AFSVolTransCreate_retry(
fromconn, afromvol, afrompart, ITBusy, &fromtid
"Failed to end transaction on RW clone %u\n");
if (rwcrdate > clcrdate)
- fullrelease = 2;/* Do a full release if RO clone older than RW */
+ complete_release |= CR_NEW_RW; /* Do a complete release if RO clone older than RW */
}
}
+ if (!complete_release || (complete_release & CR_NEW_RW)) {
+ /* in case the RW has changed, and just to be safe */
+ justnewsites = 0;
+ }
+
if (verbose) {
- switch (fullrelease) {
- case 2:
- fprintf(STDOUT, "RW %lu changed, doing a complete release\n",
- (unsigned long)afromvol);
- break;
- case 1:
- fprintf(STDOUT, "This is a complete release of volume %lu\n",
- (unsigned long)afromvol);
- break;
- case 0:
- fprintf(STDOUT, "This is a completion of a previous release\n");
- break;
- }
- }
-
- if (fullrelease) {
+ if (!complete_release) {
+ fprintf(STDOUT,
+ "This is a recovery of previously failed release\n");
+ } else {
+ fprintf(STDOUT, "This is a complete release of volume %u", afromvol);
+ /* Give the reasons for a complete release, except if only CR_LAST_OK. */
+ if (complete_release != CR_LAST_OK) {
+ char *sep = " (";
+ if (complete_release & CR_FORCED) {
+ fprintf(STDOUT, "%sforced", sep);
+ sep = ", ";
+ }
+ if (complete_release & CR_LAST_OK) {
+ fprintf(STDOUT, "%slast ok", sep);
+ sep = ", ";
+ }
+ if (complete_release & CR_ALL_NEW) {
+ fprintf(STDOUT, "%sall sites are new", sep);
+ sep = ", ";
+ }
+ if (complete_release & CR_NEW_RW) {
+ fprintf(STDOUT, "%srw %u changed", sep, afromvol);
+ sep = ", ";
+ }
+ if (complete_release & CR_RO_MISSING) {
+ fprintf(STDOUT, "%sro clone missing", sep);
+ }
+ fprintf(STDOUT, ")");
+ }
+ fprintf(STDOUT, "\n");
+ if (justnewsites) {
+ tried_justnewsites = 1;
+ fprintf(STDOUT, "There are new RO sites; we will try to "
+ "only release to new sites\n");
+ }
+ }
+ }
+
+ if (complete_release) {
+ afs_int32 oldest = 0;
/* If the RO clone exists, then if the clone is a temporary
* clone, delete it. Or if the RO clone is marked RO_DONTUSE
* (it was recently added), then also delete it. We do not
roexists = 0;
}
+ if (justnewsites) {
+ VPRINT("Querying old RO sites for update times...");
+ for (vldbindex = 0; vldbindex < entry.nServers; vldbindex++) {
+ volEntries volumeInfo;
+ struct rx_connection *conn;
+ afs_int32 crdate;
+
+ if (!(entry.serverFlags[vldbindex] & ITSROVOL)) {
+ continue;
+ }
+ if ((entry.serverFlags[vldbindex] & RO_DONTUSE)) {
+ continue;
+ }
+ conn = UV_Bind(entry.serverNumber[vldbindex], AFSCONF_VOLUMEPORT);
+ if (!conn) {
+ fprintf(STDERR, "Cannot establish connection to server %s\n",
+ hostutil_GetNameByINet(entry.serverNumber[vldbindex]));
+ justnewsites = 0;
+ break;
+ }
+ volumeInfo.volEntries_val = NULL;
+ volumeInfo.volEntries_len = 0;
+ code = AFSVolListOneVolume(conn, entry.serverPartition[vldbindex],
+ entry.volumeId[ROVOL],
+ &volumeInfo);
+ if (code) {
+ fprintf(STDERR, "Could not fetch information about RO vol %lu from server %s\n",
+ (unsigned long)entry.volumeId[ROVOL],
+ hostutil_GetNameByINet(entry.serverNumber[vldbindex]));
+ PrintError("", code);
+ justnewsites = 0;
+ rx_DestroyConnection(conn);
+ break;
+ }
+
+ crdate = CLOCKADJ(volumeInfo.volEntries_val[0].creationDate);
+
+ if (oldest == 0 || crdate < oldest) {
+ oldest = crdate;
+ }
+
+ rx_DestroyConnection(conn);
+ free(volumeInfo.volEntries_val);
+ volumeInfo.volEntries_val = NULL;
+ volumeInfo.volEntries_len = 0;
+ }
+ VDONE;
+ }
+ if (justnewsites) {
+ volEntries volumeInfo;
+ volumeInfo.volEntries_val = NULL;
+ volumeInfo.volEntries_len = 0;
+ code = AFSVolListOneVolume(fromconn, afrompart, afromvol,
+ &volumeInfo);
+ if (code) {
+ fprintf(STDERR, "Could not fetch information about RW vol %lu from server %s\n",
+ (unsigned long)afromvol,
+ hostutil_GetNameByINet(afromserver));
+ PrintError("", code);
+ justnewsites = 0;
+ } else {
+ rwupdate = volumeInfo.volEntries_val[0].updateDate;
+
+ free(volumeInfo.volEntries_val);
+ volumeInfo.volEntries_val = NULL;
+ volumeInfo.volEntries_len = 0;
+ }
+ }
+ if (justnewsites && oldest <= rwupdate) {
+ /* RW has changed */
+ justnewsites = 0;
+ }
+
/* Mark all the ROs in the VLDB entry as RO_DONTUSE. We don't
* write this entry out to the vlserver until after the first
* RO volume is released (temp RO clones don't count).
+ *
+ * If 'justnewsites' is set, we're only updating sites that have
+ * RO_DONTUSE set, so set NEW_REPSITE for all of the others.
*/
for (i = 0; i < entry.nServers; i++) {
- entry.serverFlags[i] &= ~NEW_REPSITE;
- entry.serverFlags[i] |= RO_DONTUSE;
+ if (justnewsites) {
+ if ((entry.serverFlags[i] & RO_DONTUSE)) {
+ entry.serverFlags[i] &= ~NEW_REPSITE;
+ } else {
+ entry.serverFlags[i] |= NEW_REPSITE;
+ }
+ } else {
+ entry.serverFlags[i] &= ~NEW_REPSITE;
+ entry.serverFlags[i] |= RO_DONTUSE;
+ }
}
entry.serverFlags[rwindex] |= NEW_REPSITE;
entry.serverFlags[rwindex] &= ~RO_DONTUSE;
+ }
+
+ if (justnewsites && roexists) {
+ /* if 'justnewsites' and 'roexists' are set, we don't need to do
+ * anything with the RO clone, so skip the reclone */
+ /* noop */
+
+ } else if (complete_release) {
if (roclone) {
strcpy(vname, entry.name);
goto rfail;
}
+ if (justnewsites && rwupdate != volstatus.updateDate) {
+ justnewsites = 0;
+ /* reset the serverFlags as if 'justnewsites' had never been set */
+ for (i = 0; i < entry.nServers; i++) {
+ entry.serverFlags[i] &= ~NEW_REPSITE;
+ entry.serverFlags[i] |= RO_DONTUSE;
+ }
+ entry.serverFlags[rwindex] |= NEW_REPSITE;
+ entry.serverFlags[rwindex] &= ~RO_DONTUSE;
+ }
+
rwcrdate = volstatus.creationDate;
/* Remember clone volume ID in case we fail or are interrupted */
* There is a fix in the 3.4 client that does not need this sleep
* anymore, but we don't know what clients we have.
*/
- if (entry.nServers > 2)
+ if (entry.nServers > 2 && !justnewsites)
sleep(5);
/* Mark the RO clone in the VLDB as a good site (already released) */
}
}
+ if (justnewsites) {
+ VPRINT("RW vol has not changed; only releasing to new RO sites\n");
+ /* act like this is a completion of a previous release */
+ complete_release = CR_RECOVER;
+ } else if (tried_justnewsites) {
+ VPRINT("RW vol has changed; releasing to all sites\n");
+ }
+
/* Now we will release from the clone to the remaining RO replicas.
* The first 2 ROs (counting the non-temporary RO clone) are released
* individually: releasecount. This is to reduce the race condition
cookie.clone = 0;
/* how many to do at once, excluding clone */
- if (stayUp)
+ if (stayUp || justnewsites)
nservers = entry.nServers; /* can do all, none offline */
else
nservers = entry.nServers / 2;
memset(&orig_status, 0, sizeof(orig_status));
code = AFSVolGetStatus(fromconn, fromtid, &orig_status);
}
- if (!fullrelease && code)
+ if (!complete_release && code)
ONERROR(VOLSERNOVOL, afromvol,
"Old clone is inaccessible. Try vos release -f %u.\n");
ONERROR0(code, "Failed to create transaction on the release clone\n");
VDONE;
/* if we have a clone, treat this as done, for now */
- if (stayUp && !fullrelease) {
+ if (stayUp && !complete_release) {
entry.serverFlags[roindex] |= NEW_REPSITE;
entry.serverFlags[roindex] &= ~RO_DONTUSE;
entry.flags |= RO_EXISTS;
for (volcount = 0;
((volcount < nservers) && (vldbindex < entry.nServers));
vldbindex++) {
- if (!stayUp) {
+ if (!stayUp && !justnewsites) {
/* The first two RO volumes will be released individually.
* The rest are then released in parallel. This is a hack
* for clients not recognizing right away when a RO volume
continue;
/* Thisdate is the date from which we want to pick up all changes */
- if (forceflag || !fullrelease
- || (rwcrdate > times[volcount].crtime)) {
- /* If the forceflag is set, then we want to do a full dump.
- * If it's not a full release, we can't be sure that the creation
- * date is good (so we also do a full dump).
- * If the RW volume was replaced (its creation date is newer than
- * the last release), then we can't be sure what has changed (so
- * we do a full dump).
+ if (forceflag) {
+ /* Do a full dump when forced by the caller. */
+ VPRINT("This will be a full dump: forced\n");
+ thisdate = 0;
+ } else if (!complete_release) {
+ /* If this release is a recovery of a failed release, we can't be
+ * sure the creation date is good, so do a full dump.
+ */
+ VPRINT("This will be a full dump: previous release failed\n");
+ thisdate = 0;
+ } else if (times[volcount].crtime == 0) {
+ /* A full dump is needed for a new read-only volume. */
+ VPRINT
+ ("This will be a full dump: read-only volume needs to be created\n");
+ thisdate = 0;
+ } else if ((rwcrdate > times[volcount].crtime)) {
+ /* If the RW volume was replaced (its creation date is newer than
+ * the last release), then we can't be sure what has changed (so
+ * we do a full dump).
*/
+ VPRINT
+ ("This will be a full dump: read-write volume was replaced\n");
thisdate = 0;
} else if (remembertime[vldbindex].validtime) {
/* Trans was prev ended. Use the time from the prev trans
}
if (fromdate == 0)
- fprintf(STDOUT, " (full release)");
+ fprintf(STDOUT, " (entire volume)");
else {
tmv = fromdate;
fprintf(STDOUT, " (as of %.24s)", ctime(&tmv));
char tovolreal[VOLSER_OLDMAXVOLNAME];
afs_uint32 pvolid;
afs_int32 temptid, pparentid;
- int success;
struct nvldbentry entry, storeEntry;
afs_int32 error;
int islocked;
memset(&cookie, 0, sizeof(cookie));
islocked = 0;
- success = 0;
error = 0;
reuseID = 1;
tocall = (struct rx_call *)0;
- toconn = (struct rx_connection *)0;
tempconn = (struct rx_connection *)0;
totid = 0;
temptid = 0;
goto refail;
}
- success = 1;
fprintf(STDOUT, " done\n");
fflush(STDOUT);
- if (success && (!reuseID || (flags & RV_FULLRST))) {
+ if (!reuseID || (flags & RV_FULLRST)) {
/* Volume was restored on the file server, update the
* VLDB to reflect the change.
*/
afs_int32 code = 0;
afs_int32 success = 1;
- aconn = (struct rx_connection *)0;
aconn = UV_Bind(aserver, AFSCONF_VOLUMEPORT);
curPos = 0;
for (curPtr = volPtr; curPos < arraySize; curPtr++) {
afs_uint32 curCloneId = 0;
char cloneName[256]; /*max vol name */
- aconn = (struct rx_connection *)0;
aconn = UV_Bind(aserver, AFSCONF_VOLUMEPORT);
curPos = 0;
if ((volPtr->volFlags & REUSECLONEID) && (volPtr->volFlags & ENTRYVALID))
* We set the val field to a null pointer as a hint for the stub to
* allocate space.
*/
- code = 0;
*a_numEntsInResultP = 0;
*a_resultPP = (volintXInfo *) 0;
volumeXInfo.volXEntries_val = (volintXInfo *) 0;
afs_int32 code = 0;
volEntries volumeInfo;
- code = 0;
-
*resultPtr = (volintInfo *) 0;
volumeInfo.volEntries_val = (volintInfo *) 0; /*this hints the stub to allocate space */
volumeInfo.volEntries_len = 0;
* the info. Setting the val field to a null pointer tells the stub
* to allocate space for us.
*/
- code = 0;
*a_resultPP = (volintXInfo *) 0;
volumeXInfo.volXEntries_val = (volintXInfo *) 0;
volumeXInfo.volXEntries_len = 0;
if (code) {
if (code != VL_NOENT) {
fprintf(STDOUT,
- "Could not retreive the VLDB entry for volume %lu \n",
+ "Could not retrieve the VLDB entry for volume %lu \n",
(unsigned long)rwvolid);
ERROR_EXIT(code);
}
aconn = UV_Bind(server, AFSCONF_VOLUMEPORT);
error = DoVolDelete(aconn, volid, part,
"the", 0, NULL, NULL);
+ if (error == VNOVOL) {
+ EPRINT1(error, "Failed to start transaction on %u\n", volid);
+ }
PrintError("", error);
if (aconn)