vol: cmd comerr dir afs sgiefs
${COMPILE_PART1} vol ${COMPILE_PART2}
+# tsalvaged is compiled only when configure substituted DEMAND_ATTACH=yes
+# and SYS_NAME matches one of the platform patterns below; in every other
+# case the recipe just prints a message.  The *_darwin_[1-6][0-9] pattern is
+# tested before *_darwin_*, so those two-digit Darwin sysnames are skipped.
+tsalvaged: vol libafsrpc libafsauthent cmd util
+ set -x; \
+ if test "@DEMAND_ATTACH@" = "yes" ; then \
+ case ${SYS_NAME} in \
+ alpha_dux*|sgi_*|sun*_5*|rs_aix*|*linux*|hp_ux11*|ia64_hpux*|*fbsd*|*nbsd2*) \
+ ${COMPILE_PART1} tsalvaged ${COMPILE_PART2} ;; \
+ *_darwin_[1-6][0-9]) \
+ echo Not building MT tsalvaged for ${SYS_NAME} ;; \
+ *_darwin_*) \
+ ${COMPILE_PART1} tsalvaged ${COMPILE_PART2} ;; \
+ *) \
+ echo Not building MT tsalvaged for ${SYS_NAME} ;; \
+ esac \
+ else \
+ echo skipping tsalvaged ; \
+ fi
+
+
vlserver: cmd comerr vol audit vlserver_depinstall
${COMPILE_PART1} vlserver ${COMPILE_PART2}
jafsadm: libjafsadm
finale: project cmd comerr afsd butc tbutc @ENABLE_KERNEL_MODULE@ libuafs audit kauth log package \
- ptserver scout bu_utils ubik uss bozo vfsck volser tvolser \
+ ptserver scout bu_utils ubik uss bozo vfsck volser tvolser tsalvaged \
venus update xstat afsmonitor dauth rxdebug libafsrpc \
libafsauthent shlibafsrpc shlibafsauthent libadmin login man-pages
${COMPILE_PART1} finale ${COMPILE_PART2}
finale_nolibafs: project cmd comerr afsd butc tbutc libuafs audit kauth log package \
- ptserver scout bu_utils ubik uss bozo vfsck volser tvolser \
+ ptserver scout bu_utils ubik uss bozo vfsck volser tvolser tsalvaged \
venus update xstat afsmonitor dauth rxdebug libafsrpc \
libafsauthent shlibafsrpc shlibafsauthent libadmin login man-pages
${COMPILE_PART1} finale ${COMPILE_PART2}
-${COMPILE_PART1} tviced ${COMPILE_CLEAN}
-${COMPILE_PART1} volser ${COMPILE_CLEAN}
-${COMPILE_PART1} tvolser ${COMPILE_CLEAN}
+ -${COMPILE_PART1} tsalvaged ${COMPILE_CLEAN}
-${COMPILE_PART1} venus ${COMPILE_CLEAN}
-${COMPILE_PART1} venus/test ${COMPILE_CLEAN}
-${COMPILE_PART1} afsd ${COMPILE_CLEAN}
src/tests/Makefile \
src/tests/run-tests \
src/tests/OpenAFS/Dirpath.pm \
+ src/tsalvaged/Makefile \
src/tsm41/Makefile \
src/tviced/Makefile \
src/tvolser/Makefile \
[ --enable-fast-restart enable fast startup of file server without salvaging],, enable_fast_restart="no")
AC_ARG_ENABLE( bitmap-later,
[ --enable-bitmap-later enable fast startup of file server by not reading bitmap till needed],, enable_bitmap_later="no")
+AC_ARG_ENABLE( demand-attach-fs,
+[ --enable-demand-attach-fs enable Demand Attach Fileserver (please see documentation)],, enable_demand_attach_fs="no")
AC_ARG_ENABLE( full-vos-listvol-switch,
[ --disable-full-vos-listvol-switch disable vos full listvol switch for formatted output],, enable_full_vos_listvol_switch="yes")
AC_ARG_WITH(dux-kernel-headers,
AC_DEFINE(BITMAP_LATER, 1, [define if you want to salvager to check bitmasks later])
fi
+if test "$enable_demand_attach_fs" = "yes"; then
+ AC_DEFINE(DEMAND_ATTACH_ENABLE, 1, [define if you want the demand attach fileserver])
+ DEMAND_ATTACH="yes"
+else
+ DEMAND_ATTACH="no"
+fi
+AC_SUBST(DEMAND_ATTACH)
+
+dnl Demand attach and fast restart cannot be combined; refuse to configure.
+dnl AC_MSG_ERROR aborts configure by itself, so no explicit exit follows it.
+if test "$enable_fast_restart" = "yes" &&
+   test "$enable_demand_attach_fs" = "yes" ; then
+	AC_MSG_ERROR([The Demand Attach and Fast Restart extensions are mutually exclusive. Demand Attach fileservers automatically salvage volumes in the background, thereby making Fast Restart pointless.])
+fi
+
if test "$enable_full_vos_listvol_switch" = "yes"; then
AC_DEFINE(FULL_LISTVOL_SWITCH, 1, [define if you want to want listvol switch])
fi
src/tests/Makefile \
src/tests/run-tests \
src/tests/OpenAFS/Dirpath.pm \
+src/tsalvaged/Makefile \
src/tsm41/Makefile \
src/tviced/Makefile \
src/tvolser/Makefile \
cd test; $(MAKE)
clean:
- $(RM) -f *.o *.a copyauth setkey auth.h cellconfig.h acfg_errors.c ktc_errors.c core\
+ $(RM) -f *.o *.a copyauth setkey auth.h cellconfig.h acfg_errors.c ktc_errors.c core \
AFS_component_version_number.c
include ../config/Makefile.version
#include "bosint.h"
-#define MRAFS_OFFSET 9
-#define ADDPARMOFFSET 26
+/* command offsets for bos salvage command */
+#define MRAFS_OFFSET 10
+#define ADDPARMOFFSET 27
-static struct SalvageParms {
+/* MR-AFS salvage parameters */
+struct MRAFSSalvageParms {
afs_int32 Optdebug;
afs_int32 Optnowrite;
afs_int32 Optforce;
afs_int32 OptLogLevel;
afs_int32 OptRxDebug;
afs_uint32 OptResidencies;
-} mrafsParm;
+};
/* dummy routine for the audit work. It should do nothing since audits */
/* occur at the server level and bos is not a server. */
#define PARMBUFFERSSIZE 32
-static
-DoSalvage(aconn, aparm1, aparm2, aoutName, showlog, parallel, atmpDir,
- orphans)
- struct rx_connection *aconn;
- char *aoutName;
- char *aparm1;
- char *aparm2;
- afs_int32 showlog;
- char *parallel;
- char *atmpDir;
- char *orphans;
+static afs_int32
+DoSalvage(struct rx_connection * aconn, char * aparm1, char * aparm2,
+ char * aoutName, afs_int32 showlog, char * parallel,
+ char * atmpDir, char * orphans, int dafs,
+ struct MRAFSSalvageParms * mrafsParm)
{
register afs_int32 code;
char *parms[6];
parms[code] = "";
if (!aparm2)
aparm2 = "";
+
/* MUST pass canonical (wire-format) salvager path to bosserver */
- strncpy(tbuffer, AFSDIR_CANONICAL_SERVER_SALVAGER_FILEPATH, BOZO_BSSIZE);
if (*aparm2 != 0) {
- if ((strlen(tbuffer) + 1 + strlen(partName) + 1 + strlen(aparm2) +
- 1) > BOZO_BSSIZE) {
- printf("bos: command line too big\n");
- return (E2BIG);
+ /* single volume salvage */
+ if (dafs) {
+	    /* For DAFS, we call the salvageserver binary with special options.
+	     * In this mode, it simply uses SALVSYNC to tell the currently
+	     * running salvageserver to offline and salvage the volume in question. */
+ strncpy(tbuffer, AFSDIR_CANONICAL_SERVER_SALSRV_FILEPATH, BOZO_BSSIZE);
+
+ if ((strlen(tbuffer) + 9 + strlen(partName) + 1 + strlen(aparm2) +
+ 1) > BOZO_BSSIZE) {
+ printf("bos: command line too big\n");
+ return (E2BIG);
+ }
+
+ strcat(tbuffer, " -client ");
+ strcat(tbuffer, partName);
+ strcat(tbuffer, " ");
+ strcat(tbuffer, aparm2);
+ } else {
+ strncpy(tbuffer, AFSDIR_CANONICAL_SERVER_SALVAGER_FILEPATH, BOZO_BSSIZE);
+
+ if ((strlen(tbuffer) + 1 + strlen(partName) + 1 + strlen(aparm2) +
+ 1) > BOZO_BSSIZE) {
+ printf("bos: command line too big\n");
+ return (E2BIG);
+ }
+
+ strcat(tbuffer, " ");
+ strcat(tbuffer, partName);
+ strcat(tbuffer, " ");
+ strcat(tbuffer, aparm2);
}
- strcat(tbuffer, " ");
- strcat(tbuffer, partName);
- strcat(tbuffer, " ");
- strcat(tbuffer, aparm2);
} else {
+ /* partition salvage */
+ strncpy(tbuffer, AFSDIR_CANONICAL_SERVER_SALVAGER_FILEPATH, BOZO_BSSIZE);
if ((strlen(tbuffer) + 4 + strlen(partName) + 1) > BOZO_BSSIZE) {
printf("bos: command line too big\n");
return (E2BIG);
strcat(tbuffer, partName);
}
- /* add the parallel option if given */
- if (parallel != NULL) {
- if ((strlen(tbuffer) + 11 + strlen(parallel) + 1) > BOZO_BSSIZE) {
- printf("bos: command line too big\n");
- return (E2BIG);
+ /* For DAFS, specifying a single volume does not result in a standard
+ * salvager call. Instead, it simply results in a SALVSYNC call to the
+ * online salvager daemon. This interface does not give us the same rich
+ * set of call flags. Thus, we skip these steps for DAFS single-volume
+ * calls */
+ if (!dafs || (*aparm2 == 0)) {
+ /* add the parallel option if given */
+ if (parallel != NULL) {
+ if ((strlen(tbuffer) + 11 + strlen(parallel) + 1) > BOZO_BSSIZE) {
+ printf("bos: command line too big\n");
+ return (E2BIG);
+ }
+ strcat(tbuffer, " -parallel ");
+ strcat(tbuffer, parallel);
}
- strcat(tbuffer, " -parallel ");
- strcat(tbuffer, parallel);
- }
- /* add the tmpdir option if given */
- if (atmpDir != NULL) {
- if ((strlen(tbuffer) + 9 + strlen(atmpDir) + 1) > BOZO_BSSIZE) {
- printf("bos: command line too big\n");
- return (E2BIG);
+ /* add the tmpdir option if given */
+ if (atmpDir != NULL) {
+ if ((strlen(tbuffer) + 9 + strlen(atmpDir) + 1) > BOZO_BSSIZE) {
+ printf("bos: command line too big\n");
+ return (E2BIG);
+ }
+ strcat(tbuffer, " -tmpdir ");
+ strcat(tbuffer, atmpDir);
}
- strcat(tbuffer, " -tmpdir ");
- strcat(tbuffer, atmpDir);
- }
- /* add the orphans option if given */
- if (orphans != NULL) {
- if ((strlen(tbuffer) + 10 + strlen(orphans) + 1) > BOZO_BSSIZE) {
- printf("bos: command line too big\n");
- return (E2BIG);
+ /* add the orphans option if given */
+ if (orphans != NULL) {
+ if ((strlen(tbuffer) + 10 + strlen(orphans) + 1) > BOZO_BSSIZE) {
+ printf("bos: command line too big\n");
+ return (E2BIG);
+ }
+ strcat(tbuffer, " -orphans ");
+ strcat(tbuffer, orphans);
+ }
+
+ if (mrafsParm->Optdebug)
+ strcat(tbuffer, " -debug");
+ if (mrafsParm->Optnowrite)
+ strcat(tbuffer, " -nowrite");
+ if (mrafsParm->Optforce)
+ strcat(tbuffer, " -force");
+ if (mrafsParm->Optoktozap)
+ strcat(tbuffer, " -oktozap");
+ if (mrafsParm->Optrootfiles)
+ strcat(tbuffer, " -rootfiles");
+ if (mrafsParm->Optsalvagedirs)
+ strcat(tbuffer, " -salvagedirs");
+ if (mrafsParm->Optblockreads)
+ strcat(tbuffer, " -blockreads");
+ if (mrafsParm->OptListResidencies)
+ strcat(tbuffer, " -ListResidencies");
+ if (mrafsParm->OptSalvageRemote)
+ strcat(tbuffer, " -SalvageRemote");
+ if (mrafsParm->OptSalvageArchival)
+ strcat(tbuffer, " -SalvageArchival");
+ if (mrafsParm->OptIgnoreCheck)
+ strcat(tbuffer, " -IgnoreCheck");
+ if (mrafsParm->OptForceOnLine)
+ strcat(tbuffer, " -ForceOnLine");
+ if (mrafsParm->OptUseRootDirACL)
+ strcat(tbuffer, " -UseRootDirACL");
+ if (mrafsParm->OptTraceBadLinkCounts)
+ strcat(tbuffer, " -TraceBadLinkCounts");
+ if (mrafsParm->OptDontAskFS)
+ strcat(tbuffer, " -DontAskFS");
+ if (mrafsParm->OptLogLevel) {
+ sprintf(pbuffer, " -LogLevel %ld", mrafsParm->OptLogLevel);
+ strcat(tbuffer, pbuffer);
+ }
+ if (mrafsParm->OptRxDebug)
+ strcat(tbuffer, " -rxdebug");
+ if (mrafsParm->OptResidencies) {
+ sprintf(pbuffer, " -Residencies %lu", mrafsParm->OptResidencies);
+ strcat(tbuffer, pbuffer);
}
- strcat(tbuffer, " -orphans ");
- strcat(tbuffer, orphans);
- }
-
- if (mrafsParm.Optdebug)
- strcat(tbuffer, " -debug");
- if (mrafsParm.Optnowrite)
- strcat(tbuffer, " -nowrite");
- if (mrafsParm.Optforce)
- strcat(tbuffer, " -force");
- if (mrafsParm.Optoktozap)
- strcat(tbuffer, " -oktozap");
- if (mrafsParm.Optrootfiles)
- strcat(tbuffer, " -rootfiles");
- if (mrafsParm.Optsalvagedirs)
- strcat(tbuffer, " -salvagedirs");
- if (mrafsParm.Optblockreads)
- strcat(tbuffer, " -blockreads");
- if (mrafsParm.OptListResidencies)
- strcat(tbuffer, " -ListResidencies");
- if (mrafsParm.OptSalvageRemote)
- strcat(tbuffer, " -SalvageRemote");
- if (mrafsParm.OptSalvageArchival)
- strcat(tbuffer, " -SalvageArchival");
- if (mrafsParm.OptIgnoreCheck)
- strcat(tbuffer, " -IgnoreCheck");
- if (mrafsParm.OptForceOnLine)
- strcat(tbuffer, " -ForceOnLine");
- if (mrafsParm.OptUseRootDirACL)
- strcat(tbuffer, " -UseRootDirACL");
- if (mrafsParm.OptTraceBadLinkCounts)
- strcat(tbuffer, " -TraceBadLinkCounts");
- if (mrafsParm.OptDontAskFS)
- strcat(tbuffer, " -DontAskFS");
- if (mrafsParm.OptLogLevel) {
- sprintf(pbuffer, " -LogLevel %ld", mrafsParm.OptLogLevel);
- strcat(tbuffer, pbuffer);
- }
- if (mrafsParm.OptRxDebug)
- strcat(tbuffer, " -rxdebug");
- if (mrafsParm.OptResidencies) {
- sprintf(pbuffer, " -Residencies %lu", mrafsParm.OptResidencies);
- strcat(tbuffer, pbuffer);
}
parms[0] = tbuffer;
char tname[BOZO_BSSIZE];
afs_int32 newID;
extern struct ubik_client *cstruct;
- afs_int32 curGoal, showlog = 0, mrafs = 0;
+ afs_int32 curGoal, showlog = 0, dafs = 0, mrafs = 0;
char *parallel;
char *tmpDir;
char *orphans;
char *tp;
+ char * serviceName;
+ struct MRAFSSalvageParms mrafsParm;
memset(&mrafsParm, 0, sizeof(mrafsParm));
/* parm 0 is machine name, 1 is partition, 2 is volume, 3 is -all flag */
tconn = GetConn(as, 0);
- /* Find out whether fileserver is running MR-AFS (has a scanner instance) */
- /* XXX this should really be done some other way, potentially by RPC */
tp = &tname[0];
- if (code = BOZO_GetInstanceParm(tconn, "fs", 3, &tp) == 0)
- mrafs = 1;
+
+    /* find out whether fileserver is running demand attach fs */
+    /* The assignment is parenthesized so that 'code' receives the RPC
+     * status itself; '==' binds tighter than '=', so without the extra
+     * parentheses 'code' would only hold the 0/1 comparison result. */
+    if ((code = BOZO_GetInstanceParm(tconn, "dafs", 0, &tp)) == 0) {
+	dafs = 1;
+	serviceName = "dafs";
+	/* Find out whether fileserver is running MR-AFS (has a scanner instance) */
+	/* XXX this should really be done some other way, potentially by RPC */
+	/* a dafs instance has the extra salvageserver command, so the
+	 * scanner command is probed at index 4 here */
+	if ((code = BOZO_GetInstanceParm(tconn, serviceName, 4, &tp)) == 0)
+	    mrafs = 1;
+    } else {
+	serviceName = "fs";
+	/* Find out whether fileserver is running MR-AFS (has a scanner instance) */
+	/* XXX this should really be done some other way, potentially by RPC */
+	if ((code = BOZO_GetInstanceParm(tconn, serviceName, 3, &tp)) == 0)
+	    mrafs = 1;
+    }
/* we can do a volume, a partition or the whole thing, but not mixtures
* thereof */
orphans = as->parms[8].items->data;
}
+ if (dafs) {
+ if (!as->parms[9].items) { /* -forceDAFS flag */
+ printf("This is a demand attach fileserver. Are you sure you want to proceed with a manual salvage?\n");
+ printf("must specify -forceDAFS flag in order to proceed.\n");
+ return EINVAL;
+ }
+ }
+
if (mrafs) {
if (as->parms[MRAFS_OFFSET].items)
mrafsParm.Optdebug = 1;
} else {
int stop = 0;
- for (i = 9; i < ADDPARMOFFSET; i++) {
+ for (i = MRAFS_OFFSET; i < ADDPARMOFFSET; i++) {
if (as->parms[i].items) {
printf(" %s only possible for MR-AFS fileserver.\n",
as->parms[i].name);
if (as->parms[4].items) {
/* salvage whole enchilada */
- curGoal = GetServerGoal(tconn, "fs");
+ curGoal = GetServerGoal(tconn, serviceName);
if (curGoal == BSTAT_NORMAL) {
- printf("bos: shutting down fs.\n");
- code = BOZO_SetTStatus(tconn, "fs", BSTAT_SHUTDOWN);
+ printf("bos: shutting down '%s'.\n", serviceName);
+ code = BOZO_SetTStatus(tconn, serviceName, BSTAT_SHUTDOWN);
if (code) {
- printf("bos: failed to stop 'fs' (%s)\n", em(code));
+ printf("bos: failed to stop '%s' (%s)\n", serviceName, em(code));
return code;
}
code = BOZO_WaitAll(tconn); /* wait for shutdown to complete */
/* now do the salvage operation */
printf("Starting salvage.\n");
rc = DoSalvage(tconn, NULL, NULL, outName, showlog, parallel, tmpDir,
- orphans);
+ orphans, dafs, &mrafsParm);
if (curGoal == BSTAT_NORMAL) {
- printf("bos: restarting fs.\n");
- code = BOZO_SetTStatus(tconn, "fs", BSTAT_NORMAL);
+ printf("bos: restarting %s.\n", serviceName);
+ code = BOZO_SetTStatus(tconn, serviceName, BSTAT_NORMAL);
if (code) {
- printf("bos: failed to restart 'fs' (%s)\n", em(code));
+ printf("bos: failed to restart '%s' (%s)\n", serviceName, em(code));
return code;
}
}
as->parms[1].items->data);
return -1;
}
- curGoal = GetServerGoal(tconn, "fs");
+ curGoal = GetServerGoal(tconn, serviceName);
/* salvage a whole partition (specified by parms[1]) */
if (curGoal == BSTAT_NORMAL) {
- printf("bos: shutting down fs.\n");
- code = BOZO_SetTStatus(tconn, "fs", BSTAT_SHUTDOWN);
+ printf("bos: shutting down '%s'.\n", serviceName);
+ code = BOZO_SetTStatus(tconn, serviceName, BSTAT_SHUTDOWN);
if (code) {
- printf("bos: can't stop 'fs' (%s)\n", em(code));
+ printf("bos: can't stop '%s' (%s)\n", serviceName, em(code));
return code;
}
code = BOZO_WaitAll(tconn); /* wait for shutdown to complete */
/* now do the salvage operation */
printf("Starting salvage.\n");
rc = DoSalvage(tconn, as->parms[1].items->data, NULL, outName,
- showlog, parallel, tmpDir, orphans);
+ showlog, parallel, tmpDir, orphans, dafs, &mrafsParm);
if (curGoal == BSTAT_NORMAL) {
- printf("bos: restarting fs.\n");
- code = BOZO_SetTStatus(tconn, "fs", BSTAT_NORMAL);
+ printf("bos: restarting '%s'.\n", serviceName);
+ code = BOZO_SetTStatus(tconn, serviceName, BSTAT_NORMAL);
if (code) {
- printf("bos: failed to restart 'fs' (%s)\n", em(code));
+ printf("bos: failed to restart '%s' (%s)\n", serviceName, em(code));
return code;
}
}
}
printf("Starting salvage.\n");
rc = DoSalvage(tconn, as->parms[1].items->data, tname, outName,
- showlog, parallel, tmpDir, orphans);
+ showlog, parallel, tmpDir, orphans, dafs, &mrafsParm);
if (rc)
return rc;
}
"directory to place tmp files");
cmd_AddParm(ts, "-orphans", CMD_SINGLE, CMD_OPTIONAL,
"ignore | remove | attach");
+ cmd_AddParm(ts, "-forceDAFS", CMD_FLAG, CMD_OPTIONAL,
+ "(DAFS) force salvage of demand attach fileserver");
cmd_AddParm(ts, "-debug", CMD_FLAG, CMD_OPTIONAL,
"(MR-AFS) Run in Debugging mode");
cmd_AddParm(ts, "-nowrite", CMD_FLAG, CMD_OPTIONAL,
#define BOZO_LWP_STACKSIZE 16000
extern int BOZO_ExecuteRequest();
extern int RXSTATS_ExecuteRequest();
-extern struct bnode_ops fsbnode_ops, ezbnode_ops, cronbnode_ops;
+extern struct bnode_ops fsbnode_ops, dafsbnode_ops, ezbnode_ops, cronbnode_ops;
void bozo_Log();
}
bnode_Register("fs", &fsbnode_ops, 3);
+ bnode_Register("dafs", &dafsbnode_ops, 4);
bnode_Register("simple", &ezbnode_ops, 1);
bnode_Register("cron", &cronbnode_ops, 2);
#include <afs/afsutil.h>
#include "bnode.h"
-static int fs_timeout(), fs_getstat(), fs_setstat(), fs_delete();
-static int fs_procexit(), fs_getstring(), fs_getparm(), fs_restartp();
-static int fs_hascore();
-struct bnode *fs_create();
-
-static SetNeedsClock();
-static NudgeProcs();
static int emergency = 0;
The needsSalvage flag is cleared when the salvager exits.
*/
-struct bnode_ops fsbnode_ops = {
- fs_create,
- fs_timeout,
- fs_getstat,
- fs_setstat,
- fs_delete,
- fs_procexit,
- fs_getstring,
- fs_getparm,
- fs_restartp,
- fs_hascore,
-};
-
struct fsbnode {
struct bnode b;
afs_int32 timeSDStarted; /* time shutdown operation started */
char *filecmd; /* command to start primary file server */
char *volcmd; /* command to start secondary vol server */
+ char *salsrvcmd; /* command to start salvageserver (demand attach fs) */
char *salcmd; /* command to start salvager */
char *scancmd; /* command to start scanner (MR-AFS) */
struct bnode_proc *fileProc; /* process for file server */
struct bnode_proc *volProc; /* process for vol server */
+ struct bnode_proc *salsrvProc; /* process for salvageserver (demand attach fs) */
struct bnode_proc *salProc; /* process for salvager */
struct bnode_proc *scanProc; /* process for scanner (MR-AFS) */
afs_int32 lastFileStart; /* last start for file */
afs_int32 lastVolStart; /* last start for vol */
+ afs_int32 lastSalsrvStart; /* last start for salvageserver (demand attach fs) */
afs_int32 lastScanStart; /* last start for scanner (MR-AFS) */
char fileRunning; /* file process is running */
char volRunning; /* volser is running */
+ char salsrvRunning; /* salvageserver is running (demand attach fs) */
char salRunning; /* salvager is running */
char scanRunning; /* scanner is running (MR_AFS) */
char fileSDW; /* file shutdown wait */
char volSDW; /* vol shutdown wait */
+ char salsrvSDW; /* salvageserver shutdown wait (demand attach fs) */
char salSDW; /* waiting for the salvager to shutdown */
char scanSDW; /* scanner shutdown wait (MR_AFS) */
char fileKillSent; /* kill signal has been sent */
char volKillSent;
+ char salsrvKillSent; /* kill signal has been sent (demand attach fs) */
char salKillSent;
char scanKillSent; /* kill signal has been sent (MR_AFS) */
char needsSalvage; /* salvage before running */
char needsClock; /* do we need clock ticks */
};
+
+
+struct bnode * fs_create(char *ainstance, char *afilecmd, char *avolcmd,
+ char *asalcmd, char *ascancmd);
+struct bnode * dafs_create(char *ainstance, char *afilecmd, char *avolcmd,
+ char * asalsrvcmd, char *asalcmd, char *ascancmd);
+
+static int fs_hascore(register struct ezbnode *abnode);
+static int fs_restartp(register struct fsbnode *abnode);
+static int SetSalFlag(register struct fsbnode *abnode, register int aflag);
+static int RestoreSalFlag(register struct fsbnode *abnode);
+static int fs_delete(struct fsbnode *abnode);
+static int fs_timeout(struct fsbnode *abnode);
+static int fs_getstat(struct fsbnode *abnode, afs_int32 * astatus);
+static int fs_setstat(register struct fsbnode *abnode, afs_int32 astatus);
+static int fs_procexit(struct fsbnode *abnode, struct bnode_proc *aproc);
+static int fs_getstring(struct fsbnode *abnode, char *abuffer, afs_int32 alen);
+
+
+static int fs_getparm(struct fsbnode *abnode, afs_int32 aindex,
+ char *abuffer, afs_int32 alen);
+static int dafs_getparm(struct fsbnode *abnode, afs_int32 aindex,
+ char *abuffer, afs_int32 alen);
+
+/* AppendExecutableExtension is a real function only under AFS_NT40_ENV;
+ * otherwise it is an empty macro, so callers need no #ifdef of their own. */
+#ifdef AFS_NT40_ENV
+static void AppendExecutableExtension(char *cmd);
+#else
+#define AppendExecutableExtension(x)
+#endif
+
+static void SetNeedsClock(register struct fsbnode *ab);
+static int NudgeProcs(register struct fsbnode *abnode);
+
+
+
+struct bnode_ops fsbnode_ops = {
+ fs_create,
+ fs_timeout,
+ fs_getstat,
+ fs_setstat,
+ fs_delete,
+ fs_procexit,
+ fs_getstring,
+ fs_getparm,
+ fs_restartp,
+ fs_hascore,
+};
+
+/* demand attach fs bnode ops */
+struct bnode_ops dafsbnode_ops = {
+ dafs_create,
+ fs_timeout,
+ fs_getstat,
+ fs_setstat,
+ fs_delete,
+ fs_procexit,
+ fs_getstring,
+ dafs_getparm,
+ fs_restartp,
+ fs_hascore,
+};
+
+
/* Function to tell whether this bnode has a core file or not. You might
* think that this could be in bnode.c, and decide what core files to check
* for based on the bnode's coreName property, but that doesn't work because
if (access(tbuffer, 0) == 0)
return 1;
+ /* see if salvageserver left a core file */
+ bnode_CoreName(abnode, "salsrv", tbuffer);
+ if (access(tbuffer, 0) == 0)
+ return 1;
+
/* see if salvager left a core file */
bnode_CoreName(abnode, "salv", tbuffer);
if (access(tbuffer, 0) == 0)
if (code)
return code;
+    if (abnode->salsrvcmd) {	/* only in demand attach fs */
+	/* now do same for salsrvcmd (demand attach fs) */
+	code = bnode_ParseLine(abnode->salsrvcmd, &tt);
+	if (code)
+	    return 0;
+	if (!tt)
+	    return 0;
+	code = stat(tt->key, &tstat);
+	if (code) {
+	    bnode_FreeTokens(tt);
+	    return 0;
+	}
+	/* compare against the salvageserver's own start time, not the
+	 * MR-AFS scanner's */
+	if (tstat.st_ctime > abnode->lastSalsrvStart)
+	    code = 1;
+	else
+	    code = 0;
+	bnode_FreeTokens(tt);
+	/* report the result now: the scanner check that follows reuses
+	 * 'code' and would otherwise discard it */
+	if (code)
+	    return code;
+    }
+
if (abnode->scancmd) { /* Only in MR-AFS */
/* now do same for scancmd (MR-AFS) */
code = bnode_ParseLine(abnode->scancmd, &tt);
char tbuffer[AFSDIR_PATH_MAX];
int fd;
- abnode->needsSalvage = aflag;
- strcompose(tbuffer, AFSDIR_PATH_MAX, AFSDIR_SERVER_LOCAL_DIRPATH, "/",
- SALFILE, abnode->b.name, NULL);
- if (aflag) {
- fd = open(tbuffer, O_CREAT | O_TRUNC | O_RDWR, 0666);
- close(fd);
- } else {
- unlink(tbuffer);
+ /* don't use the salvage flag for demand attach fs */
+ if (abnode->salsrvcmd == NULL) {
+ abnode->needsSalvage = aflag;
+ strcompose(tbuffer, AFSDIR_PATH_MAX, AFSDIR_SERVER_LOCAL_DIRPATH, "/",
+ SALFILE, abnode->b.name, NULL);
+ if (aflag) {
+ fd = open(tbuffer, O_CREAT | O_TRUNC | O_RDWR, 0666);
+ close(fd);
+ } else {
+ unlink(tbuffer);
+ }
}
return 0;
}
{
char tbuffer[AFSDIR_PATH_MAX];
- strcompose(tbuffer, AFSDIR_PATH_MAX, AFSDIR_SERVER_LOCAL_DIRPATH, "/",
- SALFILE, abnode->b.name, NULL);
- if (access(tbuffer, 0) == 0) {
- /* file exists, so need to salvage */
- abnode->needsSalvage = 1;
- } else {
+ /* never set needs salvage flag for demand attach fs */
+ if (abnode->salsrvcmd != NULL) {
abnode->needsSalvage = 0;
+ } else {
+ strcompose(tbuffer, AFSDIR_PATH_MAX, AFSDIR_SERVER_LOCAL_DIRPATH, "/",
+ SALFILE, abnode->b.name, NULL);
+ if (access(tbuffer, 0) == 0) {
+ /* file exists, so need to salvage */
+ abnode->needsSalvage = 1;
+ } else {
+ abnode->needsSalvage = 0;
+ }
}
return 0;
}
free(abnode->filecmd);
free(abnode->volcmd);
free(abnode->salcmd);
+ if (abnode->salsrvcmd)
+ free(abnode->salsrvcmd);
if (abnode->scancmd)
free(abnode->scancmd);
free(abnode);
char *fileCmdpath, *volCmdpath, *salCmdpath, *scanCmdpath;
int bailout = 0;
- fileCmdpath = volCmdpath = salCmdpath = NULL;
+    /* te is a struct fsbnode pointer, so it is cleared separately rather
+     * than through the char * assignment chain */
+    te = NULL;
+    fileCmdpath = volCmdpath = salCmdpath = scanCmdpath = NULL;
/* construct local paths from canonical (wire-format) paths */
if (ConstructLocalBinPath(afilecmd, &fileCmdpath)) {
bozo_Log("BNODE: command path invalid '%s'\n", afilecmd);
bailout = 1;
+ goto done;
}
if (ConstructLocalBinPath(avolcmd, &volCmdpath)) {
bozo_Log("BNODE: command path invalid '%s'\n", avolcmd);
bailout = 1;
+ goto done;
}
if (ConstructLocalBinPath(asalcmd, &salCmdpath)) {
bozo_Log("BNODE: command path invalid '%s'\n", asalcmd);
bailout = 1;
+ goto done;
}
if (ascancmd && strlen(ascancmd)) {
if (ConstructLocalBinPath(ascancmd, &scanCmdpath)) {
bozo_Log("BNODE: command path invalid '%s'\n", ascancmd);
bailout = 1;
+ goto done;
}
}
if (!bailout) {
sscanf(fileCmdpath, "%s", cmdname);
-#ifdef AFS_NT40_ENV
AppendExecutableExtension(cmdname);
-#endif
if (stat(cmdname, &tstat)) {
bozo_Log("BNODE: file server binary '%s' not found\n", cmdname);
bailout = 1;
+ goto done;
}
sscanf(volCmdpath, "%s", cmdname);
-#ifdef AFS_NT40_ENV
AppendExecutableExtension(cmdname);
-#endif
if (stat(cmdname, &tstat)) {
bozo_Log("BNODE: volume server binary '%s' not found\n", cmdname);
bailout = 1;
+ goto done;
}
sscanf(salCmdpath, "%s", cmdname);
-#ifdef AFS_NT40_ENV
AppendExecutableExtension(cmdname);
-#endif
if (stat(cmdname, &tstat)) {
bozo_Log("BNODE: salvager binary '%s' not found\n", cmdname);
bailout = 1;
+ goto done;
}
if (ascancmd && strlen(ascancmd)) {
sscanf(scanCmdpath, "%s", cmdname);
-#ifdef AFS_NT40_ENV
AppendExecutableExtension(cmdname);
-#endif
if (stat(cmdname, &tstat)) {
bozo_Log("BNODE: scanner binary '%s' not found\n", cmdname);
bailout = 1;
+ goto done;
}
}
}
+ te = (struct fsbnode *)malloc(sizeof(struct fsbnode));
+ if (te == NULL) {
+ bailout = 1;
+ goto done;
+ }
+ memset(te, 0, sizeof(struct fsbnode));
+ te->filecmd = fileCmdpath;
+ te->volcmd = volCmdpath;
+ te->salsrvcmd = NULL;
+ te->salcmd = salCmdpath;
+ if (ascancmd && strlen(ascancmd))
+ te->scancmd = scanCmdpath;
+ else
+ te->scancmd = NULL;
+ if (bnode_InitBnode(te, &fsbnode_ops, ainstance) != 0) {
+ bailout = 1;
+ goto done;
+ }
+ bnode_SetTimeout(te, POLLTIME); /* ask for timeout activations every 10 seconds */
+ RestoreSalFlag(te); /* restore needsSalvage flag based on file's existence */
+ SetNeedsClock(te); /* compute needsClock field */
+
+ done:
if (bailout) {
- free(fileCmdpath);
- free(volCmdpath);
- free(salCmdpath);
+ if (te)
+ free(te);
+ if (fileCmdpath)
+ free(fileCmdpath);
+ if (volCmdpath)
+ free(volCmdpath);
+ if (salCmdpath)
+ free(salCmdpath);
+ if (scanCmdpath)
+ free(scanCmdpath);
return NULL;
}
+ return (struct bnode *)te;
+}
+
+/* create a demand attach fs bnode */
+struct bnode *
+dafs_create(char *ainstance, char *afilecmd, char *avolcmd,
+ char * asalsrvcmd, char *asalcmd, char *ascancmd)
+{
+ struct stat tstat;
+ register struct fsbnode *te;
+ char cmdname[AFSDIR_PATH_MAX];
+ char *fileCmdpath, *volCmdpath, *salsrvCmdpath, *salCmdpath, *scanCmdpath;
+ int bailout = 0;
+
+    /* te is a struct fsbnode pointer, so it is cleared separately rather
+     * than through the char * assignment chain */
+    te = NULL;
+    fileCmdpath = volCmdpath = salsrvCmdpath = salCmdpath = scanCmdpath = NULL;
+
+ /* construct local paths from canonical (wire-format) paths */
+ if (ConstructLocalBinPath(afilecmd, &fileCmdpath)) {
+ bozo_Log("BNODE: command path invalid '%s'\n", afilecmd);
+ bailout = 1;
+ goto done;
+ }
+ if (ConstructLocalBinPath(avolcmd, &volCmdpath)) {
+ bozo_Log("BNODE: command path invalid '%s'\n", avolcmd);
+ bailout = 1;
+ goto done;
+ }
+ if (ConstructLocalBinPath(asalsrvcmd, &salsrvCmdpath)) {
+ bozo_Log("BNODE: command path invalid '%s'\n", asalsrvcmd);
+ bailout = 1;
+ goto done;
+ }
+ if (ConstructLocalBinPath(asalcmd, &salCmdpath)) {
+ bozo_Log("BNODE: command path invalid '%s'\n", asalcmd);
+ bailout = 1;
+ goto done;
+ }
+
+ if (ascancmd && strlen(ascancmd)) {
+ if (ConstructLocalBinPath(ascancmd, &scanCmdpath)) {
+ bozo_Log("BNODE: command path invalid '%s'\n", ascancmd);
+ bailout = 1;
+ goto done;
+ }
+ }
+
+ if (!bailout) {
+ sscanf(fileCmdpath, "%s", cmdname);
+ AppendExecutableExtension(cmdname);
+ if (stat(cmdname, &tstat)) {
+ bozo_Log("BNODE: file server binary '%s' not found\n", cmdname);
+ bailout = 1;
+ goto done;
+ }
+
+ sscanf(volCmdpath, "%s", cmdname);
+ AppendExecutableExtension(cmdname);
+ if (stat(cmdname, &tstat)) {
+ bozo_Log("BNODE: volume server binary '%s' not found\n", cmdname);
+ bailout = 1;
+ goto done;
+ }
+
+ sscanf(salsrvCmdpath, "%s", cmdname);
+ AppendExecutableExtension(cmdname);
+ if (stat(cmdname, &tstat)) {
+ bozo_Log("BNODE: salvageserver binary '%s' not found\n", cmdname);
+ bailout = 1;
+ goto done;
+ }
+
+ sscanf(salCmdpath, "%s", cmdname);
+ AppendExecutableExtension(cmdname);
+ if (stat(cmdname, &tstat)) {
+ bozo_Log("BNODE: salvager binary '%s' not found\n", cmdname);
+ bailout = 1;
+ goto done;
+ }
+
+ if (ascancmd && strlen(ascancmd)) {
+ sscanf(scanCmdpath, "%s", cmdname);
+ AppendExecutableExtension(cmdname);
+ if (stat(cmdname, &tstat)) {
+ bozo_Log("BNODE: scanner binary '%s' not found\n", cmdname);
+ bailout = 1;
+ goto done;
+ }
+ }
+ }
+
te = (struct fsbnode *)malloc(sizeof(struct fsbnode));
+ if (te == NULL) {
+ bailout = 1;
+ goto done;
+ }
memset(te, 0, sizeof(struct fsbnode));
te->filecmd = fileCmdpath;
te->volcmd = volCmdpath;
+ te->salsrvcmd = salsrvCmdpath;
te->salcmd = salCmdpath;
if (ascancmd && strlen(ascancmd))
te->scancmd = scanCmdpath;
else
te->scancmd = NULL;
- if (bnode_InitBnode(te, &fsbnode_ops, ainstance) != 0) {
- free(te);
- free(fileCmdpath);
- free(volCmdpath);
- free(salCmdpath);
- return NULL;
+ if (bnode_InitBnode(te, &dafsbnode_ops, ainstance) != 0) {
+ bailout = 1;
+ goto done;
}
bnode_SetTimeout(te, POLLTIME); /* ask for timeout activations every 10 seconds */
RestoreSalFlag(te); /* restore needsSalvage flag based on file's existence */
SetNeedsClock(te); /* compute needsClock field */
+
+ done:
+ if (bailout) {
+ if (te)
+ free(te);
+ if (fileCmdpath)
+ free(fileCmdpath);
+ if (volCmdpath)
+ free(volCmdpath);
+ if (salsrvCmdpath)
+ free(salsrvCmdpath);
+ if (salCmdpath)
+ free(salCmdpath);
+ if (scanCmdpath)
+ free(scanCmdpath);
+ return NULL;
+ }
+
return (struct bnode *)te;
}
FSSDTIME);
}
}
+ if (abnode->salsrvSDW) {
+ if (!abnode->salsrvKillSent && now - abnode->timeSDStarted > SDTIME) {
+ bnode_StopProc(abnode->salsrvProc, SIGKILL);
+ abnode->salsrvKillSent = 1;
+ bozo_Log
+ ("bos shutdown: salvageserver failed to shutdown within %d seconds\n",
+ SDTIME);
+ }
+ }
if (abnode->scanSDW) {
if (!abnode->scanKillSent && now - abnode->timeSDStarted > SDTIME) {
bnode_StopProc(abnode->scanProc, SIGKILL);
{
register afs_int32 temp;
if (abnode->volSDW || abnode->fileSDW || abnode->salSDW
- || abnode->scanSDW)
+ || abnode->scanSDW || abnode->salsrvSDW)
temp = BSTAT_SHUTTINGDOWN;
else if (abnode->salRunning)
temp = BSTAT_NORMAL;
else if (abnode->volRunning && abnode->fileRunning
- && (!abnode->scancmd || abnode->scanRunning))
+ && (!abnode->scancmd || abnode->scanRunning)
+ && (!abnode->salsrvcmd || abnode->salsrvRunning))
temp = BSTAT_NORMAL;
else if (!abnode->salRunning && !abnode->volRunning
- && !abnode->fileRunning && !abnode->scanRunning)
+ && !abnode->fileRunning && !abnode->scanRunning
+ && !abnode->salsrvRunning)
temp = BSTAT_SHUTDOWN;
else
temp = BSTAT_STARTINGUP;
abnode->scanRunning = 0;
abnode->scanSDW = 0;
abnode->scanKillSent = 0;
+ } else if (aproc == abnode->salsrvProc) {
+ abnode->salsrvProc = 0;
+ abnode->salsrvRunning = 0;
+ abnode->salsrvSDW = 0;
+ abnode->salsrvKillSent = 0;
}
/* now restart anyone who needs to restart */
}
/* make sure we're periodically checking the state if we need to */
-static int
+static void
SetNeedsClock(register struct fsbnode *ab)
{
if (ab->b.goal == 1 && ab->fileRunning && ab->volRunning
- && (!ab->scancmd || ab->scanRunning))
+ && (!ab->scancmd || ab->scanRunning)
+ && (!ab->salsrvcmd || ab->salsrvRunning))
ab->needsClock = 0; /* running normally */
else if (ab->b.goal == 0 && !ab->fileRunning && !ab->volRunning
- && !ab->salRunning && !ab->scanRunning)
+ && !ab->salRunning && !ab->scanRunning && !ab->salsrvRunning)
ab->needsClock = 0; /* halted normally */
else
ab->needsClock = 1; /* other */
abnode->volRunning = 1;
}
}
+ if (abnode->salsrvcmd) {
+ if (!abnode->salsrvRunning) {
+ abnode->lastSalsrvStart = FT_ApproxTime();
+ code =
+ bnode_NewProc(abnode, abnode->salsrvcmd, "salsrv",
+ &tp);
+ if (code == 0) {
+ abnode->salsrvProc = tp;
+ abnode->salsrvRunning = 1;
+ }
+ }
+ }
if (abnode->scancmd) {
if (!abnode->scanRunning) {
abnode->lastScanStart = FT_ApproxTime();
}
} else { /* file is not running */
/* see how to start */
- if (!abnode->needsSalvage) {
+ /* for demand attach fs, needsSalvage flag is ignored */
+ if (!abnode->needsSalvage || abnode->salsrvcmd) {
/* no crash apparent, just start up normally */
if (!abnode->fileRunning) {
abnode->lastFileStart = FT_ApproxTime();
abnode->volRunning = 1;
}
}
+ if (abnode->salsrvcmd && !abnode->salsrvRunning) {
+ abnode->lastSalsrvStart = FT_ApproxTime();
+ code =
+ bnode_NewProc(abnode, abnode->salsrvcmd, "salsrv",
+ &tp);
+ if (code == 0) {
+ abnode->salsrvProc = tp;
+ abnode->salsrvRunning = 1;
+ }
+ }
if (abnode->scancmd && !abnode->scanRunning) {
abnode->lastScanStart = FT_ApproxTime();
code =
abnode->volSDW = 1;
abnode->timeSDStarted = now;
}
+ if (abnode->salsrvRunning && !abnode->salsrvSDW) {
+ bnode_StopProc(abnode->salsrvProc, SIGTERM);
+ abnode->salsrvSDW = 1;
+ abnode->timeSDStarted = now;
+ }
if (abnode->scanRunning && !abnode->scanSDW) {
bnode_StopProc(abnode->scanProc, SIGTERM);
abnode->scanSDW = 1;
return BZDOM;
return 0;
}
+
+/*
+ * Copy one command string of a demand attach bnode into abuffer.
+ * Index 0 is the fileserver command, 1 the volserver, 2 the salvageserver,
+ * 3 the salvager and 4 the scanner (only when one is configured).
+ * Returns 0 on success and BZDOM for any other index.  Note that alen is
+ * not consulted; the copy is unbounded.
+ */
+static int
+dafs_getparm(struct fsbnode *abnode, afs_int32 aindex, char *abuffer,
+	     afs_int32 alen)
+{
+    switch (aindex) {
+    case 0:
+	strcpy(abuffer, abnode->filecmd);
+	break;
+    case 1:
+	strcpy(abuffer, abnode->volcmd);
+	break;
+    case 2:
+	strcpy(abuffer, abnode->salsrvcmd);
+	break;
+    case 3:
+	strcpy(abuffer, abnode->salcmd);
+	break;
+    case 4:
+	/* the scanner slot exists only when a scanner command was given */
+	if (!abnode->scancmd)
+	    return BZDOM;
+	strcpy(abuffer, abnode->scancmd);
+	break;
+    default:
+	return BZDOM;
+    }
+    return 0;
+}
;;
esac
+
+
+dnl pthreads fixes
+case $AFS_SYSNAME in
+dnl we'll go ahead and turn on XOPEN2K and ISO_C99
+dnl if this causes problems, we should scale back to _XOPEN_SOURCE=500
+ *linux*)
+ MT_CFLAGS="${MT_CFLAGS} -D_XOPEN_SOURCE=600 -D_BSD_SOURCE"
+ ;;
+esac
+
+
dnl Disable the default for debugging/optimization if not enabled
if test "x$enable_debug_kernel" = "xno"; then
KERN_DBG=
#ifdef AFS_NAMEI_ENV
#define AFS_64BIT_IOPS_ENV 1
#endif
-#define BITMAP_LATER 1
-#define FAST_RESTART 1
#define AFS_HAVE_FLOCK_SYSID 1
#ifdef AFS_NAMEI_ENV
#define AFS_64BIT_IOPS_ENV 1
#endif
-#define BITMAP_LATER 1
-#define FAST_RESTART 1
#define AFS_HAVE_FLOCK_SYSID 1
#ifdef AFS_NAMEI_ENV
#define AFS_64BIT_IOPS_ENV 1
#endif
-#define BITMAP_LATER 1
-#define FAST_RESTART 1
#define AFS_HAVE_FLOCK_SYSID 1
typedef long long afs_int64;
typedef unsigned long long afs_uint64;
#endif
-#define ZeroInt64(a) (a) = 0
+/* set a native 64-bit value to zero; the argument is parenthesized so any lvalue expression is safe */
+#define ZeroInt64(a)       ((a) = 0)
#define AssignInt64(a, b) *(b) = (a)
+#define IncInt64(a) (*(a))++
+#define IncUInt64(a) (*(a))++
+#define DecInt64(a) (*(a))--
+#define DecUInt64(a) (*(a))--
+#define GTInt64(a,b) ((a) > (b))
+#define GEInt64(a,b) ((a) >= (b))
+#define LEInt64(a,b) ((a) <= (b))
+#define LTInt64(a,b) ((a) < (b))
#define AddInt64(a,b,c) *(c) = (afs_int64)(a) + (afs_int64)(b)
#define AddUInt64(a,b,c) *(c) = (afs_uint64)(a) + (afs_uint64)(b)
#define SubtractInt64(a,b,c) *(c) = (afs_int64)(a) - (afs_int64)(b)
afs_uint32 low;
};
typedef struct u_Int64 afs_uint64;
-#define ZeroInt64(a) (a).high = (a).low = 0
+#define ZeroInt64(a) ((a).high = (a).low = 0)
#define AssignInt64(a, b) (b)->high = (a).high; (b)->low = (a).low
+#define IncInt64(a) ((++((a)->low)) ? 0 : (a)->high++ )
+#define IncUInt64(a) ((++((a)->low)) ? 0 : (a)->high++ )
+#define DecInt64(a) (((a)->low)-- ? 0 : (a)->high-- )
+#define DecUInt64(a) (((a)->low)-- ? 0 : (a)->high-- )
+#define GTInt64(a,b) (((a).high > (b).high) || (((a).high == (b).high) && ((a).low > (b).low)))
+#define GEInt64(a,b) (((a).high > (b).high) || (((a).high == (b).high) && ((a).low >= (b).low)))
+#define LEInt64(a,b) (((a).high < (b).high) || (((a).high == (b).high) && ((a).low <= (b).low)))
+#define LTInt64(a,b) (((a).high < (b).high) || (((a).high == (b).high) && ((a).low < (b).low)))
#define CompareInt64(a,b) (((afs_int32)(a).high - (afs_int32)(b).high) || (((a).high == (b).high) && ((a).low - (b).low)))
#define AddInt64(a, b, c) { afs_int64 _a, _b; _a = a; _b = b; (c)->low = _a.low + _b.low; (c)->high = _a.high + _b.high + ((c)->low < _b.low); }
#define SubtractInt64(a, b, c) { afs_int64 _a, _b; _a = a; _b = b; (c)->low = _a.low - _b.low; (c)->high = _a.high - _b.high - (_a.low < _b.low); }
};
typedef struct afsUUID afsUUID;
+/* for now, demand attach fileserver is only supported on unix pthreads builds */
+#if defined(DEMAND_ATTACH_ENABLE) && defined(AFS_PTHREAD_ENV) && !defined(AFS_NT40_ENV)
+#define AFS_DEMAND_ATTACH_FS 1
+#endif
+
#endif /* OPENAFS_CONFIG_AFS_STDS_H */
#define _RXQSP(q1,q2,i,a,b,c,d,x,y) if (!queue_IsEnd(q1,i->c)) \
(((y->b->a=q2->a)->b=y->b), ((x->a->b=q2)->a=x->a), ((i->c=q1)->d=i))
+/* This one moves a chain of elements from (s) to (e) from its
+ * current position to either before or after element (i)
+ * if (a,b,x,y) is (prev,next,s,e) then chain is moved before (i)
+ * if (a,b,x,y) is (next,prev,e,s) then chain is moved after (i) */
+#define _RXQMV(i, s, e, a, b, x, y) if (i->a != y) \
+ (((e->next->prev=s->prev)->next=e->next), ((i->a->b=x)->a=i->a), ((y->b=i)->a=y))
+
/* Basic remove operation. Doesn't update the queue item to indicate it's been removed */
#define _RXQR(i) ((_RXQ(i)->prev->next=_RXQ(i)->next)->prev=_RXQ(i)->prev)
#define queue_Replace(q1,q2) if (queue_IsEmpty(q2)) queue_Init(q1); else \
(*_RXQ(q1) = *_RXQ(q2), _RXQ(q1)->next->prev = _RXQ(q1)->prev->next = _RXQ(q1), queue_Init(q2))
+/* move a chain of elements beginning at (s) and ending at (e) before node (i) */
+#define queue_MoveChainBefore(i, s, e) _RXQMV(_RXQ(i),_RXQ(s),_RXQ(e),prev,next,_RXQ(s),_RXQ(e))
+
+/* move a chain of elements beginning at (s) and ending at (e) after node (i) */
+#define queue_MoveChainAfter(i, s, e) _RXQMV(_RXQ(i),_RXQ(s),_RXQ(e),next,prev,_RXQ(e),_RXQ(s))
+
/* Remove a queue element (*i) from it's queue. The next field is 0'd, so that any further use of this q entry will hopefully cause a core dump. Multiple removes of the same queue item are not supported */
#define queue_Remove(i) (_RXQR(i), _RXQ(i)->next = 0)
/* Returns false if the item was removed from a queue OR is uninitialized (zero) */
#define queue_IsOnQueue(i) (_RXQ(i)->next != 0)
+/* Returns true if the item was removed from a queue OR is uninitialized (zero) */
+/* Return false if the queue item is currently in a queue */
+#define queue_IsNotOnQueue(i) (_RXQ(i)->next == 0)
+
/* Returns true if the queue item (i) is the first element of the queue (q) */
#define queue_IsFirst(q,i) (_RXQ(q)->first == _RXQ(i))
/* Returns true if the queue item (i) is the end of the queue (q), that is, i is the head of the queue */
#define queue_IsEnd(q,i) (_RXQ(q) == _RXQ(i))
+/* Returns false if the queue item (i) is the end of the queue (q), that is, i is the head of the queue */
+#define queue_IsNotEnd(q,i) (_RXQ(q) != _RXQ(i))
+
/* Prototypical loop to scan an entire queue forwards. q is the queue
* head, qe is the loop variable, next is a variable used to store the
* queue entry for the next iteration of the loop, s is the user's
!queue_IsEnd(q, qe); \
(qe) = (next), next = queue_Next(qe, s)
+/* similar to queue_Scan except start at element 'start' instead of the beginning */
+#define queue_ScanFrom(q, start, qe, next, s) \
+ (qe) = (struct s*)(start), next = queue_Next(qe, s); \
+ !queue_IsEnd(q, qe); \
+ (qe) = (next), next = queue_Next(qe, s)
+
/* This is similar to queue_Scan, but scans from the end of the queue to the beginning. Next is the previous queue entry. */
#define queue_ScanBackwards(q, qe, prev, s) \
(qe) = queue_Last(q, s), prev = queue_Prev(qe, s); \
!queue_IsEnd(q, qe); \
(qe) = prev, prev = queue_Prev(qe, s)
+/* This is similar to queue_ScanBackwards, but start at element 'start' instead of the end. Next is the previous queue entry. */
+#define queue_ScanBackwardsFrom(q, start, qe, prev, s) \
+ (qe) = (struct s*)(start), prev = queue_Prev(qe, s); \
+ !queue_IsEnd(q, qe); \
+ (qe) = prev, prev = queue_Prev(qe, s)
+
#define queue_Count(q, qe, nqe, s, n) \
for (n=0, queue_Scan(q, qe, nqe, s), n++) {}
#endif /* _RX_QUEUE_ */
--- /dev/null
+# Copyright 2000, International Business Machines Corporation and others.
+# All Rights Reserved.
+#
+# This software has been released under the terms of the IBM Public
+# License. For details, see the LICENSE file in the top-level source
+# directory or online at http://www.openafs.org/dl/license10.html
+#
+# Portions Copyright (c) 2003 Apple Computer, Inc.
+# Portions Copyright (c) 2006 Sine Nomine Associates
+
+srcdir=@srcdir@
+include @TOP_OBJDIR@/src/config/Makefile.config
+
+CC=${MT_CC}
+CFLAGS=${COMMON_CFLAGS} -I.. -DNINTERFACE ${MT_CFLAGS} -DRXDEBUG -DFSSYNC_BUILD_CLIENT \
+ -DSALVSYNC_BUILD_SERVER -DSALVSYNC_BUILD_CLIENT
+
+CCRULE=${CC} ${CFLAGS} -c $?
+
+VICED=../viced
+VLSERVER=../vlserver
+LWP=../lwp
+LIBACL=../libacl
+UTIL=../util
+DIR=../dir
+VOL=../vol
+FSINT=../fsint
+
+SALVAGEDOBJS=salvaged.o vol-salvage.o physio.o
+
+DIROBJS=buffer.o dir.o salvage.o
+
+LWPOBJS=lock.o threadname.o
+
+UTILOBJS=assert.o uuid.o serverLog.o fileutil.o netutils.o dirpath.o volparse.o flipbase64.o softsig.o fstab.o
+
+VLIBOBJS=vnode.o volume.o vutil.o partition.o fssync-client.o \
+ clone.o nuke.o devname.o listinodes.o ihandle.o \
+ namei_ops.o salvsync-server.o salvsync-client.o \
+ daemon_com.o
+
+OBJECTS= ${SALVAGEDOBJS} ${UTILOBJS} ${VLIBOBJS} ${DIROBJS} ${LWPOBJS}
+
+FSSDEBUG_OBJS = fssync-debug.o physio.o common.o ${UTILOBJS} ${VLIBOBJS} ${DIROBJS} ${LWPOBJS}
+
+SSSDEBUG_OBJS = salvsync-debug.o physio.o common.o ${UTILOBJS} ${VLIBOBJS} ${DIROBJS} ${LWPOBJS}
+
+LIBS=${TOP_LIBDIR}/libafsauthent.a ${TOP_LIBDIR}/libafsrpc.a ${TOP_LIBDIR}/util.a ${TOP_LIBDIR}/libcmd.a
+
+INSTALL_TARGS = ${DESTDIR}${afssrvlibexecdir}/salvageserver \
+ ${DESTDIR}${afssrvsbindir}/fssync-debug \
+ ${DESTDIR}${afssrvsbindir}/salvsync-debug
+
+DEST_TARGS = ${DEST}/root.server/usr/afs/bin/salvageserver \
+ ${DEST}/root.server/usr/afs/bin/fssync-debug \
+ ${DEST}/root.server/usr/afs/bin/salvsync-debug
+
+all: salvageserver fssync-debug salvsync-debug
+
+salvaged.o: ${VOL}/salvaged.c
+ ${CCRULE}
+
+vol-salvage.o: ${VOL}/vol-salvage.c
+ ${CCRULE}
+
+physio.o: ${VOL}/physio.c
+ ${CCRULE}
+
+fssync-debug.o: ${VOL}/fssync-debug.c
+ ${CCRULE}
+
+salvsync-debug.o: salvsync-debug.c
+ ${CCRULE}
+
+assert.o: ${UTIL}/assert.c
+ ${CCRULE}
+
+uuid.o: ${UTIL}/uuid.c
+ ${CCRULE}
+
+serverLog.o: ${UTIL}/serverLog.c
+ ${CCRULE}
+
+fileutil.o: ${UTIL}/fileutil.c
+ ${CCRULE}
+
+volparse.o: ${UTIL}/volparse.c
+ ${CCRULE}
+
+flipbase64.o: ${UTIL}/flipbase64.c
+ ${CCRULE}
+
+netutils.o: ${UTIL}/netutils.c
+ ${CCRULE}
+
+dirpath.o: ${UTIL}/dirpath.c
+ ${CCRULE}
+
+softsig.o: ${UTIL}/softsig.c
+ ${CCRULE}
+
+buffer.o: ${DIR}/buffer.c
+ ${CCRULE}
+
+dir.o: ${DIR}/dir.c
+ ${CCRULE}
+
+salvage.o: ${DIR}/salvage.c
+ ${CCRULE}
+
+lock.o: ${LWP}/lock.c
+ ${CCRULE}
+
+threadname.o: ${LWP}/threadname.c
+ ${CCRULE}
+
+vnode.o: ${VOL}/vnode.c
+ ${CCRULE}
+
+volume.o: ${VOL}/volume.c
+ ${CCRULE}
+
+vutil.o: ${VOL}/vutil.c
+ ${CCRULE}
+
+partition.o: ${VOL}/partition.c
+ ${CCRULE}
+
+fssync-client.o: ${VOL}/fssync-client.c
+ ${CCRULE}
+
+salvsync-server.o: ${VOL}/salvsync-server.c
+ ${CCRULE}
+
+salvsync-client.o: ${VOL}/salvsync-client.c
+ ${CCRULE}
+
+daemon_com.o: ${VOL}/daemon_com.c
+ ${CCRULE}
+
+clone.o: ${VOL}/clone.c
+ ${CCRULE}
+
+nuke.o: ${VOL}/nuke.c
+ ${CCRULE}
+
+devname.o: ${VOL}/devname.c
+ ${CCRULE}
+
+# only for darwin?
+fstab.o: ${UTIL}/fstab.c
+ ${CCRULE}
+
+common.o: ${VOL}/common.c
+ ${CCRULE}
+
+listinodes.o: ${VOL}/listinodes.c
+ ${CCRULE}
+
+ihandle.o: ${VOL}/ihandle.c
+ ${CCRULE}
+
+namei_ops.o: ${VOL}/namei_ops.c
+ ${CCRULE}
+
+salvageserver: ${OBJECTS} ${LIBS}
+ ${CC} ${LDFLAGS} -o salvageserver ${OBJECTS} ${LIBS} ${MT_LIBS} ${XLIBS}
+
+fssync-debug: ${FSSDEBUG_OBJS} ${LIBS}
+ ${CC} ${LDFLAGS} -o fssync-debug ${FSSDEBUG_OBJS} ${LIBS} ${MT_LIBS} ${XLIBS}
+
+salvsync-debug: ${SSSDEBUG_OBJS} ${LIBS}
+ ${CC} ${LDFLAGS} -o salvsync-debug ${SSSDEBUG_OBJS} ${LIBS} ${MT_LIBS} ${XLIBS}
+
+${DEST}/root.server/usr/afs/bin/salvageserver: salvageserver
+ ${INSTALL} -ns $? $@
+
+${DEST}/root.server/usr/afs/bin/fssync-debug: fssync-debug
+ ${INSTALL} -s $? $@
+
+${DEST}/root.server/usr/afs/bin/salvsync-debug: salvsync-debug
+ ${INSTALL} -s $? $@
+
+install: ${INSTALL_TARGS}
+
+# Remove every product of "all" (salvageserver and both debug utilities),
+# the object files, and the generated version file.
+clean:
+	$(RM) -f *.o salvageserver fssync-debug salvsync-debug core AFS_component_version_number.c
+
+include ../config/Makefile.version
+
+${DESTDIR}${afssrvlibexecdir}/salvageserver: salvageserver
+ ${INSTALL} -ns $? $@
+
+${DESTDIR}${afssrvsbindir}/fssync-debug: fssync-debug
+ ${INSTALL} -s $? $@
+
+${DESTDIR}${afssrvsbindir}/salvsync-debug: salvsync-debug
+ ${INSTALL} -s $? $@
+
+dest: ${DEST_TARGS}
--- /dev/null
+/*
+ * Copyright 2006, Sine Nomine Associates and others.
+ * All Rights Reserved.
+ *
+ * This software has been released under the terms of the IBM Public
+ * License. For details, see the LICENSE file in the top-level source
+ * directory or online at http://www.openafs.org/dl/license10.html
+ */
+
+/* Main program file. Define globals. */
+#define MAIN 1
+
+/*
+ * salvsync debug tool
+ */
+
+
+#include <afsconfig.h>
+#include <afs/param.h>
+
+RCSID
+ ("$Header$");
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <dirent.h>
+#include <sys/stat.h>
+#include <time.h>
+#include <errno.h>
+#ifdef AFS_NT40_ENV
+#include <io.h>
+#include <WINNT/afsevent.h>
+#else
+#include <sys/param.h>
+#include <sys/file.h>
+#ifndef ITIMER_REAL
+#include <sys/time.h>
+#endif /* ITIMER_REAL */
+#endif
+#include <rx/xdr.h>
+#include <afs/afsint.h>
+#include <afs/assert.h>
+
+
+#include <fcntl.h>
+
+#ifndef AFS_NT40_ENV
+#include <afs/osi_inode.h>
+#endif
+
+#include <afs/cmd.h>
+#include <afs/afsutil.h>
+#include <afs/fileutil.h>
+
+#include "nfs.h"
+#include "lwp.h"
+#include "lock.h"
+#include "ihandle.h"
+#include "vnode.h"
+#include "volume.h"
+#include "partition.h"
+#include "daemon_com.h"
+#include "salvsync.h"
+#ifdef AFS_NT40_ENV
+#include <pthread.h>
+#endif
+
+int VolumeChanged; /* hack to make dir package happy */
+
+
+#ifndef AFS_DEMAND_ATTACH_FS
+/*
+ * Stub entry point used when AFS_DEMAND_ATTACH_FS is not defined:
+ * report that the tool is unsupported in this build and fail.
+ */
+int
+main(int argc, char ** argv)
+{
+    static const char unsupported[] =
+        "*** salvsync-debug is only supported for OpenAFS builds with the demand-attach fileserver extension\n";
+
+    fputs(unsupported, stderr);
+    return -1;
+}
+#else /* AFS_DEMAND_ATTACH_FS */
+
+/*
+ * Per-volume arguments of a salvage request, filled in from the
+ * -volumeid, -partition and -priority options by common_salv_prolog()
+ * and handed to SALVSYNC_SalvageVolume() by do_salvop().
+ */
+struct salv_state {
+ /* value of -priority; 0 when the option is absent */
+ afs_uint32 prio;
+ /* value of -volumeid */
+ afs_uint32 volume;
+ /* value of -partition; all zero bytes when the option is absent */
+ char partName[16];
+};
+
+/*
+ * Parsed command-line state shared by every subcommand handler.
+ */
+struct state {
+ /* value of -reason; passed to SALVSYNC_SalvageVolume() by do_salvop() */
+ afs_int32 reason;
+ /* salvage arguments, allocated by common_salv_prolog() */
+ struct salv_state * sop;
+};
+
+static int common_prolog(struct cmd_syndesc *, struct state *);
+static int common_salv_prolog(struct cmd_syndesc *, struct state *);
+
+static int do_salvop(struct state *, afs_int32 command, SYNC_response * res);
+
+static char * response_code_to_string(afs_int32);
+static char * command_code_to_string(afs_int32);
+static char * reason_code_to_string(afs_int32);
+static char * program_type_to_string(afs_int32);
+static char * state_code_to_string(afs_int32);
+
+
+static int OpStats(struct cmd_syndesc * as, char * rock);
+static int OpSalvage(struct cmd_syndesc * as, char * rock);
+static int OpCancel(struct cmd_syndesc * as, char * rock);
+static int OpCancelAll(struct cmd_syndesc * as, char * rock);
+static int OpRaisePrio(struct cmd_syndesc * as, char * rock);
+static int OpQuery(struct cmd_syndesc * as, char * rock);
+
+
+#ifndef AFS_NT40_ENV
+#include "AFS_component_version_number.c"
+#endif
+#define MAX_ARGS 128
+
+#define COMMON_PARMS_OFFSET 13
+#define COMMON_PARMS(ts) \
+ cmd_Seek(ts, COMMON_PARMS_OFFSET); \
+ cmd_AddParm(ts, "-reason", CMD_SINGLE, CMD_OPTIONAL, "sync protocol reason code"); \
+ cmd_AddParm(ts, "-programtype", CMD_SINGLE, CMD_OPTIONAL, "program type code")
+
+#define COMMON_SALV_PARMS_OFFSET 10
+#define COMMON_SALV_PARMS(ts) \
+ cmd_Seek(ts, COMMON_SALV_PARMS_OFFSET); \
+ cmd_AddParm(ts, "-volumeid", CMD_SINGLE, 0, "volume id"); \
+ cmd_AddParm(ts, "-partition", CMD_SINGLE, CMD_OPTIONAL, "partition name"); \
+ cmd_AddParm(ts, "-priority", CMD_SINGLE, CMD_OPTIONAL, "priority")
+
+#define SALV_PARMS_DECL(ts) \
+ COMMON_SALV_PARMS(ts); \
+ COMMON_PARMS(ts)
+
+#define COMMON_PARMS_DECL(ts) \
+ COMMON_PARMS(ts)
+
+/*
+ * salvsync-debug entry point (demand attach builds).
+ *
+ * Initializes the AFS server directory paths, registers one cmd syntax
+ * entry per SALVSYNC opcode and lets cmd_Dispatch() run the handler
+ * selected on the command line.  The process exits with the dispatch
+ * result, or with 2 when the server directories cannot be determined.
+ */
+int
+main(int argc, char **argv)
+{
+    struct cmd_syndesc *ts;
+    int err = 0;
+
+    /* Initialize directory paths */
+    if (!(initAFSDirPath() & AFSDIR_SERVER_PATHS_OK)) {
+#ifdef AFS_NT40_ENV
+        ReportErrorEventAlt(AFSEVT_SVR_NO_INSTALL_DIR, 0, argv[0], 0);
+#endif
+        fprintf(stderr, "%s: Unable to obtain AFS server directory.\n",
+                argv[0]);
+        exit(2);
+    }
+
+    /* subcommands that take only the common (-reason, -programtype) options */
+    ts = cmd_CreateSyntax("stats", OpStats, 0, "get salvageserver statistics (SALVSYNC_NOP opcode)");
+    COMMON_PARMS_DECL(ts);
+    cmd_CreateAlias(ts, "nop");
+
+    /* subcommands that additionally take the per-volume salvage options */
+    ts = cmd_CreateSyntax("salvage", OpSalvage, 0, "schedule a salvage (SALVSYNC_SALVAGE opcode)");
+    SALV_PARMS_DECL(ts);
+
+    ts = cmd_CreateSyntax("cancel", OpCancel, 0, "cancel a salvage (SALVSYNC_CANCEL opcode)");
+    SALV_PARMS_DECL(ts);
+
+    ts = cmd_CreateSyntax("raiseprio", OpRaisePrio, 0, "raise a salvage priority (SALVSYNC_RAISEPRIO opcode)");
+    SALV_PARMS_DECL(ts);
+    cmd_CreateAlias(ts, "rp");
+
+    ts = cmd_CreateSyntax("query", OpQuery, 0, "query salvage status (SALVSYNC_QUERY opcode)");
+    SALV_PARMS_DECL(ts);
+    cmd_CreateAlias(ts, "qry");
+
+    ts = cmd_CreateSyntax("kill", OpCancelAll, 0, "cancel all scheduled salvages (SALVSYNC_CANCELALL opcode)");
+    COMMON_PARMS_DECL(ts);
+
+    err = cmd_Dispatch(argc, argv);
+    exit(err);
+}
+
+/*
+ * Setup shared by every subcommand: initialize the volume package as a
+ * debug utility, parse the common options and connect to the
+ * salvageserver.
+ *
+ * -reason is stored in state->reason as an integer; when it is not
+ * given the reason defaults to SALVSYNC_WHATEVER, so callers that pass
+ * an uninitialized struct state never send a garbage reason code.
+ * -programtype overrides the global programType, either by symbolic
+ * name or by numeric value.
+ *
+ * Always returns 0.
+ */
+static int
+common_prolog(struct cmd_syndesc * as, struct state * state)
+{
+    register struct cmd_item *ti;
+
+#ifdef AFS_NT40_ENV
+    if (afs_winsockInit() < 0) {
+        Exit(1);
+    }
+#endif
+
+    VInitVolumePackage(debugUtility, 1, 1,
+                       DONT_CONNECT_FS, 0);
+    DInit(1);
+
+    if ((ti = as->parms[COMMON_PARMS_OFFSET].items)) {	/* -reason */
+        state->reason = atoi(ti->data);
+    } else {
+        state->reason = SALVSYNC_WHATEVER;
+    }
+    if ((ti = as->parms[COMMON_PARMS_OFFSET+1].items)) {	/* -programtype */
+        if (!strcmp(ti->data, "fileServer")) {
+            programType = fileServer;
+        } else if (!strcmp(ti->data, "volumeUtility")) {
+            programType = volumeUtility;
+        } else if (!strcmp(ti->data, "salvager")) {
+            programType = salvager;
+        } else if (!strcmp(ti->data, "salvageServer")) {
+            programType = salvageServer;
+        } else {
+            /* not a known name: treat the argument as a numeric type code */
+            programType = (ProgramType) atoi(ti->data);
+        }
+    }
+
+    VConnectSALV();
+
+    return 0;
+}
+
+/*
+ * Allocate state->sop and fill it from the per-volume salvage options.
+ *
+ * The structure is zero-filled by calloc(), so an absent -partition
+ * leaves partName empty and an absent -priority leaves prio at 0.
+ * -volumeid is parsed with strtoul() because volume ids are unsigned
+ * 32-bit values that can exceed the range atoi() handles.  A missing
+ * -volumeid is only reported on stderr; the volume id then stays 0.
+ *
+ * Always returns 0; allocation failure trips the assert.
+ */
+static int
+common_salv_prolog(struct cmd_syndesc * as, struct state * state)
+{
+    register struct cmd_item *ti;
+
+    state->sop = (struct salv_state *) calloc(1, sizeof(struct salv_state));
+    assert(state->sop != NULL);
+
+    if ((ti = as->parms[COMMON_SALV_PARMS_OFFSET].items)) {	/* -volumeid */
+        state->sop->volume = (afs_uint32) strtoul(ti->data, NULL, 10);
+    } else {
+        fprintf(stderr, "required argument -volumeid not given\n");
+    }
+
+    if ((ti = as->parms[COMMON_SALV_PARMS_OFFSET+1].items)) {	/* -partition */
+        strlcpy(state->sop->partName, ti->data, sizeof(state->sop->partName));
+    }
+
+    if ((ti = as->parms[COMMON_SALV_PARMS_OFFSET+2].items)) {	/* -prio */
+        state->sop->prio = atoi(ti->data);
+    }
+
+    return 0;
+}
+
+/*
+ * Issue one SALVSYNC request and print the outcome.
+ *
+ * state   - parsed options; state->sop supplies volume, partition and
+ *           priority, state->reason the reason code
+ * command - SALVSYNC opcode to send
+ * res     - response buffer, or NULL to use a local one
+ *
+ * The local response header is zeroed before the call so that the
+ * queue lengths printed afterwards are well defined even when the
+ * request fails.  Diagnostics go to stderr, the decoded salvage state
+ * to stdout.  The salvageserver connection is closed before returning.
+ *
+ * Returns the result code of SALVSYNC_SalvageVolume().
+ */
+static int
+do_salvop(struct state * state, afs_int32 command, SYNC_response * res)
+{
+    afs_int32 code;
+    SALVSYNC_response_hdr hdr_l, *hdr;
+    SYNC_response res_l;
+
+    if (!res) {
+        memset(&hdr_l, 0, sizeof(hdr_l));
+        memset(&res_l, 0, sizeof(res_l));
+        res = &res_l;
+        res->payload.len = sizeof(hdr_l);
+        res->payload.buf = hdr = &hdr_l;
+    } else {
+        hdr = (SALVSYNC_response_hdr *) res->payload.buf;
+    }
+
+    fprintf(stderr, "calling SALVSYNC_SalvageVolume with command code %d (%s)\n",
+            command, command_code_to_string(command));
+
+    code = SALVSYNC_SalvageVolume(state->sop->volume,
+                                  state->sop->partName,
+                                  command,
+                                  state->reason,
+                                  state->sop->prio,
+                                  res);
+
+    switch (code) {
+    case SYNC_OK:
+    case SYNC_DENIED:
+        break;
+    default:
+        fprintf(stderr, "possible sync protocol error. return code was %d\n", code);
+    }
+
+    fprintf(stderr, "SALVSYNC_SalvageVolume returned %d (%s)\n", code, response_code_to_string(code));
+    fprintf(stderr, "protocol response code was %d (%s)\n",
+            res->hdr.response, response_code_to_string(res->hdr.response));
+    fprintf(stderr, "protocol reason code was %d (%s)\n",
+            res->hdr.reason, reason_code_to_string(res->hdr.reason));
+
+    printf("state = {\n");
+    /* per-volume fields are only meaningful when the server flags them valid */
+    if (res->hdr.flags & SALVSYNC_FLAG_VOL_STATS_VALID) {
+        printf("\tstate = %d (%s)\n",
+               hdr->state, state_code_to_string(hdr->state));
+        printf("\tprio = %d\n", hdr->prio);
+    }
+    printf("\tsq_len = %d\n", hdr->sq_len);
+    printf("\tpq_len = %d\n", hdr->pq_len);
+    printf("}\n");
+
+    VDisconnectSALV();
+
+    return code;
+}
+
+/* Map a SYNC response code to its symbolic name ("**UNKNOWN**" if unrecognized). */
+static char *
+response_code_to_string(afs_int32 response)
+{
+    char *label;
+
+    switch (response) {
+    case SYNC_OK:
+        label = "SYNC_OK";
+        break;
+    case SYNC_DENIED:
+        label = "SYNC_DENIED";
+        break;
+    case SYNC_COM_ERROR:
+        label = "SYNC_COM_ERROR";
+        break;
+    case SYNC_BAD_COMMAND:
+        label = "SYNC_BAD_COMMAND";
+        break;
+    case SYNC_FAILED:
+        label = "SYNC_FAILED";
+        break;
+    default:
+        label = "**UNKNOWN**";
+        break;
+    }
+
+    return label;
+}
+
+/*
+ * Map a SYNC/SALVSYNC command opcode to its symbolic name.
+ * Returns "**UNKNOWN**" for an opcode that is not recognized.
+ */
+static char *
+command_code_to_string(afs_int32 command)
+{
+    switch (command) {
+    case SYNC_COM_CHANNEL_CLOSE:
+        return "SYNC_COM_CHANNEL_CLOSE";
+    case SALVSYNC_NOP:
+        return "SALVSYNC_NOP";
+    case SALVSYNC_SALVAGE:
+        return "SALVSYNC_SALVAGE";
+    case SALVSYNC_CANCEL:
+        return "SALVSYNC_CANCEL";
+    case SALVSYNC_RAISEPRIO:
+        return "SALVSYNC_RAISEPRIO";
+    case SALVSYNC_QUERY:
+        return "SALVSYNC_QUERY";
+    case SALVSYNC_CANCELALL:
+        /* the printed name must match the opcode's spelling */
+        return "SALVSYNC_CANCELALL";
+    default:
+        return "**UNKNOWN**";
+    }
+}
+
+/* Map a SALVSYNC reason code to its symbolic name ("**UNKNOWN**" if unrecognized). */
+static char *
+reason_code_to_string(afs_int32 reason)
+{
+    if (reason == SALVSYNC_WHATEVER)
+        return "SALVSYNC_WHATEVER";
+    if (reason == SALVSYNC_ERROR)
+        return "SALVSYNC_ERROR";
+    if (reason == SALVSYNC_OPERATOR)
+        return "SALVSYNC_OPERATOR";
+    if (reason == SALVSYNC_SHUTDOWN)
+        return "SALVSYNC_SHUTDOWN";
+    if (reason == SALVSYNC_NEEDED)
+        return "SALVSYNC_NEEDED";
+
+    return "**UNKNOWN**";
+}
+
+/* Map a ProgramType value to its symbolic name ("**UNKNOWN**" if unrecognized). */
+static char *
+program_type_to_string(afs_int32 type)
+{
+    char *label;
+
+    switch ((ProgramType)type) {
+    case fileServer:
+        label = "fileServer";
+        break;
+    case volumeUtility:
+        label = "volumeUtility";
+        break;
+    case salvager:
+        label = "salvager";
+        break;
+    case salvageServer:
+        label = "salvageServer";
+        break;
+    default:
+        label = "**UNKNOWN**";
+        break;
+    }
+
+    return label;
+}
+
+/* Map a SALVSYNC salvage state code to its symbolic name ("**UNKNOWN**" if unrecognized). */
+static char *
+state_code_to_string(afs_int32 state)
+{
+    if (state == SALVSYNC_STATE_UNKNOWN)
+        return "SALVSYNC_STATE_UNKNOWN";
+    if (state == SALVSYNC_STATE_QUEUED)
+        return "SALVSYNC_STATE_QUEUED";
+    if (state == SALVSYNC_STATE_SALVAGING)
+        return "SALVSYNC_STATE_SALVAGING";
+    if (state == SALVSYNC_STATE_ERROR)
+        return "SALVSYNC_STATE_ERROR";
+    if (state == SALVSYNC_STATE_DONE)
+        return "SALVSYNC_STATE_DONE";
+
+    return "**UNKNOWN**";
+}
+
+/* Handler for the "stats" subcommand: send SALVSYNC_NOP and print the response. */
+static int
+OpStats(struct cmd_syndesc * as, char * rock)
+{
+    struct state st;
+
+    (void) common_prolog(as, &st);
+    (void) common_salv_prolog(as, &st);
+    (void) do_salvop(&st, SALVSYNC_NOP, NULL);
+
+    return 0;
+}
+
+/* Handler for the "salvage" subcommand: send SALVSYNC_SALVAGE and print the response. */
+static int
+OpSalvage(struct cmd_syndesc * as, char * rock)
+{
+    struct state st;
+
+    (void) common_prolog(as, &st);
+    (void) common_salv_prolog(as, &st);
+    (void) do_salvop(&st, SALVSYNC_SALVAGE, NULL);
+
+    return 0;
+}
+
+/* Handler for the "cancel" subcommand: send SALVSYNC_CANCEL and print the response. */
+static int
+OpCancel(struct cmd_syndesc * as, char * rock)
+{
+    struct state st;
+
+    (void) common_prolog(as, &st);
+    (void) common_salv_prolog(as, &st);
+    (void) do_salvop(&st, SALVSYNC_CANCEL, NULL);
+
+    return 0;
+}
+
+/* Handler for the "kill" subcommand: send SALVSYNC_CANCELALL and print the response. */
+static int
+OpCancelAll(struct cmd_syndesc * as, char * rock)
+{
+    struct state st;
+
+    (void) common_prolog(as, &st);
+    (void) common_salv_prolog(as, &st);
+    (void) do_salvop(&st, SALVSYNC_CANCELALL, NULL);
+
+    return 0;
+}
+
+/* Handler for the "raiseprio" subcommand: send SALVSYNC_RAISEPRIO and print the response. */
+static int
+OpRaisePrio(struct cmd_syndesc * as, char * rock)
+{
+    struct state st;
+
+    (void) common_prolog(as, &st);
+    (void) common_salv_prolog(as, &st);
+    (void) do_salvop(&st, SALVSYNC_RAISEPRIO, NULL);
+
+    return 0;
+}
+
+/* Handler for the "query" subcommand: send SALVSYNC_QUERY and print the response. */
+static int
+OpQuery(struct cmd_syndesc * as, char * rock)
+{
+    struct state st;
+
+    (void) common_prolog(as, &st);
+    (void) common_salv_prolog(as, &st);
+    (void) do_salvop(&st, SALVSYNC_QUERY, NULL);
+
+    return 0;
+}
+
+#endif /* AFS_DEMAND_ATTACH_FS */
include @TOP_OBJDIR@/src/config/Makefile.config
CC=${MT_CC}
-CFLAGS=${COMMON_CFLAGS} -I.. -DNINTERFACE ${MT_CFLAGS} -DRXDEBUG
+CFLAGS=${COMMON_CFLAGS} -I.. -DNINTERFACE ${MT_CFLAGS} -DRXDEBUG -DFSSYNC_BUILD_SERVER -DSALVSYNC_BUILD_CLIENT
CCRULE=${CC} ${CFLAGS} -c $?
VOL=../vol
FSINT=../fsint
-VICEDOBJS=viced.o afsfileprocs.o host.o physio.o callback.o
+VICEDOBJS=viced.o afsfileprocs.o host.o physio.o callback.o serialize_state.o
VLSERVEROBJS=vldbint.cs.o vldbint.xdr.o
DIROBJS=buffer.o dir.o salvage.o
-VOLOBJS= vnode.o volume.o vutil.o partition.o fssync.o purge.o \
+VOLOBJS= vnode.o volume.o vutil.o partition.o fssync-server.o \
clone.o devname.o common.o ihandle.o listinodes.o namei_ops.o \
- fstab.o
+ fstab.o salvsync-client.o daemon_com.o
FSINTOBJS= afsaux.o afscbint.cs.o afsint.ss.o afsint.xdr.o
objects= ${VICEDOBJS} ${VLSERVEROBJS} ${LWPOBJS} ${LIBACLOBJS} \
${UTILOBJS} ${DIROBJS} ${VOLOBJS} ${FSINTOBJS}
+SDBGOBJS = state_analyzer.o uuid.o dirpath.o fileutil.o ${TOP_LIBDIR}/util.a
+
LIBS=${TOP_LIBDIR}/libafsauthent.a ${TOP_LIBDIR}/libafsrpc.a ${TOP_LIBDIR}/util.a
-all: fileserver
+all: fileserver state_analyzer
viced.o: ${VICED}/viced.c
${CCRULE}
callback.o: ${VICED}/callback.c
${CCRULE}
+serialize_state.o: ./serialize_state.c
+ ${CCRULE}
+
assert.o: ${UTIL}/assert.c
${CCRULE}
partition.o: ${VOL}/partition.c
${CCRULE}
-fssync.o: ${VOL}/fssync.c
+fssync-server.o: ${VOL}/fssync-server.c
+ ${CCRULE}
+
+fssync-client.o: ${VOL}/fssync-client.c
+ ${CCRULE}
+
+salvsync-client.o: ${VOL}/salvsync-client.c
${CCRULE}
-purge.o: ${VOL}/purge.c
+daemon_com.o: ${VOL}/daemon_com.c
${CCRULE}
clone.o: ${VOL}/clone.c
afsint.xdr.o: ${FSINT}/afsint.xdr.c
${CCRULE}
+state_analyzer.o: state_analyzer.c
+ ${CCRULE}
+
fileserver: ${objects} ${LIBS}
${CC} ${LDFLAGS} -o fileserver ${objects} ${LIBS} ${MT_LIBS} ${XLIBS}
+state_analyzer: ${SDBGOBJS}
+ ${CC} ${LDFLAGS} -o state_analyzer ${SDBGOBJS} ${MT_LIBS} ${XLIBS}
+
${DEST}/root.server/usr/afs/bin/fileserver: fileserver
${INSTALL} -ns $? $@
-install: ${DESTDIR}${afssrvlibexecdir}/fileserver
+${DEST}/root.server/usr/afs/bin/state_analyzer: state_analyzer
+ ${INSTALL} $? $@
+
+install: ${DESTDIR}${afssrvlibexecdir}/fileserver ${DESTDIR}${afssrvsbindir}/state_analyzer
clean:
- $(RM) -f *.o fileserver core AFS_component_version_number.c
+ $(RM) -f *.o fileserver state_analyzer core AFS_component_version_number.c
include ../config/Makefile.version
${DESTDIR}${afssrvlibexecdir}/fileserver: fileserver
${INSTALL} -ns $? $@
-dest: ${DEST}/root.server/usr/afs/bin/fileserver
+${DESTDIR}${afssrvsbindir}/state_analyzer: state_analyzer
+ ${INSTALL} $? $@
+
+dest: ${DEST}/root.server/usr/afs/bin/fileserver ${DEST}/root.server/usr/afs/bin/state_analyzer
# License. For details, see the LICENSE file in the top-level source
# directory or online at http://www.openafs.org/dl/license10.html
-AFSDEV_AUXCDEFINES = -DAFS_PTHREAD_ENV -DRXDEBUG
+AFSDEV_AUXCDEFINES = -DAFS_PTHREAD_ENV -DRXDEBUG -DFSSYNC_BUILD_SERVER
RELDIR=tviced
!INCLUDE ..\config\NTMakefile.$(SYS_NAME)
--- /dev/null
+/*
+ * Copyright 2006, Sine Nomine Associates and others.
+ * All Rights Reserved.
+ *
+ * This software has been released under the terms of the IBM Public
+ * License. For details, see the LICENSE file in the top-level source
+ * directory or online at http://www.openafs.org/dl/license10.html
+ */
+
+/*
+ * demand attach fs
+ * fileserver state serialization
+ */
+
+#include <afsconfig.h>
+#include <afs/param.h>
+
+RCSID
+ ("$Header$");
+
+#include <stdio.h>
+#include <stdlib.h> /* for malloc() */
+#include <time.h> /* ANSI standard location for time stuff */
+#ifdef AFS_NT40_ENV
+#include <fcntl.h>
+#include <io.h>
+#else
+#include <sys/time.h>
+#include <sys/file.h>
+#endif
+#ifdef HAVE_STRING_H
+#include <string.h>
+#else
+#ifdef HAVE_STRINGS_H
+#include <strings.h>
+#endif
+#endif
+#include <afs/assert.h>
+#include <sys/stat.h>
+
+#include <afs/stds.h>
+
+#include <rx/xdr.h>
+#include <lwp.h>
+#include <lock.h>
+#include <afs/afsint.h>
+#include <afs/rxgen_consts.h>
+#include <afs/nfs.h>
+#include <afs/errors.h>
+#include <afs/ihandle.h>
+#include <afs/vnode.h>
+#include <afs/volume.h>
+#include <afs/acl.h>
+#include <afs/ptclient.h>
+#include <afs/prs_fs.h>
+#include <afs/auth.h>
+#include <afs/afsutil.h>
+#include <rx/rx.h>
+#include <afs/cellconfig.h>
+#include <stdlib.h>
+
+#include "../viced/viced_prototypes.h"
+#include "../viced/viced.h"
+#include "../viced/host.h"
+#include "../viced/callback.h"
+#include "serialize_state.h"
+
+/*@+fcnmacros +macrofcndecl@*/
+#ifdef O_LARGEFILE
+#ifdef S_SPLINT_S
+extern off64_t afs_lseek(int FD, off64_t O, int F);
+#endif /*S_SPLINT_S */
+#define afs_lseek(FD, O, F) lseek64(FD, (off64_t)(O), F)
+#define afs_stat stat64
+#define afs_fstat fstat64
+#define afs_open open64
+#define afs_fopen fopen64
+#define afs_ftruncate ftruncate64
+#define afs_mmap mmap64
+#ifdef AFS_AIX_ENV
+extern void * mmap64(); /* ugly hack since aix build env appears to be somewhat broken */
+#endif
+#else /* !O_LARGEFILE */
+#ifdef S_SPLINT_S
+extern off_t afs_lseek(int FD, off_t O, int F);
+#endif /*S_SPLINT_S */
+#define afs_lseek(FD, O, F) lseek(FD, (off_t)(O), F)
+#define afs_stat stat
+#define afs_fstat fstat
+#define afs_open open
+#define afs_fopen fopen
+#define afs_ftruncate ftruncate
+#define afs_mmap mmap
+#endif /* !O_LARGEFILE */
+/*@=fcnmacros =macrofcndecl@*/
+
+
+#ifdef AFS_DEMAND_ATTACH_FS
+
+/*
+ * demand attach fs
+ * state dump routines
+ *
+ * in order to make state dump/restore as fast as possible,
+ * we use memory mapped files
+ *
+ * if this causes problems on certain platforms, the APIs
+ * have been written so that it will be very simple to go
+ * back to standard I/O for just those poorly written platforms
+ */
+#define FS_STATE_USE_MMAP
+
+
+#ifdef FS_STATE_USE_MMAP
+#define FS_STATE_INIT_FILESIZE (8 * 1024 * 1024) /* truncate to 8MB initially */
+#include <sys/mman.h>
+#endif
+
+static int fs_stateCreateDump(struct fs_dump_state * state);
+static int fs_stateLoadDump(struct fs_dump_state * state);
+static int fs_stateInvalidateDump(struct fs_dump_state * state);
+static int fs_stateCommitDump(struct fs_dump_state * state);
+static int fs_stateCloseDump(struct fs_dump_state * state);
+
+#ifdef FS_STATE_USE_MMAP
+static int fs_stateSizeFile(struct fs_dump_state * state);
+static int fs_stateResizeFile(struct fs_dump_state * state, size_t min_add);
+static int fs_stateTruncateFile(struct fs_dump_state * state);
+
+static int fs_stateMapFile(struct fs_dump_state * state);
+static int fs_stateUnmapFile(struct fs_dump_state * state);
+
+static int fs_stateIncCursor(struct fs_dump_state * state, size_t len);
+static int fs_stateCheckIOSafety(struct fs_dump_state * state,
+ size_t len);
+#endif
+
+static int fs_stateFillHeader(struct fs_state_header * hdr);
+static int fs_stateCheckHeader(struct fs_state_header * hdr);
+
+static int fs_stateAlloc(struct fs_dump_state * state);
+static int fs_stateFree(struct fs_dump_state * state);
+
+extern afsUUID FS_HostUUID;
+extern char cml_version_number[];
+
+/*
+ * demand attach fs
+ * save all fileserver state
+ *
+ * Holds H_LOCK for the whole operation.  Optionally verifies the host
+ * and callback tables first (fs_state.options.fs_state_verify_before_save),
+ * then creates the dump, saves host and callback state and commits it.
+ *
+ * Returns 0 on success and 1 if any step failed.  A failed consistency
+ * check also yields 1, even though the dump is still written in that case.
+ */
+int
+fs_stateSave(void)
+{
+ int ret = 0, verified = 1;
+ struct fs_dump_state state;
+
+ /* save and restore need to be atomic wrt other host package operations */
+ H_LOCK;
+
+ ViceLog(0, ("fs_stateSave: commencing fileserver state dump\n"));
+
+ if (fs_stateAlloc(&state)) {
+ ViceLog(0, ("fs_stateSave: memory allocation failed; dump aborted\n"));
+ ret = 1;
+ goto done;
+ }
+
+ /* XXX
+ * on busy servers, these checks will inevitably fail since stuff drops H_LOCK
+ * all over the place (with structs left in inconsistent states) while RPCs to
+ * clients happen (grumble, grumble, the host package needs to be rewritten...)
+ *
+ * the current hack is to force the background threads that deal with host and
+ * callback state offline early in the shutdown process, do VShutdown, come
+ * back and wait for those threads to die, THEN do the state dump
+ *
+ * BUT, this still has one flaw -- what do we do about rx worker threads that
+ * are blocked in the host package making an RPC call to a cm???
+ *
+ * perhaps we need a refcounter that keeps track of threads blocked in rpc calls
+ * with H_LOCK dropped (and the host struct likely left in an inconsistent state)
+ *
+ * or better yet, we need to associate a state machine with each host object
+ * (kind of like demand attach Volume structures).
+ *
+ * sigh. I suspect we'll need to revisit this issue
+ */
+
+ /* a failed check does not abort the dump; it only clears 'verified' */
+ if (fs_state.options.fs_state_verify_before_save) {
+ ViceLog(0, ("fs_stateSave: performing internal consistency checks before proceeding with state dump\n"));
+
+ if (h_stateVerify(&state)) {
+ ViceLog(0, ("fs_stateSave: error: host table consistency checks failed; state dump will not be marked clean\n"));
+ verified = 0;
+ ret = 1;
+ }
+
+ if (cb_stateVerify(&state)) {
+ ViceLog(0, ("fs_stateSave: error: callback table consistency checks failed; state dump will not be marked clean\n"));
+ verified = 0;
+ ret = 1;
+ }
+
+ /* if a consistency check asserted the bail flag, reset it */
+ state.bail = 0;
+
+ ViceLog(0, ("fs_stateSave: proceeding with dump\n"));
+ }
+
+ if (fs_stateCreateDump(&state)) {
+ ViceLog(0, ("fs_stateSave: error: dump create failed\n"));
+ ret = 1;
+ goto done;
+ }
+
+ if (h_stateSave(&state)) {
+ ViceLog(0, ("fs_stateSave: error: host state dump failed\n"));
+ ret = 1;
+ goto done;
+ }
+
+ if (cb_stateSave(&state)) {
+ ViceLog(0, ("fs_stateSave: error: callback state dump failed\n"));
+ ret = 1;
+ goto done;
+ }
+
+ /* NOTE(review): presumably fs_stateCommitDump consults state.bail to
+ * leave an unverified dump unmarked as clean -- confirm in that routine */
+ if (!verified) {
+ state.bail = 1;
+ }
+
+ if (fs_stateCommitDump(&state)) {
+ ViceLog(0, ("fs_stateSave: error: dump commit failed\n"));
+ ret = 1;
+ goto done;
+ }
+
+ if (verified) {
+ ViceLog(0, ("fs_stateSave: fileserver state dump completed successfully\n"));
+ } else {
+ ViceLog(0, ("fs_stateSave: fileserver state dump completed, but not marked clean.\n"));
+ ViceLog(0, ("fs_stateSave: please save a copy of '%s' for use by technical support\n",
+ state.fn));
+ }
+
+ /* common exit path: close the dump if one is open, release the state
+ * and drop H_LOCK.
+ * NOTE(review): also reached when fs_stateAlloc fails; assumes state.fd
+ * has a defined value by then -- confirm against fs_stateAlloc */
+ done:
+ if (state.fd >= 0)
+ fs_stateCloseDump(&state);
+ fs_stateFree(&state);
+ H_UNLOCK;
+ return ret;
+}
+
+/*
+ * demand attach fs
+ * restore all fileserver state
+ *
+ * this function must appear as one atomic operation to the host and callback
+ * packages, hence H_LOCK is held for the entirety of the process.
+ */
+int
+fs_stateRestore(void)
+{
+ int ret = 0;
+ struct fs_dump_state state;
+
+ /* save and restore need to be atomic wrt other host package operations */
+ H_LOCK;
+
+ ViceLog(0, ("fs_stateRestore: commencing fileserver state restore\n"));
+
+ if (fs_stateAlloc(&state)) {
+ ViceLog(0, ("fs_stateRestore: memory allocation failed\n"));
+ ret = 1;
+ goto done;
+ }
+
+ if (fs_stateLoadDump(&state)) {
+ ViceLog(0, ("fs_stateRestore: failed to load dump file '%s'\n", state.fn));
+ ret = 1;
+ goto done;
+ }
+
+ if (fs_stateInvalidateDump(&state)) {
+ ViceLog(0, ("fs_stateRestore: failed to invalidate dump file '%s'\n", state.fn));
+ ret = 1;
+ goto done;
+ }
+
+
+ if (state.flags.do_host_restore) {
+ if (h_stateRestore(&state)) {
+ ViceLog(0, ("fs_stateRestore: error: host state restore failed. exiting avoid further corruption\n"));
+ exit(0);
+ }
+ ViceLog(0, ("fs_stateRestore: host table restored\n"));
+
+ if (cb_stateRestore(&state)) {
+ ViceLog(0, ("fs_stateRestore: error: callback state restore failed. exiting to avoid further corruption\n"));
+ exit(0);
+ }
+ ViceLog(0, ("fs_stateRestore: FileEntry and CallBack tables restored\n"));
+
+ if (h_stateRestoreIndices(&state)) {
+ ViceLog(0, ("fs_stateRestore: error: host index remapping failed. exiting to avoid further corruption\n"));
+ exit(0);
+ }
+ ViceLog(0, ("fs_stateRestore: host table indices remapped\n"));
+
+ if (cb_stateRestoreIndices(&state)) {
+ ViceLog(0, ("fs_stateRestore: error: callback index remapping failed. exiting to avoid further corruption\n"));
+ exit(0);
+ }
+ ViceLog(0, ("fs_stateRestore: FileEntry and CallBack indices remapped\n"));
+ }
+
+ ViceLog(0, ("fs_stateRestore: restore phase complete\n"));
+
+ if (fs_state.options.fs_state_verify_after_restore) {
+ ViceLog(0, ("fs_stateRestore: beginning state verification phase\n"));
+
+ if (state.flags.do_host_restore) {
+ if (h_stateVerify(&state)) {
+ ViceLog(0, ("fs_stateRestore: error: host table consistency checks failed; exiting to avoid further corruption\n"));
+ exit(0);
+ }
+
+ if (cb_stateVerify(&state)) {
+ ViceLog(0, ("fs_stateRestore: error: callback table consistency checks failed; exiting to avoid further corruption\n"));
+ exit(0);
+ }
+ }
+
+ ViceLog(0, ("fs_stateRestore: fileserver state verification complete\n"));
+ }
+
+ ViceLog(0, ("fs_stateRestore: restore was successful\n"));
+
+ done:
+ if (state.fd >= 0) {
+ fs_stateInvalidateDump(&state);
+ fs_stateCloseDump(&state);
+ }
+ fs_stateFree(&state);
+ H_UNLOCK;
+ return ret;
+}
+
+ /*
+  * Create a fresh dump file for a state save.
+  *
+  * Any existing file at state->fn is first renamed to "<fn>.old".  The new
+  * file is created/truncated, state->fd and state->mode are set, the
+  * in-memory main header is zeroed, the EOF offset is advanced past the main
+  * header, and (mmap builds only) the file is sized and mapped.  Finally an
+  * invalid header is written so the file is flagged as a dump in progress.
+  *
+  * Returns 0 on success, 1 on failure.  Once state->fd has been set the
+  * caller owns the descriptor and must close it; a descriptor that fails
+  * the fstat check is closed here because the caller never sees it.
+  */
+ static int
+ fs_stateCreateDump(struct fs_dump_state * state)
+ {
+     int fd = -1, ret = 0;
+     char savedump[MAXPATHLEN];
+     struct afs_stat status;
+
+     afs_snprintf(savedump, sizeof(savedump), "%s.old", state->fn);
+
+     /* keep the previous dump around as <fn>.old; the rename result is
+      * not checked */
+     if (afs_stat(state->fn, &status) == 0) {
+         renamefile(state->fn, savedump);
+     }
+
+     if (((fd = afs_open(state->fn,
+                         O_RDWR | O_CREAT | O_TRUNC,
+                         S_IRUSR | S_IWUSR)) == -1) ||
+         (afs_fstat(fd, &status) == -1)) {
+         ViceLog(0, ("fs_stateCreateDump: failed to create state dump file '%s'\n",
+                     state->fn));
+         /* open succeeded but fstat failed: do not leak the descriptor */
+         if (fd != -1) {
+             close(fd);
+         }
+         ret = 1;
+         goto done;
+     }
+
+     state->fd = fd;
+     state->mode = FS_STATE_DUMP_MODE;
+     memset(state->hdr, 0, sizeof(struct fs_state_header));
+     fs_stateIncEOF(state, sizeof(struct fs_state_header));
+
+ #ifdef FS_STATE_USE_MMAP
+     if (fs_stateSizeFile(state)) {
+         ViceLog(0, ("fs_stateCreateDump: failed to resize state dump file '%s'\n",
+                     state->fn));
+         ret = 1;
+         goto done;
+     }
+
+     if (fs_stateMapFile(state)) {
+         ViceLog(0, ("fs_stateCreateDump: failed to memory map state dump file '%s'\n",
+                     state->fn));
+         ret = 1;
+         goto done;
+     }
+ #endif
+
+     /* write a header with valid == 0 until the dump is committed */
+     ret = fs_stateInvalidateDump(state);
+
+  done:
+     return ret;
+ }
+
+ /*
+  * Flag the dump file as not valid.
+  *
+  * Takes a copy of the in-memory main header, clears its `valid' byte, writes
+  * the copy at offset zero of the dump file and syncs it.  state->hdr itself
+  * is left untouched.  In mmap builds the call fails immediately when no
+  * mapping is present.
+  *
+  * Returns 0 on success, 1 on failure.
+  */
+ static int
+ fs_stateInvalidateDump(struct fs_dump_state * state)
+ {
+     afs_uint64 start;
+     struct fs_state_header scratch;
+
+ #ifdef FS_STATE_USE_MMAP
+     if (state->mmap.map == NULL) {
+         return 1;
+     }
+ #endif
+
+     ZeroInt64(start);
+     memcpy(&scratch, state->hdr, sizeof(scratch));
+     scratch.valid = 0;
+
+     /* write a bogus header to flag dump in progress */
+     if (fs_stateWriteHeader(state, &start, &scratch, sizeof(scratch))) {
+         ViceLog(0, ("fs_stateInvalidateDump: failed to invalidate old dump file header '%s'\n",
+                     state->fn));
+         return 1;
+     }
+
+     if (fs_stateSync(state)) {
+         ViceLog(0, ("fs_stateInvalidateDump: failed to sync changes to disk\n"));
+         return 1;
+     }
+
+     return 0;
+ }
+
+ /*
+  * Finish a state dump: flush the payload, then write and flush the final
+  * main header at offset zero.
+  *
+  * The header is filled in by fs_stateFillHeader (which marks it valid); when
+  * state->bail is set the `valid' byte is cleared again before the header is
+  * written, so the dump is kept but not marked clean.
+  *
+  * Returns 0 on success, 1 on failure.
+  */
+ static int
+ fs_stateCommitDump(struct fs_dump_state * state)
+ {
+     afs_uint64 start;
+
+     ZeroInt64(start);
+
+ #ifdef FS_STATE_USE_MMAP
+     /* cut the file back to the recorded end-of-file offset */
+     if (fs_stateTruncateFile(state)) {
+         ViceLog(0, ("fs_stateCommitDump: failed to truncate dump file to proper size\n"));
+         return 1;
+     }
+ #endif
+
+     /* ensure that all pending data I/Os for the state file have been committed
+      * _before_ we make the metadata I/Os */
+     if (fs_stateSync(state)) {
+         ViceLog(0, ("fs_stateCommitDump: failed to sync changes to disk\n"));
+         return 1;
+     }
+
+ #ifdef FS_STATE_USE_MMAP
+     /* XXX madvise may not exist on all platforms, so
+      * we may need to add some ifdefs at some point... */
+     madvise((((char *)state->mmap.map) + sizeof(struct fs_state_header)),
+             state->mmap.size - sizeof(struct fs_state_header),
+             MADV_DONTNEED);
+ #endif
+
+     /* build the header, and write it to disk */
+     fs_stateFillHeader(state->hdr);
+     if (state->bail) {
+         state->hdr->valid = 0;
+     }
+
+     if (fs_stateWriteHeader(state, &start, state->hdr, sizeof(struct fs_state_header))) {
+         ViceLog(0, ("fs_stateCommitDump: failed to write header to dump file '%s'\n",
+                     state->fn));
+         return 1;
+     }
+
+     if (fs_stateSync(state)) {
+         ViceLog(0, ("fs_stateCommitDump: failed to sync new header to disk\n"));
+         return 1;
+     }
+
+     return 0;
+ }
+
+ /*
+  * Open an existing dump file for restore and validate its main header.
+  *
+  * On success state->fd, state->mode and state->file_len are set, the file
+  * is mapped (mmap builds), state->hdr holds the on-disk header, and
+  * state->flags.do_host_restore is set when the dump timestamp is no older
+  * than HOST_STATE_VALID_WINDOW seconds.
+  *
+  * Returns 0 on success, 1 on failure.  Once state->fd has been set the
+  * caller owns the descriptor, even when a later step here fails; a
+  * descriptor that fails the fstat check is closed here.
+  */
+ static int
+ fs_stateLoadDump(struct fs_dump_state * state)
+ {
+     afs_uint64 z;
+     int fd = -1, ret = 0;
+     struct afs_stat status;
+     afs_int32 now = FT_ApproxTime();
+
+     ZeroInt64(z);
+
+     if ((fd = afs_open(state->fn, O_RDWR)) == -1 ||
+         (afs_fstat(fd, &status) == -1)) {
+         ViceLog(0, ("fs_stateLoadDump: failed to load state dump file '%s'\n",
+                     state->fn));
+         /* open succeeded but fstat failed: do not leak the descriptor */
+         if (fd != -1) {
+             close(fd);
+         }
+         ret = 1;
+         goto done;
+     }
+     state->fd = fd;
+     state->mode = FS_STATE_LOAD_MODE;
+     state->file_len = status.st_size;
+
+ #ifdef FS_STATE_USE_MMAP
+     if (fs_stateMapFile(state)) {
+         ViceLog(0, ("fs_stateLoadDump: failed to memory map state dump file '%s'\n",
+                     state->fn));
+         ret = 1;
+         goto done;
+     }
+ #endif
+
+     if (fs_stateReadHeader(state, &z, state->hdr, sizeof(struct fs_state_header))) {
+         ViceLog(0, ("fs_stateLoadDump: failed to read header from dump file '%s'\n",
+                     state->fn));
+         ret = 1;
+         goto done;
+     }
+
+     /* check the validity of the header */
+     if (fs_stateCheckHeader(state->hdr)) {
+         ViceLog(1, ("fs_stateLoadDump: header failed validity checks; not restoring '%s'\n",
+                     state->fn));
+         ret = 1;
+         goto done;
+     }
+
+     /* host/callback state is only restored from a sufficiently recent dump */
+     if ((state->hdr->timestamp + HOST_STATE_VALID_WINDOW) >= now) {
+         state->flags.do_host_restore = 1;
+     } else {
+         ViceLog(0, ("fs_stateLoadDump: warning: dump is too old for host and callback restore; skipping those steps\n"));
+     }
+
+  done:
+     return ret;
+ }
+
+ /*
+  * Release the dump file: drop the mapping (mmap builds) and close the
+  * descriptor.  Neither result is inspected; always returns 0.
+  */
+ static int
+ fs_stateCloseDump(struct fs_dump_state * state)
+ {
+ #ifdef FS_STATE_USE_MMAP
+     (void) fs_stateUnmapFile(state);
+ #endif
+     (void) close(state->fd);
+     return 0;
+ }
+
+ /*
+  * Write `len' bytes from `buf' at the current position of the dump file.
+  *
+  * mmap builds: the mapping is grown first when the write would run past it,
+  * then the bytes are copied to the cursor and the cursor is advanced.
+  * Otherwise a single write(2) is issued and anything other than a full
+  * write counts as failure.
+  *
+  * Returns 0 on success, 1 on failure.
+  */
+ int
+ fs_stateWrite(struct fs_dump_state * state,
+               void * buf, size_t len)
+ {
+ #ifdef FS_STATE_USE_MMAP
+     /* only attempt the resize when the write does not fit */
+     if (fs_stateCheckIOSafety(state, len) && fs_stateResizeFile(state, len)) {
+         ViceLog(0, ("fs_stateWrite: could not resize dump file '%s'\n",
+                     state->fn));
+         return 1;
+     }
+
+     memcpy(state->mmap.cursor, buf, len);
+     fs_stateIncCursor(state, len);
+ #else
+     if (write(state->fd, buf, len) != len) {
+         ViceLog(0, ("fs_stateWrite: write failed\n"));
+         return 1;
+     }
+ #endif
+
+     return 0;
+ }
+
+ /*
+  * Read `len' bytes from the current position of the dump file into `buf'.
+  *
+  * mmap builds: fails when the read would run past the mapping, otherwise
+  * copies from the cursor and advances it.  Otherwise a single read(2) is
+  * issued and anything other than a full read counts as failure.
+  *
+  * Returns 0 on success, 1 on failure.
+  */
+ int
+ fs_stateRead(struct fs_dump_state * state,
+              void * buf, size_t len)
+ {
+ #ifdef FS_STATE_USE_MMAP
+     if (fs_stateCheckIOSafety(state, len)) {
+         ViceLog(0, ("fs_stateRead: read beyond EOF for dump file '%s'\n",
+                     state->fn));
+         return 1;
+     }
+
+     memcpy(buf, state->mmap.cursor, len);
+     fs_stateIncCursor(state, len);
+ #else
+     if (read(state->fd, buf, len) != len) {
+         ViceLog(0, ("fs_stateRead: read failed\n"));
+         return 1;
+     }
+ #endif
+
+     return 0;
+ }
+
+ /*
+  * Gather-write `niov' buffers at the current position of the dump file.
+  *
+  * mmap builds: the mapping is grown when the combined length does not fit,
+  * then each buffer is copied to the cursor in order.  Otherwise a single
+  * writev(2) is issued and anything other than a full write is a failure.
+  *
+  * Returns 0 on success, 1 on failure.
+  */
+ int
+ fs_stateWriteV(struct fs_dump_state * state,
+                struct iovec * iov, int niov)
+ {
+     int i, ret = 0;
+     size_t len = 0;
+
+     /* total number of bytes described by the vector */
+     for (i=0; i < niov; i++) {
+         len += iov[i].iov_len;
+     }
+
+ #ifdef FS_STATE_USE_MMAP
+     if (fs_stateCheckIOSafety(state, len)) {
+         if (fs_stateResizeFile(state, len)) {
+             /* message used to name fs_stateWrite, which misattributed the failure */
+             ViceLog(0, ("fs_stateWriteV: could not resize dump file '%s'\n",
+                         state->fn));
+             ret = 1;
+             goto done;
+         }
+     }
+
+     for (i=0; i < niov; i++) {
+         memcpy(state->mmap.cursor, iov[i].iov_base, iov[i].iov_len);
+         fs_stateIncCursor(state, iov[i].iov_len);
+     }
+ #else
+     if (writev(state->fd, iov, niov) != len) {
+         ViceLog(0, ("fs_stateWriteV: write failed\n"));
+         ret = 1;
+         goto done;
+     }
+ #endif
+
+  done:
+     return ret;
+ }
+
+ /*
+  * Scatter-read into `niov' buffers from the current position of the dump
+  * file.
+  *
+  * mmap builds: fails when the combined length would run past the mapping,
+  * otherwise fills each buffer from the cursor in order.  Otherwise a single
+  * readv(2) is issued and anything other than a full read is a failure.
+  *
+  * Returns 0 on success, 1 on failure.
+  */
+ int
+ fs_stateReadV(struct fs_dump_state * state,
+               struct iovec * iov, int niov)
+ {
+     int i, ret = 0;
+     size_t len = 0;
+
+     /* total number of bytes described by the vector */
+     for (i=0; i < niov; i++) {
+         len += iov[i].iov_len;
+     }
+
+ #ifdef FS_STATE_USE_MMAP
+     if (fs_stateCheckIOSafety(state, len)) {
+         /* message used to name fs_stateRead, which misattributed the failure */
+         ViceLog(0, ("fs_stateReadV: read beyond EOF for dump file '%s'\n",
+                     state->fn));
+         ret = 1;
+         goto done;
+     }
+
+     for (i=0; i < niov; i++) {
+         memcpy(iov[i].iov_base, state->mmap.cursor, iov[i].iov_len);
+         fs_stateIncCursor(state, iov[i].iov_len);
+     }
+ #else
+     if (readv(state->fd, iov, niov) != len) {
+         ViceLog(0, ("fs_stateReadV: read failed\n"));
+         ret = 1;
+         goto done;
+     }
+ #endif
+
+  done:
+     return ret;
+ }
+
+ /*
+  * Position the dump file at `*offset' and write `len' bytes of `hdr' there.
+  * Returns 0 on success, 1 when either the seek or the write fails.
+  */
+ int
+ fs_stateWriteHeader(struct fs_dump_state * state,
+                     afs_uint64 * offset,
+                     void * hdr, size_t len)
+ {
+     if (fs_stateSeek(state, offset)) {
+         ViceLog(0, ("fs_stateWriteHeader: could not seek to correct position in dump file '%s'\n",
+                     state->fn));
+         return 1;
+     }
+
+     if (fs_stateWrite(state, hdr, len)) {
+         ViceLog(0, ("fs_stateWriteHeader: write failed\n"));
+         return 1;
+     }
+
+     return 0;
+ }
+
+ /*
+  * Position the dump file at `*offset' and read `len' bytes into `hdr'.
+  * Returns 0 on success, 1 when either the seek or the read fails.
+  */
+ int
+ fs_stateReadHeader(struct fs_dump_state * state,
+                    afs_uint64 * offset,
+                    void * hdr, size_t len)
+ {
+     if (fs_stateSeek(state, offset)) {
+         ViceLog(0, ("fs_stateReadHeader: could not seek to correct position in dump file '%s'\n",
+                     state->fn));
+         return 1;
+     }
+
+     if (fs_stateRead(state, hdr,len)) {
+         ViceLog(0, ("fs_stateReadHeader: read failed\n"));
+         return 1;
+     }
+
+     return 0;
+ }
+
+#ifdef FS_STATE_USE_MMAP
+ /*
+  * Give a newly created dump file its initial length
+  * (FS_STATE_INIT_FILESIZE) and record that length in state->file_len.
+  * Returns 0 on success, 1 if the truncate call fails.
+  */
+ static int
+ fs_stateSizeFile(struct fs_dump_state * state)
+ {
+     state->file_len = FS_STATE_INIT_FILESIZE;
+     return (afs_ftruncate(state->fd, state->file_len) != 0) ? 1 : 0;
+ }
+
+ /*
+  * Grow the dump file (and its mapping) so that at least `min_add' more
+  * bytes fit.  The file is extended in multiples of FS_STATE_INIT_FILESIZE.
+  *
+  * The current write position is preserved across the unmap/remap:
+  * fs_stateMapFile always rewinds the cursor and offset to the start of the
+  * mapping, which would otherwise make the write that triggered the resize
+  * land on top of the file header.
+  *
+  * Returns 0 on success, 1 on failure.  On failure no mapping is left
+  * installed (state->mmap.map is NULL).
+  */
+ static int
+ fs_stateResizeFile(struct fs_dump_state * state, size_t min_add)
+ {
+     int ret = 0;
+     afs_foff_t inc;
+     afs_foff_t saved_offset;
+
+     /* remember where we were; the remap below resets this to zero */
+     saved_offset = state->mmap.offset;
+
+     /* this whole function only exists in FS_STATE_USE_MMAP builds, so the
+      * mapping calls need no further preprocessor guard */
+     fs_stateUnmapFile(state);
+
+     /* the old mapping is gone; never leave its address behind, otherwise a
+      * later unmap on an error path could hit an unrelated mapping */
+     state->mmap.map = NULL;
+     state->mmap.cursor = NULL;
+     state->mmap.size = 0;
+
+     inc = ((min_add / FS_STATE_INIT_FILESIZE)+1) * FS_STATE_INIT_FILESIZE;
+     state->file_len += inc;
+
+     if (afs_ftruncate(state->fd, state->file_len) != 0) {
+         ViceLog(0, ("fs_stateResizeFile: truncate failed\n"));
+         ret = 1;
+         goto done;
+     }
+
+     if (fs_stateMapFile(state)) {
+         ViceLog(0, ("fs_stateResizeFile: remapping memory mapped file failed\n"));
+         ret = 1;
+         goto done;
+     }
+
+     /* put the cursor back where the caller left it */
+     state->mmap.offset = saved_offset;
+     state->mmap.cursor = (void *) (((char *) state->mmap.map) + saved_offset);
+
+  done:
+     return ret;
+ }
+
+ /*
+  * Cut the dump file back to state->eof_offset.
+  *
+  * Without large file support only the low 32 bits of the offset can be
+  * passed on; an offset with non-zero high bits is rejected, in the same way
+  * fs_stateSeek rejects it, instead of silently truncating to the wrong
+  * length.
+  *
+  * Returns 0 on success, 1 on failure.
+  */
+ static int
+ fs_stateTruncateFile(struct fs_dump_state * state)
+ {
+     int ret = 0;
+
+ #ifdef AFS_LARGEFILE_ENV
+     if (afs_ftruncate(state->fd, state->eof_offset) != 0) {
+         ret = 1;
+     }
+ #else
+     afs_uint32 hi, lo;
+     SplitInt64(state->eof_offset, hi, lo);
+     if (hi) {
+         /* offset not representable without large file support */
+         ret = 1;
+     } else if (afs_ftruncate(state->fd, lo) != 0) {
+         ret = 1;
+     }
+ #endif
+
+     return ret;
+ }
+#endif
+
+#ifdef FS_STATE_USE_MMAP
+ /*
+  * Map the first state->file_len bytes of the dump file, shared, and point
+  * the cursor at the start of the mapping (offset 0).
+  *
+  * Protection depends on state->mode: read+write when loading (the header
+  * gets invalidated in place), write-only when dumping.
+  *
+  * Returns 0 on success, 1 on failure (unknown mode or mmap failure).  After
+  * an mmap failure state->mmap.map is NULL and state->mmap.size is 0.
+  */
+ static int
+ fs_stateMapFile(struct fs_dump_state * state)
+ {
+     int ret = 0, flags;
+
+     switch(state->mode) {
+     case FS_STATE_LOAD_MODE:
+         flags = PROT_READ | PROT_WRITE; /* loading involves a header invalidation */
+         break;
+     case FS_STATE_DUMP_MODE:
+         flags = PROT_WRITE;
+         break;
+     default:
+         ViceLog(0, ("fs_stateMapFile: invalid dump state mode\n"));
+         return 1;
+     }
+
+     state->mmap.map = afs_mmap(NULL,
+                                state->file_len,
+                                flags,
+                                MAP_SHARED,
+                                state->fd,
+                                0);
+
+     if (state->mmap.map == MAP_FAILED) {
+         state->mmap.size = 0;
+         state->mmap.map = NULL;
+         ViceLog(0, ("fs_stateMapFile: failed to memory map file '%s'\n",
+                     state->fn));
+         ret = 1;
+         goto done;
+     }
+
+     state->mmap.size = state->file_len;
+     state->mmap.cursor = state->mmap.map;
+     state->mmap.offset = 0;
+
+     /* for state loading, accesses will be sequential, so let's give
+      * the VM subsystem a heads up */
+     if (state->mode == FS_STATE_LOAD_MODE) {
+         /* XXX madvise may not exist on all platforms, so
+          * we may need to add some ifdefs at some point... */
+         /* advice values are an enumeration, not a bit mask: OR-ing them
+          * yields some other single advice, so each hint gets its own call.
+          * The calls are purely advisory; their results are not checked. */
+         madvise(state->mmap.map, state->mmap.size, MADV_SEQUENTIAL);
+         madvise(state->mmap.map, state->mmap.size, MADV_WILLNEED);
+ #ifdef AFS_SUN510_ENV
+         madvise(state->mmap.map, state->mmap.size, MADV_ACCESS_LWP); /* added in solaris 9 12/02 */
+ #endif
+     }
+
+  done:
+     return ret;
+ }
+
+ /*
+  * Drop the mapping of the dump file, if there is one.
+  *
+  * Calling this with no mapping installed (state->mmap.map == NULL, e.g.
+  * after a failed fs_stateMapFile) is a no-op.  After a successful unmap the
+  * map/cursor pointers are cleared so a second call cannot unmap an
+  * unrelated region that happens to reuse the address.  The recorded offset
+  * is left alone.
+  *
+  * Returns 0 on success, 1 if munmap fails.
+  */
+ static int
+ fs_stateUnmapFile(struct fs_dump_state * state)
+ {
+     int ret = 0;
+
+     if (state->mmap.map == NULL) {
+         goto done;
+     }
+
+     if (munmap(state->mmap.map, state->mmap.size) == -1) {
+         ViceLog(0, ("fs_stateUnmapFile: failed to unmap dump file '%s'\n",
+                     state->fn));
+         ret = 1;
+         goto done;
+     }
+
+     state->mmap.map = NULL;
+     state->mmap.cursor = NULL;
+     state->mmap.size = 0;
+
+  done:
+     return ret;
+ }
+#endif /* FS_STATE_USE_MMAP */
+
+ #ifdef FS_STATE_USE_MMAP
+ /*
+  * Flush the mapped dump file to disk with a synchronous msync over the
+  * whole mapping.  The msync result is not inspected; always returns 0.
+  */
+ int
+ fs_stateSync(struct fs_dump_state * state)
+ {
+     msync(state->mmap.map, state->mmap.size, MS_SYNC);
+     return 0;
+ }
+ #else /* !FS_STATE_USE_MMAP */
+ /*
+  * Flush the dump file to disk with fsync.
+  * Returns 0 on success, 1 if fsync fails.
+  */
+ int
+ fs_stateSync(struct fs_dump_state * state)
+ {
+     return (fsync(state->fd) == -1) ? 1 : 0;
+ }
+ #endif /* !FS_STATE_USE_MMAP */
+
+ /*
+  * Advance the recorded end-of-file offset of the dump by `len' bytes.
+  * Always returns 0.
+  */
+ int
+ fs_stateIncEOF(struct fs_dump_state * state, afs_int32 len)
+ {
+     afs_uint64 delta;
+
+     /* widen len to 64 bits (high word zero) before adding */
+     FillInt64(delta, 0, len);
+     AddUInt64(state->eof_offset, delta, &state->eof_offset);
+     return 0;
+ }
+
+#ifdef FS_STATE_USE_MMAP
+ /*
+  * Move the mapping cursor and the matching byte offset forward by `len'.
+  * No bounds check is made here.  Always returns 0.
+  */
+ static int
+ fs_stateIncCursor(struct fs_dump_state * state, size_t len)
+ {
+     state->mmap.cursor = (void *) (((char *) state->mmap.cursor) + len);
+     state->mmap.offset += len;
+     return 0;
+ }
+
+ /*
+  * Report whether an access of `len' bytes at the current offset would run
+  * past the end of the mapping.
+  * Returns 1 when it would (unsafe), 0 when it fits.
+  */
+ static int
+ fs_stateCheckIOSafety(struct fs_dump_state * state, size_t len)
+ {
+     return ((state->mmap.offset + len) > state->mmap.size) ? 1 : 0;
+ }
+#endif /* FS_STATE_USE_MMAP */
+
+#ifdef FS_STATE_USE_MMAP
+ /*
+  * mmap variant: move the cursor to absolute position `*offset' within the
+  * mapping and record that position in state->mmap.offset.
+  *
+  * Without AFS_LARGEFILE_ENV only the low 32 bits are recorded and a
+  * non-zero high word makes the call return 1 (the cursor is still moved).
+  * The target is not checked against the mapping size.
+  *
+  * Returns 0 on success, 1 on failure.
+  */
+ int
+ fs_stateSeek(struct fs_dump_state * state, afs_uint64 * offset)
+ {
+     int ret = 0;
+     char * base;
+     afs_uint32 hi, lo;
+
+     SplitInt64(*offset, hi, lo);
+
+     /* update cursor */
+     base = (char *) state->mmap.map;
+ #ifdef AFS_64BIT_ENV
+     state->mmap.cursor = (void *) (base + *offset);
+ #else
+     state->mmap.cursor = (void *) (base + lo);
+ #endif
+
+     /* update offset */
+ #ifdef AFS_LARGEFILE_ENV
+     state->mmap.offset = *offset;
+ #else
+     if (hi)
+         ret = 1;
+     state->mmap.offset = lo;
+ #endif
+
+     return ret;
+ }
+#else /* !FS_STATE_USE_MMAP */
+ /*
+  * file-descriptor variant: lseek the dump file to absolute position
+  * `*offset'.
+  *
+  * Without AFS_LARGEFILE_ENV an offset with a non-zero high word cannot be
+  * honoured and is rejected without seeking.
+  *
+  * Returns 0 on success, 1 on failure.
+  */
+ int
+ fs_stateSeek(struct fs_dump_state * state, afs_uint64 * offset)
+ {
+     int ret = 0;
+ #ifndef AFS_LARGEFILE_ENV
+     afs_uint32 high, low;
+
+     SplitInt64(*offset, high, low);
+     /* the original jumped to a `done' label that this function never
+      * defined, which could not compile in this configuration */
+     if (high) {
+         ret = 1;
+     } else if (afs_lseek(state->fd, low, SEEK_SET) == -1) {
+         ret = 1;
+     }
+ #else
+     if (afs_lseek(state->fd, *offset, SEEK_SET) == -1)
+         ret = 1;
+ #endif
+     return ret;
+ }
+#endif /* !FS_STATE_USE_MMAP */
+
+ /*
+  * Populate the main dump header for this running server: magic/version
+  * stamp, current time, server UUID, build-dependent sanity fields
+  * (sys name id, endianness, detailed-stats flag) and the version string.
+  * The header is marked valid.  Offset and reserved fields are not touched.
+  *
+  * A version string too long for the header field is truncated and a
+  * warning is logged.  Always returns 0.
+  */
+ static int
+ fs_stateFillHeader(struct fs_state_header * hdr)
+ {
+     size_t needed;
+
+     hdr->stamp.magic = FS_STATE_MAGIC;
+     hdr->stamp.version = FS_STATE_VERSION;
+     hdr->timestamp = FT_ApproxTime();
+     hdr->server_uuid = FS_HostUUID;
+     hdr->valid = 1;
+
+ #ifdef SYS_NAME_ID
+     hdr->sys_name = SYS_NAME_ID;
+ #else
+     hdr->sys_name = 0xFFFFFFFF;
+ #endif
+
+ #ifdef AFSBIG_ENDIAN
+     hdr->endianness = 1;
+ #else
+     hdr->endianness = 0;
+ #endif
+
+ #ifdef FS_STATS_DETAILED
+     hdr->stats_detailed = 1;
+ #else
+     hdr->stats_detailed = 0;
+ #endif
+
+     /* strlcpy reports the length it wanted to copy; anything that does not
+      * fit in the field means the string was cut short */
+     needed = strlcpy(hdr->server_version_string, cml_version_number,
+                      sizeof(hdr->server_version_string));
+     if (needed >= sizeof(hdr->server_version_string)) {
+         ViceLog(0, ("fs_stateFillHeader: WARNING -- cml_version_number field truncated\n"));
+     }
+
+     return 0;
+ }
+
+ /*
+  * Decide whether a main dump header read from disk may be restored by this
+  * server.  The checks form a single else-if chain, so only the first
+  * failing check is logged:
+  *   valid flag, endianness, magic number, format version,
+  *   detailed-stats build flag, server UUID.
+  * A differing server version string is logged but does not fail the check.
+  *
+  * Returns 0 when the header is acceptable, 1 otherwise.
+  */
+ static int
+ fs_stateCheckHeader(struct fs_state_header * hdr)
+ {
+ int ret = 0;
+
+ if (!hdr->valid) {
+ ViceLog(0, ("fs_stateCheckHeader: dump was previously flagged invalid\n"));
+ ret = 1;
+ }
+ #ifdef AFSBIG_ENDIAN
+ else if (!hdr->endianness) {
+ ViceLog(0, ("fs_stateCheckHeader: wrong endianness\n"));
+ ret = 1;
+ }
+ #else /* AFSLITTLE_ENDIAN */
+ else if (hdr->endianness) {
+ ViceLog(0, ("fs_stateCheckHeader: wrong endianness\n"));
+ ret = 1;
+ }
+ #endif /* AFSLITTLE_ENDIAN */
+
+ else if (hdr->stamp.magic != FS_STATE_MAGIC) {
+ ViceLog(0, ("fs_stateCheckHeader: invalid dump header\n"));
+ ret = 1;
+ }
+ else if (hdr->stamp.version != FS_STATE_VERSION) {
+ ViceLog(0, ("fs_stateCheckHeader: unknown dump format version number\n"));
+ ret = 1;
+ }
+
+ #ifdef FS_STATS_DETAILED
+ else if (!hdr->stats_detailed) {
+ ViceLog(0, ("fs_stateCheckHeader: wrong config flags\n"));
+ ret = 1;
+ }
+ #else /* FS_STATS_DETAILED */
+ else if (hdr->stats_detailed) {
+ ViceLog(0, ("fs_stateCheckHeader: wrong config flags\n"));
+ ret = 1;
+ }
+ #endif /* FS_STATS_DETAILED */
+
+ else if (!afs_uuid_equal(&hdr->server_uuid, &FS_HostUUID)) {
+ ViceLog(0, ("fs_stateCheckHeader: server UUID does not match this server's UUID\n"));
+ ret = 1;
+ }
+
+ /* the cml_version_string is included for informational purposes only. If someone ever
+ * wants to limit state dump reloading based upon the contents of this string, just
+ * uncomment the following code. uncommenting this code is _strongly discouraged_ because
+ * we already make use of the version stamps in the various dump headers to deal with
+ * data structure version incompatibilities.
+ else if (strncmp(hdr->server_version_string, cml_version_number,
+ sizeof(hdr->server_version_string)) != 0) {
+ ViceLog(0, ("fs_stateCheckHeader: dump from different server version\n"));
+ ret = 1;
+ }
+ */
+
+ /* informational only: ret is deliberately left at 0 in this branch */
+ else if (strncmp(hdr->server_version_string, cml_version_number,
+ sizeof(hdr->server_version_string)) != 0) {
+ ViceLog(0, ("fs_stateCheckHeader: dump from different server version ; attempting state reload anyway\n"));
+ }
+
+
+ return ret;
+ }
+
+ /*
+  * Initialise a dump-state object: zero it, mark the descriptor as not open
+  * (-1), point it at the server's state file path and allocate the five
+  * scratch header buffers.
+  *
+  * Returns 0 on success, 1 if any allocation failed.  Nothing is released
+  * here on failure; buffers that were obtained stay attached to `state'.
+  */
+ static int
+ fs_stateAlloc(struct fs_dump_state * state)
+ {
+     memset(state, 0, sizeof(struct fs_dump_state));
+     state->fd = -1;
+     state->fn = AFSDIR_SERVER_FSSTATE_FILEPATH;
+
+     state->hdr = (struct fs_state_header *)
+         malloc(sizeof(struct fs_state_header));
+     state->h_hdr = (struct host_state_header *)
+         malloc(sizeof(struct host_state_header));
+     state->cb_hdr = (struct callback_state_header *)
+         malloc(sizeof(struct callback_state_header));
+     state->cb_timeout_hdr = (struct callback_state_timeout_header *)
+         malloc(sizeof(struct callback_state_timeout_header));
+     state->cb_fehash_hdr = (struct callback_state_fehash_header *)
+         malloc(sizeof(struct callback_state_fehash_header));
+
+     if (!state->hdr || !state->h_hdr || !state->cb_hdr ||
+         !state->cb_timeout_hdr || !state->cb_fehash_hdr) {
+         return 1;
+     }
+     return 0;
+ }
+
+ /*
+  * Release every heap buffer hanging off a dump-state object: the five
+  * header buffers and the three index-map entry arrays.  Pointers that are
+  * NULL are skipped.  The pointers themselves are not reset.
+  * Always returns 0.
+  */
+ static int
+ fs_stateFree(struct fs_dump_state * state)
+ {
+     void * owned[8];
+     int i;
+
+     owned[0] = state->hdr;
+     owned[1] = state->h_hdr;
+     owned[2] = state->cb_hdr;
+     owned[3] = state->cb_timeout_hdr;
+     owned[4] = state->cb_fehash_hdr;
+     owned[5] = state->h_map.entries;
+     owned[6] = state->fe_map.entries;
+     owned[7] = state->cb_map.entries;
+
+     for (i = 0; i < 8; i++) {
+         if (owned[i])
+             free(owned[i]);
+     }
+     return 0;
+ }
+
+#endif /* AFS_DEMAND_ATTACH_FS */
--- /dev/null
+/*
+ * Copyright 2006, Sine Nomine Associates and others.
+ * All Rights Reserved.
+ *
+ * This software has been released under the terms of the IBM Public
+ * License. For details, see the LICENSE file in the top-level source
+ * directory or online at http://www.openafs.org/dl/license10.html
+ */
+
+/*
+ * demand attach fs
+ * fileserver state serialization
+ */
+
+#ifndef _AFS_TVICED_SERIALIZE_STATE_H
+#define _AFS_TVICED_SERIALIZE_STATE_H
+
+#ifdef AFS_DEMAND_ATTACH_FS
+
+ /* magic number and format version carried in fs_state_header.stamp */
+ #define FS_STATE_MAGIC 0x62FA841C
+ #define FS_STATE_VERSION 2
+
+ /* presumably carried in host_state_header.stamp -- confirm against host.c */
+ #define HOST_STATE_MAGIC 0x7B8C9DAE
+ #define HOST_STATE_VERSION 2
+
+ /* presumably the magic of host_state_entry_header -- confirm against host.c */
+ #define HOST_STATE_ENTRY_MAGIC 0xA8B9CADB
+
+ /* presumably carried in callback_state_header.stamp -- confirm against callback.c */
+ #define CALLBACK_STATE_MAGIC 0x89DE67BC
+ #define CALLBACK_STATE_VERSION 1
+
+ /* presumably the magics of the timeout, fehash and per-FE entry headers */
+ #define CALLBACK_STATE_TIMEOUT_MAGIC 0x99DD5511
+ #define CALLBACK_STATE_FEHASH_MAGIC 0x77BB33FF
+ #define CALLBACK_STATE_ENTRY_MAGIC 0x54637281
+
+ /* presumably carried in active_volume_state_header.stamp */
+ #define ACTIVE_VOLUME_STATE_MAGIC 0xAC7557CA
+ #define ACTIVE_VOLUME_STATE_VERSION 1
+
+ /* presumably the magic of active_volume_state_avehash_header */
+ #define ACTIVE_VOLUME_STATE_AVEHASH_MAGIC 0xBADDF00D
+
+ /* maximum age, in seconds, of a dump whose host/callback state is still restored */
+ #define HOST_STATE_VALID_WINDOW 1800 /* 30 minutes */
+
+/*
+ * on-disk structures
+ */
+ /* 8 byte stamp that opens the main, host, callback and active volume headers */
+ struct disk_version_stamp {
+ afs_uint32 magic; /* identifies the kind of header that follows */
+ afs_uint32 version; /* format version of that header's data */
+ };
+
+ /* 1024 byte header structure; read from and written to offset 0 of the
+  * state dump file */
+ struct fs_state_header {
+ struct disk_version_stamp stamp; /* version stamp */
+ afs_uint32 timestamp; /* timestamp of save */
+ afs_uint32 sys_name; /* sys name id for this machine */
+ afsUUID server_uuid; /* server's UUID */
+ byte valid; /* whether header contents are valid */
+ byte endianness; /* endianness sanity check (0 for LE, 1 for BE) */
+ byte stats_detailed; /* fs stats detailed sanity check */
+ byte padding1[1]; /* padding */
+ afs_uint32 reserved1[23]; /* for expansion */
+ afs_uint64 avol_offset; /* offset of active volumes structure */
+ afs_uint64 h_offset; /* offset of host_state_header structure */
+ afs_uint64 cb_offset; /* offset of callback_state_header structure */
+ afs_uint64 vlru_offset; /* offset of vlru state structure */
+ afs_uint32 reserved2[56]; /* for expansion */
+ char server_version_string[128]; /* version string from AFS_component_version_number.c */
+ afs_uint32 reserved3[128]; /* for expansion */
+ };
+
+/*
+ * host package serialization
+ */
+
+ /* 256 byte header for the host state data; located in the dump file through
+  * fs_state_header.h_offset */
+ struct host_state_header {
+ struct disk_version_stamp stamp; /* host state version stamp */
+ afs_uint32 records; /* number of stored host records */
+ afs_uint32 index_max; /* max index value encountered */
+ afs_uint32 reserved[60]; /* for expansion */
+ };
+
+ /* 32 byte host entry header */
+ struct host_state_entry_header {
+ afs_uint32 magic; /* stamp */
+ afs_uint32 len; /* number of bytes in this record */
+ afs_uint32 interfaces; /* number of interfaces included in record */
+ afs_uint32 hcps; /* number of hcps entries in record */
+ afs_uint32 reserved[4]; /* pads the header to 32 bytes; presumably for expansion */
+ };
+
+ /* 36 byte host entry structure; the size is the same in both
+  * FS_STATS_DETAILED configurations because the flag byte is replaced by a
+  * padding byte */
+ struct hostDiskEntry {
+ afs_uint32 host; /* IP address of host interface that is
+ * currently being used, in network
+ * byte order */
+ afs_uint16 port; /* port address of host */
+ afs_uint16 hostFlags; /* bit map */
+ byte Console; /* XXXX This host is a console */
+ byte hcpsfailed; /* Retry the cps call next time */
+ byte hcps_valid; /* prlist_val not null */
+ #if FS_STATS_DETAILED
+ byte InSameNetwork; /*Is host's addr in the same network as
+ * the File Server's? */
+ #else
+ byte padding1[1]; /* for padding */
+ #endif /* FS_STATS_DETAILED */
+ afs_uint32 hcps_len; /* length of hcps */
+ afs_uint32 LastCall; /* time of last call from host */
+ afs_uint32 ActiveCall; /* time of any call but gettime */
+ afs_uint32 cpsCall; /* time of last cps call from this host */
+ afs_uint32 cblist; /* Call back list for this host */
+ afs_uint32 index; /* index for correlating w/ callback dumps */
+ };
+
+/*
+ * callback package serialization
+ */
+
+ /* 512 byte header for the callback state data; located in the dump file
+  * through fs_state_header.cb_offset */
+ struct callback_state_header {
+ struct disk_version_stamp stamp; /* callback state version stamp */
+ afs_uint32 nFEs; /* number of FileEntry records */
+ afs_uint32 nCBs; /* number of CallBack records */
+ afs_uint32 fe_max; /* max FileEntry index */
+ afs_uint32 cb_max; /* max CallBack index */
+ afs_int32 tfirst; /* first valid timeout */
+ afs_uint32 reserved[115]; /* for expansion */
+ afs_uint64 timeout_offset; /* offset of timeout queue heads */
+ afs_uint64 fehash_offset; /* offset of file entry hash buckets */
+ afs_uint64 fe_offset; /* offset of first file entry */
+ };
+
+ /* 32 byte header preceding the callback timeout records */
+ struct callback_state_timeout_header {
+ afs_uint32 magic; /* magic number for timeout header */
+ afs_uint32 len; /* total length of header and timeout records */
+ afs_uint32 records; /* number of timeout records */
+ afs_uint32 reserved[5]; /* pads the header to 32 bytes */
+ };
+
+ /* 32 byte header preceding the file entry hash bucket heads */
+ struct callback_state_fehash_header {
+ afs_uint32 magic; /* magic number for fehash header */
+ afs_uint32 len; /* total length of header and fehash bucket heads */
+ afs_uint32 records; /* number of hash buckets */
+ afs_uint32 reserved[5]; /* pads the header to 32 bytes */
+ };
+
+ /* 32 byte header preceding each file entry (FE) record */
+ struct callback_state_entry_header {
+ afs_uint32 magic; /* magic number for FE entry */
+ afs_uint32 len; /* number of bytes in this record */
+ afs_uint32 nCBs; /* number of callbacks for this FE */
+ afs_uint32 reserved[5]; /* pads the header to 32 bytes */
+ };
+
+ /* FileEntry image paired with an index value; presumably the entry's index
+  * in the saving process, used as the `old' side of the fe_map -- confirm
+  * against callback.c */
+ struct FEDiskEntry {
+ struct FileEntry fe;
+ afs_uint32 index;
+ };
+
+ /* CallBack image paired with an index value; presumably the entry's index
+  * in the saving process, used as the `old' side of the cb_map -- confirm
+  * against callback.c */
+ struct CBDiskEntry {
+ struct CallBack cb;
+ afs_uint32 index;
+ };
+
+/*
+ * active volumes state serialization
+ *
+ * these structures are meant to support
+ * automated salvaging of active volumes
+ * in the event of a fileserver crash
+ */
+
+ /* 512 byte header */
+ struct active_volume_state_header {
+ struct disk_version_stamp stamp; /* active volume state version stamp */
+ afs_uint32 nAVEs; /* number of ActiveVolumeEntry records */
+ afs_uint32 init_timestamp; /* timestamp of AVE initialization */
+ afs_uint32 update_timetamp; /* timestamp of last AVE update (field name is spelled this way) */
+ afs_uint32 reserved[119]; /* for expansion */
+ afs_uint64 avehash_offset; /* offset of active volume entry hash buckets */
+ afs_uint64 ave_offset; /* offset of first active volume entry */
+ };
+
+ /* 32 byte header preceding the active volume entry hash bucket heads */
+ struct active_volume_state_avehash_header {
+ afs_uint32 magic; /* magic number for avehash header */
+ afs_uint32 len; /* total length of header and avehash bucket heads */
+ afs_uint32 records; /* number of hash buckets */
+ afs_uint32 reserved[5]; /* pads the header to 32 bytes */
+ };
+
+ /* one avehash bucket head; presumably refers to the first AVDiskEntry of the chain */
+ typedef afs_uint32 active_volume_state_avehash_entry;
+
+ /* active volume entry (12 bytes) */
+ struct AVDiskEntry {
+ afs_uint32 volume; /* presumably the volume id -- confirm */
+ afs_uint32 partition; /* presumably the partition holding the volume -- confirm */
+ afs_uint32 hash_next; /* presumably the next entry in the same hash chain -- confirm */
+ };
+
+
+/*
+ * dump runtime state
+ */
+ /* one old-index -> new-index pair; the same entry type backs the h_map,
+  * fe_map and cb_map tables of struct fs_dump_state, so despite the wording
+  * of the field comments below it is not specific to hosts */
+ struct idx_map_entry_t {
+ afs_uint32 old_idx; /* host hash id from last runtime */
+ afs_uint32 new_idx; /* host hash id for this runtime */
+ };
+
+
+/* verification process sanity check constants
+ *
+ * make them fairly large so we don't get
+ * false positives
+ */
+#define FS_STATE_H_MAX_UUID_HASH_CHAIN_LEN 100000 /* max elements in a host uuid-hash chain */
+#define FS_STATE_H_MAX_ADDR_HASH_CHAIN_LEN 2000000 /* max elements in a host ipv4-hash chain */
+#define FS_STATE_FE_MAX_HASH_CHAIN_LEN 100000 /* max elements in a FE fid-hash chain */
+#define FS_STATE_FCB_MAX_LIST_LEN 100000 /* max elements in a per-FE CB list */
+#define FS_STATE_HCB_MAX_LIST_LEN 100000 /* max elements in a per-host CB list */
+#define FS_STATE_TCB_MAX_LIST_LEN 100000 /* max elements in a per-timeout CB list */
+
+
+/*
+ * main state serialization state structure
+ */
+
+ /* per-operation working state shared by the save and restore code paths;
+  * set up by fs_stateAlloc and torn down by fs_stateFree */
+ struct fs_dump_state {
+ enum {
+ FS_STATE_DUMP_MODE,
+ FS_STATE_LOAD_MODE
+ } mode; /* selects the mapping protection in fs_stateMapFile */
+ struct {
+ byte do_host_restore; /* whether host restore should be done */
+ byte some_steps_skipped; /* whether some steps were skipped */
+ byte warnings_generated; /* whether any warnings were generated during restore */
+ } flags;
+ afs_fsize_t file_len; /* current length of the dump file; also the length that gets mapped */
+ int fd; /* fd of the current dump file */
+ int bail; /* non-zero if something went wrong */
+ char * fn; /* name of the current dump file */
+ struct { /* memory map of dump file */
+ void * map; /* base address of the mapping, NULL when not mapped */
+ void * cursor; /* current read/write address inside the mapping */
+ afs_foff_t offset; /* byte offset of cursor from map */
+ afs_fsize_t size; /* length of the mapping */
+ } mmap;
+ struct fs_state_header * hdr; /* main header */
+ struct host_state_header * h_hdr; /* header for host state data */
+ struct callback_state_header * cb_hdr; /* header for callback state data */
+ struct callback_state_timeout_header * cb_timeout_hdr;
+ struct callback_state_fehash_header * cb_fehash_hdr;
+ afs_uint64 eof_offset; /* current end of file offset */
+ struct {
+ int len; /* number of host entries in map */
+ struct idx_map_entry_t * entries;
+ } h_map;
+ struct { /* presumably the FileEntry old->new index map -- confirm against callback.c */
+ int len;
+ struct idx_map_entry_t * entries;
+ } fe_map;
+ struct { /* presumably the CallBack old->new index map -- confirm against callback.c */
+ int len;
+ struct idx_map_entry_t * entries;
+ } cb_map;
+ };
+
+
+/* prototypes */
+
+/* serialize_state.c */
+extern int fs_stateWrite(struct fs_dump_state * state,
+ void * buf, size_t len);
+extern int fs_stateRead(struct fs_dump_state * state,
+ void * buf, size_t len);
+extern int fs_stateWriteV(struct fs_dump_state * state,
+ struct iovec * iov, int niov);
+extern int fs_stateReadV(struct fs_dump_state * state,
+ struct iovec * iov, int niov);
+extern int fs_stateSync(struct fs_dump_state * state);
+extern int fs_stateWriteHeader(struct fs_dump_state * state,
+ afs_uint64 * offset,
+ void * hdr, size_t len);
+extern int fs_stateReadHeader(struct fs_dump_state * state,
+ afs_uint64 * offset,
+ void * hdr, size_t len);
+extern int fs_stateIncEOF(struct fs_dump_state * state,
+ afs_int32 len);
+extern int fs_stateSeek(struct fs_dump_state * state,
+ afs_uint64 * offset);
+
+/* host.c */
+extern int h_stateSave(struct fs_dump_state * state);
+extern int h_stateRestore(struct fs_dump_state * state);
+extern int h_stateRestoreIndices(struct fs_dump_state * state);
+extern int h_stateVerify(struct fs_dump_state * state);
+extern int h_OldToNew(struct fs_dump_state * state, afs_uint32 old, afs_uint32 * new);
+
+/* callback.c */
+extern int cb_stateSave(struct fs_dump_state * state);
+extern int cb_stateRestore(struct fs_dump_state * state);
+extern int cb_stateRestoreIndices(struct fs_dump_state * state);
+extern int cb_stateVerify(struct fs_dump_state * state);
+extern int cb_stateVerifyHCBList(struct fs_dump_state * state, struct host * host);
+extern int fe_OldToNew(struct fs_dump_state * state, afs_uint32 old, afs_uint32 * new);
+extern int cb_OldToNew(struct fs_dump_state * state, afs_uint32 old, afs_uint32 * new);
+
+#endif /* AFS_DEMAND_ATTACH_FS */
+#endif /* _AFS_TVICED_SERIALIZE_STATE_H */
--- /dev/null
+/*
+ * Copyright 2006, Sine Nomine Associates and others.
+ * All Rights Reserved.
+ *
+ * This software has been released under the terms of the IBM Public
+ * License. For details, see the LICENSE file in the top-level source
+ * directory or online at http://www.openafs.org/dl/license10.html
+ */
+
+/*
+ * demand attach fs
+ * fileserver state serialization
+ *
+ * state analyzer
+ */
+
+#include <afsconfig.h>
+#include <afs/param.h>
+
+RCSID
+ ("$Header$");
+
+#include <stdio.h>
+#include <errno.h>
+#include <sys/file.h>
+#include <netdb.h>
+#include <netinet/in.h>
+#include <time.h>
+
+#ifdef HAVE_STRING_H
+#include <string.h>
+#else
+#ifdef HAVE_STRINGS_H
+#include <strings.h>
+#endif
+#endif
+
+#include <afs/stds.h>
+#include <rx/xdr.h>
+#include <afs/assert.h>
+#include <lwp.h>
+#include <lock.h>
+#include <afs/afsint.h>
+#include <afs/rxgen_consts.h>
+#include <afs/nfs.h>
+#include <afs/errors.h>
+#include <afs/ihandle.h>
+#include <afs/vnode.h>
+#include <afs/volume.h>
+#ifdef AFS_ATHENA_STDENV
+#include <krb.h>
+#endif
+#include <afs/acl.h>
+#include <afs/ptclient.h>
+#include <afs/prs_fs.h>
+#include <afs/auth.h>
+#include <afs/afsutil.h>
+#include <rx/rx.h>
+#include <afs/cellconfig.h>
+#include <stdlib.h>
+#include "../util/afsutil_prototypes.h"
+#include "../viced/viced.h"
+#include "../viced/host.h"
+#include "../viced/callback.h"
+#include "serialize_state.h"
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+/*@+fcnmacros +macrofcndecl@*/
+#ifdef O_LARGEFILE /* explicit 64-bit file API available: map the afs_* wrappers onto the *64 calls */
+#ifdef S_SPLINT_S
+extern off64_t afs_lseek(int FD, off64_t O, int F);
+#endif /*S_SPLINT_S */
+#define afs_lseek(FD, O, F) lseek64(FD, (off64_t)(O), F)
+#define afs_stat stat64
+#define afs_fstat fstat64
+#define afs_open open64
+#define afs_fopen fopen64
+#define afs_mmap mmap64
+#ifdef AFS_AIX_ENV
+extern void * mmap64(); /* ugly hack since aix build env appears to be somewhat broken */
+#endif
+#else /* !O_LARGEFILE */
+#ifdef S_SPLINT_S
+extern off_t afs_lseek(int FD, off_t O, int F);
+#endif /*S_SPLINT_S */
+#define afs_lseek(FD, O, F) lseek(FD, (off_t)(O), F)
+#define afs_stat stat
+#define afs_fstat fstat
+#define afs_open open
+#define afs_fopen fopen
+#define afs_mmap mmap
+#endif /* !O_LARGEFILE */
+/*@=fcnmacros =macrofcndecl@*/
+
+
+#ifndef AFS_DEMAND_ATTACH_FS
+/*
+ * Stub entry point used when the tree is built without demand attach
+ * support: explain that the tool is unavailable and exit with failure.
+ */
+int
+main (int argc, char ** argv)
+{
+    const char * self = "state analyzer";
+
+    if (argv[0]) {
+        self = argv[0];
+    }
+    fprintf(stderr, "%s is only supported for demand attach fileservers\n",
+            self);
+    return 1;
+}
+#else /* AFS_DEMAND_ATTACH_FS */
+
+static void usage(char * prog); /* print the command-line synopsis on stderr */
+static int openFile(char * path); /* open and mmap the dump; returns 0 on success, 1 on failure */
+static void initState(void); /* reset header validity flags and cursor pointers */
+
+static void banner(void); /* print the program banner on stderr */
+static void prompt(void); /* interactive command loop */
+
+static void print_help(void); /* help: commands available from every menu */
+static void print_global_help(void); /* help: global menu */
+static void print_h_help(void); /* help: host menu */
+static void print_fe_help(void); /* help: FileEntry menu */
+static void print_cb_help(void); /* help: CallBack menu */
+
+static void dump_hdr(void); /* print fs_state_header */
+static void dump_h_hdr(void); /* print host_state_header */
+static void dump_cb_hdr(void); /* print callback_state_header */
+
+static void dump_cb_timeout(void); /* print the timeout header and array */
+static void dump_cb_fehash(void); /* print the fehash header and array */
+
+static void dump_all_hes(void); /* the dump_all_* routines print every record of one kind */
+static void dump_all_fes(void);
+static void dump_all_cbs(void);
+
+static void dump_he(afs_uint32 idx); /* dump_he/fe/cb: load record idx into its cursor and print it */
+static void dump_fe(afs_uint32 idx);
+static void dump_cb(afs_uint32 idx);
+static void dump_this_he(void); /* this/next/prev/first/last: navigate relative to the cursor index */
+static void dump_this_fe(void);
+static void dump_this_cb(void);
+static void dump_next_he(void);
+static void dump_next_fe(void);
+static void dump_next_cb(void);
+static void dump_prev_he(void);
+static void dump_prev_fe(void);
+static void dump_prev_cb(void);
+static void dump_first_he(void);
+static void dump_first_fe(void);
+static void dump_first_cb(void);
+static void dump_last_he(void);
+static void dump_last_fe(void);
+static void dump_last_cb(void);
+static void dump_he_hdr(void); /* the remaining dump_* routines print data already held in a cursor */
+static void dump_he_entry(void);
+static void dump_he_interfaces(void);
+static void dump_he_hcps(void);
+static void dump_fe_hdr(void);
+static void dump_fe_entry(void);
+static void dump_cb_entry(void);
+
+static void hexdump_map(afs_uint32 offset, afs_uint32 len); /* raw hex dump of len bytes of the map starting at offset */
+
+static int get_hdr(void); /* the get_* loaders visible here return 0 on success, 1 on failure */
+static int get_h_hdr(void);
+static int get_cb_hdr(void);
+static int get_cb_timeout_hdr(void);
+static int get_cb_timeout(void);
+static int get_cb_fehash_hdr(void);
+static int get_cb_fehash(void);
+static int get_he(afs_uint32 idx);
+static int get_he_hdr(void);
+static int get_he_entry(void);
+static int get_fe(afs_uint32 idx);
+static int get_fe_hdr(void);
+static int get_fe_entry(void);
+static int get_cb(afs_uint32 idx);
+static int get_cb_entry(void);
+
+static int find_fe_by_index(afs_uint32 idx); /* find_*: the prompt treats a non-zero return as "no results" */
+static int find_cb_by_index(afs_uint32 idx);
+static int find_fe_by_fid(afs_uint32 vol, afs_uint32 vn, afs_uint32 uniq);
+
+
+static int dump_fd = -1; /* descriptor of the open dump file, -1 when none is open */
+static void * map = NULL; /* base address of the mmap'ed dump file */
+static size_t map_len; /* size of the mapping in bytes (st_size of the dump file) */
+
+static struct { /* local copies of the dump's header structures plus their locations in the map */
+ struct fs_state_header hdr;
+ struct host_state_header h_hdr;
+ struct callback_state_header cb_hdr;
+ struct callback_state_timeout_header timeout_hdr;
+ struct callback_state_fehash_header fehash_hdr;
+ afs_uint32 * timeout; /* heap copy of the timeout array (allocated in get_cb_timeout) */
+ afs_uint32 * fehash; /* presumably the matching heap copy of the fehash array -- its loader is outside this view */
+
+ /* pointers into the memory map */
+ void * hdr_p;
+ void * h_hdr_p;
+ void * cb_hdr_p;
+ void * timeout_hdr_p;
+ void * timeout_p;
+ void * fehash_hdr_p;
+ void * fehash_p;
+
+ byte hdr_valid; /* *_valid: non-zero once the corresponding local copy has been loaded */
+ byte h_hdr_valid;
+ byte cb_hdr_valid;
+ byte timeout_hdr_valid;
+ byte fehash_hdr_valid;
+} hdrs;
+
+static struct { /* host entry cursor */
+ void * fh; /* address just past host_state_header (set by get_h_hdr) */
+ void * cursor;
+ void * ifp; /* source address of the Interface data copied by dump_he_interfaces */
+ void * hcps; /* source address of the hcps array copied by dump_he_hcps */
+ struct host_state_entry_header hdr;
+ struct hostDiskEntry he;
+ afs_uint32 idx; /* index of the host entry the cursor refers to */
+ byte hdr_valid;
+ byte he_valid;
+} he_cursor;
+
+static struct {
+ void ** cursor;
+} he_cache;
+
+static struct { /* file entry (FE) cursor */
+ void * ffe; /* map base + fe_offset (set by get_cb_hdr) */
+ void * cursor;
+ void * fcb;
+ struct callback_state_entry_header hdr;
+ struct FEDiskEntry fe;
+ afs_uint32 idx; /* index of the FE the cursor refers to */
+ byte hdr_valid;
+ byte fe_valid;
+} fe_cursor;
+
+static struct {
+ void ** cursor;
+} fe_cache;
+
+static struct { /* callback (CB) cursor, relative to the current FE */
+ void * cursor;
+ struct CBDiskEntry cb;
+ afs_uint32 idx; /* index of the CB the cursor refers to */
+ byte cb_valid;
+} cb_cursor;
+
+static struct {
+ void ** cursor;
+} cb_cache;
+
+/*
+ * Print the command-line synopsis on stderr.
+ *
+ * prog -- program name to show (normally argv[0]); a generic name is
+ *         substituted when it is NULL.
+ *
+ * The original passed no argument for the %s conversion, which is
+ * undefined behaviour and never printed the program name.
+ */
+static void
+usage(char * prog)
+{
+    fprintf(stderr, "usage: %s [<state dump file>]\n",
+            prog ? prog : "state_analyzer");
+}
+
+/*
+ * Program entry point: show the banner, validate the arguments, map the
+ * state dump (the path given on the command line, or the default
+ * fileserver state file) and run the interactive prompt.
+ *
+ * Returns 0 after a normal prompt session, 1 on a usage error or when
+ * the dump cannot be opened.
+ */
+int
+main(int argc, char ** argv)
+{
+    int help_requested;
+    int open_failed;
+
+    banner();
+
+    help_requested = (argc == 2) && (strcmp(argv[1], "-h") == 0);
+    if (argc > 2 || help_requested) {
+        usage(argv[0]);
+        return 1;
+    }
+
+    initState();
+
+    if (argc > 1) {
+        open_failed = openFile(argv[1]);
+    } else {
+        open_failed = openFile(AFSDIR_SERVER_FSSTATE_FILEPATH);
+    }
+    if (open_failed) {
+        return 1;
+    }
+
+    prompt();
+    return 0;
+}
+
+
+/*
+ * Open the state dump at `path` and map the whole file read-only.
+ *
+ * On success the file-scope variables dump_fd, map and map_len describe
+ * the mapping and 0 is returned.  On failure a message is written to
+ * stderr, anything acquired so far is released, dump_fd is reset to -1,
+ * map to NULL, and 1 is returned.
+ */
+static int
+openFile(char * path)
+{
+    int ret = 0;
+    struct afs_stat status;
+
+    /* NOTE(review): the mapping below is PROT_READ only, so O_RDONLY would
+     * probably suffice here -- confirm nothing else writes through dump_fd. */
+    dump_fd = afs_open(path, O_RDWR);
+    if (dump_fd == -1) {
+        fprintf(stderr, "dump file '%s' failed to open\n", path);
+        ret = 1;
+        goto done;
+    }
+
+    printf("opened dump file '%s'\n", path);
+
+    if (afs_fstat(dump_fd, &status) == -1) {
+        fprintf(stderr, "failed to stat file\n");
+        ret = 1;
+        goto done;
+    }
+
+    map_len = status.st_size;
+
+    map = afs_mmap(NULL, map_len, PROT_READ, MAP_SHARED, dump_fd, 0);
+    if (map == MAP_FAILED) {
+        /* MAP_FAILED is not NULL: clear it so the cleanup below does not
+         * pass it to munmap() and no caller ever sees it in `map` */
+        map = NULL;
+        fprintf(stderr, "failed to mmap file\n");
+        ret = 1;
+        goto done;
+    }
+
+    /* size_t and pointers need their own conversions; %d/%x truncate them
+     * on LP64 platforms */
+    printf("mapped %lu bytes at %p\n", (unsigned long)map_len, map);
+
+ done:
+    if (ret) {
+        if (map) {
+            munmap(map, map_len);
+            map = NULL;
+        }
+        if (dump_fd != -1) {
+            close(dump_fd);
+            dump_fd = -1;
+        }
+    }
+    return ret;
+}
+
+/*
+ * Put all analyzer state into its "nothing loaded yet" condition: every
+ * header/cursor validity flag cleared, every cursor and cache pointer
+ * NULL, every cursor index 0.
+ *
+ * The original reset only part of this state and relied on static zero
+ * initialisation for the rest; resetting every field keeps the function
+ * consistent with the structures it initialises.
+ */
+static void
+initState(void)
+{
+    hdrs.hdr_valid = hdrs.h_hdr_valid = hdrs.cb_hdr_valid = 0;
+    hdrs.timeout_hdr_valid = hdrs.fehash_hdr_valid = 0;
+    hdrs.timeout = hdrs.fehash = NULL;
+
+    he_cursor.cursor = fe_cursor.cursor = cb_cursor.cursor = NULL;
+    he_cursor.fh = fe_cursor.ffe = fe_cursor.fcb = NULL;
+    he_cursor.ifp = he_cursor.hcps = NULL;
+    he_cursor.idx = fe_cursor.idx = cb_cursor.idx = 0;
+    he_cursor.hdr_valid = he_cursor.he_valid = 0;
+    fe_cursor.hdr_valid = fe_cursor.fe_valid = 0;
+    cb_cursor.cb_valid = 0;
+
+    he_cache.cursor = fe_cache.cursor = cb_cache.cursor = NULL;
+}
+
+/*
+ * Write the three-line program banner to stderr.
+ */
+static void
+banner(void)
+{
+    fputs("demand attach fs\n"
+          "fileserver state analyzer\n"
+          "version 0.1\n", stderr);
+}
+
+#define PROGNAME "fs state analyzer"
+
+/*
+ * Interactive command loop.
+ *
+ * Reads one line at a time from stdin, splits it into whitespace
+ * separated tokens and executes them.  `mode` is the menu used for the
+ * token being processed; `next_mode` is the menu that persists across
+ * commands.  A menu name followed by a command ("h dump") switches menu
+ * for that one command only; a menu name on its own makes the switch
+ * persistent.  An empty line repeats the previous line.
+ *
+ * Returns on "exit", on "quit" from the global menu, and on end of
+ * input.
+ *
+ * Changes relative to the original:
+ *  - input is read with fgets() instead of gets(), so a long line can no
+ *    longer overflow `input`, and EOF ends the loop instead of spinning;
+ *  - every fresh prompt starts in the persistent menu, so a one-shot
+ *    menu switch no longer sticks when a command bails out early;
+ *  - "find syntax error 1" prints the offending token before clearing it;
+ *  - "hexdump <offset>" rejects an offset past the end of the map instead
+ *    of computing a wrapped length;
+ *  - a failed hexdump length parse discards the rest of the line, like
+ *    the other parse errors;
+ *  - unknown commands in the FE menu are reported;
+ *  - the unsigned cursor indexes are printed with %u.
+ */
+static void
+prompt(void)
+{
+    char input[256];
+    char prev_input[256];
+    char * tok = NULL;
+    char * nl;
+    afs_uint32 x, y, z;
+    enum {
+        PR_GLOBAL_MODE,
+        PR_H_MODE,
+        PR_FE_MODE,
+        PR_CB_MODE
+    } mode = PR_GLOBAL_MODE, next_mode;
+
+    next_mode = mode;
+    input[0] = prev_input[0] = '\0';
+
+    while (1) {
+        if (!tok) {
+            /* a new line always starts in the persistent menu */
+            mode = next_mode;
+
+            switch(mode) {
+            case PR_GLOBAL_MODE:
+                printf(PROGNAME "> ");
+                break;
+            case PR_H_MODE:
+                printf(PROGNAME ": h(%u)> ", he_cursor.idx);
+                break;
+            case PR_FE_MODE:
+                printf(PROGNAME ": fe(%u)> ", fe_cursor.idx);
+                break;
+            case PR_CB_MODE:
+                printf(PROGNAME ": fe(%u):cb(%u)> ", fe_cursor.idx, cb_cursor.idx);
+                break;
+            default:
+                fprintf(stderr, "prompt state broken; aborting\n");
+                return;
+            }
+            fflush(stdout);
+
+            if (fgets(input, sizeof(input), stdin) == NULL) {
+                /* end of input (or read error): leave the debugger */
+                printf("\n");
+                return;
+            }
+            nl = strchr(input, '\n');
+            if (nl) {
+                *nl = '\0';
+            } else {
+                /* over-long line: drop the tail so it is not mistaken
+                 * for the next command */
+                int c;
+                while ((c = getchar()) != EOF && c != '\n')
+                    ;
+            }
+
+            if (!strcmp(input, "")) {
+                /* repeat last command */
+                if (!strcmp(prev_input, "")) {
+                    continue;
+                }
+                strlcpy(input, prev_input, sizeof(input));
+            } else {
+                /* save command for repetition */
+                strlcpy(prev_input, input, sizeof(prev_input));
+            }
+
+            tok = strtok(input, " \t");
+        }
+        while (tok && !strcmp(tok, ";")) {
+            tok = strtok(NULL, "; \t");
+        }
+
+        if (!tok) {
+            continue;
+        }
+
+        if (!strcasecmp(tok, "exit")) {
+            return;
+        } else if (!strcasecmp(tok, "quit")) {
+            switch(mode) {
+            case PR_CB_MODE:
+                next_mode = PR_FE_MODE;
+                break;
+            case PR_FE_MODE:
+            case PR_H_MODE:
+                next_mode = PR_GLOBAL_MODE;
+                break;
+            default:
+                return;
+            }
+        } else if (!strcasecmp(tok, "h")) {
+            tok = strtok(NULL, " \t");
+            mode = PR_H_MODE;
+            if (!tok) {
+                next_mode = mode;
+            }
+            continue;
+        } else if (!strcasecmp(tok, "fe")) {
+            tok = strtok(NULL, " \t");
+            mode = PR_FE_MODE;
+            if (!tok) {
+                next_mode = mode;
+            }
+            continue;
+        } else if (!strcasecmp(tok, "fs")) {
+            tok = strtok(NULL, " \t");
+            mode = PR_GLOBAL_MODE;
+            if (!tok) {
+                next_mode = mode;
+            }
+            continue;
+        } else if (!strcasecmp(tok, "cb")) {
+            tok = strtok(NULL, " \t");
+            mode = PR_CB_MODE;
+            if (!tok) {
+                next_mode = mode;
+            }
+            continue;
+        } else if (!strcasecmp(tok, "help")) {
+            switch(mode) {
+            case PR_H_MODE:
+                print_h_help();
+                break;
+            case PR_FE_MODE:
+                print_fe_help();
+                break;
+            case PR_CB_MODE:
+                print_cb_help();
+                break;
+            default:
+                print_global_help();
+            }
+            print_help();
+        } else if (!strcasecmp(tok, "hexdump")) {
+            tok = strtok(NULL, " \t");
+            if (!tok) {
+                hexdump_map(0, map_len);
+                continue;
+            }
+            if (sscanf(tok, "%u", &x) != 1) {
+                fprintf(stderr, "hexdump parse error 1\n");
+                tok = NULL;
+                continue;
+            }
+            tok = strtok(NULL, " \t");
+            if (!tok) {
+                if (x > map_len) {
+                    /* map_len - x would wrap around */
+                    fprintf(stderr, "hexdump offset beyond end of memory map\n");
+                    continue;
+                }
+                hexdump_map(x, map_len - x);
+                continue;
+            }
+            if (sscanf(tok, "%u", &y) != 1) {
+                fprintf(stderr, "hexdump parse error 2\n");
+                tok = NULL;
+                continue;
+            }
+            hexdump_map(x,y);
+        } else if (!strcasecmp(tok, "hdr")) {
+            switch(mode) {
+            case PR_H_MODE:
+                dump_h_hdr();
+                break;
+            case PR_FE_MODE:
+                dump_cb_hdr();
+                break;
+            case PR_CB_MODE:
+                dump_this_fe();
+                break;
+            default:
+                dump_hdr();
+            }
+        } else if (!strcasecmp(tok, "this")) {
+            switch(mode) {
+            case PR_H_MODE:
+                dump_this_he();
+                break;
+            case PR_FE_MODE:
+                dump_this_fe();
+                break;
+            case PR_CB_MODE:
+                dump_this_cb();
+                break;
+            default:
+                fprintf(stderr, "command not valid for this mode\n");
+            }
+        } else if (!strcasecmp(tok, "next")) {
+            switch(mode) {
+            case PR_H_MODE:
+                dump_next_he();
+                break;
+            case PR_FE_MODE:
+                dump_next_fe();
+                break;
+            case PR_CB_MODE:
+                dump_next_cb();
+                break;
+            default:
+                fprintf(stderr, "command not valid for this mode\n");
+            }
+        } else if (!strcasecmp(tok, "prev")) {
+            switch(mode) {
+            case PR_H_MODE:
+                dump_prev_he();
+                break;
+            case PR_FE_MODE:
+                dump_prev_fe();
+                break;
+            case PR_CB_MODE:
+                dump_prev_cb();
+                break;
+            default:
+                fprintf(stderr, "command not valid for this mode\n");
+            }
+        } else if (!strcasecmp(tok, "first")) {
+            switch(mode) {
+            case PR_H_MODE:
+                dump_first_he();
+                break;
+            case PR_FE_MODE:
+                dump_first_fe();
+                break;
+            case PR_CB_MODE:
+                dump_first_cb();
+                break;
+            default:
+                fprintf(stderr, "command not valid for this mode\n");
+            }
+        } else if (!strcasecmp(tok, "last")) {
+            switch(mode) {
+            case PR_H_MODE:
+                dump_last_he();
+                break;
+            case PR_FE_MODE:
+                dump_last_fe();
+                break;
+            case PR_CB_MODE:
+                dump_last_cb();
+                break;
+            default:
+                fprintf(stderr, "command not valid for this mode\n");
+            }
+        } else if (!strcasecmp(tok, "dump")) {
+            switch(mode) {
+            case PR_H_MODE:
+                dump_all_hes();
+                break;
+            case PR_FE_MODE:
+                dump_all_fes();
+                break;
+            case PR_CB_MODE:
+                dump_all_cbs();
+                break;
+            default:
+                fprintf(stderr, "command not valid for this mode\n");
+            }
+        } else if (!strcasecmp(tok, "find")) {
+            tok = strtok(NULL, " \t");
+            if (!tok || strcasecmp(tok, "by")) {
+                /* report the token before discarding the line */
+                fprintf(stderr, "find syntax error 1 (%s)\n",
+                        (tok) ? tok : "nil");
+                tok = NULL;
+                continue;
+            }
+            tok = strtok(NULL, " \t");
+            if (!tok) {
+                fprintf(stderr, "find syntax error 2\n");
+                continue;
+            }
+            switch(mode) {
+            case PR_H_MODE:
+                fprintf(stderr, "not implemented yet\n");
+                break;
+            case PR_FE_MODE:
+                if (!strcasecmp(tok, "index")) {
+                    tok = strtok(NULL, " \t");
+                    if (!tok || sscanf(tok, "%u", &x) != 1) {
+                        tok = NULL;
+                        fprintf(stderr, "find syntax error 3\n");
+                        continue;
+                    }
+                    if (find_fe_by_index(x)) {
+                        fprintf(stderr, "find returned no results\n");
+                    }
+                } else if (!strcasecmp(tok, "fid")) {
+                    tok = strtok(NULL, "(), \t");
+                    if (!tok || sscanf(tok, "%u", &x) != 1) {
+                        tok = NULL;
+                        fprintf(stderr, "find syntax error 4\n");
+                        continue;
+                    }
+                    tok = strtok(NULL, "(), \t");
+                    if (!tok || sscanf(tok, "%u", &y) != 1) {
+                        tok = NULL;
+                        fprintf(stderr, "find syntax error 5\n");
+                        continue;
+                    }
+                    tok = strtok(NULL, "(), \t");
+                    if (!tok || sscanf(tok, "%u", &z) != 1) {
+                        tok = NULL;
+                        fprintf(stderr, "find syntax error 6\n");
+                        continue;
+                    }
+                    if (find_fe_by_fid(x,y,z)) {
+                        fprintf(stderr, "find returned no results\n");
+                    }
+                } else {
+                    fprintf(stderr, "unsupported filter type\n");
+                }
+                break;
+            case PR_CB_MODE:
+                if (!strcasecmp(tok, "index")) {
+                    tok = strtok(NULL, " \t");
+                    if (!tok || sscanf(tok, "%u", &x) != 1) {
+                        tok = NULL;
+                        fprintf(stderr, "find syntax error 3\n");
+                        continue;
+                    }
+                    if (find_cb_by_index(x)) {
+                        fprintf(stderr, "find returned no results\n");
+                    }
+                } else {
+                    fprintf(stderr, "unsupported filter type\n");
+                }
+                break;
+            default:
+                fprintf(stderr, "find not supported for this menu\n");
+            }
+        } else if (!strcspn(tok, "0123456789")) {
+            /* token starts with a digit: treat it as a record index */
+            if (sscanf(tok, "%u", &x) == 1) {
+                switch(mode) {
+                case PR_H_MODE:
+                    dump_he(x);
+                    break;
+                case PR_FE_MODE:
+                    dump_fe(x);
+                    break;
+                case PR_CB_MODE:
+                    dump_cb(x);
+                    break;
+                default:
+                    fprintf(stderr, "command not available from this menu\n");
+                }
+            } else {
+                fprintf(stderr, "input parse error ('%s')\n", tok);
+            }
+        } else if (mode == PR_FE_MODE) {
+            if (!strcmp(tok, "timeout")) {
+                dump_cb_timeout();
+            } else if (!strcmp(tok, "hash")) {
+                dump_cb_fehash();
+            } else {
+                fprintf(stderr, "unknown command\n");
+            }
+        } else {
+            fprintf(stderr, "unknown command\n");
+        }
+        tok = strtok(NULL, " \t");
+        mode = next_mode;
+    }
+}
+
+/*
+ * Print the commands that are available from every menu.
+ *
+ * The "fs" entry was missing from the original list even though the
+ * prompt accepts it to switch back to the global menu.
+ */
+static void
+print_help(void)
+{
+    printf("\tfs <...> -- global menu commands\n");
+    printf("\th <...> -- host menu commands\n");
+    printf("\tfe <...> -- FileEntry menu commands\n");
+    printf("\tcb <...> -- CallBack menu commands\n");
+    printf("\thexdump [<offset> [<len>]]\n\t\t -- hex dump the raw data\n");
+    printf("\tquit -- quit this menu\n");
+    printf("\texit -- exit the debugger\n");
+    printf("\thelp -- this help message\n");
+}
+
+/*
+ * Print the help text specific to the global menu.
+ */
+static void
+print_global_help(void)
+{
+    fputs("\thdr -- display the fs_state_header struct\n", stdout);
+}
+
+/*
+ * Print the help text specific to the host menu.
+ */
+static void
+print_h_help(void)
+{
+    static const char * const text[] = {
+        "\thdr -- display the host_state_header struct\n",
+        "\tfirst -- display the first host\n",
+        "\tprev -- display the previous host\n",
+        "\tthis -- display this host\n",
+        "\tnext -- display the next host\n",
+        "\tlast -- display the last host\n",
+        "\tdump -- display all hosts\n"
+    };
+    size_t n;
+
+    for (n = 0; n < sizeof(text) / sizeof(text[0]); n++) {
+        fputs(text[n], stdout);
+    }
+}
+
+/*
+ * Print the help text specific to the FileEntry menu.
+ */
+static void
+print_fe_help(void)
+{
+    static const char * const text[] = {
+        "\thdr -- display the callback_state_header struct\n",
+        "\tfirst -- display the first FE\n",
+        "\tprev -- display the previous FE\n",
+        "\tthis -- display this FE\n",
+        "\tnext -- display the next FE\n",
+        "\tlast -- display the last FE\n",
+        "\tdump -- display all FEs\n",
+        "\ttimeout -- display the timeout queue heads\n",
+        "\thash -- display the file entry hash buckets\n",
+        "\tfind by index <id>\n\t\t -- find an fe by its array index\n",
+        "\tfind by fid <(vol,vnode,unique)>\n\t\t -- find an fe by its AFSFid\n"
+    };
+    size_t n;
+
+    for (n = 0; n < sizeof(text) / sizeof(text[0]); n++) {
+        fputs(text[n], stdout);
+    }
+}
+
+/*
+ * Print the help text specific to the CallBack menu.
+ *
+ * "find by index" is accepted by the prompt in this menu but was not
+ * listed in the original help.
+ */
+static void
+print_cb_help(void)
+{
+    printf("\thdr -- display the callback_state_entry_header struct\n");
+    printf("\tfirst -- display the first CB\n");
+    printf("\tprev -- display the previous CB\n");
+    printf("\tthis -- display this CB\n");
+    printf("\tnext -- display the next CB\n");
+    printf("\tlast -- display the last CB\n");
+    printf("\tdump -- display all CBs\n");
+    printf("\tfind by index <id>\n\t\t -- find a cb by its array index\n");
+}
+
+/*
+ * Pretty-printing helpers.  DPFTBn are indentation prefixes; the other
+ * macros print one element of a C-like structure dump on stdout.
+ */
+#define DPFTB0 "\t"
+#define DPFTB1 "\t\t"
+#define DPFTB2 "\t\t\t"
+
+/*
+ * Announce the address a structure is loaded from and its offset from the
+ * start of the memory map.  Pointers are printed with %p and the offset
+ * as unsigned long; the original %x/%u conversions do not match these
+ * argument types where pointers are wider than int.
+ */
+#define DPFOFF(addr) \
+    do { \
+        char * _p = (char *)(addr); \
+        char * _m = (char *)map; \
+        printf("loading structure from address %p (offset %lu)\n", \
+               (void *)_p, (unsigned long)(_p-_m)); \
+    } while (0)
+
+/* structs */
+#define DPFSO(T, name) printf(T "%s = {\n", name)
+#define DPFSO0(name) DPFSO(DPFTB0, name)
+#define DPFSO1(name) DPFSO(DPFTB1, name)
+#define DPFSC(T) printf(T "}\n")
+#define DPFSC0 DPFSC(DPFTB0)
+#define DPFSC1 DPFSC(DPFTB1)
+
+/* arrays */
+#define DPFAO(T1, T2, name) printf(T1 "%s =\n" T2 "{ ", name)
+#define DPFAO0(name) DPFAO(DPFTB0, DPFTB1, name)
+#define DPFAO1(name) DPFAO(DPFTB1, DPFTB2, name)
+#define DPFAC0 printf(" }\n")
+#define DPFAC1 DPFAC0
+#define DPFA1 printf(DPFTB1 " ")
+#define DPFA2 printf(DPFTB2 " ")
+#define DPFAN printf("\n")
+#define DPFALE(type, var) printf("%" type, var)
+#define DPFAE(type, var) printf("%" type ",\t", var)
+
+/* normal vars */
+#define DPFV(T, name, type, var) printf(T "%s = %" type "\n", name, var)
+#define DPFV1(name, type, var) DPFV(DPFTB1, name, type, var)
+#define DPFV2(name, type, var) DPFV(DPFTB2, name, type, var)
+
+/* hex */
+#define DPFX(T, name, var) printf(T "%s = 0x%x\n", name, var)
+#define DPFX1(name, var) DPFX(DPFTB1, name, var)
+#define DPFX2(name, var) DPFX(DPFTB2, name, var)
+
+/* strings */
+#define DPFS(T, name, var) printf(T "%s = \"%s\"\n", name, var)
+#define DPFS1(name, var) DPFS(DPFTB1, name, var)
+#define DPFS2(name, var) DPFS(DPFTB2, name, var)
+
+/*
+ * time
+ *
+ * The value is copied into a real time_t first: the dumped fields may be
+ * narrower than time_t, in which case passing their address to ctime()
+ * would read past them.  A NULL result from ctime() is reported instead
+ * of being handed to strtok_r()/printf().
+ */
+#define DPFT(T, name, var) \
+    do { \
+        time_t _t = (time_t)(var); \
+        char * _s = ctime(&_t); \
+        char * _last; \
+        if (_s) \
+            _s = strtok_r(_s, "\r\n", &_last); \
+        printf(T "%s = \"%s\"\n", name, _s ? _s : "(invalid time)"); \
+    } while(0)
+#define DPFT1(name, var) DPFT(DPFTB1, name, var)
+#define DPFT2(name, var) DPFT(DPFTB2, name, var)
+
+/*
+ * Print the top-level fs_state_header, then report on stderr when its
+ * magic number or version does not match what this build expects.
+ * Prints nothing if the header cannot be loaded.
+ */
+static void
+dump_hdr(void)
+{
+    afs_uint32 off_hi, off_lo;
+    char uuid_text[40];
+
+    if (get_hdr() != 0) {
+        return;
+    }
+
+    DPFOFF(map);
+    DPFSO0("fs_state_header");
+
+    /* version stamp */
+    DPFSO1("stamp");
+    DPFX2("magic", hdrs.hdr.stamp.magic);
+    DPFV2("version", "u", hdrs.hdr.stamp.version);
+    DPFSC1;
+
+    DPFT1("timestamp", hdrs.hdr.timestamp);
+    DPFV1("sys_name", "u", hdrs.hdr.sys_name);
+
+    afsUUID_to_string(&hdrs.hdr.server_uuid, uuid_text, sizeof(uuid_text));
+    DPFS1("server_uuid", uuid_text);
+    DPFV1("valid", "d", hdrs.hdr.valid);
+    DPFV1("endianness", "d", hdrs.hdr.endianness);
+    DPFV1("stats_detailed", "d", hdrs.hdr.stats_detailed);
+
+    /* 64-bit offsets are shown as their two 32-bit halves */
+    SplitInt64(hdrs.hdr.h_offset, off_hi, off_lo);
+    DPFSO1("h_offset");
+    DPFV2("hi", "u", off_hi);
+    DPFV2("lo", "u", off_lo);
+    DPFSC1;
+
+    SplitInt64(hdrs.hdr.cb_offset, off_hi, off_lo);
+    DPFSO1("cb_offset");
+    DPFV2("hi", "u", off_hi);
+    DPFV2("lo", "u", off_lo);
+    DPFSC1;
+
+    DPFS1("server_version_string", hdrs.hdr.server_version_string);
+    DPFSC0;
+
+    if (!(hdrs.hdr.stamp.magic == FS_STATE_MAGIC)) {
+        fprintf(stderr, "* magic check failed\n");
+    }
+    if (!(hdrs.hdr.stamp.version == FS_STATE_VERSION)) {
+        fprintf(stderr, "* version check failed\n");
+    }
+}
+
+/*
+ * Print the host_state_header and report a magic or version mismatch on
+ * stderr.  Prints nothing if the header cannot be loaded.
+ */
+static void
+dump_h_hdr(void)
+{
+    if (get_h_hdr() != 0) {
+        return;
+    }
+
+    DPFOFF(hdrs.h_hdr_p);
+    DPFSO0("host_state_header");
+
+    /* version stamp */
+    DPFSO1("stamp");
+    DPFX2("magic", hdrs.h_hdr.stamp.magic);
+    DPFV2("version", "u", hdrs.h_hdr.stamp.version);
+    DPFSC1;
+
+    DPFV1("records", "u", hdrs.h_hdr.records);
+    DPFV1("index_max", "u", hdrs.h_hdr.index_max);
+    DPFSC0;
+
+    if (!(hdrs.h_hdr.stamp.magic == HOST_STATE_MAGIC)) {
+        fprintf(stderr, "* magic check failed\n");
+    }
+    if (!(hdrs.h_hdr.stamp.version == HOST_STATE_VERSION)) {
+        fprintf(stderr, "* version check failed\n");
+    }
+}
+
+/*
+ * Print the callback_state_header and report a magic or version mismatch
+ * on stderr.  Prints nothing if the header cannot be loaded.
+ */
+static void
+dump_cb_hdr(void)
+{
+    afs_uint32 off_hi, off_lo;
+
+    if (get_cb_hdr() != 0) {
+        return;
+    }
+
+    DPFOFF(hdrs.cb_hdr_p);
+    DPFSO0("callback_state_header");
+
+    /* version stamp */
+    DPFSO1("stamp");
+    DPFX2("magic", hdrs.cb_hdr.stamp.magic);
+    DPFV2("version", "u", hdrs.cb_hdr.stamp.version);
+    DPFSC1;
+
+    DPFV1("nFEs", "u", hdrs.cb_hdr.nFEs);
+    DPFV1("nCBs", "u", hdrs.cb_hdr.nCBs);
+    DPFV1("fe_max", "u", hdrs.cb_hdr.fe_max);
+    DPFV1("cb_max", "u", hdrs.cb_hdr.cb_max);
+    DPFV1("tfirst", "d", hdrs.cb_hdr.tfirst);
+
+    /* 64-bit offsets are shown as their two 32-bit halves */
+    SplitInt64(hdrs.cb_hdr.timeout_offset, off_hi, off_lo);
+    DPFSO1("timeout_offset");
+    DPFV2("hi", "u", off_hi);
+    DPFV2("lo", "u", off_lo);
+    DPFSC1;
+
+    SplitInt64(hdrs.cb_hdr.fehash_offset, off_hi, off_lo);
+    DPFSO1("fehash_offset");
+    DPFV2("hi", "u", off_hi);
+    DPFV2("lo", "u", off_lo);
+    DPFSC1;
+
+    SplitInt64(hdrs.cb_hdr.fe_offset, off_hi, off_lo);
+    DPFSO1("fe_offset");
+    DPFV2("hi", "u", off_hi);
+    DPFV2("lo", "u", off_lo);
+    DPFSC1;
+
+    DPFSC0;
+
+    if (!(hdrs.cb_hdr.stamp.magic == CALLBACK_STATE_MAGIC)) {
+        fprintf(stderr, "* magic check failed\n");
+    }
+    if (!(hdrs.cb_hdr.stamp.version == CALLBACK_STATE_VERSION)) {
+        fprintf(stderr, "* version check failed\n");
+    }
+}
+
+/*
+ * Print the callback_state_timeout_header followed by the timeout array,
+ * eight values per row.
+ *
+ * The number of elements printed comes from the header's `records`
+ * field, which is also the size of the array allocated by
+ * get_cb_timeout().  The original always printed 128 elements, reading
+ * past the allocation whenever the dump recorded fewer.
+ */
+static void
+dump_cb_timeout(void)
+{
+    afs_uint32 i, count;
+
+    if (get_cb_hdr())
+        return;
+
+    if (get_cb_timeout_hdr())
+        return;
+
+    if (get_cb_timeout())
+        return;
+
+    DPFOFF(hdrs.timeout_hdr_p);
+    DPFSO0("callback_state_timeout_header");
+    DPFX1("magic", hdrs.timeout_hdr.magic);
+    DPFV1("len", "u", hdrs.timeout_hdr.len);
+    DPFV1("records", "u", hdrs.timeout_hdr.records);
+    DPFSC0;
+
+    if (hdrs.timeout_hdr.magic != CALLBACK_STATE_TIMEOUT_MAGIC) {
+        fprintf(stderr, "* magic check failed\n");
+    }
+
+    count = hdrs.timeout_hdr.records;
+
+    DPFOFF(hdrs.timeout_p);
+    DPFAO0("timeout");
+    /* every element but the last is followed by a separator */
+    for (i = 0; i + 1 < count; i++) {
+        DPFAE("u", hdrs.timeout[i]);
+        if ((i % 8) == 7) {
+            DPFAN;
+            DPFA1;
+        }
+    }
+    if (count) {
+        DPFALE("u", hdrs.timeout[count - 1]);
+    }
+    DPFAC0;
+}
+
+/*
+ * Print the callback_state_fehash_header followed by the fehash array,
+ * eight values per row.
+ *
+ * An empty array (records == 0) is printed as an empty list; the
+ * original computed `records - 1` and indexed fehash[records - 1]
+ * without checking for zero.
+ */
+static void
+dump_cb_fehash(void)
+{
+    afs_uint32 i, count;
+
+    if (get_cb_hdr())
+        return;
+
+    if (get_cb_fehash_hdr())
+        return;
+
+    if (get_cb_fehash())
+        return;
+
+    DPFOFF(hdrs.fehash_hdr_p);
+    DPFSO0("callback_state_fehash_header");
+    DPFX1("magic", hdrs.fehash_hdr.magic);
+    DPFV1("len", "u", hdrs.fehash_hdr.len);
+    DPFV1("records", "u", hdrs.fehash_hdr.records);
+    DPFSC0;
+
+    if (hdrs.fehash_hdr.magic != CALLBACK_STATE_FEHASH_MAGIC) {
+        fprintf(stderr, "* magic check failed\n");
+    }
+
+    count = hdrs.fehash_hdr.records;
+
+    DPFOFF(hdrs.fehash_p);
+    DPFAO0("fehash");
+    /* every element but the last is followed by a separator */
+    for (i = 0; i + 1 < count; i++) {
+        DPFAE("u", hdrs.fehash[i]);
+        if ((i % 8) == 7) {
+            DPFAN;
+            DPFA1;
+        }
+    }
+    if (count) {
+        DPFALE("u", hdrs.fehash[count - 1]);
+    }
+    DPFAC0;
+}
+
+/*
+ * Dump every host entry, in index order, as counted by the
+ * host_state_header.
+ */
+static void
+dump_all_hes(void)
+{
+    int n = 0;
+
+    if (get_h_hdr() != 0) {
+        fprintf(stderr, "error getting host_state_header\n");
+        return;
+    }
+
+    while (n < hdrs.h_hdr.records) {
+        dump_he(n);
+        n++;
+    }
+}
+
+/*
+ * Dump every file entry, in index order, as counted by the
+ * callback_state_header.
+ */
+static void
+dump_all_fes(void)
+{
+    int n = 0;
+
+    if (get_cb_hdr() != 0) {
+        fprintf(stderr, "error getting callback_state_header\n");
+        return;
+    }
+
+    while (n < hdrs.cb_hdr.nFEs) {
+        dump_fe(n);
+        n++;
+    }
+}
+
+/*
+ * Dump every callback of the current file entry, in index order, as
+ * counted by that entry's header.
+ */
+static void
+dump_all_cbs(void)
+{
+    int n = 0;
+
+    if (get_fe_hdr() != 0) {
+        fprintf(stderr, "error getting callback_state_entry_header\n");
+        return;
+    }
+
+    while (n < fe_cursor.hdr.nCBs) {
+        dump_cb(n);
+        n++;
+    }
+}
+
+/*
+ * Load host entry `idx` into the host cursor and print its header, disk
+ * entry, interface list and hcps list.  Reports an error on stderr if
+ * the entry cannot be loaded.
+ */
+static void
+dump_he(afs_uint32 idx)
+{
+    if (get_he(idx) == 0) {
+        DPFOFF(he_cursor.cursor);
+        dump_he_hdr();
+        dump_he_entry();
+        dump_he_interfaces();
+        dump_he_hcps();
+    } else {
+        fprintf(stderr, "error getting he %d\n", idx);
+    }
+}
+
+/*
+ * Load file entry `idx` into the FE cursor and print its header and disk
+ * entry.  Reports an error on stderr if the entry cannot be loaded.
+ */
+static void
+dump_fe(afs_uint32 idx)
+{
+    if (get_fe(idx) == 0) {
+        DPFOFF(fe_cursor.cursor);
+        dump_fe_hdr();
+        dump_fe_entry();
+    } else {
+        fprintf(stderr, "error getting fe %d\n", idx);
+    }
+}
+
+/*
+ * Load callback `idx` into the CB cursor and print it.  Reports an error
+ * on stderr if the callback cannot be loaded.
+ */
+static void
+dump_cb(afs_uint32 idx)
+{
+    if (get_cb(idx) == 0) {
+        DPFOFF(cb_cursor.cursor);
+        dump_cb_entry();
+    } else {
+        fprintf(stderr, "error getting cb %d\n", idx);
+    }
+}
+
+/*
+ * Dump the host entry the host cursor currently points at.
+ */
+static void
+dump_this_he(void)
+{
+    afs_uint32 current = he_cursor.idx;
+
+    dump_he(current);
+}
+
+/*
+ * Dump the file entry the FE cursor currently points at.
+ */
+static void
+dump_this_fe(void)
+{
+    afs_uint32 current = fe_cursor.idx;
+
+    dump_fe(current);
+}
+
+/*
+ * Dump the callback the CB cursor currently points at.
+ */
+static void
+dump_this_cb(void)
+{
+    afs_uint32 current = cb_cursor.idx;
+
+    dump_cb(current);
+}
+
+/*
+ * Dump the host entry following the current one, or report that there
+ * are no more.
+ */
+static void
+dump_next_he(void)
+{
+    afs_uint32 successor;
+
+    if (get_h_hdr() != 0) {
+        fprintf(stderr, "error getting host_state_header\n");
+        return;
+    }
+
+    successor = he_cursor.idx + 1;
+    if (successor >= hdrs.h_hdr.records) {
+        fprintf(stderr, "no more HEs\n");
+        return;
+    }
+
+    dump_he(successor);
+}
+
+/*
+ * Dump the file entry following the current one, or report that there
+ * are no more.
+ */
+static void
+dump_next_fe(void)
+{
+    afs_uint32 successor;
+
+    if (get_cb_hdr() != 0) {
+        fprintf(stderr, "error getting callback_state_header\n");
+        return;
+    }
+
+    successor = fe_cursor.idx + 1;
+    if (successor >= hdrs.cb_hdr.nFEs) {
+        fprintf(stderr, "no more FEs\n");
+        return;
+    }
+
+    dump_fe(successor);
+}
+
+/*
+ * Dump the callback following the current one within the current file
+ * entry, or report that there are no more.
+ */
+static void
+dump_next_cb(void)
+{
+    afs_uint32 successor;
+
+    if (get_fe_hdr() != 0) {
+        fprintf(stderr, "error getting callback_state_entry_header\n");
+        return;
+    }
+
+    successor = cb_cursor.idx + 1;
+    if (successor >= fe_cursor.hdr.nCBs) {
+        fprintf(stderr, "no more CBs\n");
+        return;
+    }
+
+    dump_cb(successor);
+}
+
+/*
+ * Dump the host entry preceding the current one, or report that the
+ * cursor is already at the first entry.
+ */
+static void
+dump_prev_he(void)
+{
+    if (he_cursor.idx == 0) {
+        fprintf(stderr, "no more HEs\n");
+    } else {
+        dump_he(he_cursor.idx - 1);
+    }
+}
+
+/*
+ * Dump the file entry preceding the current one, or report that the
+ * cursor is already at the first entry.
+ */
+static void
+dump_prev_fe(void)
+{
+    if (fe_cursor.idx == 0) {
+        fprintf(stderr, "no more FEs\n");
+    } else {
+        dump_fe(fe_cursor.idx - 1);
+    }
+}
+
+/*
+ * Dump the callback preceding the current one, or report that the cursor
+ * is already at the first callback.
+ */
+static void
+dump_prev_cb(void)
+{
+    if (cb_cursor.idx == 0) {
+        fprintf(stderr, "no more CBs\n");
+    } else {
+        dump_cb(cb_cursor.idx - 1);
+    }
+}
+
+/*
+ * Dump file entry 0, or report that the dump holds no file entries.
+ */
+static void
+dump_first_fe(void)
+{
+    if (get_cb_hdr() != 0) {
+        fprintf(stderr, "error getting callback_state_header\n");
+        return;
+    }
+
+    if (hdrs.cb_hdr.nFEs) {
+        dump_fe(0);
+    } else {
+        fprintf(stderr, "no FEs present\n");
+    }
+}
+
+/*
+ * Dump host entry 0, or report that the dump holds no host entries.
+ */
+static void
+dump_first_he(void)
+{
+    if (get_h_hdr() != 0) {
+        fprintf(stderr, "error getting host_state_header\n");
+        return;
+    }
+
+    if (hdrs.h_hdr.records) {
+        dump_he(0);
+    } else {
+        fprintf(stderr, "no HEs present\n");
+    }
+}
+
+/*
+ * Dump callback 0 of the current file entry, or report that it has no
+ * callbacks.
+ */
+static void
+dump_first_cb(void)
+{
+    if (get_fe_hdr() != 0) {
+        fprintf(stderr, "error getting callback_state_entry_header\n");
+        return;
+    }
+
+    if (fe_cursor.hdr.nCBs) {
+        dump_cb(0);
+    } else {
+        fprintf(stderr, "no CBs present\n");
+    }
+}
+
+/*
+ * Dump the highest-indexed host entry, or report that the dump holds no
+ * host entries.
+ */
+static void
+dump_last_he(void)
+{
+    if (get_h_hdr() != 0) {
+        fprintf(stderr, "error getting host_state_header\n");
+        return;
+    }
+
+    if (hdrs.h_hdr.records) {
+        dump_he(hdrs.h_hdr.records - 1);
+    } else {
+        fprintf(stderr, "no HEs present\n");
+    }
+}
+
+/*
+ * Dump the highest-indexed file entry, or report that the dump holds no
+ * file entries.
+ */
+static void
+dump_last_fe(void)
+{
+    if (get_cb_hdr() != 0) {
+        fprintf(stderr, "error getting callback_state_header\n");
+        return;
+    }
+
+    if (hdrs.cb_hdr.nFEs) {
+        dump_fe(hdrs.cb_hdr.nFEs - 1);
+    } else {
+        fprintf(stderr, "no FEs present\n");
+    }
+}
+
+/*
+ * Dump the highest-indexed callback of the current file entry, or report
+ * that it has no callbacks.
+ */
+static void
+dump_last_cb(void)
+{
+    if (get_fe_hdr() != 0) {
+        fprintf(stderr, "error getting callback_state_entry_header\n");
+        return;
+    }
+
+    if (fe_cursor.hdr.nCBs) {
+        dump_cb(fe_cursor.hdr.nCBs - 1);
+    } else {
+        fprintf(stderr, "no CBs present\n");
+    }
+}
+
+/*
+ * Print the host_state_entry_header held in the host cursor and report a
+ * magic mismatch on stderr.
+ */
+static void
+dump_he_hdr(void)
+{
+    int magic_ok;
+
+    DPFSO0("host_state_entry_header");
+    DPFX1("magic", he_cursor.hdr.magic);
+    DPFV1("len", "u", he_cursor.hdr.len);
+    DPFV1("interfaces", "u", he_cursor.hdr.interfaces);
+    DPFV1("hcps", "u", he_cursor.hdr.hcps);
+    DPFSC0;
+
+    magic_ok = (he_cursor.hdr.magic == HOST_STATE_ENTRY_MAGIC);
+    if (!magic_ok) {
+        fprintf(stderr, "* magic check failed\n");
+    }
+}
+
+/*
+ * Print the hostDiskEntry held in the host cursor.  InSameNetwork is
+ * shown only when the dump's fs_state_header has stats_detailed set; a
+ * build without FS_STATS_DETAILED reads it from padding1[0] instead.
+ */
+static void
+dump_he_entry(void)
+{
+    DPFSO0("hostDiskEntry");
+
+    /* identity and flags */
+    DPFS1("host", afs_inet_ntoa(he_cursor.he.host));
+    DPFV1("port", "u", he_cursor.he.port);
+    DPFX1("hostFlags", he_cursor.he.hostFlags);
+    DPFV1("Console", "u", he_cursor.he.Console);
+    DPFV1("hcpsfailed", "u", he_cursor.he.hcpsfailed);
+    DPFV1("hcps_valid", "u", he_cursor.he.hcps_valid);
+
+    if (hdrs.hdr.stats_detailed != 0) {
+#ifdef FS_STATS_DETAILED
+        DPFV1("InSameNetwork", "u", he_cursor.he.InSameNetwork);
+#else
+        DPFV1("InSameNetwork", "u", he_cursor.he.padding1[0]);
+#endif
+    }
+
+    DPFV1("hcps_len", "u", he_cursor.he.hcps_len);
+
+    /* call timestamps */
+    DPFT1("LastCall", he_cursor.he.LastCall);
+    DPFT1("ActiveCall", he_cursor.he.ActiveCall);
+    DPFT1("cpsCall", he_cursor.he.cpsCall);
+
+    DPFV1("cblist", "u", he_cursor.he.cblist);
+    DPFV1("index", "u", he_cursor.he.index);
+    DPFSC0;
+}
+
+/*
+ * Print the Interface structure of the current host entry.
+ *
+ * The data is copied from the map (he_cursor.ifp) into a heap buffer
+ * sized for the number of interfaces recorded in the entry header, and
+ * that copy is printed.  Nothing is printed when the header records no
+ * interfaces.  A mismatch between the header count and the structure's
+ * own numberOfInterfaces is reported on stderr.
+ */
+static void
+dump_he_interfaces(void)
+{
+    char uuid_text[40];
+    char label[40];
+    struct Interface * copy;
+    int nbytes, n;
+
+    if (he_cursor.hdr.interfaces == 0) {
+        return;
+    }
+
+    /* struct Interface already holds one AddrPort slot */
+    nbytes = sizeof(struct Interface) + ((he_cursor.hdr.interfaces-1)*sizeof(struct AddrPort));
+    copy = (struct Interface *) malloc(nbytes);
+    assert(copy != NULL);
+
+    memcpy(copy, he_cursor.ifp, nbytes);
+
+    DPFSO0("Interface");
+    DPFV1("numberOfInterfaces", "u", copy->numberOfInterfaces);
+
+    afsUUID_to_string(&copy->uuid, uuid_text, sizeof(uuid_text));
+    DPFS1("uuid", uuid_text);
+
+    n = 0;
+    while (n < he_cursor.hdr.interfaces) {
+        snprintf(label, sizeof(label), "interface[%d]", n);
+        DPFSO1(label);
+        DPFS2("addr", afs_inet_ntoa(copy->interface[n].addr));
+        DPFV2("port", "u", copy->interface[n].port);
+        DPFSC1;
+        n++;
+    }
+
+    DPFSC0;
+
+    if (!(he_cursor.hdr.interfaces == copy->numberOfInterfaces)) {
+        fprintf(stderr, "* interface count mismatch between header and Interface struct\n");
+    }
+    free(copy);
+}
+
+/*
+ * Print the hcps list of the current host entry, eight values per row.
+ *
+ * The values are copied from the map (he_cursor.hcps) into a heap buffer
+ * sized from the entry header's hcps count.  Nothing is printed when the
+ * header records no hcps.
+ */
+static void
+dump_he_hcps(void)
+{
+    afs_int32 * list;
+    int nbytes, n;
+
+    if (he_cursor.hdr.hcps == 0) {
+        return;
+    }
+
+    nbytes = (he_cursor.hdr.hcps)*sizeof(afs_uint32);
+    list = (afs_int32 *) malloc(nbytes);
+    assert(list != NULL);
+    memcpy(list, he_cursor.hcps, nbytes);
+
+    DPFSO0("hcps");
+    DPFAO1("prlist_val");
+
+    /* all but the last element carry a trailing separator */
+    n = 0;
+    while (n < he_cursor.hdr.hcps - 1) {
+        DPFAE("d", list[n]);
+        if ((n % 8) == 7) {
+            DPFAN;
+            DPFA2;
+        }
+        n++;
+    }
+    DPFALE("d", list[he_cursor.hdr.hcps-1]);
+
+    DPFAC1;
+    DPFSC0;
+    free(list);
+}
+
+/*
+ * Print the callback_state_entry_header held in the FE cursor and report
+ * a magic mismatch on stderr.
+ */
+static void
+dump_fe_hdr(void)
+{
+    int magic_ok;
+
+    DPFSO0("callback_state_entry_header");
+    DPFX1("magic", fe_cursor.hdr.magic);
+    DPFV1("len", "u", fe_cursor.hdr.len);
+    DPFV1("nCBs", "u", fe_cursor.hdr.nCBs);
+    DPFSC0;
+
+    magic_ok = (fe_cursor.hdr.magic == CALLBACK_STATE_ENTRY_MAGIC);
+    if (!magic_ok) {
+        fprintf(stderr, "* magic check failed\n");
+    }
+}
+
+static void
+dump_fe_entry(void)
+{ /* print the FEDiskEntry held in fe_cursor.fe: the nested fe record, then its index */
+ DPFSO0("FEDiskEntry");
+ DPFSO1("fe");
+ DPFV2("vnode", "u", fe_cursor.fe.fe.vnode);
+ DPFV2("unique", "u", fe_cursor.fe.fe.unique);
+ DPFV2("volid", "u", fe_cursor.fe.fe.volid);
+ DPFV2("fnext", "u", fe_cursor.fe.fe.fnext);
+ DPFV2("ncbs", "u", fe_cursor.fe.fe.ncbs);
+ DPFV2("firstcb", "u", fe_cursor.fe.fe.firstcb);
+ DPFV2("status", "u", fe_cursor.fe.fe.status);
+ DPFSC1;
+ DPFV1("index", "u", fe_cursor.fe.index);
+ DPFSC0;
+}
+
+static void
+dump_cb_entry(void)
+{ /* print the CBDiskEntry held in cb_cursor.cb: the nested cb record, then its index */
+ DPFSO0("CBDiskEntry");
+ DPFSO1("cb");
+ DPFV2("cnext", "u", cb_cursor.cb.cb.cnext);
+ DPFV2("fhead", "u", cb_cursor.cb.cb.fhead);
+ DPFV2("thead", "u", (afs_uint32)cb_cursor.cb.cb.thead); /* cast so the value matches the %u conversion */
+ DPFV2("status", "u", (afs_uint32)cb_cursor.cb.cb.status); /* cast so the value matches the %u conversion */
+ DPFV2("hhead", "u", cb_cursor.cb.cb.hhead);
+ DPFV2("tprev", "u", cb_cursor.cb.cb.tprev);
+ DPFV2("tnext", "u", cb_cursor.cb.cb.tnext);
+ DPFV2("hprev", "u", cb_cursor.cb.cb.hprev);
+ DPFV2("hnext", "u", cb_cursor.cb.cb.hnext);
+ DPFSC1;
+ DPFV1("index", "u", cb_cursor.cb.index);
+ DPFSC0;
+}
+
+#define DPFHMS printf(" ") /* gap printed at the 8-byte midpoint of a row */
+#define DPFHS printf(" ") /* filler for a byte position skipped at the start of a row */
+#define DPFHN(offset) printf("\n%u\t", offset) /* start a new row labelled with its offset */
+#define DPFHD(x) printf("%02X ", x) /* one byte as two hex digits */
+#define DPFHE printf("\n") /* terminate the dump */
+
+/*
+ * Hex dump `len` bytes of the memory map starting at byte `offset`, 16
+ * bytes per row with each row labelled by its offset.
+ *
+ * Nothing is printed for len == 0.  A range that does not lie entirely
+ * inside the map is rejected with a message on stderr.
+ *
+ * Changes relative to the original:
+ *  - the range check no longer relies on `offset + len`, which wraps in
+ *    32-bit arithmetic and let out-of-range requests through;
+ *  - the row label is printed once, not twice, when `offset` is a
+ *    multiple of 16;
+ *  - the midpoint gap is emitted for a start position in the second half
+ *    of a row;
+ *  - the unused local was removed and map_len is printed with a matching
+ *    conversion.
+ */
+static void
+hexdump_map(afs_uint32 offset, afs_uint32 len)
+{
+    afs_uint32 i;
+    unsigned char * p = (unsigned char *)map;
+
+    if (!len)
+        return;
+
+    if ((offset > map_len) || (len > (map_len - offset))) {
+        fprintf(stderr, "offset + length exceeds memory map size (%lu + %lu > %lu)\n",
+                (unsigned long)offset, (unsigned long)len,
+                (unsigned long)map_len);
+        return;
+    }
+
+    p += offset;
+    DPFOFF(p);
+
+    if (offset % 16) {
+        /* partial first row: label it and pad up to the start column */
+        DPFHN(offset);
+        for (i = offset % 16; i > 0; i--) {
+            DPFHS;
+        }
+        if ((offset % 16) > 8) {
+            DPFHMS;
+        }
+    }
+
+    for (i = 0; i < len; i++, p++, offset++) {
+        if (!(offset % 16)) {
+            DPFHN(offset);
+        } else if (!(offset % 8)) {
+            DPFHMS;
+        }
+        DPFHD(*p);
+    }
+    DPFHE;
+}
+
+/*
+ * Make sure hdrs.hdr holds a copy of the fs_state_header found at the
+ * start of the memory map.
+ *
+ * Returns 0 on success (including when the copy was already loaded) and
+ * 1 when the map is too small to contain the header.
+ */
+static int
+get_hdr(void)
+{
+    if (hdrs.hdr_valid) {
+        /* already loaded */
+        return 0;
+    }
+
+    if (map_len < sizeof(struct fs_state_header)) {
+        fprintf(stderr, "corrupt state dump: fs_state_header larger than memory map\n");
+        return 1;
+    }
+
+    memcpy(&hdrs.hdr, map, sizeof(hdrs.hdr));
+    hdrs.hdr_p = map;
+    hdrs.hdr_valid = 1;
+    return 0;
+}
+
+/*
+ * Make sure hdrs.h_hdr holds a copy of the host_state_header, located
+ * through the h_offset field of the fs_state_header.  Also records
+ * where the header lives in the map (hdrs.h_hdr_p) and the address just
+ * past it (he_cursor.fh).
+ *
+ * Returns 0 on success, 1 when the top-level header cannot be loaded or
+ * h_offset does not place the header inside the map.
+ *
+ * The original never set h_hdr_valid, so the early-out below could not
+ * trigger and the header was re-validated and re-copied on every call.
+ */
+static int
+get_h_hdr(void)
+{
+    char * buf;
+    afs_uint32 hi, lo;
+
+    if (hdrs.h_hdr_valid)
+        return 0;
+
+    if (get_hdr())
+        return 1;
+
+    SplitInt64(hdrs.hdr.h_offset, hi, lo);
+
+    if (hi) {
+        fprintf(stderr, "hi offset bits set in h_offset; can't get host_state_header\n");
+        return 1;
+    }
+    if ((lo >= map_len) ||
+        ((lo + sizeof(struct host_state_header)) > map_len) ||
+        (lo + sizeof(struct host_state_header) < lo)) {
+        fprintf(stderr, "h_offset puts host_state_header beyond end of memory map\n");
+        return 1;
+    }
+
+    buf = (char *) map;
+    buf += lo;
+    memcpy(&hdrs.h_hdr, buf, sizeof(struct host_state_header));
+    hdrs.h_hdr_p = buf;
+    buf += sizeof(struct host_state_header);
+    he_cursor.fh = (void *)buf;
+    hdrs.h_hdr_valid = 1;
+    return 0;
+}
+
+/*
+ * Make sure hdrs.cb_hdr holds a copy of the callback_state_header,
+ * located through the cb_offset field of the fs_state_header, and that
+ * fe_cursor.ffe points at the address given by the header's fe_offset.
+ *
+ * Returns 0 on success, 1 when a header cannot be loaded or an offset or
+ * record count would place data outside the map.
+ *
+ * Changes relative to the original:
+ *  - cb_hdr_valid is set only after the FE/CB region has been validated.
+ *    Previously a dump with a bad fe_offset failed on the first call but
+ *    "succeeded" on every later one, leaving fe_cursor.ffe unset.
+ *  - the size of the FE/CB region is checked by division rather than by
+ *    a sum held in a 32-bit variable, which could wrap.
+ */
+static int
+get_cb_hdr(void)
+{
+    char * buf;
+    afs_uint32 hi, lo;
+    size_t avail, fe_rec_len;
+    int fits;
+
+    if (hdrs.cb_hdr_valid)
+        return 0;
+
+    if (get_hdr())
+        return 1;
+
+    SplitInt64(hdrs.hdr.cb_offset, hi, lo);
+
+    if (hi) {
+        fprintf(stderr, "hi offset bits set in cb_offset; can't get callback_state_header\n");
+        return 1;
+    }
+    if ((lo >= map_len) ||
+        (sizeof(struct callback_state_header) > (map_len - lo))) {
+        fprintf(stderr, "cb_offset puts callback_state_header beyond end of memory map\n");
+        return 1;
+    }
+
+    buf = (char *) map;
+    buf += lo;
+    memcpy(&hdrs.cb_hdr, buf, sizeof(struct callback_state_header));
+    hdrs.cb_hdr_p = buf;
+
+    SplitInt64(hdrs.cb_hdr.fe_offset, hi, lo);
+
+    if (hi) {
+        fprintf(stderr, "hi offset bits set in fe_offset; can't get callback_state_entry_header\n");
+        return 1;
+    }
+
+    /* nFEs (entry header + FEDiskEntry) records plus nCBs CBDiskEntry
+     * records must fit between fe_offset and the end of the map */
+    fits = (lo <= map_len);
+    if (fits) {
+        avail = map_len - lo;
+        fe_rec_len = sizeof(struct callback_state_entry_header) +
+            sizeof(struct FEDiskEntry);
+        if (hdrs.cb_hdr.nFEs > (avail / fe_rec_len)) {
+            fits = 0;
+        } else {
+            avail -= (size_t)hdrs.cb_hdr.nFEs * fe_rec_len;
+            if (hdrs.cb_hdr.nCBs > (avail / sizeof(struct CBDiskEntry))) {
+                fits = 0;
+            }
+        }
+    }
+    if (!fits) {
+        fprintf(stderr, "fe_offset puts callback_state_entry_header beyond end of memory map\n");
+        return 1;
+    }
+
+    buf = (char *) map;
+    buf += lo;
+    fe_cursor.ffe = (void *)buf;
+
+    hdrs.cb_hdr_valid = 1;
+    return 0;
+}
+
+/*
+ * Find the callback_state_timeout_header via cb_hdr.timeout_offset and
+ * cache a copy in hdrs.timeout_hdr.  hdrs.timeout_hdr_p is left pointing
+ * at the header inside the map and hdrs.timeout_p at the data that
+ * follows it.  Returns 0 on success (or if already cached), 1 on error.
+ */
+static int
+get_cb_timeout_hdr(void)
+{
+    afs_uint32 off_hi, off_lo;
+    char * where;
+
+    if (hdrs.timeout_hdr_valid)
+        return 0;
+
+    if (get_cb_hdr())
+        return 1;
+
+    SplitInt64(hdrs.cb_hdr.timeout_offset, off_hi, off_lo);
+
+    /* only offsets representable in 32 bits are supported */
+    if (off_hi) {
+        fprintf(stderr, "hi offset bits set in timeout_offset; can't get callback_state_timeout_header\n");
+        return 1;
+    }
+
+    /* header must start inside the map, end inside it, and not wrap */
+    if ((map_len <= off_lo) ||
+        (map_len < (off_lo + sizeof(struct callback_state_timeout_header))) ||
+        (off_lo > off_lo + sizeof(struct callback_state_timeout_header))) {
+        fprintf(stderr, "timeout_offset puts callback_state_timeout_header beyond end of memory map\n");
+        return 1;
+    }
+
+    where = ((char *) map) + off_lo;
+    memcpy(&hdrs.timeout_hdr, where, sizeof(struct callback_state_timeout_header));
+    hdrs.timeout_hdr_p = where;
+    hdrs.timeout_hdr_valid = 1;
+
+    where += sizeof(struct callback_state_timeout_header);
+    hdrs.timeout_p = where;
+
+    return 0;
+}
+
+/*
+ * Make a private heap copy of the timeout table that follows the
+ * callback_state_timeout_header.
+ *
+ * hdrs.timeout doubles as the "already loaded" flag: once non-NULL the
+ * function returns immediately.  The table holds
+ * hdrs.timeout_hdr.records afs_uint32 values starting at hdrs.timeout_p.
+ *
+ * The record count comes from the dump file, so it is checked against
+ * the space remaining in the map before anything is copied.
+ *
+ * Returns 0 on success, 1 on error.
+ */
+static int
+get_cb_timeout(void)
+{
+    size_t nrec, used;
+
+    if (hdrs.timeout)
+        return 0;
+
+    if (get_cb_timeout_hdr())
+        return 1;
+
+    nrec = hdrs.timeout_hdr.records;
+    used = (size_t)((char *)hdrs.timeout_p - (char *)map);
+    /* division form avoids overflow in nrec * sizeof(afs_uint32) */
+    if ((used > (size_t)map_len) ||
+        (nrec > ((size_t)map_len - used) / sizeof(afs_uint32))) {
+        fprintf(stderr, "timeout table extends beyond end of memory map\n");
+        return 1;
+    }
+
+    /* calloc(0, ...) may legally return NULL; always ask for at least one
+     * element so an empty table does not trip the assert below */
+    hdrs.timeout = (afs_uint32 *) calloc(nrec ? nrec : 1, sizeof(afs_uint32));
+    assert(hdrs.timeout != NULL);
+    memcpy(hdrs.timeout, hdrs.timeout_p, nrec * sizeof(afs_uint32));
+    return 0;
+}
+
+/*
+ * Find the callback_state_fehash_header via cb_hdr.fehash_offset and
+ * cache a copy in hdrs.fehash_hdr.
+ *
+ * On success hdrs.fehash_hdr_p points at the header inside the map and
+ * hdrs.fehash_p at the data immediately after it.
+ *
+ * Returns 0 on success or when already cached, 1 on error.
+ */
+static int
+get_cb_fehash_hdr(void)
+{
+    char * buf;
+    afs_uint32 hi, lo;
+
+    if (hdrs.fehash_hdr_valid)
+        return 0;
+
+    if (get_cb_hdr())
+        return 1;
+
+    SplitInt64(hdrs.cb_hdr.fehash_offset, hi, lo);
+
+    if (hi) {
+        fprintf(stderr, "hi offset bits set in fehash_offset; can't get callback_state_fehash_header\n");
+        return 1;
+    }
+    /* third clause catches wrap-around of lo + sizeof(...) */
+    if ((lo >= map_len) ||
+        ((lo + sizeof(struct callback_state_fehash_header)) > map_len) ||
+        (lo + sizeof(struct callback_state_fehash_header) < lo)) {
+        /* the offset being validated here is fehash_offset */
+        fprintf(stderr, "fehash_offset puts callback_state_fehash_header beyond end of memory map\n");
+        return 1;
+    }
+
+    buf = (char *) map;
+    buf += lo;
+    memcpy(&hdrs.fehash_hdr, buf, sizeof(struct callback_state_fehash_header));
+    hdrs.fehash_hdr_p = buf;
+    hdrs.fehash_hdr_valid = 1;
+    buf += sizeof(struct callback_state_fehash_header);
+    hdrs.fehash_p = buf;
+
+    return 0;
+}
+
+/*
+ * Make a private heap copy of the FE hash table that follows the
+ * callback_state_fehash_header.
+ *
+ * hdrs.fehash doubles as the "already loaded" flag: once non-NULL the
+ * function returns immediately.  The table holds
+ * hdrs.fehash_hdr.records afs_uint32 values starting at hdrs.fehash_p.
+ *
+ * The record count comes from the dump file, so it is checked against
+ * the space remaining in the map before anything is copied.
+ *
+ * Returns 0 on success, 1 on error.
+ */
+static int
+get_cb_fehash(void)
+{
+    size_t nrec, used;
+
+    if (hdrs.fehash)
+        return 0;
+
+    if (get_cb_fehash_hdr())
+        return 1;
+
+    nrec = hdrs.fehash_hdr.records;
+    used = (size_t)((char *)hdrs.fehash_p - (char *)map);
+    /* division form avoids overflow in nrec * sizeof(afs_uint32) */
+    if ((used > (size_t)map_len) ||
+        (nrec > ((size_t)map_len - used) / sizeof(afs_uint32))) {
+        fprintf(stderr, "fehash table extends beyond end of memory map\n");
+        return 1;
+    }
+
+    /* calloc(0, ...) may legally return NULL; always ask for at least one
+     * element so an empty table does not trip the assert below */
+    hdrs.fehash = (afs_uint32 *) calloc(nrec ? nrec : 1, sizeof(afs_uint32));
+    assert(hdrs.fehash != NULL);
+    memcpy(hdrs.fehash, hdrs.fehash_p, nrec * sizeof(afs_uint32));
+    return 0;
+}
+
+/*
+ * Position he_cursor on host entry number idx and load its header and body.
+ *
+ * Entries are variable length: the start of entry idx is the start of
+ * entry idx-1 plus that entry's hdr.len.  he_cache.cursor[] remembers
+ * the start address of every entry visited so far (allocated lazily,
+ * one slot per record), so entries can be revisited without re-walking.
+ *
+ * Returns 0 on success, 1 if the host header cannot be read, idx is out
+ * of range, or loading the entry header/body fails.
+ */
+static int
+get_he(afs_uint32 idx)
+{
+ int i;
+ char * p;
+
+ if (get_h_hdr())
+ return 1;
+
+ if (idx >= hdrs.h_hdr.records)
+ return 1;
+
+ /* cursor already sits on this entry with both parts loaded */
+ if (he_cursor.idx == idx && he_cursor.hdr_valid && he_cursor.he_valid)
+ return 0;
+
+ he_cursor.hdr_valid = he_cursor.he_valid = 0;
+
+ if (he_cache.cursor == NULL) {
+ he_cache.cursor = (void **) calloc(hdrs.h_hdr.records, sizeof(void *));
+ assert(he_cache.cursor != NULL);
+ }
+
+ /* the predecessor's address is unknown: visit every earlier entry that
+ * has not been cached yet, in order, so cursor[idx-1] gets filled in.
+ * NOTE(review): the result of these recursive calls is ignored. */
+ if (idx && he_cache.cursor[idx-1] == NULL) {
+ for (i = 0; i < idx; i++) {
+ if (he_cache.cursor[i] == NULL) {
+ get_he(i);
+ }
+ }
+ }
+
+ if (!idx) {
+ /* entry 0 starts right after the host_state_header */
+ he_cursor.cursor = he_cursor.fh;
+ } else if (he_cursor.cursor == he_cache.cursor[idx-1]) {
+ /* cursor is still on the predecessor; he_cursor.hdr is reused as-is
+ * (only the valid flag was cleared above) to step over it */
+ p = (char *)he_cursor.cursor;
+ p += he_cursor.hdr.len;
+ he_cursor.cursor = (void *)p;
+ } else {
+ /* jump to the cached predecessor, reload its header, step over it */
+ he_cursor.cursor = he_cache.cursor[idx-1];
+ if (get_he_hdr())
+ return 1;
+ p = (char *)he_cursor.cursor;
+ p += he_cursor.hdr.len;
+ he_cursor.cursor = (void *)p;
+ }
+
+ he_cursor.idx = idx;
+ he_cache.cursor[idx] = he_cursor.cursor;
+
+ if (get_he_hdr())
+ return 1;
+ if (get_he_entry())
+ return 1;
+
+ return 0;
+}
+
+/*
+ * Copy the host_state_entry_header found at he_cursor.cursor into
+ * he_cursor.hdr and mark it valid.
+ *
+ * The cursor is derived from length fields stored in the dump file, so
+ * it is verified to lie inside the memory map (with room for a whole
+ * header) before it is dereferenced.
+ *
+ * Returns 0 on success, 1 if the cursor does not address a complete
+ * header inside the map.
+ */
+static int
+get_he_hdr(void)
+{
+    char * base = (char *) map;
+    char * p = (char *) he_cursor.cursor;
+    size_t off;
+
+    if (p == NULL || p < base) {
+        fprintf(stderr, "host entry cursor lies outside of memory map\n");
+        return 1;
+    }
+    off = (size_t)(p - base);
+    /* subtraction form cannot overflow once off <= map_len is known */
+    if ((off > (size_t)map_len) ||
+        (((size_t)map_len - off) < sizeof(struct host_state_entry_header))) {
+        fprintf(stderr, "host_state_entry_header extends beyond end of memory map\n");
+        return 1;
+    }
+
+    memcpy(&he_cursor.hdr, p, sizeof(struct host_state_entry_header));
+    he_cursor.hdr_valid = 1;
+    return 0;
+}
+
+/*
+ * Load the hostDiskEntry that follows the entry header at
+ * he_cursor.cursor, then compute where the trailing data lives:
+ * he_cursor.ifp points just past the hostDiskEntry and he_cursor.hcps
+ * past the interface list (or at the same place as ifp when the header
+ * says there are no interfaces).  Returns 0 on success, 1 on error.
+ */
+static int
+get_he_entry(void)
+{
+    char * pos;
+
+    /* the header supplies the interface count used below */
+    if (!he_cursor.hdr_valid && get_he_hdr())
+        return 1;
+
+    pos = ((char *) he_cursor.cursor) + sizeof(struct host_state_entry_header);
+    memcpy(&he_cursor.he, pos, sizeof(struct hostDiskEntry));
+    he_cursor.he_valid = 1;
+
+    pos += sizeof(struct hostDiskEntry);
+    he_cursor.ifp = (void *)pos;
+
+    if (!he_cursor.hdr.interfaces) {
+        he_cursor.hcps = he_cursor.ifp;
+        return 0;
+    }
+
+    /* one struct Interface plus (interfaces - 1) extra AddrPort slots */
+    pos += sizeof(struct Interface) + ((he_cursor.hdr.interfaces-1)*sizeof(struct AddrPort));
+    he_cursor.hcps = (void *)pos;
+    return 0;
+}
+
+/*
+ * Position fe_cursor on file entry number idx and load its header and body.
+ *
+ * Entries are variable length: the start of entry idx is the start of
+ * entry idx-1 plus that entry's hdr.len.  fe_cache.cursor[] remembers
+ * the start address of every entry visited so far (allocated lazily,
+ * one slot per FE).  Any cached callback in cb_cursor is invalidated on
+ * every call, before anything else is checked.
+ *
+ * Returns 0 on success, 1 if the callback header cannot be read, idx is
+ * out of range, or loading the entry header/body fails.
+ */
+static int
+get_fe(afs_uint32 idx)
+{
+ int i;
+ char * p;
+
+ cb_cursor.cb_valid = 0;
+
+ if (get_cb_hdr())
+ return 1;
+
+ if (idx >= hdrs.cb_hdr.nFEs)
+ return 1;
+
+ /* cursor already sits on this entry with both parts loaded */
+ if (fe_cursor.idx == idx && fe_cursor.hdr_valid && fe_cursor.fe_valid)
+ return 0;
+
+ fe_cursor.hdr_valid = fe_cursor.fe_valid = 0;
+
+ if (fe_cache.cursor == NULL) {
+ fe_cache.cursor = (void **) calloc(hdrs.cb_hdr.nFEs, sizeof(void *));
+ assert(fe_cache.cursor != NULL);
+ }
+
+ /* the predecessor's address is unknown: visit every earlier entry that
+ * has not been cached yet, in order, so cursor[idx-1] gets filled in.
+ * NOTE(review): the result of these recursive calls is ignored. */
+ if (idx && fe_cache.cursor[idx-1] == NULL) {
+ for (i = 0; i < idx; i++) {
+ if (fe_cache.cursor[i] == NULL) {
+ get_fe(i);
+ }
+ }
+ }
+
+ if (!idx) {
+ /* entry 0 is where get_cb_hdr() left fe_cursor.ffe */
+ fe_cursor.cursor = fe_cursor.ffe;
+ } else if (fe_cursor.cursor == fe_cache.cursor[idx-1]) {
+ /* cursor is still on the predecessor; fe_cursor.hdr is reused as-is
+ * (only the valid flag was cleared above) to step over it */
+ p = (char *)fe_cursor.cursor;
+ p += fe_cursor.hdr.len;
+ fe_cursor.cursor = (void *)p;
+ } else {
+ /* jump to the cached predecessor, reload its header, step over it */
+ fe_cursor.cursor = fe_cache.cursor[idx-1];
+ if (get_fe_hdr())
+ return 1;
+ p = (char *)fe_cursor.cursor;
+ p += fe_cursor.hdr.len;
+ fe_cursor.cursor = (void *)p;
+ }
+
+ fe_cursor.idx = idx;
+ fe_cache.cursor[idx] = fe_cursor.cursor;
+
+ if (get_fe_hdr())
+ return 1;
+ if (get_fe_entry())
+ return 1;
+
+ return 0;
+}
+
+/*
+ * Copy the callback_state_entry_header found at fe_cursor.cursor into
+ * fe_cursor.hdr and mark it valid.
+ *
+ * The cursor is derived from length fields stored in the dump file, so
+ * it is verified to lie inside the memory map (with room for a whole
+ * header) before it is dereferenced.
+ *
+ * Returns 0 on success, 1 if the cursor does not address a complete
+ * header inside the map.
+ */
+static int
+get_fe_hdr(void)
+{
+    char * base = (char *) map;
+    char * p = (char *) fe_cursor.cursor;
+    size_t off;
+
+    if (p == NULL || p < base) {
+        fprintf(stderr, "file entry cursor lies outside of memory map\n");
+        return 1;
+    }
+    off = (size_t)(p - base);
+    /* subtraction form cannot overflow once off <= map_len is known */
+    if ((off > (size_t)map_len) ||
+        (((size_t)map_len - off) < sizeof(struct callback_state_entry_header))) {
+        fprintf(stderr, "callback_state_entry_header extends beyond end of memory map\n");
+        return 1;
+    }
+
+    memcpy(&fe_cursor.hdr, p, sizeof(struct callback_state_entry_header));
+    fe_cursor.hdr_valid = 1;
+    return 0;
+}
+
+/*
+ * Load the FEDiskEntry that follows the entry header at fe_cursor.cursor
+ * into fe_cursor.fe, and leave fe_cursor.fcb pointing just past it
+ * (get_cb() indexes CBDiskEntry records from there).
+ * Returns 0 on success, 1 if the entry header could not be loaded.
+ */
+static int
+get_fe_entry(void)
+{
+    char * pos;
+
+    /* make sure the entry header has been read first */
+    if (!fe_cursor.hdr_valid && get_fe_hdr())
+        return 1;
+
+    pos = ((char *) fe_cursor.cursor) + sizeof(struct callback_state_entry_header);
+    memcpy(&fe_cursor.fe, pos, sizeof(struct FEDiskEntry));
+    fe_cursor.fe_valid = 1;
+
+    pos += sizeof(struct FEDiskEntry);
+    fe_cursor.fcb = (void *)pos;
+    return 0;
+}
+
+/*
+ * Position cb_cursor on callback number idx of the currently selected
+ * file entry and load it.  Callbacks are fixed-size CBDiskEntry records
+ * laid out back to back starting at fe_cursor.fcb.
+ *
+ * Returns 0 on success, 1 if the file entry cannot be (re)loaded, idx is
+ * not below fe_cursor.hdr.nCBs, or the record cannot be read.
+ */
+static int
+get_cb(afs_uint32 idx)
+{
+    char * rec;
+
+    /* refresh the owning FE; note get_fe() clears cb_cursor.cb_valid */
+    if (get_fe(fe_cursor.idx))
+        return 1;
+
+    if (idx >= fe_cursor.hdr.nCBs)
+        return 1;
+
+    if (cb_cursor.cb_valid && idx == cb_cursor.idx)
+        return 0;
+
+    cb_cursor.cb_valid = 0;
+
+    rec = ((char *)fe_cursor.fcb) + idx * sizeof(struct CBDiskEntry);
+    cb_cursor.cursor = (void *)rec;
+    cb_cursor.idx = idx;
+
+    return get_cb_entry() ? 1 : 0;
+}
+
+/*
+ * Copy the CBDiskEntry found at cb_cursor.cursor into cb_cursor.cb and
+ * mark it valid.
+ *
+ * The cursor is computed from counts and lengths stored in the dump
+ * file, so it is verified to lie inside the memory map (with room for a
+ * whole record) before it is dereferenced.
+ *
+ * Returns 0 on success, 1 if the cursor does not address a complete
+ * record inside the map.
+ */
+static int
+get_cb_entry(void)
+{
+    char * base = (char *) map;
+    char * p = (char *) cb_cursor.cursor;
+    size_t off;
+
+    if (p == NULL || p < base) {
+        fprintf(stderr, "callback cursor lies outside of memory map\n");
+        return 1;
+    }
+    off = (size_t)(p - base);
+    /* subtraction form cannot overflow once off <= map_len is known */
+    if ((off > (size_t)map_len) ||
+        (((size_t)map_len - off) < sizeof(struct CBDiskEntry))) {
+        fprintf(stderr, "CBDiskEntry extends beyond end of memory map\n");
+        return 1;
+    }
+
+    memcpy(&cb_cursor.cb, p, sizeof(struct CBDiskEntry));
+    cb_cursor.cb_valid = 1;
+    return 0;
+}
+
+/*
+ * Scan the host entries in file order for the one whose stored index
+ * field equals idx; dump it via dump_this_he() when found.
+ * Returns 0 if an entry matched, 1 if none did or a read failed.
+ */
+static int
+find_he_by_index(afs_uint32 idx)
+{
+    int i;
+
+    if (get_h_hdr())
+        return 1;
+
+    for (i = 0; i < hdrs.h_hdr.records; i++) {
+        if (get_he(i)) {
+            fprintf(stderr, "error getting he %d\n", i);
+            return 1;
+        }
+        if (he_cursor.he.index != idx)
+            continue;
+
+        /* cursor is on the matching entry */
+        dump_this_he();
+        return 0;
+    }
+
+    /* ran off the end without a match */
+    return 1;
+}
+
+/*
+ * Scan the file entries in file order for the one whose stored index
+ * field equals idx; dump it via dump_this_fe() when found.
+ * Returns 0 if an entry matched, 1 if none did or a read failed.
+ */
+static int
+find_fe_by_index(afs_uint32 idx)
+{
+    int i;
+
+    if (get_cb_hdr())
+        return 1;
+
+    for (i = 0; i < hdrs.cb_hdr.nFEs; i++) {
+        if (get_fe(i)) {
+            fprintf(stderr, "error getting fe %d\n", i);
+            return 1;
+        }
+        if (fe_cursor.fe.index != idx)
+            continue;
+
+        /* cursor is on the matching entry */
+        dump_this_fe();
+        return 0;
+    }
+
+    /* ran off the end without a match */
+    return 1;
+}
+
+/*
+ * Scan the file entries in file order for the one whose
+ * (volid, vnode, unique) triple matches the arguments; dump it via
+ * dump_this_fe() when found.
+ * Returns 0 if an entry matched, 1 if none did or a read failed.
+ */
+static int
+find_fe_by_fid(afs_uint32 volid, afs_uint32 vnode, afs_uint32 unique)
+{
+    int i;
+
+    if (get_cb_hdr())
+        return 1;
+
+    for (i = 0; i < hdrs.cb_hdr.nFEs; i++) {
+        if (get_fe(i)) {
+            fprintf(stderr, "error getting fe %d\n", i);
+            return 1;
+        }
+
+        /* all three components must agree */
+        if (fe_cursor.fe.fe.unique != unique)
+            continue;
+        if (fe_cursor.fe.fe.volid != volid)
+            continue;
+        if (fe_cursor.fe.fe.vnode != vnode)
+            continue;
+
+        dump_this_fe();
+        return 0;
+    }
+
+    /* ran off the end without a match */
+    return 1;
+}
+
+/*
+ * Within the currently selected file entry, scan its callbacks for the
+ * one whose stored index field equals idx; dump it via dump_this_cb()
+ * when found.
+ *
+ * The owning file entry is (re)loaded through get_fe(fe_cursor.idx),
+ * the same way get_cb() does it, rather than by calling get_fe_hdr()
+ * directly: get_fe_hdr() reads straight from fe_cursor.cursor, which is
+ * only assigned inside get_fe().
+ * NOTE(review): presumably fe_cursor starts out zeroed, so with no prior
+ * selection this searches file entry 0 -- confirm that is acceptable.
+ *
+ * Returns 0 if a callback matched, 1 if none did or a read failed.
+ */
+static int
+find_cb_by_index(afs_uint32 idx)
+{
+    int i;
+
+    if (get_fe(fe_cursor.idx)) {
+        return 1;
+    }
+
+    for (i = 0; i < fe_cursor.hdr.nCBs; i++) {
+        if (get_cb(i)) {
+            fprintf(stderr, "error getting cb %d\n", i);
+            return 1;
+        }
+        if (cb_cursor.cb.index == idx)
+            break;
+    }
+
+    /* i stopped short of nCBs only if the loop hit a match */
+    if (i < fe_cursor.hdr.nCBs) {
+        dump_this_cb();
+        return 0;
+    }
+    return 1;
+}
+
+#endif /* AFS_DEMAND_ATTACH_FS */
HELPER_SPLINT=@HELPER_SPLINT@
CC=${MT_CC}
-CFLAGS=${COMMON_CFLAGS} -I.. -DNINTERFACE ${MT_CFLAGS} -DRXDEBUG
+CFLAGS=${COMMON_CFLAGS} -I.. -DNINTERFACE ${MT_CFLAGS} -DRXDEBUG -DFSSYNC_BUILD_CLIENT
CCRULE=${CC} ${CFLAGS} -c $?
DIROBJS=buffer.o dir.o salvage.o
-VOLOBJS= vnode.o volume.o vutil.o partition.o fssync.o purge.o \
- clone.o devname.o common.o ihandle.o listinodes.o namei_ops.o nuke.o
+VOLOBJS= vnode.o volume.o vutil.o partition.o fssync-client.o purge.o \
+ clone.o devname.o common.o ihandle.o listinodes.o \
+ namei_ops.o nuke.o salvsync-client.o daemon_com.o
FSINTOBJS=# afsaux.o afscbint.cs.o afsint.ss.o afsint.xdr.o
nuke.o: ${VOL}/nuke.c
${COMPILE}
-fssync.o: ${VOL}/fssync.c
+fssync-client.o: ${VOL}/fssync-client.c
+ ${COMPILE}
+
+salvsync-client.o: ${VOL}/salvsync-client.c
+ ${COMPILE}
+
+daemon_com.o: ${VOL}/daemon_com.c
${COMPILE}
purge.o: ${VOL}/purge.c
objects = assert.o base64.o casestrcpy.o ktime.o volparse.o hostparse.o \
hputil.o kreltime.o isathing.o get_krbrlm.o uuid.o serverLog.o \
dirpath.o fileutil.o netutils.o flipbase64.o fstab.o \
- afs_atomlist.o afs_lhash.o snprintf.o strlcat.o strlcpy.o \
+ afs_atomlist.o afs_lhash.o snprintf.o strlcat.o strlcpy.o strnlen.o \
daemon.o rxkstats.o ${REGEX_OBJ}
includes = \
strlcpy.o: ${srcdir}/strlcpy.c ${includes}
${CCOBJ} ${CFLAGS} -c ${srcdir}/strlcpy.c
+strnlen.o: ${srcdir}/strnlen.c ${includes}
+ ${CCOBJ} ${CFLAGS} -c ${srcdir}/strnlen.c
+
daemon.o: ${srcdir}/daemon.c ${includes}
${CCOBJ} ${CFLAGS} -c ${srcdir}/daemon.c
extern size_t strlcat(char *dst, const char *src, size_t siz);
#endif
+/* strnlen.c */
+extern size_t afs_strnlen(char * buf, size_t len);
+
/* sys.c */
extern void afs_ntohuuid(afsUUID * uuidp);
extern afs_int32 afs_uuid_create(afsUUID * uuid);
extern u_short afs_uuid_hash(afsUUID * uuid);
+#if !defined(KERNEL) && !defined(UKERNEL)
+extern int afsUUID_from_string(const char *str, afsUUID * uuid);
+extern int afsUUID_to_string(const afsUUID * uuid, char *str, size_t strsz);
+#endif
/* volparse.c */
extern afs_int32 volutil_GetPartitionID(char *aname);
pathp = dirPathArray[AFSDIR_SERVER_SLVGLOG_FILEPATH_ID];
AFSDIR_SERVER_FILEPATH(pathp, AFSDIR_LOGS_DIR, AFSDIR_SLVGLOG_FILE);
+ pathp = dirPathArray[AFSDIR_SERVER_SALSRVLOG_FILEPATH_ID];
+ AFSDIR_SERVER_FILEPATH(pathp, AFSDIR_LOGS_DIR, AFSDIR_SALSRVLOG_FILE);
+
pathp = dirPathArray[AFSDIR_SERVER_SALVAGER_FILEPATH_ID];
AFSDIR_SERVER_FILEPATH(pathp, AFSDIR_SERVER_BIN_DIR,
AFSDIR_SALVAGER_FILE);
+ pathp = dirPathArray[AFSDIR_SERVER_SALSRV_FILEPATH_ID];
+ AFSDIR_SERVER_FILEPATH(pathp, AFSDIR_SERVER_BIN_DIR,
+ AFSDIR_SALSRV_FILE);
+
pathp = dirPathArray[AFSDIR_SERVER_SLVGLOCK_FILEPATH_ID];
AFSDIR_SERVER_FILEPATH(pathp, AFSDIR_LOCAL_DIR, AFSDIR_SLVGLOCK_FILE);
pathp = dirPathArray[AFSDIR_SERVER_KRB_EXCL_FILEPATH_ID];
AFSDIR_SERVER_FILEPATH(pathp, AFSDIR_SERVER_ETC_DIR, AFSDIR_KRB_EXCL_FILE);
+ pathp = dirPathArray[AFSDIR_SERVER_FSSTATE_FILEPATH_ID];
+ AFSDIR_SERVER_FILEPATH(pathp, AFSDIR_LOCAL_DIR, AFSDIR_FSSTATE_FILE);
+
/* client file paths */
#ifdef AFS_NT40_ENV
strcpy(dirPathArray[AFSDIR_CLIENT_THISCELL_FILEPATH_ID],
#define AFSDIR_VLOG_FILE "VLLog"
#define AFSDIR_CORE_FILE "core"
#define AFSDIR_SLVGLOG_FILE "SalvageLog"
+#define AFSDIR_SALSRVLOG_FILE "SalsrvLog"
#define AFSDIR_SALVAGER_FILE "salvager"
+#define AFSDIR_SALSRV_FILE "salvageserver"
#define AFSDIR_SLVGLOCK_FILE "salvage.lock"
#define AFSDIR_BOZCONF_FILE "BosConfig"
#define AFSDIR_BOZCONFNEW_FILE "BosConfig.new"
#define AFSDIR_FILELOG_FILE "FileLog"
#define AFSDIR_MIGRATE_LOGNAME "wtlog."
+#define AFSDIR_FSSTATE_FILE "fsstate.dat"
+
#define AFSDIR_CELLSERVDB_FILE_NTCLIENT "afsdcell.ini"
#define AFSDIR_NETINFO_FILE "NetInfo"
#define AFSDIR_CANONICAL_SERVER_SALVAGER_FILEPATH \
AFSDIR_CANONICAL_SERVER_BIN_DIRPATH "/" AFSDIR_SALVAGER_FILE
+#define AFSDIR_CANONICAL_SERVER_SALSRV_FILEPATH \
+AFSDIR_CANONICAL_SERVER_BIN_DIRPATH "/" AFSDIR_SALSRV_FILE
+
#define AFSDIR_CANONICAL_SERVER_SLVGLOG_FILEPATH \
AFSDIR_CANONICAL_SERVER_LOGS_DIRPATH "/" AFSDIR_SLVGLOG_FILE
+#define AFSDIR_CANONICAL_SERVER_SALSRVLOG_FILEPATH \
+AFSDIR_CANONICAL_SERVER_LOGS_DIRPATH "/" AFSDIR_SALSRVLOG_FILE
+
/* --------------------- Local path macros ---------------------- */
AFSDIR_SERVER_BIN_FILE_DIRPATH_ID,
AFSDIR_CLIENT_CELLALIAS_FILEPATH_ID,
AFSDIR_SERVER_KRB_EXCL_FILEPATH_ID,
+ AFSDIR_SERVER_SALSRV_FILEPATH_ID,
+ AFSDIR_SERVER_SALSRVLOG_FILEPATH_ID,
+ AFSDIR_SERVER_FSSTATE_FILEPATH_ID,
AFSDIR_PATHSTRING_MAX } afsdir_id_t;
/* getDirPath() returns a pointer to a string from an internal array of path strings
#define AFSDIR_SERVER_VLOG_FILEPATH getDirPath(AFSDIR_SERVER_VLOG_FILEPATH_ID)
#define AFSDIR_SERVER_CORELOG_FILEPATH getDirPath(AFSDIR_SERVER_CORELOG_FILEPATH_ID)
#define AFSDIR_SERVER_SLVGLOG_FILEPATH getDirPath(AFSDIR_SERVER_SLVGLOG_FILEPATH_ID)
+#define AFSDIR_SERVER_SALSRVLOG_FILEPATH getDirPath(AFSDIR_SERVER_SALSRVLOG_FILEPATH_ID)
#define AFSDIR_SERVER_SALVAGER_FILEPATH getDirPath(AFSDIR_SERVER_SALVAGER_FILEPATH_ID)
+#define AFSDIR_SERVER_SALSRV_FILEPATH getDirPath(AFSDIR_SERVER_SALSRV_FILEPATH_ID)
#define AFSDIR_SERVER_BOZCONF_FILEPATH getDirPath(AFSDIR_SERVER_BOZCONF_FILEPATH_ID)
#define AFSDIR_SERVER_BOZCONFNEW_FILEPATH getDirPath(AFSDIR_SERVER_BOZCONFNEW_FILEPATH_ID)
#define AFSDIR_SERVER_BOZINIT_FILEPATH getDirPath(AFSDIR_SERVER_BOZINIT_FILEPATH_ID)
#define AFSDIR_SERVER_THRESHOLD_CONSTANTS_FILEPATH getDirPath(AFSDIR_SERVER_THRESHOLD_CONSTANTS_FILEPATH_ID)
#define AFSDIR_SERVER_MIGRATELOG_FILEPATH getDirPath(AFSDIR_SERVER_MIGRATELOG_FILEPATH_ID)
#define AFSDIR_SERVER_KRB_EXCL_FILEPATH getDirPath(AFSDIR_SERVER_KRB_EXCL_FILEPATH_ID)
+#define AFSDIR_SERVER_FSSTATE_FILEPATH getDirPath(AFSDIR_SERVER_FSSTATE_FILEPATH_ID)
/* client file paths */
#define AFSDIR_CLIENT_THISCELL_FILEPATH getDirPath(AFSDIR_CLIENT_THISCELL_FILEPATH_ID)
#define AFSDIR_VLOG_FILE "VLLog"
#define AFSDIR_CORE_FILE "core"
#define AFSDIR_SLVGLOG_FILE "SalvageLog"
+#define AFSDIR_SALSRVLOG_FILE "SalsrvLog"
#define AFSDIR_SALVAGER_FILE "salvager"
+#define AFSDIR_SALSRV_FILE "salvageserver"
#define AFSDIR_SLVGLOCK_FILE "salvage.lock"
#define AFSDIR_BOZCONF_FILE "BosConfig"
#define AFSDIR_BOZCONFNEW_FILE "BosConfig.new"
#define AFSDIR_FILELOG_FILE "FileLog"
#define AFSDIR_MIGRATE_LOGNAME "wtlog."
+#define AFSDIR_FSSTATE_FILE "fsstate.dat"
+
#ifdef COMMENT
#define AFSDIR_CELLSERVDB_FILE_NTCLIENT "afsdcell.ini"
#else
#define AFSDIR_CANONICAL_SERVER_SALVAGER_FILEPATH \
AFSDIR_CANONICAL_SERVER_BIN_DIRPATH "/" AFSDIR_SALVAGER_FILE
+#define AFSDIR_CANONICAL_SERVER_SALSRV_FILEPATH \
+AFSDIR_CANONICAL_SERVER_BIN_DIRPATH "/" AFSDIR_SALSRV_FILE
+
#define AFSDIR_CANONICAL_SERVER_SLVGLOG_FILEPATH \
AFSDIR_CANONICAL_SERVER_LOGS_DIRPATH "/" AFSDIR_SLVGLOG_FILE
+#define AFSDIR_CANONICAL_SERVER_SALSRVLOG_FILEPATH \
+AFSDIR_CANONICAL_SERVER_LOGS_DIRPATH "/" AFSDIR_SALSRVLOG_FILE
+
/* --------------------- Local path macros ---------------------- */
AFSDIR_SERVER_BIN_FILE_DIRPATH_ID,
AFSDIR_CLIENT_CELLALIAS_FILEPATH_ID,
AFSDIR_SERVER_KRB_EXCL_FILEPATH_ID,
+ AFSDIR_SERVER_SALSRV_FILEPATH_ID,
+ AFSDIR_SERVER_SALSRVLOG_FILEPATH_ID,
+ AFSDIR_SERVER_FSSTATE_FILEPATH_ID,
AFSDIR_PATHSTRING_MAX
} afsdir_id_t;
#define AFSDIR_SERVER_VLOG_FILEPATH getDirPath(AFSDIR_SERVER_VLOG_FILEPATH_ID)
#define AFSDIR_SERVER_CORELOG_FILEPATH getDirPath(AFSDIR_SERVER_CORELOG_FILEPATH_ID)
#define AFSDIR_SERVER_SLVGLOG_FILEPATH getDirPath(AFSDIR_SERVER_SLVGLOG_FILEPATH_ID)
+#define AFSDIR_SERVER_SALSRVLOG_FILEPATH getDirPath(AFSDIR_SERVER_SALSRVLOG_FILEPATH_ID)
#define AFSDIR_SERVER_SALVAGER_FILEPATH getDirPath(AFSDIR_SERVER_SALVAGER_FILEPATH_ID)
+#define AFSDIR_SERVER_SALSRV_FILEPATH getDirPath(AFSDIR_SERVER_SALSRV_FILEPATH_ID)
#define AFSDIR_SERVER_BOZCONF_FILEPATH getDirPath(AFSDIR_SERVER_BOZCONF_FILEPATH_ID)
#define AFSDIR_SERVER_BOZCONFNEW_FILEPATH getDirPath(AFSDIR_SERVER_BOZCONFNEW_FILEPATH_ID)
#define AFSDIR_SERVER_BOZINIT_FILEPATH getDirPath(AFSDIR_SERVER_BOZINIT_FILEPATH_ID)
#define AFSDIR_SERVER_THRESHOLD_CONSTANTS_FILEPATH getDirPath(AFSDIR_SERVER_THRESHOLD_CONSTANTS_FILEPATH_ID)
#define AFSDIR_SERVER_MIGRATELOG_FILEPATH getDirPath(AFSDIR_SERVER_MIGRATELOG_FILEPATH_ID)
#define AFSDIR_SERVER_KRB_EXCL_FILEPATH getDirPath(AFSDIR_SERVER_KRB_EXCL_FILEPATH_ID)
+#define AFSDIR_SERVER_FSSTATE_FILEPATH getDirPath(AFSDIR_SERVER_FSSTATE_FILEPATH_ID)
/* client file paths */
#define AFSDIR_CLIENT_THISCELL_FILEPATH getDirPath(AFSDIR_CLIENT_THISCELL_FILEPATH_ID)
* to THIS server to find out where */
#define VIO 112 /* Vnode temporarily unaccessible, but not known
* to be permanently bad. */
+#define VSALVAGING 113 /* Volume is being salvaged (demand attach fs) */
#define VRESTRICTED 120 /* Volume is restricted from using one or more
* of the given residencies; do a
* vos examine to find out the current
--- /dev/null
+/*
+ * Copyright 2006, Sine Nomine Associates and others.
+ * All Rights Reserved.
+ *
+ * This software has been released under the terms of the IBM Public
+ * License. For details, see the LICENSE file in the top-level source
+ * directory or online at http://www.openafs.org/dl/license10.html
+ */
+
+/* strnlen.c - fixed length string length */
+
+#include <afsconfig.h>
+#include <afs/param.h>
+
+RCSID
+ ("$Header$");
+
+#include <sys/types.h>
+#include <stdarg.h>
+#include <ctype.h>
+
+
+/*
+ * Bounded string length: count the bytes in buf before the first NUL,
+ * looking at no more than len bytes.  Returns len when no NUL is found
+ * within the first len bytes.
+ */
+size_t
+afs_strnlen(char * buf, size_t len)
+{
+    size_t n = 0;
+
+    /* stop at the limit or at the terminator, whichever comes first */
+    while (n < len && buf[n] != '\0')
+        n++;
+
+    return n;
+}
+
${TOP_INCDIR}/afs/afsint.h \
viced.h \
host.h \
+ callback.h \
fs_stats.h
objects=viced.o \
# License. For details, see the LICENSE file in the top-level source
# directory or online at http://www.openafs.org/dl/license10.html
+AFSDEV_AUXCDEFINES = -DFSSYNC_BUILD_SERVER
+
RELDIR=viced
!INCLUDE ..\config\NTMakefile.$(SYS_NAME)
!INCLUDE ..\config\NTMakefile.version
#include "viced_prototypes.h"
#include "viced.h"
#include "host.h"
+#include "callback.h"
#include <afs/unified_afs.h>
#include <afs/audit.h>
#include <afs/afsutil.h>
/*
* Externals used by the xstat code.
*/
-extern int VolumeCacheSize, VolumeGets, VolumeReplacements;
+extern VolPkgStats VStats;
extern int CEs, CEBlocks;
extern int HTs, HTBlocks;
CheckVnode(AFSFid * fid, Volume ** volptr, Vnode ** vptr, int lock)
{
int fileCode = 0;
- int errorCode = -1;
+ afs_int32 local_errorCode, errorCode = -1;
static struct timeval restartedat = { 0, 0 };
if (fid->Volume == 0 || fid->Vnode == 0) /* not: || fid->Unique == 0) */
while (1) {
errorCode = 0;
- *volptr = VGetVolume(&errorCode, (afs_int32) fid->Volume);
+ *volptr = VGetVolume(&local_errorCode, &errorCode, (afs_int32) fid->Volume);
if (!errorCode) {
assert(*volptr);
break;
}
}
}
- /* allow read operations on busy volume */
- else if (errorCode == VBUSY && lock == READ_LOCK) {
+ /* allow read operations on busy volume.
+ * must check local_errorCode because demand attach fs
+ * can have local_errorCode == VSALVAGING, errorCode == VBUSY */
+ else if (local_errorCode == VBUSY && lock == READ_LOCK) {
errorCode = 0;
break;
} else if (errorCode)
wrlen, errno));
#ifdef FAST_RESTART /* if running in no-salvage, don't core the server */
ViceLog(0, ("CopyOnWrite failed: taking volume offline\n"));
+#elif defined(AFS_DEMAND_ATTACH_FS)
+ ViceLog(0, ("CopyOnWrite failed: requesting salvage\n"));
#else /* Avoid further corruption and try to get a core. */
assert(0);
#endif
static void
FillPerfValues(struct afs_PerfStats *a_perfP)
{ /*FillPerfValues */
-
+ afs_uint32 hi, lo;
int dir_Buffers; /*# buffers in use by dir package */
int dir_Calls; /*# read calls in dir package */
int dir_IOs; /*# I/O ops in dir package */
a_perfP->vcache_S_Gets = VnodeClassInfo[vSmall].gets;
a_perfP->vcache_S_Reads = VnodeClassInfo[vSmall].reads;
a_perfP->vcache_S_Writes = VnodeClassInfo[vSmall].writes;
- a_perfP->vcache_H_Entries = VolumeCacheSize;
- a_perfP->vcache_H_Gets = VolumeGets;
- a_perfP->vcache_H_Replacements = VolumeReplacements;
+ a_perfP->vcache_H_Entries = VStats.hdr_cache_size;
+ SplitInt64(VStats.hdr_gets, hi, lo);
+ a_perfP->vcache_H_Gets = lo;
+ SplitInt64(VStats.hdr_loads, hi, lo);
+ a_perfP->vcache_H_Replacements = lo;
/*
* Directory section.
* This software has been released under the terms of the IBM Public
* License. For details, see the LICENSE file in the top-level source
* directory or online at http://www.openafs.org/dl/license10.html
+ *
+ * Portions Copyright (c) 2006 Sine Nomine Associates
*/
/*
#include <afs/ptclient.h> /* need definition of prlist for host.h */
#include "host.h"
+#include "callback.h"
+#ifdef AFS_DEMAND_ATTACH_FS
+#include "../tviced/serialize_state.h"
+#endif /* AFS_DEMAND_ATTACH_FS */
+
extern afsUUID FS_HostUUID;
extern int hostCount;
-int ShowProblems = 1;
-
-/* Maximum number of call backs to break at once, single fid */
-/* There is some debate as to just how large this value should be */
-/* Ideally, it would be very very large, but I am afraid that the */
-/* cache managers will all send in their responses simultaneously, */
-/* thereby swamping the file server. As a result, something like */
-/* 10 or 15 might be a better bet. */
-#define MAX_CB_HOSTS 10
-
-/* max time to break a callback, otherwise client is dead or net is hosed */
-#define MAXCBT 25
-
-#define u_byte unsigned char
+static int ShowProblems = 1;
struct cbcounters cbstuff;
-struct cbstruct {
- struct host *hp;
- afs_uint32 thead;
-};
-
-struct FileEntry {
- afs_uint32 vnode;
- afs_uint32 unique;
- afs_uint32 volid;
- afs_uint32 fnext;
- afs_uint32 ncbs;
- afs_uint32 firstcb;
- afs_uint32 status;
- afs_uint32 spare;
-} *FE; /* Don't use FE[0] */
-#define FE_LATER 0x1
-
-struct CallBack {
- afs_uint32 cnext; /* Next call back entry */
- afs_uint32 fhead; /* Head of this call back chain */
- u_byte thead; /* Head of timeout chain */
- u_byte status; /* Call back status; see definitions, below */
- afs_uint32 hhead; /* Head of host table chain */
- afs_uint32 tprev, tnext; /* Timeout chain */
- afs_uint32 hprev, hnext; /* Chain from host table */
- unsigned short spare; /* make it a multiple of 32 bits. */
-} *CB; /* Don't use CB[0] */
-
-/* status values for status field of CallBack structure */
-#define CB_NORMAL 1 /* Normal call back */
-#define CB_DELAYED 2 /* Delayed call back due to rpc problems.
- * The call back entry will be added back to the
- * host list at the END of the list, so that
- * searching backwards in the list will find all
- * the (consecutive)host. delayed call back entries */
-#define CB_VOLUME 3 /* Callback for a volume */
-#define CB_BULK 4 /* Normal callbacks, handed out from FetchBulkStatus */
-
-/* call back indices to pointers, and vice-versa */
-#define itocb(i) ((i)?CB+(i):0)
-#define cbtoi(cbp) (!(cbp)?0:(cbp)-CB)
-
-/* file entry indices to pointers, and vice-versa */
-#define itofe(i) ((i)?FE+(i):0)
-#define fetoi(fep) (!(fep)?0:(fep)-FE)
-
-/* Timeouts: there are 128 possible timeout values in effect at any
- * given time. Each timeout represents timeouts in an interval of 128
- * seconds. So the maximum timeout for a call back is 128*128=16384
- * seconds, or 4 1/2 hours. The timeout cleanup stuff is called only
- * if space runs out or by the file server every 5 minutes. This 5
- * minute slack should be allowed for--so a maximum time of 4 hours
- * is safer.
- *
- * Timeouts must be chosen to correspond to an exact multiple
- * of 128, because all times are truncated to a 128 multiple, and
- * timed out if the current truncated time is <= to the truncated time
- * corresponding to the timeout queue.
- */
+static struct FileEntry * FE = NULL; /* don't use FE[0] */
+static struct CallBack * CB = NULL; /* don't use CB[0] */
-/* Unix time to Call Back time, and vice-versa. Call back time is
- in units of 128 seconds, corresponding to time queues. */
-#define CBtime(uxtime) ((uxtime)>>7)
-#define UXtime(cbtime) ((cbtime)<<7)
+static struct CallBack * CBfree = NULL;
+static struct FileEntry * FEfree = NULL;
-/* Given a Unix time, compute the closest Unix time that corresponds to
- a time queue, rounding up */
-#define TimeCeiling(uxtime) (((uxtime)+127)&~127)
/* Time to live for call backs depends upon number of users of the file.
* TimeOuts is indexed by this number/8 (using TimeOut macro). Times
/* minimum time given for a call back */
static int MinTimeOut = (7 * 60);
-#define TimeOutCutoff ((sizeof(TimeOuts)/sizeof(TimeOuts[0]))*8)
-#define TimeOut(nusers) ((nusers)>=TimeOutCutoff? MinTimeOut: TimeOuts[(nusers)>>3])
-
-/* time out at server is 3 minutes more than ws */
-#define ServerBias (3*60)
-
/* Heads of CB queues; a timeout index is 1+index into this array */
-static afs_uint32 timeout[128];
-
-/* Convert cbtime to timeout queue index */
-#define TIndex(cbtime) (((cbtime)&127)+1)
-
-/* Convert cbtime to pointer to timeout queue head */
-#define THead(cbtime) (&timeout[TIndex(cbtime)-1])
+static afs_uint32 timeout[CB_NUM_TIMEOUT_QUEUES];
static afs_int32 tfirst; /* cbtime of oldest unexpired call back time queue */
-/* Normalize index into timeout array so that two such indices will be
- ordered correctly, so that they can be compared to see which times
- sooner, or so that the difference in time out times between them
- can be computed. */
-#define TNorm(index) ((index)<TIndex(tfirst)?(index)+128:(index))
-
-/* This converts a timeout index into the actual time it will expire */
-#define TIndexToTime(index) (UXtime(TNorm(index) - TIndex(tfirst) + tfirst))
-
-
-/* Convert pointer to timeout queue head to index, and vice versa */
-#define ttoi(t) ((t-timeout)+1)
-#define itot(i) ((timeout)+(i-1))
/* 16 byte object get/free routines */
struct object {
struct object *next;
};
-struct VCBParams {
- struct cbstruct cba[MAX_CB_HOSTS]; /* re-entrant storage */
- unsigned int ncbas;
- afs_uint32 thead; /* head of timeout queue for youngest callback */
- struct AFSFid *fid;
-};
-
-struct CallBack *CBfree = 0;
-struct FileEntry *FEfree = 0;
-
/* Prototypes for static routines */
static struct FileEntry *FindFE(register AFSFid * fid);
static struct CallBack *iGetCB(register int *nused);
#define FreeCB(cb) iFreeCB((struct CallBack *)cb, &cbstuff.nCBs)
#define FreeFE(fe) iFreeFE((struct FileEntry *)fe, &cbstuff.nFEs)
+
/* Other protos - move out sometime */
void PrintCB(register struct CallBack *cb, afs_uint32 now);
-#define VHASH 512 /* Power of 2 */
-static afs_uint32 HashTable[VHASH]; /* File entry hash table */
-#define VHash(volume, unique) (((volume)+(unique))&(VHASH-1))
+static afs_uint32 HashTable[FEHASH_SIZE]; /* File entry hash table */
static struct FileEntry *
FindFE(register AFSFid * fid)
register int fei;
register struct FileEntry *fe;
- hash = VHash(fid->Volume, fid->Unique);
+ hash = FEHash(fid->Volume, fid->Unique);
for (fei = HashTable[hash]; fei; fei = fe->fnext) {
fe = itofe(fei);
if (fe->volid == fid->Volume && fe->unique == fid->Unique
if (!host->cblist) {
host->cblist = cb->hnext = cb->hprev = cbtoi(cb);
} else {
- register struct CallBack *hhp = itocb(host->cblist);
+ register struct CallBack *fcb = itocb(host->cblist);
- cb->hprev = hhp->hprev;
- cb->hnext = host->cblist;
- hhp->hprev = (itocb(hhp->hprev)->hnext = cbtoi(cb));
+ cb->hprev = fcb->hprev;
+ cb->hnext = cbtoi(fcb);
+ fcb->hprev = (itocb(fcb->hprev)->hnext = cbtoi(cb));
}
return 0;
}
/* N.B. This one also deletes the CB, and also possibly parent FE, so
* make sure that it is not on any other list before calling this
* routine */
-int Ccdelpt = 0, CcdelB = 0;
+static int Ccdelpt = 0, CcdelB = 0;
static int
CDelPtr(register struct FileEntry *fe, register afs_uint32 * cbp,
FDel(register struct FileEntry *fe)
{
register int fei = fetoi(fe);
- register afs_uint32 *p = &HashTable[VHash(fe->volid, fe->unique)];
+ register afs_uint32 *p = &HashTable[FEHash(fe->volid, fe->unique)];
while (*p && *p != fei)
p = &itofe(*p)->fnext;
return 0;
}
+/* initialize the callback package */
int
InitCallBack(int nblks)
{
tfirst = CBtime(FT_ApproxTime());
/* N.B. The "-1", below, is because
* FE[0] and CB[0] are not used--and not allocated */
- FE = ((struct FileEntry *)(calloc(nblks, sizeof(struct FileEntry)))) - 1;
+ FE = ((struct FileEntry *)(calloc(nblks, sizeof(struct FileEntry))));
if (!FE) {
ViceLog(0, ("Failed malloc in InitCallBack\n"));
assert(0);
}
+ FE--; /* FE[0] is supposed to point to junk */
cbstuff.nFEs = nblks;
while (cbstuff.nFEs)
FreeFE(&FE[cbstuff.nFEs]); /* This is correct */
- CB = ((struct CallBack *)(calloc(nblks, sizeof(struct CallBack)))) - 1;
+ CB = ((struct CallBack *)(calloc(nblks, sizeof(struct CallBack))));
if (!CB) {
ViceLog(0, ("Failed malloc in InitCallBack\n"));
assert(0);
}
+ CB--; /* CB[0] is supposed to point to junk */
cbstuff.nCBs = nblks;
while (cbstuff.nCBs)
FreeCB(&CB[cbstuff.nCBs]); /* This is correct */
fe->unique = fid->Unique;
fe->ncbs = 0;
fe->status = 0;
- hash = VHash(fid->Volume, fid->Unique);
+ hash = FEHash(fid->Volume, fid->Unique);
fe->fnext = HashTable[hash];
HashTable[hash] = fetoi(fe);
}
H_LOCK;
fid.Volume = volume, fid.Vnode = fid.Unique = 0;
- for (hash = 0; hash < VHASH; hash++) {
+ for (hash = 0; hash < FEHASH_SIZE; hash++) {
for (feip = &HashTable[hash]; (fe = itofe(*feip));) {
if (fe->volid == volume) {
register struct CallBack *cbnext;
BreakVolumeCallBacksLater(afs_uint32 volume)
{
int hash;
- afs_int32 *feip;
+ afs_uint32 *feip;
struct FileEntry *fe;
struct CallBack *cb;
struct host *host;
ViceLog(25, ("Setting later on volume %u\n", volume));
H_LOCK;
- for (hash = 0; hash < VHASH; hash++) {
+ for (hash = 0; hash < FEHASH_SIZE; hash++) {
for (feip = &HashTable[hash]; (fe = itofe(*feip)) != NULL; ) {
if (fe->volid == volume) {
register struct CallBack *cbnext;
FSYNC_LOCK;
fe->status |= FE_LATER;
FSYNC_UNLOCK;
- found++;
+ found = 1;
}
feip = &fe->fnext;
}
{
struct AFSFid fid;
int hash;
- afs_int32 *feip;
+ afs_uint32 *feip;
struct CallBack *cb;
struct FileEntry *fe = NULL;
struct FileEntry *myfe = NULL;
/* Pick the first volume we see to clean up */
fid.Volume = fid.Vnode = fid.Unique = 0;
- for (hash = 0; hash < VHASH; hash++) {
+ for (hash = 0; hash < FEHASH_SIZE; hash++) {
for (feip = &HashTable[hash]; (fe = itofe(*feip)) != NULL; ) {
if (fe && (fe->status & FE_LATER)
&& (fid.Volume == 0 || fid.Volume == fe->volid)) {
#ifndef INTERPRET_DUMP
+#ifdef AFS_DEMAND_ATTACH_FS
+/*
+ * demand attach fs
+ * callback state serialization
+ */
+static int cb_stateSaveTimeouts(struct fs_dump_state * state);
+static int cb_stateSaveFEHash(struct fs_dump_state * state);
+static int cb_stateSaveFEs(struct fs_dump_state * state);
+static int cb_stateSaveFE(struct fs_dump_state * state, struct FileEntry * fe);
+static int cb_stateRestoreTimeouts(struct fs_dump_state * state);
+static int cb_stateRestoreFEHash(struct fs_dump_state * state);
+static int cb_stateRestoreFEs(struct fs_dump_state * state);
+static int cb_stateRestoreFE(struct fs_dump_state * state);
+static int cb_stateRestoreCBs(struct fs_dump_state * state, struct FileEntry * fe,
+ struct iovec * iov, int niovecs);
+
+static int cb_stateVerifyFEHash(struct fs_dump_state * state);
+static int cb_stateVerifyFE(struct fs_dump_state * state, struct FileEntry * fe);
+static int cb_stateVerifyFCBList(struct fs_dump_state * state, struct FileEntry * fe);
+static int cb_stateVerifyTimeoutQueues(struct fs_dump_state * state);
+
+static int cb_stateFEToDiskEntry(struct FileEntry *, struct FEDiskEntry *);
+static int cb_stateDiskEntryToFE(struct fs_dump_state * state,
+ struct FEDiskEntry *, struct FileEntry *);
+
+static int cb_stateCBToDiskEntry(struct CallBack *, struct CBDiskEntry *);
+static int cb_stateDiskEntryToCB(struct fs_dump_state * state,
+ struct CBDiskEntry *, struct CallBack *);
+
+static int cb_stateFillHeader(struct callback_state_header * hdr);
+static int cb_stateCheckHeader(struct callback_state_header * hdr);
+
+static int cb_stateAllocMap(struct fs_dump_state * state);
+
+/*
+ * Write the callback section of the fileserver state dump.
+ *
+ * The section starts at the current end of the dump file.  A zeroed
+ * (invalid) callback header is written first, followed by the timeout
+ * queue heads, the FE hash table and the FE/CB records; only then is the
+ * header filled in and rewritten in place.
+ *
+ * Returns 0 on success, 1 if any step fails.
+ */
+int
+cb_stateSave(struct fs_dump_state * state)
+{
+    /* the callback section begins where the dump file currently ends */
+    AssignInt64(state->eof_offset, &state->hdr->cb_offset);
+
+    /* invalidate callback state header */
+    memset(state->cb_hdr, 0, sizeof(struct callback_state_header));
+    if (fs_stateWriteHeader(state, &state->hdr->cb_offset, state->cb_hdr,
+                            sizeof(struct callback_state_header))) {
+        return 1;
+    }
+
+    fs_stateIncEOF(state, sizeof(struct callback_state_header));
+
+    /* dump timeout state, then the fe hashtable, then the callback state */
+    if (cb_stateSaveTimeouts(state) ||
+        cb_stateSaveFEHash(state) ||
+        cb_stateSaveFEs(state)) {
+        return 1;
+    }
+
+    /* write the callback state header to disk */
+    cb_stateFillHeader(state->cb_hdr);
+    if (fs_stateWriteHeader(state, &state->hdr->cb_offset, state->cb_hdr,
+                            sizeof(struct callback_state_header))) {
+        return 1;
+    }
+
+    return 0;
+}
+
+/*
+ * Read the callback section of the state dump back into memory.
+ *
+ * Steps, in order: read and check the callback header, allocate the
+ * old-index -> new-index maps, restore the timeout queue heads, the FE
+ * hash table and the FE/CB records, and finally restore tfirst from the
+ * header.  Processing stops at the first failing step.
+ *
+ * Returns 0 on success, 1 on failure.
+ */
+int
+cb_stateRestore(struct fs_dump_state * state)
+{
+    if (fs_stateReadHeader(state, &state->hdr->cb_offset, state->cb_hdr,
+                           sizeof(struct callback_state_header))) {
+        return 1;
+    }
+
+    if (cb_stateCheckHeader(state->cb_hdr) ||
+        cb_stateAllocMap(state) ||
+        cb_stateRestoreTimeouts(state) ||
+        cb_stateRestoreFEHash(state) ||
+        cb_stateRestoreFEs(state)) {	/* restore FEs and CBs from disk */
+        return 1;
+    }
+
+    /* restore the timeout queue heads */
+    tfirst = state->cb_hdr->tfirst;
+
+    return 0;
+}
+
+/*
+ * Translate every saved (old) index stored inside the restored callback
+ * structures into the index of the corresponding newly allocated object.
+ *
+ * Covers the link fields of each restored FileEntry and CallBack, the
+ * timeout queue heads and the FE hash table bucket heads.  Stops at the
+ * first index that cannot be translated.
+ *
+ * Returns 0 on success, 1 on failure.
+ */
+int
+cb_stateRestoreIndices(struct fs_dump_state * state)
+{
+    int n;
+    struct FileEntry *fep;
+    struct CallBack *cbp;
+
+    /* restore indices in the FileEntry structures */
+    for (n = 1; n < state->fe_map.len; n++) {
+        if (!state->fe_map.entries[n].new_idx)
+            continue;		/* slot was never populated */
+
+        fep = itofe(state->fe_map.entries[n].new_idx);
+
+        /* fnext first, then firstcb */
+        if (fe_OldToNew(state, fep->fnext, &fep->fnext) ||
+            cb_OldToNew(state, fep->firstcb, &fep->firstcb)) {
+            return 1;
+        }
+    }
+
+    /* restore indices in the CallBack structures */
+    for (n = 1; n < state->cb_map.len; n++) {
+        if (!state->cb_map.entries[n].new_idx)
+            continue;		/* slot was never populated */
+
+        cbp = itocb(state->cb_map.entries[n].new_idx);
+
+        /* cnext, fhead, hhead, tprev, tnext, hprev, hnext -- in that order */
+        if (cb_OldToNew(state, cbp->cnext, &cbp->cnext) ||
+            fe_OldToNew(state, cbp->fhead, &cbp->fhead) ||
+            h_OldToNew(state, cbp->hhead, &cbp->hhead) ||
+            cb_OldToNew(state, cbp->tprev, &cbp->tprev) ||
+            cb_OldToNew(state, cbp->tnext, &cbp->tnext) ||
+            cb_OldToNew(state, cbp->hprev, &cbp->hprev) ||
+            cb_OldToNew(state, cbp->hnext, &cbp->hnext)) {
+            return 1;
+        }
+    }
+
+    /* restore the timeout queue head indices */
+    for (n = 0; n < state->cb_timeout_hdr->records; n++) {
+        if (cb_OldToNew(state, timeout[n], &timeout[n]))
+            return 1;
+    }
+
+    /* restore the FE hash table queue heads */
+    for (n = 0; n < state->cb_fehash_hdr->records; n++) {
+        if (fe_OldToNew(state, HashTable[n], &HashTable[n]))
+            return 1;
+    }
+
+    return 0;
+}
+
+/*
+ * Run the callback consistency checks: the FE hash table and the timeout
+ * queues.  Both checks always run, even if the first one fails.
+ *
+ * Returns 0 if both pass, 1 otherwise.
+ */
+int
+cb_stateVerify(struct fs_dump_state * state)
+{
+    int fehash_bad, timeouts_bad;
+
+    fehash_bad = cb_stateVerifyFEHash(state);
+    timeouts_bad = cb_stateVerifyTimeoutQueues(state);
+
+    return (fehash_bad || timeouts_bad) ? 1 : 0;
+}
+
+/*
+ * Walk every FE hash chain and verify each FileEntry on it.
+ *
+ * A chain walk is abandoned when an index exceeds cbstuff.nblks or when
+ * the chain grows past FS_STATE_FE_MAX_HASH_CHAIN_LEN (treated as a
+ * loop); the remaining buckets are still checked.
+ *
+ * Returns 0 if everything verified, 1 otherwise.
+ */
+static int
+cb_stateVerifyFEHash(struct fs_dump_state * state)
+{
+    int ret = 0, bucket;
+    struct FileEntry * fe;
+    afs_uint32 fei, chain_len;
+
+    for (bucket = 0; bucket < FEHASH_SIZE; bucket++) {
+        chain_len = 0;
+        fei = HashTable[bucket];
+
+        while ((fe = itofe(fei)) != NULL) {
+            /* range check comes before fe is ever dereferenced */
+            if (fei > cbstuff.nblks) {
+                ViceLog(0, ("cb_stateVerifyFEHash: error: index out of range (fei=%d)\n", fei));
+                ret = 1;
+                break;
+            }
+            if (cb_stateVerifyFE(state, fe)) {
+                ret = 1;
+            }
+            if (chain_len > FS_STATE_FE_MAX_HASH_CHAIN_LEN) {
+                ViceLog(0, ("cb_stateVerifyFEHash: error: hash chain %d length exceeds %d; assuming there's a loop\n",
+                            bucket, FS_STATE_FE_MAX_HASH_CHAIN_LEN));
+                ret = 1;
+                break;
+            }
+            chain_len++;
+            fei = fe->fnext;
+        }
+    }
+
+    return ret;
+}
+
+/*
+ * Verify a single FileEntry: firstcb and ncbs must be both zero or both
+ * non-zero, and the per-FE callback list must pass verification.  Both
+ * checks are always performed.
+ *
+ * Returns 0 if the entry is consistent, 1 otherwise.
+ */
+static int
+cb_stateVerifyFE(struct fs_dump_state * state, struct FileEntry * fe)
+{
+    int bad = 0;
+
+    /* exactly one of firstcb / ncbs being zero is an inconsistency */
+    if (!fe->firstcb != !fe->ncbs) {
+        ViceLog(0, ("cb_stateVerifyFE: error: fe->firstcb does not agree with fe->ncbs (fei=%d, fe->firstcb=%d, fe->ncbs=%d)\n",
+                    fetoi(fe), fe->firstcb, fe->ncbs));
+        bad = 1;
+    }
+
+    if (cb_stateVerifyFCBList(state, fe)) {
+        ViceLog(0, ("cb_stateVerifyFE: error: FCBList failed verification (fei=%d)\n", fetoi(fe)));
+        bad = 1;
+    }
+
+    return bad;
+}
+
+/*
+ * Verify the singly linked callback list hanging off a FileEntry.
+ *
+ * Each callback must have an in-range index and point back at this FE
+ * through fhead; the list length must equal fe->ncbs.  An out-of-range
+ * index or an over-long list (assumed loop) ends the walk immediately,
+ * skipping the final length comparison.
+ *
+ * Returns 0 if the list is consistent, 1 otherwise.
+ */
+static int
+cb_stateVerifyFCBList(struct fs_dump_state * state, struct FileEntry * fe)
+{
+    int ret = 0;
+    afs_uint32 cbi, fei, chain_len = 0;
+    struct CallBack * cb;
+
+    fei = fetoi(fe);
+    cbi = fe->firstcb;
+
+    while ((cb = itocb(cbi)) != NULL) {
+        /* range check comes before cb is dereferenced */
+        if (cbi > cbstuff.nblks) {
+            ViceLog(0, ("cb_stateVerifyFCBList: error: list index out of range (cbi=%d, ncbs=%d)\n",
+                        cbi, cbstuff.nblks));
+            return 1;
+        }
+        if (cb->fhead != fei) {
+            ViceLog(0, ("cb_stateVerifyFCBList: error: cb->fhead != fei (fei=%d, cb->fhead=%d)\n",
+                        fei, cb->fhead));
+            ret = 1;
+        }
+        if (chain_len > FS_STATE_FCB_MAX_LIST_LEN) {
+            ViceLog(0, ("cb_stateVerifyFCBList: error: list length exceeds %d (fei=%d); assuming there's a loop\n",
+                        FS_STATE_FCB_MAX_LIST_LEN, fei));
+            return 1;
+        }
+        chain_len++;
+        cbi = cb->cnext;
+    }
+
+    if (fe->ncbs != chain_len) {
+        ViceLog(0, ("cb_stateVerifyFCBList: error: list length mismatch (len=%d, fe->ncbs=%d)\n",
+                    chain_len, fe->ncbs));
+        ret = 1;
+    }
+
+    return ret;
+}
+
+/*
+ * Verify the circular, doubly linked per-host callback list that starts
+ * at host->cblist.
+ *
+ * For every callback on the list this checks that the index is in range,
+ * that hhead refers back to this host, that hprev/hnext are non-zero and
+ * in range, and that the successor's hprev points back at the current
+ * entry.  The walk ends successfully when it arrives back at the list
+ * head, and is abandoned once the list exceeds FS_STATE_HCB_MAX_LIST_LEN
+ * entries (assumed loop).
+ *
+ * Returns 0 if the list is consistent, 1 otherwise.
+ */
+int
+cb_stateVerifyHCBList(struct fs_dump_state * state, struct host * host)
+{
+    int ret = 0;
+    afs_uint32 hi, chain_len, cbi;
+    struct CallBack *cb, *ncb;
+
+    hi = h_htoi(host);
+    chain_len = 0;
+
+    for (cbi = host->cblist, cb = itocb(cbi);
+         cb;
+         cbi = cb->hnext, cb = ncb) {
+        if (chain_len && (host->cblist == cbi)) {
+            /* we've wrapped around the circular list, and everything looks ok */
+            break;
+        }
+        /* The list head comes straight from the host structure and has not
+         * been range-checked yet; later indices were checked through the
+         * previous entry's hnext, so this only matters on the first pass. */
+        if (cbi > cbstuff.nblks) {
+            ViceLog(0, ("cb_stateVerifyHCBList: error: list index out of range (cbi=%d, h->index=%d, nCBs=%d)\n",
+                        cbi, hi, cbstuff.nblks));
+            ret = 1;
+            goto done;
+        }
+        if (cb->hhead != hi) {
+            ViceLog(0, ("cb_stateVerifyHCBList: error: incorrect cb->hhead (cbi=%d, h->index=%d, cb->hhead=%d)\n",
+                        cbi, hi, cb->hhead));
+            ret = 1;
+        }
+        if (!cb->hprev || !cb->hnext) {
+            ViceLog(0, ("cb_stateVerifyHCBList: error: null index in circular list (cbi=%d, h->index=%d)\n",
+                        cbi, hi));
+            ret = 1;
+            goto done;
+        }
+        if ((cb->hprev > cbstuff.nblks) ||
+            (cb->hnext > cbstuff.nblks)) {
+            ViceLog(0, ("cb_stateVerifyHCBList: error: list index out of range (cbi=%d, h->index=%d, cb->hprev=%d, cb->hnext=%d, nCBs=%d)\n",
+                        cbi, hi, cb->hprev, cb->hnext, cbstuff.nblks));
+            ret = 1;
+            goto done;
+        }
+        /* hnext is known to be non-zero and in range here */
+        ncb = itocb(cb->hnext);
+        if (cbi != ncb->hprev) {
+            ViceLog(0, ("cb_stateVerifyHCBList: error: corrupt linked list (cbi=%d, h->index=%d)\n",
+                        cbi, hi));
+            ret = 1;
+            goto done;
+        }
+        if (chain_len > FS_STATE_HCB_MAX_LIST_LEN) {
+            /* message previously named cb_stateVerifyFCBList by mistake */
+            ViceLog(0, ("cb_stateVerifyHCBList: error: list length exceeds %d (h->index=%d); assuming there's a loop\n",
+                        FS_STATE_HCB_MAX_LIST_LEN, hi));
+            ret = 1;
+            goto done;
+        }
+        chain_len++;
+    }
+
+ done:
+    return ret;
+}
+
+/*
+ * Verify every timeout queue (timeout[0 .. CB_NUM_TIMEOUT_QUEUES-1]).
+ *
+ * Each queue is walked as a circular, doubly linked list through
+ * tnext/tprev.  For each callback the index must be in range, thead must
+ * map back to the queue being walked, tprev/tnext must be non-zero and
+ * in range, and the successor's tprev must point back at the current
+ * entry.  A structural error abandons the current queue only; the
+ * remaining queues are still checked.
+ *
+ * Returns 0 if all queues are consistent, 1 otherwise.
+ */
+static int
+cb_stateVerifyTimeoutQueues(struct fs_dump_state * state)
+{
+ int ret = 0, i;
+ afs_uint32 cbi, chain_len;
+ struct CallBack *cb, *ncb;
+
+ for (i = 0; i < CB_NUM_TIMEOUT_QUEUES; i++) {
+ chain_len = 0;
+ for (cbi = timeout[i], cb = itocb(cbi);
+ cb;
+ cbi = cb->tnext, cb = ncb) {
+ if (chain_len && (cbi == timeout[i])) {
+ /* we've wrapped around the circular list, and everything looks ok */
+ break;
+ }
+ /* range check happens before cb is dereferenced below */
+ if (cbi > cbstuff.nblks) {
+ ViceLog(0, ("cb_stateVerifyTimeoutQueues: error: list index out of range (cbi=%d, tindex=%d)\n",
+ cbi, i));
+ ret = 1;
+ break;
+ }
+ /* a wrong thead is recorded but does not stop the walk */
+ if (itot(cb->thead) != &timeout[i]) {
+ ViceLog(0, ("cb_stateVerifyTimeoutQueues: error: cb->thead points to wrong timeout queue (tindex=%d, cbi=%d, cb->thead=%d)\n",
+ i, cbi, cb->thead));
+ ret = 1;
+ }
+ if (!cb->tprev || !cb->tnext) {
+ ViceLog(0, ("cb_stateVerifyTimeoutQueues: null index in circular list (cbi=%d, tindex=%d)\n",
+ cbi, i));
+ ret = 1;
+ break;
+ }
+ if ((cb->tprev > cbstuff.nblks) ||
+ (cb->tnext > cbstuff.nblks)) {
+ ViceLog(0, ("cb_stateVerifyTimeoutQueues: list index out of range (cbi=%d, tindex=%d, cb->tprev=%d, cb->tnext=%d, nCBs=%d)\n",
+ cbi, i, cb->tprev, cb->tnext, cbstuff.nblks));
+ ret = 1;
+ break;
+ }
+ /* tnext is non-zero and in range here, so ncb is safe to dereference */
+ ncb = itocb(cb->tnext);
+ if (cbi != ncb->tprev) {
+ ViceLog(0, ("cb_stateVerifyTimeoutQueues: corrupt linked list (cbi=%d, tindex=%d)\n",
+ cbi, i));
+ ret = 1;
+ break;
+ }
+ if (chain_len > FS_STATE_TCB_MAX_LIST_LEN) {
+ ViceLog(0, ("cb_stateVerifyTimeoutQueues: list length exceeds %d (tindex=%d); assuming there's a loop\n",
+ FS_STATE_TCB_MAX_LIST_LEN, i));
+ ret = 1;
+ break;
+ }
+ chain_len++;
+ }
+ }
+
+ done:
+ return ret;
+}
+
+/*
+ * Append the timeout queue heads to the state dump.
+ *
+ * Records the current end-of-file offset as the timeout section offset
+ * in the callback header, fills in the timeout section header and
+ * writes header plus the timeout[] array in a single vectored write.
+ *
+ * Returns 0 on success, 1 if the seek or the write fails.
+ */
+static int
+cb_stateSaveTimeouts(struct fs_dump_state * state)
+{
+    int ret = 0;
+    struct iovec iov[2];
+
+    AssignInt64(state->eof_offset, &state->cb_hdr->timeout_offset);
+
+    /* clear the timeout header using its own size (this used to take the
+     * size of the fehash header, a copy-paste slip) */
+    memset(state->cb_timeout_hdr, 0, sizeof(struct callback_state_timeout_header));
+    state->cb_timeout_hdr->magic = CALLBACK_STATE_TIMEOUT_MAGIC;
+    state->cb_timeout_hdr->records = CB_NUM_TIMEOUT_QUEUES;
+    state->cb_timeout_hdr->len = sizeof(struct callback_state_timeout_header) +
+        (state->cb_timeout_hdr->records * sizeof(afs_uint32));
+
+    iov[0].iov_base = (char *)state->cb_timeout_hdr;
+    iov[0].iov_len = sizeof(struct callback_state_timeout_header);
+    iov[1].iov_base = (char *)timeout;
+    iov[1].iov_len = sizeof(timeout);
+
+    if (fs_stateSeek(state, &state->cb_hdr->timeout_offset)) {
+        ret = 1;
+        goto done;
+    }
+
+    if (fs_stateWriteV(state, iov, 2)) {
+        ret = 1;
+        goto done;
+    }
+
+    /* account for the section just written */
+    fs_stateIncEOF(state, state->cb_timeout_hdr->len);
+
+ done:
+    return ret;
+}
+
+/*
+ * Read the timeout queue heads back from the state dump.
+ *
+ * The section header must carry the expected magic, exactly
+ * CB_NUM_TIMEOUT_QUEUES records and a matching total length before the
+ * timeout[] array is read.
+ *
+ * Returns 0 on success, 1 on a read failure or a header mismatch.
+ */
+static int
+cb_stateRestoreTimeouts(struct fs_dump_state * state)
+{
+    int len;
+
+    if (fs_stateReadHeader(state, &state->cb_hdr->timeout_offset,
+                           state->cb_timeout_hdr,
+                           sizeof(struct callback_state_timeout_header))) {
+        return 1;
+    }
+
+    /* sanity-check the section header before trusting its contents */
+    if (state->cb_timeout_hdr->magic != CALLBACK_STATE_TIMEOUT_MAGIC)
+        return 1;
+    if (state->cb_timeout_hdr->records != CB_NUM_TIMEOUT_QUEUES)
+        return 1;
+
+    len = state->cb_timeout_hdr->records * sizeof(afs_uint32);
+
+    if (state->cb_timeout_hdr->len !=
+        (sizeof(struct callback_state_timeout_header) + len)) {
+        return 1;
+    }
+
+    return fs_stateRead(state, timeout, len) ? 1 : 0;
+}
+
+/*
+ * Append the FE hash table bucket heads to the state dump.
+ *
+ * Records the current end-of-file offset as the fehash section offset,
+ * fills in the section header and writes header plus HashTable[] in one
+ * vectored write.
+ *
+ * Returns 0 on success, 1 if the seek or the write fails.
+ */
+static int
+cb_stateSaveFEHash(struct fs_dump_state * state)
+{
+    struct iovec vec[2];
+
+    AssignInt64(state->eof_offset, &state->cb_hdr->fehash_offset);
+
+    /* build the section header */
+    memset(state->cb_fehash_hdr, 0, sizeof(struct callback_state_fehash_header));
+    state->cb_fehash_hdr->magic = CALLBACK_STATE_FEHASH_MAGIC;
+    state->cb_fehash_hdr->records = FEHASH_SIZE;
+    state->cb_fehash_hdr->len = sizeof(struct callback_state_fehash_header) +
+        (state->cb_fehash_hdr->records * sizeof(afs_uint32));
+
+    /* header first, bucket heads second */
+    vec[0].iov_base = (char *)state->cb_fehash_hdr;
+    vec[0].iov_len = sizeof(struct callback_state_fehash_header);
+    vec[1].iov_base = (char *)HashTable;
+    vec[1].iov_len = sizeof(HashTable);
+
+    if (fs_stateSeek(state, &state->cb_hdr->fehash_offset))
+        return 1;
+
+    if (fs_stateWriteV(state, vec, 2))
+        return 1;
+
+    fs_stateIncEOF(state, state->cb_fehash_hdr->len);
+
+    return 0;
+}
+
+/*
+ * Read the FE hash table bucket heads back from the state dump.
+ *
+ * The section header must carry the expected magic, exactly FEHASH_SIZE
+ * records and a matching total length before HashTable[] is read.
+ *
+ * Returns 0 on success, 1 on a read failure or a header mismatch.
+ */
+static int
+cb_stateRestoreFEHash(struct fs_dump_state * state)
+{
+    int len;
+
+    if (fs_stateReadHeader(state, &state->cb_hdr->fehash_offset,
+                           state->cb_fehash_hdr,
+                           sizeof(struct callback_state_fehash_header))) {
+        return 1;
+    }
+
+    /* sanity-check the section header before trusting its contents */
+    if (state->cb_fehash_hdr->magic != CALLBACK_STATE_FEHASH_MAGIC)
+        return 1;
+    if (state->cb_fehash_hdr->records != FEHASH_SIZE)
+        return 1;
+
+    len = state->cb_fehash_hdr->records * sizeof(afs_uint32);
+
+    if (state->cb_fehash_hdr->len !=
+        (sizeof(struct callback_state_fehash_header) + len)) {
+        return 1;
+    }
+
+    return fs_stateRead(state, HashTable, len) ? 1 : 0;
+}
+
+/*
+ * Dump every FileEntry reachable from the FE hash table, bucket by
+ * bucket and in chain order.  The current end-of-file offset is recorded
+ * as the start of the FE section.
+ *
+ * Returns 0 on success, 1 as soon as one entry fails to save.
+ */
+static int
+cb_stateSaveFEs(struct fs_dump_state * state)
+{
+    register int idx, bucket;
+    register struct FileEntry *entry;
+
+    AssignInt64(state->eof_offset, &state->cb_hdr->fe_offset);
+
+    for (bucket = 0; bucket < FEHASH_SIZE; bucket++) {
+        idx = HashTable[bucket];
+        while (idx) {
+            entry = itofe(idx);
+            if (cb_stateSaveFE(state, entry))
+                return 1;
+            idx = entry->fnext;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Restore as many FileEntry records (each with its callbacks) as the
+ * callback header's nFEs field announces.
+ *
+ * Returns 0 on success, 1 as soon as one record fails to restore.
+ */
+static int
+cb_stateRestoreFEs(struct fs_dump_state * state)
+{
+    int remaining;
+
+    remaining = state->cb_hdr->nFEs;
+
+    while (remaining > 0) {
+        if (cb_stateRestoreFE(state))
+            return 1;
+        remaining--;
+    }
+
+    return 0;
+}
+
+/*
+ * Write one FileEntry record to the state dump: an entry header, the FE
+ * disk entry, and one CB disk entry per callback on the FE's list.
+ *
+ * Records are written in batches of at most 16 iovecs.  The first batch
+ * carries the entry header and FE disk entry in slots 0 and 1; later
+ * batches hold callbacks only.  Because the header's callback count is
+ * not known until the list has been walked, the header is rewritten in
+ * place at the record start once any batch has been flushed.
+ *
+ * Also maintains fe_max/cb_max (largest indices seen) and the nFEs/nCBs
+ * totals in the callback header.
+ *
+ * Returns 0 on success, 1 on a conversion or I/O failure.
+ */
+static int
+cb_stateSaveFE(struct fs_dump_state * state, struct FileEntry * fe)
+{
+    int ret = 0, iovcnt, cbi, written = 0;
+    afs_uint32 fei;
+    struct callback_state_entry_header hdr;
+    struct FEDiskEntry fedsk;
+    struct CBDiskEntry cbdsk[16];
+    struct iovec iov[16];
+    struct CallBack *cb;
+
+    fei = fetoi(fe);
+    if (fei > state->cb_hdr->fe_max) {
+        state->cb_hdr->fe_max = fei;
+    }
+
+    memset(&hdr, 0, sizeof(struct callback_state_entry_header));
+
+    if (cb_stateFEToDiskEntry(fe, &fedsk)) {
+        ret = 1;
+        goto done;
+    }
+
+    iov[0].iov_base = (char *)&hdr;
+    iov[0].iov_len = sizeof(hdr);
+    iov[1].iov_base = (char *)&fedsk;
+    iov[1].iov_len = sizeof(struct FEDiskEntry);
+    iovcnt = 2;
+
+    for (cbi = fe->firstcb, cb = itocb(cbi);
+         cb != NULL;
+         cbi = cb->cnext, cb = itocb(cbi), hdr.nCBs++) {
+        if (cbi > state->cb_hdr->cb_max) {
+            state->cb_hdr->cb_max = cbi;
+        }
+        /* iovcnt doubles as the next free slot in both iov[] and cbdsk[].
+         * The slot index used to keep growing after a flush, running off
+         * the end of both 16-element arrays once an FE had more than 14
+         * callbacks. */
+        if (cb_stateCBToDiskEntry(cb, &cbdsk[iovcnt])) {
+            ret = 1;
+            goto done;
+        }
+        cbdsk[iovcnt].index = cbi;
+        iov[iovcnt].iov_base = (char *)&cbdsk[iovcnt];
+        iov[iovcnt].iov_len = sizeof(struct CBDiskEntry);
+        iovcnt++;
+        /* flush when the batch is full or this was the last callback */
+        if ((iovcnt == 16) || (!cb->cnext)) {
+            if (fs_stateWriteV(state, iov, iovcnt)) {
+                ret = 1;
+                goto done;
+            }
+            written = 1;
+            iovcnt = 0;
+        }
+    }
+
+    hdr.magic = CALLBACK_STATE_ENTRY_MAGIC;
+    hdr.len = sizeof(hdr) + sizeof(struct FEDiskEntry) +
+        (hdr.nCBs * sizeof(struct CBDiskEntry));
+
+    if (!written) {
+        /* no callbacks: header + FE entry are still pending in iov[0..1] */
+        if (fs_stateWriteV(state, iov, iovcnt)) {
+            ret = 1;
+            goto done;
+        }
+    } else {
+        /* the header went out with the first batch before magic, len and
+         * the final nCBs were known; rewrite it at the record start */
+        if (fs_stateWriteHeader(state, &state->eof_offset, &hdr, sizeof(hdr))) {
+            ret = 1;
+            goto done;
+        }
+    }
+
+    fs_stateIncEOF(state, hdr.len);
+
+    if (written) {
+        /* reposition at the new end of file after the header rewrite */
+        if (fs_stateSeek(state, &state->eof_offset)) {
+            ret = 1;
+            goto done;
+        }
+    }
+
+    state->cb_hdr->nFEs++;
+    state->cb_hdr->nCBs += hdr.nCBs;
+
+ done:
+    return ret;
+}
+
+/*
+ * Read one FileEntry record from the state dump: the entry header and FE
+ * disk entry first, then hdr.nCBs callback disk entries in batches of at
+ * most 16, handing each batch to cb_stateRestoreCBs().
+ *
+ * Returns 0 on success; 1 on an I/O failure, a bad entry magic, running
+ * out of free FileEntry structures, or a conversion failure.
+ */
+static int
+cb_stateRestoreFE(struct fs_dump_state * state)
+{
+    int ret = 0, iovcnt, nCBs;
+    struct callback_state_entry_header hdr;
+    struct FEDiskEntry fedsk;
+    struct CBDiskEntry cbdsk[16];
+    struct iovec iov[16];
+    struct FileEntry * fe;
+
+    iov[0].iov_base = (char *)&hdr;
+    iov[0].iov_len = sizeof(hdr);
+    iov[1].iov_base = (char *)&fedsk;
+    iov[1].iov_len = sizeof(fedsk);
+    iovcnt = 2;
+
+    if (fs_stateReadV(state, iov, iovcnt)) {
+        ret = 1;
+        goto done;
+    }
+
+    if (hdr.magic != CALLBACK_STATE_ENTRY_MAGIC) {
+        ret = 1;
+        goto done;
+    }
+
+    fe = GetFE();
+    if (fe == NULL) {
+        ViceLog(0, ("cb_stateRestoreFE: ran out of free FileEntry structures\n"));
+        ret = 1;
+        goto done;
+    }
+
+    if (cb_stateDiskEntryToFE(state, &fedsk, fe)) {
+        ret = 1;
+        goto done;
+    }
+
+    if (hdr.nCBs) {
+        /* iovcnt doubles as the next free slot in iov[] and cbdsk[] and is
+         * reset after every batch.  The slot index used to keep growing
+         * across batches, overrunning both 16-element arrays once a record
+         * carried more than 16 callbacks. */
+        for (iovcnt = 0, nCBs = 0;
+             nCBs < hdr.nCBs;
+             nCBs++) {
+            iov[iovcnt].iov_base = (char *)&cbdsk[iovcnt];
+            iov[iovcnt].iov_len = sizeof(struct CBDiskEntry);
+            iovcnt++;
+            /* read when the batch is full or this is the last callback */
+            if ((iovcnt == 16) || (nCBs == hdr.nCBs - 1)) {
+                if (fs_stateReadV(state, iov, iovcnt)) {
+                    ret = 1;
+                    goto done;
+                }
+                if (cb_stateRestoreCBs(state, fe, iov, iovcnt)) {
+                    ret = 1;
+                    goto done;
+                }
+                iovcnt = 0;
+            }
+        }
+    }
+
+ done:
+    return ret;
+}
+
+/*
+ * Turn a batch of callback disk entries (one per iovec) into in-memory
+ * CallBack structures taken from the free list via GetCB().
+ *
+ * Returns 0 on success, 1 if the free list runs dry or an entry fails
+ * conversion.
+ */
+static int
+cb_stateRestoreCBs(struct fs_dump_state * state, struct FileEntry * fe,
+                   struct iovec * iov, int niovecs)
+{
+    int n;
+    register struct CallBack * cbp;
+
+    for (n = 0; n < niovecs; n++) {
+        cbp = GetCB();
+        if (cbp == NULL) {
+            ViceLog(0, ("cb_stateRestoreCBs: ran out of free CallBack structures\n"));
+            return 1;
+        }
+        if (cb_stateDiskEntryToCB(state, (struct CBDiskEntry *) iov[n].iov_base, cbp)) {
+            ViceLog(0, ("cb_stateRestoreCBs: corrupt CallBack disk entry\n"));
+            return 1;
+        }
+    }
+
+    return 0;
+}
+
+
+/*
+ * Stamp the callback state header with its magic and version and record
+ * the current tfirst.  Other header fields are left untouched.
+ * Always returns 0.
+ */
+static int
+cb_stateFillHeader(struct callback_state_header * hdr)
+{
+    hdr->tfirst = tfirst;
+    hdr->stamp.version = CALLBACK_STATE_VERSION;
+    hdr->stamp.magic = CALLBACK_STATE_MAGIC;
+
+    return 0;
+}
+
+/*
+ * Validate a callback state header read from disk: magic and version
+ * must match, and the saved FE/CB counts must not exceed cbstuff.nblks.
+ *
+ * Returns 0 if the header is acceptable, 1 otherwise.
+ */
+static int
+cb_stateCheckHeader(struct callback_state_header * hdr)
+{
+    if (hdr->stamp.magic != CALLBACK_STATE_MAGIC)
+        return 1;
+
+    if (hdr->stamp.version != CALLBACK_STATE_VERSION)
+        return 1;
+
+    if ((hdr->nFEs > cbstuff.nblks) || (hdr->nCBs > cbstuff.nblks)) {
+        ViceLog(0, ("cb_stateCheckHeader: saved callback state larger than callback memory allocation\n"));
+        return 1;
+    }
+
+    return 0;
+}
+
+/* disk entry conversion routines */
+/*
+ * Build the on-disk form of a FileEntry: a byte copy of the structure
+ * plus its current index.  Always returns 0.
+ */
+static int
+cb_stateFEToDiskEntry(struct FileEntry * in, struct FEDiskEntry * out)
+{
+    out->index = fetoi(in);
+    memcpy(&out->fe, in, sizeof(*in));
+
+    return 0;
+}
+
+/*
+ * Load an on-disk FileEntry into `out` and record the mapping from its
+ * saved index to its new index in state->fe_map.
+ *
+ * The structure is copied before the saved index is validated, so on
+ * failure `out` already holds the copied data.
+ *
+ * Returns 0 on success, 1 if the saved index is zero or lies outside the
+ * FE map.
+ */
+static int
+cb_stateDiskEntryToFE(struct fs_dump_state * state,
+                      struct FEDiskEntry * in, struct FileEntry * out)
+{
+    int ret = 0;
+
+    memcpy(out, &in->fe, sizeof(struct FileEntry));
+
+    /* setup FE map entry */
+    if (!in->index || (in->index >= state->fe_map.len)) {
+        /* message now ends in a newline, like cb_stateDiskEntryToCB's */
+        ViceLog(0, ("cb_stateDiskEntryToFE: index (%d) out of range\n",
+                    in->index));
+        ret = 1;
+        goto done;
+    }
+    state->fe_map.entries[in->index].old_idx = in->index;
+    state->fe_map.entries[in->index].new_idx = fetoi(out);
+
+ done:
+    return ret;
+}
+
+/*
+ * Build the on-disk form of a CallBack: a byte copy of the structure
+ * plus its current index.  Always returns 0.
+ */
+static int
+cb_stateCBToDiskEntry(struct CallBack * in, struct CBDiskEntry * out)
+{
+    out->index = cbtoi(in);
+    memcpy(&out->cb, in, sizeof(*in));
+
+    return 0;
+}
+
+/*
+ * Load an on-disk CallBack into `out` and record the mapping from its
+ * saved index to its new index in state->cb_map.
+ *
+ * The structure is copied before the saved index is validated.
+ *
+ * Returns 0 on success, 1 if the saved index is zero or lies outside the
+ * CB map.
+ */
+static int
+cb_stateDiskEntryToCB(struct fs_dump_state * state,
+                      struct CBDiskEntry * in, struct CallBack * out)
+{
+    memcpy(out, &in->cb, sizeof(struct CallBack));
+
+    /* setup CB map entry */
+    if (!in->index || (in->index >= state->cb_map.len)) {
+        ViceLog(0, ("cb_stateDiskEntryToCB: index (%d) out of range\n",
+                    in->index));
+        return 1;
+    }
+
+    state->cb_map.entries[in->index].old_idx = in->index;
+    state->cb_map.entries[in->index].new_idx = cbtoi(out);
+
+    return 0;
+}
+
+/* index map routines */
+/*
+ * Allocate the zero-filled old-index -> new-index maps for FEs and CBs.
+ * Each map has one slot more than the largest saved index (fe_max /
+ * cb_max), so slot 0 stays unused for the one-based indices.
+ *
+ * Returns 0 if both allocations succeeded, 1 otherwise.
+ */
+static int
+cb_stateAllocMap(struct fs_dump_state * state)
+{
+    state->fe_map.len = state->cb_hdr->fe_max + 1;
+    state->cb_map.len = state->cb_hdr->cb_max + 1;
+
+    state->fe_map.entries = (struct idx_map_entry_t *)
+        calloc(state->fe_map.len, sizeof(struct idx_map_entry_t));
+    state->cb_map.entries = (struct idx_map_entry_t *)
+        calloc(state->cb_map.len, sizeof(struct idx_map_entry_t));
+
+    if (state->fe_map.entries == NULL || state->cb_map.entries == NULL)
+        return 1;
+
+    return 0;
+}
+
+/*
+ * Translate a saved FileEntry index into its new index.
+ *
+ * An old index of 0 maps to 0.  Otherwise the index must lie inside the
+ * FE map and its slot must have been filled for that same old index.
+ *
+ * Returns 0 and stores the result in *new on success; returns 1 and
+ * leaves *new untouched on failure.
+ */
+int
+fe_OldToNew(struct fs_dump_state * state, afs_uint32 old, afs_uint32 * new)
+{
+    /* FEs use a one-based indexing system, so old==0 implies no mapping */
+    if (!old) {
+        *new = 0;
+        return 0;
+    }
+
+    if (old >= state->fe_map.len) {
+        ViceLog(0, ("fe_OldToNew: index %d is out of range\n", old));
+        return 1;
+    }
+
+    /* sanity check: the slot must have been populated for this index */
+    if (state->fe_map.entries[old].old_idx != old) {
+        ViceLog(0, ("fe_OldToNew: index %d points to an invalid FileEntry record\n", old));
+        return 1;
+    }
+
+    *new = state->fe_map.entries[old].new_idx;
+    return 0;
+}
+
+/*
+ * Translate a saved CallBack index into its new index.
+ *
+ * An old index of 0 maps to 0.  Otherwise the index must lie inside the
+ * CB map and its slot must have been filled for that same old index.
+ *
+ * Returns 0 and stores the result in *new on success; returns 1 and
+ * leaves *new untouched on failure.
+ */
+int
+cb_OldToNew(struct fs_dump_state * state, afs_uint32 old, afs_uint32 * new)
+{
+    /* CBs use a one-based indexing system, so old==0 implies no mapping */
+    if (!old) {
+        *new = 0;
+        return 0;
+    }
+
+    if (old >= state->cb_map.len) {
+        ViceLog(0, ("cb_OldToNew: index %d is out of range\n", old));
+        return 1;
+    }
+
+    /* sanity check: the slot must have been populated for this index */
+    if (state->cb_map.entries[old].old_idx != old) {
+        ViceLog(0, ("cb_OldToNew: index %d points to an invalid CallBack record\n", old));
+        return 1;
+    }
+
+    *new = state->cb_map.entries[old].new_idx;
+    return 0;
+}
+#endif /* AFS_DEMAND_ATTACH_FS */
+
int
DumpCallBackState(void)
{
return 0;
}
-#endif
+#endif /* !INTERPRET_DUMP */
#ifdef INTERPRET_DUMP
struct CallBack *cb;
struct FileEntry *fe;
- for (hash = 0; hash < VHASH; hash++) {
+ for (hash = 0; hash < FEHASH_SIZE; hash++) {
for (feip = &HashTable[hash]; fe = itofe(*feip);) {
if (!vol || (fe->volid == vol)) {
register struct CallBack *cbnext;
H_UNLOCK;
}
}
+#ifdef AFS_DEMAND_ATTACH_FS
+ /* try to bail ASAP if the fileserver is shutting down */
+ FS_STATE_RDLOCK;
+ if (fs_state.mode == FS_MODE_SHUTDOWN) {
+ FS_STATE_UNLOCK;
+ multi_Abort;
+ }
+ FS_STATE_UNLOCK;
+#endif
}
multi_End_Ignore;
H_LOCK;
--- /dev/null
+/*
+ * Copyright 2000, International Business Machines Corporation and others.
+ * All Rights Reserved.
+ *
+ * This software has been released under the terms of the IBM Public
+ * License. For details, see the LICENSE file in the top-level source
+ * directory or online at http://www.openafs.org/dl/license10.html
+ *
+ * Portions Copyright (c) 2006 Sine Nomine Associates
+ */
+
+#ifndef _AFS_VICED_CALLBACK_H
+#define _AFS_VICED_CALLBACK_H
+
+/* Maximum number of call backs to break at once, single fid
+ * There is some debate as to just how large this value should be
+ * Ideally, it would be very very large, but I am afraid that the
+ * cache managers will all send in their responses simultaneously,
+ * thereby swamping the file server. As a result, something like
+ * 10 or 15 might be a better bet.
+ */
+#define MAX_CB_HOSTS 10
+
+/* max time to break a callback, otherwise client is dead or net is hosed */
+#define MAXCBT 25
+
+#define u_byte unsigned char
+
+/* Counters and sizing information for the callback package; the single
+ * instance is exported as `cbstuff`.
+ * NOTE(review): the exact event each counter tracks is not visible in this
+ * header -- presumably one per operation named; confirm against callback.c. */
+struct cbcounters {
+ afs_int32 DeleteFiles;
+ afs_int32 DeleteCallBacks;
+ afs_int32 BreakCallBacks;
+ afs_int32 AddCallBacks;
+ afs_int32 GotSomeSpaces;
+ afs_int32 DeleteAllCallBacks;
+ /* nFEs and nCBs are set to the block count by InitCallBack before each entry
+ * is handed to FreeFE/FreeCB; cbstuff.nblks is the upper bound used when
+ * range-checking FE and CB indices in the state verification routines */
+ afs_int32 nFEs, nCBs, nblks;
+ afs_int32 CBsTimedOut;
+ afs_int32 nbreakers;
+ afs_int32 GSS1, GSS2, GSS3, GSS4, GSS5;
+};
+extern struct cbcounters cbstuff;
+
+struct cbstruct {
+ struct host *hp;
+ afs_uint32 thead;
+};
+
+/* structure MUST be multiple of 8 bytes, otherwise the casts to
+ * struct object will have alignment issues on *P64 userspaces
+ *
+ * One entry per file that has callbacks; entries are referenced by
+ * one-based index (see itofe/fetoi), with index 0 meaning "none".
+ * cb_stateFEToDiskEntry() copies this structure byte-for-byte into the
+ * state dump, so changing the layout changes the dump contents as well. */
+struct FileEntry {
+ afs_uint32 vnode;
+ afs_uint32 unique;
+ afs_uint32 volid;
+ afs_uint32 fnext; /* index of next FE in hash chain */
+ afs_uint32 ncbs; /* number of callbacks for this FE */
+ afs_uint32 firstcb; /* index of first cb in per-FE list */
+ afs_uint32 status; /* status bits for this FE */
+ afs_uint32 spare;
+};
+#define FE_LATER 0x1
+
+/* structure MUST be multiple of 8 bytes, otherwise the casts to
+ * struct object will have alignment issues on *P64 userspaces
+ *
+ * One entry per callback; entries are referenced by one-based index
+ * (see itocb/cbtoi), with index 0 meaning "none".  Each callback sits on
+ * three lists at once: the per-FE list (cnext), a per-timeout circular
+ * list (tprev/tnext) and a per-host circular list (hprev/hnext).
+ * cb_stateCBToDiskEntry() copies this structure byte-for-byte into the
+ * state dump, so changing the layout changes the dump contents as well. */
+struct CallBack {
+ afs_uint32 cnext; /* index of next cb in per-FE list */
+ afs_uint32 fhead; /* index of associated FE */
+ u_byte thead; /* Head of timeout chain */
+ u_byte status; /* Call back status; see definitions, below */
+ unsigned short spare; /* ensure proper alignment */
+ afs_uint32 hhead; /* Head of host table chain */
+ afs_uint32 tprev, tnext; /* per-timeout circular list of callbacks */
+ afs_uint32 hprev, hnext; /* per-host circular list of callbacks */
+};
+
+struct VCBParams {
+ struct cbstruct cba[MAX_CB_HOSTS]; /* re-entrant storage */
+ unsigned int ncbas;
+ afs_uint32 thead; /* head of timeout queue for youngest callback */
+ struct AFSFid *fid;
+};
+
+
+/* callback hash macros */
+#define FEHASH_SIZE 512 /* Power of 2 */
+#define FEHASH_MASK (FEHASH_SIZE-1)
+#define FEHash(volume, unique) (((volume)+(unique))&(FEHASH_MASK))
+
+#define CB_NUM_TIMEOUT_QUEUES 128
+
+
+/* status values for status field of CallBack structure */
+#define CB_NORMAL 1 /* Normal call back */
+#define CB_DELAYED 2 /* Delayed call back due to rpc problems.
+ * The call back entry will be added back to the
+ * host list at the END of the list, so that
+ * searching backwards in the list will find all
+ * the (consecutive)host. delayed call back entries */
+#define CB_VOLUME 3 /* Callback for a volume */
+#define CB_BULK 4 /* Normal callbacks, handed out from FetchBulkStatus */
+
+/* call back indices to pointers, and vice-versa */
+#define itocb(i) ((i)?CB+(i):0)
+#define cbtoi(cbp) (!(cbp)?0:(cbp)-CB)
+
+/* file entry indices to pointers, and vice-versa */
+#define itofe(i) ((i)?FE+(i):0)
+#define fetoi(fep) (!(fep)?0:(fep)-FE)
+
+/* Timeouts: there are 128 possible timeout values in effect at any
+ * given time. Each timeout represents timeouts in an interval of 128
+ * seconds. So the maximum timeout for a call back is 128*128=16384
+ * seconds, or 4 1/2 hours. The timeout cleanup stuff is called only
+ * if space runs out or by the file server every 5 minutes. This 5
+ * minute slack should be allowed for--so a maximum time of 4 hours
+ * is safer.
+ *
+ * Timeouts must be chosen to correspond to an exact multiple
+ * of 128, because all times are truncated to a 128 multiple, and
+ * timed out if the current truncated time is <= to the truncated time
+ * corresponding to the timeout queue.
+ */
+
+/* Unix time to Call Back time, and vice-versa. Call back time is
+ in units of 128 seconds, corresponding to time queues. */
+#define CBtime(uxtime) ((uxtime)>>7)
+#define UXtime(cbtime) ((cbtime)<<7)
+
+/* Given a Unix time, compute the closest Unix time that corresponds to
+ a time queue, rounding up */
+#define TimeCeiling(uxtime) (((uxtime)+127)&~127)
+
+#define TimeOutCutoff ((sizeof(TimeOuts)/sizeof(TimeOuts[0]))*8)
+#define TimeOut(nusers) ((nusers)>=TimeOutCutoff? MinTimeOut: TimeOuts[(nusers)>>3])
+
+/* time out at server is 3 minutes more than ws */
+#define ServerBias (3*60)
+
+/* Convert cbtime to timeout queue index */
+#define TIndex(cbtime) (((cbtime)&127)+1)
+
+/* Convert cbtime to pointer to timeout queue head */
+#define THead(cbtime) (&timeout[TIndex(cbtime)-1])
+
+/* Normalize index into timeout array so that two such indices will be
+ ordered correctly, so that they can be compared to see which times
+ out sooner, or so that the difference in time out times between them
+ can be computed. */
+#define TNorm(index) ((index)<TIndex(tfirst)?(index)+128:(index))
+
+/* This converts a timeout index into the actual time it will expire */
+#define TIndexToTime(index) (UXtime(TNorm(index) - TIndex(tfirst) + tfirst))
+
+
+/* Convert pointer to timeout queue head to index, and vice versa.
+ * Timeout indices are one-based: &timeout[0] <-> index 1.  The arguments
+ * are parenthesized so that expression arguments (e.g. a conditional or
+ * a cast) expand correctly. */
+#define ttoi(t) (((t)-timeout)+1)
+#define itot(i) ((timeout)+((i)-1))
+
+#endif /* _AFS_VICED_CALLBACK_H */
* This software has been released under the terms of the IBM Public
* License. For details, see the LICENSE file in the top-level source
* directory or online at http://www.openafs.org/dl/license10.html
+ *
+ * Portions Copyright (c) 2006 Sine Nomine Associates
*/
#include <afsconfig.h>
#include "viced_prototypes.h"
#include "viced.h"
#include "host.h"
-
+#include "callback.h"
+#ifdef AFS_DEMAND_ATTACH_FS
+#include "../util/afsutil_prototypes.h"
+#include "../tviced/serialize_state.h"
+#endif /* AFS_DEMAND_ATTACH_FS */
#ifdef AFS_PTHREAD_ENV
pthread_mutex_t host_glock_mutex;
int rxcon_ident_key;
int rxcon_client_key;
+static struct rx_securityClass *sc = NULL;
+
+static void h_SetupCallbackConn_r(struct host * host);
+static void h_AddHostToHashTable_r(afs_uint32 addr, afs_uint16 port, struct host * host);
+static void h_AddHostToUuidHashTable_r(afsUUID * uuid, struct host * host);
+static int h_DeleteHostFromHashTableByAddr_r(afs_uint32 addr, afs_uint16 port, struct host *host);
+
#define CESPERBLOCK 73
struct CEBlock { /* block of CESPERBLOCK file entries */
struct client entry[CESPERBLOCK];
{
register struct host *entry;
- if (HTFree == 0)
+ if (HTFree == NULL)
GetHTBlock();
- assert(HTFree != 0);
+ assert(HTFree != NULL);
entry = HTFree;
HTFree = entry->next;
HTs++;
free(host->hcps.prlist_val); /* this is for hostaclRefresh */
host->hcps.prlist_val = NULL;
host->hcps.prlist_len = 0;
- slept ? (host->cpsCall = FT_ApproxTime()) : (host->cpsCall = now);
+ host->cpsCall = slept ? (FT_ApproxTime()) : (now);
H_UNLOCK;
code = pr_GetHostCPS(ntohl(host->host), &host->hcps);
{
struct servent *serverentry;
struct host *host;
- static struct rx_securityClass *sc = 0;
afs_int32 now;
#if FS_STATS_DETAILED
afs_uint32 newHostAddr_HBO; /*New host IP addr, in host byte order */
host->host = rxr_HostOf(r_con);
host->port = rxr_PortOf(r_con);
- hashInsert_r(host->host, host->port, host);
+ h_AddHostToHashTable_r(host->host, host->port, host);
if (consolePort == 0) { /* find the portal number for console */
#if defined(AFS_OSF_ENV)
host->Console = 1;
/* Make a callback channel even for the console, on the off chance that it
* makes a request that causes a break call back. It shouldn't. */
- {
- if (!sc)
- sc = rxnull_NewClientSecurityObject();
- host->callback_rxcon =
- rx_NewConnection(host->host, host->port, 1, sc, 0);
- rx_SetConnDeadTime(host->callback_rxcon, 50);
- rx_SetConnHardDeadTime(host->callback_rxcon, AFS_HARDDEADTIME);
- }
+ h_SetupCallbackConn_r(host);
now = host->LastCall = host->cpsCall = host->ActiveCall = FT_ApproxTime();
host->hostFlags = 0;
host->hcps.prlist_val = NULL;
host->hcps.prlist_len = 0;
- host->interface = 0;
+ host->interface = NULL;
#ifdef undef
host->hcpsfailed = 0; /* save cycles */
h_gethostcps(host); /* do this under host hold/lock */
#endif
- host->FirstClient = 0;
+ host->FirstClient = NULL;
h_Hold_r(host);
h_Lock_r(host);
h_InsertList_r(host); /* update global host List */
} /*h_Alloc_r */
+
+/*
+ * Create the rx connection used to send callbacks to 'host' and store it
+ * in host->callback_rxcon.
+ *
+ * A callback channel is made even for the console, on the off chance that
+ * it makes a request that causes a break call back.  It shouldn't.
+ *
+ * The shared security object 'sc' is created on first use.
+ */
+static void
+h_SetupCallbackConn_r(struct host * host)
+{
+    struct rx_connection *cb_conn;
+
+    if (sc == NULL) {
+        sc = rxnull_NewClientSecurityObject();
+    }
+
+    cb_conn = rx_NewConnection(host->host, host->port, 1, sc, 0);
+    host->callback_rxcon = cb_conn;
+
+    /* apply the dead time and hard dead time to the new connection */
+    rx_SetConnDeadTime(cb_conn, 50);
+    rx_SetConnHardDeadTime(cb_conn, AFS_HARDDEADTIME);
+}
+
/* Lookup a host given an IP address and UDP port number. */
/* hostaddr and hport are in network order */
/* Note: host should be released by caller if 0 == *heldp and non-null */
if (client) {
H_LOCK;
if (client->tcon == tcon)
- client->tcon = (struct rx_connection *)0;
+ client->tcon = NULL;
H_UNLOCK;
}
return 0;
H_UNLOCK;
for (i = 0; i < count; i++) {
held[i] = (*proc) (list[i], held[i], param);
- if (!held[i])
+ if (!H_ENUMERATE_ISSET_HELD(held[i]))
h_Release(list[i]); /* this might free up the host */
+ /* bail out of the enumeration early */
+ if (H_ENUMERATE_ISSET_BAIL(held[i]))
+ break;
}
free((void *)list);
free((void *)held);
h_Hold_r(enumstart);
for (host = enumstart; host; host = next, held = nheld) {
next = host->next;
- if (next && !(nheld = h_Held_r(next)))
+ if (next && !(nheld = h_Held_r(next)) && !H_ENUMERATE_ISSET_BAIL(held))
h_Hold_r(next);
held = (*proc) (host, held, param);
- if (!held)
+ if (!H_ENUMERATE_ISSET_HELD(held))
h_Release_r(host); /* this might free up the host */
+ if (H_ENUMERATE_ISSET_BAIL(held))
+ break;
}
} /*h_Enumerate_r */
/* inserts a new HashChain structure corresponding to this UUID */
-void
-hashInsertUuid_r(struct afsUUID *uuid, struct host *host)
+static void
+h_AddHostToUuidHashTable_r(struct afsUUID *uuid, struct host *host)
{
int index;
struct h_hashChain *chain;
/* insert into beginning of list for this bucket */
chain = (struct h_hashChain *)malloc(sizeof(struct h_hashChain));
if (!chain) {
- ViceLog(0, ("Failed malloc in hashInsertUuid_r\n"));
+ ViceLog(0, ("Failed malloc in h_AddHostToUuidHashTable_r\n"));
assert(0);
}
assert(chain);
/* inserts a new HashChain structure corresponding to this address */
-void
-hashInsert_r(afs_uint32 addr, afs_uint16 port, struct host *host)
+static void
+h_AddHostToHashTable_r(afs_uint32 addr, afs_uint16 port, struct host *host)
{
int index;
struct h_hashChain *chain;
/* insert into beginning of list for this bucket */
chain = (struct h_hashChain *)malloc(sizeof(struct h_hashChain));
if (!chain) {
- ViceLog(0, ("Failed malloc in hashInsert_r\n"));
+ ViceLog(0, ("Failed malloc in h_AddHostToHashTable_r\n"));
assert(0);
}
chain->hostPtr = host;
/*
* Create a hash table entry for this address
*/
- hashInsert_r(addr, port, host);
+ h_AddHostToHashTable_r(addr, port, host);
return 0;
}
/*
* Remove the hash table entry for this address
*/
- hashDelete_r(addr, port, host);
+ h_DeleteHostFromHashTableByAddr_r(addr, port, host);
return 0;
}
/* the new host is held and locked */
} else {
/* This really is a new host */
- hashInsertUuid_r(&identP->uuid, host);
+ h_AddHostToUuidHashTable_r(&identP->uuid, host);
cb_conn = host->callback_rxcon;
rx_GetConnection(cb_conn);
H_UNLOCK;
client->authClass = authClass; /* rx only */
client->sid = rxr_CidOf(tcon);
client->VenusEpoch = rxr_GetEpoch(tcon);
- client->CPS.prlist_val = 0;
+ client->CPS.prlist_val = NULL;
client->CPS.prlist_len = 0;
h_Unlock_r(host);
}
} /*h_DumpHosts */
+#ifdef AFS_DEMAND_ATTACH_FS
+/*
+ * demand attach fs
+ * host state serialization
+ */
+static int h_stateFillHeader(struct host_state_header * hdr);
+static int h_stateCheckHeader(struct host_state_header * hdr);
+static int h_stateAllocMap(struct fs_dump_state * state);
+static int h_stateSaveHost(register struct host * host, int held, struct fs_dump_state * state);
+static int h_stateRestoreHost(struct fs_dump_state * state);
+static int h_stateRestoreIndex(struct host * h, int held, struct fs_dump_state * state);
+static int h_stateVerifyHost(struct host * h, int held, struct fs_dump_state * state);
+static int h_stateVerifyAddrHash(struct fs_dump_state * state, struct host * h, afs_uint32 addr, afs_uint16 port);
+static int h_stateVerifyUuidHash(struct fs_dump_state * state, struct host * h);
+static void h_hostToDiskEntry_r(struct host * in, struct hostDiskEntry * out);
+static void h_diskEntryToHost_r(struct hostDiskEntry * in, struct host * out);
+
+
+/*
+ * Save all host state to disk for fast startup.
+ *
+ * A zeroed (invalid) host state header is written first, then one record
+ * per host is written by enumerating hostList with h_stateSaveHost, and
+ * finally the real header is written over the placeholder.  If anything
+ * fails along the way the placeholder header is left in place.
+ *
+ * Returns state->bail (non-zero on failure).
+ */
+int
+h_stateSave(struct fs_dump_state * state)
+{
+    /* record the current eof offset as the host section offset */
+    AssignInt64(state->eof_offset, &state->hdr->h_offset);
+
+    /* XXX debug */
+    ViceLog(0, ("h_stateSave: hostCount=%d\n", hostCount));
+
+    /* invalidate host state header and reserve its space in the file */
+    memset(state->h_hdr, 0, sizeof(struct host_state_header));
+    if (fs_stateWriteHeader(state, &state->hdr->h_offset, state->h_hdr,
+                            sizeof(struct host_state_header)) != 0) {
+        state->bail = 1;
+        return state->bail;
+    }
+    fs_stateIncEOF(state, sizeof(struct host_state_header));
+
+    /* dump every host; h_stateSaveHost sets state->bail on failure */
+    h_Enumerate_r(h_stateSaveHost, hostList, (char *)state);
+
+    if (!state->bail) {
+        h_stateFillHeader(state->h_hdr);
+
+        /* write the real header to disk */
+        state->bail = fs_stateWriteHeader(state, &state->hdr->h_offset,
+                                          state->h_hdr,
+                                          sizeof(struct host_state_header));
+    }
+
+    return state->bail;
+}
+
+/*
+ * demand attach fs
+ * host state serialization
+ *
+ * Restore all host state from a disk dump for fast startup.
+ *
+ * Reads and validates the host state header, allocates the old-to-new
+ * host index map, then restores the number of host records announced by
+ * the header.  Any failure sets state->bail to 1.
+ *
+ * Returns state->bail (non-zero on failure).
+ */
+int
+h_stateRestore(struct fs_dump_state * state)
+{
+    int remaining;
+
+    /* seek to the right position and read in the host state header */
+    if (fs_stateReadHeader(state, &state->hdr->h_offset, state->h_hdr,
+                           sizeof(struct host_state_header)) != 0) {
+        state->bail = 1;
+        return state->bail;
+    }
+
+    /* check the validity of the header */
+    if (h_stateCheckHeader(state->h_hdr) != 0) {
+        state->bail = 1;
+        return state->bail;
+    }
+
+    remaining = state->h_hdr->records;
+
+    if (h_stateAllocMap(state) != 0) {
+        state->bail = 1;
+        return state->bail;
+    }
+
+    /* iterate over records restoring host state; stop at first failure */
+    while (remaining > 0) {
+        if (h_stateRestoreHost(state) != 0) {
+            state->bail = 1;
+            break;
+        }
+        remaining--;
+    }
+
+    return state->bail;
+}
+
+/*
+ * Run h_stateRestoreIndex over every host on hostList.
+ *
+ * Returns state->bail.
+ */
+int
+h_stateRestoreIndices(struct fs_dump_state * state)
+{
+    char *rock = (char *)state;
+
+    h_Enumerate_r(h_stateRestoreIndex, hostList, rock);
+
+    return state->bail;
+}
+
+/*
+ * h_Enumerate_r callback: translate a host's saved cblist index into its
+ * new value via cb_OldToNew, updating h->cblist in place.
+ *
+ * On translation failure the error is recorded in state->bail and the
+ * enumeration is aborted.  Recording it here matters because
+ * h_stateRestoreIndices reports its result solely through state->bail.
+ *
+ * Returns 'held', with the bail bit set on failure.
+ */
+static int
+h_stateRestoreIndex(struct host * h, int held, struct fs_dump_state * state)
+{
+    if (cb_OldToNew(state, h->cblist, &h->cblist)) {
+        state->bail = 1;
+        return H_ENUMERATE_BAIL(held);
+    }
+    return held;
+}
+
+/*
+ * Run h_stateVerifyHost over every host on hostList.
+ *
+ * Returns state->bail.
+ */
+int
+h_stateVerify(struct fs_dump_state * state)
+{
+    char *rock = (char *)state;
+
+    h_Enumerate_r(h_stateVerifyHost, hostList, rock);
+
+    return state->bail;
+}
+
+/*
+ * h_Enumerate_r callback: verify one host's hash table entries and its
+ * callback list.
+ *
+ * A host with an interface list has each of its addresses checked in the
+ * address hash and its uuid checked in the uuid hash; a host without one
+ * has only its primary (host, port) pair checked.  Verification failures
+ * set state->bail but do not stop the enumeration; only a NULL host
+ * pointer aborts it.
+ *
+ * Returns 'held', with the bail bit set for a NULL host.
+ */
+static int
+h_stateVerifyHost(struct host * h, int held, struct fs_dump_state * state)
+{
+    int idx;
+
+    if (h == NULL) {
+        ViceLog(0, ("h_stateVerifyHost: error: NULL host pointer in linked list\n"));
+        return H_ENUMERATE_BAIL(held);
+    }
+
+    if (h->interface == NULL) {
+        if (h_stateVerifyAddrHash(state, h, h->host, h->port)) {
+            state->bail = 1;
+        }
+    } else {
+        /* walk the interface list from the last entry down to the first */
+        idx = h->interface->numberOfInterfaces;
+        while (--idx >= 0) {
+            if (h_stateVerifyAddrHash(state, h,
+                                      h->interface->interface[idx].addr,
+                                      h->interface->interface[idx].port)) {
+                state->bail = 1;
+            }
+        }
+        if (h_stateVerifyUuidHash(state, h)) {
+            state->bail = 1;
+        }
+    }
+
+    if (cb_stateVerifyHCBList(state, h)) {
+        state->bail = 1;
+    }
+
+    return held;
+}
+
+/*
+ * Verify that (addr, port) can be found in the address hash table.
+ *
+ * Walks the hash chain for addr.  A chain entry with a NULL host pointer,
+ * or a chain longer than FS_STATE_H_MAX_ADDR_HASH_CHAIN_LEN (treated as a
+ * loop), is an error.  A matching entry that points at a host other than
+ * 'h' only produces a warning.  A missing entry is an error in
+ * FS_STATE_LOAD_MODE and a warning otherwise.  Warnings set
+ * state->flags.warnings_generated.
+ *
+ * Returns 0 on success (including warnings), 1 on error.
+ */
+static int
+h_stateVerifyAddrHash(struct fs_dump_state * state, struct host * h, afs_uint32 addr, afs_uint16 port)
+{
+    struct h_hashChain *cur;
+    struct host *owner = NULL;
+    char addrbuf[16];
+    int matched = 0;
+    int walked = 0;
+    int bucket = h_HashIndex(addr);
+
+    cur = hostHashTable[bucket];
+    while (cur != NULL) {
+        owner = cur->hostPtr;
+        if (owner == NULL) {
+            afs_inet_ntoa_r(addr, addrbuf);
+            ViceLog(0, ("h_stateVerifyAddrHash: error: addr hash chain has NULL host ptr (lookup addr %s)\n", addrbuf));
+            return 1;
+        }
+
+        if ((cur->addr == addr) && (cur->port == port)) {
+            if (owner != h) {
+                ViceLog(0, ("h_stateVerifyAddrHash: warning: addr hash entry points to different host struct (%d, %d)\n",
+                            h->index, owner->index));
+                state->flags.warnings_generated = 1;
+            }
+            matched = 1;
+            break;
+        }
+
+        if (walked > FS_STATE_H_MAX_ADDR_HASH_CHAIN_LEN) {
+            ViceLog(0, ("h_stateVerifyAddrHash: error: hash chain length exceeds %d; assuming there's a loop\n",
+                        FS_STATE_H_MAX_ADDR_HASH_CHAIN_LEN));
+            return 1;
+        }
+        walked++;
+        cur = cur->next;
+    }
+
+    if (matched) {
+        return 0;
+    }
+
+    afs_inet_ntoa_r(addr, addrbuf);
+    if (state->mode == FS_STATE_LOAD_MODE) {
+        ViceLog(0, ("h_stateVerifyAddrHash: error: addr %s not found in hash\n", addrbuf));
+        return 1;
+    }
+
+    ViceLog(0, ("h_stateVerifyAddrHash: warning: addr %s not found in hash\n", addrbuf));
+    state->flags.warnings_generated = 1;
+    return 0;
+}
+
+/*
+ * Verify that the uuid of host 'h' can be found in the uuid hash table.
+ * The caller must pass a host whose interface pointer is non-NULL, since
+ * h->interface is dereferenced unconditionally.
+ *
+ * Walks the hash chain for the uuid.  A chain entry with a NULL host
+ * pointer, or a chain longer than FS_STATE_H_MAX_UUID_HASH_CHAIN_LEN
+ * (treated as a loop), is an error.  A matching entry that belongs to a
+ * host other than 'h' only produces a warning.  A missing entry is an
+ * error in FS_STATE_LOAD_MODE and a warning otherwise.  Warnings set
+ * state->flags.warnings_generated.
+ *
+ * Returns 0 on success (including warnings), 1 on error.
+ */
+static int
+h_stateVerifyUuidHash(struct fs_dump_state * state, struct host * h)
+{
+    struct h_hashChain *cur;
+    struct host *owner = NULL;
+    afsUUID * uuidp = &h->interface->uuid;
+    char uuidbuf[40];
+    int walked = 0;
+    int bucket = h_UuidHashIndex(uuidp);
+
+    cur = hostUuidHashTable[bucket];
+    while (cur != NULL) {
+        owner = cur->hostPtr;
+        if (owner == NULL) {
+            afsUUID_to_string(uuidp, uuidbuf, sizeof(uuidbuf));
+            ViceLog(0, ("h_stateVerifyUuidHash: error: uuid hash chain has NULL host ptr (lookup uuid %s)\n", uuidbuf));
+            return 1;
+        }
+
+        if (owner->interface &&
+            afs_uuid_equal(&owner->interface->uuid, uuidp)) {
+            if (owner != h) {
+                ViceLog(0, ("h_stateVerifyUuidHash: warning: uuid hash entry points to different host struct (%d, %d)\n",
+                            h->index, owner->index));
+                state->flags.warnings_generated = 1;
+            }
+            /* found it; nothing more to check */
+            return 0;
+        }
+
+        if (walked > FS_STATE_H_MAX_UUID_HASH_CHAIN_LEN) {
+            ViceLog(0, ("h_stateVerifyUuidHash: error: hash chain length exceeds %d; assuming there's a loop\n",
+                        FS_STATE_H_MAX_UUID_HASH_CHAIN_LEN));
+            return 1;
+        }
+        walked++;
+        cur = cur->next;
+    }
+
+    /* fell off the end of the chain without a match */
+    afsUUID_to_string(uuidp, uuidbuf, sizeof(uuidbuf));
+    if (state->mode == FS_STATE_LOAD_MODE) {
+        ViceLog(0, ("h_stateVerifyUuidHash: error: uuid %s not found in hash\n", uuidbuf));
+        return 1;
+    }
+
+    ViceLog(0, ("h_stateVerifyUuidHash: warning: uuid %s not found in hash\n", uuidbuf));
+    state->flags.warnings_generated = 1;
+    return 0;
+}
+
+/*
+ * Create the host state header structure: stamp it with the host state
+ * magic number and version.
+ *
+ * Always returns 0.  The explicit return is required because the function
+ * is declared to return int; falling off the end would leave the result
+ * undefined for any caller that reads it.
+ */
+static int
+h_stateFillHeader(struct host_state_header * hdr)
+{
+    hdr->stamp.magic = HOST_STATE_MAGIC;
+    hdr->stamp.version = HOST_STATE_VERSION;
+    return 0;
+}
+
+/*
+ * Check the contents of the host state header structure.
+ *
+ * Returns 0 when both the magic number and the version match the values
+ * this build writes, 1 otherwise (after logging which check failed).
+ */
+static int
+h_stateCheckHeader(struct host_state_header * hdr)
+{
+    int ret=0;
+
+    if (hdr->stamp.magic != HOST_STATE_MAGIC) {
+        ViceLog(0, ("h_stateCheckHeader: invalid state header\n"));
+        ret = 1;
+    }
+    else if (hdr->stamp.version != HOST_STATE_VERSION) {
+        ViceLog(0, ("h_stateCheckHeader: unknown version number\n"));
+        ret = 1;
+    }
+    return ret;
+}
+
+/*
+ * Allocate the host id mapping table.
+ *
+ * The table gets index_max + 1 zero-filled entries, so every index up to
+ * and including the header's index_max is addressable.
+ *
+ * Returns 0 on success, 1 if the allocation failed.
+ */
+static int
+h_stateAllocMap(struct fs_dump_state * state)
+{
+    state->h_map.len = state->h_hdr->index_max + 1;
+    state->h_map.entries = (struct idx_map_entry_t *)
+        calloc(state->h_map.len, sizeof(struct idx_map_entry_t));
+
+    if (state->h_map.entries == NULL) {
+        return 1;
+    }
+    return 0;
+}
+
+/*
+ * Function called by h_Enumerate to save a host to disk.
+ *
+ * Each record is written as a single vectored write of:
+ *   iov[0]  host_state_entry_header (magic, total length, counts)
+ *   iov[1]  hostDiskEntry
+ *   then    a copy of the interface list, if the host has one
+ *   then    a copy of the host cps list, if the host has one
+ *
+ * On success the dump EOF is advanced by the record length and the record
+ * count in the host state header is incremented.  On a failed write
+ * neither is touched, state->bail is set and the enumeration is aborted.
+ *
+ * Returns 'held', with the bail bit set when state->bail is set.
+ */
+static int
+h_stateSaveHost(register struct host * host, int held, struct fs_dump_state * state)
+{
+    int if_len=0, hcps_len=0;
+    struct hostDiskEntry hdsk;
+    struct host_state_entry_header hdr;
+    struct Interface * ifp = NULL;
+    afs_int32 * hcps = NULL;
+    struct iovec iov[4];
+    int iovcnt = 2;   /* slots 0 and 1 are always the two fixed headers */
+
+    memset(&hdr, 0, sizeof(hdr));
+
+    /* track the highest host index seen; restore sizes its map from it */
+    if (state->h_hdr->index_max < host->index) {
+        state->h_hdr->index_max = host->index;
+    }
+
+    h_hostToDiskEntry_r(host, &hdsk);
+
+    if (host->interface) {
+        /* struct Interface already holds one AddrPort, hence the -1 */
+        if_len = sizeof(struct Interface) +
+            ((host->interface->numberOfInterfaces-1) * sizeof(struct AddrPort));
+        ifp = (struct Interface *) malloc(if_len);
+        assert(ifp != NULL);
+        memcpy(ifp, host->interface, if_len);
+        hdr.interfaces = host->interface->numberOfInterfaces;
+        iov[iovcnt].iov_base = (char *) ifp;
+        iov[iovcnt].iov_len = if_len;
+        iovcnt++;
+    }
+    if (host->hcps.prlist_val) {
+        hdr.hcps = host->hcps.prlist_len;
+        hcps_len = hdr.hcps * sizeof(afs_int32);
+        hcps = (afs_int32 *) malloc(hcps_len);
+        assert(hcps != NULL);
+        memcpy(hcps, host->hcps.prlist_val, hcps_len);
+        iov[iovcnt].iov_base = (char *) hcps;
+        iov[iovcnt].iov_len = hcps_len;
+        iovcnt++;
+    }
+
+    hdr.len = sizeof(struct host_state_entry_header) +
+        sizeof(struct hostDiskEntry) + if_len + hcps_len;
+    hdr.magic = HOST_STATE_ENTRY_MAGIC;
+
+    iov[0].iov_base = (char *) &hdr;
+    iov[0].iov_len = sizeof(hdr);
+    iov[1].iov_base = (char *) &hdsk;
+    iov[1].iov_len = sizeof(struct hostDiskEntry);
+
+    if (fs_stateWriteV(state, iov, iovcnt)) {
+        ViceLog(0, ("h_stateSaveHost: failed to save host %d\n", host->index));
+        state->bail = 1;
+        /* do not account for a record that was not written */
+        goto done;
+    }
+
+    fs_stateIncEOF(state, hdr.len);
+
+    state->h_hdr->records++;
+
+ done:
+    if (ifp)
+        free(ifp);
+    if (hcps)
+        free(hcps);
+    if (state->bail) {
+        return H_ENUMERATE_BAIL(held);
+    }
+    return held;
+}
+
+/*
+ * Restore one host record from the dump file.
+ *
+ * Reads the per-record header, checks its magic number and that its length
+ * field agrees with the interface and cps counts it carries, then reads
+ * the hostDiskEntry plus the optional interface list and cps list in one
+ * vectored read.  The data read from disk is sanity-checked before use:
+ *   - the interface count stored inside the interface list must match the
+ *     count in the record header, because the buffer was sized from the
+ *     header while the hash-insert loop below iterates over the stored
+ *     count;
+ *   - the saved host index must fall inside the index map, because it is
+ *     used to index state->h_map.entries.
+ *
+ * A new host structure is then obtained from GetHT, filled in, given a
+ * callback connection, entered into the address (and, when it has an
+ * interface list, uuid) hash tables and the global host list, and its
+ * old-index to new-index mapping is recorded.
+ *
+ * Returns 0 on success, 1 on failure.  On failure the interface and cps
+ * buffers are freed; on success they are owned by the new host.
+ */
+static int
+h_stateRestoreHost(struct fs_dump_state * state)
+{
+    int ifp_len=0, hcps_len=0, bail=0;
+    struct host_state_entry_header hdr;
+    struct hostDiskEntry hdsk;
+    struct host *host = NULL;
+    struct Interface *ifp = NULL;
+    afs_int32 * hcps = NULL;
+    struct iovec iov[3];
+    int iovcnt = 1;   /* slot 0 is always the hostDiskEntry */
+
+    if (fs_stateRead(state, &hdr, sizeof(hdr))) {
+        ViceLog(0, ("h_stateRestoreHost: failed to read host entry header from dump file '%s'\n",
+                    state->fn));
+        bail = 1;
+        goto done;
+    }
+
+    if (hdr.magic != HOST_STATE_ENTRY_MAGIC) {
+        ViceLog(0, ("h_stateRestoreHost: fileserver state dump file '%s' is corrupt.\n",
+                    state->fn));
+        bail = 1;
+        goto done;
+    }
+
+    /* work out the variable-length part sizes and validate them against
+     * the record length before allocating anything */
+    if (hdr.interfaces) {
+        ifp_len = sizeof(struct Interface) +
+            ((hdr.interfaces-1) * sizeof(struct AddrPort));
+    }
+    if (hdr.hcps) {
+        hcps_len = hdr.hcps * sizeof(afs_int32);
+    }
+
+    if ((ifp_len + hcps_len + sizeof(hdsk) + sizeof(hdr)) != hdr.len) {
+        ViceLog(0, ("h_stateRestoreHost: host entry header length fields are inconsistent\n"));
+        bail = 1;
+        goto done;
+    }
+
+    iov[0].iov_base = (char *) &hdsk;
+    iov[0].iov_len = sizeof(struct hostDiskEntry);
+
+    if (hdr.interfaces) {
+        ifp = (struct Interface *) malloc(ifp_len);
+        assert(ifp != NULL);
+        iov[iovcnt].iov_base = (char *) ifp;
+        iov[iovcnt].iov_len = ifp_len;
+        iovcnt++;
+    }
+    if (hdr.hcps) {
+        hcps = (afs_int32 *) malloc(hcps_len);
+        assert(hcps != NULL);
+        iov[iovcnt].iov_base = (char *) hcps;
+        iov[iovcnt].iov_len = hcps_len;
+        iovcnt++;
+    }
+
+    if (fs_stateReadV(state, iov, iovcnt)) {
+        ViceLog(0, ("h_stateRestoreHost: failed to read host entry\n"));
+        bail = 1;
+        goto done;
+    }
+
+    /* the interface list carries its own count; it must agree with the
+     * count the buffer was sized from */
+    if (ifp && (ifp->numberOfInterfaces != hdr.interfaces)) {
+        ViceLog(0, ("h_stateRestoreHost: interface count in host entry does not match entry header\n"));
+        bail = 1;
+        goto done;
+    }
+
+    /* the saved index is used to address the index map below */
+    if (hdsk.index >= state->h_map.len) {
+        ViceLog(0, ("h_stateRestoreHost: host index %d is out of range\n",
+                    (int) hdsk.index));
+        bail = 1;
+        goto done;
+    }
+
+    if (!hdr.hcps && hdsk.hcps_valid) {
+        /* valid, zero-length host cps ; does this ever happen? */
+        hcps = (afs_int32 *) malloc(sizeof(afs_int32));
+        assert(hcps != NULL);
+    }
+
+    host = GetHT();
+    assert(host != NULL);
+
+    if (ifp) {
+        host->interface = ifp;
+    }
+    if (hcps) {
+        host->hcps.prlist_val = hcps;
+        host->hcps.prlist_len = hdr.hcps;
+    }
+
+    h_diskEntryToHost_r(&hdsk, host);
+    h_SetupCallbackConn_r(host);
+
+    if (ifp) {
+        int i;
+        for (i = ifp->numberOfInterfaces-1; i >= 0; i--) {
+            h_AddHostToHashTable_r(ifp->interface[i].addr,
+                                   ifp->interface[i].port, host);
+        }
+        h_AddHostToUuidHashTable_r(&ifp->uuid, host);
+    } else {
+        h_AddHostToHashTable_r(host->host, host->port, host);
+    }
+    h_InsertList_r(host);
+
+    /* setup host id map entry */
+    state->h_map.entries[hdsk.index].old_idx = hdsk.index;
+    state->h_map.entries[hdsk.index].new_idx = host->index;
+
+ done:
+    if (bail) {
+        if (ifp)
+            free(ifp);
+        if (hcps)
+            free(hcps);
+    }
+    return bail;
+}
+
+/*
+ * Serialize a host structure into its on-disk form.
+ *
+ * The output entry is cleared first so that padding bytes and any field
+ * not assigned below reach the dump file as zeroes rather than as
+ * whatever the caller's buffer happened to contain.
+ */
+static void
+h_hostToDiskEntry_r(struct host * in, struct hostDiskEntry * out)
+{
+    memset(out, 0, sizeof(*out));
+
+    out->host = in->host;
+    out->port = in->port;
+    out->hostFlags = in->hostFlags;
+    out->Console = in->Console;
+    out->hcpsfailed = in->hcpsfailed;
+    out->LastCall = in->LastCall;
+    out->ActiveCall = in->ActiveCall;
+    out->cpsCall = in->cpsCall;
+    out->cblist = in->cblist;
+#ifdef FS_STATS_DETAILED
+    out->InSameNetwork = in->InSameNetwork;
+#endif
+
+    /* special fields we save, but are not memcpy'd back on restore */
+    out->index = in->index;
+    out->hcps_len = in->hcps.prlist_len;
+    out->hcps_valid = (in->hcps.prlist_val == NULL) ? 0 : 1;
+}
+
+/*
+ * Restore a host structure from its on-disk form.
+ *
+ * Only the plain value fields are copied back.  The saved index and the
+ * hcps bookkeeping fields of the disk entry are deliberately not copied
+ * into the host here.
+ */
+static void
+h_diskEntryToHost_r(struct hostDiskEntry * in, struct host * out)
+{
+    /* identity */
+    out->host = in->host;
+    out->port = in->port;
+
+    /* flags and status */
+    out->hostFlags = in->hostFlags;
+    out->Console = in->Console;
+    out->hcpsfailed = in->hcpsfailed;
+#ifdef FS_STATS_DETAILED
+    out->InSameNetwork = in->InSameNetwork;
+#endif
+
+    /* timestamps */
+    out->LastCall = in->LastCall;
+    out->ActiveCall = in->ActiveCall;
+    out->cpsCall = in->cpsCall;
+
+    /* callback list index */
+    out->cblist = in->cblist;
+}
+
+/*
+ * Index translation routine: map a host index from the dump file ('old')
+ * to the index of the host structure it was restored into ('*new').
+ *
+ * Hosts use a zero-based index, so old==0 is valid.
+ *
+ * Returns 0 and stores the new index on success.  Returns 1, leaving *new
+ * untouched, when 'old' is outside the map or the map slot was never
+ * filled in for that index.
+ */
+int
+h_OldToNew(struct fs_dump_state * state, afs_uint32 old, afs_uint32 * new)
+{
+    if (old >= state->h_map.len) {
+        ViceLog(0, ("h_OldToNew: index %d is out of range\n", old));
+        return 1;
+    }
+
+    /* sanity check: the slot must have been recorded for this very index */
+    if (state->h_map.entries[old].old_idx != old) {
+        ViceLog(0, ("h_OldToNew: index %d points to an invalid host record\n", old));
+        return 1;
+    }
+
+    *new = state->h_map.entries[old].new_idx;
+    return 0;
+}
+#endif /* AFS_DEMAND_ATTACH_FS */
+
/*
* This counts the number of workstations, the number of active workstations,
* Since it can serialize them, and pile up, it should be a separate LWP
* from other events.
*/
-int
+static int
CheckHost(register struct host *host, int held)
{
register struct client *client;
struct rx_connection *cb_conn = NULL;
int code;
+#ifdef AFS_DEMAND_ATTACH_FS
+ /* kill the checkhost lwp ASAP during shutdown */
+ FS_STATE_RDLOCK;
+ if (fs_state.mode == FS_MODE_SHUTDOWN) {
+ FS_STATE_UNLOCK;
+ return H_ENUMERATE_BAIL(held);
+ }
+ FS_STATE_UNLOCK;
+#endif
+
/* Host is held by h_Enumerate */
H_LOCK;
for (client = host->FirstClient; client; client = client->next) {
* This routine is called roughly every 5 minutes.
*/
void
-h_CheckHosts()
+h_CheckHosts(void)
{
afs_uint32 now = FT_ApproxTime();
/* deleted a HashChain structure for this address and host */
/* returns 1 on success */
static int
-hashDelete_r(afs_uint32 addr, afs_uint16 port, struct host *host)
+h_DeleteHostFromHashTableByAddr_r(afs_uint32 addr, afs_uint16 port, struct host *host)
{
int flag;
register struct h_hashChain **hp, *th;
* This software has been released under the terms of the IBM Public
* License. For details, see the LICENSE file in the top-level source
* directory or online at http://www.openafs.org/dl/license10.html
+ *
+ * Portions Copyright (c) 2006 Sine Nomine Associates
*/
+#ifndef _AFS_VICED_HOST_H
+#define _AFS_VICED_HOST_H
+
#include "fs_stats.h" /*File Server stats package */
#ifdef AFS_PTHREAD_ENV
struct AddrPort interface[1];/* there are actually more than one here */
/* in network byte order */
};
+
struct host {
struct host *next, *prev; /* linked list of all hosts */
struct rx_connection *callback_rxcon; /* rx callback connection */
struct client *FirstClient; /* first connection from host */
afs_uint32 cpsCall; /* time of last cps call from this host */
struct Interface *interface; /* all alternate addr for client */
- afs_uint32 cblist; /* Call back list for this host */
+ afs_uint32 cblist; /* index of a cb in the per-host circular CB list */
/*
* These don't get zeroed, keep them at the end. If index doesn't
* follow an unsigned short then we need to pad to ensure that
/* Don't zero the lock */
#define CLIENT_TO_ZERO(C) ((int)(((char *)(&((C)->lock))-(char *)(C))))
+
/*
* key for the client structure stored in connection specific data
*/
struct Interface *MultiVerifyInterface_r();
extern int initInterfaceAddr_r(struct host *host, struct interfaceAddr *interf);
+#ifdef AFS_DEMAND_ATTACH_FS
+/*
+ * demand attach fs
+ * state serialization
+ */
+extern int h_SaveState(void);
+extern int h_RestoreState(void);
+#endif
+
+/*
+ * Encoding of the int returned by h_Enumerate / h_Enumerate_r callbacks.
+ *
+ * The top bit (0x80000000) asks the enumerator to stop early; the
+ * remaining 31 bits carry the "held" value.
+ *
+ *   H_ENUMERATE_BAIL(held)        'held' with the bail bit set
+ *   H_ENUMERATE_ISSET_BAIL(held)  non-zero if the bail bit is set
+ *   H_ENUMERATE_ISSET_HELD(held)  'held' with the bail bit masked off
+ */
+#define H_ENUMERATE_BAIL(held) ((held)|0x80000000)
+#define H_ENUMERATE_ISSET_BAIL(held) ((held)&0x80000000)
+#define H_ENUMERATE_ISSET_HELD(held) ((held)&0x7FFFFFFF)
+
struct host *(hosttableptrs[h_MAXHOSTTABLES]); /* Used by h_itoh */
#define h_htoi(host) ((host)->index) /* index isn't zeroed, no need to lock */
#define h_itoh(hostindex) (hosttableptrs[(hostindex)>>h_HTSHIFT]+((hostindex)&(h_HTSPERBLOCK-1)))
#define HFE_LATER 0x80 /* host has FE_LATER callbacks */
#define HERRORTRANS 0x100 /* do error translation */
-
+#endif /* _AFS_VICED_HOST_H */
* This software has been released under the terms of the IBM Public
* License. For details, see the LICENSE file in the top-level source
* directory or online at http://www.openafs.org/dl/license10.html
+ *
+ * Portions Copyright (c) 2006 Sine Nomine Associates
*/
/* viced.c - File Server main loop */
static void FlagMsg();
+#ifdef AFS_DEMAND_ATTACH_FS
+/*
+ * demand attach fs
+ * fileserver mode support
+ *
+ * during fileserver shutdown, we have to track the graceful shutdown of
+ * certain background threads before we are allowed to dump state to
+ * disk
+ */
+/*
+ * The initializer below is positional and must follow the member order of
+ * struct fs_state:
+ *   mode                     FS_MODE_NORMAL
+ *   FiveMinuteLWP_tranquil   0
+ *   HostCheckLWP_tranquil    0
+ *   FsyncCheckLWP_tranquil   0
+ *   salvsync_fatal_error     0
+ *   options                  all four options default to 1 (enabled):
+ *                            fs_state_save, fs_state_restore,
+ *                            fs_state_verify_before_save,
+ *                            fs_state_verify_after_restore
+ *   worker_done_cv           statically initialized condition variable
+ *   state_lock               statically initialized rwlock
+ */
+struct fs_state fs_state =
+ { FS_MODE_NORMAL,
+ 0,
+ 0,
+ 0,
+ 0,
+ { 1,1,1,1 },
+ PTHREAD_COND_INITIALIZER,
+ PTHREAD_RWLOCK_INITIALIZER
+ };
+#endif /* AFS_DEMAND_ATTACH_FS */
+
/*
* Home for the performance statistics.
*/
ViceLog(1, ("Starting five minute check process\n"));
setThreadId("FiveMinuteCheckLWP");
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ FS_STATE_WRLOCK;
+ while (fs_state.mode == FS_MODE_NORMAL) {
+ fs_state.FiveMinuteLWP_tranquil = 1;
+ FS_STATE_UNLOCK;
+#else
while (1) {
+#endif
+
#ifdef AFS_PTHREAD_ENV
sleep(fiveminutes);
#else /* AFS_PTHREAD_ENV */
IOMGR_Sleep(fiveminutes);
#endif /* AFS_PTHREAD_ENV */
+#ifdef AFS_DEMAND_ATTACH_FS
+ FS_STATE_WRLOCK;
+ if (fs_state.mode != FS_MODE_NORMAL) {
+ break;
+ }
+ fs_state.FiveMinuteLWP_tranquil = 0;
+ FS_STATE_UNLOCK;
+#endif
+
/* close the log so it can be removed */
ReOpenLog(AFSDIR_SERVER_FILELOG_FILEPATH); /* don't trunc, just append */
ViceLog(2, ("Cleaning up timed out callbacks\n"));
afs_ctime(&now, tbuffer, sizeof(tbuffer))));
}
}
+#ifdef AFS_DEMAND_ATTACH_FS
+ FS_STATE_WRLOCK;
+#endif
}
+#ifdef AFS_DEMAND_ATTACH_FS
+ fs_state.FiveMinuteLWP_tranquil = 1;
+ FS_LOCK;
+ assert(pthread_cond_broadcast(&fs_state.worker_done_cv)==0);
+ FS_UNLOCK;
+ FS_STATE_UNLOCK;
+#endif
} /*FiveMinuteCheckLWP */
* other 5 minute activities because it may be delayed by timeouts when
* it probes the workstations
*/
+
static void
HostCheckLWP()
{
ViceLog(1, ("Starting Host check process\n"));
setThreadId("HostCheckLWP");
- while (1) {
+#ifdef AFS_DEMAND_ATTACH_FS
+ FS_STATE_WRLOCK;
+ while (fs_state.mode == FS_MODE_NORMAL) {
+ fs_state.HostCheckLWP_tranquil = 1;
+ FS_STATE_UNLOCK;
+#else
+ while(1) {
+#endif
+
#ifdef AFS_PTHREAD_ENV
sleep(fiveminutes);
#else /* AFS_PTHREAD_ENV */
IOMGR_Sleep(fiveminutes);
#endif /* AFS_PTHREAD_ENV */
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ FS_STATE_WRLOCK;
+ if (fs_state.mode != FS_MODE_NORMAL) {
+ break;
+ }
+ fs_state.HostCheckLWP_tranquil = 0;
+ FS_STATE_UNLOCK;
+#endif
+
ViceLog(2, ("Checking for dead venii & clients\n"));
h_CheckHosts();
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ FS_STATE_WRLOCK;
+#endif
}
+#ifdef AFS_DEMAND_ATTACH_FS
+ fs_state.HostCheckLWP_tranquil = 1;
+ FS_LOCK;
+ assert(pthread_cond_broadcast(&fs_state.worker_done_cv)==0);
+ FS_UNLOCK;
+ FS_STATE_UNLOCK;
+#endif
} /*HostCheckLWP */
/* This LWP does fsync checks every 5 minutes: it should not be used for
assert(pthread_mutex_init(&fsync_glock_mutex, NULL) == 0);
#endif
- while (1) {
+#ifdef AFS_DEMAND_ATTACH_FS
+ FS_STATE_WRLOCK;
+ while (fs_state.mode == FS_MODE_NORMAL) {
+ fs_state.FsyncCheckLWP_tranquil = 1;
+ FS_STATE_UNLOCK;
+#else
+ while(1) {
+#endif
FSYNC_LOCK;
#ifdef AFS_PTHREAD_ENV
/* rounding is fine */
ViceLog(0, ("LWP_WaitProcess returned %d\n", code));
#endif /* AFS_PTHREAD_ENV */
FSYNC_UNLOCK;
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ FS_STATE_WRLOCK;
+ if (fs_state.mode != FS_MODE_NORMAL) {
+ break;
+ }
+ fs_state.FsyncCheckLWP_tranquil = 0;
+ FS_STATE_UNLOCK;
+#endif /* AFS_DEMAND_ATTACH_FS */
+
ViceLog(2, ("Checking for fsync events\n"));
do {
code = BreakLaterCallBacks();
} while (code != 0);
+#ifdef AFS_DEMAND_ATTACH_FS
+ FS_STATE_WRLOCK;
+#endif
}
+#ifdef AFS_DEMAND_ATTACH_FS
+ fs_state.FsyncCheckLWP_tranquil = 1;
+ FS_LOCK;
+ assert(pthread_cond_broadcast(&fs_state.worker_done_cv)==0);
+ FS_UNLOCK;
+ FS_STATE_UNLOCK;
+#endif /* AFS_DEMAND_ATTACH_FS */
}
/*------------------------------------------------------------------------
("Vice was last started at %s\n",
afs_ctime(&StartTime, tbuffer, sizeof(tbuffer))));
+#ifdef AFS_DEMAND_ATTACH_FS
+ /* XXX perhaps set extended stats verbosity flags
+ * based upon LogLevel ?? */
+ VPrintExtendedCacheStats(VOL_STATS_PER_CHAIN2);
+#endif
VPrintCacheStats();
VPrintDiskStats();
DStat(&dirbuff, &dircall, &dirio);
time_t now = time(0);
char tbuffer[32];
+ /* do not allows new reqests to be served from now on, all new requests
+ * are returned with an error code of RX_RESTARTING ( transient failure ) */
+ rx_SetRxTranquil(); /* dhruba */
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ FS_STATE_WRLOCK;
+ fs_state.mode = FS_MODE_SHUTDOWN;
+ FS_STATE_UNLOCK;
+#endif
+
ViceLog(0,
("Shutting down file server at %s",
afs_ctime(&now, tbuffer, sizeof(tbuffer))));
if (!dopanic)
PrintCounters();
- /* do not allows new reqests to be served from now on, all new requests
- * are returned with an error code of RX_RESTARTING ( transient failure ) */
- rx_SetRxTranquil(); /* dhruba */
+ /* shut down volume package */
VShutdown();
+#ifdef AFS_DEMAND_ATTACH_FS
+ if (fs_state.options.fs_state_save) {
+ /*
+ * demand attach fs
+ * save fileserver state to disk */
+
+ /* make sure background threads have finished all of their asynchronous
+ * work on host and callback structures */
+ FS_STATE_RDLOCK;
+ while (!fs_state.FiveMinuteLWP_tranquil ||
+ !fs_state.HostCheckLWP_tranquil ||
+ !fs_state.FsyncCheckLWP_tranquil) {
+ FS_LOCK;
+ FS_STATE_UNLOCK;
+ ViceLog(0, ("waiting for background host/callback threads to quiesce before saving fileserver state...\n"));
+ assert(pthread_cond_wait(&fs_state.worker_done_cv, &fileproc_glock_mutex) == 0);
+ FS_UNLOCK;
+ FS_STATE_RDLOCK;
+ }
+
+ /* ok. it should now be fairly safe. let's do the state dump */
+ fs_stateSave();
+ }
+#endif /* AFS_DEMAND_ATTACH_FS */
+
if (debugFile) {
rx_PrintStats(debugFile);
fflush(debugFile);
static void
FlagMsg()
{
- char buffer[1024];
+ char buffer[2048];
/* default supports help flag */
strcat(buffer, "[-rxdbg (enable rx debugging)] ");
strcat(buffer, "[-rxdbge (enable rxevent debugging)] ");
strcat(buffer, "[-rxmaxmtu <bytes>] ");
-#if AFS_PTHREAD_ENV
- strcat(buffer, "[-vattachpar <number of volume attach threads>] ");
+#ifdef AFS_DEMAND_ATTACH_FS
+ strcat(buffer, "[-fs-state-dont-save (disable state save during shutdown)] ");
+ strcat(buffer, "[-fs-state-dont-restore (disable state restore during startup)] ");
+ strcat(buffer, "[-fs-state-verify <none|save|restore|both> (default is both)] ");
+ strcat(buffer, "[-vattachpar <max number of volume attach/shutdown threads> (default is 1)] ");
+ strcat(buffer, "[-vhashsize <log(2) of number of volume hash buckets> (default is 8)] ");
+ strcat(buffer, "[-vlrudisable (disable VLRU functionality)] ");
+ strcat(buffer, "[-vlruthresh <minutes before unused volumes become eligible for soft detach> (default is 2 hours)] ");
+ strcat(buffer, "[-vlruinterval <seconds between VLRU scans> (default is 2 minutes)] ");
+ strcat(buffer, "[-vlrumax <max volumes to soft detach in one VLRU scan> (default is 8)] ");
+#elif AFS_PTHREAD_ENV
+ strcat(buffer, "[-vattachpar <number of volume attach threads> (default is 1)] ");
#endif
#ifdef AFS_AIX32_ENV
strcat(buffer, "[-m <min percentage spare in partition>] ");
#ifdef AFS_PTHREAD_ENV
} else if (!strcmp(argv[i], "-vattachpar")) {
if ((i + 1) >= argc) {
- fprintf(stderr, "missing argument for -vattachpar\n");
+ fprintf(stderr, "missing argument for %s\n", argv[i]);
return -1;
}
vol_attach_threads = atoi(argv[++i]);
#endif /* AFS_PTHREAD_ENV */
+#ifdef AFS_DEMAND_ATTACH_FS
+ } else if (!strcmp(argv[i], "-fs-state-dont-save")) {
+ fs_state.options.fs_state_save = 0;
+ } else if (!strcmp(argv[i], "-fs-state-dont-restore")) {
+ fs_state.options.fs_state_restore = 0;
+ } else if (!strcmp(argv[i], "-fs-state-verify")) {
+ if ((i + 1) >= argc) {
+ fprintf(stderr, "missing argument for %s\n", argv[i]);
+ return -1;
+ }
+ i++;
+ if (!strcmp(argv[i], "none")) {
+ fs_state.options.fs_state_verify_before_save = 0;
+ fs_state.options.fs_state_verify_after_restore = 0;
+ } else if (!strcmp(argv[i], "save")) {
+ fs_state.options.fs_state_verify_after_restore = 0;
+ } else if (!strcmp(argv[i], "restore")) {
+ fs_state.options.fs_state_verify_before_save = 0;
+ } else if (!strcmp(argv[i], "both")) {
+ /* default */
+ } else {
+ fprintf(stderr, "invalid argument for %s\n", argv[i-1]);
+ return -1;
+ }
+ } else if (!strcmp(argv[i], "-vhashsize")) {
+ if ((i + 1) >= argc) {
+ fprintf(stderr, "missing argument for %s\n", argv[i]);
+ return -1;
+ }
+ VSetVolHashSize(atoi(argv[++i]));
+ } else if (!strcmp(argv[i], "-vlrudisable")) {
+ VLRU_SetOptions(VLRU_SET_ENABLED, 0);
+ } else if (!strcmp(argv[i], "-vlruthresh")) {
+ if ((i + 1) >= argc) {
+ fprintf(stderr, "missing argument for %s\n", argv[i]);
+ return -1;
+ }
+ VLRU_SetOptions(VLRU_SET_THRESH, 60*atoi(argv[++i]));
+ } else if (!strcmp(argv[i], "-vlruinterval")) {
+ if ((i + 1) >= argc) {
+ fprintf(stderr, "missing argument for %s\n", argv[i]);
+ return -1;
+ }
+ VLRU_SetOptions(VLRU_SET_INTERVAL, atoi(argv[++i]));
+ } else if (!strcmp(argv[i], "-vlrumax")) {
+ if ((i + 1) >= argc) {
+ fprintf(stderr, "missing argument for %s\n", argv[i]);
+ return -1;
+ }
+ VLRU_SetOptions(VLRU_SET_MAX, atoi(argv[++i]));
+#endif /* AFS_DEMAND_ATTACH_FS */
} else if (!strcmp(argv[i], "-s")) {
Sawsmall = 1;
if ((i + 1) >= argc) {
exit(1);
}
+#ifdef AFS_DEMAND_ATTACH_FS
+ if (fs_state.options.fs_state_restore) {
+ /*
+ * demand attach fs
+ * restore fileserver state */
+ fs_stateRestore();
+ }
+#endif /* AFS_DEMAND_ATTACH_FS */
+
/*
* We are done calling fopen/fdopen. It is safe to use a large
* of the file descriptor cache.
* This software has been released under the terms of the IBM Public
* License. For details, see the LICENSE file in the top-level source
* directory or online at http://www.openafs.org/dl/license10.html
+ *
+ * Portions Copyright (c) 2006 Sine Nomine Associates
*/
/* file.h - include file for the File Server */
* Start with clean version to sync test and dev trees.
* */
+#ifndef _AFS_VICED_VICED_H
+#define _AFS_VICED_VICED_H
+
#include <afs/afssyscalls.h>
#include <afs/afsutil.h>
#include "fs_stats.h" /*Defs for xstat-based statistics */
} DirHandle;
-struct cbcounters {
- int DeleteFiles;
- int DeleteCallBacks;
- int BreakCallBacks;
- int AddCallBacks;
- int GotSomeSpaces;
- int DeleteAllCallBacks;
- int nFEs, nCBs, nblks;
- int CBsTimedOut;
- int nbreakers;
- int GSS1, GSS2, GSS3, GSS4, GSS5;
-};
#define MAXCNTRS (AFS_HIGHEST_OPCODE+1)
#define FSYNC_LOCK
#define FSYNC_UNLOCK
#endif /* AFS_PTHREAD_ENV */
+
+
+#ifdef AFS_DEMAND_ATTACH_FS
+/*
+ * demand attach fs
+ * fileserver mode support
+ */
+/*
+ * Fileserver-wide state for demand attach builds.  One instance is
+ * declared as the global "fs_state".
+ */
+struct fs_state {
+ volatile int mode; /* presumably one of the FS_MODE_* values -- TODO confirm */
+ volatile byte FiveMinuteLWP_tranquil; /* five minute check thread is shutdown or sleeping */
+ volatile byte HostCheckLWP_tranquil; /* host check thread is shutdown or sleeping */
+ volatile byte FsyncCheckLWP_tranquil; /* fsync check thread is shutdown or sleeping */
+ volatile byte salvsync_fatal_error; /* fatal error with salvsync comm */
+
+ /* some command-line options we use in
+ * various places
+ *
+ * these fields are immutable once we
+ * go multithreaded */
+ struct {
+ byte fs_state_save;
+ byte fs_state_restore; /* tested before fs_stateRestore() is called */
+ byte fs_state_verify_before_save;
+ byte fs_state_verify_after_restore;
+ } options;
+
+ pthread_cond_t worker_done_cv;
+ pthread_rwlock_t state_lock; /* taken through the FS_STATE_RDLOCK / FS_STATE_WRLOCK / FS_STATE_UNLOCK macros */
+};
+
+extern struct fs_state fs_state;
+
+/* this lock is defined to be directly above FS_LOCK in the locking hierarchy */
+#define FS_STATE_RDLOCK assert(pthread_rwlock_rdlock(&fs_state.state_lock) == 0)
+#define FS_STATE_WRLOCK assert(pthread_rwlock_wrlock(&fs_state.state_lock) == 0)
+#define FS_STATE_UNLOCK assert(pthread_rwlock_unlock(&fs_state.state_lock) == 0)
+
+#define FS_MODE_NORMAL 0
+#define FS_MODE_SHUTDOWN 1
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+
+#endif /* _AFS_VICED_VICED_H */
+/*
+ * Copyright 2000, International Business Machines Corporation and others.
+ * All Rights Reserved.
+ *
+ * This software has been released under the terms of the IBM Public
+ * License. For details, see the LICENSE file in the top-level source
+ * directory or online at http://www.openafs.org/dl/license10.html
+ */
+
+#ifndef _AFS_VICED_VICED_PROTOTYPES_H
+#define _AFS_VICED_VICED_PROTOTYPES_H
+
extern int sendBufSize;
afs_int32 sys_error_to_et(afs_int32 in);
void init_sys_error_to_et(void);
+
+#ifdef AFS_DEMAND_ATTACH_FS
+/*
+ * demand attach fs
+ * fileserver state serialization
+ */
+extern int fs_stateSave(void);
+extern int fs_stateRestore(void);
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+#endif /* _AFS_VICED_VICED_PROTOTYPES_H */
${TOP_LIBDIR}/libsys.a ${TOP_LIBDIR}/libdir.a \
${TOP_LIBDIR}/liblwp.a ${TOP_LIBDIR}/libacl.a
-CFLAGS = ${COMMON_CFLAGS} -D${SYS_NAME} ${FSINCLUDES} ${XCFLAGS} ${ARCHFLAGS}
+CFLAGS = ${COMMON_CFLAGS} -D${SYS_NAME} ${FSINCLUDES} ${XCFLAGS} ${ARCHFLAGS} -DFSSYNC_BUILD_SERVER -DFSSYNC_BUILD_CLIENT
-PUBLICHEADERS=nfs.h vnode.h viceinode.h volume.h voldefs.h partition.h\
- fssync.h ihandle.h namei_ops.h
+PUBLICHEADERS=nfs.h vnode.h viceinode.h volume.h voldefs.h partition.h \
+ fssync.h ihandle.h namei_ops.h salvsync.h daemon_com.h
-VLIBOBJS=vnode.o volume.o vutil.o partition.o fssync.o purge.o \
- clone.o nuke.o devname.o listinodes.o common.o ihandle.o \
- namei_ops.o
+VLIBOBJS=vnode.o volume.o vutil.o partition.o fssync-server.o fssync-client.o \
+ clone.o nuke.o devname.o listinodes.o common.o ihandle.o purge.o \
+ namei_ops.o salvsync-server.o salvsync-client.o daemon_com.o
-OBJECTS=${VLIBOBJS} physio.o vol-salvage.o vol-info.o vol-dump.o vol-bless.o
+OBJECTS=${VLIBOBJS} physio.o vol-salvage.o vol-info.o vol-dump.o vol-bless.o fssync-debug.o
all: gi \
${TOP_LIBDIR}/vlib.a \
${TOP_LIBDIR}/libvlib.a \
salvager \
volinfo \
+ fssync-debug \
$(FS_CONV_OSF40D) \
$(XFS_SIZE_CHECK) \
$(FS_CONV_SOL26) \
${TOP_INCDIR}/afs/voldefs.h \
${TOP_INCDIR}/afs/partition.h \
${TOP_INCDIR}/afs/fssync.h \
+ ${TOP_INCDIR}/afs/salvsync.h \
+ ${TOP_INCDIR}/afs/daemon_com.h \
${TOP_INCDIR}/afs/ihandle.h \
${TOP_INCDIR}/afs/namei_ops.h
${DESTDIR}${libdir}/afs/libvlib.a \
${DESTDIR}${afssrvlibexecdir}/salvager \
${DESTDIR}${afssrvsbindir}/volinfo \
+ ${DESTDIR}${afssrvsbindir}/fssync-debug \
$(install_FS_CONV_OSF40D) \
$(install_XFS_SIZE_CHECK) \
$(install_FS_CONV_SOL26) \
${DESTDIR}${includedir}/afs/voldefs.h \
${DESTDIR}${includedir}/afs/partition.h \
${DESTDIR}${includedir}/afs/fssync.h \
+ ${DESTDIR}${includedir}/afs/salvsync.h \
+ ${DESTDIR}${includedir}/afs/daemon_com.h \
${DESTDIR}${includedir}/afs/ihandle.h \
${DESTDIR}${includedir}/afs/namei_ops.h
${DEST}/root.server/usr/afs/bin/volinfo: volinfo
${INSTALL} -s $? $@
+${DEST}/root.server/usr/afs/bin/fssync-debug: fssync-debug
+ if test "@DEMAND_ATTACH@" = "no"; then \
+ ${INSTALL} -s $? $@ ; \
+ fi
+
${DEST}/lib/afs/vlib.a: vlib.a
${INSTALL} $? $@
${DEST}/include/afs/fssync.h: fssync.h
${INSTALL} $? $@
+${DEST}/include/afs/salvsync.h: salvsync.h
+ ${INSTALL} $? $@
+
+${DEST}/include/afs/daemon_com.h: daemon_com.h
+ ${INSTALL} $? $@
+
${DEST}/include/afs/ihandle.h: ihandle.h
${INSTALL} $? $@
${OBJECTS}: ${PUBLICHEADERS} ${TOP_INCDIR}/lwp.h ${TOP_INCDIR}/lock.h ${TOP_INCDIR}/afs/afsint.h vutils.h salvage.h AFS_component_version_number.c
vol-salvage.o vutil.o: volinodes.h
+vol-salvage.o salvager.o: vol-salvage.h
+vol-salvage.o: salvsync.h daemon_com.h
vlib.a: ${VLIBOBJS} AFS_component_version_number.o
$(RM) -f $@
$(RANLIB) $@
# new salvager: remove references to /vice by linking with novice.o
-salvager: vol-salvage.o physio.o vlib.a
- ${CC} ${LDFLAGS} -o salvager vol-salvage.o physio.o ${LIBS} ${XLIBS}
+salvager: vol-salvage.o physio.o vlib.a salvager.o ${LIBS}
+ ${CC} ${LDFLAGS} -o salvager vol-salvage.o physio.o salvager.o ${LIBS} ${XLIBS}
vol-salvage: vol-salvage.o
vol-info: vol-info.o physio.o ihandle.o
${CC} ${CFLAGS} -o volinfo vol-info.o physio.o \
ihandle.o ${LIBS} ${XLIBS}
+fssync-debug: fssync-debug.o physio.o AFS_component_version_number.c ${LIBS}
+ ${CC} ${LDFLAGS} -o fssync-debug fssync-debug.o physio.o ${LIBS} ${XLIBS}
+
vol-bless: vol-bless.o physio.o ihandle.o ${LIBS}
${CC} ${CFLAGS} -o vol-bless vol-bless.o physio.o ${LIBS} ${XLIBS}
-fs_conv_dux40D: fs_conv_411.o
+fs_conv_dux40D: fs_conv_411.o ${LIBS}
${CC} ${CFLAGS} ${TOP_LIBDIR}/libcmd.a -o fs_conv_dux40D fs_conv_411.o ${LIBS} ${XLIBS}
-fs_conv_sol26: fs_conv_411.o vlib.a
+fs_conv_sol26: fs_conv_411.o ${LIBS}
${CC} ${CFLAGS} ${TOP_LIBDIR}/libcmd.a -o fs_conv_sol26 fs_conv_411.o ${LIBS} ${XLIBS}
fs_conv_411.o: fs_conv_411.c AFS_component_version_number.c
${DESTDIR}${afssrvsbindir}/volinfo: volinfo
${INSTALL} -s $? $@
+${DESTDIR}${afssrvsbindir}/fssync-debug: fssync-debug
+ if test "@DEMAND_ATTACH@" = "no" ; then \
+ ${INSTALL} -s $? $@ ; \
+ fi
+
${DESTDIR}${includedir}/afs/nfs.h: nfs.h
${INSTALL} $? $@
${TOP_INCDIR}/afs/fssync.h: fssync.h
${INSTALL} $? $@
+${DESTDIR}${includedir}/afs/salvsync.h: salvsync.h
+ ${INSTALL} $? $@
+
+${TOP_INCDIR}/afs/salvsync.h: salvsync.h
+ ${INSTALL} $? $@
+
+${DESTDIR}${includedir}/afs/daemon_com.h: daemon_com.h
+ ${INSTALL} $? $@
+
+${TOP_INCDIR}/afs/daemon_com.h: daemon_com.h
+ ${INSTALL} $? $@
+
${DESTDIR}${includedir}/afs/ihandle.h: ihandle.h
${INSTALL} $? $@
${TOP_INCDIR}/afs/namei_ops.h: namei_ops.h
${INSTALL} $? $@
+${DESTDIR}${includedir}/afs/salvage.h: salvage.h
+ ${INSTALL} $? $@
+
+${TOP_INCDIR}/afs/salvage.h: salvage.h
+ ${INSTALL} $? $@
+
+${DESTDIR}${includedir}/afs/vol-salvage.h: vol-salvage.h
+ ${INSTALL} $? $@
+
+${TOP_INCDIR}/afs/vol-salvage.h: vol-salvage.h
+ ${INSTALL} $? $@
+
dest: \
${DEST}/lib/afs/vlib.a \
${DEST}/lib/afs/libvlib.a \
${DEST}/root.server/usr/afs/bin/salvager \
${DEST}/root.server/usr/afs/bin/volinfo \
+ ${DEST}/root.server/usr/afs/bin/fssync-debug \
$(dest_FS_CONV_OSF40D) \
$(dest_XFS_SIZE_CHECK) \
$(dest_FS_CONV_SOL26) \
${DEST}/include/afs/voldefs.h \
${DEST}/include/afs/partition.h \
${DEST}/include/afs/fssync.h \
+ ${DEST}/include/afs/salvsync.h \
+ ${DEST}/include/afs/daemon_com.h \
${DEST}/include/afs/ihandle.h \
${DEST}/include/afs/namei_ops.h
check-splint::
sh $(HELPER_SPLINT) $(CFLAGS) \
- vnode.c volume.c vutil.c partition.c fssync.c purge.c \
+ vnode.c volume.c vutil.c partition.c fssync-server.c fssync-client.c \
clone.c nuke.c devname.c listinodes.c common.c ihandle.c \
- namei_ops.c \
- physio.c vol-salvage.c vol-info.c vol-bless.c
+ namei_ops.c salvsync-server.c salvsync-client.c daemon_com.c purge.c \
+ physio.c vol-salvage.c vol-info.c vol-bless.c fssync-debug.c
# License. For details, see the LICENSE file in the top-level source
# directory or online at http://www.openafs.org/dl/license10.html
+AFSDEV_AUXCDEFINES = -DFSSYNC_BUILD_SERVER -DFSSYNC_BUILD_CLIENT
+
RELDIR=vol
!INCLUDE ..\config\NTMakefile.$(SYS_NAME)
!INCLUDE ..\config\NTMakefile.version
--- /dev/null
+/*
+ * Copyright 2006, Sine Nomine Associates and others.
+ * All Rights Reserved.
+ *
+ * This software has been released under the terms of the IBM Public
+ * License. For details, see the LICENSE file in the top-level source
+ * directory or online at http://www.openafs.org/dl/license10.html
+ */
+
+/*
+ * localhost interprocess communication for servers
+ *
+ * currently handled by a localhost socket
+ * (yes, this needs to be replaced someday)
+ */
+
+#ifndef _WIN32
+#define FD_SETSIZE 65536
+#endif
+
+#include <afsconfig.h>
+#include <afs/param.h>
+
+RCSID
+ ("$Header$");
+
+#include <sys/types.h>
+#include <stdio.h>
+#ifdef AFS_NT40_ENV
+#include <winsock2.h>
+#include <time.h>
+#else
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include <sys/time.h>
+#endif
+#include <errno.h>
+#include <assert.h>
+#include <signal.h>
+
+#ifdef HAVE_STRING_H
+#include <string.h>
+#else
+#ifdef HAVE_STRINGS_H
+#include <strings.h>
+#endif
+#endif
+
+
+#include <rx/xdr.h>
+#include <afs/afsint.h>
+#include "nfs.h"
+#include <afs/errors.h>
+#include "daemon_com.h"
+#include "lwp.h"
+#include "lock.h"
+#include <afs/afssyscalls.h>
+#include "ihandle.h"
+#include "vnode.h"
+#include "volume.h"
+#include "partition.h"
+#include <rx/rx_queue.h>
+
+/*@printflike@*/ extern void Log(const char *format, ...);
+
+#ifdef osi_Assert
+#undef osi_Assert
+#endif
+#define osi_Assert(e) (void)(e)
+
+int (*V_BreakVolumeCallbacks) ();
+
+#define MAXHANDLERS 4 /* Up to 4 clients; must be at least 2, so that
+ * move = dump+restore can run on single server */
+
+#define MAX_BIND_TRIES 5 /* Number of times to retry socket bind */
+
+static int getport(SYNC_client_state * state, struct sockaddr_in *addr);
+static int SYNC_ask_internal(SYNC_client_state * state, SYNC_command * com, SYNC_response * res);
+
+/* daemon com SYNC client interface */
+
+/*
+ * Establish the client side of a SYNC channel.
+ *
+ * If state->fd is already non-negative the channel is considered up and
+ * nothing is done.  Otherwise a socket is created and connected to the
+ * loopback address on state->port, sleeping between attempts according to
+ * the backoff table until its terminating 0 entry is reached.
+ *
+ * Returns 1 when connected (state->fd holds the socket) and 0 on final
+ * failure, in which case the socket is closed and state->fd is reset to -1
+ * so callers that test for fd == -1 see the channel as down.
+ */
+int
+SYNC_connect(SYNC_client_state * state)
+{
+    struct sockaddr_in addr;
+    /* I can't believe the following is needed for localhost connections!! */
+    static time_t backoff[] =
+        { 3, 3, 3, 5, 5, 5, 7, 15, 16, 24, 32, 40, 48, 0 };
+    time_t *timeout = &backoff[0];
+    int saved_errno;
+
+    if (state->fd >= 0) {
+        return 1;
+    }
+
+    for (;;) {
+        state->fd = getport(state, &addr);
+        if (connect(state->fd, (struct sockaddr *)&addr, sizeof(addr)) >= 0)
+            return 1;
+        if (!*timeout)
+            break;
+        /* only the even-valued backoff steps are logged */
+        if (!(*timeout & 1))
+            Log("SYNC_connect temporary failure (will retry)\n");
+        SYNC_disconnect(state);
+        sleep(*timeout);
+        timeout++;
+    }
+
+    /*
+     * Out of retries: release the unconnected socket instead of leaving it
+     * in state->fd, preserving errno from connect() for the message below.
+     */
+    saved_errno = errno;
+    SYNC_disconnect(state);
+    errno = saved_errno;
+    perror("SYNC_connect failed (giving up!)");
+    return 0;
+}
+
+/*
+ * Close the channel's socket, if one is open, and mark the channel as
+ * disconnected by setting state->fd to -1.  No protocol-level shutdown is
+ * attempted.  Always returns 0.
+ */
+int
+SYNC_disconnect(SYNC_client_state * state)
+{
+    /* don't hand an invalid descriptor to the OS when already disconnected */
+    if (state->fd >= 0) {
+#ifdef AFS_NT40_ENV
+        closesocket(state->fd);
+#else
+        close(state->fd);
+#endif
+    }
+    state->fd = -1;
+    return 0;
+}
+
+/*
+ * Gracefully shut down a SYNC channel: send SYNC_COM_CHANNEL_CLOSE, log any
+ * anomaly in the reply, then close the socket whatever the outcome.
+ *
+ * Returns SYNC_OK immediately when the channel is not connected; otherwise
+ * returns the code produced by SYNC_ask.
+ *
+ * Side effect: state->retry_limit and state->hard_timeout are set to 0 and
+ * are not restored afterwards.
+ */
+afs_int32
+SYNC_closeChannel(SYNC_client_state * state)
+{
+    afs_int32 code;
+    SYNC_command com;
+    SYNC_response res;
+    SYNC_PROTO_BUF_DECL(ores);
+
+    if (state->fd == -1)
+        return SYNC_OK;
+
+    memset(&com, 0, sizeof(com));
+    memset(&res, 0, sizeof(res));
+
+    /* give the reply a full-size payload buffer */
+    res.payload.len = SYNC_PROTO_MAX_LEN;
+    res.payload.buf = ores;
+
+    /* header-only command: no payload follows */
+    com.hdr.command = SYNC_COM_CHANNEL_CLOSE;
+    com.hdr.command_len = sizeof(SYNC_command_hdr);
+
+    /* in case the other end dropped, don't do any retries */
+    state->retry_limit = 0;
+    state->hard_timeout = 0;
+
+    code = SYNC_ask(state, &com, &res);
+
+    if (code == SYNC_OK) {
+        if (res.hdr.response != SYNC_OK) {
+            Log("SYNC_closeChannel: channel shutdown request denied; closing socket anyway\n");
+        } else if (!(res.hdr.flags & SYNC_FLAG_CHANNEL_SHUTDOWN)) {
+            Log("SYNC_closeChannel: channel shutdown request mishandled by server\n");
+        }
+    } else {
+        Log("SYNC_closeChannel: channel communications problem\n");
+    }
+
+    SYNC_disconnect(state);
+
+    return code;
+}
+
+/*
+ * Close the channel's current socket and connect again from scratch.
+ * Returns the result of SYNC_connect: 1 on success, 0 on failure.
+ */
+int
+SYNC_reconnect(SYNC_client_state * state)
+{
+ SYNC_disconnect(state);
+ return SYNC_connect(state);
+}
+
+/*
+ * private function to fill in the sockaddr struct for us
+ *
+ * Creates a new TCP socket and fills *addr with the loopback address
+ * (127.0.0.1) and state->port.  The socket is NOT connected here.
+ * Returns the new socket descriptor; socket creation failure trips the
+ * assertion.
+ */
+static int
+getport(SYNC_client_state * state, struct sockaddr_in *addr)
+{
+    int sd;
+
+    memset(addr, 0, sizeof(*addr));
+    /*
+     * Keep the socket() call outside assert(): an assert() argument is not
+     * evaluated when NDEBUG is defined, which would leave sd uninitialized.
+     */
+    sd = socket(AF_INET, SOCK_STREAM, 0);
+    assert(sd >= 0);
+#ifdef STRUCT_SOCKADDR_HAS_SA_LEN
+    addr->sin_len = sizeof(struct sockaddr_in);
+#endif
+    addr->sin_addr.s_addr = htonl(0x7f000001);
+    addr->sin_family = AF_INET;	/* was localhost->h_addrtype */
+    addr->sin_port = htons(state->port);	/* XXXX htons not _really_ necessary */
+
+    return sd;
+}
+
+/*
+ * Send a command over the channel and collect the response, reconnecting
+ * and retrying after communication errors.
+ *
+ * Retries continue while the attempt count does not exceed
+ * state->retry_limit and the current FT_ApproxTime() value does not exceed
+ * the start time plus state->hard_timeout.  Any result other than
+ * SYNC_COM_ERROR ends the loop at once and is handed back to the caller.
+ *
+ * Returns the final response code.  If that code is SYNC_COM_ERROR, or the
+ * channel could not be connected, state->fatal_error is set and every later
+ * call returns SYNC_COM_ERROR without touching the socket.
+ */
+afs_int32
+SYNC_ask(SYNC_client_state * state, SYNC_command * com, SYNC_response * res)
+{
+    int attempt = 0;
+    afs_uint32 cur, deadline, code = SYNC_OK;
+
+    if (state->fatal_error)
+        return SYNC_COM_ERROR;
+
+    /* connect lazily on first use */
+    if (state->fd == -1)
+        SYNC_connect(state);
+
+    if (state->fd == -1) {
+        state->fatal_error = 1;
+        return SYNC_COM_ERROR;
+    }
+
+#ifdef AFS_DEMAND_ATTACH_FS
+    com->hdr.flags |= SYNC_FLAG_DAFS_EXTENSIONS;
+#endif
+
+    cur = FT_ApproxTime();
+    deadline = cur + state->hard_timeout;
+
+    while (attempt <= state->retry_limit && cur <= deadline) {
+        code = SYNC_ask_internal(state, com, res);
+
+        if (code != SYNC_COM_ERROR) {
+            /*
+             * success, protocol mismatch, or an unknown (probably
+             * protocol-specific) code: stop and let the caller deal with it
+             */
+            if (code == SYNC_BAD_COMMAND)
+                Log("SYNC_ask: protocol mismatch; make sure fileserver, volserver, salvageserver and salvager are same version\n");
+            break;
+        }
+
+        Log("SYNC_ask: protocol communications failure; attempting reconnect to server\n");
+        SYNC_reconnect(state);
+
+        /* try again */
+        attempt++;
+        cur = FT_ApproxTime();
+    }
+
+    if (code == SYNC_COM_ERROR) {
+        Log("SYNC_ask: fatal protocol error; disabling sync protocol to server running on port %d until next server restart\n",
+            state->port);
+        state->fatal_error = 1;
+    }
+
+    return code;
+}
+
+/*
+ * Perform a single request/response exchange on a connected channel.
+ *
+ * The command header, followed by the payload when com->payload.len is
+ * nonzero, is copied into one buffer and written to state->fd.  The reply
+ * header is received into res->hdr and any payload into res->payload.buf,
+ * which must be able to hold res->payload.len bytes.  res->recv_len is set
+ * to the number of bytes received.
+ *
+ * A receive interrupted by a signal (EINTR / WSAEINTR) is retried rather
+ * than being treated as a reply.
+ *
+ * Returns res->hdr.response.  Every local failure stores SYNC_COM_ERROR
+ * there first, so that is what the caller sees.
+ */
+static afs_int32
+SYNC_ask_internal(SYNC_client_state * state, SYNC_command * com, SYNC_response * res)
+{
+    int n;
+    SYNC_PROTO_BUF_DECL(buf);
+#ifndef AFS_NT40_ENV
+    int iovcnt;
+    struct iovec iov[2];
+#endif
+
+    if (state->fd == -1) {
+        Log("SYNC_ask: invalid sync file descriptor\n");
+        res->hdr.response = SYNC_COM_ERROR;
+        goto done;
+    }
+
+    if (com->hdr.command_len > SYNC_PROTO_MAX_LEN) {
+        Log("SYNC_ask: internal SYNC buffer too small; please file a bug\n");
+        res->hdr.response = SYNC_COM_ERROR;
+        goto done;
+    }
+
+    /* a command shorter than its own header would underflow the copy below */
+    if (com->hdr.command_len < sizeof(com->hdr)) {
+        Log("SYNC_ask: command_len field in command header inconsistent\n");
+        res->hdr.response = SYNC_COM_ERROR;
+        goto done;
+    }
+
+    com->hdr.proto_version = state->proto_version;
+
+    memcpy(buf, &com->hdr, sizeof(com->hdr));
+    if (com->payload.len) {
+        memcpy(buf + sizeof(com->hdr), com->payload.buf,
+               com->hdr.command_len - sizeof(com->hdr));
+    }
+
+#ifdef AFS_NT40_ENV
+    n = send(state->fd, buf, com->hdr.command_len, 0);
+    if (n != com->hdr.command_len) {
+        Log("SYNC_ask: write failed\n");
+        res->hdr.response = SYNC_COM_ERROR;
+        goto done;
+    }
+
+    /* receive the response, retrying if interrupted */
+    do {
+        n = recv(state->fd, buf, SYNC_PROTO_MAX_LEN, 0);
+    } while (n < 0 && WSAEINTR == WSAGetLastError());
+    if (n <= 0) {
+        Log("SYNC_ask: No response\n");
+        res->hdr.response = SYNC_COM_ERROR;
+        goto done;
+    }
+#else /* !AFS_NT40_ENV */
+    n = write(state->fd, buf, com->hdr.command_len);
+    if (com->hdr.command_len != n) {
+        Log("SYNC_ask: write failed\n");
+        res->hdr.response = SYNC_COM_ERROR;
+        goto done;
+    }
+
+    /* receive the response */
+    iov[0].iov_base = (char *)&res->hdr;
+    iov[0].iov_len = sizeof(res->hdr);
+    if (res->payload.len) {
+        iov[1].iov_base = (char *)res->payload.buf;
+        iov[1].iov_len = res->payload.len;
+        iovcnt = 2;
+    } else {
+        iovcnt = 1;
+    }
+    /* retry if interrupted by a signal before any data arrived */
+    do {
+        n = readv(state->fd, iov, iovcnt);
+    } while (n < 0 && errno == EINTR);
+    if (n <= 0) {
+        Log("SYNC_ask: No response\n");
+        res->hdr.response = SYNC_COM_ERROR;
+        goto done;
+    }
+#endif /* !AFS_NT40_ENV */
+
+    res->recv_len = n;
+
+    /* n is known to be positive here, so the casts below are safe */
+    if ((size_t)n < sizeof(res->hdr)) {
+        Log("SYNC_ask: response too short\n");
+        res->hdr.response = SYNC_COM_ERROR;
+        goto done;
+    }
+#ifdef AFS_NT40_ENV
+    memcpy(&res->hdr, buf, sizeof(res->hdr));
+#endif
+
+    if (((size_t)n - sizeof(res->hdr)) > res->payload.len) {
+        Log("SYNC_ask: response too long\n");
+        res->hdr.response = SYNC_COM_ERROR;
+        goto done;
+    }
+#ifdef AFS_NT40_ENV
+    memcpy(res->payload.buf, buf + sizeof(res->hdr), n - sizeof(res->hdr));
+#endif
+
+    if (res->hdr.response_len != n) {
+        Log("SYNC_ask: length field in response inconsistent\n");
+        res->hdr.response = SYNC_COM_ERROR;
+        goto done;
+    }
+    if (res->hdr.response == SYNC_DENIED) {
+        Log("SYNC_ask: negative response\n");
+    }
+
+  done:
+    return res->hdr.response;
+}
+
+
+/*
+ * daemon com SYNC server-side interfaces
+ */
+
+/*
+ * get a command
+ *
+ * Server side: receive one command from fd.  The header lands in com->hdr
+ * and any payload in com->payload.buf, which must be able to hold
+ * com->payload.len bytes.  com->recv_len is set to the number of bytes
+ * received.
+ *
+ * A receive interrupted by a signal (EINTR / WSAEINTR) is retried rather
+ * than being treated as a command.
+ *
+ * Returns SYNC_OK, or SYNC_COM_ERROR when nothing could be received or the
+ * amount received is shorter than a header or longer than header plus
+ * payload buffer.
+ */
+afs_int32
+SYNC_getCom(int fd, SYNC_command * com)
+{
+    int n;
+    afs_int32 code = SYNC_OK;
+#ifdef AFS_NT40_ENV
+    SYNC_PROTO_BUF_DECL(buf);
+#else
+    struct iovec iov[2];
+    int iovcnt;
+#endif
+
+#ifdef AFS_NT40_ENV
+    do {
+        n = recv(fd, buf, SYNC_PROTO_MAX_LEN, 0);
+    } while (n < 0 && WSAEINTR == WSAGetLastError());
+
+    if (n <= 0) {
+        Log("SYNC_getCom: error receiving command\n");
+        code = SYNC_COM_ERROR;
+        goto done;
+    }
+#else /* !AFS_NT40_ENV */
+    iov[0].iov_base = (char *)&com->hdr;
+    iov[0].iov_len = sizeof(com->hdr);
+    if (com->payload.len) {
+        iov[1].iov_base = (char *)com->payload.buf;
+        iov[1].iov_len = com->payload.len;
+        iovcnt = 2;
+    } else {
+        iovcnt = 1;
+    }
+
+    /* retry if interrupted by a signal before any data arrived */
+    do {
+        n = readv(fd, iov, iovcnt);
+    } while (n < 0 && errno == EINTR);
+    if (n <= 0) {
+        Log("SYNC_getCom: error receiving command\n");
+        code = SYNC_COM_ERROR;
+        goto done;
+    }
+#endif /* !AFS_NT40_ENV */
+
+    com->recv_len = n;
+
+    /* n is known to be positive here, so the casts below are safe */
+    if ((size_t)n < sizeof(com->hdr)) {
+        Log("SYNC_getCom: command too short\n");
+        code = SYNC_COM_ERROR;
+        goto done;
+    }
+#ifdef AFS_NT40_ENV
+    memcpy(&com->hdr, buf, sizeof(com->hdr));
+#endif
+
+    if (((size_t)n - sizeof(com->hdr)) > com->payload.len) {
+        Log("SYNC_getCom: command too long\n");
+        code = SYNC_COM_ERROR;
+        goto done;
+    }
+#ifdef AFS_NT40_ENV
+    memcpy(com->payload.buf, buf + sizeof(com->hdr), n - sizeof(com->hdr));
+#endif
+
+  done:
+    return code;
+}
+
+/*
+ * put a response
+ *
+ * Server side: send a response on fd.  The header, followed by the payload
+ * when res->payload.len is nonzero, is copied into one buffer of
+ * res->hdr.response_len bytes and written in a single call.
+ *
+ * Returns SYNC_OK on success.  Returns SYNC_COM_ERROR when response_len is
+ * smaller than the header, larger than header plus payload, larger than
+ * SYNC_PROTO_MAX_LEN, or when the write does not transfer the whole
+ * response (in which case res->hdr.response is also set to SYNC_COM_ERROR).
+ */
+afs_int32
+SYNC_putRes(int fd, SYNC_response * res)
+{
+    int n;
+    afs_int32 code = SYNC_OK;
+    SYNC_PROTO_BUF_DECL(buf);
+
+    /*
+     * response_len must cover at least the header (otherwise the payload
+     * copy length below underflows) and at most header plus payload
+     */
+    if (res->hdr.response_len < sizeof(res->hdr) ||
+        res->hdr.response_len > (sizeof(res->hdr) + res->payload.len)) {
+        Log("SYNC_putRes: response_len field in response header inconsistent\n");
+        code = SYNC_COM_ERROR;
+        goto done;
+    }
+
+    if (res->hdr.response_len > SYNC_PROTO_MAX_LEN) {
+        Log("SYNC_putRes: internal SYNC buffer too small; please file a bug\n");
+        code = SYNC_COM_ERROR;
+        goto done;
+    }
+
+#ifdef AFS_DEMAND_ATTACH_FS
+    res->hdr.flags |= SYNC_FLAG_DAFS_EXTENSIONS;
+#endif
+
+    memcpy(buf, &res->hdr, sizeof(res->hdr));
+    if (res->payload.len) {
+        memcpy(buf + sizeof(res->hdr), res->payload.buf,
+               res->hdr.response_len - sizeof(res->hdr));
+    }
+
+#ifdef AFS_NT40_ENV
+    n = send(fd, buf, res->hdr.response_len, 0);
+#else /* !AFS_NT40_ENV */
+    n = write(fd, buf, res->hdr.response_len);
+#endif /* !AFS_NT40_ENV */
+
+    if (res->hdr.response_len != n) {
+        Log("SYNC_putRes: write failed\n");
+        res->hdr.response = SYNC_COM_ERROR;
+        /* report the failure to the caller instead of returning SYNC_OK */
+        code = SYNC_COM_ERROR;
+        goto done;
+    }
+
+  done:
+    return code;
+}
+
+/*
+ * Check that a fixed-size protocol string field is terminated.
+ *
+ * buf - start of the field
+ * len - size of the field in bytes
+ *
+ * return 0 for legal (null-terminated) string,
+ * 1 for illegal (unterminated) string
+ *
+ * NOTE(review): relies on afs_strnlen() returning len when no terminator
+ * is found within the first len bytes -- confirm against its definition.
+ */
+int
+SYNC_verifyProtocolString(char * buf, size_t len)
+{
+    return (afs_strnlen(buf, len) == len) ? 1 : 0;
+}
--- /dev/null
+/*
+ * Copyright 2006, Sine Nomine Associates and others.
+ * All Rights Reserved.
+ *
+ * This software has been released under the terms of the IBM Public
+ * License. For details, see the LICENSE file in the top-level source
+ * directory or online at http://www.openafs.org/dl/license10.html
+ */
+
+#ifndef _AFS_VOL_DAEMON_COM_H
+#define _AFS_VOL_DAEMON_COM_H
+
+/*
+ * SYNC protocol constants
+ */
+
+/* SYNC protocol command codes
+ *
+ * command codes 0-65535 are reserved for
+ * global SYNC package command codes
+ */
+#define SYNC_COM_CODE_USER_BASE 65536
+#define SYNC_COM_CODE_DECL(code) (SYNC_COM_CODE_USER_BASE+(code))
+
+/* general command codes */
+#define SYNC_COM_CHANNEL_CLOSE 0
+
+
+/* SYNC protocol response codes
+ *
+ * response codes 0-65535 are reserved for
+ * global SYNC package response codes
+ */
+#define SYNC_RES_CODE_USER_BASE 65536
+#define SYNC_RES_CODE_DECL(code) (SYNC_RES_CODE_USER_BASE+(code))
+
+/* general response codes */
+#define SYNC_OK 0 /* sync call returned ok */
+#define SYNC_DENIED 1 /* sync request denied by server */
+#define SYNC_COM_ERROR 2 /* sync protocol communications error */
+#define SYNC_BAD_COMMAND 3 /* sync command code not implemented by server */
+#define SYNC_FAILED 4 /* sync server-side procedure failed */
+
+
+/* SYNC protocol reason codes
+ *
+ * reason codes 0-65535 are reserved for
+ * global SYNC package reason codes
+ */
+#define SYNC_REASON_CODE_USER_BASE 65536
+#define SYNC_REASON_CODE_DECL(code) (SYNC_REASON_CODE_USER_BASE+(code))
+
+/* general reason codes */
+#define SYNC_REASON_NONE 0
+#define SYNC_REASON_MALFORMED_PACKET 1
+
+
+/* SYNC protocol flags
+ *
+ * flag bits 0-7 are reserved for
+ * global SYNC package flags
+ */
+#define SYNC_FLAG_CODE_USER_BASE 8
+#define SYNC_FLAG_CODE_DECL(code) (1 << (SYNC_FLAG_CODE_USER_BASE+(code)))
+
+/* general flag codes */
+#define SYNC_FLAG_CHANNEL_SHUTDOWN 0x1
+#define SYNC_FLAG_DAFS_EXTENSIONS 0x2 /* signal that other end of socket is compiled
+ * with demand attach extensions */
+
+/* SYNC protocol response buffers */
+#define SYNC_PROTO_MAX_LEN 768 /* maximum size of sync protocol message */
+
+/* use a large type to get proper buffer alignment so we can safely cast the pointer */
+#define SYNC_PROTO_BUF_DECL(buf) \
+ afs_int64 _##buf##_l[SYNC_PROTO_MAX_LEN/sizeof(afs_int64)]; \
+ char * buf = (char *)(_##buf##_l)
+
+
+/*
+ * client-side state object
+ *
+ * One instance describes one client connection to a SYNC server on the
+ * local host.
+ */
+typedef struct SYNC_client_state {
+ int fd; /* connected socket, or -1 while disconnected */
+ afs_uint16 port; /* server port in host byte order (converted with htons when connecting) */
+ afs_uint32 proto_version; /* copied into every outgoing command header */
+ int retry_limit; /* max number of times for SYNC_ask to retry */
+ afs_int32 hard_timeout; /* upper limit on time to keep trying */
+ byte fatal_error; /* fatal error on this client conn */
+} SYNC_client_state;
+
+/*
+ * wire types
+ *
+ * These headers are copied to and from the socket as raw memory; the
+ * daemon_com code applies no byte-order conversion to them.
+ */
+typedef struct SYNC_command_hdr {
+ afs_uint32 proto_version; /* sync protocol version */
+ afs_int32 programType; /* type of program issuing the request */
+ afs_int32 command; /* request type */
+ afs_int32 reason; /* reason for request */
+ afs_uint32 command_len; /* entire length of command */
+ afs_uint32 flags; /* SYNC_FLAG_* bits */
+} SYNC_command_hdr;
+
+typedef struct SYNC_response_hdr {
+ afs_uint32 proto_version; /* sync protocol version */
+ afs_uint32 response_len; /* entire length of response */
+ afs_int32 response; /* response code */
+ afs_int32 reason; /* reason for response */
+ afs_uint32 flags; /* SYNC_FLAG_* bits */
+} SYNC_response_hdr;
+
+
+/*
+ * user-visible types
+ *
+ * A message is a wire header plus an optional caller-owned payload buffer.
+ * When sending, a nonzero payload.len causes the bytes following the
+ * header (header length field minus header size) to be taken from
+ * payload.buf.  When receiving, payload.len is the capacity of payload.buf.
+ */
+typedef struct SYNC_command {
+ SYNC_command_hdr hdr;
+ struct {
+ afs_uint32 len;
+ void * buf;
+ } payload;
+ afs_int32 recv_len; /* bytes received, set by SYNC_getCom */
+} SYNC_command;
+
+typedef struct SYNC_response {
+ SYNC_response_hdr hdr;
+ struct {
+ afs_uint32 len;
+ void * buf;
+ } payload;
+ afs_int32 recv_len; /* bytes received, set when the response is read by SYNC_ask */
+} SYNC_response;
+
+
+/* client-side prototypes */
+extern afs_int32 SYNC_ask(SYNC_client_state *, SYNC_command * com, SYNC_response * res);
+extern int SYNC_connect(SYNC_client_state *); /* setup the channel */
+extern int SYNC_disconnect(SYNC_client_state *); /* just close the socket */
+extern afs_int32 SYNC_closeChannel(SYNC_client_state *); /* do a graceful channel close */
+extern int SYNC_reconnect(SYNC_client_state *); /* do a reconnect after a protocol error, or from a forked child */
+
+/* server-side prototypes */
+extern int SYNC_getCom(int fd, SYNC_command * com);
+extern int SYNC_putRes(int fd, SYNC_response * res);
+extern int SYNC_verifyProtocolString(char * buf, size_t len);
+
+#endif /* _AFS_VOL_DAEMON_COM_H */
--- /dev/null
+/*
+ * Copyright 2000, International Business Machines Corporation and others.
+ * All Rights Reserved.
+ *
+ * This software has been released under the terms of the IBM Public
+ * License. For details, see the LICENSE file in the top-level source
+ * directory or online at http://www.openafs.org/dl/license10.html
+ *
+ * Portions Copyright (c) 2006 Sine Nomine Associates
+ */
+
+/*
+ System: VICE-TWO
+ Module: fssync.c
+ Institution: The Information Technology Center, Carnegie-Mellon University
+
+ */
+#ifdef notdef
+
+/* All this is going away in early 1989 */
+int newVLDB; /* Compatibility flag */
+
+#endif
+static int newVLDB = 1;
+
+
+#ifndef AFS_PTHREAD_ENV
+#define USUAL_PRIORITY (LWP_MAX_PRIORITY - 2)
+
+/*
+ * stack size increased from 8K because the HP machine seemed to have trouble
+ * with the smaller stack
+ */
+#define USUAL_STACK_SIZE (24 * 1024)
+#endif /* !AFS_PTHREAD_ENV */
+
+/*
+ fssync-client.c
+ File server synchronization with external volume utilities.
+ client-side implementation
+ */
+
+#include <afsconfig.h>
+#include <afs/param.h>
+
+RCSID
+ ("$Header$");
+
+#include <sys/types.h>
+#include <stdio.h>
+#ifdef AFS_NT40_ENV
+#include <winsock2.h>
+#include <time.h>
+#else
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include <sys/time.h>
+#endif
+#include <errno.h>
+#ifdef AFS_PTHREAD_ENV
+#include <assert.h>
+#else /* AFS_PTHREAD_ENV */
+#include <afs/assert.h>
+#endif /* AFS_PTHREAD_ENV */
+#include <signal.h>
+
+#ifdef HAVE_STRING_H
+#include <string.h>
+#else
+#ifdef HAVE_STRINGS_H
+#include <strings.h>
+#endif
+#endif
+
+
+#include <rx/xdr.h>
+#include <afs/afsint.h>
+#include "nfs.h"
+#include <afs/errors.h>
+#include "daemon_com.h"
+#include "fssync.h"
+#include "lwp.h"
+#include "lock.h"
+#include <afs/afssyscalls.h>
+#include "ihandle.h"
+#include "vnode.h"
+#include "volume.h"
+#include "partition.h"
+
+#ifdef FSSYNC_BUILD_CLIENT
+
+/*@printflike@*/ extern void Log(const char *format, ...);
+
+#ifdef osi_Assert
+#undef osi_Assert
+#endif
+#define osi_Assert(e) (void)(e)
+
+extern int LogLevel;
+
+static SYNC_client_state fssync_state = { -1, 2040, FSYNC_PROTO_VERSION, 5, 120 };
+
+#ifdef AFS_PTHREAD_ENV
+/* serializes use of the shared fssync_state channel in FSYNC_askfs() */
+static pthread_mutex_t vol_fsync_mutex;
+/* nonzero once FSYNC_clientInit() has initialized vol_fsync_mutex */
+static volatile int vol_fsync_mutex_init = 0;
+/*
+ * NOTE(review): the lock/unlock calls live inside assert(); they would not
+ * be executed if this file were ever compiled with NDEBUG -- confirm that
+ * pthread builds never define it.
+ */
+#define VFSYNC_LOCK \
+    assert(pthread_mutex_lock(&vol_fsync_mutex) == 0)
+#define VFSYNC_UNLOCK \
+    assert(pthread_mutex_unlock(&vol_fsync_mutex) == 0)
+#else
+#define VFSYNC_LOCK
+#define VFSYNC_UNLOCK
+#endif
+
+/*
+ * Initialize the FSSYNC client: on pthread builds create the channel mutex
+ * the first time through, then connect to the server.
+ * Returns the result of SYNC_connect (1 on success, 0 on failure).
+ */
+int
+FSYNC_clientInit(void)
+{
+#ifdef AFS_PTHREAD_ENV
+ /* this is safe since it gets called with VOL_LOCK held, or before we go multithreaded */
+ if (!vol_fsync_mutex_init) {
+ assert(pthread_mutex_init(&vol_fsync_mutex, NULL) == 0);
+ vol_fsync_mutex_init = 1;
+ }
+#endif
+ return SYNC_connect(&fssync_state);
+}
+
+/*
+ * Shut down the FSSYNC client channel via SYNC_closeChannel; its result
+ * code is discarded.
+ */
+void
+FSYNC_clientFinis(void)
+{
+ SYNC_closeChannel(&fssync_state);
+}
+
+/*
+ * Drop the current FSSYNC socket and open a new connection.
+ * Returns the result of SYNC_reconnect.
+ * NOTE(review): judging by the name, meant to be called in a forked child
+ * so it stops sharing the parent's socket -- confirm with callers.
+ */
+int
+FSYNC_clientChildProcReconnect(void)
+{
+ return SYNC_reconnect(&fssync_state);
+}
+
+/*
+ * fsync client interface
+ *
+ * Issue one FSSYNC request over the shared client channel, holding the
+ * channel lock for the duration of the exchange, and log every outcome
+ * other than SYNC_OK / SYNC_FAILED.  The SYNC_ask result is returned
+ * unchanged.
+ */
+afs_int32
+FSYNC_askfs(SYNC_command * com, SYNC_response * res)
+{
+    afs_int32 rc;
+
+    VFSYNC_LOCK;
+    rc = SYNC_ask(&fssync_state, com, res);
+    VFSYNC_UNLOCK;
+
+    if (rc == SYNC_OK || rc == SYNC_FAILED) {
+        /* nothing to report */
+    } else if (rc == SYNC_COM_ERROR || rc == SYNC_BAD_COMMAND) {
+        Log("FSYNC_askfs: fatal FSSYNC protocol error; volume management functionality disabled until next fileserver restart\n");
+    } else if (rc == SYNC_DENIED) {
+        Log("FSYNC_askfs: FSSYNC request denied for reason=%d\n", res->hdr.reason);
+    } else {
+        Log("FSYNC_askfs: unknown protocol response %d\n", rc);
+    }
+
+    return rc;
+}
+
+/*
+ * Build an FSSYNC command around a caller-supplied payload and send it.
+ *
+ * ext_hdr / ext_len - payload bytes placed after the command header
+ * command, reason   - stored in the command header
+ * res_in            - where to put the response; may be NULL, in which
+ *                     case a local response with no payload buffer is used
+ *
+ * Returns the result of FSYNC_askfs.
+ */
+afs_int32
+FSYNC_GenericOp(void * ext_hdr, size_t ext_len,
+                int command, int reason,
+                SYNC_response * res_in)
+{
+    SYNC_response scratch;
+    SYNC_response *resp = res_in;
+    SYNC_command cmd;
+
+    if (resp == NULL) {
+        /* caller is not interested in the response payload */
+        scratch.payload.len = 0;
+        scratch.payload.buf = NULL;
+        resp = &scratch;
+    }
+
+    memset(&cmd, 0, sizeof(cmd));
+
+    cmd.payload.len = ext_len;
+    cmd.payload.buf = ext_hdr;
+
+    cmd.hdr.command_len = sizeof(cmd.hdr) + ext_len;
+    cmd.hdr.reason = reason;
+    cmd.hdr.command = command;
+    cmd.hdr.programType = programType;
+
+    return FSYNC_askfs(&cmd, resp);
+}
+
+/*
+ * Send a volume operation: wraps the volume id and optional partition name
+ * in an FSSYNC_VolOp_hdr payload and forwards it through FSYNC_GenericOp.
+ *
+ * partition may be NULL, in which case the partition name field is left
+ * zero-filled.  Returns the result of FSYNC_GenericOp.
+ */
+afs_int32
+FSYNC_VolOp(VolumeId volume, char * partition,
+            int command, int reason,
+            SYNC_response * res)
+{
+    FSSYNC_VolOp_hdr hdr;
+
+    /* start from an all-zero payload so unused fields are deterministic */
+    memset(&hdr, 0, sizeof(hdr));
+
+    if (partition != NULL) {
+        strlcpy(hdr.partName, partition, sizeof(hdr.partName));
+    }
+    hdr.volume = volume;
+
+    return FSYNC_GenericOp(&hdr, sizeof(hdr), command, reason, res);
+}
+
+/*
+ * Send a statistics operation: forwards *scom as the command payload
+ * through FSYNC_GenericOp and returns its result.
+ */
+afs_int32
+FSYNC_StatsOp(FSSYNC_StatsOp_hdr * scom, int command, int reason,
+ SYNC_response * res)
+{
+ return FSYNC_GenericOp(scom, sizeof(*scom), command, reason, res);
+}
+
+
+#endif /* FSSYNC_BUILD_CLIENT */
--- /dev/null
+/*
+ * Copyright 2006, Sine Nomine Associates and others.
+ * All Rights Reserved.
+ *
+ * This software has been released under the terms of the IBM Public
+ * License. For details, see the LICENSE file in the top-level source
+ * directory or online at http://www.openafs.org/dl/license10.html
+ */
+
+/* Main program file. Define globals. */
+#define MAIN 1
+
+/*
+ * fssync administration tool
+ */
+
+
+#include <afsconfig.h>
+#include <afs/param.h>
+
+RCSID
+ ("$Header$");
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <dirent.h>
+#include <sys/stat.h>
+#include <time.h>
+#include <errno.h>
+#ifdef AFS_NT40_ENV
+#include <io.h>
+#include <WINNT/afsevent.h>
+#else
+#include <sys/param.h>
+#include <sys/file.h>
+#ifndef ITIMER_REAL
+#include <sys/time.h>
+#endif /* ITIMER_REAL */
+#endif
+#include <rx/xdr.h>
+#include <afs/afsint.h>
+#include <afs/assert.h>
+
+
+#include <fcntl.h>
+
+#ifndef AFS_NT40_ENV
+#include <afs/osi_inode.h>
+#endif
+
+#include <afs/cmd.h>
+#include <afs/afsutil.h>
+#include <afs/fileutil.h>
+
+#include "nfs.h"
+#include "lwp.h"
+#include "lock.h"
+#include "ihandle.h"
+#include "vnode.h"
+#include "volume.h"
+#include "partition.h"
+#include "daemon_com.h"
+#include "fssync.h"
+#ifdef AFS_NT40_ENV
+#include <pthread.h>
+#endif
+
+int VolumeChanged; /* hack to make dir package happy */
+
+
+/* target of a volume operation; presumably filled from the -volumeid and
+ * -partition options -- TODO confirm in common_volop_prolog */
+struct volop_state {
+ afs_uint32 volume;
+ char partName[16];
+};
+
+/* per-invocation state handed to the command handlers; presumably reason
+ * comes from the -reason option -- TODO confirm in common_prolog */
+struct state {
+ afs_int32 reason;
+ struct volop_state * vop;
+};
+
+static int common_prolog(struct cmd_syndesc *, struct state *);
+static int common_volop_prolog(struct cmd_syndesc *, struct state *);
+
+static int do_volop(struct state *, afs_int32 command, SYNC_response * res);
+
+static char * response_code_to_string(afs_int32);
+static char * command_code_to_string(afs_int32);
+static char * reason_code_to_string(afs_int32);
+static char * program_type_to_string(afs_int32);
+
+static int VolOnline(struct cmd_syndesc * as, char * rock);
+static int VolOffline(struct cmd_syndesc * as, char * rock);
+static int VolMode(struct cmd_syndesc * as, char * rock);
+static int VolDetach(struct cmd_syndesc * as, char * rock);
+static int VolBreakCBKs(struct cmd_syndesc * as, char * rock);
+static int VolMove(struct cmd_syndesc * as, char * rock);
+static int VolList(struct cmd_syndesc * as, char * rock);
+static int VolQuery(struct cmd_syndesc * as, char * rock);
+static int VolHdrQuery(struct cmd_syndesc * as, char * rock);
+static int VolOpQuery(struct cmd_syndesc * as, char * rock);
+static int StatsQuery(struct cmd_syndesc * as, char * rock);
+
+
+static void print_vol_stats_general(VolPkgStats * stats);
+static void print_vol_stats_viceP(struct DiskPartitionStats * stats);
+static void print_vol_stats_hash(struct VolumeHashChainStats * stats);
+#ifdef AFS_DEMAND_ATTACH_FS
+static void print_vol_stats_hdr(struct volume_hdr_LRU_stats * stats);
+#endif
+
+#ifndef AFS_NT40_ENV
+#include "AFS_component_version_number.c"
+#endif
+#define MAX_ARGS 128
+
+#define COMMON_PARMS_OFFSET 12
+#define COMMON_PARMS(ts) \
+ cmd_Seek(ts, COMMON_PARMS_OFFSET); \
+ cmd_AddParm(ts, "-reason", CMD_SINGLE, CMD_OPTIONAL, "sync protocol reason code"); \
+ cmd_AddParm(ts, "-programtype", CMD_SINGLE, CMD_OPTIONAL, "program type code")
+
+#define COMMON_VOLOP_PARMS_OFFSET 10
+#define COMMON_VOLOP_PARMS(ts) \
+ cmd_Seek(ts, COMMON_VOLOP_PARMS_OFFSET); \
+ cmd_AddParm(ts, "-volumeid", CMD_SINGLE, 0, "volume id"); \
+ cmd_AddParm(ts, "-partition", CMD_SINGLE, CMD_OPTIONAL, "partition name")
+
+#define CUSTOM_PARMS_OFFSET 1
+
+
+#define VOLOP_PARMS_DECL(ts) \
+ COMMON_VOLOP_PARMS(ts); \
+ COMMON_PARMS(ts)
+#define COMMON_PARMS_DECL(ts) \
+ COMMON_PARMS(ts)
+
+/*
+ * Entry point: locate the AFS server directories, register every
+ * fssync-debug subcommand (and its aliases) with the cmd package, then let
+ * cmd_Dispatch() parse argv and run the selected handler.
+ *
+ * Exits with status 2 when the server directory cannot be determined,
+ * otherwise with whatever cmd_Dispatch() returns.
+ */
+int
+main(int argc, char **argv)
+{
+    struct cmd_syndesc *ts;
+    int err = 0;
+
+    /* Initialize directory paths */
+    if (!(initAFSDirPath() & AFSDIR_SERVER_PATHS_OK)) {
+#ifdef AFS_NT40_ENV
+        ReportErrorEventAlt(AFSEVT_SVR_NO_INSTALL_DIR, 0, argv[0], 0);
+#endif
+        fprintf(stderr, "%s: Unable to obtain AFS server directory.\n",
+                argv[0]);
+        exit(2);
+    }
+
+
+    /* volume operations: each takes -volumeid/-partition plus common options */
+    ts = cmd_CreateSyntax("online", VolOnline, 0, "bring a volume online (FSYNC_VOL_ON opcode)");
+    VOLOP_PARMS_DECL(ts);
+
+    ts = cmd_CreateSyntax("offline", VolOffline, 0, "take a volume offline (FSYNC_VOL_OFF opcode)");
+    VOLOP_PARMS_DECL(ts);
+
+    ts = cmd_CreateSyntax("mode", VolMode, 0, "change volume attach mode (FSYNC_VOL_NEEDVOLUME opcode)");
+    VOLOP_PARMS_DECL(ts);
+    cmd_CreateAlias(ts, "needvolume");
+
+    ts = cmd_CreateSyntax("detach", VolDetach, 0, "detach a volume (FSYNC_VOL_DONE opcode)");
+    VOLOP_PARMS_DECL(ts);
+
+    ts = cmd_CreateSyntax("callback", VolBreakCBKs, 0, "break callbacks for volume (FSYNC_VOL_BREAKCBKS opcode)");
+    VOLOP_PARMS_DECL(ts);
+    cmd_CreateAlias(ts, "cbk");
+
+    ts = cmd_CreateSyntax("move", VolMove, 0, "set volume moved flag (FSYNC_VOL_MOVE opcode)");
+    VOLOP_PARMS_DECL(ts);
+
+    ts = cmd_CreateSyntax("list", VolList, 0, "sync local volume list (FSYNC_VOL_LISTVOLUMES opcode)");
+    VOLOP_PARMS_DECL(ts);
+    cmd_CreateAlias(ts, "ls");
+
+    /* queries: same options, but the handlers also decode the payload */
+    ts = cmd_CreateSyntax("query", VolQuery, 0, "get volume structure (FSYNC_VOL_QUERY opcode)");
+    VOLOP_PARMS_DECL(ts);
+    cmd_CreateAlias(ts, "qry");
+
+    ts = cmd_CreateSyntax("header", VolHdrQuery, 0, "get volume disk data structure (FSYNC_VOL_QUERY_HDR opcode)");
+    VOLOP_PARMS_DECL(ts);
+    cmd_CreateAlias(ts, "hdr");
+
+    ts = cmd_CreateSyntax("volop", VolOpQuery, 0, "get pending volume operation info (FSYNC_VOL_QUERY_VOP opcode)");
+    VOLOP_PARMS_DECL(ts);
+    cmd_CreateAlias(ts, "vop");
+
+    /* stats: custom positional-style options instead of the volume selectors */
+    ts = cmd_CreateSyntax("stats", StatsQuery, 0, "see 'stats help' for more information");
+    cmd_Seek(ts, CUSTOM_PARMS_OFFSET);
+    cmd_AddParm(ts, "-cmd", CMD_SINGLE, 0, "subcommand");
+    cmd_AddParm(ts, "-arg1", CMD_SINGLE, CMD_OPTIONAL, "arg1");
+    cmd_AddParm(ts, "-arg2", CMD_SINGLE, CMD_OPTIONAL, "arg2");
+    COMMON_PARMS_DECL(ts);
+
+    err = cmd_Dispatch(argc, argv);
+    exit(err);
+}
+
+/*
+ * Shared start-up for every subcommand handler.
+ *
+ * Initializes the volume package as a debugUtility without connecting
+ * (DONT_CONNECT_FS), applies the optional -reason and -programtype
+ * arguments, and only then calls VConnectFS(), so a -programtype override
+ * is already in place when the connection is made.
+ *
+ * as     parsed command line
+ * state  receives the reason code; when -reason is absent it defaults to
+ *        FSYNC_WHATEVER instead of being left uninitialized (callers
+ *        declare their state on the stack without clearing it)
+ *
+ * Always returns 0.
+ */
+static int
+common_prolog(struct cmd_syndesc * as, struct state * state)
+{
+    register struct cmd_item *ti;
+
+#ifdef AFS_NT40_ENV
+    if (afs_winsockInit() < 0) {
+        Exit(1);
+    }
+#endif
+
+    VInitVolumePackage(debugUtility, 1, 1,
+                       DONT_CONNECT_FS, 0);
+    DInit(1);
+
+    if ((ti = as->parms[COMMON_PARMS_OFFSET].items)) { /* -reason */
+        state->reason = atoi(ti->data);
+    } else {
+        state->reason = FSYNC_WHATEVER;
+    }
+    if ((ti = as->parms[COMMON_PARMS_OFFSET+1].items)) { /* -programtype */
+        /* accept the symbolic names, else fall back to a raw number */
+        if (!strcmp(ti->data, "fileServer")) {
+            programType = fileServer;
+        } else if (!strcmp(ti->data, "volumeUtility")) {
+            programType = volumeUtility;
+        } else if (!strcmp(ti->data, "salvager")) {
+            programType = salvager;
+        } else if (!strcmp(ti->data, "salvageServer")) {
+            programType = salvageServer;
+        } else {
+            programType = (ProgramType) atoi(ti->data);
+        }
+    }
+
+    VConnectFS();
+
+    return 0;
+}
+
+/*
+ * Parses the volume selectors (-volumeid, -partition) into a freshly
+ * allocated, zero-filled state->vop.
+ *
+ * The volume id is parsed with strtoul() rather than atoi() so ids above
+ * INT_MAX do not overflow.  When -partition is absent the partition name
+ * stays empty (calloc() already zeroed it).
+ *
+ * A missing -volumeid is reported on stderr; the function still returns 0,
+ * as before, and the volume id is left at 0.
+ */
+static int
+common_volop_prolog(struct cmd_syndesc * as, struct state * state)
+{
+    register struct cmd_item *ti;
+
+    state->vop = (struct volop_state *) calloc(1, sizeof(struct volop_state));
+    assert(state->vop != NULL);
+
+    if ((ti = as->parms[COMMON_VOLOP_PARMS_OFFSET].items)) { /* -volumeid */
+        state->vop->volume = strtoul(ti->data, NULL, 10);
+    } else {
+        fprintf(stderr, "required argument -volumeid not given\n");
+    }
+
+    if ((ti = as->parms[COMMON_VOLOP_PARMS_OFFSET+1].items)) { /* -partition */
+        strlcpy(state->vop->partName, ti->data, sizeof(state->vop->partName));
+    }
+
+    return 0;
+}
+
+/*
+ * Sends one FSYNC volume operation for state->vop and logs the outcome
+ * (return code, protocol response code, protocol reason code) to stderr,
+ * then disconnects with VDisconnectFS().
+ *
+ * state    volume/partition and reason code to send
+ * command  FSYNC_VOL_* opcode
+ * res      caller-supplied response, or NULL to use a local scratch
+ *          response whose payload is discarded
+ *
+ * Returns the value returned by FSYNC_VolOp().
+ */
+static int
+do_volop(struct state * state, afs_int32 command, SYNC_response * res)
+{
+    afs_int32 code;
+    SYNC_PROTO_BUF_DECL(res_buf);
+    SYNC_response res_l;
+
+    if (!res) {
+        /* set up the scratch response the same way the query handlers do */
+        res = &res_l;
+        res->hdr.response_len = sizeof(res->hdr);
+        res->payload.len = SYNC_PROTO_MAX_LEN;
+        res->payload.buf = res_buf;
+    }
+
+    fprintf(stderr, "calling FSYNC_VolOp with command code %d (%s)\n",
+            command, command_code_to_string(command));
+
+    code = FSYNC_VolOp(state->vop->volume,
+                       state->vop->partName,
+                       command,
+                       state->reason,
+                       res);
+
+    switch (code) {
+    case SYNC_OK:
+    case SYNC_DENIED:
+        break;
+    default:
+        fprintf(stderr, "possible sync protocol error. return code was %d\n", code);
+    }
+
+    fprintf(stderr, "FSYNC_VolOp returned %d (%s)\n", code, response_code_to_string(code));
+    fprintf(stderr, "protocol response code was %d (%s)\n",
+            res->hdr.response, response_code_to_string(res->hdr.response));
+    fprintf(stderr, "protocol reason code was %d (%s)\n",
+            res->hdr.reason, reason_code_to_string(res->hdr.reason));
+
+    VDisconnectFS();
+
+    /* previously control fell off the end of this non-void function */
+    return code;
+}
+
+/* Maps a SYNC protocol response code to its symbolic name. */
+static char *
+response_code_to_string(afs_int32 response)
+{
+    static const struct {
+        afs_int32 code;
+        char *name;
+    } known[] = {
+        { SYNC_OK, "SYNC_OK" },
+        { SYNC_DENIED, "SYNC_DENIED" },
+        { SYNC_COM_ERROR, "SYNC_COM_ERROR" },
+        { SYNC_BAD_COMMAND, "SYNC_BAD_COMMAND" },
+        { SYNC_FAILED, "SYNC_FAILED" },
+    };
+    unsigned int k;
+
+    for (k = 0; k < sizeof(known) / sizeof(known[0]); k++) {
+        if (known[k].code == response)
+            return known[k].name;
+    }
+    /* anything not listed above */
+    return "**UNKNOWN**";
+}
+
+/* Maps a SYNC/FSYNC command opcode to its symbolic name. */
+static char *
+command_code_to_string(afs_int32 command)
+{
+    static const struct {
+        afs_int32 code;
+        char *name;
+    } known[] = {
+        { SYNC_COM_CHANNEL_CLOSE, "SYNC_COM_CHANNEL_CLOSE" },
+        { FSYNC_VOL_ON, "FSYNC_VOL_ON" },
+        { FSYNC_VOL_OFF, "FSYNC_VOL_OFF" },
+        { FSYNC_VOL_LISTVOLUMES, "FSYNC_VOL_LISTVOLUMES" },
+        { FSYNC_VOL_NEEDVOLUME, "FSYNC_VOL_NEEDVOLUME" },
+        { FSYNC_VOL_MOVE, "FSYNC_VOL_MOVE" },
+        { FSYNC_VOL_BREAKCBKS, "FSYNC_VOL_BREAKCBKS" },
+        { FSYNC_VOL_DONE, "FSYNC_VOL_DONE" },
+        { FSYNC_VOL_QUERY, "FSYNC_VOL_QUERY" },
+        { FSYNC_VOL_QUERY_HDR, "FSYNC_VOL_QUERY_HDR" },
+        { FSYNC_VOL_QUERY_VOP, "FSYNC_VOL_QUERY_VOP" },
+        { FSYNC_VOL_STATS_GENERAL, "FSYNC_VOL_STATS_GENERAL" },
+        { FSYNC_VOL_STATS_VICEP, "FSYNC_VOL_STATS_VICEP" },
+        { FSYNC_VOL_STATS_HASH, "FSYNC_VOL_STATS_HASH" },
+        { FSYNC_VOL_STATS_HDR, "FSYNC_VOL_STATS_HDR" },
+        { FSYNC_VOL_STATS_VLRU, "FSYNC_VOL_STATS_VLRU" },
+    };
+    unsigned int k;
+
+    for (k = 0; k < sizeof(known) / sizeof(known[0]); k++) {
+        if (known[k].code == command)
+            return known[k].name;
+    }
+    /* anything not listed above */
+    return "**UNKNOWN**";
+}
+
+/* Maps a SYNC/FSYNC reason code to its symbolic name. */
+static char *
+reason_code_to_string(afs_int32 reason)
+{
+    static const struct {
+        afs_int32 code;
+        char *name;
+    } known[] = {
+        { SYNC_REASON_NONE, "SYNC_REASON_NONE" },
+        { SYNC_REASON_MALFORMED_PACKET, "SYNC_REASON_MALFORMED_PACKET" },
+        { FSYNC_WHATEVER, "FSYNC_WHATEVER" },
+        { FSYNC_SALVAGE, "FSYNC_SALVAGE" },
+        { FSYNC_MOVE, "FSYNC_MOVE" },
+        { FSYNC_OPERATOR, "FSYNC_OPERATOR" },
+        { FSYNC_EXCLUSIVE, "FSYNC_EXCLUSIVE" },
+        { FSYNC_UNKNOWN_VOLID, "FSYNC_UNKNOWN_VOLID" },
+        { FSYNC_HDR_NOT_ATTACHED, "FSYNC_HDR_NOT_ATTACHED" },
+        { FSYNC_NO_PENDING_VOL_OP, "FSYNC_NO_PENDING_VOL_OP" },
+        { FSYNC_VOL_PKG_ERROR, "FSYNC_VOL_PKG_ERROR" },
+    };
+    unsigned int k;
+
+    for (k = 0; k < sizeof(known) / sizeof(known[0]); k++) {
+        if (known[k].code == reason)
+            return known[k].name;
+    }
+    /* anything not listed above */
+    return "**UNKNOWN**";
+}
+
+/* Maps a ProgramType value (passed as afs_int32) to its symbolic name. */
+static char *
+program_type_to_string(afs_int32 type)
+{
+    static const struct {
+        ProgramType type;
+        char *name;
+    } known[] = {
+        { fileServer, "fileServer" },
+        { volumeUtility, "volumeUtility" },
+        { salvager, "salvager" },
+        { salvageServer, "salvageServer" },
+        { debugUtility, "debugUtility" },
+    };
+    ProgramType wanted = (ProgramType) type;
+    unsigned int k;
+
+    for (k = 0; k < sizeof(known) / sizeof(known[0]); k++) {
+        if (known[k].type == wanted)
+            return known[k].name;
+    }
+    /* anything not listed above */
+    return "**UNKNOWN**";
+}
+
+/* "online" subcommand: send FSYNC_VOL_ON for the selected volume. */
+static int
+VolOnline(struct cmd_syndesc * as, char * rock)
+{
+    struct state st;
+
+    /* parse common options, then the volume selectors */
+    common_prolog(as, &st);
+    common_volop_prolog(as, &st);
+    do_volop(&st, FSYNC_VOL_ON, NULL);
+    return 0;
+}
+
+/* "offline" subcommand: send FSYNC_VOL_OFF for the selected volume. */
+static int
+VolOffline(struct cmd_syndesc * as, char * rock)
+{
+    struct state st;
+
+    /* parse common options, then the volume selectors */
+    common_prolog(as, &st);
+    common_volop_prolog(as, &st);
+    do_volop(&st, FSYNC_VOL_OFF, NULL);
+    return 0;
+}
+
+/* "mode" subcommand: send FSYNC_VOL_NEEDVOLUME for the selected volume. */
+static int
+VolMode(struct cmd_syndesc * as, char * rock)
+{
+    struct state st;
+
+    /* parse common options, then the volume selectors */
+    common_prolog(as, &st);
+    common_volop_prolog(as, &st);
+    do_volop(&st, FSYNC_VOL_NEEDVOLUME, NULL);
+    return 0;
+}
+
+/* "detach" subcommand: send FSYNC_VOL_DONE for the selected volume. */
+static int
+VolDetach(struct cmd_syndesc * as, char * rock)
+{
+    struct state st;
+
+    /* parse common options, then the volume selectors */
+    common_prolog(as, &st);
+    common_volop_prolog(as, &st);
+    do_volop(&st, FSYNC_VOL_DONE, NULL);
+    return 0;
+}
+
+/* "callback" subcommand: send FSYNC_VOL_BREAKCBKS for the selected volume. */
+static int
+VolBreakCBKs(struct cmd_syndesc * as, char * rock)
+{
+    struct state st;
+
+    /* parse common options, then the volume selectors */
+    common_prolog(as, &st);
+    common_volop_prolog(as, &st);
+    do_volop(&st, FSYNC_VOL_BREAKCBKS, NULL);
+    return 0;
+}
+
+/* "move" subcommand: send FSYNC_VOL_MOVE for the selected volume. */
+static int
+VolMove(struct cmd_syndesc * as, char * rock)
+{
+    struct state st;
+
+    /* parse common options, then the volume selectors */
+    common_prolog(as, &st);
+    common_volop_prolog(as, &st);
+    do_volop(&st, FSYNC_VOL_MOVE, NULL);
+    return 0;
+}
+
+/* "list" subcommand: send FSYNC_VOL_LISTVOLUMES. */
+static int
+VolList(struct cmd_syndesc * as, char * rock)
+{
+    struct state st;
+
+    /* parse common options, then the volume selectors */
+    common_prolog(as, &st);
+    common_volop_prolog(as, &st);
+    do_volop(&st, FSYNC_VOL_LISTVOLUMES, NULL);
+    return 0;
+}
+
+#ifdef AFS_DEMAND_ATTACH_FS
+/* Maps a demand-attach VolState value to its symbolic name. */
+static char *
+vol_state_to_string(VolState state)
+{
+    static const struct {
+        VolState state;
+        char *name;
+    } known[] = {
+        { VOL_STATE_UNATTACHED, "VOL_STATE_UNATTACHED" },
+        { VOL_STATE_PREATTACHED, "VOL_STATE_PREATTACHED" },
+        { VOL_STATE_ATTACHING, "VOL_STATE_ATTACHING" },
+        { VOL_STATE_ATTACHED, "VOL_STATE_ATTACHED" },
+        { VOL_STATE_UPDATING, "VOL_STATE_UPDATING" },
+        { VOL_STATE_GET_BITMAP, "VOL_STATE_GET_BITMAP" },
+        { VOL_STATE_HDR_LOADING, "VOL_STATE_HDR_LOADING" },
+        { VOL_STATE_HDR_ATTACHING, "VOL_STATE_HDR_ATTACHING" },
+        { VOL_STATE_SHUTTING_DOWN, "VOL_STATE_SHUTTING_DOWN" },
+        { VOL_STATE_GOING_OFFLINE, "VOL_STATE_GOING_OFFLINE" },
+        { VOL_STATE_OFFLINING, "VOL_STATE_OFFLINING" },
+        { VOL_STATE_DETACHING, "VOL_STATE_DETACHING" },
+        { VOL_STATE_SALVSYNC_REQ, "VOL_STATE_SALVSYNC_REQ" },
+        { VOL_STATE_SALVAGING, "VOL_STATE_SALVAGING" },
+        { VOL_STATE_ERROR, "VOL_STATE_ERROR" },
+        { VOL_STATE_FREED, "VOL_STATE_FREED" },
+    };
+    unsigned int k;
+
+    for (k = 0; k < sizeof(known) / sizeof(known[0]); k++) {
+        if (known[k].state == state)
+            return known[k].name;
+    }
+    /* anything not listed above */
+    return "**UNKNOWN**";
+}
+
+/*
+ * Renders a volume attach-flags bitmask as "FLAG_A | FLAG_B | ...".
+ *
+ * Flags are emitted in the fixed order of the table below; bits with no
+ * table entry are ignored.  Returns a pointer to a static buffer that is
+ * overwritten by the next call (empty string when no known flag is set).
+ *
+ * The separator is now driven by "is the buffer non-empty", so it can no
+ * longer be dropped after VOL_ON_VLRU, and the buffer is large enough to
+ * hold every name plus separators (130 characters) without truncation.
+ */
+static char *
+vol_flags_to_string(afs_uint16 flags)
+{
+    static const struct {
+        unsigned int bit;
+        const char *name;
+    } flag_names[] = {
+        { VOL_HDR_ATTACHED, "VOL_HDR_ATTACHED" },
+        { VOL_HDR_LOADED, "VOL_HDR_LOADED" },
+        { VOL_HDR_IN_LRU, "VOL_HDR_IN_LRU" },
+        { VOL_IN_HASH, "VOL_IN_HASH" },
+        { VOL_ON_VBYP_LIST, "VOL_ON_VBYP_LIST" },
+        { VOL_IS_BUSY, "VOL_IS_BUSY" },
+        { VOL_ON_VLRU, "VOL_ON_VLRU" },
+        { VOL_HDR_DONTSALV, "VOL_HDR_DONTSALV" },
+    };
+    static char str[256];
+    unsigned int k;
+
+    str[0] = '\0';
+
+    for (k = 0; k < sizeof(flag_names) / sizeof(flag_names[0]); k++) {
+        if (!(flags & flag_names[k].bit))
+            continue;
+        if (str[0] != '\0')
+            strlcat(str, " | ", sizeof(str));
+        strlcat(str, flag_names[k].name, sizeof(str));
+    }
+
+    return str;
+}
+
+/* Maps a VLRU queue index to its symbolic name. */
+static char *
+vlru_idx_to_string(int idx)
+{
+    static const struct {
+        int idx;
+        char *name;
+    } known[] = {
+        { VLRU_QUEUE_NEW, "VLRU_QUEUE_NEW" },
+        { VLRU_QUEUE_MID, "VLRU_QUEUE_MID" },
+        { VLRU_QUEUE_OLD, "VLRU_QUEUE_OLD" },
+        { VLRU_QUEUE_CANDIDATE, "VLRU_QUEUE_CANDIDATE" },
+        { VLRU_QUEUE_HELD, "VLRU_QUEUE_HELD" },
+        { VLRU_QUEUE_INVALID, "VLRU_QUEUE_INVALID" },
+    };
+    unsigned int k;
+
+    for (k = 0; k < sizeof(known) / sizeof(known[0]); k++) {
+        if (known[k].idx == idx)
+            return known[k].name;
+    }
+    /* anything not listed above */
+    return "**UNKNOWN**";
+}
+#endif
+
+/*
+ * "query" subcommand: issue FSYNC_VOL_QUERY and, when the protocol
+ * response is SYNC_OK, dump the returned Volume structure to stdout.
+ *
+ * The payload is copied into a local Volume before printing.  Pointer
+ * members are printed with %p; they are addresses copied out of the
+ * responding process and are only meaningful as opaque values.  The
+ * demand-attach fields are printed only when the server set
+ * SYNC_FLAG_DAFS_EXTENSIONS and this binary was built with
+ * AFS_DEMAND_ATTACH_FS.
+ *
+ * Always returns 0.
+ */
+static int
+VolQuery(struct cmd_syndesc * as, char * rock)
+{
+    struct state state;
+    SYNC_PROTO_BUF_DECL(res_buf);
+    SYNC_response res;
+    Volume v;
+    afs_uint32 hi, lo;  /* unsigned, to match the %u conversions below */
+
+    res.hdr.response_len = sizeof(res.hdr);
+    res.payload.buf = res_buf;
+    res.payload.len = SYNC_PROTO_MAX_LEN;
+
+    common_prolog(as, &state);
+    common_volop_prolog(as, &state);
+
+    do_volop(&state, FSYNC_VOL_QUERY, &res);
+
+    if (res.hdr.response == SYNC_OK) {
+        memcpy(&v, res.payload.buf, sizeof(Volume));
+
+        printf("volume = {\n");
+        printf("\thashid = %u\n", v.hashid);
+        printf("\theader = %p\n", (void *) v.header);
+        printf("\tdevice = %d\n", v.device);
+        printf("\tpartition = %p\n", (void *) v.partition);
+        printf("\tlinkHandle = %p\n", (void *) v.linkHandle);
+        printf("\tnextVnodeUnique = %u\n", v.nextVnodeUnique);
+        printf("\tdiskDataHandle = %p\n", (void *) v.diskDataHandle);
+        printf("\tvnodeHashOffset = %u\n", v.vnodeHashOffset);
+        printf("\tshuttingDown = %d\n", v.shuttingDown);
+        printf("\tgoingOffline = %d\n", v.goingOffline);
+        printf("\tcacheCheck = %u\n", v.cacheCheck);
+        printf("\tnUsers = %d\n", v.nUsers);
+        printf("\tneedsPutBack = %d\n", v.needsPutBack);
+        printf("\tspecialStatus = %d\n", v.specialStatus);
+        printf("\tupdateTime = %u\n", v.updateTime);
+
+        printf("\tvnodeIndex[vSmall] = {\n");
+        printf("\t\thandle = %p\n", (void *) v.vnodeIndex[vSmall].handle);
+        printf("\t\tbitmap = %p\n", (void *) v.vnodeIndex[vSmall].bitmap);
+        printf("\t\tbitmapSize = %u\n", v.vnodeIndex[vSmall].bitmapSize);
+        printf("\t\tbitmapOffset = %u\n", v.vnodeIndex[vSmall].bitmapOffset);
+        printf("\t}\n");
+        printf("\tvnodeIndex[vLarge] = {\n");
+        printf("\t\thandle = %p\n", (void *) v.vnodeIndex[vLarge].handle);
+        printf("\t\tbitmap = %p\n", (void *) v.vnodeIndex[vLarge].bitmap);
+        printf("\t\tbitmapSize = %u\n", v.vnodeIndex[vLarge].bitmapSize);
+        printf("\t\tbitmapOffset = %u\n", v.vnodeIndex[vLarge].bitmapOffset);
+        printf("\t}\n");
+#ifdef AFS_DEMAND_ATTACH_FS
+        if (res.hdr.flags & SYNC_FLAG_DAFS_EXTENSIONS) {
+            /* updateTime was already printed with the base fields above */
+            printf("\tattach_state = %s\n", vol_state_to_string(v.attach_state));
+            printf("\tattach_flags = %s\n", vol_flags_to_string(v.attach_flags));
+            printf("\tnWaiters = %d\n", v.nWaiters);
+            printf("\tchainCacheCheck = %d\n", v.chainCacheCheck);
+
+            /* online salvage structure */
+            printf("\tsalvage = {\n");
+            printf("\t\tprio = %u\n", v.salvage.prio);
+            printf("\t\treason = %d\n", v.salvage.reason);
+            printf("\t\trequested = %d\n", v.salvage.requested);
+            printf("\t\tscheduled = %d\n", v.salvage.scheduled);
+            printf("\t}\n");
+
+            /* statistics structure */
+            printf("\tstats = {\n");
+
+            printf("\t\thash_lookups = {\n");
+            SplitInt64(v.stats.hash_lookups,hi,lo);
+            printf("\t\t\thi = %u\n", hi);
+            printf("\t\t\tlo = %u\n", lo);
+            printf("\t\t}\n");
+
+            printf("\t\thash_short_circuits = {\n");
+            SplitInt64(v.stats.hash_short_circuits,hi,lo);
+            printf("\t\t\thi = %u\n", hi);
+            printf("\t\t\tlo = %u\n", lo);
+            printf("\t\t}\n");
+
+            printf("\t\thdr_loads = {\n");
+            SplitInt64(v.stats.hdr_loads,hi,lo);
+            printf("\t\t\thi = %u\n", hi);
+            printf("\t\t\tlo = %u\n", lo);
+            printf("\t\t}\n");
+
+            printf("\t\thdr_gets = {\n");
+            SplitInt64(v.stats.hdr_gets,hi,lo);
+            printf("\t\t\thi = %u\n", hi);
+            printf("\t\t\tlo = %u\n", lo);
+            printf("\t\t}\n");
+
+            printf("\t\tattaches = %u\n", v.stats.attaches);
+            printf("\t\tsoft_detaches = %u\n", v.stats.soft_detaches);
+            printf("\t\tsalvages = %u\n", v.stats.salvages);
+            printf("\t\tvol_ops = %u\n", v.stats.vol_ops);
+
+            printf("\t\tlast_attach = %u\n", v.stats.last_attach);
+            printf("\t\tlast_get = %u\n", v.stats.last_get);
+            printf("\t\tlast_promote = %u\n", v.stats.last_promote);
+            printf("\t\tlast_hdr_get = %u\n", v.stats.last_hdr_get);
+            printf("\t\tlast_salvage = %u\n", v.stats.last_salvage);
+            printf("\t\tlast_salvage_req = %u\n", v.stats.last_salvage_req);
+            printf("\t\tlast_vol_op = %u\n", v.stats.last_vol_op);
+            printf("\t}\n");
+
+            /* VLRU state */
+            printf("\tvlru = {\n");
+            printf("\t\tidx = %d (%s)\n",
+                   v.vlru.idx, vlru_idx_to_string(v.vlru.idx));
+            printf("\t}\n");
+
+            /* volume op state */
+            printf("\tpending_vol_op = %p\n", (void *) v.pending_vol_op);
+        }
+#else /* !AFS_DEMAND_ATTACH_FS */
+        if (res.hdr.flags & SYNC_FLAG_DAFS_EXTENSIONS) {
+            printf("*** server asserted demand attach extensions. fssync-debug not built to\n");
+            printf("*** recognize those extensions. please recompile fssync-debug if you need\n");
+            printf("*** to dump dafs extended state\n");
+        }
+#endif /* !AFS_DEMAND_ATTACH_FS */
+        printf("}\n");
+    }
+
+    return 0;
+}
+
+/*
+ * "header" subcommand: issue FSYNC_VOL_QUERY_HDR and, when the protocol
+ * response is SYNC_OK, dump the returned VolumeDiskData to stdout.
+ *
+ * The string members come straight from the response payload, so they are
+ * printed with an explicit precision to stay inside the field even if the
+ * sender did not NUL-terminate them.
+ * NOTE(review): this assumes v.name and v.motd are embedded char arrays
+ * (the structure is copied byte-for-byte from the payload) -- confirm
+ * against the VolumeDiskData declaration.
+ *
+ * Always returns 0.
+ */
+static int
+VolHdrQuery(struct cmd_syndesc * as, char * rock)
+{
+    struct state state;
+    SYNC_PROTO_BUF_DECL(res_buf);
+    SYNC_response res;
+    VolumeDiskData v;
+    int i;
+
+    res.hdr.response_len = sizeof(res.hdr);
+    res.payload.buf = res_buf;
+    res.payload.len = SYNC_PROTO_MAX_LEN;
+
+    common_prolog(as, &state);
+    common_volop_prolog(as, &state);
+
+    do_volop(&state, FSYNC_VOL_QUERY_HDR, &res);
+
+    if (res.hdr.response == SYNC_OK) {
+        memcpy(&v, res.payload.buf, sizeof(VolumeDiskData));
+
+        printf("VolumeDiskData = {\n");
+        printf("\tstamp = {\n");
+        printf("\t\tmagic = 0x%x\n", v.stamp.magic);
+        printf("\t\tversion = %u\n", v.stamp.version);
+        printf("\t}\n");
+
+        printf("\tid = %u\n", v.id);
+        printf("\tname = '%.*s'\n", (int) sizeof(v.name), v.name);
+        printf("\tinUse = %d\n", v.inUse);
+        printf("\tinService = %d\n", v.inService);
+        printf("\tblessed = %d\n", v.blessed);
+        printf("\tneedsSalvaged = %d\n", v.needsSalvaged);
+        printf("\tuniquifier = %u\n", v.uniquifier);
+        printf("\ttype = %d\n", v.type);
+        printf("\tparentId = %u\n", v.parentId);
+        printf("\tcloneId = %u\n", v.cloneId);
+        printf("\tbackupId = %u\n", v.backupId);
+        printf("\trestoredFromId = %u\n", v.restoredFromId);
+        printf("\tneedsCallback = %d\n", v.needsCallback);
+        printf("\tdestroyMe = %d\n", v.destroyMe);
+        printf("\tdontSalvage = %d\n", v.dontSalvage);
+        printf("\tmaxquota = %d\n", v.maxquota);
+        printf("\tminquota = %d\n", v.minquota);
+        printf("\tmaxfiles = %d\n", v.maxfiles);
+        printf("\taccountNumber = %u\n", v.accountNumber);
+        printf("\towner = %u\n", v.owner);
+        printf("\tfilecount = %d\n", v.filecount);
+        printf("\tdiskused = %d\n", v.diskused);
+        printf("\tdayUse = %d\n", v.dayUse);
+        for (i = 0; i < 7; i++) {
+            printf("\tweekUse[%d] = %d\n", i, v.weekUse[i]);
+        }
+        printf("\tdayUseDate = %u\n", v.dayUseDate);
+        printf("\tcreationDate = %u\n", v.creationDate);
+        printf("\taccessDate = %u\n", v.accessDate);
+        printf("\tupdateDate = %u\n", v.updateDate);
+        printf("\texpirationDate = %u\n", v.expirationDate);
+        printf("\tbackupDate = %u\n", v.backupDate);
+        printf("\tcopyDate = %u\n", v.copyDate);
+#ifdef OPENAFS_VOL_STATS
+        printf("\tstat_initialized = %d\n", v.stat_initialized);
+#else
+        /* label now matches the member name, like every other line here */
+        printf("\tmotd = '%.*s'\n", (int) sizeof(v.motd), v.motd);
+#endif
+        printf("}\n");
+    }
+
+    return 0;
+}
+
+/*
+ * "volop" subcommand: issue FSYNC_VOL_QUERY_VOP and print the pending
+ * volume operation record returned by the server.  A notice is printed
+ * when the response lacks SYNC_FLAG_DAFS_EXTENSIONS.  Always returns 0.
+ */
+static int
+VolOpQuery(struct cmd_syndesc * as, char * rock)
+{
+    struct state st;
+    SYNC_PROTO_BUF_DECL(res_buf);
+    SYNC_response res;
+    FSSYNC_VolOp_info info;
+
+    res.payload.len = SYNC_PROTO_MAX_LEN;
+    res.payload.buf = res_buf;
+    res.hdr.response_len = sizeof(res.hdr);
+
+    common_prolog(as, &st);
+    common_volop_prolog(as, &st);
+
+    do_volop(&st, FSYNC_VOL_QUERY_VOP, &res);
+
+    if ((res.hdr.flags & SYNC_FLAG_DAFS_EXTENSIONS) == 0) {
+        printf("*** file server not compiled with demand attach extensions.\n");
+        printf("*** pending volume operation metadata not available.\n");
+    }
+
+    /* nothing to decode unless the server reported success */
+    if (res.hdr.response != SYNC_OK)
+        return 0;
+
+    memcpy(&info, res.payload.buf, sizeof(FSSYNC_VolOp_info));
+
+    printf("pending_vol_op = {\n");
+
+    /* the command header that started the operation */
+    printf("\tcom = {\n");
+    printf("\t\tproto_version = %u\n", info.com.proto_version);
+    printf("\t\tprogramType = %d (%s)\n",
+           info.com.programType, program_type_to_string(info.com.programType));
+    printf("\t\tcommand = %d (%s)\n",
+           info.com.command, command_code_to_string(info.com.command));
+    printf("\t\treason = %d (%s)\n",
+           info.com.reason, reason_code_to_string(info.com.reason));
+    printf("\t\tcommand_len = %u\n", info.com.command_len);
+    printf("\t\tflags = 0x%x\n", info.com.flags);
+    printf("\t}\n");
+
+    /* the volume it applies to */
+    printf("\tvop = {\n");
+    printf("\t\tvolume = %u\n", info.vop.volume);
+    if (afs_strnlen(info.vop.partName, sizeof(info.vop.partName)) >=
+        sizeof(info.vop.partName)) {
+        /* no terminator inside the field: do not hand it to %s */
+        printf("\t\tpartName = (illegal string)\n");
+    } else {
+        printf("\t\tpartName = '%s'\n", info.vop.partName);
+    }
+    printf("\t}\n");
+
+    printf("}\n");
+
+    return 0;
+}
+
+/*
+ * "stats" subcommand: select a statistics opcode from -cmd (default
+ * FSYNC_VOL_STATS_GENERAL), fill in its argument from -arg1 where one is
+ * required, call FSYNC_StatsOp() and pretty-print the reply.
+ *
+ * Exits 0 after printing help, and exits 1 on an unknown subcommand, an
+ * unexpected -arg1, or a missing required -arg1.  Otherwise returns 0.
+ *
+ * NOTE(review): the request is always sent with reason FSYNC_WHATEVER;
+ * a -reason given on the command line is parsed by common_prolog() but
+ * not used here -- confirm whether that is intended.
+ * NOTE(review): there is no printer for FSYNC_VOL_STATS_VLRU replies.
+ */
+static int
+StatsQuery(struct cmd_syndesc * as, char * rock)
+{
+    afs_int32 code;
+    int command;
+    struct cmd_item *ti;
+    struct state state;
+    SYNC_PROTO_BUF_DECL(res_buf);
+    SYNC_response res;
+    FSSYNC_StatsOp_hdr scom;
+    union {
+        void * ptr;
+        struct VolPkgStats * vol_stats;
+        struct VolumeHashChainStats * hash_stats;
+#ifdef AFS_DEMAND_ATTACH_FS
+        struct volume_hdr_LRU_stats * hdr_stats;
+#endif
+        struct DiskPartitionStats * vicep_stats;
+    } sres;
+
+    /* do not send stack garbage for arguments the subcommand does not set */
+    memset(&scom, 0, sizeof(scom));
+
+    sres.ptr = res_buf;
+    res.hdr.response_len = sizeof(res.hdr);
+    res.payload.buf = res_buf;
+    res.payload.len = SYNC_PROTO_MAX_LEN;
+
+    if ((ti = as->parms[CUSTOM_PARMS_OFFSET].items)) { /* -subcommand */
+        if (!strcasecmp(ti->data, "vicep")) {
+            command = FSYNC_VOL_STATS_VICEP;
+        } else if (!strcasecmp(ti->data, "hash")) {
+            command = FSYNC_VOL_STATS_HASH;
+#ifdef AFS_DEMAND_ATTACH_FS
+        } else if (!strcasecmp(ti->data, "hdr")) {
+            command = FSYNC_VOL_STATS_HDR;
+        } else if (!strcasecmp(ti->data, "vlru")) {
+            command = FSYNC_VOL_STATS_VLRU;
+#endif
+        } else if (!strcasecmp(ti->data, "pkg")) {
+            command = FSYNC_VOL_STATS_GENERAL;
+        } else if (!strcasecmp(ti->data, "help")) {
+            fprintf(stderr, "fssync-debug stats subcommands:\n");
+            fprintf(stderr, "\tpkg\tgeneral volume package stats\n");
+            fprintf(stderr, "\tvicep\tvice partition stats\n");
+            fprintf(stderr, "\thash\tvolume hash chain stats\n");
+#ifdef AFS_DEMAND_ATTACH_FS
+            fprintf(stderr, "\thdr\tvolume header cache stats\n");
+            fprintf(stderr, "\tvlru\tvlru generation stats\n");
+#endif
+            exit(0);
+        } else {
+            fprintf(stderr, "invalid stats subcommand\n");
+            exit(1);
+        }
+    } else {
+        command = FSYNC_VOL_STATS_GENERAL;
+    }
+
+    if ((ti = as->parms[CUSTOM_PARMS_OFFSET+1].items)) { /* -arg1 */
+        switch (command) {
+        case FSYNC_VOL_STATS_VICEP:
+            /* bound the copy by the destination field itself */
+            strlcpy(scom.args.partName, ti->data, sizeof(scom.args.partName));
+            break;
+        case FSYNC_VOL_STATS_HASH:
+            scom.args.hash_bucket = atoi(ti->data);
+            break;
+        case FSYNC_VOL_STATS_VLRU:
+            scom.args.vlru_generation = atoi(ti->data);
+            break;
+        default:
+            fprintf(stderr, "unrecognized arguments\n");
+            exit(1);
+        }
+    } else {
+        switch (command) {
+        case FSYNC_VOL_STATS_VICEP:
+        case FSYNC_VOL_STATS_HASH:
+        case FSYNC_VOL_STATS_VLRU:
+            fprintf(stderr, "this subcommand requires more parameters\n");
+            exit(1);
+        }
+    }
+
+    common_prolog(as, &state);
+
+    fprintf(stderr, "calling FSYNC_StatsOp with command code %d (%s)\n",
+            command, command_code_to_string(command));
+
+    code = FSYNC_StatsOp(&scom, command, FSYNC_WHATEVER, &res);
+
+    switch (code) {
+    case SYNC_OK:
+    case SYNC_DENIED:
+        break;
+    default:
+        fprintf(stderr, "possible sync protocol error. return code was %d\n", code);
+    }
+
+    fprintf(stderr, "FSYNC_StatsOp returned %d (%s)\n", code, response_code_to_string(code));
+    fprintf(stderr, "protocol response code was %d (%s)\n",
+            res.hdr.response, response_code_to_string(res.hdr.response));
+    fprintf(stderr, "protocol reason code was %d (%s)\n",
+            res.hdr.reason, reason_code_to_string(res.hdr.reason));
+
+    VDisconnectFS();
+
+    if (res.hdr.response == SYNC_OK) {
+        /* sres aliases res_buf, i.e. the payload that was just received */
+        switch (command) {
+        case FSYNC_VOL_STATS_GENERAL:
+            print_vol_stats_general(sres.vol_stats);
+            break;
+        case FSYNC_VOL_STATS_VICEP:
+            print_vol_stats_viceP(sres.vicep_stats);
+            break;
+        case FSYNC_VOL_STATS_HASH:
+            print_vol_stats_hash(sres.hash_stats);
+            break;
+#ifdef AFS_DEMAND_ATTACH_FS
+        case FSYNC_VOL_STATS_HDR:
+            print_vol_stats_hdr(sres.hdr_stats);
+            break;
+#endif /* AFS_DEMAND_ATTACH_FS */
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Dumps a VolPkgStats record to stdout.  Each 64-bit counter is split
+ * with SplitInt64() and shown as a { hi, lo } pair of 32-bit values.
+ */
+static void
+print_vol_stats_general(VolPkgStats * stats)
+{
+    int i;
+    afs_uint32 hi, lo;
+
+    printf("VolPkgStats = {\n");
+#ifdef AFS_DEMAND_ATTACH_FS
+    /* one line per volume attach state */
+    for (i = 0; i < VOL_STATE_COUNT; i++) {
+        printf("\tvol_state_count[%s] = %d\n",
+               vol_state_to_string(i),
+               stats->state_levels[i]);
+    }
+
+    SplitInt64(stats->hash_looks, hi, lo);
+    printf("\thash_looks = {\n\t\thi = %u\n\t\tlo = %u\n\t}\n", hi, lo);
+
+    SplitInt64(stats->hash_reorders, hi, lo);
+    printf("\thash_reorders = {\n\t\thi = %u\n\t\tlo = %u\n\t}\n", hi, lo);
+
+    SplitInt64(stats->salvages, hi, lo);
+    printf("\tsalvages = {\n\t\thi = %u\n\t\tlo = %u\n\t}\n", hi, lo);
+
+    SplitInt64(stats->vol_ops, hi, lo);
+    printf("\tvol_ops = {\n\t\thi = %u\n\t\tlo = %u\n\t}\n", hi, lo);
+#endif
+    SplitInt64(stats->hdr_loads, hi, lo);
+    printf("\thdr_loads = {\n\t\thi = %u\n\t\tlo = %u\n\t}\n", hi, lo);
+
+    SplitInt64(stats->hdr_gets, hi, lo);
+    printf("\thdr_gets = {\n\t\thi = %u\n\t\tlo = %u\n\t}\n", hi, lo);
+
+    SplitInt64(stats->attaches, hi, lo);
+    printf("\tattaches = {\n\t\thi = %u\n\t\tlo = %u\n\t}\n", hi, lo);
+
+    SplitInt64(stats->soft_detaches, hi, lo);
+    printf("\tsoft_detaches = {\n\t\thi = %u\n\t\tlo = %u\n\t}\n", hi, lo);
+
+    printf("\thdr_cache_size = %d\n", stats->hdr_cache_size);
+
+    printf("}\n");
+}
+
+/* Dumps a DiskPartitionStats record to stdout. */
+static void
+print_vol_stats_viceP(struct DiskPartitionStats * stats)
+{
+    printf("DiskPartitionStats = {\n"
+           "\tfree = %d\n"
+           "\tminFree = %d\n"
+           "\ttotalUsable = %d\n"
+           "\tf_files = %d\n",
+           stats->free, stats->minFree, stats->totalUsable, stats->f_files);
+#ifdef AFS_DEMAND_ATTACH_FS
+    /* only present in demand-attach builds */
+    printf("\tvol_list_len = %d\n", stats->vol_list_len);
+#endif
+    printf("}\n");
+}
+
+/*
+ * Dumps a VolumeHashChainStats record to stdout.  In demand-attach builds
+ * the 64-bit chain counters are split with SplitInt64() and shown as
+ * { hi, lo } pairs.
+ *
+ * The header line now names the structure actually being printed; it
+ * previously said "DiskPartitionStats".
+ */
+static void
+print_vol_stats_hash(struct VolumeHashChainStats * stats)
+{
+#ifdef AFS_DEMAND_ATTACH_FS
+    afs_uint32 hi, lo;  /* only the demand-attach counters need splitting */
+#endif
+
+    printf("VolumeHashChainStats = {\n");
+    printf("\ttable_size = %d\n", stats->table_size);
+    printf("\tchain_len = %d\n", stats->chain_len);
+
+#ifdef AFS_DEMAND_ATTACH_FS
+    printf("\tchain_cacheCheck = %d\n", stats->chain_cacheCheck);
+    printf("\tchain_busy = %d\n", stats->chain_busy);
+
+    SplitInt64(stats->chain_looks, hi, lo);
+    printf("\tchain_looks = {\n");
+    printf("\t\thi = %u\n", hi);
+    printf("\t\tlo = %u\n", lo);
+    printf("\t}\n");
+
+    SplitInt64(stats->chain_gets, hi, lo);
+    printf("\tchain_gets = {\n");
+    printf("\t\thi = %u\n", hi);
+    printf("\t\tlo = %u\n", lo);
+    printf("\t}\n");
+
+    SplitInt64(stats->chain_reorders, hi, lo);
+    printf("\tchain_reorders = {\n");
+    printf("\t\thi = %u\n", hi);
+    printf("\t\tlo = %u\n", lo);
+    printf("\t}\n");
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+    printf("}\n");
+}
+
+
+#ifdef AFS_DEMAND_ATTACH_FS
+/* Dumps a volume_hdr_LRU_stats record to stdout. */
+static void
+print_vol_stats_hdr(struct volume_hdr_LRU_stats * stats)
+{
+    printf("volume_hdr_LRU_stats = {\n"
+           "\tfree = %d\n"
+           "\tused = %d\n"
+           "\tattached = %d\n"
+           "}\n",
+           stats->free, stats->used, stats->attached);
+}
+#endif /* AFS_DEMAND_ATTACH_FS */
+
--- /dev/null
+/*
+ * Copyright 2000, International Business Machines Corporation and others.
+ * All Rights Reserved.
+ *
+ * This software has been released under the terms of the IBM Public
+ * License. For details, see the LICENSE file in the top-level source
+ * directory or online at http://www.openafs.org/dl/license10.html
+ *
+ * Portions Copyright (c) 2006 Sine Nomine Associates
+ */
+
+/*
+ System: VICE-TWO
+ Module: fssync.c
+ Institution: The Information Technology Center, Carnegie-Mellon University
+
+ */
+#ifdef notdef
+
+/* All this is going away in early 1989 */
+int newVLDB; /* Compatibility flag */
+
+#endif
+/* File-local flag, fixed at 1; the disabled block above once declared it as a global. */
+static int newVLDB = 1;
+
+
+/* The definitions below apply only to non-pthread (LWP) builds. */
+#ifndef AFS_PTHREAD_ENV
+/* Priority passed to LWP_CreateProcess() for the FSYNC_sync thread. */
+#define USUAL_PRIORITY (LWP_MAX_PRIORITY - 2)
+
+/*
+ * stack size increased from 8K because the HP machine seemed to have trouble
+ * with the smaller stack
+ */
+#define USUAL_STACK_SIZE (24 * 1024)
+#endif /* !AFS_PTHREAD_ENV */
+
+/*
+ fssync-server.c
+ File server synchronization with external volume utilities.
+ server-side implementation
+ */
+
+/* This controls the size of an fd_set; it must be defined early before
+ * the system headers define that type and the macros that operate on it.
+ * Its value should be as large as the maximum file descriptor limit we
+ * are likely to run into on any platform. Right now, that is 65536
+ * which is the default hard fd limit on Solaris 9 */
+#ifndef _WIN32
+#define FD_SETSIZE 65536
+#endif
+
+#include <afsconfig.h>
+#include <afs/param.h>
+
+RCSID
+ ("$Header$");
+
+#include <sys/types.h>
+#include <stdio.h>
+#ifdef AFS_NT40_ENV
+#include <winsock2.h>
+#include <time.h>
+#else
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include <sys/time.h>
+#endif
+#include <errno.h>
+#ifdef AFS_PTHREAD_ENV
+#include <assert.h>
+#else /* AFS_PTHREAD_ENV */
+#include <afs/assert.h>
+#endif /* AFS_PTHREAD_ENV */
+#include <signal.h>
+
+#ifdef HAVE_STRING_H
+#include <string.h>
+#else
+#ifdef HAVE_STRINGS_H
+#include <strings.h>
+#endif
+#endif
+
+
+#include <rx/xdr.h>
+#include <afs/afsint.h>
+#include "nfs.h"
+#include <afs/errors.h>
+#include "daemon_com.h"
+#include "fssync.h"
+#include "lwp.h"
+#include "lock.h"
+#include <afs/afssyscalls.h>
+#include "ihandle.h"
+#include "vnode.h"
+#include "volume.h"
+#include "partition.h"
+
+
+#ifdef FSSYNC_BUILD_SERVER
+
+/* Logging routine defined outside this file. */
+/*@printflike@*/ extern void Log(const char *format, ...);
+
+/* Replace any existing osi_Assert with a version that only evaluates its argument. */
+#ifdef osi_Assert
+#undef osi_Assert
+#endif
+#define osi_Assert(e) (void)(e)
+
+/* Function pointer with external linkage; not assigned anywhere in this part of the file. */
+int (*V_BreakVolumeCallbacks) ();
+
+#define MAXHANDLERS 4 /* Up to 4 clients; must be at least 2, so that
+ * move = dump+restore can run on single server */
+#define MAXOFFLINEVOLUMES 128 /* This needs to be as big as the maximum
+ * number that would be offline for 1 operation.
+ * Current winner is salvage, which needs all
+ * cloned read-only copies offline when salvaging
+ * a single read-write volume */
+
+#define MAX_BIND_TRIES 5 /* Number of times to retry socket bind */
+
+
+
+/* One row of MAXOFFLINEVOLUMES entries for each of the MAXHANDLERS handler slots. */
+static struct offlineInfo OfflineVolumes[MAXHANDLERS][MAXOFFLINEVOLUMES];
+
+static int AcceptSd = -1; /* Socket used by server for accepting connections */
+
+/* Declared without a parameter list; the definition takes a struct sockaddr_in *. */
+static int getport();
+
+/* Forward declarations */
+static void FSYNC_sync();
+static void FSYNC_newconnection();
+static void FSYNC_com();
+static void FSYNC_Drop();
+static void AcceptOn();
+static void AcceptOff();
+static void InitHandler();
+static void CallHandler(fd_set * fdsetp);
+static int AddHandler();
+static int FindHandler();
+static int FindHandler_r();
+static int RemoveHandler();
+static void GetHandler(fd_set * fdsetp, int *maxfdp);
+
+/* Log verbosity level, defined outside this file. */
+extern int LogLevel;
+
+/* Volume-operation entry point; takes the connection fd and the raw SYNC command. */
+static afs_int32 FSYNC_com_VolOp(int fd, SYNC_command * com, SYNC_response * res);
+
+/* Per-opcode volume-operation handlers; each takes the FSSYNC_VolOp_command form. */
+static afs_int32 FSYNC_com_VolOn(FSSYNC_VolOp_command * com, SYNC_response * res);
+static afs_int32 FSYNC_com_VolOff(FSSYNC_VolOp_command * com, SYNC_response * res);
+static afs_int32 FSYNC_com_VolMove(FSSYNC_VolOp_command * com, SYNC_response * res);
+static afs_int32 FSYNC_com_VolBreakCBKs(FSSYNC_VolOp_command * com, SYNC_response * res);
+static afs_int32 FSYNC_com_VolDone(FSSYNC_VolOp_command * com, SYNC_response * res);
+static afs_int32 FSYNC_com_VolQuery(FSSYNC_VolOp_command * com, SYNC_response * res);
+static afs_int32 FSYNC_com_VolHdrQuery(FSSYNC_VolOp_command * com, SYNC_response * res);
+#ifdef AFS_DEMAND_ATTACH_FS
+static afs_int32 FSYNC_com_VolOpQuery(FSSYNC_VolOp_command * com, SYNC_response * res);
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+/* Statistics entry point; takes the connection fd and the raw SYNC command. */
+static afs_int32 FSYNC_com_StatsOp(int fd, SYNC_command * com, SYNC_response * res);
+
+/* Per-opcode statistics handlers; each takes the FSSYNC_StatsOp_command form. */
+static afs_int32 FSYNC_com_StatsOpGeneral(FSSYNC_StatsOp_command * scom, SYNC_response * res);
+static afs_int32 FSYNC_com_StatsOpViceP(FSSYNC_StatsOp_command * scom, SYNC_response * res);
+static afs_int32 FSYNC_com_StatsOpHash(FSSYNC_StatsOp_command * scom, SYNC_response * res);
+static afs_int32 FSYNC_com_StatsOpHdr(FSSYNC_StatsOp_command * scom, SYNC_response * res);
+static afs_int32 FSYNC_com_StatsOpVLRU(FSSYNC_StatsOp_command * scom, SYNC_response * res);
+
+
+/* Helper relating a volume-operation command to an FSSYNC_VolOp_info record. */
+static void FSYNC_com_to_info(FSSYNC_VolOp_command * vcom, FSSYNC_VolOp_info * info);
+
+
+/*
+ * This lock controls access to the handler array. The overhead
+ * is minimal in non-preemptive environments.
+ */
+struct Lock FSYNC_handler_lock;
+
+/*
+ * FSYNC_fsInit -- start the FSSYNC server.
+ *
+ * Initialises the handler-table lock and spawns FSYNC_sync, either as a
+ * detached pthread or (non-pthread builds) as an LWP process.  Any
+ * failure to create the thread is treated as fatal via assert().
+ */
+void
+FSYNC_fsInit(void)
+{
+#ifdef AFS_PTHREAD_ENV
+    pthread_t tid;
+    pthread_attr_t tattr;
+#else /* AFS_PTHREAD_ENV */
+    PROCESS pid;
+#endif /* AFS_PTHREAD_ENV */
+    int code;
+
+    Lock_Init(&FSYNC_handler_lock);
+
+    /*
+     * Each call is made outside of assert(): an assert() argument is not
+     * evaluated at all when NDEBUG is defined, which would silently skip
+     * thread creation.
+     */
+#ifdef AFS_PTHREAD_ENV
+    code = pthread_attr_init(&tattr);
+    assert(code == 0);
+    code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_DETACHED);
+    assert(code == 0);
+    code = pthread_create(&tid, &tattr, FSYNC_sync, NULL);
+    assert(code == 0);
+#else /* AFS_PTHREAD_ENV */
+    code = LWP_CreateProcess(FSYNC_sync, USUAL_STACK_SIZE, USUAL_PRIORITY,
+                             (void *)0, "FSYNC_sync", &pid);
+    assert(code == LWP_SUCCESS);
+#endif /* AFS_PTHREAD_ENV */
+}
+
+/* Read-descriptor set passed to select() by the FSYNC_sync main loop. */
+static fd_set FSYNC_readfds;
+
+/*
+ * getport -- create the FSSYNC TCP socket and fill in its address.
+ *
+ * addr  receives the service address: 127.0.0.1 (loopback), port 2040.
+ *
+ * The socket is only created here; the caller binds or connects it.
+ * Returns the new socket descriptor.  Failure of socket() is fatal.
+ */
+static int
+getport(struct sockaddr_in *addr)
+{
+    int sd;
+
+    memset(addr, 0, sizeof(*addr));
+
+    /* keep the socket() call out of assert() so it survives NDEBUG */
+    sd = socket(AF_INET, SOCK_STREAM, 0);
+    assert(sd >= 0);
+
+#ifdef STRUCT_SOCKADDR_HAS_SA_LEN
+    addr->sin_len = sizeof(struct sockaddr_in);
+#endif
+    addr->sin_family = AF_INET;	/* was localhost->h_addrtype */
+    addr->sin_addr.s_addr = htonl(0x7f000001);	/* 127.0.0.1 */
+    addr->sin_port = htons(2040);	/* sin_port is in network byte order */
+
+    return sd;
+}
+
+
+/*
+ * FSYNC_sync -- body of the FSSYNC server thread (or LWP).
+ *
+ * Waits until VInit becomes non-zero, then creates the service socket,
+ * binds it (retrying up to MAX_BIND_TRIES times, 5 seconds apart),
+ * starts listening, and loops forever dispatching readable descriptors
+ * to their registered handlers.  Bind or listen failure is fatal.
+ * This function never returns.
+ */
+static void
+FSYNC_sync()
+{
+    struct sockaddr_in addr;
+    int on = 1;
+    extern int VInit;
+    int code;
+    int numTries;
+#ifdef AFS_PTHREAD_ENV
+    int tid;
+#endif
+
+#ifndef AFS_NT40_ENV
+    /* a client vanishing mid-write must not kill the server */
+    (void)signal(SIGPIPE, SIG_IGN);
+#endif
+
+#ifdef AFS_PTHREAD_ENV
+    /* set our 'thread-id' so that the host hold table works */
+    MUTEX_ENTER(&rx_stats_mutex);	/* protects rxi_pthread_hinum */
+    tid = ++rxi_pthread_hinum;
+    MUTEX_EXIT(&rx_stats_mutex);
+    pthread_setspecific(rx_thread_id_key, (void *)tid);
+    Log("Set thread id %d for FSYNC_sync\n", tid);
+#endif /* AFS_PTHREAD_ENV */
+
+    while (!VInit) {
+        /* Let somebody else run until level > 0.  That doesn't mean that
+         * all volumes have been attached. */
+#ifdef AFS_PTHREAD_ENV
+        pthread_yield();
+#else /* AFS_PTHREAD_ENV */
+        LWP_DispatchProcess();
+#endif /* AFS_PTHREAD_ENV */
+    }
+
+    AcceptSd = getport(&addr);
+    /* Reuseaddr needed because system inexplicably leaves crud lying around */
+    code =
+        setsockopt(AcceptSd, SOL_SOCKET, SO_REUSEADDR, (char *)&on,
+                   sizeof(on));
+    if (code)
+        Log("FSYNC_sync: setsockopt failed with (%d)\n", errno);
+
+    for (numTries = 0; numTries < MAX_BIND_TRIES; numTries++) {
+        if ((code =
+             bind(AcceptSd, (struct sockaddr *)&addr, sizeof(addr))) == 0)
+            break;
+        Log("FSYNC_sync: bind failed with (%d), will sleep and retry\n",
+            errno);
+        sleep(5);
+    }
+    assert(!code);
+
+    /* a socket that is not listening would leave the loop below waiting
+     * forever on a descriptor that can never become ready, so treat a
+     * listen() failure the same way as a bind() failure */
+    code = listen(AcceptSd, 100);
+    if (code)
+        Log("FSYNC_sync: listen failed with (%d)\n", errno);
+    assert(!code);
+
+    InitHandler();
+    AcceptOn();
+    for (;;) {
+        int maxfd;
+        GetHandler(&FSYNC_readfds, &maxfd);
+        /* Note: check for >= 1 below is essential since IOMGR_select
+         * doesn't have exactly same semantics as select.
+         */
+#ifdef AFS_PTHREAD_ENV
+        if (select(maxfd + 1, &FSYNC_readfds, NULL, NULL, NULL) >= 1)
+#else /* AFS_PTHREAD_ENV */
+        if (IOMGR_Select(maxfd + 1, &FSYNC_readfds, NULL, NULL, NULL) >= 1)
+#endif /* AFS_PTHREAD_ENV */
+            CallHandler(&FSYNC_readfds);
+    }
+}
+
+static void
+FSYNC_newconnection(int afd)
+{
+ struct sockaddr_in other;
+ int junk, fd;
+ junk = sizeof(other);
+ fd = accept(afd, (struct sockaddr *)&other, &junk);
+ if (fd == -1) {
+ Log("FSYNC_newconnection: accept failed, errno==%d\n", errno);
+ assert(1 == 2);
+ } else if (!AddHandler(fd, FSYNC_com)) {
+ AcceptOff();
+ assert(AddHandler(fd, FSYNC_com));
+ }
+}
+
+/*
+ * FSYNC_com -- service one request arriving on fssync descriptor fd.
+ *
+ * Reads a command, checks its protocol version, dispatches it (under
+ * VOL_LOCK) to the volume-op or stats-op handler, and sends the
+ * response.  The connection is dropped when the read fails or when the
+ * response carries SYNC_FLAG_CHANNEL_SHUTDOWN.
+ *
+ * FS_cnt counts every invocation, including failed reads.
+ */
+afs_int32 FS_cnt = 0;
+static void
+FSYNC_com(int fd)
+{
+    SYNC_command com;
+    SYNC_response res;
+    SYNC_PROTO_BUF_DECL(com_buf);
+    SYNC_PROTO_BUF_DECL(res_buf);
+
+    /* response starts out as a zeroed header with no payload */
+    memset(&res.hdr, 0, sizeof(res.hdr));
+    res.hdr.proto_version = FSYNC_PROTO_VERSION;
+    res.hdr.response_len = sizeof(res.hdr);
+    res.payload.buf = (void *)res_buf;
+    res.payload.len = SYNC_PROTO_MAX_LEN;
+
+    /* receive buffer for the command payload */
+    com.payload.buf = (void *)com_buf;
+    com.payload.len = SYNC_PROTO_MAX_LEN;
+
+    ++FS_cnt;
+    if (SYNC_getCom(fd, &com) != 0) {
+        Log("FSYNC_com: read failed; dropping connection (cnt=%d)\n", FS_cnt);
+        FSYNC_Drop(fd);
+        return;
+    }
+
+    if (com.hdr.proto_version == FSYNC_PROTO_VERSION) {
+        VOL_LOCK;
+        switch (com.hdr.command) {
+        /* volume operations */
+        case FSYNC_VOL_ON:
+        case FSYNC_VOL_OFF:
+        case FSYNC_VOL_LISTVOLUMES:
+        case FSYNC_VOL_NEEDVOLUME:
+        case FSYNC_VOL_MOVE:
+        case FSYNC_VOL_BREAKCBKS:
+        case FSYNC_VOL_DONE:
+        case FSYNC_VOL_QUERY:
+        case FSYNC_VOL_QUERY_HDR:
+        case FSYNC_VOL_QUERY_VOP:
+            res.hdr.response = FSYNC_com_VolOp(fd, &com, &res);
+            break;
+
+        /* statistics queries */
+        case FSYNC_VOL_STATS_GENERAL:
+        case FSYNC_VOL_STATS_VICEP:
+        case FSYNC_VOL_STATS_HASH:
+        case FSYNC_VOL_STATS_HDR:
+        case FSYNC_VOL_STATS_VLRU:
+            res.hdr.response = FSYNC_com_StatsOp(fd, &com, &res);
+            break;
+
+        /* orderly shutdown requested by the client */
+        case SYNC_COM_CHANNEL_CLOSE:
+            res.hdr.flags |= SYNC_FLAG_CHANNEL_SHUTDOWN;
+            res.hdr.response = SYNC_OK;
+            break;
+
+        default:
+            res.hdr.response = SYNC_BAD_COMMAND;
+            break;
+        }
+        VOL_UNLOCK;
+    } else {
+        /* version mismatch: answer with an error and close the channel */
+        Log("FSYNC_com: invalid protocol version (%u)\n", com.hdr.proto_version);
+        res.hdr.flags |= SYNC_FLAG_CHANNEL_SHUTDOWN;
+        res.hdr.response = SYNC_COM_ERROR;
+    }
+
+    SYNC_putRes(fd, &res);
+    if ((res.hdr.flags & SYNC_FLAG_CHANNEL_SHUTDOWN) != 0)
+        FSYNC_Drop(fd);
+}
+
+/*
+ * FSYNC_com_VolOp -- decode a volume-operation command and dispatch it.
+ *
+ * fd   descriptor the command arrived on; selects this client's row of
+ *      OfflineVolumes.
+ * com  the received command; its payload must be exactly one
+ *      FSSYNC_VolOp_hdr.
+ * res  response under construction.
+ *
+ * Returns the handler's SYNC_* code.  A wrongly sized packet yields
+ * SYNC_COM_ERROR and flags the channel for shutdown; an unrecognised
+ * command yields SYNC_BAD_COMMAND.
+ */
+static afs_int32
+FSYNC_com_VolOp(int fd, SYNC_command * com, SYNC_response * res)
+{
+    FSSYNC_VolOp_command vcom;
+    int slot;
+
+    if (com->recv_len != (sizeof(com->hdr) + sizeof(FSSYNC_VolOp_hdr))) {
+        res->hdr.flags |= SYNC_FLAG_CHANNEL_SHUTDOWN;
+        res->hdr.reason = SYNC_REASON_MALFORMED_PACKET;
+        return SYNC_COM_ERROR;
+    }
+
+    vcom.com = com;
+    vcom.hdr = &com->hdr;
+    vcom.vop = (FSSYNC_VolOp_hdr *) com->payload.buf;
+    vcom.volumes = OfflineVolumes[FindHandler(fd)];
+
+    /* find this client's existing entry for (volume, partition), if any */
+    vcom.v = NULL;
+    slot = 0;
+    while (slot < MAXOFFLINEVOLUMES && vcom.v == NULL) {
+        if (vcom.volumes[slot].volumeID == vcom.vop->volume
+            && strncmp(vcom.volumes[slot].partName, vcom.vop->partName,
+                       sizeof(vcom.volumes[slot].partName)) == 0) {
+            vcom.v = &vcom.volumes[slot];
+        }
+        slot++;
+    }
+
+    switch (com->hdr.command) {
+    case FSYNC_VOL_ON:
+        return FSYNC_com_VolOn(&vcom, res);
+    case FSYNC_VOL_OFF:
+    case FSYNC_VOL_NEEDVOLUME:
+        return FSYNC_com_VolOff(&vcom, res);
+    case FSYNC_VOL_LISTVOLUMES:
+        /* accepted, but nothing is done for it */
+        return SYNC_OK;
+    case FSYNC_VOL_MOVE:
+        return FSYNC_com_VolMove(&vcom, res);
+    case FSYNC_VOL_BREAKCBKS:
+        return FSYNC_com_VolBreakCBKs(&vcom, res);
+    case FSYNC_VOL_DONE:
+        return FSYNC_com_VolDone(&vcom, res);
+    case FSYNC_VOL_QUERY:
+        return FSYNC_com_VolQuery(&vcom, res);
+    case FSYNC_VOL_QUERY_HDR:
+        return FSYNC_com_VolHdrQuery(&vcom, res);
+#ifdef AFS_DEMAND_ATTACH_FS
+    case FSYNC_VOL_QUERY_VOP:
+        return FSYNC_com_VolOpQuery(&vcom, res);
+#endif /* AFS_DEMAND_ATTACH_FS */
+    default:
+        break;
+    }
+
+    return SYNC_BAD_COMMAND;
+}
+
+/*
+ * FSYNC_com_VolOn -- handle FSYNC_VOL_ON: give a volume back to the
+ * fileserver.
+ *
+ * Clears the client's offline-table entry (if it had one) and attaches
+ * "/<VFORMAT name>" on the requested partition; demand-attach builds
+ * pre-attach instead and deregister any pending volume operation.
+ *
+ * Returns SYNC_OK on success, SYNC_FAILED for a malformed partition
+ * name, or SYNC_DENIED with res->hdr.reason set to the attach error.
+ */
+static afs_int32
+FSYNC_com_VolOn(FSSYNC_VolOp_command * vcom, SYNC_response * res)
+{
+    char tvolName[VMAXPATHLEN];
+    Volume *vp;
+    Error error;
+
+    if (SYNC_verifyProtocolString(vcom->vop->partName, sizeof(vcom->vop->partName))) {
+        res->hdr.reason = SYNC_REASON_MALFORMED_PACKET;
+        return SYNC_FAILED;
+    }
+
+    /*
+     * This is where a detached volume gets reattached.  However in the
+     * special case where the volume is merely busy, it is already
+     * attached and it is only necessary to clear the busy flag.  See
+     * defect #2080 for details.
+     */
+
+    /* is the volume already attached? */
+#ifdef notdef
+    /*
+     * XXX With the following enabled we had bizarre problems where the backup id would
+     * be reset to 0; that was due to the interaction between fileserver/volserver in that they
+     * both keep volumes in memory and the changes wouldn't be made to the fileserver. Some of
+     * the problems were due to refcnt changes as result of VGetVolume/VPutVolume which would call
+     * VOffline, etc. when we don't want to; someday the whole #2080 issue should be revisited to
+     * be done right XXX
+     */
+    vp = VGetVolume_r(&error, vcom->vop->volume);
+    if (vp) {
+        /* yep, is the BUSY flag set? */
+        if (vp->specialStatus == VBUSY) {
+
+            /* yep, clear BUSY flag */
+
+            vp->specialStatus = 0;
+            /* make sure vol is online */
+            if (vcom->v) {
+                vcom->v->volumeID = 0;
+                V_inUse(vp) = 1;	/* online */
+            }
+            VPutVolume_r(vp);
+            break;
+        }
+        VPutVolume_r(vp);
+    }
+#endif /* notdef */
+
+    /* so, we need to attach the volume */
+
+    /* the client no longer holds this volume offline */
+    if (vcom->v != NULL) {
+        vcom->v->volumeID = 0;
+    }
+
+    /* build "/<volume header name>", always NUL terminated */
+    tvolName[0] = '/';
+    snprintf(&tvolName[1], sizeof(tvolName)-1, VFORMAT, vcom->vop->volume);
+    tvolName[sizeof(tvolName)-1] = '\0';
+
+#ifdef AFS_DEMAND_ATTACH_FS
+    vp = VPreAttachVolumeByName_r(&error, vcom->vop->partName, tvolName,
+                                  V_VOLUPD);
+    if (vp && vp->pending_vol_op) {
+        VDeregisterVolOp_r(vp, vp->pending_vol_op);
+    }
+#else /* AFS_DEMAND_ATTACH_FS */
+    vp = VAttachVolumeByName_r(&error, vcom->vop->partName, tvolName,
+                               V_VOLUPD);
+    if (vp) {
+        VPutVolume_r(vp);
+    }
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+    if (error) {
+        res->hdr.reason = error;
+        return SYNC_DENIED;
+    }
+
+    return SYNC_OK;
+}
+
+/*
+ * FSYNC_com_VolOff -- handle FSYNC_VOL_OFF and FSYNC_VOL_NEEDVOLUME:
+ * check a volume out of the fileserver for an external program.
+ *
+ * vcom  decoded command; vcom->v is the client's existing offline-table
+ *       entry for this volume, or NULL.
+ * res   response under construction; hdr.reason is set on some failures.
+ *
+ * Returns SYNC_OK, SYNC_FAILED (malformed partition name) or SYNC_DENIED.
+ */
+static afs_int32
+FSYNC_com_VolOff(FSSYNC_VolOp_command * vcom, SYNC_response * res)
+{
+ FSSYNC_VolOp_info info;
+ afs_int32 code = SYNC_OK;
+ int i;
+ Volume * vp, * nvp;
+ Error error;
+
+ if (SYNC_verifyProtocolString(vcom->vop->partName, sizeof(vcom->vop->partName))) {
+ res->hdr.reason = SYNC_REASON_MALFORMED_PACKET;
+ code = SYNC_FAILED;
+ goto done;
+ }
+
+ /* not already offline, we need to find a slot for newly offline volume */
+ if (vcom->hdr->programType == debugUtility) {
+ /* debug utilities do not have their operations tracked */
+ vcom->v = NULL;
+ } else {
+ if (!vcom->v) {
+ /* volumeID == 0 marks a free entry in this client's table */
+ for (i = 0; i < MAXOFFLINEVOLUMES; i++) {
+ if (vcom->volumes[i].volumeID == 0) {
+ vcom->v = &vcom->volumes[i];
+ break;
+ }
+ }
+ }
+ if (!vcom->v) {
+ /* table full: this client already holds MAXOFFLINEVOLUMES volumes */
+ goto deny;
+ }
+ }
+
+ FSYNC_com_to_info(vcom, &info);
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ vp = VLookupVolume_r(&error, vcom->vop->volume, NULL);
+#else
+ vp = VGetVolume_r(&error, vcom->vop->volume);
+#endif
+
+ if (vp) {
+ if ((vcom->vop->partName[0] != 0) &&
+ (strncmp(vcom->vop->partName, vp->partition->name,
+ sizeof(vcom->vop->partName)) != 0)) {
+ /* volume on desired partition is not online, so we
+ * should treat this as an offline volume.
+ */
+#ifndef AFS_DEMAND_ATTACH_FS
+ VPutVolume_r(vp);
+#endif
+ vp = NULL;
+ goto done;
+ }
+ }
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ if (vp) {
+ ProgramType type = (ProgramType) vcom->hdr->programType;
+
+ /* do initial filtering of requests */
+
+ /* enforce mutual exclusion for volume ops */
+ if (vp->pending_vol_op) {
+ if (vp->pending_vol_op->com.programType != type) {
+ Log("volume %u already checked out\n", vp->hashid);
+ /* XXX debug */
+ Log("vp->vop = { com = { ver=%u, prog=%d, com=%d, reason=%d, len=%u, flags=0x%x }, vop = { vol=%u, part='%s' } }\n",
+ vp->pending_vol_op->com.proto_version,
+ vp->pending_vol_op->com.programType,
+ vp->pending_vol_op->com.command,
+ vp->pending_vol_op->com.reason,
+ vp->pending_vol_op->com.command_len,
+ vp->pending_vol_op->com.flags,
+ vp->pending_vol_op->vop.volume,
+ vp->pending_vol_op->vop.partName );
+ Log("vcom = { com = { ver=%u, prog=%d, com=%d, reason=%d, len=%u, flags=0x%x } , vop = { vol=%u, part='%s' } }\n",
+ vcom->hdr->proto_version,
+ vcom->hdr->programType,
+ vcom->hdr->command,
+ vcom->hdr->reason,
+ vcom->hdr->command_len,
+ vcom->hdr->flags,
+ vcom->vop->volume,
+ vcom->vop->partName);
+ res->hdr.reason = FSYNC_EXCLUSIVE;
+ goto deny;
+ } else {
+ /* same program type as the current holder: allowed, but logged */
+ Log("warning: volume %u recursively checked out by programType id %d\n",
+ vp->hashid, vcom->hdr->programType);
+ }
+ }
+
+ /* filter based upon requestor
+ *
+ * volume utilities are not allowed to check out volumes
+ * which are in an error state
+ *
+ * unknown utility programs will be denied on principal
+ */
+ switch (type) {
+ case salvageServer:
+ case debugUtility:
+ /* give the salvageserver lots of liberty */
+ break;
+ case volumeUtility:
+ if ((V_attachState(vp) == VOL_STATE_ERROR) ||
+ (V_attachState(vp) == VOL_STATE_SALVAGING)) {
+ goto deny;
+ }
+ break;
+ default:
+ Log("bad program type passed to FSSYNC\n");
+ goto deny;
+ }
+
+ /* short circuit for offline volume states
+ * so we can avoid I/O penalty of attachment */
+ switch (V_attachState(vp)) {
+ case VOL_STATE_UNATTACHED:
+ case VOL_STATE_PREATTACHED:
+ case VOL_STATE_SALVAGING:
+ case VOL_STATE_ERROR:
+ /* register the volume operation metadata with the volume
+ *
+ * if the volume is currently pre-attached, attach2()
+ * will evaluate the vol op metadata to determine whether
+ * attaching the volume would be safe */
+ VRegisterVolOp_r(vp, &info);
+ goto done;
+ default:
+ break;
+ }
+
+ /* convert to heavyweight ref */
+ nvp = VGetVolumeByVp_r(&error, vp);
+
+ /* register the volume operation metadata with the volume */
+ /* NOTE(review): this registration happens before the nvp check below,
+ * so the deny path leaves the op registered on vp -- confirm that is
+ * intended. */
+ VRegisterVolOp_r(vp, &info);
+
+ if (!nvp) {
+ Log("FSYNC_com_VolOff: failed to get heavyweight reference to volume %u\n",
+ vcom->vop->volume);
+ res->hdr.reason = FSYNC_VOL_PKG_ERROR;
+ goto deny;
+ }
+ vp = nvp;
+ }
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+ if (vp) {
+ if (VVolOpLeaveOnline_r(vp, &info)) {
+ VUpdateVolume_r(&error, vp, VOL_UPDATE_WAIT); /* At least get volume stats right */
+ if (LogLevel) {
+ Log("FSYNC: Volume %u (%s) was left on line for an external %s request\n",
+ V_id(vp), V_name(vp),
+ vcom->hdr->reason == V_CLONE ? "clone" :
+ vcom->hdr->reason == V_READONLY ? "readonly" :
+ vcom->hdr->reason == V_DUMP ? "dump" :
+ "UNKNOWN");
+ }
+ VPutVolume_r(vp);
+ } else {
+ if (VVolOpSetVBusy_r(vp, &info)) {
+ vp->specialStatus = VBUSY;
+ }
+
+ /* remember what volume we got, so we can keep track of how
+ * many volumes the volserver or whatever is using. Note that
+ * vp is valid since leaveonline is only set when vp is valid.
+ */
+ if (vcom->v) {
+ vcom->v->volumeID = vcom->vop->volume;
+ strlcpy(vcom->v->partName, vp->partition->name, sizeof(vcom->v->partName));
+ }
+
+ VOffline_r(vp, "A volume utility is running.");
+ vp = NULL;
+ }
+ }
+
+ done:
+ return code;
+
+ deny:
+ return SYNC_DENIED;
+}
+
+/*
+ * FSYNC_com_VolMove -- handle FSYNC_VOL_MOVE.
+ *
+ * Marks the volume (when it can be found) with specialStatus VMOVED and,
+ * if a callback-break routine is installed, breaks all callbacks on it.
+ * The "reason" field of the command carries the site the volume moved
+ * to.  Always returns SYNC_OK; res is not modified.
+ */
+static afs_int32
+FSYNC_com_VolMove(FSSYNC_VolOp_command * vcom, SYNC_response * res)
+{
+    Volume *movedVol;
+    Error lookupErr;
+
+    /* Set specialStatus so we stop sending back VBUSY. */
+#ifdef AFS_DEMAND_ATTACH_FS
+    movedVol = VLookupVolume_r(&lookupErr, vcom->vop->volume, NULL);
+    if (movedVol != NULL) {
+        movedVol->specialStatus = VMOVED;
+    }
+#else
+    movedVol = VGetVolume_r(&lookupErr, vcom->vop->volume);
+    if (movedVol != NULL) {
+        movedVol->specialStatus = VMOVED;
+        VPutVolume_r(movedVol);
+    }
+#endif
+
+    if (!V_BreakVolumeCallbacks) {
+        return SYNC_OK;
+    }
+
+    /* We cannot tell whether every callback really needs breaking, so
+     * break them all; an extra break is harmless for a move within the
+     * same server.  The volume lock is dropped around the call. */
+    Log("fssync: volume %u moved to %x; breaking all call backs\n",
+        vcom->vop->volume, vcom->hdr->reason);
+    VOL_UNLOCK;
+    (*V_BreakVolumeCallbacks) (vcom->vop->volume);
+    VOL_LOCK;
+
+    return SYNC_OK;
+}
+
+/*
+ * FSYNC_com_VolDone -- handle FSYNC_VOL_DONE.
+ *
+ * This call is made only after a volume has been deleted, so nothing is
+ * put back online: the volume is just removed from the client's
+ * OfflineVolumes entry and, in demand-attach builds, any pending volume
+ * operation on it is deregistered.  Always returns SYNC_OK.
+ */
+static afs_int32
+FSYNC_com_VolDone(FSSYNC_VolOp_command * vcom, SYNC_response * res)
+{
+#ifdef AFS_DEMAND_ATTACH_FS
+    Volume *doneVol;
+    Error lookupErr;
+#endif
+
+    if (vcom->v != NULL) {
+        vcom->v->volumeID = 0;
+    }
+
+#ifdef AFS_DEMAND_ATTACH_FS
+    doneVol = VLookupVolume_r(&lookupErr, vcom->vop->volume, NULL);
+    if (doneVol != NULL) {
+        if (doneVol->pending_vol_op != NULL) {
+            VDeregisterVolOp_r(doneVol, doneVol->pending_vol_op);
+        }
+    }
+#endif
+
+    return SYNC_OK;
+}
+
+/*
+ * FSYNC_com_VolBreakCBKs -- handle FSYNC_VOL_BREAKCBKS.
+ *
+ * Used when a volume is being restored: breaks all callbacks on it,
+ * provided a callback-break routine is installed.  The volume lock is
+ * released for the duration of the call.  Always returns SYNC_OK.
+ */
+static afs_int32
+FSYNC_com_VolBreakCBKs(FSSYNC_VolOp_command * vcom, SYNC_response * res)
+{
+    if (!V_BreakVolumeCallbacks) {
+        return SYNC_OK;
+    }
+
+    Log("fssync: breaking all call backs for volume %u\n",
+        vcom->vop->volume);
+    VOL_UNLOCK;
+    (*V_BreakVolumeCallbacks) (vcom->vop->volume);
+    VOL_LOCK;
+
+    return SYNC_OK;
+}
+
+/*
+ * FSYNC_com_VolQuery -- handle FSYNC_VOL_QUERY.
+ *
+ * Copies the in-memory Volume structure for the requested volume id into
+ * the response payload.  Returns SYNC_OK, or SYNC_FAILED with reason
+ * FSYNC_UNKNOWN_VOLID when the volume cannot be found.
+ */
+static afs_int32
+FSYNC_com_VolQuery(FSSYNC_VolOp_command * vcom, SYNC_response * res)
+{
+    Volume *found;
+    Error lookupErr;
+
+#ifdef AFS_DEMAND_ATTACH_FS
+    found = VLookupVolume_r(&lookupErr, vcom->vop->volume, NULL);
+#else /* !AFS_DEMAND_ATTACH_FS */
+    found = VGetVolume_r(&lookupErr, vcom->vop->volume);
+#endif /* !AFS_DEMAND_ATTACH_FS */
+
+    if (!found) {
+        res->hdr.reason = FSYNC_UNKNOWN_VOLID;
+        return SYNC_FAILED;
+    }
+
+    /* the payload buffer must be able to hold a whole Volume */
+    assert(sizeof(Volume) <= res->payload.len);
+    memcpy(res->payload.buf, found, sizeof(Volume));
+    res->hdr.response_len += sizeof(Volume);
+#ifndef AFS_DEMAND_ATTACH_FS
+    VPutVolume_r(found);
+#endif
+
+    return SYNC_OK;
+}
+
+/*
+ * FSYNC_com_VolHdrQuery -- handle FSYNC_VOL_QUERY_HDR.
+ *
+ * Copies the volume's on-disk header data (V_disk) into the response
+ * payload when a header is available for the volume.
+ *
+ * Returns SYNC_OK on success.  Otherwise returns SYNC_FAILED with reason
+ * FSYNC_HDR_NOT_ATTACHED (volume known, header unavailable) or
+ * FSYNC_UNKNOWN_VOLID (volume not found).
+ */
+static afs_int32
+FSYNC_com_VolHdrQuery(FSSYNC_VolOp_command * vcom, SYNC_response * res)
+{
+    afs_int32 code = SYNC_OK;
+    Error error;
+    Volume * vp;
+    int hdr_ok = 0;
+
+#ifdef AFS_DEMAND_ATTACH_FS
+    vp = VLookupVolume_r(&error, vcom->vop->volume, NULL);
+    if (vp &&
+        (vp->header != NULL) &&
+        (V_attachFlags(vp) & VOL_HDR_ATTACHED) &&
+        (V_attachFlags(vp) & VOL_HDR_LOADED)) {
+        hdr_ok = 1;
+    }
+#else /* !AFS_DEMAND_ATTACH_FS */
+    vp = VGetVolume_r(&error, vcom->vop->volume);
+    if (vp && vp->header) {
+        hdr_ok = 1;
+    }
+#endif /* !AFS_DEMAND_ATTACH_FS */
+
+    if (hdr_ok) {
+        assert(sizeof(VolumeDiskData) <= res->payload.len);
+        memcpy(res->payload.buf, &V_disk(vp), sizeof(VolumeDiskData));
+        res->hdr.response_len += sizeof(VolumeDiskData);
+    } else {
+        if (vp) {
+            res->hdr.reason = FSYNC_HDR_NOT_ATTACHED;
+        } else {
+            res->hdr.reason = FSYNC_UNKNOWN_VOLID;
+        }
+        code = SYNC_FAILED;
+    }
+
+#ifndef AFS_DEMAND_ATTACH_FS
+    /* Release the reference taken by VGetVolume_r on every path, not
+     * just on success: a volume that was found but has no header would
+     * otherwise keep its reference forever. */
+    if (vp) {
+        VPutVolume_r(vp);
+    }
+#endif
+
+    return code;
+}
+
+#ifdef AFS_DEMAND_ATTACH_FS
+/*
+ * FSYNC_com_VolOpQuery -- handle FSYNC_VOL_QUERY_VOP (demand attach only).
+ *
+ * Copies the volume's pending volume-operation record into the response
+ * payload.  Returns SYNC_OK, or SYNC_FAILED with reason
+ * FSYNC_UNKNOWN_VOLID (no such volume) or FSYNC_NO_PENDING_VOL_OP.
+ */
+static afs_int32
+FSYNC_com_VolOpQuery(FSSYNC_VolOp_command * vcom, SYNC_response * res)
+{
+    Volume *found;
+    Error lookupErr;
+
+    found = VLookupVolume_r(&lookupErr, vcom->vop->volume, NULL);
+
+    if (!found) {
+        res->hdr.reason = FSYNC_UNKNOWN_VOLID;
+        return SYNC_FAILED;
+    }
+    if (!found->pending_vol_op) {
+        res->hdr.reason = FSYNC_NO_PENDING_VOL_OP;
+        return SYNC_FAILED;
+    }
+
+    /* the payload buffer must be able to hold the whole record */
+    assert(sizeof(FSSYNC_VolOp_info) <= res->payload.len);
+    memcpy(res->payload.buf, found->pending_vol_op, sizeof(FSSYNC_VolOp_info));
+    res->hdr.response_len += sizeof(FSSYNC_VolOp_info);
+
+    return SYNC_OK;
+}
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+/*
+ * FSYNC_com_StatsOp -- decode a statistics command and dispatch it.
+ *
+ * com  the received command; its payload must be exactly one
+ *      FSSYNC_StatsOp_hdr.
+ * res  response under construction.
+ *
+ * Returns the handler's SYNC_* code.  A wrongly sized packet yields
+ * SYNC_COM_ERROR and flags the channel for shutdown.  Commands other
+ * than FSYNC_VOL_STATS_GENERAL are only recognised in demand-attach
+ * builds; anything unrecognised yields SYNC_BAD_COMMAND.
+ */
+static afs_int32
+FSYNC_com_StatsOp(int fd, SYNC_command * com, SYNC_response * res)
+{
+    FSSYNC_StatsOp_command scom;
+
+    if (com->recv_len != (sizeof(com->hdr) + sizeof(FSSYNC_StatsOp_hdr))) {
+        res->hdr.flags |= SYNC_FLAG_CHANNEL_SHUTDOWN;
+        res->hdr.reason = SYNC_REASON_MALFORMED_PACKET;
+        return SYNC_COM_ERROR;
+    }
+
+    scom.com = com;
+    scom.hdr = &com->hdr;
+    scom.sop = (FSSYNC_StatsOp_hdr *) com->payload.buf;
+
+    switch (com->hdr.command) {
+    case FSYNC_VOL_STATS_GENERAL:
+        return FSYNC_com_StatsOpGeneral(&scom, res);
+#ifdef AFS_DEMAND_ATTACH_FS
+        /* statistics for the following subsystems are only tracked
+         * for demand attach fileservers */
+    case FSYNC_VOL_STATS_VICEP:
+        return FSYNC_com_StatsOpViceP(&scom, res);
+    case FSYNC_VOL_STATS_HASH:
+        return FSYNC_com_StatsOpHash(&scom, res);
+    case FSYNC_VOL_STATS_HDR:
+        return FSYNC_com_StatsOpHdr(&scom, res);
+    case FSYNC_VOL_STATS_VLRU:
+        return FSYNC_com_StatsOpVLRU(&scom, res);
+#endif /* AFS_DEMAND_ATTACH_FS */
+    default:
+        break;
+    }
+
+    return SYNC_BAD_COMMAND;
+}
+
+/*
+ * FSYNC_com_StatsOpGeneral -- handle FSYNC_VOL_STATS_GENERAL.
+ *
+ * Copies the global VStats structure into the response payload and
+ * grows the response length accordingly.  Always returns SYNC_OK.
+ */
+static afs_int32
+FSYNC_com_StatsOpGeneral(FSSYNC_StatsOp_command * scom, SYNC_response * res)
+{
+    afs_int32 code = SYNC_OK;
+
+    /* same guard as the volume query handlers: never write past the
+     * end of the caller's payload buffer */
+    assert(sizeof(VStats) <= res->payload.len);
+    memcpy(res->payload.buf, &VStats, sizeof(VStats));
+    res->hdr.response_len += sizeof(VStats);
+
+    return code;
+}
+
+#ifdef AFS_DEMAND_ATTACH_FS
+/*
+ * FSYNC_com_StatsOpViceP -- handle FSYNC_VOL_STATS_VICEP.
+ *
+ * Looks up the partition named in the request and writes a
+ * DiskPartitionStats record for it into the response payload.
+ *
+ * Returns SYNC_OK on success; SYNC_FAILED when the partition name is
+ * malformed (reason SYNC_REASON_MALFORMED_PACKET) or the partition is
+ * not found (no reason is set in that case).
+ */
+static afs_int32
+FSYNC_com_StatsOpViceP(FSSYNC_StatsOp_command * scom, SYNC_response * res)
+{
+    afs_int32 code = SYNC_OK;
+    struct DiskPartition * dp;
+    struct DiskPartitionStats * stats;
+
+    if (SYNC_verifyProtocolString(scom->sop->args.partName, sizeof(scom->sop->args.partName))) {
+        res->hdr.reason = SYNC_REASON_MALFORMED_PACKET;
+        code = SYNC_FAILED;
+        goto done;
+    }
+
+    dp = VGetPartition_r(scom->sop->args.partName, 0);
+    if (!dp) {
+        code = SYNC_FAILED;
+    } else {
+        /* same guard as the volume query handlers: the record is built
+         * in place inside the payload buffer, so it has to fit */
+        assert(sizeof(struct DiskPartitionStats) <= res->payload.len);
+        stats = (struct DiskPartitionStats *) res->payload.buf;
+        stats->free = dp->free;
+        stats->totalUsable = dp->totalUsable;
+        stats->minFree = dp->minFree;
+        stats->f_files = dp->f_files;
+        stats->vol_list_len = dp->vol_list.len;
+
+        res->hdr.response_len += sizeof(struct DiskPartitionStats);
+    }
+
+ done:
+    return code;
+}
+
+/*
+ * FSYNC_com_StatsOpHash -- handle FSYNC_VOL_STATS_HASH.
+ *
+ * Writes a VolumeHashChainStats record for the requested hash bucket
+ * into the response payload.  Returns SYNC_OK, or SYNC_FAILED when the
+ * bucket index is not below VolumeHashTable.Size.
+ */
+static afs_int32
+FSYNC_com_StatsOpHash(FSSYNC_StatsOp_command * scom, SYNC_response * res)
+{
+    afs_int32 code = SYNC_OK;
+    struct VolumeHashChainStats * stats;
+    struct VolumeHashChainHead * head;
+
+    if (scom->sop->args.hash_bucket >= VolumeHashTable.Size) {
+        return SYNC_FAILED;
+    }
+
+    /* same guard as the volume query handlers: the record is built in
+     * place inside the payload buffer, so it has to fit */
+    assert(sizeof(struct VolumeHashChainStats) <= res->payload.len);
+
+    head = &VolumeHashTable.Table[scom->sop->args.hash_bucket];
+    stats = (struct VolumeHashChainStats *) res->payload.buf;
+    stats->table_size = VolumeHashTable.Size;
+    stats->chain_len = head->len;
+    stats->chain_cacheCheck = head->cacheCheck;
+    stats->chain_busy = head->busy;
+    AssignInt64(head->looks, &stats->chain_looks);
+    AssignInt64(head->gets, &stats->chain_gets);
+    AssignInt64(head->reorders, &stats->chain_reorders);
+
+    res->hdr.response_len += sizeof(struct VolumeHashChainStats);
+
+    return code;
+}
+
+/*
+ * FSYNC_com_StatsOpHdr -- handle FSYNC_VOL_STATS_HDR.
+ *
+ * Copies volume_hdr_LRU.stats into the response payload and grows the
+ * response length accordingly.  Always returns SYNC_OK.
+ */
+static afs_int32
+FSYNC_com_StatsOpHdr(FSSYNC_StatsOp_command * scom, SYNC_response * res)
+{
+    afs_int32 code = SYNC_OK;
+
+    /* same guard as the volume query handlers: never write past the
+     * end of the caller's payload buffer */
+    assert(sizeof(volume_hdr_LRU.stats) <= res->payload.len);
+    memcpy(res->payload.buf, &volume_hdr_LRU.stats, sizeof(volume_hdr_LRU.stats));
+    res->hdr.response_len += sizeof(volume_hdr_LRU.stats);
+
+    return code;
+}
+
+/*
+ * FSYNC_com_StatsOpVLRU -- handle FSYNC_VOL_STATS_VLRU.
+ *
+ * No statistics are produced here: the request is always answered with
+ * SYNC_BAD_COMMAND and the response is left untouched.
+ */
+static afs_int32
+FSYNC_com_StatsOpVLRU(FSSYNC_StatsOp_command * scom, SYNC_response * res)
+{
+    return SYNC_BAD_COMMAND;
+}
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+/*
+ * FSYNC_com_to_info -- snapshot a volume-op command into *info.
+ *
+ * Copies the volume-op header and the generic command header out of the
+ * command's buffers into the caller-owned info structure.
+ */
+static void
+FSYNC_com_to_info(FSSYNC_VolOp_command * vcom, FSSYNC_VolOp_info * info)
+{
+    /* the two copies write to distinct members of *info */
+    memcpy(&info->vop, vcom->vop, sizeof(FSSYNC_VolOp_hdr));
+    memcpy(&info->com, vcom->hdr, sizeof(SYNC_command_hdr));
+}
+
+/*
+ * FSYNC_Drop -- tear down the connection on descriptor fd.
+ *
+ * Every volume still recorded in this client's OfflineVolumes row is
+ * re-attached and its entry cleared; the handler slot is then released,
+ * the socket closed, and accepting of new connections re-enabled (it
+ * may have been switched off when the handler table was full).
+ */
+static void
+FSYNC_Drop(int fd)
+{
+    struct offlineInfo *p;
+    int i;
+    Error error;
+    char tvolName[VMAXPATHLEN];
+
+    VOL_LOCK;
+    p = OfflineVolumes[FindHandler(fd)];
+    for (i = 0; i < MAXOFFLINEVOLUMES; i++) {
+        if (p[i].volumeID) {
+
+            Volume *vp;
+
+            /* build "/<volume header name>" with the same bounded
+             * formatting that FSYNC_com_VolOn uses */
+            tvolName[0] = '/';
+            snprintf(&tvolName[1], sizeof(tvolName) - 1, VFORMAT,
+                     p[i].volumeID);
+            tvolName[sizeof(tvolName) - 1] = '\0';
+
+            vp = VAttachVolumeByName_r(&error, p[i].partName, tvolName,
+                                       V_VOLUPD);
+            if (vp)
+                VPutVolume_r(vp);
+            p[i].volumeID = 0;
+        }
+    }
+    VOL_UNLOCK;
+    RemoveHandler(fd);
+#ifdef AFS_NT40_ENV
+    closesocket(fd);
+#else
+    close(fd);
+#endif
+    AcceptOn();
+}
+
+static int AcceptHandler = -1;	/* handler id for accept, if turned on */
+
+/*
+ * AcceptOn -- make sure the listening socket has a handler registered.
+ *
+ * Does nothing if accepting is already enabled.  Failure to register
+ * the handler is fatal.
+ */
+static void
+AcceptOn()
+{
+    int added;
+
+    if (AcceptHandler == -1) {
+        /* register outside assert() so the call survives NDEBUG */
+        added = AddHandler(AcceptSd, FSYNC_newconnection);
+        assert(added);
+        AcceptHandler = FindHandler(AcceptSd);
+    }
+}
+
+/*
+ * AcceptOff -- stop watching the listening socket, freeing its handler
+ * slot.  Does nothing if accepting is already disabled.
+ */
+static void
+AcceptOff()
+{
+    int removed;
+
+    if (AcceptHandler != -1) {
+        /* unregister outside assert() so the call survives NDEBUG */
+        removed = RemoveHandler(AcceptSd);
+        assert(removed);
+        AcceptHandler = -1;
+    }
+}
+
+/*
+ * The multiple FD handling code.
+ *
+ * HandlerFD[i] is the descriptor watched by slot i (-1 when the slot is
+ * free) and HandlerProc[i] is the routine called when it is readable.
+ */
+
+static int HandlerFD[MAXHANDLERS];
+static int (*HandlerProc[MAXHANDLERS]) ();
+
+/*
+ * InitHandler -- mark every handler slot free, under the write lock.
+ */
+static void
+InitHandler()
+{
+    int slot;
+
+    ObtainWriteLock(&FSYNC_handler_lock);
+    for (slot = 0; slot != MAXHANDLERS; ++slot) {
+        HandlerProc[slot] = 0;
+        HandlerFD[slot] = -1;
+    }
+    ReleaseWriteLock(&FSYNC_handler_lock);
+}
+
+/*
+ * CallHandler -- invoke the handler of every registered descriptor that
+ * is set in *fdsetp.
+ *
+ * The read lock is held while the table is scanned but released around
+ * each handler call, since handlers themselves add and remove table
+ * entries.
+ */
+static void
+CallHandler(fd_set * fdsetp)
+{
+    int slot;
+
+    ObtainReadLock(&FSYNC_handler_lock);
+    for (slot = 0; slot < MAXHANDLERS; slot++) {
+        if (HandlerFD[slot] < 0)
+            continue;		/* free slot */
+        if (!FD_ISSET(HandlerFD[slot], fdsetp))
+            continue;		/* nothing to read on this one */
+
+        ReleaseReadLock(&FSYNC_handler_lock);
+        (*HandlerProc[slot]) (HandlerFD[slot]);
+        ObtainReadLock(&FSYNC_handler_lock);
+    }
+    ReleaseReadLock(&FSYNC_handler_lock);
+}
+
+/*
+ * AddHandler -- register aproc as the handler for descriptor afd.
+ *
+ * Takes the first free slot in the table.  Returns 1 on success, or 0
+ * when all MAXHANDLERS slots are in use.
+ */
+static int
+AddHandler(int afd, int (*aproc) ())
+{
+    int slot;
+    int added = 0;
+
+    ObtainWriteLock(&FSYNC_handler_lock);
+    for (slot = 0; slot < MAXHANDLERS; slot++) {
+        if (HandlerFD[slot] == -1) {
+            HandlerFD[slot] = afd;
+            HandlerProc[slot] = aproc;
+            added = 1;
+            break;
+        }
+    }
+    ReleaseWriteLock(&FSYNC_handler_lock);
+
+    return added;
+}
+
+/*
+ * FindHandler -- return the slot index registered for descriptor afd.
+ *
+ * Takes the read lock for the duration of the search.  An unregistered
+ * descriptor is a fatal error (assertion failure).
+ */
+static int
+FindHandler(register int afd)
+{
+    int slot;
+    int found = -1;
+
+    ObtainReadLock(&FSYNC_handler_lock);
+    for (slot = 0; slot < MAXHANDLERS; slot++) {
+        if (HandlerFD[slot] == afd) {
+            found = slot;
+            break;
+        }
+    }
+    ReleaseReadLock(&FSYNC_handler_lock);
+
+    if (found < 0) {
+        assert(1 == 2);
+    }
+    return found;		/* -1 only to satisfy the compiler */
+}
+
+/*
+ * FindHandler_r -- like FindHandler, but takes no lock itself; the one
+ * caller in this file (RemoveHandler) already holds the write lock.
+ * An unregistered descriptor is a fatal error (assertion failure).
+ */
+static int
+FindHandler_r(register int afd)
+{
+    int slot = 0;
+
+    while (slot < MAXHANDLERS) {
+        if (HandlerFD[slot] == afd)
+            return slot;
+        slot++;
+    }
+
+    assert(1 == 2);
+    return -1;			/* satisfy compiler */
+}
+
+/*
+ * RemoveHandler -- free the slot registered for descriptor afd.
+ *
+ * The descriptor must be registered (FindHandler_r asserts otherwise).
+ * Always returns 1.
+ */
+static int
+RemoveHandler(register int afd)
+{
+    int slot;
+
+    ObtainWriteLock(&FSYNC_handler_lock);
+    slot = FindHandler_r(afd);
+    HandlerFD[slot] = -1;
+    ReleaseWriteLock(&FSYNC_handler_lock);
+
+    return 1;
+}
+
+/*
+ * GetHandler -- build the descriptor set for the next select() call.
+ *
+ * fdsetp  cleared, then filled with every registered descriptor.
+ * maxfdp  receives the highest registered descriptor, or -1 if the
+ *         table is empty.
+ */
+static void
+GetHandler(fd_set * fdsetp, int *maxfdp)
+{
+    int slot;
+    int highest = -1;
+
+    FD_ZERO(fdsetp);
+    ObtainReadLock(&FSYNC_handler_lock);	/* just in case */
+    for (slot = 0; slot < MAXHANDLERS; slot++) {
+        if (HandlerFD[slot] == -1)
+            continue;		/* free slot */
+        FD_SET(HandlerFD[slot], fdsetp);
+        if (HandlerFD[slot] > highest)
+            highest = HandlerFD[slot];
+    }
+    *maxfdp = highest;
+    ReleaseReadLock(&FSYNC_handler_lock);	/* just in case */
+}
+
+#endif /* FSSYNC_BUILD_SERVER */
+++ /dev/null
-/*
- * Copyright 2000, International Business Machines Corporation and others.
- * All Rights Reserved.
- *
- * This software has been released under the terms of the IBM Public
- * License. For details, see the LICENSE file in the top-level source
- * directory or online at http://www.openafs.org/dl/license10.html
- */
-
-/*
- System: VICE-TWO
- Module: fssync.c
- Institution: The Information Technology Center, Carnegie-Mellon University
-
- */
-#ifdef notdef
-
-/* All this is going away in early 1989 */
-int newVLDB; /* Compatibility flag */
-
-#endif
-static int newVLDB = 1;
-
-
-#ifndef AFS_PTHREAD_ENV
-#define USUAL_PRIORITY (LWP_MAX_PRIORITY - 2)
-
-/*
- * stack size increased from 8K because the HP machine seemed to have trouble
- * with the smaller stack
- */
-#define USUAL_STACK_SIZE (24 * 1024)
-#endif /* !AFS_PTHREAD_ENV */
-
-/*
- fsync.c
- File server synchronization with external volume utilities.
- */
-
-/* This controls the size of an fd_set; it must be defined early before
- * the system headers define that type and the macros that operate on it.
- * Its value should be as large as the maximum file descriptor limit we
- * are likely to run into on any platform. Right now, that is 65536
- * which is the default hard fd limit on Solaris 9 */
-#ifndef _WIN32
-#define FD_SETSIZE 65536
-#endif
-
-#include <afsconfig.h>
-#include <afs/param.h>
-
-RCSID
- ("$Header$");
-
-#include <sys/types.h>
-#include <stdio.h>
-#ifdef AFS_NT40_ENV
-#include <winsock2.h>
-#include <time.h>
-#else
-#include <sys/param.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-#include <netdb.h>
-#include <sys/time.h>
-#endif
-#include <errno.h>
-#ifdef AFS_PTHREAD_ENV
-#include <assert.h>
-#else /* AFS_PTHREAD_ENV */
-#include <afs/assert.h>
-#endif /* AFS_PTHREAD_ENV */
-#include <signal.h>
-
-#ifdef HAVE_STRING_H
-#include <string.h>
-#else
-#ifdef HAVE_STRINGS_H
-#include <strings.h>
-#endif
-#endif
-
-
-#include <rx/xdr.h>
-#include <afs/afsint.h>
-#include "nfs.h"
-#include <afs/errors.h>
-#include "fssync.h"
-#include "lwp.h"
-#include "lock.h"
-#include <afs/afssyscalls.h>
-#include "ihandle.h"
-#include "vnode.h"
-#include "volume.h"
-#include "partition.h"
-
-/*@printflike@*/ extern void Log(const char *format, ...);
-
-#ifdef osi_Assert
-#undef osi_Assert
-#endif
-#define osi_Assert(e) (void)(e)
-
-int (*V_BreakVolumeCallbacks) ();
-
-#define MAXHANDLERS 4 /* Up to 4 clients; must be at least 2, so that
- * move = dump+restore can run on single server */
-#define MAXOFFLINEVOLUMES 128 /* This needs to be as big as the maximum
- * number that would be offline for 1 operation.
- * Current winner is salvage, which needs all
- * cloned read-only copies offline when salvaging
- * a single read-write volume */
-
-#define MAX_BIND_TRIES 5 /* Number of times to retry socket bind */
-
-
-struct offlineInfo {
- VolumeId volumeID;
- char partName[16];
-};
-
-static struct offlineInfo OfflineVolumes[MAXHANDLERS][MAXOFFLINEVOLUMES];
-
-static FS_sd = -1; /* Client socket for talking to file server */
-static AcceptSd = -1; /* Socket used by server for accepting connections */
-
-static int getport();
-
-struct command {
- bit32 command;
- bit32 reason;
- VolumeId volume;
- char partName[16]; /* partition name, e.g. /vicepa */
-};
-
-/* Forward declarations */
-static void FSYNC_sync();
-static void FSYNC_newconnection();
-static void FSYNC_com();
-static void FSYNC_Drop();
-static void AcceptOn();
-static void AcceptOff();
-static void InitHandler();
-static void CallHandler(fd_set * fdsetp);
-static int AddHandler();
-static int FindHandler();
-static int FindHandler_r();
-static int RemoveHandler();
-static void GetHandler(fd_set * fdsetp, int *maxfdp);
-
-extern int LogLevel;
-
-/*
- * This lock controls access to the handler array. The overhead
- * is minimal in non-preemptive environments.
- */
-struct Lock FSYNC_handler_lock;
-
-int
-FSYNC_clientInit(void)
-{
- struct sockaddr_in addr;
- /* I can't believe the following is needed for localhost connections!! */
- static time_t backoff[] =
- { 3, 3, 3, 5, 5, 5, 7, 15, 16, 24, 32, 40, 48, 0 };
- time_t *timeout = &backoff[0];
-
- for (;;) {
- FS_sd = getport(&addr);
- if (connect(FS_sd, (struct sockaddr *)&addr, sizeof(addr)) >= 0)
- return 1;
- if (!*timeout)
- break;
- if (!(*timeout & 1))
- Log("FSYNC_clientInit temporary failure (will retry)");
- FSYNC_clientFinis();
- sleep(*timeout++);
- }
- perror("FSYNC_clientInit failed (giving up!)");
- return 0;
-}
-
-void
-FSYNC_clientFinis(void)
-{
-#ifdef AFS_NT40_ENV
- closesocket(FS_sd);
-#else
- close(FS_sd);
-#endif
- FS_sd = -1;
-}
-
-int
-FSYNC_askfs(VolumeId volume, char *partName, int com, int reason)
-{
- byte response;
- struct command command;
- int n;
- command.volume = volume;
- command.command = com;
- command.reason = reason;
- if (partName)
- strcpy(command.partName, partName);
- else
- command.partName[0] = 0;
- assert(FS_sd != -1);
- VFSYNC_LOCK;
-#ifdef AFS_NT40_ENV
- if (send(FS_sd, (char *)&command, sizeof(command), 0) != sizeof(command)) {
- printf("FSYNC_askfs: write to file server failed\n");
- response = FSYNC_DENIED;
- goto done;
- }
- while ((n = recv(FS_sd, &response, 1, 0)) != 1) {
- if (n == 0 || WSAEINTR != WSAGetLastError()) {
- printf("FSYNC_askfs: No response from file server\n");
- response = FSYNC_DENIED;
- goto done;
- }
- }
-#else
- if (write(FS_sd, &command, sizeof(command)) != sizeof(command)) {
- printf("FSYNC_askfs: write to file server failed\n");
- response = FSYNC_DENIED;
- goto done;
- }
- while ((n = read(FS_sd, &response, 1)) != 1) {
- if (n == 0 || errno != EINTR) {
- printf("FSYNC_askfs: No response from file server\n");
- response = FSYNC_DENIED;
- goto done;
- }
- }
-#endif
- if (response == 0) {
- printf
- ("FSYNC_askfs: negative response from file server; volume %u, command %d\n",
- command.volume, (int)command.command);
- }
- done:
- VFSYNC_UNLOCK;
- return response;
-}
-
-void
-FSYNC_fsInit(void)
-{
-#ifdef AFS_PTHREAD_ENV
- pthread_t tid;
- pthread_attr_t tattr;
- assert(pthread_attr_init(&tattr) == 0);
- assert(pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_DETACHED) == 0);
- assert(pthread_create(&tid, &tattr, FSYNC_sync, NULL) == 0);
-#else /* AFS_PTHREAD_ENV */
- PROCESS pid;
- assert(LWP_CreateProcess
- (FSYNC_sync, USUAL_STACK_SIZE, USUAL_PRIORITY, (void *)0,
- "FSYNC_sync", &pid) == LWP_SUCCESS);
-#endif /* AFS_PTHREAD_ENV */
-}
-
-static int
-getport(struct sockaddr_in *addr)
-{
- int sd;
-
- memset(addr, 0, sizeof(*addr));
- assert((sd = socket(AF_INET, SOCK_STREAM, 0)) >= 0);
-#ifdef STRUCT_SOCKADDR_HAS_SA_LEN
- addr->sin_len = sizeof(struct sockaddr_in);
-#endif
- addr->sin_addr.s_addr = htonl(0x7f000001);
- addr->sin_family = AF_INET; /* was localhost->h_addrtype */
- addr->sin_port = htons(2040); /* XXXX htons not _really_ neccessary */
-
- return sd;
-}
-
-static fd_set FSYNC_readfds;
-
-static void
-FSYNC_sync()
-{
- struct sockaddr_in addr;
- int on = 1;
- extern VInit;
- int code;
- int numTries;
-#ifdef AFS_PTHREAD_ENV
- int tid;
-#endif
-
-#ifndef AFS_NT40_ENV
- (void)signal(SIGPIPE, SIG_IGN);
-#endif
-
-#ifdef AFS_PTHREAD_ENV
- /* set our 'thread-id' so that the host hold table works */
- MUTEX_ENTER(&rx_stats_mutex); /* protects rxi_pthread_hinum */
- tid = ++rxi_pthread_hinum;
- MUTEX_EXIT(&rx_stats_mutex);
- pthread_setspecific(rx_thread_id_key, (void *)tid);
- Log("Set thread id %d for FSYNC_sync\n", tid);
-#endif /* AFS_PTHREAD_ENV */
-
- while (!VInit) {
- /* Let somebody else run until level > 0. That doesn't mean that
- * all volumes have been attached. */
-#ifdef AFS_PTHREAD_ENV
- pthread_yield();
-#else /* AFS_PTHREAD_ENV */
- LWP_DispatchProcess();
-#endif /* AFS_PTHREAD_ENV */
- }
- AcceptSd = getport(&addr);
- /* Reuseaddr needed because system inexplicably leaves crud lying around */
- code =
- setsockopt(AcceptSd, SOL_SOCKET, SO_REUSEADDR, (char *)&on,
- sizeof(on));
- if (code)
- Log("FSYNC_sync: setsockopt failed with (%d)\n", errno);
-
- for (numTries = 0; numTries < MAX_BIND_TRIES; numTries++) {
- if ((code =
- bind(AcceptSd, (struct sockaddr *)&addr, sizeof(addr))) == 0)
- break;
- Log("FSYNC_sync: bind failed with (%d), will sleep and retry\n",
- errno);
- sleep(5);
- }
- assert(!code);
- listen(AcceptSd, 100);
- InitHandler();
- AcceptOn();
- for (;;) {
- int maxfd;
- GetHandler(&FSYNC_readfds, &maxfd);
- /* Note: check for >= 1 below is essential since IOMGR_select
- * doesn't have exactly same semantics as select.
- */
-#ifdef AFS_PTHREAD_ENV
- if (select(maxfd + 1, &FSYNC_readfds, NULL, NULL, NULL) >= 1)
-#else /* AFS_PTHREAD_ENV */
- if (IOMGR_Select(maxfd + 1, &FSYNC_readfds, NULL, NULL, NULL) >= 1)
-#endif /* AFS_PTHREAD_ENV */
- CallHandler(&FSYNC_readfds);
- }
-}
-
-static void
-FSYNC_newconnection(int afd)
-{
- struct sockaddr_in other;
- int junk, fd;
- junk = sizeof(other);
- fd = accept(afd, (struct sockaddr *)&other, &junk);
- if (fd == -1) {
- Log("FSYNC_newconnection: accept failed, errno==%d\n", errno);
- assert(1 == 2);
- } else if (!AddHandler(fd, FSYNC_com)) {
- AcceptOff();
- assert(AddHandler(fd, FSYNC_com));
- }
-}
-
-/*
-#define TEST2081
-*/
-
-afs_int32 FS_cnt = 0;
-static void
-FSYNC_com(int fd)
-{
- byte rc = FSYNC_OK;
- int n, i;
- Error error;
- struct command command;
- int leaveonline;
- register struct offlineInfo *volumes, *v;
- Volume *vp;
- char tvolName[VMAXPATHLEN];
-
- FS_cnt++;
-#ifdef AFS_NT40_ENV
- n = recv(fd, &command, sizeof(command), 0);
-#else
- n = read(fd, &command, sizeof(command));
-#endif
- if (n <= 0) {
- FSYNC_Drop(fd);
- return;
- }
- if (n < sizeof(command)) {
- Log("FSYNC_com: partial read (%d instead of %d); dropping connection (cnt=%d)\n", n, sizeof(command), FS_cnt);
- FSYNC_Drop(fd);
- return;
- }
- VATTACH_LOCK;
- VOL_LOCK;
- volumes = OfflineVolumes[FindHandler(fd)];
- for (v = 0, i = 0; i < MAXOFFLINEVOLUMES; i++) {
- if (volumes[i].volumeID == command.volume
- && strcmp(volumes[i].partName, command.partName) == 0) {
- v = &volumes[i];
- break;
- }
- }
- switch (command.command) {
- case FSYNC_DONE:
- /* don't try to put online, this call is made only after deleting
- * a volume, in which case we want to remove the vol # from the
- * OfflineVolumes array only */
- if (v)
- v->volumeID = 0;
- break;
- case FSYNC_ON:
-
-/*
-This is where a detatched volume gets reattached. However in the
-special case where the volume is merely busy, it is already
-attatched and it is only necessary to clear the busy flag. See
-defect #2080 for details.
-*/
-
- /* is the volume already attatched? */
-#ifdef notdef
-/*
- * XXX With the following enabled we had bizarre problems where the backup id would
- * be reset to 0; that was due to the interaction between fileserver/volserver in that they
- * both keep volumes in memory and the changes wouldn't be made to the fileserver. Some of
- * the problems were due to refcnt changes as result of VGetVolume/VPutVolume which would call
- * VOffline, etc. when we don't want to; someday the whole #2080 issue should be revisited to
- * be done right XXX
- */
- vp = VGetVolume_r(&error, command.volume);
- if (vp) {
- /* yep, is the BUSY flag set? */
- if (vp->specialStatus == VBUSY) {
-/* test harness for defect #2081 */
-
-#ifdef TEST2081
- /*
- * test #2081 by releasing TEST.2081,
- * so leave it alone here, zap it after
- */
-
- if (strcmp(vp->header->diskstuff.name, "TEST.2081") == 0)
- break;
-#endif
- /* yep, clear BUSY flag */
-
- vp->specialStatus = 0;
- /* make sure vol is online */
- if (v) {
- v->volumeID = 0;
- V_inUse(vp) = 1; /* online */
- }
- VPutVolume_r(vp);
- break;
- }
- VPutVolume_r(vp);
- }
-#endif
-
- /* so, we need to attach the volume */
-
- if (v)
- v->volumeID = 0;
- tvolName[0] = '/';
- sprintf(&tvolName[1], VFORMAT, command.volume);
-
- vp = VAttachVolumeByName_r(&error, command.partName, tvolName,
- V_VOLUPD);
- if (vp)
- VPutVolume_r(vp);
- break;
- case FSYNC_OFF:
- case FSYNC_NEEDVOLUME:{
- leaveonline = 0;
- /* not already offline, we need to find a slot for newly offline volume */
- if (!v) {
- for (i = 0; i < MAXOFFLINEVOLUMES; i++) {
- if (volumes[i].volumeID == 0) {
- v = &volumes[i];
- break;
- }
- }
- }
- if (!v) {
- rc = FSYNC_DENIED;
- break;
- }
- vp = VGetVolume_r(&error, command.volume);
- if (vp) {
- if (command.partName[0] != 0
- && strcmp(command.partName, vp->partition->name) != 0) {
- /* volume on desired partition is not online, so we
- * should treat this as an offline volume.
- */
- VPutVolume_r(vp);
- vp = (Volume *) 0;
- }
- }
- if (vp) {
- leaveonline = (command.command == FSYNC_NEEDVOLUME
- && (command.reason == V_READONLY
- || (!VolumeWriteable(vp)
- && (command.reason == V_CLONE
- || command.reason == V_DUMP))
- )
- );
- if (!leaveonline) {
- if (command.command == FSYNC_NEEDVOLUME
- && (command.reason == V_CLONE
- || command.reason == V_DUMP)) {
- vp->specialStatus = VBUSY;
- }
- /* remember what volume we got, so we can keep track of how
- * many volumes the volserver or whatever is using. Note that
- * vp is valid since leaveonline is only set when vp is valid.
- */
- v->volumeID = command.volume;
- strcpy(v->partName, vp->partition->name);
- if (!V_inUse(vp)) {
- /* in this case, VOffline just returns sans decrementing
- * ref count. We could try to fix it, but it has lots of
- * weird callers.
- */
- VPutVolume_r(vp);
- } else {
- VOffline_r(vp, "A volume utility is running.");
- }
- vp = 0;
- } else {
- VUpdateVolume_r(&error, vp); /* At least get volume stats right */
- if (LogLevel) {
- Log("FSYNC: Volume %u (%s) was left on line for an external %s request\n", V_id(vp), V_name(vp), command.reason == V_CLONE ? "clone" : command.reason == V_READONLY ? "readonly" : command.reason == V_DUMP ? "dump" : "UNKNOWN");
- }
- }
- if (vp)
- VPutVolume_r(vp);
- }
- rc = FSYNC_OK;
- break;
- }
- case FSYNC_MOVEVOLUME:
- /* Yuch: the "reason" for the move is the site it got moved to... */
- /* still set specialStatus so we stop sending back VBUSY.
- * also should still break callbacks. Note that I don't know
- * how to tell if we should break all or not, so we just do it
- * since it doesn't matter much if we do an extra break
- * volume callbacks on a volume move within the same server */
- vp = VGetVolume_r(&error, command.volume);
- if (vp) {
- vp->specialStatus = VMOVED;
- VPutVolume_r(vp);
- }
-
- if (V_BreakVolumeCallbacks) {
- Log("fssync: volume %u moved to %x; breaking all call backs\n",
- command.volume, command.reason);
- VOL_UNLOCK;
- VATTACH_UNLOCK;
- (*V_BreakVolumeCallbacks) (command.volume);
- VATTACH_LOCK;
- VOL_LOCK;
- }
- break;
- case FSYNC_RESTOREVOLUME:
- /* if the volume is being restored, break all callbacks on it */
- if (V_BreakVolumeCallbacks) {
- Log("fssync: volume %u restored; breaking all call backs\n",
- command.volume);
- VOL_UNLOCK;
- VATTACH_UNLOCK;
- (*V_BreakVolumeCallbacks) (command.volume);
- VATTACH_LOCK;
- VOL_LOCK;
- }
- break;
- default:
- rc = FSYNC_DENIED;
- break;
- }
- VOL_UNLOCK;
- VATTACH_UNLOCK;
-#ifdef AFS_NT40_ENV
- (void)send(fd, &rc, 1, 0);
-#else
- (void)write(fd, &rc, 1);
-#endif
-}
-
-static void
-FSYNC_Drop(int fd)
-{
- struct offlineInfo *p;
- register i;
- Error error;
- char tvolName[VMAXPATHLEN];
-
- VATTACH_LOCK;
- VOL_LOCK;
- p = OfflineVolumes[FindHandler(fd)];
- for (i = 0; i < MAXOFFLINEVOLUMES; i++) {
- if (p[i].volumeID) {
- Volume *vp;
-
- tvolName[0] = '/';
- sprintf(&tvolName[1], VFORMAT, p[i].volumeID);
- vp = VAttachVolumeByName_r(&error, p[i].partName, tvolName,
- V_VOLUPD);
- if (vp)
- VPutVolume_r(vp);
- p[i].volumeID = 0;
- }
- }
- VOL_UNLOCK;
- VATTACH_UNLOCK;
- RemoveHandler(fd);
-#ifdef AFS_NT40_ENV
- closesocket(fd);
-#else
- close(fd);
-#endif
- AcceptOn();
-}
-
-static int AcceptHandler = -1; /* handler id for accept, if turned on */
-
-static void
-AcceptOn()
-{
- if (AcceptHandler == -1) {
- assert(AddHandler(AcceptSd, FSYNC_newconnection));
- AcceptHandler = FindHandler(AcceptSd);
- }
-}
-
-static void
-AcceptOff()
-{
- if (AcceptHandler != -1) {
- assert(RemoveHandler(AcceptSd));
- AcceptHandler = -1;
- }
-}
-
-/* The multiple FD handling code. */
-
-static int HandlerFD[MAXHANDLERS];
-static int (*HandlerProc[MAXHANDLERS]) ();
-
-static void
-InitHandler()
-{
- register int i;
- ObtainWriteLock(&FSYNC_handler_lock);
- for (i = 0; i < MAXHANDLERS; i++) {
- HandlerFD[i] = -1;
- HandlerProc[i] = 0;
- }
- ReleaseWriteLock(&FSYNC_handler_lock);
-}
-
-static void
-CallHandler(fd_set * fdsetp)
-{
- register int i;
- ObtainReadLock(&FSYNC_handler_lock);
- for (i = 0; i < MAXHANDLERS; i++) {
- if (HandlerFD[i] >= 0 && FD_ISSET(HandlerFD[i], fdsetp)) {
- ReleaseReadLock(&FSYNC_handler_lock);
- (*HandlerProc[i]) (HandlerFD[i]);
- ObtainReadLock(&FSYNC_handler_lock);
- }
- }
- ReleaseReadLock(&FSYNC_handler_lock);
-}
-
-static int
-AddHandler(int afd, int (*aproc) ())
-{
- register int i;
- ObtainWriteLock(&FSYNC_handler_lock);
- for (i = 0; i < MAXHANDLERS; i++)
- if (HandlerFD[i] == -1)
- break;
- if (i >= MAXHANDLERS) {
- ReleaseWriteLock(&FSYNC_handler_lock);
- return 0;
- }
- HandlerFD[i] = afd;
- HandlerProc[i] = aproc;
- ReleaseWriteLock(&FSYNC_handler_lock);
- return 1;
-}
-
-static int
-FindHandler(register int afd)
-{
- register int i;
- ObtainReadLock(&FSYNC_handler_lock);
- for (i = 0; i < MAXHANDLERS; i++)
- if (HandlerFD[i] == afd) {
- ReleaseReadLock(&FSYNC_handler_lock);
- return i;
- }
- ReleaseReadLock(&FSYNC_handler_lock); /* just in case */
- assert(1 == 2);
- return -1; /* satisfy compiler */
-}
-
-static int
-FindHandler_r(register int afd)
-{
- register int i;
- for (i = 0; i < MAXHANDLERS; i++)
- if (HandlerFD[i] == afd) {
- return i;
- }
- assert(1 == 2);
- return -1; /* satisfy compiler */
-}
-
-static int
-RemoveHandler(register int afd)
-{
- ObtainWriteLock(&FSYNC_handler_lock);
- HandlerFD[FindHandler_r(afd)] = -1;
- ReleaseWriteLock(&FSYNC_handler_lock);
- return 1;
-}
-
-static void
-GetHandler(fd_set * fdsetp, int *maxfdp)
-{
- register int i;
- register int maxfd = -1;
- FD_ZERO(fdsetp);
- ObtainReadLock(&FSYNC_handler_lock); /* just in case */
- for (i = 0; i < MAXHANDLERS; i++)
- if (HandlerFD[i] != -1) {
- FD_SET(HandlerFD[i], fdsetp);
- if (maxfd < HandlerFD[i])
- maxfd = HandlerFD[i];
- }
- *maxfdp = maxfd;
- ReleaseReadLock(&FSYNC_handler_lock); /* just in case */
-}
* This software has been released under the terms of the IBM Public
* License. For details, see the LICENSE file in the top-level source
* directory or online at http://www.openafs.org/dl/license10.html
+ *
+ * Portions Copyright (c) 2006 Sine Nomine Associates
*/
/*
*/
+#ifndef __fssync_h_
+#define __fssync_h_
-/* FSYNC commands */
-#define FSYNC_ON 1 /* Volume online */
-#define FSYNC_OFF 2 /* Volume offline */
-#define FSYNC_LISTVOLUMES 3 /* Update local volume list */
-#define FSYNC_NEEDVOLUME 4 /* Put volume in whatever mode (offline, or whatever)
- * best fits the attachment mode provided in reason */
-#define FSYNC_MOVEVOLUME 5 /* Generate temporary relocation information
- * for this volume to another site, to be used
- * if this volume disappears */
-#define FSYNC_RESTOREVOLUME 6 /* Break all the callbacks on this volume since it is being restored */
-#define FSYNC_DONE 7 /* Done with this volume (used after a delete).
- * Don't put online, but remove from list */
+#define FSYNC_PROTO_VERSION 2
-/* Reasons (these could be communicated to venus or converted to messages) */
+/* FSYNC command codes */
+#define FSYNC_VOL_ON SYNC_COM_CODE_DECL(0) /* Volume online */
+#define FSYNC_VOL_OFF SYNC_COM_CODE_DECL(1) /* Volume offline */
+#define FSYNC_VOL_LISTVOLUMES SYNC_COM_CODE_DECL(2) /* Update local volume list */
+#define FSYNC_VOL_NEEDVOLUME SYNC_COM_CODE_DECL(3) /* Put volume in whatever mode (offline, or whatever)
+ * best fits the attachment mode provided in reason */
+#define FSYNC_VOL_MOVE SYNC_COM_CODE_DECL(4) /* Generate temporary relocation information
+ * for this volume to another site, to be used
+ * if this volume disappears */
+#define FSYNC_VOL_BREAKCBKS SYNC_COM_CODE_DECL(5) /* Break all the callbacks on this volume */
+#define FSYNC_VOL_DONE SYNC_COM_CODE_DECL(6) /* Done with this volume (used after a delete).
+ * Don't put online, but remove from list */
+#define FSYNC_VOL_QUERY SYNC_COM_CODE_DECL(7) /* query the volume state */
+#define FSYNC_VOL_QUERY_HDR SYNC_COM_CODE_DECL(8) /* query the volume disk data structure */
+#define FSYNC_VOL_QUERY_VOP SYNC_COM_CODE_DECL(9) /* query the volume for pending vol op info */
+#define FSYNC_VOL_STATS_GENERAL SYNC_COM_CODE_DECL(10) /* query the general volume package statistics */
+#define FSYNC_VOL_STATS_VICEP SYNC_COM_CODE_DECL(11) /* query the per-partition volume package stats */
+#define FSYNC_VOL_STATS_HASH SYNC_COM_CODE_DECL(12) /* query the per hash-chain volume package stats */
+#define FSYNC_VOL_STATS_HDR SYNC_COM_CODE_DECL(13) /* query the volume header cache statistics */
+#define FSYNC_VOL_STATS_VLRU SYNC_COM_CODE_DECL(14) /* query the VLRU statistics */
-#define FSYNC_WHATEVER 0 /* XXXX */
-#define FSYNC_SALVAGE 1 /* volume is being salvaged */
-#define FSYNC_MOVE 2 /* volume is being moved */
-#define FSYNC_OPERATOR 3 /* operator forced volume offline */
+/* FSYNC reason codes */
+#define FSYNC_WHATEVER SYNC_REASON_CODE_DECL(0) /* XXXX */
+#define FSYNC_SALVAGE SYNC_REASON_CODE_DECL(1) /* volume is being salvaged */
+#define FSYNC_MOVE SYNC_REASON_CODE_DECL(2) /* volume is being moved */
+#define FSYNC_OPERATOR SYNC_REASON_CODE_DECL(3) /* operator forced volume offline */
+#define FSYNC_EXCLUSIVE SYNC_REASON_CODE_DECL(4) /* somebody else has the volume offline */
+#define FSYNC_UNKNOWN_VOLID SYNC_REASON_CODE_DECL(5) /* volume id not known by fileserver */
+#define FSYNC_HDR_NOT_ATTACHED SYNC_REASON_CODE_DECL(6) /* volume header not currently attached */
+#define FSYNC_NO_PENDING_VOL_OP SYNC_REASON_CODE_DECL(7) /* no volume operation pending */
+#define FSYNC_VOL_PKG_ERROR SYNC_REASON_CODE_DECL(8) /* error in the volume package */
+/* FSYNC response codes */
-/* Replies (1 byte) */
+/* FSYNC flag codes */
-#define FSYNC_DENIED 0
-#define FSYNC_OK 1
-/* Prototypes from fssync.c */
-void FSYNC_clientFinis(void);
-int FSYNC_clientInit(void);
-void FSYNC_fsInit(void);
-int FSYNC_askfs(VolumeId volume, char *partName, int com, int reason);
+/*
+ * Identifies one volume by id and partition name.
+ * NOTE(review): presumably one entry per volume that a sync client has
+ * taken offline, with volumeID == 0 marking a free slot -- confirm
+ * against the users of this struct in fssync.c.
+ */
+struct offlineInfo {
+ afs_uint32 volumeID;
+ char partName[16];
+};
+
+/*
+ * Volume-operation header: names the volume (id + partition) that a
+ * volume operation request refers to.
+ */
+typedef struct FSSYNC_VolOp_hdr {
+ afs_uint32 volume; /* volume id associated with request */
+ char partName[16]; /* partition name, e.g. /vicepa */
+} FSSYNC_VolOp_hdr;
+
+/*
+ * Bundle of pointers describing one volume operation request; the
+ * struct itself holds no payload, only references.
+ * NOTE(review): presumably built by the server-side handler so helpers
+ * can take a single argument -- confirm against fssync.c.
+ */
+typedef struct FSSYNC_VolOp_command {
+ SYNC_command_hdr * hdr; /* generic sync command header */
+ FSSYNC_VolOp_hdr * vop; /* volume operation header (volume id, partition) */
+ SYNC_command * com; /* full sync command */
+ struct offlineInfo * v; /* presumably the entry matching this volume, if any -- TODO confirm */
+ struct offlineInfo * volumes; /* presumably the caller's whole offlineInfo array -- TODO confirm */
+} FSSYNC_VolOp_command;
+
+/*
+ * By-value pairing of a sync command header with its volume operation
+ * header (contrast with FSSYNC_VolOp_command, which holds pointers).
+ */
+typedef struct FSSYNC_VolOp_info {
+ SYNC_command_hdr com;
+ FSSYNC_VolOp_hdr vop;
+} FSSYNC_VolOp_info;
+
+
+/*
+ * Argument header for statistics queries.  Only one member of the union
+ * is meaningful for a given request.
+ * NOTE(review): presumably selected by the FSYNC_VOL_STATS_* command code
+ * (VLRU -> vlru_generation, HASH -> hash_bucket, VICEP -> partName) --
+ * confirm against the stats handlers.
+ */
+typedef struct FSSYNC_StatsOp_hdr {
+ union {
+ afs_uint32 vlru_generation;
+ afs_uint32 hash_bucket;
+ char partName[16];
+ } args;
+} FSSYNC_StatsOp_hdr;
+
+/*
+ * Bundle of pointers describing one statistics query request; mirrors
+ * FSSYNC_VolOp_command but carries a stats header instead of a volume
+ * operation header.
+ */
+typedef struct FSSYNC_StatsOp_command {
+ SYNC_command_hdr * hdr; /* generic sync command header */
+ FSSYNC_StatsOp_hdr * sop; /* statistics query arguments */
+ SYNC_command * com; /* full sync command */
+} FSSYNC_StatsOp_command;
+
+
+
+/*
+ * common interfaces
+ */
+extern void FSYNC_Init(void);
+
+/*
+ * fsync client interfaces
+ */
+extern void FSYNC_clientFinis(void);
+extern int FSYNC_clientInit(void);
+extern int FSYNC_clientChildProcReconnect(void);
+
+/* generic low-level interface */
+extern afs_int32 FSYNC_askfs(SYNC_command * com, SYNC_response * res);
+
+/* generic higher-level interface */
+extern afs_int32 FSYNC_GenericOp(void * ext_hdr, size_t ext_len,
+ int command, int reason,
+ SYNC_response * res);
+
+/* volume operations interface */
+extern afs_int32 FSYNC_VolOp(VolumeId volume, char *partName, int com, int reason,
+ SYNC_response * res);
+
+/* statistics query interface */
+extern afs_int32 FSYNC_StatsOp(FSSYNC_StatsOp_hdr * scom, int command, int reason,
+ SYNC_response * res_in);
+
+#endif /* __fssync_h_ */
#include "partition.h"
#include "viceinode.h"
#include "salvage.h"
+#include "daemon_com.h"
#include "fssync.h"
#ifdef O_LARGEFILE
* directory or online at http://www.openafs.org/dl/license10.html
*
* Portions Copyright (c) 2003 Apple Computer, Inc.
+ * Portions Copyright (c) 2006 Sine Nomine Associates
*/
/*
int aixlow_water = 8; /* default 8% */
struct DiskPartition *DiskPartitionList;
+#ifdef AFS_DEMAND_ATTACH_FS
+static struct DiskPartition *DiskPartitionTable[VOLMAXPARTS+1];
+
+static struct DiskPartition * VLookupPartition_r(char * path);
+static void AddPartitionToTable_r(struct DiskPartition *);
+static void DeletePartitionFromTable_r(struct DiskPartition *);
+#endif /* AFS_DEMAND_ATTACH_FS */
+
#ifdef AFS_SGI_XFS_IOPS_ENV
/* Verify that the on disk XFS inodes on the partition are large enough to
* hold the AFS attribute. Returns -1 if the attribute can't be set or is
}
return code;
}
-#endif
+#endif /* AFS_SGI_XFS_IOPS_ENV */
+/*
+ * Initialize the partition package.
+ *
+ * In demand-attach builds this clears DiskPartitionTable, the
+ * index -> DiskPartition lookup table.  The guard must be
+ * AFS_DEMAND_ATTACH_FS: that is the macro under which the table is
+ * declared.
+ *
+ * Always returns 0.
+ */
+int
+VInitPartitionPackage(void)
+{
+#ifdef AFS_DEMAND_ATTACH_FS
+    memset(&DiskPartitionTable, 0, sizeof(DiskPartitionTable));
+#endif /* AFS_DEMAND_ATTACH_FS */
+    return 0;
+}
static void
VInitPartition_r(char *path, char *devname, Device dev)
dp->next = 0;
dp->name = (char *)malloc(strlen(path) + 1);
strncpy(dp->name, path, strlen(path) + 1);
+ dp->index = volutil_GetPartitionID(path);
#if defined(AFS_NAMEI_ENV) && !defined(AFS_NT40_ENV)
/* Create a lockfile for the partition, of the form /vicepa/Lock/vicepa */
dp->devName = (char *)malloc(2 * strlen(path) + 6);
mkdir(dp->devName, 0700);
strcat(dp->devName, path);
close(afs_open(dp->devName, O_RDWR | O_CREAT, 0600));
- dp->device = volutil_GetPartitionID(path);
+ dp->device = dp->index;
#else
dp->devName = (char *)malloc(strlen(devname) + 1);
strncpy(dp->devName, devname, strlen(devname) + 1);
(void)namei_ViceREADME(VPartitionPath(dp));
#endif
VSetPartitionDiskUsage_r(dp);
+#ifdef AFS_DEMAND_ATTACH_FS
+ AddPartitionToTable_r(dp);
+ queue_Init(&dp->vol_list);
+ assert(pthread_cond_init(&dp->vol_list.cv, NULL) == 0);
+#endif /* AFS_DEMAND_ATTACH_FS */
}
static void
return -1;
#endif
#endif /* AFS_NAMEI_ENV */
-#endif
+#endif /* !AFS_LINUX20_ENV && !AFS_NT40_ENV */
#if defined(AFS_DUX40_ENV) && !defined(AFS_NAMEI_ENV)
if (status.st_ino != ROOTINO) {
VGetPartition_r(char *name, int abortp)
{
register struct DiskPartition *dp;
+#ifdef AFS_DEMAND_ATTACH_FS
+ dp = VLookupPartition_r(name);
+#else /* AFS_DEMAND_ATTACH_FS */
for (dp = DiskPartitionList; dp; dp = dp->next) {
if (strcmp(dp->name, name) == 0)
break;
}
+#endif /* AFS_DEMAND_ATTACH_FS */
if (abortp)
assert(dp != NULL);
return dp;
VUnlockPartition_r(name);
VOL_UNLOCK;
}
+
+#ifdef AFS_DEMAND_ATTACH_FS
+/* XXX not sure this will work on AFS_NT40_ENV
+ * needs to be tested!
+ */
+/*
+ * Return the partition registered at table slot `id', or NULL when the
+ * id is outside [0, VOLMAXPARTS] or the slot is empty.  When `abortp' is
+ * non-zero a NULL result trips an assertion instead of being returned.
+ */
+struct DiskPartition *
+VGetPartitionById_r(afs_int32 id, int abortp)
+{
+    struct DiskPartition *part;
+
+    part = (id < 0 || id > VOLMAXPARTS) ? NULL : DiskPartitionTable[id];
+
+    if (abortp)
+        assert(part != NULL);
+
+    return part;
+}
+
+/*
+ * Locking wrapper around VGetPartitionById_r(): takes VOL_LOCK for the
+ * duration of the table lookup.
+ *
+ * id     - partition index to look up
+ * abortp - passed through to VGetPartitionById_r()
+ *
+ * Returns whatever VGetPartitionById_r() returns.
+ */
+struct DiskPartition *
+VGetPartitionById(afs_int32 id, int abortp)
+{
+    /* was misspelled "struct Diskpartition", an unrelated incomplete type */
+    struct DiskPartition *dp;
+
+    VOL_LOCK;
+    dp = VGetPartitionById_r(id, abortp);
+    VOL_UNLOCK;
+
+    return dp;
+}
+
+/*
+ * Map a partition path to its DiskPartitionTable entry.  The path is
+ * converted to an index with volutil_GetPartitionID(); an index outside
+ * [0, VOLMAXPARTS] yields NULL, as does an empty slot.
+ */
+static struct DiskPartition *
+VLookupPartition_r(char * path)
+{
+    afs_int32 idx = volutil_GetPartitionID(path);
+
+    if (idx >= 0 && idx <= VOLMAXPARTS) {
+        return DiskPartitionTable[idx];
+    }
+    return NULL;
+}
+
+/*
+ * Register `dp' in DiskPartitionTable at the slot given by dp->index.
+ * Asserts that the index lies within [0, VOLMAXPARTS].
+ */
+static void
+AddPartitionToTable_r(struct DiskPartition * dp)
+{
+    assert(!(dp->index < 0 || dp->index > VOLMAXPARTS));
+    DiskPartitionTable[dp->index] = dp;
+}
+
+/*
+ * Clear the DiskPartitionTable slot given by dp->index.
+ * Asserts that the index lies within [0, VOLMAXPARTS].
+ */
+static void
+DeletePartitionFromTable_r(struct DiskPartition * dp)
+{
+    assert(!(dp->index < 0 || dp->index > VOLMAXPARTS));
+    DiskPartitionTable[dp->index] = NULL;
+}
+#endif /* AFS_DEMAND_ATTACH_FS */
* This software has been released under the terms of the IBM Public
* License. For details, see the LICENSE file in the top-level source
* directory or online at http://www.openafs.org/dl/license10.html
+ *
+ * Portions Copyright (c) 2006 Sine Nomine Associates
*/
/*
#define AFS_RDSKDEV "/dev/r"
#endif
+
/* All Vice partitions on a server will have the following name prefix */
#define VICE_PARTITION_PREFIX "/vicep"
#define VICE_PREFIX_SIZE (sizeof(VICE_PARTITION_PREFIX)-1)
char *name; /* Mounted partition name */
char *devName; /* Device mounted on */
Device device; /* device number */
+ afs_int32 index; /* partition index (0<=x<=VOLMAXPARTS) */
int lock_fd; /* File descriptor of this partition if locked; otherwise -1;
* Not used by the file server */
int free; /* Total number of blocks (1K) presumed
* from the superblock */
int flags;
int f_files; /* total number of files in this partition */
+#ifdef AFS_DEMAND_ATTACH_FS
+ struct {
+ struct rx_queue head; /* list of volumes on this partition (VByPList) */
+ afs_uint32 len; /* length of volume list */
+ int busy; /* asynch vol list op in progress */
+ pthread_cond_t cv; /* vol_list.busy change cond var */
+ } vol_list;
+#endif /* AFS_DEMAND_ATTACH_FS */
+};
+
+/*
+ * Plain-data snapshot of per-partition statistics.
+ * NOTE(review): field names mirror members of struct DiskPartition;
+ * presumably filled from it for the FSYNC_VOL_STATS_VICEP query --
+ * confirm against the code that populates this struct.
+ */
+struct DiskPartitionStats {
+ afs_int32 free;
+ afs_int32 totalUsable;
+ afs_int32 minFree;
+ afs_int32 f_files;
+#ifdef AFS_DEMAND_ATTACH_FS
+ afs_int32 vol_list_len; /* presumably copied from DiskPartition.vol_list.len -- TODO confirm */
+#endif
};
+
#define PART_DONTUPDATE 1
#define PART_DUPLICATE 2 /* NT - used if we find more than one partition
* using the same drive. Will be dumped before
struct Volume; /* Potentially forward definition */
extern struct DiskPartition *DiskPartitionList;
-extern struct DiskPartition *VGetPartition();
+extern struct DiskPartition *VGetPartition(char * name, int abortp);
+extern struct DiskPartition *VGetPartition_r(char * name, int abortp);
+#ifdef AFS_DEMAND_ATTACH_FS
+extern struct DiskPartition *VGetPartitionById(afs_int32 index, int abortp);
+extern struct DiskPartition *VGetPartitionById_r(afs_int32 index, int abortp);
+#endif
extern int VAttachPartitions(void);
extern void VLockPartition(char *name);
extern void VLockPartition_r(char *name);
afs_sfsize_t blocks, afs_sfsize_t checkBlocks);
extern int VDiskUsage(struct Volume *vp, afs_sfsize_t blocks);
extern void VPrintDiskStats(void);
+extern int VInitPartitionPackage(void);
#include "volume.h"
#include "viceinode.h"
#include "partition.h"
+#include "daemon_com.h"
#include "fssync.h"
/* forward declarations */
-void PurgeIndex_r(Volume * vp, VnodeClass class);
-void PurgeHeader_r(Volume * vp);
+static int ObliterateRegion(Volume * avp, VnodeClass aclass, StreamHandle_t * afile,
+ afs_int32 * aoffset);
+static void PurgeIndex(Volume * vp, VnodeClass class);
+static void PurgeIndex_r(Volume * vp, VnodeClass class);
+static void PurgeHeader_r(Volume * vp);
+static void PurgeHeader(Volume * vp);
void
VPurgeVolume_r(Error * ec, Volume * vp)
/*
* Call the fileserver to break all call backs for that volume
*/
- FSYNC_askfs(V_id(vp), tpartp->name, FSYNC_RESTOREVOLUME, 0);
+ FSYNC_VolOp(V_id(vp), tpartp->name, FSYNC_VOL_BREAKCBKS, 0, NULL);
}
void
return -1;
}
-void
+static void
PurgeIndex(Volume * vp, VnodeClass class)
{
VOL_LOCK;
VOL_UNLOCK;
}
-void
+static void
PurgeIndex_r(Volume * vp, VnodeClass class)
{
StreamHandle_t *ifile;
FDH_CLOSE(fdP);
}
-void
+static void
PurgeHeader(Volume * vp)
{
VOL_LOCK;
VOL_UNLOCK;
}
-void
+static void
PurgeHeader_r(Volume * vp)
{
IH_REALLYCLOSE(V_diskDataHandle(vp));
*/
+#ifndef __salvage_h_
+#define __salvage_h_
+
#include <afs/afssyscalls.h>
/* Definition of DirHandle for salvager. Not the same as for the file server */
IHandle_t *dirh_handle;
afs_int32 dirh_cacheCheck;
} DirHandle;
+
+#endif /* __salvage_h_ */
--- /dev/null
+/*
+ * Copyright 2006, Sine Nomine Associates and others.
+ * All Rights Reserved.
+ *
+ * This software has been released under the terms of the IBM Public
+ * License. For details, see the LICENSE file in the top-level source
+ * directory or online at http://www.openafs.org/dl/license10.html
+ */
+
+/*
+ * demand attach fs
+ * online salvager daemon
+ */
+
+/* Main program file. Define globals. */
+#define MAIN 1
+
+#include <afsconfig.h>
+#include <afs/param.h>
+
+RCSID
+ ("$Header$");
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <dirent.h>
+#include <sys/stat.h>
+#include <time.h>
+#include <errno.h>
+#ifdef AFS_NT40_ENV
+#include <io.h>
+#include <WINNT/afsevent.h>
+#else
+#include <sys/param.h>
+#include <sys/file.h>
+#ifndef ITIMER_REAL
+#include <sys/time.h>
+#endif /* ITIMER_REAL */
+#endif
+#if defined(AFS_AIX_ENV) || defined(AFS_SUN4_ENV)
+#define WCOREDUMP(x) (x & 0200)
+#endif
+#include <rx/xdr.h>
+#include <afs/afsint.h>
+#include <afs/assert.h>
+#if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
+#if defined(AFS_VFSINCL_ENV)
+#include <sys/vnode.h>
+#ifdef AFS_SUN5_ENV
+#include <sys/fs/ufs_inode.h>
+#else
+#if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
+#include <ufs/ufs/dinode.h>
+#include <ufs/ffs/fs.h>
+#else
+#include <ufs/inode.h>
+#endif
+#endif
+#else /* AFS_VFSINCL_ENV */
+#ifdef AFS_OSF_ENV
+#include <ufs/inode.h>
+#else /* AFS_OSF_ENV */
+#if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV)
+#include <sys/inode.h>
+#endif
+#endif
+#endif /* AFS_VFSINCL_ENV */
+#endif /* AFS_SGI_ENV */
+#ifdef AFS_AIX_ENV
+#include <sys/vfs.h>
+#include <sys/lockf.h>
+#else
+#ifdef AFS_HPUX_ENV
+#include <unistd.h>
+#include <checklist.h>
+#else
+#if defined(AFS_SGI_ENV)
+#include <unistd.h>
+#include <fcntl.h>
+#include <mntent.h>
+#else
+#if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
+#ifdef AFS_SUN5_ENV
+#include <unistd.h>
+#include <sys/mnttab.h>
+#include <sys/mntent.h>
+#else
+#include <mntent.h>
+#endif
+#else
+#endif /* AFS_SGI_ENV */
+#endif /* AFS_HPUX_ENV */
+#endif
+#endif
+#include <fcntl.h>
+#ifndef AFS_NT40_ENV
+#include <afs/osi_inode.h>
+#endif
+#include <afs/cmd.h>
+#include <afs/afsutil.h>
+#include <afs/fileutil.h>
+#include <afs/procmgmt.h> /* signal(), kill(), wait(), etc. */
+#ifndef AFS_NT40_ENV
+#include <syslog.h>
+#endif
+
+#include "nfs.h"
+#include "lwp.h"
+#include "lock.h"
+#include <afs/afssyscalls.h>
+#include "ihandle.h"
+#include "vnode.h"
+#include "volume.h"
+#include "partition.h"
+#include "daemon_com.h"
+#include "fssync.h"
+#include "salvsync.h"
+#include "viceinode.h"
+#include "salvage.h"
+#include "volinodes.h" /* header magic number, etc. stuff */
+#include "vol-salvage.h"
+#ifdef AFS_NT40_ENV
+#include <pthread.h>
+#endif
+
+
+#if !defined(AFS_DEMAND_ATTACH_FS)
+#error "online salvager only supported for demand attach fileserver"
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+#if defined(AFS_NT40_ENV)
+#error "online salvager not supported on NT"
+#endif /* AFS_NT40_ENV */
+
+
+/* Forward declarations */
+/*@printflike@*/ void Log(const char *format, ...);
+/*@printflike@*/ void Abort(const char *format, ...);
+
+
+/*@+fcnmacros +macrofcndecl@*/
+#ifdef O_LARGEFILE
+#define afs_fopen fopen64
+#else /* !O_LARGEFILE */
+#define afs_fopen fopen
+#endif /* !O_LARGEFILE */
+/*@=fcnmacros =macrofcndecl@*/
+
+
+
+static volatile int current_workers = 0;
+static volatile struct rx_queue pending_q;
+static pthread_mutex_t worker_lock;
+static pthread_cond_t worker_cv;
+
+static void * SalvageChildReaperThread(void *);
+static int DoSalvageVolume(struct SalvageQueueNode * node, int slot);
+
+static void SalvageServer(void);
+static void SalvageClient(VolumeId vid, char * pname);
+
+static int Reap_Child(char * prog, int * pid, int * status);
+
+static void * SalvageLogCleanupThread(void *);
+static int SalvageLogCleanup(int pid);
+
+/*
+ * One queued log-cleanup request.
+ * NOTE(review): presumably consumed by SalvageLogCleanupThread(), which
+ * would pass `pid' to SalvageLogCleanup() -- confirm in the thread body.
+ */
+struct log_cleanup_node {
+ struct rx_queue q; /* queue linkage */
+ int pid; /* process id the cleanup refers to */
+};
+
+/*
+ * Global queue of pending log-cleanup requests plus the condition
+ * variable associated with changes to it.
+ * NOTE(review): the mutex protecting this queue is not visible here --
+ * confirm which lock callers hold.
+ */
+struct {
+ struct rx_queue queue_head;
+ pthread_cond_t queue_change_cv;
+} log_cleanup_queue;
+
+
+#define DEFAULT_PARALLELISM 4 /* allow 4 parallel salvage workers by default */
+
+/*
+ * Command-line handler registered with the cmd package.
+ *
+ * Translates the parsed options in `as' into the salvager's global
+ * settings (debug, Testing, Parallel, tmpdir, orphans, syslog, ...) and
+ * then runs in one of two modes:
+ *   - with -client: requires both -partition and -volumeid and calls
+ *     SalvageClient() for that single volume;
+ *   - otherwise: calls SalvageServer().
+ *
+ * Exits the process on an unusable configuration (SGI NUMA probe
+ * failure, missing or invalid -partition/-volumeid in client mode).
+ * Returns 0 otherwise.
+ */
+static int
+handleit(struct cmd_syndesc *as)
+{
+    register struct cmd_item *ti;
+    char pname[100], *temp;
+    afs_int32 seenpart = 0, seenvol = 0;
+    VolumeId vid = 0;
+
+#ifdef AFS_SGI_VNODE_GLUE
+    if (afs_init_kernel_config(-1) < 0) {
+        printf
+            ("Can't determine NUMA configuration, not starting salvager.\n");
+        exit(1);
+    }
+#endif
+
+    if (as->parms[2].items)     /* -debug */
+        debug = 1;
+    if (as->parms[3].items)     /* -nowrite */
+        Testing = 1;
+    if (as->parms[4].items)     /* -inodes */
+        ListInodeOption = 1;
+    if (as->parms[5].items)     /* -oktozap */
+        OKToZap = 1;
+    if (as->parms[6].items)     /* -rootinodes */
+        ShowRootFiles = 1;
+    if (as->parms[8].items)     /* -ForceReads */
+        forceR = 1;
+    if ((ti = as->parms[9].items)) {    /* -Parallel # */
+        temp = ti->data;
+        /* an "all" prefix selects one salvage per partition; a count may follow */
+        if (strncmp(temp, "all", 3) == 0) {
+            PartsPerDisk = 1;
+            temp += 3;
+        }
+        if (strlen(temp) != 0) {
+            Parallel = atoi(temp);
+            if (Parallel < 1)
+                Parallel = 1;
+            if (Parallel > MAXPARALLEL) {
+                printf("Setting parallel salvages to maximum of %d \n",
+                       MAXPARALLEL);
+                Parallel = MAXPARALLEL;
+            }
+        }
+    } else {
+        Parallel = MIN(DEFAULT_PARALLELISM, MAXPARALLEL);
+    }
+    if ((ti = as->parms[10].items)) {   /* -tmpdir */
+        DIR *dirp;
+
+        /* only accept the directory if it can actually be opened */
+        tmpdir = ti->data;
+        dirp = opendir(tmpdir);
+        if (!dirp) {
+            printf
+                ("Can't open temporary placeholder dir %s; using current partition \n",
+                 tmpdir);
+            tmpdir = NULL;
+        } else
+            closedir(dirp);
+    }
+    if ((ti = as->parms[11].items))     /* -showlog */
+        ShowLog = 1;
+    if ((ti = as->parms[12].items)) {   /* -orphans */
+        /* in test (-nowrite) mode orphans are never touched */
+        if (Testing)
+            orphans = ORPH_IGNORE;
+        else if (strcmp(ti->data, "remove") == 0
+                 || strcmp(ti->data, "r") == 0)
+            orphans = ORPH_REMOVE;
+        else if (strcmp(ti->data, "attach") == 0
+                 || strcmp(ti->data, "a") == 0)
+            orphans = ORPH_ATTACH;
+    }
+#ifndef AFS_NT40_ENV            /* ignore options on NT */
+    if ((ti = as->parms[13].items)) {   /* -syslog */
+        useSyslog = 1;
+        ShowLog = 0;
+    }
+    if ((ti = as->parms[14].items)) {   /* -syslogfacility */
+        useSyslogFacility = atoi(ti->data);
+    }
+
+    if ((ti = as->parms[15].items)) {   /* -datelogs */
+        TimeStampLogFile(AFSDIR_SERVER_SALSRVLOG_FILEPATH);
+    }
+#endif
+
+    if ((ti = as->parms[16].items)) {   /* -client */
+        if ((ti = as->parms[0].items)) {        /* -partition */
+            seenpart = 1;
+            strlcpy(pname, ti->data, sizeof(pname));
+        }
+        if ((ti = as->parms[1].items)) {        /* -volumeid */
+            char *end;
+            unsigned long vid_l;
+
+            /*
+             * Volume ids are unsigned 32-bit values, so atoi() is not
+             * usable here: it overflows for ids >= 2^31 and silently
+             * turns garbage into volume 0.  Require a plain decimal
+             * number that fits in 32 bits.
+             */
+            errno = 0;
+            vid_l = strtoul(ti->data, &end, 10);
+            if (ti->data[0] < '0' || ti->data[0] > '9' || *end != '\0'
+                || errno != 0 || vid_l > 0xffffffffUL) {
+                printf("Invalid volume id '%s' specified with '-volumeid'\n",
+                       ti->data);
+                exit(-1);
+            }
+            seenvol = 1;
+            vid = (VolumeId) vid_l;
+        }
+
+        if (!seenpart || !seenvol) {
+            printf("You must specify '-partition' and '-volumeid' with the '-client' option\n");
+            exit(-1);
+        }
+
+        SalvageClient(vid, pname);
+
+    } else {                    /* salvageserver mode */
+        SalvageServer();
+    }
+    return (0);
+}
+
+
+#ifndef AFS_NT40_ENV
+#include "AFS_component_version_number.c"
+#endif
+#define MAX_ARGS 128
+#ifdef AFS_NT40_ENV
+char *save_args[MAX_ARGS];
+int n_save_args = 0;
+pthread_t main_thread;
+#endif
+
+static char commandLine[150];
+
+/*
+ * main (salvageserver)
+ *
+ * Initializes the AFS directory paths, records the command line, refuses to
+ * run as non-root on non-NT platforms, registers the single "initcmd" syntax
+ * and hands control to handleit() through cmd_Dispatch().
+ *
+ * handleit() reads the options by position in as->parms[] (for example
+ * parms[10] is -tmpdir and parms[16] is -client), so the order of the
+ * cmd_AddParm() calls below must not change without updating handleit().
+ */
+int
+main(int argc, char **argv)
+{
+ struct cmd_syndesc *ts;
+ int err = 0;
+
+ int i;
+ extern char cml_version_number[];
+
+#ifdef AFS_AIX32_ENV
+ /*
+ * The following signal action for AIX is necessary so that in case of a
+ * crash (i.e. core is generated) we can include the user's data section
+ * in the core dump. Unfortunately, by default, only a partial core is
+ * generated which, in many cases, isn't too useful.
+ */
+ struct sigaction nsa;
+
+ sigemptyset(&nsa.sa_mask);
+ nsa.sa_handler = SIG_DFL;
+ nsa.sa_flags = SA_FULLDUMP;
+ sigaction(SIGABRT, &nsa, NULL);
+ sigaction(SIGSEGV, &nsa, NULL);
+#endif
+
+ /* Initialize directory paths */
+ if (!(initAFSDirPath() & AFSDIR_SERVER_PATHS_OK)) {
+#ifdef AFS_NT40_ENV
+ ReportErrorEventAlt(AFSEVT_SVR_NO_INSTALL_DIR, 0, argv[0], 0);
+#endif
+ fprintf(stderr, "%s: Unable to obtain AFS server directory.\n",
+ argv[0]);
+ exit(2);
+ }
+#ifdef AFS_NT40_ENV
+ main_thread = pthread_self();
+ if (spawnDatap && spawnDataLen) {
+ /* This is a child per partition salvager. Don't setup log or
+ * try to lock the salvager lock.
+ */
+ if (nt_SetupPartitionSalvage(spawnDatap, spawnDataLen) < 0)
+ exit(3);
+ } else {
+#endif
+ /* Rebuild the command line for the startup log message; strlcat
+ * is given the buffer size, so an over-long line is truncated. */
+ for (commandLine[0] = '\0', i = 0; i < argc; i++) {
+ if (i > 0)
+ strlcat(commandLine, " ", sizeof(commandLine));
+ strlcat(commandLine, argv[i], sizeof(commandLine));
+ }
+
+#ifndef AFS_NT40_ENV
+ /* NOTE(review): this failure path exits with status 0 -- confirm
+ * that callers do not need a non-zero status here. */
+ if (geteuid() != 0) {
+ printf("Salvager must be run as root.\n");
+ fflush(stdout);
+ Exit(0);
+ }
+#endif
+
+ /* bad for normal help flag processing, but can do nada */
+
+#ifdef AFS_NT40_ENV
+ }
+#endif
+
+ /* Option indices, in registration order: 0 -partition, 1 -volumeid,
+ * 2 -debug, 3 -nowrite, 4 -inodes, 5 -oktozap, 6 -rootinodes,
+ * 7 -salvagedirs, 8 -blockreads, 9 -parallel, 10 -tmpdir, 11 -showlog,
+ * 12 -orphans, 13 -syslog, 14 -syslogfacility, 15 -datelogs, 16 -client */
+ ts = cmd_CreateSyntax("initcmd", handleit, 0, "initialize the program");
+ cmd_AddParm(ts, "-partition", CMD_SINGLE, CMD_OPTIONAL,
+ "Name of partition to salvage");
+ cmd_AddParm(ts, "-volumeid", CMD_SINGLE, CMD_OPTIONAL,
+ "Volume Id to salvage");
+ cmd_AddParm(ts, "-debug", CMD_FLAG, CMD_OPTIONAL,
+ "Run in Debugging mode");
+ cmd_AddParm(ts, "-nowrite", CMD_FLAG, CMD_OPTIONAL,
+ "Run readonly/test mode");
+ cmd_AddParm(ts, "-inodes", CMD_FLAG, CMD_OPTIONAL,
+ "Just list affected afs inodes - debugging flag");
+ cmd_AddParm(ts, "-oktozap", CMD_FLAG, CMD_OPTIONAL,
+ "Give permission to destroy bogus inodes/volumes - debugging flag");
+ cmd_AddParm(ts, "-rootinodes", CMD_FLAG, CMD_OPTIONAL,
+ "Show inodes owned by root - debugging flag");
+ cmd_AddParm(ts, "-salvagedirs", CMD_FLAG, CMD_OPTIONAL,
+ "Force rebuild/salvage of all directories");
+ cmd_AddParm(ts, "-blockreads", CMD_FLAG, CMD_OPTIONAL,
+ "Read smaller blocks to handle IO/bad blocks");
+ cmd_AddParm(ts, "-parallel", CMD_SINGLE, CMD_OPTIONAL,
+ "# of max parallel partition salvaging");
+ cmd_AddParm(ts, "-tmpdir", CMD_SINGLE, CMD_OPTIONAL,
+ "Name of dir to place tmp files ");
+ cmd_AddParm(ts, "-showlog", CMD_FLAG, CMD_OPTIONAL,
+ "Show log file upon completion");
+ cmd_AddParm(ts, "-orphans", CMD_SINGLE, CMD_OPTIONAL,
+ "ignore | remove | attach");
+
+ /* note - syslog isn't avail on NT, but if we make it conditional, have
+ * to deal with screwy offsets for cmd params */
+ cmd_AddParm(ts, "-syslog", CMD_FLAG, CMD_OPTIONAL,
+ "Write salvage log to syslogs");
+ cmd_AddParm(ts, "-syslogfacility", CMD_SINGLE, CMD_OPTIONAL,
+ "Syslog facility number to use");
+ cmd_AddParm(ts, "-datelogs", CMD_FLAG, CMD_OPTIONAL,
+ "Include timestamp in logfile filename");
+
+ cmd_AddParm(ts, "-client", CMD_FLAG, CMD_OPTIONAL,
+ "Use SALVSYNC to ask salvageserver to salvage a volume");
+
+ err = cmd_Dispatch(argc, argv);
+ Exit(err);
+}
+
+/*
+ * SalvageClient
+ *
+ * Client mode of the salvageserver binary: asks a running salvageserver,
+ * over SALVSYNC, to salvage one volume and then polls every two seconds
+ * until the server reports a final state.
+ *
+ * vid   - id of the volume to salvage
+ * pname - name of the partition holding the volume
+ *
+ * Returns normally once the salvage reached the DONE, ERROR or UNKNOWN
+ * state (a message is printed for ERROR).  Any SALVSYNC failure prints a
+ * diagnostic, closes the channel and terminates the process with exit(-1).
+ */
+static void
+SalvageClient(VolumeId vid, char * pname)
+{
+    int done = 0;
+    afs_int32 code;
+    SYNC_response res;
+    SALVSYNC_response_hdr sres;
+
+    /* Start from a known state: the response header and the payload are
+     * examined after each query, so they must never hold stack garbage. */
+    memset(&res, 0, sizeof(res));
+    memset(&sres, 0, sizeof(sres));
+
+    VInitVolumePackage(volumeUtility, 5, 5, DONT_CONNECT_FS, 0);
+    SALVSYNC_clientInit();
+
+    /* schedule the salvage; no response payload is needed for this call */
+    code = SALVSYNC_SalvageVolume(vid, pname, SALVSYNC_SALVAGE, SALVSYNC_OPERATOR, 0, NULL);
+    if (code != SYNC_OK) {
+        goto sync_error;
+    }
+
+    res.payload.buf = (void *) &sres;
+    res.payload.len = sizeof(sres);
+
+    /* poll the server until the salvage reaches a final state */
+    while (!done) {
+        sleep(2);
+        code = SALVSYNC_SalvageVolume(vid, pname, SALVSYNC_QUERY, SALVSYNC_WHATEVER, 0, &res);
+        if (code != SYNC_OK) {
+            goto sync_error;
+        }
+        switch (sres.state) {
+        case SALVSYNC_STATE_ERROR:
+            printf("salvageserver reports salvage ended in an error; check log files for more details\n");
+            /* fall through: an error is also a final state */
+        case SALVSYNC_STATE_DONE:
+        case SALVSYNC_STATE_UNKNOWN:
+            done = 1;
+            break;
+        }
+    }
+    SALVSYNC_clientFinis();
+    return;
+
+  sync_error:
+    if (code == SYNC_DENIED) {
+        printf("salvageserver refused to salvage volume %u on partition %s\n",
+               vid, pname);
+    } else if (code == SYNC_BAD_COMMAND) {
+        printf("SALVSYNC protocol mismatch; please make sure fileserver, volserver, salvageserver and salvager are same version\n");
+    } else if (code == SYNC_COM_ERROR) {
+        printf("SALVSYNC communications error\n");
+    } else {
+        /* never exit with a failure status without saying why */
+        printf("SALVSYNC request failed with code %d\n", (int) code);
+    }
+    SALVSYNC_clientFinis();
+    exit(-1);
+}
+
+/* Table of salvage child pids, one entry per worker slot.  Allocated in
+ * SalvageServer() with Parallel entries; 0 marks a free slot.  Entries are
+ * set by SalvageServer() and cleared by SalvageChildReaperThread(), both
+ * while holding VOL_LOCK. */
+static int * child_slot;
+
+/*
+ * SalvageServer
+ *
+ * Main loop of the salvageserver daemon.  Sets up the log file, takes the
+ * salvage lock, allocates the child_slot table, initializes the volume
+ * package and the worker synchronization objects, starts the reaper and
+ * log-cleanup threads, and then forks one child per work item handed out
+ * by SALVSYNC_getWork().  The loop never exits, so this does not return.
+ *
+ * NOTE(review): many calls with side effects are wrapped in assert(); that
+ * is only safe if the assert in use is never compiled out -- confirm.
+ */
+static void
+SalvageServer(void)
+{
+ int pid, ret;
+ struct SalvageQueueNode * node;
+ pthread_t tid;
+ pthread_attr_t attrs;
+ int slot;
+
+ /* All entries to the log will be appended. Useful if there are
+ * multiple salvagers appending to the log.
+ */
+
+ CheckLogFile(AFSDIR_SERVER_SALSRVLOG_FILEPATH);
+#ifndef AFS_NT40_ENV
+#ifdef AFS_LINUX20_ENV
+ fcntl(fileno(logFile), F_SETFL, O_APPEND); /* Isn't this redundant? */
+#else
+ fcntl(fileno(logFile), F_SETFL, FAPPEND); /* Isn't this redundant? */
+#endif
+#endif
+ setlinebuf(logFile);
+
+ fprintf(logFile, "%s\n", cml_version_number);
+ Log("Starting OpenAFS Online Salvage Server %s (%s)\n", SalvageVersion, commandLine);
+
+ /* Get and hold a lock for the duration of the salvage to make sure
+ * that no other salvage runs at the same time. The routine
+ * VInitVolumePackage (called below) makes sure that a file server or
+ * other volume utilities don't interfere with the salvage.
+ */
+
+ /* even demand attach online salvager
+ * still needs this because we don't want
+ * a stand-alone salvager to conflict with
+ * the salvager daemon */
+ ObtainSalvageLock();
+
+ /* one pid entry per worker slot; zero-filled so every slot starts free */
+ child_slot = (int *) malloc(Parallel * sizeof(int));
+ assert(child_slot != NULL);
+ memset(child_slot, 0, Parallel * sizeof(int));
+
+ /* initialize things */
+ VInitVolumePackage(salvageServer, 5, 5,
+ 1, 0);
+ DInit(10);
+ queue_Init(&pending_q);
+ queue_Init(&log_cleanup_queue);
+ assert(pthread_mutex_init(&worker_lock, NULL) == 0);
+ assert(pthread_cond_init(&worker_cv, NULL) == 0);
+ assert(pthread_cond_init(&log_cleanup_queue.queue_change_cv, NULL) == 0);
+ assert(pthread_attr_init(&attrs) == 0);
+
+ /* start up the reaper and log cleaner threads */
+ assert(pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED) == 0);
+ assert(pthread_create(&tid,
+ &attrs,
+ &SalvageChildReaperThread,
+ NULL) == 0);
+ assert(pthread_create(&tid,
+ &attrs,
+ &SalvageLogCleanupThread,
+ NULL) == 0);
+
+ /* loop forever serving requests */
+ while (1) {
+ node = SALVSYNC_getWork();
+ assert(node != NULL);
+
+ VOL_LOCK;
+ /* find a slot */
+ for (slot = 0; slot < Parallel; slot++) {
+ if (!child_slot[slot])
+ break;
+ }
+ /* a free slot is expected here: the wait at the bottom of this loop
+ * only lets us come back once current_workers is below Parallel */
+ assert (slot < Parallel);
+
+ pid = Fork();
+ if (pid == 0) {
+ /* child: run the salvage and exit with its result */
+ VOL_UNLOCK;
+ ret = DoSalvageVolume(node, slot);
+ Exit(ret);
+ } else if (pid < 0) {
+ /* fork failed: hand the node back, flagged with 1 */
+ VOL_UNLOCK;
+ SALVSYNC_doneWork(node, 1);
+ } else {
+ /* parent: remember which slot and node this child owns */
+ child_slot[slot] = pid;
+ node->pid = pid;
+ VOL_UNLOCK;
+
+ assert(pthread_mutex_lock(&worker_lock) == 0);
+ current_workers++;
+
+ /* let the reaper thread know another worker was spawned */
+ assert(pthread_cond_broadcast(&worker_cv) == 0);
+
+ /* if we're overquota, wait for the reaper */
+ while (current_workers >= Parallel) {
+ assert(pthread_cond_wait(&worker_cv, &worker_lock) == 0);
+ }
+ assert(pthread_mutex_unlock(&worker_lock) == 0);
+ }
+ }
+}
+
+/*
+ * DoSalvageVolume
+ *
+ * Body of a forked salvage child.  Reconnects to the fileserver, redirects
+ * logging to a per-pid log file (falling back to stdout), and salvages the
+ * single volume named in the work node.
+ *
+ * node - work item carrying the volume id and partition name
+ * slot - worker slot assigned by the parent (not used here)
+ *
+ * Returns 0 once the salvage call has returned, or 1 when the volume id is
+ * invalid or the partition cannot be found.
+ */
+static int
+DoSalvageVolume(struct SalvageQueueNode * node, int slot)
+{
+    char logPath[AFSDIR_PATH_MAX];
+    struct DiskPartition * part;
+    int result = 1;
+
+    VChildProcReconnectFS();
+
+    /* do not attempt to close parent's logFile handle as
+     * another thread may have held the lock on the FILE
+     * structure when fork was called! */
+
+    afs_snprintf(logPath, sizeof(logPath), "%s.%d",
+                 AFSDIR_SERVER_SLVGLOG_FILEPATH, getpid());
+
+    logFile = afs_fopen(logPath, "a");
+    if (logFile == NULL) {
+        /* no private log available; write to stdout instead */
+        logFile = stdout;
+        ShowLog = 0;
+    }
+
+    if (node->command.sop.volume <= 0) {
+        Log("salvageServer: invalid volume id specified; salvage aborted\n");
+    } else if ((part = VGetPartition(node->command.sop.partName, 0)) == NULL) {
+        Log("salvageServer: Unknown or unmounted partition %s; salvage aborted\n",
+            node->command.sop.partName);
+    } else {
+        /* Salvage individual volume; don't notify fs */
+        SalvageFileSys1(part, node->command.sop.volume);
+
+        VDisconnectFS();
+
+        fclose(logFile);
+        result = 0;
+    }
+
+    return result;
+}
+
+
+/*
+ * SalvageChildReaperThread
+ *
+ * Thread started (detached) by SalvageServer().  Waits for salvage children
+ * to exit, frees their child_slot entry, queues their per-pid log for the
+ * log cleanup thread, decrements current_workers and reports completion
+ * through SALVSYNC_doneWorkByPid().  The loop never exits, so the trailing
+ * return is not reached.
+ *
+ * args - unused
+ *
+ * NOTE(review): the locals code, found, qp and nqp are never used.
+ */
+static void *
+SalvageChildReaperThread(void * args)
+{
+ int slot, pid, status, code, found;
+ struct SalvageQueueNode *qp, *nqp;
+ struct log_cleanup_node * cleanup;
+
+ assert(pthread_mutex_lock(&worker_lock) == 0);
+
+ /* loop reaping our children */
+ while (1) {
+ /* wait() won't block unless we have children, so
+ * block on the cond var if we're childless */
+ while (current_workers == 0) {
+ assert(pthread_cond_wait(&worker_cv, &worker_lock) == 0);
+ }
+
+ /* worker_lock is dropped while we block in wait() */
+ assert(pthread_mutex_unlock(&worker_lock) == 0);
+
+ /* allocated up front; a failed allocation is tolerated below, in
+ * which case this child's log is simply not queued for merging */
+ cleanup = (struct log_cleanup_node *) malloc(sizeof(struct log_cleanup_node));
+
+ while (Reap_Child("salvageserver", &pid, &status) < 0) {
+ /* try to prevent livelock if something goes wrong */
+ sleep(1);
+ }
+
+ /* release the slot owned by the child that just exited */
+ VOL_LOCK;
+ for (slot = 0; slot < Parallel; slot++) {
+ if (child_slot[slot] == pid)
+ break;
+ }
+ assert(slot < Parallel);
+ child_slot[slot] = 0;
+ VOL_UNLOCK;
+
+ assert(pthread_mutex_lock(&worker_lock) == 0);
+
+ if (cleanup) {
+ cleanup->pid = pid;
+ queue_Append(&log_cleanup_queue, cleanup);
+ assert(pthread_cond_signal(&log_cleanup_queue.queue_change_cv) == 0);
+ }
+
+ /* ok, we've reaped a child */
+ current_workers--;
+ /* NOTE(review): the exit status collected by Reap_Child() is not
+ * forwarded; 0 is always passed here -- confirm that is intended */
+ SALVSYNC_doneWorkByPid(pid, 0);
+ assert(pthread_cond_broadcast(&worker_cv) == 0);
+ }
+
+ return NULL;
+}
+
+/*
+ * Reap_Child
+ *
+ * Waits for any child process to terminate and logs abnormal endings.
+ *
+ * prog   - program name used in the log messages
+ * pid    - out: pid of the reaped child (only written on success)
+ * status - out: raw wait() status of the child
+ *
+ * Returns the value returned by wait(): the child's pid, or a negative
+ * value when wait() failed.
+ */
+static int
+Reap_Child(char *prog, int * pid, int * status)
+{
+    int child = wait(status);
+
+    if (child < 0) {
+        Log("wait returned -1\n");
+        return child;
+    }
+
+    *pid = child;
+    if (WCOREDUMP(*status))
+        Log("\"%s\" core dumped!\n", prog);
+    if (WIFSIGNALED(*status) != 0 || WEXITSTATUS(*status) != 0)
+        Log("\"%s\" (pid=%d) terminated abnormally!\n", prog, child);
+
+    return child;
+}
+
+/*
+ * Log merging thread: takes the entries that the reaper thread puts on
+ * log_cleanup_queue and folds each child's private log back into the
+ * main salvageserver log.  worker_lock protects the queue and is released
+ * while a log is being copied.
+ */
+static void *
+SalvageLogCleanupThread(void * arg)
+{
+    struct log_cleanup_node * entry;
+
+    assert(pthread_mutex_lock(&worker_lock) == 0);
+
+    for (;;) {
+        if (queue_IsEmpty(&log_cleanup_queue)) {
+            /* nothing to do; sleep until the reaper queues something */
+            assert(pthread_cond_wait(&log_cleanup_queue.queue_change_cv, &worker_lock) == 0);
+            continue;
+        }
+
+        /* detach the oldest entry, then copy its log without the lock */
+        entry = queue_First(&log_cleanup_queue, log_cleanup_node);
+        queue_Remove(entry);
+        assert(pthread_mutex_unlock(&worker_lock) == 0);
+        SalvageLogCleanup(entry->pid);
+        free(entry);
+        assert(pthread_mutex_lock(&worker_lock) == 0);
+    }
+
+    /* not reached */
+    assert(pthread_mutex_unlock(&worker_lock) == 0);
+    return NULL;
+}
+
+#define LOG_XFER_BUF_SIZE 65536
+/*
+ * SalvageLogCleanup
+ *
+ * Appends the private log written by salvage child `pid` (the file named
+ * AFSDIR_SERVER_SLVGLOG_FILEPATH ".<pid>") to the main log and removes the
+ * private file.
+ *
+ * The transfer buffer is static, so this routine must only be run by one
+ * thread at a time.
+ *
+ * Returns 0 when the whole log was copied, 1 when the child log could not
+ * be opened or a read error cut the copy short.
+ */
+static int
+SalvageLogCleanup(int pid)
+{
+    int pidlog, len;
+    char fn[AFSDIR_PATH_MAX];
+    static char buf[LOG_XFER_BUF_SIZE];
+
+    afs_snprintf(fn, sizeof(fn), "%s.%d",
+                 AFSDIR_SERVER_SLVGLOG_FILEPATH, pid);
+
+    /* unlink right after the open attempt: the name goes away now, the
+     * contents stay readable through the descriptor until it is closed */
+    pidlog = open(fn, O_RDONLY);
+    unlink(fn);
+    if (pidlog < 0)
+        return 1;
+
+    /* read() returns -1 on error; only a positive count is data.  Testing
+     * for non-zero here would pass -1 to fwrite() as a huge size and spin
+     * forever on a persistent read error. */
+    len = read(pidlog, buf, LOG_XFER_BUF_SIZE);
+    while (len > 0) {
+        fwrite(buf, len, 1, logFile);
+        len = read(pidlog, buf, LOG_XFER_BUF_SIZE);
+    }
+
+    close(pidlog);
+
+    return (len < 0) ? 1 : 0;
+}
--- /dev/null
+/*
+ * Copyright 2000, International Business Machines Corporation and others.
+ * All Rights Reserved.
+ *
+ * This software has been released under the terms of the IBM Public
+ * License. For details, see the LICENSE file in the top-level source
+ * directory or online at http://www.openafs.org/dl/license10.html
+ */
+
+/*
+ * System: VICE-TWO
+ * Module: salvager.c
+ * Institution: The Information Technology Center, Carnegie-Mellon University
+ */
+
+
+/* Main program file. Define globals. */
+#define MAIN 1
+
+#include <afsconfig.h>
+#include <afs/param.h>
+
+RCSID
+ ("$Header$");
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <dirent.h>
+#include <sys/stat.h>
+#include <time.h>
+#include <errno.h>
+#ifdef AFS_NT40_ENV
+#include <io.h>
+#include <WINNT/afsevent.h>
+#else
+#include <sys/param.h>
+#include <sys/file.h>
+#ifndef ITIMER_REAL
+#include <sys/time.h>
+#endif /* ITIMER_REAL */
+#endif
+#if defined(AFS_AIX_ENV) || defined(AFS_SUN4_ENV)
+#define WCOREDUMP(x) (x & 0200)
+#endif
+#include <rx/xdr.h>
+#include <afs/afsint.h>
+#include <afs/assert.h>
+#if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
+#if defined(AFS_VFSINCL_ENV)
+#include <sys/vnode.h>
+#ifdef AFS_SUN5_ENV
+#include <sys/fs/ufs_inode.h>
+#else
+#if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
+#include <ufs/ufs/dinode.h>
+#include <ufs/ffs/fs.h>
+#else
+#include <ufs/inode.h>
+#endif
+#endif
+#else /* AFS_VFSINCL_ENV */
+#ifdef AFS_OSF_ENV
+#include <ufs/inode.h>
+#else /* AFS_OSF_ENV */
+#if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV)
+#include <sys/inode.h>
+#endif
+#endif
+#endif /* AFS_VFSINCL_ENV */
+#endif /* AFS_SGI_ENV */
+#ifdef AFS_AIX_ENV
+#include <sys/vfs.h>
+#include <sys/lockf.h>
+#else
+#ifdef AFS_HPUX_ENV
+#include <unistd.h>
+#include <checklist.h>
+#else
+#if defined(AFS_SGI_ENV)
+#include <unistd.h>
+#include <fcntl.h>
+#include <mntent.h>
+#else
+#if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
+#ifdef AFS_SUN5_ENV
+#include <unistd.h>
+#include <sys/mnttab.h>
+#include <sys/mntent.h>
+#else
+#include <mntent.h>
+#endif
+#else
+#endif /* AFS_SGI_ENV */
+#endif /* AFS_HPUX_ENV */
+#endif
+#endif
+#include <fcntl.h>
+#ifndef AFS_NT40_ENV
+#include <afs/osi_inode.h>
+#endif
+#include <afs/cmd.h>
+#include <afs/afsutil.h>
+#include <afs/fileutil.h>
+#include <afs/procmgmt.h> /* signal(), kill(), wait(), etc. */
+#ifndef AFS_NT40_ENV
+#include <syslog.h>
+#endif
+
+#include "nfs.h"
+#include "lwp.h"
+#include "lock.h"
+#include <afs/afssyscalls.h>
+#include "ihandle.h"
+#include "vnode.h"
+#include "volume.h"
+#include "partition.h"
+#include "daemon_com.h"
+#include "fssync.h"
+#include "salvsync.h"
+#include "viceinode.h"
+#include "salvage.h"
+#include "volinodes.h" /* header magic number, etc. stuff */
+#include "vol-salvage.h"
+#ifdef AFS_NT40_ENV
+#include <pthread.h>
+#endif
+
+
+/* Set to 1 by main() once logging has been set up; when set, handleit()
+ * calls ObtainSalvageLock() before starting the salvage. */
+static int get_salvage_lock = 0;
+
+
+/* Forward declarations */
+/*@printflike@*/ void Log(const char *format, ...);
+/*@printflike@*/ void Abort(const char *format, ...);
+
+
+/*
+ * handleit
+ *
+ * cmd-library callback for the standalone salvager's single "initcmd"
+ * syntax.  Translates the parsed options into the salvager's global
+ * settings, takes the salvage lock when main() asked for it, initializes
+ * the volume package and then salvages every partition, one partition, or
+ * one volume.
+ *
+ * Options are read by position in as->parms[]; the indices used here must
+ * match the order of the cmd_AddParm() calls in main().
+ *
+ * Returns 0 after the salvage calls return.  Several error paths leave the
+ * process directly through exit()/Exit().
+ */
+static int
+handleit(struct cmd_syndesc *as)
+{
+    register struct cmd_item *ti;
+    char pname[100], *temp;
+    afs_int32 seenpart = 0, seenvol = 0, vid = 0, seenany = 0;
+    struct DiskPartition *partP;
+
+#ifdef AFS_SGI_VNODE_GLUE
+    if (afs_init_kernel_config(-1) < 0) {
+        printf
+            ("Can't determine NUMA configuration, not starting salvager.\n");
+        exit(1);
+    }
+#endif
+
+#ifdef FAST_RESTART
+    {
+        afs_int32 i;
+        for (i = 0; i < CMD_MAXPARMS; i++) {
+            if (as->parms[i].items) {
+                seenany = 1;
+                break;
+            }
+        }
+    }
+    if (!seenany) {
+        char *msg =
+            "Exiting immediately without salvage. Look into the FileLog to find volumes which really need to be salvaged!";
+
+        /* never hand a non-literal string to Log() as the format */
+        if (useSyslog)
+            Log("%s", msg);
+        else
+            printf("%s\n", msg);
+
+        Exit(0);
+    }
+#endif /* FAST_RESTART */
+    if ((ti = as->parms[0].items)) {    /* -partition */
+        seenpart = 1;
+        /* strncpy() does not terminate the copy when the source fills the
+         * buffer, so leave room for and write the terminator explicitly */
+        strncpy(pname, ti->data, sizeof(pname) - 1);
+        pname[sizeof(pname) - 1] = '\0';
+    }
+    if ((ti = as->parms[1].items)) {    /* -volumeid */
+        if (!seenpart) {
+            printf
+                ("You must also specify '-partition' option with the '-volumeid' option\n");
+            exit(-1);
+        }
+        seenvol = 1;
+        /* NOTE(review): atoi() into a signed 32-bit value; ids above
+         * 2^31-1 cannot be represented here -- confirm the id range */
+        vid = atoi(ti->data);
+    }
+    if (as->parms[2].items)     /* -debug */
+        debug = 1;
+    if (as->parms[3].items)     /* -nowrite */
+        Testing = 1;
+    if (as->parms[4].items)     /* -inodes */
+        ListInodeOption = 1;
+    if (as->parms[5].items)     /* -force */
+        ForceSalvage = 1;
+    if (as->parms[6].items)     /* -oktozap */
+        OKToZap = 1;
+    if (as->parms[7].items)     /* -rootinodes */
+        ShowRootFiles = 1;
+    if (as->parms[8].items)     /* -salvagedirs */
+        RebuildDirs = 1;
+    if (as->parms[9].items)     /* -blockreads */
+        forceR = 1;
+    if ((ti = as->parms[10].items)) {   /* -parallel [all]<n> */
+        temp = ti->data;
+        if (strncmp(temp, "all", 3) == 0) {
+            PartsPerDisk = 1;
+            temp += 3;
+        }
+        if (strlen(temp) != 0) {
+            /* clamp the requested count to 1..MAXPARALLEL */
+            Parallel = atoi(temp);
+            if (Parallel < 1)
+                Parallel = 1;
+            if (Parallel > MAXPARALLEL) {
+                printf("Setting parallel salvages to maximum of %d \n",
+                       MAXPARALLEL);
+                Parallel = MAXPARALLEL;
+            }
+        }
+    }
+    if ((ti = as->parms[11].items)) {   /* -tmpdir */
+        DIR *dirp;
+
+        /* only keep the directory if it can actually be opened */
+        tmpdir = ti->data;
+        dirp = opendir(tmpdir);
+        if (!dirp) {
+            printf
+                ("Can't open temporary placeholder dir %s; using current partition \n",
+                 tmpdir);
+            tmpdir = NULL;
+        } else
+            closedir(dirp);
+    }
+    if ((ti = as->parms[12].items))     /* -showlog */
+        ShowLog = 1;
+    if ((ti = as->parms[13].items)) {   /* -showsuid */
+        Testing = 1;
+        ShowSuid = 1;
+        Showmode = 1;
+    }
+    if ((ti = as->parms[14].items)) {   /* -showmounts */
+        Testing = 1;
+        Showmode = 1;
+        ShowMounts = 1;
+    }
+    if ((ti = as->parms[15].items)) {   /* -orphans */
+        if (Testing)
+            orphans = ORPH_IGNORE;
+        else if (strcmp(ti->data, "remove") == 0
+                 || strcmp(ti->data, "r") == 0)
+            orphans = ORPH_REMOVE;
+        else if (strcmp(ti->data, "attach") == 0
+                 || strcmp(ti->data, "a") == 0)
+            orphans = ORPH_ATTACH;
+    }
+#ifndef AFS_NT40_ENV            /* ignore options on NT */
+    if ((ti = as->parms[16].items)) {   /* -syslog */
+        useSyslog = 1;
+        ShowLog = 0;
+    }
+    if ((ti = as->parms[17].items)) {   /* -syslogfacility */
+        useSyslogFacility = atoi(ti->data);
+    }
+
+    if ((ti = as->parms[18].items)) {   /* -datelogs */
+        TimeStampLogFile(AFSDIR_SERVER_SLVGLOG_FILEPATH);
+    }
+#endif
+
+#ifdef FAST_RESTART
+    if ((ti = as->parms[19].items)) {   /* -DontSalvage */
+        char *msg =
+            "Exiting immediately without salvage. Look into the FileLog to find volumes which really need to be salvaged!";
+
+        if (useSyslog)
+            Log("%s", msg);
+        else
+            printf("%s\n", msg);
+        Exit(0);
+    }
+#elif defined(DEMAND_ATTACH_ENABLE)
+    /* a single-volume salvage is refused unless -forceDAFS (parms[19]) is given */
+    if (seenvol && !as->parms[19].items) {
+        char * msg =
+            "The standalone salvager cannot be run concurrently with a Demand Attach Fileserver.  Please use 'salvageserver -client <partition> <volume id>' to manually schedule volume salvages with the salvageserver (new versions of 'bos salvage' automatically do this for you).  Or, if you insist on using the standalone salvager, add the -forceDAFS flag to your salvager command line.";
+
+        if (useSyslog)
+            Log("%s", msg);
+        else
+            printf("%s\n", msg);
+        Exit(1);
+    }
+#endif
+
+    if (get_salvage_lock) {
+        ObtainSalvageLock();
+    }
+
+    /* Note: if seenvol we initialize this as a standard volume utility: this has the
+     * implication that the file server may be running; negotiations have to be made with
+     * the file server in this case to take the read write volume and associated read-only
+     * volumes off line before salvaging */
+#ifdef AFS_NT40_ENV
+    if (seenvol) {
+        if (afs_winsockInit() < 0) {
+            ReportErrorEventAlt(AFSEVT_SVR_WINSOCK_INIT_FAILED, 0,
+                                AFSDIR_SALVAGER_FILE, 0);
+            Log("Failed to initailize winsock, exiting.\n");
+            Exit(1);
+        }
+    }
+#endif
+    VInitVolumePackage(seenvol ? volumeUtility : salvager, 5, 5,
+                       DONT_CONNECT_FS, 0);
+    DInit(10);
+#ifdef AFS_NT40_ENV
+    if (myjob.cj_number != NOT_CHILD) {
+        if (!seenpart) {
+            seenpart = 1;
+            /* bounded copy: never write past the end of pname */
+            strncpy(pname, myjob.cj_part, sizeof(pname) - 1);
+            pname[sizeof(pname) - 1] = '\0';
+        }
+    }
+#endif
+    if (seenpart == 0) {
+        /* no partition named: salvage all of them */
+        for (partP = DiskPartitionList; partP; partP = partP->next) {
+            SalvageFileSysParallel(partP);
+        }
+        SalvageFileSysParallel(0);
+    } else {
+        partP = VGetPartition(pname, 0);
+        if (!partP) {
+            Log("salvage: Unknown or unmounted partition %s; salvage aborted\n", pname);
+            Exit(1);
+        }
+        if (!seenvol)
+            SalvageFileSys(partP, 0);
+        else {
+            /* Salvage individual volume */
+            if (vid <= 0) {
+                Log("salvage: invalid volume id specified; salvage aborted\n");
+                Exit(1);
+            }
+            SalvageFileSys(partP, vid);
+        }
+    }
+    return (0);
+}
+
+
+#ifndef AFS_NT40_ENV
+#include "AFS_component_version_number.c"
+#endif
+/* Capacity of the save_args array (the array itself is only declared on NT). */
+#define MAX_ARGS 128
+#ifdef AFS_NT40_ENV
+char *save_args[MAX_ARGS];
+int n_save_args = 0;
+/* Thread id of the thread running main(); recorded at startup on NT. */
+pthread_t main_thread;
+#endif
+
+/*
+ * main (standalone salvager)
+ *
+ * Initializes the AFS directory paths, opens the salvage log, records the
+ * command line, refuses to run as non-root on non-NT platforms, registers
+ * the single "initcmd" syntax and hands control to handleit() through
+ * cmd_Dispatch().
+ *
+ * handleit() reads the options by position in as->parms[], so the order of
+ * the cmd_AddParm() calls below must not change without updating it.
+ */
+int
+main(int argc, char **argv)
+{
+    struct cmd_syndesc *ts;
+    int err = 0;
+    char commandLine[150];
+
+    int i;
+    extern char cml_version_number[];
+
+#ifdef AFS_AIX32_ENV
+    /*
+     * The following signal action for AIX is necessary so that in case of a
+     * crash (i.e. core is generated) we can include the user's data section
+     * in the core dump. Unfortunately, by default, only a partial core is
+     * generated which, in many cases, isn't too useful.
+     */
+    struct sigaction nsa;
+
+    sigemptyset(&nsa.sa_mask);
+    nsa.sa_handler = SIG_DFL;
+    nsa.sa_flags = SA_FULLDUMP;
+    sigaction(SIGABRT, &nsa, NULL);
+    sigaction(SIGSEGV, &nsa, NULL);
+#endif
+
+    /* Initialize directory paths */
+    if (!(initAFSDirPath() & AFSDIR_SERVER_PATHS_OK)) {
+#ifdef AFS_NT40_ENV
+        ReportErrorEventAlt(AFSEVT_SVR_NO_INSTALL_DIR, 0, argv[0], 0);
+#endif
+        fprintf(stderr, "%s: Unable to obtain AFS server directory.\n",
+                argv[0]);
+        exit(2);
+    }
+#ifdef AFS_NT40_ENV
+    main_thread = pthread_self();
+    if (spawnDatap && spawnDataLen) {
+        /* This is a child per partition salvager. Don't setup log or
+         * try to lock the salvager lock.
+         */
+        if (nt_SetupPartitionSalvage(spawnDatap, spawnDataLen) < 0)
+            exit(3);
+    } else {
+#endif
+        /* Rebuild the command line for the startup log message.  The copy
+         * is bounded by the space left in the buffer, so an over-long
+         * command line is truncated instead of overrunning the stack.
+         * strlen(commandLine) is at most sizeof(commandLine) - 1, so the
+         * remaining-space computation cannot wrap. */
+        for (commandLine[0] = '\0', i = 0; i < argc; i++) {
+            if (i > 0)
+                strncat(commandLine, " ",
+                        sizeof(commandLine) - strlen(commandLine) - 1);
+            strncat(commandLine, argv[i],
+                    sizeof(commandLine) - strlen(commandLine) - 1);
+        }
+
+        /* All entries to the log will be appended.  Useful if there are
+         * multiple salvagers appending to the log.
+         */
+
+        CheckLogFile(AFSDIR_SERVER_SLVGLOG_FILEPATH);
+#ifndef AFS_NT40_ENV
+#ifdef AFS_LINUX20_ENV
+        fcntl(fileno(logFile), F_SETFL, O_APPEND);      /* Isn't this redundant? */
+#else
+        fcntl(fileno(logFile), F_SETFL, FAPPEND);       /* Isn't this redundant? */
+#endif
+#endif
+        setlinebuf(logFile);
+
+#ifndef AFS_NT40_ENV
+        /* NOTE(review): this failure path exits with status 0 -- confirm
+         * that callers do not need a non-zero status here. */
+        if (geteuid() != 0) {
+            printf("Salvager must be run as root.\n");
+            fflush(stdout);
+            Exit(0);
+        }
+#endif
+
+        /* bad for normal help flag processing, but can do nada */
+
+        fprintf(logFile, "%s\n", cml_version_number);
+        Log("STARTING AFS SALVAGER %s (%s)\n", SalvageVersion, commandLine);
+
+        /* Get and hold a lock for the duration of the salvage to make sure
+         * that no other salvage runs at the same time.  The routine
+         * VInitVolumePackage (called below) makes sure that a file server or
+         * other volume utilities don't interfere with the salvage.
+         */
+        get_salvage_lock = 1;
+#ifdef AFS_NT40_ENV
+    }
+#endif
+
+    /* Option indices, in registration order: 0 -partition, 1 -volumeid,
+     * 2 -debug, 3 -nowrite, 4 -inodes, 5 -force, 6 -oktozap, 7 -rootinodes,
+     * 8 -salvagedirs, 9 -blockreads, 10 -parallel, 11 -tmpdir, 12 -showlog,
+     * 13 -showsuid, 14 -showmounts, 15 -orphans, 16 -syslog,
+     * 17 -syslogfacility, 18 -datelogs, 19 -DontSalvage or -forceDAFS */
+    ts = cmd_CreateSyntax("initcmd", handleit, 0, "initialize the program");
+    cmd_AddParm(ts, "-partition", CMD_SINGLE, CMD_OPTIONAL,
+                "Name of partition to salvage");
+    cmd_AddParm(ts, "-volumeid", CMD_SINGLE, CMD_OPTIONAL,
+                "Volume Id to salvage");
+    cmd_AddParm(ts, "-debug", CMD_FLAG, CMD_OPTIONAL,
+                "Run in Debugging mode");
+    cmd_AddParm(ts, "-nowrite", CMD_FLAG, CMD_OPTIONAL,
+                "Run readonly/test mode");
+    cmd_AddParm(ts, "-inodes", CMD_FLAG, CMD_OPTIONAL,
+                "Just list affected afs inodes - debugging flag");
+    cmd_AddParm(ts, "-force", CMD_FLAG, CMD_OPTIONAL, "Force full salvaging");
+    cmd_AddParm(ts, "-oktozap", CMD_FLAG, CMD_OPTIONAL,
+                "Give permission to destroy bogus inodes/volumes - debugging flag");
+    cmd_AddParm(ts, "-rootinodes", CMD_FLAG, CMD_OPTIONAL,
+                "Show inodes owned by root - debugging flag");
+    cmd_AddParm(ts, "-salvagedirs", CMD_FLAG, CMD_OPTIONAL,
+                "Force rebuild/salvage of all directories");
+    cmd_AddParm(ts, "-blockreads", CMD_FLAG, CMD_OPTIONAL,
+                "Read smaller blocks to handle IO/bad blocks");
+    cmd_AddParm(ts, "-parallel", CMD_SINGLE, CMD_OPTIONAL,
+                "# of max parallel partition salvaging");
+    cmd_AddParm(ts, "-tmpdir", CMD_SINGLE, CMD_OPTIONAL,
+                "Name of dir to place tmp files ");
+    cmd_AddParm(ts, "-showlog", CMD_FLAG, CMD_OPTIONAL,
+                "Show log file upon completion");
+    cmd_AddParm(ts, "-showsuid", CMD_FLAG, CMD_OPTIONAL,
+                "Report on suid/sgid files");
+    cmd_AddParm(ts, "-showmounts", CMD_FLAG, CMD_OPTIONAL,
+                "Report on mountpoints");
+    cmd_AddParm(ts, "-orphans", CMD_SINGLE, CMD_OPTIONAL,
+                "ignore | remove | attach");
+
+    /* note - syslog isn't avail on NT, but if we make it conditional, have
+     * to deal with screwy offsets for cmd params */
+    cmd_AddParm(ts, "-syslog", CMD_FLAG, CMD_OPTIONAL,
+                "Write salvage log to syslogs");
+    cmd_AddParm(ts, "-syslogfacility", CMD_SINGLE, CMD_OPTIONAL,
+                "Syslog facility number to use");
+    cmd_AddParm(ts, "-datelogs", CMD_FLAG, CMD_OPTIONAL,
+                "Include timestamp in logfile filename");
+#ifdef FAST_RESTART
+    cmd_AddParm(ts, "-DontSalvage", CMD_FLAG, CMD_OPTIONAL,
+                "Don't salvage. This my be set in BosConfig to let the fileserver restart immediately after a crash. Bad volumes will be taken offline");
+#elif defined(DEMAND_ATTACH_ENABLE)
+    cmd_AddParm(ts, "-forceDAFS", CMD_FLAG, CMD_OPTIONAL,
+                "For Demand Attach Fileserver, permit a manual volume salvage outside of the salvageserver");
+#endif /* FAST_RESTART */
+    err = cmd_Dispatch(argc, argv);
+    Exit(err);
+}
+
--- /dev/null
+/*
+ * Copyright 2006, Sine Nomine Associates and others.
+ * All Rights Reserved.
+ *
+ * This software has been released under the terms of the IBM Public
+ * License. For details, see the LICENSE file in the top-level source
+ * directory or online at http://www.openafs.org/dl/license10.html
+ */
+
+/*
+ * salvsync-client.c
+ *
+ * OpenAFS demand attach fileserver
+ * Salvage server synchronization with fileserver.
+ */
+
+#include <afsconfig.h>
+#include <afs/param.h>
+
+RCSID
+ ("$Header$");
+
+#include <sys/types.h>
+#include <stdio.h>
+#ifdef AFS_NT40_ENV
+#include <winsock2.h>
+#include <time.h>
+#else
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include <sys/time.h>
+#endif
+#include <errno.h>
+#include <assert.h>
+#include <signal.h>
+
+#ifdef HAVE_STRING_H
+#include <string.h>
+#else
+#ifdef HAVE_STRINGS_H
+#include <strings.h>
+#endif
+#endif
+
+
+#include <rx/xdr.h>
+#include <afs/afsint.h>
+#include "nfs.h"
+#include <afs/errors.h>
+#include "salvsync.h"
+#include "lwp.h"
+#include "lock.h"
+#include <afs/afssyscalls.h>
+#include "ihandle.h"
+#include "vnode.h"
+#include "volume.h"
+#include "partition.h"
+#include <rx/rx_queue.h>
+
+/*@printflike@*/ extern void Log(const char *format, ...);
+
+#ifdef osi_Assert
+#undef osi_Assert
+#endif
+#define osi_Assert(e) (void)(e)
+
+
+#ifdef AFS_DEMAND_ATTACH_FS
+/*
+ * SALVSYNC is a feature specific to the demand attach fileserver
+ */
+
+extern int LogLevel;
+extern int VInit;
+extern pthread_mutex_t vol_salvsync_mutex;
+
+/* Connection state shared by every SALVSYNC client routine in this file.
+ * NOTE(review): the initializer is positional; what each value (-1, 2041,
+ * SALVSYNC_PROTO_VERSION, 5, 120) means depends on the field order of
+ * SYNC_client_state, which is declared elsewhere -- check there before
+ * changing any of them. */
+static SYNC_client_state salvsync_client_state = { -1, 2041, SALVSYNC_PROTO_VERSION, 5, 120 };
+
+/*
+ * client-side routines
+ */
+
+/*
+ * Open the SALVSYNC channel described by salvsync_client_state.
+ * Returns whatever SYNC_connect() returns.
+ */
+int
+SALVSYNC_clientInit(void)
+{
+    SYNC_client_state * state = &salvsync_client_state;
+
+    return SYNC_connect(state);
+}
+
+/*
+ * Close the SALVSYNC channel described by salvsync_client_state.
+ * Always returns 1.
+ */
+int
+SALVSYNC_clientFinis(void)
+{
+    SYNC_client_state * state = &salvsync_client_state;
+
+    SYNC_closeChannel(state);
+    return 1;
+}
+
+/*
+ * Re-establish the SALVSYNC channel described by salvsync_client_state.
+ * Returns whatever SYNC_reconnect() returns.
+ */
+int
+SALVSYNC_clientReconnect(void)
+{
+    SYNC_client_state * state = &salvsync_client_state;
+
+    return SYNC_reconnect(state);
+}
+
+/*
+ * SALVSYNC_askSalv
+ *
+ * Sends one SALVSYNC command over the shared client channel, holding
+ * VSALVSYNC_LOCK for the duration of the exchange, and logs every outcome
+ * other than SYNC_OK and SYNC_FAILED.
+ *
+ * com - command to send
+ * res - response to fill in; its header is read when the request is denied
+ *
+ * Returns the code returned by SYNC_ask().
+ */
+afs_int32
+SALVSYNC_askSalv(SYNC_command * com, SYNC_response * res)
+{
+    afs_int32 result;
+
+    VSALVSYNC_LOCK;
+    result = SYNC_ask(&salvsync_client_state, com, res);
+    VSALVSYNC_UNLOCK;
+
+    if (result == SYNC_OK || result == SYNC_FAILED) {
+        /* nothing to report */
+    } else if (result == SYNC_COM_ERROR || result == SYNC_BAD_COMMAND) {
+        Log("SALVSYNC_askSalv: fatal SALVSYNC protocol error; online salvager functionality disabled until next fileserver restart\n");
+    } else if (result == SYNC_DENIED) {
+        Log("SALVSYNC_askSalv: SALVSYNC request denied for reason=%d\n", res->hdr.reason);
+    } else {
+        Log("SALVSYNC_askSalv: unknown protocol response %d\n", result);
+    }
+
+    return result;
+}
+
+/*
+ * SALVSYNC_SalvageVolume
+ *
+ * Builds a SALVSYNC command for one volume and sends it with
+ * SALVSYNC_askSalv().
+ *
+ * volume   - id of the volume the command applies to
+ * partName - partition name; NULL sends an empty name.  Longer names are
+ *            truncated to fit the command's partName field.
+ * command  - SALVSYNC command code placed in the command header
+ * reason   - reason code placed in the command header
+ * prio     - priority placed in the command payload
+ * res_in   - optional response to fill in.  When given, it is passed through
+ *            as-is, so the caller must have set up its payload buffer.  When
+ *            NULL, a zeroed local response is used and discarded.
+ *
+ * Returns the code returned by SALVSYNC_askSalv().
+ */
+afs_int32
+SALVSYNC_SalvageVolume(VolumeId volume, char *partName, int command, int reason,
+                       afs_uint32 prio, SYNC_response * res_in)
+{
+    SYNC_command com;
+    SYNC_response res_l, *res;
+    SALVSYNC_command_hdr scom;
+    SALVSYNC_response_hdr sres;
+
+    memset(&com, 0, sizeof(com));
+    memset(&scom, 0, sizeof(scom));
+
+    if (res_in) {
+        res = res_in;
+    } else {
+        /* caller is not interested in the reply: receive into locals */
+        memset(&res_l, 0, sizeof(res_l));
+        memset(&sres, 0, sizeof(sres));
+        res_l.payload.buf = (void *) &sres;
+        res_l.payload.len = sizeof(sres);
+        res = &res_l;
+    }
+
+    com.payload.buf = (void *) &scom;
+    com.payload.len = sizeof(scom);
+    com.hdr.command = command;
+    com.hdr.reason = reason;
+    /* the length on the wire covers the generic header plus our payload */
+    com.hdr.command_len = sizeof(com.hdr) + sizeof(scom);
+    scom.volume = volume;
+    scom.prio = prio;
+
+    if (partName) {
+        strlcpy(scom.partName, partName, sizeof(scom.partName));
+    } else {
+        scom.partName[0] = '\0';
+    }
+
+    return SALVSYNC_askSalv(&com, res);
+}
+
+#endif /* AFS_DEMAND_ATTACH_FS */
--- /dev/null
+/*
+ * Copyright 2006, Sine Nomine Associates and others.
+ * All Rights Reserved.
+ *
+ * This software has been released under the terms of the IBM Public
+ * License. For details, see the LICENSE file in the top-level source
+ * directory or online at http://www.openafs.org/dl/license10.html
+ */
+
+/*
+ * salvsync-server.c
+ *
+ * OpenAFS demand attach fileserver
+ * Salvage server synchronization with fileserver.
+ */
+
+/* This controls the size of an fd_set; it must be defined early before
+ * the system headers define that type and the macros that operate on it.
+ * Its value should be as large as the maximum file descriptor limit we
+ * are likely to run into on any platform. Right now, that is 65536
+ * which is the default hard fd limit on Solaris 9 */
+#ifndef _WIN32
+#define FD_SETSIZE 65536
+#endif
+
+#include <afsconfig.h>
+#include <afs/param.h>
+
+RCSID
+ ("$Header$");
+
+#include <sys/types.h>
+#include <stdio.h>
+#ifdef AFS_NT40_ENV
+#include <winsock2.h>
+#include <time.h>
+#else
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include <sys/time.h>
+#endif
+#include <errno.h>
+#include <assert.h>
+#include <signal.h>
+
+#ifdef HAVE_STRING_H
+#include <string.h>
+#else
+#ifdef HAVE_STRINGS_H
+#include <strings.h>
+#endif
+#endif
+
+
+#include <rx/xdr.h>
+#include <afs/afsint.h>
+#include "nfs.h"
+#include <afs/errors.h>
+#include "salvsync.h"
+#include "lwp.h"
+#include "lock.h"
+#include <afs/afssyscalls.h>
+#include "ihandle.h"
+#include "vnode.h"
+#include "volume.h"
+#include "partition.h"
+#include <rx/rx_queue.h>
+
+#if !defined(offsetof)
+#include <stddef.h>
+#endif
+
+/*@printflike@*/ extern void Log(const char *format, ...);
+
+#ifdef osi_Assert
+#undef osi_Assert
+#endif
+#define osi_Assert(e) (void)(e)
+
+#define MAXHANDLERS 4 /* Up to 4 clients; must be at least 2, so that
+ * move = dump+restore can run on single server */
+
+#define MAX_BIND_TRIES 5 /* Number of times to retry socket bind */
+
+
+
+/* Forward declarations */
+static void * SALVSYNC_syncThread(void *);
+static void SALVSYNC_newconnection(int fd);
+static void SALVSYNC_com(int fd);
+static void SALVSYNC_Drop(int fd);
+static void AcceptOn(void);
+static void AcceptOff(void);
+static void InitHandler(void);
+static void CallHandler(fd_set * fdsetp);
+static int AddHandler(int afd, void (*aproc) (int));
+static int FindHandler(register int afd);
+static int FindHandler_r(register int afd);
+static int RemoveHandler(register int afd);
+static void GetHandler(fd_set * fdsetp, int *maxfdp);
+
+
+/*
+ * This lock controls access to the handler array.
+ */
+struct Lock SALVSYNC_handler_lock;
+
+
+#ifdef AFS_DEMAND_ATTACH_FS
+/*
+ * SALVSYNC is a feature specific to the demand attach fileserver
+ */
+
+static int AddToSalvageQueue(struct SalvageQueueNode * node);
+static void DeleteFromSalvageQueue(struct SalvageQueueNode * node);
+static void AddToPendingQueue(struct SalvageQueueNode * node);
+static void DeleteFromPendingQueue(struct SalvageQueueNode * node);
+static struct SalvageQueueNode * LookupPendingCommand(SALVSYNC_command_hdr * qry);
+static struct SalvageQueueNode * LookupPendingCommandByPid(int pid);
+static void RaiseCommandPrio(struct SalvageQueueNode * node, SALVSYNC_command_hdr * com);
+
+static struct SalvageQueueNode * LookupNode(VolumeId vid, char * partName);
+static struct SalvageQueueNode * LookupNodeByCommand(SALVSYNC_command_hdr * qry);
+static void AddNodeToHash(struct SalvageQueueNode * node);
+static void DeleteNodeFromHash(struct SalvageQueueNode * node);
+
+static afs_int32 SALVSYNC_com_Salvage(SALVSYNC_command * com, SALVSYNC_response * res);
+static afs_int32 SALVSYNC_com_Cancel(SALVSYNC_command * com, SALVSYNC_response * res);
+static afs_int32 SALVSYNC_com_RaisePrio(SALVSYNC_command * com, SALVSYNC_response * res);
+static afs_int32 SALVSYNC_com_Query(SALVSYNC_command * com, SALVSYNC_response * res);
+static afs_int32 SALVSYNC_com_CancelAll(SALVSYNC_command * com, SALVSYNC_response * res);
+
+
+extern int LogLevel;
+extern int VInit;
+extern pthread_mutex_t vol_salvsync_mutex;
+
+static int AcceptSd = -1; /* Socket used by server for accepting connections */
+
+
+/* be careful about rearranging elements in this structure.
+ * element placement has been optimized for locality of reference
+ * in SALVSYNC_getWork() */
+struct SalvageQueue {
+ volatile int total_len; /* number of queued nodes across all partitions */
+ volatile afs_int32 last_insert; /* id of last partition to have a salvage node insert */
+ volatile int len[VOLMAXPARTS+1]; /* number of queued nodes, indexed by partition id */
+ volatile struct rx_queue part[VOLMAXPARTS+1]; /* per-partition queue heads, indexed by partition id */
+ pthread_cond_t cv; /* broadcast whenever a node is added to or removed from a queue;
+ * SALVSYNC_getWork() waits on it for work to arrive */
+};
+static struct SalvageQueue salvageQueue; /* volumes waiting to be salvaged */
+
+/* generic counted queue head; used for the pending queue and for each
+ * chain of the salvage node hash table */
+struct QueueHead {
+ volatile struct rx_queue q; /* queue head; must stay first so the struct can be used as an rx_queue */
+ volatile int len; /* number of elements currently on the queue */
+ pthread_cond_t queue_change_cv; /* for the pending queue, broadcast on every insert/remove */
+};
+static struct QueueHead pendingQueue; /* volumes being salvaged */
+
+/* XXX
+ * whether a partition has a salvage in progress
+ *
+ * the salvager code only permits one salvage per partition at a time
+ *
+ * the following hack tries to keep salvaged parallelism high by
+ * only permitting one salvage dispatch per partition at a time
+ *
+ * unfortunately, the parallel salvager currently
+ * has a rather braindead routine that won't permit
+ * multiple salvages on the same "device". this
+ * function happens to break pretty badly on lvm, raid luns, etc.
+ *
+ * this hack isn't good enough to stop the device limiting code from
+ * crippling performance. someday that code needs to be rewritten
+ */
+static int partition_salvaging[VOLMAXPARTS+1];
+
+#define VSHASH_SIZE 64
+#define VSHASH_MASK (VSHASH_SIZE-1)
+#define VSHASH(vid) ((vid)&VSHASH_MASK)
+
+static struct QueueHead SalvageHashTable[VSHASH_SIZE];
+
+/*
+ * Find the salvage node for a (volume id, partition name) pair.
+ *
+ * Walks the hash chain selected by VSHASH(vid).  Chain links are the
+ * hash_chain members embedded in each node, so the containing node is
+ * recovered from the link address with offsetof().
+ *
+ * Returns the matching node, or NULL when the chain holds no match.
+ */
+static struct SalvageQueueNode *
+LookupNode(afs_uint32 vid, char * partName)
+{
+    struct rx_queue *link, *nlink;
+    struct QueueHead *chain = &SalvageHashTable[VSHASH(vid)];
+    struct SalvageQueueNode *cand;
+
+    for (queue_Scan(chain, link, nlink, rx_queue)) {
+        cand = (struct SalvageQueueNode *)
+            ((char *)link - offsetof(struct SalvageQueueNode, hash_chain));
+        if (cand->command.sop.volume != vid)
+            continue;
+        if (strncmp(cand->command.sop.partName, partName,
+                    sizeof(cand->command.sop.partName)) == 0)
+            return cand;
+    }
+
+    return NULL;
+}
+
+/* look up the salvage node named by the volume id and partition name
+ * carried in a command header; returns NULL if there is none */
+static struct SalvageQueueNode *
+LookupNodeByCommand(SALVSYNC_command_hdr * qry)
+{
+ return LookupNode(qry->volume, qry->partName);
+}
+
+/* link a node onto the hash chain for its volume id;
+ * does nothing if the node is already on a chain */
+static void
+AddNodeToHash(struct SalvageQueueNode * node)
+{
+    struct QueueHead *chain;
+
+    if (!queue_IsOnQueue(&node->hash_chain)) {
+        chain = &SalvageHashTable[VSHASH(node->command.sop.volume)];
+        queue_Append(chain, &node->hash_chain);
+        chain->len++;
+    }
+}
+
+/* unlink a node from the hash chain for its volume id;
+ * does nothing if the node is not on a chain */
+static void
+DeleteNodeFromHash(struct SalvageQueueNode * node)
+{
+    if (queue_IsOnQueue(&node->hash_chain)) {
+        queue_Remove(&node->hash_chain);
+        SalvageHashTable[VSHASH(node->command.sop.volume)].len--;
+    }
+}
+
+/*
+ * Initialize the SALVSYNC server side: the handler table lock, the
+ * per-partition salvage queues, the pending queue, the node hash table
+ * and the per-partition "salvage in progress" counters.  Finally start
+ * the detached thread that services the SALVSYNC socket.
+ *
+ * Any initialization failure trips an assert.
+ */
+void
+SALVSYNC_salvInit(void)
+{
+    int i;
+    pthread_t tid;
+    pthread_attr_t tattr;
+
+    /* the sync thread takes this lock as soon as it starts (InitHandler),
+     * so it has to be initialized before the thread is created */
+    Lock_Init(&SALVSYNC_handler_lock);
+
+    /* initialize the queues */
+    assert(pthread_cond_init(&salvageQueue.cv, NULL) == 0);
+    for (i = 0; i <= VOLMAXPARTS; i++) {
+        queue_Init(&salvageQueue.part[i]);
+        salvageQueue.len[i] = 0;
+    }
+    assert(pthread_cond_init(&pendingQueue.queue_change_cv, NULL) == 0);
+    queue_Init(&pendingQueue);
+    salvageQueue.total_len = pendingQueue.len = 0;
+    salvageQueue.last_insert = -1;	/* no insert has happened yet */
+    memset(partition_salvaging, 0, sizeof(partition_salvaging));
+
+    /* initialize the (volume id) -> node hash chains */
+    for (i = 0; i < VSHASH_SIZE; i++) {
+        assert(pthread_cond_init(&SalvageHashTable[i].queue_change_cv, NULL) == 0);
+        SalvageHashTable[i].len = 0;
+        queue_Init(&SalvageHashTable[i]);
+    }
+
+    /* start the salvsync thread */
+    assert(pthread_attr_init(&tattr) == 0);
+    assert(pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_DETACHED) == 0);
+    assert(pthread_create(&tid, &tattr, SALVSYNC_syncThread, NULL) == 0);
+}
+
+/*
+ * Create the TCP socket used for SALVSYNC and fill *addr with the
+ * address it is meant to be bound to: 127.0.0.1, port 2041.
+ * The socket is NOT bound here; the caller does that.
+ *
+ * Returns the new socket descriptor (socket() failure trips an assert).
+ */
+static int
+getport(struct sockaddr_in *addr)
+{
+    int sock;
+
+    memset(addr, 0, sizeof(*addr));
+    assert((sock = socket(AF_INET, SOCK_STREAM, 0)) >= 0);
+
+    addr->sin_family = AF_INET;	/* was localhost->h_addrtype */
+    addr->sin_port = htons(2041);	/* XXXX htons not _really_ necessary */
+    addr->sin_addr.s_addr = htonl(0x7f000001);	/* 127.0.0.1 */
+#ifdef STRUCT_SOCKADDR_HAS_SA_LEN
+    addr->sin_len = sizeof(struct sockaddr_in);
+#endif
+
+    return sock;
+}
+
+static fd_set SALVSYNC_readfds;
+
+/*
+ * Body of the SALVSYNC server thread.
+ *
+ * Creates and binds the listening socket (retrying bind up to
+ * MAX_BIND_TRIES times, 5 seconds apart), registers the accept handler,
+ * and then loops forever: build the fd set from the handler table,
+ * select() on it, and dispatch the handlers of whatever became readable.
+ *
+ * args is unused.  The loop never exits; the trailing return only
+ * satisfies the thread function signature.
+ */
+static void *
+SALVSYNC_syncThread(void * args)
+{
+ struct sockaddr_in addr;
+ int on = 1;
+ int code;
+ int numTries;
+ int tid;
+
+#ifndef AFS_NT40_ENV
+ /* a write to a dropped client connection must not kill the process */
+ (void)signal(SIGPIPE, SIG_IGN);
+#endif
+
+ /* set our 'thread-id' so that the host hold table works */
+ MUTEX_ENTER(&rx_stats_mutex); /* protects rxi_pthread_hinum */
+ tid = ++rxi_pthread_hinum;
+ MUTEX_EXIT(&rx_stats_mutex);
+ pthread_setspecific(rx_thread_id_key, (void *)tid);
+ Log("Set thread id %d for SALVSYNC_syncThread\n", tid);
+
+ AcceptSd = getport(&addr);
+ /* Reuseaddr needed because system inexplicably leaves crud lying around */
+ code =
+ setsockopt(AcceptSd, SOL_SOCKET, SO_REUSEADDR, (char *)&on,
+ sizeof(on));
+ if (code)
+ Log("SALVSYNC_sync: setsockopt failed with (%d)\n", errno);
+
+ for (numTries = 0; numTries < MAX_BIND_TRIES; numTries++) {
+ if ((code =
+ bind(AcceptSd, (struct sockaddr *)&addr, sizeof(addr))) == 0)
+ break;
+ Log("SALVSYNC_sync: bind failed with (%d), will sleep and retry\n",
+ errno);
+ sleep(5);
+ }
+ /* give up (assert) if every bind attempt failed */
+ assert(!code);
+ listen(AcceptSd, 100);
+ InitHandler();
+ AcceptOn();
+
+ for (;;) {
+ int maxfd;
+ GetHandler(&SALVSYNC_readfds, &maxfd);
+ /* Note: check for >= 1 below is essential since IOMGR_select
+ * doesn't have exactly same semantics as select.
+ */
+ if (select(maxfd + 1, &SALVSYNC_readfds, NULL, NULL, NULL) >= 1)
+ CallHandler(&SALVSYNC_readfds);
+ }
+
+ return NULL;
+}
+
+/*
+ * Handler for the listening socket: accept one client connection and
+ * register SALVSYNC_com as its handler.
+ *
+ * If the handler table is full, the accept handler itself is turned off
+ * to free its slot for the new connection (SALVSYNC_Drop turns it back
+ * on).  A failed accept() is logged and then trips an assert.
+ */
+static void
+SALVSYNC_newconnection(int afd)
+{
+    struct sockaddr_in peer;
+    int peerlen = sizeof(peer);
+    int fd;
+
+    fd = accept(afd, (struct sockaddr *)&peer, &peerlen);
+    if (fd == -1) {
+        Log("SALVSYNC_newconnection: accept failed, errno==%d\n", errno);
+        assert(1 == 2);
+        return;
+    }
+
+    if (AddHandler(fd, SALVSYNC_com))
+        return;
+
+    /* table full: give up the accept slot and retry */
+    AcceptOff();
+    assert(AddHandler(fd, SALVSYNC_com));
+}
+
+/*
+ * Process one command arriving on a salvsync file descriptor (fd).
+ *
+ * Reads a command, validates the protocol version and message length,
+ * dispatches it under VOL_LOCK to the matching SALVSYNC_com_* routine,
+ * and writes the response back.  The connection is dropped when the
+ * read fails or when the response carries SYNC_FLAG_CHANNEL_SHUTDOWN.
+ *
+ * The response header and SALVSYNC payload are zeroed up front: several
+ * paths below only OR bits into res.hdr.flags or leave response/reason
+ * and the payload untouched, and those bytes are sent to the client.
+ */
+static afs_int32 SALV_cnt = 0;	/* number of times SALVSYNC_com has been entered */
+static void
+SALVSYNC_com(int fd)
+{
+    SYNC_command com;
+    SYNC_response res;
+    SALVSYNC_response_hdr sres_hdr;
+    SALVSYNC_command scom;
+    SALVSYNC_response sres;
+    SYNC_PROTO_BUF_DECL(buf);
+
+    /* never send uninitialized stack contents back to the client */
+    memset(&res, 0, sizeof(res));
+    memset(&sres_hdr, 0, sizeof(sres_hdr));
+
+    com.payload.buf = (void *)buf;
+    com.payload.len = SYNC_PROTO_MAX_LEN;
+    res.payload.buf = (void *) &sres_hdr;
+    res.payload.len = sizeof(sres_hdr);
+    res.hdr.response_len = sizeof(res.hdr) + sizeof(sres_hdr);
+    res.hdr.proto_version = SALVSYNC_PROTO_VERSION;
+
+    /* SALVSYNC-level views onto the generic command/response buffers */
+    scom.hdr = &com.hdr;
+    scom.sop = (SALVSYNC_command_hdr *) buf;
+    scom.com = &com;
+    sres.hdr = &res.hdr;
+    sres.sop = &sres_hdr;
+    sres.res = &res;
+
+    SALV_cnt++;
+    if (SYNC_getCom(fd, &com)) {
+        Log("SALVSYNC_com: read failed; dropping connection (cnt=%d)\n", SALV_cnt);
+        SALVSYNC_Drop(fd);
+        return;
+    }
+
+    if (com.hdr.proto_version != SALVSYNC_PROTO_VERSION) {
+        Log("SALVSYNC_com: invalid protocol version (%u)\n", com.hdr.proto_version);
+        res.hdr.response = SYNC_COM_ERROR;
+        res.hdr.flags |= SYNC_FLAG_CHANNEL_SHUTDOWN;
+        goto respond;
+    }
+
+    if (com.recv_len != (sizeof(com.hdr) + sizeof(SALVSYNC_command_hdr))) {
+        Log("SALVSYNC_com: invalid protocol message length (%u)\n", com.recv_len);
+        res.hdr.response = SYNC_COM_ERROR;
+        res.hdr.reason = SYNC_REASON_MALFORMED_PACKET;
+        res.hdr.flags |= SYNC_FLAG_CHANNEL_SHUTDOWN;
+        goto respond;
+    }
+
+    VOL_LOCK;
+    switch (com.hdr.command) {
+    case SALVSYNC_NOP:
+        /* nothing to do; the queue lengths below are the whole answer */
+        res.hdr.response = SYNC_OK;
+        break;
+    case SALVSYNC_SALVAGE:
+        res.hdr.response = SALVSYNC_com_Salvage(&scom, &sres);
+        break;
+    case SALVSYNC_CANCEL:
+        /* cancel a salvage */
+        res.hdr.response = SALVSYNC_com_Cancel(&scom, &sres);
+        break;
+    case SALVSYNC_CANCELALL:
+        /* cancel all queued salvages */
+        res.hdr.response = SALVSYNC_com_CancelAll(&scom, &sres);
+        break;
+    case SALVSYNC_RAISEPRIO:
+        /* raise the priority of a salvage */
+        res.hdr.response = SALVSYNC_com_RaisePrio(&scom, &sres);
+        break;
+    case SALVSYNC_QUERY:
+        /* query whether a volume is done salvaging */
+        res.hdr.response = SALVSYNC_com_Query(&scom, &sres);
+        break;
+    case SYNC_COM_CHANNEL_CLOSE:
+        res.hdr.response = SYNC_OK;
+        res.hdr.flags |= SYNC_FLAG_CHANNEL_SHUTDOWN;
+        break;
+    default:
+        res.hdr.response = SYNC_BAD_COMMAND;
+        break;
+    }
+
+    /* every successfully parsed command gets the current queue lengths */
+    sres_hdr.sq_len = salvageQueue.total_len;
+    sres_hdr.pq_len = pendingQueue.len;
+    VOL_UNLOCK;
+
+ respond:
+    SYNC_putRes(fd, &res);
+    if (res.hdr.flags & SYNC_FLAG_CHANNEL_SHUTDOWN) {
+        SALVSYNC_Drop(fd);
+    }
+}
+
+/*
+ * Handle SALVSYNC_SALVAGE: schedule a salvage for the volume named in
+ * the command.
+ *
+ * - No node exists yet: allocate one, hash it and queue it.
+ * - A node exists and is idle (previous salvage finished, failed, or
+ *   the request was cancelled): refresh its command and queue it again
+ *   with priority 0.
+ * - A node exists and is queued or being salvaged: leave it alone.
+ *
+ * On success the node's state and priority are reported in the response.
+ *
+ * Returns SYNC_OK, SYNC_FAILED (malformed partition name) or
+ * SYNC_DENIED (out of memory, or the node could not be queued).
+ */
+static afs_int32
+SALVSYNC_com_Salvage(SALVSYNC_command * com, SALVSYNC_response * res)
+{
+    afs_int32 code = SYNC_OK;
+    struct SalvageQueueNode * node;
+
+    if (SYNC_verifyProtocolString(com->sop->partName, sizeof(com->sop->partName))) {
+        code = SYNC_FAILED;
+        res->hdr->reason = SYNC_REASON_MALFORMED_PACKET;
+        goto done;
+    }
+
+    node = LookupNodeByCommand(com->sop);
+
+    /* schedule a salvage for this volume */
+    if (node != NULL) {
+        switch (node->state) {
+        case SALVSYNC_STATE_UNKNOWN:
+            /* a cancelled request stays in the hash in this state;
+             * it must be possible to schedule the volume again */
+        case SALVSYNC_STATE_ERROR:
+        case SALVSYNC_STATE_DONE:
+            memcpy(&node->command.com, com->hdr, sizeof(SYNC_command_hdr));
+            memcpy(&node->command.sop, com->sop, sizeof(SALVSYNC_command_hdr));
+            node->command.sop.prio = 0;
+            if (AddToSalvageQueue(node)) {
+                code = SYNC_DENIED;
+            }
+            break;
+        default:
+            /* already queued or being salvaged */
+            break;
+        }
+    } else {
+        node = (struct SalvageQueueNode *) malloc(sizeof(struct SalvageQueueNode));
+        if (node == NULL) {
+            code = SYNC_DENIED;
+            goto done;
+        }
+        memset(node, 0, sizeof(struct SalvageQueueNode));
+        memcpy(&node->command.com, com->hdr, sizeof(SYNC_command_hdr));
+        memcpy(&node->command.sop, com->sop, sizeof(SALVSYNC_command_hdr));
+        AddNodeToHash(node);
+        if (AddToSalvageQueue(node)) {
+            /* roll back */
+            DeleteNodeFromHash(node);
+            free(node);
+            node = NULL;
+            code = SYNC_DENIED;
+            goto done;
+        }
+    }
+
+    res->hdr->flags |= SALVSYNC_FLAG_VOL_STATS_VALID;
+    res->sop->state = node->state;
+    res->sop->prio = node->command.sop.prio;
+
+ done:
+    return code;
+}
+
+/*
+ * Handle SALVSYNC_CANCEL: take a volume off the salvage queue if it is
+ * still waiting there.
+ *
+ * The response reports the node's priority and state as they were
+ * before the cancel; an unknown volume is reported as
+ * SALVSYNC_STATE_UNKNOWN with priority 0.
+ *
+ * Returns SYNC_OK, or SYNC_FAILED for a malformed partition name.
+ */
+static afs_int32
+SALVSYNC_com_Cancel(SALVSYNC_command * com, SALVSYNC_response * res)
+{
+    struct SalvageQueueNode * target;
+
+    if (SYNC_verifyProtocolString(com->sop->partName, sizeof(com->sop->partName))) {
+        res->hdr->reason = SYNC_REASON_MALFORMED_PACKET;
+        return SYNC_FAILED;
+    }
+
+    target = LookupNodeByCommand(com->sop);
+    if (target != NULL) {
+        res->hdr->flags |= SALVSYNC_FLAG_VOL_STATS_VALID;
+        res->sop->prio = target->command.sop.prio;
+        res->sop->state = target->state;
+        if (target->state == SALVSYNC_STATE_QUEUED) {
+            DeleteFromSalvageQueue(target);
+        }
+    } else {
+        res->sop->state = SALVSYNC_STATE_UNKNOWN;
+        res->sop->prio = 0;
+    }
+
+    return SYNC_OK;
+}
+
+/*
+ * Handle SALVSYNC_CANCELALL: remove every node from the salvage queue
+ * of every partition on DiskPartitionList.  Salvages already moved to
+ * the pending queue are not touched.
+ *
+ * com and res are unused.  Always returns SYNC_OK.
+ */
+static afs_int32
+SALVSYNC_com_CancelAll(SALVSYNC_command * com, SALVSYNC_response * res)
+{
+ struct SalvageQueueNode * np, *nnp;
+ struct DiskPartition * dp;
+
+ for (dp = DiskPartitionList ; dp ; dp = dp->next) {
+ /* queue_Scan keeps the next element in nnp, so removing np is safe */
+ for (queue_Scan(&salvageQueue.part[dp->index], np, nnp, SalvageQueueNode)) {
+ DeleteFromSalvageQueue(np);
+ }
+ }
+
+ return SYNC_OK;
+}
+
+/*
+ * Handle SALVSYNC_RAISEPRIO: raise the priority of a salvage.
+ *
+ * - unknown volume: behaves like SALVSYNC_SALVAGE (a new request is
+ *   scheduled), then the node is looked up again for reporting
+ * - queued: the node is re-sorted within its partition queue
+ * - being salvaged: nothing to do
+ * - finished or failed: a new salvage is scheduled via
+ *   SALVSYNC_com_Salvage
+ *
+ * The response reports the node's resulting priority and state, or
+ * SALVSYNC_STATE_UNKNOWN / priority 0 when no node exists.
+ *
+ * Returns SYNC_FAILED for a malformed partition name, otherwise SYNC_OK
+ * or whatever SALVSYNC_com_Salvage returned.
+ */
+static afs_int32
+SALVSYNC_com_RaisePrio(SALVSYNC_command * com, SALVSYNC_response * res)
+{
+ afs_int32 code = SYNC_OK;
+ struct SalvageQueueNode * node;
+
+ if (SYNC_verifyProtocolString(com->sop->partName, sizeof(com->sop->partName))) {
+ code = SYNC_FAILED;
+ res->hdr->reason = SYNC_REASON_MALFORMED_PACKET;
+ goto done;
+ }
+
+ node = LookupNodeByCommand(com->sop);
+
+ /* raise the priority of a salvage */
+ if (node == NULL) {
+ code = SALVSYNC_com_Salvage(com, res);
+ /* pick up the node that was (possibly) just created */
+ node = LookupNodeByCommand(com->sop);
+ } else {
+ switch (node->state) {
+ case SALVSYNC_STATE_QUEUED:
+ RaiseCommandPrio(node, com->sop);
+ break;
+ case SALVSYNC_STATE_SALVAGING:
+ break;
+ case SALVSYNC_STATE_ERROR:
+ case SALVSYNC_STATE_DONE:
+ code = SALVSYNC_com_Salvage(com, res);
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (node == NULL) {
+ res->sop->prio = 0;
+ res->sop->state = SALVSYNC_STATE_UNKNOWN;
+ } else {
+ res->hdr->flags |= SALVSYNC_FLAG_VOL_STATS_VALID;
+ res->sop->prio = node->command.sop.prio;
+ res->sop->state = node->state;
+ }
+
+ done:
+ return code;
+}
+
+/*
+ * Handle SALVSYNC_QUERY: report the salvage state and priority of a
+ * volume.  A volume with no node is reported as SALVSYNC_STATE_UNKNOWN
+ * with priority 0 and without SALVSYNC_FLAG_VOL_STATS_VALID.
+ *
+ * Returns SYNC_OK, or SYNC_FAILED for a malformed partition name.
+ */
+static afs_int32
+SALVSYNC_com_Query(SALVSYNC_command * com, SALVSYNC_response * res)
+{
+    struct SalvageQueueNode * found;
+
+    if (SYNC_verifyProtocolString(com->sop->partName, sizeof(com->sop->partName))) {
+        res->hdr->reason = SYNC_REASON_MALFORMED_PACKET;
+        return SYNC_FAILED;
+    }
+
+    /* query whether a volume is done salvaging */
+    found = LookupNodeByCommand(com->sop);
+    if (found != NULL) {
+        res->hdr->flags |= SALVSYNC_FLAG_VOL_STATS_VALID;
+        res->sop->state = found->state;
+        res->sop->prio = found->command.sop.prio;
+    } else {
+        res->sop->state = SALVSYNC_STATE_UNKNOWN;
+        res->sop->prio = 0;
+    }
+
+    return SYNC_OK;
+}
+
+/*
+ * Drop a client connection: unregister its handler, close the socket,
+ * and make sure the accept handler is registered again (it may have
+ * been turned off by SALVSYNC_newconnection when the table was full).
+ */
+static void
+SALVSYNC_Drop(int fd)
+{
+ RemoveHandler(fd);
+#ifdef AFS_NT40_ENV
+ closesocket(fd);
+#else
+ close(fd);
+#endif
+ AcceptOn();
+}
+
+static int AcceptHandler = -1;	/* handler id for accept, if turned on */
+
+/* register the accept handler for the listening socket, unless it is
+ * already registered; failure to register trips an assert */
+static void
+AcceptOn(void)
+{
+    if (AcceptHandler != -1)
+        return;
+
+    assert(AddHandler(AcceptSd, SALVSYNC_newconnection));
+    AcceptHandler = FindHandler(AcceptSd);
+}
+
+/* unregister the accept handler for the listening socket, if it is
+ * currently registered */
+static void
+AcceptOff(void)
+{
+    if (AcceptHandler == -1)
+        return;
+
+    assert(RemoveHandler(AcceptSd));
+    AcceptHandler = -1;
+}
+
+/* The multiple FD handling code. */
+
+/* Parallel arrays forming the handler table: slot i is free when
+ * HandlerFD[i] == -1, otherwise HandlerProc[i] is called with
+ * HandlerFD[i] when that descriptor becomes readable.
+ * Access is guarded by SALVSYNC_handler_lock. */
+static int HandlerFD[MAXHANDLERS];
+static void (*HandlerProc[MAXHANDLERS]) (int);
+
+/* mark every slot of the handler table as free */
+static void
+InitHandler(void)
+{
+ register int i;
+ ObtainWriteLock(&SALVSYNC_handler_lock);
+ for (i = 0; i < MAXHANDLERS; i++) {
+ HandlerFD[i] = -1;
+ HandlerProc[i] = NULL;
+ }
+ ReleaseWriteLock(&SALVSYNC_handler_lock);
+}
+
+/*
+ * Invoke the handler of every registered descriptor that is set in
+ * *fdsetp.
+ *
+ * The handler lock is not held while a handler runs, because handlers
+ * add and remove table entries themselves (which takes the write
+ * lock).  The descriptor and function pointer are therefore copied out
+ * of the table while the read lock is still held, rather than being
+ * read from the shared arrays after the lock has been released.
+ */
+static void
+CallHandler(fd_set * fdsetp)
+{
+    register int i;
+    int fd;
+    void (*proc) (int);
+
+    ObtainReadLock(&SALVSYNC_handler_lock);
+    for (i = 0; i < MAXHANDLERS; i++) {
+        fd = HandlerFD[i];
+        if (fd >= 0 && FD_ISSET(fd, fdsetp)) {
+            proc = HandlerProc[i];
+            ReleaseReadLock(&SALVSYNC_handler_lock);
+            (*proc) (fd);
+            ObtainReadLock(&SALVSYNC_handler_lock);
+        }
+    }
+    ReleaseReadLock(&SALVSYNC_handler_lock);
+}
+
+/*
+ * Register aproc as the handler for descriptor afd in the first free
+ * slot of the handler table.
+ *
+ * Returns 1 on success, 0 if the table is full.
+ */
+static int
+AddHandler(int afd, void (*aproc) (int))
+{
+    int slot;
+    int added = 0;
+
+    ObtainWriteLock(&SALVSYNC_handler_lock);
+    for (slot = 0; slot < MAXHANDLERS; slot++) {
+        if (HandlerFD[slot] == -1) {
+            HandlerFD[slot] = afd;
+            HandlerProc[slot] = aproc;
+            added = 1;
+            break;
+        }
+    }
+    ReleaseWriteLock(&SALVSYNC_handler_lock);
+
+    return added;
+}
+
+/*
+ * Return the handler table slot registered for descriptor afd.
+ * Takes the handler read lock itself.  A descriptor that is not in the
+ * table trips an assert.
+ */
+static int
+FindHandler(register int afd)
+{
+ register int i;
+ ObtainReadLock(&SALVSYNC_handler_lock);
+ for (i = 0; i < MAXHANDLERS; i++)
+ if (HandlerFD[i] == afd) {
+ ReleaseReadLock(&SALVSYNC_handler_lock);
+ return i;
+ }
+ ReleaseReadLock(&SALVSYNC_handler_lock); /* just in case */
+ assert(1 == 2);
+ return -1; /* satisfy compiler */
+}
+
+/*
+ * Same as FindHandler(), but does no locking of its own; the caller
+ * is expected to hold SALVSYNC_handler_lock (see RemoveHandler).
+ * A descriptor that is not in the table trips an assert.
+ */
+static int
+FindHandler_r(register int afd)
+{
+ register int i;
+ for (i = 0; i < MAXHANDLERS; i++)
+ if (HandlerFD[i] == afd) {
+ return i;
+ }
+ assert(1 == 2);
+ return -1; /* satisfy compiler */
+}
+
+/*
+ * Free the handler table slot registered for descriptor afd.
+ * The descriptor must be registered (FindHandler_r asserts otherwise).
+ * Always returns 1.
+ */
+static int
+RemoveHandler(register int afd)
+{
+ ObtainWriteLock(&SALVSYNC_handler_lock);
+ HandlerFD[FindHandler_r(afd)] = -1;
+ ReleaseWriteLock(&SALVSYNC_handler_lock);
+ return 1;
+}
+
+/*
+ * Build the select() read set from the handler table.
+ *
+ * On return *fdsetp contains every registered descriptor and *maxfdp
+ * holds the largest of them, or -1 if the table is empty.
+ */
+static void
+GetHandler(fd_set * fdsetp, int *maxfdp)
+{
+    int slot;
+    int fd;
+    int highest = -1;
+
+    FD_ZERO(fdsetp);
+    ObtainReadLock(&SALVSYNC_handler_lock);	/* just in case */
+    for (slot = 0; slot < MAXHANDLERS; slot++) {
+        fd = HandlerFD[slot];
+        if (fd == -1)
+            continue;
+        FD_SET(fd, fdsetp);
+        if (fd > highest)
+            highest = fd;
+    }
+    *maxfdp = highest;
+    ReleaseReadLock(&SALVSYNC_handler_lock);	/* just in case */
+}
+
+/*
+ * Append a node to the salvage queue of the partition named in its
+ * command, mark it SALVSYNC_STATE_QUEUED and wake up waiters on
+ * salvageQueue.cv.
+ *
+ * Returns 0 on success, 1 if the partition name does not map to a
+ * valid id or the partition is not known to VGetPartitionById_r.
+ */
+static int
+AddToSalvageQueue(struct SalvageQueueNode * node)
+{
+    afs_int32 part = volutil_GetPartitionID(node->command.sop.partName);
+
+    /* out-of-range ids are rejected before the partition lookup;
+     * don't enqueue salvage requests for unmounted partitions */
+    if (part < 0 || part > VOLMAXPARTS || !VGetPartitionById_r(part, 0)) {
+        return 1;
+    }
+
+    queue_Append(&salvageQueue.part[part], node);
+    salvageQueue.len[part]++;
+    salvageQueue.total_len++;
+    salvageQueue.last_insert = part;
+
+    node->partition_id = part;
+    node->state = SALVSYNC_STATE_QUEUED;
+
+    assert(pthread_cond_broadcast(&salvageQueue.cv) == 0);
+    return 0;
+}
+
+/*
+ * Take a node off its partition's salvage queue, reset its state to
+ * SALVSYNC_STATE_UNKNOWN and wake up waiters on salvageQueue.cv.
+ * Does nothing if the node is not on a queue.
+ */
+static void
+DeleteFromSalvageQueue(struct SalvageQueueNode * node)
+{
+    if (!queue_IsOnQueue(node))
+        return;
+
+    queue_Remove(node);
+    salvageQueue.len[node->partition_id]--;
+    salvageQueue.total_len--;
+    node->state = SALVSYNC_STATE_UNKNOWN;
+    assert(pthread_cond_broadcast(&salvageQueue.cv) == 0);
+}
+
+/* append a node to the pending queue, mark it SALVSYNC_STATE_SALVAGING
+ * and broadcast the pending queue's change condition */
+static void
+AddToPendingQueue(struct SalvageQueueNode * node)
+{
+ queue_Append(&pendingQueue, node);
+ pendingQueue.len++;
+ node->state = SALVSYNC_STATE_SALVAGING;
+ assert(pthread_cond_broadcast(&pendingQueue.queue_change_cv) == 0);
+}
+
+/*
+ * Take a node off the pending queue, reset its state to
+ * SALVSYNC_STATE_UNKNOWN and broadcast the pending queue's change
+ * condition.  Does nothing if the node is not on a queue.
+ */
+static void
+DeleteFromPendingQueue(struct SalvageQueueNode * node)
+{
+    if (!queue_IsOnQueue(node))
+        return;
+
+    queue_Remove(node);
+    pendingQueue.len--;
+    node->state = SALVSYNC_STATE_UNKNOWN;
+    assert(pthread_cond_broadcast(&pendingQueue.queue_change_cv) == 0);
+}
+
+/*
+ * Search the pending queue for the node whose volume id and partition
+ * name match the query.  Returns the node, or NULL if none matches.
+ */
+static struct SalvageQueueNode *
+LookupPendingCommand(SALVSYNC_command_hdr * qry)
+{
+    struct SalvageQueueNode * cur, * nxt;
+
+    for (queue_Scan(&pendingQueue, cur, nxt, SalvageQueueNode)) {
+        if (cur->command.sop.volume != qry->volume)
+            continue;
+        if (strncmp(cur->command.sop.partName, qry->partName,
+                    sizeof(qry->partName)) == 0)
+            return cur;
+    }
+
+    return NULL;
+}
+
+/*
+ * Search the pending queue for the node whose pid field equals pid.
+ * Returns the node, or NULL if none matches.
+ */
+static struct SalvageQueueNode *
+LookupPendingCommandByPid(int pid)
+{
+    struct SalvageQueueNode * cur, * nxt;
+
+    for (queue_Scan(&pendingQueue, cur, nxt, SalvageQueueNode)) {
+        if (cur->pid == pid)
+            return cur;
+    }
+
+    return NULL;
+}
+
+
+/* raise the priority of a previously scheduled salvage
+ *
+ * node must currently be on its partition's salvage queue (asserted).
+ * Its priority is set to com->prio and it is then moved towards the
+ * head of that queue: to the very front if the current head has a
+ * lower priority, otherwise to just behind the nearest element in
+ * front of it that has a strictly higher priority (or to the front if
+ * there is no such element). */
+static void
+RaiseCommandPrio(struct SalvageQueueNode * node, SALVSYNC_command_hdr * com)
+{
+ struct SalvageQueueNode *np, *nnp;
+ afs_int32 id;
+
+ assert(queue_IsOnQueue(node));
+
+ node->command.sop.prio = com->prio;
+ id = node->partition_id;
+ if (queue_First(&salvageQueue.part[id], SalvageQueueNode)->command.sop.prio < com->prio) {
+ /* new priority beats the current head: go straight to the front */
+ queue_Remove(node);
+ queue_Prepend(&salvageQueue.part[id], node);
+ } else {
+ /* walk from node towards the head looking for a higher priority */
+ for (queue_ScanBackwardsFrom(&salvageQueue.part[id], node, np, nnp, SalvageQueueNode)) {
+ if (np->command.sop.prio > com->prio)
+ break;
+ }
+ if (queue_IsEnd(&salvageQueue.part[id], np)) {
+ /* nothing ahead of node outranks it */
+ queue_Remove(node);
+ queue_Prepend(&salvageQueue.part[id], node);
+ } else if (node != np) {
+ queue_Remove(node);
+ queue_InsertAfter(np, node);
+ }
+ }
+}
+
+/* this will need to be rearchitected if we ever want more than one thread
+ * to wait for new salvage nodes */
+/*
+ * Block until a salvage is scheduled, then pick the next node to work
+ * on, move it from the salvage queue to the pending queue and return it.
+ *
+ * Selection order:
+ *  1. if every queued node belongs to the partition of the most recent
+ *     insert, take the head of that partition's queue;
+ *  2. otherwise, starting at next_part_sched, take the head of the
+ *     first partition that has queued work and no salvage in progress;
+ *  3. otherwise take the head of the first partition with queued work.
+ *
+ * Runs under VOL_LOCK, which is released before returning.  The
+ * returned node has pid reset to 0 and state SALVSYNC_STATE_SALVAGING.
+ */
+struct SalvageQueueNode *
+SALVSYNC_getWork(void)
+{
+    int i;
+    struct DiskPartition * dp = NULL, * fdp;
+    static afs_int32 next_part_sched = 0;	/* partition id at which the next scan starts */
+    struct SalvageQueueNode *node = NULL;
+
+    VOL_LOCK;
+
+    /*
+     * wait for work to be scheduled
+     * if there are no disk partitions, just sit in this wait loop forever
+     */
+    while (!salvageQueue.total_len || !DiskPartitionList) {
+        assert(pthread_cond_wait(&salvageQueue.cv, &vol_glock_mutex) == 0);
+    }
+
+
+    /*
+     * short circuit for simple case where only one partition has
+     * scheduled salvages
+     */
+    if (salvageQueue.last_insert >= 0 && salvageQueue.last_insert <= VOLMAXPARTS &&
+        (salvageQueue.total_len == salvageQueue.len[salvageQueue.last_insert])) {
+        node = queue_First(&salvageQueue.part[salvageQueue.last_insert], SalvageQueueNode);
+        goto have_node;
+    }
+
+
+    /*
+     * ok, more than one partition has scheduled salvages.
+     * now search for partitions with scheduled salvages, but no pending salvages.
+     */
+    dp = VGetPartitionById_r(next_part_sched, 0);
+    if (!dp) {
+        dp = DiskPartitionList;
+    }
+    fdp = dp;
+
+    /* one full lap around the (circularly treated) partition list */
+    for (i=0 ;
+         !i || dp != fdp ;
+         dp = (dp->next) ? dp->next : DiskPartitionList, i++ ) {
+        if (!partition_salvaging[dp->index] && salvageQueue.len[dp->index]) {
+            node = queue_First(&salvageQueue.part[dp->index], SalvageQueueNode);
+            goto have_node;
+        }
+    }
+
+
+    /*
+     * all partitions with scheduled salvages have at least one pending.
+     * now do an exhaustive search for a scheduled salvage.
+     */
+    dp = fdp;
+
+    for (i=0 ;
+         !i || dp != fdp ;
+         dp = (dp->next) ? dp->next : DiskPartitionList, i++ ) {
+        if (salvageQueue.len[dp->index]) {
+            node = queue_First(&salvageQueue.part[dp->index], SalvageQueueNode);
+            goto have_node;
+        }
+    }
+
+    /* we should never reach this line */
+    assert(1==2);
+
+ have_node:
+    assert(node != NULL);
+    node->pid = 0;
+    partition_salvaging[node->partition_id]++;
+    DeleteFromSalvageQueue(node);
+    AddToPendingQueue(node);
+
+    /* dp is only set when one of the partition scans above ran */
+    if (dp) {
+        /* update next_part_sched field */
+        if (dp->next) {
+            next_part_sched = dp->next->index;
+        } else if (DiskPartitionList) {
+            next_part_sched = DiskPartitionList->index;
+        } else {
+            next_part_sched = -1;
+        }
+    }
+
+    VOL_UNLOCK;
+    return node;
+}
+
+/*
+ * Record the completion of a salvage: take the node off the pending
+ * queue, decrement its partition's in-progress count, and set its
+ * state to SALVSYNC_STATE_DONE (result == 0) or SALVSYNC_STATE_ERROR.
+ * The _r suffix follows the file's convention for routines that do no
+ * locking of their own; the public wrappers call this under VOL_LOCK.
+ */
+static void
+SALVSYNC_doneWork_r(struct SalvageQueueNode * node, int result)
+{
+    afs_int32 part;
+
+    DeleteFromPendingQueue(node);
+
+    part = node->partition_id;
+    if (part >= 0 && part <= VOLMAXPARTS) {
+        partition_salvaging[part]--;
+    }
+
+    node->state = (result == 0) ? SALVSYNC_STATE_DONE : SALVSYNC_STATE_ERROR;
+}
+
+/* locked wrapper around SALVSYNC_doneWork_r(): report that the salvage
+ * described by node finished with the given result (0 == success) */
+void
+SALVSYNC_doneWork(struct SalvageQueueNode * node, int result)
+{
+ VOL_LOCK;
+ SALVSYNC_doneWork_r(node, result);
+ VOL_UNLOCK;
+}
+
+/*
+ * Report that the salvage whose node carries the given pid finished
+ * with the given result (0 == success).  A pid with no node on the
+ * pending queue is silently ignored.
+ */
+void
+SALVSYNC_doneWorkByPid(int pid, int result)
+{
+    struct SalvageQueueNode * finished;
+
+    VOL_LOCK;
+    finished = LookupPendingCommandByPid(pid);
+    if (finished) {
+        SALVSYNC_doneWork_r(finished, result);
+    }
+    VOL_UNLOCK;
+}
+
+#endif /* AFS_DEMAND_ATTACH_FS */
--- /dev/null
+/*
+ * Copyright 2006, Sine Nomine Associates and others.
+ * All Rights Reserved.
+ *
+ * This software has been released under the terms of the IBM Public
+ * License. For details, see the LICENSE file in the top-level source
+ * directory or online at http://www.openafs.org/dl/license10.html
+ */
+
+/*
+ * demand attach fs
+ * salvage server interface
+ */
+#ifndef _AFS_VOL_SALVSYNC_H
+#define _AFS_VOL_SALVSYNC_H
+
+#ifdef AFS_DEMAND_ATTACH_FS
+#include "daemon_com.h"
+
+
+#define SALVSYNC_PROTO_VERSION 1
+
+
+/* SALVSYNC command codes */
+#define SALVSYNC_NOP SYNC_COM_CODE_DECL(0) /* just return stats */
+#define SALVSYNC_SALVAGE SYNC_COM_CODE_DECL(1) /* schedule a salvage */
+#define SALVSYNC_CANCEL SYNC_COM_CODE_DECL(2) /* cancel a salvage that is still queued */
+#define SALVSYNC_RAISEPRIO SYNC_COM_CODE_DECL(3) /* move a salvage operation to
+ * the head of the work queue */
+#define SALVSYNC_QUERY SYNC_COM_CODE_DECL(4) /* query the status of a salvage */
+#define SALVSYNC_CANCELALL SYNC_COM_CODE_DECL(5) /* cancel all queued (not yet started) salvages */
+
+/* SALVSYNC reason codes */
+#define SALVSYNC_WHATEVER SYNC_REASON_CODE_DECL(0) /* XXXX */
+#define SALVSYNC_ERROR SYNC_REASON_CODE_DECL(1) /* volume is in error state */
+#define SALVSYNC_OPERATOR SYNC_REASON_CODE_DECL(2) /* operator forced salvage */
+#define SALVSYNC_SHUTDOWN SYNC_REASON_CODE_DECL(3) /* cancel due to shutdown */
+#define SALVSYNC_NEEDED SYNC_REASON_CODE_DECL(4) /* needsSalvaged flag set */
+
+/* SALVSYNC response codes */
+
+/* SALVSYNC flags */
+#define SALVSYNC_FLAG_VOL_STATS_VALID SYNC_FLAG_CODE_DECL(0) /* volume stats in response are valid */
+
+/* SALVSYNC command state fields */
+#define SALVSYNC_STATE_UNKNOWN 0 /* unknown state */
+#define SALVSYNC_STATE_QUEUED 1 /* salvage request on queue */
+#define SALVSYNC_STATE_SALVAGING 2 /* salvage is happening now */
+#define SALVSYNC_STATE_ERROR 3 /* salvage ended in an error */
+#define SALVSYNC_STATE_DONE 4 /* last salvage ended successfully */
+
+
+/* SALVSYNC-specific payload of a command: which volume, and at what priority */
+typedef struct SALVSYNC_command_hdr {
+ afs_uint32 prio; /* requested priority; larger values are scheduled first */
+ afs_uint32 volume; /* volume id */
+ char partName[16]; /* partition name, e.g. /vicepa */
+} SALVSYNC_command_hdr;
+
+/* SALVSYNC-specific payload of a response; state and prio are only
+ * meaningful when SALVSYNC_FLAG_VOL_STATS_VALID is set in the flags */
+typedef struct SALVSYNC_response_hdr {
+ afs_int32 state; /* one of the SALVSYNC_STATE_* values */
+ afs_int32 prio; /* current priority of the volume's salvage request */
+ afs_int32 sq_len; /* total length of the salvage (waiting) queue */
+ afs_int32 pq_len; /* length of the pending (in progress) queue */
+} SALVSYNC_response_hdr;
+
+typedef struct SALVSYNC_command {
+ SYNC_command_hdr * hdr;
+ SALVSYNC_command_hdr * sop;
+ SYNC_command * com;
+} SALVSYNC_command;
+
+typedef struct SALVSYNC_response {
+ SYNC_response_hdr * hdr;
+ SALVSYNC_response_hdr * sop;
+ SYNC_response * res;
+} SALVSYNC_response;
+
+typedef struct SALVSYNC_command_info {
+ SYNC_command_hdr com;
+ SALVSYNC_command_hdr sop;
+} SALVSYNC_command_info;
+
+/* one salvage request, tracked by the salvage server */
+struct SalvageQueueNode {
+ struct rx_queue q; /* link on a salvage queue or the pending queue; must be first */
+ struct rx_queue hash_chain; /* link on the (volume id) hash chain */
+ afs_uint32 state; /* one of the SALVSYNC_STATE_* values */
+ struct SALVSYNC_command_info command; /* copy of the command that scheduled this salvage */
+ afs_int32 partition_id; /* id of the partition queue the node was placed on */
+ int pid; /* matched by SALVSYNC_doneWorkByPid(); reset to 0 on dispatch */
+};
+
+
+/* Prototypes from salvsync.c */
+
+/* online salvager client interfaces */
+extern int SALVSYNC_clientFinis(void);
+extern int SALVSYNC_clientInit(void);
+extern int SALVSYNC_clientReconnect(void);
+extern afs_int32 SALVSYNC_askSalv(SYNC_command * com, SYNC_response * res);
+extern afs_int32 SALVSYNC_SalvageVolume(VolumeId volume, char *partName, int com, int reason,
+ afs_uint32 prio, SYNC_response * res);
+
+/* salvage server interfaces */
+extern void SALVSYNC_salvInit(void);
+extern struct SalvageQueueNode * SALVSYNC_getWork(void);
+extern void SALVSYNC_doneWork(struct SalvageQueueNode *, int result);
+extern void SALVSYNC_doneWorkByPid(int pid, int result);
+
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+#endif /* _AFS_VOL_SALVSYNC_H */
#include "afs/assert.h"
#include "filesignal.h"
#include "vutils.h"
+#include "daemon_com.h"
#include "fssync.h"
#include <afs/auxinode.h>
#include <afs/dir.h>
#include "afs/assert.h"
#include "filesignal.h"
#include "vutils.h"
+#include "daemon_com.h"
#include "fssync.h"
#include <afs/auxinode.h>
#include <afs/dir.h>
* This software has been released under the terms of the IBM Public
* License. For details, see the LICENSE file in the top-level source
* directory or online at http://www.openafs.org/dl/license10.html
+ *
+ * Portions Copyright (c) 2006 Sine Nomine Associates
*/
/*
#include "vnode.h"
#include "volume.h"
#include "partition.h"
+#include "salvsync.h"
#if defined(AFS_SGI_ENV)
#include "sys/types.h"
#include "fcntl.h"
struct VnodeClassInfo VnodeClassInfo[nVNODECLASSES];
private int moveHash(register Vnode * vnp, bit32 newHash);
-void StickOnLruChain_r(register Vnode * vnp,
- register struct VnodeClassInfo *vcp);
+private void StickOnLruChain_r(register Vnode * vnp,
+ register struct VnodeClassInfo *vcp);
#define BAD_IGET -1000
#define VNODE_HASH(volumeptr,vnodenumber)\
((volumeptr->vnodeHashOffset + vnodenumber)&(VNODE_HASH_TABLE_SIZE-1))
+/*
+ * new support to secondarily hash vnodes by volume id
+ *
+ * maps a volume id onto an index into VnodeHashByVolumeTable; the
+ * argument is parenthesized so that expression arguments bind correctly
+ */
+#define VNVOLUME_HASH(volumeId) ((volumeId)&(VolumeHashTable.Mask))
+
+#include "rx/rx_queue.h"
+/* head of one chain of the vnode-by-volume hash table */
+typedef struct VnodeHashByVolumeChainHead {
+ struct rx_queue queue; /* chain of vnodes; must be first so the head can be used as an rx_queue */
+ int len; /* number of vnodes on the chain */
+ /* someday we could put a per-chain lock here... */
+#ifdef AFS_DEMAND_ATTACH_FS
+ int busy; /* while non-zero, add/delete wait on chain_busy_cv */
+ pthread_cond_t chain_busy_cv; /* waited on while busy is set */
+#endif /* AFS_DEMAND_ATTACH_FS */
+} VnodeHashByVolumeChainHead;
+private VnodeHashByVolumeChainHead *VnodeHashByVolumeTable = NULL;
+
+/*
+ * Allocate and initialize the vnode-by-volume hash table, with one
+ * chain per volume hash table bucket (VolumeHashTable.Size).
+ * Allocation failure trips an assert.
+ */
+void
+VInitVnHashByVolume(void)
+{
+    register int bucket;
+    VnodeHashByVolumeChainHead *head;
+
+    /* calloc leaves len (and busy) zeroed */
+    VnodeHashByVolumeTable = (VnodeHashByVolumeChainHead *)
+        calloc(VolumeHashTable.Size, sizeof(VnodeHashByVolumeChainHead));
+    assert(VnodeHashByVolumeTable != NULL);
+
+    for (bucket = 0; bucket < VolumeHashTable.Size; bucket++) {
+        head = &VnodeHashByVolumeTable[bucket];
+        queue_Init(head);
+#ifdef AFS_DEMAND_ATTACH_FS
+        assert(pthread_cond_init(&head->chain_busy_cv, NULL) == 0);
+#endif /* AFS_DEMAND_ATTACH_FS */
+    }
+}
+
+/*
+ * Put a vnode on the by-volume hash chain selected by its volume's
+ * hashid.  Does nothing if the vnode is already on a chain.
+ * Under demand attach, waits (on vol_glock_mutex) while the chain is
+ * marked busy.
+ */
+static void
+AddToVnHashByVolumeTable(register Vnode * vnp)
+{
+ VnodeHashByVolumeChainHead * head;
+
+ if (queue_IsOnQueue(vnp))
+ return;
+
+ head = &VnodeHashByVolumeTable[VNVOLUME_HASH(vnp->volumePtr->hashid)];
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ while (head->busy) {
+ /* if the hash table is busy, wait */
+ assert(pthread_cond_wait(&head->chain_busy_cv, &vol_glock_mutex) == 0);
+ }
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+ head->len++;
+ queue_Append(head, vnp);
+}
+
+/*
+ * Take a vnode off the by-volume hash chain selected by its volume's
+ * hashid.  Does nothing if the vnode is not on a chain.
+ *
+ * for demand-attach, caller MUST hold a ref count on vp; the routine
+ * waits (on vol_glock_mutex) while the chain is marked busy.
+ */
+static void
+DeleteFromVnHashByVolumeTable(register Vnode * vnp)
+{
+    VnodeHashByVolumeChainHead * chain;
+
+    if (queue_IsOnQueue(vnp)) {
+        chain = &VnodeHashByVolumeTable[VNVOLUME_HASH(vnp->volumePtr->hashid)];
+
+#ifdef AFS_DEMAND_ATTACH_FS
+        /* if the hash table is busy, wait */
+        while (chain->busy) {
+            assert(pthread_cond_wait(&chain->chain_busy_cv, &vol_glock_mutex) == 0);
+        }
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+        chain->len--;
+        queue_Remove(vnp);
+    }
+}
+
/* Code to invalidate a vnode entry. Called when we've damaged a vnode, and want
to prevent future VGetVnode's from applying to it. Leaves it in the same hash bucket
but that shouldn't be important. */
unique = vp->nextVnodeUnique++;
if (vp->nextVnodeUnique > V_uniquifier(vp)) {
- VUpdateVolume_r(ec, vp);
+ VUpdateVolume_r(ec, vp, VOL_UPDATE_WAIT);
if (*ec)
return NULL;
}
}
/* Find a slot in the bit map */
- bitNumber = VAllocBitmapEntry_r(ec, vp, &vp->vnodeIndex[class]);
+ bitNumber = VAllocBitmapEntry_r(ec, vp, &vp->vnodeIndex[class],
+ VOL_ALLOC_BITMAP_WAIT);
if (*ec)
return NULL;
vnodeNumber = bitNumberToVnodeNumber(bitNumber, class);
vnp->volumePtr = vp;
vnp->cacheCheck = vp->cacheCheck;
vnp->nUsers = 1;
- moveHash(vnp, newHash);
/* This will never block */
ObtainWriteLock(&vnp->lock);
#ifdef AFS_PTHREAD_ENV
FdHandle_t *fdP;
off_t off = vnodeIndexOffset(vcp, vnodeNumber);
+ /* XXX we have a potential race here if two threads
+ * allocate new vnodes at the same time, and they
+ * both decide it's time to extend the index
+ * file size... */
+
VOL_UNLOCK;
fdP = IH_OPEN(ihP);
- if (fdP == NULL)
- Abort("VAllocVnode: can't open index file!\n");
- if ((size = FDH_SIZE(fdP)) < 0)
- Abort("VAllocVnode: can't stat index file!\n");
- if (FDH_SEEK(fdP, off, SEEK_SET) < 0)
- Abort("VAllocVnode: can't seek on index file!\n");
- if (off < size) {
- if (FDH_READ(fdP, &vnp->disk, vcp->diskSize) == vcp->diskSize) {
- if (vnp->disk.type != vNull)
- Abort("VAllocVnode: addled bitmap or index!\n");
+ if (fdP == NULL) {
+ Log("VAllocVnode: can't open index file!\n");
+ goto error_encountered;
+ }
+ if ((size = FDH_SIZE(fdP)) < 0) {
+ Log("VAllocVnode: can't stat index file!\n");
+ goto error_encountered;
+ }
+ if (FDH_SEEK(fdP, off, SEEK_SET) < 0) {
+ Log("VAllocVnode: can't seek on index file!\n");
+ goto error_encountered;
+ }
+ if (off + vcp->diskSize <= size) {
+ if (FDH_READ(fdP, &vnp->disk, vcp->diskSize) != vcp->diskSize) {
+ Log("VAllocVnode: can't read index file!\n");
+ goto error_encountered;
+ }
+ if (vnp->disk.type != vNull) {
+ Log("VAllocVnode: addled bitmap or index!\n");
+ goto error_encountered;
}
} else {
/* growing file - grow in a reasonable increment */
free(buf);
}
FDH_CLOSE(fdP);
+ fdP = NULL;
VOL_LOCK;
+ goto sane;
+
+ error_encountered:
+#ifdef AFS_DEMAND_ATTACH_FS
+ VOL_LOCK;
+ VRequestSalvage_r(vp, SALVSYNC_ERROR, 0);
+ if (fdP)
+ FDH_CLOSE(fdP);
+ VInvalidateVnode_r(vnp);
+ StickOnLruChain_r(vnp, vcp);
+ return NULL;
+#else
+ assert(1 == 2);
+#endif
+
}
+ sane:
VNLog(4, 2, vnodeNumber, (afs_int32) vnp);
+ AddToVnHashByVolumeTable(vnp);
+ moveHash(vnp, newHash);
}
VNLog(5, 1, (afs_int32) vnp);
vcp->reads++;
vnp = VGetFreeVnode_r(vcp);
/* Remove it from the old hash chain */
+ if (vnp->volumePtr)
+ DeleteFromVnHashByVolumeTable(vnp);
moveHash(vnp, newHash);
/* Remove it from the LRU chain */
if (vnp == vcp->lruHead)
vnp->volumePtr = vp;
vnp->cacheCheck = vp->cacheCheck;
vnp->nUsers = 1;
+ AddToVnHashByVolumeTable(vnp);
/* This will never block */
ObtainWriteLock(&vnp->lock);
if (fdP == NULL) {
Log("VGetVnode: can't open index dev=%u, i=%s\n", vp->device,
PrintInode(NULL, vp->vnodeIndex[class].handle->ih_ino));
+#ifdef AFS_DEMAND_ATTACH_FS
+ VOL_LOCK;
+ VRequestSalvage_r(vp, SALVSYNC_ERROR, 0);
+ VOL_UNLOCK;
+#endif
*ec = VIO;
mlkReason = 9;
} else if (FDH_SEEK(fdP, vnodeIndexOffset(vcp, vnodeNumber), SEEK_SET)
< 0) {
Log("VGetVnode: can't seek on index file vn=%u\n", vnodeNumber);
+#ifdef AFS_DEMAND_ATTACH_FS
+ VOL_LOCK;
+ VRequestSalvage_r(vp, SALVSYNC_ERROR, 0);
+ VOL_UNLOCK;
+#endif
*ec = VIO;
mlkReason = 10;
FDH_REALLYCLOSE(fdP);
* is not allocated */
if (n == -1 && errno == EIO) {
Log("VGetVnode: Couldn't read vnode %u, volume %u (%s); volume needs salvage\n", vnodeNumber, V_id(vp), V_name(vp));
- VForceOffline_r(vp);
+#ifdef AFS_DEMAND_ATTACH_FS
+ if (programType == fileServer) {
+ VRequestSalvage_r(vp, SALVSYNC_ERROR, 0);
+ *ec = VSALVAGING;
+ } else {
+ VForceOffline_r(vp, 0);
+ *ec = VSALVAGE;
+ }
+#else
+ VForceOffline_r(vp, 0);
*ec = VSALVAGE;
+#endif
mlkReason = 4;
} else {
mlkReason = 5;
*ec = VNOVNODE;
} else {
Log("VGetVnode: Bad magic number, vnode %u, volume %u (%s); volume needs salvage\n", vnodeNumber, V_id(vp), V_name(vp));
+#ifdef AFS_DEMAND_ATTACH_FS
+ if (programType == fileServer) {
+ VRequestSalvage_r(vp, SALVSYNC_ERROR, 0);
+ *ec = VSALVAGING;
+ } else {
+ vp->goingOffline = 1;
+ *ec = VSALVAGE;
+ }
+#else
vp->goingOffline = 1; /* used to call VOffline, but that would mess
* up the volume ref count if called here */
*ec = VSALVAGE;
+#endif
mlkReason = 7;
}
VInvalidateVnode_r(vnp);
/* The vnode has been changed. Write it out to disk */
if (!V_inUse(vp)) {
+#ifdef AFS_DEMAND_ATTACH_FS
+ VRequestSalvage_r(vp, SALVSYNC_ERROR, 0);
+ *ec = VSALVAGING;
+#else
assert(V_needsSalvaged(vp));
*ec = VSALVAGE;
+#endif
} else {
IHandle_t *ihP = vp->vnodeIndex[class].handle;
FdHandle_t *fdP;
VOL_UNLOCK;
fdP = IH_OPEN(ihP);
- if (fdP == NULL)
- Abort("VPutVnode: can't open index file!\n");
+ if (fdP == NULL) {
+ Log("VPutVnode: can't open index file!\n");
+ goto error_encountered;
+ }
offset = vnodeIndexOffset(vcp, vnp->vnodeNumber);
if (FDH_SEEK(fdP, offset, SEEK_SET) < 0) {
- Abort
- ("VPutVnode: can't seek on index file! fdp=0x%x offset=%d, errno=%d\n",
- fdP, offset, errno);
+ Log("VPutVnode: can't seek on index file! fdp=0x%x offset=%d, errno=%d\n",
+ fdP, offset, errno);
+ goto error_encountered;
}
code = FDH_WRITE(fdP, &vnp->disk, vcp->diskSize);
if (code != vcp->diskSize) {
*ec = VIO;
} else {
Log("VPutVnode: Couldn't write vnode %u, volume %u (%s) (error %d)\n", vnp->vnodeNumber, V_id(vnp->volumePtr), V_name(vnp->volumePtr), code);
- VForceOffline_r(vp);
+#ifdef AFS_DEMAND_ATTACH_FS
+ VRequestSalvage_r(vp, SALVSYNC_ERROR, 0);
+ *ec = VSALVAGING;
+#else
+ VForceOffline_r(vp, 0);
*ec = VSALVAGE;
+#endif
}
VOL_UNLOCK;
FDH_REALLYCLOSE(fdP);
FDH_CLOSE(fdP);
}
VOL_LOCK;
+ goto sane;
+
+ error_encountered:
+#ifdef AFS_DEMAND_ATTACH_FS
+ /* XXX instead of dumping core, let's try to request a salvage
+ * and just fail the putvnode */
+ if (fdP)
+ FDH_CLOSE(fdP);
+ VOL_LOCK;
+ VRequestSalvage_r(vp, SALVSYNC_ERROR, 0);
+ *ec = VSALVAGING;
+ goto done;
+#else
+ assert(1 == 2);
+#endif
+
+ sane:
/* If the vnode is to be deleted, and we wrote the vnode out,
* free its bitmap entry. Do after the vnode is written so we
* don't allocate from bitmap before the vnode is written
vnp);
}
+ done:
/* Do not look at disk portion of vnode after this point; it may
* have been deleted above */
if (vnp->nUsers-- == 1)
/* The inode has been changed. Write it out to disk */
if (!V_inUse(vp)) {
+#ifdef AFS_DEMAND_ATTACH_FS
+ VRequestSalvage_r(vp, SALVSYNC_ERROR, 0);
+ *ec = VSALVAGING;
+#else
assert(V_needsSalvaged(vp));
*ec = VSALVAGE;
+#endif
} else {
IHandle_t *ihP = vp->vnodeIndex[class].handle;
FdHandle_t *fdP;
off_t off = vnodeIndexOffset(vcp, vnp->vnodeNumber);
VOL_UNLOCK;
fdP = IH_OPEN(ihP);
- if (fdP == NULL)
- Abort("VPutVnode: can't open index file!\n");
+ if (fdP == NULL) {
+ Log("VPutVnode: can't open index file!\n");
+ goto error_encountered;
+ }
code = FDH_SEEK(fdP, off, SEEK_SET);
- if (code < 0)
- Abort("VPutVnode: can't seek on index file!\n");
+ if (code < 0) {
+ Log("VPutVnode: can't seek on index file!\n");
+ goto error_encountered;
+ }
code = FDH_WRITE(fdP, &vnp->disk, vcp->diskSize);
if (code != vcp->diskSize) {
/*
*ec = VIO;
} else {
Log("VPutVnode: Couldn't write vnode %u, volume %u (%s)\n", vnp->vnodeNumber, V_id(vnp->volumePtr), V_name(vnp->volumePtr));
- VForceOffline_r(vp);
+#ifdef AFS_DEMAND_ATTACH_FS
+ VRequestSalvage_r(vp, SALVSYNC_ERROR, 0);
+ *ec = VSALVAGING;
+#else
+ VForceOffline_r(vp, 0);
*ec = VSALVAGE;
+#endif
}
VOL_UNLOCK;
}
FDH_CLOSE(fdP);
VOL_LOCK;
+ goto sane;
+
+ error_encountered:
+#ifdef AFS_DEMAND_ATTACH_FS
+ if (fdP)
+ FDH_CLOSE(fdP);
+ VOL_LOCK;
+ VRequestSalvage_r(vp, SALVSYNC_ERROR, 0);
+ *ec = VSALVAGING;
+#else
+ assert(1 == 2);
+#endif
+
}
+ sane:
vcp->writes++;
vnp->changed_newTime = vnp->changed_oldTime = 0;
}
return 0;
}
-void
+private void
StickOnLruChain_r(register Vnode * vnp, register struct VnodeClassInfo *vcp)
{
/* Add it to the circular LRU list */
vcp->lruHead = vnp->lruNext;
/* If caching is turned off, set volumeptr to NULL to invalidate the
* entry */
- if (!TrustVnodeCacheEntry)
+ if (!TrustVnodeCacheEntry) {
+ DeleteFromVnHashByVolumeTable(vnp);
vnp->volumePtr = NULL;
+ }
}
/* VCloseVnodeFiles - called when a volume is going off line. All open
VCloseVnodeFiles_r(Volume * vp)
{
int i;
- Vnode *vnp;
+ Vnode *vnp, *nvnp;
+ VnodeHashByVolumeChainHead * head;
- for (i = 0; i < VNODE_HASH_TABLE_SIZE; i++) {
- for (vnp = VnodeHashTable[i]; vnp; vnp = vnp->hashNext) {
- if (vnp->volumePtr == vp) {
- IH_REALLYCLOSE(vnp->handle);
- }
+ /* Scan only the by-volume chain that vp hashes to, instead of walking
+  * every bucket of VnodeHashTable as the removed loop did. */
+ head = &VnodeHashByVolumeTable[VNVOLUME_HASH(vp->hashid)];
+#ifdef AFS_DEMAND_ATTACH_FS
+ /* Wait until no other thread has this chain flagged busy; the wait is
+  * on vol_glock_mutex, so the caller is expected to hold it. */
+ while (head->busy) {
+ assert(pthread_cond_wait(&head->chain_busy_cv, &vol_glock_mutex) == 0);
+ }
+
+ /* Flag the chain busy before VOL_UNLOCK: AddToVnHashByVolumeTable and
+  * DeleteFromVnHashByVolumeTable wait while busy is set, so they do not
+  * relink this chain during the unlocked scan below. */
+ head->busy = 1;
+ VOL_UNLOCK;
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+ for (queue_Scan(head, vnp, nvnp, Vnode)) {
+ if (vnp->volumePtr == vp) {
+ IH_REALLYCLOSE(vnp->handle);
}
}
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ /* Scan finished: retake the lock, clear the busy flag and wake every
+  * thread waiting on chain_busy_cv. */
+ VOL_LOCK;
+ head->busy = 0;
+ assert(pthread_cond_broadcast(&head->chain_busy_cv) == 0);
+#endif /* AFS_DEMAND_ATTACH_FS */
}
/* VReleaseVnodeFiles - called when a volume is going detached. All open
VReleaseVnodeFiles_r(Volume * vp)
{
int i;
- Vnode *vnp;
+ Vnode *vnp, *nvnp;
+ VnodeHashByVolumeChainHead * head;
- for (i = 0; i < VNODE_HASH_TABLE_SIZE; i++) {
- for (vnp = VnodeHashTable[i]; vnp; vnp = vnp->hashNext) {
- if (vnp->volumePtr == vp) {
- IH_RELEASE(vnp->handle);
- }
+ /* Scan only the by-volume chain that vp hashes to, instead of walking
+  * every bucket of VnodeHashTable as the removed loop did. */
+ head = &VnodeHashByVolumeTable[VNVOLUME_HASH(vp->hashid)];
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ /* Wait until no other thread has this chain flagged busy; the wait is
+  * on vol_glock_mutex, so the caller is expected to hold it. */
+ while (head->busy) {
+ assert(pthread_cond_wait(&head->chain_busy_cv, &vol_glock_mutex) == 0);
+ }
+
+ /* Flag the chain busy before VOL_UNLOCK: AddToVnHashByVolumeTable and
+  * DeleteFromVnHashByVolumeTable wait while busy is set, so they do not
+  * relink this chain during the unlocked scan below. */
+ head->busy = 1;
+ VOL_UNLOCK;
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+ for (queue_Scan(head, vnp, nvnp, Vnode)) {
+ if (vnp->volumePtr == vp) {
+ IH_RELEASE(vnp->handle);
}
}
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ /* Scan finished: retake the lock, clear the busy flag and wake every
+  * thread waiting on chain_busy_cv. */
+ VOL_LOCK;
+ head->busy = 0;
+ assert(pthread_cond_broadcast(&head->chain_busy_cv) == 0);
+#endif /* AFS_DEMAND_ATTACH_FS */
}
#define SIZEOF_LARGEDISKVNODE 256
typedef struct Vnode {
+ struct rx_queue vid_hash; /* for vnode by volume id hash */
struct Vnode *hashNext; /* Next vnode on hash conflict chain */
struct Vnode *lruNext; /* Less recently used vnode than this one */
struct Vnode *lruPrev; /* More recently used vnode than this one */
extern Vnode *VAllocVnode_r(Error * ec, struct Volume *vp, VnodeType type);
/*extern VFreeVnode();*/
extern Vnode *VGetFreeVnode_r(struct VnodeClassInfo *vcp);
+extern void VInitVnHashByVolume(void);
*/
-#define SalvageVersion "2.4"
-
-/* Main program file. Define globals. */
-#define MAIN 1
-
#include <afsconfig.h>
#include <afs/param.h>
#include "vnode.h"
#include "volume.h"
#include "partition.h"
+#include "daemon_com.h"
#include "fssync.h"
+#include "salvsync.h"
#include "viceinode.h"
#include "salvage.h"
#include "volinodes.h" /* header magic number, etc. stuff */
+#include "vol-salvage.h"
#ifdef AFS_NT40_ENV
#include <pthread.h>
#endif
#endif
static char *TimeStamp(time_t clock, int precision);
-#define ORPH_IGNORE 0
-#define ORPH_REMOVE 1
-#define ORPH_ATTACH 2
-
int debug; /* -d flag */
int Testing = 0; /* -n flag */
int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
* in the volume header */
-static FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
+FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
#define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
* partition */
VolumeDiskData VolInfo; /* A copy of the last good or salvaged volume header dealt with */
-struct InodeSummary { /* Inode summary file--an entry for each
- * volume in the inode file for a partition */
- VolId volumeId; /* Volume id */
- VolId RWvolumeId; /* RW volume associated */
- int index; /* index into inode file (0, 1, 2 ...) */
- int nInodes; /* Number of inodes for this volume */
- int nSpecialInodes; /* Number of special inodes, i.e. volume
- * header, index, etc. These are all
- * marked (viceinode.h) and will all be sorted
- * to the beginning of the information for
- * this volume. Read-only volumes should
- * ONLY have special inodes (all the other
- * inodes look as if they belong to the
- * original RW volume). */
- Unique maxUniquifier; /* The maximum uniquifier found in all the inodes.
- * This is only useful for RW volumes and is used
- * to compute a new volume uniquifier in the event
- * that the header needs to be recreated. The inode
- * uniquifier may be a truncated version of vnode
- * uniquifier (AFS_3DISPARES). The real maxUniquifer
- * is from the vnodes and later calcuated from it */
- struct VolumeSummary *volSummary;
- /* Either a pointer to the original volume
- * header summary, or constructed summary
- * information */
-} *inodeSummary;
-#define readOnly(isp) ((isp)->volumeId != (isp)->RWvolumeId)
int nVolumesInInodeFile; /* Number of read-write volumes summarized */
int inodeFd; /* File descriptor for inode file */
-struct VolumeSummary { /* Volume summary an entry for each
- * volume in a volume directory.
- * Assumption: one volume directory per
- * partition */
- char *fileName; /* File name on the partition for the volume
- * header */
- struct VolumeHeader header;
- /* volume number, rw volume number, inode
- * numbers of each major component of
- * the volume */
- IHandle_t *volumeInfoHandle;
- byte wouldNeedCallback; /* set if the file server should issue
- * call backs for all the files in this volume when
- * the volume goes back on line */
-};
-
-struct VnodeInfo {
- IHandle_t *handle; /* Inode containing this index */
- int nVnodes; /* Total number of vnodes in index */
- int nAllocatedVnodes; /* Total number actually used */
- int volumeBlockCount; /* Total number of blocks used by volume */
- Inode *inodes; /* Directory only */
- struct VnodeEssence {
- short count; /* Number of references to vnode; MUST BE SIGNED */
- unsigned claimed:1; /* Set when a parent directory containing an entry
- * referencing this vnode is found. The claim
- * is that the parent in "parent" can point to
- * this vnode, and no other */
- unsigned changed:1; /* Set if any parameters (other than the count)
- * in the vnode change. It is determined if the
- * link count has changed by noting whether it is
- * 0 after scanning all directories */
- unsigned salvaged:1; /* Set if this directory vnode has already been salvaged. */
- unsigned todelete:1; /* Set if this vnode is to be deleted (should not be claimed) */
- afs_fsize_t blockCount;
- /* Number of blocks (1K) used by this vnode,
- * approximately */
- VnodeId parent; /* parent in vnode */
- Unique unique; /* Must match entry! */
- char *name; /* Name of directory entry */
- int modeBits; /* File mode bits */
- Inode InodeNumber; /* file's inode */
- int type; /* File type */
- int author; /* File author */
- int owner; /* File owner */
- int group; /* File group */
- } *vnodes;
-} vnodeInfo[nVNODECLASSES];
-
-struct DirSummary {
- struct DirHandle dirHandle;
- VnodeId vnodeNumber;
- Unique unique;
- unsigned haveDot, haveDotDot;
- VolumeId rwVid;
- int copied; /* If the copy-on-write stuff has been applied */
- VnodeId parent;
- char *name;
- char *vname;
- IHandle_t *ds_linkH;
-};
+struct VnodeInfo vnodeInfo[nVNODECLASSES];
struct VolumeSummary *volumeSummaryp; /* Holds all the volumes in a part */
int nVolumes; /* Number of volumes (read-write and read-only)
* in volume summary */
-#ifdef AFS_NT40_ENV
-/* For NT, we can fork the per partition salvagers to gain the required
- * safety against Aborts. But there's too many complex data structures at
- * the per volume salvager layer to easilty copy the data across.
- * childJobNumber is resset from -1 to the job number if this is a
- * per partition child of the main salvager. This information is passed
- * out-of-band in the extra data area setup for the now unused parent/child
- * data transfer.
- */
-#define SALVAGER_MAGIC 0x00BBaaDD
-#define NOT_CHILD -1 /* job numbers start at 0 */
-/* If new options need to be passed to child, add them here. */
-typedef struct {
- int cj_magic;
- int cj_number;
- char cj_part[32];
-} childJob_t;
+/* Definition of tmpdir (0 until a directory is chosen).  It carries an
+ * initializer, so it is a definition and must not be spelled `extern';
+ * the old definition further down in this file is removed by this change. */
+char * tmpdir = 0;
+
+#ifdef AFS_NT40_ENV
/* Child job this process is running. */
childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
-
-int nt_SalvagePartition(char *partName, int jobn);
-int nt_SetupPartitionSalvage(void *datap, int len);
-
-typedef struct {
- struct InodeSummary *svgp_inodeSummaryp;
- int svgp_count;
-} SVGParms_t;
-#define canfork 0
-#else
-#define canfork 1
-#endif
+#endif /* AFS_NT40_ENV */
/* Forward declarations */
/*@printflike@*/ void Log(const char *format, ...);
/*@printflike@*/ void Abort(const char *format, ...);
-void Exit(int code);
-int Fork(void);
-int Wait(char *prog);
-char *ToString(char *s);
-void AskOffline(VolumeId volumeId);
-void AskOnline(VolumeId volumeId, char *partition);
-void CheckLogFile(void);
-#ifndef AFS_NT40_ENV
-void TimeStampLogFile(void);
-#endif
-void ClearROInUseBit(struct VolumeSummary *summary);
-void CopyAndSalvage(register struct DirSummary *dir);
-int CopyInode(Device device, Inode inode1, Inode inode2, int rwvolume);
-void CopyOnWrite(register struct DirSummary *dir);
-void CountVolumeInodes(register struct ViceInodeInfo *ip, int maxInodes,
- register struct InodeSummary *summary);
-void DeleteExtraVolumeHeaderFile(register struct VolumeSummary *vsp);
-void DistilVnodeEssence(VolumeId vid, VnodeClass class, Inode ino,
- Unique * maxu);
-int GetInodeSummary(char *path, VolumeId singleVolumeNumber);
-void GetVolumeSummary(VolumeId singleVolumeNumber);
-void JudgeEntry(struct DirSummary *dir, char *name, VnodeId vnodeNumber,
- Unique unique);
-void MaybeZapVolume(register struct InodeSummary *isp, char *message,
- int deleteMe, int check);
-void ObtainSalvageLock(void);
-void PrintInodeList(void);
-void PrintInodeSummary(void);
-void PrintVolumeSummary(void);
-int QuickCheck(register struct InodeSummary *isp, int nVols);
-void RemoveTheForce(char *path);
-void SalvageDir(char *name, VolumeId rwVid, struct VnodeInfo *dirVnodeInfo,
- IHandle_t * alinkH, int i, struct DirSummary *rootdir,
- int *rootdirfound);
-void SalvageFileSysParallel(struct DiskPartition *partP);
-void SalvageFileSys(struct DiskPartition *partP, VolumeId singleVolumeNumber);
-void SalvageFileSys1(struct DiskPartition *partP,
- VolumeId singleVolumeNumber);
-int SalvageHeader(register struct stuff *sp, struct InodeSummary *isp,
- int check, int *deleteMe);
-int SalvageIndex(Inode ino, VnodeClass class, int RW,
- register struct ViceInodeInfo *ip, int nInodes,
- struct VolumeSummary *volSummary, int check);
-int SalvageVnodes(register struct InodeSummary *rwIsp,
- register struct InodeSummary *thisIsp,
- register struct ViceInodeInfo *inodes, int check);
-int SalvageVolume(register struct InodeSummary *rwIsp, IHandle_t * alinkH);
-void DoSalvageVolumeGroup(register struct InodeSummary *isp, int nVols);
-#ifdef AFS_NT40_ENV
-void SalvageVolumeGroup(register struct InodeSummary *isp, int nVols);
-#else
-#define SalvageVolumeGroup DoSalvageVolumeGroup
-#endif
-int SalvageVolumeHeaderFile(register struct InodeSummary *isp,
- register struct ViceInodeInfo *inodes, int RW,
- int check, int *deleteMe);
-void showlog(void);
-int UseTheForceLuke(char *path);
-
static int IsVnodeOrphaned(VnodeId vnode);
/* Uniquifier stored in the Inode */
}
-char *tmpdir = 0;
-static int
-handleit(struct cmd_syndesc *as)
-{
- register struct cmd_item *ti;
- char pname[100], *temp;
- afs_int32 seenpart = 0, seenvol = 0, vid = 0, seenany = 0;
- struct DiskPartition *partP;
-
-#ifdef AFS_SGI_VNODE_GLUE
- if (afs_init_kernel_config(-1) < 0) {
- printf
- ("Can't determine NUMA configuration, not starting salvager.\n");
- exit(1);
- }
-#endif
-
-#ifdef FAST_RESTART
- {
- afs_int32 i;
- for (i = 0; i < CMD_MAXPARMS; i++) {
- if (as->parms[i].items) {
- seenany = 1;
- break;
- }
- }
- }
- if (!seenany) {
- char *msg =
- "Exiting immediately without salvage. Look into the FileLog to find volumes which really need to be salvaged!";
-
- if (useSyslog)
- Log(msg);
- else
- printf("%s\n", msg);
-
- Exit(0);
- }
-#endif /* FAST_RESTART */
- if ((ti = as->parms[0].items)) { /* -partition */
- seenpart = 1;
- strncpy(pname, ti->data, 100);
- }
- if ((ti = as->parms[1].items)) { /* -volumeid */
- if (!seenpart) {
- printf
- ("You must also specify '-partition' option with the '-volumeid' option\n");
- exit(-1);
- }
- seenvol = 1;
- vid = atoi(ti->data);
- }
- if (as->parms[2].items) /* -debug */
- debug = 1;
- if (as->parms[3].items) /* -nowrite */
- Testing = 1;
- if (as->parms[4].items) /* -inodes */
- ListInodeOption = 1;
- if (as->parms[5].items) /* -force */
- ForceSalvage = 1;
- if (as->parms[6].items) /* -oktozap */
- OKToZap = 1;
- if (as->parms[7].items) /* -rootinodes */
- ShowRootFiles = 1;
- if (as->parms[8].items) /* -RebuildDirs */
- RebuildDirs = 1;
- if (as->parms[9].items) /* -ForceReads */
- forceR = 1;
- if ((ti = as->parms[10].items)) { /* -Parallel # */
- temp = ti->data;
- if (strncmp(temp, "all", 3) == 0) {
- PartsPerDisk = 1;
- temp += 3;
- }
- if (strlen(temp) != 0) {
- Parallel = atoi(temp);
- if (Parallel < 1)
- Parallel = 1;
- if (Parallel > MAXPARALLEL) {
- printf("Setting parallel salvages to maximum of %d \n",
- MAXPARALLEL);
- Parallel = MAXPARALLEL;
- }
- }
- }
- if ((ti = as->parms[11].items)) { /* -tmpdir */
- DIR *dirp;
-
- tmpdir = ti->data;
- dirp = opendir(tmpdir);
- if (!dirp) {
- printf
- ("Can't open temporary placeholder dir %s; using current partition \n",
- tmpdir);
- tmpdir = NULL;
- } else
- closedir(dirp);
- }
- if ((ti = as->parms[12].items)) /* -showlog */
- ShowLog = 1;
- if ((ti = as->parms[13].items)) { /* -log */
- Testing = 1;
- ShowSuid = 1;
- Showmode = 1;
- }
- if ((ti = as->parms[14].items)) { /* -showmounts */
- Testing = 1;
- Showmode = 1;
- ShowMounts = 1;
- }
- if ((ti = as->parms[15].items)) { /* -orphans */
- if (Testing)
- orphans = ORPH_IGNORE;
- else if (strcmp(ti->data, "remove") == 0
- || strcmp(ti->data, "r") == 0)
- orphans = ORPH_REMOVE;
- else if (strcmp(ti->data, "attach") == 0
- || strcmp(ti->data, "a") == 0)
- orphans = ORPH_ATTACH;
- }
-#ifndef AFS_NT40_ENV /* ignore options on NT */
- if ((ti = as->parms[16].items)) { /* -syslog */
- useSyslog = 1;
- ShowLog = 0;
- }
- if ((ti = as->parms[17].items)) { /* -syslogfacility */
- useSyslogFacility = atoi(ti->data);
- }
-
- if ((ti = as->parms[18].items)) { /* -datelogs */
- TimeStampLogFile();
- }
-#endif
-
-#ifdef FAST_RESTART
- if (ti = as->parms[19].items) { /* -DontSalvage */
- char *msg =
- "Exiting immediately without salvage. Look into the FileLog to find volumes which really need to be salvaged!";
-
- if (useSyslog)
- Log(msg);
- else
- printf("%s\n", msg);
- Exit(0);
- }
-#endif /* FAST_RESTART */
-
- /* Note: if seemvol we initialize this as a standard volume utility: this has the
- * implication that the file server may be running; negotations have to be made with
- * the file server in this case to take the read write volume and associated read-only
- * volumes off line before salvaging */
-#ifdef AFS_NT40_ENV
- if (seenvol) {
- if (afs_winsockInit() < 0) {
- ReportErrorEventAlt(AFSEVT_SVR_WINSOCK_INIT_FAILED, 0,
- AFSDIR_SALVAGER_FILE, 0);
- Log("Failed to initailize winsock, exiting.\n");
- Exit(1);
- }
- }
-#endif
- VInitVolumePackage(seenvol ? volumeUtility : salvager, 5, 5,
- DONT_CONNECT_FS, 0);
- DInit(10);
-#ifdef AFS_NT40_ENV
- if (myjob.cj_number != NOT_CHILD) {
- if (!seenpart) {
- seenpart = 1;
- (void)strcpy(pname, myjob.cj_part);
- }
- }
-#endif
- if (seenpart == 0) {
- for (partP = DiskPartitionList; partP; partP = partP->next) {
- SalvageFileSysParallel(partP);
- }
- SalvageFileSysParallel(0);
- } else {
- partP = VGetPartition(pname, 0);
- if (!partP) {
- Log("salvage: Unknown or unmounted partition %s; salvage aborted\n", pname);
- Exit(1);
- }
- if (!seenvol)
- SalvageFileSys(partP, 0);
- else {
- /* Salvage individual volume */
- if (vid <= 0) {
- Log("salvage: invalid volume id specified; salvage aborted\n");
- Exit(1);
- }
- SalvageFileSys(partP, vid);
- }
- }
- return (0);
-}
-
-
-#ifndef AFS_NT40_ENV
-#include "AFS_component_version_number.c"
-#endif
#define MAX_ARGS 128
#ifdef AFS_NT40_ENV
char *save_args[MAX_ARGS];
pthread_t main_thread;
#endif
-int
-main(int argc, char **argv)
-{
- struct cmd_syndesc *ts;
- int err = 0;
- char commandLine[150];
-
- int i;
- extern char cml_version_number[];
-
-#ifdef AFS_AIX32_ENV
- /*
- * The following signal action for AIX is necessary so that in case of a
- * crash (i.e. core is generated) we can include the user's data section
- * in the core dump. Unfortunately, by default, only a partial core is
- * generated which, in many cases, isn't too useful.
- */
- struct sigaction nsa;
-
- sigemptyset(&nsa.sa_mask);
- nsa.sa_handler = SIG_DFL;
- nsa.sa_flags = SA_FULLDUMP;
- sigaction(SIGABRT, &nsa, NULL);
- sigaction(SIGSEGV, &nsa, NULL);
-#endif
-
- /* Initialize directory paths */
- if (!(initAFSDirPath() & AFSDIR_SERVER_PATHS_OK)) {
-#ifdef AFS_NT40_ENV
- ReportErrorEventAlt(AFSEVT_SVR_NO_INSTALL_DIR, 0, argv[0], 0);
-#endif
- fprintf(stderr, "%s: Unable to obtain AFS server directory.\n",
- argv[0]);
- exit(2);
- }
-#ifdef AFS_NT40_ENV
- main_thread = pthread_self();
- if (spawnDatap && spawnDataLen) {
- /* This is a child per partition salvager. Don't setup log or
- * try to lock the salvager lock.
- */
- if (nt_SetupPartitionSalvage(spawnDatap, spawnDataLen) < 0)
- exit(3);
- } else {
-#endif
- for (commandLine[0] = '\0', i = 0; i < argc; i++) {
- if (i > 0)
- strcat(commandLine, " ");
- strcat(commandLine, argv[i]);
- }
-
- /* All entries to the log will be appended. Useful if there are
- * multiple salvagers appending to the log.
- */
-
- CheckLogFile();
-#ifndef AFS_NT40_ENV
-#ifdef AFS_LINUX20_ENV
- fcntl(fileno(logFile), F_SETFL, O_APPEND); /* Isn't this redundant? */
-#else
- fcntl(fileno(logFile), F_SETFL, FAPPEND); /* Isn't this redundant? */
-#endif
-#endif
- setlinebuf(logFile);
-
-#ifndef AFS_NT40_ENV
- if (geteuid() != 0) {
- printf("Salvager must be run as root.\n");
- fflush(stdout);
- Exit(0);
- }
-#endif
-
- /* bad for normal help flag processing, but can do nada */
-
- fprintf(logFile, "%s\n", cml_version_number);
- Log("STARTING AFS SALVAGER %s (%s)\n", SalvageVersion, commandLine);
-
- /* Get and hold a lock for the duration of the salvage to make sure
- * that no other salvage runs at the same time. The routine
- * VInitVolumePackage (called below) makes sure that a file server or
- * other volume utilities don't interfere with the salvage.
- */
- ObtainSalvageLock();
-#ifdef AFS_NT40_ENV
- }
-#endif
-
- ts = cmd_CreateSyntax("initcmd", handleit, 0, "initialize the program");
- cmd_AddParm(ts, "-partition", CMD_SINGLE, CMD_OPTIONAL,
- "Name of partition to salvage");
- cmd_AddParm(ts, "-volumeid", CMD_SINGLE, CMD_OPTIONAL,
- "Volume Id to salvage");
- cmd_AddParm(ts, "-debug", CMD_FLAG, CMD_OPTIONAL,
- "Run in Debugging mode");
- cmd_AddParm(ts, "-nowrite", CMD_FLAG, CMD_OPTIONAL,
- "Run readonly/test mode");
- cmd_AddParm(ts, "-inodes", CMD_FLAG, CMD_OPTIONAL,
- "Just list affected afs inodes - debugging flag");
- cmd_AddParm(ts, "-force", CMD_FLAG, CMD_OPTIONAL, "Force full salvaging");
- cmd_AddParm(ts, "-oktozap", CMD_FLAG, CMD_OPTIONAL,
- "Give permission to destroy bogus inodes/volumes - debugging flag");
- cmd_AddParm(ts, "-rootinodes", CMD_FLAG, CMD_OPTIONAL,
- "Show inodes owned by root - debugging flag");
- cmd_AddParm(ts, "-salvagedirs", CMD_FLAG, CMD_OPTIONAL,
- "Force rebuild/salvage of all directories");
- cmd_AddParm(ts, "-blockreads", CMD_FLAG, CMD_OPTIONAL,
- "Read smaller blocks to handle IO/bad blocks");
- cmd_AddParm(ts, "-parallel", CMD_SINGLE, CMD_OPTIONAL,
- "# of max parallel partition salvaging");
- cmd_AddParm(ts, "-tmpdir", CMD_SINGLE, CMD_OPTIONAL,
- "Name of dir to place tmp files ");
- cmd_AddParm(ts, "-showlog", CMD_FLAG, CMD_OPTIONAL,
- "Show log file upon completion");
- cmd_AddParm(ts, "-showsuid", CMD_FLAG, CMD_OPTIONAL,
- "Report on suid/sgid files");
- cmd_AddParm(ts, "-showmounts", CMD_FLAG, CMD_OPTIONAL,
- "Report on mountpoints");
- cmd_AddParm(ts, "-orphans", CMD_SINGLE, CMD_OPTIONAL,
- "ignore | remove | attach");
-
- /* note - syslog isn't avail on NT, but if we make it conditional, have
- * to deal with screwy offsets for cmd params */
- cmd_AddParm(ts, "-syslog", CMD_FLAG, CMD_OPTIONAL,
- "Write salvage log to syslogs");
- cmd_AddParm(ts, "-syslogfacility", CMD_SINGLE, CMD_OPTIONAL,
- "Syslog facility number to use");
- cmd_AddParm(ts, "-datelogs", CMD_FLAG, CMD_OPTIONAL,
- "Include timestamp in logfile filename");
-
-#ifdef FAST_RESTART
- cmd_AddParm(ts, "-DontSalvage", CMD_FLAG, CMD_OPTIONAL,
- "Don't salvage. This my be set in BosConfig to let the fileserver restart immediately after a crash. Bad volumes will be taken offline");
-#endif /* FAST_RESTART */
- err = cmd_Dispatch(argc, argv);
- Exit(err);
-}
/* Get the salvage lock if not already held. Hold until process exits. */
void
ForceSalvage = UseTheForceLuke(fileSysPath);
if (singleVolumeNumber) {
- if (!VConnectFS()) {
+ /* salvageserver already setup fssync conn for us */
+ if ((programType != salvageServer) && !VConnectFS()) {
Abort("Couldn't connect to file server\n");
}
AskOffline(singleVolumeNumber);
* if no such match, take the first determined by our sort
* order */
register struct ViceInodeInfo *lip = ip;
- register lnInodes = nInodes;
+ register int lnInodes = nInodes;
while (lnInodes
&& lip->u.vnode.vnodeNumber == vnodeNumber) {
if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
+/*
+ * Ask the file server, through FSYNC_VolOp(FSYNC_VOL_OFF, FSYNC_SALVAGE),
+ * to take volumeId offline.  Up to three attempts are made:
+ *   - SYNC_OK ends the loop;
+ *   - SYNC_DENIED and SYNC_BAD_COMMAND are logged and Abort() is called;
+ *   - any other code resets the fssync client connection
+ *     (FSYNC_clientFinis / FSYNC_clientInit) and retries.
+ * If the last attempt still did not return SYNC_OK, Abort() is called.
+ *
+ * NOTE(review): the messages below are selected with DEMAND_ATTACH_ENABLE,
+ * while the rest of this change tests AFS_DEMAND_ATTACH_FS -- confirm the
+ * build really defines DEMAND_ATTACH_ENABLE.
+ */
void
AskOffline(VolumeId volumeId)
{
- if (FSYNC_askfs(volumeId, NULL, FSYNC_OFF, FSYNC_SALVAGE) == FSYNC_DENIED) {
- Log("AskOffline: file server denied offline request; a general salvage is required.\n");
+ afs_int32 code, i;
+
+ for (i = 0; i < 3; i++) {
+ code = FSYNC_VolOp(volumeId, NULL, FSYNC_VOL_OFF, FSYNC_SALVAGE, NULL);
+
+ if (code == SYNC_OK) {
+ break;
+ } else if (code == SYNC_DENIED) {
+#ifdef DEMAND_ATTACH_ENABLE
+ Log("AskOffline: file server denied offline request; a general salvage may be required.\n");
+#else
+ Log("AskOffline: file server denied offline request; a general salvage is required.\n");
+#endif
+ Abort("Salvage aborted\n");
+ } else if (code == SYNC_BAD_COMMAND) {
+ Log("AskOffline: fssync protocol mismatch (bad command word '%d'); salvage aborting.\n",
+ FSYNC_VOL_OFF);
+#ifdef DEMAND_ATTACH_ENABLE
+ Log("AskOffline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
+#else
+ Log("AskOffline: please make sure fileserver, volserver and salvager binaries are same version.\n");
+#endif
+ Abort("Salvage aborted\n");
+ } else if (i < 2) {
+ /* try it again */
+ /* only the first two failures retry; the third falls out of the
+  * loop and is reported by the check after it */
+ Log("AskOffline: request for fileserver to take volume offline failed; trying again...\n");
+ FSYNC_clientFinis();
+ FSYNC_clientInit();
+ }
+ }
+ /* every attempt failed with a code other than denied / bad command */
+ if (code != SYNC_OK) {
+ Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
Abort("Salvage aborted\n");
}
}
+/*
+ * Ask the file server, through FSYNC_VolOp(FSYNC_VOL_ON, FSYNC_WHATEVER),
+ * to bring volumeId on the given partition back online.  Up to three
+ * attempts are made:
+ *   - SYNC_OK ends the loop;
+ *   - SYNC_DENIED is logged and the request is simply repeated;
+ *   - SYNC_BAD_COMMAND is logged and ends the loop (retrying cannot help);
+ *   - any other code resets the fssync client connection
+ *     (FSYNC_clientFinis / FSYNC_clientInit) and retries.
+ * Unlike AskOffline, a final failure does not call Abort().
+ */
void
AskOnline(VolumeId volumeId, char *partition)
{
- if (FSYNC_askfs(volumeId, partition, FSYNC_ON, 0) == FSYNC_DENIED) {
- Log("AskOnline: file server denied online request to volume %u partition %s\n", volumeId, partition);
+ afs_int32 code, i;
+
+ for (i = 0; i < 3; i++) {
+ code = FSYNC_VolOp(volumeId, partition, FSYNC_VOL_ON, FSYNC_WHATEVER, NULL);
+
+ if (code == SYNC_OK) {
+ break;
+ } else if (code == SYNC_DENIED) {
+ Log("AskOnline: file server denied online request to volume %u partition %s; trying again...\n", volumeId, partition);
+ } else if (code == SYNC_BAD_COMMAND) {
+ Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
+ FSYNC_VOL_ON);
+#ifdef DEMAND_ATTACH_ENABLE
+ Log("AskOnline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
+#else
+ Log("AskOnline: please make sure fileserver, volserver and salvager binaries are same version.\n");
+#endif
+ break;
+ } else if (i < 2) {
+ /* try it again */
+ /* this is the online path: the message must not claim an offline
+  * request failed, or the log is misleading */
+ Log("AskOnline: request for fileserver to put volume online failed; trying again...\n");
+ FSYNC_clientFinis();
+ FSYNC_clientInit();
+ }
}
}
}
void
-CheckLogFile(void)
+CheckLogFile(char * log_path)
{
char oldSlvgLog[AFSDIR_PATH_MAX];
}
#endif
- strcpy(oldSlvgLog, AFSDIR_SERVER_SLVGLOG_FILEPATH);
+ strcpy(oldSlvgLog, log_path);
strcat(oldSlvgLog, ".old");
if (!logFile) {
- renamefile(AFSDIR_SERVER_SLVGLOG_FILEPATH, oldSlvgLog);
- logFile = afs_fopen(AFSDIR_SERVER_SLVGLOG_FILEPATH, "a");
+ renamefile(log_path, oldSlvgLog);
+ logFile = afs_fopen(log_path, "a");
if (!logFile) { /* still nothing, use stdout */
logFile = stdout;
#ifndef AFS_NT40_ENV
void
-TimeStampLogFile(void)
+TimeStampLogFile(char * log_path)
{
char stampSlvgLog[AFSDIR_PATH_MAX];
struct tm *lt;
lt = localtime(&now);
(void)afs_snprintf(stampSlvgLog, sizeof stampSlvgLog,
"%s.%04d-%02d-%02d.%02d:%02d:%02d",
- AFSDIR_SERVER_SLVGLOG_FILEPATH, lt->tm_year + 1900,
+ log_path, lt->tm_year + 1900,
lt->tm_mon + 1, lt->tm_mday, lt->tm_hour, lt->tm_min,
lt->tm_sec);
/* try to link the logfile to a timestamped filename */
/* if it fails, oh well, nothing we can do */
- link(AFSDIR_SERVER_SLVGLOG_FILEPATH, stampSlvgLog);
+ link(log_path, stampSlvgLog);
}
#endif
*
* NOTE:
* The VRMIX fsck will not muck with the filesystem it is supposedly
- * fixing and create a "FORCESAVAGE" file (by design). Instead, we
+ * fixing and create a "FORCESALVAGE" file (by design). Instead, we
* muck directly with the root inode, which is within the normal
* domain of fsck.
* ListViceInodes() has a side effect of setting ForceSalvage if
--- /dev/null
+/*
+ * Copyright 2000, International Business Machines Corporation and others.
+ * All Rights Reserved.
+ *
+ * This software has been released under the terms of the IBM Public
+ * License. For details, see the LICENSE file in the top-level source
+ * directory or online at http://www.openafs.org/dl/license10.html
+ */
+
+/*
+ * Module: vol-salvage.h
+ */
+
+#ifndef __vol_salvage_h_
+#define __vol_salvage_h_
+
+#define SalvageVersion "2.4"
+
+#include "salvage.h"
+#include "volinodes.h"
+
+/* salvager data structures */
+struct InodeSummary { /* Inode summary file--an entry for each
+ * volume in the inode file for a partition */
+ VolId volumeId; /* Volume id */
+ VolId RWvolumeId; /* RW volume associated */
+ int index; /* index into inode file (0, 1, 2 ...) */
+ int nInodes; /* Number of inodes for this volume */
+ int nSpecialInodes; /* Number of special inodes, i.e. volume
+ * header, index, etc. These are all
+ * marked (viceinode.h) and will all be sorted
+ * to the beginning of the information for
+ * this volume. Read-only volumes should
+ * ONLY have special inodes (all the other
+ * inodes look as if they belong to the
+ * original RW volume). */
+ Unique maxUniquifier; /* The maximum uniquifier found in all the inodes.
+ * This is only useful for RW volumes and is used
+ * to compute a new volume uniquifier in the event
+ * that the header needs to be recreated. The inode
+ * uniquifier may be a truncated version of vnode
+ * uniquifier (AFS_3DISPARES). The real maxUniquifier
+ * is from the vnodes and later calculated from it */
+ struct VolumeSummary *volSummary;
+ /* Either a pointer to the original volume
+ * header summary, or constructed summary
+ * information */
+} *inodeSummary;
+#define readOnly(isp) ((isp)->volumeId != (isp)->RWvolumeId)
+
+struct VolumeSummary { /* Volume summary an entry for each
+ * volume in a volume directory.
+ * Assumption: one volume directory per
+ * partition */
+ char *fileName; /* File name on the partition for the volume
+ * header */
+ struct VolumeHeader header;
+ /* volume number, rw volume number, inode
+ * numbers of each major component of
+ * the volume */
+ IHandle_t *volumeInfoHandle;
+ byte wouldNeedCallback; /* set if the file server should issue
+ * call backs for all the files in this volume when
+ * the volume goes back on line */
+};
+
+struct VnodeInfo {
+ IHandle_t *handle; /* Inode containing this index */
+ int nVnodes; /* Total number of vnodes in index */
+ int nAllocatedVnodes; /* Total number actually used */
+ int volumeBlockCount; /* Total number of blocks used by volume */
+ Inode *inodes; /* Directory only */
+ struct VnodeEssence {
+ short count; /* Number of references to vnode; MUST BE SIGNED */
+ unsigned claimed:1; /* Set when a parent directory containing an entry
+ * referencing this vnode is found. The claim
+ * is that the parent in "parent" can point to
+ * this vnode, and no other */
+ unsigned changed:1; /* Set if any parameters (other than the count)
+ * in the vnode change. It is determined if the
+ * link count has changed by noting whether it is
+ * 0 after scanning all directories */
+ unsigned salvaged:1; /* Set if this directory vnode has already been salvaged. */
+ unsigned todelete:1; /* Set if this vnode is to be deleted (should not be claimed) */
+ afs_fsize_t blockCount;
+ /* Number of blocks (1K) used by this vnode,
+ * approximately */
+ VnodeId parent; /* parent in vnode */
+ Unique unique; /* Must match entry! */
+ char *name; /* Name of directory entry */
+ int modeBits; /* File mode bits */
+ Inode InodeNumber; /* file's inode */
+ int type; /* File type */
+ int author; /* File author */
+ int owner; /* File owner */
+ int group; /* File group */
+ } *vnodes;
+};
+
+struct DirSummary {
+ struct DirHandle dirHandle;
+ VnodeId vnodeNumber;
+ Unique unique;
+ unsigned haveDot, haveDotDot;
+ VolumeId rwVid;
+ int copied; /* If the copy-on-write stuff has been applied */
+ VnodeId parent;
+ char *name;
+ char *vname;
+ IHandle_t *ds_linkH;
+};
+
+#define ORPH_IGNORE 0
+#define ORPH_REMOVE 1
+#define ORPH_ATTACH 2
+
+
+/* command line options */
+extern int debug; /* -d flag */
+extern int Testing; /* -n flag */
+extern int ListInodeOption; /* -i flag */
+extern int ShowRootFiles; /* -r flag */
+extern int RebuildDirs; /* -sal flag */
+extern int Parallel; /* -para X flag */
+extern int PartsPerDisk; /* Salvage up to 8 partitions on same disk sequentially */
+extern int forceR; /* -b flag */
+extern int ShowLog; /* -showlog flag */
+extern int ShowSuid; /* -showsuid flag */
+extern int ShowMounts; /* -showmounts flag */
+extern int orphans; /* -orphans option */
+extern int Showmode;
+
+#ifndef AFS_NT40_ENV
+extern int useSyslog; /* -syslog flag */
+extern int useSyslogFacility; /* -syslogfacility option */
+#endif
+
+#define MAXPARALLEL 32
+
+extern int OKToZap; /* -o flag */
+extern int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
+ * in the volume header */
+
+
+#define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
+ * partition */
+extern Device fileSysDevice; /* The device number of the current
+ * partition being salvaged */
+#ifdef AFS_NT40_ENV
+extern char fileSysPath[8];
+#else
+extern char *fileSysPath; /* The path of the mounted partition currently
+ * being salvaged, i.e. the directory
+ * containing the volume headers */
+#endif /* AFS_NT40_ENV */
+extern char *fileSysPathName; /* NT needs this to make name pretty in log. */
+extern IHandle_t *VGLinkH; /* Link handle for current volume group. */
+extern int VGLinkH_cnt; /* # of references to lnk handle. */
+extern struct DiskPartition *fileSysPartition; /* Partition being salvaged */
+#ifndef AFS_NT40_ENV
+extern char *fileSysDeviceName; /* The block device where the file system
+ * being salvaged was mounted */
+extern char *filesysfulldev;
+#endif /* AFS_NT40_ENV */
+extern int VolumeChanged; /* Set by any routine which would change the volume in
+ * a way which would require callback is to be broken if the
+ * volume was put back on line by an active file server */
+
+extern VolumeDiskData VolInfo; /* A copy of the last good or salvaged volume header dealt with */
+
+extern int nVolumesInInodeFile; /* Number of read-write volumes summarized */
+extern int inodeFd; /* File descriptor for inode file */
+
+
+extern struct VnodeInfo vnodeInfo[nVNODECLASSES];
+
+
+extern struct VolumeSummary *volumeSummaryp; /* Holds all the volumes in a part */
+extern int nVolumes; /* Number of volumes (read-write and read-only)
+ * in volume summary */
+
+extern char * tmpdir;
+extern FILE *logFile; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
+
+
+#ifdef AFS_NT40_ENV
+/* For NT, we can fork the per partition salvagers to gain the required
+ * safety against Aborts. But there's too many complex data structures at
+ * the per volume salvager layer to easily copy the data across.
+ * childJobNumber is reset from -1 to the job number if this is a
+ * per partition child of the main salvager. This information is passed
+ * out-of-band in the extra data area setup for the now unused parent/child
+ * data transfer.
+ */
+#define SALVAGER_MAGIC 0x00BBaaDD
+#define NOT_CHILD -1 /* job numbers start at 0 */
+/* If new options need to be passed to child, add them here. */
+typedef struct {
+ int cj_magic;
+ int cj_number;
+ char cj_part[32];
+} childJob_t;
+
+/* Child job this process is running.
+ * This is a declaration only: an initializer here would turn it into a
+ * definition in every file that includes this header.
+ * NOTE(review): exactly one .c file must provide the definition,
+ * initialized to { SALVAGER_MAGIC, NOT_CHILD, "" } -- confirm that
+ * vol-salvage.c does so. */
+extern childJob_t myjob;
+
+extern int nt_SalvagePartition(char *partName, int jobn);
+extern int nt_SetupPartitionSalvage(void *datap, int len);
+
+typedef struct {
+ struct InodeSummary *svgp_inodeSummaryp;
+ int svgp_count;
+} SVGParms_t;
+#define canfork 0
+#else /* AFS_NT40_ENV */
+#define canfork 1
+#endif /* AFS_NT40_ENV */
+
+
+/* prototypes */
+extern void Exit(int code);
+extern int Fork(void);
+extern int Wait(char *prog);
+extern char *ToString(char *s);
+extern void AskOffline(VolumeId volumeId);
+extern void AskOnline(VolumeId volumeId, char *partition);
+extern void CheckLogFile(char * log_path);
+#ifndef AFS_NT40_ENV
+extern void TimeStampLogFile(char * log_path);
+#endif
+extern void ClearROInUseBit(struct VolumeSummary *summary);
+extern void CopyAndSalvage(register struct DirSummary *dir);
+extern int CopyInode(Device device, Inode inode1, Inode inode2, int rwvolume);
+extern void CopyOnWrite(register struct DirSummary *dir);
+extern void CountVolumeInodes(register struct ViceInodeInfo *ip, int maxInodes,
+ register struct InodeSummary *summary);
+extern void DeleteExtraVolumeHeaderFile(register struct VolumeSummary *vsp);
+extern void DistilVnodeEssence(VolumeId vid, VnodeClass class, Inode ino,
+ Unique * maxu);
+extern int GetInodeSummary(char *path, VolumeId singleVolumeNumber);
+extern void GetVolumeSummary(VolumeId singleVolumeNumber);
+extern void JudgeEntry(struct DirSummary *dir, char *name, VnodeId vnodeNumber,
+ Unique unique);
+extern void MaybeZapVolume(register struct InodeSummary *isp, char *message,
+ int deleteMe, int check);
+extern void ObtainSalvageLock(void);
+extern void PrintInodeList(void);
+extern void PrintInodeSummary(void);
+extern void PrintVolumeSummary(void);
+extern int QuickCheck(register struct InodeSummary *isp, int nVols);
+extern void RemoveTheForce(char *path);
+extern void SalvageDir(char *name, VolumeId rwVid, struct VnodeInfo *dirVnodeInfo,
+ IHandle_t * alinkH, int i, struct DirSummary *rootdir,
+ int *rootdirfound);
+extern void SalvageFileSysParallel(struct DiskPartition *partP);
+extern void SalvageFileSys(struct DiskPartition *partP, VolumeId singleVolumeNumber);
+extern void SalvageFileSys1(struct DiskPartition *partP,
+ VolumeId singleVolumeNumber);
+extern int SalvageHeader(register struct stuff *sp, struct InodeSummary *isp,
+ int check, int *deleteMe);
+extern int SalvageIndex(Inode ino, VnodeClass class, int RW,
+ register struct ViceInodeInfo *ip, int nInodes,
+ struct VolumeSummary *volSummary, int check);
+extern int SalvageVnodes(register struct InodeSummary *rwIsp,
+ register struct InodeSummary *thisIsp,
+ register struct ViceInodeInfo *inodes, int check);
+extern int SalvageVolume(register struct InodeSummary *rwIsp, IHandle_t * alinkH);
+extern void DoSalvageVolumeGroup(register struct InodeSummary *isp, int nVols);
+#ifdef AFS_NT40_ENV
+extern void SalvageVolumeGroup(register struct InodeSummary *isp, int nVols);
+#else
+#define SalvageVolumeGroup DoSalvageVolumeGroup
+#endif
+extern int SalvageVolumeHeaderFile(register struct InodeSummary *isp,
+ register struct ViceInodeInfo *inodes, int RW,
+ int check, int *deleteMe);
+extern void showlog(void);
+extern int UseTheForceLuke(char *path);
+
+
+
+#endif /* __vol_salvage_h_ */
#define ROVOL 1
#define BACKVOL 2
+/* maximum number of Vice partitions */
+#define VOLMAXPARTS 255
+
/* All volumes will have a volume header name in this format */
#if defined(AFS_AIX_ENV) || defined(AFS_HPUX_ENV)
/* Note that <afs/param.h> must have been included before we get here... */
*/
+#ifndef __volinodes_h_
+#define __volinodes_h_
+
/* Used by vutil.c and salvager.c */
private struct VolumeHeader tempHeader;
#define MAXINODETYPE VI_LINKTABLE
Volume *VWaitAttachVolume();
+
+#endif /* __volinodes_h_ */
* This software has been released under the terms of the IBM Public
* License. For details, see the LICENSE file in the top-level source
* directory or online at http://www.openafs.org/dl/license10.html
+ *
+ * Portions Copyright (c) 2006 Sine Nomine Associates
*/
/* 1/1/89: NB: this stuff is all going to be replaced. Don't take it too seriously */
#ifdef AFS_NT40_ENV
#include <io.h>
#endif
+#include "daemon_com.h"
+#include "fssync.h"
+#include "salvsync.h"
#include "vnode.h"
#include "volume.h"
#include "partition.h"
#include "afs/assert.h"
#endif /* AFS_PTHREAD_ENV */
#include "vutils.h"
-#include "fssync.h"
+#include <dir/dir.h>
#ifndef AFS_NT40_ENV
#include <unistd.h>
#endif
+#if !defined(offsetof)
+#include <stddef.h>
+#endif
+
#ifdef O_LARGEFILE
#define afs_stat stat64
#define afs_fstat fstat64
#ifdef AFS_PTHREAD_ENV
pthread_mutex_t vol_glock_mutex;
-pthread_mutex_t vol_attach_mutex;
-pthread_mutex_t vol_fsync_mutex;
pthread_mutex_t vol_trans_mutex;
pthread_cond_t vol_put_volume_cond;
pthread_cond_t vol_sleep_cond;
int vol_attach_threads = 1;
#endif /* AFS_PTHREAD_ENV */
+#ifdef AFS_DEMAND_ATTACH_FS
+pthread_mutex_t vol_salvsync_mutex;
+#endif /* AFS_DEMAND_ATTACH_FS */
+
#ifdef AFS_OSF_ENV
extern void *calloc(), *realloc();
#endif
/*@printflike@*/ extern void Log(const char *format, ...);
/* Forward declarations */
-static Volume *attach2(Error * ec, char *path,
+static Volume *attach2(Error * ec, VolId vid, char *path,
register struct VolumeHeader *header,
- struct DiskPartition *partp, int isbusy);
+ struct DiskPartition *partp, Volume * vp,
+ int isbusy, int mode);
+static void ReallyFreeVolume(Volume * vp);
+#ifdef AFS_DEMAND_ATTACH_FS
static void FreeVolume(Volume * vp);
+#else /* !AFS_DEMAND_ATTACH_FS */
+#define FreeVolume(vp) ReallyFreeVolume(vp)
static void VScanUpdateList(void);
-static void InitLRU(int howMany);
+#endif /* !AFS_DEMAND_ATTACH_FS */
+static void VInitVolumeHeaderCache(afs_uint32 howMany);
static int GetVolumeHeader(register Volume * vp);
static void ReleaseVolumeHeader(register struct volHeader *hd);
static void FreeVolumeHeader(register Volume * vp);
static void DeleteVolumeFromHashTable(register Volume * vp);
static int VHold(Volume * vp);
static int VHold_r(Volume * vp);
-static void GetBitmap(Error * ec, Volume * vp, VnodeClass class);
+static void VGetBitmap_r(Error * ec, Volume * vp, VnodeClass class);
static void GetVolumePath(Error * ec, VolId volumeId, char **partitionp,
char **namep);
static void VReleaseVolumeHandles_r(Volume * vp);
static void VCloseVolumeHandles_r(Volume * vp);
+static void LoadVolumeHeader(Error * ec, Volume * vp);
+static int VCheckOffline(register Volume * vp);
+static int VCheckDetach(register Volume * vp);
+static Volume * GetVolume(Error * ec, Error * client_ec, VolId volumeId, Volume * hint, int flags);
+static int VolumeExternalName_r(VolumeId volumeId, char * name, size_t len);
int LogLevel; /* Vice loglevel--not defined as extern so that it will be
* defined when not linked with vice, XXXX */
ProgramType programType; /* The type of program using the package */
+/* extended volume package statistics */
+VolPkgStats VStats;
+
+
#define VOLUME_BITMAP_GROWSIZE 16 /* bytes, => 128vnodes */
/* Must be a multiple of 4 (1 word) !! */
-#define VOLUME_HASH_TABLE_SIZE 128 /* Must be a power of 2!! */
-#define VOLUME_HASH(volumeId) (volumeId&(VOLUME_HASH_TABLE_SIZE-1))
-private Volume *VolumeHashTable[VOLUME_HASH_TABLE_SIZE];
+
+/* this parameter needs to be tunable at runtime.
+ * 128 was really inadequate for largish servers -- at 16384 volumes this
+ * puts average chain length at 128, thus an average 65 deref's to find a volptr.
+ * talk about bad spatial locality...
+ *
+ * an AVL or splay tree might work a lot better, but we'll just increase
+ * the default hash table size for now
+ */
+#define DEFAULT_VOLUME_HASH_SIZE 256 /* Must be a power of 2!! */
+#define DEFAULT_VOLUME_HASH_MASK (DEFAULT_VOLUME_HASH_SIZE-1)
+#define VOLUME_HASH(volumeId) (volumeId&(VolumeHashTable.Mask))
+
+/*
+ * turn volume hash chains into partially ordered lists.
+ * when the threshold is exceeded between two adjacent elements,
+ * perform a chain rebalancing operation.
+ *
+ * keep the threshold high in order to keep cache line invalidates
+ * low "enough" on SMPs
+ */
+#define VOLUME_HASH_REORDER_THRESHOLD 200
+
+/*
+ * when possible, don't just reorder single elements, but reorder
+ * entire chains of elements at once. a chain of elements that
+ * exceed the element previous to the pivot by at least CHAIN_THRESH
+ * accesses are moved in front of the chain whose elements have at
+ * least CHAIN_THRESH less accesses than the pivot element
+ */
+#define VOLUME_HASH_REORDER_CHAIN_THRESH (VOLUME_HASH_REORDER_THRESHOLD / 2)
+
+#include "rx/rx_queue.h"
+
+
+VolumeHashTable_t VolumeHashTable = {
+ DEFAULT_VOLUME_HASH_SIZE,
+ DEFAULT_VOLUME_HASH_MASK,
+ NULL
+};
+
+
+static void VInitVolumeHash(void);
+
#ifndef AFS_HAVE_FFS
/* This macro is used where an ffs() call does not exist. Was in util/ffs.c */
#endif /* !AFS_HAVE_FFS */
#ifdef AFS_PTHREAD_ENV
-#include "rx/rx_queue.h"
typedef struct diskpartition_queue_t {
struct rx_queue queue;
struct DiskPartition * diskP;
static void * VInitVolumePackageThread(void * args);
#endif /* AFS_PTHREAD_ENV */
-struct Lock vol_listLock; /* Lock obtained when listing volumes: prevents a volume from being missed if the volume is attached during a list volumes */
+static int VAttachVolumesByPartition(struct DiskPartition *diskP,
+ int * nAttached, int * nUnattached);
+
+
+#ifdef AFS_DEMAND_ATTACH_FS
+/* demand attach fileserver extensions */
+
+/* XXX
+ * in the future we will support serialization of VLRU state into the fs_state
+ * disk dumps
+ *
+ * these structures are the beginning of that effort
+ */
+struct VLRU_DiskHeader {
+ struct versionStamp stamp; /* magic and structure version number */
+ afs_uint32 mtime; /* time of dump to disk */
+ afs_uint32 num_records; /* number of VLRU_DiskEntry records */
+};
+
+struct VLRU_DiskEntry {
+ afs_uint32 vid; /* volume ID */
+ afs_uint32 idx; /* generation */
+ afs_uint32 last_get; /* timestamp of last get */
+};
+
+struct VLRU_StartupQueue {
+ struct VLRU_DiskEntry * entry;
+ int num_entries;
+ int next_idx;
+};
+
+typedef struct vshutdown_thread_t {
+ struct rx_queue q;
+ pthread_mutex_t lock;
+ pthread_cond_t cv;
+ pthread_cond_t master_cv;
+ int n_threads;
+ int n_threads_complete;
+ int vol_remaining;
+ int schedule_version;
+ int pass;
+ byte n_parts;
+ byte n_parts_done_pass;
+ byte part_thread_target[VOLMAXPARTS+1];
+ byte part_done_pass[VOLMAXPARTS+1];
+ struct rx_queue * part_pass_head[VOLMAXPARTS+1];
+ int stats[4][VOLMAXPARTS+1];
+} vshutdown_thread_t;
+static void * VShutdownThread(void * args);
+
+
+static Volume * VAttachVolumeByVp_r(Error * ec, Volume * vp, int mode);
+static int VCheckFree(Volume * vp);
+
+/* VByP List */
+static void AddVolumeToVByPList_r(Volume * vp);
+static void DeleteVolumeFromVByPList_r(Volume * vp);
+static void VVByPListBeginExclusive_r(struct DiskPartition * dp);
+static void VVByPListEndExclusive_r(struct DiskPartition * dp);
+static void VVByPListWait_r(struct DiskPartition * dp);
+
+/* online salvager */
+static int VCheckSalvage(register Volume * vp);
+static int VUpdateSalvagePriority_r(Volume * vp);
+static int VScheduleSalvage_r(Volume * vp);
+static int VCancelSalvage_r(Volume * vp, int reason);
+
+/* Volume hash table */
+static void VReorderHash_r(VolumeHashChainHead * head, Volume * pp, Volume * vp);
+static void VHashBeginExclusive_r(VolumeHashChainHead * head);
+static void VHashEndExclusive_r(VolumeHashChainHead * head);
+static void VHashWait_r(VolumeHashChainHead * head);
+
+/* Volume state machine */
+static void VCreateReservation_r(Volume * vp);
+static void VCancelReservation_r(Volume * vp);
+static void VWaitStateChange_r(Volume * vp);
+static void VWaitExclusiveState_r(Volume * vp);
+static int IsExclusiveState(VolState state);
+static int IsErrorState(VolState state);
+static int IsValidState(VolState state);
+
+/* shutdown */
+static int ShutdownVByPForPass_r(struct DiskPartition * dp, int pass);
+static int ShutdownVolumeWalk_r(struct DiskPartition * dp, int pass,
+ struct rx_queue ** idx);
+static void ShutdownController(vshutdown_thread_t * params);
+static void ShutdownCreateSchedule(vshutdown_thread_t * params);
+
+/* VLRU */
+static void VLRU_ComputeConstants(void);
+static void VInitVLRU(void);
+static void VLRU_Init_Node_r(volatile Volume * vp);
+static void VLRU_Add_r(volatile Volume * vp);
+static void VLRU_Delete_r(volatile Volume * vp);
+static void VLRU_UpdateAccess_r(volatile Volume * vp);
+static void * VLRU_ScannerThread(void * args);
+static void VLRU_Scan_r(int idx);
+static void VLRU_Promote_r(int idx);
+static void VLRU_Demote_r(int idx);
+static void VLRU_SwitchQueues(volatile Volume * vp, int new_idx, int append);
+
+/* soft detach */
+static int VCheckSoftDetach(volatile Volume * vp, afs_uint32 thresh);
+static int VCheckSoftDetachCandidate(volatile Volume * vp, afs_uint32 thresh);
+static int VSoftDetachVolume_r(volatile Volume * vp, afs_uint32 thresh);
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+
+struct Lock vol_listLock; /* Lock obtained when listing volumes:
+ * prevents a volume from being missed
+ * if the volume is attached during a
+ * list volumes */
-extern struct Lock FSYNC_handler_lock;
static int TimeZoneCorrection; /* Number of seconds west of GMT */
* vnode will be invalidated
* access only with VOL_LOCK held */
-int VolumeCacheSize = 200, VolumeGets = 0, VolumeReplacements = 0, Vlooks = 0;
+
+/***************************************************/
+/* Startup routines */
+/***************************************************/
+
int
-VInitVolumePackage(ProgramType pt, int nLargeVnodes, int nSmallVnodes,
- int connect, int volcache)
+VInitVolumePackage(ProgramType pt, afs_uint32 nLargeVnodes, afs_uint32 nSmallVnodes,
+ int connect, afs_uint32 volcache)
{
int errors = 0; /* Number of errors while finding vice partitions. */
struct timeval tv;
programType = pt;
+#ifdef AFS_DEMAND_ATTACH_FS
+ memset(&VStats, 0, sizeof(VStats));
+ VStats.hdr_cache_size = 200;
+#endif
+
+ VInitPartitionPackage();
+ VInitVolumeHash();
+ VInitVnHashByVolume();
+#ifdef AFS_DEMAND_ATTACH_FS
+ if (programType == fileServer) {
+ VInitVLRU();
+ } else {
+ VLRU_SetOptions(VLRU_SET_ENABLED, 0);
+ }
+#endif
+
#ifdef AFS_PTHREAD_ENV
assert(pthread_mutex_init(&vol_glock_mutex, NULL) == 0);
- assert(pthread_mutex_init(&vol_attach_mutex, NULL) == 0);
- assert(pthread_mutex_init(&vol_fsync_mutex, NULL) == 0);
assert(pthread_mutex_init(&vol_trans_mutex, NULL) == 0);
assert(pthread_cond_init(&vol_put_volume_cond, NULL) == 0);
assert(pthread_cond_init(&vol_sleep_cond, NULL) == 0);
IOMGR_Initialize();
#endif /* AFS_PTHREAD_ENV */
Lock_Init(&vol_listLock);
- Lock_Init(&FSYNC_handler_lock);
+
srandom(time(0)); /* For VGetVolumeInfo */
gettimeofday(&tv, &tz);
TimeZoneCorrection = tz.tz_minuteswest * 60;
+#ifdef AFS_DEMAND_ATTACH_FS
+ assert(pthread_mutex_init(&vol_salvsync_mutex, NULL) == 0);
+#endif /* AFS_DEMAND_ATTACH_FS */
+
/* Ok, we have done enough initialization that fileserver can
* start accepting calls, even though the volumes may not be
* available just yet.
*/
VInit = 1;
+#if defined(AFS_DEMAND_ATTACH_FS) && defined(SALVSYNC_BUILD_SERVER)
+ if (programType == salvageServer) {
+ SALVSYNC_salvInit();
+ }
+#endif /* AFS_DEMAND_ATTACH_FS */
+#ifdef FSSYNC_BUILD_SERVER
if (programType == fileServer) {
- /* File server or "stand" */
FSYNC_fsInit();
}
+#endif
+#if defined(AFS_DEMAND_ATTACH_FS) && defined(SALVSYNC_BUILD_CLIENT)
+ if (programType == fileServer) {
+ /* establish a connection to the salvager at this point */
+ assert(VConnectSALV() != 0);
+ }
+#endif /* AFS_DEMAND_ATTACH_FS */
- if (volcache > VolumeCacheSize)
- VolumeCacheSize = volcache;
- InitLRU(VolumeCacheSize);
+ if (volcache > VStats.hdr_cache_size)
+ VStats.hdr_cache_size = volcache;
+ VInitVolumeHeaderCache(VStats.hdr_cache_size);
VInitVnodes(vLarge, nLargeVnodes);
VInitVnodes(vSmall, nSmallVnodes);
#ifdef AFS_PTHREAD_ENV
struct vinitvolumepackage_thread_t params;
struct diskpartition_queue_t * dpq;
- int i, len;
+ int i, threads, parts;
pthread_t tid;
pthread_attr_t attrs;
params.n_threads_complete = 0;
/* create partition work queue */
- for (len=0, diskP = DiskPartitionList; diskP; diskP = diskP->next, len++) {
+ for (parts=0, diskP = DiskPartitionList; diskP; diskP = diskP->next, parts++) {
dpq = (diskpartition_queue_t *) malloc(sizeof(struct diskpartition_queue_t));
assert(dpq != NULL);
dpq->diskP = diskP;
queue_Prepend(¶ms,dpq);
}
- assert(pthread_attr_init(&attrs) == 0);
- assert(pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED) == 0);
+ threads = MIN(parts, vol_attach_threads);
- len = MIN(len, vol_attach_threads);
-
- VOL_LOCK;
- for (i=0; i < len; i++) {
- assert(pthread_create
- (&tid, &attrs, &VInitVolumePackageThread,
- ¶ms) == 0);
- }
+ if (threads > 1) {
+ /* spawn off a bunch of initialization threads */
+ assert(pthread_attr_init(&attrs) == 0);
+ assert(pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED) == 0);
- while(params.n_threads_complete < len) {
- pthread_cond_wait(¶ms.thread_done_cv,&vol_glock_mutex);
+ Log("VInitVolumePackage: beginning parallel fileserver startup\n");
+#ifdef AFS_DEMAND_ATTACH_FS
+ Log("VInitVolumePackage: using %d threads to pre-attach volumes on %d partitions\n",
+ threads, parts);
+#else /* AFS_DEMAND_ATTACH_FS */
+ Log("VInitVolumePackage: using %d threads to attach volumes on %d partitions\n",
+ threads, parts);
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+ VOL_LOCK;
+ for (i=0; i < threads; i++) {
+ assert(pthread_create
+ (&tid, &attrs, &VInitVolumePackageThread,
+ ¶ms) == 0);
+ }
+
+ while(params.n_threads_complete < threads) {
+ pthread_cond_wait(¶ms.thread_done_cv,&vol_glock_mutex);
+ }
+ VOL_UNLOCK;
+
+ assert(pthread_attr_destroy(&attrs) == 0);
+ } else {
+ /* if we're only going to run one init thread, don't bother creating
+ * another LWP */
+ Log("VInitVolumePackage: beginning single-threaded fileserver startup\n");
+#ifdef AFS_DEMAND_ATTACH_FS
+ Log("VInitVolumePackage: using 1 thread to pre-attach volumes on %d partition(s)\n",
+ parts);
+#else /* AFS_DEMAND_ATTACH_FS */
+ Log("VInitVolumePackage: using 1 thread to attach volumes on %d partition(s)\n",
+ parts);
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+ VInitVolumePackageThread(¶ms);
}
- VOL_UNLOCK;
assert(pthread_cond_destroy(¶ms.thread_done_cv) == 0);
/* Attach all the volumes in this partition */
for (diskP = DiskPartitionList; diskP; diskP = diskP->next) {
int nAttached = 0, nUnattached = 0;
- Log("Partition %s: attaching volumes\n", diskP->name);
- dirp = opendir(VPartitionPath(diskP));
- assert(dirp);
- while ((dp = readdir(dirp))) {
- char *p;
- p = strrchr(dp->d_name, '.');
- if (p != NULL && strcmp(p, VHDREXT) == 0) {
- Error error;
- Volume *vp;
- vp = VAttachVolumeByName(&error, diskP->name, dp->d_name,
- V_VOLUPD);
- (*(vp ? &nAttached : &nUnattached))++;
- if (error == VOFFLINE)
- Log("Volume %d stays offline (/vice/offline/%s exists)\n", VolumeNumber(dp->d_name), dp->d_name);
- else if (LogLevel >= 5) {
- Log("Partition %s: attached volume %d (%s)\n",
- diskP->name, VolumeNumber(dp->d_name),
- dp->d_name);
- }
- if (vp) {
- VPutVolume(vp);
- }
- }
- }
- Log("Partition %s: attached %d volumes; %d volumes not attached\n", diskP->name, nAttached, nUnattached);
- closedir(dirp);
+ assert(VAttachVolumesByPartition(diskP, &nAttached, &nUnattached) == 0);
}
#endif /* AFS_PTHREAD_ENV */
}
VInit = 2; /* Initialized, and all volumes have been attached */
+#ifdef FSSYNC_BUILD_CLIENT
if (programType == volumeUtility && connect) {
if (!VConnectFS()) {
Log("Unable to connect to file server; aborted\n");
- Lock_Destroy(&FSYNC_handler_lock);
exit(1);
}
}
+#ifdef AFS_DEMAND_ATTACH_FS
+ else if (programType == salvageServer) {
+ if (!VConnectFS()) {
+ Log("Unable to connect to file server; aborted\n");
+ exit(1);
+ }
+ }
+#endif /* AFS_DEMAND_ATTACH_FS */
+#endif /* FSSYNC_BUILD_CLIENT */
return 0;
}
diskP = dpq->diskP;
free(dpq);
- Log("Partition %s: attaching volumes\n", diskP->name);
- dirp = opendir(VPartitionPath(diskP));
- assert(dirp);
- while ((dp = readdir(dirp))) {
- char *p;
- p = strrchr(dp->d_name, '.');
- if (p != NULL && strcmp(p, VHDREXT) == 0) {
- Error error;
- Volume *vp;
- vp = VAttachVolumeByName(&error, diskP->name, dp->d_name,
- V_VOLUPD);
- (*(vp ? &nAttached : &nUnattached))++;
- if (error == VOFFLINE)
- Log("Volume %d stays offline (/vice/offline/%s exists)\n", VolumeNumber(dp->d_name), dp->d_name);
- else if (LogLevel >= 5) {
- Log("Partition %s: attached volume %d (%s)\n",
- diskP->name, VolumeNumber(dp->d_name),
- dp->d_name);
- }
- if (vp) {
- VPutVolume(vp);
- }
- }
- }
- Log("Partition %s: attached %d volumes; %d volumes not attached\n", diskP->name, nAttached, nUnattached);
- closedir(dirp);
+ assert(VAttachVolumesByPartition(diskP, &nAttached, &nUnattached) == 0);
+
VOL_LOCK;
}
}
#endif /* AFS_PTHREAD_ENV */
-/* This must be called by any volume utility which needs to run while the
- file server is also running. This is separated from VInitVolumePackage so
- that a utility can fork--and each of the children can independently
- initialize communication with the file server */
-int
-VConnectFS(void)
+/*
+ * attach all volumes on a given disk partition
+ */
+static int
+VAttachVolumesByPartition(struct DiskPartition *diskP, int * nAttached, int * nUnattached)
{
- int retVal;
- VOL_LOCK;
- retVal = VConnectFS_r();
- VOL_UNLOCK;
- return retVal;
-}
+ DIR * dirp;
+ struct dirent * dp;
+ int ret = 0;
+
+ Log("Partition %s: attaching volumes\n", diskP->name);
+ dirp = opendir(VPartitionPath(diskP));
+ if (!dirp) {
+ Log("opendir on Partition %s failed!\n", diskP->name);
+ return 1;
+ }
+
+ while ((dp = readdir(dirp))) {
+ char *p;
+ p = strrchr(dp->d_name, '.');
+ if (p != NULL && strcmp(p, VHDREXT) == 0) {
+ Error error;
+ Volume *vp;
+#ifdef AFS_DEMAND_ATTACH_FS
+ vp = VPreAttachVolumeByName(&error, diskP->name, dp->d_name,
+ V_VOLUPD);
+#else /* AFS_DEMAND_ATTACH_FS */
+ vp = VAttachVolumeByName(&error, diskP->name, dp->d_name,
+ V_VOLUPD);
+#endif /* AFS_DEMAND_ATTACH_FS */
+ (*(vp ? nAttached : nUnattached))++;
+ if (error == VOFFLINE)
+ Log("Volume %d stays offline (/vice/offline/%s exists)\n", VolumeNumber(dp->d_name), dp->d_name);
+ else if (LogLevel >= 5) {
+ Log("Partition %s: attached volume %d (%s)\n",
+ diskP->name, VolumeNumber(dp->d_name),
+ dp->d_name);
+ }
+#if !defined(AFS_DEMAND_ATTACH_FS)
+ if (vp) {
+ VPutVolume(vp);
+ }
+#endif /* AFS_DEMAND_ATTACH_FS */
+ }
+ }
-int
-VConnectFS_r(void)
-{
- int rc;
- assert(VInit == 2 && programType == volumeUtility);
- rc = FSYNC_clientInit();
- if (rc)
- VInit = 3;
- return rc;
+ Log("Partition %s: attached %d volumes; %d volumes not attached\n", diskP->name, *nAttached, *nUnattached);
+ closedir(dirp);
+ return ret;
}
-void
-VDisconnectFS_r(void)
-{
- assert(programType == volumeUtility);
- FSYNC_clientFinis();
- VInit = 2;
-}
-void
-VDisconnectFS(void)
-{
- VOL_LOCK;
- VDisconnectFS_r();
- VOL_UNLOCK;
-}
+/***************************************************/
+/* Shutdown routines */
+/***************************************************/
+
+/*
+ * demand attach fs
+ * highly multithreaded volume package shutdown
+ *
+ * with the demand attach fileserver extensions,
+ * VShutdown has been modified to be multithreaded.
+ * In order to achieve optimal use of many threads,
+ * the shutdown code involves one control thread and
+ * n shutdown worker threads. The control thread
+ * periodically examines the number of volumes available
+ * for shutdown on each partition, and produces a worker
+ * thread allocation schedule. The idea is to eliminate
+ * redundant scheduling computation on the workers by
+ * having a single master scheduler.
+ *
+ * The scheduler's objectives are:
+ * (1) fairness
+ * each partition with volumes remaining gets allocated
+ * at least 1 thread (assuming sufficient threads)
+ * (2) performance
+ * threads are allocated proportional to the number of
+ * volumes remaining to be offlined. This ensures that
+ * the OS I/O scheduler has many requests to elevator
+ * seek on partitions that will (presumably) take the
+ * longest amount of time (from now) to finish shutdown
+ * (3) keep threads busy
+ * when there are extra threads, they are assigned to
+ * partitions using a simple round-robin algorithm
+ *
+ * In the future, we may wish to add the ability to adapt
+ * to the relative performance patterns of each disk
+ * partition.
+ *
+ *
+ * demand attach fs
+ * multi-step shutdown process
+ *
+ * demand attach shutdown is a four-step process. Each
+ * shutdown "pass" shuts down increasingly more difficult
+ * volumes. The main purpose is to achieve better cache
+ * utilization during shutdown.
+ *
+ * pass 0
+ * shutdown volumes in the unattached, pre-attached
+ * and error states
+ * pass 1
+ * shutdown attached volumes with cached volume headers
+ * pass 2
+ * shutdown all volumes in non-exclusive states
+ * pass 3
+ * shutdown all remaining volumes
+ */
void
VShutdown_r(void)
int i;
register Volume *vp, *np;
register afs_int32 code;
+#ifdef AFS_DEMAND_ATTACH_FS
+ struct DiskPartition * diskP;
+ struct diskpartition_queue_t * dpq;
+ vshutdown_thread_t params;
+ pthread_t tid;
+ pthread_attr_t attrs;
+
+ /* params is a local of this function and is handed by address to
+ * every worker thread created below, so this function must not
+ * return before the workers are done with it */
+ memset(&params, 0, sizeof(vshutdown_thread_t));
+
+ for (params.n_parts=0, diskP = DiskPartitionList;
+ diskP; diskP = diskP->next, params.n_parts++);
+
+ Log("VShutdown: shutting down on-line volumes on %d partition%s...\n",
+ params.n_parts, params.n_parts > 1 ? "s" : "");
+
+ if (vol_attach_threads > 1) {
+ /* prepare for parallel shutdown */
+ params.n_threads = vol_attach_threads;
+ assert(pthread_mutex_init(&params.lock, NULL) == 0);
+ assert(pthread_cond_init(&params.cv, NULL) == 0);
+ assert(pthread_cond_init(&params.master_cv, NULL) == 0);
+ assert(pthread_attr_init(&attrs) == 0);
+ assert(pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED) == 0);
+ queue_Init(&params);
+
+ /* setup the basic partition information structures for
+ * parallel shutdown */
+ for (diskP = DiskPartitionList; diskP; diskP = diskP->next) {
+ /* XXX debug */
+ struct rx_queue * qp, * nqp;
+ Volume * vp;
+ int count = 0;
+
+ VVByPListWait_r(diskP);
+ VVByPListBeginExclusive_r(diskP);
+
+ /* XXX debug */
+ for (queue_Scan(&diskP->vol_list, qp, nqp, rx_queue)) {
+ vp = (Volume *)((char *)qp - offsetof(Volume, vol_list));
+ if (vp->header)
+ count++;
+ }
+ Log("VShutdown: partition %s has %d volumes with attached headers\n",
+ VPartitionPath(diskP), count);
+
+
+ /* build up the pass 0 shutdown work queue */
+ dpq = (struct diskpartition_queue_t *) malloc(sizeof(struct diskpartition_queue_t));
+ assert(dpq != NULL);
+ dpq->diskP = diskP;
+ queue_Prepend(&params, dpq);
+
+ params.part_pass_head[diskP->device] = queue_First(&diskP->vol_list, rx_queue);
+ }
+
+ Log("VShutdown: beginning parallel fileserver shutdown\n");
+ Log("VShutdown: using %d threads to offline volumes on %d partition%s\n",
+ vol_attach_threads, params.n_parts, params.n_parts > 1 ? "s" : "" );
+
+ /* do pass 0 shutdown */
+ assert(pthread_mutex_lock(&params.lock) == 0);
+ for (i=0; i < params.n_threads; i++) {
+ assert(pthread_create
+ (&tid, &attrs, &VShutdownThread,
+ &params) == 0);
+ }
+
+ /* wait for all the pass 0 shutdowns to complete */
+ while (params.n_threads_complete < params.n_threads) {
+ assert(pthread_cond_wait(&params.master_cv, &params.lock) == 0);
+ }
+ params.n_threads_complete = 0;
+ params.pass = 1;
+ assert(pthread_cond_broadcast(&params.cv) == 0);
+ assert(pthread_mutex_unlock(&params.lock) == 0);
+
+ Log("VShutdown: pass 0 completed using the 1 thread per partition algorithm\n");
+ Log("VShutdown: starting passes 1 through 3 using finely-granular mp-fast algorithm\n");
+
+ /* run the parallel shutdown scheduler. it will drop the glock internally */
+ ShutdownController(&params);
+
+ /* wait for all the workers to finish pass 3 and terminate */
+ while (params.pass < 4) {
+ assert(pthread_cond_wait(&params.cv, &vol_glock_mutex) == 0);
+ }
+
+ assert(pthread_attr_destroy(&attrs) == 0);
+ assert(pthread_cond_destroy(&params.cv) == 0);
+ assert(pthread_cond_destroy(&params.master_cv) == 0);
+ assert(pthread_mutex_destroy(&params.lock) == 0);
+
+ /* drop the VByPList exclusive reservations */
+ for (diskP = DiskPartitionList; diskP; diskP = diskP->next) {
+ VVByPListEndExclusive_r(diskP);
+ Log("VShutdown: %s stats : (pass[0]=%d, pass[1]=%d, pass[2]=%d, pass[3]=%d)\n",
+ VPartitionPath(diskP),
+ params.stats[0][diskP->device],
+ params.stats[1][diskP->device],
+ params.stats[2][diskP->device],
+ params.stats[3][diskP->device]);
+ }
+
+ Log("VShutdown: shutdown finished using %d threads\n", params.n_threads);
+ } else {
+ /* if we're only going to run one shutdown thread, don't bother creating
+ * another LWP */
+ Log("VShutdown: beginning single-threaded fileserver shutdown\n");
+
+ for (diskP = DiskPartitionList; diskP; diskP = diskP->next) {
+ VShutdownByPartition_r(diskP);
+ }
+ }
+ Log("VShutdown: complete.\n");
+#else /* AFS_DEMAND_ATTACH_FS */
Log("VShutdown: shutting down on-line volumes...\n");
- for (i = 0; i < VOLUME_HASH_TABLE_SIZE; i++) {
+ for (i = 0; i < VolumeHashTable.Size; i++) {
/* try to hold first volume in the hash table */
- for (vp = VolumeHashTable[i]; vp; vp = vp->hashNext) {
+ for (queue_Scan(&VolumeHashTable.Table[i],vp,np,Volume)) {
code = VHold_r(vp);
- if (code == 0)
- break; /* got it */
- /* otherwise we go around again, trying another volume */
- }
- while (vp) {
- if (LogLevel >= 5)
- Log("VShutdown: Attempting to take volume %u offline.\n",
- vp->hashid);
- /* first compute np before releasing vp, in case vp disappears
- * after releasing. Hold it, so it doesn't disapear. If we
- * can't hold it, try the next one in the chain. Invariant
- * at the top of this loop is that vp is held (has extra ref count).
- */
- for (np = vp->hashNext; np; np = np->hashNext) {
- code = VHold_r(np);
- if (code == 0)
- break; /* got it */
+ if (code == 0) {
+ if (LogLevel >= 5)
+ Log("VShutdown: Attempting to take volume %u offline.\n",
+ vp->hashid);
+
+ /* next, take the volume offline (drops reference count) */
+ VOffline_r(vp, "File server was shut down");
}
- /* next, take the volume offline (drops reference count) */
- VOffline_r(vp, "File server was shut down");
- vp = np; /* next guy to try */
}
}
Log("VShutdown: complete.\n");
+#endif /* AFS_DEMAND_ATTACH_FS */
}
void
VOL_UNLOCK;
}
-
+#ifdef AFS_DEMAND_ATTACH_FS
+/*
+ * demand attach fs
+ * shutdown control thread
+ */
static void
-ReadHeader(Error * ec, IHandle_t * h, char *to, int size, bit32 magic,
- bit32 version)
+ShutdownController(vshutdown_thread_t * params)
{
- struct versionStamp *vsn;
- FdHandle_t *fdP;
+ /* XXX debug */
+ struct DiskPartition * diskP;
+ Device id;
+ vshutdown_thread_t shadow;
- *ec = 0;
- if (h == NULL) {
- *ec = VSALVAGE;
- return;
+ /* produce an initial schedule before entering the loop */
+ ShutdownCreateSchedule(params);
+
+ /* keep rescheduling until the workers have advanced params->pass
+ * to 4 or every worker has reported completion */
+ while ((params->pass < 4) &&
+ (params->n_threads_complete < params->n_threads)) {
+ /* recompute schedule once per second */
+
+ /* snapshot the shared state before VOL_UNLOCK so that the
+ * debug logging below reads a private copy */
+ memcpy(&shadow, params, sizeof(vshutdown_thread_t));
+
+ VOL_UNLOCK;
+ /* XXX debug */
+ Log("ShutdownController: schedule version=%d, vol_remaining=%d, pass=%d\n",
+ shadow.schedule_version, shadow.vol_remaining, shadow.pass);
+ Log("ShutdownController: n_threads_complete=%d, n_parts_done_pass=%d\n",
+ shadow.n_threads_complete, shadow.n_parts_done_pass);
+ for (diskP = DiskPartitionList; diskP; diskP=diskP->next) {
+ id = diskP->device;
+ /* NOTE(review): vol_list.len is read from the live partition,
+ * not from the shadow copy, after VOL_UNLOCK -- confirm this
+ * unsynchronized read is acceptable for debug output */
+ Log("ShutdownController: part[%d] : (len=%d, thread_target=%d, done_pass=%d, pass_head=%p)\n",
+ id,
+ diskP->vol_list.len,
+ shadow.part_thread_target[id],
+ shadow.part_done_pass[id],
+ shadow.part_pass_head[id]);
+ }
+
+ sleep(1);
+ VOL_LOCK;
+
+ ShutdownCreateSchedule(params);
}
+}
- fdP = IH_OPEN(h);
- if (fdP == NULL) {
- *ec = VSALVAGE;
- return;
+/* create the shutdown thread work schedule.
+ * this scheduler tries to implement fairness
+ * by allocating at least 1 thread to each
+ * partition with volumes to be shutdown,
+ * and then it attempts to allocate remaining
+ * threads based upon the amount of work left
+ *
+ * the result is written to params->part_thread_target[],
+ * indexed by partition device id. params->schedule_version
+ * is incremented and params->vol_remaining is refreshed on
+ * every call, even when there is nothing left to schedule.
+ */
+static void
+ShutdownCreateSchedule(vshutdown_thread_t * params)
+{
+ struct DiskPartition * diskP;
+ int sum, thr_workload, thr_left;
+ int part_residue[VOLMAXPARTS+1];
+ Device id;
+
+ /* compute the total number of outstanding volumes */
+ sum = 0;
+ for (diskP = DiskPartitionList; diskP; diskP = diskP->next) {
+ sum += diskP->vol_list.len;
}
+
+ params->schedule_version++;
+ params->vol_remaining = sum;
- if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) {
- *ec = VSALVAGE;
- FDH_REALLYCLOSE(fdP);
+ /* nothing left to shut down; leave the thread targets untouched */
+ if (!sum)
return;
+
+ /* compute average per-thread workload */
+ /* (rounded up, so thr_workload is at least 1 whenever sum > 0) */
+ thr_workload = sum / params->n_threads;
+ if (sum % params->n_threads)
+ thr_workload++;
+
+ thr_left = params->n_threads;
+ memset(&part_residue, 0, sizeof(part_residue));
+
+ /* for fairness, give every partition with volumes remaining
+ * at least one thread */
+ /* NOTE(review): this loop stops as soon as thr_left reaches 0, so
+ * partitions later in the list keep whatever part_thread_target
+ * value a previous schedule left behind -- confirm that is intended
+ * when there are more busy partitions than threads */
+ for (diskP = DiskPartitionList; diskP && thr_left; diskP = diskP->next) {
+ id = diskP->device;
+ if (diskP->vol_list.len) {
+ params->part_thread_target[id] = 1;
+ thr_left--;
+ } else {
+ params->part_thread_target[id] = 0;
+ }
}
- vsn = (struct versionStamp *)to;
- if (FDH_READ(fdP, to, size) != size || vsn->magic != magic) {
- *ec = VSALVAGE;
- FDH_REALLYCLOSE(fdP);
- return;
+
+ if (thr_left && thr_workload) {
+ /* compute length-weighted workloads */
+ int delta;
+
+ for (diskP = DiskPartitionList; diskP && thr_left; diskP = diskP->next) {
+ id = diskP->device;
+ /* delta = extra threads this partition deserves beyond
+ * what the fairness step already gave it */
+ delta = (diskP->vol_list.len / thr_workload) -
+ params->part_thread_target[id];
+ if (delta < 0) {
+ continue;
+ }
+ if (delta < thr_left) {
+ params->part_thread_target[id] += delta;
+ thr_left -= delta;
+ } else {
+ params->part_thread_target[id] += thr_left;
+ thr_left = 0;
+ break;
+ }
+ }
}
- FDH_CLOSE(fdP);
- /* Check is conditional, in case caller wants to inspect version himself */
- if (version && vsn->version != version) {
- *ec = VSALVAGE;
+ if (thr_left) {
+ /* try to assign any leftover threads to partitions that
+ * had volume lengths closer to needing thread_target+1 */
+ int max_residue, max_id;
+
+ /* compute the residues */
+ for (diskP = DiskPartitionList; diskP; diskP = diskP->next) {
+ id = diskP->device;
+ part_residue[id] = diskP->vol_list.len -
+ (params->part_thread_target[id] * thr_workload);
+ }
+
+ /* now try to allocate remaining threads to partitions with the
+ * highest residues */
+ while (thr_left) {
+ max_residue = 0;
+ for (diskP = DiskPartitionList; diskP; diskP = diskP->next) {
+ id = diskP->device;
+ if (part_residue[id] > max_residue) {
+ max_residue = part_residue[id];
+ max_id = id;
+ }
+ }
+
+ /* max_id is only assigned when a positive residue was
+ * found, so bail out before it is used otherwise */
+ if (!max_residue) {
+ break;
+ }
+
+ params->part_thread_target[max_id]++;
+ thr_left--;
+ /* each partition receives at most one residue thread */
+ part_residue[max_id] = 0;
+ }
+ }
+
+ if (thr_left) {
+ /* punt and give any remaining threads equally to each partition */
+ int alloc;
+ if (thr_left >= params->n_parts) {
+ alloc = thr_left / params->n_parts;
+ for (diskP = DiskPartitionList; diskP; diskP = diskP->next) {
+ id = diskP->device;
+ params->part_thread_target[id] += alloc;
+ thr_left -= alloc;
+ }
+ }
+
+ /* finish off the last of the threads */
+ for (diskP = DiskPartitionList; thr_left && diskP; diskP = diskP->next) {
+ id = diskP->device;
+ params->part_thread_target[id]++;
+ thr_left--;
+ }
}
}
-/* VolumeHeaderToDisk
- * Allows for storing 64 bit inode numbers in on-disk volume header
- * file.
- */
-void
-VolumeHeaderToDisk(VolumeDiskHeader_t * dh, VolumeHeader_t * h)
+/* worker thread for parallel shutdown */
+/* args is the vshutdown_thread_t shared by the controller and all
+ * workers. the thread first takes at most one entry off the pass 0
+ * work queue, then waits for params->pass to leave 0, and then works
+ * through passes 1 to 3 under VOL_LOCK. always returns NULL. */
+static void *
+VShutdownThread(void * args)
{
+ struct rx_queue *qp;
+ Volume * vp;
+ vshutdown_thread_t * params;
+ int part, code, found, pass, schedule_version_save, count;
+ struct DiskPartition *diskP;
+ struct diskpartition_queue_t * dpq;
+ Device id;
- memset((char *)dh, 0, sizeof(VolumeDiskHeader_t));
- dh->stamp = h->stamp;
- dh->id = h->id;
+ params = (vshutdown_thread_t *) args;
+
+ /* acquire the shutdown pass 0 lock */
+ assert(pthread_mutex_lock(&params->lock) == 0);
+
+ /* if there's still pass 0 work to be done,
+ * get a work entry, and do a pass 0 shutdown */
+ if (queue_IsNotEmpty(params)) {
+ dpq = queue_First(params, diskpartition_queue_t);
+ queue_Remove(dpq);
+ assert(pthread_mutex_unlock(&params->lock) == 0);
+ diskP = dpq->diskP;
+ free(dpq);
+ id = diskP->device;
+
+ /* NOTE(review): the pass 0 walk runs after params->lock is
+ * released and before this thread takes VOL_LOCK below --
+ * confirm the caller's locking makes this safe */
+ count = 0;
+ while (ShutdownVolumeWalk_r(diskP, 0, &params->part_pass_head[id]))
+ count++;
+ params->stats[0][diskP->device] = count;
+ assert(pthread_mutex_lock(&params->lock) == 0);
+ }
+
+ params->n_threads_complete++;
+ if (params->n_threads_complete == params->n_threads) {
+ /* notify control thread that all workers have completed pass 0 */
+ assert(pthread_cond_signal(&params->master_cv) == 0);
+ }
+ while (params->pass == 0) {
+ assert(pthread_cond_wait(&params->cv, &params->lock) == 0);
+ }
+
+ /* switch locks */
+ assert(pthread_mutex_unlock(&params->lock) == 0);
+ VOL_LOCK;
+
+ pass = params->pass;
+ assert(pass > 0);
+
+ /* now escalate through the more complicated shutdowns */
+ while (pass <= 3) {
+ schedule_version_save = params->schedule_version;
+ found = 0;
+ /* find a disk partition to work on */
+ for (diskP = DiskPartitionList; diskP; diskP = diskP->next) {
+ id = diskP->device;
+ if (params->part_thread_target[id] && !params->part_done_pass[id]) {
+ params->part_thread_target[id]--;
+ found = 1;
+ break;
+ }
+ }
+
+ if (!found) {
+ /* hmm. for some reason the controller thread couldn't find anything for
+ * us to do. let's see if there's anything we can do */
+ for (diskP = DiskPartitionList; diskP; diskP = diskP->next) {
+ id = diskP->device;
+ if (diskP->vol_list.len && !params->part_done_pass[id]) {
+ found = 1;
+ break;
+ } else if (!params->part_done_pass[id]) {
+ params->part_done_pass[id] = 1;
+ params->n_parts_done_pass++;
+ if (pass == 3) {
+ Log("VShutdown: done shutting down volumes on partition %s.\n",
+ VPartitionPath(diskP));
+ }
+ }
+ }
+ }
+
+ /* do work on this partition until either the controller
+ * creates a new schedule, or we run out of things to do
+ * on this partition */
+ if (found) {
+ count = 0;
+ while (!params->part_done_pass[id] &&
+ (schedule_version_save == params->schedule_version)) {
+ /* ShutdownVolumeWalk_r will drop the glock internally */
+ if (!ShutdownVolumeWalk_r(diskP, pass, &params->part_pass_head[id])) {
+ if (!params->part_done_pass[id]) {
+ params->part_done_pass[id] = 1;
+ params->n_parts_done_pass++;
+ if (pass == 3) {
+ Log("VShutdown: done shutting down volumes on partition %s.\n",
+ VPartitionPath(diskP));
+ }
+ }
+ break;
+ }
+ count++;
+ }
+
+ params->stats[pass][id] += count;
+ } else {
+ /* ok, everyone is done this pass, proceed */
+
+ /* barrier lock */
+ params->n_threads_complete++;
+ while (params->pass == pass) {
+ if (params->n_threads_complete == params->n_threads) {
+ /* we are the last thread to complete, so we will
+ * reinitialize worker pool state for the next pass */
+ params->n_threads_complete = 0;
+ params->n_parts_done_pass = 0;
+ params->pass++;
+ for (diskP = DiskPartitionList; diskP; diskP = diskP->next) {
+ id = diskP->device;
+ params->part_done_pass[id] = 0;
+ params->part_pass_head[id] = queue_First(&diskP->vol_list, rx_queue);
+ }
+
+ /* compute a new thread schedule before releasing all the workers */
+ ShutdownCreateSchedule(params);
+
+ /* wake up all the workers */
+ assert(pthread_cond_broadcast(&params->cv) == 0);
+
+ VOL_UNLOCK;
+ Log("VShutdown: pass %d completed using %d threads on %d partitions\n",
+ pass, params->n_threads, params->n_parts);
+ VOL_LOCK;
+ } else {
+ assert(pthread_cond_wait(&params->cv, &vol_glock_mutex) == 0);
+ }
+ }
+ pass = params->pass;
+ }
+
+ /* for fairness */
+ VOL_UNLOCK;
+ pthread_yield();
+ VOL_LOCK;
+ }
+
+ VOL_UNLOCK;
+
+ return NULL;
+}
+
+/* shut down all volumes on a given disk partition
+ *
+ * note that this function will not allow mp-fast
+ * shutdown of a partition
+ *
+ * runs shutdown passes 0 through 3 in order over dp while holding
+ * exclusive access to the partition's volume list, then logs the
+ * per-pass and total counts.
+ *
+ * always returns 0
+ */
+int
+VShutdownByPartition_r(struct DiskPartition * dp)
+{
+    int pass;
+    int pass_stats[4];
+    int total;
+
+    /* wait for other exclusive ops to finish */
+    VVByPListWait_r(dp);
+
+    /* begin exclusive access */
+    VVByPListBeginExclusive_r(dp);
+
+    /* pick the low-hanging fruit first,
+     * then do the complicated ones last
+     * (has the advantage of keeping
+     *  in-use volumes up until the bitter end) */
+    for (pass = 0, total = 0; pass < 4; pass++) {
+        pass_stats[pass] = ShutdownVByPForPass_r(dp, pass);
+        total += pass_stats[pass];
+    }
+
+    /* end exclusive access */
+    VVByPListEndExclusive_r(dp);
+
+    Log("VShutdownByPartition: shut down %d volumes on %s (pass[0]=%d, pass[1]=%d, pass[2]=%d, pass[3]=%d)\n",
+        total, VPartitionPath(dp), pass_stats[0], pass_stats[1], pass_stats[2], pass_stats[3]);
+
+    /* previously returned an uninitialised local; there is no failure
+     * path in this function, so report success explicitly */
+    return 0;
+}
+
+/* internal shutdown functionality
+ *
+ * for multi-pass shutdown:
+ * 0 to only "shutdown" {pre,un}attached and error state volumes
+ * 1 to also shutdown attached volumes w/ volume header loaded
+ * 2 to also shutdown attached volumes w/o volume header loaded
+ * 3 to also shutdown exclusive state volumes
+ *
+ * caller MUST hold exclusive access on the hash chain
+ * because we drop vol_glock_mutex internally
+ *
+ * this function is reentrant for passes 1--3
+ * (e.g. multiple threads can cooperate to
+ * shutdown a partition mp-fast)
+ *
+ * pass 0 is not scalable because the volume state data is
+ * synchronized by vol_glock mutex, and the locking overhead
+ * is too high to drop the lock long enough to do linked list
+ * traversal
+ */
+static int
+ShutdownVByPForPass_r(struct DiskPartition * dp, int pass)
+{
+    /* walk cursor; ShutdownVolumeWalk_r advances it past each volume
+     * it shuts down */
+    struct rx_queue *cursor = queue_First(&dp->vol_list, rx_queue);
+    int n_shutdown;
+
+    /* each successful walk shuts down exactly one volume; keep going
+     * until the walk reports nothing left for this pass */
+    for (n_shutdown = 0;
+         ShutdownVolumeWalk_r(dp, pass, &cursor);
+         n_shutdown++)
+        ;
+
+    /* number of volumes shut down on dp during this pass */
+    return n_shutdown;
+}
+
+/* conditionally shutdown one volume on partition dp
+ * returns 1 if a volume was shutdown in this pass,
+ * 0 otherwise */
+static int
+ShutdownVolumeWalk_r(struct DiskPartition * dp, int pass,
+                     struct rx_queue ** idx)
+{
+    struct rx_queue *cur, *next;
+    Volume *vol;
+
+    /* resume scanning from where the previous walk left off */
+    cur = *idx;
+
+    for (queue_ScanFrom(&dp->vol_list, cur, cur, next, rx_queue)) {
+        /* recover the Volume that embeds this vol_list link */
+        vol = (Volume *) (((char *)cur) - offsetof(Volume, vol_list));
+
+        /* only passes 0 through 3 ever shut anything down */
+        if (pass < 0 || pass > 3)
+            continue;
+
+        /* pass 0 only takes unattached, error and pre-attached volumes */
+        if (pass == 0 &&
+            (V_attachState(vol) != VOL_STATE_UNATTACHED) &&
+            (V_attachState(vol) != VOL_STATE_ERROR) &&
+            (V_attachState(vol) != VOL_STATE_PREATTACHED))
+            continue;
+
+        /* passes 0 and 1 leave attached volumes without a header alone */
+        if (pass <= 1 &&
+            (V_attachState(vol) == VOL_STATE_ATTACHED) &&
+            (vol->header == NULL))
+            continue;
+
+        /* passes 0 through 2 leave exclusive-state volumes alone */
+        if (pass <= 2 && IsExclusiveState(V_attachState(vol)))
+            continue;
+
+        /* eligible: remember the successor for the next walk, unlink
+         * the volume from the partition list and shut it down */
+        *idx = next;
+        DeleteVolumeFromVByPList_r(vol);
+        VShutdownVolume_r(vol);
+        return 1;
+    }
+
+    /* reached the end of the list without finding an eligible volume */
+    return 0;
+}
+
+/*
+ * shutdown a specific volume
+ *
+ * calls VCreateReservation_r / VCancelReservation_r around the work,
+ * calls VWaitExclusiveState_r first, and then dispatches on the
+ * volume's attach state. the switch below relies on deliberate
+ * case fall-through. always returns 0.
+ */
+/* caller MUST NOT hold a heavyweight ref on vp */
+int
+VShutdownVolume_r(Volume * vp)
+{
+ int code;
+
+ VCreateReservation_r(vp);
+
+ if (LogLevel >= 5) {
+ Log("VShutdownVolume_r: vid=%u, device=%d, state=%hu\n",
+ vp->hashid, vp->partition->device, V_attachState(vp));
+ }
+
+ /* wait for other blocking ops to finish */
+ VWaitExclusiveState_r(vp);
+
+ assert(IsValidState(V_attachState(vp)));
+
+ switch(V_attachState(vp)) {
+ case VOL_STATE_SALVAGING:
+ /* make sure salvager knows we don't want
+ * the volume back */
+ VCancelSalvage_r(vp, SALVSYNC_SHUTDOWN);
+ /* fall through: a cancelled salvage is then marked unattached */
+ case VOL_STATE_PREATTACHED:
+ case VOL_STATE_ERROR:
+ VChangeState_r(vp, VOL_STATE_UNATTACHED);
+ /* fall through: nothing further to do once unattached */
+ case VOL_STATE_UNATTACHED:
+ break;
+ case VOL_STATE_GOING_OFFLINE:
+ case VOL_STATE_SHUTTING_DOWN:
+ case VOL_STATE_ATTACHED:
+ /* VOffline_r is only attempted when VHold_r returns 0 */
+ code = VHold_r(vp);
+ if (!code) {
+ if (LogLevel >= 5)
+ Log("VShutdown: Attempting to take volume %u offline.\n",
+ vp->hashid);
+
+ /* take the volume offline (drops reference count) */
+ VOffline_r(vp, "File server was shut down");
+ }
+ break;
+ }
+
+ VCancelReservation_r(vp);
+ /* vp is a by-value parameter; clearing it has no effect on the caller */
+ vp = NULL;
+ return 0;
+}
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+
+/***************************************************/
+/* Header I/O routines */
+/***************************************************/
+
+/* read and validate an on-disk header structure
+ *
+ * opens a descriptor for the inode handle (h), reads (size) bytes
+ * from offset 0 into the buffer (to), and checks the versionStamp at
+ * the start of the buffer against (magic). when (version) is nonzero
+ * the stamp's version must match it as well.
+ *
+ * *ec is 0 on success and VSALVAGE on any failure
+ */
+static void
+ReadHeader(Error * ec, IHandle_t * h, char *to, int size, bit32 magic,
+           bit32 version)
+{
+    struct versionStamp *stamp = (struct versionStamp *)to;
+    FdHandle_t *fd;
+
+    *ec = 0;
+
+    /* no handle, or the handle cannot be opened */
+    if (h == NULL || (fd = IH_OPEN(h)) == NULL) {
+        *ec = VSALVAGE;
+        return;
+    }
+
+    /* seek failure, short read and bad magic all discard the descriptor */
+    if (FDH_SEEK(fd, 0, SEEK_SET) < 0 ||
+        FDH_READ(fd, to, size) != size ||
+        stamp->magic != magic) {
+        *ec = VSALVAGE;
+        FDH_REALLYCLOSE(fd);
+        return;
+    }
+
+    FDH_CLOSE(fd);
+
+    /* Check is conditional, in case caller wants to inspect version himself */
+    if (version != 0 && stamp->version != version)
+        *ec = VSALVAGE;
+}
+
+/* write the in-memory disk data of volume (vp), V_disk(vp), to
+ * offset 0 of the volume's disk data handle.
+ *
+ * *ec is 0 on success and VSALVAGE if the open, seek or write fails
+ */
+void
+WriteVolumeHeader_r(Error * ec, Volume * vp)
+{
+    IHandle_t *handle = V_diskDataHandle(vp);
+    FdHandle_t *fd;
+
+    *ec = 0;
+
+    fd = IH_OPEN(handle);
+    if (fd == NULL) {
+        *ec = VSALVAGE;
+        return;
+    }
+
+    /* a failed seek or a short write both discard the descriptor */
+    if (FDH_SEEK(fd, 0, SEEK_SET) < 0 ||
+        FDH_WRITE(fd, (char *)&V_disk(vp), sizeof(V_disk(vp)))
+            != sizeof(V_disk(vp))) {
+        *ec = VSALVAGE;
+        FDH_REALLYCLOSE(fd);
+        return;
+    }
+
+    FDH_CLOSE(fd);
+}
+
+/* VolumeHeaderToDisk
+ * Allows for storing 64 bit inode numbers in on-disk volume header
+ * file.
+ */
+/* convert in-memory representation of a volume header to the
+ * on-disk representation of a volume header */
+void
+VolumeHeaderToDisk(VolumeDiskHeader_t * dh, VolumeHeader_t * h)
+{
+
+ memset((char *)dh, 0, sizeof(VolumeDiskHeader_t));
+ dh->stamp = h->stamp;
+ dh->id = h->id;
dh->parent = h->parent;
#ifdef AFS_64BIT_IOPS_ENV
}
/* DiskToVolumeHeader
- * Reads volume header file from disk, convering 64 bit inodes
- * if required. Makes the assumption that AFS has *always*
+ * Converts an on-disk representation of a volume header to
+ * the in-memory representation of a volume header.
+ *
+ * Makes the assumption that AFS has *always*
* zero'd the volume header file so that high parts of inode
* numbers are 0 in older (SGI EFS) volume header files.
*/
}
-void
-WriteVolumeHeader_r(ec, vp)
- Error *ec;
- Volume *vp;
-{
- IHandle_t *h = V_diskDataHandle(vp);
- FdHandle_t *fdP;
-
- *ec = 0;
-
- fdP = IH_OPEN(h);
- if (fdP == NULL) {
- *ec = VSALVAGE;
- return;
- }
- if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) {
- *ec = VSALVAGE;
- FDH_REALLYCLOSE(fdP);
- return;
- }
- if (FDH_WRITE(fdP, (char *)&V_disk(vp), sizeof(V_disk(vp)))
- != sizeof(V_disk(vp))) {
- *ec = VSALVAGE;
- FDH_REALLYCLOSE(fdP);
- return;
- }
- FDH_CLOSE(fdP);
-}
+/***************************************************/
+/* Volume Attachment routines */
+/***************************************************/
-/* Attach an existing volume, given its pathname, and return a
- pointer to the volume header information. The volume also
- normally goes online at this time. An offline volume
- must be reattached to make it go online */
+#ifdef AFS_DEMAND_ATTACH_FS
+/* pre-attach a volume given its path
+ *
+ * a pre-attached volume will only have its partition
+ * and hashid fields initialized
+ *
+ * at first call to VGetVolume, the volume will be
+ * fully attached
+ */
Volume *
-VAttachVolumeByName(Error * ec, char *partition, char *name, int mode)
+VPreAttachVolumeByName(Error * ec, char *partition, char *name, int mode)
{
- Volume *retVal;
- VATTACH_LOCK;
+ Volume * vp;
+ /* locking wrapper: run VPreAttachVolumeByName_r between
+ * VOL_LOCK and VOL_UNLOCK and hand back its result unchanged */
VOL_LOCK;
- retVal = VAttachVolumeByName_r(ec, partition, name, mode);
+ vp = VPreAttachVolumeByName_r(ec, partition, name, mode);
VOL_UNLOCK;
- VATTACH_UNLOCK;
- return retVal;
+ return vp;
}
Volume *
-VAttachVolumeByName_r(Error * ec, char *partition, char *name, int mode)
+VPreAttachVolumeByName_r(Error * ec, char *partition, char *name, int mode)
{
- register Volume *vp;
+ /* resolves (partition) to a DiskPartition and (name) to a volume id,
+ * looks the id up, and delegates to VPreAttachVolumeById_r.
+ * returns NULL with *ec = VNOVOL when the partition is unknown, and
+ * NULL when the lookup sets *ec.
+ * NOTE(review): mode, fd, n, status, path and isbusy are not used by
+ * the new body visible here -- confirm they can be dropped */
+ register Volume *vp = NULL;
int fd, n;
struct afs_stat status;
- struct VolumeDiskHeader diskHeader;
- struct VolumeHeader iheader;
struct DiskPartition *partp;
char path[64];
int isbusy = 0;
+ VolId volumeId;
*ec = 0;
- if (programType == volumeUtility) {
- assert(VInit == 3);
- VLockPartition_r(partition);
- }
- if (programType == fileServer) {
- vp = VGetVolume_r(ec, VolumeNumber(name));
- if (vp) {
- if (V_inUse(vp))
- return vp;
- if (vp->specialStatus == VBUSY)
- isbusy = 1;
- VDetachVolume_r(ec, vp);
- if (*ec) {
- Log("VAttachVolume: Error detaching volume (%s)\n", name);
- }
- }
- }
+
+ /* pre-attachment is only valid inside the fileserver */
+ assert(programType == fileServer);
if (!(partp = VGetPartition_r(partition, 0))) {
*ec = VNOVOL;
- Log("VAttachVolume: Error getting partition (%s)\n", partition);
- goto done;
+ Log("VPreAttachVolume: Error getting partition (%s)\n", partition);
+ return NULL;
}
- *ec = 0;
- strcpy(path, VPartitionPath(partp));
- strcat(path, "/");
- strcat(path, name);
+ volumeId = VolumeNumber(name);
+
+ /* vp may legitimately be NULL here; VPreAttachVolumeById_r
+ * allocates a new Volume in that case */
+ vp = VLookupVolume_r(ec, volumeId, NULL);
+ if (*ec) {
+ return NULL;
+ }
+
+ return VPreAttachVolumeById_r(ec, partp, vp, volumeId);
+}
+
+/* pre-attach a volume given its partition and volume id
+ *
+ * if vp == NULL, then a new vp is created
+ * if vp != NULL, then we assume it is already on the hash chain
+ *
+ * when a new Volume has to be allocated, the vol glock is dropped
+ * for the allocation and reacquired afterwards, so the id is looked
+ * up again to detect another thread having pre-attached it meanwhile.
+ *
+ * returns the pre-attached volume, or NULL when *ec is set
+ */
+Volume *
+VPreAttachVolumeById_r(Error * ec, struct DiskPartition * partp,
+                       Volume * vp, int vid)
+{
+    Volume *nvp = NULL;
+
+    *ec = 0;
+
+    /* check to see if pre-attach already happened */
+    if (vp &&
+        (V_attachState(vp) != VOL_STATE_UNATTACHED) &&
+        !IsErrorState(V_attachState(vp))) {
+        goto done;
+    } else if (vp) {
+        /* we're re-attaching a volume; clear out some old state */
+        memset(&vp->salvage, 0, sizeof(struct VolumeOnlineSalvage));
+    } else {
+        /* if we need to allocate a new Volume struct,
+         * go ahead and drop the vol glock, otherwise
+         * do the basic setup synchronised, as it's
+         * probably not worth dropping the lock */
+        VOL_UNLOCK;
+
+        /* allocate the volume structure */
+        vp = nvp = (Volume *) malloc(sizeof(Volume));
+        assert(vp != NULL);
+        memset(vp, 0, sizeof(Volume));
+        assert(pthread_cond_init(&V_attachCV(vp), NULL) == 0);
+    }
+
+    /* link the volume with its associated vice partition */
+    vp->device = partp->device;
+    vp->partition = partp;
+    vp->hashid = vid;
+
+    /* if we dropped the lock, reacquire the lock,
+     * check for pre-attach races, and then add
+     * the volume to the hash table */
+    if (nvp) {
+        VOL_LOCK;
+        nvp = VLookupVolume_r(ec, vid, NULL);
+        if (*ec) {
+            /* vp is the Volume allocated above; release the condition
+             * variable initialised for it before freeing the memory */
+            assert(pthread_cond_destroy(&V_attachCV(vp)) == 0);
+            free(vp);
+            vp = NULL;
+            goto done;
+        } else if (nvp) { /* race detected */
+            /* another thread added this id while the lock was dropped;
+             * discard our copy (condition variable included) and
+             * return the winner */
+            assert(pthread_cond_destroy(&V_attachCV(vp)) == 0);
+            free(vp);
+            vp = nvp;
+            goto done;
+        } else {
+            /* hack to make up for VChangeState_r() decrementing
+             * the old state counter */
+            VStats.state_levels[0]++;
+        }
+    }
+
+    /* put pre-attached volume onto the hash table
+     * and bring it up to the pre-attached state */
+    AddVolumeToHashTable(vp, vp->hashid);
+    AddVolumeToVByPList_r(vp);
+    VLRU_Init_Node_r(vp);
+    VChangeState_r(vp, VOL_STATE_PREATTACHED);
+
+    if (LogLevel >= 5)
+        Log("VPreAttachVolumeById_r: volume %u pre-attached\n", vp->hashid);
+
+  done:
+    if (*ec)
+        return NULL;
+    else
+        return vp;
+}
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+/* Attach an existing volume, given its pathname, and return a
+ * pointer to the volume header information.  The volume also
+ * normally goes online at this time.  An offline volume
+ * must be reattached to make it go online.
+ *
+ * This is the locking wrapper: it calls VAttachVolumeByName_r
+ * between VOL_LOCK and VOL_UNLOCK and returns its result. */
+Volume *
+VAttachVolumeByName(Error * ec, char *partition, char *name, int mode)
+{
+    Volume *attached;
+
+    VOL_LOCK;
+    attached = VAttachVolumeByName_r(ec, partition, name, mode);
+    VOL_UNLOCK;
+
+    return attached;
+}
+
+Volume *
+VAttachVolumeByName_r(Error * ec, char *partition, char *name, int mode)
+{
+ register Volume *vp = NULL, *svp = NULL;
+ int fd, n;
+ struct afs_stat status;
+ struct VolumeDiskHeader diskHeader;
+ struct VolumeHeader iheader;
+ struct DiskPartition *partp;
+ char path[64];
+ int isbusy = 0;
+ VolId volumeId;
+#ifdef AFS_DEMAND_ATTACH_FS
+ VolumeStats stats_save;
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+ *ec = 0;
+
+ volumeId = VolumeNumber(name);
+
+ if (!(partp = VGetPartition_r(partition, 0))) {
+ *ec = VNOVOL;
+ Log("VAttachVolume: Error getting partition (%s)\n", partition);
+ goto done;
+ }
+
+ if (programType == volumeUtility) {
+ assert(VInit == 3);
+ VLockPartition_r(partition);
+ } else if (programType == fileServer) {
+#ifdef AFS_DEMAND_ATTACH_FS
+ /* lookup the volume in the hash table */
+ vp = VLookupVolume_r(ec, volumeId, NULL);
+ if (*ec) {
+ return NULL;
+ }
+
+ if (vp) {
+ /* save any counters that are supposed to
+ * be monotonically increasing over the
+ * lifetime of the fileserver */
+ memcpy(&stats_save, &vp->stats, sizeof(VolumeStats));
+ } else {
+ memset(&stats_save, 0, sizeof(VolumeStats));
+ }
+
+ /* if there's something in the hash table, and it's not
+ * in the pre-attach state, then we may need to detach
+ * it before proceeding */
+ if (vp && (V_attachState(vp) != VOL_STATE_PREATTACHED)) {
+ VCreateReservation_r(vp);
+ VWaitExclusiveState_r(vp);
+
+ /* at this point state must be one of:
+ * UNATTACHED,
+ * ATTACHED,
+ * SHUTTING_DOWN,
+ * GOING_OFFLINE,
+ * SALVAGING,
+ * ERROR
+ */
+
+ if (vp->specialStatus == VBUSY)
+ isbusy = 1;
+
+ /* if it's already attached, see if we can return it */
+ if (V_attachState(vp) == VOL_STATE_ATTACHED) {
+ VGetVolumeByVp_r(ec, vp);
+ if (V_inUse(vp)) {
+ VCancelReservation_r(vp);
+ return vp;
+ }
+
+ /* otherwise, we need to detach, and attempt to re-attach */
+ VDetachVolume_r(ec, vp);
+ if (*ec) {
+ Log("VAttachVolume: Error detaching old volume instance (%s)\n", name);
+ }
+ } else {
+ /* if it isn't fully attached, delete from the hash tables,
+ and let the refcounter handle the rest */
+ DeleteVolumeFromHashTable(vp);
+ DeleteVolumeFromVByPList_r(vp);
+ }
+
+ VCancelReservation_r(vp);
+ vp = NULL;
+ }
+
+ /* pre-attach volume if it hasn't been done yet */
+ if (!vp ||
+ (V_attachState(vp) == VOL_STATE_UNATTACHED) ||
+ (V_attachState(vp) == VOL_STATE_ERROR)) {
+ svp = vp;
+ vp = VPreAttachVolumeById_r(ec, partp, vp, volumeId);
+ if (*ec) {
+ return NULL;
+ }
+ }
+
+ assert(vp != NULL);
+
+ /* handle pre-attach races
+ *
+ * multiple threads can race to pre-attach a volume,
+ * but we can't let them race beyond that
+ *
+ * our solution is to let the first thread to bring
+ * the volume into an exclusive state win; the other
+ * threads just wait until it finishes bringing the
+ * volume online, and then they do a vgetvolumebyvp
+ */
+ if (svp && (svp != vp)) {
+ /* wait for other exclusive ops to finish */
+ VCreateReservation_r(vp);
+ VWaitExclusiveState_r(vp);
+
+ /* get a heavyweight ref, kill the lightweight ref, and return */
+ VGetVolumeByVp_r(ec, vp);
+ VCancelReservation_r(vp);
+ return vp;
+ }
+
+ /* at this point, we are chosen as the thread to do
+ * demand attachment for this volume. all other threads
+ * doing a getvolume on vp->hashid will block until we finish */
+
+ /* make sure any old header cache entries are invalidated
+ * before proceeding */
+ FreeVolumeHeader(vp);
+
+ VChangeState_r(vp, VOL_STATE_ATTACHING);
+
+ /* restore any saved counters */
+ memcpy(&vp->stats, &stats_save, sizeof(VolumeStats));
+#else /* AFS_DEMAND_ATTACH_FS */
+ vp = VGetVolume_r(ec, volumeId);
+ if (vp) {
+ if (V_inUse(vp))
+ return vp;
+ if (vp->specialStatus == VBUSY)
+ isbusy = 1;
+ VDetachVolume_r(ec, vp);
+ if (*ec) {
+ Log("VAttachVolume: Error detaching volume (%s)\n", name);
+ }
+ vp = NULL;
+ }
+#endif /* AFS_DEMAND_ATTACH_FS */
+ }
+
+ *ec = 0;
+ strcpy(path, VPartitionPath(partp));
+
VOL_UNLOCK;
+
+ strcat(path, "/");
+ strcat(path, name);
if ((fd = afs_open(path, O_RDONLY)) == -1 || afs_fstat(fd, &status) == -1) {
Log("VAttachVolume: Failed to open %s (errno %d)\n", path, errno);
if (fd > -1)
close(fd);
- VOL_LOCK;
*ec = VNOVOL;
+ VOL_LOCK;
goto done;
}
n = read(fd, &diskHeader, sizeof(diskHeader));
close(fd);
- VOL_LOCK;
if (n != sizeof(diskHeader)
|| diskHeader.stamp.magic != VOLUMEHEADERMAGIC) {
Log("VAttachVolume: Error reading volume header %s\n", path);
*ec = VSALVAGE;
+ VOL_LOCK;
goto done;
}
if (diskHeader.stamp.version != VOLUMEHEADERVERSION) {
Log("VAttachVolume: Volume %s, version number is incorrect; volume needs salvaged\n", path);
*ec = VSALVAGE;
+ VOL_LOCK;
goto done;
}
DiskToVolumeHeader(&iheader, &diskHeader);
+#ifdef FSSYNC_BUILD_CLIENT
if (programType == volumeUtility && mode != V_SECRETLY && mode != V_PEEK) {
- if (FSYNC_askfs(iheader.id, partition, FSYNC_NEEDVOLUME, mode)
- == FSYNC_DENIED) {
+ VOL_LOCK;
+ if (FSYNC_VolOp(iheader.id, partition, FSYNC_VOL_NEEDVOLUME, mode, NULL)
+ != SYNC_OK) {
Log("VAttachVolume: attach of volume %u apparently denied by file server\n", iheader.id);
*ec = VNOVOL; /* XXXX */
goto done;
}
+ VOL_UNLOCK;
+ }
+#endif
+
+ if (!vp) {
+ vp = (Volume *) calloc(1, sizeof(Volume));
+ assert(vp != NULL);
+ vp->device = partp->device;
+ vp->partition = partp;
+#ifdef AFS_DEMAND_ATTACH_FS
+ assert(pthread_cond_init(&V_attachCV(vp), NULL) == 0);
+#endif /* AFS_DEMAND_ATTACH_FS */
}
- vp = attach2(ec, path, &iheader, partp, isbusy);
+ /* attach2 is entered without any locks, and returns
+ * with vol_glock_mutex held */
+ vp = attach2(ec, volumeId, path, &iheader, partp, vp, isbusy, mode);
+
if (programType == volumeUtility && vp) {
+#ifdef AFS_DEMAND_ATTACH_FS
+ /* for dafs, we should tell the fileserver, except for V_PEEK
+ * where we know it is not necessary */
+ if (mode == V_PEEK) {
+ vp->needsPutBack = 0;
+ } else {
+ vp->needsPutBack = 1;
+ }
+#else /* !AFS_DEMAND_ATTACH_FS */
/* duplicate computation in fssync.c about whether the server
* takes the volume offline or not. If the volume isn't
* offline, we must not return it when we detach the volume,
vp->needsPutBack = 0;
else
vp->needsPutBack = 1;
+#endif /* !AFS_DEMAND_ATTACH_FS */
}
/* OK, there's a problem here, but one that I don't know how to
* fix right now, and that I don't think should arise often.
* for all of that to happen, but if it does, probably the right
* fix is for the server to allow the return of readonly volumes
* that it doesn't think are really checked out. */
+#ifdef FSSYNC_BUILD_CLIENT
if (programType == volumeUtility && vp == NULL &&
mode != V_SECRETLY && mode != V_PEEK) {
- FSYNC_askfs(iheader.id, partition, FSYNC_ON, 0);
- } else if (programType == fileServer && vp) {
+ FSYNC_VolOp(iheader.id, partition, FSYNC_VOL_ON, 0, NULL);
+ } else
+#endif
+ if (programType == fileServer && vp) {
V_needsCallback(vp) = 0;
#ifdef notdef
if (VInit >= 2 && V_BreakVolumeCallbacks) {
(*V_BreakVolumeCallbacks) (V_id(vp));
}
#endif
- VUpdateVolume_r(ec, vp);
+ VUpdateVolume_r(ec, vp, 0);
if (*ec) {
Log("VAttachVolume: Error updating volume\n");
if (vp)
goto done;
}
if (VolumeWriteable(vp) && V_dontSalvage(vp) == 0) {
- /* This is a hack: by temporarily settint the incore
+#ifndef AFS_DEMAND_ATTACH_FS
+ /* This is a hack: by temporarily setting the incore
* dontSalvage flag ON, the volume will be put back on the
* Update list (with dontSalvage OFF again). It will then
* come back in N minutes with DONT_SALVAGE eventually
* offline without DONT SALVAGE having been set also
* eventually get it set */
V_dontSalvage(vp) = DONT_SALVAGE;
+#endif /* !AFS_DEMAND_ATTACH_FS */
VAddToVolumeUpdateList_r(ec, vp);
if (*ec) {
Log("VAttachVolume: Error adding volume to update list\n");
if (programType == volumeUtility) {
VUnlockPartition_r(partition);
}
- if (*ec)
+ if (*ec) {
+#ifdef AFS_DEMAND_ATTACH_FS
+ if (vp) {
+ V_attachState(vp) = VOL_STATE_ERROR;
+ assert(pthread_cond_broadcast(&V_attachCV(vp)) == 0);
+ }
+#endif /* AFS_DEMAND_ATTACH_FS */
return NULL;
- else
+ } else {
return vp;
+ }
}
-private Volume *
-attach2(Error * ec, char *path, register struct VolumeHeader * header,
- struct DiskPartition * partp, int isbusy)
+#ifdef AFS_DEMAND_ATTACH_FS
+/* VAttachVolumeByVp_r
+ *
+ * finish attaching a volume that is
+ * in a less than fully attached state
+ *
+ * parameters:
+ *   ec   -- error code out-parameter; cleared on entry
+ *   vp   -- volume object to finish attaching
+ *   mode -- attach mode; passed through unchanged to attach2()
+ *
+ * outline:
+ *   - asserts that the caller is the fileserver
+ *   - waits in VWaitExclusiveState_r() before touching the volume
+ *   - if vp is already VOL_STATE_ATTACHED it is fetched through
+ *     VGetVolumeByVp_r() and returned when V_inUse() is set; otherwise
+ *     it is detached and the attach starts over
+ *   - if vp is NULL, unattached or in the error state it is pre-attached
+ *     again with VPreAttachVolumeById_r(); when that hands back a
+ *     different object a reservation is taken on it and dropped at done:
+ *   - the on-disk volume header is opened, read and validated after
+ *     VOL_UNLOCK; every failure path re-takes VOL_LOCK before jumping
+ *     to done:
+ *   - attach2() performs the attach proper, then the volume is updated
+ *     and, when writeable with dontSalvage clear, added to the volume
+ *     update list
+ *
+ * errors set by this function itself:
+ *   VNOVOL   -- the header file could not be opened or fstat'ed
+ *   VSALVAGE -- short read, bad magic, or wrong header version
+ *
+ * return value:
+ *   NULL when *ec is set to anything other than VOFFLINE or VSALVAGE
+ *   (vp, if non-NULL and not already in an error state, is moved to
+ *   VOL_STATE_ERROR first); otherwise the current vp, which may itself
+ *   be NULL if attach2() returned NULL.
+ */
+/* caller MUST hold a ref count on vp */
+static Volume *
+VAttachVolumeByVp_r(Error * ec, Volume * vp, int mode)
{
- register Volume *vp;
+ char name[VMAXPATHLEN];
+ int fd, n, reserve = 0;
+ struct afs_stat status;
+ struct VolumeDiskHeader diskHeader;
+ struct VolumeHeader iheader;
+ struct DiskPartition *partp;
+ char path[64];
+ int isbusy = 0;
+ VolId volumeId;
+ Volume * nvp;
+ VolumeStats stats_save;
+ *ec = 0;
- VOL_UNLOCK;
+ /* volume utility should never call AttachByVp */
+ assert(programType == fileServer);
+
+ volumeId = vp->hashid;
+ partp = vp->partition;
+ VolumeExternalName_r(volumeId, name, sizeof(name));
+
+
+ /* if another thread is performing a blocking op, wait */
+ VWaitExclusiveState_r(vp);
+
+ memcpy(&stats_save, &vp->stats, sizeof(VolumeStats));
+
+ /* if it's already attached, see if we can return it */
+ if (V_attachState(vp) == VOL_STATE_ATTACHED) {
+ VGetVolumeByVp_r(ec, vp);
+ if (V_inUse(vp)) {
+ return vp;
+ } else {
+ if (vp->specialStatus == VBUSY)
+ isbusy = 1;
+ VDetachVolume_r(ec, vp);
+ if (*ec) {
+ Log("VAttachVolume: Error detaching volume (%s)\n", name);
+ }
+ vp = NULL;
+ }
+ }
- vp = (Volume *) calloc(1, sizeof(Volume));
+ /* pre-attach volume if it hasn't been done yet */
+ if (!vp ||
+ (V_attachState(vp) == VOL_STATE_UNATTACHED) ||
+ (V_attachState(vp) == VOL_STATE_ERROR)) {
+ nvp = VPreAttachVolumeById_r(ec, partp, vp, volumeId);
+ if (*ec) {
+ return NULL;
+ }
+ if (nvp != vp) {
+ reserve = 1;
+ VCreateReservation_r(nvp);
+ vp = nvp;
+ }
+ }
+
assert(vp != NULL);
+ VChangeState_r(vp, VOL_STATE_ATTACHING);
+
+ /* restore monotonically increasing stats (saved above, before the
+ * detach / pre-attach steps could replace vp) */
+ memcpy(&vp->stats, &stats_save, sizeof(VolumeStats));
+
+ *ec = 0;
+
+
+ /* compute path to disk header,
+ * read in header,
+ * and verify magic and version stamps */
+ strcpy(path, VPartitionPath(partp));
+
+ VOL_UNLOCK;
+
+ strcat(path, "/");
+ strcat(path, name);
+ if ((fd = afs_open(path, O_RDONLY)) == -1 || afs_fstat(fd, &status) == -1) {
+ Log("VAttachVolume: Failed to open %s (errno %d)\n", path, errno);
+ if (fd > -1)
+ close(fd);
+ *ec = VNOVOL;
+ VOL_LOCK;
+ goto done;
+ }
+ n = read(fd, &diskHeader, sizeof(diskHeader));
+ close(fd);
+ if (n != sizeof(diskHeader)
+ || diskHeader.stamp.magic != VOLUMEHEADERMAGIC) {
+ Log("VAttachVolume: Error reading volume header %s\n", path);
+ *ec = VSALVAGE;
+ VOL_LOCK;
+ goto done;
+ }
+ if (diskHeader.stamp.version != VOLUMEHEADERVERSION) {
+ Log("VAttachVolume: Volume %s, version number is incorrect; volume needs salvaged\n", path);
+ *ec = VSALVAGE;
+ VOL_LOCK;
+ goto done;
+ }
+
+ /* convert on-disk header format to in-memory header format */
+ DiskToVolumeHeader(&iheader, &diskHeader);
+
+ /* do volume attach
+ *
+ * NOTE: attach2 is entered without any locks, and returns
+ * with vol_glock_mutex held */
+ vp = attach2(ec, volumeId, path, &iheader, partp, vp, isbusy, mode);
+
+ if (*ec || vp == NULL) {
+ goto done;
+ }
+
+ V_needsCallback(vp) = 0;
+ VUpdateVolume_r(ec, vp, 0);
+ if (*ec) {
+ Log("VAttachVolume: Error updating volume %u\n", vp->hashid);
+ VPutVolume_r(vp);
+ goto done;
+ }
+ if (VolumeWriteable(vp) && V_dontSalvage(vp) == 0) {
+#ifndef AFS_DEMAND_ATTACH_FS
+ /* This is a hack: by temporarily setting the incore
+ * dontSalvage flag ON, the volume will be put back on the
+ * Update list (with dontSalvage OFF again). It will then
+ * come back in N minutes with DONT_SALVAGE eventually
+ * set. This is the way that volumes that have never had
+ * it set get it set; or that volumes that have been
+ * offline without DONT SALVAGE having been set also
+ * eventually get it set */
+ V_dontSalvage(vp) = DONT_SALVAGE;
+#endif /* !AFS_DEMAND_ATTACH_FS */
+ VAddToVolumeUpdateList_r(ec, vp);
+ if (*ec) {
+ Log("VAttachVolume: Error adding volume %u to update list\n", vp->hashid);
+ if (vp)
+ VPutVolume_r(vp);
+ goto done;
+ }
+ }
+ if (LogLevel)
+ Log("VOnline: volume %u (%s) attached and online\n", V_id(vp),
+ V_name(vp));
+ done:
+ if (reserve) {
+ VCancelReservation_r(nvp);
+ reserve = 0;
+ }
+ if (*ec && (*ec != VOFFLINE) && (*ec != VSALVAGE)) {
+ if (vp && !IsErrorState(V_attachState(vp))) {
+ VChangeState_r(vp, VOL_STATE_ERROR);
+ }
+ return NULL;
+ } else {
+ return vp;
+ }
+}
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+/*
+ * called without any locks held
+ * returns with vol_glock_mutex held
+ */
+private Volume *
+attach2(Error * ec, VolId volumeId, char *path, register struct VolumeHeader * header,
+ struct DiskPartition * partp, register Volume * vp, int isbusy, int mode)
+{
vp->specialStatus = (byte) (isbusy ? VBUSY : 0);
- vp->device = partp->device;
- vp->partition = partp;
IH_INIT(vp->vnodeIndex[vLarge].handle, partp->device, header->parent,
header->largeVnodeIndex);
IH_INIT(vp->vnodeIndex[vSmall].handle, partp->device, header->parent,
vp->shuttingDown = 0;
vp->goingOffline = 0;
vp->nUsers = 1;
+#ifdef AFS_DEMAND_ATTACH_FS
+ vp->stats.last_attach = FT_ApproxTime();
+ vp->stats.attaches++;
+#endif
VOL_LOCK;
+#ifdef AFS_DEMAND_ATTACH_FS
+ IncUInt64(&VStats.attaches);
+#endif
vp->cacheCheck = ++VolumeCacheCheck;
/* just in case this ever rolls over */
if (!vp->cacheCheck)
GetVolumeHeader(vp);
VOL_UNLOCK;
+#if defined(AFS_DEMAND_ATTACH_FS) && defined(FSSYNC_BUILD_CLIENT)
+ /* demand attach changes the V_PEEK mechanism
+ *
+ * we can now suck the current disk data structure over
+ * the fssync interface without going to disk
+ *
+ * (technically, we don't need to restrict this feature
+ * to demand attach fileservers. However, I'm trying
+ * to limit the number of common code changes)
+ */
+ if (programType != fileServer && mode == V_PEEK) {
+ SYNC_response res;
+ res.payload.len = sizeof(VolumeDiskData);
+ res.payload.buf = &vp->header->diskstuff;
+
+ if (FSYNC_VolOp(volumeId,
+ VPartitionPath(partp),
+ FSYNC_VOL_QUERY_HDR,
+ FSYNC_WHATEVER,
+ &res) == SYNC_OK) {
+ goto disk_header_loaded;
+ }
+ }
+#endif /* AFS_DEMAND_ATTACH_FS && FSSYNC_BUILD_CLIENT */
(void)ReadHeader(ec, V_diskDataHandle(vp), (char *)&V_disk(vp),
sizeof(V_disk(vp)), VOLUMEINFOMAGIC, VOLUMEINFOVERSION);
+#ifdef AFS_DEMAND_ATTACH_FS
+ /* update stats */
VOL_LOCK;
+ IncUInt64(&VStats.hdr_loads);
+ IncUInt64(&vp->stats.hdr_loads);
+ VOL_UNLOCK;
+#endif /* AFS_DEMAND_ATTACH_FS */
+
if (*ec) {
Log("VAttachVolume: Error reading diskDataHandle vol header %s; error=%u\n", path, *ec);
}
+
+ disk_header_loaded:
+
+#ifdef AFS_DEMAND_ATTACH_FS
if (!*ec) {
- struct IndexFileHeader iHead;
+
+ /* check for pending volume operations */
+ if (vp->pending_vol_op) {
+ /* see if the pending volume op requires exclusive access */
+ if (!VVolOpLeaveOnline_r(vp, vp->pending_vol_op)) {
+ /* mark the volume down */
+ *ec = VOFFLINE;
+ VChangeState_r(vp, VOL_STATE_UNATTACHED);
+ if (V_offlineMessage(vp)[0] == '\0')
+ strlcpy(V_offlineMessage(vp),
+ "A volume utility is running.",
+ sizeof(V_offlineMessage(vp)));
+ V_offlineMessage(vp)[sizeof(V_offlineMessage(vp)) - 1] = '\0';
+
+ /* check to see if we should set the specialStatus flag */
+ if (VVolOpSetVBusy_r(vp, vp->pending_vol_op)) {
+ vp->specialStatus = VBUSY;
+ }
+ }
+ }
+
+ V_attachFlags(vp) |= VOL_HDR_LOADED;
+ }
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+ if (!*ec) {
+ struct IndexFileHeader iHead;
#if OPENAFS_VOL_STATS
/*
V_stat_initialized(vp) = 1;
}
#endif /* OPENAFS_VOL_STATS */
- VOL_UNLOCK;
+
(void)ReadHeader(ec, vp->vnodeIndex[vSmall].handle,
(char *)&iHead, sizeof(iHead),
SMALLINDEXMAGIC, SMALLINDEXVERSION);
- VOL_LOCK;
+
if (*ec) {
Log("VAttachVolume: Error reading smallVnode vol header %s; error=%u\n", path, *ec);
}
}
+
if (!*ec) {
struct IndexFileHeader iHead;
- VOL_UNLOCK;
+
(void)ReadHeader(ec, vp->vnodeIndex[vLarge].handle,
(char *)&iHead, sizeof(iHead),
LARGEINDEXMAGIC, LARGEINDEXVERSION);
- VOL_LOCK;
+
if (*ec) {
Log("VAttachVolume: Error reading largeVnode vol header %s; error=%u\n", path, *ec);
}
}
+
#ifdef AFS_NAMEI_ENV
if (!*ec) {
struct versionStamp stamp;
- VOL_UNLOCK;
+
(void)ReadHeader(ec, V_linkHandle(vp), (char *)&stamp,
sizeof(stamp), LINKTABLEMAGIC, LINKTABLEVERSION);
- VOL_LOCK;
+
if (*ec) {
Log("VAttachVolume: Error reading namei vol header %s; error=%u\n", path, *ec);
}
}
-#endif
+#endif /* AFS_NAMEI_ENV */
+
+#if defined(AFS_DEMAND_ATTACH_FS)
+ if (*ec && ((*ec != VOFFLINE) || (V_attachState(vp) != VOL_STATE_UNATTACHED))) {
+ VOL_LOCK;
+ if (programType == fileServer) {
+ VRequestSalvage_r(vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER);
+ vp->nUsers = 0;
+ *ec = VSALVAGING;
+ } else {
+ Log("VAttachVolume: Error attaching volume %s; volume needs salvage; error=%u\n", path, *ec);
+ FreeVolume(vp);
+ *ec = VSALVAGE;
+ }
+ return NULL;
+ } else if (*ec) {
+ /* volume operation in progress */
+ VOL_LOCK;
+ return NULL;
+ }
+#else /* AFS_DEMAND_ATTACH_FS */
if (*ec) {
Log("VAttachVolume: Error attaching volume %s; volume needs salvage; error=%u\n", path, *ec);
+ VOL_LOCK;
FreeVolume(vp);
return NULL;
}
+#endif /* AFS_DEMAND_ATTACH_FS */
+
if (V_needsSalvaged(vp)) {
if (vp->specialStatus)
vp->specialStatus = 0;
- Log("VAttachVolume: volume salvage flag is ON for %s; volume needs salvage\n", path);
- *ec = VSALVAGE;
+ VOL_LOCK;
+#if defined(AFS_DEMAND_ATTACH_FS)
+ if (programType == fileServer) {
+ VRequestSalvage_r(vp, SALVSYNC_NEEDED, VOL_SALVAGE_INVALIDATE_HEADER);
+ vp->nUsers = 0;
+ *ec = VSALVAGING;
+ } else {
+ Log("VAttachVolume: volume salvage flag is ON for %s; volume needs salvage\n", path);
+ FreeVolume(vp);
+ *ec = VSALVAGE;
+ }
+#else /* AFS_DEMAND_ATTACH_FS */
FreeVolume(vp);
+ *ec = VSALVAGE;
+#endif /* AFS_DEMAND_ATTACH_FS */
return NULL;
}
+
+ VOL_LOCK;
if (programType == fileServer) {
#ifndef FAST_RESTART
if (V_inUse(vp) && VolumeWriteable(vp)) {
if (!V_needsSalvaged(vp)) {
V_needsSalvaged(vp) = 1;
- VUpdateVolume_r(ec, vp);
+ VUpdateVolume_r(ec, vp, 0);
}
- FreeVolume(vp);
+#if defined(AFS_DEMAND_ATTACH_FS)
+ VRequestSalvage_r(vp, SALVSYNC_NEEDED, VOL_SALVAGE_INVALIDATE_HEADER);
+ vp->nUsers = 0;
+ *ec = VSALVAGING;
+#else /* AFS_DEMAND_ATTACH_FS */
Log("VAttachVolume: volume %s needs to be salvaged; not attached.\n", path);
+ FreeVolume(vp);
*ec = VSALVAGE;
+#endif /* AFS_DEMAND_ATTACH_FS */
return NULL;
}
#endif /* FAST_RESTART */
+
if (V_destroyMe(vp) == DESTROY_ME) {
+#if defined(AFS_DEMAND_ATTACH_FS)
+ /* schedule a salvage so the volume goes away on disk */
+ VRequestSalvage_r(vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER);
+ VChangeState_r(vp, VOL_STATE_ERROR);
+ vp->nUsers = 0;
+#endif /* AFS_DEMAND_ATTACH_FS */
FreeVolume(vp);
Log("VAttachVolume: volume %s is junk; it should be destroyed at next salvage\n", path);
*ec = VNOVOL;
}
}
- AddVolumeToHashTable(vp, V_id(vp));
vp->nextVnodeUnique = V_uniquifier(vp);
vp->vnodeIndex[vSmall].bitmap = vp->vnodeIndex[vLarge].bitmap = NULL;
#ifndef BITMAP_LATER
if (programType == fileServer && VolumeWriteable(vp)) {
int i;
for (i = 0; i < nVNODECLASSES; i++) {
- VOL_UNLOCK;
- GetBitmap(ec, vp, i);
- VOL_LOCK;
+ VGetBitmap_r(ec, vp, i);
if (*ec) {
+#ifdef AFS_DEMAND_ATTACH_FS
+ VRequestSalvage_r(vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER);
+ vp->nUsers = 0;
+ *ec = VSALVAGING;
+#else /* AFS_DEMAND_ATTACH_FS */
FreeVolume(vp);
+#endif /* AFS_DEMAND_ATTACH_FS */
Log("VAttachVolume: error getting bitmap for volume (%s)\n",
path);
return NULL;
}
}
+ AddVolumeToHashTable(vp, V_id(vp));
+#ifdef AFS_DEMAND_ATTACH_FS
+ AddVolumeToVByPList_r(vp);
+ VLRU_Add_r(vp);
+ VChangeState_r(vp, VOL_STATE_ATTACHED);
+#endif
return vp;
}
VAttachVolume(Error * ec, VolumeId volumeId, int mode)
{
Volume *retVal;
- VATTACH_LOCK;
VOL_LOCK;
retVal = VAttachVolume_r(ec, volumeId, mode);
VOL_UNLOCK;
- VATTACH_UNLOCK;
return retVal;
}
* we still guarantee we won't context swap, but the ref count won't be
* incremented (otherwise we'd violate the invariant).
*/
+/* NOTE: with the demand attach fileserver extensions, the global lock
+ * is dropped within VHold
+ * (presumably while blocked in VWaitExclusiveState_r -- TODO confirm)
+ *
+ * VHold_r takes a use reference on vp: the volume header is loaded via
+ * LoadVolumeHeader() and, on success, vp->nUsers is incremented and 0 is
+ * returned. If the header load fails its error code is returned and
+ * nUsers is left unchanged.
+ *
+ * The demand attach variant additionally keeps a reservation on vp
+ * (VCreateReservation_r / VCancelReservation_r) for the duration of the
+ * call, and waits in VWaitExclusiveState_r() before loading the header. */
+#ifdef AFS_DEMAND_ATTACH_FS
static int
VHold_r(register Volume * vp)
{
Error error;
- if (vp->nUsers == 0 && !GetVolumeHeader(vp)) {
- VolumeReplacements++;
- ReadHeader(&error, V_diskDataHandle(vp), (char *)&V_disk(vp),
- sizeof(V_disk(vp)), VOLUMEINFOMAGIC, VOLUMEINFOVERSION);
- if (error)
- return error;
+ VCreateReservation_r(vp);
+ VWaitExclusiveState_r(vp);
+
+ LoadVolumeHeader(&error, vp);
+ if (error) {
+ VCancelReservation_r(vp);
+ return error;
}
vp->nUsers++;
+ VCancelReservation_r(vp);
+ return 0;
+}
+#else /* AFS_DEMAND_ATTACH_FS */
+static int
+VHold_r(register Volume * vp)
+{
+ Error error;
+
+ LoadVolumeHeader(&error, vp);
+ if (error)
+ return error;
+ vp->nUsers++;
return 0;
}
+#endif /* AFS_DEMAND_ATTACH_FS */
static int
VHold(register Volume * vp)
return retVal;
}
-void
-VTakeOffline_r(register Volume * vp)
-{
- assert(vp->nUsers > 0);
- assert(programType == fileServer);
- vp->goingOffline = 1;
- V_needsSalvaged(vp) = 1;
-}
-void
-VTakeOffline(register Volume * vp)
-{
- VOL_LOCK;
- VTakeOffline_r(vp);
- VOL_UNLOCK;
-}
+/***************************************************/
+/* get and put volume routines */
+/***************************************************/
void
VPutVolume_r(register Volume * vp)
{
assert(--vp->nUsers >= 0);
if (vp->nUsers == 0) {
+ VCheckOffline(vp);
ReleaseVolumeHeader(vp->header);
- if (vp->goingOffline) {
- Error error;
- assert(programType == fileServer);
- vp->goingOffline = 0;
- V_inUse(vp) = 0;
- VUpdateVolume_r(&error, vp);
- VCloseVolumeHandles_r(vp);
- if (LogLevel) {
- Log("VOffline: Volume %u (%s) is now offline", V_id(vp),
- V_name(vp));
- if (V_offlineMessage(vp)[0])
- Log(" (%s)", V_offlineMessage(vp));
- Log("\n");
- }
-#ifdef AFS_PTHREAD_ENV
- assert(pthread_cond_broadcast(&vol_put_volume_cond) == 0);
-#else /* AFS_PTHREAD_ENV */
- LWP_NoYieldSignal(VPutVolume);
-#endif /* AFS_PTHREAD_ENV */
- }
- if (vp->shuttingDown) {
- VReleaseVolumeHandles_r(vp);
- FreeVolume(vp);
- if (programType == fileServer)
-#ifdef AFS_PTHREAD_ENV
- assert(pthread_cond_broadcast(&vol_put_volume_cond) == 0);
-#else /* AFS_PTHREAD_ENV */
- LWP_NoYieldSignal(VPutVolume);
-#endif /* AFS_PTHREAD_ENV */
+#ifdef AFS_DEMAND_ATTACH_FS
+ if (!VCheckDetach(vp)) {
+ VCheckSalvage(vp);
+ VCheckFree(vp);
}
+#else /* AFS_DEMAND_ATTACH_FS */
+ VCheckDetach(vp);
+#endif /* AFS_DEMAND_ATTACH_FS */
}
}
VOL_UNLOCK;
}
+
/* Get a pointer to an attached volume. The pointer is returned regardless
of whether or not the volume is in service or on/off line. An error
code, however, is returned with an indication of the volume's status */
Volume *
-VGetVolume(Error * ec, VolId volumeId)
+VGetVolume(Error * ec, Error * client_ec, VolId volumeId)
{
Volume *retVal;
VOL_LOCK;
- retVal = VGetVolume_r(ec, volumeId);
+ retVal = GetVolume(ec, client_ec, volumeId, NULL, 0);
VOL_UNLOCK;
return retVal;
}
Volume *
VGetVolume_r(Error * ec, VolId volumeId)
{
- Volume *vp;
- unsigned short V0 = 0, V1 = 0, V2 = 0, V3 = 0, V4 = 0, V5 = 0, V6 =
+ return GetVolume(ec, NULL, volumeId, NULL, 0);
+}
+
+/* try to get a volume we've previously looked up
+ *
+ * Thin wrapper around GetVolume(): the volume id is taken from
+ * vp->hashid and vp itself is passed as the lookup hint. No client
+ * error pointer is supplied (NULL) and flags is 0. The result of
+ * GetVolume() is returned unchanged, with its error code left in *ec. */
+/* for demand attach fs, caller MUST NOT hold a ref count on vp */
+Volume *
+VGetVolumeByVp_r(Error * ec, Volume * vp)
+{
+ return GetVolume(ec, NULL, vp->hashid, vp, 0);
+}
+
+/* private interface for getting a volume handle
+ * volumeId must be provided.
+ * hint is an optional parameter to speed up hash lookups
+ * flags is not used at this time
+ */
+/* for demand attach fs, caller MUST NOT hold a ref count on hint */
+static Volume *
+GetVolume(Error * ec, Error * client_ec, VolId volumeId, Volume * hint, int flags)
+{
+ Volume *vp = hint;
+ /* pull this profiling/debugging code out of regular builds */
+#ifdef notdef
+#define VGET_CTR_INC(x) x++
+ unsigned short V0 = 0, V1 = 0, V2 = 0, V3 = 0, V5 = 0, V6 =
0, V7 = 0, V8 = 0, V9 = 0;
unsigned short V10 = 0, V11 = 0, V12 = 0, V13 = 0, V14 = 0, V15 = 0;
+#else
+#define VGET_CTR_INC(x)
+#endif
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ Volume *avp, * rvp = hint;
+
+ if (rvp) {
+ VCreateReservation_r(rvp);
+ }
+#endif /* AFS_DEMAND_ATTACH_FS */
for (;;) {
*ec = 0;
- V0++;
- for (vp = VolumeHashTable[VOLUME_HASH(volumeId)];
- vp && vp->hashid != volumeId; vp = vp->hashNext)
- Vlooks++;
+ if (client_ec)
+ *client_ec = 0;
+ VGET_CTR_INC(V0);
+
+ vp = VLookupVolume_r(ec, volumeId, vp);
+ if (*ec) {
+ vp = NULL;
+ break;
+ }
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ if (rvp && (rvp != vp)) {
+ /* break reservation on old vp */
+ VCancelReservation_r(rvp);
+ rvp = NULL;
+ }
+#endif /* AFS_DEMAND_ATTACH_FS */
if (!vp) {
- V1++;
+ VGET_CTR_INC(V1);
if (VInit < 2) {
- V2++;
+ VGET_CTR_INC(V2);
/* Until we have reached an initialization level of 2
* we don't know whether this volume exists or not.
* We can't sleep and retry later because before a volume
break;
}
- V3++;
- VolumeGets++;
- if (vp->nUsers == 0 && !GetVolumeHeader(vp)) {
- V5++;
- VolumeReplacements++;
- ReadHeader(ec, V_diskDataHandle(vp), (char *)&V_disk(vp),
- sizeof(V_disk(vp)), VOLUMEINFOMAGIC,
- VOLUMEINFOVERSION);
+ VGET_CTR_INC(V3);
+ IncUInt64(&VStats.hdr_gets);
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ /* block if someone else is performing an exclusive op on this volume */
+ if (rvp != vp) {
+ rvp = vp;
+ VCreateReservation_r(rvp);
+ }
+ VWaitExclusiveState_r(vp);
+
+ /* short circuit with VNOVOL in the following circumstances:
+ *
+ * VOL_STATE_ERROR
+ * VOL_STATE_SHUTTING_DOWN
+ */
+ if ((V_attachState(vp) == VOL_STATE_ERROR) ||
+ (V_attachState(vp) == VOL_STATE_SHUTTING_DOWN)) {
+ *ec = VNOVOL;
+ vp = NULL;
+ break;
+ }
+
+ /* allowable states:
+ * UNATTACHED
+ * PREATTACHED
+ * ATTACHED
+ * GOING_OFFLINE
+ * SALVAGING
+ */
+
+ if (vp->salvage.requested) {
+ VUpdateSalvagePriority_r(vp);
+ }
+
+ if (V_attachState(vp) == VOL_STATE_PREATTACHED) {
+ avp = VAttachVolumeByVp_r(ec, vp, 0);
+ if (avp) {
+ if (vp != avp) {
+ /* VAttachVolumeByVp_r can return a pointer
+ * != the vp passed to it under certain
+ * conditions; make sure we don't leak
+ * reservations if that happens */
+ vp = avp;
+ VCancelReservation_r(rvp);
+ rvp = avp;
+ VCreateReservation_r(rvp);
+ }
+ VPutVolume_r(avp);
+ }
if (*ec) {
- V6++;
- /* Only log the error if it was a totally unexpected error. Simply
- * a missing inode is likely to be caused by the volume being deleted */
- if (errno != ENXIO || LogLevel)
- Log("Volume %u: couldn't reread volume header\n",
- vp->hashid);
+ int endloop = 0;
+ switch (*ec) {
+ case VSALVAGING:
+ break;
+ case VOFFLINE:
+ if (!vp->pending_vol_op) {
+ endloop = 1;
+ }
+ break;
+ default:
+ *ec = VNOVOL;
+ endloop = 1;
+ }
+ if (endloop) {
+ vp = NULL;
+ break;
+ }
+ }
+ }
+
+ if ((V_attachState(vp) == VOL_STATE_SALVAGING) ||
+ (*ec == VSALVAGING)) {
+ if (client_ec) {
+ /* see CheckVnode() in afsfileprocs.c for an explanation
+ * of this error code logic */
+ afs_uint32 now = FT_ApproxTime();
+ if ((vp->stats.last_salvage + (10 * 60)) >= now) {
+ *client_ec = VBUSY;
+ } else {
+ *client_ec = VRESTARTING;
+ }
+ }
+ *ec = VSALVAGING;
+ vp = NULL;
+ break;
+ }
+
+ if (vp->pending_vol_op && !VVolOpLeaveOnline_r(vp, vp->pending_vol_op)) {
+ if (client_ec) {
+ /* see CheckVnode() in afsfileprocs.c for an explanation
+ * of this error code logic */
+ afs_uint32 now = FT_ApproxTime();
+ if ((vp->stats.last_vol_op + (10 * 60)) >= now) {
+ *client_ec = VBUSY;
+ } else {
+ *client_ec = VRESTARTING;
+ }
+ }
+ *ec = VOFFLINE;
+ vp = NULL;
+ break;
+ }
+
+ if (V_attachState(vp) == VOL_STATE_UNATTACHED) {
+ *ec = VOFFLINE;
+ vp = NULL;
+ break;
+ }
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+ LoadVolumeHeader(ec, vp);
+ if (*ec) {
+ VGET_CTR_INC(V6);
+ /* Only log the error if it was a totally unexpected error. Simply
+ * a missing inode is likely to be caused by the volume being deleted */
+ if (errno != ENXIO || LogLevel)
+ Log("Volume %u: couldn't reread volume header\n",
+ vp->hashid);
+#ifdef AFS_DEMAND_ATTACH_FS
+ if (programType == fileServer) {
+ VRequestSalvage_r(vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER);
+ *ec = VSALVAGING;
+ } else {
FreeVolume(vp);
vp = NULL;
- break;
}
+#else /* AFS_DEMAND_ATTACH_FS */
+ FreeVolume(vp);
+ vp = NULL;
+#endif /* AFS_DEMAND_ATTACH_FS */
+ break;
}
- V7++;
+
+ VGET_CTR_INC(V7);
if (vp->shuttingDown) {
- V8++;
+ VGET_CTR_INC(V8);
*ec = VNOVOL;
vp = NULL;
break;
}
+
if (programType == fileServer) {
- V9++;
+ VGET_CTR_INC(V9);
if (vp->goingOffline) {
- V10++;
-#ifdef AFS_PTHREAD_ENV
- pthread_cond_wait(&vol_put_volume_cond, &vol_glock_mutex);
+ VGET_CTR_INC(V10);
+#ifdef AFS_DEMAND_ATTACH_FS
+ /* wait for the volume to go offline */
+ if (V_attachState(vp) == VOL_STATE_GOING_OFFLINE) {
+ VWaitStateChange_r(vp);
+ }
+#elif defined(AFS_PTHREAD_ENV)
+ assert(pthread_cond_wait(&vol_put_volume_cond, &vol_glock_mutex) == 0);
#else /* AFS_PTHREAD_ENV */
LWP_WaitProcess(VPutVolume);
#endif /* AFS_PTHREAD_ENV */
continue;
}
if (vp->specialStatus) {
- V11++;
+ VGET_CTR_INC(V11);
*ec = vp->specialStatus;
} else if (V_inService(vp) == 0 || V_blessed(vp) == 0) {
- V12++;
+ VGET_CTR_INC(V12);
*ec = VNOVOL;
} else if (V_inUse(vp) == 0) {
- V13++;
+ VGET_CTR_INC(V13);
*ec = VOFFLINE;
} else {
- V14++;
+ VGET_CTR_INC(V14);
}
}
break;
}
- V15++;
+ VGET_CTR_INC(V15);
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ /* if no error, bump nUsers */
+ if (vp) {
+ vp->nUsers++;
+ VLRU_UpdateAccess_r(vp);
+ }
+ if (rvp) {
+ VCancelReservation_r(rvp);
+ rvp = NULL;
+ }
+ if (client_ec && !*client_ec) {
+ *client_ec = *ec;
+ }
+#else /* AFS_DEMAND_ATTACH_FS */
/* if no error, bump nUsers */
- if (vp)
+ if (vp) {
vp->nUsers++;
+ }
+ if (client_ec) {
+ *client_ec = *ec;
+ }
+#endif /* AFS_DEMAND_ATTACH_FS */
assert(vp || *ec);
return vp;
}
-/* For both VForceOffline and VOffline, we close all relevant handles.
- * For VOffline, if we re-attach the volume, the files may possible be
- * different than before.
- */
-static void
-VReleaseVolumeHandles_r(Volume * vp)
+/***************************************************/
+/* Volume offline/detach routines */
+/***************************************************/
+
+/* caller MUST hold a heavyweight ref on vp
+ *
+ * VTakeOffline_r marks vp as going offline and in need of salvage by
+ * setting vp->goingOffline and V_needsSalvaged(vp) to 1. Both variants
+ * assert that vp->nUsers > 0 and that the caller is the fileserver.
+ *
+ * The demand attach variant additionally takes a reservation on vp,
+ * waits in VWaitExclusiveState_r() before setting the flags, and then
+ * calls VRequestSalvage_r(vp, SALVSYNC_ERROR, 0) before dropping the
+ * reservation.
+ *
+ * VTakeOffline is the wrapper that brackets VTakeOffline_r with
+ * VOL_LOCK / VOL_UNLOCK. */
+#ifdef AFS_DEMAND_ATTACH_FS
+void
+VTakeOffline_r(register Volume * vp)
{
- DFlushVolume(V_id(vp));
- VReleaseVnodeFiles_r(vp);
+ assert(vp->nUsers > 0);
+ assert(programType == fileServer);
- /* Too time consuming and unnecessary for the volserver */
- if (programType != volumeUtility) {
- IH_CONDSYNC(vp->vnodeIndex[vLarge].handle);
- IH_CONDSYNC(vp->vnodeIndex[vSmall].handle);
- IH_CONDSYNC(vp->diskDataHandle);
-#ifdef AFS_NT40_ENV
- IH_CONDSYNC(vp->linkHandle);
-#endif /* AFS_NT40_ENV */
- }
+ VCreateReservation_r(vp);
+ VWaitExclusiveState_r(vp);
- IH_RELEASE(vp->vnodeIndex[vLarge].handle);
- IH_RELEASE(vp->vnodeIndex[vSmall].handle);
- IH_RELEASE(vp->diskDataHandle);
- IH_RELEASE(vp->linkHandle);
+ vp->goingOffline = 1;
+ V_needsSalvaged(vp) = 1;
+
+ VRequestSalvage_r(vp, SALVSYNC_ERROR, 0);
+ VCancelReservation_r(vp);
+}
+#else /* AFS_DEMAND_ATTACH_FS */
+void
+VTakeOffline_r(register Volume * vp)
+{
+ assert(vp->nUsers > 0);
+ assert(programType == fileServer);
+
+ vp->goingOffline = 1;
+ V_needsSalvaged(vp) = 1;
+}
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+void
+VTakeOffline(register Volume * vp)
+{
+ VOL_LOCK;
+ VTakeOffline_r(vp);
+ VOL_UNLOCK;
}
/* Force the volume offline, set the salvage flag. No further references to
* the volume through the volume package will be honored. */
+/* for demand attach, caller MUST hold ref count on vp */
void
-VForceOffline_r(Volume * vp)
+VForceOffline_r(Volume * vp, int flags)
{
Error error;
if (!V_inUse(vp))
V_inUse(vp) = 0;
vp->goingOffline = 0;
V_needsSalvaged(vp) = 1;
- VUpdateVolume_r(&error, vp);
+ if (!(flags & VOL_FORCEOFF_NOUPDATE)) {
+ VUpdateVolume_r(&error, vp, VOL_UPDATE_WAIT | VOL_UPDATE_NOFORCEOFF);
+ }
+#ifdef AFS_DEMAND_ATTACH_FS
+#ifdef SALVSYNC_BUILD_CLIENT
+ if (programType == fileServer) {
+ VRequestSalvage_r(vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER);
+ }
+#endif
+ VChangeState_r(vp, VOL_STATE_ERROR);
+#endif /* AFS_DEMAND_ATTACH_FS */
#ifdef AFS_PTHREAD_ENV
assert(pthread_cond_broadcast(&vol_put_volume_cond) == 0);
#else /* AFS_PTHREAD_ENV */
#endif /* AFS_PTHREAD_ENV */
VReleaseVolumeHandles_r(vp);
-
}
void
VForceOffline(Volume * vp)
{
VOL_LOCK;
- VForceOffline_r(vp);
+ VForceOffline_r(vp, 0);
VOL_UNLOCK;
}
{
Error error;
VolumeId vid = V_id(vp);
+
assert(programType != volumeUtility);
if (!V_inUse(vp)) {
VPutVolume_r(vp);
if (V_offlineMessage(vp)[0] == '\0')
strncpy(V_offlineMessage(vp), message, sizeof(V_offlineMessage(vp)));
V_offlineMessage(vp)[sizeof(V_offlineMessage(vp)) - 1] = '\0';
+
vp->goingOffline = 1;
+#ifdef AFS_DEMAND_ATTACH_FS
+ VChangeState_r(vp, VOL_STATE_GOING_OFFLINE);
+ VCreateReservation_r(vp);
+ VPutVolume_r(vp);
+
+ /* wait for the volume to go offline */
+ if (V_attachState(vp) == VOL_STATE_GOING_OFFLINE) {
+ VWaitStateChange_r(vp);
+ }
+ VCancelReservation_r(vp);
+#else /* AFS_DEMAND_ATTACH_FS */
VPutVolume_r(vp);
vp = VGetVolume_r(&error, vid); /* Wait for it to go offline */
if (vp) /* In case it was reattached... */
VPutVolume_r(vp);
+#endif /* AFS_DEMAND_ATTACH_FS */
}
void
VOL_UNLOCK;
}
-/* For VDetachVolume, we close all cached file descriptors, but keep
- * the Inode handles in case we need to read from a busy volume.
- */
-static void
-VCloseVolumeHandles_r(Volume * vp)
-{
- DFlushVolume(V_id(vp));
- VCloseVnodeFiles_r(vp);
-
- /* Too time consuming and unnecessary for the volserver */
- if (programType != volumeUtility) {
- IH_CONDSYNC(vp->vnodeIndex[vLarge].handle);
- IH_CONDSYNC(vp->vnodeIndex[vSmall].handle);
- IH_CONDSYNC(vp->diskDataHandle);
-#ifdef AFS_NT40_ENV
- IH_CONDSYNC(vp->linkHandle);
-#endif /* AFS_NT40_ENV */
- }
-
- IH_REALLYCLOSE(vp->vnodeIndex[vLarge].handle);
- IH_REALLYCLOSE(vp->vnodeIndex[vSmall].handle);
- IH_REALLYCLOSE(vp->diskDataHandle);
- IH_REALLYCLOSE(vp->linkHandle);
-}
-
/* This gets used for the most part by utility routines that don't want
* to keep all the volume headers around. Generally, the file server won't
* call this routine, because then the offline message in the volume header
- * (or other information) will still be available to clients. For NAMEI, also
- * close the file handles.
+ * (or other information) won't be available to clients. For NAMEI, also
+ * close the file handles. However, the fileserver does call this during
+ * an attach following a volume operation.
*/
void
VDetachVolume_r(Error * ec, Volume * vp)
volume = V_id(vp);
DeleteVolumeFromHashTable(vp);
vp->shuttingDown = 1;
+#ifdef AFS_DEMAND_ATTACH_FS
+ DeleteVolumeFromVByPList_r(vp);
+ VLRU_Delete_r(vp);
+ VChangeState_r(vp, VOL_STATE_SHUTTING_DOWN);
+#endif /* AFS_DEMAND_ATTACH_FS */
VPutVolume_r(vp);
/* Will be detached sometime in the future--this is OK since volume is offline */
+ /* XXX the following code should really be moved to VCheckDetach() since the volume
+ * is not technically detached until the refcounts reach zero
+ */
+#ifdef FSSYNC_BUILD_CLIENT
if (programType == volumeUtility && notifyServer) {
/*
* Note: The server is not notified in the case of a bogus volume
* would be two instances of the same volume, one of them bogus,
* which the file server would attempt to put on line
*/
- if (useDone)
+ if (useDone) {
/* don't put online */
- FSYNC_askfs(volume, tpartp->name, FSYNC_DONE, 0);
- else {
+ FSYNC_VolOp(volume, tpartp->name, FSYNC_VOL_DONE, 0, NULL);
+ } else {
/* fs can use it again */
- FSYNC_askfs(volume, tpartp->name, FSYNC_ON, 0);
+ FSYNC_VolOp(volume, tpartp->name, FSYNC_VOL_ON, 0, NULL);
+
+ /* XXX this code path is only hit by volume utilities, thus
+ * V_BreakVolumeCallbacks will always be NULL. if we really
+ * want to break callbacks in this path we need to use FSYNC_VolOp() */
+#ifdef notdef
/* Dettaching it so break all callbacks on it */
if (V_BreakVolumeCallbacks) {
Log("volume %u detached; breaking all call backs\n", volume);
(*V_BreakVolumeCallbacks) (volume);
}
+#endif
}
}
+#endif /* FSSYNC_BUILD_CLIENT */
}
void
}
-VnodeId
-VAllocBitmapEntry_r(Error * ec, Volume * vp, register struct vnodeIndex
- *index)
+/***************************************************/
+/* Volume fd/inode handle closing routines */
+/***************************************************/
+
+/* For VDetachVolume, we close all cached file descriptors, but keep
+ * the Inode handles in case we need to read from a busy volume.
+ *
+ * Steps, in order:
+ *   1. DFlushVolume(V_id(vp)) and VCloseVnodeFiles_r(vp)
+ *   2. unless running as a volume utility, IH_CONDSYNC the large and
+ *      small vnode index handles and the disk data handle (plus the
+ *      link handle when AFS_NT40_ENV is defined)
+ *   3. IH_REALLYCLOSE the large/small vnode index, disk data and link
+ *      handles
+ *
+ * Under demand attach the volume is put into VOL_STATE_OFFLINING for
+ * the duration of the call and its previous state is restored at the
+ * end; steps 2 and 3 run between VOL_UNLOCK and VOL_LOCK.
+ */
+/* for demand attach, caller MUST hold ref count on vp */
+static void
+VCloseVolumeHandles_r(Volume * vp)
{
- register byte *bp, *ep;
- *ec = 0;
- /* This test is probably redundant */
- if (!VolumeWriteable(vp)) {
- *ec = (bit32) VREADONLY;
- return 0;
- }
-#ifdef BITMAP_LATER
- if ((programType == fileServer) && !index->bitmap) {
- int i;
- int wasVBUSY = 0;
- if (vp->specialStatus == VBUSY) {
- if (vp->goingOffline) { /* vos dump waiting for the volume to
- * go offline. We probably come here
- * from AddNewReadableResidency */
- wasVBUSY = 1;
- } else {
- VOL_UNLOCK;
- while (vp->specialStatus == VBUSY)
-#ifdef AFS_PTHREAD_ENV
- sleep(2);
-#else /* AFS_PTHREAD_ENV */
- IOMGR_Sleep(2);
-#endif /* AFS_PTHREAD_ENV */
- VOL_LOCK;
- }
- }
- if (!index->bitmap) {
- vp->specialStatus = VBUSY; /* Stop anyone else from using it. */
- for (i = 0; i < nVNODECLASSES; i++) {
- VOL_UNLOCK;
- GetBitmap(ec, vp, i);
- VOL_LOCK;
- if (*ec) {
- vp->specialStatus = 0;
- vp->shuttingDown = 1; /* Let who has it free it. */
- return NULL;
- }
- }
- if (!wasVBUSY)
- vp->specialStatus = 0; /* Allow others to have access. */
- }
- }
-#endif /* BITMAP_LATER */
- bp = index->bitmap + index->bitmapOffset;
- ep = index->bitmap + index->bitmapSize;
- while (bp < ep) {
- if ((*(bit32 *) bp) != (bit32) 0xffffffff) {
- int o;
- index->bitmapOffset = (afs_uint32) (bp - index->bitmap);
- while (*bp == 0xff)
- bp++;
- o = ffs(~*bp) - 1; /* ffs is documented in BSTRING(3) */
- *bp |= (1 << o);
- return (VnodeId) ((bp - index->bitmap) * 8 + o);
- }
- bp += sizeof(bit32) /* i.e. 4 */ ;
+#ifdef AFS_DEMAND_ATTACH_FS
+ VolState state_save;
+
+ state_save = VChangeState_r(vp, VOL_STATE_OFFLINING);
+#endif
+
+ /* demand attach fs
+ *
+ * XXX need to investigate whether we can perform
+ * DFlushVolume outside of vol_glock_mutex...
+ *
+ * VCloseVnodeFiles_r drops the glock internally */
+ DFlushVolume(V_id(vp));
+ VCloseVnodeFiles_r(vp);
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ VOL_UNLOCK;
+#endif
+
+ /* Too time consuming and unnecessary for the volserver */
+ if (programType != volumeUtility) {
+ IH_CONDSYNC(vp->vnodeIndex[vLarge].handle);
+ IH_CONDSYNC(vp->vnodeIndex[vSmall].handle);
+ IH_CONDSYNC(vp->diskDataHandle);
+#ifdef AFS_NT40_ENV
+ IH_CONDSYNC(vp->linkHandle);
+#endif /* AFS_NT40_ENV */
}
- /* No bit map entry--must grow bitmap */
- bp = (byte *)
- realloc(index->bitmap, index->bitmapSize + VOLUME_BITMAP_GROWSIZE);
- assert(bp != NULL);
- index->bitmap = bp;
- bp += index->bitmapSize;
- memset(bp, 0, VOLUME_BITMAP_GROWSIZE);
- index->bitmapOffset = index->bitmapSize;
- index->bitmapSize += VOLUME_BITMAP_GROWSIZE;
- *bp = 1;
- return index->bitmapOffset * 8;
-}
-VnodeId
-VAllocBitmapEntry(Error * ec, Volume * vp, register struct vnodeIndex * index)
-{
- VnodeId retVal;
+ IH_REALLYCLOSE(vp->vnodeIndex[vLarge].handle);
+ IH_REALLYCLOSE(vp->vnodeIndex[vSmall].handle);
+ IH_REALLYCLOSE(vp->diskDataHandle);
+ IH_REALLYCLOSE(vp->linkHandle);
+
+#ifdef AFS_DEMAND_ATTACH_FS
VOL_LOCK;
- retVal = VAllocBitmapEntry_r(ec, vp, index);
- VOL_UNLOCK;
- return retVal;
+ VChangeState_r(vp, state_save);
+#endif
}
-void
-VFreeBitMapEntry_r(Error * ec, register struct vnodeIndex *index,
- unsigned bitNumber)
+/* For both VForceOffline and VOffline, we close all relevant handles.
+ * For VOffline, if we re-attach the volume, the files may possibly be
+ * different than before.
+ */
+/* for demand attach, caller MUST hold a ref count on vp */
+static void
+VReleaseVolumeHandles_r(Volume * vp)
{
- unsigned int offset;
- *ec = 0;
-#ifdef BITMAP_LATER
- if (!index->bitmap)
- return;
-#endif /* BITMAP_LATER */
- offset = bitNumber >> 3;
- if (offset >= index->bitmapSize) {
- *ec = VNOVNODE;
- return;
+#ifdef AFS_DEMAND_ATTACH_FS
+ VolState state_save;
+
+ state_save = VChangeState_r(vp, VOL_STATE_DETACHING);
+#endif
+
+ /* XXX need to investigate whether we can perform
+ * DFlushVolume outside of vol_glock_mutex... */
+ DFlushVolume(V_id(vp));
+
+ VReleaseVnodeFiles_r(vp); /* releases the glock internally */
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ VOL_UNLOCK;
+#endif
+
+ /* Too time consuming and unnecessary for the volserver */
+ if (programType != volumeUtility) {
+ IH_CONDSYNC(vp->vnodeIndex[vLarge].handle);
+ IH_CONDSYNC(vp->vnodeIndex[vSmall].handle);
+ IH_CONDSYNC(vp->diskDataHandle);
+#ifdef AFS_NT40_ENV
+ IH_CONDSYNC(vp->linkHandle);
+#endif /* AFS_NT40_ENV */
}
- if (offset < index->bitmapOffset)
- index->bitmapOffset = offset & ~3; /* Truncate to nearest bit32 */
- *(index->bitmap + offset) &= ~(1 << (bitNumber & 0x7));
-}
-void
-VFreeBitMapEntry(Error * ec, register struct vnodeIndex *index,
- unsigned bitNumber)
-{
+ IH_RELEASE(vp->vnodeIndex[vLarge].handle);
+ IH_RELEASE(vp->vnodeIndex[vSmall].handle);
+ IH_RELEASE(vp->diskDataHandle);
+ IH_RELEASE(vp->linkHandle);
+
+#ifdef AFS_DEMAND_ATTACH_FS
VOL_LOCK;
- VFreeBitMapEntry_r(ec, index, bitNumber);
- VOL_UNLOCK;
+ VChangeState_r(vp, state_save);
+#endif
}
+
+/***************************************************/
+/* Volume write and fsync routines */
+/***************************************************/
+
void
-VUpdateVolume_r(Error * ec, Volume * vp)
+VUpdateVolume_r(Error * ec, Volume * vp, int flags)
{
+#ifdef AFS_DEMAND_ATTACH_FS
+ VolState state_save;
+
+ if (flags & VOL_UPDATE_WAIT) {
+ VCreateReservation_r(vp);
+ VWaitExclusiveState_r(vp);
+ }
+#endif
+
*ec = 0;
if (programType == fileServer)
V_uniquifier(vp) =
(V_inUse(vp) ? V_nextVnodeUnique(vp) +
200 : V_nextVnodeUnique(vp));
- /*printf("Writing volume header for '%s'\n", V_name(vp)); */
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ state_save = VChangeState_r(vp, VOL_STATE_UPDATING);
+ VOL_UNLOCK;
+#endif
+
WriteVolumeHeader_r(ec, vp);
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ VOL_LOCK;
+ VChangeState_r(vp, state_save);
+ if (flags & VOL_UPDATE_WAIT) {
+ VCancelReservation_r(vp);
+ }
+#endif
+
if (*ec) {
Log("VUpdateVolume: error updating volume header, volume %u (%s)\n",
V_id(vp), V_name(vp));
- VForceOffline_r(vp);
+ /* try to update on-disk header,
+ * while preventing infinite recursion */
+ if (!(flags & VOL_UPDATE_NOFORCEOFF)) {
+ VForceOffline_r(vp, VOL_FORCEOFF_NOUPDATE);
+ }
}
}
VUpdateVolume(Error * ec, Volume * vp)
{
VOL_LOCK;
- VUpdateVolume_r(ec, vp);
+ VUpdateVolume_r(ec, vp, VOL_UPDATE_WAIT);
VOL_UNLOCK;
}
void
-VSyncVolume_r(Error * ec, Volume * vp)
+VSyncVolume_r(Error * ec, Volume * vp, int flags)
{
FdHandle_t *fdP;
- VUpdateVolume_r(ec, vp);
- if (!ec) {
- int code;
+ int code;
+#ifdef AFS_DEMAND_ATTACH_FS
+ VolState state_save;
+#endif
+
+ if (flags & VOL_SYNC_WAIT) {
+ VUpdateVolume_r(ec, vp, VOL_UPDATE_WAIT);
+ } else {
+ VUpdateVolume_r(ec, vp, 0);
+ }
+ if (!*ec) {
+#ifdef AFS_DEMAND_ATTACH_FS
+ state_save = VChangeState_r(vp, VOL_STATE_UPDATING);
+ VOL_UNLOCK;
+#endif
fdP = IH_OPEN(V_diskDataHandle(vp));
assert(fdP != NULL);
code = FDH_SYNC(fdP);
assert(code == 0);
FDH_CLOSE(fdP);
+#ifdef AFS_DEMAND_ATTACH_FS
+ VOL_LOCK;
+ VChangeState_r(vp, state_save);
+#endif
}
}
VSyncVolume(Error * ec, Volume * vp)
{
VOL_LOCK;
- VSyncVolume_r(ec, vp);
+ VSyncVolume_r(ec, vp, VOL_SYNC_WAIT);
VOL_UNLOCK;
}
+
+/***************************************************/
+/* Volume deallocation routines */
+/***************************************************/
+
+#ifdef AFS_DEMAND_ATTACH_FS
static void
FreeVolume(Volume * vp)
{
+ /* free the heap space, iff it's safe.
+ * otherwise, pull it out of the hash table, so it
+ * will get deallocated when all refs to it go away */
+ if (!VCheckFree(vp)) {
+ DeleteVolumeFromHashTable(vp);
+ DeleteVolumeFromVByPList_r(vp);
+
+ /* make sure we invalidate the header cache entry */
+ FreeVolumeHeader(vp);
+ }
+}
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+static void
+ReallyFreeVolume(Volume * vp)
+{
int i;
if (!vp)
return;
+#ifdef AFS_DEMAND_ATTACH_FS
+ /* debug */
+ VChangeState_r(vp, VOL_STATE_FREED);
+ if (vp->pending_vol_op)
+ free(vp->pending_vol_op);
+#endif /* AFS_DEMAND_ATTACH_FS */
for (i = 0; i < nVNODECLASSES; i++)
if (vp->vnodeIndex[i].bitmap)
free(vp->vnodeIndex[i].bitmap);
FreeVolumeHeader(vp);
+#ifndef AFS_DEMAND_ATTACH_FS
DeleteVolumeFromHashTable(vp);
+#endif /* AFS_DEMAND_ATTACH_FS */
free(vp);
}
-static void
-GetBitmap(Error * ec, Volume * vp, VnodeClass class)
+/* check to see if we should shutdown this volume
+ * returns 1 if volume was freed, 0 otherwise */
+#ifdef AFS_DEMAND_ATTACH_FS
+static int
+VCheckDetach(register Volume * vp)
{
- StreamHandle_t *file;
- int nVnodes;
- int size;
- struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
- struct vnodeIndex *vip = &vp->vnodeIndex[class];
- struct VnodeDiskObject *vnode;
- unsigned int unique = 0;
- FdHandle_t *fdP;
-#ifdef BITMAP_LATER
- byte *BitMap = 0;
-#endif /* BITMAP_LATER */
+ int ret = 0;
- *ec = 0;
+ if (vp->nUsers || vp->nWaiters)
+ return ret;
- fdP = IH_OPEN(vip->handle);
- assert(fdP != NULL);
- file = FDH_FDOPEN(fdP, "r");
- assert(file != NULL);
- vnode = (VnodeDiskObject *) malloc(vcp->diskSize);
- assert(vnode != NULL);
- size = OS_SIZE(fdP->fd_fd);
- assert(size != -1);
- nVnodes = (size <= vcp->diskSize ? 0 : size - vcp->diskSize)
- >> vcp->logSize;
- vip->bitmapSize = ((nVnodes / 8) + 10) / 4 * 4; /* The 10 is a little extra so
- * a few files can be created in this volume,
- * the whole thing is rounded up to nearest 4
- * bytes, because the bit map allocator likes
- * it that way */
-#ifdef BITMAP_LATER
- BitMap = (byte *) calloc(1, vip->bitmapSize);
- assert(BitMap != NULL);
-#else /* BITMAP_LATER */
- vip->bitmap = (byte *) calloc(1, vip->bitmapSize);
- assert(vip->bitmap != NULL);
- vip->bitmapOffset = 0;
-#endif /* BITMAP_LATER */
- if (STREAM_SEEK(file, vcp->diskSize, 0) != -1) {
- int bitNumber = 0;
- for (bitNumber = 0; bitNumber < nVnodes + 100; bitNumber++) {
- if (STREAM_READ(vnode, vcp->diskSize, 1, file) != 1)
- break;
- if (vnode->type != vNull) {
- if (vnode->vnodeMagic != vcp->magic) {
- Log("GetBitmap: addled vnode index in volume %s; volume needs salvage\n", V_name(vp));
- *ec = VSALVAGE;
- break;
- }
-#ifdef BITMAP_LATER
- *(BitMap + (bitNumber >> 3)) |= (1 << (bitNumber & 0x7));
-#else /* BITMAP_LATER */
- *(vip->bitmap + (bitNumber >> 3)) |= (1 << (bitNumber & 0x7));
-#endif /* BITMAP_LATER */
- if (unique <= vnode->uniquifier)
- unique = vnode->uniquifier + 1;
- }
-#ifndef AFS_PTHREAD_ENV
- if ((bitNumber & 0x00ff) == 0x0ff) { /* every 256 iterations */
- IOMGR_Poll();
- }
-#endif /* !AFS_PTHREAD_ENV */
+ if (vp->shuttingDown) {
+ ret = 1;
+ VReleaseVolumeHandles_r(vp);
+ VCheckSalvage(vp);
+ ReallyFreeVolume(vp);
+ if (programType == fileServer) {
+ assert(pthread_cond_broadcast(&vol_put_volume_cond) == 0);
}
}
- if (vp->nextVnodeUnique < unique) {
- Log("GetBitmap: bad volume uniquifier for volume %s; volume needs salvage\n", V_name(vp));
- *ec = VSALVAGE;
+ return ret;
+}
+#else /* AFS_DEMAND_ATTACH_FS */
+static int
+VCheckDetach(register Volume * vp)
+{
+ int ret = 0;
+
+ if (vp->nUsers)
+ return ret;
+
+ if (vp->shuttingDown) {
+ ret = 1;
+ VReleaseVolumeHandles_r(vp);
+ ReallyFreeVolume(vp);
+ if (programType == fileServer) {
+#if defined(AFS_PTHREAD_ENV)
+ assert(pthread_cond_broadcast(&vol_put_volume_cond) == 0);
+#else /* AFS_PTHREAD_ENV */
+ LWP_NoYieldSignal(VPutVolume);
+#endif /* AFS_PTHREAD_ENV */
+ }
}
- /* Paranoia, partly justified--I think fclose after fdopen
- * doesn't seem to close fd. In any event, the documentation
- * doesn't specify, so it's safer to close it twice.
- */
- STREAM_CLOSE(file);
- FDH_CLOSE(fdP);
- free(vnode);
-#ifdef BITMAP_LATER
- /* There may have been a racing condition with some other thread, both
- * creating the bitmaps for this volume. If the other thread was faster
- * the pointer to bitmap should already be filled and we can free ours.
- */
- if (vip->bitmap == NULL) {
- vip->bitmap = BitMap;
- vip->bitmapOffset = 0;
- } else
- free((byte *) BitMap);
-#endif /* BITMAP_LATER */
+ return ret;
}
+#endif /* AFS_DEMAND_ATTACH_FS */
-static void
-GetVolumePath(Error * ec, VolId volumeId, char **partitionp, char **namep)
+/* check to see if we should offline this volume
+ * return 1 if volume went offline, 0 otherwise */
+#ifdef AFS_DEMAND_ATTACH_FS
+static int
+VCheckOffline(register Volume * vp)
{
- static char partition[VMAXPATHLEN], name[VMAXPATHLEN];
- char path[VMAXPATHLEN];
- int found = 0;
- struct DiskPartition *dp;
+ Volume * rvp = NULL;
+ int ret = 0;
- *ec = 0;
- name[0] = '/';
- (void)afs_snprintf(&name[1], (sizeof name) - 1, VFORMAT, volumeId);
- for (dp = DiskPartitionList; dp; dp = dp->next) {
- struct afs_stat status;
- strcpy(path, VPartitionPath(dp));
- strcat(path, name);
- if (afs_stat(path, &status) == 0) {
- strcpy(partition, dp->name);
- found = 1;
- break;
+ if (vp->goingOffline && !vp->nUsers) {
+ Error error;
+ assert(programType == fileServer);
+ assert((V_attachState(vp) != VOL_STATE_ATTACHED) &&
+ (V_attachState(vp) != VOL_STATE_FREED) &&
+ (V_attachState(vp) != VOL_STATE_PREATTACHED) &&
+ (V_attachState(vp) != VOL_STATE_UNATTACHED));
+
+ /* valid states:
+ *
+ * VOL_STATE_GOING_OFFLINE
+ * VOL_STATE_SHUTTING_DOWN
+ * IsErrorState(V_attachState(vp))
+ * IsExclusiveState(V_attachState(vp))
+ */
+
+ VCreateReservation_r(vp);
+ VChangeState_r(vp, VOL_STATE_OFFLINING);
+
+ ret = 1;
+ /* must clear the goingOffline flag before we drop the glock */
+ vp->goingOffline = 0;
+ V_inUse(vp) = 0;
+
+ VLRU_Delete_r(vp);
+
+ /* perform async operations */
+ VUpdateVolume_r(&error, vp, 0);
+ VCloseVolumeHandles_r(vp);
+
+ /* invalidate the volume header cache entry */
+ FreeVolumeHeader(vp);
+
+ if (LogLevel) {
+ Log("VOffline: Volume %u (%s) is now offline", V_id(vp),
+ V_name(vp));
+ if (V_offlineMessage(vp)[0])
+ Log(" (%s)", V_offlineMessage(vp));
+ Log("\n");
+ }
+
+ /* if nothing changed state to error or salvaging,
+ * drop state to unattached */
+ if (!IsErrorState(V_attachState(vp))) {
+ VChangeState_r(vp, VOL_STATE_UNATTACHED);
}
+ VCancelReservation_r(vp);
}
- if (!found) {
- *ec = VNOVOL;
- *partitionp = *namep = NULL;
- } else {
- *partitionp = partition;
- *namep = name;
+ return ret;
+}
+#else /* AFS_DEMAND_ATTACH_FS */
+static int
+VCheckOffline(register Volume * vp)
+{
+ Volume * rvp = NULL;
+ int ret = 0;
+
+ if (vp->goingOffline && !vp->nUsers) {
+ Error error;
+ assert(programType == fileServer);
+
+ ret = 1;
+ vp->goingOffline = 0;
+ V_inUse(vp) = 0;
+ VUpdateVolume_r(&error, vp, 0);
+ VCloseVolumeHandles_r(vp);
+ FreeVolumeHeader(vp);
+ if (LogLevel) {
+ Log("VOffline: Volume %u (%s) is now offline", V_id(vp),
+ V_name(vp));
+ if (V_offlineMessage(vp)[0])
+ Log(" (%s)", V_offlineMessage(vp));
+ Log("\n");
+ }
+#ifdef AFS_PTHREAD_ENV
+ assert(pthread_cond_broadcast(&vol_put_volume_cond) == 0);
+#else /* AFS_PTHREAD_ENV */
+ LWP_NoYieldSignal(VPutVolume);
+#endif /* AFS_PTHREAD_ENV */
}
+ return ret;
}
+#endif /* AFS_DEMAND_ATTACH_FS */
-int
-VolumeNumber(char *name)
+/***************************************************/
+/* demand attach fs ref counting routines */
+/***************************************************/
+
+#ifdef AFS_DEMAND_ATTACH_FS
+/* the following two functions handle reference counting for
+ * asynchronous operations on volume structs.
+ *
+ * their purpose is to prevent a VDetachVolume or VShutdown
+ * from free()ing the Volume struct during an async i/o op */
+
+/* register with the async volume op ref counter */
+static void
+VCreateReservation_r(Volume * vp)
{
- if (*name == '/')
- name++;
- return atoi(name + 1);
+ vp->nWaiters++;
}
-char *
-VolumeExternalName(VolumeId volumeId)
+/* unregister with the async volume op ref counter
+ *
+ * Drops one reservation on vp. When the count reaches zero, the deferred
+ * checks run: VCheckOffline, then VCheckDetach, and -- only if
+ * VCheckDetach did not free the volume -- VCheckSalvage and VCheckFree.
+ *
+ * NOTE: VCheckDetach and VCheckFree can free vp, so callers must not
+ * touch vp after this returns unless they hold another reference. */
+static void
+VCancelReservation_r(Volume * vp)
{
- static char name[VMAXPATHLEN];
- (void)afs_snprintf(name, sizeof name, VFORMAT, volumeId);
- return name;
+    /* keep the decrement outside of assert(): a side effect inside the
+     * assertion would silently vanish if assertions are compiled out */
+    vp->nWaiters--;
+    assert(vp->nWaiters >= 0);
+    if (vp->nWaiters == 0) {
+        VCheckOffline(vp);
+        if (!VCheckDetach(vp)) {
+            VCheckSalvage(vp);
+            VCheckFree(vp);
+        }
+    }
}
-#if OPENAFS_VOL_STATS
-#define OneDay (86400) /* 24 hours' worth of seconds */
-#else
-#define OneDay (24*60*60) /* 24 hours */
-#endif /* OPENAFS_VOL_STATS */
+/* check to see if we should free this volume now
+ * return 1 if volume was freed, 0 otherwise */
+static int
+VCheckFree(Volume * vp)
+{
+ int ret = 0;
+ if ((vp->nUsers == 0) &&
+ (vp->nWaiters == 0) &&
+ !(V_attachFlags(vp) & (VOL_IN_HASH |
+ VOL_ON_VBYP_LIST |
+ VOL_IS_BUSY |
+ VOL_ON_VLRU))) {
+ ReallyFreeVolume(vp);
+ ret = 1;
+ }
+ return ret;
+}
+#endif /* AFS_DEMAND_ATTACH_FS */
-#define Midnight(date) ((date-TimeZoneCorrection)/OneDay*OneDay+TimeZoneCorrection)
-/*------------------------------------------------------------------------
- * [export] VAdjustVolumeStatistics
- *
- * Description:
- * If we've passed midnight, we need to update all the day use
- * statistics as well as zeroing the detailed volume statistics
- * (if we are implementing them).
- *
- * Arguments:
- * vp : Pointer to the volume structure describing the lucky
- * volume being considered for update.
- *
- * Returns:
- * 0 (always!)
- *
- * Environment:
- * Nothing interesting.
- *
- * Side Effects:
- * As described.
- *------------------------------------------------------------------------*/
+/***************************************************/
+/* online volume operations routines */
+/***************************************************/
+#ifdef AFS_DEMAND_ATTACH_FS
int
-VAdjustVolumeStatistics_r(register Volume * vp)
+VRegisterVolOp_r(Volume * vp, FSSYNC_VolOp_info * vopinfo)
{
- unsigned int now = FT_ApproxTime();
-
- if (now - V_dayUseDate(vp) > OneDay) {
- register ndays, i;
-
- ndays = (now - V_dayUseDate(vp)) / OneDay;
- for (i = 6; i > ndays - 1; i--)
- V_weekUse(vp)[i] = V_weekUse(vp)[i - ndays];
- for (i = 0; i < ndays - 1 && i < 7; i++)
- V_weekUse(vp)[i] = 0;
- if (ndays <= 7)
- V_weekUse(vp)[ndays - 1] = V_dayUse(vp);
- V_dayUse(vp) = 0;
- V_dayUseDate(vp) = Midnight(now);
+ FSSYNC_VolOp_info * info;
-#if OPENAFS_VOL_STATS
- /*
- * All we need to do is bzero the entire VOL_STATS_BYTES of
- * the detailed volume statistics area.
- */
- memset((char *)(V_stat_area(vp)), 0, VOL_STATS_BYTES);
-#endif /* OPENAFS_VOL_STATS */
- }
+ /* attach a vol op info node to the volume struct */
+ info = (FSSYNC_VolOp_info *) malloc(sizeof(FSSYNC_VolOp_info));
+ assert(info != NULL);
+ memcpy(info, vopinfo, sizeof(FSSYNC_VolOp_info));
+ vp->pending_vol_op = info;
- /*It's been more than a day of collection */
- /*
- * Always return happily.
- */
- return (0);
-} /*VAdjustVolumeStatistics */
+ /* update stats */
+ vp->stats.last_vol_op = FT_ApproxTime();
+ vp->stats.vol_ops++;
+ IncUInt64(&VStats.vol_ops);
-int
-VAdjustVolumeStatistics(register Volume * vp)
-{
- int retVal;
- VOL_LOCK;
- retVal = VAdjustVolumeStatistics_r(vp);
- VOL_UNLOCK;
- return retVal;
+ return 0;
}
-void
-VBumpVolumeUsage_r(register Volume * vp)
+int
+VDeregisterVolOp_r(Volume * vp, FSSYNC_VolOp_info * vopinfo)
{
- unsigned int now = FT_ApproxTime();
- if (now - V_dayUseDate(vp) > OneDay)
- VAdjustVolumeStatistics_r(vp);
- /*
- * Save the volume header image to disk after every 128 bumps to dayUse.
- */
- if ((V_dayUse(vp)++ & 127) == 0) {
- Error error;
- VUpdateVolume_r(&error, vp);
+ if (vp->pending_vol_op) {
+ free(vp->pending_vol_op);
+ vp->pending_vol_op = NULL;
}
+ return 0;
}
+#endif /* AFS_DEMAND_ATTACH_FS */
-void
-VBumpVolumeUsage(register Volume * vp)
+int
+VVolOpLeaveOnline_r(Volume * vp, FSSYNC_VolOp_info * vopinfo)
{
- VOL_LOCK;
- VBumpVolumeUsage_r(vp);
- VOL_UNLOCK;
+ return (vopinfo->com.command == FSYNC_VOL_NEEDVOLUME &&
+ (vopinfo->com.reason == V_READONLY ||
+ (!VolumeWriteable(vp) &&
+ (vopinfo->com.reason == V_CLONE ||
+ vopinfo->com.reason == V_DUMP))));
}
-void
-VSetDiskUsage_r(void)
+int
+VVolOpSetVBusy_r(Volume * vp, FSSYNC_VolOp_info * vopinfo)
{
- static int FifteenMinuteCounter = 0;
+ return (vopinfo->com.command == FSYNC_VOL_NEEDVOLUME &&
+ (vopinfo->com.reason == V_CLONE ||
+ vopinfo->com.reason == V_DUMP));
+}
- while (VInit < 2) {
- /* NOTE: Don't attempt to access the partitions list until the
- * initialization level indicates that all volumes are attached,
- * which implies that all partitions are initialized. */
-#ifdef AFS_PTHREAD_ENV
- sleep(10);
-#else /* AFS_PTHREAD_ENV */
- IOMGR_Sleep(10);
-#endif /* AFS_PTHREAD_ENV */
- }
- VResetDiskUsage_r();
- if (++FifteenMinuteCounter == 3) {
- FifteenMinuteCounter = 0;
- VScanUpdateList();
+/***************************************************/
+/* online salvager routines */
+/***************************************************/
+#if defined(AFS_DEMAND_ATTACH_FS)
+#define SALVAGE_PRIO_UPDATE_INTERVAL 3 /* number of seconds between prio updates */
+#define SALVAGE_COUNT_MAX 16 /* number of online salvages we
+ * allow before moving the volume
+ * into a permanent error state
+ *
+ * once this threshold is reached,
+ * the operator will have to manually
+ * issue a 'bos salvage' to bring
+ * the volume back online
+ */
+
+/* check to see if we should salvage this volume
+ * returns 1 if salvage scheduled, 0 otherwise */
+static int
+VCheckSalvage(register Volume * vp)
+{
+ int ret = 0;
+#ifdef SALVSYNC_BUILD_CLIENT
+ if (vp->nUsers || vp->nWaiters)
+ return ret;
+ if (vp->salvage.requested) {
+ VScheduleSalvage_r(vp);
+ ret = 1;
}
+#endif /* SALVSYNC_BUILD_CLIENT */
+ return ret;
}
-void
-VSetDiskUsage(void)
+/*
+ * request that a salvage be performed once
+ * ref counts reach zero
+ */
+int
+VRequestSalvage_r(Volume * vp, int reason, int flags)
{
- VOL_LOCK;
- VSetDiskUsage_r();
- VOL_UNLOCK;
+#ifdef SALVSYNC_BUILD_CLIENT
+ if (programType != fileServer)
+ return 1;
+
+ if (!vp->salvage.requested) {
+ vp->salvage.requested = 1;
+ vp->salvage.reason = reason;
+ vp->stats.last_salvage = FT_ApproxTime();
+ if (flags & VOL_SALVAGE_INVALIDATE_HEADER) {
+ ReleaseVolumeHeader(vp->header);
+ }
+ if (vp->stats.salvages < SALVAGE_COUNT_MAX) {
+ VChangeState_r(vp, VOL_STATE_SALVAGING);
+ } else {
+ Log("VRequestSalvage: volume %u online salvaged too many times; forced offline.\n", vp->hashid);
+ VChangeState_r(vp, VOL_STATE_ERROR);
+ }
+ }
+#endif /* SALVSYNC_BUILD_CLIENT */
+ return 0;
}
-/* The number of minutes that a volume hasn't been updated before the
- * "Dont salvage" flag in the volume header will be turned on */
-#define SALVAGE_INTERVAL (10*60)
+/*
+ * update salvage priority
+ */
+static int
+VUpdateSalvagePriority_r(Volume * vp)
+{
+ int code, ret=0;
+ afs_uint32 now;
-static VolumeId *UpdateList; /* Pointer to array of Volume ID's */
-static int nUpdatedVolumes; /* Updated with entry in UpdateList, salvage after crash flag on */
-static int updateSize; /* number of entries possible */
-#define UPDATE_LIST_SIZE 100 /* size increment */
+#ifdef SALVSYNC_BUILD_CLIENT
+ vp->salvage.prio++;
+ now = FT_ApproxTime();
-void
-VAddToVolumeUpdateList_r(Error * ec, Volume * vp)
-{
- *ec = 0;
- vp->updateTime = FT_ApproxTime();
- if (V_dontSalvage(vp) == 0)
- return;
- V_dontSalvage(vp) = 0;
- VSyncVolume_r(ec, vp);
- if (*ec)
- return;
- if (!UpdateList) {
- updateSize = UPDATE_LIST_SIZE;
- UpdateList = (VolumeId *) malloc(sizeof(VolumeId) * updateSize);
- } else {
- if (nUpdatedVolumes == updateSize) {
- updateSize += UPDATE_LIST_SIZE;
- UpdateList =
- (VolumeId *) realloc(UpdateList,
- sizeof(VolumeId) * updateSize);
+ /* update the salvageserver priority queue occasionally so that
+ * frequently requested volumes get moved to the head of the queue
+ */
+ if ((vp->salvage.scheduled) &&
+ (vp->stats.last_salvage_req < (now-SALVAGE_PRIO_UPDATE_INTERVAL))) {
+ code = SALVSYNC_SalvageVolume(vp->hashid,
+ VPartitionPath(vp->partition),
+ SALVSYNC_RAISEPRIO,
+ vp->salvage.reason,
+ vp->salvage.prio,
+ NULL);
+ vp->stats.last_salvage_req = now;
+ if (code != SYNC_OK) {
+ ret = 1;
}
}
- assert(UpdateList != NULL);
- UpdateList[nUpdatedVolumes++] = V_id(vp);
+#endif /* SALVSYNC_BUILD_CLIENT */
+ return ret;
}
-static void
-VScanUpdateList(void)
+
+/*
+ * schedule a salvage with the salvage server
+ */
+static int
+VScheduleSalvage_r(Volume * vp)
{
- register int i, gap;
- register Volume *vp;
- Error error;
+ int code, ret=0;
+#ifdef SALVSYNC_BUILD_CLIENT
+ VolState state_save;
+ char partName[16];
+
+ if (vp->nWaiters || vp->nUsers) {
+ return 1;
+ }
+
+ /* prevent endless salvage,attach,salvage,attach,... loops */
+ if (vp->stats.salvages >= SALVAGE_COUNT_MAX)
+ return 1;
+
+ if (!vp->salvage.scheduled) {
+ /* if we haven't previously scheduled a salvage, do so now
+ *
+ * set the volume to an exclusive state and drop the lock
+ * around the SALVSYNC call
+ */
+ strlcpy(partName, VPartitionPath(vp->partition), sizeof(partName));
+ state_save = VChangeState_r(vp, VOL_STATE_SALVSYNC_REQ);
+ V_attachFlags(vp) |= VOL_IS_BUSY;
+ VOL_UNLOCK;
+
+ /* can't use V_id() since there's no guarantee
+ * we have the disk data header at this point */
+ code = SALVSYNC_SalvageVolume(vp->hashid,
+ partName,
+ SALVSYNC_SALVAGE,
+ vp->salvage.reason,
+ vp->salvage.prio,
+ NULL);
+ VOL_LOCK;
+ VChangeState_r(vp, state_save);
+ V_attachFlags(vp) &= ~(VOL_IS_BUSY);
+
+ if (code == SYNC_OK) {
+ vp->salvage.scheduled = 1;
+ vp->stats.salvages++;
+ vp->stats.last_salvage_req = FT_ApproxTime();
+ IncUInt64(&VStats.salvages);
+ } else {
+ ret = 1;
+ switch(code) {
+ case SYNC_BAD_COMMAND:
+ case SYNC_COM_ERROR:
+ break;
+ case SYNC_DENIED:
+ Log("VScheduleSalvage_r: SALVSYNC request denied\n");
+ break;
+ default:
+ Log("VScheduleSalvage_r: SALVSYNC unknown protocol error\n");
+ break;
+ }
+ }
+ }
+#endif /* SALVSYNC_BUILD_CLIENT */
+ return ret;
+}
+
+/*
+ * cancel a scheduled salvage operation
+ *
+ * If a salvage is currently marked as scheduled for vp, send a
+ * SALVSYNC_CANCEL request for it (with the given reason) and clear the
+ * scheduled flag when the request is answered with SYNC_OK.
+ *
+ * Returns 0 when nothing was scheduled or the cancel succeeded, and 1
+ * when the SALVSYNC request did not return SYNC_OK. Without
+ * SALVSYNC_BUILD_CLIENT this is a no-op returning 0.
+ */
+static int
+VCancelSalvage_r(Volume * vp, int reason)
+{
+    int rc, failed = 0;
+
+#ifdef SALVSYNC_BUILD_CLIENT
+    if (!vp->salvage.scheduled) {
+        return failed;
+    }
+
+    rc = SALVSYNC_SalvageVolume(vp->hashid,
+                                VPartitionPath(vp->partition),
+                                SALVSYNC_CANCEL,
+                                reason,
+                                0,
+                                NULL);
+    if (rc != SYNC_OK) {
+        failed = 1;
+    } else {
+        vp->salvage.scheduled = 0;
+    }
+#endif /* SALVSYNC_BUILD_CLIENT */
+    return failed;
+}
+
+/* Set up this process's SALVSYNC client connection (SALVSYNC_clientInit).
+ * Note that the _r routines below assert that the caller is neither the
+ * salvageserver nor a volume utility. This is separated from
+ * VInitVolumePackage so that a process can fork--and each of the children
+ * can independently initialize its own connection */
+#ifdef SALVSYNC_BUILD_CLIENT
+/* Locked entry point: holds VOL_LOCK around VConnectSALV_r() and hands
+ * its return value back to the caller. */
+int
+VConnectSALV(void)
+{
+    int code;
+
+    VOL_LOCK;
+    code = VConnectSALV_r();
+    VOL_UNLOCK;
+
+    return code;
+}
+
+/* Asserts that this program is neither the salvageserver nor a volume
+ * utility, then returns whatever SALVSYNC_clientInit() returns.
+ * NOTE(review): the _r suffix suggests the caller already holds VOL_LOCK
+ * (VConnectSALV takes it before calling here) -- confirm. */
+int
+VConnectSALV_r(void)
+{
+ assert((programType != salvageServer) &&
+ (programType != volumeUtility));
+ return SALVSYNC_clientInit();
+}
+
+/* Locked entry point: holds VOL_LOCK around VDisconnectSALV_r() and
+ * returns that routine's result to the caller. */
+int
+VDisconnectSALV(void)
+{
+    int retVal;
+    VOL_LOCK;
+    /* capture the result; returning retVal unassigned would hand the
+     * caller an indeterminate value */
+    retVal = VDisconnectSALV_r();
+    VOL_UNLOCK;
+    return retVal;
+}
+
+/* Asserts that this program is neither the salvageserver nor a volume
+ * utility, then returns whatever SALVSYNC_clientFinis() returns.
+ * NOTE(review): presumably expects VOL_LOCK to be held by the caller, as
+ * with the other _r routines -- confirm. */
+int
+VDisconnectSALV_r(void)
+{
+ assert((programType != salvageServer) &&
+ (programType != volumeUtility));
+ return SALVSYNC_clientFinis();
+}
+
+/* Locked entry point: holds VOL_LOCK around VReconnectSALV_r() and hands
+ * its return value back to the caller. */
+int
+VReconnectSALV(void)
+{
+    int code;
+
+    VOL_LOCK;
+    code = VReconnectSALV_r();
+    VOL_UNLOCK;
+
+    return code;
+}
+
+/* Asserts that this program is neither the salvageserver nor a volume
+ * utility, then returns whatever SALVSYNC_clientReconnect() returns.
+ * NOTE(review): presumably expects VOL_LOCK to be held by the caller, as
+ * with the other _r routines -- confirm. */
+int
+VReconnectSALV_r(void)
+{
+ assert((programType != salvageServer) &&
+ (programType != volumeUtility));
+ return SALVSYNC_clientReconnect();
+}
+#endif /* SALVSYNC_BUILD_CLIENT */
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+
+/***************************************************/
+/* FSSYNC routines */
+/***************************************************/
+
+/* This must be called by any volume utility which needs to run while the
+ file server is also running. This is separated from VInitVolumePackage so
+ that a utility can fork--and each of the children can independently
+ initialize communication with the file server */
+#ifdef FSSYNC_BUILD_CLIENT
+/* Locked entry point: holds VOL_LOCK around VConnectFS_r() and hands its
+ * return value back to the caller. */
+int
+VConnectFS(void)
+{
+    int code;
+
+    VOL_LOCK;
+    code = VConnectFS_r();
+    VOL_UNLOCK;
+
+    return code;
+}
+
+/* Initialize the FSYNC client for this process.
+ *
+ * Asserts that VInit is 2 and that the program is neither the fileserver
+ * nor the salvager. Calls FSYNC_clientInit(); when that returns nonzero,
+ * VInit is advanced to 3. The FSYNC_clientInit() result is returned
+ * unchanged. */
+int
+VConnectFS_r(void)
+{
+    int connected;
+
+    assert((VInit == 2) &&
+           (programType != fileServer) &&
+           (programType != salvager));
+
+    connected = FSYNC_clientInit();
+    if (connected != 0) {
+        VInit = 3;
+    }
+    return connected;
+}
+
+/* Asserts that this program is neither the fileserver nor the salvager,
+ * shuts the FSYNC client down via FSYNC_clientFinis(), and sets VInit
+ * back to 2 (the value VConnectFS_r asserts on before connecting). */
+void
+VDisconnectFS_r(void)
+{
+ assert((programType != fileServer) &&
+ (programType != salvager));
+ FSYNC_clientFinis();
+ VInit = 2;
+}
+
+/* Locked entry point: holds VOL_LOCK around VDisconnectFS_r(). */
+void
+VDisconnectFS(void)
+{
+ VOL_LOCK;
+ VDisconnectFS_r();
+ VOL_UNLOCK;
+}
+
+/* Thin wrapper: returns the result of FSYNC_clientChildProcReconnect().
+ * NOTE(review): by its name this is meant for a forked child to set up
+ * its own FSYNC connection -- confirm against the FSYNC client code. */
+static int
+VChildProcReconnectFS_r(void)
+{
+ return FSYNC_clientChildProcReconnect();
+}
+
+/* Locked entry point: holds VOL_LOCK around VChildProcReconnectFS_r() and
+ * hands its return value back to the caller. */
+int
+VChildProcReconnectFS(void)
+{
+    int code;
+
+    VOL_LOCK;
+    code = VChildProcReconnectFS_r();
+    VOL_UNLOCK;
+
+    return code;
+}
+#endif /* FSSYNC_BUILD_CLIENT */
+
+
+/***************************************************/
+/* volume bitmap routines */
+/***************************************************/
+
+/*
+ * Find a clear bit in the index bitmap, set it, and return its bit number
+ * as a VnodeId. If every bit in the bitmap is set, the bitmap is grown by
+ * VOLUME_BITMAP_GROWSIZE bytes and the first bit of the new area is used.
+ *
+ * ec    - cleared on entry. Set to VREADONLY when the volume is not
+ *         writeable. In BITMAP_LATER builds, a failure to load the bitmap
+ *         leaves the VGetBitmap_r error in place, or VSALVAGING for
+ *         demand attach fs (after a salvage has been requested).
+ * vp    - volume that owns the index.
+ * index - vnode index whose bitmap is searched.
+ * flags - see below.
+ *
+ * Returns the allocated bit number; returns 0 with *ec set on error.
+ *
+ * For demand attach fs, flags parameter controls
+ * locking behavior. If (flags & VOL_ALLOC_BITMAP_WAIT)
+ * is set, then this function will create a reservation
+ * and block on any other exclusive operations. Otherwise,
+ * this function assumes the caller already has exclusive
+ * access to vp, and we just change the volume state.
+ */
+VnodeId
+VAllocBitmapEntry_r(Error * ec, Volume * vp,
+                    struct vnodeIndex *index, int flags)
+{
+    VnodeId ret;
+    register byte *bp, *ep;
+#ifdef AFS_DEMAND_ATTACH_FS
+    VolState state_save;
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+    *ec = 0;
+
+    /* This test is probably redundant */
+    if (!VolumeWriteable(vp)) {
+        *ec = (bit32) VREADONLY;
+        return 0;
+    }
+
+#ifdef AFS_DEMAND_ATTACH_FS
+    if (flags & VOL_ALLOC_BITMAP_WAIT) {
+        VCreateReservation_r(vp);
+        VWaitExclusiveState_r(vp);
+    }
+    state_save = VChangeState_r(vp, VOL_STATE_GET_BITMAP);
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+#ifdef BITMAP_LATER
+    if ((programType == fileServer) && !index->bitmap) {
+        int i;
+#ifndef AFS_DEMAND_ATTACH_FS
+        /* demand attach fs uses the volume state to avoid races.
+         * specialStatus field is not used at all */
+        int wasVBUSY = 0;
+        if (vp->specialStatus == VBUSY) {
+            if (vp->goingOffline) {     /* vos dump waiting for the volume to
+                                         * go offline. We probably come here
+                                         * from AddNewReadableResidency */
+                wasVBUSY = 1;
+            } else {
+                while (vp->specialStatus == VBUSY) {
+#ifdef AFS_PTHREAD_ENV
+                    VOL_UNLOCK;
+                    sleep(2);
+                    VOL_LOCK;
+#else /* AFS_PTHREAD_ENV */
+                    IOMGR_Sleep(2);
+#endif /* AFS_PTHREAD_ENV */
+                }
+            }
+        }
+#endif /* !AFS_DEMAND_ATTACH_FS */
+
+        if (!index->bitmap) {
+#ifndef AFS_DEMAND_ATTACH_FS
+            vp->specialStatus = VBUSY;  /* Stop anyone else from using it. */
+#endif /* AFS_DEMAND_ATTACH_FS */
+            for (i = 0; i < nVNODECLASSES; i++) {
+                VGetBitmap_r(ec, vp, i);
+                if (*ec) {
+#ifdef AFS_DEMAND_ATTACH_FS
+                    VRequestSalvage_r(vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER);
+                    *ec = VSALVAGING;
+#else /* AFS_DEMAND_ATTACH_FS */
+                    DeleteVolumeFromHashTable(vp);
+                    vp->shuttingDown = 1;       /* Let who has it free it. */
+                    vp->specialStatus = 0;
+#endif /* AFS_DEMAND_ATTACH_FS */
+                    /* VnodeId is an integer id, not a pointer: report
+                     * failure as 0, matching the read-only path above */
+                    ret = 0;
+                    goto done;
+                }
+            }
+#ifndef AFS_DEMAND_ATTACH_FS
+            if (!wasVBUSY)
+                vp->specialStatus = 0;  /* Allow others to have access. */
+#endif /* AFS_DEMAND_ATTACH_FS */
+        }
+    }
+#endif /* BITMAP_LATER */
+
+#ifdef AFS_DEMAND_ATTACH_FS
+    /* the bitmap scan runs without the glock; every path that reaches
+     * "done" from here on re-takes it first */
+    VOL_UNLOCK;
+#endif /* AFS_DEMAND_ATTACH_FS */
+    bp = index->bitmap + index->bitmapOffset;
+    ep = index->bitmap + index->bitmapSize;
+    while (bp < ep) {
+        /* test 32 bits at a time; only look closer at a word that has
+         * at least one clear bit */
+        if ((*(bit32 *) bp) != (bit32) 0xffffffff) {
+            int o;
+            index->bitmapOffset = (afs_uint32) (bp - index->bitmap);
+            while (*bp == 0xff)
+                bp++;
+            o = ffs(~*bp) - 1;  /* ffs is documented in BSTRING(3) */
+            *bp |= (1 << o);
+            ret = (VnodeId) ((bp - index->bitmap) * 8 + o);
+#ifdef AFS_DEMAND_ATTACH_FS
+            VOL_LOCK;
+#endif /* AFS_DEMAND_ATTACH_FS */
+            goto done;
+        }
+        bp += sizeof(bit32) /* i.e. 4 */ ;
+    }
+    /* No bit map entry--must grow bitmap */
+    bp = (byte *)
+        realloc(index->bitmap, index->bitmapSize + VOLUME_BITMAP_GROWSIZE);
+    assert(bp != NULL);
+    index->bitmap = bp;
+    bp += index->bitmapSize;
+    memset(bp, 0, VOLUME_BITMAP_GROWSIZE);
+    index->bitmapOffset = index->bitmapSize;
+    index->bitmapSize += VOLUME_BITMAP_GROWSIZE;
+    *bp = 1;
+    ret = index->bitmapOffset * 8;
+#ifdef AFS_DEMAND_ATTACH_FS
+    VOL_LOCK;
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+ done:
+#ifdef AFS_DEMAND_ATTACH_FS
+    VChangeState_r(vp, state_save);
+    if (flags & VOL_ALLOC_BITMAP_WAIT) {
+        VCancelReservation_r(vp);
+    }
+#endif /* AFS_DEMAND_ATTACH_FS */
+    return ret;
+}
+
+/* Locked entry point: holds VOL_LOCK around VAllocBitmapEntry_r(), always
+ * passing VOL_ALLOC_BITMAP_WAIT as the flags argument, and returns the
+ * VnodeId it produced. */
+VnodeId
+VAllocBitmapEntry(Error * ec, Volume * vp, register struct vnodeIndex * index)
+{
+    VnodeId allocated;
+
+    VOL_LOCK;
+    allocated = VAllocBitmapEntry_r(ec, vp, index, VOL_ALLOC_BITMAP_WAIT);
+    VOL_UNLOCK;
+
+    return allocated;
+}
+
+/* Clear bit bitNumber in the index bitmap.
+ *
+ * *ec is set to 0 on entry and to VNOVNODE when bitNumber lies beyond the
+ * end of the bitmap. In BITMAP_LATER builds a missing bitmap is silently
+ * ignored. If the freed bit lies before the current search offset, the
+ * offset is pulled back to the 4-byte word containing it. */
+void
+VFreeBitMapEntry_r(Error * ec, register struct vnodeIndex *index,
+                   unsigned bitNumber)
+{
+    unsigned int byteIdx = bitNumber / 8;
+
+    *ec = 0;
+#ifdef BITMAP_LATER
+    if (index->bitmap == NULL) {
+        return;
+    }
+#endif /* BITMAP_LATER */
+    if (byteIdx >= index->bitmapSize) {
+        *ec = VNOVNODE;
+        return;
+    }
+    if (byteIdx < index->bitmapOffset) {
+        /* Truncate to nearest bit32 */
+        index->bitmapOffset = byteIdx & ~3;
+    }
+    index->bitmap[byteIdx] &= ~(1 << (bitNumber % 8));
+}
+
+/* Locked entry point: holds VOL_LOCK around VFreeBitMapEntry_r(), which
+ * clears bit bitNumber in the index bitmap and reports errors via *ec. */
+void
+VFreeBitMapEntry(Error * ec, register struct vnodeIndex *index,
+ unsigned bitNumber)
+{
+ VOL_LOCK;
+ VFreeBitMapEntry_r(ec, index, bitNumber);
+ VOL_UNLOCK;
+}
+
+/* Build the in-memory vnode allocation bitmap for one vnode class of a
+ * volume by reading that class's vnode index file from start to end.
+ *
+ * ec    - set to 0 on entry; set to VSALVAGE if a non-null vnode carries
+ *         the wrong magic, or if vp->nextVnodeUnique is lower than the
+ *         largest uniquifier seen plus one
+ * vp    - volume whose index is scanned
+ * class - selects VnodeClassInfo[class] and vp->vnodeIndex[class]
+ *
+ * this function will drop the glock internally.
+ * for old pthread fileservers, this is safe thanks to vbusy.
+ *
+ * for demand attach fs, caller must have already called
+ * VCreateReservation_r and VWaitExclusiveState_r */
+static void
+VGetBitmap_r(Error * ec, Volume * vp, VnodeClass class)
+{
+ StreamHandle_t *file;
+ int nVnodes;
+ int size;
+ struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
+ struct vnodeIndex *vip = &vp->vnodeIndex[class];
+ struct VnodeDiskObject *vnode;
+ unsigned int unique = 0; /* becomes (largest uniquifier seen) + 1 */
+ FdHandle_t *fdP;
+#ifdef BITMAP_LATER
+ byte *BitMap = 0; /* private bitmap, installed into vip only after the glock is retaken */
+#endif /* BITMAP_LATER */
+#ifdef AFS_DEMAND_ATTACH_FS
+ VolState state_save;
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+ *ec = 0;
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ state_save = VChangeState_r(vp, VOL_STATE_GET_BITMAP); /* restored just before returning */
+#endif /* AFS_DEMAND_ATTACH_FS */
+ VOL_UNLOCK; /* the index scan below runs without the glock */
+
+ fdP = IH_OPEN(vip->handle);
+ assert(fdP != NULL);
+ file = FDH_FDOPEN(fdP, "r");
+ assert(file != NULL);
+ vnode = (VnodeDiskObject *) malloc(vcp->diskSize);
+ assert(vnode != NULL);
+ size = OS_SIZE(fdP->fd_fd);
+ assert(size != -1);
+ nVnodes = (size <= vcp->diskSize ? 0 : size - vcp->diskSize)
+ >> vcp->logSize; /* entries after the first diskSize bytes, which the seek below also skips */
+ vip->bitmapSize = ((nVnodes / 8) + 10) / 4 * 4; /* The 10 is a little extra so
+ * a few files can be created in this volume.
+ * The bit map allocator wants a multiple of 4
+ * bytes; note that the integer "/ 4 * 4" here
+ * truncates down to that multiple rather than
+ * rounding up */
+#ifdef BITMAP_LATER
+ BitMap = (byte *) calloc(1, vip->bitmapSize);
+ assert(BitMap != NULL);
+#else /* BITMAP_LATER */
+ vip->bitmap = (byte *) calloc(1, vip->bitmapSize);
+ assert(vip->bitmap != NULL);
+ vip->bitmapOffset = 0;
+#endif /* BITMAP_LATER */
+ if (STREAM_SEEK(file, vcp->diskSize, 0) != -1) {
+ int bitNumber = 0;
+ for (bitNumber = 0; bitNumber < nVnodes + 100; bitNumber++) { /* a failed read below ends the scan early */
+ if (STREAM_READ(vnode, vcp->diskSize, 1, file) != 1)
+ break;
+ if (vnode->type != vNull) {
+ if (vnode->vnodeMagic != vcp->magic) {
+ Log("GetBitmap: addled vnode index in volume %s; volume needs salvage\n", V_name(vp));
+ *ec = VSALVAGE;
+ break;
+ }
+#ifdef BITMAP_LATER
+ *(BitMap + (bitNumber >> 3)) |= (1 << (bitNumber & 0x7));
+#else /* BITMAP_LATER */
+ *(vip->bitmap + (bitNumber >> 3)) |= (1 << (bitNumber & 0x7)); /* mark this vnode slot as in use */
+#endif /* BITMAP_LATER */
+ if (unique <= vnode->uniquifier)
+ unique = vnode->uniquifier + 1;
+ }
+#ifndef AFS_PTHREAD_ENV
+ if ((bitNumber & 0x00ff) == 0x0ff) { /* every 256 iterations */
+ IOMGR_Poll();
+ }
+#endif /* !AFS_PTHREAD_ENV */
+ }
+ }
+ if (vp->nextVnodeUnique < unique) {
+ Log("GetBitmap: bad volume uniquifier for volume %s; volume needs salvage\n", V_name(vp));
+ *ec = VSALVAGE;
+ }
+ /* Paranoia, partly justified--I think fclose after fdopen
+ * doesn't seem to close fd. In any event, the documentation
+ * doesn't specify, so it's safer to close it twice.
+ */
+ STREAM_CLOSE(file);
+ FDH_CLOSE(fdP);
+ free(vnode);
+
+ VOL_LOCK;
+#ifdef BITMAP_LATER
+ /* There may have been a racing condition with some other thread, both
+ * creating the bitmaps for this volume. If the other thread was faster
+ * the pointer to bitmap should already be filled and we can free ours.
+ */
+ if (vip->bitmap == NULL) {
+ vip->bitmap = BitMap;
+ vip->bitmapOffset = 0;
+ } else
+ free((byte *) BitMap);
+#endif /* BITMAP_LATER */
+#ifdef AFS_DEMAND_ATTACH_FS
+ VChangeState_r(vp, state_save);
+#endif /* AFS_DEMAND_ATTACH_FS */
+}
+
+
+/***************************************************/
+/* demand attach fs state machine routines */
+/***************************************************/
+
+#ifdef AFS_DEMAND_ATTACH_FS
+/* Sleep on the volume's attach-state condition variable until the attach
+ * state differs from the one observed on entry.  The wait uses
+ * vol_glock_mutex; the caller must already be counted in nWaiters or
+ * nUsers.  The volume must not have reached VOL_STATE_FREED on wakeup. */
+static void
+VWaitStateChange_r(Volume * vp)
+{
+    const VolState entry_state = V_attachState(vp);
+
+    assert(vp->nWaiters || vp->nUsers);
+    for (;;) {
+        assert(pthread_cond_wait(&V_attachCV(vp), &vol_glock_mutex) == 0);
+        if (V_attachState(vp) != entry_state)
+            break;
+    }
+    assert(V_attachState(vp) != VOL_STATE_FREED);
+}
+
+/* Sleep until the volume is no longer in a state that IsExclusiveState()
+ * reports as exclusive.  Returns immediately if it is not in one now.
+ * The wait uses vol_glock_mutex; the caller must already be counted in
+ * nWaiters or nUsers. */
+static void
+VWaitExclusiveState_r(Volume * vp)
+{
+    assert(vp->nWaiters || vp->nUsers);
+    for (;;) {
+        if (!IsExclusiveState(V_attachState(vp)))
+            break;
+        assert(pthread_cond_wait(&V_attachCV(vp), &vol_glock_mutex) == 0);
+    }
+    assert(V_attachState(vp) != VOL_STATE_FREED);
+}
+
+/* Install new_state as the volume's attach state, keep the per-state
+ * population counters in VStats in step, wake every thread waiting on
+ * the volume's attach-state condition variable, and hand the previous
+ * state back to the caller. */
+VolState
+VChangeState_r(Volume * vp, VolState new_state)
+{
+    const VolState prior = V_attachState(vp);
+
+    V_attachState(vp) = new_state;
+
+    /* XXX profiling need to make sure these counters
+     * don't kill performance... */
+    VStats.state_levels[new_state]++;
+    VStats.state_levels[prior]--;
+
+    assert(pthread_cond_broadcast(&V_attachCV(vp)) == 0);
+    return prior;
+}
+
+/* Report whether `state` is one of the states that require exclusive
+ * access without holding the glock.  Returns 1 for those states and 0
+ * for every other value. */
+static int
+IsExclusiveState(VolState state)
+{
+    return (state == VOL_STATE_UPDATING
+            || state == VOL_STATE_ATTACHING
+            || state == VOL_STATE_GET_BITMAP
+            || state == VOL_STATE_HDR_LOADING
+            || state == VOL_STATE_HDR_ATTACHING
+            || state == VOL_STATE_OFFLINING
+            || state == VOL_STATE_DETACHING) ? 1 : 0;
+}
+
+/* tell caller whether V_attachState is an error condition */
+static int
+IsErrorState(VolState state)
+{
+ switch (state) {
+ case VOL_STATE_ERROR:
+ case VOL_STATE_SALVAGING:
+ return 1;
+ }
+ return 0;
+}
+
+/* Report whether `state` is a usable attach state: it must lie in
+ * [0, VOL_STATE_COUNT) and must not be VOL_STATE_FREED.  Returns 1 when
+ * valid, 0 otherwise. */
+static int
+IsValidState(VolState state)
+{
+    if (state < 0)
+        return 0;
+    if (state >= VOL_STATE_COUNT)
+        return 0;
+    if (state == VOL_STATE_FREED)
+        return 0;
+    return 1;
+}
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+
+/***************************************************/
+/* Volume Path and Volume Number utility routines */
+/***************************************************/
+
+/* Find which disk partition holds the header file for volumeId.
+ *
+ * ec         - set to 0 on success, VNOVOL if no partition in
+ *              DiskPartitionList contains the file
+ * volumeId   - volume to look for; formatted with VFORMAT
+ * partitionp - receives the partition name, or NULL when not found
+ * namep      - receives "/" followed by the formatted file name, or NULL
+ *              when not found
+ *
+ * Both returned strings live in static buffers inside this function, so
+ * they are overwritten by the next call and must not be freed.
+ *
+ * A partition whose path or name would not fit in a VMAXPATHLEN buffer
+ * is skipped instead of being copied unchecked. */
+static void
+GetVolumePath(Error * ec, VolId volumeId, char **partitionp, char **namep)
+{
+    static char partition[VMAXPATHLEN], name[VMAXPATHLEN];
+    char path[VMAXPATHLEN];
+    int found = 0;
+    struct DiskPartition *dp;
+
+    *ec = 0;
+    name[0] = '/';
+    (void)afs_snprintf(&name[1], (sizeof name) - 1, VFORMAT, volumeId);
+    for (dp = DiskPartitionList; dp; dp = dp->next) {
+        struct afs_stat status;
+        const char *ppath = VPartitionPath(dp);
+
+        /* refuse to overflow the fixed-size buffers */
+        if (strlen(ppath) + strlen(name) >= sizeof path)
+            continue;
+        if (strlen(dp->name) >= sizeof partition)
+            continue;
+
+        strcpy(path, ppath);
+        strcat(path, name);
+        if (afs_stat(path, &status) == 0) {
+            strcpy(partition, dp->name);
+            found = 1;
+            break;
+        }
+    }
+    if (!found) {
+        *ec = VNOVOL;
+        *partitionp = *namep = NULL;
+    } else {
+        *partitionp = partition;
+        *namep = name;
+    }
+}
+
+/* Parse the numeric volume id out of a volume header file name.  One
+ * leading '/' is skipped if present, then exactly one more character is
+ * skipped, and the rest is handed to atoi(). */
+int
+VolumeNumber(char *name)
+{
+    char *digits = (*name == '/') ? name + 2 : name + 1;
+
+    return atoi(digits);
+}
+
+/* Format volumeId with VFORMAT into a function-local static buffer and
+ * return a pointer to it.  The buffer is shared by every call, so each
+ * call overwrites the previous result; VolumeExternalName_r() writes
+ * into a caller-supplied buffer instead. */
+char *
+VolumeExternalName(VolumeId volumeId)
+{
+ static char name[VMAXPATHLEN];
+ (void)afs_snprintf(name, sizeof name, VFORMAT, volumeId);
+ return name;
+}
+
+/* Format volumeId with VFORMAT into the caller's buffer `name` of size
+ * `len`.  Returns whatever afs_snprintf() returns. */
+static int
+VolumeExternalName_r(VolumeId volumeId, char * name, size_t len)
+{
+ return afs_snprintf(name, len, VFORMAT, volumeId);
+}
+
+
+/***************************************************/
+/* Volume Usage Statistics routines */
+/***************************************************/
+
+#if OPENAFS_VOL_STATS
+#define OneDay (86400) /* 24 hours' worth of seconds */
+#else
+#define OneDay (24*60*60) /* 24 hours */
+#endif /* OPENAFS_VOL_STATS */
+
+/* Truncate `date` down to a multiple of OneDay, measured relative to
+ * TimeZoneCorrection.  The argument is parenthesized so an expression
+ * such as Midnight(a + b) groups as intended. */
+#define Midnight(date) ((((date) - TimeZoneCorrection) / OneDay) * OneDay + TimeZoneCorrection)
+
+/*------------------------------------------------------------------------
+ * [export] VAdjustVolumeStatistics_r
+ *
+ * Description:
+ *	When more than OneDay has elapsed since the volume's dayUseDate,
+ *	age the per-day usage history (weekUse), store the current dayUse
+ *	in the slot for the number of elapsed days, reset dayUse, and move
+ *	dayUseDate to Midnight(now).  With OPENAFS_VOL_STATS the detailed
+ *	statistics area is zeroed as well.
+ *
+ * Arguments:
+ *	vp : Pointer to the volume structure to update.
+ *
+ * Returns:
+ *	0 (always!)
+ *
+ * Side Effects:
+ *	As described.
+ *------------------------------------------------------------------------*/
+
+int
+VAdjustVolumeStatistics_r(register Volume * vp)
+{
+    unsigned int current = FT_ApproxTime();
+    register int elapsed_days, slot;
+
+    /* nothing to do until more than a day of collection has gone by */
+    if (current - V_dayUseDate(vp) <= OneDay)
+        return (0);
+
+    elapsed_days = (current - V_dayUseDate(vp)) / OneDay;
+
+    /* shift surviving history toward the older slots */
+    slot = 6;
+    while (slot > elapsed_days - 1) {
+        V_weekUse(vp)[slot] = V_weekUse(vp)[slot - elapsed_days];
+        slot--;
+    }
+
+    /* days for which nothing was recorded get zero */
+    for (slot = 0; slot < elapsed_days - 1 && slot < 7; slot++)
+        V_weekUse(vp)[slot] = 0;
+
+    if (elapsed_days <= 7)
+        V_weekUse(vp)[elapsed_days - 1] = V_dayUse(vp);
+
+    V_dayUse(vp) = 0;
+    V_dayUseDate(vp) = Midnight(current);
+
+#if OPENAFS_VOL_STATS
+    /* clear the entire VOL_STATS_BYTES of detailed volume statistics */
+    memset((char *)(V_stat_area(vp)), 0, VOL_STATS_BYTES);
+#endif /* OPENAFS_VOL_STATS */
+
+    /*
+     * Always return happily.
+     */
+    return (0);
+} /*VAdjustVolumeStatistics_r */
+
+/* Locking wrapper: runs VAdjustVolumeStatistics_r(vp) between VOL_LOCK
+ * and VOL_UNLOCK and returns its result. */
+int
+VAdjustVolumeStatistics(register Volume * vp)
+{
+ int retVal;
+ VOL_LOCK;
+ retVal = VAdjustVolumeStatistics_r(vp);
+ VOL_UNLOCK;
+ return retVal;
+}
+
+/* Count one more use of the volume today.  Daily statistics are rolled
+ * over first if more than OneDay has passed since dayUseDate.  Whenever
+ * the pre-increment dayUse value is a multiple of 128, the volume header
+ * is written out through VUpdateVolume_r(); its error result is
+ * discarded. */
+void
+VBumpVolumeUsage_r(register Volume * vp)
+{
+    Error error;
+    unsigned int now = FT_ApproxTime();
+
+    if (now - V_dayUseDate(vp) > OneDay)
+        VAdjustVolumeStatistics_r(vp);
+
+    /*
+     * Save the volume header image to disk after every 128 bumps to dayUse.
+     */
+    if ((V_dayUse(vp)++ & 127) != 0)
+        return;
+
+    VUpdateVolume_r(&error, vp, VOL_UPDATE_WAIT);
+}
+
+/* Locking wrapper: runs VBumpVolumeUsage_r(vp) between VOL_LOCK and
+ * VOL_UNLOCK. */
+void
+VBumpVolumeUsage(register Volume * vp)
+{
+ VOL_LOCK;
+ VBumpVolumeUsage_r(vp);
+ VOL_UNLOCK;
+}
+
+/* Refresh disk usage via VResetDiskUsage_r() once VInit has reached 2,
+ * polling in 10-second sleeps until it does.  In builds without
+ * AFS_DEMAND_ATTACH_FS, every third call also runs VScanUpdateList().
+ * NOTE(review): the counter name suggests this is invoked every five
+ * minutes -- confirm against the caller. */
+void
+VSetDiskUsage_r(void)
+{
+#ifndef AFS_DEMAND_ATTACH_FS
+ static int FifteenMinuteCounter = 0; /* calls since the last VScanUpdateList() */
+#endif
+
+ while (VInit < 2) {
+ /* NOTE: Don't attempt to access the partitions list until the
+ * initialization level indicates that all volumes are attached,
+ * which implies that all partitions are initialized. */
+#ifdef AFS_PTHREAD_ENV
+ sleep(10);
+#else /* AFS_PTHREAD_ENV */
+ IOMGR_Sleep(10);
+#endif /* AFS_PTHREAD_ENV */
+ }
+
+ VResetDiskUsage_r();
+
+#ifndef AFS_DEMAND_ATTACH_FS
+ if (++FifteenMinuteCounter == 3) {
+ FifteenMinuteCounter = 0;
+ VScanUpdateList();
+ }
+#endif /* !AFS_DEMAND_ATTACH_FS */
+}
+
+/* Locking wrapper: runs VSetDiskUsage_r() between VOL_LOCK and
+ * VOL_UNLOCK. */
+void
+VSetDiskUsage(void)
+{
+ VOL_LOCK;
+ VSetDiskUsage_r();
+ VOL_UNLOCK;
+}
+
+
+/***************************************************/
+/* Volume Update List routines */
+/***************************************************/
+
+/* The number of seconds (10 minutes' worth) that a volume must go without
+ * being updated before the "Dont salvage" flag in the volume header is turned on */
+#define SALVAGE_INTERVAL (10*60)
+
+/*
+ * demand attach fs
+ *
+ * volume update list functionality has been moved into the VLRU
+ * the DONT_SALVAGE flag is now set during VLRU demotion
+ */
+
+#ifndef AFS_DEMAND_ATTACH_FS
+static VolumeId *UpdateList = NULL; /* Pointer to array of Volume ID's */
+static int nUpdatedVolumes = 0; /* Updated with entry in UpdateList, salvage after crash flag on */
+static int updateSize = 0; /* number of entries possible */
+#define UPDATE_LIST_SIZE 128 /* initial size increment (must be a power of 2!) */
+#endif /* !AFS_DEMAND_ATTACH_FS */
+
+/* Note that the volume has just been modified.
+ *
+ * Always refreshes vp->updateTime.  If the header's dontSalvage flag was
+ * set, it is cleared and the volume is synced with VSyncVolume_r().
+ * Under demand attach the VOL_HDR_DONTSALV attach flag is cleared too.
+ * Otherwise, provided the sync succeeded, the volume id is appended to
+ * UpdateList so that VScanUpdateList() can set the flag again later.
+ *
+ * ec - set to 0 on entry; carries any error reported by VSyncVolume_r
+ * vp - the volume that was updated
+ *
+ * UpdateList starts at UPDATE_LIST_SIZE entries and doubles whenever it
+ * is full.  Growth stops at 524288 entries: beyond that a warning is
+ * logged and the id is not recorded. */
+void
+VAddToVolumeUpdateList_r(Error * ec, Volume * vp)
+{
+    *ec = 0;
+    vp->updateTime = FT_ApproxTime();
+    if (V_dontSalvage(vp) == 0)
+        return;
+    V_dontSalvage(vp) = 0;
+    VSyncVolume_r(ec, vp, 0);
+#ifdef AFS_DEMAND_ATTACH_FS
+    V_attachFlags(vp) &= ~(VOL_HDR_DONTSALV);
+#else /* !AFS_DEMAND_ATTACH_FS */
+    if (*ec)
+        return;
+    if (UpdateList == NULL) {
+        updateSize = UPDATE_LIST_SIZE;
+        UpdateList = (VolumeId *) malloc(sizeof(VolumeId) * updateSize);
+    } else if (nUpdatedVolumes == updateSize) {
+        /* Compute the doubled size separately and commit it only after a
+         * successful grow.  A bare "updateSize << 1;" discards its
+         * result, which would leave the array at its old size while the
+         * store below ran past the end of it. */
+        int newSize = updateSize << 1;
+
+        if (newSize > 524288) {
+            Log("warning: there is likely a bug in the volume update scanner\n");
+            return;
+        }
+        UpdateList =
+            (VolumeId *) realloc(UpdateList, sizeof(VolumeId) * newSize);
+        updateSize = newSize;
+    }
+    assert(UpdateList != NULL);
+    UpdateList[nUpdatedVolumes++] = V_id(vp);
+#endif /* !AFS_DEMAND_ATTACH_FS */
+}
+
+#ifndef AFS_DEMAND_ATTACH_FS
+/* Walk UpdateList, compacting it in place.  An entry is dropped (counted
+ * in `gap`) when VGetVolume_r() fails for it, or when the volume has
+ * nUsers == 1 and has gone more than SALVAGE_INTERVAL without an update;
+ * in the latter case DONT_SALVAGE is set in the header and written out
+ * first.  Surviving entries slide down by `gap` slots. */
+static void
+VScanUpdateList(void)
+{
+ register int i, gap;
+ register Volume *vp;
+ Error error;
afs_uint32 now = FT_ApproxTime();
/* Be careful with this code, since it works with interleaved calls to AddToVolumeUpdateList */
for (i = gap = 0; i < nUpdatedVolumes; i++) {
+ if (gap)
+ UpdateList[i - gap] = UpdateList[i]; /* the VGetVolume_r() argument below performs this same assignment */
+
+ /* XXX this routine needlessly messes up the Volume LRU by
+ * breaking the LRU temporal-locality assumptions.....
+ * we should use a special volume header allocator here */
vp = VGetVolume_r(&error, UpdateList[i - gap] = UpdateList[i]);
if (error) {
gap++;
} else if (vp->nUsers == 1 && now - vp->updateTime > SALVAGE_INTERVAL) {
V_dontSalvage(vp) = DONT_SALVAGE;
- VUpdateVolume_r(&error, vp); /* No need to fsync--not critical */
+ VUpdateVolume_r(&error, vp, 0); /* No need to fsync--not critical */
gap++;
}
- if (vp)
+
+ if (vp) {
+ VPutVolume_r(vp);
+ }
+
+#ifndef AFS_PTHREAD_ENV
+ IOMGR_Poll();
+#endif /* !AFS_PTHREAD_ENV */
+ }
+ nUpdatedVolumes -= gap; /* entries dropped above no longer count */
+}
+#endif /* !AFS_DEMAND_ATTACH_FS */
+
+
+/***************************************************/
+/* Volume LRU routines */
+/***************************************************/
+
+/* demand attach fs
+ * volume LRU
+ *
+ * with demand attach fs, we attempt to soft detach(1)
+ * volumes which have not been accessed in a long time
+ * in order to speed up fileserver shutdown
+ *
+ * (1) by soft detach we mean a process very similar
+ * to VOffline, except the final state of the
+ * Volume will be VOL_STATE_PREATTACHED, instead
+ * of the usual VOL_STATE_UNATTACHED
+ */
+#ifdef AFS_DEMAND_ATTACH_FS
+
+/* implementation is reminiscent of a generational GC
+ *
+ * queue 0 is newly attached volumes. this queue is
+ * sorted by attach timestamp
+ *
+ * queue 1 is volumes that have been around a bit
+ * longer than queue 0. this queue is sorted by
+ * attach timestamp
+ *
+ * queue 2 is volumes that have been around the longest.
+ * this queue is unsorted
+ *
+ * queue 3 is volumes that have been marked as
+ * candidates for soft detachment. this queue is
+ * unsorted
+ */
+#define VLRU_GENERATIONS 3 /* number of generations in VLRU */
+#define VLRU_QUEUES 5 /* total number of VLRU queues */
+struct VLRU_q {
+ volatile struct rx_queue q; /* queue head; volumes are linked in through their vlru member */
+ volatile int len; /* number of volumes on this queue, adjusted by hand wherever entries move */
+ volatile int busy; /* nonzero while a thread holds exclusive access (VLRU_BeginExclusive_r) */
+ pthread_cond_t cv; /* broadcast by VLRU_EndExclusive_r, waited on by VLRU_Wait_r */
+};
+struct VLRU {
+ struct VLRU_q q[VLRU_QUEUES]; /* indexed by the VLRU_QUEUE_* constants */
+
+ /* VLRU config */
+ afs_uint32 promotion_interval[VLRU_GENERATIONS-1]; /* interval between promotions */
+ afs_uint32 scan_interval[VLRU_GENERATIONS+1]; /* interval between scans for candidates */
+
+ /* state */
+ int next_idx;
+ afs_uint32 last_promotion[VLRU_GENERATIONS-1]; /* timestamp of last promotion scan */
+ afs_uint32 last_scan[VLRU_GENERATIONS+1]; /* timestamp of last detach scan */
+
+ int scanner_state; /* state of scanner thread; one of VLRU_SCANNER_STATE_* */
+ pthread_cond_t cv; /* state transition CV */
+};
+
+static struct VLRU volume_LRU;
+
+/* valid scanner states */
+#define VLRU_SCANNER_STATE_OFFLINE 0
+#define VLRU_SCANNER_STATE_ONLINE 1
+#define VLRU_SCANNER_STATE_SHUTTING_DOWN 2
+#define VLRU_SCANNER_STATE_PAUSING 3
+#define VLRU_SCANNER_STATE_PAUSED 4
+
+/* vlru disk data header stuff */
+#define VLRU_DISK_MAGIC 0x7a8b9cad
+#define VLRU_DISK_VERSION 1
+
+/* vlru default expiration time (for eventual fs state serialization of vlru data) */
+#define VLRU_DUMP_EXPIRATION_TIME (60*60*24*7) /* expire vlru data after 1 week */
+
+
+static afs_uint32 VLRU_offline_thresh = VLRU_DEFAULT_OFFLINE_THRESH;
+static afs_uint32 VLRU_offline_interval = VLRU_DEFAULT_OFFLINE_INTERVAL;
+static afs_uint32 VLRU_offline_max = VLRU_DEFAULT_OFFLINE_MAX;
+static afs_uint32 VLRU_enabled = 1;
+
+/* queue synchronization routines */
+static void VLRU_BeginExclusive_r(struct VLRU_q * q);
+static void VLRU_EndExclusive_r(struct VLRU_q * q);
+static void VLRU_Wait_r(struct VLRU_q * q);
+
+/* Set one VLRU tunable and recompute the derived timing constants.
+ *
+ * option selects what `val` is stored into:
+ *   VLRU_SET_THRESH   -- period of inactivity after which volumes are
+ *                        eligible for being detached
+ *   VLRU_SET_INTERVAL -- time interval between calls to the volume LRU
+ *                        "garbage collector"
+ *   VLRU_SET_MAX      -- max number of volumes to deallocate in one GC
+ *                        pass
+ *   VLRU_SET_ENABLED  -- whether the VLRU is enabled at all
+ *
+ * An unrecognized option stores nothing; VLRU_ComputeConstants() is
+ * called in every case.
+ */
+void
+VLRU_SetOptions(int option, afs_uint32 val)
+{
+    switch (option) {
+    case VLRU_SET_THRESH:
+        VLRU_offline_thresh = val;
+        break;
+    case VLRU_SET_INTERVAL:
+        VLRU_offline_interval = val;
+        break;
+    case VLRU_SET_MAX:
+        VLRU_offline_max = val;
+        break;
+    case VLRU_SET_ENABLED:
+        VLRU_enabled = val;
+        break;
+    default:
+        break;
+    }
+    VLRU_ComputeConstants();
+}
+
+/* compute the VLRU internal timing parameters based upon the user's inputs
+ *
+ * Derives, from VLRU_offline_thresh and VLRU_offline_interval:
+ *   - the candidate-queue scan interval (= offline interval)
+ *   - the promotion intervals for the new and mid generations
+ *     (2x and 4x the offline threshold)
+ *   - the generation-0 scan interval: thresh/8 when thresh is more than
+ *     16 intervals long, otherwise 2x the offline interval
+ *
+ * An offline interval of 0 (reachable through VLRU_SetOptions) is
+ * treated as "threshold is arbitrarily many intervals long" rather than
+ * being divided by. */
+static void
+VLRU_ComputeConstants(void)
+{
+    afs_uint32 factor;
+
+    if (VLRU_offline_interval != 0) {
+        factor = VLRU_offline_thresh / VLRU_offline_interval;
+    } else {
+        /* avoid dividing by zero; take the "large factor" branch below */
+        factor = ~((afs_uint32) 0);
+    }
+
+    /* compute the candidate scan interval */
+    volume_LRU.scan_interval[VLRU_QUEUE_CANDIDATE] = VLRU_offline_interval;
+
+    /* compute the promotion intervals */
+    volume_LRU.promotion_interval[VLRU_QUEUE_NEW] = VLRU_offline_thresh * 2;
+    volume_LRU.promotion_interval[VLRU_QUEUE_MID] = VLRU_offline_thresh * 4;
+
+    if (factor > 16) {
+        /* compute the gen 0 scan interval */
+        volume_LRU.scan_interval[VLRU_QUEUE_NEW] = VLRU_offline_thresh / 8;
+    } else {
+        /* compute the gen 0 scan interval */
+        volume_LRU.scan_interval[VLRU_QUEUE_NEW] = VLRU_offline_interval * 2;
+    }
+}
+
+/* initialize VLRU
+ *
+ * Does nothing beyond logging when the VLRU is disabled.  Otherwise it
+ * initializes every queue (empty, not busy, with its own condition
+ * variable), computes the timing constants, logs the configuration, and
+ * -- in the fileserver only -- starts VLRU_ScannerThread as a detached
+ * thread. */
+static void
+VInitVLRU(void)
+{
+    pthread_t tid;
+    pthread_attr_t attrs;
+    int i;
+
+    if (!VLRU_enabled) {
+        Log("VLRU: disabled\n");
+        return;
+    }
+
+    /* initialize each of the VLRU queues */
+    for (i = 0; i < VLRU_QUEUES; i++) {
+        queue_Init(&volume_LRU.q[i]);
+        volume_LRU.q[i].len = 0;
+        volume_LRU.q[i].busy = 0;
+        assert(pthread_cond_init(&volume_LRU.q[i].cv, NULL) == 0);
+    }
+
+    /* setup the timing constants */
+    VLRU_ComputeConstants();
+
+    /* XXX put inside LogLevel check? */
+    /* all of these values are afs_uint32, hence %u */
+    Log("VLRU: starting scanner with the following configuration parameters:\n");
+    Log("VLRU:  offlining volumes after minimum of %u seconds of inactivity\n", VLRU_offline_thresh);
+    Log("VLRU:  running VLRU soft detach pass every %u seconds\n", VLRU_offline_interval);
+    Log("VLRU:  taking up to %u volumes offline per pass\n", VLRU_offline_max);
+    Log("VLRU:  scanning generation 0 for inactive volumes every %u seconds\n", volume_LRU.scan_interval[0]);
+    Log("VLRU:  scanning for promotion/demotion between generations 0 and 1 every %u seconds\n", volume_LRU.promotion_interval[0]);
+    Log("VLRU:  scanning for promotion/demotion between generations 1 and 2 every %u seconds\n", volume_LRU.promotion_interval[1]);
+
+    /* start up the VLRU scanner */
+    volume_LRU.scanner_state = VLRU_SCANNER_STATE_OFFLINE;
+    if (programType == fileServer) {
+        assert(pthread_cond_init(&volume_LRU.cv, NULL) == 0);
+        assert(pthread_attr_init(&attrs) == 0);
+        assert(pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED) == 0);
+        assert(pthread_create(&tid, &attrs, &VLRU_ScannerThread, NULL) == 0);
+        /* the attribute object is not needed once the thread exists */
+        assert(pthread_attr_destroy(&attrs) == 0);
+    }
+}
+
+/* initialize LRU support for a volume
+ *
+ * No-op when the VLRU is disabled.  Otherwise asserts that vp is not
+ * already linked on a queue and marks its queue index as
+ * VLRU_QUEUE_INVALID. */
+static void
+VLRU_Init_Node_r(volatile Volume * vp)
+{
+ if (!VLRU_enabled)
+ return;
+
+ assert(queue_IsNotOnQueue(&vp->vlru));
+ vp->vlru.idx = VLRU_QUEUE_INVALID;
+}
+
+/* add volume to VLRU
+ * now supports adding to queues other
+ * than new for vlru state restore
+ * caller MUST hold a ref count on vp
+ *
+ * No-op when the VLRU is disabled or vp is already on a queue.  The
+ * target queue is vp->vlru.idx when that lies in [0, VLRU_QUEUE_INVALID),
+ * otherwise VLRU_QUEUE_NEW.  The volume is prepended, the queue length
+ * bumped, VOL_ON_VLRU set, and stats.last_promote stamped with the
+ * current time.
+ * NOTE(review): only the NEW queue is waited on, even when idx selects a
+ * different queue -- confirm that is sufficient. */
+static void
+VLRU_Add_r(volatile Volume * vp)
+{
+ int idx;
+
+ if (!VLRU_enabled)
+ return;
+
+ if (queue_IsOnQueue(&vp->vlru))
+ return;
+
+ VLRU_Wait_r(&volume_LRU.q[VLRU_QUEUE_NEW]);
+
+ /* repeat check since VLRU_Wait_r may have dropped
+ * the glock */
+ if (queue_IsNotOnQueue(&vp->vlru)) {
+ idx = vp->vlru.idx;
+ if ((idx < 0) || (idx >= VLRU_QUEUE_INVALID)) {
+ idx = vp->vlru.idx = VLRU_QUEUE_NEW;
+ }
+ queue_Prepend(&volume_LRU.q[idx], &vp->vlru);
+ volume_LRU.q[idx].len++;
+ V_attachFlags(vp) |= VOL_ON_VLRU;
+ vp->stats.last_promote = FT_ApproxTime();
+ }
+}
+
+/* delete volume from VLRU
+ * caller MUST hold a ref count on vp
+ *
+ * No-op when the VLRU is disabled, when vp is on no queue, or when its
+ * queue index is VLRU_QUEUE_INVALID.  Otherwise waits until the queue
+ * holding vp is not busy, unlinks vp, decrements that queue's length,
+ * marks the index invalid and clears VOL_ON_VLRU. */
+static void
+VLRU_Delete_r(volatile Volume * vp)
+{
+ int idx;
+
+ if (!VLRU_enabled)
+ return;
+
+ if (queue_IsNotOnQueue(&vp->vlru))
+ return;
+
+ /* handle races */
+ do {
+ idx = vp->vlru.idx;
+ if (idx == VLRU_QUEUE_INVALID)
+ return;
+ VLRU_Wait_r(&volume_LRU.q[idx]);
+ } while (idx != vp->vlru.idx); /* vp may have changed queues while we waited; retry on the new one */
+
+ /* now remove from the VLRU and update
+ * the appropriate counter */
+ queue_Remove(&vp->vlru);
+ volume_LRU.q[idx].len--;
+ vp->vlru.idx = VLRU_QUEUE_INVALID;
+ V_attachFlags(vp) &= ~(VOL_ON_VLRU);
+}
+
+/* signal that volume was just accessed.
+ * caller MUST hold a ref count on vp
+ *
+ * No-op when the VLRU is disabled or vp is on no queue.  Otherwise
+ * stamps stats.last_get with the current time and, if vp sits on the
+ * soft detach candidate queue, appends it to the NEW queue while holding
+ * exclusive access to both queues. */
+static void
+VLRU_UpdateAccess_r(volatile Volume * vp)
+{
+ afs_uint32 live_interval;
+ Volume * rvp = NULL; /* non-NULL only while we hold a reservation and both queues */
+
+ if (!VLRU_enabled)
+ return;
+
+ if (queue_IsNotOnQueue(&vp->vlru))
+ return;
+
+ assert(V_attachFlags(vp) & VOL_ON_VLRU);
+
+ /* update the access timestamp */
+ vp->stats.last_get = FT_ApproxTime();
+
+ /*
+ * if the volume is on the soft detach candidate
+ * list, we need to safely move it back to a
+ * regular generation. this has to be done
+ * carefully so we don't race against the scanner
+ * thread.
+ */
+
+ /* if this volume is on the soft detach candidate queue,
+ * then grab exclusive access to the necessary queues */
+ if (vp->vlru.idx == VLRU_QUEUE_CANDIDATE) {
+ rvp = vp;
+ VCreateReservation_r(rvp);
+
+ VLRU_Wait_r(&volume_LRU.q[VLRU_QUEUE_NEW]);
+ VLRU_BeginExclusive_r(&volume_LRU.q[VLRU_QUEUE_NEW]);
+ VLRU_Wait_r(&volume_LRU.q[VLRU_QUEUE_CANDIDATE]);
+ VLRU_BeginExclusive_r(&volume_LRU.q[VLRU_QUEUE_CANDIDATE]);
+ }
+
+ /* make sure multiple threads don't race to update */
+ if (vp->vlru.idx == VLRU_QUEUE_CANDIDATE) { /* re-checked: the waits above may have let another thread move vp */
+ VLRU_SwitchQueues(vp, VLRU_QUEUE_NEW, 1);
+ }
+
+ if (rvp) {
+ VLRU_EndExclusive_r(&volume_LRU.q[VLRU_QUEUE_CANDIDATE]); /* released in reverse order of acquisition */
+ VLRU_EndExclusive_r(&volume_LRU.q[VLRU_QUEUE_NEW]);
+ VCancelReservation_r(rvp);
+ }
+}
+
+/* Move a volume from the VLRU queue it is currently on to queue new_idx.
+ * A nonzero `append` puts it at the tail of the destination; zero puts
+ * it at the head.  Both queue length counters and vp->vlru.idx are
+ * updated.  Does nothing when vp is not on any queue. */
+static void
+VLRU_SwitchQueues(volatile Volume * vp, int new_idx, int append)
+{
+    int old_idx;
+
+    if (queue_IsNotOnQueue(&vp->vlru))
+        return;
+
+    /* unlink from the source queue */
+    old_idx = vp->vlru.idx;
+    queue_Remove(&vp->vlru);
+    volume_LRU.q[old_idx].len--;
+
+    /* link onto the requested end of the destination queue */
+    if (!append) {
+        queue_Prepend(&volume_LRU.q[new_idx], &vp->vlru);
+    } else {
+        queue_Append(&volume_LRU.q[new_idx], &vp->vlru);
+    }
+    volume_LRU.q[new_idx].len++;
+
+    vp->vlru.idx = new_idx;
+}
+
+/* VLRU GC thread
+ *
+ * Thread entry point (args is unused).  After an initial sleep of
+ * VLRU_offline_thresh + 60 seconds it loops until scanner_state becomes
+ * VLRU_SCANNER_STATE_SHUTTING_DOWN.  Each iteration:
+ *   1. honours a pause request (PAUSING -> PAUSED, then waits on
+ *      volume_LRU.cv until the state changes again);
+ *   2. with the glock released, picks whichever of the generation-0
+ *      scan, the candidate scan, or a promotion pass is due soonest,
+ *      with an overdue operation winning outright;
+ *   3. sleeps for the remaining delay, retakes the glock, and runs the
+ *      chosen operation.
+ * On exit it marks the scanner OFFLINE and broadcasts volume_LRU.cv.
+ *
+ * Delays are computed in unsigned arithmetic; a result larger than the
+ * interval itself means the subtraction wrapped, i.e. the operation is
+ * overdue. */
+static void *
+VLRU_ScannerThread(void * args)
+{
+    afs_uint32 now, min_delay, delay;
+    int i, min_idx, min_op, overdue, state;
+
+    /* set t=0 for promotion cycle to be
+     * fileserver startup */
+    now = FT_ApproxTime();
+    for (i=0; i < VLRU_GENERATIONS-1; i++) {
+        volume_LRU.last_promotion[i] = now;
+    }
+
+    /* don't start the scanner until VLRU_offline_thresh
+     * plus a small delay for VInitVolumePackage to finish
+     * has gone by */
+
+    sleep(VLRU_offline_thresh + 60);
+
+    /* set t=0 for scan cycle to be now */
+    now = FT_ApproxTime();
+    for (i=0; i < VLRU_GENERATIONS+1; i++) {
+        volume_LRU.last_scan[i] = now;
+    }
+
+    VOL_LOCK;
+    if (volume_LRU.scanner_state == VLRU_SCANNER_STATE_OFFLINE) {
+        volume_LRU.scanner_state = VLRU_SCANNER_STATE_ONLINE;
+    }
+
+    while ((state = volume_LRU.scanner_state) != VLRU_SCANNER_STATE_SHUTTING_DOWN) {
+        /* check to see if we've been asked to pause */
+        if (volume_LRU.scanner_state == VLRU_SCANNER_STATE_PAUSING) {
+            volume_LRU.scanner_state = VLRU_SCANNER_STATE_PAUSED;
+            assert(pthread_cond_broadcast(&volume_LRU.cv) == 0);
+            do {
+                assert(pthread_cond_wait(&volume_LRU.cv, &vol_glock_mutex) == 0);
+            } while (volume_LRU.scanner_state == VLRU_SCANNER_STATE_PAUSED);
+        }
+
+        /* scheduling can happen outside the glock */
+        VOL_UNLOCK;
+
+        /* figure out what is next on the schedule */
+
+        /* figure out a potential schedule for the new generation first */
+        overdue = 0;
+        min_delay = volume_LRU.scan_interval[0] + volume_LRU.last_scan[0] - now;
+        min_idx = 0;
+        min_op = 0;
+        if (min_delay > volume_LRU.scan_interval[0]) {
+            /* unsigned overflow -- we're overdue to run this scan */
+            min_delay = 0;
+            overdue = 1;
+        }
+
+        /* if we're not overdue for gen 0, figure out schedule for candidate gen */
+        if (!overdue) {
+            i = VLRU_QUEUE_CANDIDATE;
+            delay = volume_LRU.scan_interval[i] + volume_LRU.last_scan[i] - now;
+            if (delay < min_delay) {
+                min_delay = delay;
+                min_idx = i;
+            }
+            if (delay > volume_LRU.scan_interval[i]) {
+                /* unsigned overflow -- we're overdue to run this scan.
+                 * No "break" here: this is not inside a for loop, so a
+                 * break would leave the enclosing while loop with the
+                 * glock released and take the scanner offline. */
+                min_delay = 0;
+                min_idx = i;
+                overdue = 1;
+            }
+        }
+
+        /* if we're still not overdue for something, figure out schedules for promotions */
+        for (i=0; !overdue && i < VLRU_GENERATIONS-1; i++) {
+            delay = volume_LRU.promotion_interval[i] + volume_LRU.last_promotion[i] - now;
+            if (delay < min_delay) {
+                min_delay = delay;
+                min_idx = i;
+                min_op = 1;
+            }
+            if (delay > volume_LRU.promotion_interval[i]) {
+                /* unsigned overflow -- we're overdue to run this promotion */
+                min_delay = 0;
+                min_idx = i;
+                min_op = 1;
+                overdue = 1;
+                break;
+            }
+        }
+
+        /* sleep as needed */
+        if (min_delay) {
+            sleep(min_delay);
+        }
+
+        /* do whatever is next */
+        VOL_LOCK;
+        if (min_op) {
+            VLRU_Promote_r(min_idx);
+            VLRU_Demote_r(min_idx+1);
+        } else {
+            VLRU_Scan_r(min_idx);
+        }
+        now = FT_ApproxTime();
+    }
+
+    Log("VLRU scanner asked to go offline (scanner_state=%d)\n", state);
+
+    /* signal that scanner is down */
+    volume_LRU.scanner_state = VLRU_SCANNER_STATE_OFFLINE;
+    assert(pthread_cond_broadcast(&volume_LRU.cv) == 0);
+    VOL_UNLOCK;
+    return NULL;
+}
+
+/* run the promotions
+ *
+ * Scans queue idx from tail to head and moves every qualifying volume to
+ * queue idx+1.  A volume qualifies when at least promotion_interval[idx]
+ * has passed since its last_promote and it was accessed (last_get) at or
+ * after last_promote.  Consecutive qualifying entries are gathered into
+ * a chain [start .. end] and relinked with one queue_MoveChainAfter()
+ * call.
+ *
+ * Entered and left with the glock held; the scan itself runs with the
+ * glock dropped, protected by exclusive access to both queues. */
+static void
+VLRU_Promote_r(int idx)
+{
+ int len, chaining, promote;
+ afs_uint32 now, thresh;
+ struct rx_queue *qp, *nqp;
+ Volume * vp, *start, *end;
+
+ /* get exclusive access to two chains, and drop the glock */
+ VLRU_Wait_r(&volume_LRU.q[idx]);
+ VLRU_BeginExclusive_r(&volume_LRU.q[idx]);
+ VLRU_Wait_r(&volume_LRU.q[idx+1]);
+ VLRU_BeginExclusive_r(&volume_LRU.q[idx+1]);
+ VOL_UNLOCK;
+
+ thresh = volume_LRU.promotion_interval[idx];
+ now = FT_ApproxTime();
+
+ len = chaining = 0; /* len counts moved volumes; chaining is set while a run is open */
+ for (queue_ScanBackwards(&volume_LRU.q[idx], qp, nqp, rx_queue)) {
+ vp = (Volume *)((char *)qp - offsetof(Volume, vlru)); /* recover the Volume from its embedded vlru link */
+ promote = (((vp->stats.last_promote + thresh) <= now) &&
+ (vp->stats.last_get >= vp->stats.last_promote));
+
+ if (chaining) {
+ if (promote) {
+ vp->vlru.idx++;
+ len++;
+ start = vp; /* extend the open run toward the head */
+ } else {
+ /* promote and prepend chain */
+ queue_MoveChainAfter(&volume_LRU.q[idx+1], &start->vlru, &end->vlru);
+ chaining = 0;
+ }
+ } else {
+ if (promote) {
+ vp->vlru.idx++;
+ len++;
+ chaining = 1;
+ start = end = vp; /* open a new run of one */
+ }
+ }
+ }
+
+ if (chaining) {
+ /* promote and prepend */
+ queue_MoveChainAfter(&volume_LRU.q[idx+1], &start->vlru, &end->vlru);
+ }
+
+ if (len) {
+ volume_LRU.q[idx].len -= len;
+ volume_LRU.q[idx+1].len += len;
+ }
+
+ /* release exclusive access to the two chains */
+ VOL_LOCK;
+ volume_LRU.last_promotion[idx] = now;
+ VLRU_EndExclusive_r(&volume_LRU.q[idx+1]);
+ VLRU_EndExclusive_r(&volume_LRU.q[idx]);
+}
+
+/* run the demotions
+ *
+ * Scans queue idx (VLRU_QUEUE_MID or VLRU_QUEUE_OLD) from tail to head
+ * and moves every qualifying volume to queue idx-1.  A volume qualifies
+ * when at least promotion_interval[idx-1] has passed since its
+ * last_promote and its last_get is older than now - that interval.
+ * Consecutive qualifying entries are relinked as one chain with
+ * queue_MoveChainBefore().
+ *
+ * Demoted volumes that are attached, lack VOL_HDR_DONTSALV and have gone
+ * more than SALVAGE_INTERVAL without an update are remembered in
+ * salv_flag_vec.  After the glock is retaken, each is re-checked and, if
+ * still eligible, gets DONT_SALVAGE set and its header written.
+ *
+ * Entered and left with the glock held; the scan itself runs with the
+ * glock dropped, protected by exclusive access to both queues. */
+static void
+VLRU_Demote_r(int idx)
+{
+ Error ec;
+ int len, chaining, demote;
+ afs_uint32 now, thresh;
+ struct rx_queue *qp, *nqp;
+ Volume * vp, *start, *end;
+ Volume ** salv_flag_vec = NULL;
+ int salv_vec_offset = 0; /* number of entries stored in salv_flag_vec */
+
+ assert(idx == VLRU_QUEUE_MID || idx == VLRU_QUEUE_OLD);
+
+ /* get exclusive access to two chains, and drop the glock */
+ VLRU_Wait_r(&volume_LRU.q[idx-1]);
+ VLRU_BeginExclusive_r(&volume_LRU.q[idx-1]);
+ VLRU_Wait_r(&volume_LRU.q[idx]);
+ VLRU_BeginExclusive_r(&volume_LRU.q[idx]);
+ VOL_UNLOCK;
+
+ /* no big deal if this allocation fails */
+ if (volume_LRU.q[idx].len) {
+ salv_flag_vec = (Volume **) malloc(volume_LRU.q[idx].len * sizeof(Volume *));
+ }
+
+ now = FT_ApproxTime();
+ thresh = volume_LRU.promotion_interval[idx-1];
+
+ len = chaining = 0;
+ for (queue_ScanBackwards(&volume_LRU.q[idx], qp, nqp, rx_queue)) {
+ vp = (Volume *)((char *)qp - offsetof(Volume, vlru)); /* recover the Volume from its embedded vlru link */
+ demote = (((vp->stats.last_promote + thresh) <= now) &&
+ (vp->stats.last_get < (now - thresh)));
+
+ /* we now do volume update list DONT_SALVAGE flag setting during
+ * demotion passes */
+ if (salv_flag_vec &&
+ !(V_attachFlags(vp) & VOL_HDR_DONTSALV) &&
+ demote &&
+ (vp->updateTime < (now - SALVAGE_INTERVAL)) &&
+ (V_attachState(vp) == VOL_STATE_ATTACHED)) {
+ salv_flag_vec[salv_vec_offset++] = vp;
+ VCreateReservation_r(vp); /* NOTE(review): an _r call made after the VOL_UNLOCK above -- confirm this is safe without the glock */
+ }
+
+ if (chaining) {
+ if (demote) {
+ vp->vlru.idx--;
+ len++;
+ start = vp; /* extend the open run toward the head */
+ } else {
+ /* demote and append chain */
+ queue_MoveChainBefore(&volume_LRU.q[idx-1], &start->vlru, &end->vlru);
+ chaining = 0;
+ }
+ } else {
+ if (demote) {
+ vp->vlru.idx--;
+ len++;
+ chaining = 1;
+ start = end = vp; /* open a new run of one */
+ }
+ }
+ }
+
+ if (chaining) {
+ queue_MoveChainBefore(&volume_LRU.q[idx-1], &start->vlru, &end->vlru);
+ }
+
+ if (len) {
+ volume_LRU.q[idx].len -= len;
+ volume_LRU.q[idx-1].len += len;
+ }
+
+ /* release exclusive access to the two chains */
+ VOL_LOCK;
+ VLRU_EndExclusive_r(&volume_LRU.q[idx]);
+ VLRU_EndExclusive_r(&volume_LRU.q[idx-1]);
+
+ /* now go back and set the DONT_SALVAGE flags as appropriate */
+ if (salv_flag_vec) {
+ int i;
+ for (i = 0; i < salv_vec_offset; i++) {
+ vp = salv_flag_vec[i];
+ if (!(V_attachFlags(vp) & VOL_HDR_DONTSALV) &&
+ (vp->updateTime < (now - SALVAGE_INTERVAL)) &&
+ (V_attachState(vp) == VOL_STATE_ATTACHED)) { /* re-checked: the volume may have changed since the scan */
+ ec = VHold_r(vp);
+ if (!ec) {
+ V_attachFlags(vp) |= VOL_HDR_DONTSALV;
+ V_dontSalvage(vp) = DONT_SALVAGE;
+ VUpdateVolume_r(&ec, vp, 0);
+ VPutVolume_r(vp);
+ }
+ }
+ VCancelReservation_r(vp); /* pairs with the VCreateReservation_r made during the scan */
+ }
+ free(salv_flag_vec);
+ }
+}
+
+/* run a pass of the VLRU GC scanner
+ *
+ * idx must be VLRU_QUEUE_NEW or VLRU_QUEUE_CANDIDATE.
+ *
+ * Candidate pass: with the glock dropped, walks the candidate queue from
+ * the tail and tries VCheckSoftDetach() (glock retaken around each call)
+ * on volumes whose last_get is at or before now - VLRU_offline_thresh.
+ * Stops once VLRU_offline_max volumes have been detached.
+ *
+ * NEW pass: walks the NEW queue from the tail and hands stale volumes to
+ * VCheckSoftDetachCandidate().  The glock is briefly released every 128
+ * entries.
+ *
+ * Entered and left with the glock held.  The `for (i=..., queue_Scan...)`
+ * headers rely on queue_ScanBackwards expanding into the three for-loop
+ * clauses. */
+static void
+VLRU_Scan_r(int idx)
+{
+ afs_uint32 now, thresh;
+ struct rx_queue *qp, *nqp;
+ volatile Volume * vp;
+ int i, locked = 1; /* locked tracks whether we currently hold the glock */
+
+ assert(idx == VLRU_QUEUE_NEW || idx == VLRU_QUEUE_CANDIDATE);
+
+ /* gain exclusive access to the idx VLRU */
+ VLRU_Wait_r(&volume_LRU.q[idx]);
+ VLRU_BeginExclusive_r(&volume_LRU.q[idx]);
+
+ if (idx != VLRU_QUEUE_CANDIDATE) {
+ /* gain exclusive access to the candidate VLRU */
+ VLRU_Wait_r(&volume_LRU.q[VLRU_QUEUE_CANDIDATE]);
+ VLRU_BeginExclusive_r(&volume_LRU.q[VLRU_QUEUE_CANDIDATE]);
+ }
+
+ now = FT_ApproxTime();
+ thresh = now - VLRU_offline_thresh; /* volumes last accessed at or before this are stale */
+
+ /* perform candidate selection and soft detaching */
+ if (idx == VLRU_QUEUE_CANDIDATE) {
+ /* soft detach some volumes from the candidate pool */
+ VOL_UNLOCK;
+ locked = 0;
+
+ for (i=0,queue_ScanBackwards(&volume_LRU.q[idx], qp, nqp, rx_queue)) {
+ vp = (Volume *)((char *)qp - offsetof(Volume, vlru));
+ if (i >= VLRU_offline_max) {
+ break;
+ }
+ /* check timestamp to see if it's a candidate for soft detaching */
+ if (vp->stats.last_get <= thresh) {
+ VOL_LOCK;
+ if (VCheckSoftDetach(vp, thresh))
+ i++; /* i counts volumes actually detached in this pass */
+ VOL_UNLOCK;
+ }
+ }
+ } else {
+ /* scan for volumes to become soft detach candidates */
+ for (i=1,queue_ScanBackwards(&volume_LRU.q[idx], qp, nqp, rx_queue),i++) {
+ vp = (Volume *)((char *)qp - offsetof(Volume, vlru));
+
+ /* check timestamp to see if it's a candidate for soft detaching */
+ if (vp->stats.last_get <= thresh) {
+ VCheckSoftDetachCandidate(vp, thresh);
+ }
+
+ if (!(i&0x7f)) { /* lock coarsening optimization */
+ VOL_UNLOCK;
+ pthread_yield();
+ VOL_LOCK;
+ }
+ }
+ }
+
+ /* relinquish exclusive access to the VLRU chains */
+ if (!locked) {
+ VOL_LOCK;
+ }
+ volume_LRU.last_scan[idx] = now;
+ if (idx != VLRU_QUEUE_CANDIDATE) {
+ VLRU_EndExclusive_r(&volume_LRU.q[VLRU_QUEUE_CANDIDATE]);
+ }
+ VLRU_EndExclusive_r(&volume_LRU.q[idx]);
+}
+
+/* Decide whether a volume may be soft detached and, if so, attempt it.
+ * caller MUST NOT hold a ref count on vp
+ *
+ * Returns 0 without acting when the volume has users or waiters, or when
+ * its last access is newer than thresh; otherwise returns the result of
+ * VSoftDetachVolume_r(). */
+static int
+VCheckSoftDetach(volatile Volume * vp, afs_uint32 thresh)
+{
+    if (vp->nUsers || vp->nWaiters)
+        return 0;
+
+    if (vp->stats.last_get > thresh)
+        return 0;
+
+    return VSoftDetachVolume_r(vp, thresh);
+}
+
+/* Move an idle volume from the NEW queue to the soft detach candidate
+ * queue.
+ *
+ * Returns 0 without acting when the volume has users or waiters, or when
+ * its last access is newer than thresh.  Otherwise the volume (which
+ * must be on VLRU_QUEUE_NEW) is prepended to VLRU_QUEUE_CANDIDATE, both
+ * queue lengths are adjusted, and 1 is returned. */
+static int
+VCheckSoftDetachCandidate(volatile Volume * vp, afs_uint32 thresh)
+{
+    int cur_idx;
+
+    if (vp->nUsers || vp->nWaiters)
+        return 0;
+
+    cur_idx = vp->vlru.idx;
+    assert(cur_idx == VLRU_QUEUE_NEW);
+
+    if (vp->stats.last_get > thresh)
+        return 0;
+
+    /* move to candidate pool */
+    queue_Remove(&vp->vlru);
+    volume_LRU.q[VLRU_QUEUE_NEW].len--;
+
+    queue_Prepend(&volume_LRU.q[VLRU_QUEUE_CANDIDATE], &vp->vlru);
+    vp->vlru.idx = VLRU_QUEUE_CANDIDATE;
+    volume_LRU.q[VLRU_QUEUE_CANDIDATE].len++;
+
+    return 1;
+}
+
+
+/* begin exclusive access on VLRU
+ *
+ * Marks queue q busy.  The queue must not already be busy; callers in
+ * this file call VLRU_Wait_r(q) first to ensure that. */
+static void
+VLRU_BeginExclusive_r(struct VLRU_q * q)
+{
+ assert(q->busy == 0);
+ q->busy = 1;
+}
+
+/* end exclusive access on VLRU
+ *
+ * Clears the busy mark on queue q (which must be set) and wakes every
+ * thread waiting on q->cv. */
+static void
+VLRU_EndExclusive_r(struct VLRU_q * q)
+{
+ assert(q->busy);
+ q->busy = 0;
+ assert(pthread_cond_broadcast(&q->cv) == 0);
+}
+
+/* wait for another thread to end exclusive access on VLRU
+ *
+ * Returns immediately when q is not busy.  Otherwise sleeps on q->cv,
+ * which releases vol_glock_mutex for the duration of each wait, until
+ * q->busy is clear. */
+static void
+VLRU_Wait_r(struct VLRU_q * q)
+{
+ while(q->busy) {
+ assert(pthread_cond_wait(&q->cv, &vol_glock_mutex) == 0);
+ }
+}
+
+/* demand attach fs
+ * volume soft detach
+ *
+ * caller MUST NOT hold a ref count on vp
+ *
+ * vp must be on the candidate queue.  Returns 1 only when the volume was
+ * actually taken offline and left in VOL_STATE_PREATTACHED.  Returns 0
+ * when it was accessed after thresh, has users or waiters, is in an
+ * exclusive state, could not be held, or another thread raced us.
+ * Volumes already in an unattached, pre-attached, error, going-offline,
+ * shutting-down or salvaging state are removed from the VLRU and 0 is
+ * returned. */
+static int
+VSoftDetachVolume_r(volatile Volume * vp, afs_uint32 thresh)
+{
+ afs_uint32 ts_save; /* last_get as seen on entry; used below to detect a racing access */
+ int ret = 0;
+
+ assert(vp->vlru.idx == VLRU_QUEUE_CANDIDATE);
+
+ ts_save = vp->stats.last_get;
+ if (ts_save > thresh)
+ return 0;
+
+ if (vp->nUsers || vp->nWaiters)
+ return 0;
+
+ if (IsExclusiveState(V_attachState(vp))) {
+ return 0;
+ }
+
+ switch (V_attachState(vp)) {
+ case VOL_STATE_UNATTACHED:
+ case VOL_STATE_PREATTACHED:
+ case VOL_STATE_ERROR:
+ case VOL_STATE_GOING_OFFLINE:
+ case VOL_STATE_SHUTTING_DOWN:
+ case VOL_STATE_SALVAGING:
+ volume_LRU.q[vp->vlru.idx].len--;
+
+ /* create and cancel a reservation to
+ * give the volume an opportunity to
+ * be deallocated */
+ VCreateReservation_r(vp);
+ queue_Remove(&vp->vlru);
+ vp->vlru.idx = VLRU_QUEUE_INVALID;
+ V_attachFlags(vp) &= ~(VOL_ON_VLRU);
+ VCancelReservation_r(vp);
+ return 0;
+ }
+
+ /* hold the volume and take it offline.
+ * no need for reservations, as VHold_r
+ * takes care of that internally. */
+ if (VHold_r(vp) == 0) {
+ /* vhold drops the glock, so now we should
+ * check to make sure we aren't racing against
+ * other threads. if we are racing, offlining vp
+ * would be wasteful, and block the scanner for a while
+ */
+ if (vp->nWaiters ||
+ (vp->nUsers > 1) ||
+ (vp->shuttingDown) ||
+ (vp->goingOffline) ||
+ (vp->stats.last_get != ts_save)) {
+ /* looks like we're racing someone else. bail */
VPutVolume_r(vp);
-#ifndef AFS_PTHREAD_ENV
- IOMGR_Poll();
-#endif /* !AFS_PTHREAD_ENV */
+ vp = NULL;
+ } else {
+ /* pull it off the VLRU */
+ assert(vp->vlru.idx == VLRU_QUEUE_CANDIDATE);
+ volume_LRU.q[VLRU_QUEUE_CANDIDATE].len--;
+ queue_Remove(&vp->vlru);
+ vp->vlru.idx = VLRU_QUEUE_INVALID;
+ V_attachFlags(vp) &= ~(VOL_ON_VLRU);
+
+ /* take it offline */
+ VOffline_r(vp, "volume has been soft detached");
+
+ /* invalidate the volume header cache */
+ FreeVolumeHeader(vp);
+
+ /* update stats */
+ IncUInt64(&VStats.soft_detaches);
+ vp->stats.soft_detaches++;
+
+ /* put in pre-attached state so demand
+ * attacher can work on it */
+ VChangeState_r(vp, VOL_STATE_PREATTACHED);
+ ret = 1;
+ }
}
- nUpdatedVolumes -= gap;
+ return ret;
}
+#endif /* AFS_DEMAND_ATTACH_FS */
+
/***************************************************/
-/* Add on routines to manage a volume header cache */
+/* Volume Header Cache routines */
/***************************************************/
-static struct volHeader *volumeLRU;
+struct volume_hdr_LRU_t volume_hdr_LRU;
/* Allocate a bunch of headers; string them together */
+/* Fileserver only: initializes the volume header LRU queue, allocates
+ * howMany zeroed volHeader structs as one array and releases each of
+ * them onto the LRU.  Every other program type returns immediately. */
static void
-InitLRU(int howMany)
+VInitVolumeHeaderCache(afs_uint32 howMany)
{
register struct volHeader *hp;
if (programType != fileServer)
return;
+ queue_Init(&volume_hdr_LRU);
+#ifdef AFS_DEMAND_ATTACH_FS
+ /* start with every header counted as used; each ReleaseVolumeHeader
+ * call below moves one header from used to free */
+ volume_hdr_LRU.stats.free = 0;
+ volume_hdr_LRU.stats.used = howMany;
+ volume_hdr_LRU.stats.attached = 0;
+#endif
hp = (struct volHeader *)(calloc(howMany, sizeof(struct volHeader)));
+ /* a failed allocation must not reach the loop below, which would step
+ * a NULL pointer through howMany bogus headers; calloc is allowed to
+ * return NULL for a zero-sized request, so tolerate howMany == 0 */
+ assert(hp != NULL || howMany == 0);
while (howMany--)
ReleaseVolumeHeader(hp++);
}
+#ifdef AFS_DEMAND_ATTACH_FS
/* Get a volume header from the LRU list; update the old one if necessary */
/* Returns 1 if there was already a header, which is removed from the LRU list */
+/* caller MUST hold a ref count on vp */
static int
GetVolumeHeader(register Volume * vp)
{
int old;
static int everLogged = 0;
+ /* XXX debug 9/19/05 we've apparently got
+ * a ref counting bug somewhere that's
+ * breaking the nUsers == 0 => header on LRU
+ * assumption */
+ if (vp->header && queue_IsNotOnQueue(vp->header)) {
+ Log("nUsers == 0, but header not on LRU\n");
+ return 1; /* reported as "already had a header"; the hdr_gets stats below are not updated on this path */
+ }
+
old = (vp->header != NULL); /* old == volume already has a header */
+
if (programType != fileServer) {
+ /* for volume utilities, we allocate volHeaders as needed */
if (!vp->header) {
hd = (struct volHeader *)calloc(1, sizeof(*vp->header));
assert(hd != NULL);
vp->header = hd;
hd->back = vp;
+ V_attachFlags(vp) |= VOL_HDR_ATTACHED;
}
} else {
if (old) {
+ /* the header we previously dropped in the lru is
+ * still available. pull it off the lru and return */
hd = vp->header;
- if (volumeLRU == hd)
- volumeLRU = hd->next;
+ queue_Remove(hd);
assert(hd->back == vp);
} else {
- if (volumeLRU)
- /* not currently in use and least recently used */
- hd = volumeLRU->prev;
- else {
- hd = (struct volHeader *)calloc(1, sizeof(*vp->header));
- /* make it look like single elt LRU */
- hd->prev = hd->next = hd;
+ /* we need to grab a new element off the LRU */
+ if (queue_IsNotEmpty(&volume_hdr_LRU)) {
+ /* grab an element and pull off of LRU */
+ hd = queue_First(&volume_hdr_LRU, volHeader);
+ queue_Remove(hd);
+ } else {
+ /* LRU is empty, so allocate a new volHeader
+ * this is probably indicative of a leak, so let the user know */
+ hd = (struct volHeader *)calloc(1, sizeof(struct volHeader));
+ assert(hd != NULL);
if (!everLogged) {
Log("****Allocated more volume headers, probably leak****\n");
everLogged = 1;
}
+ volume_hdr_LRU.stats.free++; /* balances the unconditional free-- further down for a header that never sat on the LRU */
}
if (hd->back) {
+ VolState vp_save, back_save;
+ /* this header used to belong to someone else.
+ * we'll need to check if the header needs to
+ * be sync'd out to disk */
+
+ /* if hd->back were in an exclusive state, then
+ * its volHeader would not be on the LRU... */
+ assert(!IsExclusiveState(V_attachState(hd->back)));
+
if (hd->diskstuff.inUse) {
+ /* volume was in use, so we'll need to sync
+ * its header to disk */
+ back_save = VChangeState_r(hd->back, VOL_STATE_UPDATING);
+ vp_save = VChangeState_r(vp, VOL_STATE_HDR_ATTACHING);
+ VCreateReservation_r(hd->back);
+ VOL_UNLOCK; /* the header write-back below runs without the volume lock; it is re-taken right after */
+
WriteVolumeHeader_r(&error, hd->back);
/* Ignore errors; catch them later */
+
+ VOL_LOCK;
+ }
+
+ V_attachFlags(hd->back) &= ~(VOL_HDR_ATTACHED | VOL_HDR_LOADED | VOL_HDR_IN_LRU);
+ hd->back->header = NULL;
+
+ if (hd->diskstuff.inUse) { /* undo the state changes and the reservation taken before the write-back */
+ VChangeState_r(hd->back, back_save);
+ VCancelReservation_r(hd->back);
+ VChangeState_r(vp, vp_save);
}
- hd->back->header = 0;
+ } else {
+ volume_hdr_LRU.stats.attached++; /* header had no previous owner, so one more header is now attached */
}
hd->back = vp;
vp->header = hd;
+ V_attachFlags(vp) |= VOL_HDR_ATTACHED;
+ }
+ volume_hdr_LRU.stats.free--;
+ volume_hdr_LRU.stats.used++;
+ }
+ IncUInt64(&VStats.hdr_gets);
+ IncUInt64(&vp->stats.hdr_gets);
+ vp->stats.last_hdr_get = FT_ApproxTime();
+ return old;
+}
+#else /* AFS_DEMAND_ATTACH_FS */
+/* Get a volume header from the LRU list; update the old one if necessary */
+/* Returns 1 if there was already a header, which is removed from the LRU list
+ * (0 when vp had no header on entry and one was attached here) */
+static int
+GetVolumeHeader(register Volume * vp)
+{
+ Error error;
+ register struct volHeader *hd;
+ int old;
+ static int everLogged = 0;
+
+ old = (vp->header != NULL); /* old == volume already has a header */
+
+ if (programType != fileServer) {
+ /* for volume utilities, we allocate volHeaders as needed */
+ if (!vp->header) {
+ hd = (struct volHeader *)calloc(1, sizeof(*vp->header));
+ assert(hd != NULL);
+ vp->header = hd;
+ hd->back = vp;
}
- if (hd->next) { /* hd->next != 0 --> in LRU chain (we zero it later) */
- hd->prev->next = hd->next; /* pull hd out of LRU list */
- hd->next->prev = hd->prev; /* if hd only element, this is noop */
+ } else {
+ /* for the fileserver, we keep a volume header cache */
+ if (old) {
+ /* the header we previously dropped in the lru is
+ * still available. pull it off the lru and return */
+ hd = vp->header;
+ queue_Remove(hd);
+ assert(hd->back == vp);
+ } else {
+ /* we need to grab a new element off the LRU */
+ if (queue_IsNotEmpty(&volume_hdr_LRU)) {
+ /* grab an element */
+ hd = queue_First(&volume_hdr_LRU, volHeader);
+ queue_Remove(hd);
+ } else {
+ /* LRU is empty, so allocate a new volHeader
+ * this is probably indicative of a leak, so let the user know */
+ hd = (struct volHeader *)calloc(1, sizeof(struct volHeader));
+ assert(hd != NULL);
+ if (!everLogged) { /* the warning is logged once per process, not once per allocation */
+ Log("****Allocated more volume headers, probably leak****\n");
+ everLogged = 1;
+ }
+ }
+ if (hd->back) {
+ /* this header used to belong to someone else.
+ * we'll need to check if the header needs to
+ * be sync'd out to disk */
+
+ if (hd->diskstuff.inUse) {
+ WriteVolumeHeader_r(&error, hd->back);
+ /* Ignore errors; catch them later */
+ }
+ hd->back->header = NULL; /* the previous owner no longer has a header */
+ }
+ hd->back = vp;
+ vp->header = hd;
}
- hd->next = hd->prev = 0;
- /* if not in LRU chain, next test won't be true */
- if (hd == volumeLRU) /* last header item, turn into empty list */
- volumeLRU = NULL;
}
return old;
}
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+
+/* make sure a volume header is attached to
+ * vp, and has the correct data loaded from
+ * disk. */
+#ifdef AFS_DEMAND_ATTACH_FS
+/* Demand-attach variant.  Sets *ec to 0, then, only when vp has no users
+ * and GetVolumeHeader reports that vp had no header yet, reads the volume
+ * info header with the volume lock dropped and the volume parked in
+ * VOL_STATE_HDR_LOADING.  On a read error the header is released again.
+ * caller MUST hold a ref count on vp */
+static void
+LoadVolumeHeader(Error * ec, Volume * vp)
+{
+    VolState prior_state;
+
+    *ec = 0;
+
+    /* nothing to load: *ec stays 0, so there is nothing to undo either */
+    if (vp->nUsers != 0 || GetVolumeHeader(vp)) {
+        return;
+    }
+
+    IncUInt64(&VStats.hdr_loads);
+    prior_state = VChangeState_r(vp, VOL_STATE_HDR_LOADING);
+    VOL_UNLOCK;
+
+    ReadHeader(ec, V_diskDataHandle(vp), (char *)&V_disk(vp),
+               sizeof(V_disk(vp)), VOLUMEINFOMAGIC,
+               VOLUMEINFOVERSION);
+    IncUInt64(&vp->stats.hdr_loads);
+
+    VOL_LOCK;
+    if (*ec == 0) {
+        V_attachFlags(vp) |= VOL_HDR_LOADED;
+    }
+    VChangeState_r(vp, prior_state);
+
+    if (*ec != 0) {
+        /* maintain (nUsers==0) => header in LRU invariant */
+        ReleaseVolumeHeader(vp->header);
+    }
+}
+#else /* AFS_DEMAND_ATTACH_FS */
+/* Plain variant.  Sets *ec to 0, then reads the volume info header only
+ * when vp has no users and GetVolumeHeader reports that vp had no header
+ * yet.  On a read error the header is released again. */
+static void
+LoadVolumeHeader(Error * ec, Volume * vp)
+{
+    *ec = 0;
+
+    /* nothing to load: *ec stays 0, so there is nothing to undo either */
+    if (vp->nUsers != 0 || GetVolumeHeader(vp)) {
+        return;
+    }
+
+    IncUInt64(&VStats.hdr_loads);
+
+    ReadHeader(ec, V_diskDataHandle(vp), (char *)&V_disk(vp),
+               sizeof(V_disk(vp)), VOLUMEINFOMAGIC,
+               VOLUMEINFOVERSION);
+
+    if (*ec != 0) {
+        /* maintain (nUsers==0) => header in LRU invariant */
+        ReleaseVolumeHeader(vp->header);
+    }
+}
+#endif /* AFS_DEMAND_ATTACH_FS */
/* Put it at the top of the LRU chain */
static void
{
if (programType != fileServer)
return;
- if (!hd || hd->next) /* no header, or header already released */
+ if (!hd || queue_IsOnQueue(hd)) /* no header, or header already released */
return;
- if (!volumeLRU) {
- hd->next = hd->prev = hd;
- } else {
- hd->prev = volumeLRU->prev;
- hd->next = volumeLRU;
- hd->prev->next = hd->next->prev = hd;
+ queue_Append(&volume_hdr_LRU, hd); /* NOTE(review): headers are appended here and reused via queue_First in GetVolumeHeader; presumably that yields least-recently-released reuse -- confirm queue_Append adds at the tail */
+#ifdef AFS_DEMAND_ATTACH_FS
+ if (hd->back) { /* a header with no owning volume has no attach flags to update */
+ V_attachFlags(hd->back) |= VOL_HDR_IN_LRU;
}
- volumeLRU = hd;
+ volume_hdr_LRU.stats.free++;
+ volume_hdr_LRU.stats.used--;
+#endif
}
+/* for fileserver, return header to LRU, and
+ * invalidate it as a cache entry.
+ *
+ * for volume utilities, free the heap space
+ *
+ * in both cases vp->header is NULL afterwards; under demand attach the
+ * three VOL_HDR_* attach flags are cleared as well */
static void
FreeVolumeHeader(register Volume * vp)
{
return;
if (programType == fileServer) {
ReleaseVolumeHeader(hd);
- hd->back = 0;
+ hd->back = NULL;
} else {
free(hd);
}
- vp->header = 0;
+#ifdef AFS_DEMAND_ATTACH_FS
+ V_attachFlags(vp) &= ~(VOL_HDR_ATTACHED | VOL_HDR_IN_LRU | VOL_HDR_LOADED);
+ volume_hdr_LRU.stats.attached--; /* NOTE(review): decremented for every program type, while GetVolumeHeader only increments it on the fileserver path -- confirm this cannot underflow in utilities */
+#endif
+ vp->header = NULL;
}
/***************************************************/
-/* Routines to add volume to hash chain, delete it */
+/* Volume Hash Table routines */
/***************************************************/
+/* Set the volume hash table size to 2^logsize buckets.
+ *
+ * Returns 0 on success.  Returns -1 when logsize is outside 6..14
+ * (64 to 16384 buckets) or when VInit is already non-zero. */
+int
+VSetVolHashSize(int logsize)
+{
+    /* 64 to 16384 hash buckets seems like a reasonable range */
+    if (logsize < 6 || logsize > 14)
+        return -1;
+
+    /* we can't yet support runtime modification of this
+     * parameter. we'll need a configuration rwlock to
+     * make runtime modification feasible.... */
+    if (VInit)
+        return -1;
+
+    VolumeHashTable.Size = 1 << logsize;
+    VolumeHashTable.Mask = VolumeHashTable.Size - 1;
+    return 0;
+}
+
+/* Allocate the hash chain heads (VolumeHashTable.Size of them, zeroed)
+ * and initialize each chain's queue; under demand attach each chain's
+ * busy condition variable is initialized as well.  Asserts on failure. */
+static void
+VInitVolumeHash(void)
+{
+    register int i = 0;
+
+    VolumeHashTable.Table = (VolumeHashChainHead *) calloc(VolumeHashTable.Size,
+                                                           sizeof(VolumeHashChainHead));
+    assert(VolumeHashTable.Table != NULL);
+
+    while (i < VolumeHashTable.Size) {
+        queue_Init(&VolumeHashTable.Table[i]);
+#ifdef AFS_DEMAND_ATTACH_FS
+        assert(pthread_cond_init(&VolumeHashTable.Table[i].chain_busy_cv, NULL) == 0);
+#endif /* AFS_DEMAND_ATTACH_FS */
+        i++;
+    }
+}
+
+/* for demand-attach, caller MUST hold a ref count on vp
+ *
+ * appends vp to the hash chain selected by VOLUME_HASH(hashid) and
+ * records hashid in vp; does nothing when vp is already on a queue */
static void
AddVolumeToHashTable(register Volume * vp, int hashid)
{
- int hash = VOLUME_HASH(hashid);
+ VolumeHashChainHead * head;
+
+ if (queue_IsOnQueue(vp))
+ return;
+
+ head = &VolumeHashTable.Table[VOLUME_HASH(hashid)];
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ /* wait for the hash chain to become available */
+ VHashWait_r(head);
+
+ V_attachFlags(vp) |= VOL_IN_HASH;
+ vp->chainCacheCheck = ++head->cacheCheck; /* changing the chain's cacheCheck makes stale lookup hints miss; vp is stamped with the new value */
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+ head->len++;
vp->hashid = hashid;
- vp->hashNext = VolumeHashTable[hash];
- VolumeHashTable[hash] = vp;
+ queue_Append(head, vp);
vp->vnodeHashOffset = VolumeHashOffset_r();
}
+/* for demand-attach, caller MUST hold a ref count on vp
+ *
+ * removes vp from the hash chain selected by VOLUME_HASH(vp->hashid);
+ * does nothing when vp is not on a queue */
static void
DeleteVolumeFromHashTable(register Volume * vp)
{
- int hash = VOLUME_HASH(vp->hashid);
- if (VolumeHashTable[hash] == vp)
- VolumeHashTable[hash] = vp->hashNext;
- else {
- Volume *tvp = VolumeHashTable[hash];
- if (tvp == NULL)
- return;
- while (tvp->hashNext && tvp->hashNext != vp)
- tvp = tvp->hashNext;
- if (tvp->hashNext == NULL)
- return;
- tvp->hashNext = vp->hashNext;
+ VolumeHashChainHead * head;
+
+ if (!queue_IsOnQueue(vp))
+ return;
+
+ head = &VolumeHashTable.Table[VOLUME_HASH(vp->hashid)];
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ /* wait for the hash chain to become available */
+ VHashWait_r(head);
+
+ V_attachFlags(vp) &= ~(VOL_IN_HASH);
+ head->cacheCheck++; /* invalidates lookup hints that were stamped with the old value */
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+ head->len--;
+ queue_Remove(vp);
+ /* do NOT reset hashid to zero, as the online
+ * salvager package may need to know the volume id
+ * after the volume is removed from the hash */
+}
+
+/* Find the volume whose hashid equals volumeId.
+ *
+ * Sets *ec to 0 and returns the matching Volume, or NULL when the hash
+ * chain holds no such volume.
+ *
+ * Demand-attach extras:
+ *  - waits until the chain is not busy before touching it
+ *  - returns hint straight away when hint's chainCacheCheck equals the
+ *    chain's cacheCheck (caller MUST hold a refcount on hint)
+ *  - updates chain and global lookup statistics
+ *  - occasionally reorders the chain so that frequently looked up
+ *    volumes move towards its head
+ */
+Volume *
+VLookupVolume_r(Error * ec, VolId volumeId, Volume * hint)
+{
+    VolumeHashChainHead * chain;
+    Volume * vp, *np, *pp;
+    register int nlooks;
+
+    *ec = 0;
+    chain = &VolumeHashTable.Table[VOLUME_HASH(volumeId)];
+
+#ifdef AFS_DEMAND_ATTACH_FS
+    /* wait for the hash chain to become available */
+    VHashWait_r(chain);
+
+    /* a hint whose cache check still matches lets us skip the walk */
+    if (hint != NULL && hint->chainCacheCheck == chain->cacheCheck) {
+        IncUInt64(&hint->stats.hash_short_circuits);
+        return hint;
+    }
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+    /* someday we need to either do per-chain locks, RWlocks,
+     * or both for volhash access.
+     * (and move to a data structure with better cache locality) */
+
+    /* walk the chain, counting every element examined */
+    nlooks = 0;
+    for (queue_Scan(chain, vp, np, Volume)) {
+        nlooks++;
+        if (vp->hashid == volumeId)
+            break;
+    }
+
+    /* falling off the end of the chain means "not found" */
+    if (queue_IsEnd(chain, vp))
+        vp = NULL;
+
+#ifdef AFS_DEMAND_ATTACH_FS
+    /* update hash chain statistics */
+    {
+        afs_uint64 lks;
+        FillInt64(lks, 0, nlooks);
+        AddUInt64(chain->looks, lks, &chain->looks);
+        AddUInt64(VStats.hash_looks, lks, &VStats.hash_looks);
+        IncUInt64(&chain->gets);
+    }
+
+    if (vp != NULL) {
+        afs_uint64 thresh;
+        IncUInt64(&vp->stats.hash_lookups);
+
+        /* reorder only when vp has a predecessor and has been looked up
+         * at least VOLUME_HASH_REORDER_THRESHOLD more times than it */
+        pp = queue_Prev(vp, Volume);
+        if (!queue_IsEnd(chain, pp)) {
+            FillInt64(thresh, 0, VOLUME_HASH_REORDER_THRESHOLD);
+            AddUInt64(thresh, pp->stats.hash_lookups, &thresh);
+            if (GEInt64(vp->stats.hash_lookups, thresh)) {
+                VReorderHash_r(chain, pp, vp);
+            }
+        }
+
+        /* update the short-circuit cache check */
+        vp->chainCacheCheck = chain->cacheCheck;
+    }
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+    return vp;
+}
+
+#ifdef AFS_DEMAND_ATTACH_FS
+/* perform volume hash chain reordering.
+ *
+ * advance a subchain beginning at vp ahead of
+ * the adjacent subchain ending at pp
+ *
+ * the volume lock is released while the chain is rearranged and is held
+ * again on return; the chain's busy flag keeps VHashWait_r callers off
+ * the chain in the meantime */
+static void
+VReorderHash_r(VolumeHashChainHead * head, Volume * pp, Volume * vp)
+{
+ Volume *tp, *np, *lp;
+ afs_uint64 move_thresh;
+
+ /* this should never be called if the chain is already busy, so
+ * no need to wait for other exclusive chain ops to finish */
+
+ /* this is a rather heavy set of operations,
+ * so let's set the chain busy flag and drop
+ * the vol_glock */
+ VHashBeginExclusive_r(head);
+ VOL_UNLOCK;
+
+ /* scan forward in the chain from vp looking for the last element
+ * in the chain we want to advance */
+ FillInt64(move_thresh, 0, VOLUME_HASH_REORDER_CHAIN_THRESH);
+ AddUInt64(move_thresh, pp->stats.hash_lookups, &move_thresh);
+ for(queue_ScanFrom(head, vp, tp, np, Volume)) {
+ if (LTInt64(tp->stats.hash_lookups, move_thresh)) {
+ break;
+ }
+ }
+ lp = queue_Prev(tp, Volume); /* tp is the first element NOT being moved, so lp is the last one that is */
+
+ /* scan backwards from pp to determine where to splice and
+ * insert the subchain we're advancing */
+ for(queue_ScanBackwardsFrom(head, pp, tp, np, Volume)) {
+ if (GTInt64(tp->stats.hash_lookups, move_thresh)) {
+ break;
+ }
+ }
+ tp = queue_Next(tp, Volume); /* step forward again: tp is now the element the moved subchain goes in front of */
+
+ /* rebalance chain(vp,...,lp) ahead of chain(tp,...,pp) */
+ queue_MoveChainBefore(tp,vp,lp);
+
+ VOL_LOCK;
+ IncUInt64(&VStats.hash_reorders);
+ head->cacheCheck++; /* chain order changed, so hints stamped with the old value must miss */
+ IncUInt64(&head->reorders);
+
+ /* wake up any threads waiting for the hash chain */
+ VHashEndExclusive_r(head);
+}
+
+
+/* demand-attach fs volume hash
+ * asynchronous exclusive operations */
+
+/* Claim the hash chain for an exclusive operation by setting its busy
+ * flag.  Asserts that the chain is not already busy; callers that may
+ * race with another exclusive operation use VHashWait_r first. */
+static void
+VHashBeginExclusive_r(VolumeHashChainHead * head)
+{
+    assert(head->busy == 0);
+
+    head->busy = 1;
+}
+
+/* Give up exclusive control of the hash chain: clear the busy flag and
+ * broadcast on the chain's condition variable so that every thread
+ * blocked in VHashWait_r re-checks the flag.  Asserts that the chain
+ * was busy. */
+static void
+VHashEndExclusive_r(VolumeHashChainHead * head)
+{
+    assert(head->busy);
+
+    head->busy = 0;
+
+    assert(pthread_cond_broadcast(&head->chain_busy_cv) == 0);
+}
+
+/* Block until the hash chain is no longer busy.  Waits on the chain's
+ * condition variable with vol_glock_mutex, re-testing the busy flag
+ * after every wakeup. */
+static void
+VHashWait_r(VolumeHashChainHead * head)
+{
+    for (;;) {
+        if (!head->busy)
+            break;
+        assert(pthread_cond_wait(&head->chain_busy_cv, &vol_glock_mutex) == 0);
+    }
+}
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+
+/***************************************************/
+/* Volume by Partition List routines */
+/***************************************************/
+
+/*
+ * demand attach fileserver adds a
+ * linked list of volumes to each
+ * partition object, thus allowing
+ * for quick enumeration of all
+ * volumes on a partition
+ */
+
+#ifdef AFS_DEMAND_ATTACH_FS
+/* Append vp to the volume list of its partition, set VOL_ON_VBYP_LIST
+ * and bump the list length.  No-op when vp's list linkage is already
+ * on a queue. */
+static void
+AddVolumeToVByPList_r(Volume * vp)
+{
+    if (!queue_IsNotOnQueue(&vp->vol_list))
+        return;
+
+    queue_Append(&vp->partition->vol_list, &vp->vol_list);
+    V_attachFlags(vp) |= VOL_ON_VBYP_LIST;
+    vp->partition->vol_list.len++;
+}
+
+/* Remove vp from the volume list of its partition, clear
+ * VOL_ON_VBYP_LIST and drop the list length.  No-op when vp's list
+ * linkage is not on a queue. */
+static void
+DeleteVolumeFromVByPList_r(Volume * vp)
+{
+    if (!queue_IsOnQueue(&vp->vol_list))
+        return;
+
+    queue_Remove(&vp->vol_list);
+    V_attachFlags(vp) &= ~(VOL_ON_VBYP_LIST);
+    vp->partition->vol_list.len--;
+}
+
+/* Claim the partition's volume list for an exclusive operation by
+ * setting its busy flag.  Asserts that the list is not already busy. */
+static void
+VVByPListBeginExclusive_r(struct DiskPartition * dp)
+{
+    assert(dp->vol_list.busy == 0);
+
+    dp->vol_list.busy = 1;
+}
+
+/* Give up exclusive control of the partition's volume list: clear the
+ * busy flag and broadcast on the list's condition variable so that
+ * waiters re-check it.  Asserts that the list was busy. */
+static void
+VVByPListEndExclusive_r(struct DiskPartition * dp)
+{
+    assert(dp->vol_list.busy);
+
+    dp->vol_list.busy = 0;
+
+    assert(pthread_cond_broadcast(&dp->vol_list.cv) == 0);
+}
+
+/* wait for another thread to finish its exclusive ops
+ *
+ * blocks on the list's condition variable with vol_glock_mutex and
+ * re-tests the busy flag after every wakeup */
+static void
+VVByPListWait_r(struct DiskPartition * dp)
+{
+ while (dp->vol_list.busy) {
+ assert(pthread_cond_wait(&dp->vol_list.cv, &vol_glock_mutex) == 0);
}
- vp->hashid = 0;
}
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+/***************************************************/
+/* Volume Cache Statistics routines */
+/***************************************************/
void
VPrintCacheStats_r(void)
{
+ afs_uint32 get_hi, get_lo, load_hi, load_lo;
register struct VnodeClassInfo *vcp;
vcp = &VnodeClassInfo[vLarge];
Log("Large vnode cache, %d entries, %d allocs, %d gets (%d reads), %d writes\n", vcp->cacheSize, vcp->allocs, vcp->gets, vcp->reads, vcp->writes);
vcp = &VnodeClassInfo[vSmall];
Log("Small vnode cache,%d entries, %d allocs, %d gets (%d reads), %d writes\n", vcp->cacheSize, vcp->allocs, vcp->gets, vcp->reads, vcp->writes);
+ SplitInt64(VStats.hdr_gets, get_hi, get_lo); /* only get_lo is logged below; get_hi is not used */
+ SplitInt64(VStats.hdr_loads, load_hi, load_lo); /* only load_lo is logged below; load_hi is not used */
Log("Volume header cache, %d entries, %d gets, %d replacements\n",
- VolumeCacheSize, VolumeGets, VolumeReplacements);
+ VStats.hdr_cache_size, get_lo, load_lo); /* NOTE(review): the "replacements" field now reports header loads, and these afs_uint32 values are printed with %d -- confirm both are intended */
}
void
VOL_UNLOCK;
}
+#ifdef AFS_DEMAND_ATTACH_FS
+/* Convert the afs_uint64 at *x to a double: the high 32-bit half is
+ * scaled by 2^32 (spelled as 4.0 * 1.073741824 * 1000000000.0) and the
+ * low half is added to it. */
+static double
+UInt64ToDouble(afs_uint64 * x)
+{
+    static double c32 = 4.0 * 1.073741824 * 1000000000.0;
+    afs_uint32 hi_word, lo_word;
+
+    SplitInt64(*x, hi_word, lo_word);
+
+    return (((double)hi_word) * c32) + ((double) lo_word);
+}
+
+/* Format the non-negative, integer-valued double x as a decimal string.
+ *
+ * x is split into three base-10^9 groups so that values beyond the range
+ * of a 32-bit integer can be printed without 64-bit printf support.
+ *
+ * buf  destination buffer
+ * len  size of buf in bytes; the result is always NUL terminated and is
+ *      truncated when it does not fit
+ *
+ * Returns buf.  When buf is NULL or len is not positive nothing is
+ * written (the old code stored a terminator at buf[len-1] regardless).
+ */
+static char *
+DoubleToPrintable(double x, char * buf, int len)
+{
+    static double billion = 1000000000.0;
+    /* unsigned long is at least 32 bits wide and has a matching printf
+     * conversion, unlike the signed %d used on unsigned values before */
+    unsigned long y[3];
+
+    if (buf == NULL || len <= 0)
+        return buf;
+
+    /* y[0]: multiples of 10^18, y[1]: multiples of 10^9, y[2]: the rest */
+    y[0] = (unsigned long) (x / (billion * billion));
+    y[1] = (unsigned long) ((x - (((double)y[0]) * billion * billion)) / billion);
+    y[2] = (unsigned long) (x - ((((double)y[0]) * billion * billion) + (((double)y[1]) * billion)));
+
+    /* only the leading non-zero group is printed unpadded; the groups
+     * after it are zero-padded to nine digits */
+    if (y[0]) {
+        snprintf(buf, len, "%lu%09lu%09lu", y[0], y[1], y[2]);
+    } else if (y[1]) {
+        snprintf(buf, len, "%lu%09lu", y[1], y[2]);
+    } else {
+        snprintf(buf, len, "%lu", y[2]);
+    }
+    buf[len-1] = '\0';
+    return buf;
+}
+
+/* Log extended volume package statistics (demand-attach builds only).
+ *
+ * Every volume hash chain is visited once.  Global min/max/avg/total
+ * figures for chain length, looks, gets and reorders are logged after the
+ * walk, followed by the length of each partition's volume list.
+ *
+ * flags:
+ *   VOL_STATS_PER_CHAIN   additionally log one summary line per chain
+ *   VOL_STATS_PER_CHAIN2  additionally log figures derived from every
+ *                         volume on each non-empty chain; for such a
+ *                         chain this replaces the VOL_STATS_PER_CHAIN
+ *                         line
+ *
+ * Locking: the function releases the volume lock first thing inside the
+ * loop and takes it again as its last action, so it is entered and left
+ * with the lock held.  While a chain is examined the lock is dropped and
+ * the chain is marked busy instead.
+ */
+static void
+VPrintExtendedCacheStats_r(int flags)
+{
+    int i;
+    struct stats {
+        double min;
+        double max;
+        double sum;
+        double avg;
+    };
+    struct stats looks, gets, reorders, len;
+    struct stats ch_looks, ch_gets, ch_reorders;
+    char pr_buf[4][32];
+    VolumeHashChainHead *head;
+    Volume *vp, *np;
+
+    /* zero out stats */
+    memset(&looks, 0, sizeof(struct stats));
+    memset(&gets, 0, sizeof(struct stats));
+    memset(&reorders, 0, sizeof(struct stats));
+    memset(&len, 0, sizeof(struct stats));
+    memset(&ch_looks, 0, sizeof(struct stats));
+    memset(&ch_gets, 0, sizeof(struct stats));
+    memset(&ch_reorders, 0, sizeof(struct stats));
+
+    for (i = 0; i < VolumeHashTable.Size; i++) {
+        head = &VolumeHashTable.Table[i];
+
+        /* own the chain, then work on it without the volume lock */
+        VHashWait_r(head);
+        VHashBeginExclusive_r(head);
+        VOL_UNLOCK;
+
+        ch_looks.sum = UInt64ToDouble(&head->looks);
+        ch_gets.sum = UInt64ToDouble(&head->gets);
+        ch_reorders.sum = UInt64ToDouble(&head->reorders);
+
+        /* update global statistics */
+        {
+            looks.sum += ch_looks.sum;
+            gets.sum += ch_gets.sum;
+            reorders.sum += ch_reorders.sum;
+            len.sum += (double)head->len;
+
+            if (i == 0) {
+                /* the first chain seeds every global min and max */
+                len.min = (double) head->len;
+                len.max = (double) head->len;
+                looks.min = ch_looks.sum;
+                looks.max = ch_looks.sum;
+                gets.min = ch_gets.sum;
+                gets.max = ch_gets.sum;
+                reorders.min = ch_reorders.sum;
+                reorders.max = ch_reorders.sum;
+            } else {
+                if (((double)head->len) < len.min)
+                    len.min = (double) head->len;
+                if (((double)head->len) > len.max)
+                    len.max = (double) head->len;
+                if (ch_looks.sum < looks.min)
+                    looks.min = ch_looks.sum;
+                else if (ch_looks.sum > looks.max)
+                    looks.max = ch_looks.sum;
+                if (ch_gets.sum < gets.min)
+                    gets.min = ch_gets.sum;
+                else if (ch_gets.sum > gets.max)
+                    gets.max = ch_gets.sum;
+                if (ch_reorders.sum < reorders.min)
+                    reorders.min = ch_reorders.sum;
+                else if (ch_reorders.sum > reorders.max)
+                    reorders.max = ch_reorders.sum;
+            }
+        }
+
+        if ((flags & VOL_STATS_PER_CHAIN2) && queue_IsNotEmpty(head)) {
+            /* compute detailed per-chain stats */
+            struct stats hdr_loads, hdr_gets;
+            double v_looks, v_loads, v_gets;
+
+            /* initialize stats with data from first element in chain */
+            vp = queue_First(head, Volume);
+            v_looks = UInt64ToDouble(&vp->stats.hash_lookups);
+            v_loads = UInt64ToDouble(&vp->stats.hdr_loads);
+            v_gets = UInt64ToDouble(&vp->stats.hdr_gets);
+            ch_gets.min = ch_gets.max = v_looks;
+            hdr_loads.min = hdr_loads.max = v_loads;
+            hdr_gets.min = hdr_gets.max = v_gets;
+            /* the sums must start from the first element too: the loop
+             * below only adds the remaining ones, and the averages
+             * divide by the full chain length */
+            hdr_loads.sum = v_loads;
+            hdr_gets.sum = v_gets;
+
+            vp = queue_Next(vp, Volume);
+
+            /* pull in stats from remaining elements in chain */
+            for (queue_ScanFrom(head, vp, vp, np, Volume)) {
+                v_looks = UInt64ToDouble(&vp->stats.hash_lookups);
+                v_loads = UInt64ToDouble(&vp->stats.hdr_loads);
+                v_gets = UInt64ToDouble(&vp->stats.hdr_gets);
+
+                hdr_loads.sum += v_loads;
+                hdr_gets.sum += v_gets;
+
+                if (v_looks < ch_gets.min)
+                    ch_gets.min = v_looks;
+                else if (v_looks > ch_gets.max)
+                    ch_gets.max = v_looks;
+
+                if (v_loads < hdr_loads.min)
+                    hdr_loads.min = v_loads;
+                else if (v_loads > hdr_loads.max)
+                    hdr_loads.max = v_loads;
+
+                if (v_gets < hdr_gets.min)
+                    hdr_gets.min = v_gets;
+                else if (v_gets > hdr_gets.max)
+                    hdr_gets.max = v_gets;
+            }
+
+            /* compute per-chain averages (the chain is non-empty here) */
+            ch_gets.avg = ch_gets.sum / ((double)head->len);
+            hdr_loads.avg = hdr_loads.sum / ((double)head->len);
+            hdr_gets.avg = hdr_gets.sum / ((double)head->len);
+
+            /* dump per-chain stats */
+            Log("Volume hash chain %d : len=%d, looks=%s, reorders=%s\n",
+                i, head->len,
+                DoubleToPrintable(ch_looks.sum, pr_buf[0], sizeof(pr_buf[0])),
+                DoubleToPrintable(ch_reorders.sum, pr_buf[1], sizeof(pr_buf[1])));
+            Log("\tVolume gets : min=%s, max=%s, avg=%s, total=%s\n",
+                DoubleToPrintable(ch_gets.min, pr_buf[0], sizeof(pr_buf[0])),
+                DoubleToPrintable(ch_gets.max, pr_buf[1], sizeof(pr_buf[1])),
+                DoubleToPrintable(ch_gets.avg, pr_buf[2], sizeof(pr_buf[2])),
+                DoubleToPrintable(ch_gets.sum, pr_buf[3], sizeof(pr_buf[3])));
+            Log("\tHDR gets : min=%s, max=%s, avg=%s, total=%s\n",
+                DoubleToPrintable(hdr_gets.min, pr_buf[0], sizeof(pr_buf[0])),
+                DoubleToPrintable(hdr_gets.max, pr_buf[1], sizeof(pr_buf[1])),
+                DoubleToPrintable(hdr_gets.avg, pr_buf[2], sizeof(pr_buf[2])),
+                DoubleToPrintable(hdr_gets.sum, pr_buf[3], sizeof(pr_buf[3])));
+            Log("\tHDR loads : min=%s, max=%s, avg=%s, total=%s\n",
+                DoubleToPrintable(hdr_loads.min, pr_buf[0], sizeof(pr_buf[0])),
+                DoubleToPrintable(hdr_loads.max, pr_buf[1], sizeof(pr_buf[1])),
+                DoubleToPrintable(hdr_loads.avg, pr_buf[2], sizeof(pr_buf[2])),
+                DoubleToPrintable(hdr_loads.sum, pr_buf[3], sizeof(pr_buf[3])));
+        } else if (flags & VOL_STATS_PER_CHAIN) {
+            /* dump simple per-chain stats */
+            Log("Volume hash chain %d : len=%d, looks=%s, gets=%s, reorders=%s\n",
+                i, head->len,
+                DoubleToPrintable(ch_looks.sum, pr_buf[0], sizeof(pr_buf[0])),
+                DoubleToPrintable(ch_gets.sum, pr_buf[1], sizeof(pr_buf[1])),
+                DoubleToPrintable(ch_reorders.sum, pr_buf[2], sizeof(pr_buf[2])));
+        }
+
+        VOL_LOCK;
+        VHashEndExclusive_r(head);
+    }
+
+    /* the summaries only use local copies, so log them unlocked */
+    VOL_UNLOCK;
+
+    /* compute global averages */
+    len.avg = len.sum / ((double)VolumeHashTable.Size);
+    looks.avg = looks.sum / ((double)VolumeHashTable.Size);
+    gets.avg = gets.sum / ((double)VolumeHashTable.Size);
+    reorders.avg = reorders.sum / ((double)VolumeHashTable.Size);
+
+    /* dump global stats */
+    Log("Volume hash summary: %d buckets\n", VolumeHashTable.Size);
+    Log(" chain length : min=%s, max=%s, avg=%s, total=%s\n",
+        DoubleToPrintable(len.min, pr_buf[0], sizeof(pr_buf[0])),
+        DoubleToPrintable(len.max, pr_buf[1], sizeof(pr_buf[1])),
+        DoubleToPrintable(len.avg, pr_buf[2], sizeof(pr_buf[2])),
+        DoubleToPrintable(len.sum, pr_buf[3], sizeof(pr_buf[3])));
+    Log(" looks : min=%s, max=%s, avg=%s, total=%s\n",
+        DoubleToPrintable(looks.min, pr_buf[0], sizeof(pr_buf[0])),
+        DoubleToPrintable(looks.max, pr_buf[1], sizeof(pr_buf[1])),
+        DoubleToPrintable(looks.avg, pr_buf[2], sizeof(pr_buf[2])),
+        DoubleToPrintable(looks.sum, pr_buf[3], sizeof(pr_buf[3])));
+    Log(" gets : min=%s, max=%s, avg=%s, total=%s\n",
+        DoubleToPrintable(gets.min, pr_buf[0], sizeof(pr_buf[0])),
+        DoubleToPrintable(gets.max, pr_buf[1], sizeof(pr_buf[1])),
+        DoubleToPrintable(gets.avg, pr_buf[2], sizeof(pr_buf[2])),
+        DoubleToPrintable(gets.sum, pr_buf[3], sizeof(pr_buf[3])));
+    Log(" reorders : min=%s, max=%s, avg=%s, total=%s\n",
+        DoubleToPrintable(reorders.min, pr_buf[0], sizeof(pr_buf[0])),
+        DoubleToPrintable(reorders.max, pr_buf[1], sizeof(pr_buf[1])),
+        DoubleToPrintable(reorders.avg, pr_buf[2], sizeof(pr_buf[2])),
+        DoubleToPrintable(reorders.sum, pr_buf[3], sizeof(pr_buf[3])));
+
+    /* print extended disk related statistics */
+    {
+        struct DiskPartition * diskP;
+        afs_uint32 vol_count[VOLMAXPARTS+1];
+        byte part_exists[VOLMAXPARTS+1];
+        Device id;
+
+        memset(vol_count, 0, sizeof(vol_count));
+        memset(part_exists, 0, sizeof(part_exists));
+
+        VOL_LOCK;
+
+        for (diskP = DiskPartitionList; diskP; diskP = diskP->next) {
+            id = diskP->device;
+            /* NOTE(review): this assumes device is a partition index in
+             * 0..VOLMAXPARTS; anything else is skipped instead of being
+             * used to index the fixed-size tables -- confirm */
+            if ((afs_uint32)id > VOLMAXPARTS)
+                continue;
+            vol_count[id] = diskP->vol_list.len;
+            part_exists[id] = 1;
+        }
+
+        VOL_UNLOCK;
+        for (i = 0; i <= VOLMAXPARTS; i++) {
+            if (part_exists[i]) {
+                /* NOTE(review): an _r routine is called here with the
+                 * volume lock released -- confirm this is safe */
+                diskP = VGetPartitionById_r(i, 0);
+                if (diskP) {
+                    Log("Partition %s has %d online volumes\n",
+                        VPartitionPath(diskP), diskP->vol_list.len);
+                }
+            }
+        }
+        /* hand the lock back to the caller */
+        VOL_LOCK;
+    }
+
+}
+
+/* Public entry point: takes the volume lock, logs the extended cache
+ * statistics selected by flags (VOL_STATS_PER_CHAIN and/or
+ * VOL_STATS_PER_CHAIN2) and releases the lock again. */
+void
+VPrintExtendedCacheStats(int flags)
+{
+    VOL_LOCK;
+
+    VPrintExtendedCacheStats_r(flags);
+
+    VOL_UNLOCK;
+}
+#endif /* AFS_DEMAND_ATTACH_FS */
* This software has been released under the terms of the IBM Public
* License. For details, see the LICENSE file in the top-level source
* directory or online at http://www.openafs.org/dl/license10.html
+ *
+ * Portions Copyright (c) 2006 Sine Nomine Associates
*/
/*
#define VolumeWriteable2(vol) (vol.type == readwriteVolume)
typedef bit32 FileOffset; /* Offset in this file */
#define Date afs_uint32
+#include "daemon_com.h"
+#include "fssync.h"
#ifdef AFS_PTHREAD_ENV
#include <assert.h>
#include <pthread.h>
extern pthread_mutex_t vol_glock_mutex;
-extern pthread_mutex_t vol_attach_mutex;
-extern pthread_mutex_t vol_fsync_mutex;
extern pthread_mutex_t vol_trans_mutex;
extern pthread_cond_t vol_put_volume_cond;
extern pthread_cond_t vol_sleep_cond;
extern int vol_attach_threads;
-/* this lock has been deprecated */
-#define VATTACH_LOCK
-#define VATTACH_UNLOCK
#define VOL_LOCK \
assert(pthread_mutex_lock(&vol_glock_mutex) == 0)
#define VOL_UNLOCK \
assert(pthread_mutex_unlock(&vol_glock_mutex) == 0)
-#define VFSYNC_LOCK \
- assert(pthread_mutex_lock(&vol_fsync_mutex) == 0)
-#define VFSYNC_UNLOCK \
- assert(pthread_mutex_unlock(&vol_fsync_mutex) == 0)
+#define VSALVSYNC_LOCK \
+ assert(pthread_mutex_lock(&vol_salvsync_mutex) == 0)
+#define VSALVSYNC_UNLOCK \
+ assert(pthread_mutex_unlock(&vol_salvsync_mutex) == 0)
#define VTRANS_LOCK \
assert(pthread_mutex_lock(&vol_trans_mutex) == 0)
#define VTRANS_UNLOCK \
assert(pthread_mutex_unlock(&vol_trans_mutex) == 0)
#else /* AFS_PTHREAD_ENV */
-#define VATTACH_LOCK
-#define VATTACH_UNLOCK
#define VOL_LOCK
#define VOL_UNLOCK
-#define VFSYNC_LOCK
-#define VFSYNC_UNLOCK
+#define VSALVSYNC_LOCK
+#define VSALVSYNC_UNLOCK
#define VTRANS_LOCK
#define VTRANS_UNLOCK
#endif /* AFS_PTHREAD_ENV */
-typedef enum { fileServer, volumeUtility, salvager } ProgramType;
+typedef enum { fileServer, /* the fileserver process */
+ volumeUtility, /* volserver, or a single volume salvager (non-dafs) */
+ salvager, /* standalone whole-partition salvager */
+ salvageServer, /* dafs online salvager */
+ debugUtility /* fssync-debug or similar utility */
+} ProgramType;
extern ProgramType programType; /* The type of program using the package */
/* Some initialization parameters for the volume package */
* that created this file */
};
+#ifdef AFS_DEMAND_ATTACH_FS
+/*
+ * demand attach fs
+ * volume state machine
+ *
+ * these must be contiguous in order for IsValidState() to work correctly
+ */
+#define VOL_STATE_UNATTACHED 0 /* volume is unattached */
+#define VOL_STATE_PREATTACHED 1 /* volume has been pre-attached */
+#define VOL_STATE_ATTACHING 2 /* volume is transitioning to fully attached */
+#define VOL_STATE_ATTACHED 3 /* volume has been fully attached */
+#define VOL_STATE_UPDATING 4 /* volume is updating on-disk structures */
+#define VOL_STATE_GET_BITMAP 5 /* volume is getting bitmap entries */
+#define VOL_STATE_HDR_LOADING 6 /* volume is loading disk header */
+#define VOL_STATE_HDR_ATTACHING 7 /* volume is getting a header from the LRU */
+#define VOL_STATE_SHUTTING_DOWN 8 /* volume is shutting down */
+#define VOL_STATE_GOING_OFFLINE 9 /* volume is going offline */
+#define VOL_STATE_OFFLINING 10 /* volume is transitioning to offline */
+#define VOL_STATE_DETACHING 11 /* volume is transitioning to detached */
+#define VOL_STATE_SALVSYNC_REQ 12 /* volume is blocked on a salvsync request */
+#define VOL_STATE_SALVAGING 13 /* volume is being salvaged */
+#define VOL_STATE_ERROR 14 /* volume is in an error state */
+#define VOL_STATE_FREED 15 /* debugging aid */
+
+#define VOL_STATE_COUNT 16 /* total number of valid states */
+
+/* V_attachFlags bits */
+#define VOL_HDR_ATTACHED 0x1 /* volume header is attached to Volume struct */
+#define VOL_HDR_LOADED 0x2 /* volume header contents are valid */
+#define VOL_HDR_IN_LRU 0x4 /* volume header is in LRU */
+#define VOL_IN_HASH 0x8 /* volume is in hash table */
+#define VOL_ON_VBYP_LIST 0x10 /* volume is on VByP list */
+#define VOL_IS_BUSY 0x20 /* volume is not to be free()d */
+#define VOL_ON_VLRU 0x40 /* volume is on the VLRU */
+#define VOL_HDR_DONTSALV 0x80 /* volume header DONTSALVAGE flag is set */
+
+/* VPrintExtendedCacheStats flags */
+#define VOL_STATS_PER_CHAIN 0x1 /* compute simple per-chain stats */
+#define VOL_STATS_PER_CHAIN2 0x2 /* compute per-chain stats that require scanning
+ * every element of the chain */
+
+/* VLRU_SetOptions options */
+#define VLRU_SET_THRESH 1
+#define VLRU_SET_INTERVAL 2
+#define VLRU_SET_MAX 3
+#define VLRU_SET_ENABLED 4
+
+/* valid VLRU queue names */
+#define VLRU_QUEUE_NEW 0 /* LRU queue for new volumes */
+#define VLRU_QUEUE_MID 1 /* survivor generation */
+#define VLRU_QUEUE_OLD 2 /* old generation */
+#define VLRU_QUEUE_CANDIDATE 3 /* soft detach candidate pool */
+#define VLRU_QUEUE_HELD 4 /* volumes which are not allowed
+ * to be soft detached */
+#define VLRU_QUEUE_INVALID 5 /* invalid queue id */
+
+/* default scanner timing parameters */
+#define VLRU_DEFAULT_OFFLINE_THRESH (60*60*2) /* 2 hours */
+#define VLRU_DEFAULT_OFFLINE_INTERVAL (60*2) /* 2 minutes */
+#define VLRU_DEFAULT_OFFLINE_MAX 8 /* 8 volumes */
+
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+
/* Magic numbers and version stamps for each type of file */
#define VOLUMEHEADERMAGIC ((bit32)0x88a1bb3c)
#define VOLUMEINFOMAGIC ((bit32)0x78a1b2c5)
/**************************************/
/* Memory resident volume information */
/**************************************/
+
+/* global volume package stats
+ *
+ * The extended members at the top exist only when AFS_DEMAND_ATTACH_FS is
+ * defined, so the size and layout of this struct differ between builds. */
+typedef struct VolPkgStats {
+#ifdef AFS_DEMAND_ATTACH_FS
+ /*
+ * demand attach fs
+ * extended volume package statistics
+ */
+
+ /* levels */
+ afs_uint32 state_levels[VOL_STATE_COUNT]; /* one slot per VOL_STATE_* value; presumably the number of volumes currently in that state -- TODO confirm */
+
+ /* counters */
+ afs_uint64 hash_looks; /* number of hash chain element traversals */
+ afs_uint64 hash_reorders; /* number of hash chain reorders */
+ afs_uint64 salvages; /* online salvages since fileserver start */
+ afs_uint64 vol_ops; /* volume operations since fileserver start */
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+ afs_uint64 hdr_loads; /* header loads from disk */
+ afs_uint64 hdr_gets; /* header pulls out of LRU */
+ afs_uint64 attaches; /* volume attaches since fileserver start */
+ afs_uint64 soft_detaches; /* soft detach ops since fileserver start */
+
+ /* configuration parameters */
+ afs_uint32 hdr_cache_size; /* size of volume header cache */
+} VolPkgStats;
+extern VolPkgStats VStats; /* global instance; only declared here, defined elsewhere */
+
+/*
+ * volume header cache supporting structures
+ */
+#ifdef AFS_DEMAND_ATTACH_FS
+struct volume_hdr_LRU_stats { /* demand-attach-only counters, embedded in struct volume_hdr_LRU_t as "stats" */
+ afs_uint32 free; /* NOTE(review): presumably headers not assigned to any volume -- confirm */
+ afs_uint32 used; /* NOTE(review): presumably headers currently assigned to a volume -- confirm */
+ afs_uint32 attached; /* NOTE(review): presumably headers with VOL_HDR_ATTACHED set -- confirm */
+};
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+struct volume_hdr_LRU_t { /* volume header cache state; the global volume_hdr_LRU is declared below */
+ struct rx_queue lru; /* presumably the queue head that struct volHeader entries link onto through their own lru member -- TODO confirm */
+#ifdef AFS_DEMAND_ATTACH_FS
+ struct volume_hdr_LRU_stats stats; /* counters, present in demand attach builds only */
+#endif /* AFS_DEMAND_ATTACH_FS */
+};
+extern struct volume_hdr_LRU_t volume_hdr_LRU; /* only declared here, defined elsewhere */
+
+/*
+ * volume hash chain supporting structures
+ */
+typedef struct VolumeHashChainHead { /* head of one volume hash chain; VolumeHashTable.Table points at these */
+ struct rx_queue queue; /* presumably links the chain's Volume structs through Volume.q -- TODO confirm */
+ int len; /* presumably the number of volumes on this chain -- TODO confirm */
+ /* someday we could put a per-chain lock here... */
+#ifdef AFS_DEMAND_ATTACH_FS
+ int busy; /* NOTE(review): looks like an exclusive-use flag paired with chain_busy_cv -- confirm */
+ int cacheCheck; /* NOTE(review): presumably compared with Volume.chainCacheCheck to short-circuit lookups -- confirm */
+
+ /* per-chain statistics */
+ afs_uint64 looks; /* presumably the per-chain analogue of VolPkgStats.hash_looks -- TODO confirm */
+ afs_uint64 gets;
+ afs_uint64 reorders; /* presumably the per-chain analogue of VolPkgStats.hash_reorders -- TODO confirm */
+
+ pthread_cond_t chain_busy_cv; /* condition variable; presumably waited on while busy is set -- TODO confirm */
+#endif /* AFS_DEMAND_ATTACH_FS */
+} VolumeHashChainHead;
+
+typedef struct VolumeHashTable {
+ int Size; /* NOTE(review): presumably the number of chains in Table -- confirm */
+ int Mask; /* NOTE(review): presumably Size - 1, used to map a volume id onto a chain -- confirm */
+ VolumeHashChainHead * Table; /* pointer to the chain heads */
+} VolumeHashTable_t;
+extern VolumeHashTable_t VolumeHashTable; /* global instance; only declared here, defined elsewhere */
+
+struct VolumeHashChainStats { /* NOTE(review): members mirror VolumeHashTable.Size and the VolumeHashChainHead fields; presumably a per-chain snapshot for reporting -- confirm */
+ afs_int32 table_size;
+ afs_int32 chain_len;
+#ifdef AFS_DEMAND_ATTACH_FS
+ afs_int32 chain_cacheCheck;
+ afs_int32 chain_busy;
+ afs_uint64 chain_looks;
+ afs_uint64 chain_gets;
+ afs_uint64 chain_reorders;
+#endif /* AFS_DEMAND_ATTACH_FS */
+};
+
+
+#ifdef AFS_DEMAND_ATTACH_FS
+/* demand attach fs
+ * extended per-volume statistics
+ *
+ * please note that this structure lives across the entire
+ * lifetime of the fileserver process
+ */
+typedef struct VolumeStats {
+ /* counters */
+ afs_uint64 hash_lookups; /* hash table lookups */
+ afs_uint64 hash_short_circuits; /* short circuited hash lookups (due to cacheCheck) */
+ afs_uint64 hdr_loads; /* header loads from disk */
+ afs_uint64 hdr_gets; /* header pulls out of LRU */
+ /* NOTE(review): the four counters below are afs_uint16, while the
+ * same-named package-wide counters in VolPkgStats are afs_uint64;
+ * presumably they wrap on a long-running fileserver -- confirm that
+ * the narrow width is intended */
+ afs_uint16 attaches; /* attaches of this volume since fileserver start */
+ afs_uint16 soft_detaches; /* soft detaches of this volume */
+ afs_uint16 salvages; /* online salvages since fileserver start */
+ afs_uint16 vol_ops; /* volume operations since fileserver start */
+
+ /* timestamps */
+ afs_uint32 last_attach; /* unix timestamp of last VAttach */
+ afs_uint32 last_get; /* unix timestamp of last VGet/VHold */
+ afs_uint32 last_promote; /* unix timestamp of last VLRU promote/demote */
+ afs_uint32 last_hdr_get; /* unix timestamp of last GetVolumeHeader() */
+ afs_uint32 last_salvage; /* unix timestamp of last initiation of an online salvage */
+ afs_uint32 last_salvage_req; /* unix timestamp of last SALVSYNC request */
+ afs_uint32 last_vol_op; /* unix timestamp of last volume operation */
+} VolumeStats;
+
+/* demand attach fs
+ * online salvager state */
+typedef struct VolumeOnlineSalvage { /* embedded in struct Volume as "salvage" */
+ afs_uint32 prio; /* number of VGetVolume's since salvage requested */
+ int reason; /* reason for requesting online salvage; presumably the "reason" argument given to VRequestSalvage_r() -- TODO confirm */
+ byte requested; /* flag specifying that salvage should be scheduled */
+ byte scheduled; /* flag specifying whether online salvage scheduled */
+ byte reserved[2]; /* padding */
+} VolumeOnlineSalvage;
+
+/* demand attach fs
+ * volume LRU state */
+typedef struct VolumeVLRUState { /* embedded in struct Volume as "vlru" */
+ struct rx_queue lru; /* VLRU queue pointers */
+ int idx; /* VLRU generation index; presumably one of the VLRU_QUEUE_* values -- TODO confirm */
+} VolumeVLRUState;
+
+typedef afs_uint16 VolState; /* attachment state type */
+#endif /* AFS_DEMAND_ATTACH_FS */
+
typedef struct Volume {
- struct Volume *hashNext; /* Next in hash resolution table */
+ struct rx_queue q; /* Volume hash chain pointers */
VolumeId hashid; /* Volume number -- for hash table lookup */
struct volHeader *header; /* Cached disk data */
Device device; /* Unix device for the volume */
afs_uint32 updateTime; /* Time that this volume was put on the updated
* volume list--the list of volumes that will be
* salvaged should the file server crash */
+#ifdef AFS_DEMAND_ATTACH_FS
+ VolState attach_state; /* what stage of attachment has been completed
+ * (read through V_attachState(); presumably a
+ * VOL_STATE_* value -- TODO confirm) */
+ afs_uint16 attach_flags; /* flags related to attachment state
+ * (the "V_attachFlags bits": VOL_HDR_ATTACHED,
+ * VOL_IN_HASH, VOL_ON_VLRU, ...) */
+ pthread_cond_t attach_cv; /* state change condition variable */
+ short nWaiters; /* volume package internal ref count */
+ int chainCacheCheck; /* Volume hash chain cache check */
+ struct rx_queue vol_list; /* per-partition volume list (VByPList) */
+
+ VolumeOnlineSalvage salvage; /* online salvager state */
+ VolumeStats stats; /* per-volume statistics */
+ VolumeVLRUState vlru; /* state specific to the VLRU */
+ FSSYNC_VolOp_info * pending_vol_op; /* fssync command info for any pending vol ops */
+#endif /* AFS_DEMAND_ATTACH_FS */
} Volume;
struct volHeader {
- struct volHeader *prev, *next; /* LRU pointers */
+ struct rx_queue lru; /* LRU queue pointers (replaces the prev/next pair) */
VolumeDiskData diskstuff; /* General volume info read from disk */
Volume *back; /* back pointer to current volume structure */
};
#define V_vnodeIndex(vp) ((vp)->vnodeIndex)
#define V_nextVnodeUnique(vp) ((vp)->nextVnodeUnique)
#define V_linkHandle(vp) ((vp)->linkHandle)
+#ifdef AFS_DEMAND_ATTACH_FS /* accessors for the demand-attach-only members of Volume */
+#define V_attachState(vp) ((vp)->attach_state) /* expands to the member itself, so it can be read or assigned */
+#define V_attachFlags(vp) ((vp)->attach_flags) /* holds the "V_attachFlags bits" (VOL_HDR_ATTACHED, ...) */
+#define V_attachCV(vp) ((vp)->attach_cv) /* the pthread_cond_t object itself, not a pointer to it */
+#endif /* AFS_DEMAND_ATTACH_FS */
/* N.B. V_id must be this, rather than vp->id, or some programs will break, probably */
#define V_stamp(vp) ((vp)->header->diskstuff.stamp)
extern char *VSalvageMessage; /* Canonical message when a volume is forced
* offline */
-extern Volume *VGetVolume(Error * ec, VolId volumeId);
+extern Volume *VGetVolume(Error * ec, Error * client_ec, VolId volumeId);
extern Volume *VGetVolume_r(Error * ec, VolId volumeId);
extern void VPutVolume(Volume *);
extern void VPutVolume_r(Volume *);
extern void VOffline_r(Volume * vp, char *message);
extern int VConnectFS(void);
extern int VConnectFS_r(void);
+extern void VDisconnectFS(void);
+extern void VDisconnectFS_r(void);
+extern int VChildProcReconnectFS(void);
extern Volume *VAttachVolume(Error * ec, VolumeId volumeId, int mode);
extern Volume *VAttachVolume_r(Error * ec, VolumeId volumeId, int mode);
extern Volume *VCreateVolume(Error * ec, char *partname, VolId volumeId,
extern VnodeId VAllocBitmapEntry(Error * ec, Volume * vp,
struct vnodeIndex *index);
extern VnodeId VAllocBitmapEntry_r(Error * ec, Volume * vp,
- struct vnodeIndex *index);
+ struct vnodeIndex *index, int flags);
extern void VFreeBitMapEntry(Error * ec, register struct vnodeIndex *index,
unsigned bitNumber);
extern void VFreeBitMapEntry_r(Error * ec, register struct vnodeIndex *index,
int mode);
extern void VShutdown(void);
extern void VUpdateVolume(Error * ec, Volume * vp);
-extern void VUpdateVolume_r(Error * ec, Volume * vp);
+extern void VUpdateVolume_r(Error * ec, Volume * vp, int flags);
extern void VAddToVolumeUpdateList(Error * ec, Volume * vp);
extern void VAddToVolumeUpdateList_r(Error * ec, Volume * vp);
extern void VDetachVolume(Error * ec, Volume * vp);
extern void VDetachVolume_r(Error * ec, Volume * vp);
extern void VForceOffline(Volume * vp);
-extern void VForceOffline_r(Volume * vp);
+extern void VForceOffline_r(Volume * vp, int flags);
extern void VBumpVolumeUsage(register Volume * vp);
extern void VBumpVolumeUsage_r(register Volume * vp);
extern void VSetDiskUsage(void);
extern void VCloseVnodeFiles_r(Volume * vp);
extern struct DiskPartition *VGetPartition(char *name, int abortp);
extern struct DiskPartition *VGetPartition_r(char *name, int abortp);
-extern int VInitVolumePackage(ProgramType pt, int nLargeVnodes,
- int nSmallVnodes, int connect, int volcache);
+extern int VInitVolumePackage(ProgramType pt, afs_uint32 nLargeVnodes,
+ afs_uint32 nSmallVnodes, int connect, afs_uint32 volcache);
extern void DiskToVolumeHeader(VolumeHeader_t * h, VolumeDiskHeader_t * dh);
extern void VolumeHeaderToDisk(VolumeDiskHeader_t * dh, VolumeHeader_t * h);
extern void VTakeOffline_r(register Volume * vp);
extern void VTakeOffline(register Volume * vp);
+extern Volume * VLookupVolume_r(Error * ec, VolId volumeId, Volume * hint);
+
+#ifdef AFS_DEMAND_ATTACH_FS /* prototypes below are declared only in demand attach builds */
+extern Volume *VPreAttachVolumeByName(Error * ec, char *partition, char *name,
+ int mode);
+extern Volume *VPreAttachVolumeByName_r(Error * ec, char *partition, char *name,
+ int mode);
+extern Volume *VPreAttachVolumeById_r(Error * ec, struct DiskPartition * partp,
+ Volume * vp, int volume_id); /* NOTE(review): volume_id is a plain int here, while the other prototypes take VolId/VolumeId -- confirm this is intended */
+extern Volume *VGetVolumeByVp_r(Error * ec, Volume * vp);
+extern int VShutdownByPartition_r(struct DiskPartition * dp);
+extern int VShutdownVolume_r(Volume * vp);
+extern int VConnectSALV(void);
+extern int VConnectSALV_r(void);
+extern int VReconnectSALV(void);
+extern int VReconnectSALV_r(void);
+extern int VDisconnectSALV(void);
+extern int VDisconnectSALV_r(void);
+extern void VPrintExtendedCacheStats(int flags); /* flags: VOL_STATS_PER_CHAIN* bits */
+extern void VPrintExtendedCacheStats_r(int flags); /* flags: VOL_STATS_PER_CHAIN* bits */
+extern VolState VChangeState_r(Volume * vp, VolState new_state); /* NOTE(review): new_state is presumably a VOL_STATE_* value and the return value the previous state -- confirm */
+extern void VLRU_SetOptions(int option, afs_uint32 val); /* option: one of the VLRU_SET_* values */
+extern int VSetVolHashSize(int logsize); /* NOTE(review): logsize is presumably log2 of the hash table size -- confirm */
+extern int VRequestSalvage_r(Volume * vp, int reason, int flags); /* flags: VOL_SALVAGE_* bits */
+extern int VRegisterVolOp_r(Volume * vp, FSSYNC_VolOp_info * vopinfo);
+extern int VDeregisterVolOp_r(Volume * vp, FSSYNC_VolOp_info * vopinfo);
+#endif /* AFS_DEMAND_ATTACH_FS */
+extern int VVolOpLeaveOnline_r(Volume * vp, FSSYNC_VolOp_info * vopinfo);
+extern int VVolOpSetVBusy_r(Volume * vp, FSSYNC_VolOp_info * vopinfo);
/* Naive formula relating number of file size to number of 1K blocks in file */
* getting the most recent data. */
+
+/* VUpdateVolume_r flags */
+#define VOL_UPDATE_WAIT 0x1 /* for demand attach, wait for other exclusive ops to end */
+#define VOL_UPDATE_NOFORCEOFF 0x2 /* don't force offline on failure. this is to prevent
+ * infinite recursion between vupdate and vforceoff */
+
+/* VForceOffline_r flags */
+#define VOL_FORCEOFF_NOUPDATE 0x1 /* don't force update on forceoff. this is to prevent
+ * infinite recursion between vupdate and vforceoff */
+
+/* VSyncVolume_r flags */
+#define VOL_SYNC_WAIT 0x1 /* for demand attach, wait for other exclusive ops to end */
+
+/* VAllocBitmapEntry_r flags */
+#define VOL_ALLOC_BITMAP_WAIT 0x1 /* for demand attach, wait for other exclusive ops to end */
+
+/* VRequestSalvage_r flags */
+#define VOL_SALVAGE_INVALIDATE_HEADER 0x1 /* for demand attach fs, invalidate volume header cache */
+
+
#if defined(NEARINODE_HINT)
#define V_pref(vp,nearInode) nearInodeHash(V_id(vp),(nearInode)); (nearInode) %= V_partition(vp)->f_files
#else
# License. For details, see the LICENSE file in the top-level source
# directory or online at http://www.openafs.org/dl/license10.html
+AFSDEV_AUXCDEFINES = -DFSSYNC_BUILD_CLIENT
+
RELDIR=volser
!INCLUDE ..\config\NTMakefile.$(SYS_NAME)
!INCLUDE ..\config\NTMakefile.version
#include <afs/volume.h>
#include <afs/partition.h>
#include "dump.h"
+#include <afs/daemon_com.h>
#include <afs/fssync.h>
#include <afs/acl.h>
#include "volser.h"
#include <afs/volume.h>
#include <afs/partition.h>
#include "vol.h"
+#include <afs/daemon_com.h>
#include <afs/fssync.h>
#include <afs/acl.h>
#include "afs/audit.h"
{
struct DiskPartition *tpartp = originalvp->partition;
- FSYNC_askfs(cloneId, tpartp->name, FSYNC_RESTOREVOLUME, 0);
+ FSYNC_VolOp(cloneId, tpartp->name, FSYNC_VOL_BREAKCBKS, 0, NULL);
}
return 0;
DFlushVolume(V_parentId(tt->volume)); /* Ensure dir buffers get dropped */
code = RestoreVolume(acid, tt->volume, (aflags & 1), cookie); /* last is incrementalp */
- FSYNC_askfs(tt->volid, NULL, FSYNC_RESTOREVOLUME, 0l); /*break call backs on the
- * restored volume */
+ FSYNC_VolOp(tt->volid, NULL, FSYNC_VOL_BREAKCBKS, 0l, NULL);
tt->rxCallPtr = (struct rx_call *)0;
tcode = TRELE(tt);
}
strcpy(tt->lastProcName, "SetForwarding");
tt->rxCallPtr = acid;
- FSYNC_askfs(tt->volid, NULL, FSYNC_MOVEVOLUME, anewsite);
+ FSYNC_VolOp(tt->volid, NULL, FSYNC_VOL_MOVE, anewsite, NULL);
tt->rxCallPtr = (struct rx_call *)0;
if (TRELE(tt))
return VOLSERTRELE_ERROR;
/* Only report attached partitions */
for (i = 0; i < VOLMAXPARTS; i++) {
+#ifdef AFS_DEMAND_ATTACH_FS
+ dp = VGetPartitionById(i, 0);
+#else
if (i < 26) {
namehead[6] = i + 'a';
namehead[7] = '\0';
namehead[8] = '\0';
}
dp = VGetPartition(namehead, 0);
+#endif
if (dp)
partList.partId[j++] = i;
}
pntr->volid = volid;
goto drop;
}
- tv = VAttachVolumeByName(&error, pname, volname, V_READONLY);
+ tv = VAttachVolumeByName(&error, pname, volname, V_PEEK);
if (error) {
pntr->status = 0; /*things are messed up */
strcpy(pntr->name, volname);
/*
* Attach the volume, give up on the volume if we can't.
*/
- tv = VAttachVolumeByName(&error, pname, volname, V_READONLY);
+ tv = VAttachVolumeByName(&error, pname, volname, V_PEEK);
if (error) {
xInfoP->status = 0; /*things are messed up */
strcpy(xInfoP->name, volname);
return EIO;
}
close(fd);
- FSYNC_askfs(volumeId, pname, FSYNC_RESTOREVOLUME, 0);
+ FSYNC_VolOp(volumeId, pname, FSYNC_VOL_BREAKCBKS, 0, NULL);
for (dp = DiskPartitionList; dp && strcmp(dp->name, pname);
dp = dp->next);
if (unlink(opath) < 0) {
Log("1 SAFS_VolConvertROtoRWvolume: Couldn't unlink RO header, error = %d\n", error);
}
- FSYNC_askfs(volumeId, pname, FSYNC_DONE, 0);
- FSYNC_askfs(h.id, pname, FSYNC_ON, 0);
+ FSYNC_VolOp(volumeId, pname, FSYNC_VOL_DONE, 0, NULL);
+ FSYNC_VolOp(h.id, pname, FSYNC_VOL_ON, 0, NULL);
return 0;
#else /* AFS_NAMEI_ENV */
return EINVAL;
#include <pthread.h>
#endif
+#include <afs/voldefs.h>
+
/* vflags, representing state of the volume */
#define VTDeleteOnSalvage 1 /* delete on next salvage */
#define VTOutOfService 2 /* never put this volume online */
#define INVALID_BID 0
#define VOLSER_MAXVOLNAME 65
#define VOLSER_OLDMAXVOLNAME 32
-#define VOLMAXPARTS 255
/*flags used for interfacing with the backup system */
struct volDescription { /*used for interfacing with the backup system */