trying to attach the volume later, only to find the volume is not
blessed and take the volume offline.
+ -- FSYNC_VG_QUERY (DAFS only)
+
+This queries the fileserver VGC (volume group cache) for the volume
+group of the requested volume. The payload consists of an
+FSSYNC_VGQry_response_t, specifying the volume group and all of the
+volumes in that volume group.
+
+If the VGC for the requested partition is currently being populated,
+this will fail with SYNC_FAILED, and the FSYNC_PART_SCANNING reason
+code. If the VGC for the requested partition is currently completely
+unpopulated, a VGC scan for the partition will be started automatically
+in the background, and FSYNC_PART_SCANNING will still be returned.
+
+The demand-salvager uses this to find out what volumes are in the volume
+group it is salvaging; it can also be used for debugging the VGC.
+
+ -- FSYNC_VG_SCAN (DAFS only)
+
+This discards any information in the VGC for the specified partition,
+and re-scans the partition to populate the VGC in the background. This
+should normally not be needed, since scans start automatically when VGC
+information is requested. This can be used as a debugging tool, or to
+force the VGC to discard incorrect information that somehow got into the
+VGC.
+
+Note that the scan is scheduled in the background, so getting a response
+from this command does not imply that the scan has started; it may start
+sometime in the future.
+
+ -- FSYNC_VG_SCAN_ALL (DAFS only)
+
+This is the same as FSYNC_VG_SCAN, but schedules scans for all
+partitions on the fileserver, instead of a particular one.
+
-- FSYNC_VOL_QUERY_VNODE
Asks the fileserver for information about specific vnode. This takes a
This is intended to retrieve stats for the VLRU generation specified in
sop->vlru_generation. However, it is not yet implemented and currently
always results in a SYNC_BAD_COMMAND result from the fileserver.
+
+ -- VGC update FSSYNC commands
+
+FSSYNC commands involving updating the VGC (volume group cache) take an
+FSSYNC_VGUpdate_command struct as their command arguments. The parent
+and child fields specify the (parent,child) entry in the partName VGC to
+add or remove.
+
+ -- FSYNC_VG_ADD (DAFS only)
+
+Adds an entry to the fileserver VGC. This merely adds the specified
+child volume to the specified parent volume group, and creates the
+parent volume group if it does not exist. This is used by programs that
+create new volumes, in order to keep the VGC up to date.
+
+ -- FSYNC_VG_DEL (DAFS only)
+
+Deletes an entry from the fileserver VGC. This merely removes the
+specified child volume from the specified parent volume group, deleting
+the volume group if the last entry was deleted. This is used by programs
+that destroy volumes, in order to keep the VGC up to date.
VOLOBJS= vnode.o volume.o vutil.o partition.o fssync-server.o \
clone.o devname.o common.o ihandle.o listinodes.o namei_ops.o \
- fstab.o salvsync-client.o daemon_com.o
+ fstab.o salvsync-client.o daemon_com.o vg_cache.o vg_scan.o
FSINTOBJS= afsaux.o afscbint.cs.o afsint.ss.o afsint.xdr.o
partition.o: ${VOL}/partition.c
${CCRULE}
+vg_cache.o: ${VOL}/vg_cache.c
+ ${CCRULE}
+
+vg_scan.o: ${VOL}/vg_scan.c
+ ${CCRULE}
+
fssync-server.o: ${VOL}/fssync-server.c
${CCRULE}
return code;
}
+
+/**
+ * FSSYNC generic operations client interface.
+ *
+ * @param[in]  ext_hdr  command-specific payload sent with the request
+ * @param[in]  ext_len  length of ext_hdr, in bytes
+ * @param[in]  command  FSSYNC command code
+ * @param[in]  reason   FSSYNC reason sub-code
+ * @param[out] res      response message
+ *
+ * @return operation status
+ * @retval SYNC_OK success
+ */
afs_int32
FSYNC_GenericOp(void * ext_hdr, size_t ext_len,
int command, int reason,
return FSYNC_GenericOp(scom, sizeof(*scom), command, reason, res);
}
+/**
+ * Ask the fileserver's volume group cache (VGC) about a volume.
+ *
+ * The response header length and payload buffer are primed here so that
+ * the reply payload lands in qry; the request is then issued as an
+ * FSYNC_VG_QUERY volume operation with a reason code of 0.
+ *
+ * @param[in]  part   vice partition path
+ * @param[in]  volid  volume id
+ * @param[out] qry    query response object
+ * @param[out] res    SYNC response message; may be NULL, in which case a
+ *                    local scratch response is used instead
+ *
+ * @return operation status, as returned by FSYNC_VolOp
+ *   @retval SYNC_OK success
+ */
+afs_int32
+FSYNC_VGCQuery(char * part,
+               VolumeId volid,
+               FSSYNC_VGQry_response_t * qry,
+               SYNC_response *res)
+{
+    SYNC_response scratch;
+    SYNC_response *reply = (res != NULL) ? res : &scratch;
+
+    /* point the reply payload at the caller-supplied query object */
+    reply->payload.len = sizeof(*qry);
+    reply->payload.buf = qry;
+    reply->hdr.response_len = sizeof(reply->hdr);
+
+    return FSYNC_VolOp(volid, part, FSYNC_VG_QUERY, 0, reply);
+}
+
+/**
+ * Send a VGC update command (add or delete an entry) to the fileserver.
+ *
+ * Builds an FSSYNC_VGUpdate_command_t from the arguments and hands it to
+ * FSYNC_GenericOp as the command payload.
+ *
+ * @param[in]  partition  name of vice partition on which this VG resides;
+ *                        if NULL, the partition name in the command is
+ *                        left empty
+ * @param[in]  parent     rw volume
+ * @param[in]  child      volume id to add or remove
+ * @param[in]  opcode     FSSYNC VG cache opcode
+ * @param[in]  reason     FSSYNC reason code
+ * @param[out] res        SYNC response message
+ *
+ * @return operation status, as returned by FSYNC_GenericOp
+ *   @retval SYNC_OK success
+ *
+ * @internal
+ */
+static afs_int32
+_FSYNC_VGCUpdate(char * partition,
+                 VolumeId parent,
+                 VolumeId child,
+                 int opcode,
+                 int reason,
+                 SYNC_response *res)
+{
+    FSSYNC_VGUpdate_command_t update;
+
+    /* zero everything first; partName stays empty when no partition is given */
+    memset(&update, 0, sizeof(update));
+
+    if (partition != NULL) {
+        strlcpy(update.partName, partition, sizeof(update.partName));
+    }
+    update.child = child;
+    update.parent = parent;
+
+    return FSYNC_GenericOp(&update, sizeof(update), opcode, reason, res);
+}
+
+/**
+ * Add a volume to a volume group in the fileserver's VGC.
+ *
+ * Thin wrapper that issues an FSYNC_VG_ADD update.
+ *
+ * @param[in]  partition  name of vice partition on which this VG resides
+ * @param[in]  parent     rw volume
+ * @param[in]  child      volume id to add
+ * @param[in]  reason     FSSYNC reason code
+ * @param[out] res        SYNC response message
+ *
+ * @return operation status
+ *   @retval SYNC_OK success
+ */
+afs_int32
+FSYNC_VGCAdd(char * partition,
+             VolumeId parent,
+             VolumeId child,
+             int reason,
+             SYNC_response *res)
+{
+    afs_int32 code;
+
+    code = _FSYNC_VGCUpdate(partition, parent, child, FSYNC_VG_ADD, reason, res);
+    return code;
+}
+
+/**
+ * Remove a volume from a volume group in the fileserver's VGC.
+ *
+ * Thin wrapper that issues an FSYNC_VG_DEL update.
+ *
+ * @param[in]  partition  name of vice partition on which this VG resides
+ * @param[in]  parent     rw volume
+ * @param[in]  child      volume id to remove
+ * @param[in]  reason     FSSYNC reason code
+ * @param[out] res        SYNC response message
+ *
+ * @return operation status
+ *   @retval SYNC_OK success
+ */
+afs_int32
+FSYNC_VGCDel(char * partition,
+             VolumeId parent,
+             VolumeId child,
+             int reason,
+             SYNC_response *res)
+{
+    afs_int32 code;
+
+    code = _FSYNC_VGCUpdate(partition, parent, child, FSYNC_VG_DEL, reason, res);
+    return code;
+}
+
+/**
+ * Request an asynchronous volume group scan.
+ *
+ * @param[in] partition  vice partition string, or NULL for all partitions
+ * @param[in] reason     FSSYNC reason code
+ *
+ * @note With a NULL partition the request is sent as FSYNC_VG_SCAN_ALL
+ *       with an empty partition name; otherwise FSYNC_VG_SCAN is sent for
+ *       the named partition. No response object is collected either way.
+ *
+ * @return operation status, as returned by FSYNC_VolOp
+ *   @retval SYNC_OK success
+ */
+afs_int32
+FSYNC_VGCScan(char * partition, int reason)
+{
+    if (partition != NULL) {
+        return FSYNC_VolOp(0, partition, FSYNC_VG_SCAN, reason, NULL);
+    }
+
+    /* no partition named: ask for every partition to be scanned */
+    return FSYNC_VolOp(0, "", FSYNC_VG_SCAN_ALL, reason, NULL);
+}
#endif /* FSSYNC_BUILD_CLIENT */
/*
- * Copyright 2006-2008, Sine Nomine Associates and others.
+ * Copyright 2006-2010, Sine Nomine Associates and others.
* All Rights Reserved.
*
* This software has been released under the terms of the IBM Public
#include "partition.h"
#include "daemon_com.h"
#include "fssync.h"
+#include "vg_cache.h"
#ifdef AFS_NT40_ENV
#include <pthread.h>
#endif
static int StatsQuery(struct cmd_syndesc * as, void * rock);
static int VnQuery(struct cmd_syndesc * as, void * rock);
+static int VGCQuery(struct cmd_syndesc * as, void * rock);
+static int VGCAdd(struct cmd_syndesc * as, void * rock);
+static int VGCDel(struct cmd_syndesc * as, void * rock);
+static int VGCScan(struct cmd_syndesc * as, void * rock);
+static int VGCScanAll(struct cmd_syndesc * as, void * rock);
+
static void print_vol_stats_general(VolPkgStats * stats);
static void print_vol_stats_viceP(struct DiskPartitionStats64 * stats);
static void print_vol_stats_hash(struct VolumeHashChainStats * stats);
cmd_AddParm(ts, "-arg2", CMD_SINGLE, CMD_OPTIONAL, "arg2");
COMMON_PARMS_DECL(ts);
+ ts = cmd_CreateSyntax("vgcquery", VGCQuery, 0, "query volume group cache (FSYNC_VG_QUERY opcode)");
+ cmd_Seek(ts, CUSTOM_PARMS_OFFSET);
+ cmd_AddParm(ts, "-partition", CMD_SINGLE, 0, "partition name");
+ cmd_AddParm(ts, "-volumeid", CMD_SINGLE, 0, "volume id");
+ COMMON_PARMS_DECL(ts);
+ cmd_CreateAlias(ts, "vgcqry");
+
+ ts = cmd_CreateSyntax("vgcadd", VGCAdd, 0, "add entry to volume group cache (FSYNC_VG_ADD opcode)");
+ cmd_Seek(ts, CUSTOM_PARMS_OFFSET);
+ cmd_AddParm(ts, "-partition", CMD_SINGLE, 0, "partition name");
+ cmd_AddParm(ts, "-parent", CMD_SINGLE, 0, "parent volume id");
+ cmd_AddParm(ts, "-child", CMD_SINGLE, 0, "child volume id");
+ COMMON_PARMS_DECL(ts);
+
+ ts = cmd_CreateSyntax("vgcdel", VGCDel, 0, "delete entry from volume group cache (FSYNC_VG_DEL opcode)");
+ cmd_Seek(ts, CUSTOM_PARMS_OFFSET);
+ cmd_AddParm(ts, "-partition", CMD_SINGLE, 0, "partition name");
+ cmd_AddParm(ts, "-parent", CMD_SINGLE, 0, "parent volume id");
+ cmd_AddParm(ts, "-child", CMD_SINGLE, 0, "child volume id");
+ COMMON_PARMS_DECL(ts);
+
+ ts = cmd_CreateSyntax("vgcscan", VGCScan, 0,
+ "start volume group cache re-scan"
+ " (FSYNC_VG_SCAN opcode)");
+ cmd_Seek(ts, CUSTOM_PARMS_OFFSET);
+ cmd_AddParm(ts, "-partition", CMD_SINGLE, 0, "partition name");
+ COMMON_PARMS_DECL(ts);
+
+ ts = cmd_CreateSyntax("vgcscanall", VGCScanAll, 0,
+ "start whole-server volume group cache re-scan"
+ " (FSYNC_VG_SCAN_ALL opcode)");
+ COMMON_PARMS_DECL(ts);
+
err = cmd_Dispatch(argc, argv);
exit(err);
}
}
+/**
+ * Print the outcome of an FSSYNC call to stderr.
+ *
+ * Emits a warning when the code is neither SYNC_OK nor SYNC_DENIED, then
+ * the call's return code, and (when a response object is supplied) the
+ * response and reason codes from the protocol header.
+ *
+ * @param[in] code  return code of the FSSYNC client call
+ * @param[in] res   response message, or NULL if none was collected
+ *
+ * @return always 0
+ */
static int
+debug_response(afs_int32 code, SYNC_response * res)
+{
+    if (code != SYNC_OK && code != SYNC_DENIED) {
+        fprintf(stderr, "warning: response code indicates possible protocol error.\n");
+    }
+
+    fprintf(stderr, "FSSYNC service returned %d (%s)\n", code, response_code_to_string(code));
+
+    if (res == NULL) {
+        /* no header to report on */
+        return 0;
+    }
+
+    fprintf(stderr, "protocol header response code was %d (%s)\n",
+            res->hdr.response, response_code_to_string(res->hdr.response));
+    fprintf(stderr, "protocol reason code was %d (%s)\n",
+            res->hdr.reason, reason_code_to_string(res->hdr.reason));
+
+    return 0;
+}
+
+static int
do_volop(struct state * state, afs_int32 command, SYNC_response * res)
{
afs_int32 code;
state->reason,
res);
- switch (code) {
- case SYNC_OK:
- case SYNC_DENIED:
- break;
- default:
- fprintf(stderr, "possible sync protocol error. return code was %d\n", code);
- }
-
- fprintf(stderr, "FSYNC_VolOp returned %d (%s)\n", code, response_code_to_string(code));
- fprintf(stderr, "protocol response code was %d (%s)\n",
- res->hdr.response, response_code_to_string(res->hdr.response));
- fprintf(stderr, "protocol reason code was %d (%s)\n",
- res->hdr.reason, reason_code_to_string(res->hdr.reason));
+ debug_response(code, res);
VDisconnectFS();
ENUMCASE(FSYNC_VOL_FORCE_ERROR);
ENUMCASE(FSYNC_VOL_LEAVE_OFF);
ENUMCASE(FSYNC_VOL_QUERY_VNODE);
+ ENUMCASE(FSYNC_VG_QUERY);
+ ENUMCASE(FSYNC_VG_ADD);
+ ENUMCASE(FSYNC_VG_DEL);
+ ENUMCASE(FSYNC_VG_SCAN);
+ ENUMCASE(FSYNC_VG_SCAN_ALL);
+
default:
return "**UNKNOWN**";
}
ENUMCASE(FSYNC_NO_PENDING_VOL_OP);
ENUMCASE(FSYNC_VOL_PKG_ERROR);
ENUMCASE(FSYNC_UNKNOWN_VNID);
+ ENUMCASE(FSYNC_WRONG_PART);
+ ENUMCASE(FSYNC_BAD_STATE);
+ ENUMCASE(FSYNC_BAD_PART);
+ ENUMCASE(FSYNC_PART_SCANNING);
default:
return "**UNKNOWN**";
}
code = FSYNC_GenericOp(&qry, sizeof(qry), command, FSYNC_OPERATOR, res);
- switch (code) {
- case SYNC_OK:
- case SYNC_DENIED:
- break;
- default:
- fprintf(stderr, "possible sync protocol error. return code was %d\n", code);
- }
-
- fprintf(stderr, "FSYNC_GenericOp returned %d (%s)\n", code, response_code_to_string(code));
- fprintf(stderr, "protocol response code was %d (%s)\n",
- res->hdr.response, response_code_to_string(res->hdr.response));
- fprintf(stderr, "protocol reason code was %d (%s)\n",
- res->hdr.reason, reason_code_to_string(res->hdr.reason));
+ debug_response(code, res);
VDisconnectFS();
}
#endif /* AFS_DEMAND_ATTACH_FS */
+
+/**
+ * query VGC.
+ *
+ * Sends an FSYNC_VG_QUERY for the given partition and volume id, reports
+ * the protocol result on stderr, and on success prints the volume group
+ * (rw id plus every non-zero child id) on stdout.
+ *
+ * @param[in] as    parsed command line
+ * @param[in] rock  unused
+ *
+ * @note args:
+ *    - CUSTOM_PARMS_OFFSET+0 is partition string
+ *    - CUSTOM_PARMS_OFFSET+1 is volume id
+ *
+ * @return operation status
+ *    @retval 0  command was issued (regardless of the FSSYNC result)
+ *    @retval -1 a required argument was missing
+ */
+static int
+VGCQuery(struct cmd_syndesc * as, void * rock)
+{
+    afs_int32 code;
+    struct state state;
+    char * partName;
+    VolumeId volid;
+    FSSYNC_VGQry_response_t q_res;
+    SYNC_response res;
+    int i;
+    struct cmd_item *ti;
+
+    if (!(ti = as->parms[CUSTOM_PARMS_OFFSET+0].items)) { /* -partition */
+        return -1;
+    }
+    partName = ti->data;
+
+    if (!(ti = as->parms[CUSTOM_PARMS_OFFSET+1].items)) { /* -volumeid */
+        return -1;
+    }
+    /* volume ids travel as unsigned 32-bit values in the FSSYNC messages;
+     * atoi() cannot represent ids above INT_MAX, so parse as unsigned */
+    volid = (VolumeId) strtoul(ti->data, NULL, 10);
+
+    common_prolog(as, &state);
+
+    fprintf(stderr, "calling FSYNC_VGCQuery\n");
+
+    code = FSYNC_VGCQuery(partName, volid, &q_res, &res);
+
+    debug_response(code, &res);
+
+    if (code == SYNC_OK) {
+        printf("VG = {\n");
+        printf("\trw\t=\t%u\n", q_res.rw);
+        printf("\tchildren\t= (\n");
+        for (i = 0; i < VOL_VG_MAX_VOLS; i++) {
+            /* zero entries are skipped */
+            if (q_res.children[i]) {
+                printf("\t\t%u\n", q_res.children[i]);
+            }
+        }
+        printf("\t)\n");
+        /* close the brace opened by "VG = {" */
+        printf("}\n");
+    }
+
+    VDisconnectFS();
+
+    return 0;
+}
+
+/**
+ * add an entry to the VGC.
+ *
+ * Sends an FSYNC_VG_ADD for the given (parent, child) pair on the given
+ * partition and reports the protocol result on stderr.
+ *
+ * @param[in] as    parsed command line
+ * @param[in] rock  unused
+ *
+ * @note args:
+ *    - CUSTOM_PARMS_OFFSET+0 is partition string
+ *    - CUSTOM_PARMS_OFFSET+1 is parent volume id
+ *    - CUSTOM_PARMS_OFFSET+2 is child volume id
+ *
+ * @return operation status
+ *    @retval 0  command was issued (regardless of the FSSYNC result)
+ *    @retval -1 a required argument was missing
+ */
+static int
+VGCAdd(struct cmd_syndesc * as, void * rock)
+{
+    afs_int32 code;
+    struct state state;
+    char * partName;
+    VolumeId parent, child;
+    struct cmd_item *ti;
+    SYNC_response res;
+
+    if (!(ti = as->parms[CUSTOM_PARMS_OFFSET+0].items)) { /* -partition */
+        return -1;
+    }
+    partName = ti->data;
+
+    /* volume ids travel as unsigned 32-bit values in the FSSYNC messages;
+     * atoi() cannot represent ids above INT_MAX, so parse as unsigned */
+    if (!(ti = as->parms[CUSTOM_PARMS_OFFSET+1].items)) { /* -parent */
+        return -1;
+    }
+    parent = (VolumeId) strtoul(ti->data, NULL, 10);
+
+    if (!(ti = as->parms[CUSTOM_PARMS_OFFSET+2].items)) { /* -child */
+        return -1;
+    }
+    child = (VolumeId) strtoul(ti->data, NULL, 10);
+
+    common_prolog(as, &state);
+    fprintf(stderr, "calling FSYNC_VGCAdd\n");
+    code = FSYNC_VGCAdd(partName, parent, child, state.reason, &res);
+    debug_response(code, &res);
+
+    VDisconnectFS();
+
+    return 0;
+}
+
+/**
+ * delete an entry from the VGC.
+ *
+ * Sends an FSYNC_VG_DEL for the given (parent, child) pair on the given
+ * partition and reports the protocol result on stderr.
+ *
+ * @param[in] as    parsed command line
+ * @param[in] rock  unused
+ *
+ * @note args:
+ *    - CUSTOM_PARMS_OFFSET+0 is partition string
+ *    - CUSTOM_PARMS_OFFSET+1 is parent volume id
+ *    - CUSTOM_PARMS_OFFSET+2 is child volume id
+ *
+ * @return operation status
+ *    @retval 0  command was issued (regardless of the FSSYNC result)
+ *    @retval -1 a required argument was missing
+ */
+static int
+VGCDel(struct cmd_syndesc * as, void * rock)
+{
+    afs_int32 code;
+    struct state state;
+    char * partName;
+    VolumeId parent, child;
+    struct cmd_item *ti;
+    SYNC_response res;
+
+    if (!(ti = as->parms[CUSTOM_PARMS_OFFSET+0].items)) { /* -partition */
+        return -1;
+    }
+    partName = ti->data;
+
+    /* volume ids travel as unsigned 32-bit values in the FSSYNC messages;
+     * atoi() cannot represent ids above INT_MAX, so parse as unsigned */
+    if (!(ti = as->parms[CUSTOM_PARMS_OFFSET+1].items)) { /* -parent */
+        return -1;
+    }
+    parent = (VolumeId) strtoul(ti->data, NULL, 10);
+
+    if (!(ti = as->parms[CUSTOM_PARMS_OFFSET+2].items)) { /* -child */
+        return -1;
+    }
+    child = (VolumeId) strtoul(ti->data, NULL, 10);
+
+    /* NOTE(review): VGCAdd does not pre-seed state.reason before calling
+     * common_prolog; presumably common_prolog may overwrite it -- confirm */
+    state.reason = FSYNC_WHATEVER;
+
+    common_prolog(as, &state);
+    fprintf(stderr, "calling FSYNC_VGCDel\n");
+    code = FSYNC_VGCDel(partName, parent, child, state.reason, &res);
+    debug_response(code, &res);
+
+    VDisconnectFS();
+
+    return 0;
+}
+
+/**
+ * start a VGC re-scan of one partition.
+ *
+ * Calls FSYNC_VGCScan for the named partition and reports the return
+ * code on stderr; no response object is available for this call.
+ *
+ * @param[in] as    parsed command line
+ * @param[in] rock  unused
+ *
+ * @note args:
+ *    - CUSTOM_PARMS_OFFSET+0 is partition string
+ *
+ * @return operation status
+ *    @retval 0  command was issued (regardless of the FSSYNC result)
+ *    @retval -1 the partition argument was missing
+ */
+static int
+VGCScan(struct cmd_syndesc * as, void * rock)
+{
+    afs_int32 code;
+    struct state state;
+    char * partName;
+    struct cmd_item *ti;
+
+    if (!(ti = as->parms[CUSTOM_PARMS_OFFSET+0].items)) { /* -partition */
+        return -1;
+    }
+    partName = ti->data;
+
+    common_prolog(as, &state);
+    fprintf(stderr, "calling FSYNC_VGCScan\n");
+    code = FSYNC_VGCScan(partName, state.reason);
+    debug_response(code, NULL);
+
+    VDisconnectFS();
+
+    return 0;
+}
+
+/**
+ * start a VGC re-scan of every partition.
+ *
+ * Calls FSYNC_VGCScan with a NULL partition, which requests a scan of all
+ * partitions, and reports the return code on stderr; no response object
+ * is available for this call.
+ *
+ * @param[in] as    parsed command line
+ * @param[in] rock  unused
+ *
+ * @return operation status
+ *    @retval 0  command was issued (regardless of the FSSYNC result)
+ */
+static int
+VGCScanAll(struct cmd_syndesc * as, void * rock)
+{
+    afs_int32 code;
+    struct state state;
+
+    common_prolog(as, &state);
+    fprintf(stderr, "calling FSYNC_VGCScanAll\n");
+    /* a NULL partition means "all partitions" */
+    code = FSYNC_VGCScan(NULL, state.reason);
+    debug_response(code, NULL);
+
+    VDisconnectFS();
+
+    return 0;
+}
* License. For details, see the LICENSE file in the top-level source
* directory or online at http://www.openafs.org/dl/license10.html
*
- * Portions Copyright (c) 2006-2008 Sine Nomine Associates
+ * Portions Copyright (c) 2006-2010 Sine Nomine Associates
*/
/*
#include "volume.h"
#include "volume_inline.h"
#include "partition.h"
+#include "vg_cache.h"
#ifdef HAVE_POLL
#include <sys/poll.h>
static afs_int32 FSYNC_com_VolHdrQuery(FSSYNC_VolOp_command * com, SYNC_response * res);
#ifdef AFS_DEMAND_ATTACH_FS
static afs_int32 FSYNC_com_VolOpQuery(FSSYNC_VolOp_command * com, SYNC_response * res);
+static afs_int32 FSYNC_com_VGQuery(FSSYNC_VolOp_command * com, SYNC_response * res);
+static afs_int32 FSYNC_com_VGUpdate(SYNC_command * com, SYNC_response * res);
+static afs_int32 FSYNC_com_VGScan(FSSYNC_VolOp_command * com, SYNC_response * res);
+static afs_int32 FSYNC_com_VGScanAll(FSSYNC_VolOp_command * com, SYNC_response * res);
#endif /* AFS_DEMAND_ATTACH_FS */
static afs_int32 FSYNC_com_VnQry(osi_socket fd, SYNC_command * com, SYNC_response * res);
memcpy(thread_opts, &VThread_defaults, sizeof(VThread_defaults));
thread_opts->disallow_salvsync = 1;
assert(pthread_setspecific(VThread_key, thread_opts) == 0);
+
+ code = VVGCache_PkgInit();
+ assert(code == 0);
#endif
InitHandler();
case FSYNC_VOL_QUERY:
case FSYNC_VOL_QUERY_HDR:
case FSYNC_VOL_QUERY_VOP:
+#ifdef AFS_DEMAND_ATTACH_FS
+ case FSYNC_VG_QUERY:
+ case FSYNC_VG_SCAN:
+ case FSYNC_VG_SCAN_ALL:
+#endif
res.hdr.response = FSYNC_com_VolOp(fd, &com, &res);
break;
case FSYNC_VOL_STATS_GENERAL:
case FSYNC_VOL_QUERY_VNODE:
res.hdr.response = FSYNC_com_VnQry(fd, &com, &res);
break;
+#ifdef AFS_DEMAND_ATTACH_FS
+ case FSYNC_VG_ADD:
+ case FSYNC_VG_DEL:
+ res.hdr.response = FSYNC_com_VGUpdate(&com, &res);
+ break;
+#endif
default:
res.hdr.response = SYNC_BAD_COMMAND;
break;
case FSYNC_VOL_QUERY_VOP:
code = FSYNC_com_VolOpQuery(&vcom, res);
break;
+ case FSYNC_VG_QUERY:
+ code = FSYNC_com_VGQuery(&vcom, res);
+ break;
+ case FSYNC_VG_SCAN:
+ code = FSYNC_com_VGScan(&vcom, res);
+ break;
+ case FSYNC_VG_SCAN_ALL:
+ code = FSYNC_com_VGScanAll(&vcom, res);
+ break;
#endif /* AFS_DEMAND_ATTACH_FS */
default:
code = SYNC_BAD_COMMAND;
}
return code;
}
+
+/**
+ * Handle an FSYNC_VG_QUERY request.
+ *
+ * Validates the partition name in the request, looks up the partition,
+ * and queries the VGC for the requested volume, writing the result into
+ * the response payload buffer.
+ *
+ * @param[in]  vcom  volume operation command object
+ * @param[out] res   response object; hdr.reason is set on the recognized
+ *                   failure paths, and hdr.response_len grows by the size
+ *                   of the query response on success
+ *
+ * @return operation status
+ *    @retval SYNC_OK      query answered
+ *    @retval SYNC_FAILED  bad partition string, unknown partition, or the
+ *                         VGC query itself failed
+ */
+static afs_int32
+FSYNC_com_VGQuery(FSSYNC_VolOp_command * vcom, SYNC_response * res)
+{
+    struct DiskPartition64 * part;
+    int status;
+
+    if (SYNC_verifyProtocolString(vcom->vop->partName, sizeof(vcom->vop->partName))) {
+        res->hdr.reason = SYNC_REASON_MALFORMED_PACKET;
+        return SYNC_FAILED;
+    }
+
+    part = VGetPartition_r(vcom->vop->partName, 0);
+    if (part == NULL) {
+        res->hdr.reason = FSYNC_BAD_PART;
+        return SYNC_FAILED;
+    }
+
+    /* the payload buffer must be able to hold a full query response */
+    assert(sizeof(FSSYNC_VGQry_response_t) <= res->payload.len);
+
+    status = VVGCache_query_r(part, vcom->vop->volume, res->payload.buf);
+    if (status == 0) {
+        res->hdr.response_len += sizeof(FSSYNC_VGQry_response_t);
+        return SYNC_OK;
+    }
+
+    /* translate the errors we recognize into FSSYNC reason codes */
+    if (status == EAGAIN) {
+        res->hdr.reason = FSYNC_PART_SCANNING;
+    } else if (status == ENOENT) {
+        res->hdr.reason = FSYNC_UNKNOWN_VOLID;
+    }
+
+    return SYNC_FAILED;
+}
+
+/**
+ * Handle an FSYNC_VG_ADD or FSYNC_VG_DEL request.
+ *
+ * Checks that the received packet is exactly a header plus an
+ * FSSYNC_VGUpdate_command_t, validates and looks up the partition, and
+ * then adds or deletes the (parent, child) entry in that partition's VGC.
+ *
+ * @param[in]  com  SYNC command object
+ * @param[out] res  response object; hdr.reason (and, for a wrongly sized
+ *                  packet, hdr.flags) are updated
+ *
+ * @return operation status
+ *    @retval SYNC_OK         entry updated, or the VGC update returned
+ *                            EINVAL
+ *    @retval SYNC_COM_ERROR  packet had the wrong length
+ *    @retval SYNC_FAILED     any other failure
+ */
+static afs_int32
+FSYNC_com_VGUpdate(SYNC_command * com, SYNC_response * res)
+{
+    FSSYNC_VGUpdate_command_t * update;
+    struct DiskPartition64 * part;
+    int status;
+
+    if (com->recv_len != (sizeof(com->hdr) + sizeof(*update))) {
+        res->hdr.reason = SYNC_REASON_MALFORMED_PACKET;
+        res->hdr.flags |= SYNC_FLAG_CHANNEL_SHUTDOWN;
+        return SYNC_COM_ERROR;
+    }
+
+    update = com->payload.buf;
+
+    if (SYNC_verifyProtocolString(update->partName, sizeof(update->partName))) {
+        res->hdr.reason = SYNC_REASON_MALFORMED_PACKET;
+        return SYNC_FAILED;
+    }
+
+    part = VGetPartition_r(update->partName, 0);
+    if (part == NULL) {
+        res->hdr.reason = FSYNC_BAD_PART;
+        return SYNC_FAILED;
+    }
+
+    if (com->hdr.command == FSYNC_VG_ADD) {
+        status = VVGCache_entry_add_r(part, update->parent, update->child, NULL);
+    } else if (com->hdr.command == FSYNC_VG_DEL) {
+        status = VVGCache_entry_del_r(part, update->parent, update->child);
+    } else {
+        Log("FSYNC_com_VGUpdate called improperly\n");
+        status = -1;
+    }
+
+    res->hdr.reason = (status == ENOENT) ? FSYNC_UNKNOWN_VOLID : FSYNC_WHATEVER;
+
+    /* EINVAL means the partition VGC doesn't exist at all; not really
+     * an error */
+    if (status == 0 || status == EINVAL) {
+        return SYNC_OK;
+    }
+    return SYNC_FAILED;
+}
+
+/**
+ * Handle an FSYNC_VG_SCAN request.
+ *
+ * Validates the partition name in the request, looks up the partition,
+ * and asks the VGC to start a scan of it.
+ *
+ * @param[in]  vcom  volume operation command object
+ * @param[out] res   response object; hdr.reason is set when the partition
+ *                   string is malformed or the partition is unknown
+ *
+ * @return operation status
+ *    @retval SYNC_OK      VVGCache_scanStart_r returned 0
+ *    @retval SYNC_FAILED  otherwise
+ */
+static afs_int32
+FSYNC_com_VGScan(FSSYNC_VolOp_command * vcom, SYNC_response * res)
+{
+    struct DiskPartition64 * part;
+
+    if (SYNC_verifyProtocolString(vcom->vop->partName, sizeof(vcom->vop->partName))) {
+        res->hdr.reason = SYNC_REASON_MALFORMED_PACKET;
+        return SYNC_FAILED;
+    }
+
+    part = VGetPartition_r(vcom->vop->partName, 0);
+    if (part == NULL) {
+        res->hdr.reason = FSYNC_BAD_PART;
+        return SYNC_FAILED;
+    }
+
+    if (VVGCache_scanStart_r(part) != 0) {
+        return SYNC_FAILED;
+    }
+    return SYNC_OK;
+}
+
+/**
+ * Handle an FSYNC_VG_SCAN_ALL request.
+ *
+ * Asks the VGC to start a scan with a NULL partition argument. Neither
+ * parameter is consulted.
+ *
+ * @param[in]  com  volume operation command object (unused)
+ * @param[out] res  response object (unused)
+ *
+ * @return operation status
+ *    @retval SYNC_OK      VVGCache_scanStart_r returned 0
+ *    @retval SYNC_FAILED  otherwise
+ */
+static afs_int32
+FSYNC_com_VGScanAll(FSSYNC_VolOp_command * com, SYNC_response * res)
+{
+    if (VVGCache_scanStart_r(NULL) != 0) {
+        return SYNC_FAILED;
+    }
+    return SYNC_OK;
+}
#endif /* AFS_DEMAND_ATTACH_FS */
static afs_int32
* License. For details, see the LICENSE file in the top-level source
* directory or online at http://www.openafs.org/dl/license10.html
*
- * Portions Copyright (c) 2006-2008 Sine Nomine Associates
+ * Portions Copyright (c) 2006-2010 Sine Nomine Associates
*/
/*
#ifndef __fssync_h_
#define __fssync_h_
-
#define FSYNC_PROTO_VERSION 3
+#include "voldefs.h"
/**
* FSYNC command codes.
FSYNC_VOL_FORCE_ERROR = SYNC_COM_CODE_DECL(16), /**< force volume into error state */
FSYNC_VOL_LEAVE_OFF = SYNC_COM_CODE_DECL(17), /**< end vol op, but leave volume offline */
FSYNC_VOL_QUERY_VNODE = SYNC_COM_CODE_DECL(18), /**< query vnode state */
+ FSYNC_VG_QUERY = SYNC_COM_CODE_DECL(19), /**< Query volume group membership for a given volume id */
+ FSYNC_VG_ADD = SYNC_COM_CODE_DECL(20), /**< add a volume id to a vg */
+ FSYNC_VG_DEL = SYNC_COM_CODE_DECL(21), /**< delete a volume id from a vg */
+ FSYNC_VG_SCAN = SYNC_COM_CODE_DECL(22), /**< force a re-scan of a given partition */
+ FSYNC_VG_SCAN_ALL = SYNC_COM_CODE_DECL(23), /**< force a re-scan of all vice partitions */
FSYNC_OP_CODE_END
};
FSYNC_UNKNOWN_VNID = SYNC_REASON_CODE_DECL(9), /**< vnode id not known by fileserver */
FSYNC_WRONG_PART = SYNC_REASON_CODE_DECL(10),/**< volume attached on different partition */
FSYNC_BAD_STATE = SYNC_REASON_CODE_DECL(11),/**< current volume state does not allow this operation */
+ FSYNC_BAD_PART = SYNC_REASON_CODE_DECL(12),/**< invalid disk partition */
+ FSYNC_PART_SCANNING = SYNC_REASON_CODE_DECL(13),/**< partition is busy scanning VGs */
FSYNC_REASON_CODE_END
};
char partName[16]; /**< partition name */
} FSSYNC_VnQry_hdr;
+/**
+ * fssync protocol volume group query response message.
+ *
+ * Returned as the response payload of an FSYNC_VG_QUERY request.
+ */
+typedef struct FSSYNC_VGQry_response {
+ afs_uint32 rw; /**< rw volume id */
+ afs_uint32 children[VOL_VG_MAX_VOLS]; /**< vector of children;
+ * NOTE(review): the fssync-debug
+ * client skips zero entries,
+ * presumably unused slots -- confirm */
+} FSSYNC_VGQry_response_t;
+
+/**
+ * fssync protocol volume group update command message.
+ *
+ * Sent as the command payload of FSYNC_VG_ADD and FSYNC_VG_DEL requests;
+ * identifies the (parent, child) entry to add or remove and the partition
+ * whose VGC is affected.
+ */
+typedef struct FSSYNC_VGUpdate_command {
+ afs_uint32 parent; /**< rw volume id */
+ afs_uint32 child; /**< volume id to associate with parent
+ * (can legally be the parent itself) */
+ char partName[16]; /**< name of vice partition on which this
+ * volume group resides */
+} FSSYNC_VGUpdate_command_t;
#define FSSYNC_IN_PORT 2040
#define FSSYNC_UN_PATH "fssync.sock"
int command, int reason,
SYNC_response * res);
-/* volume operations interface */
+/* volume operations control interface */
extern afs_int32 FSYNC_VolOp(VolumeId volume, char *partName, int com, int reason,
SYNC_response * res);
extern void FSYNC_fsInit(void);
+/* volume group cache coherence interfaces */
+extern afs_int32 FSYNC_VGCQuery(char * part, VolumeId parent,
+ FSSYNC_VGQry_response_t *, SYNC_response *res);
+extern afs_int32 FSYNC_VGCAdd(char *part, VolumeId parent, VolumeId child,
+ int reason, SYNC_response *res);
+extern afs_int32 FSYNC_VGCDel(char *part, VolumeId parent, VolumeId child,
+ int reason, SYNC_response *res);
+extern afs_int32 FSYNC_VGCScan(char *part, int reason);
+
#endif /* __fssync_h_ */
struct DiskPartition64 *DiskPartitionList;
#ifdef AFS_DEMAND_ATTACH_FS
+/* file to lock to conceptually "lock" the vol headers on a partition */
+#define AFS_PARTLOCK_FILE ".volheaders.lock"
+
static struct DiskPartition64 *DiskPartitionTable[VOLMAXPARTS+1];
static struct DiskPartition64 * VLookupPartition_r(char * path);
VInitPartition_r(char *path, char *devname, Device dev)
{
struct DiskPartition64 *dp, *op;
+
dp = (struct DiskPartition64 *)malloc(sizeof(struct DiskPartition64));
/* Add it to the end, to preserve order when we print statistics */
for (op = DiskPartitionList; op; op = op->next) {
assert(pthread_cond_init(&dp->vol_list.cv, NULL) == 0);
dp->vol_list.len = 0;
dp->vol_list.busy = 0;
+ {
+ char lockpath[MAXPATHLEN+1];
+ afs_snprintf(lockpath, MAXPATHLEN, "%s/" AFS_PARTLOCK_FILE, dp->name);
+ lockpath[MAXPATHLEN] = '\0';
+ VLockFileInit(&dp->headerLockFile, lockpath);
+ }
+ VDiskLockInit(&dp->headerLock, &dp->headerLockFile, 1);
#endif /* AFS_DEMAND_ATTACH_FS */
}
#ifdef AFS_DEMAND_ATTACH_FS
+/* new-style partition locks; these are only to have some mutual exclusion
+ * between the VGC scanner and volume utilities creating/altering vol headers
+ */
+
+/**
+ * lock a partition's vol headers.
+ *
+ * Blocks until the partition's header lock has been acquired; a failure
+ * is logged and its code is handed back to the caller.
+ *
+ * @param[in] dp        the partition to lock
+ * @param[in] locktype  READ_LOCK or WRITE_LOCK
+ *
+ * @return operation status, as returned by VGetDiskLock
+ *    @retval 0 success
+ */
+int
+VPartHeaderLock(struct DiskPartition64 *dp, int locktype)
+{
+    int code;
+
+    /* block on acquiring the lock */
+    int nonblock = 0;
+
+    code = VGetDiskLock(&dp->headerLock, locktype, nonblock);
+    if (code) {
+        Log("VPartHeaderLock: error %d locking partition %s\n", code,
+            VPartitionPath(dp));
+    }
+    return code;
+}
+
+/**
+ * unlock a partition's vol headers.
+ *
+ * Releases the partition's header lock via VReleaseDiskLock.
+ *
+ * @param[in] dp the partition to unlock
+ * @param[in] locktype READ_LOCK or WRITE_LOCK
+ *                     (presumably the type passed to VPartHeaderLock --
+ *                     confirm against VReleaseDiskLock)
+ */
+void
+VPartHeaderUnlock(struct DiskPartition64 *dp, int locktype)
+{
+ VReleaseDiskLock(&dp->headerLock, locktype);
+}
+
/* XXX not sure this will work on AFS_NT40_ENV
* needs to be tested!
*/
*/
+#ifndef AFS_VOL_PARTITION_H
+#define AFS_VOL_PARTITION_H
+
#include <afs/param.h>
#include "nfs.h"
#if defined(AFS_HPUX_ENV)
int busy; /* asynch vol list op in progress */
pthread_cond_t cv; /* vol_list.busy change cond var */
} vol_list;
+ struct VLockFile headerLockFile;
+ struct VDiskLock headerLock; /* lock for the collective headers on the partition */
#endif /* AFS_DEMAND_ATTACH_FS */
};
#ifdef AFS_DEMAND_ATTACH_FS
extern struct DiskPartition64 *VGetPartitionById(afs_int32 index, int abortp);
extern struct DiskPartition64 *VGetPartitionById_r(afs_int32 index, int abortp);
+extern int VPartHeaderLock(struct DiskPartition64 *dp, int locktype);
+extern void VPartHeaderUnlock(struct DiskPartition64 *dp, int locktype);
#endif
extern int VAttachPartitions(void);
extern void VLockPartition(char *name);
extern int VDiskUsage(struct Volume *vp, afs_sfsize_t blocks);
extern void VPrintDiskStats(void);
extern int VInitPartitionPackage(void);
+
+#endif /* AFS_VOL_PARTITION_H */
/*
- * Copyright 2006-2008, Sine Nomine Associates and others.
+ * Copyright 2006-2010, Sine Nomine Associates and others.
* All Rights Reserved.
*
* This software has been released under the terms of the IBM Public
--- /dev/null
+/*
+ * Copyright 2009-2010, Sine Nomine Associates and others.
+ * All Rights Reserved.
+ *
+ * This software has been released under the terms of the IBM Public
+ * License. For details, see the LICENSE file in the top-level source
+ * directory or online at http://www.openafs.org/dl/license10.html
+ */
+
+/*
+ * demand attach fs
+ * volume group membership cache
+ */
+
+#include <afsconfig.h>
+#include <afs/param.h>
+
+#ifdef AFS_DEMAND_ATTACH_FS
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <dirent.h>
+#include <afs/assert.h>
+#include <string.h>
+#include <sys/file.h>
+#include <sys/param.h>
+#include <lock.h>
+#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX_ENV)
+#include <unistd.h>
+#endif
+#include <afs/afsutil.h>
+#include <lwp.h>
+#include "nfs.h"
+#include <afs/afsint.h>
+#include "ihandle.h"
+#include "vnode.h"
+#include "volume.h"
+#include "viceinode.h"
+#include "voldefs.h"
+#include "partition.h"
+#include <afs/errors.h>
+
+#define __VOL_VG_CACHE_IMPL 1
+
+#include "vg_cache.h"
+#include "vg_cache_impl.h"
+
+/*
+ * Forward declarations for the file-local helpers, grouped as: lookup,
+ * VG entry management, child-list management, export, and hash entry
+ * management.
+ */
+static int _VVGC_lookup(struct DiskPartition64 *,
+ VolumeId volid,
+ VVGCache_entry_t ** entry,
+ VVGCache_hash_entry_t ** hentry);
+static int _VVGC_entry_alloc(VVGCache_entry_t ** entry);
+static int _VVGC_entry_free(VVGCache_entry_t * entry);
+static int _VVGC_entry_get(VVGCache_entry_t * entry);
+static int _VVGC_entry_put(struct DiskPartition64 *,
+ VVGCache_entry_t * entry);
+static int _VVGC_entry_add(struct DiskPartition64 *,
+ VolumeId volid,
+ VVGCache_entry_t **,
+ VVGCache_hash_entry_t **);
+static int _VVGC_entry_cl_add(VVGCache_entry_t *, VolumeId);
+static int _VVGC_entry_cl_del(struct DiskPartition64 *, VVGCache_entry_t *,
+ VolumeId);
+static int _VVGC_entry_export(VVGCache_entry_t *, VVGCache_query_t *);
+static int _VVGC_hash_entry_alloc(VVGCache_hash_entry_t ** entry);
+static int _VVGC_hash_entry_free(VVGCache_hash_entry_t * entry);
+static int _VVGC_hash_entry_add(struct DiskPartition64 *,
+ VolumeId,
+ VVGCache_entry_t *,
+ VVGCache_hash_entry_t **);
+static int _VVGC_hash_entry_del(VVGCache_hash_entry_t * entry);
+static int _VVGC_hash_entry_unlink(VVGCache_hash_entry_t * entry);
+
+/* global hash table; its bucket array is allocated in VVGCache_PkgInit */
+VVGCache_hash_table_t VVGCache_hash_table;
+/* global per-partition VGC state, indexed by a partition's dp->index */
+VVGCache_t VVGCache;
+
+/**
+ * initialize volume group cache subsystem.
+ *
+ * Allocates the global hash table (VolumeHashTable.Size buckets) and marks
+ * the VGC of every partition slot as invalid. If any step fails, whatever
+ * was set up so far is torn down again, so a failed init leaves neither an
+ * allocated bucket array nor initialized condition variables behind.
+ *
+ * @return operation status
+ *    @retval 0      success
+ *    @retval ENOMEM the hash table could not be allocated
+ *    @retval other  error code returned by pthread_cond_init
+ */
+int
+VVGCache_PkgInit(void)
+{
+    int code = 0;
+    int i;
+    int ncv = 0;    /* number of condition variables initialized so far */
+
+    /* allocate hash table */
+    VVGCache_hash_table.hash_buckets =
+        malloc(VolumeHashTable.Size * sizeof(struct rx_queue));
+    if (VVGCache_hash_table.hash_buckets == NULL) {
+        code = ENOMEM;
+        goto error;
+    }
+
+    /* setup hash chain heads */
+    for (i = 0; i < VolumeHashTable.Size; i++) {
+        queue_Init(&VVGCache_hash_table.hash_buckets[i]);
+    }
+
+    /* initialize per-partition VVGC state */
+    for (i = 0; i <= VOLMAXPARTS; i++) {
+        VVGCache.part[i].state = VVGC_PART_STATE_INVALID;
+        VVGCache.part[i].dlist_hash_buckets = NULL;
+        code = pthread_cond_init(&VVGCache.part[i].cv, NULL);
+        if (code) {
+            goto error;
+        }
+        ncv++;
+    }
+
+    return 0;
+
+ error:
+    /* undo the partial initialization, newest condition variable first */
+    while (ncv > 0) {
+        pthread_cond_destroy(&VVGCache.part[--ncv].cv);
+    }
+    free(VVGCache_hash_table.hash_buckets);
+    VVGCache_hash_table.hash_buckets = NULL;
+    return code;
+}
+
+/**
+ * shut down volume group cache subsystem.
+ *
+ * Releases the hash bucket array, then marks each partition slot's VGC
+ * invalid and destroys its condition variable.
+ *
+ * @return operation status; currently always EOPNOTSUPP
+ *
+ * @todo implement
+ */
+int
+VVGCache_PkgShutdown(void)
+{
+    int partno = 0;
+
+    /* fix it later */
+
+    /* release the bucket array */
+    free(VVGCache_hash_table.hash_buckets);
+    VVGCache_hash_table.hash_buckets = NULL;
+
+    /* tear down per-partition VVGC state */
+    while (partno <= VOLMAXPARTS) {
+        VVGCache_part_t *pstate = &VVGCache.part[partno];
+
+        pstate->state = VVGC_PART_STATE_INVALID;
+        pthread_cond_destroy(&pstate->cv);
+        partno++;
+    }
+
+    return EOPNOTSUPP;
+}
+
+/**
+ * allocate a zero-filled cache entry.
+ *
+ * @param[out] entry_out receives the new entry, or NULL if allocation failed
+ *
+ * @return operation status
+ *    @retval 0      success
+ *    @retval ENOMEM out of memory
+ *
+ * @internal
+ */
+static int
+_VVGC_entry_alloc(VVGCache_entry_t ** entry_out)
+{
+    VVGCache_entry_t *fresh = calloc(1, sizeof(*fresh));
+
+    *entry_out = fresh;
+    return (fresh != NULL) ? 0 : ENOMEM;
+}
+
+/**
+ * release the memory of a cache entry.
+ *
+ * @param[in] entry cache entry; must have no remaining references
+ *
+ * @return operation status
+ *    @retval 0 success (the only value returned)
+ *
+ * @internal
+ */
+static int
+_VVGC_entry_free(VVGCache_entry_t * entry)
+{
+    /* freeing an entry that is still referenced is a programming error */
+    assert(entry->refcnt == 0);
+
+    free(entry);
+    return 0;
+}
+
+/**
+ * create a volume group entry and register it under its RW volume id.
+ *
+ * @param[in]  dp        disk partition object
+ * @param[in]  volid     volume id; stored as the entry's rw id
+ * @param[out] entry_out if non-NULL, receives the new vg cache object on
+ *                       success; left untouched on failure
+ * @param[out] hash_out  passed through to _VVGC_hash_entry_add
+ *
+ * @pre - VOL_LOCK held
+ *      - no such entry exists in hash table
+ *
+ * @return operation status
+ *    @retval 0 success
+ *
+ * @note on failure the freshly allocated entry (if any) is freed again
+ *
+ * @internal
+ */
+static int
+_VVGC_entry_add(struct DiskPartition64 * dp,
+                VolumeId volid,
+                VVGCache_entry_t ** entry_out,
+                VVGCache_hash_entry_t ** hash_out)
+{
+    VVGCache_entry_t *vg = NULL;
+    int rc;
+
+    rc = _VVGC_entry_alloc(&vg);
+    if (rc == 0) {
+        vg->rw = volid;
+        /* references are taken as children are added; none exist yet */
+        vg->refcnt = 0;
+
+        rc = _VVGC_hash_entry_add(dp, volid, vg, hash_out);
+        if (rc == 0) {
+            if (entry_out != NULL) {
+                *entry_out = vg;
+            }
+            return 0;
+        }
+    }
+
+    /* failure: discard the entry if it got allocated */
+    if (vg != NULL) {
+        _VVGC_entry_free(vg);
+    }
+    return rc;
+}
+
+/**
+ * record a volume id in a volume group's child table.
+ *
+ * Adding an id that is already present is logged and treated as success
+ * without taking another reference. A new child takes one reference on
+ * the entry.
+ *
+ * @param[in] ent   volume group object
+ * @param[in] volid volume id
+ *
+ * @return operation status
+ *    @retval 0  success
+ *    @retval -1 child table is full
+ *
+ * @internal
+ */
+static int
+_VVGC_entry_cl_add(VVGCache_entry_t * ent,
+                   VolumeId volid)
+{
+    int slot;
+    int free_slot = -1;    /* first unused slot seen, or -1 */
+
+    /* walk the whole table: a duplicate may sit after the first free slot */
+    for (slot = 0; slot < VOL_VG_MAX_VOLS; slot++) {
+        if (ent->children[slot] == volid) {
+            ViceLog(1, ("VVGC_entry_cl_add: tried to add duplicate vol "
+                        "%lu to VG %lu\n",
+                        afs_printable_uint32_lu(volid),
+                        afs_printable_uint32_lu(ent->rw)));
+            return 0;
+        }
+        if (free_slot < 0 && ent->children[slot] == 0) {
+            free_slot = slot;
+        }
+    }
+
+    if (free_slot < 0) {
+        /* no room left */
+        ViceLog(0, ("VVGC_entry_cl_add: tried to add vol %lu to VG %lu, but VG "
+                    "is full\n", afs_printable_uint32_lu(volid),
+                    afs_printable_uint32_lu(ent->rw)));
+        return -1;
+    }
+
+    ent->children[free_slot] = volid;
+
+    /* the new child holds a reference on the entry */
+    return _VVGC_entry_get(ent);
+}
+
+/**
+ * drop a volume id from a volume group's child table.
+ *
+ * Clears the first slot holding volid and gives back the reference that
+ * child held, which may free the entry.
+ *
+ * @param[in] dp    disk partition object
+ * @param[in] ent   volume group object
+ * @param[in] volid volume id
+ *
+ * @return operation status
+ *    @retval 0  success
+ *    @retval -1 no such entry found
+ *
+ * @internal
+ */
+static int
+_VVGC_entry_cl_del(struct DiskPartition64 *dp,
+                   VVGCache_entry_t * ent,
+                   VolumeId volid)
+{
+    int slot = 0;
+
+    while (slot < VOL_VG_MAX_VOLS && ent->children[slot] != volid) {
+        slot++;
+    }
+    if (slot == VOL_VG_MAX_VOLS) {
+        return -1;
+    }
+
+    ent->children[slot] = 0;
+
+    /* do not touch ent after this: the put may have freed it */
+    return _VVGC_entry_put(dp, ent);
+}
+
+/**
+ * take a reference on an entry.
+ *
+ * @param[in] entry cache entry whose refcount is bumped by one
+ *
+ * @pre VOL_LOCK held
+ *
+ * @return operation status
+ *    @retval 0 success (the only value returned)
+ *
+ * @internal
+ */
+static int
+_VVGC_entry_get(VVGCache_entry_t * entry)
+{
+    entry->refcnt += 1;
+
+    return 0;
+}
+
+/**
+ * put back a reference to an entry.
+ *
+ * When the last reference goes away, the hash entry registered under the
+ * entry's RW id is unlinked and the entry itself is freed.
+ *
+ * @param[in] dp    disk partition object
+ * @param[in] entry cache entry
+ *
+ * @pre VOL_LOCK held
+ *
+ * @warning do not attempt to deref pointer after calling this interface
+ *
+ * @return operation status
+ *    @retval 0  success
+ *    @retval -1 the RW id of the entry maps to a different entry
+ *
+ * @note dp is needed to lookup the RW hash entry to unlink, if we are
+ *       putting back the final reference and freeing
+ *
+ * @note if the RW hash entry cannot be looked up (other than ENOENT) or
+ *       unlinked, the error is returned and the entry is NOT freed
+ *
+ * @internal
+ */
+static int
+_VVGC_entry_put(struct DiskPartition64 * dp, VVGCache_entry_t * entry)
+{
+    int code = 0;
+
+    assert(entry->refcnt > 0);
+
+    if (--entry->refcnt == 0) {
+        VVGCache_entry_t *nentry;
+        VVGCache_hash_entry_t *hentry;
+
+        /* first, try to delete the RW id hash entry pointing to this
+         * entry */
+        code = _VVGC_lookup(dp, entry->rw, &nentry, &hentry);
+        if (!code) {
+            if (nentry != entry) {
+                /* looking up the rw of this entry points to a different
+                 * entry; should not happen */
+                ViceLog(0, ("VVGC_entry_put: error: entry lookup for entry %lu "
+                            "found different entry than was passed\n",
+                            afs_printable_uint32_lu(entry->rw)));
+                code = -1;
+            } else {
+                code = _VVGC_hash_entry_unlink(hentry);
+                hentry = NULL;
+            }
+        } else if (code == ENOENT) {
+            /* ignore ENOENT; this shouldn't happen, since the RW hash
+             * entry should always exist if the entry does... but we
+             * were going to delete it anyway, so try to continue */
+            ViceLog(0, ("VVGC_entry_put: warning: tried to unlink entry for "
+                        "vol %lu, but RW hash entry doesn't exist; continuing "
+                        "anyway...\n", afs_printable_uint32_lu(entry->rw)));
+
+            code = 0;
+        }
+
+        /* now, just free the entry itself */
+        if (!code) {
+            code = _VVGC_entry_free(entry);
+        }
+    }
+
+    return code;
+}
+
+/**
+ * copy a volume group entry into the external query format.
+ *
+ * @param[in]  ent internal-format volume group object
+ * @param[out] qry external-format volume group object; receives the rw id
+ *                 and every child slot, including empty ones
+ *
+ * @pre VOL_LOCK held
+ *
+ * @return operation status
+ *    @retval 0 success (the only value returned)
+ *
+ * @internal
+ */
+static int
+_VVGC_entry_export(VVGCache_entry_t * ent, VVGCache_query_t * qry)
+{
+    int slot = VOL_VG_MAX_VOLS;
+
+    /* element-wise copy; the order of copying does not matter */
+    while (slot-- > 0) {
+        qry->children[slot] = ent->children[slot];
+    }
+    qry->rw = ent->rw;
+
+    return 0;
+}
+
+/**
+ * allocate a hash table entry structure.
+ *
+ * The returned memory is not initialized.
+ *
+ * @param[out] entry_out receives the new hash entry, or NULL on failure
+ *
+ * @return operation status
+ *    @retval 0      success
+ *    @retval ENOMEM out of memory
+ *
+ * @internal
+ */
+static int
+_VVGC_hash_entry_alloc(VVGCache_hash_entry_t ** entry_out)
+{
+    VVGCache_hash_entry_t *fresh = malloc(sizeof(*fresh));
+
+    *entry_out = fresh;
+    if (fresh == NULL) {
+        return ENOMEM;
+    }
+    return 0;
+}
+
+/**
+ * release a hash table entry structure.
+ *
+ * @param[in] entry hash table entry structure to be freed
+ *
+ * @return operation status
+ *    @retval 0 success (the only value returned)
+ *
+ * @internal
+ */
+static int
+_VVGC_hash_entry_free(VVGCache_hash_entry_t * entry)
+{
+    free(entry);
+    return 0;
+}
+
+/**
+ * add an entry to the hash table.
+ *
+ * If a hash entry for volid already exists on dp and points at ent, the
+ * call is accepted as a no-op; if it points at a different VG entry,
+ * EEXIST is returned.
+ *
+ * @param[in]  dp       disk partition object
+ * @param[in]  volid    volume id
+ * @param[in]  ent      volume group object
+ * @param[out] hash_out if non-NULL, address in which to store pointer to
+ *                      hash entry (the existing one for a duplicate, NULL
+ *                      if allocation failed)
+ *
+ * @pre VOL_LOCK held
+ *
+ * @return operation status
+ *    @retval 0      success
+ *    @retval EEXIST hash entry for volid already exists, and it points to
+ *                   a different VG entry
+ *    @retval ENOMEM could not allocate the hash entry
+ *
+ * @internal
+ */
+static int
+_VVGC_hash_entry_add(struct DiskPartition64 * dp,
+                     VolumeId volid,
+                     VVGCache_entry_t * ent,
+                     VVGCache_hash_entry_t ** hash_out)
+{
+    int code = 0;
+    VVGCache_hash_entry_t * hent;
+    int hash = VVGC_HASH(volid);
+    VVGCache_entry_t *nent;
+
+    code = _VVGC_lookup(dp, volid, &nent, hash_out);
+    if (!code) {
+        if (ent != nent) {
+            ViceLog(0, ("_VVGC_hash_entry_add: tried to add a duplicate "
+                        "nonmatching entry for vol %lu: original "
+                        "(%"AFS_PTR_FMT",%lu) new (%"AFS_PTR_FMT",%lu)\n",
+                        afs_printable_uint32_lu(volid),
+                        nent, afs_printable_uint32_lu(nent->rw),
+                        ent, afs_printable_uint32_lu(ent->rw)));
+            return EEXIST;
+        }
+        ViceLog(1, ("_VVGC_hash_entry_add: tried to add duplicate "
+                    "hash entry for vol %lu, VG %lu\n",
+                    afs_printable_uint32_lu(volid),
+                    afs_printable_uint32_lu(ent->rw)));
+        /* accept attempts to add matching duplicate entries; just
+         * pretend we added it */
+        return 0;
+    }
+
+    /* any lookup failure lands here and a new hash entry is created */
+    code = _VVGC_hash_entry_alloc(&hent);
+    if (code) {
+        /* hent is NULL here; it is still reported through hash_out */
+        goto done;
+    }
+
+    hent->entry = ent;
+    hent->dp = dp;
+    hent->volid = volid;
+    queue_Append(&VVGCache_hash_table.hash_buckets[hash],
+                 hent);
+
+ done:
+    if (hash_out) {
+        *hash_out = hent;
+    }
+    return code;
+}
+
+/**
+ * remove an entry from the hash table.
+ *
+ * Removes hent's volume id from its VG entry's child list (dropping the
+ * reference that child held) and, unless hent is the hash entry for the
+ * VG's RW id, unlinks and frees hent.
+ *
+ * @param[in] hent hash table entry
+ *
+ * @pre VOL_LOCK held
+ *
+ * @return operation status
+ * @retval 0 success
+ * @retval -1 volume id was not in the VG entry's child list
+ *
+ * @internal
+ */
+static int
+_VVGC_hash_entry_del(VVGCache_hash_entry_t * hent)
+{
+ int code = 0, res;
+ int rw = 0;
+
+ /* must be decided up front: hent may not be safe to read after the
+ * child-list deletion below */
+ if (hent->entry->rw == hent->volid) {
+ rw = 1;
+ }
+
+ code = _VVGC_entry_cl_del(hent->dp, hent->entry, hent->volid);
+ /* note: the entry hent->entry points at may have been freed by
+ * _VVGC_entry_cl_del (last reference dropped), and
+ * if hent->entry->rw == hent->volid, it is possible for hent to
+ * have been freed as well */
+
+ if (!rw) {
+ /* If we are the RW id, don't unlink, since we still need the
+ * hash entry to exist, so when we lookup children, they can
+ * look up the RW id hash chain, and they will all go to the
+ * same object.
+ *
+ * If we are the last entry and the entry should be deleted,
+ * _VVGC_entry_cl_del will take care of unlinking the RW hash entry.
+ */
+ res = _VVGC_hash_entry_unlink(hent);
+ if (res) {
+ /* an unlink failure takes precedence over the child-list result */
+ code = res;
+ }
+ }
+
+ return code;
+}
+
+/**
+ * low-level removal of a hash table entry.
+ *
+ * Takes the entry off its hash chain and frees it; nothing more. The
+ * refcount and child list of the VG entry it pointed at are left alone.
+ * You probably want @see _VVGC_hash_entry_del instead.
+ *
+ * @param[in] hent hash table entry
+ *
+ * @pre VOL_LOCK held
+ *
+ * @return operation status
+ *    @retval 0 success
+ *
+ * @internal
+ */
+static int
+_VVGC_hash_entry_unlink(VVGCache_hash_entry_t * hent)
+{
+    queue_Remove(hent);
+
+    /* scrub the fields before the memory is released */
+    hent->volid = 0;
+    hent->entry = NULL;
+
+    return _VVGC_hash_entry_free(hent);
+}
+
+/**
+ * find the vg cache entry that a volume id belongs to.
+ *
+ * @param[in]  dp        disk partition object
+ * @param[in]  volid     vg member volume id
+ * @param[out] entry_out receives the volume group entry, or NULL when the
+ *                       id is not found; not written at all on EINVAL
+ * @param[out] hash_out  if non-NULL, receives the matching hash entry;
+ *                       only written on success
+ *
+ * @pre VOL_LOCK held
+ *
+ * @warning - it is up to the caller to get a ref to entry_out, if needed
+ *          - hash_out must not be referenced after dropping VOL_LOCK
+ *
+ * @return operation status
+ *    @retval 0      success
+ *    @retval ENOENT volume id not found
+ *    @retval EINVAL partition's VGC is invalid
+ *
+ * @internal
+ */
+static int
+_VVGC_lookup(struct DiskPartition64 * dp,
+             VolumeId volid,
+             VVGCache_entry_t ** entry_out,
+             VVGCache_hash_entry_t ** hash_out)
+{
+    struct rx_queue *chain;
+    struct VVGCache_hash_entry *cur, *next;
+
+    if (VVGCache.part[dp->index].state == VVGC_PART_STATE_INVALID) {
+        return EINVAL;
+    }
+
+    *entry_out = NULL;
+    chain = &VVGCache_hash_table.hash_buckets[VVGC_HASH(volid)];
+
+    for (queue_Scan(chain, cur, next, VVGCache_hash_entry)) {
+        /* the table is shared by all partitions, so match on both keys */
+        if (cur->volid != volid || cur->dp != dp) {
+            continue;
+        }
+        *entry_out = cur->entry;
+        if (hash_out != NULL) {
+            *hash_out = cur;
+        }
+        return 0;
+    }
+
+    return ENOENT;
+}
+
+/**
+ * add an entry to the volume group cache.
+ *
+ * Registers child as a member of the volume group rooted at parent,
+ * creating the volume group if neither id is known yet. If the partition
+ * is currently being scanned, a successful add also takes the pair off
+ * the to-delete list.
+ *
+ * @param[in]  dp     disk partition object
+ * @param[in]  parent parent volume id
+ * @param[in]  child  child volume id
+ * @param[out] newvg  if non-NULL, *newvg is 1 if adding this added a
+ *                    new VG, 0 if we added to an existing VG
+ *
+ * @pre VOL_LOCK held
+ *
+ * @return operation status
+ *    @retval 0      success
+ *    @retval -1     parent and child are already registered in
+ *                   different VGs
+ *    @retval EINVAL partition's VGC is invalid (not logged)
+ */
+int
+VVGCache_entry_add_r(struct DiskPartition64 * dp,
+                     VolumeId parent,
+                     VolumeId child,
+                     afs_int32 *newvg)
+{
+    int code = 0, res;
+    VVGCache_entry_t * child_ent, * parent_ent;
+
+    if (newvg) {
+        *newvg = 0;
+    }
+
+    /* check for existing entries */
+    res = _VVGC_lookup(dp, child, &child_ent, NULL);
+    if (res && res != ENOENT) {
+        code = res;
+        goto done;
+    }
+
+    res = _VVGC_lookup(dp, parent, &parent_ent, NULL);
+    if (res && res != ENOENT) {
+        code = res;
+        goto done;
+    }
+
+    /*
+     * branch based upon existence of parent and child nodes
+     */
+    if (parent_ent && child_ent) {
+        /* both exist. we're done.
+         * if they point different places, then report the error. */
+        if (child_ent != parent_ent) {
+            code = -1;
+        }
+        if (parent == child) {
+            /* if we're adding the RW entry as a child, the RW id may
+             * not be in the child array yet, so make sure not to skip
+             * over that */
+            goto cladd;
+        }
+        goto done;
+    } else if (!parent_ent && child_ent) {
+        /* child exists.
+         * update vg root volid, and add hash entry. */
+        parent_ent = child_ent;
+        parent_ent->rw = parent;
+
+        code = _VVGC_hash_entry_add(dp,
+                                    parent,
+                                    parent_ent,
+                                    NULL);
+        goto done;
+    } else if (!child_ent && !parent_ent) {
+        code = _VVGC_entry_add(dp,
+                               parent,
+                               &parent_ent,
+                               NULL);
+        if (code) {
+            goto done;
+        }
+        if (newvg) {
+            *newvg = 1;
+        }
+        if (child == parent) {
+            /* if we're the RW, skip over adding the child hash entry;
+             * we already added the hash entry when creating the entry */
+            child_ent = parent_ent;
+            goto cladd;
+        }
+    }
+
+    /* here the VG entry exists but the child has no hash entry yet */
+    assert(!child_ent);
+    child_ent = parent_ent;
+    code = _VVGC_hash_entry_add(dp,
+                                child,
+                                child_ent,
+                                NULL);
+    if (code) {
+        goto done;
+    }
+
+ cladd:
+    code = _VVGC_entry_cl_add(child_ent, child);
+
+ done:
+    if (code && code != EINVAL) {
+        ViceLog(0, ("VVGCache_entry_add: error %d trying to add vol %lu to VG"
+                    " %lu on partition %s\n", code, afs_printable_uint32_lu(child),
+                    afs_printable_uint32_lu(parent), VPartitionPath(dp)));
+    }
+
+    if (code == 0 && VVGCache.part[dp->index].state == VVGC_PART_STATE_UPDATING) {
+        /* we successfully added the entry; make sure it's not on the
+         * to-delete list, so it doesn't get deleted later */
+        code = _VVGC_dlist_del_r(dp, parent, child);
+        if (code && code != ENOENT) {
+            ViceLog(0, ("VVGCache_entry_add: error %d trying to remove vol "
+                        "%lu (parent %lu) from the to-delete list for part "
+                        "%s.\n", code, afs_printable_uint32_lu(child),
+                        afs_printable_uint32_lu(parent),
+                        VPartitionPath(dp)));
+        } else {
+            /* not being on the to-delete list is fine */
+            code = 0;
+        }
+    }
+
+    return code;
+}
+
+/**
+ * add an entry to the volume group cache.
+ *
+ * Locking wrapper: takes VOL_LOCK around VVGCache_entry_add_r and hands
+ * its result back to the caller.
+ *
+ * @param[in]  dp     disk partition object
+ * @param[in]  parent parent volume id
+ * @param[in]  child  child volume id
+ * @param[out] newvg  if non-NULL, *newvg is 1 if adding this added a
+ *                    new VG, 0 if we added to an existing VG
+ *
+ * @return operation status, as returned by VVGCache_entry_add_r
+ *    @retval 0 success
+ */
+int
+VVGCache_entry_add(struct DiskPartition64 * dp,
+                   VolumeId parent,
+                   VolumeId child,
+                   afs_int32 *newvg)
+{
+    int code = 0;
+
+    VOL_LOCK;
+    /* propagate failures instead of silently reporting success */
+    code = VVGCache_entry_add_r(dp, parent, child, newvg);
+    VOL_UNLOCK;
+
+    return code;
+}
+
+/**
+ * delete an entry from the volume group cache.
+ *
+ * While the partition is being scanned, the (parent, child) pair is first
+ * recorded on the partition's to-delete list; the entry is then purged
+ * from the cache in either case.
+ *
+ * @param[in] dp     disk partition object
+ * @param[in] parent parent volume id
+ * @param[in] child  child volume id
+ *
+ * @pre VOL_LOCK held
+ *
+ * @return operation status
+ *    @retval 0 success
+ */
+int
+VVGCache_entry_del_r(struct DiskPartition64 * dp,
+                     VolumeId parent, VolumeId child)
+{
+    int scanning =
+        (VVGCache.part[dp->index].state == VVGC_PART_STATE_UPDATING);
+
+    if (scanning) {
+        int rc = _VVGC_dlist_add_r(dp, parent, child);
+
+        if (rc != 0) {
+            return rc;
+        }
+    }
+
+    return _VVGC_entry_purge_r(dp, parent, child);
+}
+
+/**
+ * remove a child volume from its volume group in the cache.
+ *
+ * Both ids must already map to the same VG entry; otherwise nothing is
+ * changed.
+ *
+ * @param[in] dp     disk partition object
+ * @param[in] parent parent volume id
+ * @param[in] child  child volume id
+ *
+ * @pre VOL_LOCK held
+ *
+ * @internal
+ *
+ * @return operation status
+ *    @retval 0  success
+ *    @retval -1 parent and child map to different VG entries
+ *    @retval other  error from looking up parent or child
+ */
+int
+_VVGC_entry_purge_r(struct DiskPartition64 * dp,
+                    VolumeId parent, VolumeId child)
+{
+    VVGCache_entry_t *pvg, *cvg;
+    VVGCache_hash_entry_t *chash;
+    int rc;
+
+    /* resolve both ids; either failing ends the operation */
+    rc = _VVGC_lookup(dp, parent, &pvg, NULL);
+    if (rc != 0) {
+        return rc;
+    }
+    rc = _VVGC_lookup(dp, child, &cvg, &chash);
+    if (rc != 0) {
+        return rc;
+    }
+
+    if (pvg != cvg) {
+        /* the two ids disagree about which VG they belong to */
+        ViceLog(0, ("VVGCache_entry_del: trying to delete vol %lu from VG %lu, "
+                    "but vol %lu points to VGC entry %"AFS_PTR_FMT" and VG %lu "
+                    "points to VGC entry %"AFS_PTR_FMT"\n",
+                    afs_printable_uint32_lu(child),
+                    afs_printable_uint32_lu(parent),
+                    afs_printable_uint32_lu(child),
+                    cvg, afs_printable_uint32_lu(parent), pvg));
+        return -1;
+    }
+
+    return _VVGC_hash_entry_del(chash);
+}
+
+/**
+ * delete an entry from the volume group cache.
+ *
+ * Locking wrapper: runs VVGCache_entry_del_r with VOL_LOCK held.
+ *
+ * @param[in] dp     disk partition object
+ * @param[in] parent parent volume id
+ * @param[in] child  child volume id
+ *
+ * @return operation status, as returned by VVGCache_entry_del_r
+ *    @retval 0 success
+ */
+int
+VVGCache_entry_del(struct DiskPartition64 * dp,
+                   VolumeId parent, VolumeId child)
+{
+    int rc;
+
+    VOL_LOCK;
+    rc = VVGCache_entry_del_r(dp, parent, child);
+    VOL_UNLOCK;
+    return rc;
+}
+
+/**
+ * query a volume group by any member volume id.
+ *
+ * If the partition has no VGC data yet, a scan is kicked off and the
+ * caller is told to retry.
+ *
+ * @param[in]  dp     disk partition object
+ * @param[in]  volume volume id of a member of VG
+ * @param[out] res    vg membership data; only filled in on success
+ *
+ * @pre VOL_LOCK held
+ *
+ * @return operation status
+ *    @retval 0      success
+ *    @retval EAGAIN partition needs to finish scanning
+ *    @retval ENOENT volume id not found
+ */
+int
+VVGCache_query_r(struct DiskPartition64 * dp,
+                 VolumeId volume,
+                 VVGCache_query_t * res)
+{
+    VVGCache_entry_t *vg;
+    int rc;
+
+    switch (VVGCache.part[dp->index].state) {
+    case VVGC_PART_STATE_INVALID:
+        /* nothing cached for this partition: start a scan */
+        rc = VVGCache_scanStart_r(dp);
+        if (rc == 0 || rc == -3) {
+            /* -3 means another thread already started scanning */
+            return EAGAIN;
+        }
+        return rc;
+
+    case VVGC_PART_STATE_UPDATING:
+        return EAGAIN;
+
+    default:
+        break;
+    }
+
+    rc = _VVGC_lookup(dp, volume, &vg, NULL);
+    if (rc != 0) {
+        return rc;
+    }
+    return _VVGC_entry_export(vg, res);
+}
+
+/**
+ * query a volume group by any member volume id.
+ *
+ * Locking wrapper: runs VVGCache_query_r with VOL_LOCK held.
+ *
+ * @param[in]  dp     disk partition object
+ * @param[in]  volume volume id of a member of VG
+ * @param[out] res    vg membership data
+ *
+ * @return operation status, as returned by VVGCache_query_r
+ *    @retval 0 success
+ */
+int
+VVGCache_query(struct DiskPartition64 * dp,
+               VolumeId volume, VVGCache_query_t * res)
+{
+    int rc;
+
+    VOL_LOCK;
+    rc = VVGCache_query_r(dp, volume, res);
+    VOL_UNLOCK;
+    return rc;
+}
+
+/**
+ * begin asynchronous scan of on-disk volume group metadata.
+ *
+ * @param[in] dp disk partition object, or NULL to start a scan on every
+ *               partition in DiskPartitionList
+ *
+ * @pre VOL_LOCK held
+ *
+ * @return operation status
+ *    @retval 0 success
+ *
+ * @note when scanning all partitions, every partition is attempted and
+ *       the error of the last one that failed is returned
+ */
+int
+VVGCache_scanStart_r(struct DiskPartition64 * dp)
+{
+    struct DiskPartition64 *part;
+    int rc = 0;
+
+    if (dp != NULL) {
+        return _VVGC_scan_start(dp);
+    }
+
+    /* start a scanner thread on each partition */
+    for (part = DiskPartitionList; part != NULL; part = part->next) {
+        int res = _VVGC_scan_start(part);
+
+        if (res != 0) {
+            rc = res;
+        }
+    }
+
+    return rc;
+}
+
+/**
+ * begin asynchronous scan of on-disk volume group metadata.
+ *
+ * Locking wrapper: runs VVGCache_scanStart_r with VOL_LOCK held.
+ *
+ * @param[in] dp disk partition object
+ *
+ * @return operation status, as returned by VVGCache_scanStart_r
+ *    @retval 0 success
+ */
+int
+VVGCache_scanStart(struct DiskPartition64 * dp)
+{
+    int rc;
+
+    VOL_LOCK;
+    rc = VVGCache_scanStart_r(dp);
+    VOL_UNLOCK;
+    return rc;
+}
+
+/**
+ * wait for async on-disk VG metadata scan to complete.
+ *
+ * Sleeps on the partition's condition variable for as long as the
+ * partition's VGC state is "updating".
+ *
+ * @param[in] dp disk partition object
+ *
+ * @pre VOL_LOCK held
+ *
+ * @warning this routine must drop VOL_LOCK internally
+ *
+ * @return operation status
+ *    @retval 0 success (the only value returned)
+ */
+int
+VVGCache_scanWait_r(struct DiskPartition64 * dp)
+{
+    VVGCache_part_t *pstate = &VVGCache.part[dp->index];
+
+    for (;;) {
+        if (pstate->state != VVGC_PART_STATE_UPDATING) {
+            break;
+        }
+        VOL_CV_WAIT(&pstate->cv);
+    }
+
+    return 0;
+}
+
+/**
+ * wait for async on-disk VG metadata scan to complete.
+ *
+ * Locking wrapper: runs VVGCache_scanWait_r with VOL_LOCK held.
+ *
+ * @param[in] dp disk partition object
+ *
+ * @return operation status, as returned by VVGCache_scanWait_r
+ *    @retval 0 success
+ */
+int
+VVGCache_scanWait(struct DiskPartition64 * dp)
+{
+    int rc;
+
+    VOL_LOCK;
+    rc = VVGCache_scanWait_r(dp);
+    VOL_UNLOCK;
+    return rc;
+}
+
+/**
+ * flush all cache entries for a given disk partition.
+ *
+ * Walks every bucket of the global hash table and deletes each hash entry
+ * that belongs to the given partition. Deletion failures are logged and
+ * the walk continues.
+ *
+ * @param[in] part disk partition object
+ *
+ * @pre VOL_LOCK held
+ *
+ * @return operation status
+ * @retval 0 success
+ * @retval nonzero error from the last hash entry deletion that failed
+ *
+ * @internal
+ */
+int
+_VVGC_flush_part_r(struct DiskPartition64 * part)
+{
+ int code = 0, res;
+ int i;
+ VVGCache_hash_entry_t * ent, * nent;
+
+ for (i = 0; i < VolumeHashTable.Size; i++) {
+ /* presumably queue_Scan keeps the successor in nent so that ent may
+ * be removed inside the loop -- confirm against rx_queue.h.
+ * NOTE(review): _VVGC_hash_entry_del can also unlink the VG's RW hash
+ * entry (through _VVGC_entry_put); if that entry lives in this same
+ * bucket and happens to be nent, the walk would touch freed memory --
+ * TODO confirm whether this can occur. */
+ for (queue_Scan(&VVGCache_hash_table.hash_buckets[i],
+ ent,
+ nent,
+ VVGCache_hash_entry)) {
+ if (ent->dp == part) {
+ /* save the id now; ent may be freed by the delete */
+ VolumeId volid = ent->volid;
+ res = _VVGC_hash_entry_del(ent);
+ if (res) {
+ ViceLog(0, ("_VVGC_flush_part_r: error %d deleting hash entry for %lu\n",
+ res, afs_printable_uint32_lu(volid)));
+ code = res;
+ }
+ }
+ }
+ }
+
+ return code;
+}
+
+/**
+ * flush all cache entries for a given disk partition.
+ *
+ * Locking wrapper: runs _VVGC_flush_part_r with VOL_LOCK held.
+ *
+ * @param[in] part disk partition object
+ *
+ * @return operation status, as returned by _VVGC_flush_part_r
+ *    @retval 0 success
+ *
+ * @internal
+ */
+int
+_VVGC_flush_part(struct DiskPartition64 * part)
+{
+    int rc;
+
+    VOL_LOCK;
+    rc = _VVGC_flush_part_r(part);
+    VOL_UNLOCK;
+    return rc;
+}
+
+
+/**
+ * change VVGC partition state.
+ *
+ * Stores the new state and, if it differs from the previous one, wakes
+ * all threads waiting on the partition's condition variable.
+ *
+ * @param[in] part  disk partition object
+ * @param[in] state new state
+ *
+ * @pre VOL_LOCK is held
+ *
+ * @return old state
+ *
+ * @internal
+ */
+int
+_VVGC_state_change(struct DiskPartition64 * part,
+                   VVGCache_part_state_t state)
+{
+    VVGCache_part_t *pstate = &VVGCache.part[part->index];
+    VVGCache_part_state_t prev = pstate->state;
+
+    pstate->state = state;
+
+    /* only signal on a real transition */
+    if (prev != state) {
+        pthread_cond_broadcast(&pstate->cv);
+    }
+
+    return prev;
+}
+
+#endif /* AFS_DEMAND_ATTACH_FS */
--- /dev/null
+/*
+ * Copyright 2009-2010, Sine Nomine Associates and others.
+ * All Rights Reserved.
+ *
+ * This software has been released under the terms of the IBM Public
+ * License. For details, see the LICENSE file in the top-level source
+ * directory or online at http://www.openafs.org/dl/license10.html
+ */
+
+/*
+ * demand attach fs
+ * volume group membership cache
+ */
+
+#ifndef _AFS_VOL_VG_CACHE_H
+#define _AFS_VOL_VG_CACHE_H 1
+
+#include "vg_cache_types.h"
+#include "partition.h"
+
+/*
+ * VGC entry management and queries. For each pair, the plain function
+ * acquires VOL_LOCK itself, while the _r variant expects the caller to
+ * already hold VOL_LOCK.
+ */
+extern int VVGCache_entry_add(struct DiskPartition64 *, VolumeId parent,
+ VolumeId child, afs_int32 *newvg);
+extern int VVGCache_entry_add_r(struct DiskPartition64 *, VolumeId parent,
+ VolumeId child, afs_int32 *newvg);
+extern int VVGCache_entry_del(struct DiskPartition64 *, VolumeId parent, VolumeId child);
+extern int VVGCache_entry_del_r(struct DiskPartition64 *, VolumeId parent, VolumeId child);
+extern int VVGCache_query(struct DiskPartition64 *, VolumeId volume, VVGCache_query_t * res);
+extern int VVGCache_query_r(struct DiskPartition64 *, VolumeId volume, VVGCache_query_t * res);
+
+/* partition scan control; same locking convention for the _r variants */
+extern int VVGCache_scanStart(struct DiskPartition64 *);
+extern int VVGCache_scanStart_r(struct DiskPartition64 *);
+extern int VVGCache_scanWait(struct DiskPartition64 *);
+extern int VVGCache_scanWait_r(struct DiskPartition64 *);
+/* NOTE(review): defined outside vg_cache.c; presumably follows the _r
+ * locking convention as well -- confirm at the definition */
+extern int VVGCache_checkPartition_r(struct DiskPartition64 *);
+
+/* package setup and teardown */
+extern int VVGCache_PkgInit(void);
+extern int VVGCache_PkgShutdown(void);
+
+
+#endif /* _AFS_VOL_VG_CACHE_H */
--- /dev/null
+/*
+ * Copyright 2009-2010, Sine Nomine Associates and others.
+ * All Rights Reserved.
+ *
+ * This software has been released under the terms of the IBM Public
+ * License. For details, see the LICENSE file in the top-level source
+ * directory or online at http://www.openafs.org/dl/license10.html
+ */
+
+/*
+ * demand attach fs
+ * volume group membership cache
+ */
+
+#ifndef _AFS_VOL_VG_CACHE_IMPL_H
+#define _AFS_VOL_VG_CACHE_IMPL_H 1
+
+#define VVGC_SCAN_TBL_LEN 4096 /**< thread-local partition scan table size */
+
+#include "vg_cache_impl_types.h"
+
+/* global VGC state, defined in vg_cache.c */
+extern VVGCache_hash_table_t VVGCache_hash_table;
+extern VVGCache_t VVGCache;
+
+/* implementation-internal entry points shared within the VGC code */
+extern int _VVGC_flush_part(struct DiskPartition64 * part);
+extern int _VVGC_flush_part_r(struct DiskPartition64 * part);
+extern int _VVGC_scan_start(struct DiskPartition64 * dp);
+extern int _VVGC_state_change(struct DiskPartition64 * part,
+ VVGCache_part_state_t state);
+extern int _VVGC_entry_purge_r(struct DiskPartition64 * dp,
+ VolumeId parent, VolumeId child);
+/* to-delete list maintenance, used while a partition is being scanned */
+extern int _VVGC_dlist_add_r(struct DiskPartition64 *dp,
+ VolumeId parent, VolumeId child);
+extern int _VVGC_dlist_del_r(struct DiskPartition64 *dp,
+ VolumeId parent, VolumeId child);
+
+/** map a volume id to its bucket index in the VGC hash table.
+ *  The argument is parenthesized so that expression arguments such as
+ *  VVGC_HASH(a | b) are masked as a whole. */
+#define VVGC_HASH(volumeId) ((volumeId) & (VolumeHashTable.Mask))
+
+#endif /* _AFS_VOL_VG_CACHE_IMPL_H */
--- /dev/null
+/*
+ * Copyright 2009-2010, Sine Nomine Associates and others.
+ * All Rights Reserved.
+ *
+ * This software has been released under the terms of the IBM Public
+ * License. For details, see the LICENSE file in the top-level source
+ * directory or online at http://www.openafs.org/dl/license10.html
+ */
+
+/*
+ * demand attach fs
+ * volume group membership cache
+ */
+
+#ifndef _AFS_VOL_VG_CACHE_IMPL_TYPES_H
+#define _AFS_VOL_VG_CACHE_IMPL_TYPES_H 1
+
+#ifndef __VOL_VG_CACHE_IMPL
+#error "do not include this file outside of the volume group cache implementation"
+#endif
+
+#include "volume.h"
+#include <rx/rx_queue.h>
+#include <signal.h>
+
+
+/**
+ * volume group cache node.
+ *
+ * One node describes one volume group; the hash entries of all member
+ * volumes point at the same node.
+ */
+typedef struct VVGCache_entry {
+ VolumeId rw; /**< rw volume id */
+ VolumeId children[VOL_VG_MAX_VOLS]; /**< vector of children; a slot holding 0 is unused */
+ afs_uint32 refcnt; /**< reference count; one reference is held per child added */
+} VVGCache_entry_t;
+
+/**
+ * volume group hash table.
+ *
+ * Buckets are indexed with VVGC_HASH(volume id).
+ */
+typedef struct VVGCache_hash_table {
+ struct rx_queue * hash_buckets; /**< variable-length array of
+ * hash buckets */
+} VVGCache_hash_table_t;
+
+/**
+ * volume group hash chain element.
+ *
+ * Maps one (partition, volume id) pair to the volume group cache entry
+ * the volume belongs to.
+ *
+ * @see VVGCache_hash_table_t
+ */
+typedef struct VVGCache_hash_entry {
+ struct rx_queue hash_chain; /**< hash chain pointers */
+ VolumeId volid; /**< volume id */
+ struct DiskPartition64 * dp; /**< associated disk partition */
+ VVGCache_entry_t * entry; /**< volume group cache entry */
+} VVGCache_hash_entry_t;
+
+/* scanner implementation details */
+
+/**
+ * scan element.
+ *
+ * NOTE(review): presumably one volume found by the partition scanner,
+ * paired with the id of its parent volume -- confirm in the scanner code.
+ */
+typedef struct VVGCache_scan_entry {
+ VolumeId volid; /**< volume id */
+ VolumeId parent; /**< parent volume id */
+} VVGCache_scan_entry_t;
+
+/**
+ * scan table.
+ *
+ * Fixed-size batch of scan elements plus running statistics.
+ */
+typedef struct VVGCache_scan_table {
+ unsigned int idx; /* NOTE(review): presumably the next free slot in
+ * entries[] -- confirm in the scanner code */
+
+ /* stats */
+ unsigned long newvols;
+ unsigned long newvgs;
+
+ VVGCache_scan_entry_t entries[VVGC_SCAN_TBL_LEN]; /* VVGC_SCAN_TBL_LEN slots */
+} VVGCache_scan_table_t;
+
+/**
+ * VVGC partition state enumeration.
+ *
+ * Every partition slot starts out as VVGC_PART_STATE_INVALID when the
+ * package is initialized.
+ */
+typedef enum VVGCache_part_state {
+ VVGC_PART_STATE_VALID, /**< vvgc data for partition is valid */
+ VVGC_PART_STATE_INVALID, /**< vvgc data for partition is known to be invalid */
+ VVGC_PART_STATE_UPDATING /**< vvgc data for partition is currently updating */
+} VVGCache_part_state_t;
+
+/**
+ * entry in the 'to-delete' list.
+ *
+ * Identifies a VGC entry by its (parent, child) volume id pair.
+ *
+ * @see _VVGC_dlist_add_r
+ */
+typedef struct VVGCache_dlist_entry {
+ struct rx_queue hash_chain; /**< hash chain pointers */
+ VolumeId child; /**< child volid of the VGC entry */
+ VolumeId parent; /**< parent volid of the VGC entry */
+} VVGCache_dlist_entry_t;
+
+/**
+ * VVGC partition state.
+ *
+ * One of these exists per partition slot in the global VVGCache object.
+ */
+typedef struct VVGCache_part {
+ VVGCache_part_state_t state; /**< state of VVGC for this partition */
+ pthread_cond_t cv; /**< state change cv; broadcast whenever
+ * the state actually changes */
+ struct rx_queue *dlist_hash_buckets; /**< variable-length array of hash
+ * buckets. Queues contain
+ * VVGCache_dlist_entry_t's.
+ * This is NULL when we are not
+ * scanning. */
+} VVGCache_part_t;
+
+/**
+ * VVGC global state.
+ *
+ * The part[] array is indexed by a partition's dp->index.
+ */
+typedef struct VVGCache {
+ VVGCache_part_t part[VOLMAXPARTS+1]; /**< state of VVGC for each partition */
+} VVGCache_t;
+
+#endif /* _AFS_VOL_VG_CACHE_IMPL_TYPES_H */
--- /dev/null
+/*
+ * Copyright 2009-2010, Sine Nomine Associates and others.
+ * All Rights Reserved.
+ *
+ * This software has been released under the terms of the IBM Public
+ * License. For details, see the LICENSE file in the top-level source
+ * directory or online at http://www.openafs.org/dl/license10.html
+ */
+
+/*
+ * demand attach fs
+ * volume group membership cache
+ */
+
+#ifndef _AFS_VOL_VG_CACHE_TYPES_H
+#define _AFS_VOL_VG_CACHE_TYPES_H 1
+
+#include "voldefs.h"
+
+/**
+ * volume group query response
+ *
+ * Describes one volume group: the id of its read-write volume and a
+ * fixed-size vector of up to VOL_VG_MAX_VOLS member volume ids.
+ *
+ * NOTE(review): unused children[] slots are presumably left as 0;
+ * confirm against the code that fills in the response.
+ */
+typedef struct VVGCache_query {
+ afs_uint32 rw; /**< rw volume id */
+ afs_uint32 children[VOL_VG_MAX_VOLS]; /**< vector of children */
+} VVGCache_query_t;
+
+
+#endif /* _AFS_VOL_VG_CACHE_TYPES_H */
--- /dev/null
+/*
+ * Copyright 2009-2010, Sine Nomine Associates and others.
+ * All Rights Reserved.
+ *
+ * This software has been released under the terms of the IBM Public
+ * License. For details, see the LICENSE file in the top-level source
+ * directory or online at http://www.openafs.org/dl/license10.html
+ */
+
+/*
+ * demand attach fs
+ * volume group membership cache
+ * asynchronous partition scanner
+ */
+
+#include <afsconfig.h>
+#include <afs/param.h>
+
+#ifdef AFS_DEMAND_ATTACH_FS
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <dirent.h>
+#include <afs/assert.h>
+#include <string.h>
+#include <sys/file.h>
+#include <sys/param.h>
+#include <lock.h>
+#if defined(AFS_SUN5_ENV) || defined(AFS_HPUX_ENV)
+#include <unistd.h>
+#endif
+#include <afs/afsutil.h>
+#include <lwp.h>
+#include "nfs.h"
+#include <afs/afsint.h>
+#include "ihandle.h"
+#include "vnode.h"
+#include "volume.h"
+#include "viceinode.h"
+#include "voldefs.h"
+#include "partition.h"
+#include <afs/errors.h>
+
+#define __VOL_VG_CACHE_IMPL 1
+
+#include "vg_cache.h"
+#include "vg_cache_impl.h"
+
+/* use the large-file open variant on platforms that define O_LARGEFILE;
+ * everywhere else plain open() is used */
+#ifdef O_LARGEFILE
+#define afs_open open64
+#else /* !O_LARGEFILE */
+#define afs_open open
+#endif /* !O_LARGEFILE */
+
+/* forward declarations for the file-local scanner and to-delete list
+ * helpers defined below */
+static int _VVGC_scan_table_init(VVGCache_scan_table_t * tbl);
+static int _VVGC_scan_table_add(VVGCache_scan_table_t * tbl,
+ struct DiskPartition64 * dp,
+ VolumeId volid,
+ VolumeId parent);
+static int _VVGC_scan_table_flush(VVGCache_scan_table_t * tbl,
+ struct DiskPartition64 * dp);
+static void * _VVGC_scanner_thread(void *);
+static int _VVGC_scan_partition(struct DiskPartition64 * part);
+static VVGCache_dlist_entry_t * _VVGC_dlist_lookup_r(struct DiskPartition64 *dp,
+ VolumeId parent,
+ VolumeId child);
+static void _VVGC_flush_dlist(struct DiskPartition64 *dp);
+
+/**
+ * reset a thread-local scan table to its empty state.
+ *
+ * Zeroes the whole table, which clears the entry index, the statistics
+ * counters and every entry slot.
+ *
+ * @param[in] tbl  scan table to reset
+ *
+ * @return operation status
+ *    @retval 0 success (this routine has no failure path)
+ *
+ * @internal
+ */
+static int
+_VVGC_scan_table_init(VVGCache_scan_table_t * tbl)
+{
+    memset(tbl, 0, sizeof(VVGCache_scan_table_t));
+    return 0;
+}
+
+/**
+ * append a (volume id, parent id) pair to the thread-local scan table.
+ *
+ * If the table is already full it is first flushed to the global VG
+ * cache, which takes VOL_LOCK internally. The flush empties the table
+ * whether or not it reports an error, so the new pair is always stored.
+ *
+ * @param[in] tbl     scan table
+ * @param[in] dp      disk partition object
+ * @param[in] volid   volume id
+ * @param[in] parent  parent volume id
+ *
+ * @pre VOL_LOCK is NOT held
+ *
+ * @return operation status
+ *    @retval 0        success
+ *    @retval nonzero  a VVGCache_entry_add_r operation failed during a
+ *                     flush of the thread-local table
+ *
+ * @internal
+ */
+static int
+_VVGC_scan_table_add(VVGCache_scan_table_t * tbl,
+                     struct DiskPartition64 * dp,
+                     VolumeId volid,
+                     VolumeId parent)
+{
+    VVGCache_scan_entry_t *slot;
+    int code = 0;
+
+    if (tbl->idx == VVGC_SCAN_TBL_LEN) {
+        /* no room left; push the current batch into the global cache */
+        code = _VVGC_scan_table_flush(tbl, dp);
+    }
+
+    slot = &tbl->entries[tbl->idx];
+    slot->volid = volid;
+    slot->parent = parent;
+    tbl->idx++;
+
+    return code;
+}
+
+/**
+ * flush thread-local scan table to the global VG cache.
+ *
+ * Every pending entry that is not on the partition's to-delete list is
+ * added to the global VGC under VOL_LOCK. The to-delete list is then
+ * flushed as well, and the table is reset to empty. The running
+ * newvols/newvgs statistics are carried across the reset.
+ *
+ * The table is always emptied, even when one or more adds fail; the
+ * code of the last failing add is what gets returned.
+ *
+ * @param[in] tbl  scan table
+ * @param[in] dp   disk partition object
+ *
+ * @pre VOL_LOCK is NOT held
+ *
+ * @return operation status
+ *    @retval 0        success
+ *    @retval nonzero  a VVGCache_entry_add_r operation failed during a
+ *                     flush of the thread-local table
+ *
+ * @internal
+ */
+static int
+_VVGC_scan_table_flush(VVGCache_scan_table_t * tbl,
+                       struct DiskPartition64 * dp)
+{
+    int code = 0, res;
+    unsigned int i;    /* same type as tbl->idx, so the comparison is not mixed-sign */
+    afs_int32 newvg = 0;
+    unsigned long newvols, newvgs;
+
+    newvols = tbl->newvols;
+    newvgs = tbl->newvgs;
+
+    VOL_LOCK;
+
+    for (i = 0; i < tbl->idx; i++) {
+        /*
+         * We need to check the 'to-delete' list and prevent adding any entries
+         * that are on it. The volser could potentially create a volume in one
+         * VG, then delete it and put it on another VG. If we are doing a scan
+         * when that happens, tbl->entries could have the entries for trying to
+         * put the vol on both VGs, though at least one of them will also be on
+         * the dlist. If we put everything in tbl->entries on the VGC and then
+         * tried to delete afterwards, putting one entry on the VGC would cause
+         * an error, and we would fail to add the other. So instead, avoid
+         * adding any new VGC entry that is on the dlist.
+         */
+        if (_VVGC_dlist_lookup_r(dp, tbl->entries[i].parent,
+                                 tbl->entries[i].volid)) {
+            continue;
+        }
+        res = VVGCache_entry_add_r(dp,
+                                   tbl->entries[i].parent,
+                                   tbl->entries[i].volid,
+                                   &newvg);
+        if (res) {
+            code = res;
+        } else {
+            newvols++;
+            newvgs += newvg;
+        }
+    }
+
+    /* flush the to-delete list while we're here. We don't need to preserve
+     * the list across the entire scan, and flushing it each time we flush
+     * a scan table will keep the size of the dlist down */
+    _VVGC_flush_dlist(dp);
+
+    VOL_UNLOCK;
+
+    /* tbl->idx is unsigned, so it must be printed with %u */
+    ViceLog(125, ("VVGC_scan_table_flush: flushed %u entries from "
+                  "scan table to global VG cache\n", tbl->idx));
+    ViceLog(125, ("VVGC_scan_table_flush: %s total: %lu vols, %lu groups\n",
+                  VPartitionPath(dp), newvols, newvgs));
+
+    res = _VVGC_scan_table_init(tbl);
+    if (res) {
+        code = res;
+    }
+
+    /* the reset zeroed the statistics; restore the running totals */
+    tbl->newvols = newvols;
+    tbl->newvgs = newvgs;
+
+    return code;
+}
+
+/**
+ * read a volume header from disk into a VolumeHeader structure.
+ *
+ * @param[in]  path  absolute path to .vol volume header
+ * @param[out] hdr   volume header object; only written on success
+ *
+ * @return operation status
+ *    @retval 0       success
+ *    @retval ENOENT  volume header could not be opened
+ *    @retval EINVAL  volume header is invalid (short read, read error,
+ *                    or wrong magic number)
+ *
+ * @note every open() failure is reported as ENOENT, whatever errno says.
+ *       NOTE(review): this looks deliberate, since the caller unlinks
+ *       headers that fail with any other code; confirm before changing.
+ *
+ * @internal
+ */
+static int
+_VVGC_read_header(const char *path, struct VolumeHeader *hdr)
+{
+    int fd;
+    int nread;
+    int read_errno;
+    struct VolumeDiskHeader diskHeader;
+
+    fd = afs_open(path, O_RDONLY);
+    if (fd == -1) {
+        ViceLog(0, ("_VVGC_read_header: could not open %s; error = %d\n",
+                    path, errno));
+        return ENOENT;
+    }
+
+    nread = read(fd, &diskHeader, sizeof(diskHeader));
+    /* Capture errno before close() gets a chance to overwrite it. A short
+     * read does not set errno at all, so report 0 rather than a stale
+     * value left behind by some earlier call. */
+    read_errno = (nread < 0) ? errno : 0;
+    close(fd);
+
+    if (nread < 0 || (size_t)nread != sizeof(diskHeader)) {
+        ViceLog(0, ("_VVGC_read_header: could not read disk header from %s; error = %d\n",
+                    path, read_errno));
+        return EINVAL;
+    }
+
+    if (diskHeader.stamp.magic != VOLUMEHEADERMAGIC) {
+        ViceLog(0, ("_VVGC_read_header: disk header %s has magic %lu, should "
+                    "be %lu\n", path,
+                    afs_printable_uint32_lu(diskHeader.stamp.magic),
+                    afs_printable_uint32_lu(VOLUMEHEADERMAGIC)));
+        return EINVAL;
+    }
+
+    DiskToVolumeHeader(hdr, &diskHeader);
+    return 0;
+}
+
+/**
+ * decide what to do with one volume header found during a VGC scan.
+ *
+ * The header is read once without any lock. If that fails for a reason
+ * other than ENOENT, the read is retried while holding the partition
+ * header write lock, to make sure we are not racing a writer/creator of
+ * the header. A header that still cannot be read (for a reason other
+ * than ENOENT) is treated as bogus and unlinked.
+ *
+ * @param[in]  dp         the disk partition object
+ * @param[in]  node_path  the absolute path to the header to handle
+ * @param[out] hdr        the header read in from disk (valid only when
+ *                        *skip is 0)
+ * @param[out] skip       1 if we should skip the header (pretend it
+ *                        doesn't exist), 0 otherwise
+ *
+ * @return operation status
+ *    @retval 0   success
+ *    @retval -1  internal error beyond just failing to read the header
+ *                file (the partition header lock could not be taken)
+ */
+static int
+_VVGC_handle_header(struct DiskPartition64 *dp, const char *node_path,
+                    struct VolumeHeader *hdr, int *skip)
+{
+    int code;
+
+    /* assume the worst until a read succeeds */
+    *skip = 1;
+
+    code = _VVGC_read_header(node_path, hdr);
+    if (code == 0) {
+        /* header is fine; do not skip it, and do not error out */
+        *skip = 0;
+        return 0;
+    }
+    if (code == ENOENT) {
+        /* Ignore ENOENT; it's as if we never got it from readdir in the
+         * first place. Other error codes mean the header exists, but
+         * there's something wrong with it. */
+        return 0;
+    }
+
+    /* second attempt, this time excluding concurrent header writers */
+    if (VPartHeaderLock(dp, WRITE_LOCK)) {
+        ViceLog(0, ("_VVGC_handle_header: error acquiring partition "
+                    "write lock while trying to open %s\n",
+                    node_path));
+        return -1;
+    }
+    code = _VVGC_read_header(node_path, hdr);
+    VPartHeaderUnlock(dp, WRITE_LOCK);
+
+    if (code == 0) {
+        *skip = 0;
+        return 0;
+    }
+    if (code == ENOENT) {
+        /* the header went away in the meantime; nothing to clean up */
+        return 0;
+    }
+
+    /* the file is there but is not a usable header even under the lock */
+    ViceLog(0, ("_VVGC_scan_partition: %s does not appear to be a "
+                "legitimate volume header file; deleted\n",
+                node_path));
+
+    if (unlink(node_path)) {
+        ViceLog(0, ("Unable to unlink %s (errno = %d)\n",
+                    node_path, errno));
+    }
+
+    return 0;
+}
+
+/**
+ * scan a disk partition for .vol files
+ *
+ * Drops any existing VGC data for the partition, walks the partition
+ * directory, and feeds every readable volume header into the global VG
+ * cache in batches. When the walk is over, the partition's to-delete
+ * list is flushed and released, and the partition state is set to VALID
+ * on success or INVALID on any error.
+ *
+ * @param[in] part  disk partition object
+ *
+ * @pre VOL_LOCK is NOT held
+ * @pre the partition's to-delete list has been allocated (this is done
+ *      by _VVGC_scan_start before the scanner thread is created)
+ *
+ * @return operation status
+ *    @retval 0      success
+ *    @retval -1     invalid disk partition object, the partition
+ *                   directory could not be opened, or an internal error
+ *                   occurred while handling a header
+ *    @retval -2     failed to flush stale entries for this partition
+ *    @retval other  error code from a failed flush of the scan table
+ *                   into the global VG cache
+ *
+ * @internal
+ */
+static int
+_VVGC_scan_partition(struct DiskPartition64 * part)
+{
+    int code, res, skip;
+    int path_len;
+    DIR *dirp = NULL;
+    struct VolumeHeader hdr;
+    struct dirent *dp;
+    VVGCache_scan_table_t tbl;
+    char *part_path = NULL, *p;
+    char node_path[MAXPATHLEN];
+
+    code = _VVGC_scan_table_init(&tbl);
+    if (code) {
+        ViceLog(0, ("VVGC_scan_partition: could not init scan table; error = %d\n",
+                    code));
+        goto done;
+    }
+    part_path = VPartitionPath(part);
+    if (part_path == NULL) {
+        ViceLog(0, ("VVGC_scan_partition: invalid partition object given; aborting scan\n"));
+        code = -1;
+        goto done;
+    }
+
+    /* throw away whatever the VGC currently holds for this partition */
+    VOL_LOCK;
+    res = _VVGC_flush_part_r(part);
+    if (res) {
+        ViceLog(0, ("VVGC_scan_partition: error flushing partition %s; error = %d\n",
+                    part_path, res));
+        code = -2;
+    }
+    VOL_UNLOCK;
+    if (code) {
+        goto done;
+    }
+
+    dirp = opendir(part_path);
+    if (dirp == NULL) {
+        ViceLog(0, ("VVGC_scan_partition: could not open %s, aborting scan; error = %d\n",
+                    part_path, errno));
+        code = -1;
+        goto done;
+    }
+
+    ViceLog(5, ("VVGC_scan_partition: scanning partition %s for VG cache\n",
+                part_path));
+
+    while ((dp = readdir(dirp))) {
+        /* only files carrying the volume header extension are of interest */
+        p = strrchr(dp->d_name, '.');
+        if (p == NULL || strcmp(p, VHDREXT) != 0) {
+            continue;
+        }
+
+        path_len = snprintf(node_path,
+                            sizeof(node_path),
+                            "%s/%s",
+                            part_path,
+                            dp->d_name);
+        if (path_len < 0 || (size_t)path_len >= sizeof(node_path)) {
+            /* A truncated path names some other file (or none); never hand
+             * it to the header handler, which may unlink what it is given. */
+            ViceLog(0, ("VVGC_scan_partition: path for %s on %s is too long; skipping\n",
+                        dp->d_name, part_path));
+            continue;
+        }
+
+        res = _VVGC_handle_header(part, node_path, &hdr, &skip);
+        if (res) {
+            /* internal error; error out */
+            code = -1;
+            goto done;
+        }
+        if (skip) {
+            continue;
+        }
+
+        res = _VVGC_scan_table_add(&tbl,
+                                   part,
+                                   hdr.id,
+                                   hdr.parent);
+        if (res) {
+            /* remember the failure but keep scanning the directory */
+            ViceLog(0, ("VVGC_scan_partition: error %d adding volume %s to scan table\n",
+                        res, node_path));
+            code = res;
+        }
+    }
+
+    /* Push the last (partial) batch. A failure here must count exactly like
+     * a failure in an earlier batch; otherwise a partition whose final
+     * batch could not be added would still be marked VALID. */
+    res = _VVGC_scan_table_flush(&tbl, part);
+    if (res) {
+        ViceLog(0, ("VVGC_scan_partition: error %d flushing scan table for %s\n",
+                    res, part_path));
+        code = res;
+    }
+
+ done:
+    if (dirp) {
+        closedir(dirp);
+        dirp = NULL;
+    }
+    if (code) {
+        /* part_path is NULL when the partition object itself was bad */
+        ViceLog(0, ("VVGC_scan_partition: error %d while scanning %s\n",
+                    code, part_path ? part_path : "(unknown partition)"));
+    } else {
+        ViceLog(0, ("VVGC_scan_partition: finished scanning %s: %lu volumes in %lu groups\n",
+                    part_path, tbl.newvols, tbl.newvgs));
+    }
+
+    VOL_LOCK;
+
+    /* apply deletions recorded since the last table flush, then release the
+     * to-delete list; it only exists while a scan is running */
+    _VVGC_flush_dlist(part);
+    free(VVGCache.part[part->index].dlist_hash_buckets);
+    VVGCache.part[part->index].dlist_hash_buckets = NULL;
+
+    if (code) {
+        _VVGC_state_change(part, VVGC_PART_STATE_INVALID);
+    } else {
+        _VVGC_state_change(part, VVGC_PART_STATE_VALID);
+    }
+
+    VOL_UNLOCK;
+
+    return code;
+}
+
+/**
+ * scanner thread.
+ *
+ * Thread entry point: runs one scan of the partition passed in and logs
+ * a message if the scan reports an error.
+ *
+ * @param[in] args  the struct DiskPartition64 to scan
+ *
+ * @return always NULL
+ */
+static void *
+_VVGC_scanner_thread(void * args)
+{
+    struct DiskPartition64 *part = args;
+    int code = _VVGC_scan_partition(part);
+
+    if (code != 0) {
+        ViceLog(0, ("Error: _VVGC_scan_partition failed with code %d for partition %s\n",
+                    code, VPartitionPath(part)));
+    }
+
+    return NULL;
+}
+
+/**
+ * start a background scan.
+ *
+ * Moves the partition to the UPDATING state, allocates its to-delete
+ * list, and creates a detached thread that scans the partition.
+ *
+ * If the partition was already UPDATING, nothing is changed: in that
+ * case the partition state and the to-delete list belong to the scan
+ * that is already in progress and must not be touched here.
+ *
+ * If anything fails after the partition was moved to UPDATING, the
+ * to-delete list is released and the partition is moved to INVALID, so
+ * that it is not left in UPDATING with no scanner running.
+ *
+ * @param[in] dp  disk partition object
+ *
+ * @return operation status
+ *    @retval 0      success
+ *    @retval -1     internal error (memory allocation failed)
+ *    @retval -3     racing against another thread
+ *    @retval other  error number returned by a failing pthread call
+ *
+ * @note NOTE(review): presumably called with VOL_LOCK held, since it
+ *       changes VVGCache state without locking; confirm against callers.
+ *
+ * @internal
+ */
+int
+_VVGC_scan_start(struct DiskPartition64 * dp)
+{
+    int code = 0;
+    int attrs_inited = 0;
+    pthread_t tid;
+    pthread_attr_t attrs;
+    int i;
+
+    if (_VVGC_state_change(dp,
+                           VVGC_PART_STATE_UPDATING)
+        == VVGC_PART_STATE_UPDATING) {
+        /* race */
+        ViceLog(0, ("VVGC_scan_partition: race detected; aborting scanning partition %s\n",
+                    VPartitionPath(dp)));
+        code = -3;
+        ViceLog(0, ("_VVGC_scan_start failed with code %d for partition %s\n",
+                    code, VPartitionPath(dp)));
+        /* Return without going through the cleanup below: the to-delete
+         * list that is currently allocated is in use by the running scan,
+         * and freeing it here would pull it out from under that scan. */
+        return code;
+    }
+
+    /* initialize partition's to-delete list */
+    VVGCache.part[dp->index].dlist_hash_buckets =
+        malloc(VolumeHashTable.Size * sizeof(struct rx_queue));
+    if (!VVGCache.part[dp->index].dlist_hash_buckets) {
+        code = -1;
+        goto error;
+    }
+    for (i = 0; i < VolumeHashTable.Size; i++) {
+        queue_Init(&VVGCache.part[dp->index].dlist_hash_buckets[i]);
+    }
+
+    code = pthread_attr_init(&attrs);
+    if (code) {
+        goto error;
+    }
+    attrs_inited = 1;
+
+    code = pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
+    if (code) {
+        goto error;
+    }
+
+    code = pthread_create(&tid, &attrs, &_VVGC_scanner_thread, dp);
+    if (code) {
+        ViceLog(0, ("_VVGC_scan_start: pthread_create failed with %d\n", code));
+    }
+
+ error:
+    if (attrs_inited) {
+        /* the attribute object is not needed once the thread exists */
+        pthread_attr_destroy(&attrs);
+    }
+    if (code) {
+        VVGCache_part_state_t old_state;
+
+        ViceLog(0, ("_VVGC_scan_start failed with code %d for partition %s\n",
+                    code, VPartitionPath(dp)));
+
+        /* no scanner thread was created, so the list allocated above (if
+         * any) is ours to release */
+        free(VVGCache.part[dp->index].dlist_hash_buckets);
+        VVGCache.part[dp->index].dlist_hash_buckets = NULL;
+
+        /* undo the transition to UPDATING made at the top; nobody is going
+         * to finish this scan and change the state for us */
+        old_state = _VVGC_state_change(dp, VVGC_PART_STATE_INVALID);
+        assert(old_state == VVGC_PART_STATE_UPDATING);
+    }
+
+    return code;
+}
+
+/**
+ * find an entry on a partition's to-delete list.
+ *
+ * Only the bucket selected by hashing the child volume ID is searched;
+ * an entry matches when both its child and parent IDs are equal to the
+ * ones asked for.
+ *
+ * @param[in] dp      the partition whose dlist we are looking at
+ * @param[in] parent  the parent volume ID we're looking for
+ * @param[in] child   the child volume ID we're looking for
+ *
+ * @pre the partition's to-delete list is allocated (i.e. a scan of the
+ *      partition is in progress)
+ *
+ * @return a pointer to the entry in the dlist for that entry
+ *    @retval NULL the requested entry does not exist in the dlist
+ */
+static VVGCache_dlist_entry_t *
+_VVGC_dlist_lookup_r(struct DiskPartition64 *dp, VolumeId parent,
+                     VolumeId child)
+{
+    int slot = VVGC_HASH(child);
+    VVGCache_dlist_entry_t *cur, *next;
+
+    for (queue_Scan(&VVGCache.part[dp->index].dlist_hash_buckets[slot],
+                    cur, next,
+                    VVGCache_dlist_entry)) {
+
+        if (cur->parent != parent || cur->child != child) {
+            continue;
+        }
+        return cur;
+    }
+
+    return NULL;
+}
+
+/**
+ * apply and empty a partition's to-delete list.
+ *
+ * Walks every bucket of the to-delete list for the given partition. For
+ * each entry, the matching (parent, child) pair is purged from the
+ * global VGC, and the entry itself is unlinked from the list and freed.
+ * The bucket array itself is left allocated.
+ *
+ * @param[in] dp  the partition whose dlist we are flushing
+ *
+ * @pre the partition's to-delete list is allocated
+ */
+static void
+_VVGC_flush_dlist(struct DiskPartition64 *dp)
+{
+    int slot;
+    VVGCache_dlist_entry_t *cur, *next;
+
+    for (slot = 0; slot < VolumeHashTable.Size; slot++) {
+        for (queue_Scan(&VVGCache.part[dp->index].dlist_hash_buckets[slot],
+                        cur, next,
+                        VVGCache_dlist_entry)) {
+
+            /* drop it from the cache first, then from the list itself */
+            _VVGC_entry_purge_r(dp, cur->parent, cur->child);
+            queue_Remove(cur);
+            free(cur);
+        }
+    }
+}
+
+/**
+ * add a VGC entry to the partition's to-delete list.
+ *
+ * This adds a VGC entry (a parent/child pair) to a list of VGC entries to
+ * be deleted from the VGC at the end of a VGC scan. This is necessary
+ * because volumes may be deleted while a VGC scan is occurring. Since a
+ * VGC scan scans a partition in VVGC_SCAN_TBL_LEN chunks, a VGC delete
+ * operation may delete a volume, only for it to be added again when the
+ * VGC scan's table adds it to the VGC. So when a VGC entry is deleted and
+ * a VGC scan is running, this function must be called to ensure it does
+ * not come back onto the VGC.
+ *
+ * The new entry is appended to the bucket selected by hashing the child
+ * volume ID.
+ *
+ * @param[in] dp      the partition to whose dlist we are adding
+ * @param[in] parent  the parent volumeID of the VGC entry
+ * @param[in] child   the child volumeID of the VGC entry
+ *
+ * @return operation status
+ *    @retval 0       success
+ *    @retval ENOMEM  memory allocation error
+ *
+ * @pre VVGCache.part[dp->index].state == VVGC_PART_STATE_UPDATING
+ *
+ * @internal VGC use only
+ */
+int
+_VVGC_dlist_add_r(struct DiskPartition64 *dp, VolumeId parent,
+                  VolumeId child)
+{
+    int slot = VVGC_HASH(child);
+    VVGCache_dlist_entry_t *node = malloc(sizeof(*node));
+
+    if (node == NULL) {
+        return ENOMEM;
+    }
+
+    node->parent = parent;
+    node->child = child;
+
+    queue_Append(&VVGCache.part[dp->index].dlist_hash_buckets[slot],
+                 node);
+    return 0;
+}
+
+/**
+ * delete a VGC entry from the partition's to-delete list.
+ *
+ * When a VGC scan is occurring, and a volume is removed, but then created
+ * again, we need to ensure that it does not get deleted from being on the
+ * dlist. Call this function whenever adding a new entry to the VGC during
+ * a VGC scan to ensure it doesn't get deleted later.
+ *
+ * @param[in] dp      the partition from whose dlist we are deleting
+ * @param[in] parent  the parent volumeID of the VGC entry
+ * @param[in] child   the child volumeID of the VGC entry
+ *
+ * @return operation status
+ *    @retval 0       success
+ *    @retval ENOENT  the specified VGC entry is not on the dlist
+ *
+ * @pre VVGCache.part[dp->index].state == VVGC_PART_STATE_UPDATING
+ *
+ * @internal VGC use only
+ *
+ * @see _VVGC_dlist_add_r
+ */
+int
+_VVGC_dlist_del_r(struct DiskPartition64 *dp, VolumeId parent,
+                  VolumeId child)
+{
+    VVGCache_dlist_entry_t *victim = _VVGC_dlist_lookup_r(dp, parent, child);
+
+    if (victim == NULL) {
+        return ENOENT;
+    }
+
+    /* unlink before freeing so the bucket never points at freed memory */
+    queue_Remove(victim);
+    free(victim);
+
+    return 0;
+}
+
+#endif /* AFS_DEMAND_ATTACH_FS */
#include "vol-salvage.h"
#include "vol_internal.h"
+#ifdef FSSYNC_BUILD_CLIENT
+#include "vg_cache.h"
+#endif
+
#ifdef AFS_NT40_ENV
#include <pthread.h>
#endif
struct VnodeInfo vnodeInfo[nVNODECLASSES];
-struct VolumeSummary *volumeSummaryp; /* Holds all the volumes in a part */
+struct VolumeSummary *volumeSummaryp = NULL; /* Holds all the volumes in a part */
int nVolumes; /* Number of volumes (read-write and read-only)
* in volume summary */
/*@printflike@*/ void Log(const char *format, ...);
/*@printflike@*/ void Abort(const char *format, ...);
static int IsVnodeOrphaned(VnodeId vnode);
+static int AskVolumeSummary(VolumeId singleVolumeNumber);
/* Uniquifier stored in the Inode */
static Unique
return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
}
+/**
+ * Gleans volumeSummary information by asking the fileserver
+ *
+ * When running as the salvageserver on a single volume, this queries the
+ * fileserver's volume group cache for the members of the volume's group
+ * and fills in volumeSummaryp / nVolumes from their on-disk headers, so
+ * that the caller does not need to scan the whole partition.
+ *
+ * @param[in] singleVolumeNumber the volume we're salvaging. 0 if we're
+ * salvaging a whole partition
+ *
+ * @return whether we obtained the volume summary information or not
+ * @retval 0 success; we obtained the volume summary information
+ * @retval nonzero we did not get the volume summary information; either the
+ * fileserver responded with an error, or we are not supposed to
+ * ask the fileserver for the information (e.g. we are salvaging
+ * the entire partition or we are not the salvageserver)
+ *
+ * @note for non-DAFS, always returns 1
+ */
+static int
+AskVolumeSummary(VolumeId singleVolumeNumber)
+{
+ /* default answer: the caller has to scan the partition itself */
+ afs_int32 code = 1;
+#ifdef FSSYNC_BUILD_CLIENT
+ if (programType == salvageServer) {
+ if (singleVolumeNumber) {
+ FSSYNC_VGQry_response_t q_res;
+ SYNC_response res;
+ struct VolumeSummary *vsp;
+ int i;
+ struct VolumeDiskHeader diskHdr;
+
+ memset(&res, 0, sizeof(res));
+
+ code = FSYNC_VGCQuery(fileSysPartition->name, singleVolumeNumber, &q_res, &res);
+
+ /*
+ * We must wait for the partition to finish scanning before
+ * can continue, since we will not know if we got the entire
+ * VG membership unless the partition is fully scanned.
+ * We could, in theory, just scan the partition ourselves if
+ * the VG cache is not ready, but we would be doing the exact
+ * same scan the fileserver is doing; it will almost always
+ * be faster to wait for the fileserver. The only exceptions
+ * are if the partition does not take very long to scan, and
+ * in that case it's fast either way, so who cares?
+ */
+ if (code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING) {
+ Log("waiting for fileserver to finish scanning partition %s...\n",
+ fileSysPartition->name);
+
+ for (i = 1; code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING; i++) {
+ /* linearly ramp up from 1 to 10 seconds; nothing fancy,
+ * just so small partitions don't need to wait over 10
+ * seconds every time, and large partitions are generally
+ * polled only once every ten seconds. */
+ /* the assignment inside the conditional pins i at 10 once it
+ * has grown past it, so the delay never exceeds 10 seconds */
+ sleep((i > 10) ? (i = 10) : i);
+
+ code = FSYNC_VGCQuery(fileSysPartition->name, singleVolumeNumber, &q_res, &res);
+ }
+ }
+
+ if (code == SYNC_FAILED && res.hdr.reason == FSYNC_UNKNOWN_VOLID) {
+ /* This can happen if there's no header for the volume
+ * we're salvaging, or no headers exist for the VG (if
+ * we're salvaging an RW). Act as if we got a response
+ * with no VG members. The headers may be created during
+ * salvaging, if there are inodes in this VG. */
+ code = 0;
+ memset(&q_res, 0, sizeof(q_res));
+ q_res.rw = singleVolumeNumber;
+ }
+
+ if (code) {
+ Log("fileserver refused VGCQuery request for volume %lu on "
+ "partition %s, code %ld reason %ld\n",
+ afs_printable_uint32_lu(singleVolumeNumber),
+ fileSysPartition->name,
+ afs_printable_int32_ld(code),
+ afs_printable_int32_ld(res.hdr.reason));
+ goto done;
+ }
+
+ /* the volume we were asked to salvage is not the RW of its group;
+ * ask for the whole group to be salvaged under the RW id instead */
+ if (q_res.rw != singleVolumeNumber) {
+ Log("fileserver requested salvage of clone %lu; scheduling salvage of volume group %lu...\n",
+ afs_printable_uint32_lu(singleVolumeNumber),
+ afs_printable_uint32_lu(q_res.rw));
+#ifdef SALVSYNC_BUILD_CLIENT
+ if (SALVSYNC_LinkVolume(q_res.rw,
+ singleVolumeNumber,
+ fileSysPartition->name,
+ NULL) != SYNC_OK) {
+ Log("schedule request failed\n");
+ }
+#endif /* SALVSYNC_BUILD_CLIENT */
+ /* NOTE(review): Exit() presumably does not return; confirm */
+ Exit(SALSRV_EXIT_VOLGROUP_LINK);
+ }
+
+ /* room for the largest group the query response can describe */
+ volumeSummaryp = malloc(VOL_VG_MAX_VOLS * sizeof(struct VolumeSummary));
+ assert(volumeSummaryp != NULL);
+
+ nVolumes = 0;
+ vsp = volumeSummaryp;
+
+ for (i = 0; i < VOL_VG_MAX_VOLS; i++) {
+ char name[VMAXPATHLEN];
+
+ /* a zero id marks an unused slot in the response */
+ if (!q_res.children[i]) {
+ continue;
+ }
+
+ /* every other member of the group is taken offline as well */
+ if (q_res.children[i] != singleVolumeNumber) {
+ AskOffline(q_res.children[i], fileSysPartition->name);
+ }
+ code = VReadVolumeDiskHeader(q_res.children[i], fileSysPartition, &diskHdr);
+ if (code) {
+ Log("Cannot read header for %lu; trying to salvage group anyway\n",
+ afs_printable_uint32_lu(q_res.children[i]));
+ /* an unreadable member header is not fatal; leave it out of
+ * the summary and carry on */
+ code = 0;
+ continue;
+ }
+
+ DiskToVolumeHeader(&vsp->header, &diskHdr);
+ VolumeExternalName_r(q_res.children[i], name, sizeof(name));
+ vsp->fileName = ToString(name);
+ nVolumes++;
+ vsp++;
+ }
+
+ qsort(volumeSummaryp, nVolumes, sizeof(struct VolumeSummary),
+ CompareVolumes);
+ }
+ done:
+ /* also reached with code still 1 when singleVolumeNumber is 0 */
+ if (code) {
+ Log("Cannot get volume summary from fileserver; falling back to scanning "
+ "entire partition\n");
+ }
+ }
+#endif /* FSSYNC_BUILD_CLIENT */
+ return code;
+}
+
void
GetVolumeSummary(VolumeId singleVolumeNumber)
{
struct VolumeDiskHeader diskHeader;
struct dirent *dp;
+ if (AskVolumeSummary(singleVolumeNumber) == 0) {
+ /* we successfully got the vol information from the fileserver; no
+ * need to scan the partition */
+ return;
+ }
+
/* Get headers from volume directory */
dirp = opendir(fileSysPath);
if (dirp == NULL)
#endif
if (!nvols)
nvols = 1;
- volumeSummaryp =
- (struct VolumeSummary *)malloc(nvols *
- sizeof(struct VolumeSummary));
- } else
- volumeSummaryp =
- (struct VolumeSummary *)malloc(20 * sizeof(struct VolumeSummary));
+ } else {
+ nvols = VOL_VG_MAX_VOLS;
+ }
+
+ volumeSummaryp = malloc(nvols * sizeof(struct VolumeSummary));
assert(volumeSummaryp != NULL);
nVolumes = 0;
AskOffline(VolumeId volumeId, char * partition)
{
afs_int32 code, i;
+ SYNC_response res;
+
+ memset(&res, 0, sizeof(res));
for (i = 0; i < 3; i++) {
- code = FSYNC_VolOp(volumeId, partition, FSYNC_VOL_OFF, FSYNC_SALVAGE, NULL);
+ code = FSYNC_VolOp(volumeId, partition, FSYNC_VOL_OFF, FSYNC_SALVAGE, &res);
if (code == SYNC_OK) {
break;
#define VOLMAXTYPES 3 /* _current_ max number of types */
+/* the maximum number of volumes in a volume group that we can handle */
+#define VOL_VG_MAX_VOLS 20
+
/* maximum numbe of Vice partitions */
#define VOLMAXPARTS 255
volume header ON: this means that the volumes will not be attached by the
file server and WILL BE DESTROYED the next time a system salvage is performed */
+#ifdef FSSYNC_BUILD_CLIENT
static void
RemoveInodes(Device dev, VolumeId vid)
{
return (VAttachVolumeByName_r(ec, partname, headerName, V_SECRETLY));
}
+#endif /* FSSYNC_BUILD_CLIENT */
void
return code;
}
+#ifdef FSSYNC_BUILD_CLIENT
/**
* write an existing volume disk header.
*
int fd;
char path[MAXPATHLEN];
+#ifdef AFS_DEMAND_ATTACH_FS
+ /* prevent racing with VGC scanners reading the vol header while we are
+ * writing it */
+ code = VPartHeaderLock(dp, READ_LOCK);
+ if (code) {
+ return EIO;
+ }
+#endif /* AFS_DEMAND_ATTACH_FS */
+
flags |= O_RDWR;
(void)afs_snprintf(path, sizeof(path),
}
}
+#ifdef AFS_DEMAND_ATTACH_FS
+ VPartHeaderUnlock(dp, READ_LOCK);
+#endif /* AFS_DEMAND_ATTACH_FS */
+
return code;
}
{
afs_int32 code;
+#ifdef AFS_DEMAND_ATTACH_FS
+ VolumeDiskHeader_t oldhdr;
+ int delvgc = 0, addvgc = 0;
+ SYNC_response res;
+
+ /* first, see if anything with the volume IDs have changed; if so, we
+ * need to update the VGC */
+
+ code = VReadVolumeDiskHeader(hdr->id, dp, &oldhdr);
+ if (code == 0 && (oldhdr.id != hdr->id || oldhdr.parent != hdr->parent)) {
+ /* the vol id or parent vol id changed; need to delete the VGC entry
+ * for the old vol id/parent, and add the new one */
+ delvgc = 1;
+ addvgc = 1;
+
+ } else if (code) {
+ /* couldn't get the old header info; add the new header info to the
+ * VGC in case it hasn't been added yet */
+ addvgc = 1;
+ }
+
+#endif /* AFS_DEMAND_ATTACH_FS */
+
code = _VWriteVolumeDiskHeader(hdr, dp, 0);
if (code) {
goto done;
}
+#ifdef AFS_DEMAND_ATTACH_FS
+ if (delvgc) {
+ memset(&res, 0, sizeof(res));
+ code = FSYNC_VGCDel(dp->name, oldhdr.parent, oldhdr.id, FSYNC_WHATEVER, &res);
+
+ /* unknown vol id is okay; it just further suggests the old header
+ * data was bogus, which is fine since we're trying to fix it */
+ if (code && res.hdr.reason != FSYNC_UNKNOWN_VOLID) {
+ Log("VWriteVolumeDiskHeader: FSYNC_VGCDel(%s, %lu, %lu) "
+ "failed with code %ld reason %ld\n", dp->name,
+ afs_printable_uint32_lu(oldhdr.parent),
+ afs_printable_uint32_lu(oldhdr.id),
+ afs_printable_int32_ld(code),
+ afs_printable_int32_ld(res.hdr.reason));
+ }
+
+ }
+ if (addvgc) {
+ memset(&res, 0, sizeof(res));
+ code = FSYNC_VGCAdd(dp->name, hdr->parent, hdr->id, FSYNC_WHATEVER, &res);
+ if (code) {
+ Log("VWriteVolumeDiskHeader: FSYNC_VGCAdd(%s, %lu, %lu) "
+ "failed with code %ld reason %ld\n", dp->name,
+ afs_printable_uint32_lu(hdr->parent),
+ afs_printable_uint32_lu(hdr->id),
+ afs_printable_int32_ld(code),
+ afs_printable_int32_ld(res.hdr.reason));
+ }
+ }
+
+#endif /* AFS_DEMAND_ATTACH_FS */
+
done:
return code;
}
struct DiskPartition64 * dp)
{
afs_int32 code = 0;
+#ifdef AFS_DEMAND_ATTACH_FS
+ SYNC_response res;
+#endif /* AFS_DEMAND_ATTACH_FS */
code = _VWriteVolumeDiskHeader(hdr, dp, O_CREAT | O_EXCL);
if (code) {
goto done;
}
+#ifdef AFS_DEMAND_ATTACH_FS
+ memset(&res, 0, sizeof(res));
+ code = FSYNC_VGCAdd(dp->name, hdr->parent, hdr->id, FSYNC_WHATEVER, &res);
+ if (code) {
+ Log("VCreateVolumeDiskHeader: FSYNC_VGCAdd(%s, %lu, %lu) failed "
+ "with code %ld reason %ld\n", dp->name,
+ afs_printable_uint32_lu(hdr->parent),
+ afs_printable_uint32_lu(hdr->id),
+ afs_printable_int32_ld(code),
+ afs_printable_int32_ld(res.hdr.reason));
+ }
+#endif /* AFS_DEMAND_ATTACH_FS */
+
done:
return code;
}
{
afs_int32 code = 0;
char path[MAXPATHLEN];
+#ifdef AFS_DEMAND_ATTACH_FS
+ SYNC_response res;
+#endif /* AFS_DEMAND_ATTACH_FS */
(void)afs_snprintf(path, sizeof(path),
"%s/" VFORMAT,
goto done;
}
+#ifdef AFS_DEMAND_ATTACH_FS
+ memset(&res, 0, sizeof(res));
+ if (!parent) {
+ FSSYNC_VGQry_response_t q_res;
+
+ code = FSYNC_VGCQuery(dp->name, volid, &q_res, &res);
+ if (code) {
+ Log("VDestroyVolumeDiskHeader: FSYNC_VGCQuery(%s, %lu) failed "
+ "with code %ld, reason %ld\n", dp->name,
+ afs_printable_uint32_lu(volid), afs_printable_int32_ld(code),
+ afs_printable_int32_ld(res.hdr.reason));
+ goto done;
+ }
+
+ parent = q_res.rw;
+
+ }
+ code = FSYNC_VGCDel(dp->name, parent, volid, FSYNC_WHATEVER, &res);
+ if (code) {
+ Log("VDestroyVolumeDiskHeader: FSYNC_VGCDel(%s, %lu, %lu) failed "
+ "with code %ld reason %ld\n", dp->name,
+ afs_printable_uint32_lu(parent),
+ afs_printable_uint32_lu(volid),
+ afs_printable_int32_ld(code),
+ afs_printable_int32_ld(res.hdr.reason));
+ }
+#endif /* AFS_DEMAND_ATTACH_FS */
+
done:
return code;
}
+#endif /* FSSYNC_BUILD_CLIENT */
#ifdef AFS_DEMAND_ATTACH_FS
_VUnlockFd(lf->fd, offset);
}
- assert(pthread_mutex_lock(&lf->mutex) == 0);
+ assert(pthread_mutex_unlock(&lf->mutex) == 0);
}
/**