return (*serversppp ? 0 : CM_ERROR_NOSUCHVOLUME);
}
+/*
+ * Mark one reference to serverp in the list headed by serversp as busy.
+ *
+ * The list is walked via ->next while cm_serverLock is held for writing.
+ * References whose status is srv_deleted are skipped.  The first
+ * remaining reference that points at serverp and is currently
+ * srv_not_busy is changed to srv_busy and the walk stops, so at most
+ * one reference is modified per call.  If serversp is NULL, or no
+ * reference matches, nothing is changed.
+ */
+void
+cm_SetServerBusyStatus(cm_serverRef_t *serversp, cm_server_t *serverp)
+{
+ cm_serverRef_t *tsrp;
+
+ lock_ObtainWrite(&cm_serverLock);
+ for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
+ if (tsrp->status == srv_deleted)
+ continue; /* deleted references are never modified */
+ if (tsrp->server == serverp && tsrp->status == srv_not_busy) {
+ tsrp->status = srv_busy;
+ break; /* only the first matching reference is updated */
+ }
+ }
+ lock_ReleaseWrite(&cm_serverLock);
+}
+
+/*
+ * Clear the busy mark on every reference in the list headed by serversp.
+ *
+ * The list is walked via ->next while cm_serverLock is held for writing.
+ * References whose status is srv_deleted are skipped; every other
+ * reference whose status is srv_busy is set back to srv_not_busy.
+ * Unlike cm_SetServerBusyStatus() the walk does not stop at the first
+ * match.  A NULL serversp is accepted: the lock is still taken and
+ * released, but no reference is visited.
+ */
+void
+cm_ResetServerBusyStatus(cm_serverRef_t *serversp)
+{
+ cm_serverRef_t *tsrp;
+
+ lock_ObtainWrite(&cm_serverLock);
+ for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
+ if (tsrp->status == srv_deleted)
+ continue; /* deleted references are never modified */
+ if (tsrp->status == srv_busy) {
+ tsrp->status = srv_not_busy;
+ }
+ }
+ lock_ReleaseWrite(&cm_serverLock);
+}
+
/*
* Analyze the error return from an RPC. Determine whether or not to retry,
* and if we're going to retry, determine whether failover is appropriate,
format = "All servers are offline when accessing cell %s volume %d.";
LogEvent(EVENTLOG_WARNING_TYPE, msgID, cellp->name, fidp->volume);
+ if (!serversp) {
+ code = cm_GetServerList(fidp, userp, reqp, &replicated, &serverspp);
+ if (code == 0) {
+ serversp = *serverspp;
+ free_svr_list = 1;
+ }
+ }
+ cm_ResetServerBusyStatus(serversp);
+ if (free_svr_list) {
+ cm_FreeServerList(serverspp, 0);
+ free_svr_list = 0;
+ serversp = NULL;
+ }
+
code = cm_FindVolumeByID(cellp, fidp->volume, userp, reqp,
CM_GETVOL_FLAG_NO_LRU_UPDATE,
&volp);
format = "All servers are busy when accessing cell %s volume %d.";
LogEvent(EVENTLOG_WARNING_TYPE, msgID, cellp->name, fidp->volume);
+ if (!serversp) {
+ code = cm_GetServerList(fidp, userp, reqp, &replicated, &serverspp);
+ if (code == 0) {
+ serversp = *serverspp;
+ free_svr_list = 1;
+ }
+ }
+ cm_ResetServerBusyStatus(serversp);
+ if (free_svr_list) {
+ cm_FreeServerList(serverspp, 0);
+ free_svr_list = 0;
+ serversp = NULL;
+ }
+
code = cm_FindVolumeByID(cellp, fidp->volume, userp, reqp,
CM_GETVOL_FLAG_NO_LRU_UPDATE,
&volp);
if (code == 0) {
if (timeLeft > 7) {
thrd_Sleep(5000);
-
statep = cm_VolumeStateByID(volp, fidp->volume);
- if (statep->state != vl_offline &&
- statep->state != vl_busy &&
- statep->state != vl_unknown) {
- retry = 1;
- } else {
- if (!serversp) {
- code = cm_GetServerList(fidp, userp, reqp, &replicated, &serverspp);
- if (code == 0) {
- serversp = *serverspp;
- free_svr_list = 1;
- }
- }
- lock_ObtainWrite(&cm_serverLock);
- for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
- if (tsrp->status == srv_deleted)
- continue;
- if (tsrp->status == srv_busy) {
- tsrp->status = srv_not_busy;
- }
- }
- lock_ReleaseWrite(&cm_serverLock);
- if (free_svr_list) {
- cm_FreeServerList(serverspp, 0);
- serversp = NULL;
- free_svr_list = 0;
- }
-
- cm_UpdateVolumeStatus(volp, fidp->volume);
- retry = 1;
- }
- } else {
- cm_UpdateVolumeStatus(volp, fidp->volume);
+ retry = 1;
}
+ cm_UpdateVolumeStatus(volp, fidp->volume);
lock_ObtainRead(&cm_volumeLock);
cm_PutVolume(volp);
thrd_Sleep(5000);
if (serversp) {
- lock_ObtainWrite(&cm_serverLock);
- for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
- if (tsrp->status == srv_deleted)
- continue;
- if (tsrp->status == srv_busy) {
- tsrp->status = srv_not_busy;
- }
- }
- lock_ReleaseWrite(&cm_serverLock);
+ cm_ResetServerBusyStatus(serversp);
retry = 1;
}
}
LogEvent(EVENTLOG_WARNING_TYPE, msgID, addr, fidp->volume, cellp->name);
}
- lock_ObtainWrite(&cm_serverLock);
- for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
- if (tsrp->status == srv_deleted)
- continue;
- if (tsrp->server == serverp && tsrp->status == srv_not_busy) {
- tsrp->status = srv_busy;
- if (fidp) { /* File Server query */
- lock_ReleaseWrite(&cm_serverLock);
- code = cm_FindVolumeByID(cellp, fidp->volume, userp, reqp,
- CM_GETVOL_FLAG_NO_LRU_UPDATE,
- &volp);
- if (code == 0)
- statep = cm_VolumeStateByID(volp, fidp->volume);
- lock_ObtainWrite(&cm_serverLock);
- }
- break;
- }
- }
- lock_ReleaseWrite(&cm_serverLock);
+ cm_SetServerBusyStatus(serversp, serverp);
+
+ if (fidp) { /* File Server query */
+ code = cm_FindVolumeByID(cellp, fidp->volume, userp, reqp,
+ CM_GETVOL_FLAG_NO_LRU_UPDATE,
+ &volp);
+ if (code == 0) {
+ statep = cm_VolumeStateByID(volp, fidp->volume);
- if (statep) {
- cm_UpdateVolumeStatus(volp, statep->ID);
- lock_ObtainRead(&cm_volumeLock);
- cm_PutVolume(volp);
- lock_ReleaseRead(&cm_volumeLock);
- volp = NULL;
+ if (statep)
+ cm_UpdateVolumeStatus(volp, statep->ID);
+
+ lock_ObtainRead(&cm_volumeLock);
+ cm_PutVolume(volp);
+ lock_ReleaseRead(&cm_volumeLock);
+ volp = NULL;
+ }
}
if (free_svr_list) {
/* special codes: missing volumes */
else if (errorCode == VNOVOL || errorCode == VMOVED || errorCode == VOFFLINE ||
- errorCode == VSALVAGE || errorCode == VNOSERVICE || errorCode == VIO)
+ errorCode == VSALVAGE || errorCode == VIO)
{
/* In case of timeout */
reqp->volumeError = errorCode;
msgID = MSG_SERVER_REPORTS_VSALVAGE;
format = "Server %s reported volume %d in cell %s as needs salvage.";
break;
- case VNOSERVICE:
- msgID = MSG_SERVER_REPORTS_VNOSERVICE;
- format = "Server %s reported volume %d in cell %s as not in service.";
- break;
case VIO:
msgID = MSG_SERVER_REPORTS_VIO;
format = "Server %s reported volume %d in cell %s as temporarily unaccessible.";
osi_LogSaveString(afsd_logp,addr));
retry = 1;
}
+ else if (errorCode == VNOSERVICE) {
+ /*
+ * The server did not service the RPC.
+ * If this was a file server RPC it means that for at
+ * least the file server's idle dead timeout period the
+ * file server did not receive any new data packets from
+ * the client.
+ *
+ * The RPC was not serviced so it can be retried and any
+ * existing status information is still valid.
+ */
+ if (fidp) {
+ if (serverp)
+ sprintf(addr, "%d.%d.%d.%d",
+ ((serverp->addr.sin_addr.s_addr & 0xff)),
+ ((serverp->addr.sin_addr.s_addr & 0xff00)>> 8),
+ ((serverp->addr.sin_addr.s_addr & 0xff0000)>> 16),
+ ((serverp->addr.sin_addr.s_addr & 0xff000000)>> 24));
+
+ LogEvent(EVENTLOG_WARNING_TYPE, MSG_SERVER_REPORTS_VNOSERVICE, addr);
+ osi_Log1(afsd_logp, "Server %s reported volume %d in cell %s as not in service.",
+ osi_LogSaveString(afsd_logp,addr), fidp->volume, cellp->name);
+ }
+
+ if (timeLeft > 2)
+ retry = 1;
+ }
else if (errorCode == RX_CALL_IDLE) {
/*
* RPC failed because the server failed to respond with data
reqp->flags &= ~CM_REQ_VOLUME_UPDATED;
}
+ if ( serversp &&
+ errorCode != VBUSY &&
+ errorCode != VRESTARTING &&
+ errorCode != CM_ERROR_ALLBUSY)
+ {
+ cm_ResetServerBusyStatus(serversp);
+ }
+
/* retry until we fail to find a connection */
return retry;
}