/*
 * Per-connection Rx timeout settings.  Each starts at its CM_CONN_*
 * default and is later handed to the matching Rx setter when a connection
 * is configured (rx_SetConnDeadTime, rx_SetConnHardDeadTime and
 * rx_SetConnIdleDeadTime respectively).
 * NOTE(review): units are presumably seconds, as for the Rx setters --
 * confirm against the CM_CONN_* definitions.
 */
unsigned short ConnDeadtimeout = CM_CONN_CONNDEADTIME;
unsigned short HardDeadtimeout = CM_CONN_HARDDEADTIME;
unsigned short IdleDeadtimeout = CM_CONN_IDLEDEADTIME;
/*
 * Interval passed to rx_SetConnSecondsUntilNatPing() for each connection.
 * May be overridden by the "NatPingInterval" registry value; when it is
 * zero the Rx setter is not called at all.
 */
+unsigned short NatPingInterval = CM_CONN_NATPINGINTERVAL;
#define LANMAN_WKS_PARAM_KEY "SYSTEM\\CurrentControlSet\\Services\\lanmanworkstation\\parameters"
#define LANMAN_WKS_SESSION_TIMEOUT "SessTimeout"
IdleDeadtimeout = (unsigned short)dwValue;
afsi_log("IdleDeadTimeout is %d", IdleDeadtimeout);
}
+ dummyLen = sizeof(DWORD);
+ code = RegQueryValueEx(parmKey, "NatPingInterval", NULL, NULL,
+ (BYTE *) &dwValue, &dummyLen);
+ if (code == ERROR_SUCCESS) {
+ NatPingInterval = (unsigned short)dwValue;
+ }
+ afsi_log("NatPingInterval is %d", NatPingInterval);
RegCloseKey(parmKey);
}
/*
 * Prepare a request descriptor for use: clear every byte of *reqp, then
 * stamp reqp->startTime with the current GetTickCount() value.
 *
 * reqp - request structure to initialise; it is dereferenced
 *        unconditionally, so the caller must pass a valid pointer.
 */
void cm_InitReq(cm_req_t *reqp)
{
- memset((char *)reqp, 0, sizeof(cm_req_t));
+ memset(reqp, 0, sizeof(cm_req_t));
reqp->startTime = GetTickCount();
}
long code;
char addr[16]="unknown";
int forcing_new = 0;
+ char *format;
+ DWORD msgID;
osi_Log2(afsd_logp, "cm_Analyze connp 0x%p, code 0x%x",
connp, errorCode);
}
else if (errorCode == CM_ERROR_ALLDOWN) {
- osi_Log0(afsd_logp, "cm_Analyze passed CM_ERROR_ALLDOWN.");
/* Servers marked DOWN will be restored by the background daemon
* thread as they become available. The volume status is
* updated as the server state changes.
*/
+ if (fidp) {
+ osi_Log2(afsd_logp, "cm_Analyze passed CM_ERROR_DOWN (FS cell %s vol 0x%x)",
+ cellp->name, fidp->volume);
+ msgID = MSG_ALL_SERVERS_DOWN;
+ format = "All servers are unreachable when accessing cell %s volume %d.";
+ LogEvent(EVENTLOG_WARNING_TYPE, msgID, cellp->name, fidp->volume);
+ } else {
+ osi_Log0(afsd_logp, "cm_Analyze passed CM_ERROR_ALLDOWN (VL Server)");
+ }
}
else if (errorCode == CM_ERROR_ALLOFFLINE) {
- osi_Log0(afsd_logp, "cm_Analyze passed CM_ERROR_ALLOFFLINE.");
/* Volume instances marked offline will be restored by the
* background daemon thread as they become available
*/
if (fidp) {
+ osi_Log2(afsd_logp, "cm_Analyze passed CM_ERROR_ALLOFFLINE (FS cell %s vol 0x%x)",
+ cellp->name, fidp->volume);
+ msgID = MSG_ALL_SERVERS_OFFLINE;
+ format = "All servers are offline when accessing cell %s volume %d.";
+ LogEvent(EVENTLOG_WARNING_TYPE, msgID, cellp->name, fidp->volume);
+
code = cm_FindVolumeByID(cellp, fidp->volume, userp, reqp,
CM_GETVOL_FLAG_NO_LRU_UPDATE,
&volp);
lock_ReleaseRead(&cm_volumeLock);
volp = NULL;
}
- }
+ } else {
+ osi_Log0(afsd_logp, "cm_Analyze passed CM_ERROR_ALLOFFLINE (VL Server)");
+ }
}
else if (errorCode == CM_ERROR_ALLBUSY) {
/* Volumes that are busy cannot be determined to be non-busy
* without actually attempting to access them.
*/
- osi_Log0(afsd_logp, "cm_Analyze passed CM_ERROR_ALLBUSY.");
-
if (fidp) { /* File Server query */
+ osi_Log2(afsd_logp, "cm_Analyze passed CM_ERROR_ALLBUSY (FS cell %s vol 0x%x)",
+ cellp->name, fidp->volume);
+ msgID = MSG_ALL_SERVERS_BUSY;
+ format = "All servers are busy when accessing cell %s volume %d.";
+ LogEvent(EVENTLOG_WARNING_TYPE, msgID, cellp->name, fidp->volume);
+
code = cm_FindVolumeByID(cellp, fidp->volume, userp, reqp,
CM_GETVOL_FLAG_NO_LRU_UPDATE,
&volp);
volp = NULL;
}
} else { /* VL Server query */
+ osi_Log0(afsd_logp, "cm_Analyze passed CM_ERROR_ALLBUSY (VL Server).");
+
if (timeLeft > 7) {
thrd_Sleep(5000);
free_svr_list = 1;
}
}
+
+ switch ( errorCode ) {
+ case VBUSY:
+ msgID = MSG_SERVER_REPORTS_VBUSY;
+ format = "Server %s reported busy when accessing volume %d.";
+ break;
+ case VRESTARTING:
+ msgID = MSG_SERVER_REPORTS_VRESTARTING;
+ format = "Server %s reported restarting when accessing volume %d.";
+ break;
+ }
+
+ if (serverp && fidp) {
+ /* Log server being offline for this volume */
+ sprintf(addr, "%d.%d.%d.%d",
+ ((serverp->addr.sin_addr.s_addr & 0xff)),
+ ((serverp->addr.sin_addr.s_addr & 0xff00)>> 8),
+ ((serverp->addr.sin_addr.s_addr & 0xff0000)>> 16),
+ ((serverp->addr.sin_addr.s_addr & 0xff000000)>> 24));
+
+ osi_Log2(afsd_logp, format, osi_LogSaveString(afsd_logp,addr), fidp->volume);
+ LogEvent(EVENTLOG_WARNING_TYPE, msgID, addr, fidp->volume);
+ }
+
lock_ObtainWrite(&cm_serverLock);
for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
if (tsrp->status == srv_deleted)
else if (errorCode == VNOVOL || errorCode == VMOVED || errorCode == VOFFLINE ||
errorCode == VSALVAGE || errorCode == VNOSERVICE || errorCode == VIO)
{
- char *format;
- DWORD msgID;
-
/* In case of timeout */
reqp->volumeError = errorCode;
lock_ReleaseRead(&cm_serverLock);
if (firstError == 0) {
- if (allDown)
+ if (allDown) {
firstError = (reqp->tokenError ? reqp->tokenError :
(reqp->idleError ? RX_CALL_TIMEOUT : CM_ERROR_ALLDOWN));
- else if (allBusy)
+ /*
+ * if we experienced either a token error or an idle dead time error
+ * and now all of the servers are down, we have either tried them
+ * all or lost connectivity. Clear the error we are returning so
+ * we will not return it indefinitely if the request is retried.
+ */
+ reqp->idleError = reqp->tokenError = 0;
+ } else if (allBusy) {
firstError = CM_ERROR_ALLBUSY;
- else if (allOffline || (someBusy && someOffline))
+ } else if (allOffline || (someBusy && someOffline)) {
firstError = CM_ERROR_ALLOFFLINE;
- else {
+ } else {
osi_Log0(afsd_logp, "cm_ConnByMServers returning impossible error TIMEDOUT");
firstError = CM_ERROR_TIMEDOUT;
}
rx_SetConnDeadTime(tcp->rxconnp, ConnDeadtimeout);
rx_SetConnHardDeadTime(tcp->rxconnp, HardDeadtimeout);
rx_SetConnIdleDeadTime(tcp->rxconnp, IdleDeadtimeout);
+ if (NatPingInterval)
+ rx_SetConnSecondsUntilNatPing(tcp->rxconnp, NatPingInterval);
tcp->ucgen = ucellp->gen;
if (secObjp)
rxs_Release(secObjp); /* Decrement the initial refCount */