// Use the ReportEvent API to write an entry to the system event log.
//
#define MAXARGS 8
-#define STRLEN 64
+#define STRLEN 128
+
VOID
LogEvent(WORD wEventType, DWORD dwEventID, ...)
{
va_list listArgs;
HANDLE hEventSource;
+ HANDLE hMutex = NULL;
LPTSTR lpArgs[MAXARGS];
CHAR lpStrings[MAXARGS][STRLEN];
+ static CHAR lpLastStrings[MAXARGS][STRLEN];
WORD wNumArgs = 0;
- WORD wNumStrings = 0;
+ static WORD wLastNumArgs = MAXARGS;
+ static time_t lastMessageTime = 0;
+ static WORD wLastEventType = 0;
+ static DWORD dwLastEventID = 0;
+ time_t now;
DWORD code;
+ BOOL bLogMessage = TRUE;
+ WORD i = 0, j;
// Ensure that our event source is properly initialized.
if (!AddEventSource())
case MSG_SERVER_REPORTS_VSALVAGE:
case MSG_SERVER_REPORTS_VNOSERVICE:
case MSG_SERVER_REPORTS_VIO:
+ case MSG_SERVER_REPORTS_VBUSY:
+ case MSG_SERVER_REPORTS_VRESTARTING:
+ case MSG_ALL_SERVERS_BUSY:
+ case MSG_ALL_SERVERS_OFFLINE:
+ case MSG_ALL_SERVERS_DOWN:
wNumArgs = 2;
lpArgs[0] = va_arg(listArgs, LPTSTR);
StringCbPrintf(lpStrings[1],STRLEN,"%d",va_arg(listArgs,afs_int32));
// Make sure we were not given too many args.
if (wNumArgs >= MAXARGS)
- return;
+ goto done;
+
+ hMutex = CreateMutex( NULL, TRUE, "AFSD Event Log Mutex");
+ if (hMutex == NULL)
+ goto done;
+
+ if (GetLastError() == ERROR_ALREADY_EXISTS) {
+ code = WaitForSingleObject( hMutex, 500);
+ if (code != WAIT_OBJECT_0)
+ goto done;
+ }
+
+ /*
+ * We rate limit consecutive duplicate messages to one every
+ * five seconds.
+ */
+ now = time(NULL);
+ if (now < lastMessageTime + 5 &&
+ wEventType == wLastEventType &&
+ dwEventID == dwLastEventID &&
+ wNumArgs == wLastNumArgs) {
+ for (i=0; i<wNumArgs; i++) {
+ if ( strncmp(lpArgs[i], lpLastStrings[i], STRLEN))
+ break;
+ }
+ if (i == wNumArgs)
+ bLogMessage = FALSE;
+ }
+
+ if ( bLogMessage) {
+ wLastNumArgs = wNumArgs;
+ wLastEventType = wEventType;
+ dwLastEventID = dwEventID;
+ lastMessageTime = now;
+
+ for ( j = (i == wNumArgs ? 0 : i) ; i < wNumArgs; i++) {
+ StringCbCopyEx( lpLastStrings[i], STRLEN, lpArgs[i], NULL, NULL, STRSAFE_NULL_ON_FAILURE);
+ }
+ }
+
+ ReleaseMutex(hMutex);
// Log the event.
- code = ReportEvent(hEventSource, // handle of event source
- wEventType, // event type
- 0, // event category
- dwEventID, // event ID
- NULL, // current user's SID
- wNumArgs, // strings in lpszArgs
- 0, // no bytes of raw data
- wNumArgs ? lpArgs : NULL,// array of error strings
- NULL); // no raw data
+ if ( bLogMessage)
+ code = ReportEvent(hEventSource, // handle of event source
+ wEventType, // event type
+ 0, // event category
+ dwEventID, // event ID
+ NULL, // current user's SID
+ wNumArgs, // strings in lpszArgs
+ 0, // no bytes of raw data
+ wNumArgs ? lpArgs : NULL,// array of error strings
+ NULL); // no raw data
+ done:
+ if (hMutex)
+ CloseHandle(hMutex);
DeregisterEventSource(hEventSource);
}
.
MessageId=
-Severity=Warning
+Severity=Informational
Facility=System
SymbolicName=MSG_SERVER_REPORTS_VMOVED
Language=English
A dirty buffer was not written to the file server (Cell=%1, Volume=%2, Vnode=%3, Unique=%4, Offset=%5, Length=%6).
.
+MessageId=
+Severity=Informational
+Facility=System
+SymbolicName=MSG_SERVER_REPORTS_VBUSY
+Language=English
+Server %1 reported busy when accessing volume %2.
+.
+
+MessageId=
+Severity=Informational
+Facility=System
+SymbolicName=MSG_SERVER_REPORTS_VRESTARTING
+Language=English
+Server %1 reported restarting when accessing volume %2.
+.
+
+MessageId=
+Severity=Informational
+Facility=System
+SymbolicName=MSG_ALL_SERVERS_BUSY
+Language=English
+All servers are busy when accessing cell %1 volume %2.
+.
+
+MessageId=
+Severity=Informational
+Facility=System
+SymbolicName=MSG_ALL_SERVERS_OFFLINE
+Language=English
+All servers are offline when accessing cell %1 volume %2.
+.
+
+MessageId=
+Severity=Informational
+Facility=System
+SymbolicName=MSG_ALL_SERVERS_DOWN
+Language=English
+All servers are unreachable when accessing cell %1 volume %2.
+.
+
+
;#endif /* __AFSD_EVENTMESSAGES_H_ 1 */
long code;
char addr[16]="unknown";
int forcing_new = 0;
+ char *format;
+ DWORD msgID;
osi_Log2(afsd_logp, "cm_Analyze connp 0x%p, code 0x%x",
connp, errorCode);
}
else if (errorCode == CM_ERROR_ALLDOWN) {
- osi_Log0(afsd_logp, "cm_Analyze passed CM_ERROR_ALLDOWN.");
/* Servers marked DOWN will be restored by the background daemon
* thread as they become available. The volume status is
* updated as the server state changes.
*/
+ if (fidp) {
+ osi_Log2(afsd_logp, "cm_Analyze passed CM_ERROR_DOWN (FS cell %s vol 0x%x)",
+ cellp->name, fidp->volume);
+ msgID = MSG_ALL_SERVERS_DOWN;
+ format = "All servers are unreachable when accessing cell %s volume %d.";
+ LogEvent(EVENTLOG_WARNING_TYPE, msgID, cellp->name, fidp->volume);
+ } else {
+ osi_Log0(afsd_logp, "cm_Analyze passed CM_ERROR_ALLDOWN (VL Server)");
+ }
}
else if (errorCode == CM_ERROR_ALLOFFLINE) {
- osi_Log0(afsd_logp, "cm_Analyze passed CM_ERROR_ALLOFFLINE.");
/* Volume instances marked offline will be restored by the
* background daemon thread as they become available
*/
if (fidp) {
+ osi_Log2(afsd_logp, "cm_Analyze passed CM_ERROR_ALLOFFLINE (FS cell %s vol 0x%x)",
+ cellp->name, fidp->volume);
+ msgID = MSG_ALL_SERVERS_OFFLINE;
+ format = "All servers are offline when accessing cell %s volume %d.";
+ LogEvent(EVENTLOG_WARNING_TYPE, msgID, cellp->name, fidp->volume);
+
code = cm_FindVolumeByID(cellp, fidp->volume, userp, reqp,
CM_GETVOL_FLAG_NO_LRU_UPDATE,
&volp);
lock_ReleaseRead(&cm_volumeLock);
volp = NULL;
}
- }
+ } else {
+ osi_Log0(afsd_logp, "cm_Analyze passed CM_ERROR_ALLOFFLINE (VL Server)");
+ }
}
else if (errorCode == CM_ERROR_ALLBUSY) {
/* Volumes that are busy cannot be determined to be non-busy
* without actually attempting to access them.
*/
- osi_Log0(afsd_logp, "cm_Analyze passed CM_ERROR_ALLBUSY.");
-
if (fidp) { /* File Server query */
+ osi_Log2(afsd_logp, "cm_Analyze passed CM_ERROR_ALLBUSY (FS cell %s vol 0x%x)",
+ cellp->name, fidp->volume);
+ msgID = MSG_ALL_SERVERS_BUSY;
+ format = "All servers are busy when accessing cell %s volume %d.";
+ LogEvent(EVENTLOG_WARNING_TYPE, msgID, cellp->name, fidp->volume);
+
code = cm_FindVolumeByID(cellp, fidp->volume, userp, reqp,
CM_GETVOL_FLAG_NO_LRU_UPDATE,
&volp);
volp = NULL;
}
} else { /* VL Server query */
+ osi_Log0(afsd_logp, "cm_Analyze passed CM_ERROR_ALLBUSY (VL Server).");
+
if (timeLeft > 7) {
thrd_Sleep(5000);
free_svr_list = 1;
}
}
+
+ switch ( errorCode ) {
+ case VBUSY:
+ msgID = MSG_SERVER_REPORTS_VBUSY;
+ format = "Server %s reported busy when accessing volume %d.";
+ break;
+ case VRESTARTING:
+ msgID = MSG_SERVER_REPORTS_VRESTARTING;
+ format = "Server %s reported restarting when accessing volume %d.";
+ break;
+ }
+
+ if (serverp && fidp) {
+ /* Log server being offline for this volume */
+ sprintf(addr, "%d.%d.%d.%d",
+ ((serverp->addr.sin_addr.s_addr & 0xff)),
+ ((serverp->addr.sin_addr.s_addr & 0xff00)>> 8),
+ ((serverp->addr.sin_addr.s_addr & 0xff0000)>> 16),
+ ((serverp->addr.sin_addr.s_addr & 0xff000000)>> 24));
+
+ osi_Log2(afsd_logp, format, osi_LogSaveString(afsd_logp,addr), fidp->volume);
+ LogEvent(EVENTLOG_WARNING_TYPE, msgID, addr, fidp->volume);
+ }
+
lock_ObtainWrite(&cm_serverLock);
for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
if (tsrp->status == srv_deleted)
else if (errorCode == VNOVOL || errorCode == VMOVED || errorCode == VOFFLINE ||
errorCode == VSALVAGE || errorCode == VNOSERVICE || errorCode == VIO)
{
- char *format;
- DWORD msgID;
-
/* In case of timeout */
reqp->volumeError = errorCode;