DEVEL15-windows-shutdown-20090628
[openafs.git] / src / WINNT / afsd / cm_daemon.c
index d90cf80..63e5c89 100644 (file)
 #include <afs/param.h>
 #include <afs/stds.h>
 
-#ifndef DJGPP
 #include <windows.h>
 #include <winsock2.h>
 #include <iphlpapi.h>
-#else
-#include <netdb.h>
-#endif /* !DJGPP */
 #include <stdlib.h>
 #include <malloc.h>
 #include <string.h>
 
 /* in seconds */
 long cm_daemonCheckDownInterval  = 180;
-long cm_daemonCheckUpInterval    = 600;
+long cm_daemonCheckUpInterval    = 240;
 long cm_daemonCheckVolInterval   = 3600;
 long cm_daemonCheckCBInterval    = 60;
+long cm_daemonCheckVolCBInterval = 0;   /* 0: cm_Daemon skips the cm_VolumeRenewROCallbacks() pass */
 long cm_daemonCheckLockInterval  = 60;
 long cm_daemonTokenCheckInterval = 180;
+long cm_daemonCheckOfflineVolInterval = 600;
+long cm_daemonPerformanceTuningInterval = 0;   /* 0: performance tuning is neither initialised nor run */
 
 osi_rwlock_t cm_daemonLock;
 
@@ -46,31 +45,54 @@ int cm_bkgWaitingForCount;  /* true if someone's waiting for cm_bkgQueueCount to
 cm_bkgRequest_t *cm_bkgListp;          /* first elt in the list of requests */
 cm_bkgRequest_t *cm_bkgListEndp;       /* last elt in the list of requests */
 
-static int daemon_ShutdownFlag = 0;
+int daemon_ShutdownFlag = 0;        /* set to 1 by cm_DaemonShutdown(); polled by cm_IpAddrDaemon() and cm_Daemon() */
+static int cm_nDaemons = 0;         /* background daemon count, capped at CM_MAX_DAEMONS by cm_InitDaemon() */
+static time_t lastIPAddrChange = 0; /* stamped by cm_IpAddrDaemon(), read and zeroed by cm_Daemon(); 0 = nothing pending */
+/* Events the daemon threads signal; cm_DaemonShutdown() waits on each non-NULL handle. */
+static EVENT_HANDLE cm_Daemon_ShutdownEvent = NULL;
+static EVENT_HANDLE cm_IPAddrDaemon_ShutdownEvent = NULL;
+static EVENT_HANDLE cm_BkgDaemon_ShutdownEvent[CM_MAX_DAEMONS] = 
+       {NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL};   /* eight explicit entries; static storage zero-fills any remainder */
 
-#ifndef DJGPP
 void cm_IpAddrDaemon(long parm)
 {
     extern void smb_CheckVCs(void);
+    char * name = "cm_IPAddrDaemon_ShutdownEvent";
+    /* Create the event that cm_DaemonShutdown() waits on for this thread. */
+    cm_IPAddrDaemon_ShutdownEvent = thrd_CreateEvent(NULL, FALSE, FALSE, name);
+    if ( GetLastError() == ERROR_ALREADY_EXISTS )
+        afsi_log("Event Object Already Exists: %s", name);
 
     rx_StartClientThread();
 
     while (daemon_ShutdownFlag == 0) {
-       DWORD Result = NotifyAddrChange(NULL,NULL);
-       if (Result == NO_ERROR && daemon_ShutdownFlag == 0) {
-           osi_Log0(afsd_logp, "cm_IpAddrDaemon CheckDownServers");
-           Sleep(2500);
-           cm_ForceNewConnectionsAllServers();
-            cm_CheckServers(CM_FLAG_CHECKUPSERVERS | CM_FLAG_CHECKDOWNSERVERS, NULL);
-           smb_CheckVCs();
+       DWORD Result;
+        /* NOTE(review): the event is signalled before the NotifyAddrChange() call, apparently so a shutdown waiter is not held up while that call is pending -- confirm */
+        thrd_SetEvent(cm_IPAddrDaemon_ShutdownEvent);
+        Result = NotifyAddrChange(NULL,NULL);
+        if (Result == NO_ERROR && daemon_ShutdownFlag == 0) {
+            lastIPAddrChange = osi_Time();     /* cm_Daemon() reads this stamp and performs the server/VC checks */
+            smb_SetLanAdapterChangeDetected();
+            cm_SetLanAdapterChangeDetected();
+            thrd_ResetEvent(cm_IPAddrDaemon_ShutdownEvent);   /* still running: withdraw the early signal */
        }       
     }
+    /* Loop exited because daemon_ShutdownFlag is set: report that this thread is done. */
+    thrd_SetEvent(cm_IPAddrDaemon_ShutdownEvent);
 }
-#endif
 
-void cm_BkgDaemon(long parm)
+void cm_BkgDaemon(void * parm)
 {
     cm_bkgRequest_t *rp;
+    afs_int32 code;
+    char name[32] = "";
+    long daemonID = (long)parm;     /* daemon index, passed by cm_InitDaemon() as the thread argument */
+
+    snprintf(name, sizeof(name), "cm_BkgDaemon_ShutdownEvent%ld", daemonID);   /* %ld: daemonID is a long, %d mismatched the argument type */
+
+    cm_BkgDaemon_ShutdownEvent[daemonID] = thrd_CreateEvent(NULL, FALSE, FALSE, name);   /* signalled when this daemon exits */
+    if ( GetLastError() == ERROR_ALREADY_EXISTS )
+        afsi_log("Event Object Already Exists: %s", name);
 
     rx_StartClientThread();
 
@@ -83,25 +105,70 @@ void cm_BkgDaemon(long parm)
         }
                 
         /* we found a request */
-        rp = cm_bkgListEndp;
-        cm_bkgListEndp = (cm_bkgRequest_t *) osi_QPrev(&rp->q);
-        osi_QRemove((osi_queue_t **) &cm_bkgListp, &rp->q);
-        osi_assert(cm_bkgQueueCount-- > 0);
+        for (rp = cm_bkgListEndp; rp; rp = (cm_bkgRequest_t *) osi_QPrev(&rp->q))
+       {
+           if (cm_ServerAvailable(&rp->scp->fid, rp->userp) && 
+                !(rp->scp->flags & CM_SCACHEFLAG_DATASTORING))
+               break;
+       }
+       if (rp == NULL) {
+           /* we couldn't find a request that we could process at the current time */
+           lock_ReleaseWrite(&cm_daemonLock);
+           Sleep(1000);
+           lock_ObtainWrite(&cm_daemonLock);
+           continue;
+       }
+
+        osi_QRemoveHT((osi_queue_t **) &cm_bkgListp, (osi_queue_t **) &cm_bkgListEndp, &rp->q);
+        osi_assertx(cm_bkgQueueCount-- > 0, "cm_bkgQueueCount 0");
         lock_ReleaseWrite(&cm_daemonLock);
 
-       osi_Log2(afsd_logp,"cm_BkgDaemon (before) scp 0x%x ref %d",rp->scp, rp->scp->refCount);
+       osi_Log1(afsd_logp,"cm_BkgDaemon processing request 0x%p", rp);
 
-        (*rp->procp)(rp->scp, rp->p1, rp->p2, rp->p3, rp->p4, rp->userp);
-                
+#ifdef DEBUG_REFCOUNT
+       osi_Log2(afsd_logp,"cm_BkgDaemon (before) scp 0x%x ref %d",rp->scp, rp->scp->refCount);
+#endif
+        code = (*rp->procp)(rp->scp, rp->p1, rp->p2, rp->p3, rp->p4, rp->userp);
+#ifdef DEBUG_REFCOUNT                
        osi_Log2(afsd_logp,"cm_BkgDaemon (after) scp 0x%x ref %d",rp->scp, rp->scp->refCount);
+#endif
 
-       cm_ReleaseUser(rp->userp);
-        cm_ReleaseSCache(rp->scp);
-        free(rp);
-
-        lock_ObtainWrite(&cm_daemonLock);
+        /* 
+         * Keep the following list synchronized with the
+         * error code list in cm_BkgStore.  
+         * cm_SyncOpDone(CM_SCACHESYNC_ASYNCSTORE) will be called there unless
+         * one of these errors has occurred.
+         */
+       switch ( code ) {
+       case CM_ERROR_TIMEDOUT: /* or server restarting */
+       case CM_ERROR_RETRY:
+       case CM_ERROR_WOULDBLOCK:
+       case CM_ERROR_ALLBUSY:
+       case CM_ERROR_ALLDOWN:
+       case CM_ERROR_ALLOFFLINE:
+       case CM_ERROR_PARTIALWRITE:
+           osi_Log2(afsd_logp,"cm_BkgDaemon re-queueing failed request 0x%p code 0x%x",
+                    rp, code);
+            lock_ObtainWrite(&cm_daemonLock);
+           cm_bkgQueueCount++;
+           osi_QAddT((osi_queue_t **) &cm_bkgListp, (osi_queue_t **)&cm_bkgListEndp, &rp->q);
+           break;
+       case 0:  /* success */
+       default: /* other error */
+           if (code == 0)
+                osi_Log1(afsd_logp,"cm_BkgDaemon SUCCESS: request 0x%p", rp);
+            else
+                osi_Log2(afsd_logp,"cm_BkgDaemon FAILED: request dropped 0x%p code 0x%x",
+                    rp, code);
+           cm_ReleaseUser(rp->userp);
+           cm_ReleaseSCache(rp->scp);
+           free(rp);
+            lock_ObtainWrite(&cm_daemonLock);
+       }
     }
     lock_ReleaseWrite(&cm_daemonLock);
+
+    thrd_SetEvent(cm_BkgDaemon_ShutdownEvent[daemonID]);
 }
 
 void cm_QueueBKGRequest(cm_scache_t *scp, cm_bkgProc_t *procp, afs_uint32 p1, afs_uint32 p2, afs_uint32 p3, afs_uint32 p4,
@@ -206,42 +273,72 @@ cm_DaemonCheckInit(void)
        return;
 
     dummyLen = sizeof(DWORD);
-    code = RegQueryValueEx(parmKey, "DownServerCheckInterval", NULL, NULL,
+    code = RegQueryValueEx(parmKey, "daemonCheckDownInterval", NULL, NULL,
                            (BYTE *) &dummy, &dummyLen);
-    if (code == ERROR_SUCCESS)
+    if (code == ERROR_SUCCESS && dummy)
        cm_daemonCheckDownInterval = dummy;
-    
+    afsi_log("daemonCheckDownInterval is %d", cm_daemonCheckDownInterval);
+
     dummyLen = sizeof(DWORD);
-    code = RegQueryValueEx(parmKey, "UpServerCheckInterval", NULL, NULL,
+    code = RegQueryValueEx(parmKey, "daemonCheckUpInterval", NULL, NULL,
                            (BYTE *) &dummy, &dummyLen);
-    if (code == ERROR_SUCCESS)
+    if (code == ERROR_SUCCESS && dummy)
        cm_daemonCheckUpInterval = dummy;
-    
+    afsi_log("daemonCheckUpInterval is %d", cm_daemonCheckUpInterval);
+
     dummyLen = sizeof(DWORD);
-    code = RegQueryValueEx(parmKey, "VolumeCheckInterval", NULL, NULL,
+    code = RegQueryValueEx(parmKey, "daemonCheckVolInterval", NULL, NULL,
                            (BYTE *) &dummy, &dummyLen);
-    if (code == ERROR_SUCCESS)
+    if (code == ERROR_SUCCESS && dummy)
        cm_daemonCheckVolInterval = dummy;
-    
+    afsi_log("daemonCheckVolInterval is %d", cm_daemonCheckVolInterval);
+
     dummyLen = sizeof(DWORD);
-    code = RegQueryValueEx(parmKey, "CallbackCheckInterval", NULL, NULL,
+    code = RegQueryValueEx(parmKey, "daemonCheckCBInterval", NULL, NULL,
                            (BYTE *) &dummy, &dummyLen);
-    if (code == ERROR_SUCCESS)
+    if (code == ERROR_SUCCESS && dummy)
        cm_daemonCheckCBInterval = dummy;
-    
+    afsi_log("daemonCheckCBInterval is %d", cm_daemonCheckCBInterval);
+
     dummyLen = sizeof(DWORD);
-    code = RegQueryValueEx(parmKey, "LockCheckInterval", NULL, NULL,
+    code = RegQueryValueEx(parmKey, "daemonCheckVolCBInterval", NULL, NULL,
                            (BYTE *) &dummy, &dummyLen);
-    if (code == ERROR_SUCCESS)
+    if (code == ERROR_SUCCESS && dummy)
+       cm_daemonCheckVolCBInterval = dummy;
+    afsi_log("daemonCheckVolCBInterval is %d", cm_daemonCheckVolCBInterval);
+
+    dummyLen = sizeof(DWORD);
+    code = RegQueryValueEx(parmKey, "daemonCheckLockInterval", NULL, NULL,
+                           (BYTE *) &dummy, &dummyLen);
+    if (code == ERROR_SUCCESS && dummy)
        cm_daemonCheckLockInterval = dummy;
+    afsi_log("daemonCheckLockInterval is %d", cm_daemonCheckLockInterval);
+
+    dummyLen = sizeof(DWORD);
+    code = RegQueryValueEx(parmKey, "daemonCheckTokenInterval", NULL, NULL,
+                           (BYTE *) &dummy, &dummyLen);
+    if (code == ERROR_SUCCESS && dummy)
+       cm_daemonTokenCheckInterval = dummy;
+    afsi_log("daemonCheckTokenInterval is %d", cm_daemonTokenCheckInterval);
+
+    dummyLen = sizeof(DWORD);
+    code = RegQueryValueEx(parmKey, "daemonCheckOfflineVolInterval", NULL, NULL,
+                           (BYTE *) &dummy, &dummyLen);
+    if (code == ERROR_SUCCESS && dummy)
+       cm_daemonCheckOfflineVolInterval = dummy;
+    afsi_log("daemonCheckOfflineVolInterval is %d", cm_daemonCheckOfflineVolInterval);
     
     dummyLen = sizeof(DWORD);
-    code = RegQueryValueEx(parmKey, "TokenCheckInterval", NULL, NULL,
+    code = RegQueryValueEx(parmKey, "daemonPerformanceTuningInterval", NULL, NULL,
                            (BYTE *) &dummy, &dummyLen);
     if (code == ERROR_SUCCESS)
-       cm_daemonTokenCheckInterval = dummy;
+       cm_daemonPerformanceTuningInterval = dummy;
+    afsi_log("daemonPerformanceTuningInterval is %d", cm_daemonPerformanceTuningInterval);
     
     RegCloseKey(parmKey);
+
+    if (cm_daemonPerformanceTuningInterval)
+        cm_PerformanceTuningInit();
 }
 
 /* periodic check daemon */
@@ -251,14 +348,23 @@ void cm_Daemon(long parm)
     time_t lastLockCheck;
     time_t lastVolCheck;
     time_t lastCBExpirationCheck;
+    time_t lastVolCBRenewalCheck;
     time_t lastDownServerCheck;
     time_t lastUpServerCheck;
     time_t lastTokenCacheCheck;
+    time_t lastBusyVolCheck;
+    time_t lastPerformanceCheck;
     char thostName[200];
     unsigned long code;
     struct hostent *thp;
     HMODULE hHookDll;
+    char * name = "cm_Daemon_ShutdownEvent";
     int configureFirewall = IsWindowsFirewallPresent();
+    int bAddrChangeCheck = 0;
+
+    cm_Daemon_ShutdownEvent = thrd_CreateEvent(NULL, FALSE, FALSE, name);
+    if ( GetLastError() == ERROR_ALREADY_EXISTS )
+        afsi_log("Event Object Already Exists: %s", name);
 
     if (!configureFirewall) {
        afsi_log("No Windows Firewall detected");
@@ -289,13 +395,26 @@ void cm_Daemon(long parm)
     now = osi_Time();
     lastVolCheck = now - cm_daemonCheckVolInterval/2 + (rand() % cm_daemonCheckVolInterval);
     lastCBExpirationCheck = now - cm_daemonCheckCBInterval/2 + (rand() % cm_daemonCheckCBInterval);
+    if (cm_daemonCheckVolCBInterval)
+        lastVolCBRenewalCheck = now - cm_daemonCheckVolCBInterval/2 + (rand() % cm_daemonCheckVolCBInterval);
     lastLockCheck = now - cm_daemonCheckLockInterval/2 + (rand() % cm_daemonCheckLockInterval);
     lastDownServerCheck = now - cm_daemonCheckDownInterval/2 + (rand() % cm_daemonCheckDownInterval);
     lastUpServerCheck = now - cm_daemonCheckUpInterval/2 + (rand() % cm_daemonCheckUpInterval);
     lastTokenCacheCheck = now - cm_daemonTokenCheckInterval/2 + (rand() % cm_daemonTokenCheckInterval);
+    lastBusyVolCheck = now - cm_daemonCheckOfflineVolInterval/2 + (rand() % cm_daemonCheckOfflineVolInterval);   /* same jittered seed as the timers above: '+', not '*' */
+    if (cm_daemonPerformanceTuningInterval)     /* 0 disables tuning; the guard also avoids a modulo by zero */
+        lastPerformanceCheck = now - cm_daemonPerformanceTuningInterval/2 + (rand() % cm_daemonPerformanceTuningInterval);
 
     while (daemon_ShutdownFlag == 0) {
-       if (configureFirewall) {
+       /* check to see if the listener threads halted due to network 
+        * disconnect or other issues.  If so, attempt to restart them.
+        */
+       smb_RestartListeners(0);
+
+        if (daemon_ShutdownFlag == 1)
+            break;
+
+        if (configureFirewall) {
            /* Open Microsoft Firewall to allow in port 7001 */
            switch (icf_CheckAndAddAFSPorts(AFS_PORTSET_CLIENT)) {
            case 0:
@@ -318,49 +437,103 @@ void cm_Daemon(long parm)
 
         /* find out what time it is */
         now = osi_Time();
+        
+        /* Determine whether an address change took place that we need to respond to */
+        if (bAddrChangeCheck)
+            bAddrChangeCheck = 0;       /* the previous pass already acted on it */
+        /* 'now' counts seconds: wait ~2.5s as the old Sleep(2500) did; "+ 2500" here deferred the reaction ~41 minutes */
+        if (lastIPAddrChange != 0 && lastIPAddrChange + 2 < now) {
+            bAddrChangeCheck = 1;
+            lastIPAddrChange = 0;       /* consume the notification so it is handled once */
+        }
 
         /* check down servers */
-        if (now > lastDownServerCheck + cm_daemonCheckDownInterval) {
+        if ((bAddrChangeCheck || now > lastDownServerCheck + cm_daemonCheckDownInterval) &&
+            daemon_ShutdownFlag == 0) {
             lastDownServerCheck = now;
            osi_Log0(afsd_logp, "cm_Daemon CheckDownServers");
             cm_CheckServers(CM_FLAG_CHECKDOWNSERVERS, NULL);
+            if (daemon_ShutdownFlag == 1)
+                break;
            now = osi_Time();
         }
 
+        if (bAddrChangeCheck)
+            cm_ForceNewConnectionsAllServers();
+
         /* check up servers */
-        if (now > lastUpServerCheck + cm_daemonCheckUpInterval) {
+        if ((bAddrChangeCheck || now > lastUpServerCheck + cm_daemonCheckUpInterval) &&
+            daemon_ShutdownFlag == 0) {
             lastUpServerCheck = now;
            osi_Log0(afsd_logp, "cm_Daemon CheckUpServers");
             cm_CheckServers(CM_FLAG_CHECKUPSERVERS, NULL);
+            if (daemon_ShutdownFlag == 1)
+                break;
            now = osi_Time();
         }
 
-        if (now > lastVolCheck + cm_daemonCheckVolInterval) {
+        if (bAddrChangeCheck) {
+            smb_CheckVCs();
+            cm_VolStatus_Network_Addr_Change();
+        }
+
+        if (now > lastVolCheck + cm_daemonCheckVolInterval &&
+            daemon_ShutdownFlag == 0) {
             lastVolCheck = now;
-            cm_CheckVolumes();
+            cm_RefreshVolumes();
+            if (daemon_ShutdownFlag == 1)
+                break;
            now = osi_Time();
         }
 
-        if (now > lastCBExpirationCheck + cm_daemonCheckCBInterval) {
+        if (cm_daemonCheckVolCBInterval && 
+            now > lastVolCBRenewalCheck + cm_daemonCheckVolCBInterval &&
+            daemon_ShutdownFlag == 0) {
+            lastVolCBRenewalCheck = now;
+            cm_VolumeRenewROCallbacks();
+            if (daemon_ShutdownFlag == 1)
+                break;
+            now = osi_Time();
+        }
+
+        if ((bAddrChangeCheck || now > lastBusyVolCheck + cm_daemonCheckOfflineVolInterval) &&
+            daemon_ShutdownFlag == 0) {
+            lastBusyVolCheck = now;     /* advance this pass's own timer; lastVolCheck belongs to cm_RefreshVolumes() */
+            cm_CheckOfflineVolumes();
+            if (daemon_ShutdownFlag == 1)
+                break;
+            now = osi_Time();
+        }
+
+        if (now > lastCBExpirationCheck + cm_daemonCheckCBInterval &&
+            daemon_ShutdownFlag == 0) {
             lastCBExpirationCheck = now;
             cm_CheckCBExpiration();
+            if (daemon_ShutdownFlag == 1)
+                break;
            now = osi_Time();
         }
 
-        if (now > lastLockCheck + cm_daemonCheckLockInterval) {
+        if (now > lastLockCheck + cm_daemonCheckLockInterval &&
+            daemon_ShutdownFlag == 0) {
             lastLockCheck = now;
             cm_CheckLocks();
+            if (daemon_ShutdownFlag == 1)
+                break;
            now = osi_Time();
         }
 
-        if (now > lastTokenCacheCheck + cm_daemonTokenCheckInterval) {
+        if (now > lastTokenCacheCheck + cm_daemonTokenCheckInterval &&
+            daemon_ShutdownFlag == 0) {
             lastTokenCacheCheck = now;
             cm_CheckTokenCache(now);
+            if (daemon_ShutdownFlag == 1)
+                break;
            now = osi_Time();
         }
 
         /* allow an exit to be called prior to stopping the service */
-        hHookDll = LoadLibrary(AFSD_HOOK_DLL);
+        hHookDll = cm_LoadAfsdHookLib();
         if (hHookDll)
         {
             BOOL hookRc = TRUE;
@@ -378,15 +551,44 @@ void cm_Daemon(long parm)
             }
         }
 
-       thrd_Sleep(30 * 1000);          /* sleep 30 seconds */
-        if (daemon_ShutdownFlag == 1)
-            return;
+        if (daemon_ShutdownFlag == 1) {
+            break;
+        }
+
+        if (cm_daemonPerformanceTuningInterval &&
+            now > lastPerformanceCheck + cm_daemonPerformanceTuningInterval &&
+             daemon_ShutdownFlag == 0) {
+            lastPerformanceCheck = now;
+            cm_PerformanceTuningCheck();
+            if (daemon_ShutdownFlag == 1)
+                break;
+           now = osi_Time();
+        }
+        
+        thrd_Sleep(10000);             /* sleep 10 seconds */
     }
+    thrd_SetEvent(cm_Daemon_ShutdownEvent);
 }       
 
 void cm_DaemonShutdown(void)
 {
+    int i;
+    DWORD code;     /* receives each wait result; not examined */
+    /* Raise the shutdown flag, then block until every daemon that created an event has signalled it. */
     daemon_ShutdownFlag = 1;
+    osi_Wakeup((LONG_PTR) &cm_bkgListp);    /* NOTE(review): presumably wakes threads sleeping on the background request list -- the sleeper is outside this view */
+
+    /* wait (without timeout) for cm_Daemon, then each background daemon, then the IP address daemon */
+    if (cm_Daemon_ShutdownEvent)
+        code = thrd_WaitForSingleObject_Event(cm_Daemon_ShutdownEvent, INFINITE); 
+
+    for ( i=0; i<cm_nDaemons; i++) {
+        if (cm_BkgDaemon_ShutdownEvent[i])
+            code = thrd_WaitForSingleObject_Event(cm_BkgDaemon_ShutdownEvent[i], INFINITE);
+    }
+
+    if (cm_IPAddrDaemon_ShutdownEvent)
+        code = thrd_WaitForSingleObject_Event(cm_IPAddrDaemon_ShutdownEvent, INFINITE);
 }
 
 void cm_InitDaemon(int nDaemons)
@@ -395,30 +597,31 @@ void cm_InitDaemon(int nDaemons)
     long pid;
     thread_t phandle;
     int i;
-        
+
+    cm_nDaemons = (nDaemons > CM_MAX_DAEMONS) ? CM_MAX_DAEMONS : nDaemons;
+    
     if (osi_Once(&once)) {
-        lock_InitializeRWLock(&cm_daemonLock, "cm_daemonLock");
+        lock_InitializeRWLock(&cm_daemonLock, "cm_daemonLock", 
+                               LOCK_HIERARCHY_DAEMON_GLOBAL);
         osi_EndOnce(&once);
 
-#ifndef DJGPP
        /* creating IP Address Change monitor daemon */
         phandle = thrd_Create((SecurityAttrib) 0, 0,
                                (ThreadFunc) cm_IpAddrDaemon, 0, 0, &pid, "cm_IpAddrDaemon");
-        osi_assert(phandle != NULL);
+        osi_assertx(phandle != NULL, "cm_IpAddrDaemon thread creation failure");
         thrd_CloseHandle(phandle);
-#endif /* DJGPP */
 
         /* creating pinging daemon */
         phandle = thrd_Create((SecurityAttrib) 0, 0,
                                (ThreadFunc) cm_Daemon, 0, 0, &pid, "cm_Daemon");
-        osi_assert(phandle != NULL);
+        osi_assertx(phandle != NULL, "cm_Daemon thread creation failure");
         thrd_CloseHandle(phandle);
 
-       for(i=0; i < nDaemons; i++) {
+       for(i=0; i < cm_nDaemons; i++) {
             phandle = thrd_Create((SecurityAttrib) 0, 0,
-                                   (ThreadFunc) cm_BkgDaemon, 0, 0, &pid,
+                                   (ThreadFunc) cm_BkgDaemon, (LPVOID)i, 0, &pid,
                                    "cm_BkgDaemon");
-            osi_assert(phandle != NULL);
+            osi_assertx(phandle != NULL, "cm_BkgDaemon thread creation failure");
             thrd_CloseHandle(phandle);
         }
     }