viced: avoid aborting on host table exhaustion
[openafs.git] / src / viced / host.c
index 212c6c7..0630067 100644 (file)
@@ -200,7 +200,7 @@ GetHTBlock(void)
 
     if (HTBlocks == h_MAXHOSTTABLES) {
        ViceLog(0, ("h_MAXHOSTTABLES reached\n"));
-       ShutDownAndCore(PANIC);
+       return;
     }
 
     block = (struct HTBlock *)malloc(sizeof(struct HTBlock));
@@ -233,7 +233,8 @@ GetHT(void)
 
     if (HTFree == NULL)
        GetHTBlock();
-    osi_Assert(HTFree != NULL);
+    if (HTFree == NULL)
+       return NULL;
     entry = HTFree;
     HTFree = entry->next;
     HTs++;
@@ -674,7 +675,7 @@ h_flushhostcps(afs_uint32 hostaddr, afs_uint16 hport)
  */
 #define        DEF_ROPCONS 2115
 
-struct host *
+static struct host *
 h_Alloc_r(struct rx_connection *r_con)
 {
     struct servent *serverentry;
@@ -684,6 +685,8 @@ h_Alloc_r(struct rx_connection *r_con)
 #endif /* FS_STATS_DETAILED */
 
     host = GetHT();
+    if (!host)
+       return NULL;
 
     host->host = rxr_HostOf(r_con);
     host->port = rxr_PortOf(r_con);
@@ -745,6 +748,7 @@ h_SetupCallbackConn_r(struct host * host)
        rx_NewConnection(host->host, host->port, 1, sc, 0);
     rx_SetConnDeadTime(host->callback_rxcon, 50);
     rx_SetConnHardDeadTime(host->callback_rxcon, AFS_HARDDEADTIME);
+    rx_SetConnSecondsUntilNatPing(host->callback_rxcon, 20);
 }
 
 /* h_Lookup_r
@@ -1000,8 +1004,22 @@ h_Enumerate(int (*proc) (struct host*, void *), void *param)
        h_Release_r(list[i]);
        H_UNLOCK;
        /* bail out of the enumeration early */
-       if (H_ENUMERATE_ISSET_BAIL(flags))
+       if (H_ENUMERATE_ISSET_BAIL(flags)) {
            break;
+       } else if (flags) {
+           ViceLog(0, ("h_Enumerate got back invalid return value %d\n", flags));
+           ShutDownAndCore(PANIC);
+       }
+    }
+    if (i < count-1) {
+       /* we bailed out of enumerating hosts early; we still have holds on
+        * some of the hosts in 'list', so release them */
+       i++;
+       H_LOCK;
+       for ( ; i < count; i++) {
+           h_Release_r(list[i]);
+       }
+       H_UNLOCK;
     }
     free((void *)list);
 }      /* h_Enumerate */
@@ -1083,7 +1101,13 @@ h_Enumerate_r(int (*proc) (struct host *, void *),
            flags = (*proc) (host, param);
            if (H_ENUMERATE_ISSET_BAIL(flags)) {
                h_Release_r(host); /* this might free up the host */
+               if (next) {
+                   h_Release_r(next);
+               }
                break;
+           } else if (flags) {
+               ViceLog(0, ("h_Enumerate_r got back invalid return value %d\n", flags));
+               ShutDownAndCore(PANIC);
            }
        }
        h_Release_r(host); /* this might free up the host */
@@ -1291,12 +1315,7 @@ removeAddress_r(struct host *host, afs_uint32 addr, afs_uint16 port)
                     rxconn = NULL;
                 }
 
-                if (!sc)
-                    sc = rxnull_NewClientSecurityObject();
-                host->callback_rxcon =
-                    rx_NewConnection(host->host, host->port, 1, sc, 0);
-                rx_SetConnDeadTime(host->callback_rxcon, 50);
-                rx_SetConnHardDeadTime(host->callback_rxcon, AFS_HARDDEADTIME);
+               h_SetupCallbackConn_r(host);
             }
         } else {
             /* not the primary addr/port, just invalidate it */
@@ -1715,6 +1734,7 @@ h_GetHost_r(struct rx_connection *tcon)
             cb_in = rx_NewConnection(haddr, hport, 1, sc, 0);
             rx_SetConnDeadTime(cb_in, 50);
             rx_SetConnHardDeadTime(cb_in, AFS_HARDDEADTIME);
+           rx_SetConnSecondsUntilNatPing(cb_in, 20);
 
             code =
                 RXAFSCB_TellMeAboutYourself(cb_in, &interf, &caps);
@@ -1934,6 +1954,8 @@ h_GetHost_r(struct rx_connection *tcon)
        }
     } else {
        host = h_Alloc_r(tcon); /* returned held and locked */
+       if (!host)
+           goto gethost_out;
        h_gethostcps_r(host, FT_ApproxTime());
        if (!(host->Console & 1)) {
            int pident = 0;
@@ -2342,11 +2364,12 @@ h_FindClient_r(struct rx_connection *tcon)
     client = (struct client *)rx_GetSpecific(tcon, rxcon_client_key);
     if (client && client->sid == rxr_CidOf(tcon)
        && client->VenusEpoch == rxr_GetEpoch(tcon)
-       && !(client->host->hostFlags & HOSTDELETED)) {
+       && !(client->host->hostFlags & HOSTDELETED)
+       && !client->deleted) {
 
        client->refCount++;
        h_Hold_r(client->host);
-       if (!client->deleted && client->prfail != 2) {
+       if (client->prfail != 2) {
            /* Could add shared lock on client here */
            /* note that we don't have to lock entry in this path to
             * ensure CPS is initialized, since we don't call rx_SetSpecific
@@ -2543,17 +2566,25 @@ h_FindClient_r(struct rx_connection *tcon)
                created = 0;
            }
            oldClient->refCount++;
+
+           h_Hold_r(oldClient->host);
+           h_Release_r(client->host);
+
            H_UNLOCK;
            ObtainWriteLock(&oldClient->lock);
            H_LOCK;
            client = oldClient;
+           host = oldClient->host;
        } else {
-           ViceLog(0, ("FindClient: deleted client %p(%x) already had "
-                       "conn %p (host %s:%d), stolen by client %p(%x)\n",
-                       oldClient, oldClient->sid, tcon,
-                       afs_inet_ntoa_r(rxr_HostOf(tcon), hoststr),
-                       ntohs(rxr_PortOf(tcon)),
-                       client, client->sid));
+           ViceLog(0, ("FindClient: deleted client %p(%x ref %d host %p href "
+                       "%d) already had conn %p (host %s:%d, cid %x), stolen "
+                       "by client %p(%x, ref %d host %p href %d)\n",
+                       oldClient, oldClient->sid, oldClient->refCount,
+                       oldClient->host, oldClient->host->refCount, tcon,
+                       afs_inet_ntoa_r(rxr_HostOf(tcon), hoststr),
+                       ntohs(rxr_PortOf(tcon)), rxr_CidOf(tcon),
+                       client, client->sid, client->refCount,
+                       client->host, client->host->refCount));
            /* rx_SetSpecific will be done immediately below */
        }
     }
@@ -2639,6 +2670,16 @@ GetClient(struct rx_connection *tcon, struct client **cp)
        H_UNLOCK;
        return VICETOKENDEAD;
     }
+    if (client->deleted) {
+       ViceLog(0, ("GetClient: got deleted client, connection will appear "
+                   "anonymous; tcon %p cid %x client %p ref %d host %p "
+                   "(%s:%d) href %d ViceId %d\n",
+                   tcon, rxr_CidOf(tcon), client, client->refCount,
+                   client->host,
+                   afs_inet_ntoa_r(client->host->host, hoststr),
+                   (int)ntohs(client->host->port), client->host->refCount,
+                   (int)client->ViceId));
+    }
 
     client->refCount++;
     *cp = client;
@@ -3326,6 +3367,7 @@ h_stateRestoreHost(struct fs_dump_state * state)
        osi_Assert(hcps != NULL);
     }
 
+    /* for restoring state, we better be able to get a host! */
     host = GetHT();
     osi_Assert(host != NULL);